/[wait]/trunk/lib/WAIT/Table.pm

This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!

Diff of /trunk/lib/WAIT/Table.pm

Parent Directory | Revision Log | View Patch Patch

-revision 10 by ulpfr,
Fri Apr 28 15:40:52 2000 UTC
+revision 34 by ulpfr,
Sun Nov 12 14:22:40 2000 UTC
 Line 1
- #                              -*- Mode: Perl -*-
+ #                              -*- Mode: Cperl -*-
  # Table.pm --
  # ITIID           : $ITI$ $Header $__Header$
  # Author          : Ulrich Pfeifer
  # Created On      : Thu Aug  8 13:05:10 1996
  # Last Modified By: Ulrich Pfeifer
- # Last Modified On: Sun Nov 22 18:44:37 1998
+ # Last Modified On: Sun Nov 12 15:21:19 2000
  # Language        : CPerl
- # Update Count    : 51
+ # Update Count    : 135
  # Status          : Unknown, Use with caution!
  #
  # Copyright (c) 1996-1997, Ulrich Pfeifer
  #
  =head1 NAME
 Line 25 
 WAIT::Table -- Module for maintaining Ta
  =cut
  package WAIT::Table;
+ use WAIT::Table::Handle ();
  require WAIT::Parse::Base;
  use strict;
  use Carp;
+ # use autouse Carp => qw( croak($) );
  use DB_File;
  use Fcntl;
+ use LockFile::Simple ();
  my $USE_RECNO = 0;
  =head2 Creating a Table.
- The constructor WAIT::Table-<gt>new is normally called via the
+ The constructor WAIT::Table-E<gt>new is normally called via the
  create_table method of a database handle. This is not enforced, but
- creating a table doesn not make any sense unless the table is
+ creating a table does not make any sense unless the table is
  registered by the database because the latter implements persistence
  of the meta data. Registering is done automatically by letting the
- database handle create a table.
+ database handle the creation of a table.
-   my $db = create WAIT::Database name => 'sample';
+   my $db = WAIT::Database->create(name => 'sample');
-   my $tb = $db->create_table (name     => 'test',
+   my $tb = $db->create_table(name     => 'test',
-                               attr     => ['docid', 'headline'],
+                              access   => $access,
-                               layout   => $layout,
+                              layout   => $layout,
-                               access   => $access,
+                              attr     => ['docid', 'headline'],
-                              );
+                             );
  The constructor returns a handle for the table. This handle is hidden by the
  table module, to prevent direct access if called via Table.
  =over 10
- =item C<access> => I<accesobj>
+ =item C<access> => I<accessobj>
- A reference to a acces object for the external parts (attributes) of
+ A reference to an access object for the external parts (attributes) of
  tuples. As you may remember, the WAIT System does not enforce that
  objects are completely stored inside the system to avoid duplication.
- There is no (strong) point in storing all you HTML-Documents inside
+ There is no (strong) point in storing all your HTML documents inside
  the system when indexing your WWW-Server.
+ The access object is designed to work like a tied hash. You pass
+ the reference to the object, not the tied hash though. An example
+ implementation of an access class that works for manpages is
+ WAIT::Document::Nroff.
+ The implementation needs to take into account that WAIT will keep this
+ object in a Data::Dumper or Storable database and re-use it when sman
+ is run. So it is not good enough if we can produce the index with it
+ now, when we create or actively access the table; WAIT must also be
+ able to retrieve documents on its own, when we are in a different
+ context. This happens specifically in a retrieval. To get this working
+ seamlessly, the access-defining class must implement a close method.
+ This method will be called before the Data::Dumper dump takes place.
+ At that moment the access-defining class must get rid of all data
+ structures that cannot be reconstructed via the Data::Dumper dump,
+ such as database handles or C pointers.
  =item C<file> => I<fname>
  The filename of the records file. Files for indexes will have I<fname>
- as prefix. I<Mandatory>
+ as prefix. I<Mandatory>, but usually taken care of by the
+ WAIT::Database handle when the constructor is called via
+ WAIT::Database::create_table().
  =item C<name> => I<name>
-Line 73 
 The name of this table. I<Mandatory>
+Line 97 
 The name of this table. I<Mandatory>
  =item C<attr> => [ I<attr> ... ]
- A reference to an array of attribute names. I<Mandatory>
+ A reference to an array of attribute names. WAIT will keep the
+ contents of these attributes in its table. I<Mandatory>
  =item C<djk> => [ I<attr> ... ]
  A reference to an array of attribute names which make up the
- I<disjointness key>. Don't think about it - i's of no use yet;
+ I<disjointness key>. Don't think about it - it's of no use yet;
  =item C<layout> => I<layoutobj>
- A reference to an external parser object. Defaults to anew instance of
+ A reference to an external parser object. Defaults to a new instance
- C<WAIT::Parse::Base>
+ of C<WAIT::Parse::Base>. For an example implementation see
+ WAIT::Parse::Nroff. A layout class can be implemented as a singleton
+ class if you so like.
+ =item C<keyset> => I<keyset>
- =item C<access> => I<accesobj>
+ The set of attributes needed to identify a record. Defaults to all
+ attributes.
- A reference to a acces object for the external parts of tuples.
+ =item C<invindex> => I<inverted index>
+ A reference to an anon array defining attributes of each record that
+ need to be indexed. See the source of smakewhatis for how to set this
+ up.
  =back
-Line 98 
 sub new {
+Line 132 
 sub new {
    my %parm = @_;
    my $self = {};
+   # Check for mandatory attrs early
+   $self->{name}     = $parm{name}     or croak "No name specified";
+   $self->{attr}     = $parm{attr}     or croak "No attributes specified";
    # Do that before we eventually add '_weight' to attributes.
    $self->{keyset}   = $parm{keyset}   || [[@{$parm{attr}}]];
    $self->{mode}     = O_CREAT | O_RDWR;
    # Determine and set up subclass
    $type = ref($type) || $type;
    if (defined $parm{djk}) {
-Line 119 
 sub new {
+Line 159 
 sub new {
    }
    $self->{file}     = $parm{file}     or croak "No file specified";
-   if (-d  $self->{file} or !mkdir($self->{file}, 0775)) {
+   if (-d  $self->{file}){
+     warn "Warning: Directory '$self->{file}' already exists\n";
+   } elsif (!mkdir($self->{file}, 0775)) {
      croak "Could not 'mkdir $self->{file}': $!\n";
    }
-   $self->{name}     = $parm{name}     or croak "No name specified";
-   $self->{attr}     = $parm{attr}     or croak "No attributes specified";
+   my $lockmgr = LockFile::Simple->make(-autoclean => 1);
+   # Acquire a write lock; since we are creating the table, no readers
+   # could possibly be active.
+   $self->{write_lock} = $lockmgr->lock($self->{file} . '/write')
+     or die "Can't lock '$self->{file}/write'";
    $self->{djk}      = $parm{djk}      if defined $parm{djk};
    $self->{layout}   = $parm{layout} || new WAIT::Parse::Base;
    $self->{access}   = $parm{access} if defined $parm{access};
-Line 142 
 sub new {
+Line 189 
 sub new {
      my $att  = shift @{$parm{invindex}};
      my @spec = @{shift @{$parm{invindex}}};
      my @opt;
      if (ref($spec[0])) {
        carp "Secondary pipelines are deprecated\n";
        @opt = %{shift @spec};
      }
      $self->create_inverted_index(attribute => $att, pipeline  => \@spec, @opt);
    }
    $self;
    # end of backward compatibility stuff
  }
-Line 168 
 table!
+Line 216 
 table!
  sub create_index {
    my $self= shift;
    croak "Cannot create index for table aready populated"
      if $self->{nextk} > 1;
    require WAIT::Index;
    my $name = join '-', @_;
    $self->{indexes}->{$name} =
      new WAIT::Index file => $self->{file}.'/'.$name, attr => $_;
-Line 196 
 set attributes specified when the table
+Line 244 
 set attributes specified when the table
  =item C<pipeline>
- A piplines specification is a reference to and array of method names
+ A pipeline specification is a reference to an array of method names
- (from package C<WAIT::Filter>) which are to applied in sequence to the
+ (from package C<WAIT::Filter>) which are to be applied in sequence to
- contents of the named attribute. The attribute name may not be in the
+ the contents of the named attribute. The attribute name may not be in
- attribute list.
+ the attribute list.
  =item C<predicate>
  An indication which predicate the index implements. This may be
  e.g. 'plain', 'stemming' or 'soundex'. The indicator will be used for
  query processing. Currently there is no standard set of predicate
- names. The predicate defaults to the last member of the ppline if
+ names. The predicate defaults to the last member of the pipeline if
  omitted.
  =back
-Line 224 
 sub create_inverted_index {
+Line 272 
 sub create_inverted_index {
    croak "No pipeline specified"  unless $parm{pipeline};
    $parm{predicate} ||= $parm{pipeline}->[-1];
    croak "Cannot create index for table aready populated"
      if $self->{nextk} > 1;
    require WAIT::InvertedIndex;
    # backward compatibility stuff
-Line 235 
 sub create_inverted_index {
+Line 283 
 sub create_inverted_index {
    for (qw(attribute pipeline predicate)) {
      delete $opt{$_};
    }
    my $name = join '_', ($parm{attribute}, @{$parm{pipeline}});
    my $idx = new WAIT::InvertedIndex(file   => $self->{file}.'/'.$name,
                                      filter => [@{$parm{pipeline}}], # clone
-Line 284 
 sub drop {
+Line 332 
 sub drop {
        $_->drop;
      }
      unlink "$file/records";
+     # $self->unlock;
      ! (!-e $file or rmdir $file);
    } else {
      croak ref($self)."::drop called directly";
-Line 334 
 sub open {
+Line 383 
 sub open {
                           $self->{mode}, 0664, $DB_BTREE);
      }
    }
+   # Locking
+   #
+   # We allow multiple readers to coexist.  But write access excludes
+   # all read access and vice versa.  In practice read access on tables
+   # open for writing will mostly work ;-)
+   my $lockmgr = LockFile::Simple->make(-autoclean => 1);
+   my $lockdir = $self->{file} . '/read';
+   unless (-d $lockdir) {
+     mkdir $lockdir, 0755 or die "Could not mkdir $lockdir: $!";
+   }
+   if ($self->{mode} & O_RDWR) {
+     # Get a write lock.  Release it again and die if there is any
+     # valid reader.
+     # this is a hack.  We do not check for reopening ...
+     return $self if $self->{write_lock};
+     if ($self->{read_lock}) {
+       # We are becoming a writer now. So we release the read lock to
+       # avoid blocking ourselves.
+       $self->{read_lock}->release;
+       delete $self->{read_lock};
+     }
+     # Get the preliminary write lock
+     $self->{write_lock} = $lockmgr->lock($self->{file} . '/write')
+       or die "Can't lock '$self->{file}/write'";
+     # If we actually want to write we must check if there are any
+     # readers.  The write lock is confirmed if we cannot find any
+     # valid readers.
+     local *DIR;
+     opendir DIR, $lockdir or
+       die "Could not opendir '$lockdir': $!";
+     for my $lockfile (grep { -f "$lockdir/$_" } readdir DIR) {
+       # check if the locks are still valid.
+       # Since we are protected by a write lock, we could use a plain file.
+       # But we want to use the stale testing from LockFile::Simple.
+       if (my $lck = $lockmgr->trylock("$lockdir/$lockfile")) {
+         warn "Removing stale lockfile '$lockdir/$lockfile'";
+         $lck->release;
+       } else {
+         $self->{write_lock}->release;
+         die "Cannot write table '$file' while it's in use";
+       }
+     }
+     closedir DIR;
+   } else {
+     # this is a hack.  We do not check for reopening ...
+     return $self if $self->{read_lock};
+     # Get the preliminary write lock to protect the directory
+     # operations.
+     $self->{write_lock} ||= $lockmgr->lock($self->{file} . '/write')
+       or die "Can't lock '$self->{file}/write'";
+     # find a new read slot
+     my $id = time;
+     while (-f "$lockdir/$id.lock") { # here assume ".lock" format!
+       $id++;
+     }
+     $self->{read_lock} = $lockmgr->lock("$lockdir/$id")
+       or die "Can't lock '$lockdir/$id'";
+     # We are a reader now. So we release the write lock
+     $self->{write_lock}->release;
+     delete $self->{write_lock};
+   }
    $self;
  }
  sub fetch_extern {
    my $self  = shift;
-   print "#@_", $self->{'access'}->{Mode}, "\n";
+   # print "#@_", $self->{'access'}->{Mode}, "\n"; # DEBUGGING?
    if (exists $self->{'access'}) {
      mrequire ref($self->{'access'});
      $self->{'access'}->FETCH(@_);
-Line 358 
 sub _find_index {
+Line 483 
 sub _find_index {
    my (@att) = @_;
    my %att;
    my $name;
    @att{@att} = @att;
    KEY: for $name (keys %{$self->{indexes}}) {
-Line 375 
 sub have {
+Line 500 
 sub have {
    my $self  = shift;
    my %parm  = @_;
-   my $index = $self->_find_index(keys %parm);
+   my $index = $self->_find_index(keys %parm) or return; # no index-no have
-   croak "No index found" unless $index;
    defined $self->{db} or $self->open;
    return $index->have(@_);
  }
-Line 387 
 sub insert {
+Line 512 
 sub insert {
    defined $self->{db} or $self->open;
+   # We should move all writing methods to a subclass to check only once
+   $self->{mode} & O_RDWR or croak "Cannot insert into table opened in RD_ONLY mode";
    my $tuple = join($;, map($parm{$_} || '', @{$self->{attr}}));
    my $key;
    my @deleted = keys %{$self->{deleted}};
+   my $gotkey = 0;
    if (@deleted) {
      $key = pop @deleted;
      delete $self->{deleted}->{$key};
+     # Sanity check
+     if ($key && $key>0) {
+       $gotkey=1;
    } else {
+       warn(sprintf("WAIT database inconsistency during insert ".
+                    "key[%s]: Please rebuild index\n",
+                    $key
+                   ));
+     }
+   }
+   unless ($gotkey) {
      $key = $self->{nextk}++;
    }
    if ($USE_RECNO) {
-Line 408 
 sub insert {
+Line 547 
 sub insert {
        if ($key == $self->{nextk}-1) {
          $self->{nextk}--;
        } else {
+         # warn "setting key[$key] deleted during insert";
          $self->{deleted}->{$key}=1;
        }
        my $idx;
-Line 416 
 sub insert {
+Line 556 
 sub insert {
          $idx->remove($key, %parm);
        }
        return undef;
      }
    }
    if (defined $self->{inverted}) {
      my $att;
-Line 432 
 sub insert {
+Line 572 
 sub insert {
  sub sync {
    my $self  = shift;
    for (values %{$self->{indexes}}) {
      map $_->sync, $_;
    }
-Line 449 
 sub fetch {
+Line 589 
 sub fetch {
    my $key   = shift;
    return () if exists $self->{deleted}->{$key};
    defined $self->{db} or $self->open;
    if ($USE_RECNO) {
      $self->unpack($self->{db}->[$key]);
-Line 462 
 sub delete_by_key {
+Line 602 
 sub delete_by_key {
    my $self  = shift;
    my $key   = shift;
+   unless ($key) {
+     Carp::cluck "Warning: delete_by_key called without key. Looks like a bug in WAIT?";
+     return;
+   }
    return $self->{deleted}->{$key} if defined $self->{deleted}->{$key};
    my %tuple = $self->fetch($key);
    for (values %{$self->{indexes}}) {
-Line 478 
 sub delete_by_key {
+Line 623 
 sub delete_by_key {
        }
      }
    }
+   # warn "setting key[$key] deleted during delete_by_key";
    ++$self->{deleted}->{$key};
  }
  sub delete {
    my $self  = shift;
    my $tkey = $self->have(@_);
+   # warn "tkey[$tkey]\@_[@_]";
    defined $tkey && $self->delete_by_key($tkey, @_);
  }
  sub unpack {
-   my $self = shift;
+   my($self, $tuple) = @_;
-   my $tuple = shift;
+   unless (defined $tuple){
+     # require Carp; # unfortunately gives us "bizarre copy...." :-(((((
+     warn("Debug: somebody called unpack without argument tuple!");
+     return;
+   }
    my $att;
    my @result;
-Line 502 
 sub unpack {
+Line 653 
 sub unpack {
    @result;
  }
+ sub set {
+   my ($self, $iattr, $value) = @_;
+   unless ($self->{write_lock}){
+     warn "Cannot set iattr[$iattr] without write lock. Nothing done";
+     return;
+   }
+   for my $att (keys %{$self->{inverted}}) {
+     if ($] > 5.003) {         # avoid bug in perl up to 5.003_05
+       my $idx;
+       for $idx (@{$self->{inverted}->{$att}}) {
+         $idx->set($iattr, $value);
+       }
+     } else {
+       map $_->set($iattr, $value), @{$self->{inverted}->{$att}};
+     }
+   }
+;
+ }
  sub close {
    my $self = shift;
    if (exists $self->{'access'}) {
      eval {$self->{'access'}->close}; # dont bother if not opened
    }
-   for (values %{$self->{indexes}}) {
+   if ($WAIT::Index::VERSION) {
-     $_->close();
+     for (values %{$self->{indexes}}) {
+       $_->close();
+     }
    }
-   if (defined $self->{inverted}) {
+   if (defined $self->{inverted} && $WAIT::InvertedIndex::VERSION) {
+     # require WAIT::InvertedIndex; Uli: we can avoid closing indexes:
+     # if WAIT::InvertedIndex has not been loaded, they cannot have
+     # been altered so far
      my $att;
      for $att (keys %{$self->{inverted}}) {
        if ($] > 5.003) {         # avoid bug in perl up to 5.003_05
-Line 535 
 sub close {
+Line 712 
 sub close {
      delete $self->{db};
    }
+   $self->unlock;
 ;
  }
+ sub unlock {
+   my $self = shift;
+   # We have either a read or a write lock (or we closed the table already)
+   # unless ($self->{read_lock} || $self->{write_lock}) {
+   #   warn "WAIT::Table::unlock: Table aparently hold's no lock"
+   # }
+   if ($self->{write_lock}) {
+     $self->{write_lock}->release();
+     delete $self->{write_lock};
+   }
+   if ($self->{read_lock}) {
+     $self->{read_lock}->release();
+     delete $self->{read_lock};
+   }
+ }
+ sub DESTROY {
+   my $self = shift;
+   warn "Table handle destroyed without closing it first"
+     if $self->{write_lock} || $self->{read_lock};
+ }
  sub open_scan {
    my $self = shift;
    my $code = shift;
-Line 593 
 sub intervall {
+Line 797 
 sub intervall {
  }
  sub search {
-   my $self = shift;
+   my $self  = shift;
-   my $attr = shift;
+   my ($query, $attr, $cont, $raw);
-   my $cont = shift;
+   if (ref $_[0]) {
-   my $raw  = shift;
+     $query = shift;
+     $attr = $query->{attr};
+     $cont = $query->{cont};
+     $raw  = $query->{raw};
+   } else {
+     require Carp;
+     Carp::cluck("Using three argument search interface is deprecated, use hashref interface instead");
+     $attr = shift;
+     $cont = shift;
+     $raw  = shift;
+     $query = {
+               attr => $attr,
+               cont => $cont,
+               raw  => $raw,
+              };
+   }
    my %result;
    defined $self->{db} or $self->open; # require layout
-Line 606 
 sub search {
+Line 827 
 sub search {
        my $name = $_->name;
        if (exists $raw->{$name} and @{$raw->{$name}}) {
          my $scale = 1/scalar(@{$raw->{$name}});
-         my %r = $_->search_raw(@{$raw->{$name}});
+         my %r = $_->search_raw($query, @{$raw->{$name}});
          my ($key, $val);
          while (($key, $val) = each %r) {
            if (exists $result{$key}) {
-Line 620 
 sub search {
+Line 841 
 sub search {
    }
    if (defined $cont and $cont ne '') {
      for (@{$self->{inverted}->{$attr}}) {
-       my %r = $_->search($cont);
+       my %r = $_->search($query, $cont);
        my ($key, $val);
        while (($key, $val) = each %r) {
          if (exists $result{$key}) {
-Line 644 
 sub hilight_positions {
+Line 865 
 sub hilight_positions {
    my %pos;
    if (defined $raw) {
-     for (@{$self->{inverted}->{$attr}}) {
+     for (@{$self->{inverted}->{$attr}}) { # objects of type
+                                           # WAIT::InvertedIndex for
+                                           # this index field $attr
        my $name = $_->name;
        if (exists $raw->{$name}) {
          my %qt;
-Line 678 
 sub hilight_positions {
+Line 901 
 sub hilight_positions {
  }
  sub hilight {
-   my ($tb, $text, $query, $raw) = @_;
+   my ($tb, $buf, $qplain, $qraw) = @_;
-   my $type = $tb->layout();
+   my $layout = $tb->layout();
    my @result;
-   $query ||= {};
+   $qplain ||= {};
-   $raw   ||= {};
+   $qraw   ||= {};
-   my @ttxt = $type->tag($text);
+   my @ttxt = $layout->tag($buf);
    while (@ttxt) {
      no strict 'refs';
      my %tag = %{shift @ttxt};
-Line 692 
 sub hilight {
+Line 916 
 sub hilight {
      my $fld;
      my %hl;
-     for $fld (grep defined $tag{$_}, keys %$query, keys %$raw) {
+     for $fld (grep defined $tag{$_}, keys %$qplain, keys %$qraw) {
        my $hp = $tb->hilight_positions($fld, $txt,
-                                       $query->{$fld}, $raw->{$fld});
+                                       $qplain->{$fld}, $qraw->{$fld});
        for (keys %$hp) {
          if (exists $hl{$_}) {   # -w ;-(
            $hl{$_} = max($hl{$_}, $hp->{$_});
-Line 720 
 sub hilight {
+Line 944 
 sub hilight {
  }
 ;

 Legend:



Removed from v.10
 


changed lines


 
Added in v.34
 Legend:



Removed from v.10
 


changed lines


 
Added in v.34
-Removed from v.10
+Added in v.34

	ViewVC Help
Powered by ViewVC 1.1.26