/[wait]/trunk/lib/WAIT/Table.pm

This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!

Diff of /trunk/lib/WAIT/Table.pm

Parent Directory | Revision Log | View Patch Patch

-cvs-head/lib/WAIT/Table.pm
revision 41 by laperla,
Mon Nov 13 20:25:49 2000 UTC
+trunk/lib/WAIT/Table.pm
revision 115 by dpavlin,
Wed Jul 14 07:35:56 2004 UTC
 Line 4
  # Author          : Ulrich Pfeifer
  # Created On      : Thu Aug  8 13:05:10 1996
  # Last Modified By: Ulrich Pfeifer
- # Last Modified On: Sun Nov 12 17:51:56 2000
+ # Last Modified On: Wed Jan 23 14:15:15 2002
  # Language        : CPerl
- # Update Count    : 148
+ # Update Count    : 152
  # Status          : Unknown, Use with caution!
  #
  # Copyright (c) 1996-1997, Ulrich Pfeifer
 Line 25 
 WAIT::Table -- Module for maintaining Ta
  =cut
  package WAIT::Table;
+ our $VERSION = "2.000";
  use WAIT::Table::Handle ();
  require WAIT::Parse::Base;
  use strict;
- use Carp;
+ use Carp qw(cluck croak confess);
  # use autouse Carp => qw( croak($) );
- use DB_File;
+ use BerkeleyDB;
  use Fcntl;
- use LockFile::Simple ();
- my $USE_RECNO = 0;
  =head2 Creating a Table.
-Line 84 
 In that moment the access-defining class
+Line 82 
 In that moment the access-defining class
  structures that cannot be reconstructed via the Data::Dumper dump,
  such as database handles or C pointers.
- =item C<file> => I<fname>
+ =item C<path> => I<dir>
- The filename of the records file. Files for indexes will have I<fname>
+ The path to database. Files for indexes will have I<path>
  as prefix. I<Mandatory>, but usually taken care of by the
  WAIT::Database handle when the constructor is called via
  WAIT::Database::create_table().
-Line 133 
 sub new {
+Line 131 
 sub new {
    my $self = {};
    # Check for mandatory attrs early
-   $self->{name}     = $parm{name}     or croak "No name specified";
+   for my $x (qw(name attr env maindbfile tablename)) {
-   $self->{attr}     = $parm{attr}     or croak "No attributes specified";
+     $self->{$x}     = $parm{$x}     or croak "No $x specified";
+   }
    # Do that before we eventually add '_weight' to attributes.
    $self->{keyset}   = $parm{keyset}   || [[@{$parm{attr}}]];
-Line 158 
 sub new {
+Line 157 
 sub new {
      unshift @{$parm{attr}}, '_weight' unless $attr{'_weight'};
    }
-   $self->{file}     = $parm{file}     or croak "No file specified";
+   $self->{path}     = $parm{path}     or croak "No path specified";
-   if (-d  $self->{file}){
+   bless $self, $type;
-     warn "Warning: Directory '$self->{file}' already exists\n";
-   } elsif (!mkdir($self->{file}, 0775)) {
-     croak "Could not 'mkdir $self->{file}': $!\n";
-   }
    $self->{djk}      = $parm{djk}      if defined $parm{djk};
    $self->{layout}   = $parm{layout} || new WAIT::Parse::Base;
-Line 172 
 sub new {
+Line 167 
 sub new {
    $self->{deleted}  = {};       # no deleted records yet
    $self->{indexes}  = {};
-   bless $self, $type;
    # Checking for readers is not necessary, but let's go with the
    # generic method.
-   $self->getlock(O_RDWR|O_CREAT); # dies when failing
    # Call create_index() and create_index() for compatibility
    for (@{$self->{keyset}||[]}) {
-Line 187 
 sub new {
+Line 179 
 sub new {
      # carp "Specification of inverted indexes at table create time is deprecated";
      my $att  = shift @{$parm{invindex}};
      my @spec = @{shift @{$parm{invindex}}};
-     my @opt;
+     my @opt  = ();
      if (ref($spec[0])) {
-       carp "Secondary pipelines are deprecated\n";
+       warn "Secondary pipelines are deprecated";
        @opt = %{shift @spec};
      }
-     $self->create_inverted_index(attribute => $att, pipeline  => \@spec, @opt);
+     $self->create_inverted_index(attribute => $att,
+                                  pipeline  => \@spec,
+                                  @opt);
    }
    $self;
    # end of backwarn compatibility stuff
  }
+ for my $accessor (qw(maindbfile tablename)) {
+   no strict 'refs';
+   *{$accessor} = sub {
+     my($self) = @_;
+     return $self->{$accessor} if $self->{$accessor};
+     require Carp;
+     Carp::confess("accessor $accessor not there");
+   }
+ }
  =head2 Creating an index
    $tb->create_index('docid');
- =item C<create_index>
+ C<create_index>
  must be called with a list of attributes. This must be a subset of the
  attributes specified when the table was created. Currently this
  method must be called before the first tuple is inserted in the
-Line 222 
 sub create_index {
+Line 225 
 sub create_index {
    require WAIT::Index;
    my $name = join '-', @_;
+   #### warn "WARNING: Suspect use of \$_ in method create_index. name[$name]_[$_]";
    $self->{indexes}->{$name} =
-     new WAIT::Index file => $self->{file}.'/'.$name, attr => $_;
+     WAIT::Index->new(
+                      path => $self->path.'/'.$name,
+                      subname => $name,
+                      env  => $self->{env},
+                      maindbfile => $self->maindbfile,
+                      tablename => $self->tablename,
+                      attr => $_,
+                     );
  }
  =head2 Creating an inverted index
-Line 284 
 sub create_inverted_index {
+Line 295 
 sub create_inverted_index {
    }
    my $name = join '_', ($parm{attribute}, @{$parm{pipeline}});
-   my $idx = new WAIT::InvertedIndex(file   => $self->{file}.'/'.$name,
+   my $idx = WAIT::InvertedIndex->new(path   => $self->path.'/'.$name,
-                                     filter => [@{$parm{pipeline}}], # clone
+                                      subname=> $name,
-                                     name   => $name,
+                                      env    => $self->{env},
-                                     attr   => $parm{attribute},
+                                      maindbfile => $self->maindbfile,
-                                     %opt, # backward compatibility stuff
+                                      tablename => $self->tablename,
-                                    );
+                                      filter => [@{$parm{pipeline}}], # clone
+                                      name   => $name,
+                                      attr   => $parm{attribute},
+                                      %opt, # backward compatibility stuff
+                                     );
    # We will have to use $parm{predicate} here
    push @{$self->{inverted}->{$parm{attribute}}}, $idx;
  }
  sub dir {
-   $_[0]->{file};
+   $_[0]->path;
  }
  =head2 C<$tb-E<gt>layout>
-Line 324 
 Must be called via C<WAIT::Database::dro
+Line 339 
 Must be called via C<WAIT::Database::dro
  sub drop {
    my $self = shift;
-   unless ($self->{write_lock}){
-     warn "Cannot drop table without write lock. Nothing done";
-     return;
-   }
    if ((caller)[0] eq 'WAIT::Database') { # database knows about this
      $self->close;               # just make sure
-     my $file = $self->{file};
+ #    my $path = $self->path;
      for (values %{$self->{indexes}}) {
        $_->drop;
      }
-     unlink "$file/records";
+ #    unlink "$path/records";
-     rmdir "$file/read" or warn "Could not rmdir '$file/read'";
+ #    rmdir "$path/read" or warn "Could not rmdir '$path/read'";
-     # $self->unlock;
-     ! (!-e $file or rmdir $file);
    } else {
-     croak ref($self)."::drop called directly";
+     confess ref($self)."::drop called directly";
    }
  }
-Line 355 
 sub mrequire ($) {
+Line 363 
 sub mrequire ($) {
    require $module;
  }
+ sub path {
+   my($self) = @_;
+   return $self->{path} if $self->{path};
+   require Data::Dumper; print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . Data::Dumper->new([$self],[qw(self)])->Indent(1)->Useqq(1)->Dump; # XXX
+   require Carp;
+   confess("NO path attr");
+ }
  sub open {
    my $self = shift;
-   my $file = $self->{file} . '/records';
+   my $path = $self->path . '/records';
    mrequire ref($self);           # that's tricky eh?
    if (defined $self->{'layout'}) {
-Line 368 
 sub open {
+Line 384 
 sub open {
    }
    if (exists $self->{indexes}) {
      require WAIT::Index;
-     for (values %{$self->{indexes}}) {
+     for my $Ind (values %{$self->{indexes}}) {
-       $_->{mode} = $self->{mode};
+       for my $x (qw(mode env maindbfile)) {
+         $Ind->{$x} = $self->{$x};
+       }
      }
    }
    if (exists $self->{inverted}) {
      my ($att, $idx);
      for $att (keys %{$self->{inverted}}) {
        for $idx (@{$self->{inverted}->{$att}}) {
-         $idx->{mode} = $self->{mode};
+         for my $x (qw(mode env maindbfile)) {
+           $idx->{$x} = $self->{$x};
+         }
        }
      }
      require WAIT::InvertedIndex;
    }
+   # CONFUSION: WAIT knows two *modes*: read-only or read-write.
+   # BerkeleyDB means file permissions when talking about Mode.
+   # BerkeleyDB has the "Flags" attribute to specify
+   # read/write/lock/etc subsystems.
+   my $flags;
+   if ($self->{mode} & O_RDWR) {
+     $flags = DB_CREATE; # | DB_INIT_MPOOL | DB_PRIVATE | DB_INIT_CDB;
+     #warn "DEBUG: Flags on table $path set to 'writing'";
+   } else {
+     $flags = DB_RDONLY;
+     #warn "DEBUG: Flags on table $path set to 'readonly'";
+   }
    unless (defined $self->{dbh}) {
-     if ($USE_RECNO) {
+     my $subname = $self->tablename . "/records";
-       $self->{dbh} = tie(@{$self->{db}}, 'DB_File', $file,
+     $self->{dbh} =
-                          $self->{mode}, 0664, $DB_RECNO);
+         tie(%{$self->{db}}, 'BerkeleyDB::Btree',
-     } else {
+             $self->{env} ? (Env => $self->{env}) : (),
-       $self->{dbh} =
+             # Filename => $file,
-         tie(%{$self->{db}}, 'DB_File', $file,
+             Filename => $self->maindbfile,
-                          $self->{mode}, 0664, $DB_BTREE);
+             Subname => $subname,
-     }
+             Mode => 0664,
+             Flags => $flags,
+             $WAIT::Database::Cachesize?(Cachesize => $WAIT::Database::Cachesize):(),
+             $WAIT::Database::Pagesize?(Pagesize => $WAIT::Database::Pagesize):(),
+            )
+             or confess "Cannot tie: $BerkeleyDB::Error\nDEBUG: Filename[$self->{maindbfile}]subname[$subname]Mode[0664]Flags[$flags]";
    }
-   $self->getlock($self->{mode});
    $self;
  }
-Line 471 
 sub insert {
+Line 507 
 sub insert {
    unless ($gotkey) {
      $key = $self->{nextk}++;
    }
-   if ($USE_RECNO) {
+   $self->{db}->{$key} = $tuple;
-     $self->{db}->[$key] = $tuple;
-   } else {
-     $self->{db}->{$key} = $tuple;
-   }
    for (values %{$self->{indexes}}) {
      unless ($_->insert($key, %parm)) {
        # duplicate key, undo changes
-Line 526 
 sub fetch {
+Line 558 
 sub fetch {
    return () if exists $self->{deleted}->{$key};
    defined $self->{db} or $self->open;
-   if ($USE_RECNO) {
+   $self->unpack($self->{db}->{$key});
-     $self->unpack($self->{db}->[$key]);
-   } else {
-     $self->unpack($self->{db}->{$key});
-   }
  }
  sub delete_by_key {
-Line 538 
 sub delete_by_key {
+Line 566 
 sub delete_by_key {
    my $key   = shift;
    unless ($key) {
-     Carp::cluck "Warning: delete_by_key called without key. Looks like a bug in WAIT?";
+     cluck "Warning: delete_by_key called without key. Looks like a bug in WAIT?";
      return;
    }
-Line 590 
 sub unpack {
+Line 618 
 sub unpack {
  sub set {
    my ($self, $iattr, $value) = @_;
+   # in the rare case that they haven't written a single record yet, we
-   unless ($self->{write_lock}){
+   # make sure, the inverted inherits our $self->{mode}:
-     warn "Cannot set iattr[$iattr] without write lock. Nothing done";
+   defined $self->{db} or $self->open;
-     return;
-   }
    for my $att (keys %{$self->{inverted}}) {
      if ($] > 5.003) {         # avoid bug in perl up to 5.003_05
        my $idx;
-Line 612 
 sub set {
+Line 639 
 sub set {
  sub close {
    my $self = shift;
+   #cluck("DEBUG: Closing A Table");
    if (exists $self->{'access'}) {
      eval {$self->{'access'}->close}; # dont bother if not opened
    }
-Line 638 
 sub close {
+Line 667 
 sub close {
    }
    if ($self->{dbh}) {
      delete $self->{dbh};
-     if ($USE_RECNO) {
-       untie @{$self->{db}};
-     } else {
-       untie %{$self->{db}};
-     }
-     delete $self->{db};
    }
+   untie %{$self->{db}};
-   $self->unlock;
+   for my $att (qw(env db path maindbfile)) {
+     delete $self->{$att};
-;
+     #cluck "DEBUG: Deleted att $att";
- }
- # Locking
- #
- # We allow multiple readers to coexists.  But write access excludes
- # all read access and vice versa.  In practice read access on tables
- # open for writing will mostly work ;-)
- # If a "write" lock is requested, an existing "read" lock will be
- # released.  If a "read" lock ist requested, an existing "write" lock
- # will be released.  Requiring a lock already hold has no effect.
- sub getlock {
-   my ($self, $mode) = @_;
-   # autoclean cleans on DESTROY, stale sends SIGZERO to the owner
-   #
-   my $lockmgr = LockFile::Simple->make(-autoclean => 1, -stale => 1);
-   my $file    = $self->{file} . '/records';
-   my $lockdir = $self->{file} . '/read';
-   unless (-d $lockdir) {
-     mkdir $lockdir, 0755 or die "Could not mkdir $lockdir: $!";
    }
-   if ($mode & O_RDWR) {         # Get a write lock.  Release it again
-                                 # and die if there is any valid
-                                 # readers.
-     # Have a write lock already
-     return $self if $self->{write_lock};
-     if ($self->{read_lock}) {   # We are a becoming a writer now. So
-                                 # we release the read lock to avoid
-                                 # blocking ourselves.
-       $self->{read_lock}->release;
-       delete $self->{read_lock};
-     }
-     # Get the preliminary write lock
-     $self->{write_lock} = $lockmgr->lock($self->{file} . '/write')
-       or die "Can't lock '$self->{file}/write'";
-     # If we actually want to write we must check if there are any
-     # readers.  The write lock is confirmed if wen cannot find any
-     # valid readers.
-     local *DIR;
-     opendir DIR, $lockdir or
-       die "Could not opendir '$lockdir': $!";
-     for my $lockfile (grep { -f "$lockdir/$_" } readdir DIR) {
-       # Check if the locks are still valid.  Since we are protected by
-       # a write lock, we could use a plain file.  But we want to use
-       # the stale testing from LockFile::Simple.
-       if (my $lck = $lockmgr->trylock("$lockdir/$lockfile")) {
-         warn "Removing stale lockfile '$lockdir/$lockfile'";
-         $lck->release;
-       } else {                  # Found an active reader, rats!
-         $self->{write_lock}->release;
-         die "Cannot write table '$file' while it's in use";
-       }
-     }
-     closedir DIR;
-   } else {
-     # Have a read lock already
-     return $self if $self->{read_lock};
-     # Get the preliminary write lock to protect the directory
-     # operations.  If we already have a write lock, it will go.
-     $self->{write_lock} ||= $lockmgr->lock($self->{file} . '/write')
-       or die "Can't lock '$self->{file}/write'";
-     # Find a new read slot.  Maybe the plain file would be better?
-     my $id = time;
-     while (-f "$lockdir/$id.lock") { # here assume ".lock" format!
-       $id++;
-     }
-     $self->{read_lock} = $lockmgr->lock("$lockdir/$id")
+;
-       or die "Can't lock '$lockdir/$id'";
-     # We are a reader now. So we release the write lock
-     $self->{write_lock}->release;
-     delete $self->{write_lock};
-   }
-   return $self;
  }
- sub unlock {
+ sub DESTROY {
    my $self = shift;
-   # Either we have a read or a write lock (or we close the table already)
+   delete $self->{env};
-   # unless ($self->{read_lock} || $self->{write_lock}) {
-   #   warn "WAIT::Table::unlock: Table aparently hold's no lock"
-   # }
-   if ($self->{write_lock}) {
-     $self->{write_lock}->release();
-     delete $self->{write_lock};
-   }
-   if ($self->{read_lock}) {
-     $self->{read_lock}->release();
-     delete $self->{read_lock};
-   }
- }
+   # require Data::Dumper; print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . Data::Dumper->new([$self],[qw(self)])->Indent(1)->Useqq(1)->Dump; # XXX
- sub DESTROY {
-   my $self = shift;
-   if ($self->{write_lock} || $self->{read_lock}) {
-     warn "Table handle destroyed without closing it first";
-     $self->unlock;
-   }
  }
  sub open_scan {
-Line 820 
 sub intervall {
+Line 740 
 sub intervall {
    bless \%result, 'WAIT::Query::Raw';
  }
- sub search {
+ sub search_ref {
    my $self  = shift;
    my ($query, $attr, $cont, $raw);
    if (ref $_[0]) {
      $query = shift;
+     # require Data::Dumper; print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . Data::Dumper->new([$query],[qw()])->Indent(1)->Useqq(1)->Dump; # XXX
      $attr = $query->{attr};
      $cont = $query->{cont};
      $raw  = $query->{raw};
    } else {
-     require Carp;
+     cluck("Using three argument search interface is deprecated, use hashref interface instead");
-     Carp::cluck("Using three argument search interface is deprecated, use hashref interface instead");
      $attr = shift;
      $cont = shift;
      $raw  = shift;
-Line 865 
 sub search {
+Line 785 
 sub search {
    }
    if (defined $cont and $cont ne '') {
      for (@{$self->{inverted}->{$attr}}) {
-       my %r = $_->search($query, $cont);
+       my $r = $_->search_ref($query, $cont);
        my ($key, $val);
-       while (($key, $val) = each %r) {
+       while (($key, $val) = each %$r) {
          if (exists $result{$key}) {
            $result{$key} += $val;
          } else {
-Line 881 
 sub search {
+Line 801 
 sub search {
    for (keys %result) {
      delete $result{$_} if $self->{deleted}->{$_}
    }
-   %result;
+   \%result;
+ }
+ sub parse_query {
+   my($self, $attr, $query) = @_;
+   return unless defined $query && length $query;
+   my %qt;
+   for (@{$self->{inverted}->{$attr}}) {
+     grep $qt{$_}++, $_->parse($query);
+   }
+   [keys %qt];
  }
  sub hilight_positions {

 Legend:



Removed from v.41
 


changed lines


 
Added in v.115
 Legend:



Removed from v.41
 


changed lines


 
Added in v.115
-Removed from v.41
+Added in v.115

	ViewVC Help
Powered by ViewVC 1.1.26