/[wait]/trunk/lib/WAIT/Table.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WAIT/Table.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/CPAN/lib/WAIT/Table.pm revision 13 by ulpfr, Fri Apr 28 15:42:44 2000 UTC trunk/lib/WAIT/Table.pm revision 109 by dpavlin, Tue Jul 13 17:50:27 2004 UTC
# Line 4  Line 4 
4  # Author          : Ulrich Pfeifer  # Author          : Ulrich Pfeifer
5  # Created On      : Thu Aug  8 13:05:10 1996  # Created On      : Thu Aug  8 13:05:10 1996
6  # Last Modified By: Ulrich Pfeifer  # Last Modified By: Ulrich Pfeifer
7  # Last Modified On: Sun May 30 20:42:30 1999  # Last Modified On: Wed Jan 23 14:15:15 2002
8  # Language        : CPerl  # Language        : CPerl
9  # Update Count    : 56  # Update Count    : 152
10  # Status          : Unknown, Use with caution!  # Status          : Unknown, Use with caution!
11  #  #
12  # Copyright (c) 1996-1997, Ulrich Pfeifer  # Copyright (c) 1996-1997, Ulrich Pfeifer
# Line 25  WAIT::Table -- Module for maintaining Ta Line 25  WAIT::Table -- Module for maintaining Ta
25  =cut  =cut
26    
27  package WAIT::Table;  package WAIT::Table;
28    our $VERSION = "2.000";
29    
30  use WAIT::Table::Handle ();  use WAIT::Table::Handle ();
31  require WAIT::Parse::Base;  require WAIT::Parse::Base;
# Line 32  require WAIT::Parse::Base; Line 33  require WAIT::Parse::Base;
33  use strict;  use strict;
34  use Carp;  use Carp;
35  # use autouse Carp => qw( croak($) );  # use autouse Carp => qw( croak($) );
36  use DB_File;  use BerkeleyDB;
37  use Fcntl;  use Fcntl;
38    
 my $USE_RECNO = 0;  
   
39  =head2 Creating a Table.  =head2 Creating a Table.
40    
41  The constructor WAIT::Table-E<gt>new is normally called via the  The constructor WAIT::Table-E<gt>new is normally called via the
# Line 132  sub new { Line 131  sub new {
131    my $self = {};    my $self = {};
132    
133    # Check for mandatory attrs early    # Check for mandatory attrs early
134    $self->{name}     = $parm{name}     or croak "No name specified";    for my $x (qw(name attr env maindbfile tablename)) {
135    $self->{attr}     = $parm{attr}     or croak "No attributes specified";      $self->{$x}     = $parm{$x}     or croak "No $x specified";
136      }
137    
138    # Do that before we eventually add '_weight' to attributes.    # Do that before we eventually add '_weight' to attributes.
139    $self->{keyset}   = $parm{keyset}   || [[@{$parm{attr}}]];    $self->{keyset}   = $parm{keyset}   || [[@{$parm{attr}}]];
# Line 157  sub new { Line 157  sub new {
157      unshift @{$parm{attr}}, '_weight' unless $attr{'_weight'};      unshift @{$parm{attr}}, '_weight' unless $attr{'_weight'};
158    }    }
159    
160    $self->{file}     = $parm{file}     or croak "No file specified";    $self->{path}     = $parm{path}     or croak "No path specified";
161    if (-d  $self->{file}){    bless $self, $type;
162      warn "Warning: Directory '$self->{file}' already exists\n";  
   } elsif (!mkdir($self->{file}, 0775)) {  
     croak "Could not 'mkdir $self->{file}': $!\n";  
   }  
163    $self->{djk}      = $parm{djk}      if defined $parm{djk};    $self->{djk}      = $parm{djk}      if defined $parm{djk};
164    $self->{layout}   = $parm{layout} || new WAIT::Parse::Base;    $self->{layout}   = $parm{layout} || new WAIT::Parse::Base;
165    $self->{access}   = $parm{access} if defined $parm{access};    $self->{access}   = $parm{access} if defined $parm{access};
# Line 170  sub new { Line 167  sub new {
167    $self->{deleted}  = {};       # no deleted records yet    $self->{deleted}  = {};       # no deleted records yet
168    $self->{indexes}  = {};    $self->{indexes}  = {};
169    
170    bless $self, $type;    # Checking for readers is not necessary, but let's go with the
171      # generic method.
172      
173    # Call create_index() and create_index() for compatibility    # Call create_index() and create_index() for compatibility
174    for (@{$self->{keyset}||[]}) {    for (@{$self->{keyset}||[]}) {
175      #carp "Specification of indexes at table create time is deprecated";      #carp "Specification of indexes at table create time is deprecated";
# Line 180  sub new { Line 179  sub new {
179      # carp "Specification of inverted indexes at table create time is deprecated";      # carp "Specification of inverted indexes at table create time is deprecated";
180      my $att  = shift @{$parm{invindex}};      my $att  = shift @{$parm{invindex}};
181      my @spec = @{shift @{$parm{invindex}}};      my @spec = @{shift @{$parm{invindex}}};
182      my @opt;      my @opt  = ();
183    
184      if (ref($spec[0])) {      if (ref($spec[0])) {
185        carp "Secondary pipelines are deprecated\n";        warn "Secondary pipelines are deprecated";
186        @opt = %{shift @spec};        @opt = %{shift @spec};
187      }      }
188      $self->create_inverted_index(attribute => $att, pipeline  => \@spec, @opt);      $self->create_inverted_index(attribute => $att,
189                                     pipeline  => \@spec,
190                                     @opt);
191    }    }
192    
193    $self;    $self;
194    # end of backwarn compatibility stuff    # end of backwarn compatibility stuff
195  }  }
196    
197    for my $accessor (qw(maindbfile tablename)) {
198      no strict 'refs';
199      *{$accessor} = sub {
200        my($self) = @_;
201        return $self->{$accessor} if $self->{$accessor};
202        require Carp;
203        Carp::confess("accessor $accessor not there");
204      }
205    }
206    
207  =head2 Creating an index  =head2 Creating an index
208    
209    $tb->create_index('docid');    $tb->create_index('docid');
210    
211  =item C<create_index>  C<create_index>
   
212  must be called with a list of attributes. This must be a subset of the  must be called with a list of attributes. This must be a subset of the
213  attributes specified when the table was created. Currently this  attributes specified when the table was created. Currently this
214  method must be called before the first tuple is inserted in the  method must be called before the first tuple is inserted in the
# Line 214  sub create_index { Line 225  sub create_index {
225    require WAIT::Index;    require WAIT::Index;
226    
227    my $name = join '-', @_;    my $name = join '-', @_;
228      #### warn "WARNING: Suspect use of \$_ in method create_index. name[$name]_[$_]";
229    $self->{indexes}->{$name} =    $self->{indexes}->{$name} =
230      new WAIT::Index file => $self->{file}.'/'.$name, attr => $_;      WAIT::Index->new(
231                         file => $self->file.'/'.$name,
232                         subname => $name,
233                         env  => $self->{env},
234                         maindbfile => $self->maindbfile,
235                         tablename => $self->tablename,
236                         attr => $_,
237                        );
238  }  }
239    
240  =head2 Creating an inverted index  =head2 Creating an inverted index
# Line 276  sub create_inverted_index { Line 295  sub create_inverted_index {
295    }    }
296    
297    my $name = join '_', ($parm{attribute}, @{$parm{pipeline}});    my $name = join '_', ($parm{attribute}, @{$parm{pipeline}});
298    my $idx = new WAIT::InvertedIndex(file   => $self->{file}.'/'.$name,    my $idx = WAIT::InvertedIndex->new(file   => $self->file.'/'.$name,
299                                      filter => [@{$parm{pipeline}}], # clone                                       subname=> $name,
300                                      name   => $name,                                       env    => $self->{env},
301                                      attr   => $parm{attribute},                                       maindbfile => $self->maindbfile,
302                                      %opt, # backward compatibility stuff                                       tablename => $self->tablename,
303                                     );                                       filter => [@{$parm{pipeline}}], # clone
304                                         name   => $name,
305                                         attr   => $parm{attribute},
306                                         %opt, # backward compatibility stuff
307                                        );
308    # We will have to use $parm{predicate} here    # We will have to use $parm{predicate} here
309    push @{$self->{inverted}->{$parm{attribute}}}, $idx;    push @{$self->{inverted}->{$parm{attribute}}}, $idx;
310  }  }
311    
312  sub dir {  sub dir {
313    $_[0]->{file};    $_[0]->file;
314  }  }
315    
316  =head2 C<$tb-E<gt>layout>  =head2 C<$tb-E<gt>layout>
# Line 315  Must be called via C<WAIT::Database::dro Line 338  Must be called via C<WAIT::Database::dro
338    
339  sub drop {  sub drop {
340    my $self = shift;    my $self = shift;
341    
342    if ((caller)[0] eq 'WAIT::Database') { # database knows about this    if ((caller)[0] eq 'WAIT::Database') { # database knows about this
343      $self->close;               # just make sure      $self->close;               # just make sure
344      my $file = $self->{file};  
345        my $file = $self->file;
346    
347      for (values %{$self->{indexes}}) {      for (values %{$self->{indexes}}) {
348        $_->drop;        $_->drop;
349      }      }
350      unlink "$file/records";      unlink "$file/records";
351      ! (!-e $file or rmdir $file);      rmdir "$file/read" or warn "Could not rmdir '$file/read'";
352    
353    } else {    } else {
354      croak ref($self)."::drop called directly";      croak ref($self)."::drop called directly";
355    }    }
# Line 337  sub mrequire ($) { Line 363  sub mrequire ($) {
363    require $module;    require $module;
364  }  }
365    
366    sub path {
367      my($self) = @_;
368      return $self->{path} if $self->{path};
369      require Data::Dumper; print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . Data::Dumper->new([$self],[qw(self)])->Indent(1)->Useqq(1)->Dump; # XXX
370      require Carp;
371      Carp::confess("NO file attr");
372    }
373    
374  sub open {  sub open {
375    my $self = shift;    my $self = shift;
376    my $file = $self->{file} . '/records';    my $file = $self->file . '/records';
377    
378    mrequire ref($self);           # that's tricky eh?    mrequire ref($self);           # that's tricky eh?
379    if (defined $self->{'layout'}) {    if (defined $self->{'layout'}) {
# Line 350  sub open { Line 384  sub open {
384    }    }
385    if (exists $self->{indexes}) {    if (exists $self->{indexes}) {
386      require WAIT::Index;      require WAIT::Index;
387      for (values %{$self->{indexes}}) {      for my $Ind (values %{$self->{indexes}}) {
388        $_->{mode} = $self->{mode};        for my $x (qw(mode env maindbfile)) {
389            $Ind->{$x} = $self->{$x};
390          }
391      }      }
392    }    }
393    if (exists $self->{inverted}) {    if (exists $self->{inverted}) {
394      my ($att, $idx);      my ($att, $idx);
395      for $att (keys %{$self->{inverted}}) {      for $att (keys %{$self->{inverted}}) {
396        for $idx (@{$self->{inverted}->{$att}}) {        for $idx (@{$self->{inverted}->{$att}}) {
397          $idx->{mode} = $self->{mode};          for my $x (qw(mode env maindbfile)) {
398              $idx->{$x} = $self->{$x};
399            }
400        }        }
401      }      }
402      require WAIT::InvertedIndex;      require WAIT::InvertedIndex;
403    }    }
404    
405      # CONFUSION: WAIT knows two *modes*: read-only or read-write.
406      # BerkeleyDB means file permissions when talking about Mode.
407      # BerkeleyDB has the "Flags" attribute to specify
408      # read/write/lock/etc subsystems.
409    
410      my $flags;
411      if ($self->{mode} & O_RDWR) {
412        $flags = DB_CREATE; # | DB_INIT_MPOOL | DB_PRIVATE | DB_INIT_CDB;
413        warn "Flags on table $file set to 'writing'";
414      } else {
415        $flags = DB_RDONLY;
416        # warn "Flags on table $file set to 'readonly'";
417      }
418    unless (defined $self->{dbh}) {    unless (defined $self->{dbh}) {
419      if ($USE_RECNO) {      my $subname = $self->tablename . "/records";
420        $self->{dbh} = tie(@{$self->{db}}, 'DB_File', $file,      $self->{dbh} =
421                           $self->{mode}, 0664, $DB_RECNO);          tie(%{$self->{db}}, 'BerkeleyDB::Btree',
422      } else {              $self->{env} ? (Env => $self->{env}) : (),
423        $self->{dbh} =              # Filename => $file,
424          tie(%{$self->{db}}, 'DB_File', $file,              Filename => $self->maindbfile,
425                           $self->{mode}, 0664, $DB_BTREE);              Subname => $subname,
426      }              Mode => 0664,
427                Flags => $flags,
428                $WAIT::Database::Cachesize?(Cachesize => $WAIT::Database::Cachesize):(),
429                $WAIT::Database::Pagesize?(Pagesize => $WAIT::Database::Pagesize):(),
430               )
431                or die "Cannot tie: $BerkeleyDB::Error;
432     DEBUG: Filename[$self->{maindbfile}]subname[$subname]Mode[0664]Flags[$flags]";
433    }    }
434    $self;    $self;
435  }  }
# Line 432  sub insert { Line 490  sub insert {
490    my $tuple = join($;, map($parm{$_} || '', @{$self->{attr}}));    my $tuple = join($;, map($parm{$_} || '', @{$self->{attr}}));
491    my $key;    my $key;
492    my @deleted = keys %{$self->{deleted}};    my @deleted = keys %{$self->{deleted}};
493      my $gotkey = 0;
494    
495    if (@deleted) {    if (@deleted) {
496      $key = pop @deleted;      $key = pop @deleted;
497      delete $self->{deleted}->{$key};      delete $self->{deleted}->{$key};
498        # Sanity check
499        if ($key && $key>0) {
500          $gotkey=1;
501    } else {    } else {
502      $key = $self->{nextk}++;        warn(sprintf("WAIT database inconsistency during insert ".
503                       "key[%s]: Please rebuild index\n",
504                       $key
505                      ));
506        }
507    }    }
508    if ($USE_RECNO) {    unless ($gotkey) {
509      $self->{db}->[$key] = $tuple;      $key = $self->{nextk}++;
   } else {  
     $self->{db}->{$key} = $tuple;  
510    }    }
511      $self->{db}->{$key} = $tuple;
512    for (values %{$self->{indexes}}) {    for (values %{$self->{indexes}}) {
513      unless ($_->insert($key, %parm)) {      unless ($_->insert($key, %parm)) {
514        # duplicate key, undo changes        # duplicate key, undo changes
515        if ($key == $self->{nextk}-1) {        if ($key == $self->{nextk}-1) {
516          $self->{nextk}--;          $self->{nextk}--;
517        } else {        } else {
518            # warn "setting key[$key] deleted during insert";
519          $self->{deleted}->{$key}=1;          $self->{deleted}->{$key}=1;
520        }        }
521        my $idx;        my $idx;
# Line 493  sub fetch { Line 559  sub fetch {
559    return () if exists $self->{deleted}->{$key};    return () if exists $self->{deleted}->{$key};
560    
561    defined $self->{db} or $self->open;    defined $self->{db} or $self->open;
562    if ($USE_RECNO) {    $self->unpack($self->{db}->{$key});
     $self->unpack($self->{db}->[$key]);  
   } else {  
     $self->unpack($self->{db}->{$key});  
   }  
563  }  }
564    
565  sub delete_by_key {  sub delete_by_key {
566    my $self  = shift;    my $self  = shift;
567    my $key   = shift;    my $key   = shift;
568    
569      unless ($key) {
570        Carp::cluck "Warning: delete_by_key called without key. Looks like a bug in WAIT?";
571        return;
572      }
573    
574    return $self->{deleted}->{$key} if defined $self->{deleted}->{$key};    return $self->{deleted}->{$key} if defined $self->{deleted}->{$key};
575    my %tuple = $self->fetch($key);    my %tuple = $self->fetch($key);
576    for (values %{$self->{indexes}}) {    for (values %{$self->{indexes}}) {
# Line 520  sub delete_by_key { Line 587  sub delete_by_key {
587        }        }
588      }      }
589    }    }
590      # warn "setting key[$key] deleted during delete_by_key";
591    ++$self->{deleted}->{$key};    ++$self->{deleted}->{$key};
592  }  }
593    
594  sub delete {  sub delete {
595    my $self  = shift;    my $self  = shift;
596    my $tkey = $self->have(@_);    my $tkey = $self->have(@_);
597      # warn "tkey[$tkey]\@_[@_]";
598    defined $tkey && $self->delete_by_key($tkey, @_);    defined $tkey && $self->delete_by_key($tkey, @_);
599  }  }
600    
601  sub unpack {  sub unpack {
602    my $self = shift;    my($self, $tuple) = @_;
603    my $tuple = shift;  
604      unless (defined $tuple){
605        # require Carp; # unfortunately gives us "bizarre copy...." :-(((((
606        warn("Debug: somebody called unpack without argument tuple!");
607        return;
608      }
609    
610    my $att;    my $att;
611    my @result;    my @result;
# Line 544  sub unpack { Line 617  sub unpack {
617    @result;    @result;
618  }  }
619    
620    sub set {
621      my ($self, $iattr, $value) = @_;
622      # in the rare case that they haven't written a single record yet, we
623      # make sure, the inverted inherits our $self->{mode}:
624      defined $self->{db} or $self->open;
625    
626      for my $att (keys %{$self->{inverted}}) {
627        if ($] > 5.003) {         # avoid bug in perl up to 5.003_05
628          my $idx;
629          for $idx (@{$self->{inverted}->{$att}}) {
630            $idx->set($iattr, $value);
631          }
632        } else {
633          map $_->set($iattr, $value), @{$self->{inverted}->{$att}};
634        }
635      }
636    
637      1;
638    }
639    
640  sub close {  sub close {
641    my $self = shift;    my $self = shift;
642    
643      require Carp; Carp::cluck("------->Closing A Table<-------");
644    
645    if (exists $self->{'access'}) {    if (exists $self->{'access'}) {
646      eval {$self->{'access'}->close}; # dont bother if not opened      eval {$self->{'access'}->close}; # dont bother if not opened
647    }    }
648    for (values %{$self->{indexes}}) {    if ($WAIT::Index::VERSION) {
649      $_->close();      for (values %{$self->{indexes}}) {
650          $_->close();
651        }
652    }    }
653    if (defined $self->{inverted}) {    if (defined $self->{inverted} && $WAIT::InvertedIndex::VERSION) {
654        # require WAIT::InvertedIndex; Uli: we can avoid closing indexes:
655        # if WAIT::InvertedIndex has not been loaded, they cannot have
656        # been altered so far
657      my $att;      my $att;
658      for $att (keys %{$self->{inverted}}) {      for $att (keys %{$self->{inverted}}) {
659        if ($] > 5.003) {         # avoid bug in perl up to 5.003_05        if ($] > 5.003) {         # avoid bug in perl up to 5.003_05
# Line 568  sub close { Line 668  sub close {
668    }    }
669    if ($self->{dbh}) {    if ($self->{dbh}) {
670      delete $self->{dbh};      delete $self->{dbh};
671      }
672      if ($USE_RECNO) {    untie %{$self->{db}};
673        untie @{$self->{db}};    for my $att (qw(env db file maindbfile)) {
674      } else {      delete $self->{$att};
675        untie %{$self->{db}};      warn "----->Deleted att $att<-----";
     }  
     delete $self->{db};  
676    }    }
677    
678    1;    1;
# Line 583  sub close { Line 681  sub close {
681  sub DESTROY {  sub DESTROY {
682    my $self = shift;    my $self = shift;
683    
684    warn "Table handle destroyed without closing it first"    delete $self->{env};
685      if $self->{db} and $self->{mode}&O_RDWR;  
686      # require Data::Dumper; print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . Data::Dumper->new([$self],[qw(self)])->Indent(1)->Useqq(1)->Dump; # XXX
687    
688  }  }
689    
690  sub open_scan {  sub open_scan {
# Line 641  sub intervall { Line 741  sub intervall {
741    bless \%result, 'WAIT::Query::Raw';    bless \%result, 'WAIT::Query::Raw';
742  }  }
743    
744  sub search {  sub search_ref {
745    my $self = shift;    my $self  = shift;
746    my $attr = shift;    my ($query, $attr, $cont, $raw);
747    my $cont = shift;    if (ref $_[0]) {
748    my $raw  = shift;      $query = shift;
749        # require Data::Dumper; print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . Data::Dumper->new([$query],[qw()])->Indent(1)->Useqq(1)->Dump; # XXX
750    
751        $attr = $query->{attr};
752        $cont = $query->{cont};
753        $raw  = $query->{raw};
754      } else {
755        require Carp;
756        Carp::cluck("Using three argument search interface is deprecated, use hashref interface instead");
757        $attr = shift;
758        $cont = shift;
759        $raw  = shift;
760        $query = {
761                  attr => $attr,
762                  cont => $cont,
763                  raw  => $raw,
764                 };
765      }
766    
767    my %result;    my %result;
768    
769    defined $self->{db} or $self->open; # require layout    defined $self->{db} or $self->open; # require layout
# Line 655  sub search { Line 773  sub search {
773        my $name = $_->name;        my $name = $_->name;
774        if (exists $raw->{$name} and @{$raw->{$name}}) {        if (exists $raw->{$name} and @{$raw->{$name}}) {
775          my $scale = 1/scalar(@{$raw->{$name}});          my $scale = 1/scalar(@{$raw->{$name}});
776          my %r = $_->search_raw(@{$raw->{$name}});          my %r = $_->search_raw($query, @{$raw->{$name}});
777          my ($key, $val);          my ($key, $val);
778          while (($key, $val) = each %r) {          while (($key, $val) = each %r) {
779            if (exists $result{$key}) {            if (exists $result{$key}) {
# Line 669  sub search { Line 787  sub search {
787    }    }
788    if (defined $cont and $cont ne '') {    if (defined $cont and $cont ne '') {
789      for (@{$self->{inverted}->{$attr}}) {      for (@{$self->{inverted}->{$attr}}) {
790        my %r = $_->search($cont);        my $r = $_->search_ref($query, $cont);
791        my ($key, $val);        my ($key, $val);
792        while (($key, $val) = each %r) {        while (($key, $val) = each %$r) {
793          if (exists $result{$key}) {          if (exists $result{$key}) {
794            $result{$key} += $val;            $result{$key} += $val;
795          } else {          } else {
# Line 685  sub search { Line 803  sub search {
803    for (keys %result) {    for (keys %result) {
804      delete $result{$_} if $self->{deleted}->{$_}      delete $result{$_} if $self->{deleted}->{$_}
805    }    }
806    %result;    \%result;
807    }
808    
809    sub parse_query {
810      my($self, $attr, $query) = @_;
811      return unless defined $query && length $query;
812      my %qt;
813      for (@{$self->{inverted}->{$attr}}) {
814        grep $qt{$_}++, $_->parse($query);
815      }
816      [keys %qt];
817  }  }
818    
819  sub hilight_positions {  sub hilight_positions {

Legend:
Removed from v.13  
changed lines
  Added in v.109

  ViewVC Help
Powered by ViewVC 1.1.26