/[wait]/trunk/lib/WAIT/Table.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WAIT/Table.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 34 - (hide annotations)
Sun Nov 12 14:22:40 2000 UTC (23 years, 6 months ago) by ulpfr
Original Path: cvs-head/lib/WAIT/Table.pm
File size: 24474 byte(s)
Opening a table twice should not be a problem any more.

1 ulpfr 13 # -*- Mode: Cperl -*-
2 ulpfr 10 # Table.pm --
3     # ITIID : $ITI$ $Header $__Header$
4     # Author : Ulrich Pfeifer
5     # Created On : Thu Aug 8 13:05:10 1996
6     # Last Modified By: Ulrich Pfeifer
7 ulpfr 34 # Last Modified On: Sun Nov 12 15:21:19 2000
8 ulpfr 10 # Language : CPerl
9 ulpfr 34 # Update Count : 135
10 ulpfr 10 # Status : Unknown, Use with caution!
11 ulpfr 13 #
12 ulpfr 10 # Copyright (c) 1996-1997, Ulrich Pfeifer
13 ulpfr 13 #
14 ulpfr 10
15     =head1 NAME
16    
17     WAIT::Table -- Module for maintaining Tables / Relations
18    
19     =head1 SYNOPSIS
20    
21     require WAIT::Table;
22    
23     =head1 DESCRIPTION
24    
25     =cut
26    
27     package WAIT::Table;
28 ulpfr 13
29     use WAIT::Table::Handle ();
30 ulpfr 10 require WAIT::Parse::Base;
31 ulpfr 13
32 ulpfr 10 use strict;
33     use Carp;
34 ulpfr 13 # use autouse Carp => qw( croak($) );
35 ulpfr 10 use DB_File;
36     use Fcntl;
37 ulpfr 19 use LockFile::Simple ();
38 ulpfr 10
39     my $USE_RECNO = 0;
40    
41     =head2 Creating a Table.
42    
43 ulpfr 13 The constructor WAIT::Table-E<gt>new is normally called via the
44 ulpfr 10 create_table method of a database handle. This is not enforced, but
45 ulpfr 13 creating a table does not make any sense unless the table is
46 ulpfr 10 registered by the database because the latter implements persistence
47     of the meta data. Registering is done automatically by letting the
48 ulpfr 13 database handle the creation of a table.
49 ulpfr 10
50 ulpfr 13 my $db = WAIT::Database->create(name => 'sample');
51     my $tb = $db->create_table(name => 'test',
52     access => $access,
53     layout => $layout,
54     attr => ['docid', 'headline'],
55     );
56 ulpfr 10
57     The constructor returns a handle for the table. This handle is hidden by the
58     table module, to prevent direct access if called via Table.
59    
60     =over 10
61    
62 ulpfr 13 =item C<access> => I<accessobj>
63 ulpfr 10
64 ulpfr 13 A reference to an access object for the external parts (attributes) of
65 ulpfr 10 tuples. As you may remember, the WAIT System does not enforce that
66     objects are completely stored inside the system to avoid duplication.
67 ulpfr 13 There is no (strong) point in storing all your HTML documents inside
68 ulpfr 10 the system when indexing your WWW-Server.
69    
70 ulpfr 13 The access object is designed to work like as a tied hash. You pass
71     the refernce to the object, not the tied hash though. An example
72     implementation of an access class that works for manpages is
73     WAIT::Document::Nroff.
74    
75     The implementation needs to take into account that WAIT will keep this
76     object in a Data::Dumper or Storable database and re-use it when sman
77     is run. So it is not good enough if we can produce the index with it
78     now, when we create or actively access the table, WAIT also must be
79     able to retrieve documents on its own, when we are in a different
80     context. This happens specifically in a retrieval. To get this working
81     seemlessly, the access-defining class must implement a close method.
82     This method will be called before the Data::Dumper dump takes place.
83     In that moment the access-defining class must get rid of all data
84     structures that cannot be reconstructed via the Data::Dumper dump,
85     such as database handles or C pointers.
86    
87 ulpfr 10 =item C<file> => I<fname>
88    
89     The filename of the records file. Files for indexes will have I<fname>
90 ulpfr 13 as prefix. I<Mandatory>, but usually taken care of by the
91     WAIT::Database handle when the constructor is called via
92     WAIT::Database::create_table().
93 ulpfr 10
94     =item C<name> => I<name>
95    
96     The name of this table. I<Mandatory>
97    
98     =item C<attr> => [ I<attr> ... ]
99    
100 ulpfr 13 A reference to an array of attribute names. WAIT will keep the
101     contents of these attributes in its table. I<Mandatory>
102 ulpfr 10
103     =item C<djk> => [ I<attr> ... ]
104    
105     A reference to an array of attribute names which make up the
106 ulpfr 13 I<disjointness key>. Don't think about it - it's of no use yet;
107 ulpfr 10
108     =item C<layout> => I<layoutobj>
109    
110 ulpfr 13 A reference to an external parser object. Defaults to a new instance
111     of C<WAIT::Parse::Base>. For an example implementation see
112     WAIT::Parse::Nroff. A layout class can be implemented as a singleton
113     class if you so like.
114 ulpfr 10
115 ulpfr 13 =item C<keyset> => I<keyset>
116 ulpfr 10
117 ulpfr 13 The set of attributes needed to identify a record. Defaults to all
118     attributes.
119 ulpfr 10
120 ulpfr 13 =item C<invindex> => I<inverted index>
121    
122     A reference to an anon array defining attributes of each record that
123     need to be indexed. See the source of smakewhatis for how to set this
124     up.
125    
126 ulpfr 10 =back
127    
128     =cut
129    
130     sub new {
131     my $type = shift;
132     my %parm = @_;
133     my $self = {};
134    
135 ulpfr 13 # Check for mandatory attrs early
136     $self->{name} = $parm{name} or croak "No name specified";
137     $self->{attr} = $parm{attr} or croak "No attributes specified";
138    
139 ulpfr 10 # Do that before we eventually add '_weight' to attributes.
140     $self->{keyset} = $parm{keyset} || [[@{$parm{attr}}]];
141 ulpfr 13
142 ulpfr 10 $self->{mode} = O_CREAT | O_RDWR;
143 ulpfr 13
144 ulpfr 10 # Determine and set up subclass
145     $type = ref($type) || $type;
146     if (defined $parm{djk}) {
147     if (@{$parm{djk}} == @{$parm{attr}}) {
148     # All attributes in DK (sloppy test here!)
149     $type .= '::Independent';
150     require WAIT::Table::Independent;
151     } else {
152     $type .= '::Disjoint';
153     require WAIT::Table::Disjoint;
154     }
155     # Add '_weight' to attributes
156     my %attr;
157     @attr{@{$parm{attr}}} = (1) x @{$parm{attr}};
158     unshift @{$parm{attr}}, '_weight' unless $attr{'_weight'};
159     }
160    
161     $self->{file} = $parm{file} or croak "No file specified";
162 ulpfr 13 if (-d $self->{file}){
163     warn "Warning: Directory '$self->{file}' already exists\n";
164     } elsif (!mkdir($self->{file}, 0775)) {
165 ulpfr 10 croak "Could not 'mkdir $self->{file}': $!\n";
166     }
167 ulpfr 19
168     my $lockmgr = LockFile::Simple->make(-autoclean => 1);
169 ulpfr 34 # Aquire a write lock, since we are creating the table, no readers
170     # could possibly be active.
171 ulpfr 19 $self->{write_lock} = $lockmgr->lock($self->{file} . '/write')
172     or die "Can't lock '$self->{file}/write'";
173    
174 ulpfr 10 $self->{djk} = $parm{djk} if defined $parm{djk};
175     $self->{layout} = $parm{layout} || new WAIT::Parse::Base;
176     $self->{access} = $parm{access} if defined $parm{access};
177     $self->{nextk} = 1; # next record to insert; first record unused
178     $self->{deleted} = {}; # no deleted records yet
179     $self->{indexes} = {};
180    
181     bless $self, $type;
182     # Call create_index() and create_index() for compatibility
183     for (@{$self->{keyset}||[]}) {
184     #carp "Specification of indexes at table create time is deprecated";
185     $self->create_index(@$_);
186     }
187     while (@{$parm{invindex}||[]}) {
188     # carp "Specification of inverted indexes at table create time is deprecated";
189     my $att = shift @{$parm{invindex}};
190     my @spec = @{shift @{$parm{invindex}}};
191     my @opt;
192 ulpfr 13
193 ulpfr 10 if (ref($spec[0])) {
194     carp "Secondary pipelines are deprecated\n";
195     @opt = %{shift @spec};
196     }
197     $self->create_inverted_index(attribute => $att, pipeline => \@spec, @opt);
198     }
199 ulpfr 19
200 ulpfr 10 $self;
201     # end of backwarn compatibility stuff
202     }
203    
204     =head2 Creating an index
205    
206     $tb->create_index('docid');
207    
208     =item C<create_index>
209    
210     must be called with a list of attributes. This must be a subset of the
211     attributes specified when the table was created. Currently this
212     method must be called before the first tuple is inserted in the
213     table!
214    
215     =cut
216    
217     sub create_index {
218     my $self= shift;
219 ulpfr 13
220 ulpfr 10 croak "Cannot create index for table aready populated"
221     if $self->{nextk} > 1;
222 ulpfr 13
223 ulpfr 10 require WAIT::Index;
224 ulpfr 13
225 ulpfr 10 my $name = join '-', @_;
226     $self->{indexes}->{$name} =
227     new WAIT::Index file => $self->{file}.'/'.$name, attr => $_;
228     }
229    
230     =head2 Creating an inverted index
231    
232     $tb->create_inverted_index
233     (attribute => 'au',
234     pipeline => ['detex', 'isotr', 'isolc', 'split2', 'stop'],
235     predicate => 'plain',
236     );
237    
238     =over 5
239    
240     =item C<attribute>
241    
242     The attribute to build the index on. This attribute may not be in the
243     set attributes specified when the table was created.
244    
245     =item C<pipeline>
246    
247 ulpfr 13 A piplines specification is a reference to an array of method names
248     (from package C<WAIT::Filter>) which are to be applied in sequence to
249     the contents of the named attribute. The attribute name may not be in
250     the attribute list.
251 ulpfr 10
252     =item C<predicate>
253    
254     An indication which predicate the index implements. This may be
255     e.g. 'plain', 'stemming' or 'soundex'. The indicator will be used for
256     query processing. Currently there is no standard set of predicate
257 ulpfr 13 names. The predicate defaults to the last member of the pipeline if
258 ulpfr 10 omitted.
259    
260     =back
261    
262     Currently this method must be called before the first tuple is
263     inserted in the table!
264    
265     =cut
266    
267     sub create_inverted_index {
268     my $self = shift;
269     my %parm = @_;
270    
271     croak "No attribute specified" unless $parm{attribute};
272     croak "No pipeline specified" unless $parm{pipeline};
273    
274     $parm{predicate} ||= $parm{pipeline}->[-1];
275 ulpfr 13
276 ulpfr 10 croak "Cannot create index for table aready populated"
277     if $self->{nextk} > 1;
278 ulpfr 13
279 ulpfr 10 require WAIT::InvertedIndex;
280    
281     # backward compatibility stuff
282     my %opt = %parm;
283     for (qw(attribute pipeline predicate)) {
284     delete $opt{$_};
285     }
286 ulpfr 13
287 ulpfr 10 my $name = join '_', ($parm{attribute}, @{$parm{pipeline}});
288     my $idx = new WAIT::InvertedIndex(file => $self->{file}.'/'.$name,
289     filter => [@{$parm{pipeline}}], # clone
290     name => $name,
291     attr => $parm{attribute},
292     %opt, # backward compatibility stuff
293     );
294     # We will have to use $parm{predicate} here
295     push @{$self->{inverted}->{$parm{attribute}}}, $idx;
296     }
297    
298     sub dir {
299     $_[0]->{file};
300     }
301    
302     =head2 C<$tb-E<gt>layout>
303    
304     Returns the reference to the associated parser object.
305    
306     =cut
307    
308     sub layout { $_[0]->{layout} }
309    
310     =head2 C<$tb-E<gt>fields>
311    
312     Returns the array of attribute names.
313    
314     =cut
315    
316    
317     sub fields { keys %{$_[0]->{inverted}}}
318    
319     =head2 C<$tb-E<gt>drop>
320    
321     Must be called via C<WAIT::Database::drop_table>
322    
323     =cut
324    
325     sub drop {
326     my $self = shift;
327     if ((caller)[0] eq 'WAIT::Database') { # database knows about this
328     $self->close; # just make sure
329     my $file = $self->{file};
330    
331     for (values %{$self->{indexes}}) {
332     $_->drop;
333     }
334     unlink "$file/records";
335 ulpfr 19 # $self->unlock;
336 ulpfr 10 ! (!-e $file or rmdir $file);
337     } else {
338     croak ref($self)."::drop called directly";
339     }
340     }
341    
342     sub mrequire ($) {
343     my $module = shift;
344    
345     $module =~ s{::}{/}g;
346     $module .= '.pm';
347     require $module;
348     }
349    
350     sub open {
351     my $self = shift;
352     my $file = $self->{file} . '/records';
353    
354     mrequire ref($self); # that's tricky eh?
355     if (defined $self->{'layout'}) {
356     mrequire ref($self->{'layout'});
357     }
358     if (defined $self->{'access'}) {
359     mrequire ref($self->{'access'});
360     }
361     if (exists $self->{indexes}) {
362     require WAIT::Index;
363     for (values %{$self->{indexes}}) {
364     $_->{mode} = $self->{mode};
365     }
366     }
367     if (exists $self->{inverted}) {
368     my ($att, $idx);
369     for $att (keys %{$self->{inverted}}) {
370     for $idx (@{$self->{inverted}->{$att}}) {
371     $idx->{mode} = $self->{mode};
372     }
373     }
374     require WAIT::InvertedIndex;
375     }
376     unless (defined $self->{dbh}) {
377     if ($USE_RECNO) {
378     $self->{dbh} = tie(@{$self->{db}}, 'DB_File', $file,
379     $self->{mode}, 0664, $DB_RECNO);
380     } else {
381     $self->{dbh} =
382     tie(%{$self->{db}}, 'DB_File', $file,
383     $self->{mode}, 0664, $DB_BTREE);
384     }
385     }
386 ulpfr 19
387     # Locking
388     #
389     # We allow multiple readers to coexists. But write access excludes
390 ulpfr 34 # all read access and vice versa. In practice read access on tables
391 ulpfr 19 # open for writing will mostly work ;-)
392    
393     my $lockmgr = LockFile::Simple->make(-autoclean => 1);
394    
395     my $lockdir = $self->{file} . '/read';
396     unless (-d $lockdir) {
397     mkdir $lockdir, 0755 or die "Could not mkdir $lockdir: $!";
398     }
399    
400     if ($self->{mode} & O_RDWR) {
401 ulpfr 34 # Get a write lock. Release it again and die if there is any
402     # valid reader.
403    
404 ulpfr 19 # this is a hack. We do not check for reopening ...
405     return $self if $self->{write_lock};
406 ulpfr 34
407     if ($self->{read_lock}) {
408     # We are a becoming a writer now. So we release the read lock to
409     # avoid blocking ourselves.
410     $self->{read_lock}->release;
411     delete $self->{read_lock};
412     }
413    
414     # Get the preliminary write lock
415     $self->{write_lock} = $lockmgr->lock($self->{file} . '/write')
416     or die "Can't lock '$self->{file}/write'";
417 ulpfr 19
418 ulpfr 34 # If we actually want to write we must check if there are any
419     # readers. The write lock is confirmed if wen cannot find any
420     # valid readers.
421    
422 laperla 31 local *DIR;
423 ulpfr 19 opendir DIR, $lockdir or
424     die "Could not opendir '$lockdir': $!";
425     for my $lockfile (grep { -f "$lockdir/$_" } readdir DIR) {
426     # check if the locks are still valid.
427 ulpfr 34 # Since we are protected by a write lock, we could use a plain file.
428 ulpfr 19 # But we want to use the stale testing from LockFile::Simple.
429     if (my $lck = $lockmgr->trylock("$lockdir/$lockfile")) {
430     warn "Removing stale lockfile '$lockdir/$lockfile'";
431     $lck->release;
432     } else {
433     $self->{write_lock}->release;
434     die "Cannot write table '$file' while it's in use";
435     }
436     }
437 laperla 31 closedir DIR;
438 ulpfr 19 } else {
439     # this is a hack. We do not check for reopening ...
440     return $self if $self->{read_lock};
441 ulpfr 34
442     # Get the preliminary write lock to protect the directory
443     # operations.
444 ulpfr 19
445 ulpfr 34 $self->{write_lock} ||= $lockmgr->lock($self->{file} . '/write')
446     or die "Can't lock '$self->{file}/write'";
447    
448     # find a new read slot
449 ulpfr 19 my $id = time;
450     while (-f "$lockdir/$id.lock") { # here assume ".lock" format!
451     $id++;
452     }
453 ulpfr 34
454     $self->{read_lock} = $lockmgr->lock("$lockdir/$id")
455     or die "Can't lock '$lockdir/$id'";
456    
457     # We are a reader now. So we release the write lock
458 ulpfr 19 $self->{write_lock}->release;
459     delete $self->{write_lock};
460     }
461    
462 ulpfr 10 $self;
463     }
464    
465     sub fetch_extern {
466     my $self = shift;
467    
468 ulpfr 13 # print "#@_", $self->{'access'}->{Mode}, "\n"; # DEBUGGING?
469 ulpfr 10 if (exists $self->{'access'}) {
470     mrequire ref($self->{'access'});
471     $self->{'access'}->FETCH(@_);
472     }
473     }
474    
475     sub fetch_extern_by_id {
476     my $self = shift;
477    
478     $self->fetch_extern($self->fetch(@_));
479     }
480    
481     sub _find_index {
482     my $self = shift;
483     my (@att) = @_;
484     my %att;
485     my $name;
486 ulpfr 13
487 ulpfr 10 @att{@att} = @att;
488    
489     KEY: for $name (keys %{$self->{indexes}}) {
490     my @iat = split /-/, $name;
491     for (@iat) {
492     next KEY unless exists $att{$_};
493     }
494     return $self->{indexes}->{$name};
495     }
496     return undef;
497     }
498    
499     sub have {
500     my $self = shift;
501     my %parm = @_;
502    
503 ulpfr 13 my $index = $self->_find_index(keys %parm) or return; # no index-no have
504    
505 ulpfr 10 defined $self->{db} or $self->open;
506     return $index->have(@_);
507     }
508    
509     sub insert {
510     my $self = shift;
511     my %parm = @_;
512    
513     defined $self->{db} or $self->open;
514    
515 ulpfr 13 # We should move all writing methods to a subclass to check only once
516     $self->{mode} & O_RDWR or croak "Cannot insert into table opened in RD_ONLY mode";
517    
518 ulpfr 10 my $tuple = join($;, map($parm{$_} || '', @{$self->{attr}}));
519     my $key;
520     my @deleted = keys %{$self->{deleted}};
521 ulpfr 19 my $gotkey = 0;
522 ulpfr 10
523     if (@deleted) {
524     $key = pop @deleted;
525     delete $self->{deleted}->{$key};
526 ulpfr 19 # Sanity check
527     if ($key && $key>0) {
528     $gotkey=1;
529 ulpfr 10 } else {
530 ulpfr 19 warn(sprintf("WAIT database inconsistency during insert ".
531     "key[%s]: Please rebuild index\n",
532     $key
533     ));
534     }
535     }
536     unless ($gotkey) {
537 ulpfr 10 $key = $self->{nextk}++;
538     }
539     if ($USE_RECNO) {
540     $self->{db}->[$key] = $tuple;
541     } else {
542     $self->{db}->{$key} = $tuple;
543     }
544     for (values %{$self->{indexes}}) {
545     unless ($_->insert($key, %parm)) {
546     # duplicate key, undo changes
547     if ($key == $self->{nextk}-1) {
548     $self->{nextk}--;
549     } else {
550 ulpfr 19 # warn "setting key[$key] deleted during insert";
551 ulpfr 10 $self->{deleted}->{$key}=1;
552     }
553     my $idx;
554     for $idx (values %{$self->{indexes}}) {
555     last if $idx eq $_;
556     $idx->remove($key, %parm);
557     }
558     return undef;
559 ulpfr 13 }
560 ulpfr 10 }
561     if (defined $self->{inverted}) {
562     my $att;
563     for $att (keys %{$self->{inverted}}) {
564     if (defined $parm{$att}) {
565     map $_->insert($key, $parm{$att}), @{$self->{inverted}->{$att}};
566     #map $_->sync, @{$self->{inverted}->{$att}}
567     }
568     }
569     }
570     $key
571     }
572    
573     sub sync {
574     my $self = shift;
575 ulpfr 13
576 ulpfr 10 for (values %{$self->{indexes}}) {
577     map $_->sync, $_;
578     }
579     if (defined $self->{inverted}) {
580     my $att;
581     for $att (keys %{$self->{inverted}}) {
582     map $_->sync, @{$self->{inverted}->{$att}}
583     }
584     }
585     }
586    
587     sub fetch {
588     my $self = shift;
589     my $key = shift;
590    
591     return () if exists $self->{deleted}->{$key};
592 ulpfr 13
593 ulpfr 10 defined $self->{db} or $self->open;
594     if ($USE_RECNO) {
595     $self->unpack($self->{db}->[$key]);
596     } else {
597     $self->unpack($self->{db}->{$key});
598     }
599     }
600    
601     sub delete_by_key {
602     my $self = shift;
603     my $key = shift;
604    
605 ulpfr 19 unless ($key) {
606     Carp::cluck "Warning: delete_by_key called without key. Looks like a bug in WAIT?";
607     return;
608     }
609    
610 ulpfr 10 return $self->{deleted}->{$key} if defined $self->{deleted}->{$key};
611     my %tuple = $self->fetch($key);
612     for (values %{$self->{indexes}}) {
613     $_->delete($key, %tuple);
614     }
615     if (defined $self->{inverted}) {
616     # User *must* provide the full record for this or the entries
617     # in the inverted index will not be removed
618     %tuple = (%tuple, @_);
619     my $att;
620     for $att (keys %{$self->{inverted}}) {
621     if (defined $tuple{$att}) {
622     map $_->delete($key, $tuple{$att}), @{$self->{inverted}->{$att}}
623     }
624     }
625     }
626 ulpfr 19 # warn "setting key[$key] deleted during delete_by_key";
627 ulpfr 10 ++$self->{deleted}->{$key};
628     }
629    
630     sub delete {
631     my $self = shift;
632     my $tkey = $self->have(@_);
633 ulpfr 19 # warn "tkey[$tkey]\@_[@_]";
634 ulpfr 10 defined $tkey && $self->delete_by_key($tkey, @_);
635     }
636    
637     sub unpack {
638 laperla 31 my($self, $tuple) = @_;
639 ulpfr 10
640 laperla 31 unless (defined $tuple){
641     # require Carp; # unfortunately gives us "bizarre copy...." :-(((((
642     warn("Debug: somebody called unpack without argument tuple!");
643     return;
644     }
645    
646 ulpfr 10 my $att;
647     my @result;
648     my @tuple = split /$;/, $tuple;
649    
650     for $att (@{$self->{attr}}) {
651     push @result, $att, shift @tuple;
652     }
653     @result;
654     }
655    
656 ulpfr 19 sub set {
657     my ($self, $iattr, $value) = @_;
658    
659 laperla 31 unless ($self->{write_lock}){
660     warn "Cannot set iattr[$iattr] without write lock. Nothing done";
661     return;
662 ulpfr 24 }
663 ulpfr 19 for my $att (keys %{$self->{inverted}}) {
664     if ($] > 5.003) { # avoid bug in perl up to 5.003_05
665     my $idx;
666     for $idx (@{$self->{inverted}->{$att}}) {
667     $idx->set($iattr, $value);
668     }
669     } else {
670     map $_->set($iattr, $value), @{$self->{inverted}->{$att}};
671     }
672     }
673    
674     1;
675     }
676    
677 ulpfr 10 sub close {
678     my $self = shift;
679    
680     if (exists $self->{'access'}) {
681     eval {$self->{'access'}->close}; # dont bother if not opened
682     }
683 laperla 31 if ($WAIT::Index::VERSION) {
684     for (values %{$self->{indexes}}) {
685     $_->close();
686     }
687 ulpfr 10 }
688 laperla 31 if (defined $self->{inverted} && $WAIT::InvertedIndex::VERSION) {
689     # require WAIT::InvertedIndex; Uli: we can avoid closing indexes:
690     # if WAIT::InvertedIndex has not been loaded, they cannot have
691     # been altered so far
692 ulpfr 10 my $att;
693     for $att (keys %{$self->{inverted}}) {
694     if ($] > 5.003) { # avoid bug in perl up to 5.003_05
695     my $idx;
696     for $idx (@{$self->{inverted}->{$att}}) {
697     $idx->close;
698     }
699     } else {
700     map $_->close(), @{$self->{inverted}->{$att}};
701     }
702     }
703     }
704     if ($self->{dbh}) {
705     delete $self->{dbh};
706    
707     if ($USE_RECNO) {
708     untie @{$self->{db}};
709     } else {
710     untie %{$self->{db}};
711     }
712     delete $self->{db};
713     }
714    
715 ulpfr 19 $self->unlock;
716    
717 ulpfr 10 1;
718     }
719    
720 ulpfr 19 sub unlock {
721     my $self = shift;
722    
723     # Either we have a read or a write lock (or we close the table already)
724     # unless ($self->{read_lock} || $self->{write_lock}) {
725     # warn "WAIT::Table::unlock: Table aparently hold's no lock"
726     # }
727     if ($self->{write_lock}) {
728     $self->{write_lock}->release();
729     delete $self->{write_lock};
730     }
731     if ($self->{read_lock}) {
732     $self->{read_lock}->release();
733     delete $self->{read_lock};
734     }
735    
736     }
737    
738 ulpfr 13 sub DESTROY {
739     my $self = shift;
740    
741     warn "Table handle destroyed without closing it first"
742 ulpfr 19 if $self->{write_lock} || $self->{read_lock};
743 ulpfr 13 }
744    
745 ulpfr 10 sub open_scan {
746     my $self = shift;
747     my $code = shift;
748    
749     $self->{dbh} or $self->open;
750     require WAIT::Scan;
751     new WAIT::Scan $self, $self->{nextk}-1, $code;
752     }
753    
754     sub open_index_scan {
755     my $self = shift;
756     my $attr = shift;
757     my $code = shift;
758     my $name = join '-', @$attr;
759    
760     if (defined $self->{indexes}->{$name}) {
761     $self->{indexes}->{$name}->open_scan($code);
762     } else {
763     croak "No such index '$name'";
764     }
765     }
766    
767     eval {sub WAIT::Query::Raw::new} unless defined \&WAIT::Query::Raw::new;
768    
769     sub prefix {
770     my ($self , $attr, $prefix) = @_;
771     my %result;
772    
773     defined $self->{db} or $self->open; # require layout
774    
775     for (@{$self->{inverted}->{$attr}}) {
776     my $result = $_->prefix($prefix);
777     if (defined $result) {
778     $result{$_->name} = $result;
779     }
780     }
781     bless \%result, 'WAIT::Query::Raw';
782     }
783    
784     sub intervall {
785     my ($self, $attr, $lb, $ub) = @_;
786     my %result;
787    
788     defined $self->{db} or $self->open; # require layout
789    
790     for (@{$self->{inverted}->{$attr}}) {
791     my $result = $_->intervall($lb, $ub);
792     if (defined $result) {
793     $result{$_->name} = $result;
794     }
795     }
796     bless \%result, 'WAIT::Query::Raw';
797     }
798    
799     sub search {
800 ulpfr 19 my $self = shift;
801     my ($query, $attr, $cont, $raw);
802     if (ref $_[0]) {
803     $query = shift;
804    
805     $attr = $query->{attr};
806     $cont = $query->{cont};
807     $raw = $query->{raw};
808     } else {
809     require Carp;
810     Carp::cluck("Using three argument search interface is deprecated, use hashref interface instead");
811     $attr = shift;
812     $cont = shift;
813     $raw = shift;
814     $query = {
815     attr => $attr,
816     cont => $cont,
817     raw => $raw,
818     };
819     }
820    
821 ulpfr 10 my %result;
822    
823     defined $self->{db} or $self->open; # require layout
824    
825     if ($raw) {
826     for (@{$self->{inverted}->{$attr}}) {
827     my $name = $_->name;
828     if (exists $raw->{$name} and @{$raw->{$name}}) {
829     my $scale = 1/scalar(@{$raw->{$name}});
830 ulpfr 19 my %r = $_->search_raw($query, @{$raw->{$name}});
831 ulpfr 10 my ($key, $val);
832     while (($key, $val) = each %r) {
833     if (exists $result{$key}) {
834     $result{$key} += $val*$scale;
835     } else {
836     $result{$key} = $val*$scale;
837     }
838     }
839     }
840     }
841     }
842     if (defined $cont and $cont ne '') {
843     for (@{$self->{inverted}->{$attr}}) {
844 ulpfr 19 my %r = $_->search($query, $cont);
845 ulpfr 10 my ($key, $val);
846     while (($key, $val) = each %r) {
847     if (exists $result{$key}) {
848     $result{$key} += $val;
849     } else {
850     $result{$key} = $val;
851     }
852     }
853     }
854     }
855     # sanity check for deleted documents.
856     # this should not be necessary !@#$
857     for (keys %result) {
858     delete $result{$_} if $self->{deleted}->{$_}
859     }
860     %result;
861     }
862    
863     sub hilight_positions {
864     my ($self, $attr, $text, $query, $raw) = @_;
865     my %pos;
866    
867     if (defined $raw) {
868 ulpfr 13 for (@{$self->{inverted}->{$attr}}) { # objects of type
869     # WAIT::InvertedIndex for
870     # this index field $attr
871 ulpfr 10 my $name = $_->name;
872     if (exists $raw->{$name}) {
873     my %qt;
874     grep $qt{$_}++, @{$raw->{$name}};
875     for ($_->parse_pos($text)) {
876     if (exists $qt{$_->[0]}) {
877     $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
878     }
879     }
880     }
881     }
882     }
883     if (defined $query) {
884     for (@{$self->{inverted}->{$attr}}) {
885     my %qt;
886    
887     grep $qt{$_}++, $_->parse($query);
888     for ($_->parse_pos($text)) {
889     if (exists $qt{$_->[0]}) {
890     if (exists $pos{$_->[1]}) { # perl -w ;-)
891     $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
892     } else {
893     $pos{$_->[1]} = length($_->[0]);
894     }
895     }
896     }
897     }
898     }
899    
900     \%pos;
901     }
902    
903     sub hilight {
904 ulpfr 13 my ($tb, $buf, $qplain, $qraw) = @_;
905     my $layout = $tb->layout();
906    
907 ulpfr 10 my @result;
908    
909 ulpfr 13 $qplain ||= {};
910     $qraw ||= {};
911     my @ttxt = $layout->tag($buf);
912 ulpfr 10 while (@ttxt) {
913     no strict 'refs';
914     my %tag = %{shift @ttxt};
915     my $txt = shift @ttxt;
916     my $fld;
917    
918     my %hl;
919 ulpfr 13 for $fld (grep defined $tag{$_}, keys %$qplain, keys %$qraw) {
920 ulpfr 10 my $hp = $tb->hilight_positions($fld, $txt,
921 ulpfr 13 $qplain->{$fld}, $qraw->{$fld});
922 ulpfr 10 for (keys %$hp) {
923     if (exists $hl{$_}) { # -w ;-(
924     $hl{$_} = max($hl{$_}, $hp->{$_});
925     } else {
926     $hl{$_} = $hp->{$_};
927     }
928     }
929     }
930     my $pos;
931     my $qt = {_qt => 1, %tag};
932     my $pl = \%tag;
933     my $last = length($txt);
934     my @tmp;
935     for $pos (sort {$b <=> $a} keys %hl) {
936     unshift @tmp, $pl, substr($txt,$pos+$hl{$pos},$last-$pos-$hl{$pos});
937     unshift @tmp, $qt, substr($txt,$pos,$hl{$pos});
938     $last = $pos;
939     }
940     push @result, $pl, substr($txt,0,$last);
941     push @result, @tmp;
942     }
943     @result; # no speed necessary
944     }
945    
946     1;

Properties

Name Value
cvs2svn:cvs-rev 1.4

  ViewVC Help
Powered by ViewVC 1.1.26