/[wait]/trunk/lib/WAIT/Table.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WAIT/Table.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 85 - (hide annotations)
Fri May 3 16:16:10 2002 UTC (22 years ago) by ulpfr
Original Path: cvs-head/lib/WAIT/Table.pm
File size: 25272 byte(s)
First stab at moving backend from DB_File to BerkeleyDB.

1 ulpfr 13 # -*- Mode: Cperl -*-
2 ulpfr 10 # Table.pm --
3     # ITIID : $ITI$ $Header $__Header$
4     # Author : Ulrich Pfeifer
5     # Created On : Thu Aug 8 13:05:10 1996
6     # Last Modified By: Ulrich Pfeifer
7 ulpfr 85 # Last Modified On: Sat Apr 27 17:20:31 2002
8 ulpfr 10 # Language : CPerl
9 ulpfr 85 # Update Count : 172
10 ulpfr 10 # Status : Unknown, Use with caution!
11 ulpfr 13 #
12 ulpfr 10 # Copyright (c) 1996-1997, Ulrich Pfeifer
13 ulpfr 13 #
14 ulpfr 10
15     =head1 NAME
16    
17     WAIT::Table -- Module for maintaining Tables / Relations
18    
19     =head1 SYNOPSIS
20    
21     require WAIT::Table;
22    
23     =head1 DESCRIPTION
24    
25     =cut
26    
27     package WAIT::Table;
28 ulpfr 13
29     use WAIT::Table::Handle ();
30 ulpfr 10 require WAIT::Parse::Base;
31 ulpfr 13
32 ulpfr 10 use strict;
33     use Carp;
34 ulpfr 13 # use autouse Carp => qw( croak($) );
35 ulpfr 85 use BerkeleyDB;
36 ulpfr 10 use Fcntl;
37 ulpfr 19 use LockFile::Simple ();
38 ulpfr 10
39     my $USE_RECNO = 0;
40    
41     =head2 Creating a Table.
42    
43 ulpfr 13 The constructor WAIT::Table-E<gt>new is normally called via the
44 ulpfr 10 create_table method of a database handle. This is not enforced, but
45 ulpfr 13 creating a table does not make any sense unless the table is
46 ulpfr 10 registered by the database because the latter implements persistence
47     of the meta data. Registering is done automatically by letting the
48 ulpfr 13 database handle the creation of a table.
49 ulpfr 10
50 ulpfr 13 my $db = WAIT::Database->create(name => 'sample');
51     my $tb = $db->create_table(name => 'test',
52     access => $access,
53     layout => $layout,
54     attr => ['docid', 'headline'],
55     );
56 ulpfr 10
57     The constructor returns a handle for the table. This handle is hidden by the
58     table module, to prevent direct access if called via Table.
59    
60     =over 10
61    
62 ulpfr 13 =item C<access> => I<accessobj>
63 ulpfr 10
64 ulpfr 13 A reference to an access object for the external parts (attributes) of
65 ulpfr 10 tuples. As you may remember, the WAIT System does not enforce that
66     objects are completely stored inside the system to avoid duplication.
67 ulpfr 13 There is no (strong) point in storing all your HTML documents inside
68 ulpfr 10 the system when indexing your WWW-Server.
69    
70 ulpfr 13 The access object is designed to work like as a tied hash. You pass
71     the refernce to the object, not the tied hash though. An example
72     implementation of an access class that works for manpages is
73     WAIT::Document::Nroff.
74    
75     The implementation needs to take into account that WAIT will keep this
76     object in a Data::Dumper or Storable database and re-use it when sman
77     is run. So it is not good enough if we can produce the index with it
78     now, when we create or actively access the table, WAIT also must be
79     able to retrieve documents on its own, when we are in a different
80     context. This happens specifically in a retrieval. To get this working
81     seemlessly, the access-defining class must implement a close method.
82     This method will be called before the Data::Dumper dump takes place.
83     In that moment the access-defining class must get rid of all data
84     structures that cannot be reconstructed via the Data::Dumper dump,
85     such as database handles or C pointers.
86    
87 ulpfr 10 =item C<file> => I<fname>
88    
89     The filename of the records file. Files for indexes will have I<fname>
90 ulpfr 13 as prefix. I<Mandatory>, but usually taken care of by the
91     WAIT::Database handle when the constructor is called via
92     WAIT::Database::create_table().
93 ulpfr 10
94     =item C<name> => I<name>
95    
96     The name of this table. I<Mandatory>
97    
98     =item C<attr> => [ I<attr> ... ]
99    
100 ulpfr 13 A reference to an array of attribute names. WAIT will keep the
101     contents of these attributes in its table. I<Mandatory>
102 ulpfr 10
103     =item C<djk> => [ I<attr> ... ]
104    
105     A reference to an array of attribute names which make up the
106 ulpfr 13 I<disjointness key>. Don't think about it - it's of no use yet;
107 ulpfr 10
108     =item C<layout> => I<layoutobj>
109    
110 ulpfr 13 A reference to an external parser object. Defaults to a new instance
111     of C<WAIT::Parse::Base>. For an example implementation see
112     WAIT::Parse::Nroff. A layout class can be implemented as a singleton
113     class if you so like.
114 ulpfr 10
115 ulpfr 13 =item C<keyset> => I<keyset>
116 ulpfr 10
117 ulpfr 13 The set of attributes needed to identify a record. Defaults to all
118     attributes.
119 ulpfr 10
120 ulpfr 13 =item C<invindex> => I<inverted index>
121    
122     A reference to an anon array defining attributes of each record that
123     need to be indexed. See the source of smakewhatis for how to set this
124     up.
125    
126 ulpfr 10 =back
127    
128     =cut
129    
130     sub new {
131     my $type = shift;
132     my %parm = @_;
133     my $self = {};
134    
135 ulpfr 13 # Check for mandatory attrs early
136     $self->{name} = $parm{name} or croak "No name specified";
137     $self->{attr} = $parm{attr} or croak "No attributes specified";
138    
139 ulpfr 10 # Do that before we eventually add '_weight' to attributes.
140     $self->{keyset} = $parm{keyset} || [[@{$parm{attr}}]];
141 ulpfr 13
142 ulpfr 10 $self->{mode} = O_CREAT | O_RDWR;
143 ulpfr 13
144 ulpfr 10 # Determine and set up subclass
145     $type = ref($type) || $type;
146     if (defined $parm{djk}) {
147     if (@{$parm{djk}} == @{$parm{attr}}) {
148     # All attributes in DK (sloppy test here!)
149     $type .= '::Independent';
150     require WAIT::Table::Independent;
151     } else {
152     $type .= '::Disjoint';
153     require WAIT::Table::Disjoint;
154     }
155     # Add '_weight' to attributes
156     my %attr;
157     @attr{@{$parm{attr}}} = (1) x @{$parm{attr}};
158     unshift @{$parm{attr}}, '_weight' unless $attr{'_weight'};
159     }
160    
161     $self->{file} = $parm{file} or croak "No file specified";
162 ulpfr 85 if (-e $self->{file}){
163     warn "Warning: file '$self->{file}' already exists\n";
164 ulpfr 10 }
165 ulpfr 19
166 ulpfr 10 $self->{djk} = $parm{djk} if defined $parm{djk};
167     $self->{layout} = $parm{layout} || new WAIT::Parse::Base;
168     $self->{access} = $parm{access} if defined $parm{access};
169     $self->{nextk} = 1; # next record to insert; first record unused
170     $self->{deleted} = {}; # no deleted records yet
171     $self->{indexes} = {};
172    
173     bless $self, $type;
174 ulpfr 35
175     # Checking for readers is not necessary, but let's go with the
176     # generic method.
177     $self->getlock(O_RDWR|O_CREAT); # dies when failing
178    
179 ulpfr 10 # Call create_index() and create_index() for compatibility
180     for (@{$self->{keyset}||[]}) {
181     #carp "Specification of indexes at table create time is deprecated";
182     $self->create_index(@$_);
183     }
184     while (@{$parm{invindex}||[]}) {
185     # carp "Specification of inverted indexes at table create time is deprecated";
186     my $att = shift @{$parm{invindex}};
187     my @spec = @{shift @{$parm{invindex}}};
188     my @opt;
189 ulpfr 13
190 ulpfr 10 if (ref($spec[0])) {
191     carp "Secondary pipelines are deprecated\n";
192     @opt = %{shift @spec};
193     }
194     $self->create_inverted_index(attribute => $att, pipeline => \@spec, @opt);
195     }
196 ulpfr 19
197 ulpfr 10 $self;
198     # end of backwarn compatibility stuff
199     }
200    
201     =head2 Creating an index
202    
203     $tb->create_index('docid');
204    
205     =item C<create_index>
206    
207     must be called with a list of attributes. This must be a subset of the
208     attributes specified when the table was created. Currently this
209     method must be called before the first tuple is inserted in the
210     table!
211    
212     =cut
213    
214     sub create_index {
215     my $self= shift;
216 ulpfr 13
217 ulpfr 10 croak "Cannot create index for table aready populated"
218     if $self->{nextk} > 1;
219 ulpfr 13
220 ulpfr 10 require WAIT::Index;
221 ulpfr 13
222 ulpfr 10 my $name = join '-', @_;
223     $self->{indexes}->{$name} =
224 ulpfr 85 new WAIT::Index file => $self->{file}, name => $name, attr => $_;
225 ulpfr 10 }
226    
227     =head2 Creating an inverted index
228    
229     $tb->create_inverted_index
230     (attribute => 'au',
231     pipeline => ['detex', 'isotr', 'isolc', 'split2', 'stop'],
232     predicate => 'plain',
233     );
234    
235     =over 5
236    
237     =item C<attribute>
238    
239     The attribute to build the index on. This attribute may not be in the
240     set attributes specified when the table was created.
241    
242     =item C<pipeline>
243    
244 ulpfr 13 A piplines specification is a reference to an array of method names
245     (from package C<WAIT::Filter>) which are to be applied in sequence to
246     the contents of the named attribute. The attribute name may not be in
247     the attribute list.
248 ulpfr 10
249     =item C<predicate>
250    
251     An indication which predicate the index implements. This may be
252     e.g. 'plain', 'stemming' or 'soundex'. The indicator will be used for
253     query processing. Currently there is no standard set of predicate
254 ulpfr 13 names. The predicate defaults to the last member of the pipeline if
255 ulpfr 10 omitted.
256    
257     =back
258    
259     Currently this method must be called before the first tuple is
260     inserted in the table!
261    
262     =cut
263    
264     sub create_inverted_index {
265     my $self = shift;
266     my %parm = @_;
267    
268     croak "No attribute specified" unless $parm{attribute};
269     croak "No pipeline specified" unless $parm{pipeline};
270    
271     $parm{predicate} ||= $parm{pipeline}->[-1];
272 ulpfr 13
273 ulpfr 10 croak "Cannot create index for table aready populated"
274     if $self->{nextk} > 1;
275 ulpfr 13
276 ulpfr 10 require WAIT::InvertedIndex;
277    
278     # backward compatibility stuff
279     my %opt = %parm;
280     for (qw(attribute pipeline predicate)) {
281     delete $opt{$_};
282     }
283 ulpfr 13
284 ulpfr 10 my $name = join '_', ($parm{attribute}, @{$parm{pipeline}});
285     my $idx = new WAIT::InvertedIndex(file => $self->{file}.'/'.$name,
286     filter => [@{$parm{pipeline}}], # clone
287     name => $name,
288     attr => $parm{attribute},
289     %opt, # backward compatibility stuff
290     );
291     # We will have to use $parm{predicate} here
292     push @{$self->{inverted}->{$parm{attribute}}}, $idx;
293     }
294    
295     sub dir {
296     $_[0]->{file};
297     }
298    
299     =head2 C<$tb-E<gt>layout>
300    
301     Returns the reference to the associated parser object.
302    
303     =cut
304    
305     sub layout { $_[0]->{layout} }
306    
307     =head2 C<$tb-E<gt>fields>
308    
309     Returns the array of attribute names.
310    
311     =cut
312    
313    
314     sub fields { keys %{$_[0]->{inverted}}}
315    
316     =head2 C<$tb-E<gt>drop>
317    
318     Must be called via C<WAIT::Database::drop_table>
319    
320     =cut
321    
322     sub drop {
323     my $self = shift;
324 ulpfr 35
325     unless ($self->{write_lock}){
326     warn "Cannot drop table without write lock. Nothing done";
327     return;
328     }
329    
330 ulpfr 10 if ((caller)[0] eq 'WAIT::Database') { # database knows about this
331     $self->close; # just make sure
332 ulpfr 35
333 ulpfr 10 my $file = $self->{file};
334    
335     for (values %{$self->{indexes}}) {
336     $_->drop;
337     }
338 ulpfr 85 rmdir "$file.read" or warn "Could not rmdir '$file/read'";
339     unlink "$file";
340    
341 ulpfr 10 } else {
342     croak ref($self)."::drop called directly";
343     }
344     }
345    
346     sub mrequire ($) {
347     my $module = shift;
348    
349     $module =~ s{::}{/}g;
350     $module .= '.pm';
351     require $module;
352     }
353    
354     sub open {
355     my $self = shift;
356     my $file = $self->{file} . '/records';
357    
358     mrequire ref($self); # that's tricky eh?
359     if (defined $self->{'layout'}) {
360     mrequire ref($self->{'layout'});
361     }
362     if (defined $self->{'access'}) {
363     mrequire ref($self->{'access'});
364     }
365     if (exists $self->{indexes}) {
366     require WAIT::Index;
367     for (values %{$self->{indexes}}) {
368     $_->{mode} = $self->{mode};
369     }
370     }
371     if (exists $self->{inverted}) {
372     my ($att, $idx);
373     for $att (keys %{$self->{inverted}}) {
374     for $idx (@{$self->{inverted}->{$att}}) {
375     $idx->{mode} = $self->{mode};
376     }
377     }
378     require WAIT::InvertedIndex;
379     }
380 ulpfr 42
381     $self->getlock($self->{mode});
382    
383 ulpfr 85 my $dbmode = ($self->{mode} & O_CREAT) ? DB_CREATE : 0;
384 ulpfr 10 unless (defined $self->{dbh}) {
385     if ($USE_RECNO) {
386 ulpfr 85 tie(%{$self->{db}}, 'BerkeleyDB::Recno',
387     -Filename => $self->{file},
388     -Subname => 'records',
389     -Flags => $dbmode);
390 ulpfr 10 } else {
391     $self->{dbh} =
392 ulpfr 85 tie(%{$self->{db}}, 'BerkeleyDB::Btree',
393     -Filename => $self->{file},
394     -Subname => 'records',
395     -Mode => 0664,
396     -Flags => $dbmode);
397 ulpfr 10 }
398     }
399 ulpfr 35
400    
401 ulpfr 10 $self;
402     }
403    
404     sub fetch_extern {
405     my $self = shift;
406    
407 ulpfr 13 # print "#@_", $self->{'access'}->{Mode}, "\n"; # DEBUGGING?
408 ulpfr 10 if (exists $self->{'access'}) {
409     mrequire ref($self->{'access'});
410     $self->{'access'}->FETCH(@_);
411     }
412     }
413    
414     sub fetch_extern_by_id {
415     my $self = shift;
416    
417     $self->fetch_extern($self->fetch(@_));
418     }
419    
420     sub _find_index {
421     my $self = shift;
422     my (@att) = @_;
423     my %att;
424     my $name;
425 ulpfr 13
426 ulpfr 10 @att{@att} = @att;
427    
428     KEY: for $name (keys %{$self->{indexes}}) {
429     my @iat = split /-/, $name;
430     for (@iat) {
431     next KEY unless exists $att{$_};
432     }
433     return $self->{indexes}->{$name};
434     }
435     return undef;
436     }
437    
438     sub have {
439     my $self = shift;
440     my %parm = @_;
441    
442 ulpfr 13 my $index = $self->_find_index(keys %parm) or return; # no index-no have
443    
444 ulpfr 10 defined $self->{db} or $self->open;
445     return $index->have(@_);
446     }
447    
448     sub insert {
449     my $self = shift;
450     my %parm = @_;
451    
452     defined $self->{db} or $self->open;
453    
454 ulpfr 13 # We should move all writing methods to a subclass to check only once
455     $self->{mode} & O_RDWR or croak "Cannot insert into table opened in RD_ONLY mode";
456    
457 ulpfr 10 my $tuple = join($;, map($parm{$_} || '', @{$self->{attr}}));
458     my $key;
459     my @deleted = keys %{$self->{deleted}};
460 ulpfr 19 my $gotkey = 0;
461 ulpfr 10
462     if (@deleted) {
463     $key = pop @deleted;
464     delete $self->{deleted}->{$key};
465 ulpfr 19 # Sanity check
466     if ($key && $key>0) {
467     $gotkey=1;
468 ulpfr 10 } else {
469 ulpfr 19 warn(sprintf("WAIT database inconsistency during insert ".
470     "key[%s]: Please rebuild index\n",
471     $key
472     ));
473     }
474     }
475     unless ($gotkey) {
476 ulpfr 10 $key = $self->{nextk}++;
477     }
478     if ($USE_RECNO) {
479     $self->{db}->[$key] = $tuple;
480     } else {
481     $self->{db}->{$key} = $tuple;
482     }
483     for (values %{$self->{indexes}}) {
484     unless ($_->insert($key, %parm)) {
485     # duplicate key, undo changes
486     if ($key == $self->{nextk}-1) {
487     $self->{nextk}--;
488     } else {
489 ulpfr 19 # warn "setting key[$key] deleted during insert";
490 ulpfr 10 $self->{deleted}->{$key}=1;
491     }
492     my $idx;
493     for $idx (values %{$self->{indexes}}) {
494     last if $idx eq $_;
495     $idx->remove($key, %parm);
496     }
497     return undef;
498 ulpfr 13 }
499 ulpfr 10 }
500     if (defined $self->{inverted}) {
501     my $att;
502     for $att (keys %{$self->{inverted}}) {
503     if (defined $parm{$att}) {
504     map $_->insert($key, $parm{$att}), @{$self->{inverted}->{$att}};
505     #map $_->sync, @{$self->{inverted}->{$att}}
506     }
507     }
508     }
509     $key
510     }
511    
512     sub sync {
513     my $self = shift;
514 ulpfr 13
515 ulpfr 10 for (values %{$self->{indexes}}) {
516     map $_->sync, $_;
517     }
518     if (defined $self->{inverted}) {
519     my $att;
520     for $att (keys %{$self->{inverted}}) {
521     map $_->sync, @{$self->{inverted}->{$att}}
522     }
523     }
524     }
525    
526     sub fetch {
527     my $self = shift;
528     my $key = shift;
529    
530     return () if exists $self->{deleted}->{$key};
531 ulpfr 13
532 ulpfr 10 defined $self->{db} or $self->open;
533     if ($USE_RECNO) {
534     $self->unpack($self->{db}->[$key]);
535     } else {
536     $self->unpack($self->{db}->{$key});
537     }
538     }
539    
540     sub delete_by_key {
541     my $self = shift;
542     my $key = shift;
543    
544 ulpfr 19 unless ($key) {
545     Carp::cluck "Warning: delete_by_key called without key. Looks like a bug in WAIT?";
546     return;
547     }
548    
549 ulpfr 10 return $self->{deleted}->{$key} if defined $self->{deleted}->{$key};
550     my %tuple = $self->fetch($key);
551     for (values %{$self->{indexes}}) {
552     $_->delete($key, %tuple);
553     }
554     if (defined $self->{inverted}) {
555     # User *must* provide the full record for this or the entries
556     # in the inverted index will not be removed
557     %tuple = (%tuple, @_);
558     my $att;
559     for $att (keys %{$self->{inverted}}) {
560     if (defined $tuple{$att}) {
561     map $_->delete($key, $tuple{$att}), @{$self->{inverted}->{$att}}
562     }
563     }
564     }
565 ulpfr 19 # warn "setting key[$key] deleted during delete_by_key";
566 ulpfr 10 ++$self->{deleted}->{$key};
567     }
568    
569     sub delete {
570     my $self = shift;
571     my $tkey = $self->have(@_);
572 ulpfr 19 # warn "tkey[$tkey]\@_[@_]";
573 ulpfr 10 defined $tkey && $self->delete_by_key($tkey, @_);
574     }
575    
576     sub unpack {
577 laperla 31 my($self, $tuple) = @_;
578 ulpfr 10
579 laperla 31 unless (defined $tuple){
580     # require Carp; # unfortunately gives us "bizarre copy...." :-(((((
581     warn("Debug: somebody called unpack without argument tuple!");
582     return;
583     }
584    
585 ulpfr 10 my $att;
586     my @result;
587     my @tuple = split /$;/, $tuple;
588    
589     for $att (@{$self->{attr}}) {
590     push @result, $att, shift @tuple;
591     }
592     @result;
593     }
594    
595 ulpfr 19 sub set {
596     my ($self, $iattr, $value) = @_;
597    
598 laperla 31 unless ($self->{write_lock}){
599     warn "Cannot set iattr[$iattr] without write lock. Nothing done";
600     return;
601 ulpfr 24 }
602 laperla 77
603     # in the rare case that they haven't written a single record yet, we
604     # make sure, the inverted inherits our $self->{mode}:
605     defined $self->{db} or $self->open;
606    
607 ulpfr 19 for my $att (keys %{$self->{inverted}}) {
608     if ($] > 5.003) { # avoid bug in perl up to 5.003_05
609     my $idx;
610     for $idx (@{$self->{inverted}->{$att}}) {
611     $idx->set($iattr, $value);
612     }
613     } else {
614     map $_->set($iattr, $value), @{$self->{inverted}->{$att}};
615     }
616     }
617    
618     1;
619     }
620    
621 ulpfr 10 sub close {
622     my $self = shift;
623    
624     if (exists $self->{'access'}) {
625     eval {$self->{'access'}->close}; # dont bother if not opened
626     }
627 laperla 31 if ($WAIT::Index::VERSION) {
628     for (values %{$self->{indexes}}) {
629     $_->close();
630     }
631 ulpfr 10 }
632 laperla 31 if (defined $self->{inverted} && $WAIT::InvertedIndex::VERSION) {
633     # require WAIT::InvertedIndex; Uli: we can avoid closing indexes:
634     # if WAIT::InvertedIndex has not been loaded, they cannot have
635     # been altered so far
636 ulpfr 10 my $att;
637     for $att (keys %{$self->{inverted}}) {
638     if ($] > 5.003) { # avoid bug in perl up to 5.003_05
639     my $idx;
640     for $idx (@{$self->{inverted}->{$att}}) {
641     $idx->close;
642     }
643     } else {
644     map $_->close(), @{$self->{inverted}->{$att}};
645     }
646     }
647     }
648     if ($self->{dbh}) {
649     delete $self->{dbh};
650    
651     if ($USE_RECNO) {
652     untie @{$self->{db}};
653     } else {
654     untie %{$self->{db}};
655     }
656     delete $self->{db};
657     }
658    
659 ulpfr 19 $self->unlock;
660    
661 ulpfr 10 1;
662     }
663    
664 ulpfr 35 # Locking
665     #
666     # We allow multiple readers to coexists. But write access excludes
667     # all read access and vice versa. In practice read access on tables
668     # open for writing will mostly work ;-)
669    
670     # If a "write" lock is requested, an existing "read" lock will be
671     # released. If a "read" lock ist requested, an existing "write" lock
672     # will be released. Requiring a lock already hold has no effect.
673 laperla 41
674 ulpfr 35 sub getlock {
675     my ($self, $mode) = @_;
676 laperla 41
677     # autoclean cleans on DESTROY, stale sends SIGZERO to the owner
678     #
679     my $lockmgr = LockFile::Simple->make(-autoclean => 1, -stale => 1);
680 ulpfr 85 my $file = $self->{file};
681     my $lockdir = $self->{file} . '.read';
682 ulpfr 35
683     unless (-d $lockdir) {
684     mkdir $lockdir, 0755 or die "Could not mkdir $lockdir: $!";
685     }
686    
687     if ($mode & O_RDWR) { # Get a write lock. Release it again
688     # and die if there is any valid
689     # readers.
690    
691     # Have a write lock already
692     return $self if $self->{write_lock};
693    
694     if ($self->{read_lock}) { # We are a becoming a writer now. So
695     # we release the read lock to avoid
696     # blocking ourselves.
697     $self->{read_lock}->release;
698     delete $self->{read_lock};
699     }
700    
701     # Get the preliminary write lock
702 ulpfr 85 $self->{write_lock} = $lockmgr->lock($self->{file} . '.write')
703     or die "Can't lock '$self->{file}.write'";
704 ulpfr 35
705     # If we actually want to write we must check if there are any
706     # readers. The write lock is confirmed if wen cannot find any
707     # valid readers.
708    
709     local *DIR;
710     opendir DIR, $lockdir or
711     die "Could not opendir '$lockdir': $!";
712     for my $lockfile (grep { -f "$lockdir/$_" } readdir DIR) {
713     # Check if the locks are still valid. Since we are protected by
714     # a write lock, we could use a plain file. But we want to use
715     # the stale testing from LockFile::Simple.
716     if (my $lck = $lockmgr->trylock("$lockdir/$lockfile")) {
717     warn "Removing stale lockfile '$lockdir/$lockfile'";
718     $lck->release;
719     } else { # Found an active reader, rats!
720     $self->{write_lock}->release;
721     die "Cannot write table '$file' while it's in use";
722     }
723     }
724     closedir DIR;
725     } else {
726     # Have a read lock already
727     return $self if $self->{read_lock};
728    
729     # Get the preliminary write lock to protect the directory
730 ulpfr 66 # operations.
731 ulpfr 35
732 ulpfr 85 my $write_lock = $lockmgr->lock($self->{file} . '.read/write')
733     or die "Can't lock '$self->{file}.read/write'";
734 ulpfr 35
735     # Find a new read slot. Maybe the plain file would be better?
736     my $id = time;
737     while (-f "$lockdir/$id.lock") { # here assume ".lock" format!
738     $id++;
739     }
740    
741     $self->{read_lock} = $lockmgr->lock("$lockdir/$id")
742     or die "Can't lock '$lockdir/$id'";
743    
744     # We are a reader now. So we release the write lock
745 ulpfr 66 $write_lock->release;
746 ulpfr 35 }
747     return $self;
748     }
749    
750 ulpfr 19 sub unlock {
751     my $self = shift;
752    
753     # Either we have a read or a write lock (or we close the table already)
754     # unless ($self->{read_lock} || $self->{write_lock}) {
755     # warn "WAIT::Table::unlock: Table aparently hold's no lock"
756     # }
757     if ($self->{write_lock}) {
758     $self->{write_lock}->release();
759     delete $self->{write_lock};
760     }
761     if ($self->{read_lock}) {
762     $self->{read_lock}->release();
763     delete $self->{read_lock};
764     }
765    
766     }
767    
768 ulpfr 13 sub DESTROY {
769     my $self = shift;
770    
771 ulpfr 35 if ($self->{write_lock} || $self->{read_lock}) {
772     warn "Table handle destroyed without closing it first";
773     $self->unlock;
774     }
775 ulpfr 13 }
776    
777 ulpfr 10 sub open_scan {
778     my $self = shift;
779     my $code = shift;
780    
781     $self->{dbh} or $self->open;
782     require WAIT::Scan;
783     new WAIT::Scan $self, $self->{nextk}-1, $code;
784     }
785    
786     sub open_index_scan {
787     my $self = shift;
788     my $attr = shift;
789     my $code = shift;
790     my $name = join '-', @$attr;
791    
792     if (defined $self->{indexes}->{$name}) {
793     $self->{indexes}->{$name}->open_scan($code);
794     } else {
795     croak "No such index '$name'";
796     }
797     }
798    
799     eval {sub WAIT::Query::Raw::new} unless defined \&WAIT::Query::Raw::new;
800    
801     sub prefix {
802     my ($self , $attr, $prefix) = @_;
803     my %result;
804    
805     defined $self->{db} or $self->open; # require layout
806    
807     for (@{$self->{inverted}->{$attr}}) {
808     my $result = $_->prefix($prefix);
809     if (defined $result) {
810     $result{$_->name} = $result;
811     }
812     }
813     bless \%result, 'WAIT::Query::Raw';
814     }
815    
816     sub intervall {
817     my ($self, $attr, $lb, $ub) = @_;
818     my %result;
819    
820     defined $self->{db} or $self->open; # require layout
821    
822     for (@{$self->{inverted}->{$attr}}) {
823     my $result = $_->intervall($lb, $ub);
824     if (defined $result) {
825     $result{$_->name} = $result;
826     }
827     }
828     bless \%result, 'WAIT::Query::Raw';
829     }
830    
831     sub search {
832 ulpfr 19 my $self = shift;
833     my ($query, $attr, $cont, $raw);
834     if (ref $_[0]) {
835     $query = shift;
836    
837     $attr = $query->{attr};
838     $cont = $query->{cont};
839     $raw = $query->{raw};
840     } else {
841     require Carp;
842     Carp::cluck("Using three argument search interface is deprecated, use hashref interface instead");
843     $attr = shift;
844     $cont = shift;
845     $raw = shift;
846     $query = {
847     attr => $attr,
848     cont => $cont,
849     raw => $raw,
850     };
851     }
852    
853 ulpfr 10 my %result;
854    
855     defined $self->{db} or $self->open; # require layout
856    
857     if ($raw) {
858     for (@{$self->{inverted}->{$attr}}) {
859     my $name = $_->name;
860     if (exists $raw->{$name} and @{$raw->{$name}}) {
861     my $scale = 1/scalar(@{$raw->{$name}});
862 ulpfr 19 my %r = $_->search_raw($query, @{$raw->{$name}});
863 ulpfr 10 my ($key, $val);
864     while (($key, $val) = each %r) {
865     if (exists $result{$key}) {
866     $result{$key} += $val*$scale;
867     } else {
868     $result{$key} = $val*$scale;
869     }
870     }
871     }
872     }
873     }
874     if (defined $cont and $cont ne '') {
875     for (@{$self->{inverted}->{$attr}}) {
876 ulpfr 19 my %r = $_->search($query, $cont);
877 ulpfr 10 my ($key, $val);
878     while (($key, $val) = each %r) {
879     if (exists $result{$key}) {
880     $result{$key} += $val;
881     } else {
882     $result{$key} = $val;
883     }
884     }
885     }
886     }
887     # sanity check for deleted documents.
888     # this should not be necessary !@#$
889     for (keys %result) {
890     delete $result{$_} if $self->{deleted}->{$_}
891     }
892     %result;
893     }
894    
895     sub hilight_positions {
896     my ($self, $attr, $text, $query, $raw) = @_;
897     my %pos;
898    
899     if (defined $raw) {
900 ulpfr 13 for (@{$self->{inverted}->{$attr}}) { # objects of type
901     # WAIT::InvertedIndex for
902     # this index field $attr
903 ulpfr 10 my $name = $_->name;
904     if (exists $raw->{$name}) {
905     my %qt;
906     grep $qt{$_}++, @{$raw->{$name}};
907     for ($_->parse_pos($text)) {
908     if (exists $qt{$_->[0]}) {
909     $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
910     }
911     }
912     }
913     }
914     }
915     if (defined $query) {
916     for (@{$self->{inverted}->{$attr}}) {
917     my %qt;
918    
919     grep $qt{$_}++, $_->parse($query);
920     for ($_->parse_pos($text)) {
921     if (exists $qt{$_->[0]}) {
922     if (exists $pos{$_->[1]}) { # perl -w ;-)
923     $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
924     } else {
925     $pos{$_->[1]} = length($_->[0]);
926     }
927     }
928     }
929     }
930     }
931    
932     \%pos;
933     }
934    
935     sub hilight {
936 ulpfr 13 my ($tb, $buf, $qplain, $qraw) = @_;
937     my $layout = $tb->layout();
938    
939 ulpfr 10 my @result;
940    
941 ulpfr 13 $qplain ||= {};
942     $qraw ||= {};
943     my @ttxt = $layout->tag($buf);
944 ulpfr 10 while (@ttxt) {
945     no strict 'refs';
946     my %tag = %{shift @ttxt};
947     my $txt = shift @ttxt;
948     my $fld;
949    
950     my %hl;
951 ulpfr 13 for $fld (grep defined $tag{$_}, keys %$qplain, keys %$qraw) {
952 ulpfr 10 my $hp = $tb->hilight_positions($fld, $txt,
953 ulpfr 13 $qplain->{$fld}, $qraw->{$fld});
954 ulpfr 10 for (keys %$hp) {
955     if (exists $hl{$_}) { # -w ;-(
956     $hl{$_} = max($hl{$_}, $hp->{$_});
957     } else {
958     $hl{$_} = $hp->{$_};
959     }
960     }
961     }
962     my $pos;
963     my $qt = {_qt => 1, %tag};
964     my $pl = \%tag;
965     my $last = length($txt);
966     my @tmp;
967     for $pos (sort {$b <=> $a} keys %hl) {
968     unshift @tmp, $pl, substr($txt,$pos+$hl{$pos},$last-$pos-$hl{$pos});
969     unshift @tmp, $qt, substr($txt,$pos,$hl{$pos});
970     $last = $pos;
971     }
972     push @result, $pl, substr($txt,0,$last);
973     push @result, @tmp;
974     }
975     @result; # no speed necessary
976     }
977    
978     1;

Properties

Name Value
cvs2svn:cvs-rev 1.10

  ViewVC Help
Powered by ViewVC 1.1.26