/[wait]/trunk/lib/WAIT/Table.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WAIT/Table.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 86 - (hide annotations)
Mon May 24 13:41:28 2004 UTC (19 years, 11 months ago) by dpavlin
Original Path: cvs-head/lib/WAIT/Table.pm
File size: 25308 byte(s)
moved local changes in WAIT-1.800 to latest CVS checkout, save fixed
branch as WAIT-1.800+fixes

1 ulpfr 13 # -*- Mode: Cperl -*-
2 ulpfr 10 # Table.pm --
3     # ITIID : $ITI$ $Header $__Header$
4     # Author : Ulrich Pfeifer
5     # Created On : Thu Aug 8 13:05:10 1996
6     # Last Modified By: Ulrich Pfeifer
7 ulpfr 85 # Last Modified On: Sat Apr 27 17:20:31 2002
8 ulpfr 10 # Language : CPerl
9 ulpfr 85 # Update Count : 172
10 ulpfr 10 # Status : Unknown, Use with caution!
11 ulpfr 13 #
12 ulpfr 10 # Copyright (c) 1996-1997, Ulrich Pfeifer
13 ulpfr 13 #
14 ulpfr 10
15     =head1 NAME
16    
17     WAIT::Table -- Module for maintaining Tables / Relations
18    
19     =head1 SYNOPSIS
20    
21     require WAIT::Table;
22    
23     =head1 DESCRIPTION
24    
25     =cut
26    
27     package WAIT::Table;
28 ulpfr 13
29     use WAIT::Table::Handle ();
30 ulpfr 10 require WAIT::Parse::Base;
31 ulpfr 13
32 ulpfr 10 use strict;
33     use Carp;
34 ulpfr 13 # use autouse Carp => qw( croak($) );
35 ulpfr 85 use BerkeleyDB;
36 ulpfr 10 use Fcntl;
37 ulpfr 19 use LockFile::Simple ();
38 ulpfr 10
39     my $USE_RECNO = 0;
40    
41     =head2 Creating a Table.
42    
43 ulpfr 13 The constructor WAIT::Table-E<gt>new is normally called via the
44 ulpfr 10 create_table method of a database handle. This is not enforced, but
45 ulpfr 13 creating a table does not make any sense unless the table is
46 ulpfr 10 registered by the database because the latter implements persistence
47     of the meta data. Registering is done automatically by letting the
48 ulpfr 13 database handle the creation of a table.
49 ulpfr 10
50 ulpfr 13 my $db = WAIT::Database->create(name => 'sample');
51     my $tb = $db->create_table(name => 'test',
52     access => $access,
53     layout => $layout,
54     attr => ['docid', 'headline'],
55     );
56 ulpfr 10
57     The constructor returns a handle for the table. This handle is hidden by the
58     table module, to prevent direct access if called via Table.
59    
60     =over 10
61    
62 ulpfr 13 =item C<access> => I<accessobj>
63 ulpfr 10
64 ulpfr 13 A reference to an access object for the external parts (attributes) of
65 ulpfr 10 tuples. As you may remember, the WAIT System does not enforce that
66     objects are completely stored inside the system to avoid duplication.
67 ulpfr 13 There is no (strong) point in storing all your HTML documents inside
68 ulpfr 10 the system when indexing your WWW-Server.
69    
70 ulpfr 13 The access object is designed to work like as a tied hash. You pass
71     the refernce to the object, not the tied hash though. An example
72     implementation of an access class that works for manpages is
73     WAIT::Document::Nroff.
74    
75     The implementation needs to take into account that WAIT will keep this
76     object in a Data::Dumper or Storable database and re-use it when sman
77     is run. So it is not good enough if we can produce the index with it
78     now, when we create or actively access the table, WAIT also must be
79     able to retrieve documents on its own, when we are in a different
80     context. This happens specifically in a retrieval. To get this working
81     seemlessly, the access-defining class must implement a close method.
82     This method will be called before the Data::Dumper dump takes place.
83     In that moment the access-defining class must get rid of all data
84     structures that cannot be reconstructed via the Data::Dumper dump,
85     such as database handles or C pointers.
86    
87 ulpfr 10 =item C<file> => I<fname>
88    
89     The filename of the records file. Files for indexes will have I<fname>
90 ulpfr 13 as prefix. I<Mandatory>, but usually taken care of by the
91     WAIT::Database handle when the constructor is called via
92     WAIT::Database::create_table().
93 ulpfr 10
94     =item C<name> => I<name>
95    
96     The name of this table. I<Mandatory>
97    
98     =item C<attr> => [ I<attr> ... ]
99    
100 ulpfr 13 A reference to an array of attribute names. WAIT will keep the
101     contents of these attributes in its table. I<Mandatory>
102 ulpfr 10
103     =item C<djk> => [ I<attr> ... ]
104    
105     A reference to an array of attribute names which make up the
106 ulpfr 13 I<disjointness key>. Don't think about it - it's of no use yet;
107 ulpfr 10
108     =item C<layout> => I<layoutobj>
109    
110 ulpfr 13 A reference to an external parser object. Defaults to a new instance
111     of C<WAIT::Parse::Base>. For an example implementation see
112     WAIT::Parse::Nroff. A layout class can be implemented as a singleton
113     class if you so like.
114 ulpfr 10
115 ulpfr 13 =item C<keyset> => I<keyset>
116 ulpfr 10
117 ulpfr 13 The set of attributes needed to identify a record. Defaults to all
118     attributes.
119 ulpfr 10
120 ulpfr 13 =item C<invindex> => I<inverted index>
121    
122     A reference to an anon array defining attributes of each record that
123     need to be indexed. See the source of smakewhatis for how to set this
124     up.
125    
126 ulpfr 10 =back
127    
128     =cut
129    
130     sub new {
131     my $type = shift;
132     my %parm = @_;
133     my $self = {};
134    
135 ulpfr 13 # Check for mandatory attrs early
136     $self->{name} = $parm{name} or croak "No name specified";
137     $self->{attr} = $parm{attr} or croak "No attributes specified";
138    
139 ulpfr 10 # Do that before we eventually add '_weight' to attributes.
140     $self->{keyset} = $parm{keyset} || [[@{$parm{attr}}]];
141 ulpfr 13
142 ulpfr 10 $self->{mode} = O_CREAT | O_RDWR;
143 ulpfr 13
144 ulpfr 10 # Determine and set up subclass
145     $type = ref($type) || $type;
146     if (defined $parm{djk}) {
147     if (@{$parm{djk}} == @{$parm{attr}}) {
148     # All attributes in DK (sloppy test here!)
149     $type .= '::Independent';
150     require WAIT::Table::Independent;
151     } else {
152     $type .= '::Disjoint';
153     require WAIT::Table::Disjoint;
154     }
155     # Add '_weight' to attributes
156     my %attr;
157     @attr{@{$parm{attr}}} = (1) x @{$parm{attr}};
158     unshift @{$parm{attr}}, '_weight' unless $attr{'_weight'};
159     }
160    
161     $self->{file} = $parm{file} or croak "No file specified";
162 ulpfr 85 if (-e $self->{file}){
163     warn "Warning: file '$self->{file}' already exists\n";
164 ulpfr 10 }
165 ulpfr 19
166 ulpfr 10 $self->{djk} = $parm{djk} if defined $parm{djk};
167     $self->{layout} = $parm{layout} || new WAIT::Parse::Base;
168     $self->{access} = $parm{access} if defined $parm{access};
169     $self->{nextk} = 1; # next record to insert; first record unused
170     $self->{deleted} = {}; # no deleted records yet
171     $self->{indexes} = {};
172    
173     bless $self, $type;
174 ulpfr 35
175     # Checking for readers is not necessary, but let's go with the
176     # generic method.
177     $self->getlock(O_RDWR|O_CREAT); # dies when failing
178    
179 ulpfr 10 # Call create_index() and create_index() for compatibility
180     for (@{$self->{keyset}||[]}) {
181     #carp "Specification of indexes at table create time is deprecated";
182     $self->create_index(@$_);
183     }
184     while (@{$parm{invindex}||[]}) {
185     # carp "Specification of inverted indexes at table create time is deprecated";
186     my $att = shift @{$parm{invindex}};
187     my @spec = @{shift @{$parm{invindex}}};
188     my @opt;
189 ulpfr 13
190 ulpfr 10 if (ref($spec[0])) {
191     carp "Secondary pipelines are deprecated\n";
192     @opt = %{shift @spec};
193     }
194     $self->create_inverted_index(attribute => $att, pipeline => \@spec, @opt);
195     }
196 ulpfr 19
197 ulpfr 10 $self;
198     # end of backwarn compatibility stuff
199     }
200    
201     =head2 Creating an index
202    
203     $tb->create_index('docid');
204    
205     =item C<create_index>
206    
207     must be called with a list of attributes. This must be a subset of the
208     attributes specified when the table was created. Currently this
209     method must be called before the first tuple is inserted in the
210     table!
211    
212     =cut
213    
214     sub create_index {
215     my $self= shift;
216 ulpfr 13
217 ulpfr 10 croak "Cannot create index for table aready populated"
218     if $self->{nextk} > 1;
219 ulpfr 13
220 ulpfr 10 require WAIT::Index;
221 ulpfr 13
222 ulpfr 10 my $name = join '-', @_;
223     $self->{indexes}->{$name} =
224 ulpfr 85 new WAIT::Index file => $self->{file}, name => $name, attr => $_;
225 ulpfr 10 }
226    
227     =head2 Creating an inverted index
228    
229     $tb->create_inverted_index
230     (attribute => 'au',
231     pipeline => ['detex', 'isotr', 'isolc', 'split2', 'stop'],
232     predicate => 'plain',
233     );
234    
235     =over 5
236    
237     =item C<attribute>
238    
239     The attribute to build the index on. This attribute may not be in the
240     set attributes specified when the table was created.
241    
242     =item C<pipeline>
243    
244 ulpfr 13 A piplines specification is a reference to an array of method names
245     (from package C<WAIT::Filter>) which are to be applied in sequence to
246     the contents of the named attribute. The attribute name may not be in
247     the attribute list.
248 ulpfr 10
249     =item C<predicate>
250    
251     An indication which predicate the index implements. This may be
252     e.g. 'plain', 'stemming' or 'soundex'. The indicator will be used for
253     query processing. Currently there is no standard set of predicate
254 ulpfr 13 names. The predicate defaults to the last member of the pipeline if
255 ulpfr 10 omitted.
256    
257     =back
258    
259     Currently this method must be called before the first tuple is
260     inserted in the table!
261    
262     =cut
263    
264     sub create_inverted_index {
265     my $self = shift;
266     my %parm = @_;
267    
268     croak "No attribute specified" unless $parm{attribute};
269     croak "No pipeline specified" unless $parm{pipeline};
270    
271     $parm{predicate} ||= $parm{pipeline}->[-1];
272 ulpfr 13
273 ulpfr 10 croak "Cannot create index for table aready populated"
274     if $self->{nextk} > 1;
275 ulpfr 13
276 ulpfr 10 require WAIT::InvertedIndex;
277    
278     # backward compatibility stuff
279     my %opt = %parm;
280     for (qw(attribute pipeline predicate)) {
281     delete $opt{$_};
282     }
283 ulpfr 13
284 ulpfr 10 my $name = join '_', ($parm{attribute}, @{$parm{pipeline}});
285     my $idx = new WAIT::InvertedIndex(file => $self->{file}.'/'.$name,
286     filter => [@{$parm{pipeline}}], # clone
287     name => $name,
288     attr => $parm{attribute},
289     %opt, # backward compatibility stuff
290     );
291     # We will have to use $parm{predicate} here
292     push @{$self->{inverted}->{$parm{attribute}}}, $idx;
293     }
294    
295     sub dir {
296     $_[0]->{file};
297     }
298    
299     =head2 C<$tb-E<gt>layout>
300    
301     Returns the reference to the associated parser object.
302    
303     =cut
304    
305     sub layout { $_[0]->{layout} }
306    
307     =head2 C<$tb-E<gt>fields>
308    
309     Returns the array of attribute names.
310    
311     =cut
312    
313    
314     sub fields { keys %{$_[0]->{inverted}}}
315    
316     =head2 C<$tb-E<gt>drop>
317    
318     Must be called via C<WAIT::Database::drop_table>
319    
320     =cut
321    
322     sub drop {
323     my $self = shift;
324 ulpfr 35
325     unless ($self->{write_lock}){
326     warn "Cannot drop table without write lock. Nothing done";
327     return;
328     }
329    
330 ulpfr 10 if ((caller)[0] eq 'WAIT::Database') { # database knows about this
331     $self->close; # just make sure
332 ulpfr 35
333 ulpfr 10 my $file = $self->{file};
334    
335     for (values %{$self->{indexes}}) {
336     $_->drop;
337     }
338 ulpfr 85 rmdir "$file.read" or warn "Could not rmdir '$file/read'";
339     unlink "$file";
340    
341 ulpfr 10 } else {
342     croak ref($self)."::drop called directly";
343     }
344     }
345    
346     sub mrequire ($) {
347     my $module = shift;
348    
349     $module =~ s{::}{/}g;
350     $module .= '.pm';
351     require $module;
352     }
353    
354     sub open {
355     my $self = shift;
356     my $file = $self->{file} . '/records';
357    
358     mrequire ref($self); # that's tricky eh?
359     if (defined $self->{'layout'}) {
360     mrequire ref($self->{'layout'});
361     }
362     if (defined $self->{'access'}) {
363     mrequire ref($self->{'access'});
364     }
365     if (exists $self->{indexes}) {
366     require WAIT::Index;
367     for (values %{$self->{indexes}}) {
368     $_->{mode} = $self->{mode};
369     }
370     }
371     if (exists $self->{inverted}) {
372     my ($att, $idx);
373     for $att (keys %{$self->{inverted}}) {
374     for $idx (@{$self->{inverted}->{$att}}) {
375     $idx->{mode} = $self->{mode};
376     }
377     }
378     require WAIT::InvertedIndex;
379     }
380 ulpfr 42
381     $self->getlock($self->{mode});
382    
383 ulpfr 85 my $dbmode = ($self->{mode} & O_CREAT) ? DB_CREATE : 0;
384 ulpfr 10 unless (defined $self->{dbh}) {
385     if ($USE_RECNO) {
386 ulpfr 85 tie(%{$self->{db}}, 'BerkeleyDB::Recno',
387     -Filename => $self->{file},
388     -Subname => 'records',
389     -Flags => $dbmode);
390 ulpfr 10 } else {
391     $self->{dbh} =
392 ulpfr 85 tie(%{$self->{db}}, 'BerkeleyDB::Btree',
393     -Filename => $self->{file},
394     -Subname => 'records',
395     -Mode => 0664,
396     -Flags => $dbmode);
397 ulpfr 10 }
398     }
399 ulpfr 35
400    
401 ulpfr 10 $self;
402     }
403    
404     sub fetch_extern {
405     my $self = shift;
406    
407 ulpfr 13 # print "#@_", $self->{'access'}->{Mode}, "\n"; # DEBUGGING?
408 ulpfr 10 if (exists $self->{'access'}) {
409     mrequire ref($self->{'access'});
410     $self->{'access'}->FETCH(@_);
411     }
412     }
413    
414     sub fetch_extern_by_id {
415     my $self = shift;
416    
417     $self->fetch_extern($self->fetch(@_));
418     }
419    
420     sub _find_index {
421     my $self = shift;
422     my (@att) = @_;
423     my %att;
424     my $name;
425 ulpfr 13
426 ulpfr 10 @att{@att} = @att;
427    
428     KEY: for $name (keys %{$self->{indexes}}) {
429     my @iat = split /-/, $name;
430     for (@iat) {
431     next KEY unless exists $att{$_};
432     }
433     return $self->{indexes}->{$name};
434     }
435     return undef;
436     }
437    
438     sub have {
439     my $self = shift;
440     my %parm = @_;
441    
442 ulpfr 13 my $index = $self->_find_index(keys %parm) or return; # no index-no have
443    
444 ulpfr 10 defined $self->{db} or $self->open;
445     return $index->have(@_);
446     }
447    
448     sub insert {
449     my $self = shift;
450     my %parm = @_;
451    
452     defined $self->{db} or $self->open;
453    
454 ulpfr 13 # We should move all writing methods to a subclass to check only once
455     $self->{mode} & O_RDWR or croak "Cannot insert into table opened in RD_ONLY mode";
456    
457 ulpfr 10 my $tuple = join($;, map($parm{$_} || '', @{$self->{attr}}));
458     my $key;
459     my @deleted = keys %{$self->{deleted}};
460 ulpfr 19 my $gotkey = 0;
461 ulpfr 10
462     if (@deleted) {
463     $key = pop @deleted;
464     delete $self->{deleted}->{$key};
465 ulpfr 19 # Sanity check
466     if ($key && $key>0) {
467     $gotkey=1;
468 ulpfr 10 } else {
469 ulpfr 19 warn(sprintf("WAIT database inconsistency during insert ".
470     "key[%s]: Please rebuild index\n",
471     $key
472     ));
473     }
474     }
475     unless ($gotkey) {
476 ulpfr 10 $key = $self->{nextk}++;
477     }
478     if ($USE_RECNO) {
479     $self->{db}->[$key] = $tuple;
480     } else {
481     $self->{db}->{$key} = $tuple;
482     }
483     for (values %{$self->{indexes}}) {
484     unless ($_->insert($key, %parm)) {
485     # duplicate key, undo changes
486     if ($key == $self->{nextk}-1) {
487     $self->{nextk}--;
488     } else {
489 ulpfr 19 # warn "setting key[$key] deleted during insert";
490 ulpfr 10 $self->{deleted}->{$key}=1;
491     }
492     my $idx;
493     for $idx (values %{$self->{indexes}}) {
494     last if $idx eq $_;
495     $idx->remove($key, %parm);
496     }
497     return undef;
498 ulpfr 13 }
499 ulpfr 10 }
500     if (defined $self->{inverted}) {
501     my $att;
502     for $att (keys %{$self->{inverted}}) {
503     if (defined $parm{$att}) {
504     map $_->insert($key, $parm{$att}), @{$self->{inverted}->{$att}};
505     #map $_->sync, @{$self->{inverted}->{$att}}
506     }
507     }
508     }
509     $key
510     }
511    
512     sub sync {
513     my $self = shift;
514 ulpfr 13
515 ulpfr 10 for (values %{$self->{indexes}}) {
516     map $_->sync, $_;
517     }
518     if (defined $self->{inverted}) {
519     my $att;
520     for $att (keys %{$self->{inverted}}) {
521     map $_->sync, @{$self->{inverted}->{$att}}
522     }
523     }
524     }
525    
526     sub fetch {
527     my $self = shift;
528     my $key = shift;
529    
530     return () if exists $self->{deleted}->{$key};
531 ulpfr 13
532 ulpfr 10 defined $self->{db} or $self->open;
533     if ($USE_RECNO) {
534     $self->unpack($self->{db}->[$key]);
535     } else {
536     $self->unpack($self->{db}->{$key});
537     }
538     }
539    
540     sub delete_by_key {
541     my $self = shift;
542     my $key = shift;
543    
544 ulpfr 19 unless ($key) {
545     Carp::cluck "Warning: delete_by_key called without key. Looks like a bug in WAIT?";
546     return;
547     }
548    
549 ulpfr 10 return $self->{deleted}->{$key} if defined $self->{deleted}->{$key};
550     my %tuple = $self->fetch($key);
551     for (values %{$self->{indexes}}) {
552     $_->delete($key, %tuple);
553     }
554     if (defined $self->{inverted}) {
555     # User *must* provide the full record for this or the entries
556     # in the inverted index will not be removed
557     %tuple = (%tuple, @_);
558     my $att;
559     for $att (keys %{$self->{inverted}}) {
560     if (defined $tuple{$att}) {
561     map $_->delete($key, $tuple{$att}), @{$self->{inverted}->{$att}}
562     }
563     }
564     }
565 ulpfr 19 # warn "setting key[$key] deleted during delete_by_key";
566 ulpfr 10 ++$self->{deleted}->{$key};
567     }
568    
569     sub delete {
570     my $self = shift;
571     my $tkey = $self->have(@_);
572 ulpfr 19 # warn "tkey[$tkey]\@_[@_]";
573 ulpfr 10 defined $tkey && $self->delete_by_key($tkey, @_);
574     }
575    
576     sub unpack {
577 laperla 31 my($self, $tuple) = @_;
578 ulpfr 10
579 laperla 31 unless (defined $tuple){
580     # require Carp; # unfortunately gives us "bizarre copy...." :-(((((
581     warn("Debug: somebody called unpack without argument tuple!");
582     return;
583     }
584    
585 ulpfr 10 my $att;
586     my @result;
587     my @tuple = split /$;/, $tuple;
588    
589     for $att (@{$self->{attr}}) {
590     push @result, $att, shift @tuple;
591     }
592     @result;
593     }
594    
595 ulpfr 19 sub set {
596     my ($self, $iattr, $value) = @_;
597    
598 laperla 31 unless ($self->{write_lock}){
599     warn "Cannot set iattr[$iattr] without write lock. Nothing done";
600     return;
601 ulpfr 24 }
602 laperla 77
603     # in the rare case that they haven't written a single record yet, we
604     # make sure, the inverted inherits our $self->{mode}:
605     defined $self->{db} or $self->open;
606    
607 ulpfr 19 for my $att (keys %{$self->{inverted}}) {
608 dpavlin 86 require WAIT::InvertedIndex;
609     if ($^V gt v5.003) { # avoid bug in perl up to 5.003_05
610 ulpfr 19 my $idx;
611     for $idx (@{$self->{inverted}->{$att}}) {
612     $idx->set($iattr, $value);
613     }
614     } else {
615     map $_->set($iattr, $value), @{$self->{inverted}->{$att}};
616     }
617     }
618    
619     1;
620     }
621    
622 ulpfr 10 sub close {
623     my $self = shift;
624    
625     if (exists $self->{'access'}) {
626     eval {$self->{'access'}->close}; # dont bother if not opened
627     }
628 laperla 31 if ($WAIT::Index::VERSION) {
629     for (values %{$self->{indexes}}) {
630     $_->close();
631     }
632 ulpfr 10 }
633 laperla 31 if (defined $self->{inverted} && $WAIT::InvertedIndex::VERSION) {
634     # require WAIT::InvertedIndex; Uli: we can avoid closing indexes:
635     # if WAIT::InvertedIndex has not been loaded, they cannot have
636     # been altered so far
637 ulpfr 10 my $att;
638     for $att (keys %{$self->{inverted}}) {
639     if ($] > 5.003) { # avoid bug in perl up to 5.003_05
640     my $idx;
641     for $idx (@{$self->{inverted}->{$att}}) {
642     $idx->close;
643     }
644     } else {
645     map $_->close(), @{$self->{inverted}->{$att}};
646     }
647     }
648     }
649     if ($self->{dbh}) {
650     delete $self->{dbh};
651    
652     if ($USE_RECNO) {
653     untie @{$self->{db}};
654     } else {
655     untie %{$self->{db}};
656     }
657     delete $self->{db};
658     }
659    
660 ulpfr 19 $self->unlock;
661    
662 ulpfr 10 1;
663     }
664    
665 ulpfr 35 # Locking
666     #
667     # We allow multiple readers to coexists. But write access excludes
668     # all read access and vice versa. In practice read access on tables
669     # open for writing will mostly work ;-)
670    
671     # If a "write" lock is requested, an existing "read" lock will be
672     # released. If a "read" lock ist requested, an existing "write" lock
673     # will be released. Requiring a lock already hold has no effect.
674 laperla 41
675 ulpfr 35 sub getlock {
676     my ($self, $mode) = @_;
677 laperla 41
678     # autoclean cleans on DESTROY, stale sends SIGZERO to the owner
679     #
680     my $lockmgr = LockFile::Simple->make(-autoclean => 1, -stale => 1);
681 ulpfr 85 my $file = $self->{file};
682     my $lockdir = $self->{file} . '.read';
683 ulpfr 35
684     unless (-d $lockdir) {
685     mkdir $lockdir, 0755 or die "Could not mkdir $lockdir: $!";
686     }
687    
688     if ($mode & O_RDWR) { # Get a write lock. Release it again
689     # and die if there is any valid
690     # readers.
691    
692     # Have a write lock already
693     return $self if $self->{write_lock};
694    
695     if ($self->{read_lock}) { # We are a becoming a writer now. So
696     # we release the read lock to avoid
697     # blocking ourselves.
698     $self->{read_lock}->release;
699     delete $self->{read_lock};
700     }
701    
702     # Get the preliminary write lock
703 ulpfr 85 $self->{write_lock} = $lockmgr->lock($self->{file} . '.write')
704     or die "Can't lock '$self->{file}.write'";
705 ulpfr 35
706     # If we actually want to write we must check if there are any
707     # readers. The write lock is confirmed if wen cannot find any
708     # valid readers.
709    
710     local *DIR;
711     opendir DIR, $lockdir or
712     die "Could not opendir '$lockdir': $!";
713     for my $lockfile (grep { -f "$lockdir/$_" } readdir DIR) {
714     # Check if the locks are still valid. Since we are protected by
715     # a write lock, we could use a plain file. But we want to use
716     # the stale testing from LockFile::Simple.
717     if (my $lck = $lockmgr->trylock("$lockdir/$lockfile")) {
718     warn "Removing stale lockfile '$lockdir/$lockfile'";
719     $lck->release;
720     } else { # Found an active reader, rats!
721     $self->{write_lock}->release;
722     die "Cannot write table '$file' while it's in use";
723     }
724     }
725     closedir DIR;
726     } else {
727     # Have a read lock already
728     return $self if $self->{read_lock};
729    
730     # Get the preliminary write lock to protect the directory
731 ulpfr 66 # operations.
732 ulpfr 35
733 ulpfr 85 my $write_lock = $lockmgr->lock($self->{file} . '.read/write')
734     or die "Can't lock '$self->{file}.read/write'";
735 ulpfr 35
736     # Find a new read slot. Maybe the plain file would be better?
737     my $id = time;
738     while (-f "$lockdir/$id.lock") { # here assume ".lock" format!
739     $id++;
740     }
741    
742     $self->{read_lock} = $lockmgr->lock("$lockdir/$id")
743     or die "Can't lock '$lockdir/$id'";
744    
745     # We are a reader now. So we release the write lock
746 ulpfr 66 $write_lock->release;
747 ulpfr 35 }
748     return $self;
749     }
750    
751 ulpfr 19 sub unlock {
752     my $self = shift;
753    
754     # Either we have a read or a write lock (or we close the table already)
755     # unless ($self->{read_lock} || $self->{write_lock}) {
756     # warn "WAIT::Table::unlock: Table aparently hold's no lock"
757     # }
758     if ($self->{write_lock}) {
759     $self->{write_lock}->release();
760     delete $self->{write_lock};
761     }
762     if ($self->{read_lock}) {
763     $self->{read_lock}->release();
764     delete $self->{read_lock};
765     }
766    
767     }
768    
769 ulpfr 13 sub DESTROY {
770     my $self = shift;
771    
772 ulpfr 35 if ($self->{write_lock} || $self->{read_lock}) {
773     warn "Table handle destroyed without closing it first";
774     $self->unlock;
775     }
776 ulpfr 13 }
777    
778 ulpfr 10 sub open_scan {
779     my $self = shift;
780     my $code = shift;
781    
782     $self->{dbh} or $self->open;
783     require WAIT::Scan;
784     new WAIT::Scan $self, $self->{nextk}-1, $code;
785     }
786    
787     sub open_index_scan {
788     my $self = shift;
789     my $attr = shift;
790     my $code = shift;
791     my $name = join '-', @$attr;
792    
793     if (defined $self->{indexes}->{$name}) {
794     $self->{indexes}->{$name}->open_scan($code);
795     } else {
796     croak "No such index '$name'";
797     }
798     }
799    
800     eval {sub WAIT::Query::Raw::new} unless defined \&WAIT::Query::Raw::new;
801    
802     sub prefix {
803     my ($self , $attr, $prefix) = @_;
804     my %result;
805    
806     defined $self->{db} or $self->open; # require layout
807    
808     for (@{$self->{inverted}->{$attr}}) {
809     my $result = $_->prefix($prefix);
810     if (defined $result) {
811     $result{$_->name} = $result;
812     }
813     }
814     bless \%result, 'WAIT::Query::Raw';
815     }
816    
817     sub intervall {
818     my ($self, $attr, $lb, $ub) = @_;
819     my %result;
820    
821     defined $self->{db} or $self->open; # require layout
822    
823     for (@{$self->{inverted}->{$attr}}) {
824     my $result = $_->intervall($lb, $ub);
825     if (defined $result) {
826     $result{$_->name} = $result;
827     }
828     }
829     bless \%result, 'WAIT::Query::Raw';
830     }
831    
832     sub search {
833 ulpfr 19 my $self = shift;
834     my ($query, $attr, $cont, $raw);
835     if (ref $_[0]) {
836     $query = shift;
837    
838     $attr = $query->{attr};
839     $cont = $query->{cont};
840     $raw = $query->{raw};
841     } else {
842     require Carp;
843     Carp::cluck("Using three argument search interface is deprecated, use hashref interface instead");
844     $attr = shift;
845     $cont = shift;
846     $raw = shift;
847     $query = {
848     attr => $attr,
849     cont => $cont,
850     raw => $raw,
851     };
852     }
853    
854 ulpfr 10 my %result;
855    
856     defined $self->{db} or $self->open; # require layout
857    
858     if ($raw) {
859     for (@{$self->{inverted}->{$attr}}) {
860     my $name = $_->name;
861     if (exists $raw->{$name} and @{$raw->{$name}}) {
862     my $scale = 1/scalar(@{$raw->{$name}});
863 ulpfr 19 my %r = $_->search_raw($query, @{$raw->{$name}});
864 ulpfr 10 my ($key, $val);
865     while (($key, $val) = each %r) {
866     if (exists $result{$key}) {
867     $result{$key} += $val*$scale;
868     } else {
869     $result{$key} = $val*$scale;
870     }
871     }
872     }
873     }
874     }
875     if (defined $cont and $cont ne '') {
876     for (@{$self->{inverted}->{$attr}}) {
877 ulpfr 19 my %r = $_->search($query, $cont);
878 ulpfr 10 my ($key, $val);
879     while (($key, $val) = each %r) {
880     if (exists $result{$key}) {
881     $result{$key} += $val;
882     } else {
883     $result{$key} = $val;
884     }
885     }
886     }
887     }
888     # sanity check for deleted documents.
889     # this should not be necessary !@#$
890     for (keys %result) {
891     delete $result{$_} if $self->{deleted}->{$_}
892     }
893     %result;
894     }
895    
896     sub hilight_positions {
897     my ($self, $attr, $text, $query, $raw) = @_;
898     my %pos;
899    
900     if (defined $raw) {
901 ulpfr 13 for (@{$self->{inverted}->{$attr}}) { # objects of type
902     # WAIT::InvertedIndex for
903     # this index field $attr
904 ulpfr 10 my $name = $_->name;
905     if (exists $raw->{$name}) {
906     my %qt;
907     grep $qt{$_}++, @{$raw->{$name}};
908     for ($_->parse_pos($text)) {
909     if (exists $qt{$_->[0]}) {
910     $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
911     }
912     }
913     }
914     }
915     }
916     if (defined $query) {
917     for (@{$self->{inverted}->{$attr}}) {
918     my %qt;
919    
920     grep $qt{$_}++, $_->parse($query);
921     for ($_->parse_pos($text)) {
922     if (exists $qt{$_->[0]}) {
923     if (exists $pos{$_->[1]}) { # perl -w ;-)
924     $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
925     } else {
926     $pos{$_->[1]} = length($_->[0]);
927     }
928     }
929     }
930     }
931     }
932    
933     \%pos;
934     }
935    
936     sub hilight {
937 ulpfr 13 my ($tb, $buf, $qplain, $qraw) = @_;
938     my $layout = $tb->layout();
939    
940 ulpfr 10 my @result;
941    
942 ulpfr 13 $qplain ||= {};
943     $qraw ||= {};
944     my @ttxt = $layout->tag($buf);
945 ulpfr 10 while (@ttxt) {
946     no strict 'refs';
947     my %tag = %{shift @ttxt};
948     my $txt = shift @ttxt;
949     my $fld;
950    
951     my %hl;
952 ulpfr 13 for $fld (grep defined $tag{$_}, keys %$qplain, keys %$qraw) {
953 ulpfr 10 my $hp = $tb->hilight_positions($fld, $txt,
954 ulpfr 13 $qplain->{$fld}, $qraw->{$fld});
955 ulpfr 10 for (keys %$hp) {
956     if (exists $hl{$_}) { # -w ;-(
957     $hl{$_} = max($hl{$_}, $hp->{$_});
958     } else {
959     $hl{$_} = $hp->{$_};
960     }
961     }
962     }
963     my $pos;
964     my $qt = {_qt => 1, %tag};
965     my $pl = \%tag;
966     my $last = length($txt);
967     my @tmp;
968     for $pos (sort {$b <=> $a} keys %hl) {
969     unshift @tmp, $pl, substr($txt,$pos+$hl{$pos},$last-$pos-$hl{$pos});
970     unshift @tmp, $qt, substr($txt,$pos,$hl{$pos});
971     $last = $pos;
972     }
973     push @result, $pl, substr($txt,0,$last);
974     push @result, @tmp;
975     }
976     @result; # no speed necessary
977     }
978    
979     1;

Properties

Name Value
cvs2svn:cvs-rev 1.10

  ViewVC Help
Powered by ViewVC 1.1.26