/[wait]/trunk/lib/WAIT/Table.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WAIT/Table.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 41 - (hide annotations)
Mon Nov 13 20:25:49 2000 UTC (23 years, 6 months ago) by laperla
Original Path: cvs-head/lib/WAIT/Table.pm
File size: 25168 byte(s)
utf8iso was completely nonsense, returning only true and false.

LockFile::Simple now default-configured to remove stale locks.

1 ulpfr 13 # -*- Mode: Cperl -*-
2 ulpfr 10 # Table.pm --
3     # ITIID : $ITI$ $Header $__Header$
4     # Author : Ulrich Pfeifer
5     # Created On : Thu Aug 8 13:05:10 1996
6     # Last Modified By: Ulrich Pfeifer
7 ulpfr 35 # Last Modified On: Sun Nov 12 17:51:56 2000
8 ulpfr 10 # Language : CPerl
9 ulpfr 35 # Update Count : 148
10 ulpfr 10 # Status : Unknown, Use with caution!
11 ulpfr 13 #
12 ulpfr 10 # Copyright (c) 1996-1997, Ulrich Pfeifer
13 ulpfr 13 #
14 ulpfr 10
15     =head1 NAME
16    
17     WAIT::Table -- Module for maintaining Tables / Relations
18    
19     =head1 SYNOPSIS
20    
21     require WAIT::Table;
22    
23     =head1 DESCRIPTION
24    
25     =cut
26    
27     package WAIT::Table;
28 ulpfr 13
29     use WAIT::Table::Handle ();
30 ulpfr 10 require WAIT::Parse::Base;
31 ulpfr 13
32 ulpfr 10 use strict;
33     use Carp;
34 ulpfr 13 # use autouse Carp => qw( croak($) );
35 ulpfr 10 use DB_File;
36     use Fcntl;
37 ulpfr 19 use LockFile::Simple ();
38 ulpfr 10
39     my $USE_RECNO = 0;
40    
41     =head2 Creating a Table.
42    
43 ulpfr 13 The constructor WAIT::Table-E<gt>new is normally called via the
44 ulpfr 10 create_table method of a database handle. This is not enforced, but
45 ulpfr 13 creating a table does not make any sense unless the table is
46 ulpfr 10 registered by the database because the latter implements persistence
47     of the meta data. Registering is done automatically by letting the
48 ulpfr 13 database handle the creation of a table.
49 ulpfr 10
50 ulpfr 13 my $db = WAIT::Database->create(name => 'sample');
51     my $tb = $db->create_table(name => 'test',
52     access => $access,
53     layout => $layout,
54     attr => ['docid', 'headline'],
55     );
56 ulpfr 10
57     The constructor returns a handle for the table. This handle is hidden by the
58     table module, to prevent direct access if called via Table.
59    
60     =over 10
61    
62 ulpfr 13 =item C<access> => I<accessobj>
63 ulpfr 10
64 ulpfr 13 A reference to an access object for the external parts (attributes) of
65 ulpfr 10 tuples. As you may remember, the WAIT System does not enforce that
66     objects are completely stored inside the system to avoid duplication.
67 ulpfr 13 There is no (strong) point in storing all your HTML documents inside
68 ulpfr 10 the system when indexing your WWW-Server.
69    
70 ulpfr 13 The access object is designed to work like as a tied hash. You pass
71     the refernce to the object, not the tied hash though. An example
72     implementation of an access class that works for manpages is
73     WAIT::Document::Nroff.
74    
75     The implementation needs to take into account that WAIT will keep this
76     object in a Data::Dumper or Storable database and re-use it when sman
77     is run. So it is not good enough if we can produce the index with it
78     now, when we create or actively access the table, WAIT also must be
79     able to retrieve documents on its own, when we are in a different
80     context. This happens specifically in a retrieval. To get this working
81     seemlessly, the access-defining class must implement a close method.
82     This method will be called before the Data::Dumper dump takes place.
83     In that moment the access-defining class must get rid of all data
84     structures that cannot be reconstructed via the Data::Dumper dump,
85     such as database handles or C pointers.
86    
87 ulpfr 10 =item C<file> => I<fname>
88    
89     The filename of the records file. Files for indexes will have I<fname>
90 ulpfr 13 as prefix. I<Mandatory>, but usually taken care of by the
91     WAIT::Database handle when the constructor is called via
92     WAIT::Database::create_table().
93 ulpfr 10
94     =item C<name> => I<name>
95    
96     The name of this table. I<Mandatory>
97    
98     =item C<attr> => [ I<attr> ... ]
99    
100 ulpfr 13 A reference to an array of attribute names. WAIT will keep the
101     contents of these attributes in its table. I<Mandatory>
102 ulpfr 10
103     =item C<djk> => [ I<attr> ... ]
104    
105     A reference to an array of attribute names which make up the
106 ulpfr 13 I<disjointness key>. Don't think about it - it's of no use yet;
107 ulpfr 10
108     =item C<layout> => I<layoutobj>
109    
110 ulpfr 13 A reference to an external parser object. Defaults to a new instance
111     of C<WAIT::Parse::Base>. For an example implementation see
112     WAIT::Parse::Nroff. A layout class can be implemented as a singleton
113     class if you so like.
114 ulpfr 10
115 ulpfr 13 =item C<keyset> => I<keyset>
116 ulpfr 10
117 ulpfr 13 The set of attributes needed to identify a record. Defaults to all
118     attributes.
119 ulpfr 10
120 ulpfr 13 =item C<invindex> => I<inverted index>
121    
122     A reference to an anon array defining attributes of each record that
123     need to be indexed. See the source of smakewhatis for how to set this
124     up.
125    
126 ulpfr 10 =back
127    
128     =cut
129    
130     sub new {
131     my $type = shift;
132     my %parm = @_;
133     my $self = {};
134    
135 ulpfr 13 # Check for mandatory attrs early
136     $self->{name} = $parm{name} or croak "No name specified";
137     $self->{attr} = $parm{attr} or croak "No attributes specified";
138    
139 ulpfr 10 # Do that before we eventually add '_weight' to attributes.
140     $self->{keyset} = $parm{keyset} || [[@{$parm{attr}}]];
141 ulpfr 13
142 ulpfr 10 $self->{mode} = O_CREAT | O_RDWR;
143 ulpfr 13
144 ulpfr 10 # Determine and set up subclass
145     $type = ref($type) || $type;
146     if (defined $parm{djk}) {
147     if (@{$parm{djk}} == @{$parm{attr}}) {
148     # All attributes in DK (sloppy test here!)
149     $type .= '::Independent';
150     require WAIT::Table::Independent;
151     } else {
152     $type .= '::Disjoint';
153     require WAIT::Table::Disjoint;
154     }
155     # Add '_weight' to attributes
156     my %attr;
157     @attr{@{$parm{attr}}} = (1) x @{$parm{attr}};
158     unshift @{$parm{attr}}, '_weight' unless $attr{'_weight'};
159     }
160    
161     $self->{file} = $parm{file} or croak "No file specified";
162 ulpfr 13 if (-d $self->{file}){
163     warn "Warning: Directory '$self->{file}' already exists\n";
164     } elsif (!mkdir($self->{file}, 0775)) {
165 ulpfr 10 croak "Could not 'mkdir $self->{file}': $!\n";
166     }
167 ulpfr 19
168 ulpfr 10 $self->{djk} = $parm{djk} if defined $parm{djk};
169     $self->{layout} = $parm{layout} || new WAIT::Parse::Base;
170     $self->{access} = $parm{access} if defined $parm{access};
171     $self->{nextk} = 1; # next record to insert; first record unused
172     $self->{deleted} = {}; # no deleted records yet
173     $self->{indexes} = {};
174    
175     bless $self, $type;
176 ulpfr 35
177     # Checking for readers is not necessary, but let's go with the
178     # generic method.
179     $self->getlock(O_RDWR|O_CREAT); # dies when failing
180    
181 ulpfr 10 # Call create_index() and create_index() for compatibility
182     for (@{$self->{keyset}||[]}) {
183     #carp "Specification of indexes at table create time is deprecated";
184     $self->create_index(@$_);
185     }
186     while (@{$parm{invindex}||[]}) {
187     # carp "Specification of inverted indexes at table create time is deprecated";
188     my $att = shift @{$parm{invindex}};
189     my @spec = @{shift @{$parm{invindex}}};
190     my @opt;
191 ulpfr 13
192 ulpfr 10 if (ref($spec[0])) {
193     carp "Secondary pipelines are deprecated\n";
194     @opt = %{shift @spec};
195     }
196     $self->create_inverted_index(attribute => $att, pipeline => \@spec, @opt);
197     }
198 ulpfr 19
199 ulpfr 10 $self;
200     # end of backwarn compatibility stuff
201     }
202    
203     =head2 Creating an index
204    
205     $tb->create_index('docid');
206    
207     =item C<create_index>
208    
209     must be called with a list of attributes. This must be a subset of the
210     attributes specified when the table was created. Currently this
211     method must be called before the first tuple is inserted in the
212     table!
213    
214     =cut
215    
216     sub create_index {
217     my $self= shift;
218 ulpfr 13
219 ulpfr 10 croak "Cannot create index for table aready populated"
220     if $self->{nextk} > 1;
221 ulpfr 13
222 ulpfr 10 require WAIT::Index;
223 ulpfr 13
224 ulpfr 10 my $name = join '-', @_;
225     $self->{indexes}->{$name} =
226     new WAIT::Index file => $self->{file}.'/'.$name, attr => $_;
227     }
228    
229     =head2 Creating an inverted index
230    
231     $tb->create_inverted_index
232     (attribute => 'au',
233     pipeline => ['detex', 'isotr', 'isolc', 'split2', 'stop'],
234     predicate => 'plain',
235     );
236    
237     =over 5
238    
239     =item C<attribute>
240    
241     The attribute to build the index on. This attribute may not be in the
242     set attributes specified when the table was created.
243    
244     =item C<pipeline>
245    
246 ulpfr 13 A piplines specification is a reference to an array of method names
247     (from package C<WAIT::Filter>) which are to be applied in sequence to
248     the contents of the named attribute. The attribute name may not be in
249     the attribute list.
250 ulpfr 10
251     =item C<predicate>
252    
253     An indication which predicate the index implements. This may be
254     e.g. 'plain', 'stemming' or 'soundex'. The indicator will be used for
255     query processing. Currently there is no standard set of predicate
256 ulpfr 13 names. The predicate defaults to the last member of the pipeline if
257 ulpfr 10 omitted.
258    
259     =back
260    
261     Currently this method must be called before the first tuple is
262     inserted in the table!
263    
264     =cut
265    
266     sub create_inverted_index {
267     my $self = shift;
268     my %parm = @_;
269    
270     croak "No attribute specified" unless $parm{attribute};
271     croak "No pipeline specified" unless $parm{pipeline};
272    
273     $parm{predicate} ||= $parm{pipeline}->[-1];
274 ulpfr 13
275 ulpfr 10 croak "Cannot create index for table aready populated"
276     if $self->{nextk} > 1;
277 ulpfr 13
278 ulpfr 10 require WAIT::InvertedIndex;
279    
280     # backward compatibility stuff
281     my %opt = %parm;
282     for (qw(attribute pipeline predicate)) {
283     delete $opt{$_};
284     }
285 ulpfr 13
286 ulpfr 10 my $name = join '_', ($parm{attribute}, @{$parm{pipeline}});
287     my $idx = new WAIT::InvertedIndex(file => $self->{file}.'/'.$name,
288     filter => [@{$parm{pipeline}}], # clone
289     name => $name,
290     attr => $parm{attribute},
291     %opt, # backward compatibility stuff
292     );
293     # We will have to use $parm{predicate} here
294     push @{$self->{inverted}->{$parm{attribute}}}, $idx;
295     }
296    
297     sub dir {
298     $_[0]->{file};
299     }
300    
301     =head2 C<$tb-E<gt>layout>
302    
303     Returns the reference to the associated parser object.
304    
305     =cut
306    
307     sub layout { $_[0]->{layout} }
308    
309     =head2 C<$tb-E<gt>fields>
310    
311     Returns the array of attribute names.
312    
313     =cut
314    
315    
316     sub fields { keys %{$_[0]->{inverted}}}
317    
318     =head2 C<$tb-E<gt>drop>
319    
320     Must be called via C<WAIT::Database::drop_table>
321    
322     =cut
323    
324     sub drop {
325     my $self = shift;
326 ulpfr 35
327     unless ($self->{write_lock}){
328     warn "Cannot drop table without write lock. Nothing done";
329     return;
330     }
331    
332 ulpfr 10 if ((caller)[0] eq 'WAIT::Database') { # database knows about this
333     $self->close; # just make sure
334 ulpfr 35
335 ulpfr 10 my $file = $self->{file};
336    
337     for (values %{$self->{indexes}}) {
338     $_->drop;
339     }
340     unlink "$file/records";
341 ulpfr 35 rmdir "$file/read" or warn "Could not rmdir '$file/read'";
342    
343 ulpfr 19 # $self->unlock;
344 ulpfr 10 ! (!-e $file or rmdir $file);
345     } else {
346     croak ref($self)."::drop called directly";
347     }
348     }
349    
350     sub mrequire ($) {
351     my $module = shift;
352    
353     $module =~ s{::}{/}g;
354     $module .= '.pm';
355     require $module;
356     }
357    
358     sub open {
359     my $self = shift;
360     my $file = $self->{file} . '/records';
361    
362     mrequire ref($self); # that's tricky eh?
363     if (defined $self->{'layout'}) {
364     mrequire ref($self->{'layout'});
365     }
366     if (defined $self->{'access'}) {
367     mrequire ref($self->{'access'});
368     }
369     if (exists $self->{indexes}) {
370     require WAIT::Index;
371     for (values %{$self->{indexes}}) {
372     $_->{mode} = $self->{mode};
373     }
374     }
375     if (exists $self->{inverted}) {
376     my ($att, $idx);
377     for $att (keys %{$self->{inverted}}) {
378     for $idx (@{$self->{inverted}->{$att}}) {
379     $idx->{mode} = $self->{mode};
380     }
381     }
382     require WAIT::InvertedIndex;
383     }
384     unless (defined $self->{dbh}) {
385     if ($USE_RECNO) {
386     $self->{dbh} = tie(@{$self->{db}}, 'DB_File', $file,
387     $self->{mode}, 0664, $DB_RECNO);
388     } else {
389     $self->{dbh} =
390     tie(%{$self->{db}}, 'DB_File', $file,
391     $self->{mode}, 0664, $DB_BTREE);
392     }
393     }
394 ulpfr 35
395     $self->getlock($self->{mode});
396    
397 ulpfr 10 $self;
398     }
399    
400     sub fetch_extern {
401     my $self = shift;
402    
403 ulpfr 13 # print "#@_", $self->{'access'}->{Mode}, "\n"; # DEBUGGING?
404 ulpfr 10 if (exists $self->{'access'}) {
405     mrequire ref($self->{'access'});
406     $self->{'access'}->FETCH(@_);
407     }
408     }
409    
410     sub fetch_extern_by_id {
411     my $self = shift;
412    
413     $self->fetch_extern($self->fetch(@_));
414     }
415    
416     sub _find_index {
417     my $self = shift;
418     my (@att) = @_;
419     my %att;
420     my $name;
421 ulpfr 13
422 ulpfr 10 @att{@att} = @att;
423    
424     KEY: for $name (keys %{$self->{indexes}}) {
425     my @iat = split /-/, $name;
426     for (@iat) {
427     next KEY unless exists $att{$_};
428     }
429     return $self->{indexes}->{$name};
430     }
431     return undef;
432     }
433    
434     sub have {
435     my $self = shift;
436     my %parm = @_;
437    
438 ulpfr 13 my $index = $self->_find_index(keys %parm) or return; # no index-no have
439    
440 ulpfr 10 defined $self->{db} or $self->open;
441     return $index->have(@_);
442     }
443    
444     sub insert {
445     my $self = shift;
446     my %parm = @_;
447    
448     defined $self->{db} or $self->open;
449    
450 ulpfr 13 # We should move all writing methods to a subclass to check only once
451     $self->{mode} & O_RDWR or croak "Cannot insert into table opened in RD_ONLY mode";
452    
453 ulpfr 10 my $tuple = join($;, map($parm{$_} || '', @{$self->{attr}}));
454     my $key;
455     my @deleted = keys %{$self->{deleted}};
456 ulpfr 19 my $gotkey = 0;
457 ulpfr 10
458     if (@deleted) {
459     $key = pop @deleted;
460     delete $self->{deleted}->{$key};
461 ulpfr 19 # Sanity check
462     if ($key && $key>0) {
463     $gotkey=1;
464 ulpfr 10 } else {
465 ulpfr 19 warn(sprintf("WAIT database inconsistency during insert ".
466     "key[%s]: Please rebuild index\n",
467     $key
468     ));
469     }
470     }
471     unless ($gotkey) {
472 ulpfr 10 $key = $self->{nextk}++;
473     }
474     if ($USE_RECNO) {
475     $self->{db}->[$key] = $tuple;
476     } else {
477     $self->{db}->{$key} = $tuple;
478     }
479     for (values %{$self->{indexes}}) {
480     unless ($_->insert($key, %parm)) {
481     # duplicate key, undo changes
482     if ($key == $self->{nextk}-1) {
483     $self->{nextk}--;
484     } else {
485 ulpfr 19 # warn "setting key[$key] deleted during insert";
486 ulpfr 10 $self->{deleted}->{$key}=1;
487     }
488     my $idx;
489     for $idx (values %{$self->{indexes}}) {
490     last if $idx eq $_;
491     $idx->remove($key, %parm);
492     }
493     return undef;
494 ulpfr 13 }
495 ulpfr 10 }
496     if (defined $self->{inverted}) {
497     my $att;
498     for $att (keys %{$self->{inverted}}) {
499     if (defined $parm{$att}) {
500     map $_->insert($key, $parm{$att}), @{$self->{inverted}->{$att}};
501     #map $_->sync, @{$self->{inverted}->{$att}}
502     }
503     }
504     }
505     $key
506     }
507    
508     sub sync {
509     my $self = shift;
510 ulpfr 13
511 ulpfr 10 for (values %{$self->{indexes}}) {
512     map $_->sync, $_;
513     }
514     if (defined $self->{inverted}) {
515     my $att;
516     for $att (keys %{$self->{inverted}}) {
517     map $_->sync, @{$self->{inverted}->{$att}}
518     }
519     }
520     }
521    
522     sub fetch {
523     my $self = shift;
524     my $key = shift;
525    
526     return () if exists $self->{deleted}->{$key};
527 ulpfr 13
528 ulpfr 10 defined $self->{db} or $self->open;
529     if ($USE_RECNO) {
530     $self->unpack($self->{db}->[$key]);
531     } else {
532     $self->unpack($self->{db}->{$key});
533     }
534     }
535    
536     sub delete_by_key {
537     my $self = shift;
538     my $key = shift;
539    
540 ulpfr 19 unless ($key) {
541     Carp::cluck "Warning: delete_by_key called without key. Looks like a bug in WAIT?";
542     return;
543     }
544    
545 ulpfr 10 return $self->{deleted}->{$key} if defined $self->{deleted}->{$key};
546     my %tuple = $self->fetch($key);
547     for (values %{$self->{indexes}}) {
548     $_->delete($key, %tuple);
549     }
550     if (defined $self->{inverted}) {
551     # User *must* provide the full record for this or the entries
552     # in the inverted index will not be removed
553     %tuple = (%tuple, @_);
554     my $att;
555     for $att (keys %{$self->{inverted}}) {
556     if (defined $tuple{$att}) {
557     map $_->delete($key, $tuple{$att}), @{$self->{inverted}->{$att}}
558     }
559     }
560     }
561 ulpfr 19 # warn "setting key[$key] deleted during delete_by_key";
562 ulpfr 10 ++$self->{deleted}->{$key};
563     }
564    
565     sub delete {
566     my $self = shift;
567     my $tkey = $self->have(@_);
568 ulpfr 19 # warn "tkey[$tkey]\@_[@_]";
569 ulpfr 10 defined $tkey && $self->delete_by_key($tkey, @_);
570     }
571    
572     sub unpack {
573 laperla 31 my($self, $tuple) = @_;
574 ulpfr 10
575 laperla 31 unless (defined $tuple){
576     # require Carp; # unfortunately gives us "bizarre copy...." :-(((((
577     warn("Debug: somebody called unpack without argument tuple!");
578     return;
579     }
580    
581 ulpfr 10 my $att;
582     my @result;
583     my @tuple = split /$;/, $tuple;
584    
585     for $att (@{$self->{attr}}) {
586     push @result, $att, shift @tuple;
587     }
588     @result;
589     }
590    
591 ulpfr 19 sub set {
592     my ($self, $iattr, $value) = @_;
593    
594 laperla 31 unless ($self->{write_lock}){
595     warn "Cannot set iattr[$iattr] without write lock. Nothing done";
596     return;
597 ulpfr 24 }
598 ulpfr 19 for my $att (keys %{$self->{inverted}}) {
599     if ($] > 5.003) { # avoid bug in perl up to 5.003_05
600     my $idx;
601     for $idx (@{$self->{inverted}->{$att}}) {
602     $idx->set($iattr, $value);
603     }
604     } else {
605     map $_->set($iattr, $value), @{$self->{inverted}->{$att}};
606     }
607     }
608    
609     1;
610     }
611    
612 ulpfr 10 sub close {
613     my $self = shift;
614    
615     if (exists $self->{'access'}) {
616     eval {$self->{'access'}->close}; # dont bother if not opened
617     }
618 laperla 31 if ($WAIT::Index::VERSION) {
619     for (values %{$self->{indexes}}) {
620     $_->close();
621     }
622 ulpfr 10 }
623 laperla 31 if (defined $self->{inverted} && $WAIT::InvertedIndex::VERSION) {
624     # require WAIT::InvertedIndex; Uli: we can avoid closing indexes:
625     # if WAIT::InvertedIndex has not been loaded, they cannot have
626     # been altered so far
627 ulpfr 10 my $att;
628     for $att (keys %{$self->{inverted}}) {
629     if ($] > 5.003) { # avoid bug in perl up to 5.003_05
630     my $idx;
631     for $idx (@{$self->{inverted}->{$att}}) {
632     $idx->close;
633     }
634     } else {
635     map $_->close(), @{$self->{inverted}->{$att}};
636     }
637     }
638     }
639     if ($self->{dbh}) {
640     delete $self->{dbh};
641    
642     if ($USE_RECNO) {
643     untie @{$self->{db}};
644     } else {
645     untie %{$self->{db}};
646     }
647     delete $self->{db};
648     }
649    
650 ulpfr 19 $self->unlock;
651    
652 ulpfr 10 1;
653     }
654    
655 ulpfr 35 # Locking
656     #
657     # We allow multiple readers to coexists. But write access excludes
658     # all read access and vice versa. In practice read access on tables
659     # open for writing will mostly work ;-)
660    
661     # If a "write" lock is requested, an existing "read" lock will be
662     # released. If a "read" lock ist requested, an existing "write" lock
663     # will be released. Requiring a lock already hold has no effect.
664 laperla 41
665 ulpfr 35 sub getlock {
666     my ($self, $mode) = @_;
667 laperla 41
668     # autoclean cleans on DESTROY, stale sends SIGZERO to the owner
669     #
670     my $lockmgr = LockFile::Simple->make(-autoclean => 1, -stale => 1);
671 ulpfr 35 my $file = $self->{file} . '/records';
672     my $lockdir = $self->{file} . '/read';
673    
674     unless (-d $lockdir) {
675     mkdir $lockdir, 0755 or die "Could not mkdir $lockdir: $!";
676     }
677    
678     if ($mode & O_RDWR) { # Get a write lock. Release it again
679     # and die if there is any valid
680     # readers.
681    
682     # Have a write lock already
683     return $self if $self->{write_lock};
684    
685     if ($self->{read_lock}) { # We are a becoming a writer now. So
686     # we release the read lock to avoid
687     # blocking ourselves.
688     $self->{read_lock}->release;
689     delete $self->{read_lock};
690     }
691    
692     # Get the preliminary write lock
693     $self->{write_lock} = $lockmgr->lock($self->{file} . '/write')
694     or die "Can't lock '$self->{file}/write'";
695    
696     # If we actually want to write we must check if there are any
697     # readers. The write lock is confirmed if wen cannot find any
698     # valid readers.
699    
700     local *DIR;
701     opendir DIR, $lockdir or
702     die "Could not opendir '$lockdir': $!";
703     for my $lockfile (grep { -f "$lockdir/$_" } readdir DIR) {
704     # Check if the locks are still valid. Since we are protected by
705     # a write lock, we could use a plain file. But we want to use
706     # the stale testing from LockFile::Simple.
707     if (my $lck = $lockmgr->trylock("$lockdir/$lockfile")) {
708     warn "Removing stale lockfile '$lockdir/$lockfile'";
709     $lck->release;
710     } else { # Found an active reader, rats!
711     $self->{write_lock}->release;
712     die "Cannot write table '$file' while it's in use";
713     }
714     }
715     closedir DIR;
716     } else {
717     # Have a read lock already
718     return $self if $self->{read_lock};
719    
720     # Get the preliminary write lock to protect the directory
721     # operations. If we already have a write lock, it will go.
722    
723     $self->{write_lock} ||= $lockmgr->lock($self->{file} . '/write')
724     or die "Can't lock '$self->{file}/write'";
725    
726     # Find a new read slot. Maybe the plain file would be better?
727     my $id = time;
728     while (-f "$lockdir/$id.lock") { # here assume ".lock" format!
729     $id++;
730     }
731    
732     $self->{read_lock} = $lockmgr->lock("$lockdir/$id")
733     or die "Can't lock '$lockdir/$id'";
734    
735     # We are a reader now. So we release the write lock
736     $self->{write_lock}->release;
737     delete $self->{write_lock};
738     }
739     return $self;
740     }
741    
742 ulpfr 19 sub unlock {
743     my $self = shift;
744    
745     # Either we have a read or a write lock (or we close the table already)
746     # unless ($self->{read_lock} || $self->{write_lock}) {
747     # warn "WAIT::Table::unlock: Table aparently hold's no lock"
748     # }
749     if ($self->{write_lock}) {
750     $self->{write_lock}->release();
751     delete $self->{write_lock};
752     }
753     if ($self->{read_lock}) {
754     $self->{read_lock}->release();
755     delete $self->{read_lock};
756     }
757    
758     }
759    
760 ulpfr 13 sub DESTROY {
761     my $self = shift;
762    
763 ulpfr 35 if ($self->{write_lock} || $self->{read_lock}) {
764     warn "Table handle destroyed without closing it first";
765     $self->unlock;
766     }
767 ulpfr 13 }
768    
769 ulpfr 10 sub open_scan {
770     my $self = shift;
771     my $code = shift;
772    
773     $self->{dbh} or $self->open;
774     require WAIT::Scan;
775     new WAIT::Scan $self, $self->{nextk}-1, $code;
776     }
777    
778     sub open_index_scan {
779     my $self = shift;
780     my $attr = shift;
781     my $code = shift;
782     my $name = join '-', @$attr;
783    
784     if (defined $self->{indexes}->{$name}) {
785     $self->{indexes}->{$name}->open_scan($code);
786     } else {
787     croak "No such index '$name'";
788     }
789     }
790    
791     eval {sub WAIT::Query::Raw::new} unless defined \&WAIT::Query::Raw::new;
792    
793     sub prefix {
794     my ($self , $attr, $prefix) = @_;
795     my %result;
796    
797     defined $self->{db} or $self->open; # require layout
798    
799     for (@{$self->{inverted}->{$attr}}) {
800     my $result = $_->prefix($prefix);
801     if (defined $result) {
802     $result{$_->name} = $result;
803     }
804     }
805     bless \%result, 'WAIT::Query::Raw';
806     }
807    
808     sub intervall {
809     my ($self, $attr, $lb, $ub) = @_;
810     my %result;
811    
812     defined $self->{db} or $self->open; # require layout
813    
814     for (@{$self->{inverted}->{$attr}}) {
815     my $result = $_->intervall($lb, $ub);
816     if (defined $result) {
817     $result{$_->name} = $result;
818     }
819     }
820     bless \%result, 'WAIT::Query::Raw';
821     }
822    
823     sub search {
824 ulpfr 19 my $self = shift;
825     my ($query, $attr, $cont, $raw);
826     if (ref $_[0]) {
827     $query = shift;
828    
829     $attr = $query->{attr};
830     $cont = $query->{cont};
831     $raw = $query->{raw};
832     } else {
833     require Carp;
834     Carp::cluck("Using three argument search interface is deprecated, use hashref interface instead");
835     $attr = shift;
836     $cont = shift;
837     $raw = shift;
838     $query = {
839     attr => $attr,
840     cont => $cont,
841     raw => $raw,
842     };
843     }
844    
845 ulpfr 10 my %result;
846    
847     defined $self->{db} or $self->open; # require layout
848    
849     if ($raw) {
850     for (@{$self->{inverted}->{$attr}}) {
851     my $name = $_->name;
852     if (exists $raw->{$name} and @{$raw->{$name}}) {
853     my $scale = 1/scalar(@{$raw->{$name}});
854 ulpfr 19 my %r = $_->search_raw($query, @{$raw->{$name}});
855 ulpfr 10 my ($key, $val);
856     while (($key, $val) = each %r) {
857     if (exists $result{$key}) {
858     $result{$key} += $val*$scale;
859     } else {
860     $result{$key} = $val*$scale;
861     }
862     }
863     }
864     }
865     }
866     if (defined $cont and $cont ne '') {
867     for (@{$self->{inverted}->{$attr}}) {
868 ulpfr 19 my %r = $_->search($query, $cont);
869 ulpfr 10 my ($key, $val);
870     while (($key, $val) = each %r) {
871     if (exists $result{$key}) {
872     $result{$key} += $val;
873     } else {
874     $result{$key} = $val;
875     }
876     }
877     }
878     }
879     # sanity check for deleted documents.
880     # this should not be necessary !@#$
881     for (keys %result) {
882     delete $result{$_} if $self->{deleted}->{$_}
883     }
884     %result;
885     }
886    
887     sub hilight_positions {
888     my ($self, $attr, $text, $query, $raw) = @_;
889     my %pos;
890    
891     if (defined $raw) {
892 ulpfr 13 for (@{$self->{inverted}->{$attr}}) { # objects of type
893     # WAIT::InvertedIndex for
894     # this index field $attr
895 ulpfr 10 my $name = $_->name;
896     if (exists $raw->{$name}) {
897     my %qt;
898     grep $qt{$_}++, @{$raw->{$name}};
899     for ($_->parse_pos($text)) {
900     if (exists $qt{$_->[0]}) {
901     $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
902     }
903     }
904     }
905     }
906     }
907     if (defined $query) {
908     for (@{$self->{inverted}->{$attr}}) {
909     my %qt;
910    
911     grep $qt{$_}++, $_->parse($query);
912     for ($_->parse_pos($text)) {
913     if (exists $qt{$_->[0]}) {
914     if (exists $pos{$_->[1]}) { # perl -w ;-)
915     $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
916     } else {
917     $pos{$_->[1]} = length($_->[0]);
918     }
919     }
920     }
921     }
922     }
923    
924     \%pos;
925     }
926    
927     sub hilight {
928 ulpfr 13 my ($tb, $buf, $qplain, $qraw) = @_;
929     my $layout = $tb->layout();
930    
931 ulpfr 10 my @result;
932    
933 ulpfr 13 $qplain ||= {};
934     $qraw ||= {};
935     my @ttxt = $layout->tag($buf);
936 ulpfr 10 while (@ttxt) {
937     no strict 'refs';
938     my %tag = %{shift @ttxt};
939     my $txt = shift @ttxt;
940     my $fld;
941    
942     my %hl;
943 ulpfr 13 for $fld (grep defined $tag{$_}, keys %$qplain, keys %$qraw) {
944 ulpfr 10 my $hp = $tb->hilight_positions($fld, $txt,
945 ulpfr 13 $qplain->{$fld}, $qraw->{$fld});
946 ulpfr 10 for (keys %$hp) {
947     if (exists $hl{$_}) { # -w ;-(
948     $hl{$_} = max($hl{$_}, $hp->{$_});
949     } else {
950     $hl{$_} = $hp->{$_};
951     }
952     }
953     }
954     my $pos;
955     my $qt = {_qt => 1, %tag};
956     my $pl = \%tag;
957     my $last = length($txt);
958     my @tmp;
959     for $pos (sort {$b <=> $a} keys %hl) {
960     unshift @tmp, $pl, substr($txt,$pos+$hl{$pos},$last-$pos-$hl{$pos});
961     unshift @tmp, $qt, substr($txt,$pos,$hl{$pos});
962     $last = $pos;
963     }
964     push @result, $pl, substr($txt,0,$last);
965     push @result, @tmp;
966     }
967     @result; # no speed necessary
968     }
969    
970     1;

Properties

Name Value
cvs2svn:cvs-rev 1.6

  ViewVC Help
Powered by ViewVC 1.1.26