/[wait]/trunk/lib/WAIT/Table.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WAIT/Table.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 31 - (hide annotations)
Sun Nov 12 01:26:10 2000 UTC (23 years, 6 months ago) by laperla
Original Path: cvs-head/lib/WAIT/Table.pm
File size: 23764 byte(s)
Table loads Index and doesn't load InvertedIndex anymore

1 ulpfr 13 # -*- Mode: Cperl -*-
2 ulpfr 10 # Table.pm --
3     # ITIID : $ITI$ $Header $__Header$
4     # Author : Ulrich Pfeifer
5     # Created On : Thu Aug 8 13:05:10 1996
6     # Last Modified By: Ulrich Pfeifer
7 ulpfr 24 # Last Modified On: Fri May 19 14:51:14 2000
8 ulpfr 10 # Language : CPerl
9 ulpfr 24 # Update Count : 133
10 ulpfr 10 # Status : Unknown, Use with caution!
11 ulpfr 13 #
12 ulpfr 10 # Copyright (c) 1996-1997, Ulrich Pfeifer
13 ulpfr 13 #
14 ulpfr 10
15     =head1 NAME
16    
17     WAIT::Table -- Module for maintaining Tables / Relations
18    
19     =head1 SYNOPSIS
20    
21     require WAIT::Table;
22    
23     =head1 DESCRIPTION
24    
25     =cut
26    
27     package WAIT::Table;
28 ulpfr 13
29     use WAIT::Table::Handle ();
30 ulpfr 10 require WAIT::Parse::Base;
31 ulpfr 13
32 ulpfr 10 use strict;
33     use Carp;
34 ulpfr 13 # use autouse Carp => qw( croak($) );
35 ulpfr 10 use DB_File;
36     use Fcntl;
37 ulpfr 19 use LockFile::Simple ();
38 ulpfr 10
39     my $USE_RECNO = 0;
40    
41     =head2 Creating a Table.
42    
43 ulpfr 13 The constructor WAIT::Table-E<gt>new is normally called via the
44 ulpfr 10 create_table method of a database handle. This is not enforced, but
45 ulpfr 13 creating a table does not make any sense unless the table is
46 ulpfr 10 registered by the database because the latter implements persistence
47     of the meta data. Registering is done automatically by letting the
48 ulpfr 13 database handle the creation of a table.
49 ulpfr 10
50 ulpfr 13 my $db = WAIT::Database->create(name => 'sample');
51     my $tb = $db->create_table(name => 'test',
52     access => $access,
53     layout => $layout,
54     attr => ['docid', 'headline'],
55     );
56 ulpfr 10
57     The constructor returns a handle for the table. This handle is hidden by the
58     table module, to prevent direct access if called via Table.
59    
60     =over 10
61    
62 ulpfr 13 =item C<access> => I<accessobj>
63 ulpfr 10
64 ulpfr 13 A reference to an access object for the external parts (attributes) of
65 ulpfr 10 tuples. As you may remember, the WAIT System does not enforce that
66     objects are completely stored inside the system to avoid duplication.
67 ulpfr 13 There is no (strong) point in storing all your HTML documents inside
68 ulpfr 10 the system when indexing your WWW-Server.
69    
70 ulpfr 13 The access object is designed to work like as a tied hash. You pass
71     the refernce to the object, not the tied hash though. An example
72     implementation of an access class that works for manpages is
73     WAIT::Document::Nroff.
74    
75     The implementation needs to take into account that WAIT will keep this
76     object in a Data::Dumper or Storable database and re-use it when sman
77     is run. So it is not good enough if we can produce the index with it
78     now, when we create or actively access the table, WAIT also must be
79     able to retrieve documents on its own, when we are in a different
80     context. This happens specifically in a retrieval. To get this working
81     seemlessly, the access-defining class must implement a close method.
82     This method will be called before the Data::Dumper dump takes place.
83     In that moment the access-defining class must get rid of all data
84     structures that cannot be reconstructed via the Data::Dumper dump,
85     such as database handles or C pointers.
86    
87 ulpfr 10 =item C<file> => I<fname>
88    
89     The filename of the records file. Files for indexes will have I<fname>
90 ulpfr 13 as prefix. I<Mandatory>, but usually taken care of by the
91     WAIT::Database handle when the constructor is called via
92     WAIT::Database::create_table().
93 ulpfr 10
94     =item C<name> => I<name>
95    
96     The name of this table. I<Mandatory>
97    
98     =item C<attr> => [ I<attr> ... ]
99    
100 ulpfr 13 A reference to an array of attribute names. WAIT will keep the
101     contents of these attributes in its table. I<Mandatory>
102 ulpfr 10
103     =item C<djk> => [ I<attr> ... ]
104    
105     A reference to an array of attribute names which make up the
106 ulpfr 13 I<disjointness key>. Don't think about it - it's of no use yet;
107 ulpfr 10
108     =item C<layout> => I<layoutobj>
109    
110 ulpfr 13 A reference to an external parser object. Defaults to a new instance
111     of C<WAIT::Parse::Base>. For an example implementation see
112     WAIT::Parse::Nroff. A layout class can be implemented as a singleton
113     class if you so like.
114 ulpfr 10
115 ulpfr 13 =item C<keyset> => I<keyset>
116 ulpfr 10
117 ulpfr 13 The set of attributes needed to identify a record. Defaults to all
118     attributes.
119 ulpfr 10
120 ulpfr 13 =item C<invindex> => I<inverted index>
121    
122     A reference to an anon array defining attributes of each record that
123     need to be indexed. See the source of smakewhatis for how to set this
124     up.
125    
126 ulpfr 10 =back
127    
128     =cut
129    
130     sub new {
131     my $type = shift;
132     my %parm = @_;
133     my $self = {};
134    
135 ulpfr 13 # Check for mandatory attrs early
136     $self->{name} = $parm{name} or croak "No name specified";
137     $self->{attr} = $parm{attr} or croak "No attributes specified";
138    
139 ulpfr 10 # Do that before we eventually add '_weight' to attributes.
140     $self->{keyset} = $parm{keyset} || [[@{$parm{attr}}]];
141 ulpfr 13
142 ulpfr 10 $self->{mode} = O_CREAT | O_RDWR;
143 ulpfr 13
144 ulpfr 10 # Determine and set up subclass
145     $type = ref($type) || $type;
146     if (defined $parm{djk}) {
147     if (@{$parm{djk}} == @{$parm{attr}}) {
148     # All attributes in DK (sloppy test here!)
149     $type .= '::Independent';
150     require WAIT::Table::Independent;
151     } else {
152     $type .= '::Disjoint';
153     require WAIT::Table::Disjoint;
154     }
155     # Add '_weight' to attributes
156     my %attr;
157     @attr{@{$parm{attr}}} = (1) x @{$parm{attr}};
158     unshift @{$parm{attr}}, '_weight' unless $attr{'_weight'};
159     }
160    
161     $self->{file} = $parm{file} or croak "No file specified";
162 ulpfr 13 if (-d $self->{file}){
163     warn "Warning: Directory '$self->{file}' already exists\n";
164     } elsif (!mkdir($self->{file}, 0775)) {
165 ulpfr 10 croak "Could not 'mkdir $self->{file}': $!\n";
166     }
167 ulpfr 19
168     my $lockmgr = LockFile::Simple->make(-autoclean => 1);
169     # aquire a write lock
170     $self->{write_lock} = $lockmgr->lock($self->{file} . '/write')
171     or die "Can't lock '$self->{file}/write'";
172    
173 ulpfr 10 $self->{djk} = $parm{djk} if defined $parm{djk};
174     $self->{layout} = $parm{layout} || new WAIT::Parse::Base;
175     $self->{access} = $parm{access} if defined $parm{access};
176     $self->{nextk} = 1; # next record to insert; first record unused
177     $self->{deleted} = {}; # no deleted records yet
178     $self->{indexes} = {};
179    
180     bless $self, $type;
181     # Call create_index() and create_index() for compatibility
182     for (@{$self->{keyset}||[]}) {
183     #carp "Specification of indexes at table create time is deprecated";
184     $self->create_index(@$_);
185     }
186     while (@{$parm{invindex}||[]}) {
187     # carp "Specification of inverted indexes at table create time is deprecated";
188     my $att = shift @{$parm{invindex}};
189     my @spec = @{shift @{$parm{invindex}}};
190     my @opt;
191 ulpfr 13
192 ulpfr 10 if (ref($spec[0])) {
193     carp "Secondary pipelines are deprecated\n";
194     @opt = %{shift @spec};
195     }
196     $self->create_inverted_index(attribute => $att, pipeline => \@spec, @opt);
197     }
198 ulpfr 19
199 ulpfr 10 $self;
200     # end of backwarn compatibility stuff
201     }
202    
203     =head2 Creating an index
204    
205     $tb->create_index('docid');
206    
207     =item C<create_index>
208    
209     must be called with a list of attributes. This must be a subset of the
210     attributes specified when the table was created. Currently this
211     method must be called before the first tuple is inserted in the
212     table!
213    
214     =cut
215    
216     sub create_index {
217     my $self= shift;
218 ulpfr 13
219 ulpfr 10 croak "Cannot create index for table aready populated"
220     if $self->{nextk} > 1;
221 ulpfr 13
222 ulpfr 10 require WAIT::Index;
223 ulpfr 13
224 ulpfr 10 my $name = join '-', @_;
225     $self->{indexes}->{$name} =
226     new WAIT::Index file => $self->{file}.'/'.$name, attr => $_;
227     }
228    
229     =head2 Creating an inverted index
230    
231     $tb->create_inverted_index
232     (attribute => 'au',
233     pipeline => ['detex', 'isotr', 'isolc', 'split2', 'stop'],
234     predicate => 'plain',
235     );
236    
237     =over 5
238    
239     =item C<attribute>
240    
241     The attribute to build the index on. This attribute may not be in the
242     set attributes specified when the table was created.
243    
244     =item C<pipeline>
245    
246 ulpfr 13 A piplines specification is a reference to an array of method names
247     (from package C<WAIT::Filter>) which are to be applied in sequence to
248     the contents of the named attribute. The attribute name may not be in
249     the attribute list.
250 ulpfr 10
251     =item C<predicate>
252    
253     An indication which predicate the index implements. This may be
254     e.g. 'plain', 'stemming' or 'soundex'. The indicator will be used for
255     query processing. Currently there is no standard set of predicate
256 ulpfr 13 names. The predicate defaults to the last member of the pipeline if
257 ulpfr 10 omitted.
258    
259     =back
260    
261     Currently this method must be called before the first tuple is
262     inserted in the table!
263    
264     =cut
265    
266     sub create_inverted_index {
267     my $self = shift;
268     my %parm = @_;
269    
270     croak "No attribute specified" unless $parm{attribute};
271     croak "No pipeline specified" unless $parm{pipeline};
272    
273     $parm{predicate} ||= $parm{pipeline}->[-1];
274 ulpfr 13
275 ulpfr 10 croak "Cannot create index for table aready populated"
276     if $self->{nextk} > 1;
277 ulpfr 13
278 ulpfr 10 require WAIT::InvertedIndex;
279    
280     # backward compatibility stuff
281     my %opt = %parm;
282     for (qw(attribute pipeline predicate)) {
283     delete $opt{$_};
284     }
285 ulpfr 13
286 ulpfr 10 my $name = join '_', ($parm{attribute}, @{$parm{pipeline}});
287     my $idx = new WAIT::InvertedIndex(file => $self->{file}.'/'.$name,
288     filter => [@{$parm{pipeline}}], # clone
289     name => $name,
290     attr => $parm{attribute},
291     %opt, # backward compatibility stuff
292     );
293     # We will have to use $parm{predicate} here
294     push @{$self->{inverted}->{$parm{attribute}}}, $idx;
295     }
296    
297     sub dir {
298     $_[0]->{file};
299     }
300    
301     =head2 C<$tb-E<gt>layout>
302    
303     Returns the reference to the associated parser object.
304    
305     =cut
306    
307     sub layout { $_[0]->{layout} }
308    
309     =head2 C<$tb-E<gt>fields>
310    
311     Returns the array of attribute names.
312    
313     =cut
314    
315    
316     sub fields { keys %{$_[0]->{inverted}}}
317    
318     =head2 C<$tb-E<gt>drop>
319    
320     Must be called via C<WAIT::Database::drop_table>
321    
322     =cut
323    
324     sub drop {
325     my $self = shift;
326     if ((caller)[0] eq 'WAIT::Database') { # database knows about this
327     $self->close; # just make sure
328     my $file = $self->{file};
329    
330     for (values %{$self->{indexes}}) {
331     $_->drop;
332     }
333     unlink "$file/records";
334 ulpfr 19 # $self->unlock;
335 ulpfr 10 ! (!-e $file or rmdir $file);
336     } else {
337     croak ref($self)."::drop called directly";
338     }
339     }
340    
341     sub mrequire ($) {
342     my $module = shift;
343    
344     $module =~ s{::}{/}g;
345     $module .= '.pm';
346     require $module;
347     }
348    
349     sub open {
350     my $self = shift;
351     my $file = $self->{file} . '/records';
352    
353     mrequire ref($self); # that's tricky eh?
354     if (defined $self->{'layout'}) {
355     mrequire ref($self->{'layout'});
356     }
357     if (defined $self->{'access'}) {
358     mrequire ref($self->{'access'});
359     }
360     if (exists $self->{indexes}) {
361     require WAIT::Index;
362     for (values %{$self->{indexes}}) {
363     $_->{mode} = $self->{mode};
364     }
365     }
366     if (exists $self->{inverted}) {
367     my ($att, $idx);
368     for $att (keys %{$self->{inverted}}) {
369     for $idx (@{$self->{inverted}->{$att}}) {
370     $idx->{mode} = $self->{mode};
371     }
372     }
373     require WAIT::InvertedIndex;
374     }
375     unless (defined $self->{dbh}) {
376     if ($USE_RECNO) {
377     $self->{dbh} = tie(@{$self->{db}}, 'DB_File', $file,
378     $self->{mode}, 0664, $DB_RECNO);
379     } else {
380     $self->{dbh} =
381     tie(%{$self->{db}}, 'DB_File', $file,
382     $self->{mode}, 0664, $DB_BTREE);
383     }
384     }
385 ulpfr 19
386     # Locking
387     #
388     # We allow multiple readers to coexists. But write access excludes
389     # all read access vice versa. In practice read access on tables
390     # open for writing will mostly work ;-)
391    
392     my $lockmgr = LockFile::Simple->make(-autoclean => 1);
393    
394     # aquire a write lock. We might hold one acquired in create() already
395     $self->{write_lock} ||= $lockmgr->lock($self->{file} . '/write')
396     or die "Can't lock '$self->{file}/write'";
397    
398     my $lockdir = $self->{file} . '/read';
399     unless (-d $lockdir) {
400     mkdir $lockdir, 0755 or die "Could not mkdir $lockdir: $!";
401     }
402    
403     if ($self->{mode} & O_RDWR) {
404     # this is a hack. We do not check for reopening ...
405     return $self if $self->{write_lock};
406    
407     # If we actually want to write we must check if there are any readers
408 laperla 31 local *DIR;
409 ulpfr 19 opendir DIR, $lockdir or
410     die "Could not opendir '$lockdir': $!";
411     for my $lockfile (grep { -f "$lockdir/$_" } readdir DIR) {
412     # check if the locks are still valid.
413     # Since we are protected by a write lock, we could use a pline file.
414     # But we want to use the stale testing from LockFile::Simple.
415     if (my $lck = $lockmgr->trylock("$lockdir/$lockfile")) {
416     warn "Removing stale lockfile '$lockdir/$lockfile'";
417     $lck->release;
418     } else {
419     $self->{write_lock}->release;
420     die "Cannot write table '$file' while it's in use";
421     }
422     }
423 laperla 31 closedir DIR;
424 ulpfr 19 } else {
425     # this is a hack. We do not check for reopening ...
426     return $self if $self->{read_lock};
427    
428     # We are a reader. So we release the write lock
429     my $id = time;
430     while (-f "$lockdir/$id.lock") { # here assume ".lock" format!
431     $id++;
432     }
433     $self->{read_lock} = $lockmgr->lock("$lockdir/$id");
434     $self->{write_lock}->release;
435     delete $self->{write_lock};
436     }
437    
438 ulpfr 10 $self;
439     }
440    
441     sub fetch_extern {
442     my $self = shift;
443    
444 ulpfr 13 # print "#@_", $self->{'access'}->{Mode}, "\n"; # DEBUGGING?
445 ulpfr 10 if (exists $self->{'access'}) {
446     mrequire ref($self->{'access'});
447     $self->{'access'}->FETCH(@_);
448     }
449     }
450    
451     sub fetch_extern_by_id {
452     my $self = shift;
453    
454     $self->fetch_extern($self->fetch(@_));
455     }
456    
457     sub _find_index {
458     my $self = shift;
459     my (@att) = @_;
460     my %att;
461     my $name;
462 ulpfr 13
463 ulpfr 10 @att{@att} = @att;
464    
465     KEY: for $name (keys %{$self->{indexes}}) {
466     my @iat = split /-/, $name;
467     for (@iat) {
468     next KEY unless exists $att{$_};
469     }
470     return $self->{indexes}->{$name};
471     }
472     return undef;
473     }
474    
475     sub have {
476     my $self = shift;
477     my %parm = @_;
478    
479 ulpfr 13 my $index = $self->_find_index(keys %parm) or return; # no index-no have
480    
481 ulpfr 10 defined $self->{db} or $self->open;
482     return $index->have(@_);
483     }
484    
485     sub insert {
486     my $self = shift;
487     my %parm = @_;
488    
489     defined $self->{db} or $self->open;
490    
491 ulpfr 13 # We should move all writing methods to a subclass to check only once
492     $self->{mode} & O_RDWR or croak "Cannot insert into table opened in RD_ONLY mode";
493    
494 ulpfr 10 my $tuple = join($;, map($parm{$_} || '', @{$self->{attr}}));
495     my $key;
496     my @deleted = keys %{$self->{deleted}};
497 ulpfr 19 my $gotkey = 0;
498 ulpfr 10
499     if (@deleted) {
500     $key = pop @deleted;
501     delete $self->{deleted}->{$key};
502 ulpfr 19 # Sanity check
503     if ($key && $key>0) {
504     $gotkey=1;
505 ulpfr 10 } else {
506 ulpfr 19 warn(sprintf("WAIT database inconsistency during insert ".
507     "key[%s]: Please rebuild index\n",
508     $key
509     ));
510     }
511     }
512     unless ($gotkey) {
513 ulpfr 10 $key = $self->{nextk}++;
514     }
515     if ($USE_RECNO) {
516     $self->{db}->[$key] = $tuple;
517     } else {
518     $self->{db}->{$key} = $tuple;
519     }
520     for (values %{$self->{indexes}}) {
521     unless ($_->insert($key, %parm)) {
522     # duplicate key, undo changes
523     if ($key == $self->{nextk}-1) {
524     $self->{nextk}--;
525     } else {
526 ulpfr 19 # warn "setting key[$key] deleted during insert";
527 ulpfr 10 $self->{deleted}->{$key}=1;
528     }
529     my $idx;
530     for $idx (values %{$self->{indexes}}) {
531     last if $idx eq $_;
532     $idx->remove($key, %parm);
533     }
534     return undef;
535 ulpfr 13 }
536 ulpfr 10 }
537     if (defined $self->{inverted}) {
538     my $att;
539     for $att (keys %{$self->{inverted}}) {
540     if (defined $parm{$att}) {
541     map $_->insert($key, $parm{$att}), @{$self->{inverted}->{$att}};
542     #map $_->sync, @{$self->{inverted}->{$att}}
543     }
544     }
545     }
546     $key
547     }
548    
549     sub sync {
550     my $self = shift;
551 ulpfr 13
552 ulpfr 10 for (values %{$self->{indexes}}) {
553     map $_->sync, $_;
554     }
555     if (defined $self->{inverted}) {
556     my $att;
557     for $att (keys %{$self->{inverted}}) {
558     map $_->sync, @{$self->{inverted}->{$att}}
559     }
560     }
561     }
562    
563     sub fetch {
564     my $self = shift;
565     my $key = shift;
566    
567     return () if exists $self->{deleted}->{$key};
568 ulpfr 13
569 ulpfr 10 defined $self->{db} or $self->open;
570     if ($USE_RECNO) {
571     $self->unpack($self->{db}->[$key]);
572     } else {
573     $self->unpack($self->{db}->{$key});
574     }
575     }
576    
577     sub delete_by_key {
578     my $self = shift;
579     my $key = shift;
580    
581 ulpfr 19 unless ($key) {
582     Carp::cluck "Warning: delete_by_key called without key. Looks like a bug in WAIT?";
583     return;
584     }
585    
586 ulpfr 10 return $self->{deleted}->{$key} if defined $self->{deleted}->{$key};
587     my %tuple = $self->fetch($key);
588     for (values %{$self->{indexes}}) {
589     $_->delete($key, %tuple);
590     }
591     if (defined $self->{inverted}) {
592     # User *must* provide the full record for this or the entries
593     # in the inverted index will not be removed
594     %tuple = (%tuple, @_);
595     my $att;
596     for $att (keys %{$self->{inverted}}) {
597     if (defined $tuple{$att}) {
598     map $_->delete($key, $tuple{$att}), @{$self->{inverted}->{$att}}
599     }
600     }
601     }
602 ulpfr 19 # warn "setting key[$key] deleted during delete_by_key";
603 ulpfr 10 ++$self->{deleted}->{$key};
604     }
605    
606     sub delete {
607     my $self = shift;
608     my $tkey = $self->have(@_);
609 ulpfr 19 # warn "tkey[$tkey]\@_[@_]";
610 ulpfr 10 defined $tkey && $self->delete_by_key($tkey, @_);
611     }
612    
613     sub unpack {
614 laperla 31 my($self, $tuple) = @_;
615 ulpfr 10
616 laperla 31 unless (defined $tuple){
617     # require Carp; # unfortunately gives us "bizarre copy...." :-(((((
618     warn("Debug: somebody called unpack without argument tuple!");
619     return;
620     }
621    
622 ulpfr 10 my $att;
623     my @result;
624     my @tuple = split /$;/, $tuple;
625    
626     for $att (@{$self->{attr}}) {
627     push @result, $att, shift @tuple;
628     }
629     @result;
630     }
631    
632 ulpfr 19 sub set {
633     my ($self, $iattr, $value) = @_;
634    
635 laperla 31 unless ($self->{write_lock}){
636     warn "Cannot set iattr[$iattr] without write lock. Nothing done";
637     return;
638 ulpfr 24 }
639 ulpfr 19 for my $att (keys %{$self->{inverted}}) {
640     if ($] > 5.003) { # avoid bug in perl up to 5.003_05
641     my $idx;
642     for $idx (@{$self->{inverted}->{$att}}) {
643     $idx->set($iattr, $value);
644     }
645     } else {
646     map $_->set($iattr, $value), @{$self->{inverted}->{$att}};
647     }
648     }
649    
650     1;
651     }
652    
653 ulpfr 10 sub close {
654     my $self = shift;
655    
656     if (exists $self->{'access'}) {
657     eval {$self->{'access'}->close}; # dont bother if not opened
658     }
659 laperla 31 if ($WAIT::Index::VERSION) {
660     for (values %{$self->{indexes}}) {
661     $_->close();
662     }
663 ulpfr 10 }
664 laperla 31 if (defined $self->{inverted} && $WAIT::InvertedIndex::VERSION) {
665     # require WAIT::InvertedIndex; Uli: we can avoid closing indexes:
666     # if WAIT::InvertedIndex has not been loaded, they cannot have
667     # been altered so far
668 ulpfr 10 my $att;
669     for $att (keys %{$self->{inverted}}) {
670     if ($] > 5.003) { # avoid bug in perl up to 5.003_05
671     my $idx;
672     for $idx (@{$self->{inverted}->{$att}}) {
673     $idx->close;
674     }
675     } else {
676     map $_->close(), @{$self->{inverted}->{$att}};
677     }
678     }
679     }
680     if ($self->{dbh}) {
681     delete $self->{dbh};
682    
683     if ($USE_RECNO) {
684     untie @{$self->{db}};
685     } else {
686     untie %{$self->{db}};
687     }
688     delete $self->{db};
689     }
690    
691 ulpfr 19 $self->unlock;
692    
693 ulpfr 10 1;
694     }
695    
696 ulpfr 19 sub unlock {
697     my $self = shift;
698    
699     # Either we have a read or a write lock (or we close the table already)
700     # unless ($self->{read_lock} || $self->{write_lock}) {
701     # warn "WAIT::Table::unlock: Table aparently hold's no lock"
702     # }
703     if ($self->{write_lock}) {
704     $self->{write_lock}->release();
705     delete $self->{write_lock};
706     }
707     if ($self->{read_lock}) {
708     $self->{read_lock}->release();
709     delete $self->{read_lock};
710     }
711    
712     }
713    
714 ulpfr 13 sub DESTROY {
715     my $self = shift;
716    
717     warn "Table handle destroyed without closing it first"
718 ulpfr 19 if $self->{write_lock} || $self->{read_lock};
719 ulpfr 13 }
720    
721 ulpfr 10 sub open_scan {
722     my $self = shift;
723     my $code = shift;
724    
725     $self->{dbh} or $self->open;
726     require WAIT::Scan;
727     new WAIT::Scan $self, $self->{nextk}-1, $code;
728     }
729    
730     sub open_index_scan {
731     my $self = shift;
732     my $attr = shift;
733     my $code = shift;
734     my $name = join '-', @$attr;
735    
736     if (defined $self->{indexes}->{$name}) {
737     $self->{indexes}->{$name}->open_scan($code);
738     } else {
739     croak "No such index '$name'";
740     }
741     }
742    
743     eval {sub WAIT::Query::Raw::new} unless defined \&WAIT::Query::Raw::new;
744    
745     sub prefix {
746     my ($self , $attr, $prefix) = @_;
747     my %result;
748    
749     defined $self->{db} or $self->open; # require layout
750    
751     for (@{$self->{inverted}->{$attr}}) {
752     my $result = $_->prefix($prefix);
753     if (defined $result) {
754     $result{$_->name} = $result;
755     }
756     }
757     bless \%result, 'WAIT::Query::Raw';
758     }
759    
760     sub intervall {
761     my ($self, $attr, $lb, $ub) = @_;
762     my %result;
763    
764     defined $self->{db} or $self->open; # require layout
765    
766     for (@{$self->{inverted}->{$attr}}) {
767     my $result = $_->intervall($lb, $ub);
768     if (defined $result) {
769     $result{$_->name} = $result;
770     }
771     }
772     bless \%result, 'WAIT::Query::Raw';
773     }
774    
775     sub search {
776 ulpfr 19 my $self = shift;
777     my ($query, $attr, $cont, $raw);
778     if (ref $_[0]) {
779     $query = shift;
780    
781     $attr = $query->{attr};
782     $cont = $query->{cont};
783     $raw = $query->{raw};
784     } else {
785     require Carp;
786     Carp::cluck("Using three argument search interface is deprecated, use hashref interface instead");
787     $attr = shift;
788     $cont = shift;
789     $raw = shift;
790     $query = {
791     attr => $attr,
792     cont => $cont,
793     raw => $raw,
794     };
795     }
796    
797 ulpfr 10 my %result;
798    
799     defined $self->{db} or $self->open; # require layout
800    
801     if ($raw) {
802     for (@{$self->{inverted}->{$attr}}) {
803     my $name = $_->name;
804     if (exists $raw->{$name} and @{$raw->{$name}}) {
805     my $scale = 1/scalar(@{$raw->{$name}});
806 ulpfr 19 my %r = $_->search_raw($query, @{$raw->{$name}});
807 ulpfr 10 my ($key, $val);
808     while (($key, $val) = each %r) {
809     if (exists $result{$key}) {
810     $result{$key} += $val*$scale;
811     } else {
812     $result{$key} = $val*$scale;
813     }
814     }
815     }
816     }
817     }
818     if (defined $cont and $cont ne '') {
819     for (@{$self->{inverted}->{$attr}}) {
820 ulpfr 19 my %r = $_->search($query, $cont);
821 ulpfr 10 my ($key, $val);
822     while (($key, $val) = each %r) {
823     if (exists $result{$key}) {
824     $result{$key} += $val;
825     } else {
826     $result{$key} = $val;
827     }
828     }
829     }
830     }
831     # sanity check for deleted documents.
832     # this should not be necessary !@#$
833     for (keys %result) {
834     delete $result{$_} if $self->{deleted}->{$_}
835     }
836     %result;
837     }
838    
839     sub hilight_positions {
840     my ($self, $attr, $text, $query, $raw) = @_;
841     my %pos;
842    
843     if (defined $raw) {
844 ulpfr 13 for (@{$self->{inverted}->{$attr}}) { # objects of type
845     # WAIT::InvertedIndex for
846     # this index field $attr
847 ulpfr 10 my $name = $_->name;
848     if (exists $raw->{$name}) {
849     my %qt;
850     grep $qt{$_}++, @{$raw->{$name}};
851     for ($_->parse_pos($text)) {
852     if (exists $qt{$_->[0]}) {
853     $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
854     }
855     }
856     }
857     }
858     }
859     if (defined $query) {
860     for (@{$self->{inverted}->{$attr}}) {
861     my %qt;
862    
863     grep $qt{$_}++, $_->parse($query);
864     for ($_->parse_pos($text)) {
865     if (exists $qt{$_->[0]}) {
866     if (exists $pos{$_->[1]}) { # perl -w ;-)
867     $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
868     } else {
869     $pos{$_->[1]} = length($_->[0]);
870     }
871     }
872     }
873     }
874     }
875    
876     \%pos;
877     }
878    
879     sub hilight {
880 ulpfr 13 my ($tb, $buf, $qplain, $qraw) = @_;
881     my $layout = $tb->layout();
882    
883 ulpfr 10 my @result;
884    
885 ulpfr 13 $qplain ||= {};
886     $qraw ||= {};
887     my @ttxt = $layout->tag($buf);
888 ulpfr 10 while (@ttxt) {
889     no strict 'refs';
890     my %tag = %{shift @ttxt};
891     my $txt = shift @ttxt;
892     my $fld;
893    
894     my %hl;
895 ulpfr 13 for $fld (grep defined $tag{$_}, keys %$qplain, keys %$qraw) {
896 ulpfr 10 my $hp = $tb->hilight_positions($fld, $txt,
897 ulpfr 13 $qplain->{$fld}, $qraw->{$fld});
898 ulpfr 10 for (keys %$hp) {
899     if (exists $hl{$_}) { # -w ;-(
900     $hl{$_} = max($hl{$_}, $hp->{$_});
901     } else {
902     $hl{$_} = $hp->{$_};
903     }
904     }
905     }
906     my $pos;
907     my $qt = {_qt => 1, %tag};
908     my $pl = \%tag;
909     my $last = length($txt);
910     my @tmp;
911     for $pos (sort {$b <=> $a} keys %hl) {
912     unshift @tmp, $pl, substr($txt,$pos+$hl{$pos},$last-$pos-$hl{$pos});
913     unshift @tmp, $qt, substr($txt,$pos,$hl{$pos});
914     $last = $pos;
915     }
916     push @result, $pl, substr($txt,0,$last);
917     push @result, @tmp;
918     }
919     @result; # no speed necessary
920     }
921    
922     1;

Properties

Name Value
cvs2svn:cvs-rev 1.3

  ViewVC Help
Powered by ViewVC 1.1.26