/[wait]/trunk/lib/WAIT/Table.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WAIT/Table.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 85 - (show annotations)
Fri May 3 16:16:10 2002 UTC (22 years ago) by ulpfr
Original Path: cvs-head/lib/WAIT/Table.pm
File size: 25272 byte(s)
First stab at moving backend from DB_File to BerkeleyDB.

1 # -*- Mode: Cperl -*-
2 # Table.pm --
3 # ITIID : $ITI$ $Header $__Header$
4 # Author : Ulrich Pfeifer
5 # Created On : Thu Aug 8 13:05:10 1996
6 # Last Modified By: Ulrich Pfeifer
7 # Last Modified On: Sat Apr 27 17:20:31 2002
8 # Language : CPerl
9 # Update Count : 172
10 # Status : Unknown, Use with caution!
11 #
12 # Copyright (c) 1996-1997, Ulrich Pfeifer
13 #
14
15 =head1 NAME
16
17 WAIT::Table -- Module for maintaining Tables / Relations
18
19 =head1 SYNOPSIS
20
21 require WAIT::Table;
22
23 =head1 DESCRIPTION
24
25 =cut
26
27 package WAIT::Table;
28
29 use WAIT::Table::Handle ();
30 require WAIT::Parse::Base;
31
32 use strict;
33 use Carp;
34 # use autouse Carp => qw( croak($) );
35 use BerkeleyDB;
36 use Fcntl;
37 use LockFile::Simple ();
38
39 my $USE_RECNO = 0;
40
41 =head2 Creating a Table.
42
43 The constructor WAIT::Table-E<gt>new is normally called via the
44 create_table method of a database handle. This is not enforced, but
45 creating a table does not make any sense unless the table is
46 registered by the database because the latter implements persistence
47 of the meta data. Registering is done automatically by letting the
48 database handle the creation of a table.
49
50 my $db = WAIT::Database->create(name => 'sample');
51 my $tb = $db->create_table(name => 'test',
52 access => $access,
53 layout => $layout,
54 attr => ['docid', 'headline'],
55 );
56
57 The constructor returns a handle for the table. This handle is hidden by the
58 table module, to prevent direct access if called via Table.
59
60 =over 10
61
62 =item C<access> => I<accessobj>
63
64 A reference to an access object for the external parts (attributes) of
65 tuples. As you may remember, the WAIT System does not enforce that
66 objects are completely stored inside the system to avoid duplication.
67 There is no (strong) point in storing all your HTML documents inside
68 the system when indexing your WWW-Server.
69
70 The access object is designed to work like as a tied hash. You pass
71 the refernce to the object, not the tied hash though. An example
72 implementation of an access class that works for manpages is
73 WAIT::Document::Nroff.
74
75 The implementation needs to take into account that WAIT will keep this
76 object in a Data::Dumper or Storable database and re-use it when sman
77 is run. So it is not good enough if we can produce the index with it
78 now, when we create or actively access the table, WAIT also must be
79 able to retrieve documents on its own, when we are in a different
80 context. This happens specifically in a retrieval. To get this working
81 seemlessly, the access-defining class must implement a close method.
82 This method will be called before the Data::Dumper dump takes place.
83 In that moment the access-defining class must get rid of all data
84 structures that cannot be reconstructed via the Data::Dumper dump,
85 such as database handles or C pointers.
86
87 =item C<file> => I<fname>
88
89 The filename of the records file. Files for indexes will have I<fname>
90 as prefix. I<Mandatory>, but usually taken care of by the
91 WAIT::Database handle when the constructor is called via
92 WAIT::Database::create_table().
93
94 =item C<name> => I<name>
95
96 The name of this table. I<Mandatory>
97
98 =item C<attr> => [ I<attr> ... ]
99
100 A reference to an array of attribute names. WAIT will keep the
101 contents of these attributes in its table. I<Mandatory>
102
103 =item C<djk> => [ I<attr> ... ]
104
105 A reference to an array of attribute names which make up the
106 I<disjointness key>. Don't think about it - it's of no use yet;
107
108 =item C<layout> => I<layoutobj>
109
110 A reference to an external parser object. Defaults to a new instance
111 of C<WAIT::Parse::Base>. For an example implementation see
112 WAIT::Parse::Nroff. A layout class can be implemented as a singleton
113 class if you so like.
114
115 =item C<keyset> => I<keyset>
116
117 The set of attributes needed to identify a record. Defaults to all
118 attributes.
119
120 =item C<invindex> => I<inverted index>
121
122 A reference to an anon array defining attributes of each record that
123 need to be indexed. See the source of smakewhatis for how to set this
124 up.
125
126 =back
127
128 =cut
129
130 sub new {
131 my $type = shift;
132 my %parm = @_;
133 my $self = {};
134
135 # Check for mandatory attrs early
136 $self->{name} = $parm{name} or croak "No name specified";
137 $self->{attr} = $parm{attr} or croak "No attributes specified";
138
139 # Do that before we eventually add '_weight' to attributes.
140 $self->{keyset} = $parm{keyset} || [[@{$parm{attr}}]];
141
142 $self->{mode} = O_CREAT | O_RDWR;
143
144 # Determine and set up subclass
145 $type = ref($type) || $type;
146 if (defined $parm{djk}) {
147 if (@{$parm{djk}} == @{$parm{attr}}) {
148 # All attributes in DK (sloppy test here!)
149 $type .= '::Independent';
150 require WAIT::Table::Independent;
151 } else {
152 $type .= '::Disjoint';
153 require WAIT::Table::Disjoint;
154 }
155 # Add '_weight' to attributes
156 my %attr;
157 @attr{@{$parm{attr}}} = (1) x @{$parm{attr}};
158 unshift @{$parm{attr}}, '_weight' unless $attr{'_weight'};
159 }
160
161 $self->{file} = $parm{file} or croak "No file specified";
162 if (-e $self->{file}){
163 warn "Warning: file '$self->{file}' already exists\n";
164 }
165
166 $self->{djk} = $parm{djk} if defined $parm{djk};
167 $self->{layout} = $parm{layout} || new WAIT::Parse::Base;
168 $self->{access} = $parm{access} if defined $parm{access};
169 $self->{nextk} = 1; # next record to insert; first record unused
170 $self->{deleted} = {}; # no deleted records yet
171 $self->{indexes} = {};
172
173 bless $self, $type;
174
175 # Checking for readers is not necessary, but let's go with the
176 # generic method.
177 $self->getlock(O_RDWR|O_CREAT); # dies when failing
178
179 # Call create_index() and create_index() for compatibility
180 for (@{$self->{keyset}||[]}) {
181 #carp "Specification of indexes at table create time is deprecated";
182 $self->create_index(@$_);
183 }
184 while (@{$parm{invindex}||[]}) {
185 # carp "Specification of inverted indexes at table create time is deprecated";
186 my $att = shift @{$parm{invindex}};
187 my @spec = @{shift @{$parm{invindex}}};
188 my @opt;
189
190 if (ref($spec[0])) {
191 carp "Secondary pipelines are deprecated\n";
192 @opt = %{shift @spec};
193 }
194 $self->create_inverted_index(attribute => $att, pipeline => \@spec, @opt);
195 }
196
197 $self;
198 # end of backwarn compatibility stuff
199 }
200
201 =head2 Creating an index
202
203 $tb->create_index('docid');
204
205 =item C<create_index>
206
207 must be called with a list of attributes. This must be a subset of the
208 attributes specified when the table was created. Currently this
209 method must be called before the first tuple is inserted in the
210 table!
211
212 =cut
213
214 sub create_index {
215 my $self= shift;
216
217 croak "Cannot create index for table aready populated"
218 if $self->{nextk} > 1;
219
220 require WAIT::Index;
221
222 my $name = join '-', @_;
223 $self->{indexes}->{$name} =
224 new WAIT::Index file => $self->{file}, name => $name, attr => $_;
225 }
226
227 =head2 Creating an inverted index
228
229 $tb->create_inverted_index
230 (attribute => 'au',
231 pipeline => ['detex', 'isotr', 'isolc', 'split2', 'stop'],
232 predicate => 'plain',
233 );
234
235 =over 5
236
237 =item C<attribute>
238
239 The attribute to build the index on. This attribute may not be in the
240 set attributes specified when the table was created.
241
242 =item C<pipeline>
243
244 A piplines specification is a reference to an array of method names
245 (from package C<WAIT::Filter>) which are to be applied in sequence to
246 the contents of the named attribute. The attribute name may not be in
247 the attribute list.
248
249 =item C<predicate>
250
251 An indication which predicate the index implements. This may be
252 e.g. 'plain', 'stemming' or 'soundex'. The indicator will be used for
253 query processing. Currently there is no standard set of predicate
254 names. The predicate defaults to the last member of the pipeline if
255 omitted.
256
257 =back
258
259 Currently this method must be called before the first tuple is
260 inserted in the table!
261
262 =cut
263
264 sub create_inverted_index {
265 my $self = shift;
266 my %parm = @_;
267
268 croak "No attribute specified" unless $parm{attribute};
269 croak "No pipeline specified" unless $parm{pipeline};
270
271 $parm{predicate} ||= $parm{pipeline}->[-1];
272
273 croak "Cannot create index for table aready populated"
274 if $self->{nextk} > 1;
275
276 require WAIT::InvertedIndex;
277
278 # backward compatibility stuff
279 my %opt = %parm;
280 for (qw(attribute pipeline predicate)) {
281 delete $opt{$_};
282 }
283
284 my $name = join '_', ($parm{attribute}, @{$parm{pipeline}});
285 my $idx = new WAIT::InvertedIndex(file => $self->{file}.'/'.$name,
286 filter => [@{$parm{pipeline}}], # clone
287 name => $name,
288 attr => $parm{attribute},
289 %opt, # backward compatibility stuff
290 );
291 # We will have to use $parm{predicate} here
292 push @{$self->{inverted}->{$parm{attribute}}}, $idx;
293 }
294
295 sub dir {
296 $_[0]->{file};
297 }
298
299 =head2 C<$tb-E<gt>layout>
300
301 Returns the reference to the associated parser object.
302
303 =cut
304
305 sub layout { $_[0]->{layout} }
306
307 =head2 C<$tb-E<gt>fields>
308
309 Returns the array of attribute names.
310
311 =cut
312
313
314 sub fields { keys %{$_[0]->{inverted}}}
315
316 =head2 C<$tb-E<gt>drop>
317
318 Must be called via C<WAIT::Database::drop_table>
319
320 =cut
321
322 sub drop {
323 my $self = shift;
324
325 unless ($self->{write_lock}){
326 warn "Cannot drop table without write lock. Nothing done";
327 return;
328 }
329
330 if ((caller)[0] eq 'WAIT::Database') { # database knows about this
331 $self->close; # just make sure
332
333 my $file = $self->{file};
334
335 for (values %{$self->{indexes}}) {
336 $_->drop;
337 }
338 rmdir "$file.read" or warn "Could not rmdir '$file/read'";
339 unlink "$file";
340
341 } else {
342 croak ref($self)."::drop called directly";
343 }
344 }
345
346 sub mrequire ($) {
347 my $module = shift;
348
349 $module =~ s{::}{/}g;
350 $module .= '.pm';
351 require $module;
352 }
353
354 sub open {
355 my $self = shift;
356 my $file = $self->{file} . '/records';
357
358 mrequire ref($self); # that's tricky eh?
359 if (defined $self->{'layout'}) {
360 mrequire ref($self->{'layout'});
361 }
362 if (defined $self->{'access'}) {
363 mrequire ref($self->{'access'});
364 }
365 if (exists $self->{indexes}) {
366 require WAIT::Index;
367 for (values %{$self->{indexes}}) {
368 $_->{mode} = $self->{mode};
369 }
370 }
371 if (exists $self->{inverted}) {
372 my ($att, $idx);
373 for $att (keys %{$self->{inverted}}) {
374 for $idx (@{$self->{inverted}->{$att}}) {
375 $idx->{mode} = $self->{mode};
376 }
377 }
378 require WAIT::InvertedIndex;
379 }
380
381 $self->getlock($self->{mode});
382
383 my $dbmode = ($self->{mode} & O_CREAT) ? DB_CREATE : 0;
384 unless (defined $self->{dbh}) {
385 if ($USE_RECNO) {
386 tie(%{$self->{db}}, 'BerkeleyDB::Recno',
387 -Filename => $self->{file},
388 -Subname => 'records',
389 -Flags => $dbmode);
390 } else {
391 $self->{dbh} =
392 tie(%{$self->{db}}, 'BerkeleyDB::Btree',
393 -Filename => $self->{file},
394 -Subname => 'records',
395 -Mode => 0664,
396 -Flags => $dbmode);
397 }
398 }
399
400
401 $self;
402 }
403
404 sub fetch_extern {
405 my $self = shift;
406
407 # print "#@_", $self->{'access'}->{Mode}, "\n"; # DEBUGGING?
408 if (exists $self->{'access'}) {
409 mrequire ref($self->{'access'});
410 $self->{'access'}->FETCH(@_);
411 }
412 }
413
414 sub fetch_extern_by_id {
415 my $self = shift;
416
417 $self->fetch_extern($self->fetch(@_));
418 }
419
420 sub _find_index {
421 my $self = shift;
422 my (@att) = @_;
423 my %att;
424 my $name;
425
426 @att{@att} = @att;
427
428 KEY: for $name (keys %{$self->{indexes}}) {
429 my @iat = split /-/, $name;
430 for (@iat) {
431 next KEY unless exists $att{$_};
432 }
433 return $self->{indexes}->{$name};
434 }
435 return undef;
436 }
437
438 sub have {
439 my $self = shift;
440 my %parm = @_;
441
442 my $index = $self->_find_index(keys %parm) or return; # no index-no have
443
444 defined $self->{db} or $self->open;
445 return $index->have(@_);
446 }
447
448 sub insert {
449 my $self = shift;
450 my %parm = @_;
451
452 defined $self->{db} or $self->open;
453
454 # We should move all writing methods to a subclass to check only once
455 $self->{mode} & O_RDWR or croak "Cannot insert into table opened in RD_ONLY mode";
456
457 my $tuple = join($;, map($parm{$_} || '', @{$self->{attr}}));
458 my $key;
459 my @deleted = keys %{$self->{deleted}};
460 my $gotkey = 0;
461
462 if (@deleted) {
463 $key = pop @deleted;
464 delete $self->{deleted}->{$key};
465 # Sanity check
466 if ($key && $key>0) {
467 $gotkey=1;
468 } else {
469 warn(sprintf("WAIT database inconsistency during insert ".
470 "key[%s]: Please rebuild index\n",
471 $key
472 ));
473 }
474 }
475 unless ($gotkey) {
476 $key = $self->{nextk}++;
477 }
478 if ($USE_RECNO) {
479 $self->{db}->[$key] = $tuple;
480 } else {
481 $self->{db}->{$key} = $tuple;
482 }
483 for (values %{$self->{indexes}}) {
484 unless ($_->insert($key, %parm)) {
485 # duplicate key, undo changes
486 if ($key == $self->{nextk}-1) {
487 $self->{nextk}--;
488 } else {
489 # warn "setting key[$key] deleted during insert";
490 $self->{deleted}->{$key}=1;
491 }
492 my $idx;
493 for $idx (values %{$self->{indexes}}) {
494 last if $idx eq $_;
495 $idx->remove($key, %parm);
496 }
497 return undef;
498 }
499 }
500 if (defined $self->{inverted}) {
501 my $att;
502 for $att (keys %{$self->{inverted}}) {
503 if (defined $parm{$att}) {
504 map $_->insert($key, $parm{$att}), @{$self->{inverted}->{$att}};
505 #map $_->sync, @{$self->{inverted}->{$att}}
506 }
507 }
508 }
509 $key
510 }
511
512 sub sync {
513 my $self = shift;
514
515 for (values %{$self->{indexes}}) {
516 map $_->sync, $_;
517 }
518 if (defined $self->{inverted}) {
519 my $att;
520 for $att (keys %{$self->{inverted}}) {
521 map $_->sync, @{$self->{inverted}->{$att}}
522 }
523 }
524 }
525
526 sub fetch {
527 my $self = shift;
528 my $key = shift;
529
530 return () if exists $self->{deleted}->{$key};
531
532 defined $self->{db} or $self->open;
533 if ($USE_RECNO) {
534 $self->unpack($self->{db}->[$key]);
535 } else {
536 $self->unpack($self->{db}->{$key});
537 }
538 }
539
540 sub delete_by_key {
541 my $self = shift;
542 my $key = shift;
543
544 unless ($key) {
545 Carp::cluck "Warning: delete_by_key called without key. Looks like a bug in WAIT?";
546 return;
547 }
548
549 return $self->{deleted}->{$key} if defined $self->{deleted}->{$key};
550 my %tuple = $self->fetch($key);
551 for (values %{$self->{indexes}}) {
552 $_->delete($key, %tuple);
553 }
554 if (defined $self->{inverted}) {
555 # User *must* provide the full record for this or the entries
556 # in the inverted index will not be removed
557 %tuple = (%tuple, @_);
558 my $att;
559 for $att (keys %{$self->{inverted}}) {
560 if (defined $tuple{$att}) {
561 map $_->delete($key, $tuple{$att}), @{$self->{inverted}->{$att}}
562 }
563 }
564 }
565 # warn "setting key[$key] deleted during delete_by_key";
566 ++$self->{deleted}->{$key};
567 }
568
569 sub delete {
570 my $self = shift;
571 my $tkey = $self->have(@_);
572 # warn "tkey[$tkey]\@_[@_]";
573 defined $tkey && $self->delete_by_key($tkey, @_);
574 }
575
576 sub unpack {
577 my($self, $tuple) = @_;
578
579 unless (defined $tuple){
580 # require Carp; # unfortunately gives us "bizarre copy...." :-(((((
581 warn("Debug: somebody called unpack without argument tuple!");
582 return;
583 }
584
585 my $att;
586 my @result;
587 my @tuple = split /$;/, $tuple;
588
589 for $att (@{$self->{attr}}) {
590 push @result, $att, shift @tuple;
591 }
592 @result;
593 }
594
595 sub set {
596 my ($self, $iattr, $value) = @_;
597
598 unless ($self->{write_lock}){
599 warn "Cannot set iattr[$iattr] without write lock. Nothing done";
600 return;
601 }
602
603 # in the rare case that they haven't written a single record yet, we
604 # make sure, the inverted inherits our $self->{mode}:
605 defined $self->{db} or $self->open;
606
607 for my $att (keys %{$self->{inverted}}) {
608 if ($] > 5.003) { # avoid bug in perl up to 5.003_05
609 my $idx;
610 for $idx (@{$self->{inverted}->{$att}}) {
611 $idx->set($iattr, $value);
612 }
613 } else {
614 map $_->set($iattr, $value), @{$self->{inverted}->{$att}};
615 }
616 }
617
618 1;
619 }
620
621 sub close {
622 my $self = shift;
623
624 if (exists $self->{'access'}) {
625 eval {$self->{'access'}->close}; # dont bother if not opened
626 }
627 if ($WAIT::Index::VERSION) {
628 for (values %{$self->{indexes}}) {
629 $_->close();
630 }
631 }
632 if (defined $self->{inverted} && $WAIT::InvertedIndex::VERSION) {
633 # require WAIT::InvertedIndex; Uli: we can avoid closing indexes:
634 # if WAIT::InvertedIndex has not been loaded, they cannot have
635 # been altered so far
636 my $att;
637 for $att (keys %{$self->{inverted}}) {
638 if ($] > 5.003) { # avoid bug in perl up to 5.003_05
639 my $idx;
640 for $idx (@{$self->{inverted}->{$att}}) {
641 $idx->close;
642 }
643 } else {
644 map $_->close(), @{$self->{inverted}->{$att}};
645 }
646 }
647 }
648 if ($self->{dbh}) {
649 delete $self->{dbh};
650
651 if ($USE_RECNO) {
652 untie @{$self->{db}};
653 } else {
654 untie %{$self->{db}};
655 }
656 delete $self->{db};
657 }
658
659 $self->unlock;
660
661 1;
662 }
663
664 # Locking
665 #
666 # We allow multiple readers to coexists. But write access excludes
667 # all read access and vice versa. In practice read access on tables
668 # open for writing will mostly work ;-)
669
670 # If a "write" lock is requested, an existing "read" lock will be
671 # released. If a "read" lock ist requested, an existing "write" lock
672 # will be released. Requiring a lock already hold has no effect.
673
674 sub getlock {
675 my ($self, $mode) = @_;
676
677 # autoclean cleans on DESTROY, stale sends SIGZERO to the owner
678 #
679 my $lockmgr = LockFile::Simple->make(-autoclean => 1, -stale => 1);
680 my $file = $self->{file};
681 my $lockdir = $self->{file} . '.read';
682
683 unless (-d $lockdir) {
684 mkdir $lockdir, 0755 or die "Could not mkdir $lockdir: $!";
685 }
686
687 if ($mode & O_RDWR) { # Get a write lock. Release it again
688 # and die if there is any valid
689 # readers.
690
691 # Have a write lock already
692 return $self if $self->{write_lock};
693
694 if ($self->{read_lock}) { # We are a becoming a writer now. So
695 # we release the read lock to avoid
696 # blocking ourselves.
697 $self->{read_lock}->release;
698 delete $self->{read_lock};
699 }
700
701 # Get the preliminary write lock
702 $self->{write_lock} = $lockmgr->lock($self->{file} . '.write')
703 or die "Can't lock '$self->{file}.write'";
704
705 # If we actually want to write we must check if there are any
706 # readers. The write lock is confirmed if wen cannot find any
707 # valid readers.
708
709 local *DIR;
710 opendir DIR, $lockdir or
711 die "Could not opendir '$lockdir': $!";
712 for my $lockfile (grep { -f "$lockdir/$_" } readdir DIR) {
713 # Check if the locks are still valid. Since we are protected by
714 # a write lock, we could use a plain file. But we want to use
715 # the stale testing from LockFile::Simple.
716 if (my $lck = $lockmgr->trylock("$lockdir/$lockfile")) {
717 warn "Removing stale lockfile '$lockdir/$lockfile'";
718 $lck->release;
719 } else { # Found an active reader, rats!
720 $self->{write_lock}->release;
721 die "Cannot write table '$file' while it's in use";
722 }
723 }
724 closedir DIR;
725 } else {
726 # Have a read lock already
727 return $self if $self->{read_lock};
728
729 # Get the preliminary write lock to protect the directory
730 # operations.
731
732 my $write_lock = $lockmgr->lock($self->{file} . '.read/write')
733 or die "Can't lock '$self->{file}.read/write'";
734
735 # Find a new read slot. Maybe the plain file would be better?
736 my $id = time;
737 while (-f "$lockdir/$id.lock") { # here assume ".lock" format!
738 $id++;
739 }
740
741 $self->{read_lock} = $lockmgr->lock("$lockdir/$id")
742 or die "Can't lock '$lockdir/$id'";
743
744 # We are a reader now. So we release the write lock
745 $write_lock->release;
746 }
747 return $self;
748 }
749
750 sub unlock {
751 my $self = shift;
752
753 # Either we have a read or a write lock (or we close the table already)
754 # unless ($self->{read_lock} || $self->{write_lock}) {
755 # warn "WAIT::Table::unlock: Table aparently hold's no lock"
756 # }
757 if ($self->{write_lock}) {
758 $self->{write_lock}->release();
759 delete $self->{write_lock};
760 }
761 if ($self->{read_lock}) {
762 $self->{read_lock}->release();
763 delete $self->{read_lock};
764 }
765
766 }
767
768 sub DESTROY {
769 my $self = shift;
770
771 if ($self->{write_lock} || $self->{read_lock}) {
772 warn "Table handle destroyed without closing it first";
773 $self->unlock;
774 }
775 }
776
777 sub open_scan {
778 my $self = shift;
779 my $code = shift;
780
781 $self->{dbh} or $self->open;
782 require WAIT::Scan;
783 new WAIT::Scan $self, $self->{nextk}-1, $code;
784 }
785
786 sub open_index_scan {
787 my $self = shift;
788 my $attr = shift;
789 my $code = shift;
790 my $name = join '-', @$attr;
791
792 if (defined $self->{indexes}->{$name}) {
793 $self->{indexes}->{$name}->open_scan($code);
794 } else {
795 croak "No such index '$name'";
796 }
797 }
798
799 eval {sub WAIT::Query::Raw::new} unless defined \&WAIT::Query::Raw::new;
800
801 sub prefix {
802 my ($self , $attr, $prefix) = @_;
803 my %result;
804
805 defined $self->{db} or $self->open; # require layout
806
807 for (@{$self->{inverted}->{$attr}}) {
808 my $result = $_->prefix($prefix);
809 if (defined $result) {
810 $result{$_->name} = $result;
811 }
812 }
813 bless \%result, 'WAIT::Query::Raw';
814 }
815
816 sub intervall {
817 my ($self, $attr, $lb, $ub) = @_;
818 my %result;
819
820 defined $self->{db} or $self->open; # require layout
821
822 for (@{$self->{inverted}->{$attr}}) {
823 my $result = $_->intervall($lb, $ub);
824 if (defined $result) {
825 $result{$_->name} = $result;
826 }
827 }
828 bless \%result, 'WAIT::Query::Raw';
829 }
830
831 sub search {
832 my $self = shift;
833 my ($query, $attr, $cont, $raw);
834 if (ref $_[0]) {
835 $query = shift;
836
837 $attr = $query->{attr};
838 $cont = $query->{cont};
839 $raw = $query->{raw};
840 } else {
841 require Carp;
842 Carp::cluck("Using three argument search interface is deprecated, use hashref interface instead");
843 $attr = shift;
844 $cont = shift;
845 $raw = shift;
846 $query = {
847 attr => $attr,
848 cont => $cont,
849 raw => $raw,
850 };
851 }
852
853 my %result;
854
855 defined $self->{db} or $self->open; # require layout
856
857 if ($raw) {
858 for (@{$self->{inverted}->{$attr}}) {
859 my $name = $_->name;
860 if (exists $raw->{$name} and @{$raw->{$name}}) {
861 my $scale = 1/scalar(@{$raw->{$name}});
862 my %r = $_->search_raw($query, @{$raw->{$name}});
863 my ($key, $val);
864 while (($key, $val) = each %r) {
865 if (exists $result{$key}) {
866 $result{$key} += $val*$scale;
867 } else {
868 $result{$key} = $val*$scale;
869 }
870 }
871 }
872 }
873 }
874 if (defined $cont and $cont ne '') {
875 for (@{$self->{inverted}->{$attr}}) {
876 my %r = $_->search($query, $cont);
877 my ($key, $val);
878 while (($key, $val) = each %r) {
879 if (exists $result{$key}) {
880 $result{$key} += $val;
881 } else {
882 $result{$key} = $val;
883 }
884 }
885 }
886 }
887 # sanity check for deleted documents.
888 # this should not be necessary !@#$
889 for (keys %result) {
890 delete $result{$_} if $self->{deleted}->{$_}
891 }
892 %result;
893 }
894
895 sub hilight_positions {
896 my ($self, $attr, $text, $query, $raw) = @_;
897 my %pos;
898
899 if (defined $raw) {
900 for (@{$self->{inverted}->{$attr}}) { # objects of type
901 # WAIT::InvertedIndex for
902 # this index field $attr
903 my $name = $_->name;
904 if (exists $raw->{$name}) {
905 my %qt;
906 grep $qt{$_}++, @{$raw->{$name}};
907 for ($_->parse_pos($text)) {
908 if (exists $qt{$_->[0]}) {
909 $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
910 }
911 }
912 }
913 }
914 }
915 if (defined $query) {
916 for (@{$self->{inverted}->{$attr}}) {
917 my %qt;
918
919 grep $qt{$_}++, $_->parse($query);
920 for ($_->parse_pos($text)) {
921 if (exists $qt{$_->[0]}) {
922 if (exists $pos{$_->[1]}) { # perl -w ;-)
923 $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
924 } else {
925 $pos{$_->[1]} = length($_->[0]);
926 }
927 }
928 }
929 }
930 }
931
932 \%pos;
933 }
934
935 sub hilight {
936 my ($tb, $buf, $qplain, $qraw) = @_;
937 my $layout = $tb->layout();
938
939 my @result;
940
941 $qplain ||= {};
942 $qraw ||= {};
943 my @ttxt = $layout->tag($buf);
944 while (@ttxt) {
945 no strict 'refs';
946 my %tag = %{shift @ttxt};
947 my $txt = shift @ttxt;
948 my $fld;
949
950 my %hl;
951 for $fld (grep defined $tag{$_}, keys %$qplain, keys %$qraw) {
952 my $hp = $tb->hilight_positions($fld, $txt,
953 $qplain->{$fld}, $qraw->{$fld});
954 for (keys %$hp) {
955 if (exists $hl{$_}) { # -w ;-(
956 $hl{$_} = max($hl{$_}, $hp->{$_});
957 } else {
958 $hl{$_} = $hp->{$_};
959 }
960 }
961 }
962 my $pos;
963 my $qt = {_qt => 1, %tag};
964 my $pl = \%tag;
965 my $last = length($txt);
966 my @tmp;
967 for $pos (sort {$b <=> $a} keys %hl) {
968 unshift @tmp, $pl, substr($txt,$pos+$hl{$pos},$last-$pos-$hl{$pos});
969 unshift @tmp, $qt, substr($txt,$pos,$hl{$pos});
970 $last = $pos;
971 }
972 push @result, $pl, substr($txt,0,$last);
973 push @result, @tmp;
974 }
975 @result; # no speed necessary
976 }
977
978 1;

Properties

Name Value
cvs2svn:cvs-rev 1.10

  ViewVC Help
Powered by ViewVC 1.1.26