/[wait]/cvs-head/lib/WAIT/Table.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /cvs-head/lib/WAIT/Table.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 24 - (show annotations)
Sat Nov 11 17:21:28 2000 UTC (23 years, 5 months ago) by ulpfr
File size: 23415 byte(s)
Die if attribute setting is not possible.

1 # -*- Mode: Cperl -*-
2 # Table.pm --
3 # ITIID : $ITI$ $Header $__Header$
4 # Author : Ulrich Pfeifer
5 # Created On : Thu Aug 8 13:05:10 1996
6 # Last Modified By: Ulrich Pfeifer
7 # Last Modified On: Fri May 19 14:51:14 2000
8 # Language : CPerl
9 # Update Count : 133
10 # Status : Unknown, Use with caution!
11 #
12 # Copyright (c) 1996-1997, Ulrich Pfeifer
13 #
14
15 =head1 NAME
16
17 WAIT::Table -- Module for maintaining Tables / Relations
18
19 =head1 SYNOPSIS
20
21 require WAIT::Table;
22
23 =head1 DESCRIPTION
24
25 =cut
26
27 package WAIT::Table;
28
29 use WAIT::Table::Handle ();
30 require WAIT::Parse::Base;
31
32 use strict;
33 use Carp;
34 # use autouse Carp => qw( croak($) );
35 use DB_File;
36 use Fcntl;
37 use LockFile::Simple ();
38
39 my $USE_RECNO = 0;
40
41 =head2 Creating a Table.
42
43 The constructor WAIT::Table-E<gt>new is normally called via the
44 create_table method of a database handle. This is not enforced, but
45 creating a table does not make any sense unless the table is
46 registered by the database because the latter implements persistence
47 of the meta data. Registering is done automatically by letting the
48 database handle the creation of a table.
49
50 my $db = WAIT::Database->create(name => 'sample');
51 my $tb = $db->create_table(name => 'test',
52 access => $access,
53 layout => $layout,
54 attr => ['docid', 'headline'],
55 );
56
57 The constructor returns a handle for the table. This handle is hidden by the
58 table module, to prevent direct access if called via Table.
59
60 =over 10
61
62 =item C<access> => I<accessobj>
63
64 A reference to an access object for the external parts (attributes) of
65 tuples. As you may remember, the WAIT System does not enforce that
66 objects are completely stored inside the system to avoid duplication.
67 There is no (strong) point in storing all your HTML documents inside
68 the system when indexing your WWW-Server.
69
70 The access object is designed to work like as a tied hash. You pass
71 the refernce to the object, not the tied hash though. An example
72 implementation of an access class that works for manpages is
73 WAIT::Document::Nroff.
74
75 The implementation needs to take into account that WAIT will keep this
76 object in a Data::Dumper or Storable database and re-use it when sman
77 is run. So it is not good enough if we can produce the index with it
78 now, when we create or actively access the table, WAIT also must be
79 able to retrieve documents on its own, when we are in a different
80 context. This happens specifically in a retrieval. To get this working
81 seemlessly, the access-defining class must implement a close method.
82 This method will be called before the Data::Dumper dump takes place.
83 In that moment the access-defining class must get rid of all data
84 structures that cannot be reconstructed via the Data::Dumper dump,
85 such as database handles or C pointers.
86
87 =item C<file> => I<fname>
88
89 The filename of the records file. Files for indexes will have I<fname>
90 as prefix. I<Mandatory>, but usually taken care of by the
91 WAIT::Database handle when the constructor is called via
92 WAIT::Database::create_table().
93
94 =item C<name> => I<name>
95
96 The name of this table. I<Mandatory>
97
98 =item C<attr> => [ I<attr> ... ]
99
100 A reference to an array of attribute names. WAIT will keep the
101 contents of these attributes in its table. I<Mandatory>
102
103 =item C<djk> => [ I<attr> ... ]
104
105 A reference to an array of attribute names which make up the
106 I<disjointness key>. Don't think about it - it's of no use yet;
107
108 =item C<layout> => I<layoutobj>
109
110 A reference to an external parser object. Defaults to a new instance
111 of C<WAIT::Parse::Base>. For an example implementation see
112 WAIT::Parse::Nroff. A layout class can be implemented as a singleton
113 class if you so like.
114
115 =item C<keyset> => I<keyset>
116
117 The set of attributes needed to identify a record. Defaults to all
118 attributes.
119
120 =item C<invindex> => I<inverted index>
121
122 A reference to an anon array defining attributes of each record that
123 need to be indexed. See the source of smakewhatis for how to set this
124 up.
125
126 =back
127
128 =cut
129
130 sub new {
131 my $type = shift;
132 my %parm = @_;
133 my $self = {};
134
135 # Check for mandatory attrs early
136 $self->{name} = $parm{name} or croak "No name specified";
137 $self->{attr} = $parm{attr} or croak "No attributes specified";
138
139 # Do that before we eventually add '_weight' to attributes.
140 $self->{keyset} = $parm{keyset} || [[@{$parm{attr}}]];
141
142 $self->{mode} = O_CREAT | O_RDWR;
143
144 # Determine and set up subclass
145 $type = ref($type) || $type;
146 if (defined $parm{djk}) {
147 if (@{$parm{djk}} == @{$parm{attr}}) {
148 # All attributes in DK (sloppy test here!)
149 $type .= '::Independent';
150 require WAIT::Table::Independent;
151 } else {
152 $type .= '::Disjoint';
153 require WAIT::Table::Disjoint;
154 }
155 # Add '_weight' to attributes
156 my %attr;
157 @attr{@{$parm{attr}}} = (1) x @{$parm{attr}};
158 unshift @{$parm{attr}}, '_weight' unless $attr{'_weight'};
159 }
160
161 $self->{file} = $parm{file} or croak "No file specified";
162 if (-d $self->{file}){
163 warn "Warning: Directory '$self->{file}' already exists\n";
164 } elsif (!mkdir($self->{file}, 0775)) {
165 croak "Could not 'mkdir $self->{file}': $!\n";
166 }
167
168 my $lockmgr = LockFile::Simple->make(-autoclean => 1);
169 # aquire a write lock
170 $self->{write_lock} = $lockmgr->lock($self->{file} . '/write')
171 or die "Can't lock '$self->{file}/write'";
172
173 $self->{djk} = $parm{djk} if defined $parm{djk};
174 $self->{layout} = $parm{layout} || new WAIT::Parse::Base;
175 $self->{access} = $parm{access} if defined $parm{access};
176 $self->{nextk} = 1; # next record to insert; first record unused
177 $self->{deleted} = {}; # no deleted records yet
178 $self->{indexes} = {};
179
180 bless $self, $type;
181 # Call create_index() and create_index() for compatibility
182 for (@{$self->{keyset}||[]}) {
183 #carp "Specification of indexes at table create time is deprecated";
184 $self->create_index(@$_);
185 }
186 while (@{$parm{invindex}||[]}) {
187 # carp "Specification of inverted indexes at table create time is deprecated";
188 my $att = shift @{$parm{invindex}};
189 my @spec = @{shift @{$parm{invindex}}};
190 my @opt;
191
192 if (ref($spec[0])) {
193 carp "Secondary pipelines are deprecated\n";
194 @opt = %{shift @spec};
195 }
196 $self->create_inverted_index(attribute => $att, pipeline => \@spec, @opt);
197 }
198
199 $self;
200 # end of backwarn compatibility stuff
201 }
202
203 =head2 Creating an index
204
205 $tb->create_index('docid');
206
207 =item C<create_index>
208
209 must be called with a list of attributes. This must be a subset of the
210 attributes specified when the table was created. Currently this
211 method must be called before the first tuple is inserted in the
212 table!
213
214 =cut
215
216 sub create_index {
217 my $self= shift;
218
219 croak "Cannot create index for table aready populated"
220 if $self->{nextk} > 1;
221
222 require WAIT::Index;
223
224 my $name = join '-', @_;
225 $self->{indexes}->{$name} =
226 new WAIT::Index file => $self->{file}.'/'.$name, attr => $_;
227 }
228
229 =head2 Creating an inverted index
230
231 $tb->create_inverted_index
232 (attribute => 'au',
233 pipeline => ['detex', 'isotr', 'isolc', 'split2', 'stop'],
234 predicate => 'plain',
235 );
236
237 =over 5
238
239 =item C<attribute>
240
241 The attribute to build the index on. This attribute may not be in the
242 set attributes specified when the table was created.
243
244 =item C<pipeline>
245
246 A piplines specification is a reference to an array of method names
247 (from package C<WAIT::Filter>) which are to be applied in sequence to
248 the contents of the named attribute. The attribute name may not be in
249 the attribute list.
250
251 =item C<predicate>
252
253 An indication which predicate the index implements. This may be
254 e.g. 'plain', 'stemming' or 'soundex'. The indicator will be used for
255 query processing. Currently there is no standard set of predicate
256 names. The predicate defaults to the last member of the pipeline if
257 omitted.
258
259 =back
260
261 Currently this method must be called before the first tuple is
262 inserted in the table!
263
264 =cut
265
266 sub create_inverted_index {
267 my $self = shift;
268 my %parm = @_;
269
270 croak "No attribute specified" unless $parm{attribute};
271 croak "No pipeline specified" unless $parm{pipeline};
272
273 $parm{predicate} ||= $parm{pipeline}->[-1];
274
275 croak "Cannot create index for table aready populated"
276 if $self->{nextk} > 1;
277
278 require WAIT::InvertedIndex;
279
280 # backward compatibility stuff
281 my %opt = %parm;
282 for (qw(attribute pipeline predicate)) {
283 delete $opt{$_};
284 }
285
286 my $name = join '_', ($parm{attribute}, @{$parm{pipeline}});
287 my $idx = new WAIT::InvertedIndex(file => $self->{file}.'/'.$name,
288 filter => [@{$parm{pipeline}}], # clone
289 name => $name,
290 attr => $parm{attribute},
291 %opt, # backward compatibility stuff
292 );
293 # We will have to use $parm{predicate} here
294 push @{$self->{inverted}->{$parm{attribute}}}, $idx;
295 }
296
297 sub dir {
298 $_[0]->{file};
299 }
300
301 =head2 C<$tb-E<gt>layout>
302
303 Returns the reference to the associated parser object.
304
305 =cut
306
307 sub layout { $_[0]->{layout} }
308
309 =head2 C<$tb-E<gt>fields>
310
311 Returns the array of attribute names.
312
313 =cut
314
315
316 sub fields { keys %{$_[0]->{inverted}}}
317
318 =head2 C<$tb-E<gt>drop>
319
320 Must be called via C<WAIT::Database::drop_table>
321
322 =cut
323
324 sub drop {
325 my $self = shift;
326 if ((caller)[0] eq 'WAIT::Database') { # database knows about this
327 $self->close; # just make sure
328 my $file = $self->{file};
329
330 for (values %{$self->{indexes}}) {
331 $_->drop;
332 }
333 unlink "$file/records";
334 # $self->unlock;
335 ! (!-e $file or rmdir $file);
336 } else {
337 croak ref($self)."::drop called directly";
338 }
339 }
340
341 sub mrequire ($) {
342 my $module = shift;
343
344 $module =~ s{::}{/}g;
345 $module .= '.pm';
346 require $module;
347 }
348
349 sub open {
350 my $self = shift;
351 my $file = $self->{file} . '/records';
352
353 mrequire ref($self); # that's tricky eh?
354 if (defined $self->{'layout'}) {
355 mrequire ref($self->{'layout'});
356 }
357 if (defined $self->{'access'}) {
358 mrequire ref($self->{'access'});
359 }
360 if (exists $self->{indexes}) {
361 require WAIT::Index;
362 for (values %{$self->{indexes}}) {
363 $_->{mode} = $self->{mode};
364 }
365 }
366 if (exists $self->{inverted}) {
367 my ($att, $idx);
368 for $att (keys %{$self->{inverted}}) {
369 for $idx (@{$self->{inverted}->{$att}}) {
370 $idx->{mode} = $self->{mode};
371 }
372 }
373 require WAIT::InvertedIndex;
374 }
375 unless (defined $self->{dbh}) {
376 if ($USE_RECNO) {
377 $self->{dbh} = tie(@{$self->{db}}, 'DB_File', $file,
378 $self->{mode}, 0664, $DB_RECNO);
379 } else {
380 $self->{dbh} =
381 tie(%{$self->{db}}, 'DB_File', $file,
382 $self->{mode}, 0664, $DB_BTREE);
383 }
384 }
385
386 # Locking
387 #
388 # We allow multiple readers to coexists. But write access excludes
389 # all read access vice versa. In practice read access on tables
390 # open for writing will mostly work ;-)
391
392 my $lockmgr = LockFile::Simple->make(-autoclean => 1);
393
394 # aquire a write lock. We might hold one acquired in create() already
395 $self->{write_lock} ||= $lockmgr->lock($self->{file} . '/write')
396 or die "Can't lock '$self->{file}/write'";
397
398 my $lockdir = $self->{file} . '/read';
399 unless (-d $lockdir) {
400 mkdir $lockdir, 0755 or die "Could not mkdir $lockdir: $!";
401 }
402
403 if ($self->{mode} & O_RDWR) {
404 # this is a hack. We do not check for reopening ...
405 return $self if $self->{write_lock};
406
407 # If we actually want to write we must check if there are any readers
408 opendir DIR, $lockdir or
409 die "Could not opendir '$lockdir': $!";
410 for my $lockfile (grep { -f "$lockdir/$_" } readdir DIR) {
411 # check if the locks are still valid.
412 # Since we are protected by a write lock, we could use a pline file.
413 # But we want to use the stale testing from LockFile::Simple.
414 if (my $lck = $lockmgr->trylock("$lockdir/$lockfile")) {
415 warn "Removing stale lockfile '$lockdir/$lockfile'";
416 $lck->release;
417 } else {
418 $self->{write_lock}->release;
419 die "Cannot write table '$file' while it's in use";
420 }
421 }
422 } else {
423 # this is a hack. We do not check for reopening ...
424 return $self if $self->{read_lock};
425
426 # We are a reader. So we release the write lock
427 my $id = time;
428 while (-f "$lockdir/$id.lock") { # here assume ".lock" format!
429 $id++;
430 }
431 $self->{read_lock} = $lockmgr->lock("$lockdir/$id");
432 $self->{write_lock}->release;
433 delete $self->{write_lock};
434 }
435
436 $self;
437 }
438
439 sub fetch_extern {
440 my $self = shift;
441
442 # print "#@_", $self->{'access'}->{Mode}, "\n"; # DEBUGGING?
443 if (exists $self->{'access'}) {
444 mrequire ref($self->{'access'});
445 $self->{'access'}->FETCH(@_);
446 }
447 }
448
449 sub fetch_extern_by_id {
450 my $self = shift;
451
452 $self->fetch_extern($self->fetch(@_));
453 }
454
455 sub _find_index {
456 my $self = shift;
457 my (@att) = @_;
458 my %att;
459 my $name;
460
461 @att{@att} = @att;
462
463 KEY: for $name (keys %{$self->{indexes}}) {
464 my @iat = split /-/, $name;
465 for (@iat) {
466 next KEY unless exists $att{$_};
467 }
468 return $self->{indexes}->{$name};
469 }
470 return undef;
471 }
472
473 sub have {
474 my $self = shift;
475 my %parm = @_;
476
477 my $index = $self->_find_index(keys %parm) or return; # no index-no have
478
479 defined $self->{db} or $self->open;
480 return $index->have(@_);
481 }
482
483 sub insert {
484 my $self = shift;
485 my %parm = @_;
486
487 defined $self->{db} or $self->open;
488
489 # We should move all writing methods to a subclass to check only once
490 $self->{mode} & O_RDWR or croak "Cannot insert into table opened in RD_ONLY mode";
491
492 my $tuple = join($;, map($parm{$_} || '', @{$self->{attr}}));
493 my $key;
494 my @deleted = keys %{$self->{deleted}};
495 my $gotkey = 0;
496
497 if (@deleted) {
498 $key = pop @deleted;
499 delete $self->{deleted}->{$key};
500 # Sanity check
501 if ($key && $key>0) {
502 $gotkey=1;
503 } else {
504 warn(sprintf("WAIT database inconsistency during insert ".
505 "key[%s]: Please rebuild index\n",
506 $key
507 ));
508 }
509 }
510 unless ($gotkey) {
511 $key = $self->{nextk}++;
512 }
513 if ($USE_RECNO) {
514 $self->{db}->[$key] = $tuple;
515 } else {
516 $self->{db}->{$key} = $tuple;
517 }
518 for (values %{$self->{indexes}}) {
519 unless ($_->insert($key, %parm)) {
520 # duplicate key, undo changes
521 if ($key == $self->{nextk}-1) {
522 $self->{nextk}--;
523 } else {
524 # warn "setting key[$key] deleted during insert";
525 $self->{deleted}->{$key}=1;
526 }
527 my $idx;
528 for $idx (values %{$self->{indexes}}) {
529 last if $idx eq $_;
530 $idx->remove($key, %parm);
531 }
532 return undef;
533 }
534 }
535 if (defined $self->{inverted}) {
536 my $att;
537 for $att (keys %{$self->{inverted}}) {
538 if (defined $parm{$att}) {
539 map $_->insert($key, $parm{$att}), @{$self->{inverted}->{$att}};
540 #map $_->sync, @{$self->{inverted}->{$att}}
541 }
542 }
543 }
544 $key
545 }
546
547 sub sync {
548 my $self = shift;
549
550 for (values %{$self->{indexes}}) {
551 map $_->sync, $_;
552 }
553 if (defined $self->{inverted}) {
554 my $att;
555 for $att (keys %{$self->{inverted}}) {
556 map $_->sync, @{$self->{inverted}->{$att}}
557 }
558 }
559 }
560
561 sub fetch {
562 my $self = shift;
563 my $key = shift;
564
565 return () if exists $self->{deleted}->{$key};
566
567 defined $self->{db} or $self->open;
568 if ($USE_RECNO) {
569 $self->unpack($self->{db}->[$key]);
570 } else {
571 $self->unpack($self->{db}->{$key});
572 }
573 }
574
575 sub delete_by_key {
576 my $self = shift;
577 my $key = shift;
578
579 unless ($key) {
580 Carp::cluck "Warning: delete_by_key called without key. Looks like a bug in WAIT?";
581 return;
582 }
583
584 return $self->{deleted}->{$key} if defined $self->{deleted}->{$key};
585 my %tuple = $self->fetch($key);
586 for (values %{$self->{indexes}}) {
587 $_->delete($key, %tuple);
588 }
589 if (defined $self->{inverted}) {
590 # User *must* provide the full record for this or the entries
591 # in the inverted index will not be removed
592 %tuple = (%tuple, @_);
593 my $att;
594 for $att (keys %{$self->{inverted}}) {
595 if (defined $tuple{$att}) {
596 map $_->delete($key, $tuple{$att}), @{$self->{inverted}->{$att}}
597 }
598 }
599 }
600 # warn "setting key[$key] deleted during delete_by_key";
601 ++$self->{deleted}->{$key};
602 }
603
604 sub delete {
605 my $self = shift;
606 my $tkey = $self->have(@_);
607 # warn "tkey[$tkey]\@_[@_]";
608 defined $tkey && $self->delete_by_key($tkey, @_);
609 }
610
611 sub unpack {
612 my $self = shift;
613 my $tuple = shift;
614 return unless defined $tuple;
615
616 my $att;
617 my @result;
618 my @tuple = split /$;/, $tuple;
619
620 for $att (@{$self->{attr}}) {
621 push @result, $att, shift @tuple;
622 }
623 @result;
624 }
625
626 sub set {
627 my ($self, $iattr, $value) = @_;
628
629 unless ($self->{write_lock}) {
630 die "Cannot set attribute $iattr without having a write lock. Nothing done";
631 }
632 for my $att (keys %{$self->{inverted}}) {
633 if ($] > 5.003) { # avoid bug in perl up to 5.003_05
634 my $idx;
635 for $idx (@{$self->{inverted}->{$att}}) {
636 $idx->set($iattr, $value);
637 }
638 } else {
639 map $_->set($iattr, $value), @{$self->{inverted}->{$att}};
640 }
641 }
642
643 1;
644 }
645
646 sub close {
647 my $self = shift;
648
649 if (exists $self->{'access'}) {
650 eval {$self->{'access'}->close}; # dont bother if not opened
651 }
652 for (values %{$self->{indexes}}) {
653 require WAIT::Index;
654 $_->close();
655 }
656 if (defined $self->{inverted}) {
657 require WAIT::InvertedIndex;
658 my $att;
659 for $att (keys %{$self->{inverted}}) {
660 if ($] > 5.003) { # avoid bug in perl up to 5.003_05
661 my $idx;
662 for $idx (@{$self->{inverted}->{$att}}) {
663 $idx->close;
664 }
665 } else {
666 map $_->close(), @{$self->{inverted}->{$att}};
667 }
668 }
669 }
670 if ($self->{dbh}) {
671 delete $self->{dbh};
672
673 if ($USE_RECNO) {
674 untie @{$self->{db}};
675 } else {
676 untie %{$self->{db}};
677 }
678 delete $self->{db};
679 }
680
681 $self->unlock;
682
683 1;
684 }
685
686 sub unlock {
687 my $self = shift;
688
689 # Either we have a read or a write lock (or we close the table already)
690 # unless ($self->{read_lock} || $self->{write_lock}) {
691 # warn "WAIT::Table::unlock: Table aparently hold's no lock"
692 # }
693 if ($self->{write_lock}) {
694 $self->{write_lock}->release();
695 delete $self->{write_lock};
696 }
697 if ($self->{read_lock}) {
698 $self->{read_lock}->release();
699 delete $self->{read_lock};
700 }
701
702 }
703
704 sub DESTROY {
705 my $self = shift;
706
707 warn "Table handle destroyed without closing it first"
708 if $self->{write_lock} || $self->{read_lock};
709 }
710
711 sub open_scan {
712 my $self = shift;
713 my $code = shift;
714
715 $self->{dbh} or $self->open;
716 require WAIT::Scan;
717 new WAIT::Scan $self, $self->{nextk}-1, $code;
718 }
719
720 sub open_index_scan {
721 my $self = shift;
722 my $attr = shift;
723 my $code = shift;
724 my $name = join '-', @$attr;
725
726 if (defined $self->{indexes}->{$name}) {
727 $self->{indexes}->{$name}->open_scan($code);
728 } else {
729 croak "No such index '$name'";
730 }
731 }
732
733 eval {sub WAIT::Query::Raw::new} unless defined \&WAIT::Query::Raw::new;
734
735 sub prefix {
736 my ($self , $attr, $prefix) = @_;
737 my %result;
738
739 defined $self->{db} or $self->open; # require layout
740
741 for (@{$self->{inverted}->{$attr}}) {
742 my $result = $_->prefix($prefix);
743 if (defined $result) {
744 $result{$_->name} = $result;
745 }
746 }
747 bless \%result, 'WAIT::Query::Raw';
748 }
749
750 sub intervall {
751 my ($self, $attr, $lb, $ub) = @_;
752 my %result;
753
754 defined $self->{db} or $self->open; # require layout
755
756 for (@{$self->{inverted}->{$attr}}) {
757 my $result = $_->intervall($lb, $ub);
758 if (defined $result) {
759 $result{$_->name} = $result;
760 }
761 }
762 bless \%result, 'WAIT::Query::Raw';
763 }
764
765 sub search {
766 my $self = shift;
767 my ($query, $attr, $cont, $raw);
768 if (ref $_[0]) {
769 $query = shift;
770
771 $attr = $query->{attr};
772 $cont = $query->{cont};
773 $raw = $query->{raw};
774 } else {
775 require Carp;
776 Carp::cluck("Using three argument search interface is deprecated, use hashref interface instead");
777 $attr = shift;
778 $cont = shift;
779 $raw = shift;
780 $query = {
781 attr => $attr,
782 cont => $cont,
783 raw => $raw,
784 };
785 }
786
787 my %result;
788
789 defined $self->{db} or $self->open; # require layout
790
791 if ($raw) {
792 for (@{$self->{inverted}->{$attr}}) {
793 my $name = $_->name;
794 if (exists $raw->{$name} and @{$raw->{$name}}) {
795 my $scale = 1/scalar(@{$raw->{$name}});
796 my %r = $_->search_raw($query, @{$raw->{$name}});
797 my ($key, $val);
798 while (($key, $val) = each %r) {
799 if (exists $result{$key}) {
800 $result{$key} += $val*$scale;
801 } else {
802 $result{$key} = $val*$scale;
803 }
804 }
805 }
806 }
807 }
808 if (defined $cont and $cont ne '') {
809 for (@{$self->{inverted}->{$attr}}) {
810 my %r = $_->search($query, $cont);
811 my ($key, $val);
812 while (($key, $val) = each %r) {
813 if (exists $result{$key}) {
814 $result{$key} += $val;
815 } else {
816 $result{$key} = $val;
817 }
818 }
819 }
820 }
821 # sanity check for deleted documents.
822 # this should not be necessary !@#$
823 for (keys %result) {
824 delete $result{$_} if $self->{deleted}->{$_}
825 }
826 %result;
827 }
828
829 sub hilight_positions {
830 my ($self, $attr, $text, $query, $raw) = @_;
831 my %pos;
832
833 if (defined $raw) {
834 for (@{$self->{inverted}->{$attr}}) { # objects of type
835 # WAIT::InvertedIndex for
836 # this index field $attr
837 my $name = $_->name;
838 if (exists $raw->{$name}) {
839 my %qt;
840 grep $qt{$_}++, @{$raw->{$name}};
841 for ($_->parse_pos($text)) {
842 if (exists $qt{$_->[0]}) {
843 $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
844 }
845 }
846 }
847 }
848 }
849 if (defined $query) {
850 for (@{$self->{inverted}->{$attr}}) {
851 my %qt;
852
853 grep $qt{$_}++, $_->parse($query);
854 for ($_->parse_pos($text)) {
855 if (exists $qt{$_->[0]}) {
856 if (exists $pos{$_->[1]}) { # perl -w ;-)
857 $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
858 } else {
859 $pos{$_->[1]} = length($_->[0]);
860 }
861 }
862 }
863 }
864 }
865
866 \%pos;
867 }
868
869 sub hilight {
870 my ($tb, $buf, $qplain, $qraw) = @_;
871 my $layout = $tb->layout();
872
873 my @result;
874
875 $qplain ||= {};
876 $qraw ||= {};
877 my @ttxt = $layout->tag($buf);
878 while (@ttxt) {
879 no strict 'refs';
880 my %tag = %{shift @ttxt};
881 my $txt = shift @ttxt;
882 my $fld;
883
884 my %hl;
885 for $fld (grep defined $tag{$_}, keys %$qplain, keys %$qraw) {
886 my $hp = $tb->hilight_positions($fld, $txt,
887 $qplain->{$fld}, $qraw->{$fld});
888 for (keys %$hp) {
889 if (exists $hl{$_}) { # -w ;-(
890 $hl{$_} = max($hl{$_}, $hp->{$_});
891 } else {
892 $hl{$_} = $hp->{$_};
893 }
894 }
895 }
896 my $pos;
897 my $qt = {_qt => 1, %tag};
898 my $pl = \%tag;
899 my $last = length($txt);
900 my @tmp;
901 for $pos (sort {$b <=> $a} keys %hl) {
902 unshift @tmp, $pl, substr($txt,$pos+$hl{$pos},$last-$pos-$hl{$pos});
903 unshift @tmp, $qt, substr($txt,$pos,$hl{$pos});
904 $last = $pos;
905 }
906 push @result, $pl, substr($txt,0,$last);
907 push @result, @tmp;
908 }
909 @result; # no speed necessary
910 }
911
912 1;

Properties

Name Value
cvs2svn:cvs-rev 1.2

  ViewVC Help
Powered by ViewVC 1.1.26