/[wait]/trunk/lib/WAIT/Table.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WAIT/Table.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 19 - (show annotations)
Tue May 9 11:29:45 2000 UTC (24 years ago) by ulpfr
Original Path: branches/CPAN/lib/WAIT/Table.pm
File size: 23301 byte(s)
Import of WAIT-1.800

1 # -*- Mode: Cperl -*-
2 # Table.pm --
3 # ITIID : $ITI$ $Header $__Header$
4 # Author : Ulrich Pfeifer
5 # Created On : Thu Aug 8 13:05:10 1996
6 # Last Modified By: Ulrich Pfeifer
7 # Last Modified On: Mon May 8 20:20:58 2000
8 # Language : CPerl
9 # Update Count : 131
10 # Status : Unknown, Use with caution!
11 #
12 # Copyright (c) 1996-1997, Ulrich Pfeifer
13 #
14
15 =head1 NAME
16
17 WAIT::Table -- Module for maintaining Tables / Relations
18
19 =head1 SYNOPSIS
20
21 require WAIT::Table;
22
23 =head1 DESCRIPTION
24
25 =cut
26
27 package WAIT::Table;
28
29 use WAIT::Table::Handle ();
30 require WAIT::Parse::Base;
31
32 use strict;
33 use Carp;
34 # use autouse Carp => qw( croak($) );
35 use DB_File;
36 use Fcntl;
37 use LockFile::Simple ();
38
39 my $USE_RECNO = 0;
40
41 =head2 Creating a Table.
42
43 The constructor WAIT::Table-E<gt>new is normally called via the
44 create_table method of a database handle. This is not enforced, but
45 creating a table does not make any sense unless the table is
46 registered by the database because the latter implements persistence
47 of the meta data. Registering is done automatically by letting the
48 database handle the creation of a table.
49
50 my $db = WAIT::Database->create(name => 'sample');
51 my $tb = $db->create_table(name => 'test',
52 access => $access,
53 layout => $layout,
54 attr => ['docid', 'headline'],
55 );
56
57 The constructor returns a handle for the table. This handle is hidden by the
58 table module, to prevent direct access if called via Table.
59
60 =over 10
61
62 =item C<access> => I<accessobj>
63
64 A reference to an access object for the external parts (attributes) of
65 tuples. As you may remember, the WAIT System does not enforce that
66 objects are completely stored inside the system to avoid duplication.
67 There is no (strong) point in storing all your HTML documents inside
68 the system when indexing your WWW-Server.
69
70 The access object is designed to work like as a tied hash. You pass
71 the refernce to the object, not the tied hash though. An example
72 implementation of an access class that works for manpages is
73 WAIT::Document::Nroff.
74
75 The implementation needs to take into account that WAIT will keep this
76 object in a Data::Dumper or Storable database and re-use it when sman
77 is run. So it is not good enough if we can produce the index with it
78 now, when we create or actively access the table, WAIT also must be
79 able to retrieve documents on its own, when we are in a different
80 context. This happens specifically in a retrieval. To get this working
81 seemlessly, the access-defining class must implement a close method.
82 This method will be called before the Data::Dumper dump takes place.
83 In that moment the access-defining class must get rid of all data
84 structures that cannot be reconstructed via the Data::Dumper dump,
85 such as database handles or C pointers.
86
87 =item C<file> => I<fname>
88
89 The filename of the records file. Files for indexes will have I<fname>
90 as prefix. I<Mandatory>, but usually taken care of by the
91 WAIT::Database handle when the constructor is called via
92 WAIT::Database::create_table().
93
94 =item C<name> => I<name>
95
96 The name of this table. I<Mandatory>
97
98 =item C<attr> => [ I<attr> ... ]
99
100 A reference to an array of attribute names. WAIT will keep the
101 contents of these attributes in its table. I<Mandatory>
102
103 =item C<djk> => [ I<attr> ... ]
104
105 A reference to an array of attribute names which make up the
106 I<disjointness key>. Don't think about it - it's of no use yet;
107
108 =item C<layout> => I<layoutobj>
109
110 A reference to an external parser object. Defaults to a new instance
111 of C<WAIT::Parse::Base>. For an example implementation see
112 WAIT::Parse::Nroff. A layout class can be implemented as a singleton
113 class if you so like.
114
115 =item C<keyset> => I<keyset>
116
117 The set of attributes needed to identify a record. Defaults to all
118 attributes.
119
120 =item C<invindex> => I<inverted index>
121
122 A reference to an anon array defining attributes of each record that
123 need to be indexed. See the source of smakewhatis for how to set this
124 up.
125
126 =back
127
128 =cut
129
130 sub new {
131 my $type = shift;
132 my %parm = @_;
133 my $self = {};
134
135 # Check for mandatory attrs early
136 $self->{name} = $parm{name} or croak "No name specified";
137 $self->{attr} = $parm{attr} or croak "No attributes specified";
138
139 # Do that before we eventually add '_weight' to attributes.
140 $self->{keyset} = $parm{keyset} || [[@{$parm{attr}}]];
141
142 $self->{mode} = O_CREAT | O_RDWR;
143
144 # Determine and set up subclass
145 $type = ref($type) || $type;
146 if (defined $parm{djk}) {
147 if (@{$parm{djk}} == @{$parm{attr}}) {
148 # All attributes in DK (sloppy test here!)
149 $type .= '::Independent';
150 require WAIT::Table::Independent;
151 } else {
152 $type .= '::Disjoint';
153 require WAIT::Table::Disjoint;
154 }
155 # Add '_weight' to attributes
156 my %attr;
157 @attr{@{$parm{attr}}} = (1) x @{$parm{attr}};
158 unshift @{$parm{attr}}, '_weight' unless $attr{'_weight'};
159 }
160
161 $self->{file} = $parm{file} or croak "No file specified";
162 if (-d $self->{file}){
163 warn "Warning: Directory '$self->{file}' already exists\n";
164 } elsif (!mkdir($self->{file}, 0775)) {
165 croak "Could not 'mkdir $self->{file}': $!\n";
166 }
167
168 my $lockmgr = LockFile::Simple->make(-autoclean => 1);
169 # aquire a write lock
170 $self->{write_lock} = $lockmgr->lock($self->{file} . '/write')
171 or die "Can't lock '$self->{file}/write'";
172
173 $self->{djk} = $parm{djk} if defined $parm{djk};
174 $self->{layout} = $parm{layout} || new WAIT::Parse::Base;
175 $self->{access} = $parm{access} if defined $parm{access};
176 $self->{nextk} = 1; # next record to insert; first record unused
177 $self->{deleted} = {}; # no deleted records yet
178 $self->{indexes} = {};
179
180 bless $self, $type;
181 # Call create_index() and create_index() for compatibility
182 for (@{$self->{keyset}||[]}) {
183 #carp "Specification of indexes at table create time is deprecated";
184 $self->create_index(@$_);
185 }
186 while (@{$parm{invindex}||[]}) {
187 # carp "Specification of inverted indexes at table create time is deprecated";
188 my $att = shift @{$parm{invindex}};
189 my @spec = @{shift @{$parm{invindex}}};
190 my @opt;
191
192 if (ref($spec[0])) {
193 carp "Secondary pipelines are deprecated\n";
194 @opt = %{shift @spec};
195 }
196 $self->create_inverted_index(attribute => $att, pipeline => \@spec, @opt);
197 }
198
199 $self;
200 # end of backwarn compatibility stuff
201 }
202
203 =head2 Creating an index
204
205 $tb->create_index('docid');
206
207 =item C<create_index>
208
209 must be called with a list of attributes. This must be a subset of the
210 attributes specified when the table was created. Currently this
211 method must be called before the first tuple is inserted in the
212 table!
213
214 =cut
215
216 sub create_index {
217 my $self= shift;
218
219 croak "Cannot create index for table aready populated"
220 if $self->{nextk} > 1;
221
222 require WAIT::Index;
223
224 my $name = join '-', @_;
225 $self->{indexes}->{$name} =
226 new WAIT::Index file => $self->{file}.'/'.$name, attr => $_;
227 }
228
229 =head2 Creating an inverted index
230
231 $tb->create_inverted_index
232 (attribute => 'au',
233 pipeline => ['detex', 'isotr', 'isolc', 'split2', 'stop'],
234 predicate => 'plain',
235 );
236
237 =over 5
238
239 =item C<attribute>
240
241 The attribute to build the index on. This attribute may not be in the
242 set attributes specified when the table was created.
243
244 =item C<pipeline>
245
246 A piplines specification is a reference to an array of method names
247 (from package C<WAIT::Filter>) which are to be applied in sequence to
248 the contents of the named attribute. The attribute name may not be in
249 the attribute list.
250
251 =item C<predicate>
252
253 An indication which predicate the index implements. This may be
254 e.g. 'plain', 'stemming' or 'soundex'. The indicator will be used for
255 query processing. Currently there is no standard set of predicate
256 names. The predicate defaults to the last member of the pipeline if
257 omitted.
258
259 =back
260
261 Currently this method must be called before the first tuple is
262 inserted in the table!
263
264 =cut
265
266 sub create_inverted_index {
267 my $self = shift;
268 my %parm = @_;
269
270 croak "No attribute specified" unless $parm{attribute};
271 croak "No pipeline specified" unless $parm{pipeline};
272
273 $parm{predicate} ||= $parm{pipeline}->[-1];
274
275 croak "Cannot create index for table aready populated"
276 if $self->{nextk} > 1;
277
278 require WAIT::InvertedIndex;
279
280 # backward compatibility stuff
281 my %opt = %parm;
282 for (qw(attribute pipeline predicate)) {
283 delete $opt{$_};
284 }
285
286 my $name = join '_', ($parm{attribute}, @{$parm{pipeline}});
287 my $idx = new WAIT::InvertedIndex(file => $self->{file}.'/'.$name,
288 filter => [@{$parm{pipeline}}], # clone
289 name => $name,
290 attr => $parm{attribute},
291 %opt, # backward compatibility stuff
292 );
293 # We will have to use $parm{predicate} here
294 push @{$self->{inverted}->{$parm{attribute}}}, $idx;
295 }
296
297 sub dir {
298 $_[0]->{file};
299 }
300
301 =head2 C<$tb-E<gt>layout>
302
303 Returns the reference to the associated parser object.
304
305 =cut
306
307 sub layout { $_[0]->{layout} }
308
309 =head2 C<$tb-E<gt>fields>
310
311 Returns the array of attribute names.
312
313 =cut
314
315
316 sub fields { keys %{$_[0]->{inverted}}}
317
318 =head2 C<$tb-E<gt>drop>
319
320 Must be called via C<WAIT::Database::drop_table>
321
322 =cut
323
324 sub drop {
325 my $self = shift;
326 if ((caller)[0] eq 'WAIT::Database') { # database knows about this
327 $self->close; # just make sure
328 my $file = $self->{file};
329
330 for (values %{$self->{indexes}}) {
331 $_->drop;
332 }
333 unlink "$file/records";
334 # $self->unlock;
335 ! (!-e $file or rmdir $file);
336 } else {
337 croak ref($self)."::drop called directly";
338 }
339 }
340
341 sub mrequire ($) {
342 my $module = shift;
343
344 $module =~ s{::}{/}g;
345 $module .= '.pm';
346 require $module;
347 }
348
349 sub open {
350 my $self = shift;
351 my $file = $self->{file} . '/records';
352
353 mrequire ref($self); # that's tricky eh?
354 if (defined $self->{'layout'}) {
355 mrequire ref($self->{'layout'});
356 }
357 if (defined $self->{'access'}) {
358 mrequire ref($self->{'access'});
359 }
360 if (exists $self->{indexes}) {
361 require WAIT::Index;
362 for (values %{$self->{indexes}}) {
363 $_->{mode} = $self->{mode};
364 }
365 }
366 if (exists $self->{inverted}) {
367 my ($att, $idx);
368 for $att (keys %{$self->{inverted}}) {
369 for $idx (@{$self->{inverted}->{$att}}) {
370 $idx->{mode} = $self->{mode};
371 }
372 }
373 require WAIT::InvertedIndex;
374 }
375 unless (defined $self->{dbh}) {
376 if ($USE_RECNO) {
377 $self->{dbh} = tie(@{$self->{db}}, 'DB_File', $file,
378 $self->{mode}, 0664, $DB_RECNO);
379 } else {
380 $self->{dbh} =
381 tie(%{$self->{db}}, 'DB_File', $file,
382 $self->{mode}, 0664, $DB_BTREE);
383 }
384 }
385
386 # Locking
387 #
388 # We allow multiple readers to coexists. But write access excludes
389 # all read access vice versa. In practice read access on tables
390 # open for writing will mostly work ;-)
391
392 my $lockmgr = LockFile::Simple->make(-autoclean => 1);
393
394 # aquire a write lock. We might hold one acquired in create() already
395 $self->{write_lock} ||= $lockmgr->lock($self->{file} . '/write')
396 or die "Can't lock '$self->{file}/write'";
397
398 my $lockdir = $self->{file} . '/read';
399 unless (-d $lockdir) {
400 mkdir $lockdir, 0755 or die "Could not mkdir $lockdir: $!";
401 }
402
403 if ($self->{mode} & O_RDWR) {
404 # this is a hack. We do not check for reopening ...
405 return $self if $self->{write_lock};
406
407 # If we actually want to write we must check if there are any readers
408 opendir DIR, $lockdir or
409 die "Could not opendir '$lockdir': $!";
410 for my $lockfile (grep { -f "$lockdir/$_" } readdir DIR) {
411 # check if the locks are still valid.
412 # Since we are protected by a write lock, we could use a pline file.
413 # But we want to use the stale testing from LockFile::Simple.
414 if (my $lck = $lockmgr->trylock("$lockdir/$lockfile")) {
415 warn "Removing stale lockfile '$lockdir/$lockfile'";
416 $lck->release;
417 } else {
418 $self->{write_lock}->release;
419 die "Cannot write table '$file' while it's in use";
420 }
421 }
422 } else {
423 # this is a hack. We do not check for reopening ...
424 return $self if $self->{read_lock};
425
426 # We are a reader. So we release the write lock
427 my $id = time;
428 while (-f "$lockdir/$id.lock") { # here assume ".lock" format!
429 $id++;
430 }
431 $self->{read_lock} = $lockmgr->lock("$lockdir/$id");
432 $self->{write_lock}->release;
433 delete $self->{write_lock};
434 }
435
436 $self;
437 }
438
439 sub fetch_extern {
440 my $self = shift;
441
442 # print "#@_", $self->{'access'}->{Mode}, "\n"; # DEBUGGING?
443 if (exists $self->{'access'}) {
444 mrequire ref($self->{'access'});
445 $self->{'access'}->FETCH(@_);
446 }
447 }
448
449 sub fetch_extern_by_id {
450 my $self = shift;
451
452 $self->fetch_extern($self->fetch(@_));
453 }
454
455 sub _find_index {
456 my $self = shift;
457 my (@att) = @_;
458 my %att;
459 my $name;
460
461 @att{@att} = @att;
462
463 KEY: for $name (keys %{$self->{indexes}}) {
464 my @iat = split /-/, $name;
465 for (@iat) {
466 next KEY unless exists $att{$_};
467 }
468 return $self->{indexes}->{$name};
469 }
470 return undef;
471 }
472
473 sub have {
474 my $self = shift;
475 my %parm = @_;
476
477 my $index = $self->_find_index(keys %parm) or return; # no index-no have
478
479 defined $self->{db} or $self->open;
480 return $index->have(@_);
481 }
482
483 sub insert {
484 my $self = shift;
485 my %parm = @_;
486
487 defined $self->{db} or $self->open;
488
489 # We should move all writing methods to a subclass to check only once
490 $self->{mode} & O_RDWR or croak "Cannot insert into table opened in RD_ONLY mode";
491
492 my $tuple = join($;, map($parm{$_} || '', @{$self->{attr}}));
493 my $key;
494 my @deleted = keys %{$self->{deleted}};
495 my $gotkey = 0;
496
497 if (@deleted) {
498 $key = pop @deleted;
499 delete $self->{deleted}->{$key};
500 # Sanity check
501 if ($key && $key>0) {
502 $gotkey=1;
503 } else {
504 warn(sprintf("WAIT database inconsistency during insert ".
505 "key[%s]: Please rebuild index\n",
506 $key
507 ));
508 }
509 }
510 unless ($gotkey) {
511 $key = $self->{nextk}++;
512 }
513 if ($USE_RECNO) {
514 $self->{db}->[$key] = $tuple;
515 } else {
516 $self->{db}->{$key} = $tuple;
517 }
518 for (values %{$self->{indexes}}) {
519 unless ($_->insert($key, %parm)) {
520 # duplicate key, undo changes
521 if ($key == $self->{nextk}-1) {
522 $self->{nextk}--;
523 } else {
524 # warn "setting key[$key] deleted during insert";
525 $self->{deleted}->{$key}=1;
526 }
527 my $idx;
528 for $idx (values %{$self->{indexes}}) {
529 last if $idx eq $_;
530 $idx->remove($key, %parm);
531 }
532 return undef;
533 }
534 }
535 if (defined $self->{inverted}) {
536 my $att;
537 for $att (keys %{$self->{inverted}}) {
538 if (defined $parm{$att}) {
539 map $_->insert($key, $parm{$att}), @{$self->{inverted}->{$att}};
540 #map $_->sync, @{$self->{inverted}->{$att}}
541 }
542 }
543 }
544 $key
545 }
546
547 sub sync {
548 my $self = shift;
549
550 for (values %{$self->{indexes}}) {
551 map $_->sync, $_;
552 }
553 if (defined $self->{inverted}) {
554 my $att;
555 for $att (keys %{$self->{inverted}}) {
556 map $_->sync, @{$self->{inverted}->{$att}}
557 }
558 }
559 }
560
561 sub fetch {
562 my $self = shift;
563 my $key = shift;
564
565 return () if exists $self->{deleted}->{$key};
566
567 defined $self->{db} or $self->open;
568 if ($USE_RECNO) {
569 $self->unpack($self->{db}->[$key]);
570 } else {
571 $self->unpack($self->{db}->{$key});
572 }
573 }
574
575 sub delete_by_key {
576 my $self = shift;
577 my $key = shift;
578
579 unless ($key) {
580 Carp::cluck "Warning: delete_by_key called without key. Looks like a bug in WAIT?";
581 return;
582 }
583
584 return $self->{deleted}->{$key} if defined $self->{deleted}->{$key};
585 my %tuple = $self->fetch($key);
586 for (values %{$self->{indexes}}) {
587 $_->delete($key, %tuple);
588 }
589 if (defined $self->{inverted}) {
590 # User *must* provide the full record for this or the entries
591 # in the inverted index will not be removed
592 %tuple = (%tuple, @_);
593 my $att;
594 for $att (keys %{$self->{inverted}}) {
595 if (defined $tuple{$att}) {
596 map $_->delete($key, $tuple{$att}), @{$self->{inverted}->{$att}}
597 }
598 }
599 }
600 # warn "setting key[$key] deleted during delete_by_key";
601 ++$self->{deleted}->{$key};
602 }
603
604 sub delete {
605 my $self = shift;
606 my $tkey = $self->have(@_);
607 # warn "tkey[$tkey]\@_[@_]";
608 defined $tkey && $self->delete_by_key($tkey, @_);
609 }
610
611 sub unpack {
612 my $self = shift;
613 my $tuple = shift;
614 return unless defined $tuple;
615
616 my $att;
617 my @result;
618 my @tuple = split /$;/, $tuple;
619
620 for $att (@{$self->{attr}}) {
621 push @result, $att, shift @tuple;
622 }
623 @result;
624 }
625
626 sub set {
627 my ($self, $iattr, $value) = @_;
628
629 return unless $self->{write_lock};
630 for my $att (keys %{$self->{inverted}}) {
631 if ($] > 5.003) { # avoid bug in perl up to 5.003_05
632 my $idx;
633 for $idx (@{$self->{inverted}->{$att}}) {
634 $idx->set($iattr, $value);
635 }
636 } else {
637 map $_->set($iattr, $value), @{$self->{inverted}->{$att}};
638 }
639 }
640
641 1;
642 }
643
644 sub close {
645 my $self = shift;
646
647 if (exists $self->{'access'}) {
648 eval {$self->{'access'}->close}; # dont bother if not opened
649 }
650 for (values %{$self->{indexes}}) {
651 require WAIT::Index;
652 $_->close();
653 }
654 if (defined $self->{inverted}) {
655 my $att;
656 for $att (keys %{$self->{inverted}}) {
657 if ($] > 5.003) { # avoid bug in perl up to 5.003_05
658 my $idx;
659 for $idx (@{$self->{inverted}->{$att}}) {
660 $idx->close;
661 }
662 } else {
663 map $_->close(), @{$self->{inverted}->{$att}};
664 }
665 }
666 }
667 if ($self->{dbh}) {
668 delete $self->{dbh};
669
670 if ($USE_RECNO) {
671 untie @{$self->{db}};
672 } else {
673 untie %{$self->{db}};
674 }
675 delete $self->{db};
676 }
677
678 $self->unlock;
679
680 1;
681 }
682
683 sub unlock {
684 my $self = shift;
685
686 # Either we have a read or a write lock (or we close the table already)
687 # unless ($self->{read_lock} || $self->{write_lock}) {
688 # warn "WAIT::Table::unlock: Table aparently hold's no lock"
689 # }
690 if ($self->{write_lock}) {
691 $self->{write_lock}->release();
692 delete $self->{write_lock};
693 }
694 if ($self->{read_lock}) {
695 $self->{read_lock}->release();
696 delete $self->{read_lock};
697 }
698
699 }
700
701 sub DESTROY {
702 my $self = shift;
703
704 warn "Table handle destroyed without closing it first"
705 if $self->{write_lock} || $self->{read_lock};
706 }
707
708 sub open_scan {
709 my $self = shift;
710 my $code = shift;
711
712 $self->{dbh} or $self->open;
713 require WAIT::Scan;
714 new WAIT::Scan $self, $self->{nextk}-1, $code;
715 }
716
717 sub open_index_scan {
718 my $self = shift;
719 my $attr = shift;
720 my $code = shift;
721 my $name = join '-', @$attr;
722
723 if (defined $self->{indexes}->{$name}) {
724 $self->{indexes}->{$name}->open_scan($code);
725 } else {
726 croak "No such index '$name'";
727 }
728 }
729
730 eval {sub WAIT::Query::Raw::new} unless defined \&WAIT::Query::Raw::new;
731
732 sub prefix {
733 my ($self , $attr, $prefix) = @_;
734 my %result;
735
736 defined $self->{db} or $self->open; # require layout
737
738 for (@{$self->{inverted}->{$attr}}) {
739 my $result = $_->prefix($prefix);
740 if (defined $result) {
741 $result{$_->name} = $result;
742 }
743 }
744 bless \%result, 'WAIT::Query::Raw';
745 }
746
747 sub intervall {
748 my ($self, $attr, $lb, $ub) = @_;
749 my %result;
750
751 defined $self->{db} or $self->open; # require layout
752
753 for (@{$self->{inverted}->{$attr}}) {
754 my $result = $_->intervall($lb, $ub);
755 if (defined $result) {
756 $result{$_->name} = $result;
757 }
758 }
759 bless \%result, 'WAIT::Query::Raw';
760 }
761
762 sub search {
763 my $self = shift;
764 my ($query, $attr, $cont, $raw);
765 if (ref $_[0]) {
766 $query = shift;
767
768 $attr = $query->{attr};
769 $cont = $query->{cont};
770 $raw = $query->{raw};
771 } else {
772 require Carp;
773 Carp::cluck("Using three argument search interface is deprecated, use hashref interface instead");
774 $attr = shift;
775 $cont = shift;
776 $raw = shift;
777 $query = {
778 attr => $attr,
779 cont => $cont,
780 raw => $raw,
781 };
782 }
783
784 my %result;
785
786 defined $self->{db} or $self->open; # require layout
787
788 if ($raw) {
789 for (@{$self->{inverted}->{$attr}}) {
790 my $name = $_->name;
791 if (exists $raw->{$name} and @{$raw->{$name}}) {
792 my $scale = 1/scalar(@{$raw->{$name}});
793 my %r = $_->search_raw($query, @{$raw->{$name}});
794 my ($key, $val);
795 while (($key, $val) = each %r) {
796 if (exists $result{$key}) {
797 $result{$key} += $val*$scale;
798 } else {
799 $result{$key} = $val*$scale;
800 }
801 }
802 }
803 }
804 }
805 if (defined $cont and $cont ne '') {
806 for (@{$self->{inverted}->{$attr}}) {
807 my %r = $_->search($query, $cont);
808 my ($key, $val);
809 while (($key, $val) = each %r) {
810 if (exists $result{$key}) {
811 $result{$key} += $val;
812 } else {
813 $result{$key} = $val;
814 }
815 }
816 }
817 }
818 # sanity check for deleted documents.
819 # this should not be necessary !@#$
820 for (keys %result) {
821 delete $result{$_} if $self->{deleted}->{$_}
822 }
823 %result;
824 }
825
826 sub hilight_positions {
827 my ($self, $attr, $text, $query, $raw) = @_;
828 my %pos;
829
830 if (defined $raw) {
831 for (@{$self->{inverted}->{$attr}}) { # objects of type
832 # WAIT::InvertedIndex for
833 # this index field $attr
834 my $name = $_->name;
835 if (exists $raw->{$name}) {
836 my %qt;
837 grep $qt{$_}++, @{$raw->{$name}};
838 for ($_->parse_pos($text)) {
839 if (exists $qt{$_->[0]}) {
840 $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
841 }
842 }
843 }
844 }
845 }
846 if (defined $query) {
847 for (@{$self->{inverted}->{$attr}}) {
848 my %qt;
849
850 grep $qt{$_}++, $_->parse($query);
851 for ($_->parse_pos($text)) {
852 if (exists $qt{$_->[0]}) {
853 if (exists $pos{$_->[1]}) { # perl -w ;-)
854 $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
855 } else {
856 $pos{$_->[1]} = length($_->[0]);
857 }
858 }
859 }
860 }
861 }
862
863 \%pos;
864 }
865
866 sub hilight {
867 my ($tb, $buf, $qplain, $qraw) = @_;
868 my $layout = $tb->layout();
869
870 my @result;
871
872 $qplain ||= {};
873 $qraw ||= {};
874 my @ttxt = $layout->tag($buf);
875 while (@ttxt) {
876 no strict 'refs';
877 my %tag = %{shift @ttxt};
878 my $txt = shift @ttxt;
879 my $fld;
880
881 my %hl;
882 for $fld (grep defined $tag{$_}, keys %$qplain, keys %$qraw) {
883 my $hp = $tb->hilight_positions($fld, $txt,
884 $qplain->{$fld}, $qraw->{$fld});
885 for (keys %$hp) {
886 if (exists $hl{$_}) { # -w ;-(
887 $hl{$_} = max($hl{$_}, $hp->{$_});
888 } else {
889 $hl{$_} = $hp->{$_};
890 }
891 }
892 }
893 my $pos;
894 my $qt = {_qt => 1, %tag};
895 my $pl = \%tag;
896 my $last = length($txt);
897 my @tmp;
898 for $pos (sort {$b <=> $a} keys %hl) {
899 unshift @tmp, $pl, substr($txt,$pos+$hl{$pos},$last-$pos-$hl{$pos});
900 unshift @tmp, $qt, substr($txt,$pos,$hl{$pos});
901 $last = $pos;
902 }
903 push @result, $pl, substr($txt,0,$last);
904 push @result, @tmp;
905 }
906 @result; # no speed necessary
907 }
908
909 1;

Properties

Name Value
cvs2svn:cvs-rev 1.1.1.3

  ViewVC Help
Powered by ViewVC 1.1.26