/[wait]/trunk/lib/WAIT/Table.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WAIT/Table.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 108 - (show annotations)
Tue Jul 13 17:41:12 2004 UTC (19 years, 9 months ago) by dpavlin
File size: 23742 byte(s)
beginning of version 2.0 using BerkeleyDB (non-functional for now)

1 # -*- Mode: Cperl -*-
2 # Table.pm --
3 # ITIID : $ITI$ $Header $__Header$
4 # Author : Ulrich Pfeifer
5 # Created On : Thu Aug 8 13:05:10 1996
6 # Last Modified By: Ulrich Pfeifer
7 # Last Modified On: Wed Jan 23 14:15:15 2002
8 # Language : CPerl
9 # Update Count : 152
10 # Status : Unknown, Use with caution!
11 #
12 # Copyright (c) 1996-1997, Ulrich Pfeifer
13 #
14
15 =head1 NAME
16
17 WAIT::Table -- Module for maintaining Tables / Relations
18
19 =head1 SYNOPSIS
20
21 require WAIT::Table;
22
23 =head1 DESCRIPTION
24
25 =cut
26
27 package WAIT::Table;
28 our $VERSION = "2.000";
29
30 use WAIT::Table::Handle ();
31 require WAIT::Parse::Base;
32
33 use strict;
34 use Carp;
35 # use autouse Carp => qw( croak($) );
36 use BerkeleyDB;
37 use Fcntl;
38
39 =head2 Creating a Table.
40
41 The constructor WAIT::Table-E<gt>new is normally called via the
42 create_table method of a database handle. This is not enforced, but
43 creating a table does not make any sense unless the table is
44 registered by the database because the latter implements persistence
45 of the meta data. Registering is done automatically by letting the
46 database handle the creation of a table.
47
48 my $db = WAIT::Database->create(name => 'sample');
49 my $tb = $db->create_table(name => 'test',
50 access => $access,
51 layout => $layout,
52 attr => ['docid', 'headline'],
53 );
54
55 The constructor returns a handle for the table. This handle is hidden by the
56 table module, to prevent direct access if called via Table.
57
58 =over 10
59
60 =item C<access> => I<accessobj>
61
62 A reference to an access object for the external parts (attributes) of
63 tuples. As you may remember, the WAIT System does not enforce that
64 objects are completely stored inside the system to avoid duplication.
65 There is no (strong) point in storing all your HTML documents inside
66 the system when indexing your WWW-Server.
67
68 The access object is designed to work like as a tied hash. You pass
69 the refernce to the object, not the tied hash though. An example
70 implementation of an access class that works for manpages is
71 WAIT::Document::Nroff.
72
73 The implementation needs to take into account that WAIT will keep this
74 object in a Data::Dumper or Storable database and re-use it when sman
75 is run. So it is not good enough if we can produce the index with it
76 now, when we create or actively access the table, WAIT also must be
77 able to retrieve documents on its own, when we are in a different
78 context. This happens specifically in a retrieval. To get this working
79 seemlessly, the access-defining class must implement a close method.
80 This method will be called before the Data::Dumper dump takes place.
81 In that moment the access-defining class must get rid of all data
82 structures that cannot be reconstructed via the Data::Dumper dump,
83 such as database handles or C pointers.
84
85 =item C<file> => I<fname>
86
87 The filename of the records file. Files for indexes will have I<fname>
88 as prefix. I<Mandatory>, but usually taken care of by the
89 WAIT::Database handle when the constructor is called via
90 WAIT::Database::create_table().
91
92 =item C<name> => I<name>
93
94 The name of this table. I<Mandatory>
95
96 =item C<attr> => [ I<attr> ... ]
97
98 A reference to an array of attribute names. WAIT will keep the
99 contents of these attributes in its table. I<Mandatory>
100
101 =item C<djk> => [ I<attr> ... ]
102
103 A reference to an array of attribute names which make up the
104 I<disjointness key>. Don't think about it - it's of no use yet;
105
106 =item C<layout> => I<layoutobj>
107
108 A reference to an external parser object. Defaults to a new instance
109 of C<WAIT::Parse::Base>. For an example implementation see
110 WAIT::Parse::Nroff. A layout class can be implemented as a singleton
111 class if you so like.
112
113 =item C<keyset> => I<keyset>
114
115 The set of attributes needed to identify a record. Defaults to all
116 attributes.
117
118 =item C<invindex> => I<inverted index>
119
120 A reference to an anon array defining attributes of each record that
121 need to be indexed. See the source of smakewhatis for how to set this
122 up.
123
124 =back
125
126 =cut
127
128 sub new {
129 my $type = shift;
130 my %parm = @_;
131 my $self = {};
132
133 # Check for mandatory attrs early
134 for my $x (qw(name attr env maindbfile tablename)) {
135 $self->{$x} = $parm{$x} or croak "No $x specified";
136 }
137
138 # Do that before we eventually add '_weight' to attributes.
139 $self->{keyset} = $parm{keyset} || [[@{$parm{attr}}]];
140
141 $self->{mode} = O_CREAT | O_RDWR;
142
143 # Determine and set up subclass
144 $type = ref($type) || $type;
145 if (defined $parm{djk}) {
146 if (@{$parm{djk}} == @{$parm{attr}}) {
147 # All attributes in DK (sloppy test here!)
148 $type .= '::Independent';
149 require WAIT::Table::Independent;
150 } else {
151 $type .= '::Disjoint';
152 require WAIT::Table::Disjoint;
153 }
154 # Add '_weight' to attributes
155 my %attr;
156 @attr{@{$parm{attr}}} = (1) x @{$parm{attr}};
157 unshift @{$parm{attr}}, '_weight' unless $attr{'_weight'};
158 }
159
160 $self->{path} = $parm{path} or croak "No path specified";
161 bless $self, $type;
162
163 $self->{djk} = $parm{djk} if defined $parm{djk};
164 $self->{layout} = $parm{layout} || new WAIT::Parse::Base;
165 $self->{access} = $parm{access} if defined $parm{access};
166 $self->{nextk} = 1; # next record to insert; first record unused
167 $self->{deleted} = {}; # no deleted records yet
168 $self->{indexes} = {};
169
170 # Checking for readers is not necessary, but let's go with the
171 # generic method.
172
173 # Call create_index() and create_index() for compatibility
174 for (@{$self->{keyset}||[]}) {
175 #carp "Specification of indexes at table create time is deprecated";
176 $self->create_index(@$_);
177 }
178 while (@{$parm{invindex}||[]}) {
179 # carp "Specification of inverted indexes at table create time is deprecated";
180 my $att = shift @{$parm{invindex}};
181 my @spec = @{shift @{$parm{invindex}}};
182 my @opt = ();
183
184 if (ref($spec[0])) {
185 warn "Secondary pipelines are deprecated";
186 @opt = %{shift @spec};
187 }
188 $self->create_inverted_index(attribute => $att,
189 pipeline => \@spec,
190 @opt);
191 }
192
193 $self;
194 # end of backwarn compatibility stuff
195 }
196
197 for my $accessor (qw(maindbfile tablename)) {
198 no strict 'refs';
199 *{$accessor} = sub {
200 my($self) = @_;
201 return $self->{$accessor} if $self->{$accessor};
202 require Carp;
203 Carp::confess("accessor $accessor not there");
204 }
205 }
206
207 =head2 Creating an index
208
209 $tb->create_index('docid');
210
211 =item C<create_index>
212
213 must be called with a list of attributes. This must be a subset of the
214 attributes specified when the table was created. Currently this
215 method must be called before the first tuple is inserted in the
216 table!
217
218 =cut
219
220 sub create_index {
221 my $self= shift;
222
223 croak "Cannot create index for table aready populated"
224 if $self->{nextk} > 1;
225
226 require WAIT::Index;
227
228 my $name = join '-', @_;
229 #### warn "WARNING: Suspect use of \$_ in method create_index. name[$name]_[$_]";
230 $self->{indexes}->{$name} =
231 WAIT::Index->new(
232 file => $self->file.'/'.$name,
233 subname => $name,
234 env => $self->{env},
235 maindbfile => $self->maindbfile,
236 tablename => $self->tablename,
237 attr => $_,
238 );
239 }
240
241 =head2 Creating an inverted index
242
243 $tb->create_inverted_index
244 (attribute => 'au',
245 pipeline => ['detex', 'isotr', 'isolc', 'split2', 'stop'],
246 predicate => 'plain',
247 );
248
249 =over 5
250
251 =item C<attribute>
252
253 The attribute to build the index on. This attribute may not be in the
254 set attributes specified when the table was created.
255
256 =item C<pipeline>
257
258 A piplines specification is a reference to an array of method names
259 (from package C<WAIT::Filter>) which are to be applied in sequence to
260 the contents of the named attribute. The attribute name may not be in
261 the attribute list.
262
263 =item C<predicate>
264
265 An indication which predicate the index implements. This may be
266 e.g. 'plain', 'stemming' or 'soundex'. The indicator will be used for
267 query processing. Currently there is no standard set of predicate
268 names. The predicate defaults to the last member of the pipeline if
269 omitted.
270
271 =back
272
273 Currently this method must be called before the first tuple is
274 inserted in the table!
275
276 =cut
277
278 sub create_inverted_index {
279 my $self = shift;
280 my %parm = @_;
281
282 croak "No attribute specified" unless $parm{attribute};
283 croak "No pipeline specified" unless $parm{pipeline};
284
285 $parm{predicate} ||= $parm{pipeline}->[-1];
286
287 croak "Cannot create index for table aready populated"
288 if $self->{nextk} > 1;
289
290 require WAIT::InvertedIndex;
291
292 # backward compatibility stuff
293 my %opt = %parm;
294 for (qw(attribute pipeline predicate)) {
295 delete $opt{$_};
296 }
297
298 my $name = join '_', ($parm{attribute}, @{$parm{pipeline}});
299 my $idx = WAIT::InvertedIndex->new(file => $self->file.'/'.$name,
300 subname=> $name,
301 env => $self->{env},
302 maindbfile => $self->maindbfile,
303 tablename => $self->tablename,
304 filter => [@{$parm{pipeline}}], # clone
305 name => $name,
306 attr => $parm{attribute},
307 %opt, # backward compatibility stuff
308 );
309 # We will have to use $parm{predicate} here
310 push @{$self->{inverted}->{$parm{attribute}}}, $idx;
311 }
312
313 sub dir {
314 $_[0]->file;
315 }
316
317 =head2 C<$tb-E<gt>layout>
318
319 Returns the reference to the associated parser object.
320
321 =cut
322
323 sub layout { $_[0]->{layout} }
324
325 =head2 C<$tb-E<gt>fields>
326
327 Returns the array of attribute names.
328
329 =cut
330
331
332 sub fields { keys %{$_[0]->{inverted}}}
333
334 =head2 C<$tb-E<gt>drop>
335
336 Must be called via C<WAIT::Database::drop_table>
337
338 =cut
339
340 sub drop {
341 my $self = shift;
342
343 if ((caller)[0] eq 'WAIT::Database') { # database knows about this
344 $self->close; # just make sure
345
346 my $file = $self->file;
347
348 for (values %{$self->{indexes}}) {
349 $_->drop;
350 }
351 unlink "$file/records";
352 rmdir "$file/read" or warn "Could not rmdir '$file/read'";
353
354 } else {
355 croak ref($self)."::drop called directly";
356 }
357 }
358
359 sub mrequire ($) {
360 my $module = shift;
361
362 $module =~ s{::}{/}g;
363 $module .= '.pm';
364 require $module;
365 }
366
367 sub path {
368 my($self) = @_;
369 return $self->{path} if $self->{path};
370 require Data::Dumper; print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . Data::Dumper->new([$self],[qw(self)])->Indent(1)->Useqq(1)->Dump; # XXX
371 require Carp;
372 Carp::confess("NO file attr");
373 }
374
375 sub open {
376 my $self = shift;
377 my $file = $self->file . '/records';
378
379 mrequire ref($self); # that's tricky eh?
380 if (defined $self->{'layout'}) {
381 mrequire ref($self->{'layout'});
382 }
383 if (defined $self->{'access'}) {
384 mrequire ref($self->{'access'});
385 }
386 if (exists $self->{indexes}) {
387 require WAIT::Index;
388 for my $Ind (values %{$self->{indexes}}) {
389 for my $x (qw(mode env maindbfile)) {
390 $Ind->{$x} = $self->{$x};
391 }
392 }
393 }
394 if (exists $self->{inverted}) {
395 my ($att, $idx);
396 for $att (keys %{$self->{inverted}}) {
397 for $idx (@{$self->{inverted}->{$att}}) {
398 for my $x (qw(mode env maindbfile)) {
399 $idx->{$x} = $self->{$x};
400 }
401 }
402 }
403 require WAIT::InvertedIndex;
404 }
405
406 # CONFUSION: WAIT knows two *modes*: read-only or read-write.
407 # BerkeleyDB means file permissions when talking about Mode.
408 # BerkeleyDB has the "Flags" attribute to specify
409 # read/write/lock/etc subsystems.
410
411 my $flags;
412 if ($self->{mode} & O_RDWR) {
413 $flags = DB_CREATE; # | DB_INIT_MPOOL | DB_PRIVATE | DB_INIT_CDB;
414 warn "Flags on table $file set to 'writing'";
415 } else {
416 $flags = DB_RDONLY;
417 # warn "Flags on table $file set to 'readonly'";
418 }
419 unless (defined $self->{dbh}) {
420 my $subname = $self->tablename . "/records";
421 $self->{dbh} =
422 tie(%{$self->{db}}, 'BerkeleyDB::Btree',
423 $self->{env} ? (Env => $self->{env}) : (),
424 # Filename => $file,
425 Filename => $self->maindbfile,
426 Subname => $subname,
427 Mode => 0664,
428 Flags => $flags,
429 $WAIT::Database::Cachesize?(Cachesize => $WAIT::Database::Cachesize):(),
430 $WAIT::Database::Pagesize?(Pagesize => $WAIT::Database::Pagesize):(),
431 )
432 or die "Cannot tie: $BerkeleyDB::Error;
433 DEBUG: Filename[$self->{maindbfile}]subname[$subname]Mode[0664]Flags[$flags]";
434 }
435 $self;
436 }
437
438 sub fetch_extern {
439 my $self = shift;
440
441 # print "#@_", $self->{'access'}->{Mode}, "\n"; # DEBUGGING?
442 if (exists $self->{'access'}) {
443 mrequire ref($self->{'access'});
444 $self->{'access'}->FETCH(@_);
445 }
446 }
447
448 sub fetch_extern_by_id {
449 my $self = shift;
450
451 $self->fetch_extern($self->fetch(@_));
452 }
453
454 sub _find_index {
455 my $self = shift;
456 my (@att) = @_;
457 my %att;
458 my $name;
459
460 @att{@att} = @att;
461
462 KEY: for $name (keys %{$self->{indexes}}) {
463 my @iat = split /-/, $name;
464 for (@iat) {
465 next KEY unless exists $att{$_};
466 }
467 return $self->{indexes}->{$name};
468 }
469 return undef;
470 }
471
472 sub have {
473 my $self = shift;
474 my %parm = @_;
475
476 my $index = $self->_find_index(keys %parm) or return; # no index-no have
477
478 defined $self->{db} or $self->open;
479 return $index->have(@_);
480 }
481
482 sub insert {
483 my $self = shift;
484 my %parm = @_;
485
486 defined $self->{db} or $self->open;
487
488 # We should move all writing methods to a subclass to check only once
489 $self->{mode} & O_RDWR or croak "Cannot insert into table opened in RD_ONLY mode";
490
491 my $tuple = join($;, map($parm{$_} || '', @{$self->{attr}}));
492 my $key;
493 my @deleted = keys %{$self->{deleted}};
494 my $gotkey = 0;
495
496 if (@deleted) {
497 $key = pop @deleted;
498 delete $self->{deleted}->{$key};
499 # Sanity check
500 if ($key && $key>0) {
501 $gotkey=1;
502 } else {
503 warn(sprintf("WAIT database inconsistency during insert ".
504 "key[%s]: Please rebuild index\n",
505 $key
506 ));
507 }
508 }
509 unless ($gotkey) {
510 $key = $self->{nextk}++;
511 }
512 $self->{db}->{$key} = $tuple;
513 for (values %{$self->{indexes}}) {
514 unless ($_->insert($key, %parm)) {
515 # duplicate key, undo changes
516 if ($key == $self->{nextk}-1) {
517 $self->{nextk}--;
518 } else {
519 # warn "setting key[$key] deleted during insert";
520 $self->{deleted}->{$key}=1;
521 }
522 my $idx;
523 for $idx (values %{$self->{indexes}}) {
524 last if $idx eq $_;
525 $idx->remove($key, %parm);
526 }
527 return undef;
528 }
529 }
530 if (defined $self->{inverted}) {
531 my $att;
532 for $att (keys %{$self->{inverted}}) {
533 if (defined $parm{$att}) {
534 map $_->insert($key, $parm{$att}), @{$self->{inverted}->{$att}};
535 #map $_->sync, @{$self->{inverted}->{$att}}
536 }
537 }
538 }
539 $key
540 }
541
542 sub sync {
543 my $self = shift;
544
545 for (values %{$self->{indexes}}) {
546 map $_->sync, $_;
547 }
548 if (defined $self->{inverted}) {
549 my $att;
550 for $att (keys %{$self->{inverted}}) {
551 map $_->sync, @{$self->{inverted}->{$att}}
552 }
553 }
554 }
555
556 sub fetch {
557 my $self = shift;
558 my $key = shift;
559
560 return () if exists $self->{deleted}->{$key};
561
562 defined $self->{db} or $self->open;
563 $self->unpack($self->{db}->{$key});
564 }
565
566 sub delete_by_key {
567 my $self = shift;
568 my $key = shift;
569
570 unless ($key) {
571 Carp::cluck "Warning: delete_by_key called without key. Looks like a bug in WAIT?";
572 return;
573 }
574
575 return $self->{deleted}->{$key} if defined $self->{deleted}->{$key};
576 my %tuple = $self->fetch($key);
577 for (values %{$self->{indexes}}) {
578 $_->delete($key, %tuple);
579 }
580 if (defined $self->{inverted}) {
581 # User *must* provide the full record for this or the entries
582 # in the inverted index will not be removed
583 %tuple = (%tuple, @_);
584 my $att;
585 for $att (keys %{$self->{inverted}}) {
586 if (defined $tuple{$att}) {
587 map $_->delete($key, $tuple{$att}), @{$self->{inverted}->{$att}}
588 }
589 }
590 }
591 # warn "setting key[$key] deleted during delete_by_key";
592 ++$self->{deleted}->{$key};
593 }
594
595 sub delete {
596 my $self = shift;
597 my $tkey = $self->have(@_);
598 # warn "tkey[$tkey]\@_[@_]";
599 defined $tkey && $self->delete_by_key($tkey, @_);
600 }
601
602 sub unpack {
603 my($self, $tuple) = @_;
604
605 unless (defined $tuple){
606 # require Carp; # unfortunately gives us "bizarre copy...." :-(((((
607 warn("Debug: somebody called unpack without argument tuple!");
608 return;
609 }
610
611 my $att;
612 my @result;
613 my @tuple = split /$;/, $tuple;
614
615 for $att (@{$self->{attr}}) {
616 push @result, $att, shift @tuple;
617 }
618 @result;
619 }
620
621 sub set {
622 my ($self, $iattr, $value) = @_;
623 # in the rare case that they haven't written a single record yet, we
624 # make sure, the inverted inherits our $self->{mode}:
625 defined $self->{db} or $self->open;
626
627 for my $att (keys %{$self->{inverted}}) {
628 if ($] > 5.003) { # avoid bug in perl up to 5.003_05
629 my $idx;
630 for $idx (@{$self->{inverted}->{$att}}) {
631 $idx->set($iattr, $value);
632 }
633 } else {
634 map $_->set($iattr, $value), @{$self->{inverted}->{$att}};
635 }
636 }
637
638 1;
639 }
640
641 sub close {
642 my $self = shift;
643
644 require Carp; Carp::cluck("------->Closing A Table<-------");
645
646 if (exists $self->{'access'}) {
647 eval {$self->{'access'}->close}; # dont bother if not opened
648 }
649 if ($WAIT::Index::VERSION) {
650 for (values %{$self->{indexes}}) {
651 $_->close();
652 }
653 }
654 if (defined $self->{inverted} && $WAIT::InvertedIndex::VERSION) {
655 # require WAIT::InvertedIndex; Uli: we can avoid closing indexes:
656 # if WAIT::InvertedIndex has not been loaded, they cannot have
657 # been altered so far
658 my $att;
659 for $att (keys %{$self->{inverted}}) {
660 if ($] > 5.003) { # avoid bug in perl up to 5.003_05
661 my $idx;
662 for $idx (@{$self->{inverted}->{$att}}) {
663 $idx->close;
664 }
665 } else {
666 map $_->close(), @{$self->{inverted}->{$att}};
667 }
668 }
669 }
670 if ($self->{dbh}) {
671 delete $self->{dbh};
672 }
673 untie %{$self->{db}};
674 for my $att (qw(env db file maindbfile)) {
675 delete $self->{$att};
676 warn "----->Deleted att $att<-----";
677 }
678
679 1;
680 }
681
682 sub DESTROY {
683 my $self = shift;
684
685 delete $self->{env};
686
687 # require Data::Dumper; print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . Data::Dumper->new([$self],[qw(self)])->Indent(1)->Useqq(1)->Dump; # XXX
688
689 }
690
691 sub open_scan {
692 my $self = shift;
693 my $code = shift;
694
695 $self->{dbh} or $self->open;
696 require WAIT::Scan;
697 new WAIT::Scan $self, $self->{nextk}-1, $code;
698 }
699
700 sub open_index_scan {
701 my $self = shift;
702 my $attr = shift;
703 my $code = shift;
704 my $name = join '-', @$attr;
705
706 if (defined $self->{indexes}->{$name}) {
707 $self->{indexes}->{$name}->open_scan($code);
708 } else {
709 croak "No such index '$name'";
710 }
711 }
712
713 eval {sub WAIT::Query::Raw::new} unless defined \&WAIT::Query::Raw::new;
714
715 sub prefix {
716 my ($self , $attr, $prefix) = @_;
717 my %result;
718
719 defined $self->{db} or $self->open; # require layout
720
721 for (@{$self->{inverted}->{$attr}}) {
722 my $result = $_->prefix($prefix);
723 if (defined $result) {
724 $result{$_->name} = $result;
725 }
726 }
727 bless \%result, 'WAIT::Query::Raw';
728 }
729
730 sub intervall {
731 my ($self, $attr, $lb, $ub) = @_;
732 my %result;
733
734 defined $self->{db} or $self->open; # require layout
735
736 for (@{$self->{inverted}->{$attr}}) {
737 my $result = $_->intervall($lb, $ub);
738 if (defined $result) {
739 $result{$_->name} = $result;
740 }
741 }
742 bless \%result, 'WAIT::Query::Raw';
743 }
744
745 sub search_ref {
746 my $self = shift;
747 my ($query, $attr, $cont, $raw);
748 if (ref $_[0]) {
749 $query = shift;
750 # require Data::Dumper; print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . Data::Dumper->new([$query],[qw()])->Indent(1)->Useqq(1)->Dump; # XXX
751
752 $attr = $query->{attr};
753 $cont = $query->{cont};
754 $raw = $query->{raw};
755 } else {
756 require Carp;
757 Carp::cluck("Using three argument search interface is deprecated, use hashref interface instead");
758 $attr = shift;
759 $cont = shift;
760 $raw = shift;
761 $query = {
762 attr => $attr,
763 cont => $cont,
764 raw => $raw,
765 };
766 }
767
768 my %result;
769
770 defined $self->{db} or $self->open; # require layout
771
772 if ($raw) {
773 for (@{$self->{inverted}->{$attr}}) {
774 my $name = $_->name;
775 if (exists $raw->{$name} and @{$raw->{$name}}) {
776 my $scale = 1/scalar(@{$raw->{$name}});
777 my %r = $_->search_raw($query, @{$raw->{$name}});
778 my ($key, $val);
779 while (($key, $val) = each %r) {
780 if (exists $result{$key}) {
781 $result{$key} += $val*$scale;
782 } else {
783 $result{$key} = $val*$scale;
784 }
785 }
786 }
787 }
788 }
789 if (defined $cont and $cont ne '') {
790 for (@{$self->{inverted}->{$attr}}) {
791 my $r = $_->search_ref($query, $cont);
792 my ($key, $val);
793 while (($key, $val) = each %$r) {
794 if (exists $result{$key}) {
795 $result{$key} += $val;
796 } else {
797 $result{$key} = $val;
798 }
799 }
800 }
801 }
802 # sanity check for deleted documents.
803 # this should not be necessary !@#$
804 for (keys %result) {
805 delete $result{$_} if $self->{deleted}->{$_}
806 }
807 \%result;
808 }
809
810 sub parse_query {
811 my($self, $attr, $query) = @_;
812 return unless defined $query && length $query;
813 my %qt;
814 for (@{$self->{inverted}->{$attr}}) {
815 grep $qt{$_}++, $_->parse($query);
816 }
817 [keys %qt];
818 }
819
820 sub hilight_positions {
821 my ($self, $attr, $text, $query, $raw) = @_;
822 my %pos;
823
824 if (defined $raw) {
825 for (@{$self->{inverted}->{$attr}}) { # objects of type
826 # WAIT::InvertedIndex for
827 # this index field $attr
828 my $name = $_->name;
829 if (exists $raw->{$name}) {
830 my %qt;
831 grep $qt{$_}++, @{$raw->{$name}};
832 for ($_->parse_pos($text)) {
833 if (exists $qt{$_->[0]}) {
834 $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
835 }
836 }
837 }
838 }
839 }
840 if (defined $query) {
841 for (@{$self->{inverted}->{$attr}}) {
842 my %qt;
843
844 grep $qt{$_}++, $_->parse($query);
845 for ($_->parse_pos($text)) {
846 if (exists $qt{$_->[0]}) {
847 if (exists $pos{$_->[1]}) { # perl -w ;-)
848 $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
849 } else {
850 $pos{$_->[1]} = length($_->[0]);
851 }
852 }
853 }
854 }
855 }
856
857 \%pos;
858 }
859
860 sub hilight {
861 my ($tb, $buf, $qplain, $qraw) = @_;
862 my $layout = $tb->layout();
863
864 my @result;
865
866 $qplain ||= {};
867 $qraw ||= {};
868 my @ttxt = $layout->tag($buf);
869 while (@ttxt) {
870 no strict 'refs';
871 my %tag = %{shift @ttxt};
872 my $txt = shift @ttxt;
873 my $fld;
874
875 my %hl;
876 for $fld (grep defined $tag{$_}, keys %$qplain, keys %$qraw) {
877 my $hp = $tb->hilight_positions($fld, $txt,
878 $qplain->{$fld}, $qraw->{$fld});
879 for (keys %$hp) {
880 if (exists $hl{$_}) { # -w ;-(
881 $hl{$_} = max($hl{$_}, $hp->{$_});
882 } else {
883 $hl{$_} = $hp->{$_};
884 }
885 }
886 }
887 my $pos;
888 my $qt = {_qt => 1, %tag};
889 my $pl = \%tag;
890 my $last = length($txt);
891 my @tmp;
892 for $pos (sort {$b <=> $a} keys %hl) {
893 unshift @tmp, $pl, substr($txt,$pos+$hl{$pos},$last-$pos-$hl{$pos});
894 unshift @tmp, $qt, substr($txt,$pos,$hl{$pos});
895 $last = $pos;
896 }
897 push @result, $pl, substr($txt,0,$last);
898 push @result, @tmp;
899 }
900 @result; # no speed necessary
901 }
902
903 1;

Properties

Name Value
cvs2svn:cvs-rev 1.10

  ViewVC Help
Powered by ViewVC 1.1.26