/[wait]/cvs-head/lib/WAIT/Table.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /cvs-head/lib/WAIT/Table.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 34 - (show annotations)
Sun Nov 12 14:22:40 2000 UTC (23 years, 6 months ago) by ulpfr
File size: 24474 byte(s)
Opening a table twice should not be a problem any more.

1 # -*- Mode: Cperl -*-
2 # Table.pm --
3 # ITIID : $ITI$ $Header $__Header$
4 # Author : Ulrich Pfeifer
5 # Created On : Thu Aug 8 13:05:10 1996
6 # Last Modified By: Ulrich Pfeifer
7 # Last Modified On: Sun Nov 12 15:21:19 2000
8 # Language : CPerl
9 # Update Count : 135
10 # Status : Unknown, Use with caution!
11 #
12 # Copyright (c) 1996-1997, Ulrich Pfeifer
13 #
14
15 =head1 NAME
16
17 WAIT::Table -- Module for maintaining Tables / Relations
18
19 =head1 SYNOPSIS
20
21 require WAIT::Table;
22
23 =head1 DESCRIPTION
24
25 =cut
26
27 package WAIT::Table;
28
29 use WAIT::Table::Handle ();
30 require WAIT::Parse::Base;
31
32 use strict;
33 use Carp;
34 # use autouse Carp => qw( croak($) );
35 use DB_File;
36 use Fcntl;
37 use LockFile::Simple ();
38
39 my $USE_RECNO = 0;
40
41 =head2 Creating a Table.
42
43 The constructor WAIT::Table-E<gt>new is normally called via the
44 create_table method of a database handle. This is not enforced, but
45 creating a table does not make any sense unless the table is
46 registered by the database because the latter implements persistence
47 of the meta data. Registering is done automatically by letting the
48 database handle the creation of a table.
49
50 my $db = WAIT::Database->create(name => 'sample');
51 my $tb = $db->create_table(name => 'test',
52 access => $access,
53 layout => $layout,
54 attr => ['docid', 'headline'],
55 );
56
57 The constructor returns a handle for the table. This handle is hidden by the
58 table module, to prevent direct access if called via Table.
59
60 =over 10
61
62 =item C<access> => I<accessobj>
63
64 A reference to an access object for the external parts (attributes) of
65 tuples. As you may remember, the WAIT System does not enforce that
66 objects are completely stored inside the system to avoid duplication.
67 There is no (strong) point in storing all your HTML documents inside
68 the system when indexing your WWW-Server.
69
70 The access object is designed to work like as a tied hash. You pass
71 the refernce to the object, not the tied hash though. An example
72 implementation of an access class that works for manpages is
73 WAIT::Document::Nroff.
74
75 The implementation needs to take into account that WAIT will keep this
76 object in a Data::Dumper or Storable database and re-use it when sman
77 is run. So it is not good enough if we can produce the index with it
78 now, when we create or actively access the table, WAIT also must be
79 able to retrieve documents on its own, when we are in a different
80 context. This happens specifically in a retrieval. To get this working
81 seemlessly, the access-defining class must implement a close method.
82 This method will be called before the Data::Dumper dump takes place.
83 In that moment the access-defining class must get rid of all data
84 structures that cannot be reconstructed via the Data::Dumper dump,
85 such as database handles or C pointers.
86
87 =item C<file> => I<fname>
88
89 The filename of the records file. Files for indexes will have I<fname>
90 as prefix. I<Mandatory>, but usually taken care of by the
91 WAIT::Database handle when the constructor is called via
92 WAIT::Database::create_table().
93
94 =item C<name> => I<name>
95
96 The name of this table. I<Mandatory>
97
98 =item C<attr> => [ I<attr> ... ]
99
100 A reference to an array of attribute names. WAIT will keep the
101 contents of these attributes in its table. I<Mandatory>
102
103 =item C<djk> => [ I<attr> ... ]
104
105 A reference to an array of attribute names which make up the
106 I<disjointness key>. Don't think about it - it's of no use yet;
107
108 =item C<layout> => I<layoutobj>
109
110 A reference to an external parser object. Defaults to a new instance
111 of C<WAIT::Parse::Base>. For an example implementation see
112 WAIT::Parse::Nroff. A layout class can be implemented as a singleton
113 class if you so like.
114
115 =item C<keyset> => I<keyset>
116
117 The set of attributes needed to identify a record. Defaults to all
118 attributes.
119
120 =item C<invindex> => I<inverted index>
121
122 A reference to an anon array defining attributes of each record that
123 need to be indexed. See the source of smakewhatis for how to set this
124 up.
125
126 =back
127
128 =cut
129
130 sub new {
131 my $type = shift;
132 my %parm = @_;
133 my $self = {};
134
135 # Check for mandatory attrs early
136 $self->{name} = $parm{name} or croak "No name specified";
137 $self->{attr} = $parm{attr} or croak "No attributes specified";
138
139 # Do that before we eventually add '_weight' to attributes.
140 $self->{keyset} = $parm{keyset} || [[@{$parm{attr}}]];
141
142 $self->{mode} = O_CREAT | O_RDWR;
143
144 # Determine and set up subclass
145 $type = ref($type) || $type;
146 if (defined $parm{djk}) {
147 if (@{$parm{djk}} == @{$parm{attr}}) {
148 # All attributes in DK (sloppy test here!)
149 $type .= '::Independent';
150 require WAIT::Table::Independent;
151 } else {
152 $type .= '::Disjoint';
153 require WAIT::Table::Disjoint;
154 }
155 # Add '_weight' to attributes
156 my %attr;
157 @attr{@{$parm{attr}}} = (1) x @{$parm{attr}};
158 unshift @{$parm{attr}}, '_weight' unless $attr{'_weight'};
159 }
160
161 $self->{file} = $parm{file} or croak "No file specified";
162 if (-d $self->{file}){
163 warn "Warning: Directory '$self->{file}' already exists\n";
164 } elsif (!mkdir($self->{file}, 0775)) {
165 croak "Could not 'mkdir $self->{file}': $!\n";
166 }
167
168 my $lockmgr = LockFile::Simple->make(-autoclean => 1);
169 # Aquire a write lock, since we are creating the table, no readers
170 # could possibly be active.
171 $self->{write_lock} = $lockmgr->lock($self->{file} . '/write')
172 or die "Can't lock '$self->{file}/write'";
173
174 $self->{djk} = $parm{djk} if defined $parm{djk};
175 $self->{layout} = $parm{layout} || new WAIT::Parse::Base;
176 $self->{access} = $parm{access} if defined $parm{access};
177 $self->{nextk} = 1; # next record to insert; first record unused
178 $self->{deleted} = {}; # no deleted records yet
179 $self->{indexes} = {};
180
181 bless $self, $type;
182 # Call create_index() and create_index() for compatibility
183 for (@{$self->{keyset}||[]}) {
184 #carp "Specification of indexes at table create time is deprecated";
185 $self->create_index(@$_);
186 }
187 while (@{$parm{invindex}||[]}) {
188 # carp "Specification of inverted indexes at table create time is deprecated";
189 my $att = shift @{$parm{invindex}};
190 my @spec = @{shift @{$parm{invindex}}};
191 my @opt;
192
193 if (ref($spec[0])) {
194 carp "Secondary pipelines are deprecated\n";
195 @opt = %{shift @spec};
196 }
197 $self->create_inverted_index(attribute => $att, pipeline => \@spec, @opt);
198 }
199
200 $self;
201 # end of backwarn compatibility stuff
202 }
203
204 =head2 Creating an index
205
206 $tb->create_index('docid');
207
208 =item C<create_index>
209
210 must be called with a list of attributes. This must be a subset of the
211 attributes specified when the table was created. Currently this
212 method must be called before the first tuple is inserted in the
213 table!
214
215 =cut
216
217 sub create_index {
218 my $self= shift;
219
220 croak "Cannot create index for table aready populated"
221 if $self->{nextk} > 1;
222
223 require WAIT::Index;
224
225 my $name = join '-', @_;
226 $self->{indexes}->{$name} =
227 new WAIT::Index file => $self->{file}.'/'.$name, attr => $_;
228 }
229
230 =head2 Creating an inverted index
231
232 $tb->create_inverted_index
233 (attribute => 'au',
234 pipeline => ['detex', 'isotr', 'isolc', 'split2', 'stop'],
235 predicate => 'plain',
236 );
237
238 =over 5
239
240 =item C<attribute>
241
242 The attribute to build the index on. This attribute may not be in the
243 set attributes specified when the table was created.
244
245 =item C<pipeline>
246
247 A piplines specification is a reference to an array of method names
248 (from package C<WAIT::Filter>) which are to be applied in sequence to
249 the contents of the named attribute. The attribute name may not be in
250 the attribute list.
251
252 =item C<predicate>
253
254 An indication which predicate the index implements. This may be
255 e.g. 'plain', 'stemming' or 'soundex'. The indicator will be used for
256 query processing. Currently there is no standard set of predicate
257 names. The predicate defaults to the last member of the pipeline if
258 omitted.
259
260 =back
261
262 Currently this method must be called before the first tuple is
263 inserted in the table!
264
265 =cut
266
267 sub create_inverted_index {
268 my $self = shift;
269 my %parm = @_;
270
271 croak "No attribute specified" unless $parm{attribute};
272 croak "No pipeline specified" unless $parm{pipeline};
273
274 $parm{predicate} ||= $parm{pipeline}->[-1];
275
276 croak "Cannot create index for table aready populated"
277 if $self->{nextk} > 1;
278
279 require WAIT::InvertedIndex;
280
281 # backward compatibility stuff
282 my %opt = %parm;
283 for (qw(attribute pipeline predicate)) {
284 delete $opt{$_};
285 }
286
287 my $name = join '_', ($parm{attribute}, @{$parm{pipeline}});
288 my $idx = new WAIT::InvertedIndex(file => $self->{file}.'/'.$name,
289 filter => [@{$parm{pipeline}}], # clone
290 name => $name,
291 attr => $parm{attribute},
292 %opt, # backward compatibility stuff
293 );
294 # We will have to use $parm{predicate} here
295 push @{$self->{inverted}->{$parm{attribute}}}, $idx;
296 }
297
298 sub dir {
299 $_[0]->{file};
300 }
301
302 =head2 C<$tb-E<gt>layout>
303
304 Returns the reference to the associated parser object.
305
306 =cut
307
308 sub layout { $_[0]->{layout} }
309
310 =head2 C<$tb-E<gt>fields>
311
312 Returns the array of attribute names.
313
314 =cut
315
316
317 sub fields { keys %{$_[0]->{inverted}}}
318
319 =head2 C<$tb-E<gt>drop>
320
321 Must be called via C<WAIT::Database::drop_table>
322
323 =cut
324
325 sub drop {
326 my $self = shift;
327 if ((caller)[0] eq 'WAIT::Database') { # database knows about this
328 $self->close; # just make sure
329 my $file = $self->{file};
330
331 for (values %{$self->{indexes}}) {
332 $_->drop;
333 }
334 unlink "$file/records";
335 # $self->unlock;
336 ! (!-e $file or rmdir $file);
337 } else {
338 croak ref($self)."::drop called directly";
339 }
340 }
341
342 sub mrequire ($) {
343 my $module = shift;
344
345 $module =~ s{::}{/}g;
346 $module .= '.pm';
347 require $module;
348 }
349
350 sub open {
351 my $self = shift;
352 my $file = $self->{file} . '/records';
353
354 mrequire ref($self); # that's tricky eh?
355 if (defined $self->{'layout'}) {
356 mrequire ref($self->{'layout'});
357 }
358 if (defined $self->{'access'}) {
359 mrequire ref($self->{'access'});
360 }
361 if (exists $self->{indexes}) {
362 require WAIT::Index;
363 for (values %{$self->{indexes}}) {
364 $_->{mode} = $self->{mode};
365 }
366 }
367 if (exists $self->{inverted}) {
368 my ($att, $idx);
369 for $att (keys %{$self->{inverted}}) {
370 for $idx (@{$self->{inverted}->{$att}}) {
371 $idx->{mode} = $self->{mode};
372 }
373 }
374 require WAIT::InvertedIndex;
375 }
376 unless (defined $self->{dbh}) {
377 if ($USE_RECNO) {
378 $self->{dbh} = tie(@{$self->{db}}, 'DB_File', $file,
379 $self->{mode}, 0664, $DB_RECNO);
380 } else {
381 $self->{dbh} =
382 tie(%{$self->{db}}, 'DB_File', $file,
383 $self->{mode}, 0664, $DB_BTREE);
384 }
385 }
386
387 # Locking
388 #
389 # We allow multiple readers to coexists. But write access excludes
390 # all read access and vice versa. In practice read access on tables
391 # open for writing will mostly work ;-)
392
393 my $lockmgr = LockFile::Simple->make(-autoclean => 1);
394
395 my $lockdir = $self->{file} . '/read';
396 unless (-d $lockdir) {
397 mkdir $lockdir, 0755 or die "Could not mkdir $lockdir: $!";
398 }
399
400 if ($self->{mode} & O_RDWR) {
401 # Get a write lock. Release it again and die if there is any
402 # valid reader.
403
404 # this is a hack. We do not check for reopening ...
405 return $self if $self->{write_lock};
406
407 if ($self->{read_lock}) {
408 # We are a becoming a writer now. So we release the read lock to
409 # avoid blocking ourselves.
410 $self->{read_lock}->release;
411 delete $self->{read_lock};
412 }
413
414 # Get the preliminary write lock
415 $self->{write_lock} = $lockmgr->lock($self->{file} . '/write')
416 or die "Can't lock '$self->{file}/write'";
417
418 # If we actually want to write we must check if there are any
419 # readers. The write lock is confirmed if wen cannot find any
420 # valid readers.
421
422 local *DIR;
423 opendir DIR, $lockdir or
424 die "Could not opendir '$lockdir': $!";
425 for my $lockfile (grep { -f "$lockdir/$_" } readdir DIR) {
426 # check if the locks are still valid.
427 # Since we are protected by a write lock, we could use a plain file.
428 # But we want to use the stale testing from LockFile::Simple.
429 if (my $lck = $lockmgr->trylock("$lockdir/$lockfile")) {
430 warn "Removing stale lockfile '$lockdir/$lockfile'";
431 $lck->release;
432 } else {
433 $self->{write_lock}->release;
434 die "Cannot write table '$file' while it's in use";
435 }
436 }
437 closedir DIR;
438 } else {
439 # this is a hack. We do not check for reopening ...
440 return $self if $self->{read_lock};
441
442 # Get the preliminary write lock to protect the directory
443 # operations.
444
445 $self->{write_lock} ||= $lockmgr->lock($self->{file} . '/write')
446 or die "Can't lock '$self->{file}/write'";
447
448 # find a new read slot
449 my $id = time;
450 while (-f "$lockdir/$id.lock") { # here assume ".lock" format!
451 $id++;
452 }
453
454 $self->{read_lock} = $lockmgr->lock("$lockdir/$id")
455 or die "Can't lock '$lockdir/$id'";
456
457 # We are a reader now. So we release the write lock
458 $self->{write_lock}->release;
459 delete $self->{write_lock};
460 }
461
462 $self;
463 }
464
465 sub fetch_extern {
466 my $self = shift;
467
468 # print "#@_", $self->{'access'}->{Mode}, "\n"; # DEBUGGING?
469 if (exists $self->{'access'}) {
470 mrequire ref($self->{'access'});
471 $self->{'access'}->FETCH(@_);
472 }
473 }
474
475 sub fetch_extern_by_id {
476 my $self = shift;
477
478 $self->fetch_extern($self->fetch(@_));
479 }
480
481 sub _find_index {
482 my $self = shift;
483 my (@att) = @_;
484 my %att;
485 my $name;
486
487 @att{@att} = @att;
488
489 KEY: for $name (keys %{$self->{indexes}}) {
490 my @iat = split /-/, $name;
491 for (@iat) {
492 next KEY unless exists $att{$_};
493 }
494 return $self->{indexes}->{$name};
495 }
496 return undef;
497 }
498
499 sub have {
500 my $self = shift;
501 my %parm = @_;
502
503 my $index = $self->_find_index(keys %parm) or return; # no index-no have
504
505 defined $self->{db} or $self->open;
506 return $index->have(@_);
507 }
508
509 sub insert {
510 my $self = shift;
511 my %parm = @_;
512
513 defined $self->{db} or $self->open;
514
515 # We should move all writing methods to a subclass to check only once
516 $self->{mode} & O_RDWR or croak "Cannot insert into table opened in RD_ONLY mode";
517
518 my $tuple = join($;, map($parm{$_} || '', @{$self->{attr}}));
519 my $key;
520 my @deleted = keys %{$self->{deleted}};
521 my $gotkey = 0;
522
523 if (@deleted) {
524 $key = pop @deleted;
525 delete $self->{deleted}->{$key};
526 # Sanity check
527 if ($key && $key>0) {
528 $gotkey=1;
529 } else {
530 warn(sprintf("WAIT database inconsistency during insert ".
531 "key[%s]: Please rebuild index\n",
532 $key
533 ));
534 }
535 }
536 unless ($gotkey) {
537 $key = $self->{nextk}++;
538 }
539 if ($USE_RECNO) {
540 $self->{db}->[$key] = $tuple;
541 } else {
542 $self->{db}->{$key} = $tuple;
543 }
544 for (values %{$self->{indexes}}) {
545 unless ($_->insert($key, %parm)) {
546 # duplicate key, undo changes
547 if ($key == $self->{nextk}-1) {
548 $self->{nextk}--;
549 } else {
550 # warn "setting key[$key] deleted during insert";
551 $self->{deleted}->{$key}=1;
552 }
553 my $idx;
554 for $idx (values %{$self->{indexes}}) {
555 last if $idx eq $_;
556 $idx->remove($key, %parm);
557 }
558 return undef;
559 }
560 }
561 if (defined $self->{inverted}) {
562 my $att;
563 for $att (keys %{$self->{inverted}}) {
564 if (defined $parm{$att}) {
565 map $_->insert($key, $parm{$att}), @{$self->{inverted}->{$att}};
566 #map $_->sync, @{$self->{inverted}->{$att}}
567 }
568 }
569 }
570 $key
571 }
572
573 sub sync {
574 my $self = shift;
575
576 for (values %{$self->{indexes}}) {
577 map $_->sync, $_;
578 }
579 if (defined $self->{inverted}) {
580 my $att;
581 for $att (keys %{$self->{inverted}}) {
582 map $_->sync, @{$self->{inverted}->{$att}}
583 }
584 }
585 }
586
587 sub fetch {
588 my $self = shift;
589 my $key = shift;
590
591 return () if exists $self->{deleted}->{$key};
592
593 defined $self->{db} or $self->open;
594 if ($USE_RECNO) {
595 $self->unpack($self->{db}->[$key]);
596 } else {
597 $self->unpack($self->{db}->{$key});
598 }
599 }
600
601 sub delete_by_key {
602 my $self = shift;
603 my $key = shift;
604
605 unless ($key) {
606 Carp::cluck "Warning: delete_by_key called without key. Looks like a bug in WAIT?";
607 return;
608 }
609
610 return $self->{deleted}->{$key} if defined $self->{deleted}->{$key};
611 my %tuple = $self->fetch($key);
612 for (values %{$self->{indexes}}) {
613 $_->delete($key, %tuple);
614 }
615 if (defined $self->{inverted}) {
616 # User *must* provide the full record for this or the entries
617 # in the inverted index will not be removed
618 %tuple = (%tuple, @_);
619 my $att;
620 for $att (keys %{$self->{inverted}}) {
621 if (defined $tuple{$att}) {
622 map $_->delete($key, $tuple{$att}), @{$self->{inverted}->{$att}}
623 }
624 }
625 }
626 # warn "setting key[$key] deleted during delete_by_key";
627 ++$self->{deleted}->{$key};
628 }
629
630 sub delete {
631 my $self = shift;
632 my $tkey = $self->have(@_);
633 # warn "tkey[$tkey]\@_[@_]";
634 defined $tkey && $self->delete_by_key($tkey, @_);
635 }
636
637 sub unpack {
638 my($self, $tuple) = @_;
639
640 unless (defined $tuple){
641 # require Carp; # unfortunately gives us "bizarre copy...." :-(((((
642 warn("Debug: somebody called unpack without argument tuple!");
643 return;
644 }
645
646 my $att;
647 my @result;
648 my @tuple = split /$;/, $tuple;
649
650 for $att (@{$self->{attr}}) {
651 push @result, $att, shift @tuple;
652 }
653 @result;
654 }
655
656 sub set {
657 my ($self, $iattr, $value) = @_;
658
659 unless ($self->{write_lock}){
660 warn "Cannot set iattr[$iattr] without write lock. Nothing done";
661 return;
662 }
663 for my $att (keys %{$self->{inverted}}) {
664 if ($] > 5.003) { # avoid bug in perl up to 5.003_05
665 my $idx;
666 for $idx (@{$self->{inverted}->{$att}}) {
667 $idx->set($iattr, $value);
668 }
669 } else {
670 map $_->set($iattr, $value), @{$self->{inverted}->{$att}};
671 }
672 }
673
674 1;
675 }
676
677 sub close {
678 my $self = shift;
679
680 if (exists $self->{'access'}) {
681 eval {$self->{'access'}->close}; # dont bother if not opened
682 }
683 if ($WAIT::Index::VERSION) {
684 for (values %{$self->{indexes}}) {
685 $_->close();
686 }
687 }
688 if (defined $self->{inverted} && $WAIT::InvertedIndex::VERSION) {
689 # require WAIT::InvertedIndex; Uli: we can avoid closing indexes:
690 # if WAIT::InvertedIndex has not been loaded, they cannot have
691 # been altered so far
692 my $att;
693 for $att (keys %{$self->{inverted}}) {
694 if ($] > 5.003) { # avoid bug in perl up to 5.003_05
695 my $idx;
696 for $idx (@{$self->{inverted}->{$att}}) {
697 $idx->close;
698 }
699 } else {
700 map $_->close(), @{$self->{inverted}->{$att}};
701 }
702 }
703 }
704 if ($self->{dbh}) {
705 delete $self->{dbh};
706
707 if ($USE_RECNO) {
708 untie @{$self->{db}};
709 } else {
710 untie %{$self->{db}};
711 }
712 delete $self->{db};
713 }
714
715 $self->unlock;
716
717 1;
718 }
719
720 sub unlock {
721 my $self = shift;
722
723 # Either we have a read or a write lock (or we close the table already)
724 # unless ($self->{read_lock} || $self->{write_lock}) {
725 # warn "WAIT::Table::unlock: Table aparently hold's no lock"
726 # }
727 if ($self->{write_lock}) {
728 $self->{write_lock}->release();
729 delete $self->{write_lock};
730 }
731 if ($self->{read_lock}) {
732 $self->{read_lock}->release();
733 delete $self->{read_lock};
734 }
735
736 }
737
738 sub DESTROY {
739 my $self = shift;
740
741 warn "Table handle destroyed without closing it first"
742 if $self->{write_lock} || $self->{read_lock};
743 }
744
745 sub open_scan {
746 my $self = shift;
747 my $code = shift;
748
749 $self->{dbh} or $self->open;
750 require WAIT::Scan;
751 new WAIT::Scan $self, $self->{nextk}-1, $code;
752 }
753
754 sub open_index_scan {
755 my $self = shift;
756 my $attr = shift;
757 my $code = shift;
758 my $name = join '-', @$attr;
759
760 if (defined $self->{indexes}->{$name}) {
761 $self->{indexes}->{$name}->open_scan($code);
762 } else {
763 croak "No such index '$name'";
764 }
765 }
766
767 eval {sub WAIT::Query::Raw::new} unless defined \&WAIT::Query::Raw::new;
768
769 sub prefix {
770 my ($self , $attr, $prefix) = @_;
771 my %result;
772
773 defined $self->{db} or $self->open; # require layout
774
775 for (@{$self->{inverted}->{$attr}}) {
776 my $result = $_->prefix($prefix);
777 if (defined $result) {
778 $result{$_->name} = $result;
779 }
780 }
781 bless \%result, 'WAIT::Query::Raw';
782 }
783
784 sub intervall {
785 my ($self, $attr, $lb, $ub) = @_;
786 my %result;
787
788 defined $self->{db} or $self->open; # require layout
789
790 for (@{$self->{inverted}->{$attr}}) {
791 my $result = $_->intervall($lb, $ub);
792 if (defined $result) {
793 $result{$_->name} = $result;
794 }
795 }
796 bless \%result, 'WAIT::Query::Raw';
797 }
798
799 sub search {
800 my $self = shift;
801 my ($query, $attr, $cont, $raw);
802 if (ref $_[0]) {
803 $query = shift;
804
805 $attr = $query->{attr};
806 $cont = $query->{cont};
807 $raw = $query->{raw};
808 } else {
809 require Carp;
810 Carp::cluck("Using three argument search interface is deprecated, use hashref interface instead");
811 $attr = shift;
812 $cont = shift;
813 $raw = shift;
814 $query = {
815 attr => $attr,
816 cont => $cont,
817 raw => $raw,
818 };
819 }
820
821 my %result;
822
823 defined $self->{db} or $self->open; # require layout
824
825 if ($raw) {
826 for (@{$self->{inverted}->{$attr}}) {
827 my $name = $_->name;
828 if (exists $raw->{$name} and @{$raw->{$name}}) {
829 my $scale = 1/scalar(@{$raw->{$name}});
830 my %r = $_->search_raw($query, @{$raw->{$name}});
831 my ($key, $val);
832 while (($key, $val) = each %r) {
833 if (exists $result{$key}) {
834 $result{$key} += $val*$scale;
835 } else {
836 $result{$key} = $val*$scale;
837 }
838 }
839 }
840 }
841 }
842 if (defined $cont and $cont ne '') {
843 for (@{$self->{inverted}->{$attr}}) {
844 my %r = $_->search($query, $cont);
845 my ($key, $val);
846 while (($key, $val) = each %r) {
847 if (exists $result{$key}) {
848 $result{$key} += $val;
849 } else {
850 $result{$key} = $val;
851 }
852 }
853 }
854 }
855 # sanity check for deleted documents.
856 # this should not be necessary !@#$
857 for (keys %result) {
858 delete $result{$_} if $self->{deleted}->{$_}
859 }
860 %result;
861 }
862
863 sub hilight_positions {
864 my ($self, $attr, $text, $query, $raw) = @_;
865 my %pos;
866
867 if (defined $raw) {
868 for (@{$self->{inverted}->{$attr}}) { # objects of type
869 # WAIT::InvertedIndex for
870 # this index field $attr
871 my $name = $_->name;
872 if (exists $raw->{$name}) {
873 my %qt;
874 grep $qt{$_}++, @{$raw->{$name}};
875 for ($_->parse_pos($text)) {
876 if (exists $qt{$_->[0]}) {
877 $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
878 }
879 }
880 }
881 }
882 }
883 if (defined $query) {
884 for (@{$self->{inverted}->{$attr}}) {
885 my %qt;
886
887 grep $qt{$_}++, $_->parse($query);
888 for ($_->parse_pos($text)) {
889 if (exists $qt{$_->[0]}) {
890 if (exists $pos{$_->[1]}) { # perl -w ;-)
891 $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
892 } else {
893 $pos{$_->[1]} = length($_->[0]);
894 }
895 }
896 }
897 }
898 }
899
900 \%pos;
901 }
902
903 sub hilight {
904 my ($tb, $buf, $qplain, $qraw) = @_;
905 my $layout = $tb->layout();
906
907 my @result;
908
909 $qplain ||= {};
910 $qraw ||= {};
911 my @ttxt = $layout->tag($buf);
912 while (@ttxt) {
913 no strict 'refs';
914 my %tag = %{shift @ttxt};
915 my $txt = shift @ttxt;
916 my $fld;
917
918 my %hl;
919 for $fld (grep defined $tag{$_}, keys %$qplain, keys %$qraw) {
920 my $hp = $tb->hilight_positions($fld, $txt,
921 $qplain->{$fld}, $qraw->{$fld});
922 for (keys %$hp) {
923 if (exists $hl{$_}) { # -w ;-(
924 $hl{$_} = max($hl{$_}, $hp->{$_});
925 } else {
926 $hl{$_} = $hp->{$_};
927 }
928 }
929 }
930 my $pos;
931 my $qt = {_qt => 1, %tag};
932 my $pl = \%tag;
933 my $last = length($txt);
934 my @tmp;
935 for $pos (sort {$b <=> $a} keys %hl) {
936 unshift @tmp, $pl, substr($txt,$pos+$hl{$pos},$last-$pos-$hl{$pos});
937 unshift @tmp, $qt, substr($txt,$pos,$hl{$pos});
938 $last = $pos;
939 }
940 push @result, $pl, substr($txt,0,$last);
941 push @result, @tmp;
942 }
943 @result; # no speed necessary
944 }
945
946 1;

Properties

Name Value
cvs2svn:cvs-rev 1.4

  ViewVC Help
Powered by ViewVC 1.1.26