/[wait]/cvs-head/lib/WAIT/Table.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /cvs-head/lib/WAIT/Table.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 77 - (show annotations)
Mon Apr 8 21:00:08 2002 UTC (22 years, 1 month ago) by laperla
File size: 25254 byte(s)
- Buglet in WAIT::Table (Hallo Uli!)

- Oreilly.de side track

1 # -*- Mode: Cperl -*-
2 # Table.pm --
3 # ITIID : $ITI$ $Header $__Header$
4 # Author : Ulrich Pfeifer
5 # Created On : Thu Aug 8 13:05:10 1996
6 # Last Modified By: Ulrich Pfeifer
7 # Last Modified On: Wed Jan 23 14:15:15 2002
8 # Language : CPerl
9 # Update Count : 152
10 # Status : Unknown, Use with caution!
11 #
12 # Copyright (c) 1996-1997, Ulrich Pfeifer
13 #
14
15 =head1 NAME
16
17 WAIT::Table -- Module for maintaining Tables / Relations
18
19 =head1 SYNOPSIS
20
21 require WAIT::Table;
22
23 =head1 DESCRIPTION
24
25 =cut
26
27 package WAIT::Table;
28
29 use WAIT::Table::Handle ();
30 require WAIT::Parse::Base;
31
32 use strict;
33 use Carp;
34 # use autouse Carp => qw( croak($) );
35 use DB_File;
36 use Fcntl;
37 use LockFile::Simple ();
38
39 my $USE_RECNO = 0;
40
41 =head2 Creating a Table.
42
43 The constructor WAIT::Table-E<gt>new is normally called via the
44 create_table method of a database handle. This is not enforced, but
45 creating a table does not make any sense unless the table is
46 registered by the database because the latter implements persistence
47 of the meta data. Registering is done automatically by letting the
48 database handle the creation of a table.
49
50 my $db = WAIT::Database->create(name => 'sample');
51 my $tb = $db->create_table(name => 'test',
52 access => $access,
53 layout => $layout,
54 attr => ['docid', 'headline'],
55 );
56
57 The constructor returns a handle for the table. This handle is hidden by the
58 table module, to prevent direct access if called via Table.
59
60 =over 10
61
62 =item C<access> => I<accessobj>
63
64 A reference to an access object for the external parts (attributes) of
65 tuples. As you may remember, the WAIT System does not enforce that
66 objects are completely stored inside the system to avoid duplication.
67 There is no (strong) point in storing all your HTML documents inside
68 the system when indexing your WWW-Server.
69
70 The access object is designed to work like as a tied hash. You pass
71 the refernce to the object, not the tied hash though. An example
72 implementation of an access class that works for manpages is
73 WAIT::Document::Nroff.
74
75 The implementation needs to take into account that WAIT will keep this
76 object in a Data::Dumper or Storable database and re-use it when sman
77 is run. So it is not good enough if we can produce the index with it
78 now, when we create or actively access the table, WAIT also must be
79 able to retrieve documents on its own, when we are in a different
80 context. This happens specifically in a retrieval. To get this working
81 seemlessly, the access-defining class must implement a close method.
82 This method will be called before the Data::Dumper dump takes place.
83 In that moment the access-defining class must get rid of all data
84 structures that cannot be reconstructed via the Data::Dumper dump,
85 such as database handles or C pointers.
86
87 =item C<file> => I<fname>
88
89 The filename of the records file. Files for indexes will have I<fname>
90 as prefix. I<Mandatory>, but usually taken care of by the
91 WAIT::Database handle when the constructor is called via
92 WAIT::Database::create_table().
93
94 =item C<name> => I<name>
95
96 The name of this table. I<Mandatory>
97
98 =item C<attr> => [ I<attr> ... ]
99
100 A reference to an array of attribute names. WAIT will keep the
101 contents of these attributes in its table. I<Mandatory>
102
103 =item C<djk> => [ I<attr> ... ]
104
105 A reference to an array of attribute names which make up the
106 I<disjointness key>. Don't think about it - it's of no use yet;
107
108 =item C<layout> => I<layoutobj>
109
110 A reference to an external parser object. Defaults to a new instance
111 of C<WAIT::Parse::Base>. For an example implementation see
112 WAIT::Parse::Nroff. A layout class can be implemented as a singleton
113 class if you so like.
114
115 =item C<keyset> => I<keyset>
116
117 The set of attributes needed to identify a record. Defaults to all
118 attributes.
119
120 =item C<invindex> => I<inverted index>
121
122 A reference to an anon array defining attributes of each record that
123 need to be indexed. See the source of smakewhatis for how to set this
124 up.
125
126 =back
127
128 =cut
129
130 sub new {
131 my $type = shift;
132 my %parm = @_;
133 my $self = {};
134
135 # Check for mandatory attrs early
136 $self->{name} = $parm{name} or croak "No name specified";
137 $self->{attr} = $parm{attr} or croak "No attributes specified";
138
139 # Do that before we eventually add '_weight' to attributes.
140 $self->{keyset} = $parm{keyset} || [[@{$parm{attr}}]];
141
142 $self->{mode} = O_CREAT | O_RDWR;
143
144 # Determine and set up subclass
145 $type = ref($type) || $type;
146 if (defined $parm{djk}) {
147 if (@{$parm{djk}} == @{$parm{attr}}) {
148 # All attributes in DK (sloppy test here!)
149 $type .= '::Independent';
150 require WAIT::Table::Independent;
151 } else {
152 $type .= '::Disjoint';
153 require WAIT::Table::Disjoint;
154 }
155 # Add '_weight' to attributes
156 my %attr;
157 @attr{@{$parm{attr}}} = (1) x @{$parm{attr}};
158 unshift @{$parm{attr}}, '_weight' unless $attr{'_weight'};
159 }
160
161 $self->{file} = $parm{file} or croak "No file specified";
162 if (-d $self->{file}){
163 warn "Warning: Directory '$self->{file}' already exists\n";
164 } elsif (!mkdir($self->{file}, 0775)) {
165 croak "Could not 'mkdir $self->{file}': $!\n";
166 }
167
168 $self->{djk} = $parm{djk} if defined $parm{djk};
169 $self->{layout} = $parm{layout} || new WAIT::Parse::Base;
170 $self->{access} = $parm{access} if defined $parm{access};
171 $self->{nextk} = 1; # next record to insert; first record unused
172 $self->{deleted} = {}; # no deleted records yet
173 $self->{indexes} = {};
174
175 bless $self, $type;
176
177 # Checking for readers is not necessary, but let's go with the
178 # generic method.
179 $self->getlock(O_RDWR|O_CREAT); # dies when failing
180
181 # Call create_index() and create_index() for compatibility
182 for (@{$self->{keyset}||[]}) {
183 #carp "Specification of indexes at table create time is deprecated";
184 $self->create_index(@$_);
185 }
186 while (@{$parm{invindex}||[]}) {
187 # carp "Specification of inverted indexes at table create time is deprecated";
188 my $att = shift @{$parm{invindex}};
189 my @spec = @{shift @{$parm{invindex}}};
190 my @opt;
191
192 if (ref($spec[0])) {
193 carp "Secondary pipelines are deprecated\n";
194 @opt = %{shift @spec};
195 }
196 $self->create_inverted_index(attribute => $att, pipeline => \@spec, @opt);
197 }
198
199 $self;
200 # end of backwarn compatibility stuff
201 }
202
203 =head2 Creating an index
204
205 $tb->create_index('docid');
206
207 =item C<create_index>
208
209 must be called with a list of attributes. This must be a subset of the
210 attributes specified when the table was created. Currently this
211 method must be called before the first tuple is inserted in the
212 table!
213
214 =cut
215
216 sub create_index {
217 my $self= shift;
218
219 croak "Cannot create index for table aready populated"
220 if $self->{nextk} > 1;
221
222 require WAIT::Index;
223
224 my $name = join '-', @_;
225 $self->{indexes}->{$name} =
226 new WAIT::Index file => $self->{file}.'/'.$name, attr => $_;
227 }
228
229 =head2 Creating an inverted index
230
231 $tb->create_inverted_index
232 (attribute => 'au',
233 pipeline => ['detex', 'isotr', 'isolc', 'split2', 'stop'],
234 predicate => 'plain',
235 );
236
237 =over 5
238
239 =item C<attribute>
240
241 The attribute to build the index on. This attribute may not be in the
242 set attributes specified when the table was created.
243
244 =item C<pipeline>
245
246 A piplines specification is a reference to an array of method names
247 (from package C<WAIT::Filter>) which are to be applied in sequence to
248 the contents of the named attribute. The attribute name may not be in
249 the attribute list.
250
251 =item C<predicate>
252
253 An indication which predicate the index implements. This may be
254 e.g. 'plain', 'stemming' or 'soundex'. The indicator will be used for
255 query processing. Currently there is no standard set of predicate
256 names. The predicate defaults to the last member of the pipeline if
257 omitted.
258
259 =back
260
261 Currently this method must be called before the first tuple is
262 inserted in the table!
263
264 =cut
265
266 sub create_inverted_index {
267 my $self = shift;
268 my %parm = @_;
269
270 croak "No attribute specified" unless $parm{attribute};
271 croak "No pipeline specified" unless $parm{pipeline};
272
273 $parm{predicate} ||= $parm{pipeline}->[-1];
274
275 croak "Cannot create index for table aready populated"
276 if $self->{nextk} > 1;
277
278 require WAIT::InvertedIndex;
279
280 # backward compatibility stuff
281 my %opt = %parm;
282 for (qw(attribute pipeline predicate)) {
283 delete $opt{$_};
284 }
285
286 my $name = join '_', ($parm{attribute}, @{$parm{pipeline}});
287 my $idx = new WAIT::InvertedIndex(file => $self->{file}.'/'.$name,
288 filter => [@{$parm{pipeline}}], # clone
289 name => $name,
290 attr => $parm{attribute},
291 %opt, # backward compatibility stuff
292 );
293 # We will have to use $parm{predicate} here
294 push @{$self->{inverted}->{$parm{attribute}}}, $idx;
295 }
296
297 sub dir {
298 $_[0]->{file};
299 }
300
301 =head2 C<$tb-E<gt>layout>
302
303 Returns the reference to the associated parser object.
304
305 =cut
306
307 sub layout { $_[0]->{layout} }
308
309 =head2 C<$tb-E<gt>fields>
310
311 Returns the array of attribute names.
312
313 =cut
314
315
316 sub fields { keys %{$_[0]->{inverted}}}
317
318 =head2 C<$tb-E<gt>drop>
319
320 Must be called via C<WAIT::Database::drop_table>
321
322 =cut
323
324 sub drop {
325 my $self = shift;
326
327 unless ($self->{write_lock}){
328 warn "Cannot drop table without write lock. Nothing done";
329 return;
330 }
331
332 if ((caller)[0] eq 'WAIT::Database') { # database knows about this
333 $self->close; # just make sure
334
335 my $file = $self->{file};
336
337 for (values %{$self->{indexes}}) {
338 $_->drop;
339 }
340 unlink "$file/records";
341 rmdir "$file/read" or warn "Could not rmdir '$file/read'";
342
343 # $self->unlock;
344 ! (!-e $file or rmdir $file);
345 } else {
346 croak ref($self)."::drop called directly";
347 }
348 }
349
350 sub mrequire ($) {
351 my $module = shift;
352
353 $module =~ s{::}{/}g;
354 $module .= '.pm';
355 require $module;
356 }
357
358 sub open {
359 my $self = shift;
360 my $file = $self->{file} . '/records';
361
362 mrequire ref($self); # that's tricky eh?
363 if (defined $self->{'layout'}) {
364 mrequire ref($self->{'layout'});
365 }
366 if (defined $self->{'access'}) {
367 mrequire ref($self->{'access'});
368 }
369 if (exists $self->{indexes}) {
370 require WAIT::Index;
371 for (values %{$self->{indexes}}) {
372 $_->{mode} = $self->{mode};
373 }
374 }
375 if (exists $self->{inverted}) {
376 my ($att, $idx);
377 for $att (keys %{$self->{inverted}}) {
378 for $idx (@{$self->{inverted}->{$att}}) {
379 $idx->{mode} = $self->{mode};
380 }
381 }
382 require WAIT::InvertedIndex;
383 }
384
385 $self->getlock($self->{mode});
386
387 unless (defined $self->{dbh}) {
388 if ($USE_RECNO) {
389 $self->{dbh} = tie(@{$self->{db}}, 'DB_File', $file,
390 $self->{mode}, 0664, $DB_RECNO);
391 } else {
392 $self->{dbh} =
393 tie(%{$self->{db}}, 'DB_File', $file,
394 $self->{mode}, 0664, $DB_BTREE);
395 }
396 }
397
398
399 $self;
400 }
401
402 sub fetch_extern {
403 my $self = shift;
404
405 # print "#@_", $self->{'access'}->{Mode}, "\n"; # DEBUGGING?
406 if (exists $self->{'access'}) {
407 mrequire ref($self->{'access'});
408 $self->{'access'}->FETCH(@_);
409 }
410 }
411
412 sub fetch_extern_by_id {
413 my $self = shift;
414
415 $self->fetch_extern($self->fetch(@_));
416 }
417
418 sub _find_index {
419 my $self = shift;
420 my (@att) = @_;
421 my %att;
422 my $name;
423
424 @att{@att} = @att;
425
426 KEY: for $name (keys %{$self->{indexes}}) {
427 my @iat = split /-/, $name;
428 for (@iat) {
429 next KEY unless exists $att{$_};
430 }
431 return $self->{indexes}->{$name};
432 }
433 return undef;
434 }
435
436 sub have {
437 my $self = shift;
438 my %parm = @_;
439
440 my $index = $self->_find_index(keys %parm) or return; # no index-no have
441
442 defined $self->{db} or $self->open;
443 return $index->have(@_);
444 }
445
446 sub insert {
447 my $self = shift;
448 my %parm = @_;
449
450 defined $self->{db} or $self->open;
451
452 # We should move all writing methods to a subclass to check only once
453 $self->{mode} & O_RDWR or croak "Cannot insert into table opened in RD_ONLY mode";
454
455 my $tuple = join($;, map($parm{$_} || '', @{$self->{attr}}));
456 my $key;
457 my @deleted = keys %{$self->{deleted}};
458 my $gotkey = 0;
459
460 if (@deleted) {
461 $key = pop @deleted;
462 delete $self->{deleted}->{$key};
463 # Sanity check
464 if ($key && $key>0) {
465 $gotkey=1;
466 } else {
467 warn(sprintf("WAIT database inconsistency during insert ".
468 "key[%s]: Please rebuild index\n",
469 $key
470 ));
471 }
472 }
473 unless ($gotkey) {
474 $key = $self->{nextk}++;
475 }
476 if ($USE_RECNO) {
477 $self->{db}->[$key] = $tuple;
478 } else {
479 $self->{db}->{$key} = $tuple;
480 }
481 for (values %{$self->{indexes}}) {
482 unless ($_->insert($key, %parm)) {
483 # duplicate key, undo changes
484 if ($key == $self->{nextk}-1) {
485 $self->{nextk}--;
486 } else {
487 # warn "setting key[$key] deleted during insert";
488 $self->{deleted}->{$key}=1;
489 }
490 my $idx;
491 for $idx (values %{$self->{indexes}}) {
492 last if $idx eq $_;
493 $idx->remove($key, %parm);
494 }
495 return undef;
496 }
497 }
498 if (defined $self->{inverted}) {
499 my $att;
500 for $att (keys %{$self->{inverted}}) {
501 if (defined $parm{$att}) {
502 map $_->insert($key, $parm{$att}), @{$self->{inverted}->{$att}};
503 #map $_->sync, @{$self->{inverted}->{$att}}
504 }
505 }
506 }
507 $key
508 }
509
510 sub sync {
511 my $self = shift;
512
513 for (values %{$self->{indexes}}) {
514 map $_->sync, $_;
515 }
516 if (defined $self->{inverted}) {
517 my $att;
518 for $att (keys %{$self->{inverted}}) {
519 map $_->sync, @{$self->{inverted}->{$att}}
520 }
521 }
522 }
523
524 sub fetch {
525 my $self = shift;
526 my $key = shift;
527
528 return () if exists $self->{deleted}->{$key};
529
530 defined $self->{db} or $self->open;
531 if ($USE_RECNO) {
532 $self->unpack($self->{db}->[$key]);
533 } else {
534 $self->unpack($self->{db}->{$key});
535 }
536 }
537
538 sub delete_by_key {
539 my $self = shift;
540 my $key = shift;
541
542 unless ($key) {
543 Carp::cluck "Warning: delete_by_key called without key. Looks like a bug in WAIT?";
544 return;
545 }
546
547 return $self->{deleted}->{$key} if defined $self->{deleted}->{$key};
548 my %tuple = $self->fetch($key);
549 for (values %{$self->{indexes}}) {
550 $_->delete($key, %tuple);
551 }
552 if (defined $self->{inverted}) {
553 # User *must* provide the full record for this or the entries
554 # in the inverted index will not be removed
555 %tuple = (%tuple, @_);
556 my $att;
557 for $att (keys %{$self->{inverted}}) {
558 if (defined $tuple{$att}) {
559 map $_->delete($key, $tuple{$att}), @{$self->{inverted}->{$att}}
560 }
561 }
562 }
563 # warn "setting key[$key] deleted during delete_by_key";
564 ++$self->{deleted}->{$key};
565 }
566
567 sub delete {
568 my $self = shift;
569 my $tkey = $self->have(@_);
570 # warn "tkey[$tkey]\@_[@_]";
571 defined $tkey && $self->delete_by_key($tkey, @_);
572 }
573
574 sub unpack {
575 my($self, $tuple) = @_;
576
577 unless (defined $tuple){
578 # require Carp; # unfortunately gives us "bizarre copy...." :-(((((
579 warn("Debug: somebody called unpack without argument tuple!");
580 return;
581 }
582
583 my $att;
584 my @result;
585 my @tuple = split /$;/, $tuple;
586
587 for $att (@{$self->{attr}}) {
588 push @result, $att, shift @tuple;
589 }
590 @result;
591 }
592
593 sub set {
594 my ($self, $iattr, $value) = @_;
595
596 unless ($self->{write_lock}){
597 warn "Cannot set iattr[$iattr] without write lock. Nothing done";
598 return;
599 }
600
601 # in the rare case that they haven't written a single record yet, we
602 # make sure, the inverted inherits our $self->{mode}:
603 defined $self->{db} or $self->open;
604
605 for my $att (keys %{$self->{inverted}}) {
606 if ($] > 5.003) { # avoid bug in perl up to 5.003_05
607 my $idx;
608 for $idx (@{$self->{inverted}->{$att}}) {
609 $idx->set($iattr, $value);
610 }
611 } else {
612 map $_->set($iattr, $value), @{$self->{inverted}->{$att}};
613 }
614 }
615
616 1;
617 }
618
619 sub close {
620 my $self = shift;
621
622 if (exists $self->{'access'}) {
623 eval {$self->{'access'}->close}; # dont bother if not opened
624 }
625 if ($WAIT::Index::VERSION) {
626 for (values %{$self->{indexes}}) {
627 $_->close();
628 }
629 }
630 if (defined $self->{inverted} && $WAIT::InvertedIndex::VERSION) {
631 # require WAIT::InvertedIndex; Uli: we can avoid closing indexes:
632 # if WAIT::InvertedIndex has not been loaded, they cannot have
633 # been altered so far
634 my $att;
635 for $att (keys %{$self->{inverted}}) {
636 if ($] > 5.003) { # avoid bug in perl up to 5.003_05
637 my $idx;
638 for $idx (@{$self->{inverted}->{$att}}) {
639 $idx->close;
640 }
641 } else {
642 map $_->close(), @{$self->{inverted}->{$att}};
643 }
644 }
645 }
646 if ($self->{dbh}) {
647 delete $self->{dbh};
648
649 if ($USE_RECNO) {
650 untie @{$self->{db}};
651 } else {
652 untie %{$self->{db}};
653 }
654 delete $self->{db};
655 }
656
657 $self->unlock;
658
659 1;
660 }
661
662 # Locking
663 #
664 # We allow multiple readers to coexists. But write access excludes
665 # all read access and vice versa. In practice read access on tables
666 # open for writing will mostly work ;-)
667
668 # If a "write" lock is requested, an existing "read" lock will be
669 # released. If a "read" lock ist requested, an existing "write" lock
670 # will be released. Requiring a lock already hold has no effect.
671
672 sub getlock {
673 my ($self, $mode) = @_;
674
675 # autoclean cleans on DESTROY, stale sends SIGZERO to the owner
676 #
677 my $lockmgr = LockFile::Simple->make(-autoclean => 1, -stale => 1);
678 my $file = $self->{file} . '/records';
679 my $lockdir = $self->{file} . '/read';
680
681 unless (-d $lockdir) {
682 mkdir $lockdir, 0755 or die "Could not mkdir $lockdir: $!";
683 }
684
685 if ($mode & O_RDWR) { # Get a write lock. Release it again
686 # and die if there is any valid
687 # readers.
688
689 # Have a write lock already
690 return $self if $self->{write_lock};
691
692 if ($self->{read_lock}) { # We are a becoming a writer now. So
693 # we release the read lock to avoid
694 # blocking ourselves.
695 $self->{read_lock}->release;
696 delete $self->{read_lock};
697 }
698
699 # Get the preliminary write lock
700 $self->{write_lock} = $lockmgr->lock($self->{file} . '/write')
701 or die "Can't lock '$self->{file}/write'";
702
703 # If we actually want to write we must check if there are any
704 # readers. The write lock is confirmed if wen cannot find any
705 # valid readers.
706
707 local *DIR;
708 opendir DIR, $lockdir or
709 die "Could not opendir '$lockdir': $!";
710 for my $lockfile (grep { -f "$lockdir/$_" } readdir DIR) {
711 # Check if the locks are still valid. Since we are protected by
712 # a write lock, we could use a plain file. But we want to use
713 # the stale testing from LockFile::Simple.
714 if (my $lck = $lockmgr->trylock("$lockdir/$lockfile")) {
715 warn "Removing stale lockfile '$lockdir/$lockfile'";
716 $lck->release;
717 } else { # Found an active reader, rats!
718 $self->{write_lock}->release;
719 die "Cannot write table '$file' while it's in use";
720 }
721 }
722 closedir DIR;
723 } else {
724 # Have a read lock already
725 return $self if $self->{read_lock};
726
727 # Get the preliminary write lock to protect the directory
728 # operations.
729
730 my $write_lock = $lockmgr->lock($self->{file} . '/read/write')
731 or die "Can't lock '$self->{file}/read/write'";
732
733 # Find a new read slot. Maybe the plain file would be better?
734 my $id = time;
735 while (-f "$lockdir/$id.lock") { # here assume ".lock" format!
736 $id++;
737 }
738
739 $self->{read_lock} = $lockmgr->lock("$lockdir/$id")
740 or die "Can't lock '$lockdir/$id'";
741
742 # We are a reader now. So we release the write lock
743 $write_lock->release;
744 }
745 return $self;
746 }
747
748 sub unlock {
749 my $self = shift;
750
751 # Either we have a read or a write lock (or we close the table already)
752 # unless ($self->{read_lock} || $self->{write_lock}) {
753 # warn "WAIT::Table::unlock: Table aparently hold's no lock"
754 # }
755 if ($self->{write_lock}) {
756 $self->{write_lock}->release();
757 delete $self->{write_lock};
758 }
759 if ($self->{read_lock}) {
760 $self->{read_lock}->release();
761 delete $self->{read_lock};
762 }
763
764 }
765
766 sub DESTROY {
767 my $self = shift;
768
769 if ($self->{write_lock} || $self->{read_lock}) {
770 warn "Table handle destroyed without closing it first";
771 $self->unlock;
772 }
773 }
774
775 sub open_scan {
776 my $self = shift;
777 my $code = shift;
778
779 $self->{dbh} or $self->open;
780 require WAIT::Scan;
781 new WAIT::Scan $self, $self->{nextk}-1, $code;
782 }
783
784 sub open_index_scan {
785 my $self = shift;
786 my $attr = shift;
787 my $code = shift;
788 my $name = join '-', @$attr;
789
790 if (defined $self->{indexes}->{$name}) {
791 $self->{indexes}->{$name}->open_scan($code);
792 } else {
793 croak "No such index '$name'";
794 }
795 }
796
797 eval {sub WAIT::Query::Raw::new} unless defined \&WAIT::Query::Raw::new;
798
799 sub prefix {
800 my ($self , $attr, $prefix) = @_;
801 my %result;
802
803 defined $self->{db} or $self->open; # require layout
804
805 for (@{$self->{inverted}->{$attr}}) {
806 my $result = $_->prefix($prefix);
807 if (defined $result) {
808 $result{$_->name} = $result;
809 }
810 }
811 bless \%result, 'WAIT::Query::Raw';
812 }
813
814 sub intervall {
815 my ($self, $attr, $lb, $ub) = @_;
816 my %result;
817
818 defined $self->{db} or $self->open; # require layout
819
820 for (@{$self->{inverted}->{$attr}}) {
821 my $result = $_->intervall($lb, $ub);
822 if (defined $result) {
823 $result{$_->name} = $result;
824 }
825 }
826 bless \%result, 'WAIT::Query::Raw';
827 }
828
829 sub search {
830 my $self = shift;
831 my ($query, $attr, $cont, $raw);
832 if (ref $_[0]) {
833 $query = shift;
834
835 $attr = $query->{attr};
836 $cont = $query->{cont};
837 $raw = $query->{raw};
838 } else {
839 require Carp;
840 Carp::cluck("Using three argument search interface is deprecated, use hashref interface instead");
841 $attr = shift;
842 $cont = shift;
843 $raw = shift;
844 $query = {
845 attr => $attr,
846 cont => $cont,
847 raw => $raw,
848 };
849 }
850
851 my %result;
852
853 defined $self->{db} or $self->open; # require layout
854
855 if ($raw) {
856 for (@{$self->{inverted}->{$attr}}) {
857 my $name = $_->name;
858 if (exists $raw->{$name} and @{$raw->{$name}}) {
859 my $scale = 1/scalar(@{$raw->{$name}});
860 my %r = $_->search_raw($query, @{$raw->{$name}});
861 my ($key, $val);
862 while (($key, $val) = each %r) {
863 if (exists $result{$key}) {
864 $result{$key} += $val*$scale;
865 } else {
866 $result{$key} = $val*$scale;
867 }
868 }
869 }
870 }
871 }
872 if (defined $cont and $cont ne '') {
873 for (@{$self->{inverted}->{$attr}}) {
874 my %r = $_->search($query, $cont);
875 my ($key, $val);
876 while (($key, $val) = each %r) {
877 if (exists $result{$key}) {
878 $result{$key} += $val;
879 } else {
880 $result{$key} = $val;
881 }
882 }
883 }
884 }
885 # sanity check for deleted documents.
886 # this should not be necessary !@#$
887 for (keys %result) {
888 delete $result{$_} if $self->{deleted}->{$_}
889 }
890 %result;
891 }
892
893 sub hilight_positions {
894 my ($self, $attr, $text, $query, $raw) = @_;
895 my %pos;
896
897 if (defined $raw) {
898 for (@{$self->{inverted}->{$attr}}) { # objects of type
899 # WAIT::InvertedIndex for
900 # this index field $attr
901 my $name = $_->name;
902 if (exists $raw->{$name}) {
903 my %qt;
904 grep $qt{$_}++, @{$raw->{$name}};
905 for ($_->parse_pos($text)) {
906 if (exists $qt{$_->[0]}) {
907 $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
908 }
909 }
910 }
911 }
912 }
913 if (defined $query) {
914 for (@{$self->{inverted}->{$attr}}) {
915 my %qt;
916
917 grep $qt{$_}++, $_->parse($query);
918 for ($_->parse_pos($text)) {
919 if (exists $qt{$_->[0]}) {
920 if (exists $pos{$_->[1]}) { # perl -w ;-)
921 $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
922 } else {
923 $pos{$_->[1]} = length($_->[0]);
924 }
925 }
926 }
927 }
928 }
929
930 \%pos;
931 }
932
933 sub hilight {
934 my ($tb, $buf, $qplain, $qraw) = @_;
935 my $layout = $tb->layout();
936
937 my @result;
938
939 $qplain ||= {};
940 $qraw ||= {};
941 my @ttxt = $layout->tag($buf);
942 while (@ttxt) {
943 no strict 'refs';
944 my %tag = %{shift @ttxt};
945 my $txt = shift @ttxt;
946 my $fld;
947
948 my %hl;
949 for $fld (grep defined $tag{$_}, keys %$qplain, keys %$qraw) {
950 my $hp = $tb->hilight_positions($fld, $txt,
951 $qplain->{$fld}, $qraw->{$fld});
952 for (keys %$hp) {
953 if (exists $hl{$_}) { # -w ;-(
954 $hl{$_} = max($hl{$_}, $hp->{$_});
955 } else {
956 $hl{$_} = $hp->{$_};
957 }
958 }
959 }
960 my $pos;
961 my $qt = {_qt => 1, %tag};
962 my $pl = \%tag;
963 my $last = length($txt);
964 my @tmp;
965 for $pos (sort {$b <=> $a} keys %hl) {
966 unshift @tmp, $pl, substr($txt,$pos+$hl{$pos},$last-$pos-$hl{$pos});
967 unshift @tmp, $qt, substr($txt,$pos,$hl{$pos});
968 $last = $pos;
969 }
970 push @result, $pl, substr($txt,0,$last);
971 push @result, @tmp;
972 }
973 @result; # no speed necessary
974 }
975
976 1;

Properties

Name Value
cvs2svn:cvs-rev 1.9

  ViewVC Help
Powered by ViewVC 1.1.26