/[wait]/trunk/lib/WAIT/Table.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WAIT/Table.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 35 - (show annotations)
Sun Nov 12 17:00:27 2000 UTC (23 years, 6 months ago) by ulpfr
Original Path: cvs-head/lib/WAIT/Table.pm
File size: 25088 byte(s)
Moved locking stuff to new method getlock().  Checked for write lock
in drop().

1 # -*- Mode: Cperl -*-
2 # Table.pm --
3 # ITIID : $ITI$ $Header $__Header$
4 # Author : Ulrich Pfeifer
5 # Created On : Thu Aug 8 13:05:10 1996
6 # Last Modified By: Ulrich Pfeifer
7 # Last Modified On: Sun Nov 12 17:51:56 2000
8 # Language : CPerl
9 # Update Count : 148
10 # Status : Unknown, Use with caution!
11 #
12 # Copyright (c) 1996-1997, Ulrich Pfeifer
13 #
14
15 =head1 NAME
16
17 WAIT::Table -- Module for maintaining Tables / Relations
18
19 =head1 SYNOPSIS
20
21 require WAIT::Table;
22
23 =head1 DESCRIPTION
24
25 =cut
26
27 package WAIT::Table;
28
29 use WAIT::Table::Handle ();
30 require WAIT::Parse::Base;
31
32 use strict;
33 use Carp;
34 # use autouse Carp => qw( croak($) );
35 use DB_File;
36 use Fcntl;
37 use LockFile::Simple ();
38
39 my $USE_RECNO = 0;
40
41 =head2 Creating a Table.
42
43 The constructor WAIT::Table-E<gt>new is normally called via the
44 create_table method of a database handle. This is not enforced, but
45 creating a table does not make any sense unless the table is
46 registered by the database because the latter implements persistence
47 of the meta data. Registering is done automatically by letting the
48 database handle the creation of a table.
49
50 my $db = WAIT::Database->create(name => 'sample');
51 my $tb = $db->create_table(name => 'test',
52 access => $access,
53 layout => $layout,
54 attr => ['docid', 'headline'],
55 );
56
57 The constructor returns a handle for the table. This handle is hidden by the
58 table module, to prevent direct access if called via Table.
59
60 =over 10
61
62 =item C<access> => I<accessobj>
63
64 A reference to an access object for the external parts (attributes) of
65 tuples. As you may remember, the WAIT System does not enforce that
66 objects are completely stored inside the system to avoid duplication.
67 There is no (strong) point in storing all your HTML documents inside
68 the system when indexing your WWW-Server.
69
70 The access object is designed to work like as a tied hash. You pass
71 the refernce to the object, not the tied hash though. An example
72 implementation of an access class that works for manpages is
73 WAIT::Document::Nroff.
74
75 The implementation needs to take into account that WAIT will keep this
76 object in a Data::Dumper or Storable database and re-use it when sman
77 is run. So it is not good enough if we can produce the index with it
78 now, when we create or actively access the table, WAIT also must be
79 able to retrieve documents on its own, when we are in a different
80 context. This happens specifically in a retrieval. To get this working
81 seemlessly, the access-defining class must implement a close method.
82 This method will be called before the Data::Dumper dump takes place.
83 In that moment the access-defining class must get rid of all data
84 structures that cannot be reconstructed via the Data::Dumper dump,
85 such as database handles or C pointers.
86
87 =item C<file> => I<fname>
88
89 The filename of the records file. Files for indexes will have I<fname>
90 as prefix. I<Mandatory>, but usually taken care of by the
91 WAIT::Database handle when the constructor is called via
92 WAIT::Database::create_table().
93
94 =item C<name> => I<name>
95
96 The name of this table. I<Mandatory>
97
98 =item C<attr> => [ I<attr> ... ]
99
100 A reference to an array of attribute names. WAIT will keep the
101 contents of these attributes in its table. I<Mandatory>
102
103 =item C<djk> => [ I<attr> ... ]
104
105 A reference to an array of attribute names which make up the
106 I<disjointness key>. Don't think about it - it's of no use yet;
107
108 =item C<layout> => I<layoutobj>
109
110 A reference to an external parser object. Defaults to a new instance
111 of C<WAIT::Parse::Base>. For an example implementation see
112 WAIT::Parse::Nroff. A layout class can be implemented as a singleton
113 class if you so like.
114
115 =item C<keyset> => I<keyset>
116
117 The set of attributes needed to identify a record. Defaults to all
118 attributes.
119
120 =item C<invindex> => I<inverted index>
121
122 A reference to an anon array defining attributes of each record that
123 need to be indexed. See the source of smakewhatis for how to set this
124 up.
125
126 =back
127
128 =cut
129
130 sub new {
131 my $type = shift;
132 my %parm = @_;
133 my $self = {};
134
135 # Check for mandatory attrs early
136 $self->{name} = $parm{name} or croak "No name specified";
137 $self->{attr} = $parm{attr} or croak "No attributes specified";
138
139 # Do that before we eventually add '_weight' to attributes.
140 $self->{keyset} = $parm{keyset} || [[@{$parm{attr}}]];
141
142 $self->{mode} = O_CREAT | O_RDWR;
143
144 # Determine and set up subclass
145 $type = ref($type) || $type;
146 if (defined $parm{djk}) {
147 if (@{$parm{djk}} == @{$parm{attr}}) {
148 # All attributes in DK (sloppy test here!)
149 $type .= '::Independent';
150 require WAIT::Table::Independent;
151 } else {
152 $type .= '::Disjoint';
153 require WAIT::Table::Disjoint;
154 }
155 # Add '_weight' to attributes
156 my %attr;
157 @attr{@{$parm{attr}}} = (1) x @{$parm{attr}};
158 unshift @{$parm{attr}}, '_weight' unless $attr{'_weight'};
159 }
160
161 $self->{file} = $parm{file} or croak "No file specified";
162 if (-d $self->{file}){
163 warn "Warning: Directory '$self->{file}' already exists\n";
164 } elsif (!mkdir($self->{file}, 0775)) {
165 croak "Could not 'mkdir $self->{file}': $!\n";
166 }
167
168 $self->{djk} = $parm{djk} if defined $parm{djk};
169 $self->{layout} = $parm{layout} || new WAIT::Parse::Base;
170 $self->{access} = $parm{access} if defined $parm{access};
171 $self->{nextk} = 1; # next record to insert; first record unused
172 $self->{deleted} = {}; # no deleted records yet
173 $self->{indexes} = {};
174
175 bless $self, $type;
176
177 # Checking for readers is not necessary, but let's go with the
178 # generic method.
179 $self->getlock(O_RDWR|O_CREAT); # dies when failing
180
181 # Call create_index() and create_index() for compatibility
182 for (@{$self->{keyset}||[]}) {
183 #carp "Specification of indexes at table create time is deprecated";
184 $self->create_index(@$_);
185 }
186 while (@{$parm{invindex}||[]}) {
187 # carp "Specification of inverted indexes at table create time is deprecated";
188 my $att = shift @{$parm{invindex}};
189 my @spec = @{shift @{$parm{invindex}}};
190 my @opt;
191
192 if (ref($spec[0])) {
193 carp "Secondary pipelines are deprecated\n";
194 @opt = %{shift @spec};
195 }
196 $self->create_inverted_index(attribute => $att, pipeline => \@spec, @opt);
197 }
198
199 $self;
200 # end of backwarn compatibility stuff
201 }
202
203 =head2 Creating an index
204
205 $tb->create_index('docid');
206
207 =item C<create_index>
208
209 must be called with a list of attributes. This must be a subset of the
210 attributes specified when the table was created. Currently this
211 method must be called before the first tuple is inserted in the
212 table!
213
214 =cut
215
216 sub create_index {
217 my $self= shift;
218
219 croak "Cannot create index for table aready populated"
220 if $self->{nextk} > 1;
221
222 require WAIT::Index;
223
224 my $name = join '-', @_;
225 $self->{indexes}->{$name} =
226 new WAIT::Index file => $self->{file}.'/'.$name, attr => $_;
227 }
228
229 =head2 Creating an inverted index
230
231 $tb->create_inverted_index
232 (attribute => 'au',
233 pipeline => ['detex', 'isotr', 'isolc', 'split2', 'stop'],
234 predicate => 'plain',
235 );
236
237 =over 5
238
239 =item C<attribute>
240
241 The attribute to build the index on. This attribute may not be in the
242 set attributes specified when the table was created.
243
244 =item C<pipeline>
245
246 A piplines specification is a reference to an array of method names
247 (from package C<WAIT::Filter>) which are to be applied in sequence to
248 the contents of the named attribute. The attribute name may not be in
249 the attribute list.
250
251 =item C<predicate>
252
253 An indication which predicate the index implements. This may be
254 e.g. 'plain', 'stemming' or 'soundex'. The indicator will be used for
255 query processing. Currently there is no standard set of predicate
256 names. The predicate defaults to the last member of the pipeline if
257 omitted.
258
259 =back
260
261 Currently this method must be called before the first tuple is
262 inserted in the table!
263
264 =cut
265
266 sub create_inverted_index {
267 my $self = shift;
268 my %parm = @_;
269
270 croak "No attribute specified" unless $parm{attribute};
271 croak "No pipeline specified" unless $parm{pipeline};
272
273 $parm{predicate} ||= $parm{pipeline}->[-1];
274
275 croak "Cannot create index for table aready populated"
276 if $self->{nextk} > 1;
277
278 require WAIT::InvertedIndex;
279
280 # backward compatibility stuff
281 my %opt = %parm;
282 for (qw(attribute pipeline predicate)) {
283 delete $opt{$_};
284 }
285
286 my $name = join '_', ($parm{attribute}, @{$parm{pipeline}});
287 my $idx = new WAIT::InvertedIndex(file => $self->{file}.'/'.$name,
288 filter => [@{$parm{pipeline}}], # clone
289 name => $name,
290 attr => $parm{attribute},
291 %opt, # backward compatibility stuff
292 );
293 # We will have to use $parm{predicate} here
294 push @{$self->{inverted}->{$parm{attribute}}}, $idx;
295 }
296
297 sub dir {
298 $_[0]->{file};
299 }
300
301 =head2 C<$tb-E<gt>layout>
302
303 Returns the reference to the associated parser object.
304
305 =cut
306
307 sub layout { $_[0]->{layout} }
308
309 =head2 C<$tb-E<gt>fields>
310
311 Returns the array of attribute names.
312
313 =cut
314
315
316 sub fields { keys %{$_[0]->{inverted}}}
317
318 =head2 C<$tb-E<gt>drop>
319
320 Must be called via C<WAIT::Database::drop_table>
321
322 =cut
323
324 sub drop {
325 my $self = shift;
326
327 unless ($self->{write_lock}){
328 warn "Cannot drop table without write lock. Nothing done";
329 return;
330 }
331
332 if ((caller)[0] eq 'WAIT::Database') { # database knows about this
333 $self->close; # just make sure
334
335 my $file = $self->{file};
336
337 for (values %{$self->{indexes}}) {
338 $_->drop;
339 }
340 unlink "$file/records";
341 rmdir "$file/read" or warn "Could not rmdir '$file/read'";
342
343 # $self->unlock;
344 ! (!-e $file or rmdir $file);
345 } else {
346 croak ref($self)."::drop called directly";
347 }
348 }
349
350 sub mrequire ($) {
351 my $module = shift;
352
353 $module =~ s{::}{/}g;
354 $module .= '.pm';
355 require $module;
356 }
357
358 sub open {
359 my $self = shift;
360 my $file = $self->{file} . '/records';
361
362 mrequire ref($self); # that's tricky eh?
363 if (defined $self->{'layout'}) {
364 mrequire ref($self->{'layout'});
365 }
366 if (defined $self->{'access'}) {
367 mrequire ref($self->{'access'});
368 }
369 if (exists $self->{indexes}) {
370 require WAIT::Index;
371 for (values %{$self->{indexes}}) {
372 $_->{mode} = $self->{mode};
373 }
374 }
375 if (exists $self->{inverted}) {
376 my ($att, $idx);
377 for $att (keys %{$self->{inverted}}) {
378 for $idx (@{$self->{inverted}->{$att}}) {
379 $idx->{mode} = $self->{mode};
380 }
381 }
382 require WAIT::InvertedIndex;
383 }
384 unless (defined $self->{dbh}) {
385 if ($USE_RECNO) {
386 $self->{dbh} = tie(@{$self->{db}}, 'DB_File', $file,
387 $self->{mode}, 0664, $DB_RECNO);
388 } else {
389 $self->{dbh} =
390 tie(%{$self->{db}}, 'DB_File', $file,
391 $self->{mode}, 0664, $DB_BTREE);
392 }
393 }
394
395 $self->getlock($self->{mode});
396
397 $self;
398 }
399
400 sub fetch_extern {
401 my $self = shift;
402
403 # print "#@_", $self->{'access'}->{Mode}, "\n"; # DEBUGGING?
404 if (exists $self->{'access'}) {
405 mrequire ref($self->{'access'});
406 $self->{'access'}->FETCH(@_);
407 }
408 }
409
410 sub fetch_extern_by_id {
411 my $self = shift;
412
413 $self->fetch_extern($self->fetch(@_));
414 }
415
416 sub _find_index {
417 my $self = shift;
418 my (@att) = @_;
419 my %att;
420 my $name;
421
422 @att{@att} = @att;
423
424 KEY: for $name (keys %{$self->{indexes}}) {
425 my @iat = split /-/, $name;
426 for (@iat) {
427 next KEY unless exists $att{$_};
428 }
429 return $self->{indexes}->{$name};
430 }
431 return undef;
432 }
433
434 sub have {
435 my $self = shift;
436 my %parm = @_;
437
438 my $index = $self->_find_index(keys %parm) or return; # no index-no have
439
440 defined $self->{db} or $self->open;
441 return $index->have(@_);
442 }
443
444 sub insert {
445 my $self = shift;
446 my %parm = @_;
447
448 defined $self->{db} or $self->open;
449
450 # We should move all writing methods to a subclass to check only once
451 $self->{mode} & O_RDWR or croak "Cannot insert into table opened in RD_ONLY mode";
452
453 my $tuple = join($;, map($parm{$_} || '', @{$self->{attr}}));
454 my $key;
455 my @deleted = keys %{$self->{deleted}};
456 my $gotkey = 0;
457
458 if (@deleted) {
459 $key = pop @deleted;
460 delete $self->{deleted}->{$key};
461 # Sanity check
462 if ($key && $key>0) {
463 $gotkey=1;
464 } else {
465 warn(sprintf("WAIT database inconsistency during insert ".
466 "key[%s]: Please rebuild index\n",
467 $key
468 ));
469 }
470 }
471 unless ($gotkey) {
472 $key = $self->{nextk}++;
473 }
474 if ($USE_RECNO) {
475 $self->{db}->[$key] = $tuple;
476 } else {
477 $self->{db}->{$key} = $tuple;
478 }
479 for (values %{$self->{indexes}}) {
480 unless ($_->insert($key, %parm)) {
481 # duplicate key, undo changes
482 if ($key == $self->{nextk}-1) {
483 $self->{nextk}--;
484 } else {
485 # warn "setting key[$key] deleted during insert";
486 $self->{deleted}->{$key}=1;
487 }
488 my $idx;
489 for $idx (values %{$self->{indexes}}) {
490 last if $idx eq $_;
491 $idx->remove($key, %parm);
492 }
493 return undef;
494 }
495 }
496 if (defined $self->{inverted}) {
497 my $att;
498 for $att (keys %{$self->{inverted}}) {
499 if (defined $parm{$att}) {
500 map $_->insert($key, $parm{$att}), @{$self->{inverted}->{$att}};
501 #map $_->sync, @{$self->{inverted}->{$att}}
502 }
503 }
504 }
505 $key
506 }
507
508 sub sync {
509 my $self = shift;
510
511 for (values %{$self->{indexes}}) {
512 map $_->sync, $_;
513 }
514 if (defined $self->{inverted}) {
515 my $att;
516 for $att (keys %{$self->{inverted}}) {
517 map $_->sync, @{$self->{inverted}->{$att}}
518 }
519 }
520 }
521
522 sub fetch {
523 my $self = shift;
524 my $key = shift;
525
526 return () if exists $self->{deleted}->{$key};
527
528 defined $self->{db} or $self->open;
529 if ($USE_RECNO) {
530 $self->unpack($self->{db}->[$key]);
531 } else {
532 $self->unpack($self->{db}->{$key});
533 }
534 }
535
536 sub delete_by_key {
537 my $self = shift;
538 my $key = shift;
539
540 unless ($key) {
541 Carp::cluck "Warning: delete_by_key called without key. Looks like a bug in WAIT?";
542 return;
543 }
544
545 return $self->{deleted}->{$key} if defined $self->{deleted}->{$key};
546 my %tuple = $self->fetch($key);
547 for (values %{$self->{indexes}}) {
548 $_->delete($key, %tuple);
549 }
550 if (defined $self->{inverted}) {
551 # User *must* provide the full record for this or the entries
552 # in the inverted index will not be removed
553 %tuple = (%tuple, @_);
554 my $att;
555 for $att (keys %{$self->{inverted}}) {
556 if (defined $tuple{$att}) {
557 map $_->delete($key, $tuple{$att}), @{$self->{inverted}->{$att}}
558 }
559 }
560 }
561 # warn "setting key[$key] deleted during delete_by_key";
562 ++$self->{deleted}->{$key};
563 }
564
565 sub delete {
566 my $self = shift;
567 my $tkey = $self->have(@_);
568 # warn "tkey[$tkey]\@_[@_]";
569 defined $tkey && $self->delete_by_key($tkey, @_);
570 }
571
572 sub unpack {
573 my($self, $tuple) = @_;
574
575 unless (defined $tuple){
576 # require Carp; # unfortunately gives us "bizarre copy...." :-(((((
577 warn("Debug: somebody called unpack without argument tuple!");
578 return;
579 }
580
581 my $att;
582 my @result;
583 my @tuple = split /$;/, $tuple;
584
585 for $att (@{$self->{attr}}) {
586 push @result, $att, shift @tuple;
587 }
588 @result;
589 }
590
591 sub set {
592 my ($self, $iattr, $value) = @_;
593
594 unless ($self->{write_lock}){
595 warn "Cannot set iattr[$iattr] without write lock. Nothing done";
596 return;
597 }
598 for my $att (keys %{$self->{inverted}}) {
599 if ($] > 5.003) { # avoid bug in perl up to 5.003_05
600 my $idx;
601 for $idx (@{$self->{inverted}->{$att}}) {
602 $idx->set($iattr, $value);
603 }
604 } else {
605 map $_->set($iattr, $value), @{$self->{inverted}->{$att}};
606 }
607 }
608
609 1;
610 }
611
612 sub close {
613 my $self = shift;
614
615 if (exists $self->{'access'}) {
616 eval {$self->{'access'}->close}; # dont bother if not opened
617 }
618 if ($WAIT::Index::VERSION) {
619 for (values %{$self->{indexes}}) {
620 $_->close();
621 }
622 }
623 if (defined $self->{inverted} && $WAIT::InvertedIndex::VERSION) {
624 # require WAIT::InvertedIndex; Uli: we can avoid closing indexes:
625 # if WAIT::InvertedIndex has not been loaded, they cannot have
626 # been altered so far
627 my $att;
628 for $att (keys %{$self->{inverted}}) {
629 if ($] > 5.003) { # avoid bug in perl up to 5.003_05
630 my $idx;
631 for $idx (@{$self->{inverted}->{$att}}) {
632 $idx->close;
633 }
634 } else {
635 map $_->close(), @{$self->{inverted}->{$att}};
636 }
637 }
638 }
639 if ($self->{dbh}) {
640 delete $self->{dbh};
641
642 if ($USE_RECNO) {
643 untie @{$self->{db}};
644 } else {
645 untie %{$self->{db}};
646 }
647 delete $self->{db};
648 }
649
650 $self->unlock;
651
652 1;
653 }
654
655 # Locking
656 #
657 # We allow multiple readers to coexists. But write access excludes
658 # all read access and vice versa. In practice read access on tables
659 # open for writing will mostly work ;-)
660
661 # If a "write" lock is requested, an existing "read" lock will be
662 # released. If a "read" lock ist requested, an existing "write" lock
663 # will be released. Requiring a lock already hold has no effect.
664
665 sub getlock {
666 my ($self, $mode) = @_;
667
668 my $lockmgr = LockFile::Simple->make(-autoclean => 1);
669 my $file = $self->{file} . '/records';
670 my $lockdir = $self->{file} . '/read';
671
672 unless (-d $lockdir) {
673 mkdir $lockdir, 0755 or die "Could not mkdir $lockdir: $!";
674 }
675
676 if ($mode & O_RDWR) { # Get a write lock. Release it again
677 # and die if there is any valid
678 # readers.
679
680 # Have a write lock already
681 return $self if $self->{write_lock};
682
683 if ($self->{read_lock}) { # We are a becoming a writer now. So
684 # we release the read lock to avoid
685 # blocking ourselves.
686 $self->{read_lock}->release;
687 delete $self->{read_lock};
688 }
689
690 # Get the preliminary write lock
691 $self->{write_lock} = $lockmgr->lock($self->{file} . '/write')
692 or die "Can't lock '$self->{file}/write'";
693
694 # If we actually want to write we must check if there are any
695 # readers. The write lock is confirmed if wen cannot find any
696 # valid readers.
697
698 local *DIR;
699 opendir DIR, $lockdir or
700 die "Could not opendir '$lockdir': $!";
701 for my $lockfile (grep { -f "$lockdir/$_" } readdir DIR) {
702 # Check if the locks are still valid. Since we are protected by
703 # a write lock, we could use a plain file. But we want to use
704 # the stale testing from LockFile::Simple.
705 if (my $lck = $lockmgr->trylock("$lockdir/$lockfile")) {
706 warn "Removing stale lockfile '$lockdir/$lockfile'";
707 $lck->release;
708 } else { # Found an active reader, rats!
709 $self->{write_lock}->release;
710 die "Cannot write table '$file' while it's in use";
711 }
712 }
713 closedir DIR;
714 } else {
715 # Have a read lock already
716 return $self if $self->{read_lock};
717
718 # Get the preliminary write lock to protect the directory
719 # operations. If we already have a write lock, it will go.
720
721 $self->{write_lock} ||= $lockmgr->lock($self->{file} . '/write')
722 or die "Can't lock '$self->{file}/write'";
723
724 # Find a new read slot. Maybe the plain file would be better?
725 my $id = time;
726 while (-f "$lockdir/$id.lock") { # here assume ".lock" format!
727 $id++;
728 }
729
730 $self->{read_lock} = $lockmgr->lock("$lockdir/$id")
731 or die "Can't lock '$lockdir/$id'";
732
733 # We are a reader now. So we release the write lock
734 $self->{write_lock}->release;
735 delete $self->{write_lock};
736 }
737 return $self;
738 }
739
740 sub unlock {
741 my $self = shift;
742
743 # Either we have a read or a write lock (or we close the table already)
744 # unless ($self->{read_lock} || $self->{write_lock}) {
745 # warn "WAIT::Table::unlock: Table aparently hold's no lock"
746 # }
747 if ($self->{write_lock}) {
748 $self->{write_lock}->release();
749 delete $self->{write_lock};
750 }
751 if ($self->{read_lock}) {
752 $self->{read_lock}->release();
753 delete $self->{read_lock};
754 }
755
756 }
757
758 sub DESTROY {
759 my $self = shift;
760
761 if ($self->{write_lock} || $self->{read_lock}) {
762 warn "Table handle destroyed without closing it first";
763 $self->unlock;
764 }
765 }
766
767 sub open_scan {
768 my $self = shift;
769 my $code = shift;
770
771 $self->{dbh} or $self->open;
772 require WAIT::Scan;
773 new WAIT::Scan $self, $self->{nextk}-1, $code;
774 }
775
776 sub open_index_scan {
777 my $self = shift;
778 my $attr = shift;
779 my $code = shift;
780 my $name = join '-', @$attr;
781
782 if (defined $self->{indexes}->{$name}) {
783 $self->{indexes}->{$name}->open_scan($code);
784 } else {
785 croak "No such index '$name'";
786 }
787 }
788
789 eval {sub WAIT::Query::Raw::new} unless defined \&WAIT::Query::Raw::new;
790
791 sub prefix {
792 my ($self , $attr, $prefix) = @_;
793 my %result;
794
795 defined $self->{db} or $self->open; # require layout
796
797 for (@{$self->{inverted}->{$attr}}) {
798 my $result = $_->prefix($prefix);
799 if (defined $result) {
800 $result{$_->name} = $result;
801 }
802 }
803 bless \%result, 'WAIT::Query::Raw';
804 }
805
806 sub intervall {
807 my ($self, $attr, $lb, $ub) = @_;
808 my %result;
809
810 defined $self->{db} or $self->open; # require layout
811
812 for (@{$self->{inverted}->{$attr}}) {
813 my $result = $_->intervall($lb, $ub);
814 if (defined $result) {
815 $result{$_->name} = $result;
816 }
817 }
818 bless \%result, 'WAIT::Query::Raw';
819 }
820
821 sub search {
822 my $self = shift;
823 my ($query, $attr, $cont, $raw);
824 if (ref $_[0]) {
825 $query = shift;
826
827 $attr = $query->{attr};
828 $cont = $query->{cont};
829 $raw = $query->{raw};
830 } else {
831 require Carp;
832 Carp::cluck("Using three argument search interface is deprecated, use hashref interface instead");
833 $attr = shift;
834 $cont = shift;
835 $raw = shift;
836 $query = {
837 attr => $attr,
838 cont => $cont,
839 raw => $raw,
840 };
841 }
842
843 my %result;
844
845 defined $self->{db} or $self->open; # require layout
846
847 if ($raw) {
848 for (@{$self->{inverted}->{$attr}}) {
849 my $name = $_->name;
850 if (exists $raw->{$name} and @{$raw->{$name}}) {
851 my $scale = 1/scalar(@{$raw->{$name}});
852 my %r = $_->search_raw($query, @{$raw->{$name}});
853 my ($key, $val);
854 while (($key, $val) = each %r) {
855 if (exists $result{$key}) {
856 $result{$key} += $val*$scale;
857 } else {
858 $result{$key} = $val*$scale;
859 }
860 }
861 }
862 }
863 }
864 if (defined $cont and $cont ne '') {
865 for (@{$self->{inverted}->{$attr}}) {
866 my %r = $_->search($query, $cont);
867 my ($key, $val);
868 while (($key, $val) = each %r) {
869 if (exists $result{$key}) {
870 $result{$key} += $val;
871 } else {
872 $result{$key} = $val;
873 }
874 }
875 }
876 }
877 # sanity check for deleted documents.
878 # this should not be necessary !@#$
879 for (keys %result) {
880 delete $result{$_} if $self->{deleted}->{$_}
881 }
882 %result;
883 }
884
885 sub hilight_positions {
886 my ($self, $attr, $text, $query, $raw) = @_;
887 my %pos;
888
889 if (defined $raw) {
890 for (@{$self->{inverted}->{$attr}}) { # objects of type
891 # WAIT::InvertedIndex for
892 # this index field $attr
893 my $name = $_->name;
894 if (exists $raw->{$name}) {
895 my %qt;
896 grep $qt{$_}++, @{$raw->{$name}};
897 for ($_->parse_pos($text)) {
898 if (exists $qt{$_->[0]}) {
899 $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
900 }
901 }
902 }
903 }
904 }
905 if (defined $query) {
906 for (@{$self->{inverted}->{$attr}}) {
907 my %qt;
908
909 grep $qt{$_}++, $_->parse($query);
910 for ($_->parse_pos($text)) {
911 if (exists $qt{$_->[0]}) {
912 if (exists $pos{$_->[1]}) { # perl -w ;-)
913 $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
914 } else {
915 $pos{$_->[1]} = length($_->[0]);
916 }
917 }
918 }
919 }
920 }
921
922 \%pos;
923 }
924
925 sub hilight {
926 my ($tb, $buf, $qplain, $qraw) = @_;
927 my $layout = $tb->layout();
928
929 my @result;
930
931 $qplain ||= {};
932 $qraw ||= {};
933 my @ttxt = $layout->tag($buf);
934 while (@ttxt) {
935 no strict 'refs';
936 my %tag = %{shift @ttxt};
937 my $txt = shift @ttxt;
938 my $fld;
939
940 my %hl;
941 for $fld (grep defined $tag{$_}, keys %$qplain, keys %$qraw) {
942 my $hp = $tb->hilight_positions($fld, $txt,
943 $qplain->{$fld}, $qraw->{$fld});
944 for (keys %$hp) {
945 if (exists $hl{$_}) { # -w ;-(
946 $hl{$_} = max($hl{$_}, $hp->{$_});
947 } else {
948 $hl{$_} = $hp->{$_};
949 }
950 }
951 }
952 my $pos;
953 my $qt = {_qt => 1, %tag};
954 my $pl = \%tag;
955 my $last = length($txt);
956 my @tmp;
957 for $pos (sort {$b <=> $a} keys %hl) {
958 unshift @tmp, $pl, substr($txt,$pos+$hl{$pos},$last-$pos-$hl{$pos});
959 unshift @tmp, $qt, substr($txt,$pos,$hl{$pos});
960 $last = $pos;
961 }
962 push @result, $pl, substr($txt,0,$last);
963 push @result, @tmp;
964 }
965 @result; # no speed necessary
966 }
967
968 1;

Properties

Name Value
cvs2svn:cvs-rev 1.5

  ViewVC Help
Powered by ViewVC 1.1.26