/[wait]/trunk/lib/WAIT/Table.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WAIT/Table.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 111 - (hide annotations)
Tue Jul 13 19:06:46 2004 UTC (19 years, 10 months ago) by dpavlin
File size: 23735 byte(s)
change from $self->file to $self->path

1 ulpfr 13 # -*- Mode: Cperl -*-
2 ulpfr 10 # Table.pm --
3     # ITIID : $ITI$ $Header $__Header$
4     # Author : Ulrich Pfeifer
5     # Created On : Thu Aug 8 13:05:10 1996
6     # Last Modified By: Ulrich Pfeifer
7 dpavlin 89 # Last Modified On: Wed Jan 23 14:15:15 2002
8 ulpfr 10 # Language : CPerl
9 dpavlin 89 # Update Count : 152
10 ulpfr 10 # Status : Unknown, Use with caution!
11 ulpfr 13 #
12 ulpfr 10 # Copyright (c) 1996-1997, Ulrich Pfeifer
13 ulpfr 13 #
14 ulpfr 10
15     =head1 NAME
16    
17     WAIT::Table -- Module for maintaining Tables / Relations
18    
19     =head1 SYNOPSIS
20    
21     require WAIT::Table;
22    
23     =head1 DESCRIPTION
24    
25     =cut
26    
27     package WAIT::Table;
28 dpavlin 108 our $VERSION = "2.000";
29 ulpfr 13
30     use WAIT::Table::Handle ();
31 ulpfr 10 require WAIT::Parse::Base;
32 ulpfr 13
33 ulpfr 10 use strict;
34     use Carp;
35 ulpfr 13 # use autouse Carp => qw( croak($) );
36 dpavlin 108 use BerkeleyDB;
37 ulpfr 10 use Fcntl;
38    
39     =head2 Creating a Table.
40    
41 ulpfr 13 The constructor WAIT::Table-E<gt>new is normally called via the
42 ulpfr 10 create_table method of a database handle. This is not enforced, but
43 ulpfr 13 creating a table does not make any sense unless the table is
44 ulpfr 10 registered by the database because the latter implements persistence
45     of the meta data. Registering is done automatically by letting the
46 ulpfr 13 database handle the creation of a table.
47 ulpfr 10
48 ulpfr 13 my $db = WAIT::Database->create(name => 'sample');
49     my $tb = $db->create_table(name => 'test',
50     access => $access,
51     layout => $layout,
52     attr => ['docid', 'headline'],
53     );
54 ulpfr 10
55     The constructor returns a handle for the table. This handle is hidden by the
56     table module, to prevent direct access if called via Table.
57    
58     =over 10
59    
60 ulpfr 13 =item C<access> => I<accessobj>
61 ulpfr 10
62 ulpfr 13 A reference to an access object for the external parts (attributes) of
63 ulpfr 10 tuples. As you may remember, the WAIT System does not enforce that
64     objects are completely stored inside the system to avoid duplication.
65 ulpfr 13 There is no (strong) point in storing all your HTML documents inside
66 ulpfr 10 the system when indexing your WWW-Server.
67    
68 ulpfr 13 The access object is designed to work like as a tied hash. You pass
69     the refernce to the object, not the tied hash though. An example
70     implementation of an access class that works for manpages is
71     WAIT::Document::Nroff.
72    
73     The implementation needs to take into account that WAIT will keep this
74     object in a Data::Dumper or Storable database and re-use it when sman
75     is run. So it is not good enough if we can produce the index with it
76     now, when we create or actively access the table, WAIT also must be
77     able to retrieve documents on its own, when we are in a different
78     context. This happens specifically in a retrieval. To get this working
79     seemlessly, the access-defining class must implement a close method.
80     This method will be called before the Data::Dumper dump takes place.
81     In that moment the access-defining class must get rid of all data
82     structures that cannot be reconstructed via the Data::Dumper dump,
83     such as database handles or C pointers.
84    
85 ulpfr 10 =item C<file> => I<fname>
86    
87     The filename of the records file. Files for indexes will have I<fname>
88 ulpfr 13 as prefix. I<Mandatory>, but usually taken care of by the
89     WAIT::Database handle when the constructor is called via
90     WAIT::Database::create_table().
91 ulpfr 10
92     =item C<name> => I<name>
93    
94     The name of this table. I<Mandatory>
95    
96     =item C<attr> => [ I<attr> ... ]
97    
98 ulpfr 13 A reference to an array of attribute names. WAIT will keep the
99     contents of these attributes in its table. I<Mandatory>
100 ulpfr 10
101     =item C<djk> => [ I<attr> ... ]
102    
103     A reference to an array of attribute names which make up the
104 ulpfr 13 I<disjointness key>. Don't think about it - it's of no use yet;
105 ulpfr 10
106     =item C<layout> => I<layoutobj>
107    
108 ulpfr 13 A reference to an external parser object. Defaults to a new instance
109     of C<WAIT::Parse::Base>. For an example implementation see
110     WAIT::Parse::Nroff. A layout class can be implemented as a singleton
111     class if you so like.
112 ulpfr 10
113 ulpfr 13 =item C<keyset> => I<keyset>
114 ulpfr 10
115 ulpfr 13 The set of attributes needed to identify a record. Defaults to all
116     attributes.
117 ulpfr 10
118 ulpfr 13 =item C<invindex> => I<inverted index>
119    
120     A reference to an anon array defining attributes of each record that
121     need to be indexed. See the source of smakewhatis for how to set this
122     up.
123    
124 ulpfr 10 =back
125    
126     =cut
127    
128     sub new {
129     my $type = shift;
130     my %parm = @_;
131     my $self = {};
132    
133 ulpfr 13 # Check for mandatory attrs early
134 dpavlin 108 for my $x (qw(name attr env maindbfile tablename)) {
135     $self->{$x} = $parm{$x} or croak "No $x specified";
136     }
137 ulpfr 13
138 ulpfr 10 # Do that before we eventually add '_weight' to attributes.
139     $self->{keyset} = $parm{keyset} || [[@{$parm{attr}}]];
140 ulpfr 13
141 ulpfr 10 $self->{mode} = O_CREAT | O_RDWR;
142 ulpfr 13
143 ulpfr 10 # Determine and set up subclass
144     $type = ref($type) || $type;
145     if (defined $parm{djk}) {
146     if (@{$parm{djk}} == @{$parm{attr}}) {
147     # All attributes in DK (sloppy test here!)
148     $type .= '::Independent';
149     require WAIT::Table::Independent;
150     } else {
151     $type .= '::Disjoint';
152     require WAIT::Table::Disjoint;
153     }
154     # Add '_weight' to attributes
155     my %attr;
156     @attr{@{$parm{attr}}} = (1) x @{$parm{attr}};
157     unshift @{$parm{attr}}, '_weight' unless $attr{'_weight'};
158     }
159    
160 dpavlin 108 $self->{path} = $parm{path} or croak "No path specified";
161     bless $self, $type;
162 ulpfr 19
163 ulpfr 10 $self->{djk} = $parm{djk} if defined $parm{djk};
164     $self->{layout} = $parm{layout} || new WAIT::Parse::Base;
165     $self->{access} = $parm{access} if defined $parm{access};
166     $self->{nextk} = 1; # next record to insert; first record unused
167     $self->{deleted} = {}; # no deleted records yet
168     $self->{indexes} = {};
169    
170 ulpfr 35 # Checking for readers is not necessary, but let's go with the
171     # generic method.
172    
173 ulpfr 10 # Call create_index() and create_index() for compatibility
174     for (@{$self->{keyset}||[]}) {
175     #carp "Specification of indexes at table create time is deprecated";
176     $self->create_index(@$_);
177     }
178     while (@{$parm{invindex}||[]}) {
179     # carp "Specification of inverted indexes at table create time is deprecated";
180     my $att = shift @{$parm{invindex}};
181     my @spec = @{shift @{$parm{invindex}}};
182 dpavlin 108 my @opt = ();
183 ulpfr 13
184 ulpfr 10 if (ref($spec[0])) {
185 dpavlin 108 warn "Secondary pipelines are deprecated";
186 ulpfr 10 @opt = %{shift @spec};
187     }
188 dpavlin 108 $self->create_inverted_index(attribute => $att,
189     pipeline => \@spec,
190     @opt);
191 ulpfr 10 }
192 ulpfr 19
193 ulpfr 10 $self;
194     # end of backwarn compatibility stuff
195     }
196    
197 dpavlin 108 for my $accessor (qw(maindbfile tablename)) {
198     no strict 'refs';
199     *{$accessor} = sub {
200     my($self) = @_;
201     return $self->{$accessor} if $self->{$accessor};
202     require Carp;
203     Carp::confess("accessor $accessor not there");
204     }
205     }
206    
207 ulpfr 10 =head2 Creating an index
208    
209     $tb->create_index('docid');
210    
211 dpavlin 109 C<create_index>
212 ulpfr 10 must be called with a list of attributes. This must be a subset of the
213     attributes specified when the table was created. Currently this
214     method must be called before the first tuple is inserted in the
215     table!
216    
217     =cut
218    
219     sub create_index {
220     my $self= shift;
221 ulpfr 13
222 ulpfr 10 croak "Cannot create index for table aready populated"
223     if $self->{nextk} > 1;
224 ulpfr 13
225 ulpfr 10 require WAIT::Index;
226 ulpfr 13
227 ulpfr 10 my $name = join '-', @_;
228 dpavlin 108 #### warn "WARNING: Suspect use of \$_ in method create_index. name[$name]_[$_]";
229 ulpfr 10 $self->{indexes}->{$name} =
230 dpavlin 108 WAIT::Index->new(
231 dpavlin 111 file => $self->path.'/'.$name,
232 dpavlin 108 subname => $name,
233     env => $self->{env},
234     maindbfile => $self->maindbfile,
235     tablename => $self->tablename,
236     attr => $_,
237     );
238 ulpfr 10 }
239    
240     =head2 Creating an inverted index
241    
242     $tb->create_inverted_index
243     (attribute => 'au',
244     pipeline => ['detex', 'isotr', 'isolc', 'split2', 'stop'],
245     predicate => 'plain',
246     );
247    
248     =over 5
249    
250     =item C<attribute>
251    
252     The attribute to build the index on. This attribute may not be in the
253     set attributes specified when the table was created.
254    
255     =item C<pipeline>
256    
257 ulpfr 13 A piplines specification is a reference to an array of method names
258     (from package C<WAIT::Filter>) which are to be applied in sequence to
259     the contents of the named attribute. The attribute name may not be in
260     the attribute list.
261 ulpfr 10
262     =item C<predicate>
263    
264     An indication which predicate the index implements. This may be
265     e.g. 'plain', 'stemming' or 'soundex'. The indicator will be used for
266     query processing. Currently there is no standard set of predicate
267 ulpfr 13 names. The predicate defaults to the last member of the pipeline if
268 ulpfr 10 omitted.
269    
270     =back
271    
272     Currently this method must be called before the first tuple is
273     inserted in the table!
274    
275     =cut
276    
277     sub create_inverted_index {
278     my $self = shift;
279     my %parm = @_;
280    
281     croak "No attribute specified" unless $parm{attribute};
282     croak "No pipeline specified" unless $parm{pipeline};
283    
284     $parm{predicate} ||= $parm{pipeline}->[-1];
285 ulpfr 13
286 ulpfr 10 croak "Cannot create index for table aready populated"
287     if $self->{nextk} > 1;
288 ulpfr 13
289 ulpfr 10 require WAIT::InvertedIndex;
290    
291     # backward compatibility stuff
292     my %opt = %parm;
293     for (qw(attribute pipeline predicate)) {
294     delete $opt{$_};
295     }
296 ulpfr 13
297 ulpfr 10 my $name = join '_', ($parm{attribute}, @{$parm{pipeline}});
298 dpavlin 111 my $idx = WAIT::InvertedIndex->new(file => $self->path.'/'.$name,
299 dpavlin 108 subname=> $name,
300     env => $self->{env},
301     maindbfile => $self->maindbfile,
302     tablename => $self->tablename,
303     filter => [@{$parm{pipeline}}], # clone
304     name => $name,
305     attr => $parm{attribute},
306     %opt, # backward compatibility stuff
307     );
308 ulpfr 10 # We will have to use $parm{predicate} here
309     push @{$self->{inverted}->{$parm{attribute}}}, $idx;
310     }
311    
312     sub dir {
313 dpavlin 111 $_[0]->path;
314 ulpfr 10 }
315    
316     =head2 C<$tb-E<gt>layout>
317    
318     Returns the reference to the associated parser object.
319    
320     =cut
321    
322     sub layout { $_[0]->{layout} }
323    
324     =head2 C<$tb-E<gt>fields>
325    
326     Returns the array of attribute names.
327    
328     =cut
329    
330    
331     sub fields { keys %{$_[0]->{inverted}}}
332    
333     =head2 C<$tb-E<gt>drop>
334    
335     Must be called via C<WAIT::Database::drop_table>
336    
337     =cut
338    
339     sub drop {
340     my $self = shift;
341 ulpfr 35
342 ulpfr 10 if ((caller)[0] eq 'WAIT::Database') { # database knows about this
343     $self->close; # just make sure
344 ulpfr 35
345 dpavlin 111 my $path = $self->path;
346 ulpfr 10
347     for (values %{$self->{indexes}}) {
348     $_->drop;
349     }
350 dpavlin 111 unlink "$path/records";
351     rmdir "$path/read" or warn "Could not rmdir '$path/read'";
352 dpavlin 89
353 ulpfr 10 } else {
354     croak ref($self)."::drop called directly";
355     }
356     }
357    
358     sub mrequire ($) {
359     my $module = shift;
360    
361     $module =~ s{::}{/}g;
362     $module .= '.pm';
363     require $module;
364     }
365    
366 dpavlin 108 sub path {
367     my($self) = @_;
368     return $self->{path} if $self->{path};
369     require Data::Dumper; print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . Data::Dumper->new([$self],[qw(self)])->Indent(1)->Useqq(1)->Dump; # XXX
370     require Carp;
371 dpavlin 111 Carp::confess("NO path attr");
372 dpavlin 108 }
373    
374 ulpfr 10 sub open {
375     my $self = shift;
376 dpavlin 111 my $path = $self->path . '/records';
377 ulpfr 10
378     mrequire ref($self); # that's tricky eh?
379     if (defined $self->{'layout'}) {
380     mrequire ref($self->{'layout'});
381     }
382     if (defined $self->{'access'}) {
383     mrequire ref($self->{'access'});
384     }
385     if (exists $self->{indexes}) {
386     require WAIT::Index;
387 dpavlin 108 for my $Ind (values %{$self->{indexes}}) {
388     for my $x (qw(mode env maindbfile)) {
389     $Ind->{$x} = $self->{$x};
390     }
391 ulpfr 10 }
392     }
393     if (exists $self->{inverted}) {
394     my ($att, $idx);
395     for $att (keys %{$self->{inverted}}) {
396     for $idx (@{$self->{inverted}->{$att}}) {
397 dpavlin 108 for my $x (qw(mode env maindbfile)) {
398     $idx->{$x} = $self->{$x};
399     }
400 ulpfr 10 }
401     }
402     require WAIT::InvertedIndex;
403     }
404 ulpfr 42
405 dpavlin 108 # CONFUSION: WAIT knows two *modes*: read-only or read-write.
406     # BerkeleyDB means file permissions when talking about Mode.
407     # BerkeleyDB has the "Flags" attribute to specify
408     # read/write/lock/etc subsystems.
409 ulpfr 42
410 dpavlin 108 my $flags;
411     if ($self->{mode} & O_RDWR) {
412     $flags = DB_CREATE; # | DB_INIT_MPOOL | DB_PRIVATE | DB_INIT_CDB;
413 dpavlin 111 warn "Flags on table $path set to 'writing'";
414 dpavlin 108 } else {
415     $flags = DB_RDONLY;
416 dpavlin 111 # warn "Flags on table $path set to 'readonly'";
417 dpavlin 108 }
418 ulpfr 10 unless (defined $self->{dbh}) {
419 dpavlin 108 my $subname = $self->tablename . "/records";
420     $self->{dbh} =
421     tie(%{$self->{db}}, 'BerkeleyDB::Btree',
422     $self->{env} ? (Env => $self->{env}) : (),
423     # Filename => $file,
424     Filename => $self->maindbfile,
425     Subname => $subname,
426     Mode => 0664,
427     Flags => $flags,
428     $WAIT::Database::Cachesize?(Cachesize => $WAIT::Database::Cachesize):(),
429     $WAIT::Database::Pagesize?(Pagesize => $WAIT::Database::Pagesize):(),
430     )
431     or die "Cannot tie: $BerkeleyDB::Error;
432     DEBUG: Filename[$self->{maindbfile}]subname[$subname]Mode[0664]Flags[$flags]";
433 ulpfr 10 }
434     $self;
435     }
436    
437     sub fetch_extern {
438     my $self = shift;
439    
440 ulpfr 13 # print "#@_", $self->{'access'}->{Mode}, "\n"; # DEBUGGING?
441 ulpfr 10 if (exists $self->{'access'}) {
442     mrequire ref($self->{'access'});
443     $self->{'access'}->FETCH(@_);
444     }
445     }
446    
447     sub fetch_extern_by_id {
448     my $self = shift;
449    
450     $self->fetch_extern($self->fetch(@_));
451     }
452    
453     sub _find_index {
454     my $self = shift;
455     my (@att) = @_;
456     my %att;
457     my $name;
458 ulpfr 13
459 ulpfr 10 @att{@att} = @att;
460    
461     KEY: for $name (keys %{$self->{indexes}}) {
462     my @iat = split /-/, $name;
463     for (@iat) {
464     next KEY unless exists $att{$_};
465     }
466     return $self->{indexes}->{$name};
467     }
468     return undef;
469     }
470    
471     sub have {
472     my $self = shift;
473     my %parm = @_;
474    
475 ulpfr 13 my $index = $self->_find_index(keys %parm) or return; # no index-no have
476    
477 ulpfr 10 defined $self->{db} or $self->open;
478     return $index->have(@_);
479     }
480    
481     sub insert {
482     my $self = shift;
483     my %parm = @_;
484    
485     defined $self->{db} or $self->open;
486    
487 ulpfr 13 # We should move all writing methods to a subclass to check only once
488     $self->{mode} & O_RDWR or croak "Cannot insert into table opened in RD_ONLY mode";
489    
490 ulpfr 10 my $tuple = join($;, map($parm{$_} || '', @{$self->{attr}}));
491     my $key;
492     my @deleted = keys %{$self->{deleted}};
493 ulpfr 19 my $gotkey = 0;
494 ulpfr 10
495     if (@deleted) {
496     $key = pop @deleted;
497     delete $self->{deleted}->{$key};
498 ulpfr 19 # Sanity check
499     if ($key && $key>0) {
500     $gotkey=1;
501 ulpfr 10 } else {
502 ulpfr 19 warn(sprintf("WAIT database inconsistency during insert ".
503     "key[%s]: Please rebuild index\n",
504     $key
505     ));
506     }
507     }
508     unless ($gotkey) {
509 ulpfr 10 $key = $self->{nextk}++;
510     }
511 dpavlin 108 $self->{db}->{$key} = $tuple;
512 ulpfr 10 for (values %{$self->{indexes}}) {
513     unless ($_->insert($key, %parm)) {
514     # duplicate key, undo changes
515     if ($key == $self->{nextk}-1) {
516     $self->{nextk}--;
517     } else {
518 ulpfr 19 # warn "setting key[$key] deleted during insert";
519 ulpfr 10 $self->{deleted}->{$key}=1;
520     }
521     my $idx;
522     for $idx (values %{$self->{indexes}}) {
523     last if $idx eq $_;
524     $idx->remove($key, %parm);
525     }
526     return undef;
527 ulpfr 13 }
528 ulpfr 10 }
529     if (defined $self->{inverted}) {
530     my $att;
531     for $att (keys %{$self->{inverted}}) {
532     if (defined $parm{$att}) {
533     map $_->insert($key, $parm{$att}), @{$self->{inverted}->{$att}};
534     #map $_->sync, @{$self->{inverted}->{$att}}
535     }
536     }
537     }
538     $key
539     }
540    
541     sub sync {
542     my $self = shift;
543 ulpfr 13
544 ulpfr 10 for (values %{$self->{indexes}}) {
545     map $_->sync, $_;
546     }
547     if (defined $self->{inverted}) {
548     my $att;
549     for $att (keys %{$self->{inverted}}) {
550     map $_->sync, @{$self->{inverted}->{$att}}
551     }
552     }
553     }
554    
555     sub fetch {
556     my $self = shift;
557     my $key = shift;
558    
559     return () if exists $self->{deleted}->{$key};
560 ulpfr 13
561 ulpfr 10 defined $self->{db} or $self->open;
562 dpavlin 108 $self->unpack($self->{db}->{$key});
563 ulpfr 10 }
564    
565     sub delete_by_key {
566     my $self = shift;
567     my $key = shift;
568    
569 ulpfr 19 unless ($key) {
570     Carp::cluck "Warning: delete_by_key called without key. Looks like a bug in WAIT?";
571     return;
572     }
573    
574 ulpfr 10 return $self->{deleted}->{$key} if defined $self->{deleted}->{$key};
575     my %tuple = $self->fetch($key);
576     for (values %{$self->{indexes}}) {
577     $_->delete($key, %tuple);
578     }
579     if (defined $self->{inverted}) {
580     # User *must* provide the full record for this or the entries
581     # in the inverted index will not be removed
582     %tuple = (%tuple, @_);
583     my $att;
584     for $att (keys %{$self->{inverted}}) {
585     if (defined $tuple{$att}) {
586     map $_->delete($key, $tuple{$att}), @{$self->{inverted}->{$att}}
587     }
588     }
589     }
590 ulpfr 19 # warn "setting key[$key] deleted during delete_by_key";
591 ulpfr 10 ++$self->{deleted}->{$key};
592     }
593    
594     sub delete {
595     my $self = shift;
596     my $tkey = $self->have(@_);
597 ulpfr 19 # warn "tkey[$tkey]\@_[@_]";
598 ulpfr 10 defined $tkey && $self->delete_by_key($tkey, @_);
599     }
600    
601     sub unpack {
602 laperla 31 my($self, $tuple) = @_;
603 ulpfr 10
604 laperla 31 unless (defined $tuple){
605     # require Carp; # unfortunately gives us "bizarre copy...." :-(((((
606     warn("Debug: somebody called unpack without argument tuple!");
607     return;
608     }
609    
610 ulpfr 10 my $att;
611     my @result;
612     my @tuple = split /$;/, $tuple;
613    
614     for $att (@{$self->{attr}}) {
615     push @result, $att, shift @tuple;
616     }
617     @result;
618     }
619    
620 ulpfr 19 sub set {
621     my ($self, $iattr, $value) = @_;
622 laperla 77 # in the rare case that they haven't written a single record yet, we
623     # make sure, the inverted inherits our $self->{mode}:
624     defined $self->{db} or $self->open;
625    
626 ulpfr 19 for my $att (keys %{$self->{inverted}}) {
627 dpavlin 108 if ($] > 5.003) { # avoid bug in perl up to 5.003_05
628 ulpfr 19 my $idx;
629     for $idx (@{$self->{inverted}->{$att}}) {
630     $idx->set($iattr, $value);
631     }
632     } else {
633     map $_->set($iattr, $value), @{$self->{inverted}->{$att}};
634     }
635     }
636    
637     1;
638     }
639    
640 ulpfr 10 sub close {
641     my $self = shift;
642    
643 dpavlin 108 require Carp; Carp::cluck("------->Closing A Table<-------");
644    
645 ulpfr 10 if (exists $self->{'access'}) {
646     eval {$self->{'access'}->close}; # dont bother if not opened
647     }
648 laperla 31 if ($WAIT::Index::VERSION) {
649     for (values %{$self->{indexes}}) {
650     $_->close();
651     }
652 ulpfr 10 }
653 laperla 31 if (defined $self->{inverted} && $WAIT::InvertedIndex::VERSION) {
654     # require WAIT::InvertedIndex; Uli: we can avoid closing indexes:
655     # if WAIT::InvertedIndex has not been loaded, they cannot have
656     # been altered so far
657 ulpfr 10 my $att;
658     for $att (keys %{$self->{inverted}}) {
659     if ($] > 5.003) { # avoid bug in perl up to 5.003_05
660     my $idx;
661     for $idx (@{$self->{inverted}->{$att}}) {
662     $idx->close;
663     }
664     } else {
665     map $_->close(), @{$self->{inverted}->{$att}};
666     }
667     }
668     }
669     if ($self->{dbh}) {
670     delete $self->{dbh};
671     }
672 dpavlin 108 untie %{$self->{db}};
673 dpavlin 111 for my $att (qw(env db path maindbfile)) {
674 dpavlin 108 delete $self->{$att};
675     warn "----->Deleted att $att<-----";
676     }
677 ulpfr 10
678     1;
679     }
680    
681 dpavlin 108 sub DESTROY {
682 ulpfr 19 my $self = shift;
683    
684 dpavlin 108 delete $self->{env};
685 ulpfr 19
686 dpavlin 108 # require Data::Dumper; print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . Data::Dumper->new([$self],[qw(self)])->Indent(1)->Useqq(1)->Dump; # XXX
687 ulpfr 19
688 ulpfr 13 }
689    
690 ulpfr 10 sub open_scan {
691     my $self = shift;
692     my $code = shift;
693    
694     $self->{dbh} or $self->open;
695     require WAIT::Scan;
696     new WAIT::Scan $self, $self->{nextk}-1, $code;
697     }
698    
699     sub open_index_scan {
700     my $self = shift;
701     my $attr = shift;
702     my $code = shift;
703     my $name = join '-', @$attr;
704    
705     if (defined $self->{indexes}->{$name}) {
706     $self->{indexes}->{$name}->open_scan($code);
707     } else {
708     croak "No such index '$name'";
709     }
710     }
711    
712     eval {sub WAIT::Query::Raw::new} unless defined \&WAIT::Query::Raw::new;
713    
714     sub prefix {
715     my ($self , $attr, $prefix) = @_;
716     my %result;
717    
718     defined $self->{db} or $self->open; # require layout
719    
720     for (@{$self->{inverted}->{$attr}}) {
721     my $result = $_->prefix($prefix);
722     if (defined $result) {
723     $result{$_->name} = $result;
724     }
725     }
726     bless \%result, 'WAIT::Query::Raw';
727     }
728    
729     sub intervall {
730     my ($self, $attr, $lb, $ub) = @_;
731     my %result;
732    
733     defined $self->{db} or $self->open; # require layout
734    
735     for (@{$self->{inverted}->{$attr}}) {
736     my $result = $_->intervall($lb, $ub);
737     if (defined $result) {
738     $result{$_->name} = $result;
739     }
740     }
741     bless \%result, 'WAIT::Query::Raw';
742     }
743    
744 dpavlin 108 sub search_ref {
745 ulpfr 19 my $self = shift;
746     my ($query, $attr, $cont, $raw);
747     if (ref $_[0]) {
748     $query = shift;
749 dpavlin 108 # require Data::Dumper; print STDERR "Line " . __LINE__ . ", File: " . __FILE__ . "\n" . Data::Dumper->new([$query],[qw()])->Indent(1)->Useqq(1)->Dump; # XXX
750    
751 ulpfr 19 $attr = $query->{attr};
752     $cont = $query->{cont};
753     $raw = $query->{raw};
754     } else {
755     require Carp;
756     Carp::cluck("Using three argument search interface is deprecated, use hashref interface instead");
757     $attr = shift;
758     $cont = shift;
759     $raw = shift;
760     $query = {
761     attr => $attr,
762     cont => $cont,
763     raw => $raw,
764     };
765     }
766    
767 ulpfr 10 my %result;
768    
769     defined $self->{db} or $self->open; # require layout
770    
771     if ($raw) {
772     for (@{$self->{inverted}->{$attr}}) {
773     my $name = $_->name;
774     if (exists $raw->{$name} and @{$raw->{$name}}) {
775     my $scale = 1/scalar(@{$raw->{$name}});
776 ulpfr 19 my %r = $_->search_raw($query, @{$raw->{$name}});
777 ulpfr 10 my ($key, $val);
778     while (($key, $val) = each %r) {
779     if (exists $result{$key}) {
780     $result{$key} += $val*$scale;
781     } else {
782     $result{$key} = $val*$scale;
783     }
784     }
785     }
786     }
787     }
788     if (defined $cont and $cont ne '') {
789     for (@{$self->{inverted}->{$attr}}) {
790 dpavlin 108 my $r = $_->search_ref($query, $cont);
791 ulpfr 10 my ($key, $val);
792 dpavlin 108 while (($key, $val) = each %$r) {
793 ulpfr 10 if (exists $result{$key}) {
794     $result{$key} += $val;
795     } else {
796     $result{$key} = $val;
797     }
798     }
799     }
800     }
801     # sanity check for deleted documents.
802     # this should not be necessary !@#$
803     for (keys %result) {
804     delete $result{$_} if $self->{deleted}->{$_}
805     }
806 dpavlin 108 \%result;
807 ulpfr 10 }
808    
809 dpavlin 108 sub parse_query {
810     my($self, $attr, $query) = @_;
811     return unless defined $query && length $query;
812     my %qt;
813     for (@{$self->{inverted}->{$attr}}) {
814     grep $qt{$_}++, $_->parse($query);
815     }
816     [keys %qt];
817     }
818    
819 ulpfr 10 sub hilight_positions {
820     my ($self, $attr, $text, $query, $raw) = @_;
821     my %pos;
822    
823     if (defined $raw) {
824 ulpfr 13 for (@{$self->{inverted}->{$attr}}) { # objects of type
825     # WAIT::InvertedIndex for
826     # this index field $attr
827 ulpfr 10 my $name = $_->name;
828     if (exists $raw->{$name}) {
829     my %qt;
830     grep $qt{$_}++, @{$raw->{$name}};
831     for ($_->parse_pos($text)) {
832     if (exists $qt{$_->[0]}) {
833     $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
834     }
835     }
836     }
837     }
838     }
839     if (defined $query) {
840     for (@{$self->{inverted}->{$attr}}) {
841     my %qt;
842    
843     grep $qt{$_}++, $_->parse($query);
844     for ($_->parse_pos($text)) {
845     if (exists $qt{$_->[0]}) {
846     if (exists $pos{$_->[1]}) { # perl -w ;-)
847     $pos{$_->[1]} = max($pos{$_->[1]}, length($_->[0]));
848     } else {
849     $pos{$_->[1]} = length($_->[0]);
850     }
851     }
852     }
853     }
854     }
855    
856     \%pos;
857     }
858    
859     sub hilight {
860 ulpfr 13 my ($tb, $buf, $qplain, $qraw) = @_;
861     my $layout = $tb->layout();
862    
863 ulpfr 10 my @result;
864    
865 ulpfr 13 $qplain ||= {};
866     $qraw ||= {};
867     my @ttxt = $layout->tag($buf);
868 ulpfr 10 while (@ttxt) {
869     no strict 'refs';
870     my %tag = %{shift @ttxt};
871     my $txt = shift @ttxt;
872     my $fld;
873    
874     my %hl;
875 ulpfr 13 for $fld (grep defined $tag{$_}, keys %$qplain, keys %$qraw) {
876 ulpfr 10 my $hp = $tb->hilight_positions($fld, $txt,
877 ulpfr 13 $qplain->{$fld}, $qraw->{$fld});
878 ulpfr 10 for (keys %$hp) {
879     if (exists $hl{$_}) { # -w ;-(
880     $hl{$_} = max($hl{$_}, $hp->{$_});
881     } else {
882     $hl{$_} = $hp->{$_};
883     }
884     }
885     }
886     my $pos;
887     my $qt = {_qt => 1, %tag};
888     my $pl = \%tag;
889     my $last = length($txt);
890     my @tmp;
891     for $pos (sort {$b <=> $a} keys %hl) {
892     unshift @tmp, $pl, substr($txt,$pos+$hl{$pos},$last-$pos-$hl{$pos});
893     unshift @tmp, $qt, substr($txt,$pos,$hl{$pos});
894     $last = $pos;
895     }
896     push @result, $pl, substr($txt,0,$last);
897     push @result, @tmp;
898     }
899     @result; # no speed necessary
900     }
901    
902     1;

Properties

Name Value
cvs2svn:cvs-rev 1.10

  ViewVC Help
Powered by ViewVC 1.1.26