package Search::Estraier::Document;

use Carp qw/croak confess/;

=head1 Search::Estraier::Document

This class implements a Document for HyperEstraier: a collection of
attributes (key=value), keyword vectors (also key/value pairs), display
text and hidden text.

=head2 new

Create a new document, either empty or parsed from a draft.

  my $doc  = Search::Estraier::Document->new;
  my $doc2 = Search::Estraier::Document->new( $draft );

A draft is parsed line by line. Before the first empty line, C<name=value>
lines become attributes, a C<%VECTOR> line holds tab-separated
keyword/value pairs, and any other line starting with C<%> is skipped.
After the first empty line every line is text: lines starting with a tab
are hidden text (the tab is stripped), all others are display text.

=cut

sub new {
    my $class = shift;
    my $draft = shift;

    my $self = {};
    bless($self, $class);

    # -1 is the id given to every freshly created document
    $self->{id} = -1;

    return $self unless ($draft);

    my $in_text = 0;
    foreach my $line (split(/\n/, $draft)) {

        if ($in_text) {
            if ($line =~ /^\t/) {
                push @{ $self->{htexts} }, substr($line, 1);
            } else {
                push @{ $self->{dtexts} }, $line;
            }
            next;
        }

        if ($line =~ m/^%VECTOR\t(.+)$/) {
            # Fields come in keyword/value pairs. Consume two at a time:
            # incrementing the loop variable of a range foreach does not
            # skip elements, so an index loop would also store every
            # value as a keyword. A trailing keyword without a value is
            # dropped.
            my @fields = split(/\t/, $1);
            while (@fields >= 2) {
                my ($word, $score) = splice(@fields, 0, 2);
                $self->{kwords}->{$word} = $score;
            }
        } elsif ($line =~ m/^%/) {
            # other %-prefixed lines are skipped silently
        } elsif ($line =~ m/^$/) {
            # first empty line ends the header section
            $in_text = 1;
        } elsif ($line =~ m/^([^=]+)=(.*)$/) {
            # split on the FIRST '=' so values may contain '=' themselves
            # (e.g. URLs with a query string); empty values are accepted
            $self->{attrs}->{$1} = $2;
        } else {
            warn "draft ignored: $line\n";
        }
    }

    return $self;
}


=head2 add_attr

Set an attribute, replacing any previous value.

  $doc->add_attr( name => 'value' );

Delete attribute using

  $doc->add_attr( name => undef );

=cut

# NOTE(review): the excerpt this block was rebuilt from starts inside this
# sub, at "my $attrs = {@_};". The sub header and "my $self = shift;" are
# reconstructed from the POD example and from the body's use of $self --
# confirm against the full file. _s() is defined outside this excerpt.
sub add_attr {
    my $self = shift;
    my $attrs = {@_};

    while (my ($name, $value) = each %{ $attrs }) {
        if (! defined($value)) {
            delete( $self->{attrs}->{_s($name)} );
        } else {
            $self->{attrs}->{_s($name)} = _s($value);
        }
    }

    return 1;
}


# NOTE(review): the excerpt skips the module text between add_attr and
# attr_names. Only the closing lines of one sub in that gap were visible
# ("return $self->{id}; }"); its header was not, so it is not reproduced
# here -- keep the existing code for that region.


=head2 attr_names

Returns array with attribute names from document object, sorted.
Must be called in list context.

  my @attrs = $doc->attr_names;

=cut

sub attr_names {
    my $self = shift;
    croak "attr_names return array, not scalar" if (! wantarray);
    return sort keys %{ $self->{attrs} };
}


=head2 attr

Returns value of an attribute, or C<undef> if it is not set.

  my $value = $doc->attr( 'attribute' );

=cut

sub attr {
    my $self = shift;
    my $name = shift;

    return $self->{'attrs'}->{ $name };
}


=head2 texts

Returns array with text sentences (empty list if the document has no
display text). Must be called in list context.

  my @texts = $doc->texts;

=cut

sub texts {
    my $self = shift;
    confess "texts return array, not scalar" if (! wantarray);
    # fall back to an empty list: dereferencing an unset dtexts would die
    # under strict refs
    return @{ $self->{dtexts} || [] };
}

=head2 cat_texts

Return whole display text as single scalar, sentences joined by a space.

  my $text = $doc->cat_texts;

=cut

sub cat_texts {
    my $self = shift;
    return join(' ', @{ $self->{dtexts} || [] });
}

=head2 dump_draft

Dump draft data from document object, in the format accepted by C<new>.
Attributes and keywords are written in sorted order.

  print $doc->dump_draft;

=cut

sub dump_draft {
    my $self = shift;
    my $draft = '';

    foreach my $attr_name (sort keys %{ $self->{attrs} || {} }) {
        $draft .= $attr_name . '=' . $self->{attrs}->{$attr_name} . "\n";
    }

    my $kwords = $self->{kwords} || {};
    if (%{ $kwords }) {
        # a single '%': this is the marker new() looks for
        $draft .= '%VECTOR';
        foreach my $key (sort keys %{ $kwords }) {
            $draft .= "\t$key\t$kwords->{$key}";
        }
        $draft .= "\n";
    }

    # empty line separates the header from the text section
    $draft .= "\n";

    $draft .= join("\n", @{ $self->{dtexts} || [] }) . "\n";

    # only emit hidden text when there is some; a bare "\t\n" would be
    # parsed back as one empty hidden text
    my @hidden = @{ $self->{htexts} || [] };
    $draft .= "\t" . join("\n\t", @hidden) . "\n" if (@hidden);

    return $draft;
}

=head2 delete

Empty document object: removes attributes, texts and keywords and resets
the id to -1.

  $doc->delete;

=cut

sub delete {
    my $self = shift;

    # hidden text is stored under 'htexts'; 'stexts' is kept in the list
    # because the original code cleared it
    foreach my $data (qw/attrs dtexts htexts stexts kwords/) {
        delete($self->{$data});
    }

    $self->{id} = -1;

    return 1;
}