22 |
use XML::Simple; |
use XML::Simple; |
23 |
use File::Slurp; |
use File::Slurp; |
24 |
use HTML::ResolveLink; |
use HTML::ResolveLink; |
25 |
|
use HTML::TreeBuilder; |
26 |
use Data::Dump qw/dump/; |
use Data::Dump qw/dump/; |
27 |
|
|
28 |
sub import { |
sub import { |
86 |
|
|
87 |
# fetch full-text content and import it |
# fetch full-text content and import it |
88 |
|
|
89 |
my $content_path = $dir . '/data/' . $hash->{id} . '/index.html'; |
my $rel_path = '/data/' . $hash->{id} . '/index.html'; |
90 |
|
|
91 |
|
my $content_path = $dir . $rel_path; |
92 |
if ( ! -r $content_path ) { |
if ( ! -r $content_path ) { |
93 |
Jifty->log->warn("can't import $content_path: $!"); |
Jifty->log->warn("can't import $content_path: $!"); |
94 |
$stats->{failure}++; |
$stats->{failure}++; |
97 |
my $content = read_file( $content_path ) or |
my $content = read_file( $content_path ) or |
98 |
die "can't read $content_path: $!"; |
die "can't read $content_path: $!"; |
99 |
|
|
100 |
my $resolver = HTML::ResolveLink->new( base => $content_path ); |
my $tree = HTML::TreeBuilder->new or die "can't create html tree"; |
101 |
$content = $resolver->resolve( $content ); |
$tree->parse( $content ) or die "can't parse fetched content"; |
102 |
|
|
103 |
|
my $body = $tree->look_down( '_tag', 'body' ); |
104 |
|
|
105 |
|
my $resolver = HTML::ResolveLink->new( base => '/static/' . Jifty->config->app('ScrapBookDir') . $rel_path ); |
106 |
|
$content = $resolver->resolve( $body->as_HTML ); |
107 |
|
|
108 |
# create date from id |
# create date from id |
109 |
|
|