/[Grep]/lib/Grep/Import/ScrapBook.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /lib/Grep/Import/ScrapBook.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 158 - (show annotations)
Sun Jun 10 20:16:30 2007 UTC (16 years, 10 months ago) by dpavlin
File size: 3259 byte(s)
fixed requirements, cleanup API, resolve links in imported content
1 #!/usr/bin/perl
2
3 use warnings;
4 use strict;
5
6 package Grep::Import::ScrapBook;
7
8 =head1 NAME
9
10 Grep::Import::ScrapBook - importer for local ScrapBook pages
11
12 =head1 CONFIGURATION
13
14 You can symlink your ScrapBook directory
15
16 ~/Grep/share/web/static$ ln -sf /home/dpavlin/private/ScrapBook scrapbook
17
18 or set the C<ScrapBookDir> path in the application configuration (relative to the static root of the Grep installation).
19
20 =cut
21
22 use XML::Simple;
23 use File::Slurp;
24 use HTML::ResolveLink;
25
26 use Data::Dump qw/dump/;
27
# Import the pages listed in ScrapBook's scrapbook.rdf as items of a
# 'ScrapBook' feed and add newly created items to the search index.
#
# The ScrapBook directory is located from the Jifty configuration:
# app_root / framework Web StaticRoot / app ScrapBookDir.
#
# Returns 1 when the directory or scrapbook.rdf does not exist (import is
# skipped); otherwise returns the statistics hash reference with the counters
# total, failure, old and new (only counters that were incremented exist, and
# the value is undef when the RDF contains no items).
#
# Dies when the RDF file can't be parsed or when no user matches the
# ScrapBookOwner e-mail from the application configuration.
#
# NOTE(review): Perl calls a package's import method implicitly on
# "use Grep::Import::ScrapBook;" -- confirm that callers load this module
# with require (or "use ... ()") so the import does not run at load time.
sub import {
    my $self = shift;

    my $dir =
        Jifty::Util->app_root . '/' .
        Jifty->config->framework('Web')->{'StaticRoot'} . '/' .
        Jifty->config->app('ScrapBookDir');

    my $path = $dir . '/scrapbook.rdf';
    $path =~ s!//+!/!g;    # collapse duplicate slashes left by joining

    if ( ! -e $dir || ! -e $path ) {
        Jifty->log->warn("Skipping ScrapBook import $path: $!");
        return 1;
    }

    # ForceArray keeps RDF:Description an array reference even when the
    # file holds a single description; without it XML::Simple returns a
    # plain hash reference and the loop below would die.
    my $rdf = XMLin(
        $path,
        ForceArray => [ 'RDF:Description' ],
        # KeyAttr => [ qw/RDF:about/ ],
    ) || die "can't open $path: $!";

    # warn "## original rdf -> ", dump( $rdf );

    my $owner = Grep::Model::User->new();
    $owner->load_by_cols( email => Jifty->config->app('ScrapBookOwner') );
    die "can't find ScrapBookOwner ", Jifty->config->app('ScrapBookOwner') unless ( $owner->id );

    Jifty->log->info( "Using user ", $owner->id, " from ", $owner->email, " for import" );

    my $feed = Grep::Model::Feed->new( current_user => $owner );
    $feed->load_or_create(
        uri => 'file://' . $path,
        title => 'ScrapBook',
        #source => 'Grep::Source',
        owner => $owner,
    );

    my $search = Grep::Search->new;

    my $stats;

    # an RDF file without any description simply yields no items
    my @items = @{ $rdf->{'RDF:Description'} || [] };

    foreach my $item ( @items ) {

        $stats->{total}++;

        #warn "## item = ",dump( $item );

        # copy non-empty, non-RDF attributes with the namespace prefix removed
        my $hash;
        foreach my $k ( keys %$item ) {
            next if $k =~ m/^RDF:/;
            next if ( $item->{$k} eq '' );
            my $n = $k;
            $n =~ s/^\w+://;    # strip namespace
            $hash->{$n} = $item->{$k};
        }

        #warn "## hash = ", dump( $hash );

        # the id names the data directory, so an item without one can't be
        # located on disk
        my $id = $hash->{id};
        if ( ! defined $id ) {
            Jifty->log->warn("can't import item without id");
            $stats->{failure}++;
            next;
        }

        # fetch full-text content and import it

        my $content_path = $dir . '/data/' . $id . '/index.html';
        if ( ! -r $content_path ) {
            Jifty->log->warn("can't import $content_path: $!");
            $stats->{failure}++;
            next;
        }

        # read_file reports read errors itself; an empty page is a failed
        # capture, not a reason to abort the whole import
        my $content = read_file( $content_path );
        if ( ! defined $content || $content eq '' ) {
            Jifty->log->warn("can't import $content_path: file is empty");
            $stats->{failure}++;
            next;
        }

        my $resolver = HTML::ResolveLink->new( base => $content_path );
        $content = $resolver->resolve( $content );

        # create date from id (YYYYMMDDhhmmss)
        #
        # NOTE(review): $dt is built but never used -- 'issued' below is given
        # the raw id string. Confirm whether 'issued' should receive $dt.

        my $dt;
        if ( $id =~ m/^(\d{4})(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)$/ ) {
            $dt = DateTime->new(
                year => $1,
                month => $2,
                day => $3,
                hour => $4,
                minute => $5,
                second => $6,
                #time_zone => 'UTC',
            );
        } else {
            Jifty->log->warn("can't parse date from ", $id);
        }

        my $i = Grep::Model::Item->new( current_user => $owner );
        my ($ok,$msg) = $i->load_or_create(
            in_feed => $feed,
            title => $hash->{title},
            link => $hash->{source},
            content => $content,
            issued => $id,
        );

        if ( ! $ok ) {
            Jifty->log->error( $msg );
            $stats->{failure}++;
            next;
        }

        # a message starting with 'Found' marks an item that already existed;
        # only newly created items are added to the search index
        if ( $msg && $msg =~ m/^Found/ ) {
            $stats->{old}++;
        } else {
            $stats->{new}++;
            Jifty->log->info("created ", $i->id ," ", $i->link, " ", length( $content ), " bytes");
            $search->add( $i, $owner->id );
        }

    }

    $search->finish;

    return $stats;
}
147
148 =head1 SEE ALSO
149
150 L<http://amb.vis.ne.jp/mozilla/scrapbook/> - ScrapBook Firefox extension
151
152 =cut
153
154 1;

  ViewVC Help
Powered by ViewVC 1.1.26