/[Grep]/lib/Grep/Import/ScrapBook.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /lib/Grep/Import/ScrapBook.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 158 - (hide annotations)
Sun Jun 10 20:16:30 2007 UTC (16 years, 11 months ago) by dpavlin
File size: 3259 byte(s)
fixed requirements, cleanup API, resolve links in imported content
1 dpavlin 154 #!/usr/bin/perl
2    
3     use warnings;
4     use strict;
5    
6     package Grep::Import::ScrapBook;
7    
8     =head1 NAME
9    
10     Grep::Import::ScrapBook - importer for local ScrapBook pages
11    
12     =head1 CONFIGURATION
13    
14     You can symlink your ScrapBook directory
15    
16     ~/Grep/share/web/static$ ln -sf /home/dpavlin/private/ScrapBook scrapbook
17    
18     or modify the C<ScrapBookDir> path (relative to the Grep installation's static root).
19    
20     =cut
21    
22     use XML::Simple;
23     use File::Slurp;
24 dpavlin 158 use HTML::ResolveLink;
25    
26 dpavlin 154 use Data::Dump qw/dump/;
27    
# Import every page listed in the ScrapBook index (scrapbook.rdf) as a
# Grep::Model::Item inside a feed titled 'ScrapBook', and add each newly
# created item to the search index.
#
# The ScrapBook directory is built from Jifty::Util->app_root, the framework
# Web/StaticRoot setting and the application ScrapBookDir setting.  Records
# are created on behalf of the user whose e-mail matches the application
# ScrapBookOwner setting.
#
# Returns:
#   1          when the directory or scrapbook.rdf does not exist (skipped)
#   hash ref   with the counters total / new / old / failure otherwise
#              (undef when the index lists no items at all)
#
# Dies when the index can't be parsed, the owner can't be found, or an
# item's content can't be read.
#
# NOTE: Perl calls a package's import method on `use`, so loading this
# module with a plain `use Grep::Import::ScrapBook;` would start an import.
# Load it with `require` or `use Grep::Import::ScrapBook ();` instead.
sub import {
    my $self = shift;

    my $dir = join '/',
        Jifty::Util->app_root,
        Jifty->config->framework('Web')->{'StaticRoot'},
        Jifty->config->app('ScrapBookDir');

    my $path = $dir . '/scrapbook.rdf';
    $path =~ s!//+!/!g;    # collapse duplicate slashes left by the joins

    if ( ! -e $dir || ! -e $path ) {
        Jifty->log->warn("Skipping ScrapBook import $path: $!");
        return 1;
    }

    # ForceArray keeps RDF:Description an array ref even when the index
    # holds a single entry (XML::Simple would otherwise return a bare hash
    # ref and the loop below would die).  KeyAttr => [] disables array
    # folding so the entries are never collapsed into a keyed hash.
    my $rdf = XMLin(
        $path,
        ForceArray => [ 'RDF:Description' ],
        KeyAttr    => [],
    ) || die "can't open $path: $!";

    my $owner = Grep::Model::User->new();
    $owner->load_by_cols( email => Jifty->config->app('ScrapBookOwner') );
    die "can't find ScrapBookOwner ", Jifty->config->app('ScrapBookOwner') unless ( $owner->id );

    Jifty->log->info( "Using user ", $owner->id, " from ", $owner->email, " for import" );

    my $feed = Grep::Model::Feed->new( current_user => $owner );
    $feed->load_or_create(
        uri   => 'file://' . $path,
        title => 'ScrapBook',
        owner => $owner,
    );

    my $search = Grep::Search->new;

    my $stats;

    foreach my $item ( @{ $rdf->{'RDF:Description'} || [] } ) {

        $stats->{total}++;

        # copy non-RDF, non-empty attributes with the namespace prefix removed
        my $hash;
        foreach my $k ( keys %$item ) {
            next if $k =~ m/^RDF:/;
            next if ( ! defined $item->{$k} || $item->{$k} eq '' );
            my $n = $k;
            $n =~ s/^\w+://;    # strip namespace
            $hash->{$n} = $item->{$k};
        }

        # an entry without id has no data directory; count it as a failure
        # up front instead of building a bogus path from undef
        my $id = $hash->{id};
        if ( ! defined $id ) {
            Jifty->log->warn("can't import RDF:Description without id");
            $stats->{failure}++;
            next;
        }

        # fetch full-text content and import it

        my $content_path = $dir . '/data/' . $id . '/index.html';
        if ( ! -r $content_path ) {
            Jifty->log->warn("can't import $content_path: $!");
            $stats->{failure}++;
            next;
        }

        # test definedness, not truth: an empty (or "0") page is valid
        # content and must not abort the whole import
        my $content = read_file( $content_path );
        die "can't read $content_path: $!" unless defined $content;

        my $resolver = HTML::ResolveLink->new( base => $content_path );
        $content = $resolver->resolve( $content );

        # create date from id (ScrapBook ids look like YYYYMMDDhhmmss)
        # NOTE(review): $dt is not used below -- issued receives the raw id.
        # Kept because DateTime->new still validates the date (it dies on
        # impossible values); confirm whether issued should be $dt instead.
        my $dt;
        if ( $id =~ m/^(\d{4})(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)$/ ) {
            $dt = DateTime->new(
                year   => $1,
                month  => $2,
                day    => $3,
                hour   => $4,
                minute => $5,
                second => $6,
                #time_zone => 'UTC',
            );
        } else {
            warn "can't parse date from ", $id;
        }

        my $i = Grep::Model::Item->new( current_user => $owner );
        my ($ok,$msg) = $i->load_or_create(
            in_feed => $feed,
            title   => $hash->{title},
            link    => $hash->{source},
            content => $content,
            issued  => $id,
        );

        if ( ! $ok ) {
            Jifty->log->error( $msg );
            $stats->{failure}++;
            next;
        }

        # a message starting with "Found" means the item already existed;
        # only freshly created items are added to the search index
        if ( $msg && $msg =~ m/^Found/ ) {
            $stats->{old}++;
        } else {
            $stats->{new}++;
            Jifty->log->info("created ", $i->id ," ", $i->link, " ", length( $content ), " bytes");
            $search->add( $i, $owner->id );
        }

    }

    $search->finish;

    return $stats;
}
147    
148 dpavlin 155 =head1 SEE ALSO
149 dpavlin 154
150 dpavlin 155 L<http://amb.vis.ne.jp/mozilla/scrapbook/> - ScrapBook FireFox extension
151    
152     =cut
153    
154 dpavlin 154 1;

  ViewVC Help
Powered by ViewVC 1.1.26