/[Grep]/lib/Grep/Import/ScrapBook.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /lib/Grep/Import/ScrapBook.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 154 - (hide annotations)
Sun Jun 10 18:41:00 2007 UTC (16 years, 11 months ago) by dpavlin
File size: 2503 byte(s)
starting to extend Grep so it can import data from the local filesystem (for now,
pages from the ScrapBook Firefox plugin) and make them searchable
1 dpavlin 154 #!/usr/bin/perl
2    
3     use warnings;
4     use strict;
5    
6     package Grep::Import::ScrapBook;
7    
8     =head1 NAME
9    
10     Grep::Import::ScrapBook - importer for local ScrapBook pages
11    
12     =head1 CONFIGURATION
13    
14     You can symlink your ScrapBook directory
15    
16     ~/Grep/share/web/static$ ln -sf /home/dpavlin/private/ScrapBook scrapbook
17    
18     or modify L<ScrapBookDir> path (relative to Grep installation static root).
19    
20     =head1 SEE ALSO
21    
22     L<http://amb.vis.ne.jp/mozilla/scrapbook/> - ScrapBook FireFox extension
23    
24     =cut
25    
26     use XML::Simple;
27     use File::Slurp;
28     use Data::Dump qw/dump/;
29    
# Import pages saved by the ScrapBook Firefox extension into Grep.
#
# Called as a class method: Grep::Import::ScrapBook->import( $search ).
#
# Arguments:
#   $search - required; must pass ->isa('Grep::Search'), otherwise dies.
#
# Reads <app_root>/<StaticRoot>/<ScrapBookDir>/scrapbook.rdf, loads or creates
# a Grep::Model::Feed titled 'ScrapBook' for it, and then loads or creates one
# Grep::Model::Item for every RDF:Description entry whose
# data/<id>/index.html is readable.
#
# Returns 1, including when the ScrapBook directory or its scrapbook.rdf is
# missing (the import is then skipped with a warning in the log).
#
# Dies when $search is missing or of the wrong class, or when a readable
# content file cannot be read.
#
# NOTE(review): because this method is named "import", a plain
# "use Grep::Import::ScrapBook;" calls it without arguments and dies with
# "need search"; load the module with require or an empty import list.
sub import {
    my $self   = shift;
    my $search = shift or die "need search";

    # eval guards against unblessed references, on which ->isa itself would
    # die before the intended message could be reported
    die "search is ", ref($search), " and not Grep::Search"
        unless eval { $search->isa('Grep::Search') };

    my $dir =
        Jifty::Util->app_root . '/' .
        Jifty->config->framework('Web')->{'StaticRoot'} . '/' .
        Jifty->config->app('ScrapBookDir');
    $dir =~ s!//+!/!g;

    my $path = $dir . '/scrapbook.rdf';
    $path =~ s!//+!/!g;

    if ( ! -e $dir || ! -e $path ) {
        Jifty->log->warn("Skipping ScrapBook import $path: $!");
        return 1;
    }

    # ForceArray: without it XML::Simple returns a plain hash (not an array
    # reference) when the file contains exactly one RDF:Description element
    my $rdf = XMLin(
        $path,
        ForceArray => [ 'RDF:Description' ],
    ) || die "can't open $path: $!";

    my $feed = Grep::Model::Feed->new();
    $feed->load_or_create(
        uri   => 'file://' . $path,
        title => 'ScrapBook',
        #source => 'Grep::Source',
    );

    foreach my $item ( @{ $rdf->{'RDF:Description'} || [] } ) {

        next unless ref($item) eq 'HASH';

        # fully qualified: a bare dump() collides with the Perl builtin
        Jifty->log->debug( "item = ", Data::Dump::dump( $item ) );

        # copy non-RDF, non-empty attributes, keyed by their local name
        my $hash = {};
        foreach my $k ( keys %$item ) {
            next if $k =~ m/^RDF:/;
            my $value = $item->{$k};
            next if ! defined $value || $value eq '';
            ( my $n = $k ) =~ s/^\w+://;    # strip namespace
            $hash->{$n} = $value;
        }

        Jifty->log->debug( "hash = ", Data::Dump::dump( $hash ) );

        my $scrap_id = $hash->{id};
        if ( ! defined $scrap_id ) {
            Jifty->log->warn("skipping ScrapBook entry without id");
            next;
        }

        # fetch full-text content and import it

        my $content_path = $dir . '/data/' . $scrap_id . '/index.html';
        if ( ! -r $content_path ) {
            Jifty->log->warn("can't import $content_path: $!");
            next;
        }

        # test definedness, not truth: an empty page must not abort the import
        my $content = read_file( $content_path );
        die "can't read $content_path: $!" unless defined $content;

        # create date from id (ids look like YYYYMMDDhhmmss)

        my $dt;
        if ( my ( $year, $month, $day, $hour, $minute, $second ) =
            $scrap_id =~ m/^(\d{4})(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)$/ )
        {
            # DateTime->new dies on impossible dates; don't let one bad id
            # stop the remaining items from being imported
            $dt = eval {
                DateTime->new(
                    year   => $year,
                    month  => $month,
                    day    => $day,
                    hour   => $hour,
                    minute => $minute,
                    second => $second,
                    #time_zone => 'UTC',
                );
            };
            warn "can't parse date from ", $scrap_id, ": $@" unless $dt;
        }
        else {
            warn "can't parse date from ", $scrap_id;
        }

        my $i = Grep::Model::Item->new();
        my ( $item_id, $msg ) = $i->load_or_create(
            in_feed => $feed,
            title   => $hash->{title},
            link    => $hash->{source},
            content => $content,
            # use the parsed timestamp; fall back to the raw id if unparsable
            issued  => ( defined $dt ? $dt : $scrap_id ),
        );

        warn ">> item ",
            ( defined $item_id ? $item_id : '' ), " ",
            ( defined $msg     ? $msg     : '' ), "\n";

    }

    return 1;
}
123    
124    
125     1;

  ViewVC Help
Powered by ViewVC 1.1.26