/[Grep]/lib/Grep/Action/AddFeed.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /lib/Grep/Action/AddFeed.pm

Parent Directory | Revision Log


Revision 73 - (show annotations)
Fri Feb 23 11:48:39 2007 UTC (17 years, 2 months ago) by dpavlin
File size: 3036 byte(s)
each feed now has default source class which is called for it. Added PhpWiki
source. Code still has problems with Lucene locking.
1 use strict;
2 use warnings;
3
4 =head1 NAME
5
6 Grep::Action::AddFeed
7
8 =cut
9
10 package Grep::Action::AddFeed;
11 use base qw/Grep::Action::CreateFeed/;
12
13 use Feed::Find;
14 use LWP::UserAgent;
15 use Data::Dump qw/dump/;
16
=head2 canonicalize_uri

Replace the first standalone word C<grep> in the URI argument with the
C<%s> placeholder and record a canonicalization note on the C<uri>
argument when a replacement was made.

Returns the (possibly modified) value. An undefined value is returned
untouched.

=cut

sub canonicalize_uri {
    my $self  = shift;
    my $value = shift;

    # Nothing to canonicalize; bailing out here also avoids "uninitialized"
    # warnings from the interpolation and substitution below.
    return $value unless defined $value;

    warn "uri: $value";

    # No /g on purpose: only the first occurrence becomes the placeholder.
    if ( $value =~ s/\bgrep\b/%s/ ) {
        $self->canonicalization_note( uri => 'Replaced grep with %s' );
    }

    return $value;
}
32
=head2 canonicalize_cookie

Remove a leading C<Cookie:> header name from the value and replace every
EOL character (CR or LF) with a space. A canonicalization note describing
what was changed is recorded on the C<cookie> argument.

Returns the (possibly modified) value. An undefined value is returned
untouched.

=cut

# Disabled for now: the sub is named xx_canonicalize_cookie, not
# canonicalize_cookie. NOTE(review): presumably the xx_ prefix keeps the
# framework from picking it up as the canonicalizer -- confirm.
sub xx_canonicalize_cookie {
    my $self  = shift;
    my $value = shift;

    # Nothing to canonicalize; also avoids "uninitialized" warnings below.
    return $value unless defined $value;

    warn "cookie: $value";

    # Collect the notes and report them once: a second note recorded for the
    # same argument would replace the first one.
    my @notes;
    push @notes, 'Removed Cookie: header' if $value =~ s/^Cookie:\s+//;
    push @notes, 'Converted EOL to space' if $value =~ s/[\n\r]/ /g;

    # The notes belong to the cookie argument, not to uri.
    $self->canonicalization_note( cookie => join( '; ', @notes ) ) if @notes;

    return $value;
}
=head2 take_action

Fetch the page named by the C<uri> argument and work out how it can be
used as a feed. Every C<%s> placeholder in the URI is replaced with the
search moniker C<grep> for the request; the C<cookie> argument, when
supplied, is sent as the C<Cookie> header.

=over 4

=item *

If the response Content-type mentions C<xml>, the URI is assumed to be a
feed and the parent C<take_action> is called.

=item *

Otherwise L<Feed::Find> is asked for feed links in the returned HTML. The
first link containing the search moniker is stored as the new C<uri>
argument (moniker replaced with C<%s>) and the parent C<take_action> is
called.

=item *

Failing that, C<< Grep::Source->content_class >> is tried on the content;
if it returns a class, the source object is stored in the C<source>
argument and the parent C<take_action> is called.

=back

Returns whatever the parent C<take_action> returns when one of the above
succeeds. When the URI is missing or the HTTP request fails, an error is
set on the result and the value of that call is returned. When nothing
usable is found, an error is set on the result and 0 is returned.

=cut

sub take_action {
    my $self = shift;

    my @ARGS = @_;

    # Matches the {x!26} and {x!3b} escapes, which are decoded below to the
    # characters with hex codes 26 ('&') and 3b (';').
    # NOTE(review): presumably these escapes are produced by the form
    # submission layer -- confirm.
    # The braces are escaped explicitly because an unescaped literal "{" in
    # a pattern is deprecated (and in some positions fatal) in newer perls.
    my $escape_re = qr/\{x!(26|3b)\}/i;

    my $ua = LWP::UserAgent->new;

    # Only send a Cookie header when a cookie was actually supplied.
    my $cookie = $self->argument_value('cookie');
    if ( defined $cookie && length $cookie ) {
        if ( $cookie =~ s/$escape_re/chr(hex($1))/ge ) {
            $self->argument_value( 'cookie', $cookie );
        }

        Jifty->log->debug("using cookie: $cookie");
        $ua->default_header( 'Cookie' => $cookie );
    }

    my $search_moniker = 'grep';
    my $moniker_re     = qr/\b\Q$search_moniker\E\b/;

    my $uri = $self->argument_value('uri');
    return $self->result->error('No URI supplied')
        unless defined $uri && length $uri;

    $uri =~ s/$escape_re/chr(hex($1))/ge;

    Jifty->log->debug("trying to find feed on $uri");

    # Fill in the %s placeholder by substitution instead of sprintf(): the
    # URI is user input and may contain percent-encoded characters (%20,
    # %3D, ...) which sprintf() would treat as format directives.
    ( my $fetch_uri = $uri ) =~ s/%s/$search_moniker/g;

    my $r = $ua->get($fetch_uri);

    return $self->result->error( $r->status_line . " from $uri" )
        unless $r->is_success;

    my $ct = $r->header('Content-type');
    unless ($ct) {
        warn "can't get Content-type";
        $ct = '' unless defined $ct;    # keep the checks below warning-free
    }
    my $content = $r->content;

    Jifty->log->debug( "got ", length($content), " bytes $ct" );

    if ( $ct =~ /xml/ ) {
        $self->result->message("Assuming $uri is feed from $ct");
        return $self->SUPER::take_action(@ARGS);
    }

    my @feeds = Feed::Find->find_in_html( \$content );

    if (@feeds) {

        Jifty->log->info( "found possible feeds: ", dump(@feeds) );

        # Keep only the feeds that mention the search moniker and turn the
        # first occurrence of the moniker back into the %s placeholder.
        my @search_feeds = map {
            my $t = $_;
            $t =~ s/$moniker_re/%s/;
            $t
        } grep { /$moniker_re/ } @feeds;

        if ( my $feed_uri = shift @search_feeds ) {

            $self->result->message(
                'Found ' . @feeds . " feeds, using first: $feed_uri" );
            $self->argument_value( 'uri', $feed_uri );

            Jifty->log->debug(
                "calling parent take_action with new uri $feed_uri");

            return $self->SUPER::take_action(@ARGS);

        }
        else {
            Jifty->log->debug( "found feeds ", dump(@feeds),
                " but none of them has search moniker!" );
        }
    }

    Jifty->log->debug("no feeds found, trying content_class detection");

    my $source = Grep::Source->new();

    if ( my $class = $source->content_class($content) ) {

        Jifty->log->debug("$class registred for feed $uri");

        $self->argument_value( 'source', $source );
        $self->result->message("Found $class scraper for $uri");
        return $self->SUPER::take_action(@ARGS);

    }

    $self->result->error('No feeds found on supplied URI');
    return 0;
}
143
144 1;
145

  ViewVC Help
Powered by ViewVC 1.1.26