/[Grep]/lib/Grep/Action/AddFeed.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /lib/Grep/Action/AddFeed.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 78 - (show annotations)
Fri Feb 23 17:34:20 2007 UTC (17 years, 2 months ago) by dpavlin
File size: 3097 byte(s)
give base_uri to Feed::Find->find_in_html so that relative links works
1 use strict;
2 use warnings;
3
4 =head1 NAME
5
6 Grep::Action::AddFeed
7
8 =cut
9
10 package Grep::Action::AddFeed;
11 use base qw/Grep::Action::CreateFeed/;
12
13 use Feed::Find;
14 use LWP::UserAgent;
15 use Data::Dump qw/dump/;
16
17 =head2 canonicalize_uri
18
19 Replace the first whole-word C<grep> with C<%s> in the URI argument
20
21 =cut
22
# Canonicalizer for the "uri" argument.
#
# Replaces the first whole-word occurrence of "grep" in the value with the
# "%s" placeholder and, when a replacement happened, records a
# canonicalization note for the "uri" field.
#
# Arguments: $value - the submitted URI (may be undef).
# Returns:   the (possibly modified) value; undef is passed through as-is.
sub canonicalize_uri {
    my $self  = shift;
    my $value = shift;

    # Nothing to canonicalize; bail out before the value is interpolated or
    # matched, which would only emit "uninitialized value" warnings.
    return $value unless defined $value;

    warn "uri: $value";

    if ( $value =~ s/\bgrep\b/%s/ ) {
        $self->canonicalization_note( uri => 'Replaced grep with %s' );
    }

    return $value;
}
32
33 =head2 canonicalize_cookie
34
35 Remove C<Cookie:> header from beginning and replace EOL with space.
36
37 =cut
38
39 # disabled for now
# Canonicalizer for the "cookie" argument. It is kept under the xx_ prefixed
# name from the original source, where it is marked "disabled for now".
#
# Strips a leading "Cookie:" header name (plus following whitespace) and
# turns every CR / LF character into a single space, recording a
# canonicalization note on the "cookie" field for each change made.
#
# Arguments: $value - the submitted cookie text (may be undef).
# Returns:   the cleaned-up value; undef is passed through as-is.
sub xx_canonicalize_cookie {
    my $self  = shift;
    my $value = shift;

    # Avoid "uninitialized value" warnings from the warn and the regexes.
    return $value unless defined $value;

    warn "cookie: $value";

    # Notes belong to the field being canonicalized ("cookie"), not "uri".
    if ( $value =~ s/^Cookie:\s+// ) {
        $self->canonicalization_note( cookie => 'Removed Cookie: header' );
    }

    if ( $value =~ s/[\n\r]/ /g ) {
        $self->canonicalization_note( cookie => 'Converted EOL to space' );
    }

    return $value;
}
54 =head2 take_action
55
56 =cut
57
# Fetch the submitted URI and decide how it can be used as a feed before
# delegating to the parent class' take_action.
#
# Steps, in order:
#   1. Decode {x!26} / {x!3b} tokens in the "cookie" and "uri" arguments
#      back to "&" and ";", and send the cookie (if any) with the request.
#   2. GET the URI with the "%s" placeholder replaced by the search moniker.
#   3. If the response Content-type mentions xml, treat the URI as a feed.
#   4. Otherwise look for feed links in the HTML; the first one containing
#      the search moniker becomes the new "uri" argument (moniker replaced
#      by "%s").
#   5. Otherwise ask Grep::Source whether it has a content class for the
#      page and, if so, pass that source along in the "source" argument.
#
# Returns whatever the parent take_action returns on success, the result of
# $self->result->error(...) when the HTTP request fails, or 0 when nothing
# usable was found on the page.
sub take_action {
    my $self = shift;

    my @ARGS = @_;

    # Custom action code

    my $ua = LWP::UserAgent->new;

    # The cookie argument may be absent; only touch it when defined so
    # that no "uninitialized value" warnings are produced.
    my $cookie = $self->argument_value('cookie');
    if ( defined $cookie && $cookie =~ s/\{x!(26|3b)\}/chr(hex($1))/gei ) {
        $self->argument_value( 'cookie', $cookie );
    }

    # Do not send an empty Cookie: header when no cookie was supplied.
    if ( defined $cookie && length $cookie ) {
        Jifty->log->debug("using cookie: $cookie");
        $ua->default_header( 'Cookie' => $cookie );
    }

    my $search_moniker = 'grep';
    my $moniker_re     = qr/\b\Q$search_moniker\E\b/;

    # NOTE(review): unlike the cookie, the decoded URI is not written back
    # with argument_value (same as the original code) -- confirm intended.
    my $uri = $self->argument_value('uri');
    $uri = '' unless defined $uri;
    $uri =~ s/\{x!(26|3b)\}/chr(hex($1))/gei;

    Jifty->log->debug("trying to find feed on $uri");

    # Fill in the placeholder by plain substitution instead of sprintf:
    # URIs routinely contain percent-escapes (e.g. "%20b...") which sprintf
    # would interpret as format directives and which would swallow the
    # moniker argument.
    ( my $fetch_uri = $uri ) =~ s/%s/$search_moniker/g;

    my $r = $ua->get($fetch_uri);

    return $self->result->error( $r->status_line . " from $uri" )
        unless $r->is_success;

    my $ct = $r->header('Content-type');
    if ( !defined $ct || $ct eq '' ) {
        warn "can't get Content-type";
        $ct = '';
    }
    my $content = $r->content;

    Jifty->log->debug( "got ", length($content), " bytes $ct" );

    if ( $ct =~ /xml/ ) {
        $self->result->message("Assuming $uri is feed from $ct");
        return $self->SUPER::take_action(@ARGS);
    }

    # Relative links have to be resolved against the URI of the fetched
    # document itself. Stripping the last path segment by hand first (as
    # was done before) makes links resolve one directory too high, and
    # mangles host-only URIs such as "http://example.com" into "http:/".
    my $base_uri = $r->base;
    $base_uri = defined $base_uri ? "$base_uri" : $fetch_uri;

    my @feeds = Feed::Find->find_in_html( \$content, $base_uri );

    if (@feeds) {

        Jifty->log->info( "found possible feeds: ", dump(@feeds) );

        # Keep only feeds whose URI contains the search moniker and turn
        # the moniker back into the "%s" placeholder.
        my @search_feeds;
        for my $feed (@feeds) {
            next unless $feed =~ $moniker_re;
            ( my $template = $feed ) =~ s/$moniker_re/%s/;
            push @search_feeds, $template;
        }

        if ( my $feed_uri = shift @search_feeds ) {

            $self->result->message(
                'Found ' . @feeds . " feeds, using first: $feed_uri" );
            $self->argument_value( 'uri', $feed_uri );

            Jifty->log->debug(
                "calling parent take_action with new uri $feed_uri");

            return $self->SUPER::take_action(@ARGS);

        }
        else {
            Jifty->log->debug( "found feeds ", dump(@feeds),
                " but none of them has search moniker!" );
        }
    }

    Jifty->log->debug("no feeds found, trying content_class detection");

    my $source = Grep::Source->new();

    if ( my $class = $source->content_class($content) ) {

        Jifty->log->debug("$class registred for feed $uri");

        $self->argument_value( 'source', $source );
        $self->result->message("Found $class scraper for $uri");
        return $self->SUPER::take_action(@ARGS);

    }
    else {

        $self->result->error('No feeds found on supplied URI');
        return 0;

    }

}
146
147 1;
148

  ViewVC Help
Powered by ViewVC 1.1.26