/[Grep]/lib/Grep/Action/AddFeed.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /lib/Grep/Action/AddFeed.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 78 - (hide annotations)
Fri Feb 23 17:34:20 2007 UTC (17 years, 2 months ago) by dpavlin
File size: 3097 byte(s)
give base_uri to Feed::Find->find_in_html so that relative links works
1 dpavlin 21 use strict;
2     use warnings;
3    
4     =head1 NAME
5    
6     Grep::Action::AddFeed
7    
8     =cut
9    
10     package Grep::Action::AddFeed;
11     use base qw/Grep::Action::CreateFeed/;
12    
13     use Feed::Find;
14     use LWP::UserAgent;
15     use Data::Dump qw/dump/;
16    
17     =head2 canonicalize_uri
18    
19     Replace C<grep>' with C<%s> in URI arguments
20    
21     =cut
22    
23     sub canonicalize_uri {
24     my $self = shift;
25     my $value = shift;
26     warn "uri: $value";
27 dpavlin 26 if ($value =~ s/\bgrep\b/%s/) {
28 dpavlin 21 $self->canonicalization_note( uri => 'Replaced grep with %s' );
29     }
30     return $value;
31     }
32    
33 dpavlin 26 =head2 canonicalize_cookie
34    
35     Remove C<Cookie:> header from beginning and replace EOL with space.
36    
37     =cut
38    
39     # disabled for now
40     sub xx_canonicalize_cookie {
41     my $self = shift;
42     my $value = shift;
43    
44     warn "cookie: $value";
45    
46     $self->canonicalization_note( uri => 'Removed Cookie: header' )
47     if ($value =~ s/^Cookie:\s+//);
48    
49     $self->canonicalization_note( uri => 'Converted EOL to space' )
50     if ($value =~ s/[\n\r]/ /gs);
51    
52     return $value;
53     }
54 dpavlin 21 =head2 take_action
55    
56     =cut
57    
58     sub take_action {
59     my $self = shift;
60    
61     my @ARGS = @_;
62    
63     # Custom action code
64    
65     my $ua = LWP::UserAgent->new;
66    
67     my $cookie = $self->argument_value('cookie');
68     if ($cookie =~ s/{x!(26|3b)}/chr(hex($1))/gei) {
69     $self->argument_value('cookie', $cookie);
70     }
71    
72     Jifty->log->debug("using cookie: $cookie");
73     $ua->default_header( 'Cookie' => $cookie );
74    
75     my $search_moniker = 'grep';
76    
77     my $uri = $self->argument_value('uri');
78     $uri =~ s/{x!(26|3b)}/chr(hex($1))/gei;
79    
80     Jifty->log->debug("trying to find feed on $uri");
81    
82 dpavlin 69 my $r = $ua->get( sprintf( $uri, $search_moniker ) );
83 dpavlin 21
84 dpavlin 69 return $self->result->error( $r->status_line . " from $uri" ) unless ( $r->is_success );
85    
86 dpavlin 73 my $ct = $r->header('Content-type') or warn "can't get Content-type";
87     my $content = $r->content;
88    
89     Jifty->log->debug("got ", length( $content ), " bytes $ct");
90    
91     if ( $ct =~ /xml/ ) {
92     $self->result->message( "Assuming $uri is feed from $ct" );
93 dpavlin 69 return $self->SUPER::take_action( @ARGS );
94     }
95    
96 dpavlin 78 my $base_uri = $uri;
97     $base_uri =~ s!/[^/]+$!!;
98 dpavlin 69
99 dpavlin 78 my @feeds = Feed::Find->find_in_html( \$content, $base_uri );
100    
101 dpavlin 21 if (@feeds) {
102    
103     Jifty->log->info("found possible feeds: ", dump( @feeds ));
104    
105 dpavlin 73 my @search_feeds = map {
106 dpavlin 21 my $t = $_;
107 dpavlin 26 $t =~ s/\b$search_moniker\b/%s/;
108 dpavlin 21 $t
109 dpavlin 26 } grep(/\b$search_moniker\b/,@feeds);
110 dpavlin 21
111    
112 dpavlin 73 if ( my $feed_uri = shift @search_feeds ) {
113 dpavlin 21
114 dpavlin 73 $self->result->message('Found ' . @feeds . " feeds, using first: $feed_uri" );
115     $self->argument_value('uri', $feed_uri);
116 dpavlin 21
117 dpavlin 73 Jifty->log->debug("calling parent take_action with new uri $feed_uri");
118 dpavlin 21
119 dpavlin 73 return $self->SUPER::take_action( @ARGS );
120 dpavlin 21
121 dpavlin 73 } else {
122     Jifty->log->debug("found feeds ", dump( @feeds ), " but none of them has search moniker!");
123     }
124     }
125    
126     Jifty->log->debug("no feeds found, trying content_class detection");
127    
128     my $source = Grep::Source->new();
129    
130     if ( my $class = $source->content_class( $content ) ) {
131    
132     Jifty->log->debug("$class registred for feed $uri");
133    
134     $self->argument_value('source', $source);
135     $self->result->message("Found $class scraper for $uri" );
136 dpavlin 58 return $self->SUPER::take_action( @ARGS );
137 dpavlin 21
138     } else {
139    
140     $self->result->error('No feeds found on supplied URI');
141     return 0;
142 dpavlin 73
143 dpavlin 21 }
144    
145     }
146    
147     1;
148    

  ViewVC Help
Powered by ViewVC 1.1.26