/[Grep]/lib/Grep/Action/AddFeed.pm
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /lib/Grep/Action/AddFeed.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 73 - (hide annotations)
Fri Feb 23 11:48:39 2007 UTC (17 years, 2 months ago) by dpavlin
File size: 3036 byte(s)
each feed now has default source class which is called for it. Added PhpWiki
source. Code still has problems with Lucene locking.
1 dpavlin 21 use strict;
2     use warnings;
3    
4     =head1 NAME
5    
6     Grep::Action::AddFeed
7    
8     =cut
9    
10     package Grep::Action::AddFeed;
11     use base qw/Grep::Action::CreateFeed/;
12    
13     use Feed::Find;
14     use LWP::UserAgent;
15     use Data::Dump qw/dump/;
16    
17     =head2 canonicalize_uri
18    
19     Replace the first standalone word C<grep> with C<%s> in the URI argument
20    
21     =cut
22    
# Canonicalizer for the "uri" argument.
#
# Replaces the first standalone word "grep" in the submitted URI with the
# placeholder "%s" and records a canonicalization note on the "uri" field
# when a replacement was made.
#
# Arguments: $self  - the action object (must provide canonicalization_note)
#            $value - the submitted URI; may be undef
# Returns:   the (possibly modified) URI, or undef when no value was given.
sub canonicalize_uri {
    my $self  = shift;
    my $value = shift;

    # Nothing was submitted: return it untouched instead of triggering
    # "uninitialized value" warnings in the trace and the substitution.
    return $value unless defined $value;

    warn "uri: $value";

    # Only the first occurrence is replaced (no /g).
    if ( $value =~ s/\bgrep\b/%s/ ) {
        $self->canonicalization_note( uri => 'Replaced grep with %s' );
    }

    return $value;
}
32    
33 dpavlin 26 =head2 canonicalize_cookie
34    
35     Remove C<Cookie:> header from beginning and replace EOL with space.
36    
37     =cut
38    
39     # disabled for now
# Canonicalizer for the "cookie" argument (currently disabled via the xx_
# prefix, so it is not picked up under the canonicalize_<field> name).
#
# Strips a leading "Cookie:" header name and turns every CR / LF character
# into a space, recording a canonicalization note for each change.
#
# Arguments: $self  - the action object (must provide canonicalization_note)
#            $value - the submitted cookie string; may be undef
# Returns:   the cleaned cookie string, or undef when no value was given.
sub xx_canonicalize_cookie {
    my $self  = shift;
    my $value = shift;

    # Avoid "uninitialized value" warnings when no cookie was submitted.
    return $value unless defined $value;

    warn "cookie: $value";

    # Notes belong to the "cookie" field; they were previously filed under
    # "uri", which attached them to the wrong form field.
    # Header names are case-insensitive and the space after the colon is
    # optional, so accept "cookie:" and "Cookie:value" as well.
    $self->canonicalization_note( cookie => 'Removed Cookie: header' )
        if $value =~ s/^Cookie:\s*//i;

    $self->canonicalization_note( cookie => 'Converted EOL to space' )
        if $value =~ s/[\n\r]/ /g;

    return $value;
}
54 dpavlin 21 =head2 take_action
55    
56     =cut
57    
# Decode the "{x!26}" and "{x!3b}" escapes (hex codes of '&' and ';',
# matched case-insensitively) back into the literal characters.
# Returns the decoded copy; an undefined input is returned unchanged.
sub _decode_separator_escapes {
    my $text = shift;
    return $text unless defined $text;
    $text =~ s/\{x!(26|3b)\}/chr(hex($1))/gei;
    return $text;
}

# Try to turn the submitted URI into a feed and hand over to the parent
# action (Grep::Action::CreateFeed) to create it.
#
# Steps:
#   1. fetch the URI (with "%s" replaced by the search moniker "grep"),
#      sending the optional "cookie" argument as Cookie header;
#   2. if the response Content-type mentions xml, assume the URI is a feed;
#   3. otherwise look for feed links in the HTML and use the first one that
#      contains the search moniker (replaced by "%s") as the new "uri";
#   4. otherwise ask Grep::Source for a content class matching the page.
#
# Returns whatever the parent take_action returns on success, the result of
# $self->result->error(...) when the fetch fails, or 0 when nothing usable
# was found.
sub take_action {
    my $self = shift;

    # Keep the caller's arguments so they can be forwarded unchanged.
    my @ARGS = @_;

    my $ua = LWP::UserAgent->new;

    my $cookie = $self->argument_value('cookie');
    if ( defined $cookie ) {
        my $decoded = _decode_separator_escapes($cookie);

        # Write the decoded form back only when something was decoded.
        if ( $decoded ne $cookie ) {
            $cookie = $decoded;
            $self->argument_value( 'cookie', $cookie );
        }
    }

    # Only send a Cookie header when there is a cookie to send.
    if ( defined $cookie && length $cookie ) {
        Jifty->log->debug("using cookie: $cookie");
        $ua->default_header( 'Cookie' => $cookie );
    }

    my $search_moniker = 'grep';

    # NOTE(review): unlike the cookie, the decoded URI is not written back to
    # the 'uri' argument, so the parent action sees the escaped form in the
    # xml and content_class branches -- confirm this is intended.
    my $uri = _decode_separator_escapes( $self->argument_value('uri') );
    $uri = '' unless defined $uri;

    Jifty->log->debug("trying to find feed on $uri");

    # Fill in the "%s" placeholder by plain substitution.  sprintf() would
    # also interpret URI percent-escapes such as "%20" or "%3D" as format
    # directives and mangle the request URI.
    my $fetch_uri = $uri;
    $fetch_uri =~ s/%s/$search_moniker/g;

    my $r = $ua->get($fetch_uri);

    return $self->result->error( $r->status_line . " from $uri" )
        unless $r->is_success;

    my $ct = $r->header('Content-type');
    unless ( defined $ct && length $ct ) {
        warn "can't get Content-type";
        $ct = '';    # keep the match and the log line below warning-free
    }
    my $content = $r->content;

    Jifty->log->debug( "got ", length($content), " bytes $ct" );

    if ( $ct =~ /xml/ ) {
        $self->result->message("Assuming $uri is feed from $ct");
        return $self->SUPER::take_action(@ARGS);
    }

    my @feeds = Feed::Find->find_in_html( \$content );

    if (@feeds) {

        Jifty->log->info( "found possible feeds: ", dump(@feeds) );

        # Keep only feeds whose URI contains the search moniker and turn
        # its first occurrence into the "%s" placeholder.
        my @search_feeds = map {
            my $t = $_;
            $t =~ s/\b\Q$search_moniker\E\b/%s/;
            $t
        } grep { /\b\Q$search_moniker\E\b/ } @feeds;

        if ( my $feed_uri = shift @search_feeds ) {

            $self->result->message(
                'Found ' . scalar(@feeds) . " feeds, using first: $feed_uri" );
            $self->argument_value( 'uri', $feed_uri );

            Jifty->log->debug(
                "calling parent take_action with new uri $feed_uri");

            return $self->SUPER::take_action(@ARGS);

        }
        else {
            Jifty->log->debug( "found feeds ", dump(@feeds),
                " but none of them has search moniker!" );
        }
    }

    Jifty->log->debug("no feeds found, trying content_class detection");

    my $source = Grep::Source->new();

    if ( my $class = $source->content_class($content) ) {

        Jifty->log->debug("$class registred for feed $uri");

        # NOTE(review): the Grep::Source object, not $class, is stored in
        # the 'source' argument -- verify which one the parent expects.
        $self->argument_value( 'source', $source );
        $self->result->message("Found $class scraper for $uri");
        return $self->SUPER::take_action(@ARGS);

    }
    else {

        $self->result->error('No feeds found on supplied URI');
        return 0;

    }

}
143    
144     1;
145    

  ViewVC Help
Powered by ViewVC 1.1.26