/[Grep]/lib/Grep/Action/AddFeed.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /lib/Grep/Action/AddFeed.pm

Parent Directory | Revision Log | View Patch

revision 69 by dpavlin, Thu Feb 22 16:57:23 2007 UTC revision 102 by dpavlin, Sun Mar 4 22:16:23 2007 UTC
# Line 12  use base qw/Grep::Action::CreateFeed/; Line 12  use base qw/Grep::Action::CreateFeed/;
12    
13  use Feed::Find;  use Feed::Find;
14  use LWP::UserAgent;  use LWP::UserAgent;
15    
16  use Data::Dump qw/dump/;  use Data::Dump qw/dump/;
17    
18  =head2 canonicalize_uri  =head2 canonicalize_uri
# Line 23  Replace C<grep>' with C<%s> in URI argum Line 24  Replace C<grep>' with C<%s> in URI argum
24  sub canonicalize_uri {  sub canonicalize_uri {
25          my $self = shift;          my $self = shift;
26          my $value = shift;          my $value = shift;
27          warn "uri: $value";          $self->log->debug("canonicalize uri $value");
28          if ($value =~ s/\bgrep\b/%s/) {          if ($value =~ s/\bgrep\b/%s/) {
29                  $self->canonicalization_note( uri => 'Replaced grep with %s' );                  $self->canonicalization_note( uri => 'Replaced grep with %s' );
30          }          }
# Line 41  sub xx_canonicalize_cookie { Line 42  sub xx_canonicalize_cookie {
42          my $self = shift;          my $self = shift;
43          my $value = shift;          my $value = shift;
44    
45          warn "cookie: $value";          #warn "cookie: $value";
46    
47          $self->canonicalization_note( uri => 'Removed Cookie: header' )          $self->canonicalization_note( uri => 'Removed Cookie: header' )
48                  if ($value =~ s/^Cookie:\s+//);                  if ($value =~ s/^Cookie:\s+//);
# Line 83  sub take_action { Line 84  sub take_action {
84    
85          return $self->result->error( $r->status_line . " from $uri" ) unless ( $r->is_success );          return $self->result->error( $r->status_line . " from $uri" ) unless ( $r->is_success );
86    
87          if ($r->header('Content-type') =~ /xml/) {          my $ct = $r->header('Content-type') or warn "can't get Content-type";
88                  $self->result->message( "Assuming $uri is feed and using it" );          my $content = $r->content;
89    
90            Jifty->log->debug("got ", length( $content ), " bytes $ct");
91    
92            if ( $ct =~ /xml/ ) {
93                    Grep::Source->save( 'addfeed.xml', $content );
94                    $self->result->message( "Assuming $uri is feed from $ct" );
95                  return $self->SUPER::take_action( @ARGS );                  return $self->SUPER::take_action( @ARGS );
96          }          }
97    
98          my @feeds = Feed::Find->find_in_html( $r->content );          Grep::Source->save( 'addfeed.html', $content );
99    
100            my $base_uri = $uri;
101            $base_uri =~ s!/[^/]+$!!;
102    
103            my @feeds = Feed::Find->find_in_html( \$content, $base_uri );
104    
105          if (@feeds) {          if (@feeds) {
106    
107                  Jifty->log->info("found possible feeds: ", dump( @feeds ));                  Jifty->log->info("found possible feeds: ", dump( @feeds ));
108    
109                  @feeds = map {                  my @search_feeds = map {
110                          my $t = $_;                          my $t = $_;
111                          $t =~ s/\b$search_moniker\b/%s/;                          $t =~ s/\b$search_moniker\b/%s/;
112                          $t                          $t
113                  } grep(/\b$search_moniker\b/,@feeds);                  } grep(/\b$search_moniker\b/,@feeds);
114    
115    
116                  my $feed_uri = shift @feeds;                  if ( my $feed_uri = shift @search_feeds ) {
117    
118                            $self->result->message('Found ' . @feeds . " feeds, using first: $feed_uri" );
119                            $self->argument_value('uri', $feed_uri);
120    
121                  return $self->result->error("Can't find any feed at $uri") unless ( $feed_uri );                          Jifty->log->debug("calling parent take_action with new uri $feed_uri");
122    
123                  $self->result->message('Found ' . @feeds . " feeds, using first: $feed_uri" );                          return $self->SUPER::take_action( @ARGS );
                 $self->argument_value('uri', $feed_uri);  
124    
125                  Jifty->log->debug("calling parent take_action with new uri $feed_uri");                  } else {
126                            Jifty->log->debug("found feeds ", dump( @feeds ), " but none of them has search moniker!");
127                    }
128            }
129    
130            Jifty->log->debug("no feeds found, trying content_class detection");
131    
132            my $source = Grep::Source->new();
133    
134            if ( my $class = $source->content_class( $content ) ) {
135    
136                    Jifty->log->debug("$class registred for feed $uri");
137    
138                    $self->argument_value('source', "$class" );
139                    $self->result->message("Found $class scraper for $uri" );
140                  return $self->SUPER::take_action( @ARGS );                  return $self->SUPER::take_action( @ARGS );
141    
142          } else {          } else {
143    
                 warn "no feeds in ", $r->content;  
   
144                  $self->result->error('No feeds found on supplied URI');                  $self->result->error('No feeds found on supplied URI');
145                  return 0;                  return 0;
146    
147          }          }
148    
149  }  }

Legend:
Removed from v.69  
changed lines
  Added in v.102

  ViewVC Help
Powered by ViewVC 1.1.26