Parent Directory | Revision Log
Refactor scraping by extracting element_by_triplet into its own method; every parameter now accepts either a single argument (tag) or any number of triplets (tag, attribute, value)
1 | #!/usr/bin/perl |
2 | |
3 | use warnings; |
4 | use strict; |
5 | |
6 | package Grep::Source::DokuWiki; |
7 | |
8 | =head1 NAME |
9 | |
10 | Grep::Source::DokuWiki - scraper for DokuWiki search results |
11 | |
12 | =head1 BUGS |
13 | |
14 | Since DokuWiki HTML isn't structured enough (there is no block element |
15 | around the content of the page, sic!), search results contain form elements |
16 | and buttons. |
17 | |
18 | =cut |
19 | |
# Return a compiled pattern that matches the word "generator" followed,
# later on the same line, by "DokuWiki".
# NOTE(review): presumably the caller tests fetched page content against
# this pattern to decide whether this source applies -- confirm at call site.
sub content_have {
    return qr/generator.*DokuWiki/;
}
23 | |
# Run the DokuWiki scrape through the supplied parent object.
#
# Called as a method: the first argument is the invocant (not used here),
# the second is the object whose scrape() method is invoked. Three named
# options are passed, each an array reference holding a flat list of
# (tag, attribute, value) triplets:
#
#   wrapper - div.page, then div.dokuwiki
#   results - a div with empty attribute and value, then div.search_result
#   scrape  - div.dokuwiki
#
# The scrape() call is the final expression, so its result is what this
# sub returns.
# NOTE(review): how scrape() interprets each option and the empty
# attribute/value pair is defined by the parent class -- confirm there.
sub fetch {
    my ( $self, $parent ) = @_;

    return $parent->scrape(
        wrapper => [
            'div', 'class', 'page',
            'div', 'class', 'dokuwiki',
        ],
        results => [
            'div', '',      '',
            'div', 'class', 'search_result',
        ],
        scrape => [ 'div', 'class', 'dokuwiki' ],
    );
}
42 | |
43 | 1; |
ViewVC Help | |
Powered by ViewVC 1.1.26 |