/[socialtext-import]/tamtam/tamtam2socialtext.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /tamtam/tamtam2socialtext.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 27 - (hide annotations)
Sat Dec 15 13:52:40 2007 UTC (16 years, 2 months ago) by dpavlin
File MIME type: text/plain
File size: 5091 byte(s)
create tags from parts of page name
1 dpavlin 5 #!/usr/bin/perl
2    
3     use warnings;
4     use strict;
5    
6     use XML::Simple;
7     use File::Find;
8     use Regexp::Common qw/balanced/;
9     use Socialtext::Resting;
10     use Encode;
11 dpavlin 6 use HTTP::Date;
12 dpavlin 15 use POSIX qw/strftime/;
13 dpavlin 18 use File::Slurp;
14     use File::MMagic::XS;
15 dpavlin 21 use Getopt::Long;
16 dpavlin 5 use Data::Dump qw/dump/;
17    
18 dpavlin 18 my $debug = 0;
19 dpavlin 11 my $max = 999;
20 dpavlin 23 my $attachments = 0;
21 dpavlin 6
22 dpavlin 27 my @create_tags = (qw/
23     Trazi
24     Nudi
25     SvakodnevneDovitljivosti
26     G33koSkop
27     /);
28    
29 dpavlin 21 GetOptions(
30     'debug+' => \$debug,
31     'max=i' => \$max,
32 dpavlin 23 'attachments' => \$attachments,
33 dpavlin 21 );
34    
35 dpavlin 5 my $page;
36 dpavlin 6 my $page_date;
37 dpavlin 5
38 dpavlin 9 my @page_names;
39    
40 dpavlin 24 print "Importing $max pages", $attachments ? " with attachments" : "", "...\n";
41 dpavlin 18
42 dpavlin 5 find({
43     wanted => sub {
44     my $path = $File::Find::name;
45     return unless -f $path;
46 dpavlin 6
47 dpavlin 18 warn "+ $path\n";
48     my $ref = XMLin( $path,
49     KeyAttr => {
50     'attachment' => '+name',
51     'meta' => 'name',
52     },
53 dpavlin 22 ForceArray => [ 'attachment', 'meta', 'widget' ],
54 dpavlin 18 ) || die "can't open $path: $!";
55 dpavlin 6
56 dpavlin 18 warn "## $path = ",dump( $ref ) if $debug;
57    
58 dpavlin 6 my $name = $ref->{name} || die "no name in $path";
59 dpavlin 8
60     return if $name =~ m/^TamSystem/;
61    
62 dpavlin 14 my $date = $ref->{meta}->{LastModified}->{value};
63 dpavlin 6 if ( ! $date ) {
64     warn "SKIP: no LastModified in $path $name";
65     return;
66     }
67 dpavlin 17
68 dpavlin 18 my $data;
69 dpavlin 17
70 dpavlin 18 foreach my $w ( @{ $ref->{widgets}->{widget} } ) {
71 dpavlin 6
72 dpavlin 18 warn "## w = ",dump( $w ) if $debug;
73 dpavlin 17
74 dpavlin 18 $data .= "\n----\n" if $data;
75     $data .= $w->{data} || die "no data?";
76     }
77    
78     my $attachments;
79    
80     if ( my $a = $ref->{attachment} ) {
81     foreach my $name ( keys %$a ) {
82     my $full_path = $path;
83     $full_path =~ s,pages/,attachments/,;
84     $full_path .= '.' . $name;
85     die "$full_path doesn't exist" unless -e $full_path;
86     push @$attachments, {
87     full_path => $full_path,
88     name => ( $name || $a->{$name}->{desc} || 'noname' ),
89     };
90     }
91     }
92    
93 dpavlin 14 $page->{ $name } = {
94     content => convert_markup( $data ),
95 dpavlin 26 original => $data,
96 dpavlin 15 date => convert_date( $date ),
97 dpavlin 18 attachments => $attachments,
98 dpavlin 14 };
99 dpavlin 13
100 dpavlin 11 $name =~ s,^.+/([^/]+)$,$1,;
101     push @page_names, $name;
102    
103 dpavlin 5 },
104 dpavlin 18 no_chdir=>1,
105 dpavlin 5 }, shift @ARGV || '.');
106    
107 dpavlin 6 my @pages = ( keys %$page );
108 dpavlin 5
109 dpavlin 11 warn "found following pages: ", join(", ", @page_names),"\n";
110 dpavlin 5
111 dpavlin 11 my $page_link_re = '\b(' . join('|', @page_names) . ')\b';
112    
113 dpavlin 5 my $Rester = Socialtext::Resting->new(
114     username => 'tamtam',
115     password => 'import',
116     server => 'http://saturn.ffzg.hr/',
117 dpavlin 18 workspace => 'razmjenavjestina',
118 dpavlin 5 );
119     $Rester->put_workspacetag('TamTam');
120    
121 dpavlin 15 sub convert_date {
122     my $date = shift;
123     # return time2str( $date );
124     return strftime('%F %T %z', gmtime( $date ));
125     }
126    
127 dpavlin 5 sub header {
128     my $h = shift;
129     if ( $h =~ m/^(=+)\s+(.+?)\s+\1$/ ) {
130     my $level = length($1);
131     return "\n" . ( '^' x $level ) . " $2\n";
132     } else {
133 dpavlin 11 return $h;
134 dpavlin 5 }
135     }
136    
137     sub surround {
138     my ( $with, $what ) = @_;
139     return $with . $what . $with;
140     }
141    
142 dpavlin 12 sub pre {
143     my $text = shift;
144 dpavlin 25 $text =~ s/^{{{\s*//s;
145     $text =~ s/\s*}}}$//s;
146     return "\n.pre\n" . $text . "\n.pre\n";
147 dpavlin 12 }
148    
149 dpavlin 14 sub convert_markup {
150     my $body = shift;
151 dpavlin 6
152 dpavlin 5 $body =~ s/\Q[[TableOfContents]]\E/{toc}/gs;
153     $body =~ s/\Q[[BR]]\E/\n/gs;
154     $body =~ s/$RE{balanced}{-begin => "= |== |=== |==== |===== |===== "}{-end => " =| ==| ===| ====| ====="}{-keep}/header($1)/gse;
155     $body =~ s/''''(.+?)''''/surround('`',$1)/gse;
156     $body =~ s/'''(.+?)'''/surround('*',$1)/gse;
157     $body =~ s/''(.+?)''/surround('_',$1)/gse;
158 dpavlin 12 $body =~ s/$RE{balanced}{-begin => "{{{"}{-end => "}}}"}{-keep}/pre($1)/gse;
159 dpavlin 5
160 dpavlin 9 # fix bullets
161 dpavlin 7 $body =~ s/^\s+([\*])/$1/gm;
162    
163 dpavlin 9 # fix links
164     $body =~ s/\["([^"]+)"\]/[$1]/gs;
165 dpavlin 10 $body =~ s,\[(http://\S+)\s+([^\]]+)\],"$2"<$1>,gs;
166     $body =~ s,\[(http://[^\]]+)\],$1,gs;
167 dpavlin 9
168 dpavlin 14 # fix hr
169     $body =~ s,(\S+)----,$1\n----,gs;
170     $body =~ s,----(\S+),----\n$1,gs;
171 dpavlin 5
172 dpavlin 18 # attachments
173 dpavlin 19 $body =~ s,\[attachment:([^\]]+)(gif|png|jpg|jpeg)\],{image: $1$2},gis;
174 dpavlin 18 $body =~ s,\[attachment:([^\]]+)\],{file: $1},gs;
175    
176 dpavlin 14 return $body;
177     }
178    
179     my $count = 0;
180    
181 dpavlin 18 my $m = File::MMagic::XS->new;
182    
183 dpavlin 14 foreach my $name ( keys %$page ) {
184     last if $count++ == $max;
185    
186     my $p = $page->{$name};
187 dpavlin 17
188     warn "## $name = ",dump( $p ) if $debug;
189    
190 dpavlin 14 my $body = $p->{content} || die "no content?";
191     my $date = $p->{date} || die "no date?";
192    
193 dpavlin 6 my @tags = ( 'TamTam' );
194 dpavlin 5
195 dpavlin 15 my $full_name = $name;
196    
197 dpavlin 6 if ( $name =~ m!/! ) {
198     my @page_tags = split(m!/!, $name);
199 dpavlin 8 $name = pop @page_tags; # remove page name
200 dpavlin 6 push @tags, @page_tags;
201     }
202    
203 dpavlin 14 # link named pages
204 dpavlin 16 $body =~ s,\b$page_link_re\b,[$1],gs;
205     $body =~ s,``,,gs;
206 dpavlin 14
207     $body .= qq{
208 dpavlin 22
209 dpavlin 14 ----
210    
211 dpavlin 20 "original"<http://www.razmjenavjestina.org/$full_name> {date: $date}
212 dpavlin 14 };
213    
214     Encode::_utf8_off( $body );
215    
216 dpavlin 18 print "$name $date\n";
217 dpavlin 26
218     # original markup
219     $Rester->put_page( $name, { content => $p->{original}, date => $date });
220    
221 dpavlin 27 foreach my $t ( @create_tags ) {
222     push @tags, $t if $full_name =~ m/$t/i;
223     }
224    
225 dpavlin 6 foreach ( @tags ) {
226 dpavlin 26 $Rester->put_pagetag( $name, $_, { date => $date } );
227 dpavlin 18 print "+ tag $_\n";
228 dpavlin 6 }
229 dpavlin 23
230     if ( $attachments ) {
231     foreach my $a ( @{ $p->{attachments} } ) {
232     my $type = $m->get_mime( $a->{full_path} );
233     my $content = read_file( $a->{full_path} );
234     print "+ attachment ", $a->{name}," $type ", length($content), " bytes\n";
235     $Rester->post_attachment($name, $a->{name}, $content, $type );
236     }
237 dpavlin 18 }
238 dpavlin 6
239 dpavlin 26 # converted page
240     $Rester->put_page( $name, { content => $body, date => $date });
241    
242 dpavlin 5 }
243    

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26