/[socialtext-import]/tamtam/tamtam2socialtext.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /tamtam/tamtam2socialtext.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 27 - (show annotations)
Sat Dec 15 13:52:40 2007 UTC (16 years, 2 months ago) by dpavlin
File MIME type: text/plain
File size: 5091 byte(s)
create tags from parts of page name
1 #!/usr/bin/perl
2
3 use warnings;
4 use strict;
5
6 use XML::Simple;
7 use File::Find;
8 use Regexp::Common qw/balanced/;
9 use Socialtext::Resting;
10 use Encode;
11 use HTTP::Date;
12 use POSIX qw/strftime/;
13 use File::Slurp;
14 use File::MMagic::XS;
15 use Getopt::Long;
16 use Data::Dump qw/dump/;
17
18 my $debug = 0;
19 my $max = 999;
20 my $attachments = 0;
21
22 my @create_tags = (qw/
23 Trazi
24 Nudi
25 SvakodnevneDovitljivosti
26 G33koSkop
27 /);
28
29 GetOptions(
30 'debug+' => \$debug,
31 'max=i' => \$max,
32 'attachments' => \$attachments,
33 );
34
35 my $page;
36 my $page_date;
37
38 my @page_names;
39
40 print "Importing $max pages", $attachments ? " with attachments" : "", "...\n";
41
42 find({
43 wanted => sub {
44 my $path = $File::Find::name;
45 return unless -f $path;
46
47 warn "+ $path\n";
48 my $ref = XMLin( $path,
49 KeyAttr => {
50 'attachment' => '+name',
51 'meta' => 'name',
52 },
53 ForceArray => [ 'attachment', 'meta', 'widget' ],
54 ) || die "can't open $path: $!";
55
56 warn "## $path = ",dump( $ref ) if $debug;
57
58 my $name = $ref->{name} || die "no name in $path";
59
60 return if $name =~ m/^TamSystem/;
61
62 my $date = $ref->{meta}->{LastModified}->{value};
63 if ( ! $date ) {
64 warn "SKIP: no LastModified in $path $name";
65 return;
66 }
67
68 my $data;
69
70 foreach my $w ( @{ $ref->{widgets}->{widget} } ) {
71
72 warn "## w = ",dump( $w ) if $debug;
73
74 $data .= "\n----\n" if $data;
75 $data .= $w->{data} || die "no data?";
76 }
77
78 my $attachments;
79
80 if ( my $a = $ref->{attachment} ) {
81 foreach my $name ( keys %$a ) {
82 my $full_path = $path;
83 $full_path =~ s,pages/,attachments/,;
84 $full_path .= '.' . $name;
85 die "$full_path doesn't exist" unless -e $full_path;
86 push @$attachments, {
87 full_path => $full_path,
88 name => ( $name || $a->{$name}->{desc} || 'noname' ),
89 };
90 }
91 }
92
93 $page->{ $name } = {
94 content => convert_markup( $data ),
95 original => $data,
96 date => convert_date( $date ),
97 attachments => $attachments,
98 };
99
100 $name =~ s,^.+/([^/]+)$,$1,;
101 push @page_names, $name;
102
103 },
104 no_chdir=>1,
105 }, shift @ARGV || '.');
106
107 my @pages = ( keys %$page );
108
109 warn "found following pages: ", join(", ", @page_names),"\n";
110
111 my $page_link_re = '\b(' . join('|', @page_names) . ')\b';
112
113 my $Rester = Socialtext::Resting->new(
114 username => 'tamtam',
115 password => 'import',
116 server => 'http://saturn.ffzg.hr/',
117 workspace => 'razmjenavjestina',
118 );
119 $Rester->put_workspacetag('TamTam');
120
121 sub convert_date {
122 my $date = shift;
123 # return time2str( $date );
124 return strftime('%F %T %z', gmtime( $date ));
125 }
126
127 sub header {
128 my $h = shift;
129 if ( $h =~ m/^(=+)\s+(.+?)\s+\1$/ ) {
130 my $level = length($1);
131 return "\n" . ( '^' x $level ) . " $2\n";
132 } else {
133 return $h;
134 }
135 }
136
137 sub surround {
138 my ( $with, $what ) = @_;
139 return $with . $what . $with;
140 }
141
142 sub pre {
143 my $text = shift;
144 $text =~ s/^{{{\s*//s;
145 $text =~ s/\s*}}}$//s;
146 return "\n.pre\n" . $text . "\n.pre\n";
147 }
148
149 sub convert_markup {
150 my $body = shift;
151
152 $body =~ s/\Q[[TableOfContents]]\E/{toc}/gs;
153 $body =~ s/\Q[[BR]]\E/\n/gs;
154 $body =~ s/$RE{balanced}{-begin => "= |== |=== |==== |===== |===== "}{-end => " =| ==| ===| ====| ====="}{-keep}/header($1)/gse;
155 $body =~ s/''''(.+?)''''/surround('`',$1)/gse;
156 $body =~ s/'''(.+?)'''/surround('*',$1)/gse;
157 $body =~ s/''(.+?)''/surround('_',$1)/gse;
158 $body =~ s/$RE{balanced}{-begin => "{{{"}{-end => "}}}"}{-keep}/pre($1)/gse;
159
160 # fix bullets
161 $body =~ s/^\s+([\*])/$1/gm;
162
163 # fix links
164 $body =~ s/\["([^"]+)"\]/[$1]/gs;
165 $body =~ s,\[(http://\S+)\s+([^\]]+)\],"$2"<$1>,gs;
166 $body =~ s,\[(http://[^\]]+)\],$1,gs;
167
168 # fix hr
169 $body =~ s,(\S+)----,$1\n----,gs;
170 $body =~ s,----(\S+),----\n$1,gs;
171
172 # attachments
173 $body =~ s,\[attachment:([^\]]+)(gif|png|jpg|jpeg)\],{image: $1$2},gis;
174 $body =~ s,\[attachment:([^\]]+)\],{file: $1},gs;
175
176 return $body;
177 }
178
179 my $count = 0;
180
181 my $m = File::MMagic::XS->new;
182
183 foreach my $name ( keys %$page ) {
184 last if $count++ == $max;
185
186 my $p = $page->{$name};
187
188 warn "## $name = ",dump( $p ) if $debug;
189
190 my $body = $p->{content} || die "no content?";
191 my $date = $p->{date} || die "no date?";
192
193 my @tags = ( 'TamTam' );
194
195 my $full_name = $name;
196
197 if ( $name =~ m!/! ) {
198 my @page_tags = split(m!/!, $name);
199 $name = pop @page_tags; # remove page name
200 push @tags, @page_tags;
201 }
202
203 # link named pages
204 $body =~ s,\b$page_link_re\b,[$1],gs;
205 $body =~ s,``,,gs;
206
207 $body .= qq{
208
209 ----
210
211 "original"<http://www.razmjenavjestina.org/$full_name> {date: $date}
212 };
213
214 Encode::_utf8_off( $body );
215
216 print "$name $date\n";
217
218 # original markup
219 $Rester->put_page( $name, { content => $p->{original}, date => $date });
220
221 foreach my $t ( @create_tags ) {
222 push @tags, $t if $full_name =~ m/$t/i;
223 }
224
225 foreach ( @tags ) {
226 $Rester->put_pagetag( $name, $_, { date => $date } );
227 print "+ tag $_\n";
228 }
229
230 if ( $attachments ) {
231 foreach my $a ( @{ $p->{attachments} } ) {
232 my $type = $m->get_mime( $a->{full_path} );
233 my $content = read_file( $a->{full_path} );
234 print "+ attachment ", $a->{name}," $type ", length($content), " bytes\n";
235 $Rester->post_attachment($name, $a->{name}, $content, $type );
236 }
237 }
238
239 # converted page
240 $Rester->put_page( $name, { content => $body, date => $date });
241
242 }
243

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26