/[socialtext-import]/tamtam/tamtam2socialtext.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /tamtam/tamtam2socialtext.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 20 - (show annotations)
Thu Dec 13 11:38:36 2007 UTC (15 years, 1 month ago) by dpavlin
File MIME type: text/plain
File size: 4546 byte(s)
better original link
1 #!/usr/bin/perl
2
3 use warnings;
4 use strict;
5
6 use XML::Simple;
7 use File::Find;
8 use Regexp::Common qw/balanced/;
9 use Socialtext::Resting;
10 use Encode;
11 use HTTP::Date;
12 use POSIX qw/strftime/;
13 use File::Slurp;
14 use File::MMagic::XS;
15 use Data::Dump qw/dump/;
16
17 my $debug = 0;
18 my $max = 999;
19
20 my $page;
21 my $page_date;
22
23 my @page_names;
24
25 print "Collecting pages...\n";
26
27 find({
28 wanted => sub {
29 my $path = $File::Find::name;
30 return unless -f $path;
31
32 warn "+ $path\n";
33 my $ref = XMLin( $path,
34 KeyAttr => {
35 'attachment' => '+name',
36 'meta' => 'name',
37 },
38 ForceArray => [ 'attachment', 'widget' ],
39 ) || die "can't open $path: $!";
40
41 warn "## $path = ",dump( $ref ) if $debug;
42
43 my $name = $ref->{name} || die "no name in $path";
44
45 return if $name =~ m/^TamSystem/;
46
47 my $date = $ref->{meta}->{LastModified}->{value};
48 if ( ! $date ) {
49 warn "SKIP: no LastModified in $path $name";
50 return;
51 }
52
53 my $data;
54
55 foreach my $w ( @{ $ref->{widgets}->{widget} } ) {
56
57 warn "## w = ",dump( $w ) if $debug;
58
59 $data .= "\n----\n" if $data;
60 $data .= $w->{data} || die "no data?";
61 }
62
63 my $attachments;
64
65 if ( my $a = $ref->{attachment} ) {
66 foreach my $name ( keys %$a ) {
67 my $full_path = $path;
68 $full_path =~ s,pages/,attachments/,;
69 $full_path .= '.' . $name;
70 die "$full_path doesn't exist" unless -e $full_path;
71 push @$attachments, {
72 full_path => $full_path,
73 name => ( $name || $a->{$name}->{desc} || 'noname' ),
74 };
75 }
76 }
77
78 $page->{ $name } = {
79 content => convert_markup( $data ),
80 date => convert_date( $date ),
81 attachments => $attachments,
82 };
83
84 $name =~ s,^.+/([^/]+)$,$1,;
85 push @page_names, $name;
86
87 },
88 no_chdir=>1,
89 }, shift @ARGV || '.');
90
91 my @pages = ( keys %$page );
92
93 warn "found following pages: ", join(", ", @page_names),"\n";
94
95 my $page_link_re = '\b(' . join('|', @page_names) . ')\b';
96
97 my $Rester = Socialtext::Resting->new(
98 username => 'tamtam',
99 password => 'import',
100 server => 'http://saturn.ffzg.hr/',
101 workspace => 'razmjenavjestina',
102 );
103 $Rester->put_workspacetag('TamTam');
104
105 sub convert_date {
106 my $date = shift;
107 # return time2str( $date );
108 return strftime('%F %T %z', gmtime( $date ));
109 }
110
111 sub header {
112 my $h = shift;
113 if ( $h =~ m/^(=+)\s+(.+?)\s+\1$/ ) {
114 my $level = length($1);
115 return "\n" . ( '^' x $level ) . " $2\n";
116 } else {
117 return $h;
118 }
119 }
120
121 sub surround {
122 my ( $with, $what ) = @_;
123 return $with . $what . $with;
124 }
125
126 sub pre {
127 my $text = shift;
128 $text =~ s/^{{{//;
129 $text =~ s/}}}$//;
130 return '.pre' . $text . '.pre';
131 }
132
133 sub convert_markup {
134 my $body = shift;
135
136 $body =~ s/\Q[[TableOfContents]]\E/{toc}/gs;
137 $body =~ s/\Q[[BR]]\E/\n/gs;
138 $body =~ s/$RE{balanced}{-begin => "= |== |=== |==== |===== |===== "}{-end => " =| ==| ===| ====| ====="}{-keep}/header($1)/gse;
139 $body =~ s/''''(.+?)''''/surround('`',$1)/gse;
140 $body =~ s/'''(.+?)'''/surround('*',$1)/gse;
141 $body =~ s/''(.+?)''/surround('_',$1)/gse;
142 $body =~ s/$RE{balanced}{-begin => "{{{"}{-end => "}}}"}{-keep}/pre($1)/gse;
143
144 # fix bullets
145 $body =~ s/^\s+([\*])/$1/gm;
146
147 # fix links
148 $body =~ s/\["([^"]+)"\]/[$1]/gs;
149 $body =~ s,\[(http://\S+)\s+([^\]]+)\],"$2"<$1>,gs;
150 $body =~ s,\[(http://[^\]]+)\],$1,gs;
151
152 # fix hr
153 $body =~ s,(\S+)----,$1\n----,gs;
154 $body =~ s,----(\S+),----\n$1,gs;
155
156 # attachments
157 $body =~ s,\[attachment:([^\]]+)(gif|png|jpg|jpeg)\],{image: $1$2},gis;
158 $body =~ s,\[attachment:([^\]]+)\],{file: $1},gs;
159
160 return $body;
161 }
162
163 my $count = 0;
164
165 my $m = File::MMagic::XS->new;
166
167 foreach my $name ( keys %$page ) {
168 last if $count++ == $max;
169
170 my $p = $page->{$name};
171
172 warn "## $name = ",dump( $p ) if $debug;
173
174 my $body = $p->{content} || die "no content?";
175 my $date = $p->{date} || die "no date?";
176
177 my @tags = ( 'TamTam' );
178
179 my $full_name = $name;
180
181 if ( $name =~ m!/! ) {
182 my @page_tags = split(m!/!, $name);
183 $name = pop @page_tags; # remove page name
184 push @tags, @page_tags;
185 }
186
187 # link named pages
188 $body =~ s,\b$page_link_re\b,[$1],gs;
189 $body =~ s,``,,gs;
190
191 $body .= qq{
192 ----
193
194 "original"<http://www.razmjenavjestina.org/$full_name> {date: $date}
195 };
196
197 Encode::_utf8_off( $body );
198
199 $Rester->put_page( $name, { content => $body, date => $date });
200 print "$name $date\n";
201 foreach ( @tags ) {
202 $Rester->put_pagetag( $name, $_ );
203 print "+ tag $_\n";
204 }
205 foreach my $a ( @{ $p->{attachments} } ) {
206 my $type = $m->get_mime( $a->{full_path} );
207 my $content = read_file( $a->{full_path} );
208 print "+ attachment ", $a->{name}," $type ", length($content), " bytes\n";
209 $Rester->post_attachment($name, $a->{name}, $content, $type );
210 }
211
212 }
213

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26