/[socialtext-import]/tamtam/tamtam2socialtext.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /tamtam/tamtam2socialtext.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 19 - (hide annotations)
Wed Dec 12 23:08:14 2007 UTC (16 years, 3 months ago) by dpavlin
File MIME type: text/plain
File size: 4543 byte(s)
image attachments now generate in-line images
1 dpavlin 5 #!/usr/bin/perl
2    
3     use warnings;
4     use strict;
5    
6     use XML::Simple;
7     use File::Find;
8     use Regexp::Common qw/balanced/;
9     use Socialtext::Resting;
10     use Encode;
11 dpavlin 6 use HTTP::Date;
12 dpavlin 15 use POSIX qw/strftime/;
13 dpavlin 18 use File::Slurp;
14     use File::MMagic::XS;
15 dpavlin 5 use Data::Dump qw/dump/;
16    
17 dpavlin 18 my $debug = 0;
18 dpavlin 11 my $max = 999;
19 dpavlin 6
20 dpavlin 5 my $page;
21 dpavlin 6 my $page_date;
22 dpavlin 5
23 dpavlin 9 my @page_names;
24    
25 dpavlin 18 print "Collecting pages...\n";
26    
27 dpavlin 5 find({
28     wanted => sub {
29     my $path = $File::Find::name;
30     return unless -f $path;
31 dpavlin 6
32 dpavlin 18 warn "+ $path\n";
33     my $ref = XMLin( $path,
34     KeyAttr => {
35     'attachment' => '+name',
36     'meta' => 'name',
37     },
38     ForceArray => [ 'attachment', 'widget' ],
39     ) || die "can't open $path: $!";
40 dpavlin 6
41 dpavlin 18 warn "## $path = ",dump( $ref ) if $debug;
42    
43 dpavlin 6 my $name = $ref->{name} || die "no name in $path";
44 dpavlin 8
45     return if $name =~ m/^TamSystem/;
46    
47 dpavlin 14 my $date = $ref->{meta}->{LastModified}->{value};
48 dpavlin 6 if ( ! $date ) {
49     warn "SKIP: no LastModified in $path $name";
50     return;
51     }
52 dpavlin 17
53 dpavlin 18 my $data;
54 dpavlin 17
55 dpavlin 18 foreach my $w ( @{ $ref->{widgets}->{widget} } ) {
56 dpavlin 6
57 dpavlin 18 warn "## w = ",dump( $w ) if $debug;
58 dpavlin 17
59 dpavlin 18 $data .= "\n----\n" if $data;
60     $data .= $w->{data} || die "no data?";
61     }
62    
63     my $attachments;
64    
65     if ( my $a = $ref->{attachment} ) {
66     foreach my $name ( keys %$a ) {
67     my $full_path = $path;
68     $full_path =~ s,pages/,attachments/,;
69     $full_path .= '.' . $name;
70     die "$full_path doesn't exist" unless -e $full_path;
71     push @$attachments, {
72     full_path => $full_path,
73     name => ( $name || $a->{$name}->{desc} || 'noname' ),
74     };
75     }
76     }
77    
78 dpavlin 14 $page->{ $name } = {
79     content => convert_markup( $data ),
80 dpavlin 15 date => convert_date( $date ),
81 dpavlin 18 attachments => $attachments,
82 dpavlin 14 };
83 dpavlin 13
84 dpavlin 11 $name =~ s,^.+/([^/]+)$,$1,;
85     push @page_names, $name;
86    
87 dpavlin 5 },
88 dpavlin 18 no_chdir=>1,
89 dpavlin 5 }, shift @ARGV || '.');
90    
91 dpavlin 6 my @pages = ( keys %$page );
92 dpavlin 5
93 dpavlin 11 warn "found following pages: ", join(", ", @page_names),"\n";
94 dpavlin 5
95 dpavlin 11 my $page_link_re = '\b(' . join('|', @page_names) . ')\b';
96    
97 dpavlin 5 my $Rester = Socialtext::Resting->new(
98     username => 'tamtam',
99     password => 'import',
100     server => 'http://saturn.ffzg.hr/',
101 dpavlin 18 workspace => 'razmjenavjestina',
102 dpavlin 5 );
103     $Rester->put_workspacetag('TamTam');
104    
105 dpavlin 15 sub convert_date {
106     my $date = shift;
107     # return time2str( $date );
108     return strftime('%F %T %z', gmtime( $date ));
109     }
110    
111 dpavlin 5 sub header {
112     my $h = shift;
113     if ( $h =~ m/^(=+)\s+(.+?)\s+\1$/ ) {
114     my $level = length($1);
115     return "\n" . ( '^' x $level ) . " $2\n";
116     } else {
117 dpavlin 11 return $h;
118 dpavlin 5 }
119     }
120    
121     sub surround {
122     my ( $with, $what ) = @_;
123     return $with . $what . $with;
124     }
125    
126 dpavlin 12 sub pre {
127     my $text = shift;
128     $text =~ s/^{{{//;
129     $text =~ s/}}}$//;
130     return '.pre' . $text . '.pre';
131     }
132    
133 dpavlin 14 sub convert_markup {
134     my $body = shift;
135 dpavlin 6
136 dpavlin 5 $body =~ s/\Q[[TableOfContents]]\E/{toc}/gs;
137     $body =~ s/\Q[[BR]]\E/\n/gs;
138     $body =~ s/$RE{balanced}{-begin => "= |== |=== |==== |===== |===== "}{-end => " =| ==| ===| ====| ====="}{-keep}/header($1)/gse;
139     $body =~ s/''''(.+?)''''/surround('`',$1)/gse;
140     $body =~ s/'''(.+?)'''/surround('*',$1)/gse;
141     $body =~ s/''(.+?)''/surround('_',$1)/gse;
142 dpavlin 12 $body =~ s/$RE{balanced}{-begin => "{{{"}{-end => "}}}"}{-keep}/pre($1)/gse;
143 dpavlin 5
144 dpavlin 9 # fix bullets
145 dpavlin 7 $body =~ s/^\s+([\*])/$1/gm;
146    
147 dpavlin 9 # fix links
148     $body =~ s/\["([^"]+)"\]/[$1]/gs;
149 dpavlin 10 $body =~ s,\[(http://\S+)\s+([^\]]+)\],"$2"<$1>,gs;
150     $body =~ s,\[(http://[^\]]+)\],$1,gs;
151 dpavlin 9
152 dpavlin 14 # fix hr
153     $body =~ s,(\S+)----,$1\n----,gs;
154     $body =~ s,----(\S+),----\n$1,gs;
155 dpavlin 5
156 dpavlin 18 # attachments
157 dpavlin 19 $body =~ s,\[attachment:([^\]]+)(gif|png|jpg|jpeg)\],{image: $1$2},gis;
158 dpavlin 18 $body =~ s,\[attachment:([^\]]+)\],{file: $1},gs;
159    
160 dpavlin 14 return $body;
161     }
162    
163     my $count = 0;
164    
165 dpavlin 18 my $m = File::MMagic::XS->new;
166    
167 dpavlin 14 foreach my $name ( keys %$page ) {
168     last if $count++ == $max;
169    
170     my $p = $page->{$name};
171 dpavlin 17
172     warn "## $name = ",dump( $p ) if $debug;
173    
174 dpavlin 14 my $body = $p->{content} || die "no content?";
175     my $date = $p->{date} || die "no date?";
176    
177 dpavlin 6 my @tags = ( 'TamTam' );
178 dpavlin 5
179 dpavlin 15 my $full_name = $name;
180    
181 dpavlin 6 if ( $name =~ m!/! ) {
182     my @page_tags = split(m!/!, $name);
183 dpavlin 8 $name = pop @page_tags; # remove page name
184 dpavlin 6 push @tags, @page_tags;
185     }
186    
187 dpavlin 14 # link named pages
188 dpavlin 16 $body =~ s,\b$page_link_re\b,[$1],gs;
189     $body =~ s,``,,gs;
190 dpavlin 14
191     $body .= qq{
192     ----
193    
194 dpavlin 18 Original http://www.razmjenavjestina.org/$full_name {date: $date}
195 dpavlin 14 };
196    
197     Encode::_utf8_off( $body );
198    
199 dpavlin 18 $Rester->put_page( $name, { content => $body, date => $date });
200     print "$name $date\n";
201 dpavlin 6 foreach ( @tags ) {
202     $Rester->put_pagetag( $name, $_ );
203 dpavlin 18 print "+ tag $_\n";
204 dpavlin 6 }
205 dpavlin 18 foreach my $a ( @{ $p->{attachments} } ) {
206     my $type = $m->get_mime( $a->{full_path} );
207     my $content = read_file( $a->{full_path} );
208     print "+ attachment ", $a->{name}," $type ", length($content), " bytes\n";
209     $Rester->post_attachment($name, $a->{name}, $content, $type );
210     }
211 dpavlin 6
212 dpavlin 5 }
213    

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26