/[socialtext-import]/tamtam/tamtam2socialtext.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /tamtam/tamtam2socialtext.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 17 - (hide annotations)
Wed Dec 12 18:46:04 2007 UTC (16 years, 3 months ago) by dpavlin
File MIME type: text/plain
File size: 3647 byte(s)
better support for empty pages
1 dpavlin 5 #!/usr/bin/perl
2    
3     use warnings;
4     use strict;
5    
6     use XML::Simple;
7     use File::Find;
8     use Regexp::Common qw/balanced/;
9     use Socialtext::Resting;
10     use Encode;
11 dpavlin 6 use HTTP::Date;
12 dpavlin 15 use POSIX qw/strftime/;
13 dpavlin 5 use Data::Dump qw/dump/;
14    
15 dpavlin 17 my $debug = 1;
16 dpavlin 11 my $max = 999;
17 dpavlin 6
18 dpavlin 5 my $page;
19 dpavlin 6 my $page_date;
20 dpavlin 5
21 dpavlin 9 my @page_names;
22    
23 dpavlin 5 find({
24     wanted => sub {
25     my $path = $File::Find::name;
26     return unless -f $path;
27 dpavlin 6
28 dpavlin 5 warn "# $path\n";
29     my $ref = XMLin( $path ) || die "can't open $path: $!";
30 dpavlin 6
31     my $name = $ref->{name} || die "no name in $path";
32 dpavlin 8
33     return if $name =~ m/^TamSystem/;
34    
35 dpavlin 14 my $date = $ref->{meta}->{LastModified}->{value};
36 dpavlin 6 if ( ! $date ) {
37     warn "SKIP: no LastModified in $path $name";
38     return;
39     }
40 dpavlin 17
41     my $w = $ref->{widgets}->{widget};
42    
43 dpavlin 6 my $data =
44 dpavlin 17 defined( $w->{data} ) ? $w->{data} :
45     defined( $w->{Body}->{date} ) ? $w->{Body}->{date} :
46 dpavlin 6 die "no data in $path ",dump( $ref );
47    
48 dpavlin 17 # empty data is returned like empty hash. yack.
49     $data = "\n" if ref($data) eq 'HASH' and ! keys %$data;
50    
51 dpavlin 14 $page->{ $name } = {
52     content => convert_markup( $data ),
53 dpavlin 15 date => convert_date( $date ),
54 dpavlin 14 };
55 dpavlin 13
56 dpavlin 11 # strip path from page name
57     $name =~ s,^.+/([^/]+)$,$1,;
58     push @page_names, $name;
59    
60 dpavlin 17 warn "## $path = ",dump( $ref ) if $debug;
61 dpavlin 5 },
62     }, shift @ARGV || '.');
63    
64 dpavlin 6 my @pages = ( keys %$page );
65 dpavlin 5
66 dpavlin 11 warn "found following pages: ", join(", ", @page_names),"\n";
67 dpavlin 5
68 dpavlin 11 my $page_link_re = '\b(' . join('|', @page_names) . ')\b';
69    
70 dpavlin 5 my $Rester = Socialtext::Resting->new(
71     username => 'tamtam',
72     password => 'import',
73     server => 'http://saturn.ffzg.hr/',
74     );
75     $Rester->workspace('razmjenavjestina');
76     $Rester->put_workspacetag('TamTam');
77    
78 dpavlin 15 sub convert_date {
79     my $date = shift;
80     # return time2str( $date );
81     return strftime('%F %T %z', gmtime( $date ));
82     }
83    
84 dpavlin 5 sub header {
85     my $h = shift;
86     if ( $h =~ m/^(=+)\s+(.+?)\s+\1$/ ) {
87     my $level = length($1);
88     return "\n" . ( '^' x $level ) . " $2\n";
89     } else {
90 dpavlin 11 return $h;
91 dpavlin 5 }
92     }
93    
94     sub surround {
95     my ( $with, $what ) = @_;
96     return $with . $what . $with;
97     }
98    
99 dpavlin 12 sub pre {
100     my $text = shift;
101     $text =~ s/^{{{//;
102     $text =~ s/}}}$//;
103     return '.pre' . $text . '.pre';
104     }
105    
106 dpavlin 14 sub convert_markup {
107     my $body = shift;
108 dpavlin 6
109 dpavlin 5 $body =~ s/\Q[[TableOfContents]]\E/{toc}/gs;
110     $body =~ s/\Q[[BR]]\E/\n/gs;
111     $body =~ s/$RE{balanced}{-begin => "= |== |=== |==== |===== |===== "}{-end => " =| ==| ===| ====| ====="}{-keep}/header($1)/gse;
112     $body =~ s/''''(.+?)''''/surround('`',$1)/gse;
113     $body =~ s/'''(.+?)'''/surround('*',$1)/gse;
114     $body =~ s/''(.+?)''/surround('_',$1)/gse;
115 dpavlin 12 $body =~ s/$RE{balanced}{-begin => "{{{"}{-end => "}}}"}{-keep}/pre($1)/gse;
116 dpavlin 5
117 dpavlin 9 # fix bullets
118 dpavlin 7 $body =~ s/^\s+([\*])/$1/gm;
119    
120 dpavlin 9 # fix links
121     $body =~ s/\["([^"]+)"\]/[$1]/gs;
122 dpavlin 10 $body =~ s,\[(http://\S+)\s+([^\]]+)\],"$2"<$1>,gs;
123     $body =~ s,\[(http://[^\]]+)\],$1,gs;
124 dpavlin 9
125 dpavlin 14 # fix hr
126     $body =~ s,(\S+)----,$1\n----,gs;
127     $body =~ s,----(\S+),----\n$1,gs;
128 dpavlin 5
129 dpavlin 14 return $body;
130     }
131    
132     my $count = 0;
133    
134     foreach my $name ( keys %$page ) {
135     last if $count++ == $max;
136    
137     my $p = $page->{$name};
138 dpavlin 17
139     warn "## $name = ",dump( $p ) if $debug;
140    
141 dpavlin 14 my $body = $p->{content} || die "no content?";
142     my $date = $p->{date} || die "no date?";
143    
144 dpavlin 6 my @tags = ( 'TamTam' );
145 dpavlin 5
146 dpavlin 15 my $full_name = $name;
147    
148 dpavlin 6 if ( $name =~ m!/! ) {
149     my @page_tags = split(m!/!, $name);
150 dpavlin 8 $name = pop @page_tags; # remove page name
151 dpavlin 6 push @tags, @page_tags;
152     }
153    
154 dpavlin 14 # link named pages
155 dpavlin 16 $body =~ s,\b$page_link_re\b,[$1],gs;
156     $body =~ s,``,,gs;
157 dpavlin 14
158     $body .= qq{
159     ----
160    
161 dpavlin 15 Original sa http://www.razmjenavjestina.org/$full_name zadnja promjena {date: $date}
162 dpavlin 14 };
163    
164     Encode::_utf8_off( $body );
165    
166 dpavlin 6 $Rester->put_page( $name, {
167     content => $body,
168     date => $date,
169     });
170 dpavlin 14 print "+ $name $date\n";
171 dpavlin 6 foreach ( @tags ) {
172     $Rester->put_pagetag( $name, $_ );
173     print "+ $name [$_]\n";
174     }
175    
176 dpavlin 5 }
177    

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26