--- tamtam/tamtam2socialtext.pl 2007/12/12 13:09:29 7 +++ tamtam/tamtam2socialtext.pl 2007/12/12 17:14:58 13 @@ -11,11 +11,13 @@ use HTTP::Date; use Data::Dump qw/dump/; -my $max = 2; +my $max = 999; my $page; my $page_date; +my @page_names; + find({ wanted => sub { my $path = $File::Find::name; @@ -25,6 +27,9 @@ my $ref = XMLin( $path ) || die "can't open $path: $!"; my $name = $ref->{name} || die "no name in $path"; + + return if $name =~ m/^TamSystem/; + my $date = $ref->{meta}->{LastModified}; if ( ! $date ) { warn "SKIP: no LastModified in $path $name"; @@ -35,15 +40,27 @@ $ref->{widgets}->{widget}->{Body}->{data} || die "no data in $path ",dump( $ref ); + $data .= qq{ +---- + +Original: http://www.razmjenavjestina.org/$path +}; + $page->{ $name } = [ $data, $date ]; - #warn dump( $ref ); + # strip path from page name + $name =~ s,^.+/([^/]+)$,$1,; + push @page_names, $name; + +# warn dump( $ref ); }, }, shift @ARGV || '.'); my @pages = ( keys %$page ); -warn "found following pages: ", join(", ", @pages),"\n"; +warn "found following pages: ", join(", ", @page_names),"\n"; + +my $page_link_re = '\b(' . join('|', @page_names) . ')\b'; my $Rester = Socialtext::Resting->new( username => 'tamtam', @@ -59,7 +76,7 @@ my $level = length($1); return "\n" . ( '^' x $level ) . " $2\n"; } else { - die "can't parse header: $h"; + return $h; } } @@ -68,6 +85,13 @@ return $with . $what . $with; } +sub pre { + my $text = shift; + $text =~ s/^{{{//; + $text =~ s/}}}$//; + return '.pre' . $text . '.pre'; +} + my $count = 0; foreach my $name ( keys %$page ) { @@ -82,18 +106,25 @@ $body =~ s/''''(.+?)''''/surround('`',$1)/gse; $body =~ s/'''(.+?)'''/surround('*',$1)/gse; $body =~ s/''(.+?)''/surround('_',$1)/gse; + $body =~ s/$RE{balanced}{-begin => "{{{"}{-end => "}}}"}{-keep}/pre($1)/gse; + # fix bullets $body =~ s/^\s+([\*])/$1/gm; + # fix links + $body =~ s/\["([^"]+)"\]/[$1]/gs; + $body =~ s,\[(http://\S+)\s+([^\]]+)\],"$2"<$1>,gs; + $body =~ s,\[(http://[^\]]+)\],$1,gs; + $body =~ s,$page_link_re,[$1],gs; + Encode::_utf8_off( $body ); my @tags = ( 'TamTam' ); if ( $name =~ m!/! ) { my @page_tags = split(m!/!, $name); - pop @page_tags; # remove page name + $name = pop @page_tags; # remove page name push @tags, @page_tags; - $name =~ s!/!_!g; } $Rester->put_page( $name, {