--- tamtam/tamtam2socialtext.pl 2007/12/12 13:22:32 9 +++ tamtam/tamtam2socialtext.pl 2007/12/12 18:46:04 17 @@ -9,9 +9,11 @@ use Socialtext::Resting; use Encode; use HTTP::Date; +use POSIX qw/strftime/; use Data::Dump qw/dump/; -my $max = 5; +my $debug = 1; +my $max = 999; my $page; my $page_date; @@ -30,25 +32,40 @@ return if $name =~ m/^TamSystem/; - my $date = $ref->{meta}->{LastModified}; + my $date = $ref->{meta}->{LastModified}->{value}; if ( ! $date ) { warn "SKIP: no LastModified in $path $name"; return; } + + my $w = $ref->{widgets}->{widget}; + my $data = - $ref->{widgets}->{widget}->{data} || - $ref->{widgets}->{widget}->{Body}->{data} || + defined( $w->{data} ) ? $w->{data} : + defined( $w->{Body}->{date} ) ? $w->{Body}->{date} : die "no data in $path ",dump( $ref ); - $page->{ $name } = [ $data, $date ]; + # empty data is returned like empty hash. yack. + $data = "\n" if ref($data) eq 'HASH' and ! keys %$data; - #warn dump( $ref ); + $page->{ $name } = { + content => convert_markup( $data ), + date => convert_date( $date ), + }; + + # strip path from page name + $name =~ s,^.+/([^/]+)$,$1,; + push @page_names, $name; + + warn "## $path = ",dump( $ref ) if $debug; }, }, shift @ARGV || '.'); my @pages = ( keys %$page ); -warn "found following pages: ", join(", ", @pages),"\n"; +warn "found following pages: ", join(", ", @page_names),"\n"; + +my $page_link_re = '\b(' . join('|', @page_names) . ')\b'; my $Rester = Socialtext::Resting->new( username => 'tamtam', @@ -58,13 +75,19 @@ $Rester->workspace('razmjenavjestina'); $Rester->put_workspacetag('TamTam'); +sub convert_date { + my $date = shift; +# return time2str( $date ); + return strftime('%F %T %z', gmtime( $date )); +} + sub header { my $h = shift; if ( $h =~ m/^(=+)\s+(.+?)\s+\1$/ ) { my $level = length($1); return "\n" . ( '^' x $level ) . " $2\n"; } else { - die "can't parse header: $h"; + return $h; } } @@ -73,13 +96,15 @@ return $with . $what . $with; } -my $count = 0; - -foreach my $name ( keys %$page ) { - last if $count++ == $max; +sub pre { + my $text = shift; + $text =~ s/^{{{//; + $text =~ s/}}}$//; + return '.pre' . $text . '.pre'; +} - my ( $body, $date ) = @{ $page->{$name} }; - $date = time2str( $date ); +sub convert_markup { + my $body = shift; $body =~ s/\Q[[TableOfContents]]\E/{toc}/gs; $body =~ s/\Q[[BR]]\E/\n/gs; @@ -87,28 +112,62 @@ $body =~ s/''''(.+?)''''/surround('`',$1)/gse; $body =~ s/'''(.+?)'''/surround('*',$1)/gse; $body =~ s/''(.+?)''/surround('_',$1)/gse; + $body =~ s/$RE{balanced}{-begin => "{{{"}{-end => "}}}"}{-keep}/pre($1)/gse; # fix bullets $body =~ s/^\s+([\*])/$1/gm; # fix links $body =~ s/\["([^"]+)"\]/[$1]/gs; + $body =~ s,\[(http://\S+)\s+([^\]]+)\],"$2"<$1>,gs; + $body =~ s,\[(http://[^\]]+)\],$1,gs; - Encode::_utf8_off( $body ); + # fix hr + $body =~ s,(\S+)----,$1\n----,gs; + $body =~ s,----(\S+),----\n$1,gs; + + return $body; +} + +my $count = 0; + +foreach my $name ( keys %$page ) { + last if $count++ == $max; + + my $p = $page->{$name}; + + warn "## $name = ",dump( $p ) if $debug; + + my $body = $p->{content} || die "no content?"; + my $date = $p->{date} || die "no date?"; my @tags = ( 'TamTam' ); + my $full_name = $name; + if ( $name =~ m!/! ) { my @page_tags = split(m!/!, $name); $name = pop @page_tags; # remove page name push @tags, @page_tags; } + # link named pages + $body =~ s,\b$page_link_re\b,[$1],gs; + $body =~ s,``,,gs; + + $body .= qq{ +---- + +Original sa http://www.razmjenavjestina.org/$full_name zadnja promjena {date: $date} +}; + + Encode::_utf8_off( $body ); + $Rester->put_page( $name, { content => $body, date => $date, }); - print "+ $name\n"; + print "+ $name $date\n"; foreach ( @tags ) { $Rester->put_pagetag( $name, $_ ); print "+ $name [$_]\n";