--- tamtam/tamtam2socialtext.pl 2007/12/12 12:48:42 6 +++ tamtam/tamtam2socialtext.pl 2007/12/12 13:28:40 10 @@ -11,11 +11,13 @@ use HTTP::Date; use Data::Dump qw/dump/; -my $max = 2; +my $max = 5; my $page; my $page_date; +my @page_names; + find({ wanted => sub { my $path = $File::Find::name; @@ -25,6 +27,9 @@ my $ref = XMLin( $path ) || die "can't open $path: $!"; my $name = $ref->{name} || die "no name in $path"; + + return if $name =~ m/^TamSystem/; + my $date = $ref->{meta}->{LastModified}; if ( ! $date ) { warn "SKIP: no LastModified in $path $name"; @@ -83,15 +88,22 @@ $body =~ s/'''(.+?)'''/surround('*',$1)/gse; $body =~ s/''(.+?)''/surround('_',$1)/gse; + # fix bullets + $body =~ s/^\s+([\*])/$1/gm; + + # fix links + $body =~ s/\["([^"]+)"\]/[$1]/gs; + $body =~ s,\[(http://\S+)\s+([^\]]+)\],"$2"<$1>,gs; + $body =~ s,\[(http://[^\]]+)\],$1,gs; + Encode::_utf8_off( $body ); my @tags = ( 'TamTam' ); if ( $name =~ m!/! ) { my @page_tags = split(m!/!, $name); - pop @page_tags; # remove page name + $name = pop @page_tags; # remove page name push @tags, @page_tags; - $name =~ s!/!_!g; } $Rester->put_page( $name, {