--- tamtam/tamtam2socialtext.pl 2007/12/12 18:46:04 17 +++ tamtam/tamtam2socialtext.pl 2007/12/12 23:01:18 18 @@ -10,9 +10,11 @@ use Encode; use HTTP::Date; use POSIX qw/strftime/; +use File::Slurp; +use File::MMagic::XS; use Data::Dump qw/dump/; -my $debug = 1; +my $debug = 0; my $max = 999; my $page; @@ -20,13 +22,23 @@ my @page_names; +print "Collecting pages...\n"; + find({ wanted => sub { my $path = $File::Find::name; return unless -f $path; - warn "# $path\n"; - my $ref = XMLin( $path ) || die "can't open $path: $!"; + warn "+ $path\n"; + my $ref = XMLin( $path, + KeyAttr => { + 'attachment' => '+name', + 'meta' => 'name', + }, + ForceArray => [ 'attachment', 'widget' ], + ) || die "can't open $path: $!"; + + warn "## $path = ",dump( $ref ) if $debug; my $name = $ref->{name} || die "no name in $path"; @@ -38,27 +50,42 @@ return; } - my $w = $ref->{widgets}->{widget}; + my $data; + + foreach my $w ( @{ $ref->{widgets}->{widget} } ) { + + warn "## w = ",dump( $w ) if $debug; + + $data .= "\n----\n" if $data; + $data .= $w->{data} || die "no data?"; + } - my $data = - defined( $w->{data} ) ? $w->{data} : - defined( $w->{Body}->{date} ) ? $w->{Body}->{date} : - die "no data in $path ",dump( $ref ); + my $attachments; - # empty data is returned like empty hash. yack. - $data = "\n" if ref($data) eq 'HASH' and ! keys %$data; + if ( my $a = $ref->{attachment} ) { + foreach my $name ( keys %$a ) { + my $full_path = $path; + $full_path =~ s,pages/,attachments/,; + $full_path .= '.' . $name; + die "$full_path doesn't exist" unless -e $full_path; + push @$attachments, { + full_path => $full_path, + name => ( $name || $a->{$name}->{desc} || 'noname' ), + }; + } + } $page->{ $name } = { content => convert_markup( $data ), date => convert_date( $date ), + attachments => $attachments, }; - # strip path from page name $name =~ s,^.+/([^/]+)$,$1,; push @page_names, $name; - warn "## $path = ",dump( $ref ) if $debug; }, + no_chdir=>1, }, shift @ARGV || '.'); my @pages = ( keys %$page ); @@ -71,8 +98,8 @@ username => 'tamtam', password => 'import', server => 'http://saturn.ffzg.hr/', + workspace => 'razmjenavjestina', ); -$Rester->workspace('razmjenavjestina'); $Rester->put_workspacetag('TamTam'); sub convert_date { @@ -126,11 +153,16 @@ $body =~ s,(\S+)----,$1\n----,gs; $body =~ s,----(\S+),----\n$1,gs; + # attachments + $body =~ s,\[attachment:([^\]]+)\],{file: $1},gs; + return $body; } my $count = 0; +my $m = File::MMagic::XS->new; + foreach my $name ( keys %$page ) { last if $count++ == $max; @@ -158,19 +190,22 @@ $body .= qq{ ---- -Original sa http://www.razmjenavjestina.org/$full_name zadnja promjena {date: $date} +Original http://www.razmjenavjestina.org/$full_name {date: $date} }; Encode::_utf8_off( $body ); - $Rester->put_page( $name, { - content => $body, - date => $date, - }); - print "+ $name $date\n"; + $Rester->put_page( $name, { content => $body, date => $date }); + print "$name $date\n"; foreach ( @tags ) { $Rester->put_pagetag( $name, $_ ); - print "+ $name [$_]\n"; + print "+ tag $_\n"; + } + foreach my $a ( @{ $p->{attachments} } ) { + my $type = $m->get_mime( $a->{full_path} ); + my $content = read_file( $a->{full_path} ); + print "+ attachment ", $a->{name}," $type ", length($content), " bytes\n"; + $Rester->post_attachment($name, $a->{name}, $content, $type ); } }