/[socialtext-import]/tamtam/tamtam2socialtext.pl

This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!

Diff of /tamtam/tamtam2socialtext.pl

Parent Directory | Revision Log | View Patch

-revision 8 by dpavlin,
Wed Dec 12 13:17:15 2007 UTC
+revision 18 by dpavlin,
Wed Dec 12 23:01:18 2007 UTC
 Line 9 
 use Regexp::Common qw/balanced/;
  use Socialtext::Resting;
  use Encode;
  use HTTP::Date;
+ use POSIX qw/strftime/;
+ use File::Slurp;
+ use File::MMagic::XS;
  use Data::Dump qw/dump/;
- my $max = 5;
+ my $debug = 0;
+ my $max = 999;
  my $page;
  my $page_date;
+ my @page_names;
+ print "Collecting pages...\n";
  find({
          wanted => sub {
                  my $path = $File::Find::name;
                  return unless -f $path;
-                 warn "# $path\n";
+                 warn "+ $path\n";
-                 my $ref = XMLin( $path ) || die "can't open $path: $!";
+                 my $ref = XMLin( $path,
+                         KeyAttr => {
+                                 'attachment' => '+name',
+                                 'meta' => 'name',
+                         },
+                         ForceArray => [ 'attachment', 'widget' ],
+                 ) || die "can't open $path: $!";
+                 warn "## $path = ",dump( $ref ) if $debug;
                  my $name = $ref->{name} || die "no name in $path";
                  return if $name =~ m/^TamSystem/;
-                 my $date =  $ref->{meta}->{LastModified};
+                 my $date =  $ref->{meta}->{LastModified}->{value};
                  if ( ! $date ) {
                          warn "SKIP: no LastModified in $path $name";
                          return;
                  }
-                 my $data =
-                         $ref->{widgets}->{widget}->{data} ||
-                         $ref->{widgets}->{widget}->{Body}->{data} ||
-                         die "no data in $path ",dump( $ref );
-                 $page->{ $name } = [ $data, $date ];
+                 my $data;
+                 foreach my $w ( @{ $ref->{widgets}->{widget} } ) {
+                         warn "## w = ",dump( $w ) if $debug;
+                         $data .= "\n----\n" if $data;
+                         $data .= $w->{data} || die "no data?";
+                 }
+                 my $attachments;
+                 if ( my $a = $ref->{attachment} ) {
+                         foreach my $name ( keys %$a ) {
+                                 my $full_path = $path;
+                                 $full_path =~ s,pages/,attachments/,;
+                                 $full_path .= '.' . $name;
+                                 die "$full_path doesn't exist" unless -e $full_path;
+                                 push @$attachments, {
+                                         full_path => $full_path,
+                                         name => ( $name || $a->{$name}->{desc} || 'noname' ),
+                                 };
+                         }
+                 }
+                 $page->{ $name } = {
+                         content => convert_markup( $data ),
+                         date => convert_date( $date ),
+                         attachments => $attachments,
+                 };
+                 $name =~ s,^.+/([^/]+)$,$1,;
+                 push @page_names, $name;
-                 #warn dump( $ref );
          },
+         no_chdir=>1,
  }, shift @ARGV || '.');
  my @pages = ( keys %$page );
- warn "found following pages: ", join(", ", @pages),"\n";
+ warn "found following pages: ", join(", ", @page_names),"\n";
+ my $page_link_re = '\b(' . join('|', @page_names) . ')\b';
  my $Rester = Socialtext::Resting->new(
          username => 'tamtam',
          password => 'import',
          server   => 'http://saturn.ffzg.hr/',
+         workspace => 'razmjenavjestina',
  );
- $Rester->workspace('razmjenavjestina');
  $Rester->put_workspacetag('TamTam');
+ sub convert_date {
+         my $date = shift;
+ #       return time2str( $date );
+         return strftime('%F %T %z', gmtime( $date ));
+ }
  sub header {
          my $h = shift;
          if ( $h =~ m/^(=+)\s+(.+?)\s+\1$/ ) {
                  my $level = length($1);
                  return "\n" . ( '^' x $level ) . " $2\n";
          } else {
-                 die "can't parse header: $h";
+                 return $h;
          }
  }
-Line 71 
 sub surround {
+Line 123 
 sub surround {
          return $with . $what . $with;
  }
- my $count = 0;
+ sub pre {
+         my $text = shift;
- foreach my $name ( keys %$page ) {
+         $text =~ s/^{{{//;
-         last if $count++ == $max;
+         $text =~ s/}}}$//;
+         return '.pre' . $text . '.pre';
+ }
-         my ( $body, $date ) = @{ $page->{$name} };
+ sub convert_markup {
-         $date = time2str( $date );
+         my $body = shift;
          $body =~ s/\Q[[TableOfContents]]\E/{toc}/gs;
          $body =~ s/\Q[[BR]]\E/\n/gs;
-Line 85 
 foreach my $name ( keys %$page ) {
+Line 139 
 foreach my $name ( keys %$page ) {
          $body =~ s/''''(.+?)''''/surround('`',$1)/gse;
          $body =~ s/'''(.+?)'''/surround('*',$1)/gse;
          $body =~ s/''(.+?)''/surround('_',$1)/gse;
+         $body =~ s/$RE{balanced}{-begin => "{{{"}{-end => "}}}"}{-keep}/pre($1)/gse;
+         # fix bullets
          $body =~ s/^\s+([\*])/$1/gm;
-         Encode::_utf8_off( $body );
+         # fix links
+         $body =~ s/\["([^"]+)"\]/[$1]/gs;
+         $body =~ s,\[(http://\S+)\s+([^\]]+)\],"$2"<$1>,gs;
+         $body =~ s,\[(http://[^\]]+)\],$1,gs;
+         # fix hr
+         $body =~ s,(\S+)----,$1\n----,gs;
+         $body =~ s,----(\S+),----\n$1,gs;
+         # attachments
+         $body =~ s,\[attachment:([^\]]+)\],{file: $1},gs;
+         return $body;
+ }
+ my $count = 0;
+ my $m = File::MMagic::XS->new;
+ foreach my $name ( keys %$page ) {
+         last if $count++ == $max;
+         my $p = $page->{$name};
+         warn "## $name = ",dump( $p ) if $debug;
+         my $body = $p->{content} || die "no content?";
+         my $date = $p->{date} || die "no date?";
          my @tags = ( 'TamTam' );
+         my $full_name = $name;
          if ( $name =~ m!/! ) {
                  my @page_tags = split(m!/!, $name);
                  $name = pop @page_tags; # remove page name
                  push @tags, @page_tags;
          }
-         $Rester->put_page( $name, {
+         # link named pages
-                 content => $body,
+         $body =~ s,\b$page_link_re\b,[$1],gs;
-                 date => $date,
+         $body =~ s,``,,gs;
-         });
-         print "+ $name\n";
+         $body .= qq{
+ ----
+ Original http://www.razmjenavjestina.org/$full_name {date: $date}
+ };
+         Encode::_utf8_off( $body );
+         $Rester->put_page( $name, { content => $body, date => $date });
+         print "$name $date\n";
          foreach ( @tags ) {
                  $Rester->put_pagetag( $name, $_ );
-                 print "+ $name [$_]\n";
+                 print "+ tag $_\n";
+         }
+         foreach my $a ( @{ $p->{attachments} } ) {
+                 my $type = $m->get_mime( $a->{full_path} );
+                 my $content = read_file( $a->{full_path} );
+                 print "+ attachment ", $a->{name}," $type ", length($content), " bytes\n";
+                 $Rester->post_attachment($name, $a->{name}, $content, $type );
          }
  }

 Legend:
- Removed from v.8
  Changed lines
+ Added in v.18

	ViewVC Help
Powered by ViewVC 1.1.26