/[webpac2]/trunk/bin/isi-merge.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/bin/isi-merge.pl

Parent Directory | Revision Log


Revision 1301 - (hide annotations)
Sun Sep 20 01:21:17 2009 UTC (14 years, 8 months ago) by dpavlin
File MIME type: text/plain
File size: 1077 byte(s)
nicer output for merge with . for record and d for duplicate
#!/usr/bin/perl

# isi-merge.pl - merge ISI export files into a single de-duplicated file
#
# Usage: isi-merge.pl [file ...]
#
# Reads the files given on the command line (default: /tmp/isi.*-*.txt),
# ordered by the first number of the "<from>-<to>" range in each file name,
# and writes every record not seen before to /tmp/isi.full.txt. Records are
# delimited by blank lines and compared by the MD5 digest of their text.
# Progress goes to STDERR: the file name, then '.' for each record written
# and 'd' for each duplicate skipped. A summary is dumped at the end.

use warnings;
use strict;
use autodie;
use Digest::MD5 qw(md5_hex);
use Data::Dump qw(dump);

my @files = @ARGV;
@files = glob '/tmp/isi.*-*.txt' unless @files;

my $path = '/tmp/isi.full.txt';
open(my $out_fh, '>', $path);
print $out_fh "FN ISI Export Format\nVR 1.0\n";

my $md5;       # digest => number of times that record has been seen
my $report;    # statistics dumped to STDERR when the merge is done

# Return the first number of the "<from>-<to>" range in a file name, or 0
# when the name contains no such range, so the sort never compares undef.
sub range_start {
	my ($name) = @_;
	return $name =~ m{(\d+)-\d+} ? $1 : 0;
}

# Write $rec followed by $separator unless an identical record has already
# been seen; update the counters for $file and print the progress character.
sub flush_record {
	my ( $file, $rec, $separator ) = @_;

	my $digest = md5_hex($rec);

	# post-increment: false only the first time this digest shows up
	if ( $md5->{$digest}++ ) {
		print STDERR 'd';
		$report->{file}->{$file}->{duplicates}++;
	} else {
		print $out_fh $rec . $separator;
		$report->{file}->{$file}->{records}++;
		$report->{total_records}++;
		print STDERR '.';
	}

	return;
}

foreach my $file (
	# numeric order by range start; name as tie-break keeps runs repeatable
	sort { range_start($a) <=> range_start($b) or $a cmp $b } @files
) {
	print STDERR $file;

	push @{ $report->{files} }, $file;

	# per-file accumulator: an unterminated record must never leak into
	# the first record of the next file
	my $rec = '';

	open(my $fh, '<', $file);
	while ( my $line = <$fh> ) {
		# per-file header/trailer lines; the merged file gets its own
		next if $line =~ m/^(FN|VR|EF)/;

		if ( $line !~ m/^[\r\n]+$/s ) {
			$rec .= $line;
			next;
		}

		# a blank line terminates the record; leading or repeated blank
		# lines carry no record and are ignored
		next if $rec eq '';

		flush_record( $file, $rec, $line );
		$rec = '';
	}
	close $fh;

	# the file ended without a blank line after its last record
	if ( $rec ne '' ) {
		# normalise the line ending so the digest matches the same
		# record read from a properly terminated file
		$rec .= "\n" unless $rec =~ m/\n\z/;
		flush_record( $file, $rec, "\n" );
	}

	print STDERR "\n";
}

print $out_fh "EF\n";
close $out_fh;

# trailing newline keeps warn from appending " at ... line N."
warn "# $path ", -s $path, " ", dump($report), "\n";

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26