/[webpac2]/trunk/bin/isi-merge.pl
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/bin/isi-merge.pl

Parent Directory | Revision Log


Revision 1342 - (hide annotations)
Fri Oct 15 15:30:26 2010 UTC (13 years, 7 months ago) by dpavlin
File MIME type: text/plain
File size: 1230 byte(s)
added SHA1 hash
#!/usr/bin/perl

# isi-merge.pl - merge several ISI export files into a single file,
# writing each distinct record only once.
#
# Usage: isi-merge.pl [file ...]
#
# Without arguments the files matching /tmp/isi.*-*.txt are merged.
# Input files are processed in numeric order of the first number of the
# "N-M" range found in their name.  The result is written to
# /tmp/isi.full.txt with a fresh FN/VR header and a closing EF line.
#
# Records are separated by blank lines.  Two records are considered
# duplicates when their text has the same digest: SHA1 by default, MD5
# when the SHA1 environment variable is set to a false value (SHA1=0).
#
# Progress is reported on STDERR: the file name, then '.' for every
# record written and 'd' for every duplicate skipped.

use warnings;
use strict;
use autodie;
use Digest::MD5 qw(md5_hex);
use Digest::SHA1 qw(sha1_hex);
use Data::Dump qw(dump);

# Defined-or, so that an explicit SHA1=0 really selects MD5; with "||"
# the value could never be false.
my $use_sha1 = $ENV{SHA1} // 1;

my @files = @ARGV;
@files = glob '/tmp/isi.*-*.txt' unless @files;

my $path = '/tmp/isi.full.txt';

warn "# ", scalar(@files), " files to $path sha:$use_sha1\n";

open(my $out_fh, '>', $path);
print $out_fh "FN ISI Export Format\nVR 1.0\n";

my %seen;      # digest => number of times that record text was met
my $report;    # statistics dumped at the end of the run

foreach my $file ( sort { _range_start($a) <=> _range_start($b) } @files ) {
	print STDERR $file;

	push @{ $report->{files} }, $file;

	# The pending record is per file, so an unterminated record can
	# never be glued onto the first record of the next file.
	my $rec = '';

	open(my $fh, '<', $file);
	while ( my $line = <$fh> ) {
		# every input carries its own header/footer; we write ours
		next if $line =~ m/^(?:FN|VR|EF)/;

		if ( $line !~ m/^[\r\n]+$/s ) {
			$rec .= $line;
			next;
		}

		# blank line: end of record, unless nothing was collected
		# (leading or repeated blank lines are not records)
		next if $rec eq '';

		_emit_record( $file, $rec, $line );
		$rec = '';
	}
	close $fh;

	# A file may end without the blank line that closes its last
	# record; flush it instead of losing it.
	if ( $rec ne '' ) {
		my $eol = $rec =~ m/\r\n/ ? "\r\n" : "\n";
		$rec .= $eol unless $rec =~ m/\n\z/;
		_emit_record( $file, $rec, $eol );
	}

	warn "\n";
}

print $out_fh "EF\n";
close $out_fh;

warn "# $path ", ( -s $path ), dump($report);

# Return the first number of the "N-M" range in a file name, or 0 when
# the name has no such range (keeps the numeric sort free of undef).
sub _range_start {
	my ($name) = @_;
	return $name =~ m{(\d+)-\d+} ? $1 : 0;
}

# Write $rec followed by $separator to the output unless a record with
# the same digest was already seen; update the per-file statistics and
# print the progress marker in both cases.
sub _emit_record {
	my ( $file, $rec, $separator ) = @_;

	my $digest = $use_sha1 ? sha1_hex($rec) : md5_hex($rec);

	if ( $seen{$digest}++ ) {
		print STDERR 'd';
		$report->{file}->{$file}->{duplicates}++;
	} else {
		print $out_fh $rec . $separator;
		$report->{file}->{$file}->{records}++;
		$report->{total_records}++;
		print STDERR '.';
	}

	return;
}

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26