/[webpac2]/trunk/bin/isi-merge.pl
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/bin/isi-merge.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1342 - (show annotations)
Fri Oct 15 15:30:26 2010 UTC (13 years, 6 months ago) by dpavlin
File MIME type: text/plain
File size: 1230 byte(s)
added SHA1 hash
1 #!/usr/bin/perl
2
3 use warnings;
4 use strict;
5 use autodie;
6 use Digest::MD5 qw(md5_hex);
7 use Digest::SHA1 qw(sha1_hex);
8 use Data::Dump qw(dump);
9
# Digest selection.  SHA1 is the default; set SHA1=0 (or SHA1='') in the
# environment to use MD5 instead.  A plain `$ENV{SHA1} || 1` would map every
# false value back to 1 and make the MD5 branch unreachable, so test for
# definedness explicitly.
my $use_sha1 = defined $ENV{SHA1} ? $ENV{SHA1} : 1;

# Input files come from the command line; with no arguments, fall back to
# the default glob in /tmp.
my @files = @ARGV;
@files = glob '/tmp/isi.*-*.txt' unless @files;

# Merged output file.
my $path = '/tmp/isi.full.txt';

warn "# ", scalar(@files), " files to $path sha:$use_sha1\n";

# autodie makes open() throw on failure, so no explicit check is needed.
open( my $out_fh, '>', $path );
print {$out_fh} "FN ISI Export Format\nVR 1.0\n";

# Text of the record currently being accumulated.  Starts as an empty string
# so it is never undef when it is digested or printed.
my $rec = '';

# Number of record terminators processed by the merge loop.
my $nr = 0;

# Seen-digest table: digest => number of times that record was encountered.
# (Named $md5 for historical reasons; it also holds SHA1 digests.)
my $md5;

# Statistics collected by the merge loop and dumped at the end of the run.
my $report;
28
# Merge all input files into $out_fh, writing each distinct record once.
#
# Records are runs of lines terminated by a blank line (only CR/LF
# characters).  Lines starting with FN, VR or EF are dropped from the input;
# the script writes its own FN/VR header and EF trailer.  A record is
# considered a duplicate when its digest has been seen before.

# Numeric sort key for a file name: the first number of the first "N-M"
# pair found in it, or 0 when the name has no such pair.  The original
# comparator used `my $x = $1 if ...`, whose behaviour perlsyn documents as
# undefined, and compared undef for names that did not match.
my $range_start = sub {
    my ($name) = @_;
    return $name =~ m{(\d+)-\d+} ? $1 : 0;
};

# Finish the record accumulated in $rec: write it to the output (followed by
# $terminator) if it is new, otherwise count it as a duplicate of $file.
# Does nothing when no record text has been accumulated, so stray or repeated
# blank lines are not treated as empty records.
my $emit_record = sub {
    my ( $file, $terminator ) = @_;
    return unless defined $rec && length $rec;

    # Only an unterminated last line can lack a newline; normalise it so the
    # digest and the output match a properly terminated copy of the record.
    $rec .= "\n" unless $rec =~ m/\n\z/;

    $nr++;
    my $digest = $use_sha1 ? sha1_hex($rec) : md5_hex($rec);

    # Post-increment returns the previous count: true means already seen.
    if ( $md5->{$digest}++ ) {
        print STDERR 'd';
        $report->{file}->{$file}->{duplicates}++;
    }
    else {
        print {$out_fh} $rec, $terminator;
        $report->{file}->{$file}->{records}++;
        $report->{total_records}++;
        print STDERR '.';
    }

    $rec = '';
};

# Decorate-sort-undecorate so each file's sort key is computed only once.
foreach my $file (
    map  { $_->[1] }
    sort { $a->[0] <=> $b->[0] }
    map  { [ $range_start->($_), $_ ] } @files
) {
    print STDERR $file;

    push @{ $report->{files} }, $file;

    open( my $fh, '<', $file );
    while ( my $line = <$fh> ) {
        next if $line =~ m/^(FN|VR|EF)/;

        if ( $line !~ m/^[\r\n]+$/s ) {
            $rec .= $line;
            next;
        }

        # Blank line: the record is complete.  Reuse the blank line itself
        # as the separator so the input's line endings are preserved.
        $emit_record->( $file, $line );
    }
    close($fh);

    # A file whose last record is not followed by a blank line would
    # otherwise leak that record into the next file's first record (or lose
    # it after the last file), so flush it here.
    $emit_record->( $file, "\n" );

    warn "\n";
}
63
# Terminate the merged export with the EF marker and close the output so
# the size reported below reflects everything written.
print {$out_fh} "EF\n";
close($out_fh);

# Final summary on STDERR: output path, its size in bytes, and the
# collected per-file statistics.
my $size_in_bytes = -s $path;
warn "# $path ", $size_in_bytes, dump($report);
69

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26