1 |
#!/usr/bin/perl |
2 |
|
3 |
use warnings; |
4 |
use strict; |
5 |
use autodie; |
6 |
use Digest::MD5 qw(md5_hex); |
7 |
use Digest::SHA1 qw(sha1_hex); |
8 |
use Data::Dump qw(dump); |
9 |
|
10 |
# Digest selection: SHA-1 unless the SHA1 environment variable is set to a
# false value (e.g. SHA1=0), in which case MD5 is used.  A definedness test
# is required here: `$ENV{SHA1} || 1` can never evaluate to false.
my $use_sha1 = defined $ENV{SHA1} ? $ENV{SHA1} : 1;

# Input files come from the command line; without arguments fall back to
# the default glob in /tmp.
my @files = @ARGV;
@files = glob '/tmp/isi.*-*.txt' unless @files;

# Destination of the merged output.
my $path = '/tmp/isi.full.txt';

warn "# ", scalar(@files), " files to $path sha:$use_sha1\n";

# autodie makes open() throw on failure, so no explicit error check is needed.
open(my $out_fh, '>', $path);
print $out_fh "FN ISI Export Format\nVR 1.0\n";

# State shared with the merge loop below.
my $rec = '';    # text of the record currently being accumulated
my $nr  = 0;     # number of records seen so far (duplicates included)

my $md5;         # hashref: record digest => number of times it was seen

my $report;      # hashref: per-file and total statistics dumped at the end
28 |
|
29 |
# Merge all input files into $out_fh, skipping any record whose digest has
# already been seen.  Files are processed in ascending order of the first
# number of the "<from>-<to>" range found in their name; names without such
# a range sort as 0 and keep their relative order.
my $range_start = sub {
    my ($name) = @_;
    return $name =~ m{(\d+)-\d+} ? $1 : 0;
};

# Compute each sort key once (Schwartzian transform) instead of re-running
# the regex inside the comparator; this also avoids `my $x = ... if ...`,
# whose behaviour is undefined according to perlsyn.
my @ordered_files =
    map  { $_->[1] }
    sort { $a->[0] <=> $b->[0] }
    map  { [ $range_start->($_), $_ ] } @files;

foreach my $file (@ordered_files) {
    print STDERR $file;

    push @{ $report->{files} }, $file;

    # Emit the accumulated record followed by $terminator unless an
    # identical record was already written, update the statistics and
    # reset the buffer.  Empty buffers (e.g. consecutive blank lines) are
    # ignored so they are neither counted nor written.
    my $flush_record = sub {
        my ($terminator) = @_;
        return unless defined $rec && length $rec;

        $nr++;
        my $digest = $use_sha1 ? sha1_hex($rec) : md5_hex($rec);
        if ( $md5->{$digest} ) {
            print STDERR 'd';
            $report->{file}->{$file}->{duplicates}++;
        }
        else {
            print $out_fh $rec . $terminator;
            $report->{file}->{$file}->{records}++;
            $report->{total_records}++;
            print STDERR '.';
        }

        $md5->{$digest}++;
        $rec = '';
        return;
    };

    open(my $fh, '<', $file);
    while ( my $line = <$fh> ) {

        # Per-file header/trailer lines are dropped; the merged output
        # gets its own single header and trailer.
        next if $line =~ m/^(?:FN|VR|EF)/;

        if ( $line =~ m/^[\r\n]+$/s ) {
            # A blank line terminates the current record.
            $flush_record->($line);
        }
        else {
            $rec .= $line;
        }
    }
    close($fh);

    # A file whose last record is not followed by a blank line would
    # otherwise leak that record into the first record of the next file
    # (or lose it entirely after the last file), so flush it here.
    if ( defined $rec && length $rec ) {
        $rec .= "\n" unless $rec =~ m/\n\z/;
        $flush_record->("\n");
    }

    warn "\n";
}
63 |
|
64 |
# Terminate the merged export with its end-of-file marker and close the
# output (autodie raises an exception if the close fails).
print {$out_fh} "EF\n";
close($out_fh);

# Summary on STDERR: output path, its size in bytes, then the dumped
# statistics structure.
my $merged_size = -s $path;
warn "# $path ", $merged_size, dump($report);
69 |
|