1 |
#!/usr/bin/perl |
2 |
|
3 |
use warnings; |
4 |
use strict; |
5 |
use autodie; |
6 |
use Digest::MD5 qw(md5_hex); |
7 |
use Data::Dump qw(dump); |
8 |
|
9 |
my @files = @ARGV; |
10 |
@files = glob '/tmp/isi.*-*.txt' unless @files; |
11 |
|
12 |
my $path = '/tmp/isi.full.txt'; |
13 |
open(my $out_fh, '>', $path); |
14 |
print $out_fh "FN ISI Export Format\nVR 1.0\n"; |
15 |
|
16 |
my $rec; |
17 |
my $nr = 0; |
18 |
|
19 |
my $md5; |
20 |
|
21 |
my $report; |
22 |
|
23 |
foreach my $file ( sort { |
24 |
my $a_r = $1 if $a =~ m{(\d+)-\d+}; |
25 |
my $b_r = $1 if $b =~ m{(\d+)-\d+}; |
26 |
$a_r <=> $b_r; |
27 |
} @files ) { |
28 |
warn $file; |
29 |
|
30 |
push @{ $report->{files} }, $file; |
31 |
|
32 |
open(my $fh, '<', $file); |
33 |
while(<$fh>) { |
34 |
next if m/^(FN|VR|EF)/; |
35 |
|
36 |
if ( ! m/^[\r\n]+$/s ) { |
37 |
$rec .= $_; |
38 |
} else { |
39 |
$nr++; |
40 |
my $digest = md5_hex $rec; |
41 |
if ( my $times = $md5->{$digest} ) { |
42 |
warn "dumplicate $nr $digest $times\n"; |
43 |
$report->{file}->{$file}->{duplicates}++; |
44 |
} else { |
45 |
print $out_fh $rec . $_; |
46 |
$report->{file}->{$file}->{records}++; |
47 |
$report->{total_records}++; |
48 |
} |
49 |
|
50 |
$md5->{$digest}++; |
51 |
$rec = ''; |
52 |
} |
53 |
} |
54 |
} |
55 |
|
56 |
print $out_fh "EF\n"; |
57 |
close $out_fh; |
58 |
|
59 |
|
60 |
warn "# $path ", -s $path, dump $report; |
61 |
|