#!/usr/bin/perl

# Merge several ISI export files into a single file, /tmp/isi.full.txt,
# dropping every record whose text is byte-identical to a record that has
# already been written.
#
# Usage: pass the input files as arguments; with no arguments every file
# matching /tmp/isi.*-*.txt is merged.  Files are processed in ascending
# order of the first number of the "<digits>-<digits>" part of their name.
#
# Progress goes to STDERR ('.' = record written, 'd' = duplicate skipped),
# followed by a dump of the per-file statistics.

use warnings;
use strict;
use autodie;
use Digest::MD5 qw(md5_hex);
use Data::Dump qw(dump);

my @files = @ARGV;
@files = glob '/tmp/isi.*-*.txt' unless @files;

my $path = '/tmp/isi.full.txt';
open(my $out_fh, '>', $path);
print $out_fh "FN ISI Export Format\nVR 1.0\n";

my %seen;      # MD5 hex digest of a record's text => times encountered
my $report;    # statistics, dumped to STDERR once all files are merged

# Return the numeric sort key of a file name: the first number of its
# "<digits>-<digits>" part, or 0 when the name has no such part (so the
# numeric comparison never sees an undefined value).
sub range_start {
    my ($name) = @_;
    return $name =~ m{(\d+)-\d+} ? $1 : 0;
}

# Write one complete record, followed by $separator, to the merged output
# unless an identical record was already seen; update the statistics of
# $file and print the progress marker either way.
sub store_record {
    my ( $file, $rec, $separator ) = @_;

    if ( $seen{ md5_hex($rec) }++ ) {
        print STDERR 'd';
        $report->{file}->{$file}->{duplicates}++;
    } else {
        print $out_fh $rec . $separator;
        $report->{file}->{$file}->{records}++;
        $report->{total_records}++;
        print STDERR '.';
    }
    return;
}

# Compute every sort key once instead of re-matching inside the comparator.
my %start_of = map { $_ => range_start($_) } @files;

foreach my $file ( sort { $start_of{$a} <=> $start_of{$b} } @files ) {
    print STDERR $file;

    push @{ $report->{files} }, $file;

    open(my $fh, '<', $file);

    # The accumulator is per file so that an unterminated record can never
    # leak into the first record of the next file.
    my $rec = '';

    while ( my $line = <$fh> ) {
        # Per-file header/trailer lines; the merged file gets its own.
        next if $line =~ m/^(?:FN|VR|EF)/;

        if ( $line !~ m/^[\r\n]+$/ ) {
            $rec .= $line;
            next;
        }

        # A blank line terminates the current record.  Blank lines with
        # nothing accumulated (leading or repeated) are not records.
        next if $rec eq '';

        store_record( $file, $rec, $line );
        $rec = '';
    }

    # Flush a final record that was not followed by a blank line.  Make it
    # end in a newline so it hashes like a normally terminated record.
    if ( $rec ne '' ) {
        $rec .= "\n" unless $rec =~ m/\n\z/;
        store_record( $file, $rec, "\n" );
    }

    close $fh;
    warn "\n";
}

print $out_fh "EF\n";
close $out_fh;

warn "# $path ", -s $path, dump $report;