3 |
use warnings; |
use warnings; |
4 |
use strict; |
use strict; |
5 |
use autodie; |
use autodie; |
6 |
|
use Digest::MD5 qw(md5_hex); |
7 |
|
use Data::Dump qw(dump); |
8 |
|
|
9 |
my @files = @ARGV; |
my @files = @ARGV; |
10 |
@files = glob '/tmp/isi.*-*.txt' unless @files; |
@files = glob '/tmp/isi.*-*.txt' unless @files; |
13 |
open(my $out_fh, '>', $path); |
open(my $out_fh, '>', $path); |
14 |
print $out_fh "FN ISI Export Format\nVR 1.0\n"; |
print $out_fh "FN ISI Export Format\nVR 1.0\n"; |
15 |
|
|
16 |
|
my $rec; |
17 |
|
my $nr = 0; |
18 |
|
|
19 |
|
my $md5; |
20 |
|
|
21 |
|
my $report; |
22 |
|
|
23 |
foreach my $file ( sort { |
foreach my $file ( sort { |
24 |
my $a_r = $1 if $a =~ m{(\d+)-\d+}; |
my $a_r = $1 if $a =~ m{(\d+)-\d+}; |
25 |
my $b_r = $1 if $b =~ m{(\d+)-\d+}; |
my $b_r = $1 if $b =~ m{(\d+)-\d+}; |
27 |
} @files ) { |
} @files ) { |
28 |
warn $file; |
warn $file; |
29 |
|
|
30 |
|
push @{ $report->{files} }, $file; |
31 |
|
|
32 |
open(my $fh, '<', $file); |
open(my $fh, '<', $file); |
33 |
while(<$fh>) { |
while(<$fh>) { |
34 |
next if m/^(FN|VR|EF)/; |
next if m/^(FN|VR|EF)/; |
35 |
|
|
36 |
print $out_fh $_; |
if ( ! m/^[\r\n]+$/s ) { |
37 |
|
$rec .= $_; |
38 |
|
} else { |
39 |
|
$nr++; |
40 |
|
my $digest = md5_hex $rec; |
41 |
|
if ( my $times = $md5->{$digest} ) { |
42 |
|
warn "dumplicate $nr $digest $times\n"; |
43 |
|
$report->{file}->{$file}->{duplicates}++; |
44 |
|
} else { |
45 |
|
print $out_fh $rec . $_; |
46 |
|
$report->{file}->{$file}->{records}++; |
47 |
|
$report->{total_records}++; |
48 |
|
} |
49 |
|
|
50 |
|
$md5->{$digest}++; |
51 |
|
$rec = ''; |
52 |
|
} |
53 |
} |
} |
54 |
} |
} |
55 |
|
|
56 |
print $out_fh "EF\n"; |
print $out_fh "EF\n"; |
57 |
close $out_fh; |
close $out_fh; |
58 |
|
|
59 |
warn "# $path ", -s $path; |
|
60 |
|
warn "# $path ", -s $path, dump $report; |
61 |
|
|