/[webpac2]/trunk/bin/isi-merge.pl
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/bin/isi-merge.pl

Parent Directory | Revision Log


Revision 1301 - (show annotations)
Sun Sep 20 01:21:17 2009 UTC (14 years, 7 months ago) by dpavlin
File MIME type: text/plain
File size: 1077 byte(s)
nicer output for merge with . for record and d for duplicate
#!/usr/bin/perl

# isi-merge.pl - merge several ISI export files into a single file,
# writing each distinct record only once.
#
# Usage: isi-merge.pl [file ...]
#
# Input files come from the command line; without arguments every file
# matching /tmp/isi.*-*.txt is used.  Files are processed in ascending
# order of the first number of the "<digits>-<digits>" part of their name.
# A record is a run of non-blank lines terminated by a blank line; two
# records are duplicates when their text has the same MD5 digest.
#
# The merged result goes to /tmp/isi.full.txt.  Progress is printed on
# STDERR ('.' for a stored record, 'd' for a duplicate) followed by a dump
# of the per-file statistics.

use warnings;
use strict;
use autodie;
use Digest::MD5 qw(md5_hex);
use Data::Dump qw(dump);

my @files = @ARGV;
@files = glob '/tmp/isi.*-*.txt' unless @files;

my $path = '/tmp/isi.full.txt';
open(my $out_fh, '>', $path);
print {$out_fh} "FN ISI Export Format\nVR 1.0\n";

my %seen;      # MD5 digest of a record => number of times it was read
my $report;    # statistics dumped at the end of the run

# Return the number in front of the "-<digits>" part of a file name, or 0
# when the name has no such part, so that sorting never sees undef.
sub range_start {
    my ($name) = @_;
    return $name =~ m{(\d+)-\d+} ? $1 : 0;
}

# Write $rec followed by $separator to the merged file unless a record
# with identical text was already seen; update the statistics for $file.
sub store_record {
    my ( $file, $rec, $separator ) = @_;

    my $digest = md5_hex($rec);
    if ( $seen{$digest}++ ) {
        print STDERR 'd';
        $report->{file}->{$file}->{duplicates}++;
    } else {
        print {$out_fh} $rec . $separator;
        $report->{file}->{$file}->{records}++;
        $report->{total_records}++;
        print STDERR '.';
    }

    return;
}

# Compute every sort key once instead of declaring conditional lexicals
# inside the comparator ("my $x = ... if ..." has undefined behaviour).
my @ordered =
    map  { $_->[1] }
    sort { $a->[0] <=> $b->[0] }
    map  { [ range_start($_), $_ ] } @files;

foreach my $file (@ordered) {
    print STDERR $file;

    push @{ $report->{files} }, $file;

    my $rec = '';    # text of the record currently being collected

    open(my $fh, '<', $file);
    while ( my $line = <$fh> ) {
        # header and trailer lines are written once by this script itself
        next if $line =~ m/^(?:FN|VR|EF)/;

        if ( $line !~ m/^[\r\n]+$/s ) {
            $rec .= $line;
            next;
        }

        # a blank line with nothing collected is not a record
        next unless length $rec;

        store_record( $file, $rec, $line );
        $rec = '';
    }
    close $fh;

    # A record that is not followed by a blank line must neither leak into
    # the next file nor get lost at the end of the last one.
    if ( length $rec ) {
        $rec .= "\n" unless $rec =~ m/\n\z/;
        store_record( $file, $rec, "\n" );
    }

    warn "\n";
}

print {$out_fh} "EF\n";
close $out_fh;


warn "# $path ", -s $path, dump($report);


Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26