1 |
#!/usr/bin/perl -w |
2 |
# |
3 |
# This script will try (hard) to convert database from |
4 |
# PhpMyLibrary (http://phpmylibrary.sourceforge.net/) back |
5 |
# to MARC format (ISO 2709) |
6 |
# |
7 |
# 2003-01-31 Dobrica Pavlinusic <dpavlin@rot13.org> |
8 |
# |
9 |
# This script is written for WebPac project available at |
10 |
# http://webpac.sourceforge.net/ |
11 |
# |
12 |
# MARC file format documentation is taken from |
13 |
# http://www.ariadne.ac.uk/issue7/marc/ |
14 |
# |
15 |
# |
16 |
|
17 |
use DBI; |
18 |
use strict; |
19 |
use Getopt::Long; |
20 |
|
21 |
my $database = "postnuke"; |
22 |
my $host = "localhost"; |
23 |
my $user = "root"; |
24 |
my $passwd = ""; |
25 |
|
26 |
my $usage = 0; |
27 |
my $debug = 0; |
28 |
my $quiet = 0; |
29 |
|
30 |
my $result = GetOptions( |
31 |
"database=s" => \$database, |
32 |
"host=s" => \$host, |
33 |
"user=s" => \$user, |
34 |
"password=s" => \$passwd, |
35 |
"debug!" => \$debug, |
36 |
"verbose!" => \$debug, |
37 |
"quiet!" => \$quiet, |
38 |
"help!" => \$usage, |
39 |
); |
40 |
|
41 |
if ($usage) { |
42 |
print qq{usage: $0 [--database="$database" --host="$host" --user="$user" --password="$passwd"] > file.marc\n |
43 |
This script will convert PhpMyLibrary database to standard UNIMARC format\n}; |
44 |
exit 1; |
45 |
} |
46 |
|
47 |
my $dsn = "DBI:mysql:database=$database;host=$host"; |
48 |
my $dbh = DBI->connect($dsn, $user, $passwd, {'RaiseError' => 1}); |
49 |
|
50 |
# UNIMARC leader format |
51 |
#my $leader_fmt = qq{%05diam0 22%05d 45 }; |
52 |
# MARC leader format |
53 |
my $leader_fmt = qq{%05dcas 22%05d a 4500}; |
54 |
|
55 |
|
56 |
my $sth = $dbh->prepare("SELECT marc FROM tblbib"); |
57 |
$sth->execute(); |
58 |
|
59 |
my $count = 0; |
60 |
my $rec_nr = 0; |
61 |
|
62 |
while (my $row = $sth->fetchrow_hashref()) { |
63 |
my $marc = $row->{'marc'}; |
64 |
$rec_nr++; |
65 |
|
66 |
my $real_len = length($marc); |
67 |
|
68 |
my $skip = 0; # skip this record? |
69 |
|
70 |
# fix PhpMyLibrary MARC (why do I have to do this? It's MARC, |
71 |
# for gaddem sake!!! |
72 |
|
73 |
# Byte Name |
74 |
# ---- ---- |
75 |
# 0-4 Record Length |
76 |
# 5 Status (n=new, c=corrected and d=deleted) |
77 |
# 6 Type of Record (a=printed material) |
78 |
# 7 Bibliographic Level (m=monograph) |
79 |
# 8-9 Blanks |
80 |
# 10 Indictator count (2 for monographs) |
81 |
# 11 Subfield code count (2 - 0x1F+subfield code itself) |
82 |
# 12-16 Base address of data |
83 |
# 17 Encoding level (blank=full level, 1=sublevel 1, 2=sublevel 2, |
84 |
# 3=sublevel 3) |
85 |
# 18 Descriptive Cataloguing Form (blank=record is full ISBD, |
86 |
# n=record is in non-ISBD format, i=record is in |
87 |
# an incomplete ISBD format) |
88 |
# 19 Blank |
89 |
# 20 Length of length field in directory (always 4 in UNIMARC) |
90 |
# 21 Length of Starting Character Position in directory (always |
91 |
# 5 in UNIMARC) |
92 |
# 22 Length of implementation defined portion in directory (always |
93 |
# 0 in UNIMARC) |
94 |
# 23 Blank |
95 |
# |
96 |
# |0 4|5 89 |12 16|1n 450 | |
97 |
# (xxxxx)nam 22(.....) 450 <--- |
98 |
$marc =~ m/^(.....)......(.....)polerioj/ || die "record: '$marc' unparsable!"; |
99 |
my ($reclen,$base_addr) = ($1,$2); |
100 |
|
101 |
my $directory = substr($marc,24,$base_addr-24); |
102 |
my $fields = substr($marc,$base_addr-1); |
103 |
|
104 |
print STDERR "# $rec_nr fields: '$fields'\n" if ($debug); |
105 |
print STDERR "# $rec_nr directory: [",length($directory),"]\n" if ($debug); |
106 |
|
107 |
# PhpMyLibrary MARC records don't have indicators, so we'll add them |
108 |
|
109 |
my $o = 0; # offset |
110 |
my $new_dictionary; |
111 |
my $new_fields; |
112 |
|
113 |
while (!$skip && $directory =~ s/(\d{3})(\d{4})(\d{5})//) { |
114 |
my ($tag,$len,$addr) = ($1,$2,$3); |
115 |
|
116 |
sub check_field($) { |
117 |
my $f = shift; |
118 |
my $del = substr($f,0,1); |
119 |
|
120 |
die "expected 0x1e, got '$del' (".ord($del)."): '$f'" if (ord($del) != 30); |
121 |
} |
122 |
|
123 |
if (($addr+$len) > length($fields)) { |
124 |
print STDERR "WARNING: error in dictionary on record $rec_nr skipping...\n" if (! $quiet); |
125 |
$skip = 1; |
126 |
next; |
127 |
} |
128 |
|
129 |
# take field |
130 |
my $f = substr($fields,$addr,$len); |
131 |
print STDERR "tag/len/addr $tag [$len] $addr: '$f'\n" if ($debug); |
132 |
|
133 |
my $del = substr($fields,$addr+$len,1); |
134 |
|
135 |
# check field delimiters... |
136 |
if ($del ne chr(30)) { |
137 |
print STDERR "WARNING: skipping record $rec_nr, can't find delimiters got: '$del'\n" if (! $quiet); |
138 |
$skip = 1; |
139 |
next; |
140 |
} |
141 |
|
142 |
check_field($f); |
143 |
|
144 |
if (length($f) < 2) { |
145 |
print STDERR "WARNING: skipping field $tag from record $rec_nr because it's too short!\n" if (! $quiet); |
146 |
next; |
147 |
} |
148 |
|
149 |
|
150 |
if ($tag =~ m/^00/) { |
151 |
# fields 001-008 doesn't have indicators |
152 |
$new_dictionary .= sprintf("%03d%04d%05d",$tag,$len,length($new_fields || '')); |
153 |
$new_fields.=$f; |
154 |
} else { |
155 |
$new_dictionary .= sprintf("%03d%04d%05d",$tag,($len+2),length($new_fields || '')); |
156 |
$new_fields.=chr(30)." ".substr($f,1); |
157 |
$o += 2; |
158 |
} |
159 |
} |
160 |
|
161 |
if (! $skip) { |
162 |
my $new_leader = sprintf($leader_fmt,24+length($new_dictionary.$new_fields)+2,length($new_dictionary)+25); |
163 |
my $new_marc = $new_leader . $new_dictionary . $new_fields . chr(30); |
164 |
$new_marc .= chr(29); # end of record |
165 |
|
166 |
print STDERR "original and new marc: [$rec_nr/$count]\n$marc\n$new_marc\n\n" if ($debug); |
167 |
print "$new_marc"; |
168 |
$count++; |
169 |
} |
170 |
|
171 |
# last if ($count > 100); |
172 |
|
173 |
} |
174 |
$sth->finish(); |
175 |
$dbh->disconnect(); |
176 |
|
177 |
print STDERR "$count records from database $database converted...\n"; |