1 |
#!/usr/bin/perl -w |
2 |
# |
3 |
# This script will try (hard) to convert database from |
4 |
# PhpMyLibrary (http://phpmylibrary.sourceforge.net/) back |
5 |
# to MARC format (ISO 2709) |
6 |
# |
7 |
# 2003-01-31 Dobrica Pavlinusic <dpavlin@rot13.org> |
8 |
# |
9 |
# This script is written for WebPac project available at |
10 |
# http://webpac.sourceforge.net/ |
11 |
# |
12 |
# MARC file format documentation is taken from |
13 |
# http://www.ariadne.ac.uk/issue7/marc/ |
14 |
# |
15 |
# |
16 |
|
17 |
use DBI; |
18 |
use strict; |
19 |
use Getopt::Long; |
20 |
|
21 |
my $database = "postnuke"; |
22 |
my $host = "localhost"; |
23 |
my $user = "root"; |
24 |
my $passwd = ""; |
25 |
|
26 |
my $usage = 0; |
27 |
my $debug = 0; |
28 |
|
29 |
my $result = GetOptions( |
30 |
"database=s" => \$database, |
31 |
"host=s" => \$host, |
32 |
"user=s" => \$user, |
33 |
"password=s" => \$passwd, |
34 |
"debug!" => \$debug, |
35 |
"help!" => \$usage, |
36 |
); |
37 |
|
38 |
if ($usage) { |
39 |
print qq{usage: $0 [--database="$database" --host="$host" --user="$user" --password="$passwd"] > file.marc\n |
40 |
This script will convert PhpMyLibrary database to standard UNIMARC format\n}; |
41 |
exit 1; |
42 |
} |
43 |
|
44 |
my $dsn = "DBI:mysql:database=$database;host=$host"; |
45 |
my $dbh = DBI->connect($dsn, $user, $passwd, {'RaiseError' => 1}); |
46 |
|
47 |
# UNIMARC leader format |
48 |
#my $leader_fmt = qq{%05diam0 22%05d 45 }; |
49 |
# MARC leader format |
50 |
my $leader_fmt = qq{%05dcas 22%05d a 4500}; |
51 |
|
52 |
|
53 |
my $sth = $dbh->prepare("SELECT marc FROM tblbib"); |
54 |
$sth->execute(); |
55 |
|
56 |
my $count = 0; |
57 |
my $rec_nr = 0; |
58 |
|
59 |
while (my $row = $sth->fetchrow_hashref()) { |
60 |
my $marc = $row->{'marc'}; |
61 |
$rec_nr++; |
62 |
|
63 |
my $real_len = length($marc); |
64 |
|
65 |
my $skip = 0; # skip this record? |
66 |
|
67 |
# fix PhpMyLibrary MARC (why do I have to do this? It's MARC, |
68 |
# for gaddem sake!!! |
69 |
|
70 |
# Byte Name |
71 |
# ---- ---- |
72 |
# 0-4 Record Length |
73 |
# 5 Status (n=new, c=corrected and d=deleted) |
74 |
# 6 Type of Record (a=printed material) |
75 |
# 7 Bibliographic Level (m=monograph) |
76 |
# 8-9 Blanks |
77 |
# 10 Indictator count (2 for monographs) |
78 |
# 11 Subfield code count (2 - 0x1F+subfield code itself) |
79 |
# 12-16 Base address of data |
80 |
# 17 Encoding level (blank=full level, 1=sublevel 1, 2=sublevel 2, |
81 |
# 3=sublevel 3) |
82 |
# 18 Descriptive Cataloguing Form (blank=record is full ISBD, |
83 |
# n=record is in non-ISBD format, i=record is in |
84 |
# an incomplete ISBD format) |
85 |
# 19 Blank |
86 |
# 20 Length of length field in directory (always 4 in UNIMARC) |
87 |
# 21 Length of Starting Character Position in directory (always |
88 |
# 5 in UNIMARC) |
89 |
# 22 Length of implementation defined portion in directory (always |
90 |
# 0 in UNIMARC) |
91 |
# 23 Blank |
92 |
# |
93 |
# |0 4|5 89 |12 16|1n 450 | |
94 |
# (xxxxx)nam 22(.....) 450 <--- |
95 |
$marc =~ m/^(.....)......(.....)polerioj/ || die "record: '$marc' unparsable!"; |
96 |
my ($reclen,$base_addr) = ($1,$2); |
97 |
|
98 |
my $directory = substr($marc,24,$base_addr-24); |
99 |
my $fields = substr($marc,$base_addr-1); |
100 |
|
101 |
print STDERR "# $rec_nr fields: '$fields'\n" if ($debug); |
102 |
print STDERR "# $rec_nr directory: [",length($directory),"]\n" if ($debug); |
103 |
|
104 |
# PhpMyLibrary MARC records don't have indicators, so we'll add them |
105 |
|
106 |
my $o = 0; # offset |
107 |
my $new_dictionary; |
108 |
my $new_fields; |
109 |
|
110 |
while (!$skip && $directory =~ s/(\d{3})(\d{4})(\d{5})//) { |
111 |
my ($tag,$len,$addr) = ($1,$2,$3); |
112 |
|
113 |
print STDERR "tag/len/addr: $tag $len $addr\n" if ($debug); |
114 |
|
115 |
sub check_field($) { |
116 |
my $f = shift; |
117 |
my $del = substr($f,0,1); |
118 |
|
119 |
die "expected 0x1e, got '$del' (".ord($del)."): '$f'" if (ord($del) != 30); |
120 |
} |
121 |
|
122 |
if (($addr+$len) > length($fields)) { |
123 |
print STDERR "WARNING: error in dictionary on record $rec_nr skipping...\n" if ($debug); |
124 |
$skip = 1; |
125 |
next; |
126 |
} |
127 |
|
128 |
# take field |
129 |
my $f = substr($fields,$addr,$len); |
130 |
print STDERR "data $tag [$len] $addr: '$f'\n" if ($debug); |
131 |
|
132 |
my $del = substr($fields,$addr+$len,1); |
133 |
|
134 |
# check field delimiters... |
135 |
if ($del ne chr(30)) { |
136 |
print STDERR "WARNING: skipping record $rec_nr, can't find delimiters got: '$del'\n" if ($debug); |
137 |
$skip = 1; |
138 |
next; |
139 |
} |
140 |
|
141 |
check_field($f); |
142 |
|
143 |
if ($tag =~ m/^00/) { |
144 |
# fields 001-008 doesn't have indicators |
145 |
$new_dictionary .= sprintf("%03d%04d%05d",$tag,$len,$addr); |
146 |
$new_fields.=$f; |
147 |
} else { |
148 |
$new_dictionary .= sprintf("%03d%04d%05d",$tag,($len+2),($addr+$o)); |
149 |
$new_fields.=chr(30)." ".substr($f,1); |
150 |
$o += 2; |
151 |
} |
152 |
} |
153 |
|
154 |
if (! $skip) { |
155 |
my $new_leader = sprintf($leader_fmt,24+length($new_dictionary.$new_fields)+2,$base_addr); |
156 |
my $new_marc = $new_leader . $new_dictionary . $new_fields . chr(30); |
157 |
$new_marc .= chr(29); # end of record |
158 |
|
159 |
print STDERR "original and new marc: [$rec_nr]\n$marc\n$new_marc\n\n" if ($debug); |
160 |
print "$new_marc"; |
161 |
$count++; |
162 |
} |
163 |
|
164 |
# last if ($count > 100); |
165 |
|
166 |
} |
167 |
$sth->finish(); |
168 |
$dbh->disconnect(); |
169 |
|
170 |
print STDERR "$count records from database $database converted...\n"; |