1 |
dpavlin |
12 |
package WebPAC::Normalize::XML; |
2 |
dpavlin |
8 |
|
3 |
|
|
use warnings; |
4 |
|
|
use strict; |
5 |
|
|
|
6 |
dpavlin |
13 |
use base qw/WebPAC::Common WebPAC::Normalize/; |
7 |
dpavlin |
12 |
use XML::Simple; |
8 |
|
|
use Data::Dumper; |
9 |
dpavlin |
13 |
use Text::Iconv; |
10 |
dpavlin |
8 |
|
11 |
|
|
=head1 NAME |
12 |
|
|
|
13 |
dpavlin |
12 |
WebPAC::Normalize::XML - apply XML normalisation rules |
14 |
dpavlin |
8 |
|
15 |
|
|
=head1 VERSION |
16 |
|
|
|
17 |
dpavlin |
62 |
Version 0.02 |
18 |
dpavlin |
8 |
|
19 |
|
|
=cut |
20 |
|
|
|
21 |
dpavlin |
62 |
our $VERSION = '0.02'; |
22 |
dpavlin |
8 |
|
23 |
|
|
=head1 SYNOPSIS |
24 |
|
|
|
25 |
|
|
This module uses C<conf/normalize/*.xml> files to perform normalisation |
26 |
|
|
from input records |
27 |
|
|
|
28 |
|
|
=cut |
29 |
|
|
|
30 |
|
|
=head1 FUNCTIONS |
31 |
|
|
|
32 |
dpavlin |
13 |
=head2 open |
33 |
dpavlin |
8 |
|
34 |
dpavlin |
12 |
Read normalisation rules defined using XML from C<conf/normalize/*.xml> and |
35 |
|
|
parse it. |
36 |
dpavlin |
8 |
|
37 |
dpavlin |
13 |
my $n = new WebPAC::Normalize::XML; |
38 |
|
|
$n->open( |
39 |
dpavlin |
12 |
tag => 'isis', |
40 |
|
|
xml_file => '/path/to/conf/normalize/isis.xml', |
41 |
dpavlin |
13 |
); |
42 |
dpavlin |
8 |
|
43 |
dpavlin |
12 |
C<tag> defines tag to use within C<xml_file> |
44 |
|
|
|
45 |
|
|
C<xml_file> defines path to normalize XML. |
46 |
|
|
|
47 |
dpavlin |
62 |
C<tags> defines additional tags that can be forced (and can be an array). |
48 |
|
|
|
49 |
dpavlin |
8 |
=cut |
50 |
|
|
|
51 |
dpavlin |
13 |
sub open {
    my $self = shift;

    # Named arguments: 'tag' and 'xml_file' are required, 'tags' is optional.
    my $arg = {@_};

    my $log = $self->_get_logger();

    foreach my $req (qw/tag xml_file/) {
        $log->logconfess("need argument $req") unless $arg->{$req};
    }

    $self->{'tag'} = $arg->{'tag'};
    my $xml_file = $arg->{'xml_file'};

    $log->info("using $xml_file tag <",$self->{'tag'},">");

    $log->logdie("normalisation xml file '$xml_file' doesn't exist!") if (! -e $xml_file);

    $self->{'import_xml_file'} = $xml_file;

    # Flat list of element names that XMLin must always fold into arrays.
    my @force_array = ( $self->{'tag'}, 'config', 'format' );

    # Additional forced tags: prefer the 'tags' argument, fall back to a
    # value already stored on the object. Either a single tag name or an
    # array reference of tag names is accepted.
    my $tags = $arg->{'tags'} || $self->{'tags'};
    if ($tags) {
        push @force_array, ref($tags) eq 'ARRAY' ? @{ $tags } : $tags;
    }

    # ForceArray takes a single value, so the names are passed as one array
    # reference; flattening the list here would corrupt the option pairs
    # that follow.
    $self->{'import_xml'} = XMLin($xml_file,
        ForceArray => \@force_array,
        ForceContent => 1,
    );

    # The dump is handed over as a code reference, presumably so the logger
    # only runs Dumper when debug output is enabled -- TODO confirm.
    $log->debug("import xml is ",sub { Dumper($self->{'import_xml'}) }, $self->{lookup} ? " using lookups" : "lookups disabled");

    return $self;
}
83 |
|
|
|
84 |
|
|
|
85 |
dpavlin |
13 |
=head2 _x |
86 |
dpavlin |
8 |
|
87 |
dpavlin |
14 |
Convert string from XML UTF-8 encoding to code page defined in C<xml_file>. |
88 |
dpavlin |
8 |
|
89 |
dpavlin |
13 |
my $text = $n->_x('utf8 text'); |
90 |
dpavlin |
8 |
|
91 |
dpavlin |
13 |
Default application code page is C<ISO-8859-2>. You will probably want to |
92 |
|
|
change that when creating new instance of object based on this one. |
93 |
dpavlin |
8 |
|
94 |
|
|
=cut |
95 |
|
|
|
96 |
dpavlin |
13 |
sub _x {
    my $self = shift;
    my $utf8 = shift;

    # Nothing to convert for undefined or empty input. The string '0' is
    # valid text, so test definedness and length rather than truth.
    return unless (defined($utf8) && length($utf8));

    # Create the UTF-8 converter for import_xml files once and cache it on
    # the object; the target code page defaults to ISO-8859-2.
    $self->{'utf2cp'} ||= Text::Iconv->new('UTF-8' ,$self->{'code_page'} || 'ISO-8859-2');

    my $converted = $self->{'utf2cp'}->convert($utf8);

    # A defined result is a successful conversion, even when it is a false
    # value such as '0'.
    return $converted if (defined($converted));

    # Conversion failed: warn through the logger and, as before, hand its
    # return value back to the caller.
    return $self->_get_logger()->logwarn("can't convert '$utf8'");
}
106 |
|
|
|
107 |
|
|
|
108 |
|
|
=head1 AUTHOR |
109 |
|
|
|
110 |
|
|
Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >> |
111 |
|
|
|
112 |
|
|
=head1 COPYRIGHT & LICENSE |
113 |
|
|
|
114 |
|
|
Copyright 2005 Dobrica Pavlinusic, All Rights Reserved. |
115 |
|
|
|
116 |
|
|
This program is free software; you can redistribute it and/or modify it |
117 |
|
|
under the same terms as Perl itself. |
118 |
|
|
|
119 |
|
|
=cut |
120 |
|
|
|
121 |
dpavlin |
12 |
1; # End of WebPAC::Normalize::XML |