1 |
package WebPAC::Normalize::XML; |
2 |
|
3 |
use warnings; |
4 |
use strict; |
5 |
|
6 |
use base qw/WebPAC::Common WebPAC::Normalize/; |
7 |
use XML::Simple; |
8 |
use Data::Dumper; |
9 |
use Text::Iconv; |
10 |
use YAML qw/Dump LoadFile/; |
11 |
|
12 |
=head1 NAME |
13 |
|
14 |
WebPAC::Normalize::XML - apply XML or YAML normalisaton rules |
15 |
|
16 |
=head1 VERSION |
17 |
|
18 |
Version 0.03 |
19 |
|
20 |
=cut |
21 |
|
22 |
our $VERSION = '0.03'; |
23 |
|
24 |
=head1 SYNOPSIS |
25 |
|
26 |
This module uses C<conf/normalize/*.xml> files to perform normalisation |
27 |
from input records |
28 |
|
29 |
=cut |
30 |
|
31 |
=head1 FUNCTIONS |
32 |
|
33 |
=head2 open |
34 |
|
35 |
Read normalisation rules defined using XML from C<conf/normalize/*.xml> and |
36 |
parse it. |
37 |
|
38 |
my $n = new WebPAC::Normalize::XML; |
39 |
$n->open( |
40 |
tag => 'isis', |
41 |
xml_file => '/path/to/conf/normalize/isis.xml', |
42 |
); |
43 |
|
44 |
C<tag> defines tag to use within C<xml_file> |
45 |
|
46 |
C<xml_file> defines path to normalize XML |
47 |
|
48 |
C<tags> define additional tags that can be forced (and an be array). |
49 |
|
50 |
=cut |
51 |
|
52 |
sub open { |
53 |
my $self = shift; |
54 |
|
55 |
my $arg = {@_}; |
56 |
|
57 |
my $log = $self->_get_logger(); |
58 |
|
59 |
foreach my $req (qw/tag xml_file/) { |
60 |
$log->logconfess("need argument $req") unless $arg->{$req}; |
61 |
} |
62 |
|
63 |
$self->{'tag'} = $arg->{'tag'}; |
64 |
my $xml_file = $arg->{'xml_file'}; |
65 |
|
66 |
$log->info("using $xml_file tag <",$self->{'tag'},">"); |
67 |
|
68 |
$log->logdie("normalisation xml file '$xml_file' doesn't exist!") if (! -e $xml_file); |
69 |
|
70 |
$self->{'import_xml_file'} = $xml_file; |
71 |
|
72 |
my @force_array = [ $self->{'tag'}, 'config', 'format' ]; |
73 |
push @force_array, $self->{'tags'} if ($self->{'tags'}); |
74 |
|
75 |
$self->{'import_xml'} = XMLin($xml_file, |
76 |
ForceArray => @force_array, |
77 |
ForceContent => 1, |
78 |
); |
79 |
|
80 |
$log->debug("import xml is ",sub { Dumper($self->{'import_xml'}) }, $self->{lookup} ? " using lookups" : "lookups disabled"); |
81 |
|
82 |
#print STDERR Dump($self->{import_xml}); |
83 |
|
84 |
return $self; |
85 |
} |
86 |
|
87 |
=head2 open_yaml |
88 |
|
89 |
Read normalisation rules defined in YAML file located usually at |
90 |
C<conf/normalize/*.yml> and parse it. |
91 |
|
92 |
my $n = new WebPAC::Normalize::XML; |
93 |
$n->open_yaml( |
94 |
tag => 'isis', |
95 |
path => '/path/to/conf/normalize/isis.yml', |
96 |
); |
97 |
|
98 |
=cut |
99 |
|
100 |
sub open_yaml { |
101 |
my $self = shift; |
102 |
|
103 |
my $arg = {@_}; |
104 |
|
105 |
my $log = $self->_get_logger(); |
106 |
|
107 |
foreach my $req (qw/tag path/) { |
108 |
$log->logconfess("need argument $req") unless $arg->{$req}; |
109 |
} |
110 |
|
111 |
my $path = $arg->{path}; |
112 |
$self->{tag} = $arg->{tag}; |
113 |
|
114 |
$log->logdie("normalisation yaml file '$path' doesn't exist!") if (! -e $path); |
115 |
|
116 |
$log->info("using $path normalization YAML"); |
117 |
|
118 |
$self->{'import_xml'} = LoadFile( $path ) || $log->die("can't load $path: $!"); |
119 |
|
120 |
$log->debug("import yaml is ",sub { Dumper($self->{'import_xml'}) }, $self->{lookup} ? " using lookups" : "lookups disabled"); |
121 |
|
122 |
$self->{_skip_x} = 1; |
123 |
|
124 |
return $self; |
125 |
} |
126 |
|
127 |
=head2 _x |
128 |
|
129 |
Convert string from XML UTF-8 encoding to code page defined in C<xml_file>. |
130 |
|
131 |
my $text = $n->_x('utf8 text'); |
132 |
|
133 |
Default application code page is C<ISO-8859-2>. You will probably want to |
134 |
change that when creating new instance of object based on this one. |
135 |
|
136 |
=cut |
137 |
|
138 |
sub _x { |
139 |
my $self = shift; |
140 |
my $utf8 = shift || return; |
141 |
return $utf8 if ($self->{_skip_x}); |
142 |
|
143 |
# create UTF-8 convertor for import_xml files |
144 |
$self->{'utf2cp'} ||= Text::Iconv->new('UTF-8' ,$self->{'code_page'} || 'ISO-8859-2'); |
145 |
|
146 |
return $self->{'utf2cp'}->convert($utf8) || |
147 |
$self->_get_logger()->logwarn("can't convert '$utf8'"); |
148 |
} |
149 |
|
150 |
|
151 |
=head1 AUTHOR |
152 |
|
153 |
Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >> |
154 |
|
155 |
=head1 COPYRIGHT & LICENSE |
156 |
|
157 |
Copyright 2005 Dobrica Pavlinusic, All Rights Reserved. |
158 |
|
159 |
This program is free software; you can redistribute it and/or modify it |
160 |
under the same terms as Perl itself. |
161 |
|
162 |
=cut |
163 |
|
164 |
1; # End of WebPAC::Normalize::XML |