7 |
use XML::Simple; |
use XML::Simple; |
8 |
use Data::Dumper; |
use Data::Dumper; |
9 |
use Text::Iconv; |
use Text::Iconv; |
10 |
|
use YAML qw/Dump LoadFile/; |
11 |
|
|
12 |
=head1 NAME |
=head1 NAME |
13 |
|
|
14 |
WebPAC::Normalize::XML - apply XML normalisaton rules |
WebPAC::Normalize::XML - apply XML or YAML normalisation rules |
15 |
|
|
16 |
=head1 VERSION |
=head1 VERSION |
17 |
|
|
18 |
Version 0.02 |
Version 0.03 |
19 |
|
|
20 |
=cut |
=cut |
21 |
|
|
22 |
our $VERSION = '0.02'; |
our $VERSION = '0.03'; |
23 |
|
|
24 |
=head1 SYNOPSIS |
=head1 SYNOPSIS |
25 |
|
|
43 |
|
|
44 |
C<tag> defines tag to use within C<xml_file> |
C<tag> defines tag to use within C<xml_file> |
45 |
|
|
46 |
C<xml_file> defines path to normalize XML. |
C<xml_file> defines path to normalize XML |
47 |
|
|
48 |
C<tags> define additional tags that can be forced (and can be an array). |
C<tags> define additional tags that can be forced (and can be an array). |
49 |
|
|
79 |
|
|
80 |
$log->debug("import xml is ",sub { Dumper($self->{'import_xml'}) }, $self->{lookup} ? " using lookups" : "lookups disabled"); |
$log->debug("import xml is ",sub { Dumper($self->{'import_xml'}) }, $self->{lookup} ? " using lookups" : "lookups disabled"); |
81 |
|
|
82 |
|
#print STDERR Dump($self->{import_xml}); |
83 |
|
|
84 |
return $self; |
return $self; |
85 |
} |
} |
86 |
|
|
87 |
|
=head2 open_yaml |
88 |
|
|
89 |
|
Read normalisation rules defined in YAML file located usually at |
90 |
|
C<conf/normalize/*.yml> and parse it. |
91 |
|
|
92 |
|
my $n = new WebPAC::Normalize::XML; |
93 |
|
$n->open_yaml( |
94 |
|
tag => 'isis', |
95 |
|
path => '/path/to/conf/normalize/isis.yml', |
96 |
|
); |
97 |
|
|
98 |
|
=cut |
99 |
|
|
100 |
|
sub open_yaml {
	my $self = shift;

	# Named arguments; both C<tag> and C<path> are required (checked below).
	my $arg = {@_};

	my $log = $self->_get_logger();

	foreach my $req (qw/tag path/) {
		$log->logconfess("need argument $req") unless $arg->{$req};
	}

	my $path = $arg->{path};
	$self->{tag} = $arg->{tag};

	$log->logdie("normalisation yaml file '$path' doesn't exist!") if (! -e $path);

	$log->info("using $path normalization YAML");

	# The parsed YAML rules are stored under the same 'import_xml' key that
	# the XML loader uses, so the rest of the object reads them the same way.
	#
	# Use logdie here (the original called $log->die): the other fatal paths
	# in this sub use logconfess/logdie, and a non-existent 'die' method
	# would hide the real error behind a "Can't locate object method" failure.
	$self->{'import_xml'} = LoadFile( $path ) || $log->logdie("can't load $path: $!");

	$log->debug("import yaml is ",sub { Dumper($self->{'import_xml'}) }, $self->{lookup} ? " using lookups" : "lookups disabled");

	# When this flag is set, _x returns its argument unchanged instead of
	# running it through the UTF-8 -> code page converter.
	# NOTE(review): presumably YAML rule files are already in the target
	# encoding -- confirm.
	$self->{_skip_x} = 1;

	# Return the object itself so calls can be chained.
	return $self;
}
126 |
|
|
127 |
=head2 _x |
=head2 _x |
128 |
|
|
138 |
sub _x { |
sub _x { |
139 |
my $self = shift; |
my $self = shift; |
140 |
my $utf8 = shift || return; |
my $utf8 = shift || return; |
141 |
|
return $utf8 if ($self->{_skip_x}); |
142 |
|
|
143 |
# create UTF-8 convertor for import_xml files |
# create UTF-8 convertor for import_xml files |
144 |
$self->{'utf2cp'} ||= Text::Iconv->new('UTF-8' ,$self->{'code_page'} || 'ISO-8859-2'); |
$self->{'utf2cp'} ||= Text::Iconv->new('UTF-8' ,$self->{'code_page'} || 'ISO-8859-2'); |