/[webpac2]/trunk/lib/WebPAC/Normalize/XML.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Normalize/XML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 505 - (show annotations)
Sun May 14 22:38:01 2006 UTC (17 years, 11 months ago) by dpavlin
File size: 3477 byte(s)
fix whitespace

1 package WebPAC::Normalize::XML;
2
3 use warnings;
4 use strict;
5
6 use base qw/WebPAC::Common WebPAC::Normalize/;
7 use XML::Simple;
8 use Data::Dumper;
9 use Text::Iconv;
10 use YAML qw/Dump LoadFile/;
11
12 =head1 NAME
13
14 WebPAC::Normalize::XML - apply XML or YAML normalisation rules
15
16 =head1 VERSION
17
18 Version 0.03
19
20 =cut
21
22 our $VERSION = '0.03';
23
24 =head1 SYNOPSIS
25
26 This module uses C<conf/normalize/*.xml> files to perform normalisation
27 from input records
28
29 =cut
30
31 =head1 FUNCTIONS
32
33 =head2 open
34
35 Read normalisation rules defined using XML from C<conf/normalize/*.xml> and
36 parse it.
37
38 my $n = new WebPAC::Normalize::XML;
39 $n->open(
40 tag => 'isis',
41 xml_file => '/path/to/conf/normalize/isis.xml',
42 );
43
44 C<tag> defines tag to use within C<xml_file>
45
46 C<xml_file> defines path to normalize XML
47
48 C<tags> defines additional tags that can be forced (and can be an array).
49
50 =cut
51
sub open {
    my $self = shift;

    # Named arguments: tag, xml_file (both required) and tags (optional).
    my $arg = {@_};

    my $log = $self->_get_logger();

    # Both arguments are mandatory; abort with a stack trace if one is missing.
    foreach my $req (qw/tag xml_file/) {
        $log->logconfess("need argument $req") unless $arg->{$req};
    }

    $self->{'tag'} = $arg->{'tag'};
    my $xml_file = $arg->{'xml_file'};

    $log->info("using $xml_file tag <",$self->{'tag'},">");

    $log->logdie("normalisation xml file '$xml_file' doesn't exist!") if (! -e $xml_file);

    $self->{'import_xml_file'} = $xml_file;

    # Element names which XMLin must always fold into arrays.  This has to
    # be a flat list that is handed over as ONE array reference: expanding
    # it in place inside the option list would corrupt the name => value
    # pairs as soon as additional tags are present.
    my @force_array = ( $self->{'tag'}, 'config', 'format' );

    # Additional tags may come from the call arguments (as documented) or
    # from a value already stored on the object; either a single name or
    # an array reference of names is accepted.
    my $tags = $arg->{'tags'} || $self->{'tags'};
    if ($tags) {
        push @force_array, ref($tags) eq 'ARRAY' ? @{ $tags } : $tags;
    }

    $self->{'import_xml'} = XMLin($xml_file,
        ForceArray => \@force_array,
        ForceContent => 1,
    );

    # The dump is wrapped in a closure so the logger can evaluate it lazily.
    $log->debug("import xml is ",sub { Dumper($self->{'import_xml'}) }, $self->{lookup} ? " using lookups" : "lookups disabled");

    #print STDERR Dump($self->{import_xml});

    # Return the object itself so calls can be chained.
    return $self;
}
86
87 =head2 open_yaml
88
89 Read normalisation rules defined in YAML file located usually at
90 C<conf/normalize/*.yml> and parse it.
91
92 my $n = new WebPAC::Normalize::XML;
93 $n->open_yaml(
94 tag => 'isis',
95 path => '/path/to/conf/normalize/isis.yml',
96 );
97
98 =cut
99
sub open_yaml {
    my $self = shift;

    # Named arguments: tag and path, both required.
    my $arg = {@_};

    my $log = $self->_get_logger();

    # Abort with a stack trace if a mandatory argument is missing.
    foreach my $req (qw/tag path/) {
        $log->logconfess("need argument $req") unless $arg->{$req};
    }

    my $path = $arg->{path};
    $self->{tag} = $arg->{tag};

    $log->logdie("normalisation yaml file '$path' doesn't exist!") if (! -e $path);

    $log->info("using $path normalization YAML");

    # The parsed rules are stored under the same key which open() uses for
    # XML rules.  A false result from LoadFile is fatal; logdie is used
    # here like for every other fatal error in this module.
    $self->{'import_xml'} = LoadFile( $path ) || $log->logdie("can't load $path: $!");

    # The dump is wrapped in a closure so the logger can evaluate it lazily.
    $log->debug("import yaml is ",sub { Dumper($self->{'import_xml'}) }, $self->{lookup} ? " using lookups" : "lookups disabled");

    # Tell _x() to hand strings back unchanged for rules loaded from YAML.
    $self->{_skip_x} = 1;

    # Return the object itself so calls can be chained.
    return $self;
}
126
127 =head2 _x
128
129 Convert string from XML UTF-8 encoding to code page defined in C<xml_file>.
130
131 my $text = $n->_x('utf8 text');
132
133 Default application code page is C<ISO-8859-2>. You will probably want to
134 change that when creating new instance of object based on this one.
135
136 =cut
137
sub _x {
    my $self = shift;
    my $utf8 = shift;

    # Nothing to do for undefined or empty input.  Definedness and length
    # are tested instead of truth so that the string '0' is not discarded.
    return unless defined $utf8 && length $utf8;

    # open_yaml() sets this flag; strings are then returned unchanged.
    return $utf8 if ($self->{_skip_x});

    # Create the UTF-8 converter for import_xml files on first use and
    # cache it on the object; the target code page defaults to ISO-8859-2.
    $self->{'utf2cp'} ||= Text::Iconv->new('UTF-8' ,$self->{'code_page'} || 'ISO-8859-2');

    my $converted = $self->{'utf2cp'}->convert($utf8);

    # A non-empty result (including the string '0') is a success.
    return $converted if defined $converted && length $converted;

    # Conversion failed: warn through the logger and return its result.
    return $self->_get_logger()->logwarn("can't convert '$utf8'");
}
149
150
151 =head1 AUTHOR
152
153 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
154
155 =head1 COPYRIGHT & LICENSE
156
157 Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
158
159 This program is free software; you can redistribute it and/or modify it
160 under the same terms as Perl itself.
161
162 =cut
163
164 1; # End of WebPAC::Normalize::XML

  ViewVC Help
Powered by ViewVC 1.1.26