/[webpac2]/trunk/lib/WebPAC/Normalize/XML.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize/XML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 314 - (hide annotations)
Fri Dec 23 21:05:50 2005 UTC (18 years, 4 months ago) by dpavlin
File size: 3480 byte(s)
 r345@athlon:  dpavlin | 2005-12-21 00:54:13 +0100
 work on tests

1 dpavlin 12 package WebPAC::Normalize::XML;
2 dpavlin 8
3     use warnings;
4     use strict;
5    
6 dpavlin 13 use base qw/WebPAC::Common WebPAC::Normalize/;
7 dpavlin 12 use XML::Simple;
8     use Data::Dumper;
9 dpavlin 13 use Text::Iconv;
10 dpavlin 269 use YAML qw/Dump LoadFile/;
11 dpavlin 8
12     =head1 NAME
13    
14 dpavlin 269 WebPAC::Normalize::XML - apply XML or YAML normalisation rules
15 dpavlin 8
16     =head1 VERSION
17    
18 dpavlin 269 Version 0.03
19 dpavlin 8
20     =cut
21    
22 dpavlin 269 our $VERSION = '0.03';
23 dpavlin 8
24     =head1 SYNOPSIS
25    
26     This module uses C<conf/normalize/*.xml> files to perform normalisation
27     from input records
28    
29     =cut
30    
31     =head1 FUNCTIONS
32    
33 dpavlin 13 =head2 open
34 dpavlin 8
35 dpavlin 12 Read normalisation rules defined using XML from C<conf/normalize/*.xml> and
36     parse it.
37 dpavlin 8
38 dpavlin 13 my $n = new WebPAC::Normalize::XML;
39     $n->open(
40 dpavlin 12 tag => 'isis',
41     xml_file => '/path/to/conf/normalize/isis.xml',
42 dpavlin 13 );
43 dpavlin 8
44 dpavlin 12 C<tag> defines tag to use within C<xml_file>
45    
46 dpavlin 269 C<xml_file> defines path to normalize XML
47 dpavlin 12
48 dpavlin 62 C<tags> defines additional tags that are forced into arrays (it can be a single tag or an array of tags).
49    
50 dpavlin 8 =cut
51    
# Load normalisation rules from an XML file and keep the parsed structure
# on the object.
#
# Arguments (flat key => value list):
#   tag      - required; stored in $self->{'tag'} and forced to an array
#              when the XML is parsed
#   xml_file - required; path to the XML rules file
#   tags     - optional; extra element name(s) to force into arrays, given
#              either as a single name or as an array reference. When not
#              passed, $self->{'tags'} is used if it is set.
#
# Sets $self->{'tag'}, $self->{'import_xml_file'} and $self->{'import_xml'}.
# Fails through the logger: logconfess when a required argument is missing,
# logdie when the file does not exist.
#
# Returns $self.
sub open {
	my $self = shift;

	my $arg = {@_};

	my $log = $self->_get_logger();

	foreach my $req (qw/tag xml_file/) {
		$log->logconfess("need argument $req") unless $arg->{$req};
	}

	$self->{'tag'} = $arg->{'tag'};
	my $xml_file = $arg->{'xml_file'};

	$log->info("using $xml_file tag <",$self->{'tag'},">");

	$log->logdie("normalisation xml file '$xml_file' doesn't exist!") if (! -e $xml_file);

	$self->{'import_xml_file'} = $xml_file;

	# ForceArray takes exactly ONE value (here: a reference to a list of
	# element names), so collect all names in a flat list first and pass a
	# single reference. Flattening the list straight into the XMLin() call
	# would break its name => value option pairs.
	my @force_array = ( $self->{'tag'}, 'config', 'format' );

	my $tags = defined $arg->{'tags'} ? $arg->{'tags'} : $self->{'tags'};
	if ($tags) {
		push @force_array, ref($tags) eq 'ARRAY' ? @{ $tags } : $tags;
	}

	$self->{'import_xml'} = XMLin($xml_file,
		ForceArray => \@force_array,
		ForceContent => 1,
	);

	$log->debug("import xml is ",sub { Dumper($self->{'import_xml'}) }, $self->{lookup} ? " using lookups" : "lookups disabled");

	return $self;
}
86    
87 dpavlin 269 =head2 open_yaml
88 dpavlin 8
89 dpavlin 269 Read normalisation rules defined in YAML file located usually at
90     C<conf/normalize/*.yml> and parse it.
91    
92     my $n = new WebPAC::Normalize::XML;
93     $n->open_yaml(
94     tag => 'isis',
95     path => '/path/to/conf/normalize/isis.yml',
96     );
97    
98     =cut
99    
# Load normalisation rules from a YAML file and keep the parsed structure
# on the object (in the same slot that open() uses for XML rules).
#
# Arguments (flat key => value list):
#   tag  - required; stored in $self->{tag}
#   path - required; path to the YAML rules file
#
# Sets $self->{tag}, $self->{'import_xml'} and $self->{_skip_x}; the latter
# makes _x() hand strings back without code page conversion.
# Fails through the logger: logconfess when a required argument is missing,
# logdie when the file does not exist or nothing could be loaded from it.
#
# Returns $self.
sub open_yaml {
	my $self = shift;

	my $arg = {@_};

	my $log = $self->_get_logger();

	foreach my $req (qw/tag path/) {
		$log->logconfess("need argument $req") unless $arg->{$req};
	}

	my $path = $arg->{path};
	$self->{tag} = $arg->{tag};

	$log->logdie("normalisation yaml file '$path' doesn't exist!") if (! -e $path);

	$log->info("using $path normalization YAML");

	# logdie (not die) is the fatal call used everywhere else in this file;
	# it reports the message through the logger before dying.
	$self->{'import_xml'} = LoadFile( $path ) || $log->logdie("can't load $path: $!");

	$log->debug("import yaml is ",sub { Dumper($self->{'import_xml'}) }, $self->{lookup} ? " using lookups" : "lookups disabled");

	# rules loaded from YAML are used as-is, so tell _x() to skip conversion
	$self->{_skip_x} = 1;

	return $self;
}
126    
127 dpavlin 13 =head2 _x
128 dpavlin 8
129 dpavlin 14 Convert string from the UTF-8 encoding used in rule files to the code page set in the object's C<code_page>.
130 dpavlin 8
131 dpavlin 13 my $text = $n->_x('utf8 text');
132 dpavlin 8
133 dpavlin 13 Default application code page is C<ISO-8859-2>. You will probably want to
134     change that when creating new instance of object based on this one.
135 dpavlin 8
136     =cut
137    
# Convert a UTF-8 string to the object's code page.
#
# Arguments:
#   $utf8 - string to convert
#
# Returns nothing (bare return) when $utf8 is undefined or empty. Returns
# the string untouched when $self->{_skip_x} is set. Otherwise returns the
# converted string; the target code page is $self->{'code_page'}, or
# ISO-8859-2 when that is not set. If the conversion fails, a warning is
# logged with logwarn and the result of that call is returned.
sub _x {
	my $self = shift;
	my $utf8 = shift;

	# Test for definedness and length instead of truth, so that the valid
	# string "0" is converted and not silently dropped.
	return unless defined $utf8 && length $utf8;

	return $utf8 if ($self->{_skip_x});

	# create UTF-8 convertor for import_xml files (once, then reuse it)
	$self->{'utf2cp'} ||= Text::Iconv->new('UTF-8' ,$self->{'code_page'} || 'ISO-8859-2');

	my $converted = $self->{'utf2cp'}->convert($utf8);
	return $converted if defined $converted;

	return $self->_get_logger()->logwarn("can't convert '$utf8'");
}
149    
150    
151     =head1 AUTHOR
152    
153     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
154    
155     =head1 COPYRIGHT & LICENSE
156    
157     Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
158    
159     This program is free software; you can redistribute it and/or modify it
160     under the same terms as Perl itself.
161    
162     =cut
163    
164 dpavlin 12 1; # End of WebPAC::Normalize::XML

  ViewVC Help
Powered by ViewVC 1.1.26