/[webpac2]/trunk/lib/WebPAC/Normalize/XML.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Normalize/XML.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 62 - (show annotations)
Tue Nov 15 14:31:12 2005 UTC (18 years, 5 months ago) by dpavlin
File size: 2430 byte(s)
 r8884@llin:  dpavlin | 2005-11-15 14:46:07 +0100
 fix small warnings

1 package WebPAC::Normalize::XML;
2
3 use warnings;
4 use strict;
5
6 use base qw/WebPAC::Common WebPAC::Normalize/;
7 use XML::Simple;
8 use Data::Dumper;
9 use Text::Iconv;
10
11 =head1 NAME
12
13 WebPAC::Normalize::XML - apply XML normalisation rules
14
15 =head1 VERSION
16
17 Version 0.02
18
19 =cut
20
21 our $VERSION = '0.02';
22
23 =head1 SYNOPSIS
24
25 This module uses C<conf/normalize/*.xml> files to perform normalisation
26 from input records
27
28 =cut
29
30 =head1 FUNCTIONS
31
32 =head2 open
33
34 Read normalisation rules defined using XML from C<conf/normalize/*.xml> and
35 parse them.
36
37 my $n = new WebPAC::Normalize::XML;
38 $n->open(
39 tag => 'isis',
40 xml_file => '/path/to/conf/normalize/isis.xml',
41 );
42
43 C<tag> defines tag to use within C<xml_file>
44
45 C<xml_file> defines path to normalize XML.
46
47 C<tags> defines additional tags that can be forced (and can be an array).
48
49 =cut
50
# Load and parse an XML normalisation rules file.
#
# Arguments (name => value pairs):
#   tag      - required; stored in $self->{'tag'} and always forced to an array
#   xml_file - required; path to the normalisation XML file
#   tags     - optional; extra element name(s) to force into arrays, either a
#              single name or an array reference of names. A value already
#              present in $self->{'tags'} is honoured as well.
#
# Stores the path in $self->{'import_xml_file'} and the parsed structure in
# $self->{'import_xml'}. Returns $self.
#
# Reports a missing required argument via logconfess and a missing file via
# logdie on the logger returned by _get_logger().
sub open {
    my $self = shift;

    my $arg = {@_};

    my $log = $self->_get_logger();

    foreach my $req (qw/tag xml_file/) {
        $log->logconfess("need argument $req") unless $arg->{$req};
    }

    $self->{'tag'} = $arg->{'tag'};
    my $xml_file = $arg->{'xml_file'};

    $log->info("using $xml_file tag <",$self->{'tag'},">");

    $log->logdie("normalisation xml file '$xml_file' doesn't exist!") if (! -e $xml_file);

    $self->{'import_xml_file'} = $xml_file;

    # Keep this a FLAT list of element names. It is handed to XMLin as a
    # single array reference below; flattening it into the option list would
    # misalign the name => value pairs as soon as extra tags are present.
    my @force_array = ( $self->{'tag'}, 'config', 'format' );

    # Extra tags may come from the call or from the object, and each source
    # may be a single name or an array reference of names.
    foreach my $tags ($arg->{'tags'}, $self->{'tags'}) {
        next unless $tags;
        push @force_array, ref($tags) eq 'ARRAY' ? @{$tags} : $tags;
    }

    $self->{'import_xml'} = XMLin($xml_file,
        ForceArray   => \@force_array,
        ForceContent => 1,
    );

    # The dump is wrapped in a sub so it is only produced if the logger
    # actually evaluates the argument.
    $log->debug("import xml is ",sub { Dumper($self->{'import_xml'}) });

    return $self;
}
83
84
85 =head2 _x
86
87 Convert string from XML UTF-8 encoding to code page defined in C<xml_file>.
88
89 my $text = $n->_x('utf8 text');
90
91 Default application code page is C<ISO-8859-2>. You will probably want to
92 change that when creating new instance of object based on this one.
93
94 =cut
95
# Convert a UTF-8 string to the application code page.
#
# Arguments:
#   $utf8 - text to convert
#
# The target code page is $self->{'code_page'}, falling back to 'ISO-8859-2'.
# The converter object is created on first use and cached in
# $self->{'utf2cp'}.
#
# Returns the converted string. Returns nothing (undef in scalar context) when
# the input is undefined or empty, or when the conversion yields no text; in
# the latter case a warning is logged via logwarn.
sub _x {
    my $self = shift;
    my $utf8 = shift;

    # Test definedness and length rather than truth, so that the string '0'
    # is converted instead of being silently dropped.
    return unless defined $utf8 && length $utf8;

    # create UTF-8 convertor for import_xml files
    $self->{'utf2cp'} ||= Text::Iconv->new('UTF-8', $self->{'code_page'} || 'ISO-8859-2');

    my $converted = $self->{'utf2cp'}->convert($utf8);
    return $converted if defined $converted && length $converted;

    # Log the failure, but do not hand the logger's return value back to the
    # caller as if it were converted text.
    $self->_get_logger()->logwarn("can't convert '$utf8'");
    return;
}
106
107
108 =head1 AUTHOR
109
110 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
111
112 =head1 COPYRIGHT & LICENSE
113
114 Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
115
116 This program is free software; you can redistribute it and/or modify it
117 under the same terms as Perl itself.
118
119 =cut
120
121 1; # End of WebPAC::Normalize::XML

  ViewVC Help
Powered by ViewVC 1.1.26