/[webpac2]/trunk/lib/WebPAC/Input/Excel.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Input/Excel.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1244 - (hide annotations)
Mon Jul 20 22:00:43 2009 UTC (14 years, 9 months ago) by dpavlin
File size: 4643 byte(s)
don't fallback to input name, but use first sheet instead

1 dpavlin 498 package WebPAC::Input::Excel;
2    
3     use warnings;
4     use strict;
5    
6     use Spreadsheet::ParseExcel;
7     use Spreadsheet::ParseExcel::Utility qw/int2col/;
8 dpavlin 728 use base qw/WebPAC::Common/;
9 dpavlin 1222 use Text::Unaccent::PurePerl qw/unac_string/;
10     use Data::Dump qw/dump/;
11 dpavlin 498
12     =head1 NAME
13    
14 dpavlin 894 WebPAC::Input::Excel - support for Microsoft Excel and compatibile files
15 dpavlin 498
16     =cut
17    
18 dpavlin 1222 our $VERSION = '0.06';
19 dpavlin 498
20    
21     =head1 SYNOPSIS
22    
23     Open Microsoft Excell, or compatibile format (for e.g. from OpenOffice.org
24     or Gnuemeric) in C<.xls> format.
25    
26     =head1 FUNCTIONS
27    
28 dpavlin 728 =head2 new
29 dpavlin 498
30     Returns handle to database and size
31    
32 dpavlin 728 my $excel = new WebPAC::Input::Excel(
33 dpavlin 498 path => '/path/to/workbook.xls'
34     worksheet => 'name of sheet',
35 dpavlin 524 from => 42,
36     to => 9999,
37 dpavlin 498 }
38    
39     C<worksheet> is case and white-space insensitive name of worksheet in Excel
40 dpavlin 1217 file to use. If not specified, name of input is used. If none of those
41     methods returned sheet, first worksheet in file is used instead.
42 dpavlin 498
43 dpavlin 524 C<from> and C<to> specify row numbers to start and finish import.
44    
45 dpavlin 498 =cut
46    
47 dpavlin 728 sub new {
48     my $class = shift;
49     my $self = {@_};
50     bless($self, $class);
51 dpavlin 498
52     my $log = $self->_get_logger();
53    
54 dpavlin 728 $log->logdie("can't open excel file $self->{path}: $!") unless (-r $self->{path} && -f $self->{path});
55 dpavlin 498
56 dpavlin 728 my $workbook = Spreadsheet::ParseExcel::Workbook->Parse($self->{path});
57 dpavlin 498
58 dpavlin 728 my $sheet;
59 dpavlin 1244 my $wanted_worksheet = $self->{worksheet} ; # || $self->{name};
60 dpavlin 498
61 dpavlin 1217 if ($wanted_worksheet) {
62 dpavlin 498 my $name;
63     do {
64     $sheet = shift @{ $workbook->{Worksheet} };
65 dpavlin 728 $log->logdie("can't find sheet '$wanted_worksheet' in $self->{path}\n") unless (defined($sheet));
66 dpavlin 498 $name = $sheet->{Name};
67     $name =~ s/\s\s+/ /g;
68     } until ($name =~ m/^\s*\Q$wanted_worksheet\E\s*$/i);
69    
70     }
71    
72 dpavlin 1217 $sheet ||= shift @{ $workbook->{Worksheet} };
73    
74 dpavlin 728 $self->{sheet} = $sheet;
75 dpavlin 498
76 dpavlin 728 $self->{from} ||= $sheet->{MinRow};
77     $self->{to} ||= $sheet->{MaxRow};
78 dpavlin 524
79 dpavlin 728 my $size = $self->{to} - $self->{from};
80     $self->{size} = $size;
81    
82     $log->warn("opening Excel file '$self->{path}', using ",
83 dpavlin 498 $wanted_worksheet ? '' : 'first ',
84     "worksheet: $sheet->{Name} [$size rows]"
85     );
86    
87 dpavlin 728 $self ? return $self : return undef;
88 dpavlin 498 }
89    
90     =head2 fetch_rec
91    
92     Return record with ID C<$mfn> from database
93    
94 dpavlin 652 my $rec = $self->fetch_rec( $mfn );
95 dpavlin 498
96 dpavlin 1055 Columns are named C<A>, C<B> and so on...
97 dpavlin 498
98     =cut
99    
100     sub fetch_rec {
101     my $self = shift;
102    
103 dpavlin 652 my $mfn = shift;
104 dpavlin 498
105     my $log = $self->_get_logger();
106    
107 dpavlin 728 my $sheet = $self->{sheet};
108 dpavlin 498 $log->logdie("can't find sheet hash") unless (defined($sheet));
109     $log->logdie("sheet hash isn't Spreadsheet::ParseExcel::Worksheet") unless ($sheet->isa('Spreadsheet::ParseExcel::Worksheet'));
110    
111     my $rec;
112    
113 dpavlin 728 my $row = $self->{from} + $mfn - 1;
114 dpavlin 498
115     $log->debug("fetch_rec( $mfn ) row: $row cols: ",$sheet->{MinCol}," - ",$sheet->{MaxCol});
116    
117     foreach my $col ( $sheet->{MinCol} ... $sheet->{MaxCol} ) {
118 dpavlin 1222 my $v = $sheet->{Cells}->[$row]->[$col]->{_Value}; ## XXX _Value = formatted | Val = unformated !
119     $rec->{ int2col($col) } = $v if defined $v;
120 dpavlin 498 }
121    
122     # add mfn only to records with data
123 dpavlin 521 $rec->{'000'} = [ $mfn ] if ($rec);
124 dpavlin 498
125     return $rec;
126     }
127    
128 dpavlin 728 =head2 size
129    
130     Return number of records in database
131    
132     my $size = $isis->size;
133    
134     =cut
135    
136     sub size {
137     my $self = shift;
138     return $self->{size};
139     }
140 dpavlin 1100
141 dpavlin 1217 our @labels;
142     our @names;
143    
144     sub normalize {
145     my ($self,$mfn) = @_;
146    
147     my $log = $self->_get_logger();
148    
149     my $sheet = $self->{sheet};
150    
151 dpavlin 1222 my $ds;
152    
153 dpavlin 1217 if ( ! @labels ) {
154 dpavlin 1222
155     my $labels;
156    
157     foreach ( $sheet->{MinCol} ... $sheet->{MaxCol} ) {
158     my $label = $sheet->{Cells}->[0]->[$_]->{_Value};
159     last if length($label) == 0;
160     push @labels, $label;
161     }
162     @names = map {
163     my $t = unac_string($_);
164     $t =~ s{[^a-z0-9]+}{_}gi;
165     $t =~ s{_+$}{};
166     $t =~ s{^_+}{};
167     $t = lc($t);
168     $labels .= "$t\t$_\n";
169     $t;
170     } @labels;
171    
172     $log->info("columns = ", dump( @names ), " labels = ", dump( @labels ) );
173    
174     $ds = {
175     '_labels' => [ @labels ],
176     '_names' => [ @names ],
177     };
178    
179     my $path = $self->{labels} || 'var/labels.txt';
180     {
181     warn $labels;
182     open(my $fh, '>:raw', $path) || die "$path: $!";
183     print $fh $labels;
184     close $fh;
185     }
186     $log->info("created labels $path ", -s $path, " bytes");
187 dpavlin 1217 }
188    
189 dpavlin 1222
190 dpavlin 1217 my $row = $self->{from} + $mfn - 1;
191    
192 dpavlin 1222 my $data;
193     foreach ( $sheet->{MinCol} ... $sheet->{MaxCol} ) {
194     my $name = $names[$_];
195     next unless $name;
196     my $v = $sheet->{Cells}->[$row]->[$_]->{_Value};
197     $data->{ $name } = $v;
198     $ds->{ $name } = { search => [ $v ] } if defined $v;
199     }
200    
201     $ds->{'_rows'} = { $self->{sheet}->{Name} => [ $data ] };
202    
203     return $ds;
204 dpavlin 1217 }
205    
206 dpavlin 498 =head1 AUTHOR
207    
208     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
209    
210     =head1 COPYRIGHT & LICENSE
211    
212     Copyright 2005-2006 Dobrica Pavlinusic, All Rights Reserved.
213    
214     This program is free software; you can redistribute it and/or modify it
215     under the same terms as Perl itself.
216    
217     =cut
218    
219     1; # End of WebPAC::Input::Excel

  ViewVC Help
Powered by ViewVC 1.1.26