/[webpac2]/trunk/lib/WebPAC/Input/Excel.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Input/Excel.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1217 - (hide annotations)
Tue Jun 9 18:38:36 2009 UTC (14 years, 11 months ago) by dpavlin
File size: 3748 byte(s)
select sheet: worksheet param, name of input, first one

1 dpavlin 498 package WebPAC::Input::Excel;
2    
3     use warnings;
4     use strict;
5    
6     use Spreadsheet::ParseExcel;
7     use Spreadsheet::ParseExcel::Utility qw/int2col/;
8 dpavlin 728 use base qw/WebPAC::Common/;
9 dpavlin 498
10     =head1 NAME
11    
12 dpavlin 894 WebPAC::Input::Excel - support for Microsoft Excel and compatibile files
13 dpavlin 498
14     =cut
15    
16 dpavlin 1217 our $VERSION = '0.05';
17 dpavlin 498
18    
19     =head1 SYNOPSIS
20    
21     Open Microsoft Excell, or compatibile format (for e.g. from OpenOffice.org
22     or Gnuemeric) in C<.xls> format.
23    
24     =head1 FUNCTIONS
25    
26 dpavlin 728 =head2 new
27 dpavlin 498
28     Returns handle to database and size
29    
30 dpavlin 728 my $excel = new WebPAC::Input::Excel(
31 dpavlin 498 path => '/path/to/workbook.xls'
32     worksheet => 'name of sheet',
33 dpavlin 524 from => 42,
34     to => 9999,
35 dpavlin 498 }
36    
37     C<worksheet> is case and white-space insensitive name of worksheet in Excel
38 dpavlin 1217 file to use. If not specified, name of input is used. If none of those
39     methods returned sheet, first worksheet in file is used instead.
40 dpavlin 498
41 dpavlin 524 C<from> and C<to> specify row numbers to start and finish import.
42    
43 dpavlin 498 =cut
44    
45 dpavlin 728 sub new {
46     my $class = shift;
47     my $self = {@_};
48     bless($self, $class);
49 dpavlin 498
50     my $log = $self->_get_logger();
51    
52 dpavlin 728 $log->logdie("can't open excel file $self->{path}: $!") unless (-r $self->{path} && -f $self->{path});
53 dpavlin 498
54 dpavlin 728 my $workbook = Spreadsheet::ParseExcel::Workbook->Parse($self->{path});
55 dpavlin 498
56 dpavlin 728 my $sheet;
57 dpavlin 1217 my $wanted_worksheet = $self->{worksheet} || $self->{name};
58 dpavlin 498
59 dpavlin 1217 if ($wanted_worksheet) {
60 dpavlin 498 my $name;
61     do {
62     $sheet = shift @{ $workbook->{Worksheet} };
63 dpavlin 728 $log->logdie("can't find sheet '$wanted_worksheet' in $self->{path}\n") unless (defined($sheet));
64 dpavlin 498 $name = $sheet->{Name};
65     $name =~ s/\s\s+/ /g;
66     } until ($name =~ m/^\s*\Q$wanted_worksheet\E\s*$/i);
67    
68     }
69    
70 dpavlin 1217 $sheet ||= shift @{ $workbook->{Worksheet} };
71    
72 dpavlin 728 $self->{sheet} = $sheet;
73 dpavlin 498
74 dpavlin 728 $self->{from} ||= $sheet->{MinRow};
75     $self->{to} ||= $sheet->{MaxRow};
76 dpavlin 524
77 dpavlin 728 my $size = $self->{to} - $self->{from};
78     $self->{size} = $size;
79    
80     $log->warn("opening Excel file '$self->{path}', using ",
81 dpavlin 498 $wanted_worksheet ? '' : 'first ',
82     "worksheet: $sheet->{Name} [$size rows]"
83     );
84    
85 dpavlin 728 $self ? return $self : return undef;
86 dpavlin 498 }
87    
88     =head2 fetch_rec
89    
90     Return record with ID C<$mfn> from database
91    
92 dpavlin 652 my $rec = $self->fetch_rec( $mfn );
93 dpavlin 498
94 dpavlin 1055 Columns are named C<A>, C<B> and so on...
95 dpavlin 498
96     =cut
97    
98     sub fetch_rec {
99     my $self = shift;
100    
101 dpavlin 652 my $mfn = shift;
102 dpavlin 498
103     my $log = $self->_get_logger();
104    
105 dpavlin 728 my $sheet = $self->{sheet};
106 dpavlin 498 $log->logdie("can't find sheet hash") unless (defined($sheet));
107     $log->logdie("sheet hash isn't Spreadsheet::ParseExcel::Worksheet") unless ($sheet->isa('Spreadsheet::ParseExcel::Worksheet'));
108    
109     my $rec;
110    
111 dpavlin 728 my $row = $self->{from} + $mfn - 1;
112 dpavlin 498
113     $log->debug("fetch_rec( $mfn ) row: $row cols: ",$sheet->{MinCol}," - ",$sheet->{MaxCol});
114    
115     foreach my $col ( $sheet->{MinCol} ... $sheet->{MaxCol} ) {
116 dpavlin 1105 if (my $v = $sheet->{Cells}->[$row]->[$col]->{_Value}) { ## XXX _Value = formatted | Val = unformated !
117 dpavlin 498 $rec->{ int2col($col) } = $v;
118     }
119     }
120    
121     # add mfn only to records with data
122 dpavlin 521 $rec->{'000'} = [ $mfn ] if ($rec);
123 dpavlin 498
124     return $rec;
125     }
126    
127 dpavlin 728 =head2 size
128    
129     Return number of records in database
130    
131     my $size = $isis->size;
132    
133     =cut
134    
135     sub size {
136     my $self = shift;
137     return $self->{size};
138     }
139 dpavlin 1100
140     sub default_encoding { 'UTF-16' }
141    
142 dpavlin 1217 our @labels;
143     our @names;
144    
145     sub normalize {
146     my ($self,$mfn) = @_;
147    
148     my $log = $self->_get_logger();
149    
150     my $sheet = $self->{sheet};
151    
152     if ( ! @labels ) {
153     push @labels, $sheet->{Cells}->[0]->[$_]->{_Value}
154     foreach ( $sheet->{MinCol} ... $sheet->{MaxCol} )
155     ;
156     @names = map { s{\W+}{_}; $_ } @labels;
157     $log->loginfo("column labels:", @labels, @names);
158     }
159    
160     my $row = $self->{from} + $mfn - 1;
161    
162    
163     }
164    
165 dpavlin 498 =head1 AUTHOR
166    
167     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
168    
169     =head1 COPYRIGHT & LICENSE
170    
171     Copyright 2005-2006 Dobrica Pavlinusic, All Rights Reserved.
172    
173     This program is free software; you can redistribute it and/or modify it
174     under the same terms as Perl itself.
175    
176     =cut
177    
178     1; # End of WebPAC::Input::Excel

  ViewVC Help
Powered by ViewVC 1.1.26