/[webpac2]/trunk/lib/WebPAC/Input/Excel.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Input/Excel.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1100 - (hide annotations)
Sat Aug 2 23:46:41 2008 UTC (15 years, 9 months ago) by dpavlin
File size: 3254 byte(s)
Make cleanup of encodings, moving webpac closer to having
internal utf-8 representation.

This will break current code, but is really necessary
step toward checking input encoding for validity

1 dpavlin 498 package WebPAC::Input::Excel;
2    
3     use warnings;
4     use strict;
5    
6     use Spreadsheet::ParseExcel;
7     use Spreadsheet::ParseExcel::Utility qw/int2col/;
8 dpavlin 728 use base qw/WebPAC::Common/;
9 dpavlin 498
10     =head1 NAME
11    
12 dpavlin 894 WebPAC::Input::Excel - support for Microsoft Excel and compatible files
13 dpavlin 498
14     =head1 VERSION
15    
16 dpavlin 728 Version 0.04
17 dpavlin 498
18     =cut
19    
20 dpavlin 728 our $VERSION = '0.04';
21 dpavlin 498
22    
23     =head1 SYNOPSIS
24    
25     Open Microsoft Excel, or compatible format (e.g. from OpenOffice.org
26     or Gnumeric) in C<.xls> format.
27    
28     =head1 FUNCTIONS
29    
30 dpavlin 728 =head2 new
31 dpavlin 498
32     Opens the workbook and returns an object for reading rows of the selected worksheet
33    
34 dpavlin 728 my $excel = WebPAC::Input::Excel->new(
35 dpavlin 498 path => '/path/to/workbook.xls',
36     worksheet => 'name of sheet',
37 dpavlin 524 from => 42,
38     to => 9999,
39 dpavlin 498 );
40    
41     C<worksheet> is case and white-space insensitive name of worksheet in Excel
42     file to use. If not specified, it will use first worksheet in file.
43    
44 dpavlin 524 C<from> and C<to> specify row numbers to start and finish import.
45    
46 dpavlin 498 =cut
47    
# Construct a reader bound to one worksheet of an Excel workbook.
#
# Arguments are key/value pairs which are stored verbatim in the object:
#   path      - workbook file; must be a readable plain file
#   worksheet - optional sheet name, compared case-insensitively with
#               white-space runs collapsed and surrounding space ignored;
#               when omitted the first worksheet is used
#   from, to  - optional row numbers; a false value (including 0) falls
#               back to the sheet's MinRow / MaxRow
#
# Stores the chosen worksheet under {sheet} and the row count under {size}.
# Dies through the logger when the file is not readable, yields no
# worksheets after parsing, or lacks the requested worksheet.
sub new {
    my $class = shift;
    my $self = {@_};
    bless($self, $class);

    my $log = $self->_get_logger();

    $log->logdie("can't open excel file $self->{path}: $!") unless (-r $self->{path} && -f $self->{path});

    my $workbook = Spreadsheet::ParseExcel::Workbook->Parse($self->{path});

    # A file that is not valid Excel may come back as undef or as a
    # workbook without any worksheets; report both cases clearly instead
    # of failing later on an undefined dereference.
    my @worksheets = ($workbook && $workbook->{Worksheet})
        ? @{ $workbook->{Worksheet} }
        : ();
    $log->logdie("can't find any worksheet in $self->{path}") unless (@worksheets);

    my $sheet;
    my $wanted_worksheet = $self->{worksheet};

    if ($wanted_worksheet) {
        # Normalise the requested name the same way as the sheet names so
        # the comparison really is white-space insensitive.
        my $wanted = $wanted_worksheet;
        $wanted =~ s/\s+/ /g;
        $wanted =~ s/^ //;
        $wanted =~ s/ $//;

        # Walk the list without modifying it; first match wins.
        foreach my $candidate (@worksheets) {
            my $name = $candidate->{Name};
            next unless (defined($name));
            $name =~ s/\s+/ /g;
            if ($name =~ m/^\s*\Q$wanted\E\s*$/i) {
                $sheet = $candidate;
                last;
            }
        }

        $log->logdie("can't find sheet '$wanted_worksheet' in $self->{path}\n") unless (defined($sheet));

    } else {

        $sheet = $worksheets[0];

    }

    $self->{sheet} = $sheet;

    $self->{from} ||= $sheet->{MinRow};
    $self->{to} ||= $sheet->{MaxRow};

    # NOTE(review): rows from..to inclusive would be to - from + 1; the
    # original arithmetic is kept because size() callers may depend on it.
    # Undefined bounds (no MinRow/MaxRow on the sheet) count as 0 here.
    my $size = ($self->{to} || 0) - ($self->{from} || 0);
    $self->{size} = $size;

    $log->warn("opening Excel file '$self->{path}', using ",
        $wanted_worksheet ? '' : 'first ',
        "worksheet: $sheet->{Name} [$size rows]"
    );

    return $self;
}
92    
93     =head2 fetch_rec
94    
95     Return record with ID C<$mfn> from database
96    
97 dpavlin 652 my $rec = $self->fetch_rec( $mfn );
98 dpavlin 498
99 dpavlin 1055 Columns are named C<A>, C<B> and so on...
100 dpavlin 498
101     =cut
102    
# Fetch one worksheet row as a record.
#
# $mfn is the record number; it is mapped to sheet row
# {from} + $mfn - 1.
#
# Returns a hash reference keyed by column letter (as produced by
# int2col) for every cell in MinCol..MaxCol that holds a non-empty value,
# plus the key '000' holding [ $mfn ]. Returns undef when the row has no
# data. Dies through the logger when the object has no valid worksheet.
sub fetch_rec {
    my $self = shift;

    my $mfn = shift;

    my $log = $self->_get_logger();

    my $sheet = $self->{sheet};
    $log->logdie("can't find sheet hash") unless (defined($sheet));

    # Calling ->isa on an unblessed value would die before we could log
    # the intended message, so probe it inside eval.
    my $is_worksheet = do {
        local $@;
        eval { $sheet->isa('Spreadsheet::ParseExcel::Worksheet') };
    };
    $log->logdie("sheet hash isn't Spreadsheet::ParseExcel::Worksheet") unless ($is_worksheet);

    my $rec;

    my $row = $self->{from} + $mfn - 1;

    $log->debug("fetch_rec( $mfn ) row: $row cols: ",$sheet->{MinCol}," - ",$sheet->{MaxCol});

    # A negative array index would wrap around to the end of the sheet.
    return $rec if ($row < 0);

    # Look up the row step by step so that reading a missing row or cell
    # does not autovivify empty entries into the worksheet.
    my $all_rows  = $sheet->{Cells};
    my $row_cells = $all_rows ? $all_rows->[$row] : undef;

    if ($row_cells) {
        foreach my $col ( $sheet->{MinCol} .. $sheet->{MaxCol} ) {
            my $cell = $row_cells->[$col];
            next unless ($cell);

            # Keep legitimate zero values; only skip undefined/empty cells.
            my $v = $cell->{Val};
            next unless (defined($v) && $v ne '');

            $rec->{ int2col($col) } = $v;
        }
    }

    # add mfn only to records with data
    $rec->{'000'} = [ $mfn ] if ($rec);

    return $rec;
}
131    
132 dpavlin 728 =head2 size
133    
134     Return number of records in database
135    
136     my $size = $excel->size;
137    
138     =cut
139    
# Accessor for the record count kept under the object's 'size' key.
sub size {
    my ($self) = @_;
    my $records = $self->{size};
    return $records;
}
144 dpavlin 1100
# Name of the encoding this input module reports as its default.
sub default_encoding {
    return 'UTF-16';
}
146    
147 dpavlin 498 =head1 AUTHOR
148    
149     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
150    
151     =head1 COPYRIGHT & LICENSE
152    
153     Copyright 2005-2006 Dobrica Pavlinusic, All Rights Reserved.
154    
155     This program is free software; you can redistribute it and/or modify it
156     under the same terms as Perl itself.
157    
158     =cut
159    
160     1; # End of WebPAC::Input::Excel

  ViewVC Help
Powered by ViewVC 1.1.26