/[Frey]/trunk/lib/DBD/RAM.pm

Revision 334
Sat Nov 8 22:15:15 2008 UTC by dpavlin
File size: 78393 byte(s)
import DBD::RAM for cleanup (and rename) patched with
http://rt.cpan.org/Public/Bug/Display.html?id=33882

We could, and probably will, use DBD::AnyData as a source of data.
However, at this stage it makes much more sense to first support SQL
queries over Frey objects, and DBD::RAM seems like the smaller
codebase to start with.
#########################################################################
#
# DBD::RAM - a DBI driver for files and data structures
#
# This module is copyright (c), 2000 by Jeff Zucker
# All rights reserved.
#
# This is free software. You may distribute it under
# the same terms as Perl itself as specified in the
# Perl README file.
#
# WARNING: no warranty of any kind is implied.
#
# To learn more: enter "perldoc DBD::RAM" at the command prompt,
# or search in this file for =head1 and read the text below it
#
#########################################################################

package DBD::RAM;

use strict;
require DBD::File;
require SQL::Statement;
require SQL::Eval;
use IO::File;

use vars qw($VERSION $err $errstr $sqlstate $drh $ramdata);

use base qw(DBD::File);

$VERSION = '0.07';

$err      = 0;     # holds error code for DBI::err
$errstr   = "";    # holds error string for DBI::errstr
$sqlstate = "";    # holds SQL state for DBI::state
$drh      = undef; # holds driver handle once initialized

#sub driver {
#    return $drh if $drh;    # already created - return same one
#    my($class, $attr) = @_;
#    $class .= "::dr";
#    $drh = DBI::_new_drh($class, {
#        'Name'        => 'RAM',
#        'Version'     => $VERSION,
#        'Err'         => \$DBD::RAM::err,
#        'Errstr'      => \$DBD::RAM::errstr,
#        'State'       => \$DBD::RAM::sqlstate,
#        'Attribution' => 'DBD::RAM by Jeff Zucker',
#    });
#    return $drh;
#}

package DBD::RAM::dr; # ====== DRIVER ======

$DBD::RAM::dr::imp_data_size = 0;

use base qw(DBD::File::dr);

sub connect {
    my($drh, $dbname, $user, $auth, $attr) = @_;
    my $dbh = DBI::_new_dbh($drh, {
        Name         => $dbname,
        USER         => $user,
        CURRENT_USER => $user,
    });
    # PARSE EXTRA STRINGS IN DSN HERE
    # Process attributes from the DSN; we assume ODBC syntax
    # here, that is, the DSN looks like var1=val1;...;varN=valN
    my $var;
    foreach $var (split(/;/, $dbname)) {
        if ($var =~ /(.*?)=(.*)/) {
            my $key = $1;
            my $val = $2;
            $dbh->STORE($key, $val);
        }
    }
    $dbh->STORE('f_dir','./') if !$dbh->{f_dir};
    # use Data::Dumper; die Dumper $DBD::RAM::ramdata if $DBD::RAM::ramdata;
    $dbh;
}

sub data_sources {}

sub disconnect_all { $DBD::RAM::ramdata = {}; }

sub DESTROY { $DBD::RAM::ramdata = {}; }


package DBD::RAM::db; # ====== DATABASE ======

$DBD::RAM::db::imp_data_size = 0;

use base qw(DBD::File::db);

sub disconnect { $DBD::RAM::ramdata = {}; }

# DRIVER PRIVATE METHODS

sub clear {
    my $dbh   = shift;
    my $tname = shift;
    my $r = $DBD::RAM::ramdata;
    if ( $tname && $r->{$tname} ) {
        delete $r->{$tname};
    }
    else {
        $DBD::RAM::ramdata = {};
    }
}

sub dump {
    my $dbh = shift;
    my $sql = shift;
    my $txt = '';
    my $sth = $dbh->prepare($sql) or die $dbh->errstr;
    # use Data::Dumper; $Data::Dumper::Indent=0; print Dumper $sth;
    $sth->execute or die $sth->errstr;
    my @col_names = @{$sth->{NAME}};
    $txt .= "<";
    for (@col_names) {
        $txt .= "$_,";
    }
    $txt =~ s/,$//;
    $txt .= ">\n";
    while (my @row = $sth->fetchrow_array) {
        for (@row) {
            $_ ||= '';
            s/^\s*//;
            s/\s*$//;
            $txt .= "[$_] ";
        }
        $txt .= "\n";
    }
    return $txt;
}

sub get_catalog {
    my $self  = shift;
    my $tname = shift || '';
    my $catalog = $DBD::RAM::ramdata->{catalog}{$tname} || {};
    $catalog->{f_type}    ||= '';
    $catalog->{r_type}    ||= $catalog->{f_type};
    $catalog->{f_name}    ||= '';
    $catalog->{pattern}   ||= '';
    $catalog->{col_names} ||= '';
    $catalog->{eol}       ||= "\n";
    return $catalog;
}

sub catalog {
    my $dbh        = shift;
    my $table_info = shift;
    if (!$table_info) {
        my @tables = (keys %{$DBD::RAM::ramdata->{catalog}} );
        my @all_tables;
        for (@tables) {
            push @all_tables, [
                $_,
                $DBD::RAM::ramdata->{catalog}{$_}{f_type},
                $DBD::RAM::ramdata->{catalog}{$_}{f_name},
                $DBD::RAM::ramdata->{catalog}{$_}{pattern},
                $DBD::RAM::ramdata->{catalog}{$_}{sep_char},
                $DBD::RAM::ramdata->{catalog}{$_}{eol},
                $DBD::RAM::ramdata->{catalog}{$_}{col_names},
                $DBD::RAM::ramdata->{catalog}{$_}{read_sub},
                $DBD::RAM::ramdata->{catalog}{$_}{write_sub} ];
        }
        return @all_tables;
    }
    for (@{$table_info}) {
        my($table_name,$f_type,$f_name,$hash);
        if (ref $_ eq 'ARRAY') {
            ($table_name,$f_type,$f_name,$hash) = @{$_};
        }
        if (ref $_ eq 'HASH') {
            $table_name = $_->{table_name} || die "catalog() requires a table_name";
            $f_type     = $_->{data_type}   || 'CSV';
            $f_name     = $_->{file_source} || '';
            $hash       = $_;
        }
        $hash->{r_type} = $f_type;
        if ($f_type eq 'FIXED') { $f_type = 'FIX'; }
        if ($f_type eq 'PIPE') {
            $hash->{sep_char}  = '\s*\|\s*';
            $hash->{wsep_char} = '|';
            $f_type = 'CSV';
        }
        if ($f_type eq 'TAB') {
            $hash->{sep_char} = "\t";
            $f_type = 'CSV';
        }
        if ($f_type eq 'INI') {
            $hash->{sep_char} = '=';
        }
        $DBD::RAM::ramdata->{catalog}{$table_name}{f_type} = uc $f_type || '';
        $DBD::RAM::ramdata->{catalog}{$table_name}{f_name} = $f_name || '';
        if ($hash) {
            for (keys %{$hash}) {
                next if /table_name/;
                next if /data_type/;
                next if /file_source/;
                $DBD::RAM::ramdata->{catalog}{$table_name}{$_} = $hash->{$_};
            }
        }
        $DBD::RAM::ramdata->{catalog}{$table_name}{eol} ||= "\n";
    }
}

sub get_table_name {
    my $dbh = shift;
    my @tables = (keys %{$DBD::RAM::ramdata} );
    if (!$tables[0]) { return 'table1'; }
    my $next = 0;
    for my $table (@tables) {
        if ($table =~ /^table(\d+)/ ) {
            $next = $1 if $1 > $next;
        }
    }
    $next++;
    return("table$next");
}

sub export() {
    my $dbh  = shift;
    my $args = shift || die "No arguments for export()\n";
    my $msg  = "export() requires ";
    my $sql    = $args->{data_source} || die $msg . '{data_source => $}';
    my $f_name = $args->{data_target} || die 'export requires {data_target => $f}';
    my $f_type = $args->{data_type}   || die 'export requires {data_type => $d}';
    if ($f_type eq 'XML') { return &export_xml($dbh,$args); }
    my $temp_table = 'temp__';
    $dbh->func( [[$temp_table,$f_type,$f_name,$args]],'catalog');
    my $sth1 = $dbh->prepare($sql);
    $sth1->execute or die $DBI::errstr;
    my @col_names = @{$sth1->{NAME}};
    my $sth2 = &prep_insert( $dbh, $temp_table, @col_names );
    while (my @row = $sth1->fetchrow_array) {
        $sth2->execute(@row);
    }
    delete $DBD::RAM::ramdata->{catalog}{$temp_table};
}

sub export_xml() {
    my $dbh  = shift;
    my $args = shift;
    my $msg  = "Export to XML requires ";
    my $sql        = $args->{data_source} || die $msg . '{data_source => $}';
    my $f_name     = $args->{data_target} || die $msg . '{data_target => $f}';
    my $f_type     = $args->{data_type}   || die $msg . '{data_type => $d}';
    my $record_tag = $args->{record_tag}  || die $msg . '{record_tag => $r}';
    my $header     = $args->{header} || '';
    my($head,$item,$foot) = &prep_xml_export($header,$record_tag);
    $f_name = $dbh->{f_dir} . '/' . $f_name;
    $f_name =~ s#//#/#g;
    open(O,">$f_name") || die "Couldn't write to $f_name: $!\n";
    print O $head, "\n";
    my $sth = $dbh->prepare($sql);
    $sth->execute;
    my @col_names = @{$sth->{NAME}};
    while (my @row = $sth->fetchrow_array) {
        print O "<$item>\n";
        my $i = 0;
        for (@row) {
            # skip empty fields, but still advance the column index
            unless ($row[$i]) { $i++; next; }
            print O " <$col_names[$i]>";
            print O "$row[$i]";
            print O "</$col_names[$i]>\n";
            $i++;
        }
        print O "</$item>\n\n";
    }
    print O $foot;
    close O || die "Couldn't write to $f_name: $!\n";
}

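# Given a space-separated record_tag such as "a b c", build the XML
# wrappers for export: opening tags for all but the last name go into
# the header, the last name becomes the per-record tag, and the
# closing tags (in reverse order) form the footer.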
sub prep_xml_export {
    my $header     = shift || qq{<?xml version="1.0" ?>\n};
    my $record_tag = shift;
    my @tag_starts   = split ' ', $record_tag;
    my $terminal_tag = pop @tag_starts;
    my @tag_ends = map("</$_>\n",reverse @tag_starts);
    @tag_starts  = map("<$_>\n",@tag_starts);
    for (@tag_starts) { $header .= $_; }
    #print " <$terminal_tag>\n";
    my $footer = '';
    for (@tag_ends) { $footer .= $_; }
    return($header,$terminal_tag,$footer);
}

sub convert() {
    my $dbh   = shift;
    my $specs = shift;
    my $source_type   = $specs->{source_type}   || '';
    my $source_file   = $specs->{source_file}   || '';
    my $source_params = $specs->{source_params} || '';
    my $target_type   = $specs->{target_type}   || '';
    my $target_file   = $specs->{target_file}   || '';
    my $temp_table = 'temp__';
    my($dbh2,$sth1);
    $dbh->func( [
        ["${temp_table}2",$target_type,$target_file,$source_params],
    ],'catalog');
    if ($source_type eq 'DBI' ) {
        my @con_ary = @{$source_params->{connection_ary}};
        my $table   = $source_params->{table};
        $dbh2 = DBI->connect( @con_ary );
        $sth1 = $dbh2->prepare("SELECT * FROM $table");
    }
    else {
        $dbh->func( [
            ["${temp_table}1",$source_type,$source_file,$source_params],
        ],'catalog');
        $sth1 = $dbh->prepare("SELECT * FROM ${temp_table}1");
    }
    $sth1->execute;
    my @col_names = @{$sth1->{NAME}};
    my $sth2 = &prep_insert( $dbh, "${temp_table}2", @col_names );
    while (my @row = $sth1->fetchrow_array) {
        $sth2->execute(@row);
    }
    if ($source_type eq 'DBI' ) { $dbh2->disconnect; }
}

sub import() {
    my $dbh   = shift;
    my $specs = shift;
    my $data  = shift;
    if ($specs && ! $data ) {
        if (ref $specs eq 'ARRAY' ) {
            $data = $specs; $specs = {};
        }
        else {
            $data = [];
        }
    }
    if (ref $specs ne 'HASH') {
        die 'First argument to "import" must be a hashref.';
    }
    if (ref $data ne 'ARRAY') {
        die 'Second argument to "import" must be an arrayref.';
    }
    my $data_type     = uc($specs->{data_type} || 'CSV');
    my $table_name    = $specs->{table_name} || $dbh->func('get_table_name');
    my $col_names     = $specs->{col_names}     || '';
    my $pattern       = $specs->{pattern}       || '';
    my $read_sub      = $specs->{read_sub}      || '';
    my $write_sub     = $specs->{write_sub}     || '';
    my $data_source   = $specs->{data_source}   || '';
    my $file_source   = $specs->{file_source}   || '';
    my $remote_source = $specs->{remote_source} || '';
    my $sep_char      = $specs->{sep_char}      || '';
    my $eol           = $specs->{eol}           || "\n";
    $DBD::RAM::ramdata->{catalog}{$table_name}->{r_type} = $data_type;
    if ($data_type eq 'FIXED'){ $data_type = 'FIX'; }
    if ($data_type eq 'PIPE') { $sep_char = '\s*\|\s*'; $data_type = 'CSV'; }
    if ($data_type eq 'TAB' ) { $sep_char = "\t";       $data_type = 'CSV'; }
    $DBD::RAM::ramdata->{catalog}{$table_name}->{sep_char}  = $sep_char  if $sep_char;
    $DBD::RAM::ramdata->{catalog}{$table_name}->{eol}       = $eol       if $eol;
    $DBD::RAM::ramdata->{catalog}{$table_name}->{pattern}   = $pattern   if $pattern;
    $DBD::RAM::ramdata->{catalog}{$table_name}->{read_sub}  = $read_sub  if $read_sub;
    $DBD::RAM::ramdata->{catalog}{$table_name}->{write_sub} = $write_sub if $write_sub;
    if ($data_type eq 'MP3' ) {
        $data_type = 'FIX';
        $col_names = 'file_name,song_name,artist,album,year,comment,genre';
        $pattern   = 'A255 A30 A30 A30 A4 A30 A50';
        $DBD::RAM::ramdata->{catalog}{$table_name}->{pattern} = $pattern;
        $data = &get_music_library( $specs );
    }
    if ($data_type eq 'XML' ) {
        $data = $dbh->func( $specs, $table_name, 'get_xml_db' );
        return 1;
    }
    ####################################################################
    # DATA SOURCE
    ####################################################################
    #
    # DATA FROM REMOTE FILE
    #
    if ($remote_source) {
        $data = $dbh->func($remote_source,'get_remote_data') or return undef;
        $data = [split("\n",$data)]; # turn string into arrayref
    }
    #
    # DATA FROM LOCAL FILE
    #
    if ($file_source) {
        $data = &get_file_data($dbh,$file_source);
        $data = [split("\n",$data)]; # turn string into arrayref
    }
    #
    # DATA FROM DATA STRUCTURE
    #
    if ($data_source) {
        $data = $data_source;
    }
    my @col_names;
    if ($data_type eq 'DBI' ) {
        @col_names = @{$data->{NAME}};
        my $sth_new = &prep_insert( $dbh, $table_name, @col_names );
        while (my @datarow = $data->fetchrow_array) {
            $sth_new->execute(@datarow);
        }
        die "No data in table $table_name!"
            unless $DBD::RAM::ramdata->{$table_name}->{DATA};
        return 1;
    }
    ####################################################################
    # GET COLUMN NAMES
    ####################################################################
    if (!ref $data) { my @tmp = split ( /$eol/m, $data ); $data = \@tmp; }
    my $first_line;
    if ($col_names eq 'first_line'
     && $data_type ne 'HASH' ) { $first_line = shift @{$data}; }
    else                       { $first_line = $data->[0]; }
    @col_names = $dbh->func(
        $table_name,$data_type,$col_names,$first_line,
        'get_column_names');
    ####################################################################
    # CREATE TABLE & PREPARE INSERT STATEMENT
    ####################################################################
    my $sth = &prep_insert( $dbh, $table_name, @col_names );

    ####################################################################
    # INSERT DATA INTO TABLE
    ####################################################################
    if ('CSV FIX INI ARRAY HASH USR' =~ /$data_type/ ) {
        for ( @{$data} ) {
            my @datarow;
            if ( $data_type eq 'HASH') {
                my %rowhash = %{$_};
                for (@col_names) {
                    my $val = $rowhash{$_} || '';
                    push @datarow, $val;
                }
            }
            else {
                @datarow = $dbh->func($_,$table_name,$data_type,'read_fields');
            }
            $sth->execute(@datarow);
        }
    }
    die "No data in table $table_name!" unless $DBD::RAM::ramdata->{$table_name}->{DATA};
    $DBD::RAM::ramdata->{$table_name}->{data_type} = $data_type;
    $DBD::RAM::ramdata->{$table_name}->{pattern}   = $pattern;
    $DBD::RAM::ramdata->{$table_name}->{read_sub}  = $read_sub;
    $DBD::RAM::ramdata->{$table_name}->{write_sub} = $write_sub;
    return 1;
}

####################################################################
# COLUMN NAMES
####################################################################
sub get_column_names {
    my($dbh,$table_name,$data_type,$col_names,$first_line) = @_;
    my $catalog  = $DBD::RAM::ramdata->{catalog}{$table_name};
    my $pattern  = $catalog->{pattern}  || '';
    my $read_sub = $catalog->{read_sub} || '';
    my($colstr,@col_names,$num_params);
    $colstr = '';
    #
    # COLUMN NAMES FROM FIRST LINE OF DATA
    #
    if ( $col_names eq 'first_line' && $data_type ne 'HASH' ) {
        @col_names = $dbh->func(
            $first_line,$table_name,$data_type,'read_fields');
        $num_params = scalar @col_names;
    }
    #
    # COLUMN NAMES FROM USER-SUPPLIED LIST
    #
    if ( $col_names && $col_names ne 'first_line' ) {
        $col_names =~ s/\s+//g;
        @col_names = split /,/,$col_names;
        $num_params = scalar @col_names;
    }
    #
    # AUTOMATICALLY ASSIGNED COLUMN NAMES
    #
    if ( $data_type eq 'HASH' && !$num_params ) {
        @col_names = keys %{$first_line};
        $num_params = scalar @col_names;
    }
    if ( !$num_params ) {
        if ( $data_type eq 'INI' ) {
            $num_params = 2;
        }
        if ( $data_type eq 'FIX' ) {
            my @x = split /\s+/,$pattern;
            $num_params = scalar @x;
        }
        if ( $data_type eq 'CSV' or $data_type eq 'USR' ) {
            my @colAry = $dbh->func(
                $first_line,$table_name,$data_type,'read_fields');
            $num_params = scalar @colAry;
        }
        $num_params = scalar @{ $first_line } if
            !$num_params && ref $first_line eq 'ARRAY';
        die "Couldn't find column names!" if !$num_params;
        for ( 1 .. $num_params ) { push(@col_names,"col$_"); }
    }
    return @col_names;
}

sub prep_insert {
    my( $dbh, $table_name, @col_names ) = @_;
    my $colstr = '';
    for ( @col_names ) { $colstr .= $_ . ' TEXT,'; }
    $colstr =~ s/,$//;
    my $create_stmt = "CREATE TABLE $table_name ($colstr)";
    my $param_str   = (join ",", ("?") x @col_names);
    my $insert_stmt = "INSERT INTO $table_name VALUES ($param_str)";
    $dbh->do($create_stmt);
    my $sth = $dbh->prepare($insert_stmt);
    return $sth;
}

sub get_remote_data {
    my $dbh = shift;
    my $remote_source = shift;
    undef $@;
    eval { require 'LWP/UserAgent.pm'; };
    die "LWP module not found! $@" if $@;
    my $ua  = new LWP::UserAgent;
    my $req = new HTTP::Request GET => $remote_source;
    my $res = $ua->request($req);
    die "[$remote_source] : " . $res->message if !$res->is_success;
    my $data = $res->content;
    return $data;
}

sub get_file_data {
    my $dbh = shift;
    my $file_source = shift;
    $file_source = $dbh->{f_dir} . '/' . $file_source;
    $file_source =~ s#//#/#g;
    open(I,$file_source) || die "[$file_source]: $!\n";
    local $/ = undef;
    my $data = <I>;
    close(I) || die "$file_source: $!\n";
    return $data;
}

sub get_xml_db {
    # Hat tip to Randal Schwartz for the XML/LWP stuff
    my($dbh,$specs,$table_name) = @_;
    my $remote_source = $specs->{remote_source} || '';
    my $file_source   = $specs->{file_source}   || '';
    my $data_source   = $specs->{data_source}   || '';
    my $record_tag    = $specs->{record_tag}    || '';
    my $col_tags      = $specs->{col_tags}      || '';
    my $fold_col      = $specs->{fold_col}      || '';
    my $col_mapping   = $specs->{col_mapping}   || '';
    my $col_names     = $specs->{col_names}     || '';
    my $read_sub      = $specs->{read_sub}      || '';
    my $attr          = $specs->{attr}          || '';
    my $data;
    my @columns;
    if (ref $col_names ne 'ARRAY') { $col_names = [split ',',$col_names]; }
    for ( @{$col_names} ) {
        if ($_ =~ /^\[(.*)\]$/ ) {
            my @newCols = split ',', $1;
            for (@newCols) { push @columns, $_; }
        }
        else {
            push @columns, $_;
        }
    }
    my $colstr = '';
    for ( @columns ) { $colstr .= $_ . ' TEXT,'; }
    $colstr =~ s/,$//;
    my $sql = "CREATE TABLE $table_name ($colstr)";
    $dbh->do($sql) || die $dbh->errstr . " : $sql";
    $DBD::RAM::ramdata->{$table_name}->{data_type} = 'XML';
    if ($remote_source){ $data = $dbh->func($remote_source,'get_remote_data') or die; }
    if ($file_source)  { $data = &get_file_data($dbh,$file_source); }
    if ($data_source)  { $data = $data_source; }
    die "No file or data source supplied!" unless $data;
    my $insert = $dbh->prepare("INSERT INTO $table_name (".
        (join ", ", @columns).
        ") VALUES (".
        (join ",", ("?") x @columns).")");
    My_XML_Parser::doParse($data, $insert, $record_tag,
        $col_names, $col_mapping,$fold_col,$attr,$read_sub);
    #use Data::Dumper; print Dumper $DBD::RAM::ramdata; exit;
}

sub read_fields {
    my $dbh   = shift;
    my $str   = shift;
    my $tname = shift;
    my $type  = uc shift;
    my $catalog = $dbh->func($tname,'get_catalog');
    if ($type eq 'ARRAY') {
        return @{$str};
    }
    chomp $str;
    if ($type eq 'CSV') {
        my $sep_char = $catalog->{sep_char} || ',';
        #my @fields = Text::ParseWords::parse_line( $sep_char, 0, $str );
        my @fields = &csv2ary( $sep_char, $str );
        return @fields;
    }
    if ($type eq 'USR') {
        my $read_sub = $catalog->{read_sub} || die "USR Type requires read_sub routine!\n";
        return &$read_sub($str);
    }
    if ($type eq 'FIX') {
        return unpack $catalog->{pattern}, $str;
    }
    if ($type eq 'INI') {
        if ( $str =~ /^([^=]+)=(.*)/ ) {
            my @fields = ($1,$2);
            return @fields;
        }
    }
    if ($type eq 'XML') {
        my @fields;
        $str =~ s#<[^>]*>([^<]*)<[^>]*>#
            my $x = $1 || '';
            push @fields, $x;
        #ge;
        return @fields;
    }
    return ();
}

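# Minimal CSV (de)serializers.  For the default comma separator,
# embedded double quotes are doubled on write and undoubled on read,
# and fields containing commas, newlines, or quotes are wrapped in
# double quotes; any other separator is treated as a plain split
# pattern and quoting is left to the user.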
sub ary2csv {
    my($field_sep,$record_sep,@ary) = @_;
    my $field_rsep = quotemeta($field_sep);
    my $str = '';
    for (@ary) {
        $_ = '' if !defined $_;
        if ($field_sep eq ',') {
            s/"/""/g;
            s/^(.*)$/"$1"/s if /,/ or /\n/s or /"/;
        }
        $str .= $_ . $field_sep;
    }
    $str =~ s/$field_rsep$/$record_sep/;
    return $str;
}

sub csv2ary {
    my($field_sep,$str) = @_;
    # chomp $str;
    #$str =~ s/[\015\012]//g;
    $str =~ tr/\015\012//d;
    if ($field_sep ne ',') {
        #$field_sep = quotemeta($field_sep); LEFT UP TO USER TO DO
        return split($field_sep, $str);
    }
    $str =~ s/""/\\"/g;
    my @new = ();
    push(@new, $+ ) while $str =~ m{
        "([^\"\\]*(?:\\.[^\"\\]*)*)"$field_sep?
      | ([^$field_sep]+)$field_sep?
      | $field_sep
    }gx;
    push(@new, undef) if substr($str,-1,1) eq $field_sep;
    @new = map { my $x = $_; $x = '' if !defined $x; $x =~ s/\\"/"/g; $x; } @new;
    return @new;
}

sub write_fields {
    my($dbh,$fields,$tname,$type) = @_;
    my $catalog = $dbh->func($tname,'get_catalog');
    my $sep  = $catalog->{sep_char}  || ',';
    my $wsep = $catalog->{wsep_char} || $sep;
    my $fieldNum = 0;
    my $fieldStr = $catalog->{pattern} || '';
    $fieldStr =~ s/a//gi;
    my @fieldLengths = split / /, $fieldStr;
    $fieldStr = '';
    if ( $catalog->{f_type} eq 'USR' ) {
        my $write_sub = $catalog->{write_sub} || die "Requires write_sub!\n";
        return &$write_sub(@{$fields});
    }
    if ( $catalog->{f_type} eq 'XML' ) {
        my @col_names = split ',',$catalog->{col_names};
        my $count = 0;
        for (@col_names) {
            $fieldStr .= "<$_>$fields->[$count]</$_>";
            $count++;
        }
        return $fieldStr;
    }
    for (@$fields) {
        # PAD OR TRUNCATE DATA TO FIT WITHIN FIELD LENGTHS
        if ( $catalog->{f_type} eq 'FIX' ) {
            my $oldLen = length $_;
            my $newLen = $fieldLengths[$fieldNum];
            if ($oldLen < $newLen) { $_ = sprintf "%-${newLen}s",$_; }
            if ($oldLen > $newLen) { $_ = substr $_, 0, $newLen; }
            $fieldNum++;
        }
        my $newCol = $_;
        if ( $catalog->{f_type} eq 'CSV' ) {
            if ($newCol =~ /$sep/ ) {
                $newCol =~ s/\042/\\\042/go;
                $newCol = qq{"$newCol"};
            }
            $fieldStr .= $newCol . $wsep;
        }
        else { $fieldStr .= $newCol; }
        if ( $catalog->{f_type} eq 'INI' ) { $fieldStr .= '='; }
    }
    if ( $catalog->{f_type} eq 'CSV' ) { $fieldStr =~ s/$sep$//; }
    if ( $catalog->{f_type} eq 'INI' ) { $fieldStr =~ s/=$//; }
    return $fieldStr;
}

sub get_music_library {
    my $specs = shift;
    my @dirs = @{$specs->{dirs}};
    my @db;
    for my $dir (@dirs) {
        my @files = get_music_dir( $dir );
        for my $fname (@files) {
            push @db, &get_mp3_tag($fname);
        }
    }
    return \@db;
}

sub get_music_dir {
    my $dir = shift;
    opendir(D,$dir) || print "$dir: $!\n";
    return '' if $!;
    my @files = grep /mp3$/i, readdir D;
    @files = map { $dir . $_ } @files;
    closedir(D) || print "Couldn't read '$dir': $!";
    return @files;
}

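# Read the ID3v1 tag from the last 128 bytes of an MP3 file.  The tag
# layout is "TAG" followed by fixed-width fields: 30-byte song name,
# 30-byte artist, 30-byte album, 4-byte year, 30-byte comment, and a
# one-byte numeric genre (looked up in the list below).  The file name
# is substituted for "TAG" as a 255-byte field and the genre name is
# appended, so the result matches the 'A255 A30 A30 A30 A4 A30 A50'
# unpack pattern used for MP3 tables in import().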
sub get_mp3_tag {
    my($file) = shift;
    open(I,$file) || return '';
    binmode I;
    local $/ = '';
    seek I, -128, 2;
    my $str = <I> || '';
    return '' if !($str =~ /^TAG/);
    $file = sprintf("%-255s",$file);
    $str =~ s/^TAG(.*)/$file$1/;
    my $genre = $str;
    $genre =~ s/^.*(.)$/$1/g;
    $str   =~ s/(.)$//g;
    $genre = unpack( 'C', $genre );
    my @genres = ("Blues", "Classic Rock", "Country", "Dance", "Disco", "Funk", "Grunge", "Hip-Hop", "Jazz", "Metal", "New Age", "Oldies", "Other", "Pop", "R&B", "Rap", "Reggae", "Rock", "Techno", "Industrial", "Alternative", "Ska", "Death Metal", "Pranks", "Soundtrack", "Eurotechno", "Ambient", "Trip-Hop", "Vocal", "Jazz+Funk", "Fusion", "Trance", "Classical", "Instrumental", "Acid", "House", "Game", "Sound Clip", "Gospel", "Noise", "Alternative Rock", "Bass", "Soul", "Punk", "Space", "Meditative", "Instrumental Pop", "Instrumental Rock", "Ethnic", "Gothic", "Darkwave", "Techno-Industrial", "Electronic", "Pop-Folk", "Eurodance", "Dream", "Southern Rock", "Comedy", "Cult", "Gangsta", "Top 40", "Christian Rap", "Pop/Funk", "Jungle", "Native American", "Cabaret", "New Wave", "Psychadelic", "Rave", "Show Tunes", "Trailer", "Lo-Fi", "Tribal", "Acid Punk", "Acid Jazz", "Polka", "Retro", "Musical", "Rock & Roll", "Hard Rock", "Folk", "Folk/Rock", "National Folk", "Swing", "Fast-Fusion", "Bebop", "Latin", "Revival", "Celtic", "Bluegrass", "Avantgarde", "Gothic Rock", "Progressive Rock", "Psychedelic Rock", "Symphonic Rock", "Slow Rock", "Big Band", "Chorus", "Easy Listening", "Acoustic", "Humour", "Speech", "Chanson", "Opera", "Chamber Music", "Sonata", "Symphony", "Booty Bass", "Primus", "Porn Groove", "Satire", "Slow Jam", "Club", "Tango", "Samba", "Folklore", "Ballad", "Power Ballad", "Rhytmic Soul", "Freestyle", "Duet", "Punk Rock", "Drum Solo", "Acapella", "Euro-House", "Dance Hall", "Goa", "Drum & Bass", "Club-House", "Hardcore", "Terror", "Indie", "BritPop", "Negerpunk", "Polsk Punk", "Beat", "Christian Gangsta Rap", "Heavy Metal", "Black Metal", "Crossover", "Contemporary Christian", "Christian Rock", "Unknown");
    $genre = $genres[$genre] || '';
    $str .= $genre . "\n";
    return $str;
}


# END OF DRIVER PRIVATE METHODS

sub table_info ($) {
    my($dbh) = @_;
    my @tables;
    for (keys %{$DBD::RAM::ramdata} ) {
        push(@tables, [undef, undef, $_, "TABLE", undef]);
    }
    my $names = ['TABLE_QUALIFIER', 'TABLE_OWNER', 'TABLE_NAME',
                 'TABLE_TYPE', 'REMARKS'];
    my $dbh2 = $dbh->{'csv_sponge_driver'};
    if (!$dbh2) {
        $dbh2 = $dbh->{'csv_sponge_driver'} = DBI->connect("DBI:Sponge:");
        if (!$dbh2) {
            DBI::set_err($dbh, 1, $DBI::errstr);
            return undef;
        }
    }

    # Temporary kludge: DBD::Sponge dies if @tables is empty. :-(
    return undef if !@tables;

    my $sth = $dbh2->prepare("TABLE_INFO", { 'rows'  => \@tables,
                                             'NAMES' => $names });
    if (!$sth) {
        DBI::set_err($dbh, 1, $dbh2->errstr());
    }
    $sth;
}

sub DESTROY { $DBD::RAM::ramdata = {}; }

package DBD::RAM::st; # ====== STATEMENT ======

$DBD::RAM::st::imp_data_size = 0;
@DBD::RAM::st::ISA = qw(DBD::File::st);


package DBD::RAM::Statement;

#@DBD::RAM::Statement::ISA = qw(SQL::Statement);
@DBD::RAM::Statement::ISA = qw(SQL::Statement DBD::File::Statement);
#@DBD::RAM::Statement::ISA = qw(DBD::File::Statement);

sub open_table ($$$$$) {
    my($self, $data, $tname, $createMode, $lockMode) = @_;
    my($table);
    my $dbh = $data->{Database};
    my $catalog = $dbh->func($tname,'get_catalog');
    my $ftype = $catalog->{f_type} || '';
    if ( !$catalog->{f_type} || $catalog->{f_type} eq 'RAM' ) {
        if ($createMode && !($DBD::RAM::ramdata->{$tname}) ) {
            if (exists($data->{$tname})) {
                die "A table $tname already exists";
            }
            $table = $data->{$tname} = { 'DATA'        => [],
                                         'CURRENT_ROW' => 0,
                                         'NAME'        => $tname,
                                       };
            bless($table, ref($self) . "::Table");
            $DBD::RAM::ramdata->{$tname} = $table;
            return $table;
        }
        else {
            $table = $DBD::RAM::ramdata->{$tname};
            die "No such table $tname" unless $table;
            $table->{'CURRENT_ROW'} = 0;
            return $table;
        }
    }
    else {
        my $file_name = $catalog->{f_name} || $tname;
        $table = $self->SUPER::open_table(
            $data, $file_name, $createMode, $lockMode
        );
        my $fh = $table->{'fh'};
        my $col_names = $catalog->{col_names} || '';
        my @col_names = ();
        if (!$createMode) {
            my $first_line = $fh->getline || '';
            #$first_line =~ s/[\015\012]//g;
            $first_line =~ tr/\015\012//d;
            @col_names = $dbh->func(
                $tname,$ftype,$col_names,$first_line,
                'get_column_names');
        }
        if ($col_names eq 'first_line' && !$createMode) {
            $table->{first_row_pos} = $fh->tell();
        }
        else {
            seek $fh,0,0;
        }
        my $count = 0;
        my %col_nums;
        for (@col_names) { next unless $_; $col_nums{$_} = $count; $count++; }
        $table->{col_names}     = \@col_names;
        $table->{col_nums}      = \%col_nums;
        $table->{'CURRENT_ROW'} = 0;
        $table->{NAME}          = $tname;
        $table;
    }
}

package DBD::RAM::Statement::Table;

@DBD::RAM::Statement::Table::ISA = qw(DBD::RAM::Table);

package DBD::RAM::Table;

#@DBD::RAM::Table::ISA = qw(SQL::Eval::Table);
#@DBD::RAM::Statement::Table::ISA = qw(SQL::Eval::Table DBD::File::Table);
use base qw(DBD::File::Table);

##################################
# fetch_row()
# CALLED WITH "SELECT ... FETCH"
##################################
sub fetch_row ($$$) {
    my($self, $data, $row) = @_;
    my $dbh   = $data->{Database};
    my $tname = $self->{NAME};
    my $catalog = $dbh->func($tname,'get_catalog');
    if ( !$catalog->{f_type} || $catalog->{f_type} eq 'RAM' ) {
        my($currentRow) = $self->{'CURRENT_ROW'};
        if ($currentRow >= @{$self->{'DATA'}}) {
            return undef;
        }
        $self->{'CURRENT_ROW'} = $currentRow + 1;
        $self->{'row'} = $self->{'DATA'}->[$currentRow];
        return $self->{row};
    }
    else {
        my $fields;
        if (exists($self->{cached_row})) {
            $fields = delete($self->{cached_row});
        } else {
            local $/ = $catalog->{eol} || "\n";
            #$fields = $csv->getline($self->{'fh'});
            my $fh = $self->{'fh'};
            my $line = $fh->getline || return undef;
            chomp $line;
            @$fields = $dbh->func($line,$tname,$catalog->{f_type},'read_fields');
            # @$fields = unpack $dbh->{pattern}, $line;
            if ( $dbh->{ChopBlanks} ) {
                @$fields = map($_ = &trim($_), @$fields);
            }
            if (!$fields ) {
                die "Error while reading file " . $self->{'file'} . ": $!" if $!;
                return undef;
            }
        }
        $self->{row} = (@$fields ? $fields : undef);
    }
    return $self->{row};
}

sub trim { my $x = shift; $x =~ s/^\s+//; $x =~ s/\s+$//; $x; }

##############################
# push_names()
# CALLED WITH "CREATE TABLE"
##############################
sub push_names ($$$) {
    my($self, $data, $names) = @_;
    my $dbh   = $data->{Database};
    my $tname = $self->{NAME};
    my $catalog = $dbh->func($tname,'get_catalog');
    if ( !$catalog->{f_type} || $catalog->{f_type} eq 'RAM' ) {
        $self->{'col_names'} = $names;
        my($colNums) = {};
        for (my $i = 0; $i < @$names; $i++) {
            $colNums->{$names->[$i]} = $i;
        }
        $self->{'col_nums'} = $colNums;
    }
    elsif (!$catalog->{col_names}) {
        my $fh = $self->{'fh'};
        my $colStr = $dbh->func($names,$tname,$catalog->{f_type},'write_fields');
        $colStr .= $catalog->{eol};
        $fh->print($colStr);
    }
}

################################
# push_row()
# CALLED WITH "INSERT" & "UPDATE"
################################
sub push_row ($$$) {
    my($self, $data, $fields) = @_;
    my $dbh   = $data->{Database};
    my $tname = $self->{NAME};
    my $catalog = $dbh->func($tname,'get_catalog');
    if ( !$catalog->{f_type} || $catalog->{f_type} eq 'RAM' ) {
        my($currentRow) = $self->{'CURRENT_ROW'};
        $self->{'CURRENT_ROW'} = $currentRow + 1;
        $self->{'DATA'}->[$currentRow] = $fields;
        return 1;
    }
    my $fh = $self->{'fh'};
    #
    # Remove undef from the right end of the fields, so that at least
    # in these cases undef is returned from FetchRow
    #
    while (@$fields && !defined($fields->[$#$fields])) {
        pop @$fields;
    }
    my $fieldStr = $dbh->func($fields,$tname,$catalog->{f_type},'write_fields');
    $fh->print($fieldStr,$catalog->{eol});
    1;
}

sub seek ($$$$) {
    my($self, $data, $pos, $whence) = @_;
    my $dbh   = $data->{Database};
    my $tname = $self->{NAME};
    my $catalog = $dbh->func($tname,'get_catalog');
    if ( $catalog->{f_type} && $catalog->{f_type} ne 'RAM' ) {
        return DBD::File::Table::seek(
            $self, $data, $pos, $whence
        );
    }
    my($currentRow) = $self->{'CURRENT_ROW'};
    if ($whence == 0) {
        $currentRow = $pos;
    } elsif ($whence == 1) {
        $currentRow += $pos;
    } elsif ($whence == 2) {
        $currentRow = @{$self->{'DATA'}} + $pos;
    } else {
        die $self . "->seek: Illegal whence argument ($whence)";
    }
    if ($currentRow < 0) {
        die "Illegal row number: $currentRow";
    }
    $self->{'CURRENT_ROW'} = $currentRow;
}


sub drop ($$) {
    my($self, $data) = @_;
    my $dbh   = $data->{Database};
    my $tname = $self->{NAME};
    my $catalog = $dbh->func($tname,'get_catalog');
    if ( !$catalog->{f_type} || $catalog->{f_type} eq 'RAM' ) {
        my $table_name = $self->{NAME} || return;
        delete $DBD::RAM::ramdata->{$table_name}
            if $DBD::RAM::ramdata->{$table_name};
        delete $data->{$table_name}
            if $data->{$table_name};
        return 1;
    }
    return DBD::File::Table::drop( $self );
}

##################################
# truncate()
# CALLED WITH "DELETE" & "UPDATE"
##################################
sub truncate ($$) {
    my($self, $data) = @_;
    my $dbh   = $data->{Database};
    my $tname = $self->{NAME};
    my $catalog = $dbh->func($tname,'get_catalog');
    if ( !$catalog->{f_type} || $catalog->{f_type} eq 'RAM' ) {
        $#{$self->{'DATA'}} = $self->{'CURRENT_ROW'} - 1;
        return 1;
    }
    return DBD::File::Table::truncate( $self, $data );
}

package My_XML_Parser;

my @state;
my %one_group_data;
my $insert_handle;
my $record_tag;
my @columns;
my %column_mapping;
my $multi_field_count;
my $fold_col;
my $fold;
my $fold_name;
my %folds;
my $read_sub;

sub doParse {
    my $data       = shift;
    $insert_handle = shift;
    $record_tag    = shift;
    my $col_names  = shift;
    my $col_map    = shift || '';
    $fold_col      = shift || {};
    my $attributes = shift || {};
    $read_sub      = shift || '';
    if ($read_sub eq 'latin1' && !$attributes->{ProtocolEncoding} ) {
        $read_sub = \&utf8_to_latin1;
        $attributes->{ProtocolEncoding} = 'ISO-8859-1';
    }
    @columns = @{$col_names};
    if ($col_map) { %column_mapping = %{$col_map}; }
    else          { %column_mapping = map { $_ => $_ } @columns; }
    undef $@;
    eval { require 'XML/Parser.pm' };
    die "XML::Parser module not found! $@" if $@;
    XML::Parser->new( Style => 'Stream', %{$attributes} )->parse($data);
}

sub StartTag {
    my ($parser, $type) = @_;
    my %attrs = %_;
    push @state, $type;
    if ("@state" eq $record_tag) {
        %one_group_data = ();
        $multi_field_count = 0;
        while (my($k,$v) = each %folds) {
            $one_group_data{$k} = $v if $v;
        }
    }
    for (keys %{$fold_col}) {
        my $state = "@state";
        next unless $_ =~ /^$state\^*/;
        my $fold_tag = $fold_col->{$_};
        if ( $fold_tag ) {
            $fold_name = $column_mapping{$fold_tag} if $column_mapping{$fold_tag};
            $fold_name ||= $fold_tag;
            $fold = $attrs{$fold_tag} || '';
            $folds{$fold_name} = $fold;
        }
    }
    for (keys %attrs) {
        my $place = $column_mapping{$_};
        if (defined $place) {
            $one_group_data{$place} .= " " if $one_group_data{$place};
            $one_group_data{$place} .= &check_read( $attrs{$_} );
        }
    }
}

sub EndTag {
    my ($parser, $type) = @_;
    my $tag = "@state";
    $tag =~ s/^$record_tag\s*//;
    my $column = $column_mapping{$tag};
    if (ref $column eq 'ARRAY') {
        $multi_field_count++;
    }
    if ("@state" eq $record_tag) {
        $insert_handle->execute(@one_group_data{@columns});
    }
    pop @state;
}

sub Text {
    my $tag = "@state";
    $tag =~ s/^$record_tag\s*//;
    my $column = $column_mapping{$tag};
    if (ref $column eq 'ARRAY') {
        $one_group_data{$column->[$multi_field_count]} .= &check_read($_);
        return;
    }
    if (defined $column) {
        $one_group_data{$column} .= " " if $one_group_data{$column};
        $one_group_data{$column} .= &check_read($_);
    }
}

sub check_read {
    my $x = shift;
    return $read_sub ? &$read_sub($x) : $x;
}

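# Fold two-byte UTF-8 sequences (lead byte 0xC0-0xDF plus continuation
# byte 0x80-0xBF) back into single ISO-8859-1 characters; code points
# above U+00FF are not handled.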
sub utf8_to_latin1 {
    local $_ = shift;
    s/([\xC0-\xDF])([\x80-\xBF])
     /chr(ord($1)<<6&0xC0|ord($2)&0x3F)
     /egx;
    return $_;
}

############################################################################
1;
__END__

=head1 NAME

DBD::RAM - a DBI driver for files and data structures

=head1 SYNOPSIS

  use DBI;
  my $dbh = DBI->connect('DBI:RAM:','usr','pwd',{RaiseError=>1});
  $dbh->func({
      table_name  => 'my_phrases',
      col_names   => 'id,phrase',
      data_type   => 'PIPE',
      data_source => [<DATA>],
  }, 'import' );
  print $dbh->selectcol_arrayref(qq[
      SELECT phrase FROM my_phrases WHERE id = 1
  ])->[0];
  __END__
  1 | Hello, New World
  2 | Some other Phrase

This sample creates a database table from data, uses SQL to make a
selection from the database and prints out the results.  While this
table is in-memory only and uses pipe "delimited" formatting, many
other options are available, including local and remote file access
and many different data formats.


=head1 DESCRIPTION

DBD::RAM allows you to import almost any type of Perl data
structure into an in-memory table and then use DBI and SQL
to access and modify it.  It also allows direct access to
almost any kind of file, supporting SQL manipulation
of the file without converting the file out of its native
format.

The module allows you to prototype a database without having an RDBMS
or other database engine and can operate either with or without
creating or reading disk files.  If you do use disk files, they may,
in most cases, either be local files or any remote file accessible via
HTTP or FTP.

=head1 OVERVIEW

This module allows you to work with a variety of data formats and to
treat them like standard DBI/SQL accessible databases.  Currently
supported formats include:

 FORMATS:

   XML    Extensible Markup Language (XML)
   FIXED  fixed-width records
   INI    name=value pairs
   PIPE   pipe "delimited" text
   TAB    tab "delimited" text
   CSV    Comma Separated Values or other "delimited" text
   MP3    MP3 music binary files
   ARRAY  Perl array
   HASH   Perl associative array
   DBI    DBI database connection
   USR    user defined formats

The data you use may come from several kinds of sources:

 SOURCES

   DATA         Perl data structures: strings, arrays, hashes
   LOCAL FILE   a file stored on your local computer hard disk
   REMOTE FILE  a remote file accessible via HTTP or FTP

If you modify the data in a table, the modifications may be stored in
several ways.  The storage can be temporary, i.e. in memory only with
no disk storage.  Or several modifications can be done in memory and
then stored to disk once at the end of the processing.  Or
modifications can be stored to disk continuously, similarly to the way
other DBDs operate.

 STORAGE

   RAM         in-memory processing only, no storage
   ONE-TIME    processed in memory, stored to disk on command
   CONTINUOUS  all modifications stored to disk as they occur

Here is a summary of the SOURCES, FORMATS, and STORAGE capabilities of
DBD::RAM.  (x = currently supported, - = not supported, * = support in
progress)

                      FORMAT
                      CSV PIPE TAB FIXED INI XML MP3 ARRAY HASH DBI USR
 INPUT
   array/hash/string   x   x    x    x    x   x   -    x    x    -   x
   local file          x   x    x    x    x   x   x    -    -    x   x
   remote file         x   x    x    x    x   x   *    -    -    *   x
 OUTPUT
   ram table           x   x    x    x    x   x   x    x    x    x   x
   file (1-time)       x   x    x    x    x   x   -    -    -    *   *
   file (continuous)   x   x    x    x    x   *   -    -    -    x   *

Please note that any ram table, regardless of original source, can be
stored in any of the supported file output formats.  So, for example,
a table of MP3 information could be stored as a CSV file; the "-" in
the MP3 column only indicates that the information from the MP3 table
can not (for obvious reasons) be written back to an MP3 file.

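For example, a table built from MP3 tag information can be saved back
out as a CSV file (a minimal sketch; the directory name is
hypothetical):

  $dbh->func( {
      data_type  => 'MP3',
      table_name => 'songs',
      dirs       => ['/home/me/mp3/'],
  }, 'import' );
  $dbh->func( {
      data_source => 'SELECT * FROM songs',
      data_target => 'songs.csv',
      data_type   => 'CSV',
  }, 'export' );
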
=head1 INSTALLATION & PREREQUISITES

This module should work on any platform that DBI works on.

You don't need an external SQL engine or a running server, or a
compiler.  All you need are Perl itself and installed versions of DBI
and SQL::Statement.  If you do not also have DBD::CSV installed you
will need to either install it, or simply copy File.pm into your DBD
directory.

You can either use the standard makefile method, or just copy RAM.pm
into your DBD directory.

Some features require installation of extra modules.  If you wish to
work with the XML format, you will need to install XML::Parser.  If
you wish to use the ability to work with remote files, you will need
to install the LWP (libnet) modules.  Other features of DBD::RAM work
fine without these additional modules.

=head1 SQL & DBI

This module, like other DBD database drivers, works with the DBI
methods listed in DBI.pm and its documentation.  Please see the DBI
documentation for details of methods such as connecting, preparing,
executing, and fetching data.  Currently only a limited subset of SQL
commands is supported.  Here is a brief synopsis; please see the
documentation for SQL::Statement for a more complete description of
these commands.

  CREATE TABLE $table
    ( $col1 $type1, ..., $colN $typeN,
      [ PRIMARY KEY ($col1, ... $colM) ] )

  DROP TABLE $table

  INSERT INTO $table
    [ ( $col1, ..., $colN ) ]
    VALUES ( $val1, ... $valN )

  DELETE FROM $table
    [ WHERE $wclause ]

  UPDATE $table
    SET $col1 = $val1, ... $colN = $valN
    [ WHERE $wclause ]

  SELECT [DISTINCT] $col1, ... $colN
    FROM $table
    [ WHERE $wclause ]
    [ ORDER BY $ocol1 [ASC|DESC], ... $ocolM [ASC|DESC] ]

  $wclause [NOT] $col $op $val|$col
    [ AND|OR $wclause2 ... AND|OR $wclauseN ]

  $op = = | <> | < | > | <= | >=
      | IS NULL | IS NOT NULL | LIKE | CLIKE

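Here is a short sketch of these statements in use with standard DBI
methods (assuming a $dbh connected as in the SYNOPSIS above):

  $dbh->do("CREATE TABLE phrases (id INTEGER, phrase TEXT)");
  my $sth = $dbh->prepare("INSERT INTO phrases VALUES (?,?)");
  $sth->execute( 1, 'Hello, New World' );
  $dbh->do("UPDATE phrases SET phrase = 'Hi' WHERE id = 1");
  my $rows = $dbh->selectall_arrayref(
      "SELECT id, phrase FROM phrases ORDER BY id"
  );
  $dbh->do("DELETE FROM phrases WHERE id = 1");
  $dbh->do("DROP TABLE phrases");
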
=head1 WORKING WITH FILES & TABLES

This module supports working with both in-memory and disk-based
databases.  In order to allow quick testing and prototyping, the
default behavior is for tables to be created in-memory from in-memory
data, but it is easy to change this behavior so that tables can also
be created, manipulated, and stored on disk, or so that there is a
combination of in-memory and disk-based manipulation.

There are three methods unique to the DBD::RAM module that allow you
to specify which mode of operation you use for each table or
operation:

 1) import()  imports data either from memory or from a file into
              an in-memory table

 2) export()  exports data from an in-memory table to a file
              regardless of the original source of the data

 3) catalog() sets up an association between a file name and a table
              name such that all operations on the table are done
              continuously on the file

With the import() method, standard DBI/SQL commands like select,
update, delete, etc. apply only to the data that is in-memory.  If you
want to save the modifications to a file, you must explicitly call
export() after making the changes.

On the other hand, the catalog() method sets up an association between
a file and a table name such that all DBI/SQL commands operate on the
file continuously without needing to explicitly call export().  This
method of operation is similar to other DBD drivers.

Here is a rough diagram of how the three methods operate:

  disk -> import() -> RAM

                      select
                      update
                      delete
                      insert
                      (multiple times)

  disk <- export() <- RAM

  catalog()
  disk <-> select
  disk <-> update
  disk <-> delete
  disk <-> insert

Regardless of which method is chosen, the same set of DBI and SQL
commands may be applied to all tables.

See below for details of import(), export() and catalog() and for
specifics of naming files and directories.

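For example (a minimal sketch, assuming a file people.csv under f_dir
whose first line names an id and a name column):

  $dbh->func( {
      table_name  => 'people',
      data_type   => 'CSV',
      file_source => 'people.csv',
      col_names   => 'first_line',
  }, 'import' );                                # disk -> RAM
  $dbh->do("UPDATE people SET name = 'Joe' WHERE id = 1");  # RAM only
  $dbh->func( {
      data_source => 'SELECT * FROM people',
      data_target => 'people.csv',
      data_type   => 'CSV',
  }, 'export' );                                # RAM -> disk
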
=head2 Creating in-memory tables from data and files: import()

In-memory tables may be created using standard CREATE/INSERT
statements, or using the DBD::RAM specific import method:

  $dbh->func( $args, 'import' );

The $args parameter is a hashref which can contain several keys, most
of which are optional and/or have sensible defaults.

These keys can either be specified or left to default behaviour:

  table_name   string: name of the table
  col_names    string: column names for the table
  data_type    string: format of the data (e.g. XML, CSV...)

The table_name key to the import() method is either a string, or if
it is omitted, a default table name will be automatically supplied,
starting at table1, then table2, etc.

  table_name => 'my_test_db',

  OR simply omit the table_name key

If the col_names key to the import() method is omitted, the column
names will be automatically supplied, starting at col1, then col2,
etc.  If the col_names key is the string 'first_line', the column
names will be taken from the first line of the data.  If the col_names
key is a comma separated list of column names, those will be taken (in
order) as the names of the columns.

  col_names => 'first_line',

  OR col_names => 'name,address,phone',

  OR simply omit the col_names key

If table_name or col_names are specified, they must comply with SQL
naming rules for identifiers: start with an alphabetic character;
contain nothing but alphabetic characters, numbers, and/or
underscores; be less than 128 characters long; and not be the same as
a SQL reserved keyword.  If the table refers to a file that has a
period in its name (e.g. my_data.csv), this can be handled with the
catalog() method, see below.

The table_name and col_names, if specified, *are* case sensitive, so
that "my_test_db" is not the same as "my_TEST_db".

The data_type key to the import() method specifies the format of the
data as already described above in the general description.  It must
be one of:

  data_type => 'CSV',
  data_type => 'TAB',
  data_type => 'PIPE',
  data_type => 'INI',
  data_type => 'FIXED',
  data_type => 'XML',
  data_type => 'MP3',
  data_type => 'DBI',
  data_type => 'USR',
  data_type => 'ARRAY',
  data_type => 'HASH',

  OR simply omit the data_type key

If no data_type key is supplied, the default format CSV will be used.

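For example, assuming no tables exist yet, these two calls build the
same table; the second simply spells out the defaults the first one
relies on:

  $dbh->func( [ '1,one', '2,two' ], 'import' );

  $dbh->func( {
      table_name  => 'table1',
      col_names   => 'col1,col2',
      data_type   => 'CSV',
      data_source => [ '1,one', '2,two' ],
  }, 'import' );
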
The import() keys must specify a source of the data for the table,
which can be any of:

  file_source    string: name of local file to get data from
  remote_source  string: url of remote file to get data from
  data_source    string or arrayref: the actual data

The file_source key is the name of a local file.  Its location will be
taken to be relative to the f_dir specified in the database
connection, see the connect() method above.  Whether or not the file
name is case sensitive depends on the operating system the script is
running on, e.g. on Windows case is ignored and on UNIX it is not
ignored.  For maximum portability, it is safest to assume that case
matters.

  file_source => 'my_test_db.csv'

The remote_source key is a URL (Uniform Resource Locator) to a file
located on some other computer.  It may be any kind of URL that is
supported by the LWP module, including HTTP and FTP.  If a username
and password are required, they can be included in the URL.

  remote_source => 'http://myhost.com/mypath/myfile.myext'

  OR remote_source => 'ftp://user:password@myhost.com/mypath/myfile.myext'

The data_source key to the import() method contains the actual data
for the table, in cases where the data comes from the Perl script
itself rather than from a file.  The method of specifying the
data_source depends entirely on the format of the data_type.  For
example, with a data_type of XML or CSV, the data_source is a string
in XML or CSV format, but with data_type ARRAY, the data_source is a
reference to an array of arrayrefs.  See below under each data_type
for details.

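For instance, these two calls produce the same table from a CSV string
and from an equivalent ARRAY structure:

  $dbh->func( {
      data_type   => 'CSV',
      data_source => "1,one\n2,two\n",
  }, 'import' );

  $dbh->func( {
      data_type   => 'ARRAY',
      data_source => [ ['1','one'], ['2','two'] ],
  }, 'import' );
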
The following keys to the import() method apply only to specific data
formats; see the sections on the specific formats (listed in parens)
for details, and the short sketch below:

  pattern      (FIXED only)
  sep_char     (CSV only)
  eol          (CSV only)
  read_sub     (USR and XML only)
  attr         (XML only)
  record_tag   (XML only)
  fold_col     (XML only)
  col_mapping  (XML only)
  dirs         (MP3 only)

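For instance, importing fixed-width records uses the pattern key, in
unpack() format, to declare the field widths:

  $dbh->func( {
      data_type   => 'FIXED',
      pattern     => 'A2 A19',
      data_source => "1 Hello, New World   \n2 Some other Phrase \n",
  }, 'import' );
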
1472    
1473     =head2 Saving in-memory tables to disk: export()
1474    
1475     The export() method creates a file from an in-memory table. It takes
1476     a very similar set of keys as does the import() method. The data_type
1477     key specifies the format of the file to be created (CSV, PIPE, TAB,
1478     XML, FIXED-WIDTH, etc.). The same set of specifiers available for the
1479     import method for these various formats are also available
1480     (e.g. sep_char to set the field separator for CSV files, or pattern to
1481     set the fixed-width pattern).
1482    
1483     The data_source key for the export() method is a SQL select statement
1484     in relation to whatever in-memory table is chosen to export. The
1485     data_target key specifies the name of the file to put the results in.
1486     Here is an example:
1487    
1488     $dbh->func( {
1489     data_source => 'SELECT * FROM table1',
1490     data_target => 'my_db.fix',
1491     data_type => 'FIXED',
1492     pattern => 'A2 A19',
1493     },'export' );
1494    
1495     That statement creates a fixed-width record database file called
1496     "my_db.fix" and puts the entire contents of table1 into the file using
1497     the specified field widths and whatever column names alread exist in
1498     table1.
1499    
1500     See specific data formats below for details related to the export() method.
1501    
1502     =head2 Continuous file access: catalog()
1503    
The catalog() method creates an association between a specific table
name, a specific data type, and a specific file name. You can create
those associations for several files at once. The parameter to the
catalog() method is a reference to an array of arrayrefs. Each of the
arrayrefs should contain a table name, a data type, and a file name,
and can optionally include other parameters specific to particular
data types. Here is an example:
1511    
1512     $dbh->func([
1513     [ 'my_csv_table', 'CSV', 'test_db.csv' ],
1514     ],'catalog');
1515    
This example creates an association to a CSV file. Once the
catalog() statement has been issued, any DBI/SQL commands relating to
"my_csv_table" will operate on the file "test_db.csv". If the command
is a SELECT statement, the file will be opened and searched. If the
command is an INSERT statement, the file will be opened and the new
data row(s) inserted and saved into the file, as the sketch below
shows.
1522    
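For example, after the catalog() call above, ordinary DBI calls read
and write the underlying file (a sketch; the inserted values are
invented and assume test_db.csv holds two columns):

     my $sth = $dbh->prepare("SELECT * FROM my_csv_table");
     $sth->execute;
     while (my @row = $sth->fetchrow_array) { print "@row\n"; }

     # this INSERT is written through to test_db.csv
     $dbh->do(qq[INSERT INTO my_csv_table VALUES (3,'new row')]);
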
One can also pass additional information such as column names,
fixed-width patterns, and field and record separators to the
catalog() method. See the import() information above for the meanings
of these additional parameters. They should be passed as a hashref:
1527    
     $dbh->func([
         [ 'table1', 'FIXED', 'test_db.fix', {pattern => 'A2 A19'} ],
         [ 'table2', 'INI',   'test_db.ini', {col_names => 'id,phrase,name'} ],
     ],'catalog');
1532    
1533     In future releases, users will be able to store catalogs in files for permanent associations between files and data types.
1534    
1535     =head2 Specifying file and directory names
1536    
1537     All filenames are relative to a user-specified file directory: f_dir.
1538     The f_dir parameter may be set in the connect statement:
1539    
1540     my $dbh=DBI->connect("dbi:RAM:f_dir=/mypath/to-files/" ...
1541    
1542     The f_dir parameter may also be set or reset anywhere in the program
1543     after the database connection:
1544    
1545     $dbh->{f_dir} = '/mypath/to-files'
1546    
If the f_dir parameter is not set explicitly, it defaults to "./",
which will be wherever your script thinks it is running from (and
which, depending on server setup, may or may not be the physical
location of your script, so rely on this default only if you know
what you are doing).
1551    
1552     All filenames are relative to the f_dir directory. It is not possible
1553     to use an absolute path to a file.
1554    
WARNING: no taint checking is performed on the filename or f_dir;
this is the responsibility of the programmer. Since the filename is
relative to the f_dir directory, a filename starting with "../" will
lead to files above or outside of the f_dir directory, so you should
exclude such names if the filenames come from user input. A minimal
check is sketched below.
1560    
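One possible check (this is not part of DBD::RAM; the variable name
is hypothetical):

     my $fname = $user_input;   # hypothetical user-supplied file name
     die "unsafe filename: $fname"
         if $fname =~ m{\.\.} or $fname =~ m{^[/\\]};
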
1561     =head2 Using defaults for quick testing & prototyping
1562    
If no table_name is specified, a numbered table name will be supplied
(table1, or if that exists, table2, etc.). The same applies to
column names (col1, col2, etc.). If no data_type is supplied, CSV
will be assumed. If the entire hashref parameter to import() is
missing and an arrayref of data is supplied instead, then defaults
for all values will be used and the contents of the array will be
treated as the data_source. For CSV files, a field separator of
comma and a record separator of newline are the defaults. Thus,
assuming there are no already existing in-memory tables, the two
statements below have the same effect:
1573    
1574     $dbh->func( [<DATA>], 'import' );
1575    
1576     $dbh->func({
1577     table_name => 'table1',
1578     data_type => 'CSV',
1579     col_names => 'col1,col2',
1580     sep_char => ',',
1581     eol => "\n",
1582     data_source => [<DATA>],
1583     },'import' );
1584    
1585     It is also possible to rely on some of the defaults, but not all of
1586     them. For example:
1587    
1588     $dbh->func({
1589     data_type => 'PIPE',
1590     file_source => 'my_db.pipe',
1591     },'import' );
1592    
1593     =head1 DATA FORMATS
1594    
1595     =head2 CSV / PIPE / TAB / INI (Comma,Pipe,Tab,INI & other "delimited" formats)
1596    
DBD::RAM can import CSV (Comma Separated Values) data from strings,
from local files, or from remote files into database tables, and can
export tables from any source to CSV files. It can also store and
update CSV files continuously, similarly to the way other DBD drivers
operate.
1601    
If you wish to use remote CSV files, you also need the LWP module
installed. It is available from www.activestate.com for Windows, and
from www.cpan.org for other platforms.
1605    
1606     CSV is the format of files commonly exported by such programs as
1607     Excel, Access, and FileMakerPro. Typically newlines separate records
1608     and commas separate fields. Commas may also be included inside fields
1609     if the field itself is surrounded by quotation marks. Quotation marks
1610     may be included in fields by doubling them. Although some types of
1611     CSV formats allow newlines inside fields, DBD::RAM does not currently
1612     support that. If you need that feature, you should use DBD::CSV.
1613    
1614     Here are some typical CSV fields:
1615    
1616     hello,1,"testing, ""george"", 1,2,3",junk
1617    
1618     Note that numbers and strings that don't contain commas do not need
1619     quotation marks around them. That line would be parsed into four
1620     fields:
1621    
1622     hello
1623     1
1624     testing, "george", 1,2,3
1625     junk
1626    
1627     To import that string of CSV into a DBD::RAM table:
1628    
1629     $dbh->func({
1630     data_source => qq[hello,1,"testing, ""george"", 1,2,3",junk]
1631     },'import');
1632    
Or if one wanted to continuously update a file, similarly to the way
DBD::CSV works:
1635    
1636     $dbh->func([ 'table1', 'CSV', 'my_test.csv' ],'catalog');
1637    
1638    
1639     Or if that string and others like it were in a local file called
1640     'my_test.csv':
1641    
1642     $dbh->func({ file_source => 'my_test.csv' },'import');
1643    
1644     Or if that string and others like it were in a remote file at a known
1645     URL:
1646    
1647     $dbh->func({ remote_source => 'http://www.foo.edu/my_test.csv' },'import');
1648    
Note that these forms all use default behaviour, since CSV is the
default data_type. These methods also use the default table_name
(table1, table2, etc.) and default column names (col1, col2, etc.).
The same functions can specify a table_name, and can either specify a
list of column names or specify that the column names should be taken
from the first line of data:
1655    
1656     $dbh->func({
1657     file_source => 'my_test.csv',
1658     table_name => 'my_table',
1659     col_names => 'name,phone,address',
1660     },'import');
1661    
1662     It is also possible to define other field separators (e.g. a
1663     semicolon) with the "sep_char" key and define other record separators
1664     with the "eol" key. For example:
1665    
1666     sep_char => ';',
1667     eol => '~',
1668    
1669     Adding those to the import() hash would define data that has a
1670     semicolon between every field and a tilde between every record.
1671    
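Putting those keys together (a minimal sketch; the data itself is
invented):

     $dbh->func({
         data_type   => 'CSV',
         sep_char    => ';',
         eol         => '~',
         data_source => "1;Hello~2;World~",
     },'import');
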
For convenience, shortcuts have been provided for PIPE and TAB
separators. The data_type "PIPE" indicates a separator of the pipe
character '|', which may optionally have blank spaces before or after
it. The TAB data_type indicates fields that are separated by tabs.
In both cases newlines remain the default record separator unless
specifically set to something else.
1678    
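For example (the data is invented):

     $dbh->func({
         data_type   => 'PIPE',
         data_source => "1 | Hello\n2 | World\n",
     },'import');
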
1679     Another shortcut is the INI data_type. This expects to see data in
1680     name=value pairs like this:
1681    
1682     resolution = 640x480
1683     colors = 256
1684    
1685     Currently the INI type does not support sections within the .ini file,
1686     but that will change in future releases of this module.
1687    
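For example, to import the name=value data shown above (a sketch; the
column names are invented):

     $dbh->func({
         data_type   => 'INI',
         col_names   => 'setting,value',
         data_source => "resolution=640x480\ncolors=256\n",
     },'import');
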
1688     The PIPE, TAB, and INI formats all behave like the CSV format.
1689     Defaults may be used for assigning column names from the first line of
1690     data, in which case the column names should be separated by the
1691     appropriate symbol (e.g. col1|col2 for PIPE, and col1=col2 for INI,
1692     and column names separated by tabs for TAB).
1693    
In the examples above using data_source, the data was a string with
newlines separating the records. It is also possible to use a
reference to an array of lines as the data_source. This makes it
easy to use the DATA segment of a script or to import an array from
some other part of a script:
1699    
1700     $dbh->func({ data_source => [<DATA>] },'import' );
1701    
1702     =head2 ARRAYS & HASHES
1703    
DBD::RAM can import data directly from references to arrays of
arrayrefs and from references to arrays of hashrefs. This allows you
to easily import data from some other portion of a Perl script into a
database format and either save it to disk or simply manipulate it in
memory.
1709    
1710     $dbh->func({
1711     data_type => 'ARRAY',
1712     data_source => [
1713     ['1','CSV:Hello New World!'],
1714     ['2','CSV:Cool!']
1715     ],
1716     },'import');
1717    
1718     $dbh->func({
1719     data_type => 'HASH',
1720     data_source => [
1721     {id=>1,phrase=>'Hello new world!'},
1722     {id=>2,phrase=>'Junkity Junkity Junk'},
1723     ],
1724     },'import');
1725    
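Once imported, either table can be queried like any other. A sketch,
assuming the defaults described above (the HASH import here would get
the name 'table2' if 'table1' already exists, with the hash keys as
its column names):

     my $sth = $dbh->prepare("SELECT phrase FROM table2 WHERE id = 2");
     $sth->execute;
     print $sth->fetchrow_array, "\n";   # prints 'Junkity Junkity Junk'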
1726    
1727     =head2 FIXED-WIDTH RECORDS
1728    
1729     Fixed-width records (also called fixed-length records) do not use
1730     character patterns to separate fields, rather they use a preset number
1731     of characters in each field to determine where one field begins and
1732     another ends. DBD::RAM can import fixed-width records from strings,
1733     arrayrefs, local files, and remote files and can export data from any
1734     source to fixed-width record fields. The module also allows
1735     continuous disk-based updating of fixed-width format files similarly
1736     to other DBDs.
1737    
The fixed-width format behaves exactly like the CSV formats mentioned
above, with the exceptions that the data_type is "FIXED" and that one
must supply a pattern key to describe the width of the fields. The
pattern should be in Perl unpack() format, e.g. "A2 A7 A14" would
indicate a table with three columns with widths of 2, 7, and 14
characters. When data is inserted or updated, it will be truncated
or padded to fill exactly the amount of space allotted to each field.
1745    
1746     $dbh->func({
1747     table_name => 'phrases',
1748     col_names => 'id,phrase',
1749     data_type => 'FIXED',
1750     pattern => 'A1 A20',
1751     data_source => [ '1Hello new world! ',
1752     '2Junkity Junkity Junk',
1753     ],
1754     },'import' );
1755    
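To see how such a pattern splits a record, one can try Perl's
unpack() directly (this snippet is only an illustration, not part of
the module):

     my @fields = unpack 'A1 A20', '1Hello new world!    ';
     # @fields is now ('1', 'Hello new world!');
     # 'A' fields strip trailing spaces when unpacked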
1756    
1757     =head2 XML
1758    
DBD::RAM can import XML (Extensible Markup Language) from strings,
from local files, or from remote files into database tables, and can
export tables from any source to XML files.
1762    
You must have XML::Parser installed in order to use the XML feature
of DBD::RAM. If you wish to use remote XML files, you also need the
LWP module installed. Both are available from www.activestate.com
for Windows, and from www.cpan.org for other platforms.
1767    
1768     Support is provided for information in tag attributes and tag text and
1769     for multiple levels of nested tags. There are several options on how
1770     to treat tag names that occur multiple times in a single record
1771     including a variety of relationships between XML tags and database
1772     columns: one-to-one, one-to-many, and many-to-one. Tag attributes can
1773     be made to apply to multiple records nested within the tag. There is
1774     also support for alternate character encodings and other XML::Parser
1775     parameter attributes.
1776    
1777     See below for details.
1778    
1779     =over 4
1780    
1781     =item XML Import
1782    
1783     To start with a very simple example, consider this XML string:
1784    
1785     <staff>
1786     <person name='Joe' title='bottle washer'/>
1787     <person name='Tom' title='flunky'/>
1788     <person name='Bev' title='chief cook'/>
1789     <person name='Sue' title='head honcho'/>
1790     </staff>
1791    
1792     Assuming you have that XML structure in a variable $str, you can
1793     import it into a DBD::RAM table like this:
1794    
     $dbh->func({
         data_source => $str,
         data_type   => 'XML',
         record_tag  => 'staff person',
         col_names   => 'name,title',
     },'import');
1801    
1802     Which would produce this SQL/DBI accessible table:
1803    
1804     name | title
1805     -----+--------------
1806     Joe | bottle washer
1807     Tom | flunky
1808     Bev | chief cook
1809     Sue | head honcho
1810    
If the XML data is in a local or remote file rather than a string,
simply change the "data_source" to "file_source" (for local files) or
"remote_source" (for remote files), and everything else mentioned in
this section works the same as if the data were imported from a
string.
1815    
1816     Notice that the "record_tag" is a space separated list of all of the
1817     tags that enclose the fields you want to capture starting at the
1818     highest level with the <staff> tag. In this example there is only one
1819     level of nesting, but there could be many levels of nesting in actual
1820     practice.
1821    
1822     DBD::RAM can treat both text and tag attributes as fields. So the
1823     following three records could produce the same database row:
1824    
1825     <person name='Tom' title='flunky'/>
1826    
1827     <person name='Tom'>
1828     <title>flunky</title>
1829     </person>
1830    
1831     <person>
1832     <name>Tom</name>
1833     <title>flunky</title>
1834     </person>
1835    
1836     The database column names should be specified as a comma-separated
1837     string, in the order you want them to appear in the database:
1838    
1839     col_names => 'name,state,year'
1840    
1841     If you want the database column names to be the same as the XML tag
1842     names you do not need to do anything further.
1843    
NOTE: you *must* specify the column names for XML data; you cannot
rely on automatic default column names (col1, col2, etc.) or on
reading the column names from the "first line" of the data as you
can with most other data types.
1848    
1849    
1850     =item Alternate relationships between XML tags & database columns
1851    
1852     If you want the database column names to be different from the XML tag
1853     names, you need to add a col_mapping parameter which should be a hash
1854     with the XML tag as the key and the database column as the value:
1855    
1856     col_mapping => {
1857     name => 'Member_Name',
1858     state => 'Location',
1859     year => 'Year',
1860     }
1861    
1862     ('name' is the XML tag, 'Member_Name' is the database column)
1863    
1864     If a given tag occurs more than once in an XML record, it can be
1865     mapped onto a single column name (in which case all of the values for
1866     it will be concatenated with spaces into the single column), or it can
1867     be mapped onto an array of column names (in which case each succeeding
1868     instance of the tag will be entered into the succeeding column in the
1869     array).
1870    
1871     For example, given this XML snippet:
1872    
1873     <person name='Joe' state='OR'>
1874     <year>1998</year>
1875     <year>1999</year>
1876     </person>
1877     <person name='Sally' state='WA'>
1878     <year>1998</year>
1879     <year>1999</year>
1880     <year>2000</year>
1881     </person>
1882    
1883     This column mapping:
1884    
1885     col_mapping => {
1886     name => 'Member_Name',
1887     state => 'Location',
1888     year => ['Year1','Year2','Year3'],
1889     }
1890    
1891     Would produce this table:
1892    
1893     Member_Name | Location | Year1 | Year2 | Year3
1894     ------------+----------+-------+-------+------
1895     Joe | OR | 1998 | 1999 |
1896     Sally | WA | 1998 | 1999 | 2000
1897    
1898     And this column mapping:
1899    
1900     col_mapping => {
1901     name => 'Member_Name',
1902     state => 'Location',
1903     year => 'Year',
1904     }
1905    
1906     Would produce this table:
1907    
1908     Member_Name | Location | Year
1909     ------------+----------+----------------
1910     Joe | OR | 1998 1999
1911     Sally | WA | 1998 1999 2000
1912    
It is also possible to map several different tags into a single
column, e.g.:
1915    
1916     <person name='Joe' state='OR'>
1917     <year1>1998</year1>
1918     <year2>1999</year2>
1919     </person>
1920     <person name='Sally' state='WA'>
1921     <year1>1998</year1>
1922     <year2>1999</year2>
1923     <year3>2000</year3>
1924     </person>
1925    
1926     col_mapping => {
1927     name => 'Member_Name',
1928     state => 'Location',
1929     year1 => 'Year',
1930     year2 => 'Year',
1931     year3 => 'Year',
1932     }
1933    
1934     Member_Name | Location | Year
1935     ------------+----------+----------------
1936     Joe | OR | 1998 1999
1937     Sally | WA | 1998 1999 2000
1938    
1939     =item Nested attributes that apply to multiple records
1940    
1941     It is also possible to use nested record attributes to create column
1942     values that apply to multiple records. Consider the following XML:
1943    
1944     <staff>
1945     <office location='Portland'>
<person name='Joe'/>
1947     <person name='Tom'/>
1948     </office>
1949     <office location='Seattle'>
1950     <person name='Bev'/>
1951     <person name='Sue'/>
1952     </office>
1953     </staff>
1954    
1955     One might like to associate the office location with all of the staff
1956     members in that office. This is how that would be done:
1957    
1958     record_tag => 'staff office person',
1959     col_names => 'location,name',
1960     fold_col => { 'staff office' => 'location' },
1961    
That fold_col specification in the import() method would "fold in"
the location attribute and apply it to all records nested within the
office tag, producing the following table:
1965    
1966     location | name
1967     ---------+-----
1968     Portland | Joe
1969     Portland | Tom
1970     Seattle | Bev
1971     Seattle | Sue
1972    
1973     You may use several levels of folded columns, for example, to capture
1974     both the office location and title in this XML:
1975    
1976     <staff>
1977     <office location='Portland'>
1978     <title id='manager'>
1979     <person name='Joe'/>
1980     </title>
1981     <title id='flunky'>
1982     <person name='Tom'/>
1983     </title>
1984     </office>
1985     <office location='Seattle'>
1986     <title id='flunky'>
1987     <person name='Bev'/>
1988     <person name='Sue'/>
1989     </title>
1990     </office>
1991     </staff>
1992    
1993     You would use this fold_col key:
1994    
1995     fold_col => { 'staff office' => 'location',
1996     'staff office title' => 'id',
1997     },
1998    
1999     And obtain this table:
2000    
2001     location | title | name
2002     ---------+---------+-----
2003     Portland | manager | Joe
2004     Portland | flunky | Tom
2005     Seattle | flunky | Bev
2006     Seattle | flunky | Sue
2007    
2008     If you need to grab more than one attribute from a single tag, you
2009     need to put one or more carets (^) on the end of the fold_col key.
2010     For example:
2011    
2012     <office type='branch' location='Portland' manager='Sue'> ...</office>
2013    
     fold_col => { 'office'   => 'type',
                   'office^'  => 'location',
                   'office^^' => 'manager',
                 },
2017    
2018     =item Character Encoding and Unicode issues
2019    
The attr key can be used to pass extra information to XML::Parser
when the data is imported. For example, if the XML file contains
Latin-1 characters, one might like to pass the parser an encoding
protocol like this:
2024    
2025     attr => {ProtocolEncoding => 'ISO-8859-1'},
2026    
Attributes passed in this manner are handed straight to XML::Parser.
2029    
Since the results of XML::Parser are returned as UTF-8, one might
also like to translate from UTF-8 to something else when the data is
entered into the database. This can be done by passing a reference
to a subroutine in the read_sub key. For example:
2034    
2035     read_sub => \&utf8_to_latin1,
2036    
For this to work, there would need to be a subroutine utf8_to_latin1
in the main package that takes a UTF-8 string as input and returns a
Latin-1 string as output. Similar routines can be used to translate
the UTF-8 characters into any other character encoding. One possible
implementation is sketched below.
2041    
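A minimal sketch, assuming the Unicode::String module from CPAN is
available (DBD::RAM itself does not supply this routine):

     use Unicode::String qw(utf8);

     # decode the UTF-8 bytes, then re-encode them as Latin-1
     sub utf8_to_latin1 { return utf8( shift )->latin1 }
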
Apologies for being Euro-centric, but I have included a shortcut for
Latin-1. If one omits the attr key and, instead of passing a
subroutine reference in the read_sub key, simply puts the string
"latin1" there, the module will automatically perform ISO-8859-1
protocol encoding when reading the XML file and automatically
translate from UTF-8 to Latin-1 as the values are inserted into the
database; that is to say, it is a shortcut for the two keys mentioned
above.
2049    
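In other words, this single key:

     read_sub => 'latin1',

stands in for both the attr and read_sub keys shown above.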
2050    
2051     =item Other features of XML import
2052    
2053     * Tags, attributes, and text that are not specifically referred to in
2054     the import() parameters are ignored when creating the database table.
2055    
2056     * If a column name is listed that is not mapped onto a tag that occurs
2057     in the XML source, a column will be created in the database for that
2058     name and it will be given a default value of NULL for each record
2059     created.
2060    
2061     =item XML Export
2062    
2063     Any DBD::RAM table, regardless of its original source or its original
2064     format, can be exported to an XML file.
2065    
The export() parameters are the same as for other types of export();
see above for details. Additionally there are some export parameters
specific to XML files, which are the same as the import() parameters
for XML files mentioned above. The col_names parameter is required,
as is the record_tag parameter. One may also optionally pass a
header and/or a footer parameter, which will be material that goes
above and below the records in the file. If no header is passed, a
default header consisting of
2074    
2075     <?xml version="1.0" ?>
2076    
2077     will be created at the top of the file.
2078    
Given a database like this:
2080    
2081     location | name
2082     ---------+-----
2083     Portland | Joe
2084     Seattle | Sue
2085    
2086     And an export() call like this:
2087    
2088     $dbh->func({
2089     data_type => 'XML',
2090     data_target => 'test_db.new.xml',
2091     data_source => 'SELECT * FROM table1',
record_tag => 'staff office person',
2093     col_names => 'name,location',
2094     },'export');
2095    
Would produce a file called 'test_db.new.xml' containing text like
this:
2097    
2098     <?xml version="1.0" ?>
2099     <staff>
2100     <office>
2101     <person>
2102     <name>Joe</name>
2103     <location>Portland</location>
2104     </person>
2105     <person>
2106     <name>Sue</name>
2107     <location>Seattle</location>
2108     </person>
2109     </office>
2110     </staff>
2111    
2112     The module does not currently support exporting tag attributes or
2113     "folding out" nested column information, but those are planned for
2114     future releases.
2115    
=back
2117    
2118     =head2 USER-DEFINED DATA STRUCTURES
2119    
DBD::RAM can be extended to handle almost any type of structured
information with the USR data type. With this data type, you define
a subroutine that parses your data, pass that subroutine to the
import() command, and the module will use it to create a database
from your data. The routine can be as simple or as complex as you
like. It must accept a string and return an array with the fields of
the array in the same order as the columns in your database. Here is
a simple example that works with data separated by double tildes. In
reality, you could just do this with the built-in CSV type, but here
is how you could recreate it with the USR type:
2130    
2131     $dbh->func({
2132     data_type => 'USR',
2133     data_source => "1~~2~~3\n4~~5~~6\n",
2134     read_sub => sub { split /~~/,shift },
2135     },'import' );
2136    
2137     That would build a table with two rows of three fields each. The
2138     first row would contain the values 1,2,3 and the second row would
2139     contain the values 4,5,6.
2140    
2141     Here is a more complex example that handles a simplified address book.
2142     It assumes that your data is a series of addresses separated by blank
2143     lines and that the address has the name on the first line, the street
2144     on the second line and the town, state, and zipcode on the third line.
2145     (Apologies to those in countries that don't have states or zipcodes in
2146     this format). Here is an example of the kind of data it would handle:
2147    
2148     Fred Bloggs
2149     123 Somewhere Lane
2150     Sometown OR 97215
2151    
2152     Joe Blow
2153     567 Anywhere Street
2154     OtherTown WA 98006
2155    
Note that the end-of-line separator (eol) has been changed to a blank
line rather than a simple newline, and that the parsing routine is
more than a simple line-by-line parser: it splits the third line into
three fields for town, state, and zip.
2160    
2161     $dbh->func({
2162     data_type => 'USR',
2163     data_source => join('',<DATA>),
2164     col_names => 'name,street,town,state,zip',
2165     eol => '^\s*\n',
2166     read_sub => sub {
2167     my($name,$street,$stuff) = split "\n", $_[0];
2168     my @ary = split ' ',$stuff;
2169     my $zip = $ary[-1];
2170     my $state = $ary[-2];
2171     my $town = $stuff;
2172     $town =~ s/^(.*)\s+$state\s+$zip$/$1/;
2173     return($name,$street,$town,$state,$zip);
2174     },
2175     },'import');
2176    
2177     Given the data above, this routine would create a table like this:
2178    
2179     name | street | town | state | zip
2180     ------------+---------------------+-----------+-------+------
2181     Fred Bloggs | 123 Somewhere Lane | Sometown | OR | 97215
2182     Joe Blow | 567 Anywhere Street | OtherTown | WA | 98006
2183    
These are just samples; the possibilities are nearly unlimited.
2185    
PLEASE NOTE: If you develop generally useful parser routines that
others might also be able to use, send them to me and I can
incorporate them into the DBD itself (with proper credit, of course).
2189    
2190     =head2 DBI DATABASE RECORDS
2191    
2192     You can import information from any other DBI accessible database with
2193     the data_type set to 'DBI' in the import() method. First connect to
2194     the other database via DBI and get a database handle for it separate
2195     from the database handle for DBD::RAM. Then do a prepare and execute
2196     to get a statement handle for a SELECT statement into that database.
2197     Then pass the statement handle to the DBD::RAM import() method as the
2198     data_source. This will perform the fetch and insert the fetched
2199     fields and records into the DBD::RAM table. After the import()
2200     statement, you can then close the database connection to the other
2201     database if you are not going to be using it for anything else.
2202    
2203     Here's an example using DBD::mysql --
2204    
2205     use DBI;
2206     my $dbh_ram = DBI->connect('dbi:RAM:','','',{RaiseError=>1});
2207     my $dbh_mysql = DBI->connect('dbi:mysql:test','','',{RaiseError=>1});
2208     my $sth_mysql = $dbh_mysql->prepare("SELECT * FROM cars");
2209     $sth_mysql->execute;
2210     $dbh_ram->func({
2211     data_type => 'DBI',
2212     data_source => $sth_mysql,
2213     },'import' );
2214     $dbh_mysql->disconnect;
2215    
2216     =head2 MP3 MUSIC FILES
2217    
Most mp3 (MPEG audio layer 3) music files contain a header describing
the song name, artist, and other information about the music. This
shortcut will collect all of the header information from all of the
mp3 files in a group of directories and turn it into a searchable
database:
2222    
2223    
2224     $dbh->func(
2225     { data_type => 'MP3', dirs => $dirlist }, 'import'
2226     );
2227    
2228     $dirlist should be a reference to an array of absolute paths to
2229     directories containing mp3 files. Each file in those directories
2230     will become a record containing the fields: file_name, song_name,
artist, album, year, comment, genre. The fields will be filled
2232     in automatically from the ID3v1x header information in the mp3 file
2233     itself, assuming, of course, that the mp3 file contains a
2234     valid ID3v1x header.
2235    
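For example (the paths are hypothetical):

     my $dirlist = [ '/home/me/mp3', '/mnt/music/new' ];
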
2236     =head1 USING MULTIPLE TABLES
2237    
2238     A single script can create as many tables as your RAM will support and
2239     you can have multiple statement handles open to the tables
2240     simultaneously. This allows you to simulate joins and multi-table
2241     operations by iterating over several statement handles at once. You
2242     can also mix and match databases of different formats, for example
2243     gathering user info from .ini and .config files in many different
2244     formats and putting them all into a single table.
2245    
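For instance, here is a sketch of a simulated join across two
in-memory tables (the table and column names are invented, and the
placeholder assumes support for them in the underlying
SQL::Statement):

     my $people = $dbh->prepare("SELECT id,name FROM table1");
     $people->execute;
     while ( my($id,$name) = $people->fetchrow_array ) {
         my $phones = $dbh->prepare(
             "SELECT phone FROM table2 WHERE id = ?");
         $phones->execute($id);
         while ( my($phone) = $phones->fetchrow_array ) {
             print "$name: $phone\n";
         }
     }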
2246    
2247     =head1 TO DO
2248    
Lots of stuff. Allow users to specify a file where catalog
information is stored, so that one could record file types once and
thereafter automatically open the files with the correct data type.
A convert() function to go from one format to another. Support for a
variety of other easily parsed formats such as mail files, web logs,
and various DBM formats. Support for HTML files, with the directory
considered as a table, each HTML file considered as a record, and the
filename, <TITLE> tag, and <BODY> tags considered as fields. More
robust SQL (coming when I update Statement.pm), including RLIKE (a
regex-based LIKE), joins, alter table, typed fields?, authorization
mechanisms?, transactions? Allow remote exports (e.g. with LWP
POST/PUT).
2261    
2262     Let me know what else...
2263    
2264     =head1 AUTHOR
2265    
2266     Jeff Zucker <jeff@vpservices.com>
2267    
2268     Copyright (c) 2000 Jeff Zucker. All rights reserved. This program is
2269     free software; you can redistribute it and/or modify it under the same
2270     terms as Perl itself as specified in the Perl README file.
2271    
2272     No warranty of any kind is implied, use at your own risk.
2273    
2274     =head1 SEE ALSO
2275    
2276     DBI, SQL::Statement, DBD::File
2277    
2278     =cut
