What\'s the simplest way to parse an Excel file in Perl? Converting it to a text file would also work.
@echo off
ECHO CHECK THE VERSION FROM THE PROJECT
echo see also this [link][1]
for /f "tokens=*" %%i in ('Type Version.txt') do set _Version=%%i
ECHO The _Version is %_Version%
ECHO remove the output html files
del *.html /q
ECHO remove the output log files
del *.log /q
::pause
ECHO %0 > %0.log
ECHO %0.error.log >%0.error.log
set BaseDir=D:\perl\sfw\ExcelToHtml.%_Version%
echo BaseDir is %BaseDir% 1>>%0.log 2>>%0.error.log
ECHO.
set LogLevel=3
echo LogLevel is %LogLevel% 1>>%0.log 2>>%0.error.log
ECHO.
::set ExcelFileToParse="%BaseDir%\CoDA_ETL_Integration.xls"
SET ExcelFileToParse="%BaseDir%\TODO.xls"
echo ExcelFileToParse is %ExcelFileToParse% 1>>%0.log 2>>%0.error.log
echo.
set OutputDir=%BaseDir%
echo OutputDir is %Outputdir% 1>>%0.log 2>>%0.error.log
echo.
ECHO SET THE UNICODE FOR PERL FOR UTF-8
SET PERL_UNICODE=S
ECHO %%PERL_UNICODE%% IS %PERL_UNICODE%
::set PerlScript=parseExcelToCsv.pl
set PerlScript=ExcelToHtml.pl
echo PerlScript is %PerlScript% 1>>%0.log 2>>%0.error.log
ECHO.
echo Action !!! 1>>%0.log 2>>%0.error.log
echo perl %BaseDir%\%PerlScript% %LogLevel% %ExcelFileToParse% %OutputDir% 1>>%0.log 2>>%0.error.log
perl %BaseDir%\%PerlScript% %LogLevel% %ExcelFileToParse% %OutputDir% 1>>%0.log 2>>%0.error.log
%0.error.log
%0.log
::debug pause
use strict;
use Spreadsheet::ParseExcel;
use utf8 ;
use Encode ;
package ExcelToHtml ;
my $DebugLevel = 3 ;
print defined($ARGV[0]) ? $DebugLevel = $ARGV[0] : "Using max DebugLevel = $DebugLevel", " \n";
my $minimumAmountOfCommandLineArgs = 3 ;
my ( $ExcelFileToParse , $OutputDir , $BaseFileName ) = ();
sub main {
logMsg("$0 SCRIPT START " , 1 );
#Action !!! call here methods
checkCommandLineArgs();
buildNames ();
ParseExcelAndlogMsgFiles ( $ExcelFileToParse , $BaseFileName , $OutputDir) ;
logMsg("$0 SCRIPT STOP " , 1 );
} #eof MAIN
sub buildNames
{
$ExcelFileToParse = "$ARGV[1]" ;
$OutputDir="$ARGV[2]" ;
$BaseFileName = $ExcelFileToParse ;
logMsg( "\$ExcelFileToParse is $ExcelFileToParse" ) ;
logMsg ( "\$OutputDir is $OutputDir" ) ;
logMsg ("\$BaseFileName is $BaseFileName \n" );
$BaseFileName =~ s/^(.*)(\\|\/)(.*)/$3/; #strip the directory part
logMsg ("\$BaseFileName is $BaseFileName ");
$BaseFileName =~ s/^(.*)(\.)(.*)/$1/ ; #strip the file extension
logMsg ( "\$BaseFileName is $BaseFileName ");
} #eof sub buildNames
sub ParseExcelAndlogMsgFiles {
my $ExcelFileToParse = shift ;
my $BaseFileName = shift ;
my $OutputDir = shift ;
my $strToReturn = "";
my $parser = Spreadsheet::ParseExcel->new();
my $workbook = $parser->Parse("$ExcelFileToParse");
for my $worksheet ( $workbook->worksheets() ) {
my ( $row_min, $row_max ) = $worksheet->row_range();
my ( $col_min, $col_max ) = $worksheet->col_range();
for my $row ( $row_min .. $row_max ) {
my $rowStr = "" ;
for my $col ( $col_min .. $col_max ) {
my $cell = $worksheet->get_cell( $row, $col );
next unless $cell;
logMsg ( " Row, Col = ($row, $col)") ;
logMsg ( "\$cell->encoding() is " . $cell->encoding()) ;
logMsg (" Value = ", $cell->value() );
$rowStr .= makeCell($cell->value() ) ; #The Value
logMsg ( "Unformatted = ", $cell->unformatted()) ;
} #eof col
$rowStr = makeRow( $rowStr );
$strToReturn .= $rowStr ;
} #eof for my row
my $worksheetName = $worksheet->{'Name'} ;
my $FileSheet = "$OutputDir/$BaseFileName" . '.' . $worksheetName . '.' . 'html' ;
$strToReturn = makeTable ( $strToReturn );
$strToReturn = makeFile ( $strToReturn );
logMsg ( "\$strToReturn is $strToReturn " ) ;
PrintToFile ( $FileSheet, $strToReturn ) ;
$strToReturn = "" ;
$FileSheet = "" ;
} #eof for my worksheet
} #eof sub
sub trim
{
$_[0]=~s/^\s+//;
$_[0]=~s/\s+$//;
return $_[0];
}
# =========================================== eof sub trim
# Action !!!
sub makeRow
{
my $row = shift ;
return "<tr>" . $row . "</tr> \n" ;
}
sub makeCell
{
my $cell = shift ;
return "<td>" . "$cell" . " </td>" ;
}
sub makeTable
{
my $table = shift ;
return "\n <table> " . $table . "</table> \n" ;
}
sub makeFile
{
my $file = shift ;
$file = "<html> <head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"/></head><body> $file </body></html>\n" ;
return $file ;
}
sub checkCommandLineArgs
{
if ( @ARGV != $minimumAmountOfCommandLineArgs )
{
logErrorMsg ( "Not enougn command line args supplied exit 1" , 1 );
die "usage: $0 1 args \n" ;
exit 1 ;
} #eof if
else
{
foreach (@ARGV) { logMsg ( "$_ " ) ; }
} #eof else if ( @ARGV != 4 )
} #eof sub checkCommandLineArgs
#log a message according to the DebugLevel
sub logMsg
{
my $msg = shift ;
my $importance = shift ;
my $CurrentPerlScriptName = "$0" ;
my $niceMonth = GetANiceMonth ();
my $logFile = "$CurrentPerlScriptName" . '_' . "$niceMonth" . '.log' ;
$msg = GetANiceTime () .' --- ' . $msg . " \n" ;
if ( $importance == 1 )
{
$msg = "\n============================================================ \n" . $msg ;
$msg = $msg . "============================================================ \n" ;
}
#debug nothing
if ( $DebugLevel == 0 ) { return ; }
#just logMsg the message
if ( $DebugLevel == 1 )
{
logMsg ( $msg );
} #eof if ( $DebugLevel == 1 )
#logMsg the message in
if ( $DebugLevel == 2 )
{
#READ ALL ROWS OF A FILE TO ALIST
open (LOG, ">> $logFile") || print "could not open the \$logFile $logFile !!!\n";
print LOG $msg ;
close LOG;
}
#logMsg the message in
if ( $DebugLevel == 3 )
{
#READ ALL ROWS OF A FILE TO ALIST
open (LOG, ">> $logFile") || print "could not open the \$logFile $logFile !!!\n";
print LOG $msg ;
close LOG;
print $msg ;
} #eof if ( $DebugLevel == 3 )
} #eof sub logMsg
#log a message according to the DebugLevel
sub logErrorMsg
{
my $errorMsg = shift ;
my $importance = shift ;
my $CurrentPerlScriptName = "$0" ;
my $niceMonth = GetANiceMonth ();
my $errorLogFile = "$CurrentPerlScriptName" . "_" . "$niceMonth" . '.error.log' ;
$errorMsg = GetANiceTime () .' --- ' . $errorMsg . " \n" ;
if ( $importance == 1 )
{
$errorMsg = "\n============================================================ \n" . $errorMsg ;
$errorMsg= $errorMsg. "============================================================ \n" ;
}
#debug nothing
if ( $DebugLevel == 0 ) { return ; }
#just logMsg the message
if ( $DebugLevel == 1 ) { print $errorMsg ; }
#logMsg the message in a error log file
if ( $DebugLevel == 2 )
{
#READ ALL ROWS OF A FILE TO ALIST
open (ERRLOG, ">> $errorLogFile") || print "could not open the \$errorLogFile $errorLogFile !!!\n";
print ERRLOG $errorMsg ;
close ERRLOG;
}
#logMsg the message in
if ( $DebugLevel == 3 )
{
#READ ALL ROWS OF A FILE TO ALIST
open (ERRLOG, ">> $errorLogFile") || print "could not open the \$errorLogFile $errorLogFile !!!\n";
print ERRLOG $errorMsg ;
close ERRLOG;
print $errorMsg ;
} #eof if ( $DebugLevel == 3 )
} #eof sub logErrorMsg
#GET A NICE TIME
sub GetANiceTime {
# Purpose: returns the time in yyyymmdd-format
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
#---- change 'month'- and 'year'-values to correct format ----
$min = "0$min" if ($min < 10);
$hour = "0$hour" if ($hour < 10);
$mon = $mon + 1;
$mon = "0$mon" if ($mon < 10);
$year = $year + 1900;
$mday = "0$mday" if ($mday < 10);
return "$year\.$mon\.$mday" . "-" . "$hour\:$min\:$sec";
} #eof sub GetANiceTime
sub GetANiceMonth {
# Purpose: returns the time in yyyymmdd-format
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
#---- change 'month'- and 'year'-values to correct format ----
$mon = $mon + 1;
$mon = "0$mon" if ($mon < 10);
$year = $year + 1900;
return "$year\.$mon" ;
} #eof sub GetANiceTime
sub PrintToFile {
my $FileOutput = shift ;
my $StringToPrint = shift ;
#READ ALL ROWS OF A FILE TO ALIST
open (FILEOUTPUT, ">$FileOutput") ||
print "could not open the \$FileOutput $FileOutput!\n";
print FILEOUTPUT $StringToPrint ;
close FILEOUTPUT ;
#debug $strToReturn .= $StringToPrint;
}
# =========================================== eof sub PrintToFile
#Action !!!
main();
1 ;
__END__
#VersionHistory:
#1.0. --- Yordan Georgiev --- Initial creation
#1.1 --- Yordan Georgiev --- Added conditional logging
I have had great luck using Spreadsheet::ParseExcel.
The 'best' way would ideally be to use a module from our beloved CPAN.
Whenever you have a problem Instantly ask: Why have I not checked CPAN yet?
Which module however I can't be certain, here is a list to get you started, try them, see what works.
http://search.cpan.org/dist/Spreadsheet-Read/Read.pm
Spreadsheet::Read tries to transparently read any spreadsheet and return its content in a universal manner independent of the parsing module that does the actual spreadsheet scanning.
The best way is to use Spreadsheet::ParseExcel.
Here is an example:
#!/usr/bin/perl -w
use strict;
use warnings;
use Spreadsheet::ParseExcel;
my $parser = Spreadsheet::ParseExcel->new();
my $workbook = $parser->parse('Book1.xls');
for my $worksheet ( $workbook->worksheets() ) {
my ( $row_min, $row_max ) = $worksheet->row_range();
my ( $col_min, $col_max ) = $worksheet->col_range();
for my $row ( $row_min .. $row_max ) {
for my $col ( $col_min .. $col_max ) {
my $cell = $worksheet->get_cell( $row, $col );
next unless $cell;
print "Row, Col = ($row, $col)\n";
print "Value = ", $cell->value(), "\n";
print "Unformatted = ", $cell->unformatted(), "\n";
print "\n";
}
}
}
To convert an Excel file to text with Perl I'd recommend excel2txt which uses Spreadsheet::ParseExcel.