What's the best way to parse Excel file in Perl?

后端 未结 5 1253
一向
一向 2021-02-13 22:02

What\'s the simplest way to parse an Excel file in Perl? Converting it to a text file would also work.

相关标签:
5条回答
  • 2021-02-13 22:18
    @echo off  
    ECHO CHECK THE VERSION FROM THE PROJECT
        echo see also this [link][1]
    for /f "tokens=*" %%i in ('Type Version.txt') do set _Version=%%i
    ECHO The _Version is %_Version%
    
    ECHO remove the output html files
    del *.html /q
    ECHO remove the output log files
    del *.log /q
    
    
    ::pause
    ECHO %0 > %0.log
    ECHO %0.error.log >%0.error.log
    
    set BaseDir=D:\perl\sfw\ExcelToHtml.%_Version%
    echo BaseDir is %BaseDir%  1>>%0.log 2>>%0.error.log
    ECHO.
    
    
    set LogLevel=3
    echo LogLevel is %LogLevel%  1>>%0.log 2>>%0.error.log
    ECHO.
    
    
    ::set ExcelFileToParse="%BaseDir%\CoDA_ETL_Integration.xls"
    SET ExcelFileToParse="%BaseDir%\TODO.xls"
    echo ExcelFileToParse is %ExcelFileToParse%  1>>%0.log 2>>%0.error.log
    echo.
    
    set OutputDir=%BaseDir%
    echo OutputDir is %Outputdir%  1>>%0.log 2>>%0.error.log
    echo.
    
    ECHO SET THE UNICODE FOR PERL FOR UTF-8
    SET PERL_UNICODE=S
    ECHO %%PERL_UNICODE%% IS %PERL_UNICODE%
    
    ::set PerlScript=parseExcelToCsv.pl
    set PerlScript=ExcelToHtml.pl
    echo PerlScript is %PerlScript%  1>>%0.log 2>>%0.error.log
    ECHO.
    
    
    echo Action !!!  1>>%0.log 2>>%0.error.log
    echo perl %BaseDir%\%PerlScript% %LogLevel% %ExcelFileToParse% %OutputDir%  1>>%0.log 2>>%0.error.log
    perl %BaseDir%\%PerlScript% %LogLevel% %ExcelFileToParse% %OutputDir% 1>>%0.log 2>>%0.error.log
    
    %0.error.log
    %0.log
    
    ::debug pause
    
    use strict;
    use Spreadsheet::ParseExcel;
    use utf8 ; 
    use Encode ; 
    
    
    package ExcelToHtml ; 
    
    
    
    my $DebugLevel = 3 ; 
    print defined($ARGV[0]) ? $DebugLevel = $ARGV[0] : "Using max DebugLevel = $DebugLevel", " \n";
    my $minimumAmountOfCommandLineArgs = 3 ; 
    my ( $ExcelFileToParse , $OutputDir , $BaseFileName ) = (); 
    
    
    sub main {
        logMsg("$0 SCRIPT START " , 1 );
        #Action !!! call here methods
        checkCommandLineArgs();     
        buildNames (); 
    
        ParseExcelAndlogMsgFiles ( $ExcelFileToParse  , $BaseFileName , $OutputDir) ; 
    
        logMsg("$0 SCRIPT STOP " , 1 );
    
    } #eof MAIN 
    
    sub buildNames 
    {
    
        $ExcelFileToParse = "$ARGV[1]" ; 
        $OutputDir="$ARGV[2]" ;
        $BaseFileName = $ExcelFileToParse ; 
        logMsg( "\$ExcelFileToParse is $ExcelFileToParse" ) ; 
        logMsg ( "\$OutputDir is $OutputDir" ) ; 
        logMsg ("\$BaseFileName is $BaseFileName \n" );
        $BaseFileName =~ s/^(.*)(\\|\/)(.*)/$3/;    #strip the directory part 
        logMsg ("\$BaseFileName is $BaseFileName ");
        $BaseFileName =~ s/^(.*)(\.)(.*)/$1/ ;  #strip the file extension
        logMsg ( "\$BaseFileName is $BaseFileName "); 
    
    
    } #eof sub buildNames
    
    
    sub ParseExcelAndlogMsgFiles {
    
    my $ExcelFileToParse = shift ; 
    my $BaseFileName = shift ; 
    my $OutputDir = shift ; 
    my $strToReturn = "";
    
    my $parser   = Spreadsheet::ParseExcel->new();
    my $workbook = $parser->Parse("$ExcelFileToParse");
    
    for my $worksheet ( $workbook->worksheets() ) {
    
            my ( $row_min, $row_max ) = $worksheet->row_range();
            my ( $col_min, $col_max ) = $worksheet->col_range();
    
            for my $row ( $row_min .. $row_max ) {
                    my $rowStr = "" ; 
                    for my $col ( $col_min .. $col_max ) {
    
                            my $cell = $worksheet->get_cell( $row, $col );
                            next unless $cell;
    
                            logMsg ( " Row, Col    = ($row, $col)") ; 
                            logMsg ( "\$cell->encoding() is " . $cell->encoding()) ;
    
                            logMsg (" Value = ", $cell->value() );
                            $rowStr .=  makeCell($cell->value() )      ;                #The Value
    
                            logMsg ( "Unformatted = ", $cell->unformatted()) ; 
    
                    } #eof col
                $rowStr = makeRow( $rowStr ); 
                $strToReturn .= $rowStr ; 
         } #eof for my row
    
            my $worksheetName = $worksheet->{'Name'}  ; 
            my $FileSheet = "$OutputDir/$BaseFileName" .  '.' . $worksheetName . '.' . 'html' ; 
            $strToReturn = makeTable ( $strToReturn );
            $strToReturn = makeFile ( $strToReturn );
            logMsg ( "\$strToReturn  is $strToReturn " ) ; 
            PrintToFile ( $FileSheet, $strToReturn )  ; 
            $strToReturn = "" ;
            $FileSheet = "" ;
            } #eof for my worksheet
    
    } #eof sub
    
    
    sub trim    
    {
            $_[0]=~s/^\s+//;
            $_[0]=~s/\s+$//;
            return $_[0];
    }
    
    # =========================================== eof sub trim 
    # Action !!!
    
    
    
    sub makeRow 
    {
        my $row = shift ; 
        return "<tr>" .  $row .  "</tr> \n"  ; 
    }
    
    sub makeCell 
    {
        my $cell = shift ; 
        return "<td>"  .  "$cell" . " </td>" ; 
    }
    
    sub makeTable
    {
        my $table = shift ; 
        return "\n <table> " . $table . "</table> \n" ; 
    
    }
    
    sub makeFile
    {
        my $file = shift ; 
    
        $file = "<html> <head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"/></head><body> $file </body></html>\n" ; 
        return $file ; 
    }
    
    
    sub checkCommandLineArgs
    {
    
            if ( @ARGV != $minimumAmountOfCommandLineArgs )
            {
                logErrorMsg ( "Not enougn command line args supplied exit 1" , 1 );
                die "usage: $0 1 args \n" ; 
                exit 1 ; 
            } #eof if
            else 
            {
                    foreach (@ARGV) { logMsg ( "$_ " ) ;  }
            } #eof else         if ( @ARGV != 4 )
    
    
    } #eof sub checkCommandLineArgs
    
    #log a message according to the DebugLevel
    sub logMsg 
    {
        my $msg = shift ;
        my $importance = shift ; 
    
        my $CurrentPerlScriptName = "$0" ; 
        my $niceMonth = GetANiceMonth (); 
        my $logFile = "$CurrentPerlScriptName" . '_' . "$niceMonth" .  '.log' ; 
        $msg = GetANiceTime () .' --- ' .   $msg . " \n" ; 
    
        if ( $importance == 1 ) 
        {
            $msg = "\n============================================================ \n" . $msg ; 
            $msg = $msg . "============================================================ \n" ;
        }
    
    
        #debug nothing
        if ( $DebugLevel == 0 ) {   return ; } 
    
        #just logMsg the message
        if ( $DebugLevel == 1 ) 
        {
            logMsg ( $msg ); 
        } #eof if ( $DebugLevel == 1 ) 
    
        #logMsg the message in 
        if ( $DebugLevel == 2 ) 
        {
            #READ ALL ROWS OF A FILE TO ALIST 
            open (LOG, ">> $logFile") || print "could not open the \$logFile $logFile !!!\n"; 
            print LOG $msg  ; 
            close LOG;
        }
    
        #logMsg the message in 
        if ( $DebugLevel == 3 ) 
        {
            #READ ALL ROWS OF A FILE TO ALIST 
            open (LOG, ">> $logFile") || print "could not open the \$logFile $logFile !!!\n"; 
            print LOG $msg  ; 
            close LOG;
            print $msg ; 
        } #eof if ( $DebugLevel == 3 ) 
    
    
    } #eof sub logMsg 
    
    #log a message according to the DebugLevel
    sub logErrorMsg 
    {
        my $errorMsg = shift ; 
        my $importance  = shift ; 
    
        my $CurrentPerlScriptName = "$0" ; 
        my $niceMonth = GetANiceMonth (); 
        my $errorLogFile = "$CurrentPerlScriptName" . "_" .  "$niceMonth" . '.error.log' ; 
        $errorMsg = GetANiceTime () .' --- ' .   $errorMsg . " \n" ; 
    
            if ( $importance == 1 ) 
            {
                $errorMsg = "\n============================================================ \n" . $errorMsg ; 
                $errorMsg= $errorMsg. "============================================================ \n" ;
            }
    
    
    
        #debug nothing
        if ( $DebugLevel == 0 ) {   return ; } 
    
        #just logMsg the message
        if ( $DebugLevel == 1 )  {      print $errorMsg ;       } 
    
        #logMsg the message in a error log file
        if ( $DebugLevel == 2 ) 
        {
            #READ ALL ROWS OF A FILE TO ALIST 
            open (ERRLOG, ">> $errorLogFile") || print "could not open the \$errorLogFile $errorLogFile !!!\n"; 
            print ERRLOG $errorMsg ; 
            close ERRLOG;
        }
    
        #logMsg the message in 
        if ( $DebugLevel == 3 ) 
        {
            #READ ALL ROWS OF A FILE TO ALIST 
            open (ERRLOG, ">> $errorLogFile") || print "could not open the \$errorLogFile $errorLogFile !!!\n"; 
            print ERRLOG $errorMsg  ; 
            close ERRLOG;
            print $errorMsg ; 
        } #eof if ( $DebugLevel == 3 ) 
    
    
    } #eof sub logErrorMsg 
    
    
    #GET A NICE TIME 
    sub GetANiceTime {
     # Purpose: returns the time in yyyymmdd-format 
     my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); 
     #---- change 'month'- and 'year'-values to correct format ---- 
     $min = "0$min" if ($min < 10); 
     $hour = "0$hour" if ($hour < 10);
     $mon = $mon + 1;
     $mon = "0$mon" if ($mon < 10); 
     $year = $year + 1900;
     $mday = "0$mday" if ($mday < 10); 
     return "$year\.$mon\.$mday" . "-" . "$hour\:$min\:$sec"; 
    } #eof sub GetANiceTime
    
    
    sub GetANiceMonth {
     # Purpose: returns the time in yyyymmdd-format 
     my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); 
     #---- change 'month'- and 'year'-values to correct format ---- 
     $mon = $mon + 1;
     $mon = "0$mon" if ($mon < 10); 
     $year = $year + 1900;
     return "$year\.$mon" ; 
    } #eof sub GetANiceTime
    
    
    
    sub PrintToFile {
    my $FileOutput = shift ; 
    my $StringToPrint = shift ; 
    #READ ALL ROWS OF A FILE TO ALIST 
    open (FILEOUTPUT, ">$FileOutput") || 
                print "could not open the \$FileOutput $FileOutput!\n"; 
    print  FILEOUTPUT $StringToPrint ; 
    close FILEOUTPUT ;
    
    #debug $strToReturn .=  $StringToPrint; 
    
    }
    # =========================================== eof sub PrintToFile
    
    
    
    #Action !!!
    main();
    
    
    
    
    1 ; 
    
    __END__
    
    #VersionHistory: 
    #1.0. --- Yordan Georgiev --- Initial creation 
    #1.1 --- Yordan Georgiev --- Added conditional logging 
    
    0 讨论(0)
  • 2021-02-13 22:23

    I have had great luck using Spreadsheet::ParseExcel.

    0 讨论(0)
  • 2021-02-13 22:32

    The 'best' way would ideally be to use a module from our beloved CPAN.

    Whenever you have a problem Instantly ask: Why have I not checked CPAN yet?

    Which module however I can't be certain, here is a list to get you started, try them, see what works.

    • Spreadsheet::ParseExcel
    • DBD::Excel
    • XML::Excel
    • File::Extract
    0 讨论(0)
  • 2021-02-13 22:33

    http://search.cpan.org/dist/Spreadsheet-Read/Read.pm

    Spreadsheet::Read tries to transparently read any spreadsheet and return its content in a universal manner independent of the parsing module that does the actual spreadsheet scanning.

    0 讨论(0)
  • 2021-02-13 22:43

    The best way is to use Spreadsheet::ParseExcel.

    Here is an example:

    #!/usr/bin/perl -w
    
    use strict;
    use warnings;
    
    use Spreadsheet::ParseExcel;
    
    my $parser   = Spreadsheet::ParseExcel->new();
    my $workbook = $parser->parse('Book1.xls');
    
    for my $worksheet ( $workbook->worksheets() ) {
    
        my ( $row_min, $row_max ) = $worksheet->row_range();
        my ( $col_min, $col_max ) = $worksheet->col_range();
    
        for my $row ( $row_min .. $row_max ) {
            for my $col ( $col_min .. $col_max ) {
    
                my $cell = $worksheet->get_cell( $row, $col );
                next unless $cell;
    
                print "Row, Col    = ($row, $col)\n";
                print "Value       = ", $cell->value(),       "\n";
                print "Unformatted = ", $cell->unformatted(), "\n";
                print "\n";
            }
        }
    }
    

    To convert an Excel file to text with Perl I'd recommend excel2txt which uses Spreadsheet::ParseExcel.

    0 讨论(0)
提交回复
热议问题