Excel::Writer::XLSX Unreadable content error - excel

I just started using Perl and I am using
Excel::Writer::XLSX
to query a DB2 database and export the data to an .xlsx file. The data is about 250k rows.
The script runs fine, but when I try to open the Excel file, Excel throws an error and asks to repair the file. After the repair, some of the data has been replaced by inf.
Below is a snippet from my code.
# Prepare and execute each SQL statement in turn. $mSQL is overwritten on
# every pass, so only the last statement handle is available after the loop.
# NOTE(review): the expression read on the right-hand side of "=" appears to
# have been lost from this snippet (probably a <FILEHANDLE> read) - confirm
# against the original script.
while ( my $sqlStatement = ) {
$mSQL = $dbh->prepare( $sqlStatement )
or die "Can't prepare $sqlStatement";
$mSQL->execute()
or die "Can't execute $sqlStatement";
}
# Create the workbook named by the third command-line argument and dump the
# result set of the last executed statement ($mSQL) into its first worksheet.
my $workbook = Excel::Writer::XLSX->new( $ARGV[2] );
$workbook->set_tempdir( '/tempDir/' );
$workbook->set_optimization();
my $worksheet = $workbook->add_worksheet();
$worksheet->keep_leading_zeros();
my $row = 0;
my $column = 0;
my @emptyRow = ();
# Header row: the upper-cased column names reported by the statement handle,
# followed by one (empty) spacer row.
$worksheet->write_row( $row++, $column, [ @{ $mSQL->{NAME_uc} } ] );
$worksheet->write_row( $row++, $column, [ @emptyRow ] );
# One worksheet row per fetched database row.
while ( my @Row = $mSQL->fetchrow_array ) {
    $worksheet->write_row( $row++, $column, [ @Row ] ); #, $cellFormat);
    $count++;
}
$workbook->close();
Can someone please advise me on this issue?

I finally figured it out (thanks to John McNamara). The problem was resolved by adding a write handler that uses a regular expression to check whether a token would be converted to "inf"; if it would, the handler calls the write_string subroutine for that cell instead of letting it be written as a number.
Below is the code.
#!/usr/bin/perl
use strict;
use warnings;
use Excel::Writer::XLSX;
# Create the output workbook and a single worksheet to write into.
my $workbook = Excel::Writer::XLSX->new( 'write_handler5.xlsx' );
my $worksheet = $workbook->add_worksheet();
# Add a handler to match any numbers in order to check for and handle
# infinity. The pattern matches every token that contains at least one digit;
# write_with_infinity() then decides whether to take over the write.
$worksheet->add_write_handler( qr[\d], \&write_with_infinity );
# The following function is used by write() to pre-process the data when a
# match is found. If it finds something that looks like a number but
# evaluates to infinity, it writes it as a string.
#
# Arguments: the worksheet object, followed by the arguments that were passed
#            to write() (row, column, token and any trailing arguments).
# Returns:   the result of write_string() when the token overflows to
#            infinity; undef otherwise, which rejects the match and returns
#            control to write().
sub write_with_infinity {
    my $worksheet = shift;
    my @args      = @_;
    my $token     = $args[2];

    # Nothing to inspect: reject the match.
    return undef unless defined $token;

    # Check if token looks like a number, in the same way as write().
    if ( $token =~ /^([+-]?)(?=[0-9]|\.[0-9])[0-9]*(\.[0-9]*)?([Ee]([+-]?[0-9]+))?$/ ) {

        # Check for infinity numerically. The textual form of infinity
        # differs between perl versions and platforms ("inf", "Inf", ...),
        # so matching the stringified value is not reliable.
        my $infinity = 9**9**9;
        my $number   = $token + 0;
        if ( $number == $infinity || $number == -$infinity ) {

            # Write the value as a string instead of a number.
            return $worksheet->write_string( @args );
        }
    }

    # Reject the match and return control to write()
    return undef;
}

Related

Chunk and excel Laravel

guys.
I need help with this code. I need to download an Excel file with around 550,000 records, so I was wondering how I can use Eloquent with chunk, skip and limit together with get to build the Excel file, or whether there is a better way to do this. I started writing some code that saves the Excel files in a folder and then downloads them as a zip, but I can't get chunk and get to work.
I had problems with time execution and memory limit, but thats no more a problem.
This is my code.
// Export the matching people in batches of 15000 rows, one .xlsx file per
// batch ($x numbers the output files).
$x=1;
for ($i=0; $i<=550000; $i=$i+15000) {
    $personas="";
    $personas = DB::table("cat_personas as c")->select("c.*","s.seccion","ca.casilla")
        ->leftJoin("cat_casillas as ca","c.cat_casilla_id","=","ca.id")
        ->join("cat_seccion as s","c.cat_seccion_id","=","s.id")
        ->where($filtros)
        ->whereRaw("c.id NOT IN ( SELECT cruzado FROM registro_persona )")
        ->whereRaw($whereCadena)
        ->orderby('c.consecutivo')
        ->orderby('c.apellido_paterno')
        ->orderby('c.apellido_materno')
        ->orderby('c.nombre')
        ->orderby('s.seccion')
        ->orderby('ca.casilla')
        ->skip($i)->limit(15000);
    //$personas=$personas->get();
    // NOTE(review): dd() dumps its argument and stops the script, so nothing
    // below this line runs while this debugging call is in place.
    dd($personas->count());
    if($personas->count()>0){
        $spreadsheet = new Spreadsheet();
        $r=1;
        // Header row.
        $sheet = $spreadsheet->getActiveSheet()
            ->setCellValue('A'.$r, '#')
            ->setCellValue('B'.$r, 'NOMBRE')
            ->setCellValue('C'.$r, 'APELLIDO PATERNO')
            ->setCellValue('D'.$r, 'APELLIDO MATERNO')
            ->setCellValue('E'.$r, 'SECCION')
            ->setCellValue('F'.$r, 'CASILLA')
            ->setCellValue('G'.$r, 'CONSECUTIVO');
        $r++;
        // $r is captured by reference so the row position carries over from
        // one chunk to the next; $c restarts at 1 for every chunk.
        $personas->chunk(5000, function($personas) use (&$spreadsheet,&$r,&$sheet) {
            $c=1;
            //dd($personas->count());
            foreach ($personas as $key) {
                $sheet = $spreadsheet->getActiveSheet()
                    ->setCellValue('A'.$r, $c)
                    ->setCellValue('B'.$r, $key->nombre)
                    ->setCellValue('C'.$r, $key->apellido_paterno)
                    ->setCellValue('D'.$r, $key->apellido_materno)
                    ->setCellValue('E'.$r, $key->seccion)
                    ->setCellValue('F'.$r, $key->casilla)
                    ->setCellValue('G'.$r, $key->consecutivo);
                $r++;
                $c++;
            }
        });
        $writer = new Xlsx($spreadsheet);
        //header('Content-Type: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet');
        //header('Content-Disposition: attachment; filename="personas.xlsx"');
        $writer->save($path."/personas$x.xlsx");
        $x++;
    }
}
You can use chunk() this way.
// Build the query once; chunk() below pages through it.
$personas = DB::table("cat_personas as c")->select("c.*","s.seccion","ca.casilla")
    ->leftJoin("cat_casillas as ca","c.cat_casilla_id","=","ca.id")
    ->join("cat_seccion as s","c.cat_seccion_id","=","s.id")
    ->where($filtros)
    ->whereRaw("c.id NOT IN ( SELECT cruzado FROM registro_persona )")
    ->whereRaw($whereCadena)
    ->orderby('c.consecutivo')
    ->orderby('c.apellido_paterno')
    ->orderby('c.apellido_materno')
    ->orderby('c.nombre')
    ->orderby('s.seccion')
    ->orderby('ca.casilla');
$spreadsheet = new Spreadsheet();
$r = 1;
// Header row.
$sheet = $spreadsheet->getActiveSheet()
    ->setCellValue('A'.$r, '#')
    ->setCellValue('B'.$r, 'NOMBRE')
    ->setCellValue('C'.$r, 'APELLIDO PATERNO')
    ->setCellValue('D'.$r, 'APELLIDO MATERNO')
    ->setCellValue('E'.$r, 'SECCION')
    ->setCellValue('F'.$r, 'CASILLA')
    ->setCellValue('G'.$r, 'CONSECUTIVO');
$r++;
$c = 1;
// $r (sheet row) and $c (running record number) are captured BY REFERENCE.
// Captured by value, every chunk would see the initial values again and
// overwrite the rows written by the previous chunk.
$personas->chunk(500, function($personae) use ($spreadsheet, &$r, &$c) {
    foreach ($personae as $persona) {
        $spreadsheet->getActiveSheet()
            ->setCellValue('A'.$r, $c)
            ->setCellValue('B'.$r, $persona->nombre)
            ->setCellValue('C'.$r, $persona->apellido_paterno)
            ->setCellValue('D'.$r, $persona->apellido_materno)
            ->setCellValue('E'.$r, $persona->seccion)
            ->setCellValue('F'.$r, $persona->casilla)
            ->setCellValue('G'.$r, $persona->consecutivo);
        $r++;
        $c++;
    }
});
$writer = new Xlsx($spreadsheet);
$writer->save($path . "/personas.xlsx");
There might be an issue with the total size of the spreadsheet, but if so, this should be a good start towards finding that solution.
PS - This is untested.

How to Flatten / Recompile Excel Spreadsheet Using sheetjs or exceljs on Write

We use excel as a configuration file for clients. However, our processes only run on linux servers. We need to take a master file, update all the client workbooks with the new information, and commit to GitLab. The users then check it out, add their own changes, commit back to GitLab and a process promotes the workbook to Server A.
This process works great using nodeJS (exceljs)
Another process on a different server is using perl to pick up the workbook and then saves each sheet as a csv file.
The problem is, what gets written out is the data from the ORIGINAL worksheet and not the updated changes. This is true of both perl and nodejs. Code for perl and nodejs xlsx to csv is at the end of the post.
Modules Tried:
perl : Spreadsheet::ParseExcel; Spreadsheet::XLSX;
nodejs: node-xlsx, exceljs
I assume it has to do with Microsoft using XML inside the excel wrapper, it keeps the old version as history and since it was the original sheet name, it gets pulled instead of the updated latest version.
When I manually open in Excel, everything is correct with the new info as expected.
When I use "Save as..." instead of "Save", the perl process is able to correctly write out the updated worksheet as csv. So our workaround is having the users always "Save as..." before committing their extra changes to GitLab. We'd like to rely on training, but given the sheer number of users and clients, trusting that every user will "Save as..." is not practical.
Is there a way to replicate a "Save As..." during my promotion to Server A or at least be able to tell if the file had been saved correctly? I'd like to stick with excelJS, but I'll use whatever is necessary to replicate the "Save as..." which seems to recompile the workbook.
In addition to nodejs, I can use perl, python, ruby - whatever it takes - to make sure the csv creation process picks up the new changes.
Thanks for your time and help.
#!/usr/bin/env perl
use strict;
use warnings;
use Carp;
use Getopt::Long;
use Pod::Usage;
use File::Basename qw/fileparse/;
use File::Spec;
use Spreadsheet::ParseExcel;
use Spreadsheet::XLSX;
use Getopt::Std;
# Parse the command line, then print the requested worksheet as CSV.
#
# Exit codes: 1 = input file not found, 5 = unsupported file extension
# (see _XLS/_XLSX for the workbook- and worksheet-related exit codes).
my %args = ();
my $help = undef;
GetOptions(
    \%args,
    'excel=s',
    'sheet=s',
    'man|help'=>\$help,
) or die pod2usage(1);
pod2usage(1) if $help;
pod2usage(-verbose=>2, exitstatus=>0, output=>\*STDOUT) unless $args{excel} || $args{sheet};
pod2usage(-verbose=>2, exitstatus=>3, output=>\*STDOUT) unless $args{excel};

# The extension comparison is case sensitive: only ".xls" and ".xlsx" match.
my $suffix = _getSuffix($args{excel});
if ($suffix ne ".xls" && $suffix ne ".xlsx") {
    warn "Error: Unsupported file extension '$suffix'. Expected .xls or .xlsx.\n";
    exit 5;
}

my $file = File::Spec->rel2abs($args{excel});
if (!-e $file) {
    # Report the problem BEFORE exiting; an exit placed ahead of the message
    # means the message is never printed.
    warn "Error: Can not find excel file. Please check for exact excel file name and location. \nError: This Program is CASE SENSITIVE. \n";
    exit 1;
}

print $suffix eq ".xls"
    ? _XLS(file=>$file, sheet=>$args{sheet})
    : _XLSX(file=>$file, sheet=>$args{sheet});
# Convert one worksheet of a legacy .xls workbook to comma-separated text.
#
# Named arguments:
#   file  - path of the workbook to parse
#   sheet - worksheet to export, passed to $workbook->worksheet()
# Returns the sheet as a string: every cell value is followed by a comma and
# every row is terminated by a newline. Values are not quoted or escaped.
# Exits with status 3 when the workbook cannot be parsed and with status 2
# when the worksheet does not exist.
sub _XLS {
    my %opts = (
        file  => undef,
        sheet => undef,
        @_,
    );
    my $aggregated = '';
    my $parser     = Spreadsheet::ParseExcel->new();
    my $workbook   = $parser->parse($opts{file});
    if (!defined $workbook) {
        # Print the message first; an exit placed ahead of it hides the error.
        warn "Error: Workbook not found\n";
        exit 3;
    }

    my $worksheet = $workbook->worksheet($opts{sheet});
    if (!defined $worksheet) {
        warn "\nError: Worksheet name doesn't exist in the Excel File. Please check the WorkSheet Name. \nError: This program is CASE SENSITIVE.\n\n";
        exit 2;
    }

    my ($row_min, $row_max) = $worksheet->row_range();
    my ($col_min, $col_max) = $worksheet->col_range();
    foreach my $row ($row_min .. $row_max) {
        foreach my $col ($col_min .. $col_max) {
            my $cell = $worksheet->get_cell($row, $col);
            # Missing cells still contribute their separator.
            $aggregated .= ($cell ? $cell->value() : '') . ',';
        }
        $aggregated .= "\n";
    }
    return $aggregated;
}
# Convert one worksheet of an .xlsx workbook to comma-separated text.
#
# Named arguments:
#   file  - path of the workbook to read
#   sheet - name of the worksheet to export
# Returns the sheet as a string: every cell value is followed by a comma and
# every row is terminated by a newline. Values are not quoted or escaped.
# Exits with status 2 when the worksheet does not exist and with status 3
# when reading the workbook dies.
sub _XLSX {
    my %opts = (
        file  => undef,
        sheet => undef,
        @_,
    );
    my $aggregated_x = '';

    # The eval result is checked here, directly after the eval, instead of
    # inspecting $@ at file scope later on: $@ is a global that any other
    # eval executed in between can overwrite.
    my $ok = eval {
        my $excel = Spreadsheet::XLSX->new($opts{file});
        my $sheet = $excel->worksheet($opts{sheet});
        if (!defined $sheet) {
            # Print the message first; an exit placed ahead of it hides it.
            warn "Error: WorkSheet not found\n";
            exit 2;
        }
        if ($sheet->{Name} eq $opts{sheet}) {
            $sheet->{MaxRow} ||= $sheet->{MinRow};
            $sheet->{MaxCol} ||= $sheet->{MinCol};
            foreach my $row ($sheet->{MinRow} .. $sheet->{MaxRow}) {
                foreach my $col ($sheet->{MinCol} .. $sheet->{MaxCol}) {
                    my $cell = $sheet->{Cells}->[$row]->[$col];
                    # Missing cells still contribute their separator.
                    $aggregated_x .= ($cell ? $cell->{Val} : '') . ',';
                }
                $aggregated_x .= "\n";
            }
        }
        1;
    };
    if (!$ok) {
        warn "Error: Could not read workbook: $@\n";
        exit 3;
    }
    return $aggregated_x;
}
# Return the extension of the given path, including the leading dot
# (".xlsx" for "/tmp/book.xlsx"), or an empty string when the file name
# contains no dot. Only the part after the last dot counts as the extension.
sub _getSuffix {
    my ($path) = @_;
    return ( fileparse( $path, qr/\.[^\.]*$/ ) )[2];
}
# Return the extension of the given path, including the leading dot, or an
# empty string when there is none. Despite its name this routine performs no
# case conversion; it yields the same result as _getSuffix.
sub _convertlwr {
    my $path  = $_[0];
    my @parts = fileparse( $path, qr/\.[^\.]*$/ );
    return $parts[2];
}
// Convert every sheet of test2.xlsx (located next to this script) into one
// CSV file, test2.csv, with the rows of all sheets concatenated.
var xlsx = require('node-xlsx')
var fs = require('fs')
var obj = xlsx.parse(__dirname + '/test2.xlsx') // parses a file
var rows = []
var writeStr = ""
//looping through all sheets
for(var i = 0; i < obj.length; i++)
{
    var sheet = obj[i]
    //loop through all rows in the sheet
    for(var j = 0; j < sheet['data'].length; j++)
    {
        //add the row to the rows array
        rows.push(sheet['data'][j])
    }
}
//creates the csv string to write it to a file
//(cells are joined as-is; values are not quoted or escaped)
for(var i = 0; i < rows.length; i++)
{
    writeStr += rows[i].join(",") + "\n"
}
//writes to a file, but you will presumably send the csv as a
//response instead
fs.writeFile(__dirname + "/test2.csv", writeStr, function(err) {
    if(err) {
        return console.log(err)
    }
    console.log("test2.csv was saved in the current directory!")
})
The answer is that it's impossible. In order to update data inside a workbook that has Excel functions, you must open it in Excel for the formulas to trigger. It's that simple.
You could pull the workbook apart, create your own javascript functions, run the data through it and then write it out, but there are so many possible issues that it is not recommended.
Perhaps one day Microsoft will release an Excel engine API for Linux. But it's still unlikely that such a thing would work via the command line without invoking the GUI.

Converting XLSX to CSV with Perl while maintaining the encoding

I'm a BI developer working with perl scripts as my ETL - I receive data over email, take the file, parse it and push it into the DB.
Most of the files are CSV, but occasionally I have an XLSX file.
I've been using Spreadsheet::XLSX to convert, but I've noticed that the CSV output comes out with the wrong encoding (needs to be UTF8, because accents and foreign languages).
That's the sub I'm using ($input_file is an Excel file), but I keep getting the data with the wrong characters.
WHAT am I missing?
Thanks a lot all!
# Convert every worksheet of an .xlsx file into a single CSV file.
#
# Argument: path of the Excel file. The output file name is the part of that
#           path before the first dot, with ".csv" appended.
# Output:   one line per spreadsheet row; every non-empty cell is trimmed,
#           wrapped in double quotes and followed by a comma. Rows containing
#           "Grand Total" are skipped (customized for the files).
# Dies when the output file cannot be opened or closed.
#
# NOTE(review): the cell values are written through an :encoding(utf-8)
# layer. If Spreadsheet::XLSX hands back UTF-8 *bytes* rather than decoded
# characters, they would be encoded a second time here and would need an
# Encode::decode('UTF-8', ...) first - confirm against the module's behavior.
sub convert_to_csv {
    my $input_file = $_[0];
    # NOTE(review): splitting on the first dot breaks for paths that contain
    # other dots (e.g. "./in/file.xlsx"); kept as-is so the output file name
    # does not change for existing callers.
    my ($filename) = split( '\.', $input_file );
    open( my $csv_fh, '>:encoding(utf-8)', "$filename.csv" ) or die "could not open out file $!\n";
    my $excel = Spreadsheet::XLSX->new($input_file);
    foreach my $sheet ( @{ $excel->{Worksheet} } ) {
        #printf( "Sheet: %s\n", $sheet->{Name} );
        $sheet->{MaxRow} ||= $sheet->{MinRow};
        $sheet->{MaxCol} ||= $sheet->{MinCol};
        foreach my $row ( $sheet->{MinRow} .. $sheet->{MaxRow} ) {
            # Start every row with a fresh buffer. A buffer shared between
            # rows keeps the text of a skipped "Grand Total" row and would
            # therefore suppress every row that follows it.
            my $line = '';
            foreach my $col ( $sheet->{MinCol} .. $sheet->{MaxCol} ) {
                my $cell = $sheet->{Cells}[$row][$col];
                next unless $cell;
                my $trimcell = $cell->value();
                print STDERR "cell: $trimcell\n"; ## Just for the tests so I don't have to open the file to see if it's ok
                $trimcell =~ s/^\s+|\s+$//g; ## Just to make sure I don't have extra spaces
                $line .= "\"" . $trimcell . "\",";
            }
            next if $line =~ /Grand Total/; ##customized for the files
            print {$csv_fh} "$line\n";
        }
    }
    # Buffered write errors only surface at close time.
    close $csv_fh or die "could not close out file $!\n";
}
My knowledge is from using ETL::Pipeline and it uses Spreadsheet::XLSX for reading .xlsx-files.
But I know which fields are UTF-8
I wrote a Local ETL::Pipeline module to handle output for Excel files
use Encode qw(decode encode);
$ra_rec->{name} = decode( 'UTF-8', $ra_rec->{name}, Encode::FB_CROAK );

unable to add new rows in an excel file using xlsx module in perl

I am trying to append new rows to an Excel file, but only the last data set is present because it overwrites any data written previously. In this code I have a for loop that runs a second time to write the same data in a second row, but instead it rewrites the existing data.
use strict;
use warnings;
use Excel::Writer::XLSX;
# Create the workbook and one worksheet, then add a table holding a single
# sample record.
my $workbook = Excel::Writer::XLSX->new( 'ke.xlsx' );#create the excel
my $worksheet = $workbook->add_worksheet();
my $server= "se";
my $domain = "de";
my $backup = "b";
# Some sample data for the table.
my $data = [[ $server, $domain ,$backup],
];
$worksheet->set_column( 'A:G', 20 );
# Both passes of this loop call add_table() with the same range ('A1:G8')
# and the same $data, so the second pass targets the same cells as the first
# instead of adding a new row.
for(my $i = 0 ;$i<2;$i++){
$worksheet->add_table( # Add a table to the worksheet.
'A1:G8',
{
data => $data,
total_row => 1,
columns => [
{
header => 'server name',
},
{
header => 'Domain Name',
},
{
header => 'Back Up address',
}
]
}
);
}
Your code writes the table at A1:G8 each time. You need to change that if you want the tables in different places.
Seems to work for me using sample data:
use strict;
use warnings;
use Excel::Writer::XLSX;
# Create the workbook and one worksheet, then add ONE table whose data array
# holds one array reference per row.
my $workbook = Excel::Writer::XLSX->new('ke.xlsx' );#create the excel
my $worksheet = $workbook->add_worksheet();
#my $server= "se";
#my $domain = "de";
#my $backup = "b";
# Some sample data for the table.
#my $data = [[ $server, $domain ,$backup],
# ];
my $data = [ ["se", "de","b"],
             ["ll", "pp","t"],
             ["yu", "ar","e"],
             ["gt", "po","w"],
             ["br", "tp","g"] ];
$worksheet->set_column( 'A:G', 20 );
# The table is added once. Repeating the identical add_table() call in a loop
# only defines the same table again over the same range; additional rows
# belong in $data.
$worksheet->add_table( # Add a table to the worksheet.
    'A1:G8',
    {
        data => $data,
        total_row => 1,
        columns => [
            {
                header => 'server name',
            },
            {
                header => 'Domain Name',
            },
            {
                header => 'Back Up address',
            }
        ]
    }
);
# Close explicitly so the file is written at a well-defined point.
$workbook->close();
I have worked on this module, however we did not used add_table instead we used $worksheet->write_row.
I think you should try writing your data using write_row
https://metacpan.org/pod/Excel::Writer::XLSX
If you don't want to use write_row, then for more examples on add_table please visit : https://metacpan.org/pod/Excel::Writer::XLSX#TABLES-IN-EXCEL
Hope that helps :)

Change thread priority ERROR_INVALID_HANDLE

I'm trying to change a thread priority within my script, without success, here are the details.
$thr = threads->new(\&someFunction,
$shared variable 1,
$shared variable 2,
);
I've tried using threads::State;
$thr->priority(2);
Without success
So, I thought the Win32::API must work
# Import GetLastError from Kernel32: no parameters, numeric return value.
my $functionGetLastError= Win32::API->new('Kernel32',
'GetLastError',
'',
'N'
);
# Import SetThreadPriority from Kernel32 with two parameters (thread handle
# and priority) and a numeric return value.
my $functionSetThreadPriority= Win32::API->new('Kernel32',
'SetThreadPriority',
'II', # I've tried 'PI' and 'II' as well
'N'
);
# The value returned by _handle() is passed to the API call unchanged.
my $h = $thr->_handle();
my $success = $functionSetThreadPriority->Call( $h, 2 );
# A false return value signals failure; report the Win32 error code.
warn "Return Error #".$functionGetLastError->Call() if !$success;
Again, without success: (, but now I have a clue, the script return error number
last Error 6
From MSDN site, System Error Codes (0-499), it seems that the error is
ERROR_INVALID_HANDLE
What am I doing wrong?
$thread->_handle weirdly returns a HANDLE*, while SetThreadPriority expects a HANDLE. You need to dereference the pointer, which you can do as follows:
use constant THREAD_PRIORITY_HIGHEST => 2;
# Set the priority of a threads.pm thread through the Win32 API.
#
# Arguments: the thread object and the priority value to apply.
# Returns:   the return value of the Kernel32 SetThreadPriority call.
# Dies when the API function cannot be loaded.
sub SetThreadPriority {
    my ($thread, $priority) = @_;

    # $thread->_handle() returns a HANDLE*.
    my $handle_ptr = $thread->_handle();

    # Dereference the pointer: read HANDLE_SIZE bytes from the address it
    # holds, then turn those bytes into an integer handle.
    my $packed_handle = unpack('P'.HANDLE_SIZE, pack(PTR_FORMAT, $handle_ptr));
    my $handle = unpack(HANDLE_FORMAT, $packed_handle);

    # The API function is loaded once and reused on later calls.
    state $SetThreadPriority = (
        Win32::API->new('Kernel32', 'SetThreadPriority', 'Ni', 'i')
            or die("Loading SetThreadPriority: $^E\n")
    );

    return $SetThreadPriority->Call($handle, $priority);
}
Here's the full test program:
use strict;
use warnings;
use feature qw( say state );
use threads;
use threads::shared;
use Carp qw( croak );
use Config qw( %Config );
use Win32::API qw( );
# Map an unsigned integer size in bytes to the matching pack/unpack template
# letter: 4 => 'L', 8 => 'Q'. Croaks with "Unsupported" for any other size.
sub uint_format {
    my ($size) = @_;
    return 'L' if $size == 4;
    return 'Q' if $size == 8;
    croak("Unsupported");
}
use constant PTR_SIZE => $Config{ptrsize};
use constant PTR_FORMAT => uint_format(PTR_SIZE);
use constant HANDLE_SIZE => PTR_SIZE;
use constant HANDLE_FORMAT => PTR_FORMAT;
use constant THREAD_PRIORITY_HIGHEST => 2;
# Set the priority of a threads.pm thread through the Win32 API.
#
# Arguments: the thread object and the priority value to apply.
# Returns:   the return value of the Kernel32 SetThreadPriority call.
# Dies when the API function cannot be loaded.
sub SetThreadPriority {
    my ($thread, $priority) = @_;

    # $thread->_handle() returns a HANDLE*.
    my $handle_ptr = $thread->_handle();

    # Dereference the pointer: read HANDLE_SIZE bytes from the address it
    # holds, then turn those bytes into an integer handle.
    my $packed_handle = unpack('P'.HANDLE_SIZE, pack(PTR_FORMAT, $handle_ptr));
    my $handle = unpack(HANDLE_FORMAT, $packed_handle);

    # The API function is loaded once and reused on later calls.
    state $SetThreadPriority = (
        Win32::API->new('Kernel32', 'SetThreadPriority', 'Ni', 'i')
            or die("Loading SetThreadPriority: $^E\n")
    );

    return $SetThreadPriority->Call($handle, $priority);
}
# Test driver: start a thread that blocks until $done is set, raise its
# priority while it is waiting, report the outcome, then release and join it.
{
my $done :shared = 0;
my $thread = async {
# Wait until the main thread sets $done and broadcasts.
{ lock($done); cond_wait($done) while !$done; }
};
my $rv = SetThreadPriority($thread, THREAD_PRIORITY_HIGHEST);
# $^E is interpolated into the message when the call returned false.
say $rv ? "Success" : "Error: $^E";
# Let the worker thread finish.
{ lock($done); $done = 1; cond_broadcast($done); }
$thread->join();
}
Notice that you can use $^E to access GetLastError.
# On failure, $^E supplies the error text for the message.
SetThreadPriority($handle, THREAD_PRIORITY_HIGHEST)
    or die("SetThreadPriority: $^E\n");
ERROR_INVALID_HANDLE
Which suggests that what _handle returns is not something Win32::API understands. I suspect "P" wants a string buffer not an integer-casted pointer. "I" may be the wrong thing because it's the wrong size on 64-bit, I would try "N" myself.
Also, for future readers running into this issue on Unix: try my POSIX::RT::Scheduler module.

Resources