How to convert multiple files to a compressed zip file using Node.js

I want to convert multiple files to a compressed zip file in Node.js.
I tried the following code:
var http = require('http');
var archiver = require('archiver');
var fs = require('fs');
var StringStream = require('string-stream');

http.createServer(function (request, response) {
    var dl = archiver('data'); // note: archiver only supports the 'zip' and 'tar' formats
    dl.pipe(response);
    dl.append(fs.createReadStream('test/fixtures/test.txt'), {
        name: 'stream.txt', date: testDate2 // testDate2 is defined elsewhere
    });
    dl.append(new StringStream("Ooh dynamic stuff!"), {
        name: 'YoDog/dynamic.txt'
    });
    dl.finalize(function (err) {
        if (err)
            response.end();
    });
}).listen(3500);

There is a much simpler solution with the archiver module:
var fs = require('fs');
var archiver = require('archiver');

var output = fs.createWriteStream('./example.zip');
var archive = archiver('zip', {
    gzip: true, // note: the gzip option only applies to tar archives; it is ignored for zip
    zlib: { level: 9 } // sets the compression level
});

archive.on('error', function (err) {
    throw err;
});

// pipe archive data to the output file
archive.pipe(output);

// append files
archive.file('/path/to/file0.txt', { name: 'file0-or-change-this-whatever.txt' });
archive.file('/path/to/README.md', { name: 'foobar.md' });

// wait for streams to complete
archive.finalize();
It also supports tar archives; just replace 'zip' with 'tar' in the archiver() call.
I get no credit for this code, it's just part of the README (you should check it out for other means of adding stuff into the archive).
Neat package, and it's probably the only one that's still being maintained and documented properly.
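One detail worth spelling out: finalize() only means no more entries will be appended; the data may still be flushing to disk. To know when the zip is actually complete, listen for the output stream's 'close' event. A minimal sketch, reusing the output and archive variables from the example above:

output.on('close', function () {
    // archive.pointer() reports the total number of bytes written to the archive
    console.log(archive.pointer() + ' total bytes written');
});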

For zipping up multiple files, you can use this utility method I wrote with the archiver module:
var zipLogs = function (working_directory) {
    var fs = require('fs');
    var path = require('path');
    var archiver = require('archiver');

    var output = fs.createWriteStream(path.join(working_directory, 'logs.zip'));
    var zipArchive = archiver('zip');

    output.on('close', function () {
        console.log('done:', zipArchive.pointer(), 'total bytes');
    });

    zipArchive.pipe(output);
    // bulk() was removed in archiver 1.0; glob() covers the same use case
    zipArchive.glob('*.log', { cwd: working_directory });
    zipArchive.finalize();
};
This, for example, zips up all the log files in a particular directory.
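Usage is then just a matter of passing in the directory that holds the logs (the path here is a placeholder):

zipLogs('/var/log/myapp');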

To compress a single text file with the built-in zlib module:
var fs = require('fs');
var zlib = require('zlib');

fs.createReadStream('input.txt')
    .pipe(zlib.createGzip())
    .pipe(fs.createWriteStream('input.txt.gz'));
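Note that .pipe() does not forward errors between streams. If a more robust version is needed, Node's built-in stream.pipeline (available since Node 10) handles error propagation and cleanup; a minimal sketch:

var fs = require('fs');
var zlib = require('zlib');
var { pipeline } = require('stream');

pipeline(
    fs.createReadStream('input.txt'),
    zlib.createGzip(),
    fs.createWriteStream('input.txt.gz'),
    function (err) {
        // called once, with the first error from any stream, or null on success
        if (err) console.error('compression failed:', err);
        else console.log('compression succeeded');
    }
);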

Related

Is there a way to get data from a PDF file and use it for a web page search?

I have a scenario where I need to automate checking PDF contents. How can I retrieve the content of a PDF file in Node.js?
I am completely blocked on this. There are a few posts on pdf2json and jsonreader, but those did not work for me. Any help will be appreciated.
var pdfParser = new PDFParser();
fs.readFile(pdfFilePath, function (err, pdfBuffer) {
    pdfParser.parseBuffer(pdfBuffer);
}, function (pdfBuffer) { // fs.readFile takes a single callback; this second one is never called
    pdfParser.parseBuffer(pdfBuffer);
});
This fails with:
Error: Invalid parameter array, need either .data or .url
    at FSReqWrap.readFileAfterClose [as oncomplete] (fs.js:445:3)
const fs = require("fs");
const PdfReader = require('pdfreader').PdfReader;

fs.readFile("E://file streaming in node js//demo//read.pdf", (err, pdfBuffer) => {
    if (err) return console.error(err);
    // pdfBuffer contains the file content
    new PdfReader().parseBuffer(pdfBuffer, function (err, item) {
        if (err)
            console.error(err); // parsing error
        else if (!item)
            console.log('done'); // end of buffer reached
        else if (item.text)
            console.log(item.text); // one parsed text item
    });
});
I found the answer and it's working perfectly. Install pdf2json by running the command below (fs is built into Node.js and does not need to be installed):
npm install pdf2json
var fs = require('fs');
var path = require('path');
var osHomedir = require('os-homedir'); // assumed: the os-homedir package (or use require('os').homedir())
var PDFParser = require('pdf2json');

// normalize the home directory to forward slashes
var homepath = osHomedir().replace(new RegExp('\\' + path.sep, 'g'), '/');
var pdfFilePath = homepath + '/Downloads/' + 'filename.pdf';

if (fs.existsSync(pdfFilePath)) {
    // read the content of the pdf from the download path;
    // `browser` and `textToVerify` come from the surrounding test framework
    var pdfParser = new PDFParser(browser, 1); // the second argument enables raw text content
    pdfParser.on("pdfParser_dataError", function (errData) {
        console.error(errData.parserError);
    });
    pdfParser.on("pdfParser_dataReady", function (pdfData) {
        //console.log('here is the content: ' + pdfParser.getRawTextContent());
        browser.assert.ok(pdfParser.getRawTextContent().indexOf(textToVerify) > -1);
    });
    pdfParser.loadPDF(pdfFilePath);
} else {
    console.log('Oops, file not present in the download folder');
    // fail the assertion if the file is not found at the path mentioned
    browser.assert.ok(fs.existsSync(pdfFilePath));
}

Using the archiver module with downloadable online links

In a node application, I wish to download a zip file that contains pdfs downloaded from various urls on the internet (where if I type the url into a browser, it just directs me to download a pdf). I've been using the archiver module which is documented on github at https://github.com/archiverjs/node-archiver, and the official documentation is at https://www.archiverjs.com/.
I'm stuck at the part where it gives the following examples for adding files to the zip file.
// append a file from stream
var file1 = __dirname + '/file1.txt';
archive.append(fs.createReadStream(file1), { name: 'file1.txt' });
// append a file from string
archive.append('string cheese!', { name: 'file2.txt' });
// append a file from buffer
var buffer3 = Buffer.from('buff it!');
archive.append(buffer3, { name: 'file3.txt' });
// append a file
archive.file('file1.txt', { name: 'file4.txt' });
// append files from a sub-directory and naming it `new-subdir` within the archive
archive.directory('subdir/', 'new-subdir');
// append files from a sub-directory, putting its contents at the root of archive
archive.directory('subdir/', false);
// append files from a glob pattern
archive.glob('subdir/*.txt');
Unfortunately, it seems that just pasting URLs into the first parameter of .append or .directory doesn't work. Would anyone know how I can add downloadable files (that are online) into the zip file?
Sure, use download-pdf to fetch the files first. Since the downloads are asynchronous, wait for both to finish before adding the files to the archive and finalizing it, something like this:
var download = require('download-pdf');
var fs = require('fs');
var archiver = require('archiver');

var pdf = "http://www.consejoconsultivoemt.cl/wp-content/uploads/2018/12/Presentaci%C3%B3n-Lineamientos-Estrat%C3%A9gicos-de-Corfo.pdf";
var pdf2 = "https://www.biobiochile.cl/static/tarifas.pdf";

var options = {
    directory: "./files/",
    filename: "first.pdf"
};
var options2 = {
    directory: "./files/",
    filename: "second.pdf"
};

// download is asynchronous, so only build the zip once both files are on disk
var pending = 2;
function done(err) {
    if (err) throw err;
    console.log("meow"); // a download finished
    if (--pending === 0) buildZip();
}

download(pdf, options, done);
download(pdf2, options2, done);

function buildZip() {
    var output = fs.createWriteStream('./example.zip');
    var archive = archiver('zip', {
        zlib: { level: 9 } // sets the compression level
    });
    archive.on('error', function (err) {
        throw err;
    });
    // pipe archive data to the output file
    archive.pipe(output);
    // append the downloaded files
    archive.file('./files/first.pdf', { name: 'first.pdf' });
    archive.file('./files/second.pdf', { name: 'second.pdf' });
    archive.finalize();
}
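To answer the original question more directly: you can't pass a URL to .append(), but you can pass a readable stream, and an HTTP response is one. A hedged sketch using Node's built-in https module, assuming the URL serves the PDF directly with no redirects:

var https = require('https');
var fs = require('fs');
var archiver = require('archiver');

var archive = archiver('zip', { zlib: { level: 9 } });
archive.pipe(fs.createWriteStream('./example.zip'));

// the response object is a readable stream, which append() accepts
https.get('https://www.biobiochile.cl/static/tarifas.pdf', function (res) {
    archive.append(res, { name: 'tarifas.pdf' });
    archive.finalize(); // safe here because this is the last (and only) entry
});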

Create ZIP file in memory using any node module

Is there any node module to create a zip in memory? (I don't want to save the zip file on disk.) The idea is to send the created zip to another server straight from memory. What is the best way to do this?
Here is my example:
var file_system = require('fs');
var archiver = require('archiver');

var dirToCompress = 'directoryToZIP';

module.exports = function (req, res, next) {
    var archive = archiver.create('zip', {});
    archive.on('error', function (err) {
        throw err;
    });
    var output = file_system.createWriteStream('/testDir/myZip.zip', { flags: 'a' }); // I don't want this line
    output.on('close', function () {
        console.log(archive.pointer() + ' total bytes');
        console.log('archiver has been finalized and the output file descriptor has closed.');
    });
    archive.pipe(output);
    archive.directory(dirToCompress);
    archive.finalize();
};
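Since the archive object is itself a readable stream, you can drop the write stream entirely and pipe it straight into the HTTP response; a minimal sketch, assuming an Express-style res:

var archiver = require('archiver');

module.exports = function (req, res, next) {
    var archive = archiver('zip');
    archive.on('error', function (err) {
        next(err);
    });
    res.attachment('myZip.zip'); // Express helper: sets the Content-Disposition header
    archive.pipe(res); // the zip bytes go straight to the client, never to disk
    archive.directory('directoryToZIP', false);
    archive.finalize();
};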
I'm using Adm-Zip:
var AdmZip = require('adm-zip');

// create archive
var zip = new AdmZip();

// add in-memory file
var content = "inner content of the file";
zip.addFile("test.txt", Buffer.from(content, "utf8"), "entry comment goes here");

// add file from disk
zip.addLocalFile("/home/me/some_picture.png");

// get in-memory zip
var willSendthis = zip.toBuffer();
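To then send that in-memory zip to another server, write the buffer into an outgoing request; a minimal sketch with Node's built-in http module, where the host and path are placeholders:

var http = require('http');

var zipBuffer = zip.toBuffer();
var req = http.request({
    method: 'POST',
    host: 'other-server.example.com', // placeholder
    path: '/upload', // placeholder
    headers: {
        'Content-Type': 'application/zip',
        'Content-Length': zipBuffer.length
    }
}, function (res) {
    console.log('upload status:', res.statusCode);
});
req.end(zipBuffer); // send the whole zip buffer as the request body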
If GZip will do, you can use the built-in zlib module and not even have to load an external module.
const zlib = require('zlib');
const gzip = zlib.createGzip();

// if you have an inputStream and an outputStream:
inputStream.pipe(gzip).pipe(outputStream);
For Zip and not GZip, you can check out archiver or zip-stream.

Concat MP3/media audio files on Amazon S3 server

I want to concatenate files uploaded to an Amazon S3 server.
How can I do this?
On a local machine I can do the concatenation using the following code:
var fs = require('fs'),
    files = fs.readdirSync('./files'),
    clips = [],
    stream,
    currentfile,
    dhh = fs.createWriteStream('./concatfile.mp3');

files.forEach(function (file) {
    clips.push(file.substring(0, 6));
});

function main() {
    if (!clips.length) {
        dhh.end("Done");
        return;
    }
    currentfile = './files/' + clips.shift() + '.mp3';
    stream = fs.createReadStream(currentfile);
    stream.pipe(dhh, { end: false });
    stream.on("end", function () {
        main();
    });
}

main();
You can achieve what you want by breaking it into two steps:
Manipulating files on S3
Since S3 is remote file storage, you can't run code on the S3 server to do the operation locally (as @Andrey mentioned).
What you will need to do in your code is fetch each input file, process them locally and upload the results back to S3. Check out the code examples from Amazon:
var AWS = require('aws-sdk');

var s3 = new AWS.S3();
var params = { Bucket: 'myBucket', Key: 'mp3-input1.mp3' };
var file = require('fs').createWriteStream('/path/to/input.mp3');
s3.getObject(params).createReadStream().pipe(file);
At this stage you'll run your concatenation code, and upload the results back:
var fs = require('fs');
var zlib = require('zlib');

var body = fs.createReadStream('bigfile.mp3').pipe(zlib.createGzip());
var s3obj = new AWS.S3({ params: { Bucket: 'myBucket', Key: 'myKey' } });
s3obj.upload({ Body: body })
    .on('httpUploadProgress', function (evt) { console.log(evt); })
    .send(function (err, data) { console.log(err, data); });
Merging two (or more) MP3 files
Since MP3 files include a header that specifies information such as the bitrate, simply concatenating them together may introduce playback issues.
See: https://stackoverflow.com/a/5364985/1265980
What you want is a tool for that. One approach is to save your input MP3 files in a tmp folder and execute an external program such as ffmpeg to change the bitrate, concatenate the files and fix the header.
Alternatively, you can use a library such as fluent-ffmpeg that lets you use ffmpeg within Node.js.
In its code example, shown below, you can see how to merge files together within the Node API.
var ffmpeg = require('fluent-ffmpeg');

ffmpeg('/path/to/part1.avi')
    .input('/path/to/part2.avi')
    .input('/path/to/part3.avi')
    .on('error', function (err) {
        console.log('An error occurred: ' + err.message);
    })
    .on('end', function () {
        console.log('Merging finished!');
    })
    .mergeToFile('/path/to/merged.avi', '/path/to/tempDir');
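Adapted to the MP3 case in this question, the same call should look roughly like this (the paths are placeholders):

var ffmpeg = require('fluent-ffmpeg');

ffmpeg('./files/clip1.mp3')
    .input('./files/clip2.mp3')
    .on('error', function (err) {
        console.log('An error occurred: ' + err.message);
    })
    .on('end', function () {
        console.log('Merging finished!');
    })
    .mergeToFile('./files/merged.mp3', './tmp'); // second argument is a dir for intermediate files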
Here's my quick take on the problem of downloading and processing S3 objects. My example is focused mostly on getting the data local and then processing it once it's all downloaded. I suggest you use one of the ffmpeg approaches mentioned above.
var AWS = require('aws-sdk');
var RSVP = require('rsvp');

var s3 = new AWS.S3();
var bucket = '<your bucket name>';

var getFile = function (key, filePath) {
    return new RSVP.Promise(function (resolve, reject) {
        var file = require('fs').createWriteStream(filePath);
        file.on('error', function (err) {
            reject(err);
        });
        s3.getObject({
            Bucket: bucket,
            Key: key
        }).on('httpData', function (chunk) {
            file.write(chunk);
        }).on('httpDone', function () {
            file.end();
            resolve(filePath);
        }).send(); // send() actually starts the request
    });
};

var tempFiles = ['<local temp filename 1>', '<local temp filename 2>'];
var keys = ['<s3 object key 1>', '<s3 object key 2>'];
var promises = [];

for (var i = 0; i < keys.length; ++i) {
    var promise = getFile(keys[i], tempFiles[i]);
    promises.push(promise);
}

RSVP.all(promises).then(function (data) {
    // do something with your files
}).catch(function (error) {
    // handle errors
});

Compressing multiple files using zlib

The code below will compress one file. How can I compress multiple files?
var fs = require('fs');
var zlib = require('zlib');

var gzip = zlib.createGzip();
var inp = fs.createReadStream('input.txt');
var out = fs.createWriteStream('input.txt.gz');

inp.pipe(gzip).pipe(out);
Gzip is an algorithm that compresses a stream of data. It knows nothing about files or folders, so it can't do what you want by itself. What you can do is use an archiver tool to build a single archive file, and then use gzip to compress the data that makes up the archive:
https://github.com/mafintosh/tar-stream
Also see this answer for more information: Node.js - Zip/Unzip a folder
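For completeness, here is a minimal sketch of that approach with tar-stream plus the built-in zlib (the entry names and contents are placeholders):

var tar = require('tar-stream');
var zlib = require('zlib');
var fs = require('fs');

var pack = tar.pack(); // a readable stream of tar data

// add two entries from in-memory strings
pack.entry({ name: 'one.txt' }, 'first file contents');
pack.entry({ name: 'two.txt' }, 'second file contents');
pack.finalize(); // no more entries

// gzip the tar stream and write it to disk
pack.pipe(zlib.createGzip()).pipe(fs.createWriteStream('files.tar.gz'));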
The best package for this (and the only one still maintained and properly documented) seems to be archiver:
var fs = require('fs');
var archiver = require('archiver');

var output = fs.createWriteStream('./example.tar.gz');
var archive = archiver('tar', {
    gzip: true,
    zlib: { level: 9 } // sets the compression level
});

archive.on('error', function (err) {
    throw err;
});

// pipe archive data to the output file
archive.pipe(output);

// append files
archive.file('/path/to/file0.txt', { name: 'file0-or-change-this-whatever.txt' });
archive.file('/path/to/README.md', { name: 'foobar.md' });

// wait for streams to complete
archive.finalize();
module.exports = class ZlibModule {
    listOfImages = ['a.png', 'b.jpg'];
    fs = require('fs');
    zlib = require('zlib');

    constructor(req, res) {
        this.req = req;
        this.res = res;
    }

    compressOperation() {
        // rootDirectory is assumed to be defined elsewhere (e.g. as a global)
        this.listOfImages.forEach((value, index) => {
            const readStream = this.fs.createReadStream(rootDirectory + '/modules/zlibModule/images/' + value);
            const writeStream = this.fs.createWriteStream(rootDirectory + '/modules/zlibModule/' + value + '.gz');
            const compress = this.zlib.createGzip();
            readStream
                .on('error', (error) => {
                    this.res.end(`${error.path} path doesn't exist`);
                })
                .pipe(compress)
                .pipe(writeStream)
                .on('error', (error) => {
                    console.log(error);
                    this.res.end(`Something went wrong`);
                })
                .on('finish', () => {
                    // respond once the last file in the list has been written
                    if (index + 1 === this.listOfImages.length) this.res.end("Done");
                });
        });
    }
};
Something that you could use:
var fs = require('fs');
var zlib = require('zlib');

var listOfFiles = ['firstFile.txt', 'secondFile.txt', 'thirdFile.txt'];

function compressFile(filename, callback) {
    var compress = zlib.createGzip(),
        input = fs.createReadStream(filename),
        output = fs.createWriteStream(filename + '.gz');
    input.pipe(compress).pipe(output);
    if (callback) {
        output.on('finish', callback); // writable streams emit 'finish', not 'end'
    }
}
#1 Method:
// Simplest: just compress the files, with no control over completion
listOfFiles.forEach(compressFile);

#2 Method:
// Compress the files in a waterfall and run a function at the end, if given
function getNext(callback) {
    if (listOfFiles.length) {
        compressFile(listOfFiles.shift(), function () {
            getNext(callback);
        });
    } else if (callback) {
        callback();
    }
}
getNext(function () {
    console.log('File compression ended');
});
