fs.createWriteStream does not immediately create file? - node.js

I have made a simple download-from-HTTP function as below (error handling is omitted for simplification):
function download(url, tempFilepath, filepath, callback) {
    var tempFile = fs.createWriteStream(tempFilepath);
    http.request(url, function(res) {
        res.on('data', function(chunk) {
            tempFile.write(chunk);
        }).on('end', function() {
            tempFile.end();
            fs.renameSync(tempFile.path, filepath);
            return callback(filepath);
        });
    }).end();
}
However, when I call download() tens of times asynchronously, it occasionally reports an error on fs.renameSync, complaining that it cannot find the file at tempFile.path.
Error: ENOENT, no such file or directory 'xxx'
I used the same list of urls to test it, and it failed about 30% of the time. The same list of urls worked when downloaded one by one.
Testing some more, I found out that the following code
fs.createWriteStream('anypath');
console.log(fs.existsSync('anypath'));
console.log(fs.existsSync('anypath'));
console.log(fs.existsSync('anypath'));
does not always print true; sometimes the first call prints false.
I suspect that with many asynchronous fs.createWriteStream calls, file creation cannot be guaranteed. Is this true? Is there any way to guarantee file creation?

You shouldn't call write on your tempFile write stream until you've received the 'open' event from the stream. The file won't exist until you see that event.
For your function:
function download(url, tempFilepath, filepath, callback) {
    var tempFile = fs.createWriteStream(tempFilepath);
    tempFile.on('open', function(fd) {
        http.request(url, function(res) {
            res.on('data', function(chunk) {
                tempFile.write(chunk);
            }).on('end', function() {
                tempFile.end();
                fs.renameSync(tempFile.path, filepath);
                return callback(filepath);
            });
        }).end();
    });
}
For your test:
var ws = fs.createWriteStream('anypath');
ws.on('open', function(fd) {
    console.log(fs.existsSync('anypath'));
    console.log(fs.existsSync('anypath'));
    console.log(fs.existsSync('anypath'));
});
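For the original download() race specifically, another option (a sketch, not part of this answer) is to pipe the response into the stream and rename only once it emits 'finish', which fires after the file exists and all data has been flushed:
function download(url, tempFilepath, filepath, callback) {
    var tempFile = fs.createWriteStream(tempFilepath);
    // 'finish' fires after the file has been created and fully flushed,
    // so the rename can no longer hit ENOENT.
    tempFile.on('finish', function() {
        fs.renameSync(tempFilepath, filepath);
        callback(filepath);
    });
    http.request(url, function(res) {
        res.pipe(tempFile); // ends tempFile when the response ends
    }).end();
}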

The accepted answer didn't download some of the last bytes for me.
Here's a Q version that works correctly (but without the temp file).
'use strict';
var fs = require('fs'),
    http = require('http'),
    path = require('path'),
    Q = require('q');

function download(url, filepath) {
    var fileStream = fs.createWriteStream(filepath),
        deferred = Q.defer();

    fileStream.on('open', function () {
        http.get(url, function (res) {
            res.on('error', function (err) {
                deferred.reject(err);
            });
            res.pipe(fileStream);
        });
    }).on('error', function (err) {
        deferred.reject(err);
    }).on('finish', function () {
        deferred.resolve(filepath);
    });

    return deferred.promise;
}

module.exports = {
    'download': download
};
Note I'm listening to finish on the file stream instead of end on the response.
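Hypothetical usage of the module above (the module path, URL and destination are placeholders):
var download = require('./download').download; // assuming the module above is saved as download.js
download('http://example.com/file.bin', '/tmp/file.bin')
    .then(function (filepath) {
        console.log('saved to', filepath);
    })
    .catch(function (err) {
        console.error('download failed:', err);
    });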

Here is what I use to get it done:
function download(url, dest) {
    return new Promise((resolve, reject) => {
        http.get(url, (res) => {
            if (res.statusCode !== 200) {
                var err = new Error('File couldn\'t be retrieved');
                err.status = res.statusCode;
                return reject(err);
            }
            // Collect the chunks as Buffers; concatenating binary data
            // as strings corrupts it.
            var chunks = [];
            res.on('data', (chunk) => {
                chunks.push(chunk);
            }).on('end', () => {
                var stream = fs.createWriteStream(dest);
                stream.on('finish', () => {
                    resolve('File Saved !');
                });
                // end() writes the remaining data, then closes the file.
                stream.end(Buffer.concat(chunks));
            });
        }).on('error', (e) => {
            console.log('Error: ' + e);
            reject(e);
        });
    });
};
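Usage would look something like this (the URL and destination path are placeholders):
download('http://example.com/picture.jpg', '/tmp/picture.jpg')
    .then(msg => console.log(msg))     // 'File Saved !'
    .catch(err => console.error(err));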

I am working on uploading and downloading files (docx, pdf, text, etc.) through the nodejs request-promise and request libraries.
The problem with request-promise is that it doesn't promisify the pipe method from the request package, so we need to do that part the old way.
I was able to come up with a hybrid solution where I use async/await and Promise() at the same time. Here is the example:
/**
 * Downloads the file.
 * @param {string} fileId : File id to be downloaded.
 * @param {string} downloadFileName : File name to be downloaded.
 * @param {string} downloadLocation : File location where it will be downloaded.
 * @param {number} version : [Optional] version of the file to be downloaded.
 * @returns {string}: Downloaded file's absolute path.
 */
const getFile = async (fileId, downloadFileName, downloadLocation, version = undefined) => {
    try {
        const url = version ? `${config.dms.url}/files/${fileId}?version=${version}` :
            `${config.dms.url}/files/${fileId}`;
        const fileOutputPath = path.join(downloadLocation, downloadFileName);
        const options = {
            method: 'GET',
            url: url,
            headers: {
                'content-type': 'application/json',
            },
            resolveWithFullResponse: true
        };
        // Download the file and return the full downloaded file path.
        // Awaiting here ensures the surrounding try/catch actually catches rejections.
        const downloadedFilePath = await writeTheFileIntoDirectory(options, fileOutputPath);
        return downloadedFilePath;
    } catch (error) {
        console.log(error);
    }
};
As you can see in the above getFile method, we are using the latest ES async/await support for asynchronous programming. Now, let's look into the writeTheFileIntoDirectory method.
/**
 * Makes REST API request and writes the file to the location provided.
 * @param {object} options : Request options to make REST API request.
 * @param {string} fileOutputPath : Downloaded file's absolute path.
 */
const writeTheFileIntoDirectory = (options, fileOutputPath) => {
    return new Promise((resolve, reject) => {
        // Get file downloaded.
        const stream = fs.createWriteStream(fileOutputPath);
        return request
            .get(options.url, options, (err, res, body) => {
                if (res.statusCode < 200 || res.statusCode >= 400) {
                    const bodyObj = JSON.parse(body);
                    const error = bodyObj.error;
                    error.statusCode = res.statusCode;
                    return reject(error);
                }
            })
            .on('error', error => reject(error))
            .pipe(stream)
            .on('close', () => resolve(fileOutputPath));
    });
};
The beauty of nodejs is that it supports mixing different asynchronous styles: if a method returns a promise, await kicks in and waits for the method to complete.
The writeTheFileIntoDirectory method above downloads the file and resolves when the stream is closed successfully; otherwise it rejects with the error.
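A hypothetical call site (the file id, name and directory below are made up for illustration):
(async () => {
    const savedPath = await getFile('d290f1ee-6c54', 'report.docx', '/tmp/downloads');
    console.log('Downloaded to', savedPath);
})();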

node.js save http response as pdf file

I have an AWS Lambda function that returns a PDF file as a byte array.
I want to call the function and save the PDF file, but after saving I can't open it; it's broken. I can't understand why. I tried different ways to create the PDF. I can get the array and turn it into a file with an online converter, and that works, but when I create the PDF file using node code it is always broken. I tried internal node modules like fs, and external ones like pdfkit.
const https = require('https');
const fs = require('fs');
const PDFDocument = require('pdfkit');

const options = {
    host: 'uek9w0hztc.execute-api.eu-north-1.amazonaws.com',
    path: '/pdfmaker',
    method: 'POST',
    headers: {
        url: 'https://www.linkedin.com'
    }
}

const req = https.request(options, res => {
    let data = [];
    const headerDate = res.headers && res.headers.date ? res.headers.date : 'no response date';
    console.log('Status Code:', res.statusCode);
    console.log('Date in Response header:', headerDate);
    res.on('data', chunk => {
        data.push(chunk);
    });
    res.on('end', () => {
        console.log('Response ended: ');
        // fs.writeFileSync('index.pdf', Buffer.from(data));
        // fs.writeFileSync("index_v2.pdf", Buffer.from(data), 'binary', (err) => {
        //     if(err) {
        //         return console.log(err);
        //     }
        //     console.log("The file was saved!");
        // });
        // const doc = new PDFDocument();
        // doc.pipe(fs.createWriteStream('output.pdf'));
        let writeStream = fs.createWriteStream('pdf123.pdf')
        writeStream.once('open', (fd) => {
            writeStream.write(new Buffer.from(data, 'binary'))
            writeStream.on('finish', () => {
                console.log('wrote all data to file');
            });
            writeStream.end()
        })
    });
}).on('error', err => {
    console.log('Error: ', err.message);
});
req.end();
I expect someone can give me a hint about where the problem is.
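One hint, offered as a guess rather than a confirmed answer: data is an array of Buffer chunks, so re-encoding it with new Buffer.from(data, 'binary') mangles the bytes. The usual pattern is to concatenate the chunks as-is, roughly:
res.on('end', () => {
    // Sketch: Buffer.concat preserves the raw bytes of the chunks.
    const pdfBuffer = Buffer.concat(data);
    fs.writeFileSync('pdf123.pdf', pdfBuffer);
});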

NodeJS cannot return data from busboy finish event

I am currently trying to develop a Google Cloud Function to parse multipart files (excel format or csv) in order to populate the Firestore database.
I am using busboy in a helper function to parse the file, convert it to JSON and return it to the main function.
Everything goes well until I try to return the parsed data. I thought the most logical way was to return the data from the busboy 'finish' event, but it does not return the data; once back in the main function it is undefined. I first thought of some issue related to asynchronous code execution, but when I tried to only print the data in the busboy finish event, it worked properly.
I've tried to find some related content online but unfortunately didn't succeed. Here is my helper function:
// Takes a multipart request and sends back readable data
const processRequest = (req) => {
    const busboy = Busboy({headers: req.headers});
    formats = ['application/vnd.ms-excel', 'text/csv', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'];
    var finalData;
    // fieldname is the request key name of the file
    // file is the stream
    // fname is the name of the file
    busboy.on('file', (fieldname, file, fname) => {
        // Checks if file is right format
        if(!formats.includes(fname.mimeType)) throw new FileFormatError('File must be excel or csv');
        bytes = [];
        // Checks that the request key is the right one
        if(fieldname == 'file') {
            // Data is the actual bytes, adds it to the buffer each time received
            file.on('data', (data) => {
                bytes.push(data);
            });
            // Concatenates the bytes into a buffer and reads data given mimetype
            file.on('end', async () => {
                buffer = Buffer.concat(bytes);
                if(fname.mimeType === 'application/vnd.ms-excel' ||
                    fname.mimeType === 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet') {
                    workbook = XLSX.read(buffer, {type: 'buffer'});
                    json = excelToJson(workbook);
                    console.log(json);
                    finalData = json;
                }
                if (fname.mimeType === 'text/csv') {
                    var csv = [];
                    const stream = Readable.from(buffer.toString());
                    stream.pipe(CSV.parse({delimiter: ','}))
                        .on('error', (err) => {
                            console.log('csv parsing error');
                            console.log(err.message);
                        }).on('data', (row) => {
                            csv.push(row);
                        }).on('end', () => {
                            console.log('csv file properly processed');
                            console.log(csv);
                            // CSV PARSING LOGIC TO COME, JUST TESTING RIGHT NOW
                            finalData = csv;
                        });
                }
            });
        }
    });
    busboy.on('finish', () => {
        console.log('busboy finished');
        return finalData;
        // WHEN ONLY PRINTED, THE DATA IS PRESENT AND DISPLAYS PROPERLY HERE
    });
    // Processes request body bytes
    busboy.end(req.rawBody);
}
There must be something I am misunderstanding but as of yet I cannot point out what.
Thanks in advance for your time :)
You're not waiting for your CSV parsing to actually finish.
It would be better to refactor your async code to use async/await.
Since you're using libraries that might only support callback-style async, you'll need to do some new Promise wrapping yourself.
Admittedly, I haven't tested the code below, but it should look something like this...
/**
 * Parse the given buffer as a CSV, return a promise of rows
 */
function parseCSV(buffer) {
    return new Promise((resolve, reject) => {
        const csv = [];
        const stream = Readable.from(buffer.toString());
        stream
            .pipe(CSV.parse({ delimiter: "," }))
            .on("error", reject)
            .on("data", (row) => csv.push(row))
            .on("end", () => resolve(csv));
    });
}

/**
 * Parse the given buffer as a spreadsheet, return a promise
 */
async function parseSpreadsheet(mimeType, buffer) {
    if (
        mimeType === "application/vnd.ms-excel" ||
        mimeType === "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    ) {
        const workbook = XLSX.read(buffer, { type: "buffer" });
        return excelToJson(workbook);
    }
    if (mimeType === "text/csv") {
        return parseCSV(buffer);
    }
    throw new Error(`Unknown mime type ${mimeType}`);
}

/**
 * Get the bytes of the field `fieldName` in the request.
 */
function getFileFromRequest(req, fieldName) {
    return new Promise((resolve, reject) => {
        const busboy = Busboy({ headers: req.headers });
        busboy.on("file", (name, file, info) => {
            // Only process the field we care about
            if (name != fieldName) {
                return;
            }
            const bytes = [];
            file.on("data", (data) => bytes.push(data));
            file.on("end", () =>
                resolve({
                    info,
                    buffer: Buffer.concat(bytes),
                }),
            );
            file.on("error", reject);
        });
        busboy.end(req.rawBody);
    });
}

async function parseRequest(req) {
    // (1) Get the file as a buffer
    const { info, buffer } = await getFileFromRequest(req, "file");
    // (2) Try parsing it as a spreadsheet
    const data = await parseSpreadsheet(info.mimeType, buffer);
    // (3) Do something with the data?
    return data;
}
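Wiring this into the cloud function could then look roughly like the following (the handler name and response shape are assumptions):
// Hypothetical HTTP function using the helpers above.
exports.uploadHandler = async (req, res) => {
    try {
        const data = await parseRequest(req);
        res.status(200).json(data);
    } catch (err) {
        res.status(400).send(err.message);
    }
};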

Electron Method Callback

I'm new to Electron and trying to build an application to control smart-home components from my Mac. To do this I need many HTTP requests, so the idea is to make my own method/function for this job.
Now my problem is that I don't know how to use this callback-thing ;)
This is my code now:
const {app, Tray, Menu, BrowserWindow, net} = require('electron');
const path = require('path');
const iconPath = path.join(__dirname, 'icon.png');
let appIcon = null;
let win = null;
var http = require('http');

function httpGet(url, callback) {
    http.get(url, (res) => {
        const { statusCode } = res;
        const contentType = res.headers['content-type'];
        res.setEncoding('utf8');
        let rawData = '';
        res.on('data', (chunk) => { rawData += chunk; });
        res.on('end', () => {
            return callback(rawData);
        });
    }).on('error', (e) => {
        console.error(`Got error: ${e.message}`);
    });
}

app.on('ready', function(){
    win = new BrowserWindow({show: false});
    appIcon = new Tray(iconPath);
    var contextMenu = Menu.buildFromTemplate([
        {
            label: 'http',
            click: function() {
                console.log(httpGet('http://192.168.178.10/switches/status_1'),
                    function(result) {
                        console.log(result);
                    }
                );
            }
        }
    ]);
    appIcon.setToolTip('This is my application.');
    appIcon.setContextMenu(contextMenu);
});
The tray works, but the httpGet function does not return anything (undefined [function]) and Electron crashes.
Would be really thankful if someone could help me with this.
Greetings,
Lukas
Bad news is that I do not know Electron, so the TL;DR for the text below is as simple as: put a console.log line into the callback passed to http.get.
There is no return value in httpGet (and that is normal), which is why you cannot log it. JavaScript event handling works with callbacks, and getting data via HTTP is an event.
What you have as the second argument for http.get is the event handler.
While here it appears as a fancy lambda,
(res) => { ... }
in the old-school way it would look like
function(res){...}
So that is a function, and it will get invoked when something happens to the HTTP request you have issued, and that is where you could log/dig into the result (res).
What you see in your log at the moment:
undefined is the return value of a function that does not return anything (httpGet);
[function] is the function you pass as the second argument to console.log:
function a(){} // a function which does not return anything
console.log(a(),a); // logs function result and function itself
First of all, the callback pattern is usually to write a function that accepts 2 parameters: err and result. If there was no error, err is null; if there was an error, result is null. So this would be the best way to write httpGet:
function httpGet(url, callback) {
    http.get(url, (res) => {
        const { statusCode } = res;
        const contentType = res.headers['content-type'];
        res.setEncoding('utf8');
        let rawData = '';
        res.on('data', (chunk) => {
            rawData += chunk;
        });
        res.on('end', () => {
            return callback(null, rawData);
        });
    }).on('error', (e) => {
        console.error(`Got error: ${e.message}`);
        return callback(e, null);
    });
}
Second, the way you wrote your call to httpGet is wrong. Change this:
console.log(httpGet('http://192.168.178.10/switches/status_1'),
    function(result) {
        console.log(result);
    }
);
To
httpGet('http://192.168.178.10/switches/status_1', function(err, rawData) {
    if (err) {
        console.error('httpGet failed!');
    } else {
        console.log(rawData);
    }
});
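As a side note, with the err-first signature above, the helper can also be promisified with Node's util.promisify (a sketch, not part of the original answer):
const { promisify } = require('util');
const httpGetAsync = promisify(httpGet);

httpGetAsync('http://192.168.178.10/switches/status_1')
    .then(rawData => console.log(rawData))
    .catch(err => console.error('httpGet failed!', err));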

Get File from external URL with fs.readFile

I have links on a page that, when clicked, should open an external docx file. Unfortunately fs.readFile only reads local paths.
I tried
app.get('/getfile', function (req, res) {
    var externalURL = 'http://www.examplesite.com/example.docx';
    // var externalURL = req.query.external;
    fs.readFile(externalURL, function(err, data) {
        var fileData = new Buffer(data).toString('base64');
        res.send(fileData);
    });
});
Try this:
const fs = require("fs");
const http = require("http");

const file = fs.createWriteStream("file.docx");
http.get("http://www.example.com/test.docx", response => {
    response.pipe(file);
});
Try node-fetch.
It follows regular client syntax for the fetch command (MDN).
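A minimal sketch with node-fetch (v2-style require; the URL and filename are placeholders):
const fetch = require('node-fetch');
const fs = require('fs');

fetch('http://www.examplesite.com/example.docx')
    .then(res => {
        // In node-fetch v2, res.body is a Node readable stream.
        res.body.pipe(fs.createWriteStream('example.docx'));
    });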
You can use get from the http package provided by the nodejs standard library.
In my case I created a function that returns a promise that is fulfilled when the file is completely fetched:
getFileFromURL(pathFile: string): Promise<Buffer> {
    return new Promise(function(resolve, reject) { // create a promise
        http.get(pathFile.replace('https', 'http'), function(res) {
            let bufferImage = Buffer.from(''); // create an empty buffer
            // listen to the 'data' event and concatenate each chunk as it is received
            res.on('data', function(chunk) {
                bufferImage = Buffer.concat([bufferImage, chunk]);
            });
            res.on('end', function() {
                resolve(bufferImage); // fulfil the promise
            });
            res.on('error', function(err) {
                reject(err); // reject the promise
            });
        });
    });
}
and finally you can use the function like this:
async function() {
    const fileBuffer = await this.getFileFromURL('your external url');
    // here you can do what you want with your **fileBuffer**
    // you can for example convert it to a string like this:
    const fileString = fileBuffer.toString('utf-8');
    // or maybe send it in the response like this:
    yourResponse.end(fileBuffer);
    // ....
}
I'd recommend using request to do it.
I'm not entirely sure this is what you mean, but try:
const request = require('request');

request.get('http://examplesite.com/example.docx', function(err, res, body) {
    // Body is the example.docx data.
});

Downloading images with node.js [closed]

I'm trying to write a script to download images using node.js. This is what I have so far:
var maxLength = 10 // 10mb
var download = function(uri, callback) {
    http.request(uri)
        .on('response', function(res) {
            if (res.headers['content-length'] > maxLength*1024*1024) {
                callback(new Error('Image too large.'))
            } else if (!~[200, 304].indexOf(res.statusCode)) {
                callback(new Error('Received an invalid status code.'))
            } else if (!res.headers['content-type'].match(/image/)) {
                callback(new Error('Not an image.'))
            } else {
                var body = ''
                res.setEncoding('binary')
                res
                    .on('error', function(err) {
                        callback(err)
                    })
                    .on('data', function(chunk) {
                        body += chunk
                    })
                    .on('end', function() {
                        // What about Windows?!
                        var path = '/tmp/' + Math.random().toString().split('.').pop()
                        fs.writeFile(path, body, 'binary', function(err) {
                            callback(err, path)
                        })
                    })
            }
        })
        .on('error', function(err) {
            callback(err)
        })
        .end();
}
I, however, want to make this more robust:
Are there libraries that do this and do this better?
Is there a chance that response headers lie (about length, about content type)?
Are there any other status codes I should care about? Should I bother with redirects?
I think I read somewhere that binary encoding is going to be deprecated. What do I do then?
How can I get this to work on windows?
Any other ways you can make this script better?
Why: for a feature similar to imgur where users can give me a URL, I download that image, and rehost the image in multiple sizes.
I'd suggest using the request module. Downloading a file is as simple as the following code:
var fs = require('fs'),
    request = require('request');

var download = function(uri, filename, callback){
    request.head(uri, function(err, res, body){
        console.log('content-type:', res.headers['content-type']);
        console.log('content-length:', res.headers['content-length']);
        request(uri).pipe(fs.createWriteStream(filename)).on('close', callback);
    });
};

download('https://www.google.com/images/srpr/logo3w.png', 'google.png', function(){
    console.log('done');
});
I ran into this problem some days ago. For a pure NodeJS answer, I would suggest using a Stream to merge the chunks together.
var http = require('http'),
    Stream = require('stream').Transform,
    fs = require('fs');

var url = 'http://www.google.com/images/srpr/logo11w.png';

http.request(url, function(response) {
    var data = new Stream();
    response.on('data', function(chunk) {
        data.push(chunk);
    });
    response.on('end', function() {
        fs.writeFileSync('image.png', data.read());
    });
}).end();
The newest Node versions won't work well with binary strings, so merging chunks as strings is not a good idea when working with binary data.
Just be careful when using data.read(): it empties the stream for the next read() operation. If you want to use the result more than once, store it somewhere.
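For example, the 'end' handler above could store the result once (a small sketch of that point):
response.on('end', function() {
    var buffer = data.read(); // read() drains the stream, so call it only once
    fs.writeFileSync('image.png', buffer);
    // Reuse `buffer` from here on instead of calling data.read() again.
});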
You can use Axios (a promise-based HTTP client for Node.js) to download images in the order of your choosing in an asynchronous environment:
npm i axios
Then, you can use the following basic example to begin downloading images:
const fs = require('fs');
const axios = require('axios');

/* ============================================================
  Function: Download Image
============================================================ */
const download_image = (url, image_path) =>
    axios({
        url,
        responseType: 'stream',
    }).then(
        response =>
            new Promise((resolve, reject) => {
                response.data
                    .pipe(fs.createWriteStream(image_path))
                    .on('finish', () => resolve({ status: true, error: '' }))
                    .on('error', e => reject(e));
            }),
    ).catch(e => ({ status: false, error: String(e) })); // report failures in the result object

/* ============================================================
  Download Images in Order
============================================================ */
(async () => {
    let example_image_1 = await download_image('https://example.com/test-1.png', 'example-1.png');
    console.log(example_image_1.status); // true
    console.log(example_image_1.error); // ''

    let example_image_2 = await download_image('https://example.com/does-not-exist.png', 'example-2.png');
    console.log(example_image_2.status); // false
    console.log(example_image_2.error); // 'Error: Request failed with status code 404'

    let example_image_3 = await download_image('https://example.com/test-3.png', 'example-3.png');
    console.log(example_image_3.status); // true
    console.log(example_image_3.error); // ''
})();
var fs = require('fs'),
    http = require('http'),
    https = require('https');

var Stream = require('stream').Transform;

var downloadImageToUrl = (url, filename, callback) => {
    var client = http;
    if (url.toString().indexOf("https") === 0){
        client = https;
    }
    client.request(url, function(response) {
        var data = new Stream();
        response.on('data', function(chunk) {
            data.push(chunk);
        });
        response.on('end', function() {
            fs.writeFileSync(filename, data.read());
        });
    }).end();
};

downloadImageToUrl('https://www.google.com/images/srpr/logo11w.png', 'public/uploads/users/abc.jpg');
If you want to show download progress, try this:
var fs = require('fs');
var request = require('request');
var progress = require('request-progress');

module.exports = function (uri, path, onProgress, onResponse, onError, onEnd) {
    progress(request(uri))
        .on('progress', onProgress)
        .on('response', onResponse)
        .on('error', onError)
        .on('end', onEnd)
        .pipe(fs.createWriteStream(path));
};
How to use it:
var download = require('../lib/download');

download("https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_150x54dp.png", "~/download/logo.png", function (state) {
    console.log("progress", state);
}, function (response) {
    console.log("status code", response.statusCode);
}, function (error) {
    console.log("error", error);
}, function () {
    console.log("done");
});
Note: you should install both the request and request-progress modules using:
npm install request request-progress --save
This is an extension to Cezary's answer. If you want to download the file to a specific directory, use this. Also, use const instead of var; it's safer that way.
const fs = require('fs');
const request = require('request');

var download = function(uri, filename, callback){
    request.head(uri, function(err, res, body){
        request(uri).pipe(fs.createWriteStream(filename)).on('close', callback);
    });
};

download('https://www.google.com/images/srpr/logo3w.png', './images/google.png', function(){
    console.log('done');
});
Building on the above, if anyone needs to handle errors in the write/read streams, I use this version. Note the stream.read() in case of a write error; it's required so we can finish reading and trigger close on the read stream.
var download = function(uri, filename, callback){
    request.head(uri, function(err, res, body){
        if (err) callback(err, filename);
        else {
            var stream = request(uri);
            stream.pipe(
                fs.createWriteStream(filename)
                    .on('error', function(err){
                        callback(err, filename);
                        stream.read();
                    })
            )
            .on('close', function() {
                callback(null, filename);
            });
        }
    });
};
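Hypothetical usage of the error-aware version above:
download('https://www.google.com/images/srpr/logo3w.png', 'google.png', function(err, filename) {
    if (err) console.error('failed:', err);
    else console.log('saved', filename);
});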
