This question already has answers here:
How to pipe one readable stream into two writable streams at once in Node.js?
(3 answers)
Closed 4 years ago.
I'm new to Node and I'm trying to download a file, hash it, then save it to disk using the hash as the file name. I'm getting correct hashes (I think) but the files are blank.
const fs = require("fs-extra")
const fetch = require("node-fetch")
const crypto = require('crypto')
const response = await fetch(url)
const sha256 = crypto.createHash("sha256")
sha256.setEncoding('hex')
response.body.pipe(sha256)
await new Promise(resolve => response.body.on("end", resolve))
sha256.end()
const hash = sha256.read()
const file = fs.createWriteStream(hash + ".jpg")
response.body.pipe(file)
My trick for resolving your problem is storing your file with unique name (using current timestamp as name), and then you can hash stream (from response) and rename it.
I've tested this code and it's working well:
const fs = require("fs-extra")
const path = require('path');
const fetch = require("node-fetch")
const crypto = require('crypto')
const downloadImage = async (url) => {
try {
const response = await fetch(url);
/** Store file with name current timestamp */
const filename = "IMG_" + Date.now() + ".jpg";
const dest = path.join(__dirname, filename);
response.body.pipe(fs.createWriteStream(dest))
/** Hash file */
const sha256 = crypto.createHash("sha256")
sha256.setEncoding('hex')
response.body.pipe(sha256)
await new Promise(resolve => response.body.on("end", resolve))
sha256.end()
const hash = sha256.read()
/** Rename file with hash value */
await fs.rename(dest, path.join(__dirname, hash + ".jpg"))
} catch (err) {
console.log(err);
}
}
const url = "https://i0.wp.com/wptavern.com/wp-content/uploads/2016/07/stack-overflow.png?ssl=1";
downloadImage(url);
But you can create function for hashing stream (response you received) as utility, this is my recommend for your code:
const fs = require("fs-extra")
const path = require('path');
const fetch = require("node-fetch")
const crypto = require('crypto')
function streamHash(stream, algorithm = 'md5') {
return new Promise((resolve, reject) => {
let shasum = crypto.createHash(algorithm);
try {
stream.on('data', function (data) {
shasum.update(data)
})
stream.on('end', function () {
const hash = shasum.digest('hex')
return resolve(hash);
})
} catch (error) {
return reject(error);
}
});
}
const downloadImage = async (url) => {
try {
const response = await fetch(url);
/** Store file with name current timestamp */
const filename = "IMG_" + Date.now() + ".jpg";
const dest = path.join(__dirname, filename);
response.body.pipe(fs.createWriteStream(dest))
/** Hash file */
const hash = await streamHash(response.body, 'sha256');
/** Rename file with hash value */
await fs.rename(dest, path.join(__dirname, hash + ".jpg"))
} catch (err) {
console.log(err);
}
}
const url = "https://i0.wp.com/wptavern.com/wp-content/uploads/2016/07/stack-overflow.png?ssl=1";
downloadImage(url);
Related
I have a NodeJS function that writes several small svg files locally and then is attempting to upload those files to cloud bucket.
in the function log, i am only seeing the message that file written to local disk. now will upload. But there is no file in the bucket and no error logged anywhere. i have made sure the timeout is set to 9 min (max) so i am sure its not timing out. what else shoudl i check?
any pointers will be appreciated.
exports.createQRCode = functions.storage.object().onFinalize(async (object) =>{
const qrcodeMonkeyKey = functions.config().qrcodemonkey.key;
//console.log(`key for qrcode monkey is ${qrcodeMonkeyKey}`);
const fileBucket = object.bucket; // The Storage bucket that contains the file.
const filePath = object.name; // File path in the bucket.
const contentType = object.contentType; // File content type.
const metageneration = object.metageneration; // Number of times metadata has been generated. New objects have a value of 1.
console.log(fileBucket);
console.log(filePath);
if(!filePath.toLowerCase().endsWith('.csv'))
return console.log('not a csv so no need to anything fancy');
const bucket = admin.storage().bucket(fileBucket);
const filePathComps = filePath.split('/');
const folderName = filePathComps[filePathComps.length-3];
if(folderName !== "qrcode")
return console.log('not a qr code csv so no need to anything fancy');
const fileName = filePathComps[filePathComps.length-1];
console.log(fileName);
const path = require('path');
const os = require('os');
const fs = require('fs');
const tempFilePath = path.join(os.tmpdir(), fileName);
const metadata = {
contentType: contentType,
};
await bucket.file(filePath).download({destination: tempFilePath});
const csv = require('csv-parser')
const results = [];
fs.createReadStream(tempFilePath)
.pipe(csv({headers:
['uri','filename','foldername']
,skipLines:1
}))
.on('data', async (data) => {
const x = data;
results.push(data);
//results.push({id:x.id,phoneNumber:x.phoneNumber,isInternational:x.isInternational,message:x.messageText,respStatus:resp.status,responsedata:resp.data});
})
.on('end',async () => {
pArray = [];
results.forEach(x =>{
pArray.push(createQRCodeAndUpload(qrcodeMonkeyKey,x.filename,x.uri,x.foldername));
});
const finaloutput = await Promise.all(pArray);
console.log(JSON.stringify(finaloutput));
return;
});
});
const createQRCodeAndUpload = async (qrcodeMonkeyKey,fileName, url,foldername) =>{
const bucket = admin.storage().bucket('vmallapp.appspot.com');
const path = require('path');
const os = require('os');
const fs = require('fs');
var axios = require("axios").default;
console.log('processing ' + url);
if(url !==""){
const dataToSend = {
data : url,
config :{
body:'circle',
eye:'frame14',
eyeBall:'ball16',
bodyColor:"#032b5c",
bgColor:"#84d4e2",
"logo":"ae600e1267b9e477f0b635b60ffaec1d1c18d93b.png"
},
size:1200,
download:false,
file:'svg',
gradientOnEyes:true
}
var options = {
method: 'POST',
url: 'https://qrcode-monkey.p.rapidapi.com/qr/custom',
headers: {
'content-type': 'application/json',
'x-rapidapi-host': 'qrcode-monkey.p.rapidapi.com',
'x-rapidapi-key': qrcodeMonkeyKey
},
data: dataToSend
};
var response = await axios.request(options);
console.log('qrcode monkey returned status' + response.status);
const outputFilePath = path.join(os.tmpdir(), `${fileName}.svg`);
fs.writeFileSync(outputFilePath, response.data);
console.log(`${fileName}.svg written to local disk. now will upload`);
try{
await bucket.upload(outputFilePath, {
destination: `qrcode/output/${fileName}.svg`
});
}catch(error){
console.log('error in uploding ' + error);
}
console.log('lets delete the file now and clean up local storage');
fs.unlinkSync(outputFilePath);
return 'all done';
}
}
I have a project with Node JS in which I am collecting the information of a JSON by http using the node-fetch module.
This is the way I have found to use the node-fetch module with async, if it is possible to improve this function, suggestions are added, I am new to this module.
This is my code where I read the information:
const fetch = require('node-fetch');
(async () => {
try {
const res = await fetch('https://jsonplaceholder.typicode.com/users');
const headerDate = res.headers && res.headers.get('date') ? res.headers.get('date') : 'no response date';
const users = await res.json();
for(user of users) {
console.log(`Got user with id: ${user.id}, name: ${user.name}`);
}
} catch (err) {
console.log(err.message); //can be console.error
}
})();
My problem: how can I extract all the information to a CSV with a limit of lines ?, that is, the CSV has a limit of 10 lines (the limit can vary), if the JSON information occupies 30 lines, 3 CSVs would be created to store all the information. I have added the json-2-csv module, but I don't know how to use it or if this module is necessary or something else is better.
const { Parser } = require("json2csv");
const fetch = require("node-fetch");
const fs = require("fs");
const csvLimit = 3;
const getJson = async () => {
const response = await fetch("https://jsonplaceholder.typicode.com/users");
const responseJson = await response.json();
return responseJson;
};
const jsonToCsv = async () => {
const json = await getJson();
const json2csvParser = new Parser();
let i = 0,
j = 0;
while (j < json.length) {
let csv = [];
let temp = [];
for (j = i * csvLimit; j < (i + 1) * csvLimit; j++) {
temp.push(json[j]);
}
csv.push(json2csvParser.parse(temp));
fs.writeFileSync(`file${(i * csvLimit) / 3}.csv`, csv);
i++;
}
};
jsonToCsv();
If you want only specific fields in the csv file, then you can pass the fields as parameter in this way.
const json2csvParser = new Parser({fields})
I used the flat package to extract the field names from the keys of the first record of the JSON and then used the json-2-csv package to convert from JSON to CSV.
const converter = require("json-2-csv");
const fetch = require("node-fetch");
const fs = require("fs");
const flatten = require('flat');
const maxRecords = 3;
const getJson = async () => {
const response = await fetch("https://jsonplaceholder.typicode.com/users");
const responseJson = await response.json();
return responseJson;
};
const convertToCSV = async () => {
const json = await getJson();
let keys = Object.keys(flatten(json[0]));
let options = {
keys: keys
};
converter.json2csv(json, json2csvCallback, options);
};
let json2csvCallback = function (err, csv) {
if (err) throw err;
const headers = csv.split('\n').slice(0,1);
const records = csv.split('\n').slice(0,);
for(let i=1;i<records.length;i=i+maxRecords) {
let dataOut = headers.concat(records.slice(i, i+3)).join('\n');
let id = Math.floor(i/maxRecords)+1;
fs.writeFileSync('data' + id + '.csv', dataOut)
}
};
convertToCSV();
Here's one of the files opened in Excel.
In my attempt to become a better developer I'm trying to refactor the below Node code into 2 (or even 3) separate functions.
The code simply takes in a file, parses some of the data, and rewrites the parsed data to another file.
So my question is, can the code below be broken down to 2 functions (one function to read and parse, the other to write)? Can it be broken down even further to 3 functions (one to read and parse, one to write, one that calls the other two)?
I have attempted to refactor the code into 2 functions but am not having any luck
const neatCsv = require('neat-csv');;
const fs = require('fs');
const ObjectsToCsv = require('objects-to-csv');
fs.readFile('./someFile.csv', async (err, data) => {
if (err) {
console.error(err);
return;
}
const neat = await neatCsv(data);
const sortArray = neat.filter((obj) => obj.Type !== 'Name');
const priceSortArray = sortArray.filter((obj) => +obj.Price > 5);
const filterSortArray = priceSortArray.sort((a,b) => parseFloat(b.IV) - parseFloat(a.IV));
(async () => {
const csv = new ObjectsToCsv(filterSortArray);
// Save to file:
await csv.toDisk('./someOtherFile.csv');
})();
});
Please try below approach. I hope this is asked by you.
const neatCsv = require('neat-csv');;
const fs = require('fs');
const ObjectsToCsv = require('objects-to-csv');
const readFile = (filePath) => {
fs.readFile('./someFile.csv', async (err, data) => {
if (err) {
console.error(err);
return;
};
const neat = await neatCsv(data);
const sortArray = neat.filter((obj) => obj.Type !== 'Name');
const priceSortArray = sortArray.filter((obj) => +obj.Price > 5);
const filterSortArray = priceSortArray.sort((a,b) => parseFloat(b.IV) - parseFloat(a.IV));
saveFile(filterSortArray);
});
}
const saveFile = (filterSortArray) => {
const csv = new ObjectsToCsv(filterSortArray);
// Save to file:
await csv.toDisk('./someOtherFile.csv');
}
readFile();
I'm trying to merge 2 pdf files using pdf-lib (I got the example of code from the official site of pdf-lib). The goal is to trigger the cloud function when new file is uploaded to bucket. The function then collect urls of files to be merged in the same bucket with the new one. I am able to get urls but I have an error in pdf-lib. Maybe I'm importing it the wrong way. Because in example it is in ES6 syntax (import) but nodejs needs require. I'm new to backed and nodejs. So any help is highly appreciated.
const functions = require('firebase-functions');
const { Storage } = require('#google-cloud/storage');
const storage = new Storage();
const admin = require('firebase-admin');
admin.initializeApp();
const { PDFDocument } = require('pdf-lib');
const fetch = require('node-fetch');
exports.testCloudFunc = functions.storage.object().onFinalize(async object => {
const filePath = object.name;
const { Logging } = require('#google-cloud/logging');
console.log(`Logged: FILEPATH: ${filePath}`);
const id = filePath.split('/');
console.log(`Logged: ID: ${id[0]}/${id[1]}`);
const bucket = object.bucket;
console.log(`Logged: BUCKET: ${object.bucket}`);
async function listFilesByPrefix() {
const options = {
prefix: id[0] + '/' + id[1]
};
const [files] = await storage.bucket(bucket).getFiles(options);
const endFiles = files.filter(el => {
return (
el.name === id[0] + '/' + id[1] + '/' + 'invoiceReport.pdf' ||
el.name === id[0] + '/' + id[1] + '/' + 'POD.pdf' ||
el.name === id[0] + '/' + id[1] + '/' + 'rateConfirmation.pdf'
);
});
endFiles.forEach(el => console.log(el.name));
const promises = [];
for (let i = 0; i < endFiles.length; i++) {
console.log(endFiles[i].name);
promises.push(
endFiles[i].getSignedUrl({
action: 'read',
expires: '03-17-2025'
})
);
}
const urlsArray = await Promise.all(promises);
return urlsArray;
}
listFilesByPrefix()
.then(results => {
results.forEach(el => console.log(el));
copyPages(results[0], results[1]);
return results;
})
.catch(console.error);
});
async function copyPages(url1, url2) {
const firstDonorPdfBytes = await fetch(url1).then(res => res.arrayBuffer());
const secondDonorPdfBytes = await fetch(url2).then(res => res.arrayBuffer());
const firstDonorPdfDoc = await PDFDocument.load(firstDonorPdfBytes);
const secondDonorPdfDoc = await PDFDocument.load(secondDonorPdfBytes);
const pdfDoc = await PDFDocument.create();
const [firstDonorPage] = await pdfDoc.copyPages(firstDonorPdfDoc, [0]);
const [secondDonorPage] = await pdfDoc.copyPages(secondDonorPdfDoc, [742]);
pdfDoc.addPage(firstDonorPage);
pdfDoc.insertPage(0, secondDonorPage);
const pdfBytes = await pdfDoc.save();
}
But in firebase cloud console logs I'm getting this:
TypeError: Cannot read property 'node' of undefined
at PDFDocument.<anonymous> (/srv/node_modules/pdf-lib/cjs/api/PDFDocument.js:459:62)
at step (/srv/node_modules/tslib/tslib.js:136:27)
at Object.next (/srv/node_modules/tslib/tslib.js:117:57)
at fulfilled (/srv/node_modules/tslib/tslib.js:107:62)
at <anonymous>
at process._tickDomainCallback (internal/process/next_tick.js:229:7)
I was facing the same problem
Make sure your files are public or generate a signed url
an example follows:
const options = {
prefix: 'notas', //folder name
};
const optionsBucket = {
version: 'v2',
action: 'read',
expires: Date.now() + 1000 * 60 * 9, // 9 minutes
};
const [files] = await storage.bucket('your-bucket-name').getFiles(options);
const mergedPdf = await PDFDocument.create();
for (let nota of files) {
let fileName = nota.name;
if (fileName.endsWith('.pdf')) {
const [url] = await storage
.bucket(bucketName)
.file(fileName)
.getSignedUrl(optionsBucket); //generate signed url
const arrayBuffer = await fetch(url).then(res => res.arrayBuffer());
const pdf = await PDFDocument.load(arrayBuffer);
const copiedPages = await mergedPdf.copyPages(pdf, pdf.getPageIndices());
copiedPages.forEach((page) => {
mergedPdf.addPage(page);
});
}
const mergedPdfFile = await mergedPdf.save();
const file = bucket.file(`folder/filename.pdf`);
await file.save(
mergedPdfFile
);
}
The scenario is as follows:
From an Amazon S3 bucket a file is fetched, then it is stored in a temporary folder and then Object Character Recognition is to be performed using the API.
Unfortunately, this doesn't work, I think it's due to the asynchronous/synchronous execution, but I've already tried several variants with callbacks/promises and didn't get any further.
If someone can give me a hint on how to construct this scenario I would be grateful!
The current error is:
TypeError: Cannot read property 'writeFile' of undefined at Response.<anonymous> (/srv/index.js:38:32) (it's the 'await fs.writeFile(dir,data);' line)
/**
* Responds to any HTTP request.
*
* #param {!express:Request} req HTTP request context.
* #param {!express:Response} res HTTP response context.
*/
const AWS = require('aws-sdk');
const fs = require('fs').promises;
const Vision = require('#google-cloud/vision');
var os = require('os');
exports.helloWorld = async (req,res) => {
var bucket, fileName, fileUrl;
req.on('data', chunk => {
body += chunk.toString();
data.push(chunk);
});
req.on('end', () => {
bucket = JSON.parse(data).value1;
fileName = JSON.parse(data).value2;
fileUrl = JSON.parse(data).value3;
var s3 = new AWS.S3();
s3.getObject({
Bucket: bucket,
Key: fileName
},
async function(error, data) {
if (error != null) {
console.log("Failed to retrieve an object: " + error);
} else {
console.log("Loaded " + data.ContentType + " bytes");
var tmpdir = os.tmpdir();
var dir = tmpdir+'/'+fileName;
try{
await fs.writeFile(dir,data);
const vision = new Vision.ImageAnnotatorClient();
let text;
await vision
.textDetection('/tmp/' + fileName)
.then(([detections]) => {
const annotation = detections.textAnnotations[0];
console.log(1);
text = annotation ? annotation.description : '';
console.log(`Extracted text from image (${text.length} chars)`);
console.log(1);
console.log(text);
resolve("Finished ocr successfully");
})
.catch(error =>{
console.log(error);
reject("Error with OCR");
})
}catch(error){
console.log(error);
}
}
},
);
let message = bucket + fileName + fileUrl;
res.status(200).send(message);
});
};
You're getting that error, because you're running on an older version of Node (< 10.0.0), where fs.promises is not available. That's why fs is undefined, and you're getting:
TypeError: Cannot read property 'writeFile' of undefined at Response.<anonymous> (/srv/index.js:38:32) (it's the 'await fs.writeFile(dir,data);' line)
Either use a newer version, or just promisify the code.
const { promisify } = require('util');
const fs = require('fs');
// const fs = require('fs').promises
const writeFile = promisify(fs.writeFile);
And now use writeFile instead of fs.writeFile in your code.
Aside from that, there are a few issues with your code.
req.on('data', chunk => {
body += chunk.toString();
data.push(chunk);
});
data is not defined anywhere, and it doesn't make sense to push data into an array and then running JSON.parse on that array, given the next few lines.
bucket = JSON.parse(data).value1;
fileName = JSON.parse(data).value2;
fileUrl = JSON.parse(data).value3;
Furthermore, JSON.parse should be called only once, instead of parsing the same string (which is an array in your code, and will yield an error) 3 times.
const values = JSON.parse(body); // should be body instead of data with the posted code
bucket = values.value1;
fileName = values.value2;
fileUrl = values.value3;
This can be improved greatly by just posting bucket, fileName & fileUrl in the JSON instead of valueN.
const { bucket, fileName, fileUrl } = JSON.parse(body);
The whole code can be rewritten into:
const AWS = require('aws-sdk');
const { promisify } = require('util');
const fs = require('fs');
const Vision = require('#google-cloud/vision');
const os = require('os');
const path = require('path');
const writeFile = promisify(fs.writeFile);
exports.helloWorld = async (req,res) => {
let body = '';
req.on('data', chunk => {
body += chunk.toString();
});
req.on('end', async() => {
// post { "bucket": "x", "fileName": "x", "fileUrl": "x" }
const { bucket, fileName, fileUrl } = JSON.parse(body);
var s3 = new AWS.S3();
try {
const data = await s3.getObject({
Bucket: bucket,
Key: fileName
}).promise();
const tmpdir = os.tmpdir();
const filePath = path.join(tmpdir, fileName)
await writeFile(filePath, data);
const vision = new Vision.ImageAnnotatorClient();
const [detections] = await vision.textDetection(filePath)
const annotation = detections.textAnnotations[0];
const text = annotation ? annotation.description : '';
console.log(`Extracted text from image (${text.length} chars)`);
let message = bucket + fileName + fileUrl;
res.status(200).send(message);
} catch(e) {
console.error(e);
res.status(500).send(e.message);
}
});
};
NOTE: I don't know if Vision API works like this, but I used the same logic and parameters that you're using.