How to chain writeFile() and OCR with NodeJS in Google Cloud Functions? - node.js

The scenario is as follows:
A file is fetched from an Amazon S3 bucket and stored in a temporary folder; Optical Character Recognition (OCR) is then to be performed on it using the Google Cloud Vision API.
Unfortunately, this doesn't work, I think it's due to the asynchronous/synchronous execution, but I've already tried several variants with callbacks/promises and didn't get any further.
If someone can give me a hint on how to construct this scenario I would be grateful!
The current error is:
TypeError: Cannot read property 'writeFile' of undefined at Response.<anonymous> (/srv/index.js:38:32) (it's the 'await fs.writeFile(dir,data);' line)
/**
* Responds to any HTTP request.
*
* @param {!express:Request} req HTTP request context.
* @param {!express:Response} res HTTP response context.
*/
const AWS = require('aws-sdk');
const fs = require('fs').promises;
const Vision = require('#google-cloud/vision');
var os = require('os');
// HTTP entry point (as posted in the question): reads the request body, fetches an
// object from S3, writes it to the temp directory and runs Vision text detection on it.
// The response is the concatenation of the three values taken from the request body.
exports.helloWorld = async (req,res) => {
var bucket, fileName, fileUrl;
// NOTE(review): `body` and `data` are used in the handlers below but are never
// declared anywhere in the visible code.
req.on('data', chunk => {
body += chunk.toString();
data.push(chunk);
});
req.on('end', () => {
// The same input is parsed three times, once per field.
bucket = JSON.parse(data).value1;
fileName = JSON.parse(data).value2;
fileUrl = JSON.parse(data).value3;
var s3 = new AWS.S3();
s3.getObject({
Bucket: bucket,
Key: fileName
},
// Callback-style getObject; this `data` parameter (the S3 result) shadows the outer `data`.
async function(error, data) {
if (error != null) {
console.log("Failed to retrieve an object: " + error);
} else {
console.log("Loaded " + data.ContentType + " bytes");
var tmpdir = os.tmpdir();
var dir = tmpdir+'/'+fileName;
try{
// This is the line the reported TypeError points at. Note that the whole S3 result
// object is passed as the file contents here, not one of its properties.
await fs.writeFile(dir,data);
const vision = new Vision.ImageAnnotatorClient();
let text;
// The file was written under os.tmpdir(), but it is read back from a hard-coded '/tmp/'.
await vision
.textDetection('/tmp/' + fileName)
.then(([detections]) => {
// Only the first annotation is used; an empty string is used when there is none.
const annotation = detections.textAnnotations[0];
console.log(1);
text = annotation ? annotation.description : '';
console.log(`Extracted text from image (${text.length} chars)`);
console.log(1);
console.log(text);
// NOTE(review): `resolve` is not defined in the visible scope (no enclosing new Promise).
resolve("Finished ocr successfully");
})
.catch(error =>{
console.log(error);
// NOTE(review): `reject` is not defined in the visible scope either.
reject("Error with OCR");
})
}catch(error){
console.log(error);
}
}
},
);
// The response is sent right after s3.getObject() has been called; nothing here waits
// for the callback above to finish.
let message = bucket + fileName + fileUrl;
res.status(200).send(message);
});
};

You're getting that error, because you're running on an older version of Node (< 10.0.0), where fs.promises is not available. That's why fs is undefined, and you're getting:
TypeError: Cannot read property 'writeFile' of undefined at Response.<anonymous> (/srv/index.js:38:32) (it's the 'await fs.writeFile(dir,data);' line)
Either use a newer version, or just promisify the code.
const { promisify } = require('util');
const fs = require('fs');
// const fs = require('fs').promises
const writeFile = promisify(fs.writeFile);
And now use writeFile instead of fs.writeFile in your code.
Aside from that, there are a few issues with your code.
req.on('data', chunk => {
body += chunk.toString();
data.push(chunk);
});
data is not defined anywhere, and it doesn't make sense to push the chunks into an array and then run JSON.parse on that array, given the next few lines.
bucket = JSON.parse(data).value1;
fileName = JSON.parse(data).value2;
fileUrl = JSON.parse(data).value3;
Furthermore, JSON.parse should be called only once, instead of parsing the same string (which is an array in your code, and will yield an error) 3 times.
const values = JSON.parse(body); // should be body instead of data with the posted code
bucket = values.value1;
fileName = values.value2;
fileUrl = values.value3;
This can be improved greatly by just posting bucket, fileName & fileUrl in the JSON instead of valueN.
const { bucket, fileName, fileUrl } = JSON.parse(body);
The whole code can be rewritten into:
const AWS = require('aws-sdk');
const { promisify } = require('util');
const fs = require('fs');
const Vision = require('#google-cloud/vision');
const os = require('os');
const path = require('path');
const writeFile = promisify(fs.writeFile);
exports.helloWorld = async (req,res) => {
let body = '';
req.on('data', chunk => {
body += chunk.toString();
});
req.on('end', async() => {
// post { "bucket": "x", "fileName": "x", "fileUrl": "x" }
const { bucket, fileName, fileUrl } = JSON.parse(body);
var s3 = new AWS.S3();
try {
const data = await s3.getObject({
Bucket: bucket,
Key: fileName
}).promise();
const tmpdir = os.tmpdir();
const filePath = path.join(tmpdir, fileName)
await writeFile(filePath, data);
const vision = new Vision.ImageAnnotatorClient();
const [detections] = await vision.textDetection(filePath)
const annotation = detections.textAnnotations[0];
const text = annotation ? annotation.description : '';
console.log(`Extracted text from image (${text.length} chars)`);
let message = bucket + fileName + fileUrl;
res.status(200).send(message);
} catch(e) {
console.error(e);
res.status(500).send(e.message);
}
});
};
NOTE: I don't know if Vision API works like this, but I used the same logic and parameters that you're using.

Related

google cloud function not uploading to bucket but no error in function

I have a NodeJS function that writes several small svg files locally and then is attempting to upload those files to cloud bucket.
In the function log, I am only seeing the message that the file was written to local disk ("now will upload"). But there is no file in the bucket and no error logged anywhere. I have made sure the timeout is set to 9 min (max), so I am sure it's not timing out. What else should I check?
any pointers will be appreciated.
exports.createQRCode = functions.storage.object().onFinalize(async (object) =>{
const qrcodeMonkeyKey = functions.config().qrcodemonkey.key;
//console.log(`key for qrcode monkey is ${qrcodeMonkeyKey}`);
const fileBucket = object.bucket; // The Storage bucket that contains the file.
const filePath = object.name; // File path in the bucket.
const contentType = object.contentType; // File content type.
const metageneration = object.metageneration; // Number of times metadata has been generated. New objects have a value of 1.
console.log(fileBucket);
console.log(filePath);
if(!filePath.toLowerCase().endsWith('.csv'))
return console.log('not a csv so no need to anything fancy');
const bucket = admin.storage().bucket(fileBucket);
const filePathComps = filePath.split('/');
const folderName = filePathComps[filePathComps.length-3];
if(folderName !== "qrcode")
return console.log('not a qr code csv so no need to anything fancy');
const fileName = filePathComps[filePathComps.length-1];
console.log(fileName);
const path = require('path');
const os = require('os');
const fs = require('fs');
const tempFilePath = path.join(os.tmpdir(), fileName);
const metadata = {
contentType: contentType,
};
await bucket.file(filePath).download({destination: tempFilePath});
const csv = require('csv-parser')
const results = [];
fs.createReadStream(tempFilePath)
.pipe(csv({headers:
['uri','filename','foldername']
,skipLines:1
}))
.on('data', async (data) => {
const x = data;
results.push(data);
//results.push({id:x.id,phoneNumber:x.phoneNumber,isInternational:x.isInternational,message:x.messageText,respStatus:resp.status,responsedata:resp.data});
})
.on('end',async () => {
pArray = [];
results.forEach(x =>{
pArray.push(createQRCodeAndUpload(qrcodeMonkeyKey,x.filename,x.uri,x.foldername));
});
const finaloutput = await Promise.all(pArray);
console.log(JSON.stringify(finaloutput));
return;
});
});
/**
 * Requests a custom SVG QR code for `url` from the QRCode Monkey API, writes it
 * to the temp directory, uploads it to `qrcode/output/<fileName>.svg` and then
 * removes the local copy.
 *
 * @param {string} qrcodeMonkeyKey RapidAPI key sent in the request headers.
 * @param {string} fileName Base name (without extension) of the generated file.
 * @param {string} url Data to encode; an empty string skips all work.
 * @param {string} foldername Not used by this function.
 * @returns {Promise<string|undefined>} 'all done', or undefined when `url` is empty.
 */
const createQRCodeAndUpload = async (qrcodeMonkeyKey, fileName, url, foldername) => {
  const bucket = admin.storage().bucket('vmallapp.appspot.com');
  const fs = require('fs');
  const os = require('os');
  const path = require('path');
  const axios = require("axios").default;

  console.log('processing ' + url);

  // Guard clause: nothing to generate for an empty URL.
  if (url === "") {
    return;
  }

  const response = await axios.request({
    method: 'POST',
    url: 'https://qrcode-monkey.p.rapidapi.com/qr/custom',
    headers: {
      'content-type': 'application/json',
      'x-rapidapi-host': 'qrcode-monkey.p.rapidapi.com',
      'x-rapidapi-key': qrcodeMonkeyKey
    },
    data: {
      data: url,
      config: {
        body: 'circle',
        eye: 'frame14',
        eyeBall: 'ball16',
        bodyColor: "#032b5c",
        bgColor: "#84d4e2",
        "logo": "ae600e1267b9e477f0b635b60ffaec1d1c18d93b.png"
      },
      size: 1200,
      download: false,
      file: 'svg',
      gradientOnEyes: true
    }
  });
  console.log('qrcode monkey returned status' + response.status);

  const outputFilePath = path.join(os.tmpdir(), `${fileName}.svg`);
  fs.writeFileSync(outputFilePath, response.data);
  console.log(`${fileName}.svg written to local disk. now will upload`);

  // An upload failure is only logged; cleanup and the return value are unaffected.
  try {
    await bucket.upload(outputFilePath, {
      destination: `qrcode/output/${fileName}.svg`
    });
  } catch (error) {
    console.log('error in uploding ' + error);
  }

  console.log('lets delete the file now and clean up local storage');
  fs.unlinkSync(outputFilePath);
  return 'all done';
};

An issue with reading a gzipped file (.gz) with IBM Cloud Function (Action: Node.js 12)

I can read the data.json.gz file on my local machine with the code mentioned below (node --version: v14.15.0). But when I try to use the same in IBM Cloud with an Action (Node.js 12) to read the same file from an Object Store Bucket, I get the below error
["stderr: ERROR: undefined - input_buf.on is not a function"].
I am very new to NodeJS; Can someone help to identify the issue here?
I do appreciate your support.
Code that works on Local machine (Windows 10):
/**
 * Reads a gzipped JSON file, prints the decompressed text and parses it.
 *
 * @param {string} filename Path of the .gz file to read.
 * @returns {Promise<*>} Resolves with the parsed JSON value; rejects when the
 *   file cannot be read, is not valid gzip, or does not contain valid JSON.
 */
async function decompressFile(filename) {
  // One declaration per statement: the original `var fs = ..., zlib = ...,`
  // ended in a dangling comma, which is a syntax error.
  const fs = require("fs");
  const zlib = require("zlib");
  const { promisify } = require("util");

  const compressed = await fs.promises.readFile(filename);
  const buffer = await promisify(zlib.gunzip)(compressed);
  const dataString = buffer.toString("utf8");
  console.log(dataString);
  // Returning the value makes the result usable by callers; a parse failure
  // becomes a rejection instead of an uncaught exception inside a callback.
  return JSON.parse(dataString);
}

decompressFile("data.json.gz").catch(function (err) {
  console.log(err);
});
Code that does not work on IBM Cloud Function and Object Store Bucket:
// Get file contents of gzipped item
// Fetches `itemName` from `bucketName` through `cosClient` and tries to gunzip and parse it.
// This is the version from the question that fails with "input_buf.on is not a function".
// Any rejection is logged by the trailing .catch(), after which the function resolves
// with undefined.
async function getGzippedItem(cosClient, bucketName, itemName) { // <<< async keyword added
const fs = require('fs');
const zlib = require('zlib');
return await cosClient.getObject({ // <<< turned into assignment with await
Bucket: bucketName,
Key: itemName
}).promise()
// The default value (a local file read stream) is only used if the promise resolves with
// undefined; otherwise `instream` is whatever getObject(...).promise() resolved with.
.then((instream=fs.createReadStream(itemName)) => {
if (instream != null) {
var data = [];
// `Body` is treated as an event emitter below; the reported error says it has no .on().
var input_buf = instream.Body
input_buf.on('data', function(chunk){
data.push(chunk);
}).on('end', function() {
var buf = Buffer.concat(data);
zlib.gunzip(buf, function (err, buffer) {
if (!err) {
var dataString = buffer.toString()
// NOTE(review): dataJSON is assigned but never returned or used afterwards.
var dataJSON = JSON.parse(dataString.toString('utf8'));
} else {
console.log(err);
}
});
});
// NOTE(review): `buf` is declared with var inside the 'end' callback above, so it is not
// in scope here; this return also runs before any 'end' event could have fired.
return buf
}
})
.catch((e) => {
console.error(`ERROR: ${e.code} - ${e.message}\n`);
});
};
/**
 * Action entry point: builds a COS S3 client from the inline configuration,
 * fetches the gzipped item through getGzippedItem and logs what came back.
 *
 * @param {object} params Action parameters (not used by this function).
 * @returns {Promise<undefined>}
 */
async function main(params) {
  // Declared locally: the original assigned these without a declaration, which
  // creates implicit globals (and throws a ReferenceError in strict mode).
  const bucketName = 'bucket';
  const itemName = 'data.json.gz';

  // The SDK was required twice under two names; one binding is enough.
  const myCOS = require('ibm-cos-sdk');

  // Initializing configuration
  const config = {
    endpoint: 'endpoint',
    apiKeyId: 'apiKeyId',
    ibmAuthEndpoint: 'ibmAuthEndpoint',
    serviceInstanceId: 'serviceInstanceId',
  };
  const cosClient = new myCOS.S3(config);

  const gzippedItemContent = await getGzippedItem(cosClient, bucketName, itemName); // <<< await keyword added
  console.log(">>>>>>>>>>>>>>>: ", typeof gzippedItemContent, gzippedItemContent);
}
The message is telling you, that your input_buf object is not of the type you expect it to be. The result of your createReadStream() call is just a stream:
[Stream] the readable stream object that can be piped or read from (by registering 'data' event listeners).
So you should be able to access the value directly
(not declaring var input_buf = instream.Body):
// Collect the raw chunks as Buffers. Converting each chunk with toString() would
// corrupt binary (gzipped) content, and `data` has to be declared before use.
var data = [];
var getObjectStream = cosClient.getObject({
  Bucket: 'BUCKET',
  Key: 'KEY'
}).createReadStream();
getObjectStream.on('data', function(c) {
  data.push(c);
});
// Without an 'error' listener a failed download throws as an unhandled 'error' event.
getObjectStream.on('error', function(err) {
  console.error(err);
});
// Once 'end' fires, Buffer.concat(data) is the complete object body.
Have a look at the test section of the ibm-cos-sdk-js project, it is describing how to use the API.

Unresolved Promise Assistance Node.js

I have the following code below, which is a lambda function to get content from a s3Object zip file. I know for a fact that I am not resolving the list of promises and need a little direction on how to resolve. I have read several codes on here but having a hard time applying it to my code. Any assistance would be greatly appreciated.
// dependencies
const AWS = require('aws-sdk');
var JSZip = require('jszip');
// get reference to S3 client
const s3 = new AWS.S3();
// S3-triggered Lambda handler (original version from the question): downloads the zip
// named in the event, then logs the text of every entry whose name contains "TestStatus".
exports.handler = async (event, context, callback) => {
// Read options from the event parameter.
const srcBucket = event.Records[0].s3.bucket.name;
// Object key may have spaces or unicode non-ASCII characters.
const srcKey = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, " "));
// Download the file from the S3 source bucket.
try {
const params = {
Bucket: srcBucket,
Key: srcKey
};
const data = await s3.getObject(params).promise();
// The .then() here just passes the loaded zip through unchanged.
var zip = JSZip.loadAsync(data.Body).then(function (content){
return content;
});
// NOTE(review): this chain is neither awaited nor returned, so the async handler can
// finish before any of the callbacks below have run.
zip.then(function(result){
// map() leaves undefined for non-matching names; filter() then drops those holes.
var entries = Object.keys(result.files).map(function (name) {
if(name.indexOf("TestStatus") != -1){
return name;
}
}).filter(notUndefined => notUndefined !== undefined);
var listOfPromises = entries.map(function(entry) {
return result.file(entry).async("text").then(function(fileContent){
return fileContent;
});
});
// NOTE(review): the Promise.all result is not returned from this callback either.
Promise.all(listOfPromises).then((values) =>{
values.forEach(function(value){
console.log(value);
});
});
});
} catch (error) {
// Only errors thrown up to and including the awaited getObject call reach this catch.
context.fail(error);
return;
}
};
Modified/Corrected code
// dependencies
const AWS = require('aws-sdk');
var JSZip = require('jszip');
// get reference to S3 client
const s3 = new AWS.S3();
exports.handler = async (event, context, callback) => {
// Read options from the event parameter.
const srcBucket = event.Records[0].s3.bucket.name;
// Object key may have spaces or unicode non-ASCII characters.
const srcKey = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, " "));
// Download the file from the S3 source bucket.
try {
const params = {
Bucket: srcBucket,
Key: srcKey
};
const data = await s3.getObject(params).promise();
var zip = JSZip.loadAsync(data.Body);
return zip.then(function(result){
var entries = Object.keys(result.files).map((name) =>{
if(name.indexOf("TestStatus") != -1){
return result.files[name];
}
}).filter(notUndefined => notUndefined !== undefined);
var listOfPromises = entries.map((entry) => {
return entry.async("text")
.then((u8) => {
return [entry.name, u8];
}).catch(error => console.error(error));
});
var promiseOfList = Promise.all(listOfPromises);
promiseOfList.then(function (list) {
console.log(list.toString());
});
});
} catch (error) {
context.fail(error);
return;
}
};
If you look closely, you are not returning anything, which is why the promise stays Pending.
const AWS = require('aws-sdk');
var JSZip = require('jszip');
// get reference to S3 client
const s3 = new AWS.S3();
exports.handler = async (event, context, callback) => {
// Read options from the event parameter.
const srcBucket = event.Records[0].s3.bucket.name;
// Object key may have spaces or unicode non-ASCII characters.
const srcKey = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, " "));
// Download the file from the S3 source bucket.
try {
const params = {
Bucket: srcBucket,
Key: srcKey
};
const data = await s3.getObject(params).promise();
// here is the problem
// var zip = JSZip.loadAsync(data.Body).then(function (content){
// return content;
// }
var zip = await JSZip.loadAsync(data.body)
return zip.then(function(result){
var entries = Object.keys(result.files).map(function (name) {
if(name.indexOf("TestStatus") != -1){
return name;
}
}).filter(notUndefined => notUndefined !== undefined);
var listOfPromises = entries.map(function(entry) {
return result.file(entry).async("text").then(function(fileContent){
return fileContent;
});
});
console.log("Helo");
Promise.all(listOfPromises).then((values) =>{
values.forEach(function(value){
console.log(value);
});
});
});
} catch (error) {
context.fail(error);
return;
}
};
```

Read excel file uploaded to s3 via node lambda function

I am trying to parse through an excel file that is uploaded to s3 using read-excel-file in a node lambda function that triggers on any s3 put. Here is my code which currently doesn't work. Can somebody tell me where I am going wrong?
const aws = require("aws-sdk");
const s3 = new aws.S3({ apiVersion: "2006-03-01" });
const readXlsxFile = require("read-excel-file/node");
exports.handler = async (event, context) => {
// Get the object from the event and show its content type
const bucket = event.Records[0].s3.bucket.name;
const key = decodeURIComponent(
event.Records[0].s3.object.key.replace(/\+/g, " ")
);
const params = {
Bucket: bucket,
Key: key
};
try {
const doc = await s3.getObject(params);
const parsedDoc = await readXlsxFile(doc);
console.log(parsedDoc)
} catch (err) {
console.log(err);
const message = `Error getting object ${key} from bucket ${bucket}. Make sure they exist and your bucket is in the same region as this function.`;
console.log(message);
throw new Error(message);
}
};
I haven't used lambda functions, but I have done something very similar in firebase functions. I used convert-excel-to-json.
I first downloaded the excel file from firebase storage to the firebase functions machine. Then use this npm module to extract the information.
I don't have time to format the code, but I can leave it here for reference:
// Runs when excel file is uploaded to storage
/**
 * Storage trigger: for files under `excel_orders/<uid>/`, downloads the
 * workbook to the temp directory, converts it with excelToJson and maps every
 * data row of Sheet1 (columns A-J) to an Order object.
 *
 * The resulting `orders` array is only built here; nothing in this function
 * stores or returns it.
 */
exports.uploadOrder = functions.storage.object().onFinalize(async (file) => {
  const fileBucket = file.bucket;
  const filePath = file.name || "null";
  const [rootFolder, uid] = filePath.split("/");

  // Ignore everything outside the excel_orders folder.
  if (rootFolder !== "excel_orders") {
    return;
  }
  console.log("User ID: " + uid);

  const fileName = path.basename(filePath);
  const bucket = admin.storage().bucket(fileBucket);
  const tempFilePath = path.join(os.tmpdir(), fileName);
  console.log(tempFilePath);
  await bucket.file(filePath).download({ destination: tempFilePath });

  const result = excelToJson({
    sourceFile: tempFilePath,
  });

  const ordersObj: any[] = result.Sheet1;
  // Drop the first row before mapping the remaining rows.
  ordersObj.shift();
  console.log(ordersObj);

  const orders: any[] = ordersObj.map((row): Order => ({
    package_description: row.A,
    package_type: row.B,
    country: row.C,
    address: row.D,
    curstomer_name: row.E,
    customer_phone: row.F,
    collection_ammount: row.G,
    order_date: row.H,
    delivery_date: row.I,
    delivery_time: row.J,
    status: "pending",
    assignedTo: "",
    merchantID: uid,
  }));
});

Node.js Firebase Function sending Base64 image to External API

I’m using Firebase Functions with a Storage trigger in Node.js to send uploaded image data to an external API endpoint where photos are uploaded.
I’m currently taking images uploaded to a bucket in my Firebase storage, converting them to base64 strings, and plug them into my dictionary for the request.
My current issue is that it seems like the dictionary is being cut short. I looked at the console logs on the Firebase console, and it seems like the output ends after the base64 variable.
I’m not sure whether this is a bug with the syntax, or with the way I’m using the base64, or with Firebase Functions. If anyone knows what might be going on, please let me know.
const request = require('request-promise');
const gcs = require('#google-cloud/storage')();
const path = require('path');
const os = require('os');
const fs = require('fs');
const firebase = require('firebase');
exports.identifyUpdate = functions.storage.object().onFinalize((object) => {
const fileBucket = object.bucket;
const filePath = object.name;
const contentType = object.contentType;
const fileName = path.basename(filePath);
if(!filePath.substring(0,filePath.indexOf('/')) == 'updates') {
console.log("Triggered by non-update photo")
return null;
}
console.log("Update photo added")
// Create Firebase app (for Realtime Database access)
var config = {
apiKey: "[apikey]",
authDomain: "[PROJECT_ID].firebaseapp.com",
databaseURL: "https://[PROJECT_ID].firebaseio.com",
storageBucket: "[PROJECT_ID].appspot.com",
};
if(!firebase.apps.length) {
firebase.initializeApp(config);
}
// Trace back to Update stored in Realtime Database
const database = firebase.database().ref()
const pendingRef = database.child('pendingUpdates')
console.log(filePath)
const splitPath = filePath.split(path.sep)
const patientID = splitPath[1]
console.log('Patient ID: ' + patientID)
const updateID = splitPath[2]
console.log('Update ID: ' + updateID)
const updateRef = pendingRef.child(patientID).child(updateID)
console.log('Found Update reference')
const photoRef = updateRef.child('photoURLs').child(fileName)
console.log('Photo Reference: ' + photoRef)
// Download and convert image to base64
const bucket = gcs.bucket(fileBucket)
const tempFilePath = path.join(os.tmpdir(), fileName)
const metadata = {
contentType: contentType
};
var base64;
return bucket.file(filePath).download({
destination: tempFilePath
}).then(() => {
console.log('Image downloaded locally to', tempFilePath)
}).then(() => {
base64 = base64_encode(tempFilePath)
console.log("Base 64: " + base64)
}).then(() => {
// Send image data to Kairos
var options = {
method: 'POST',
uri: 'https://api.kairos.com/recognize',
body: {
'image': base64,
'gallery_name': 'gallerytest1'
},
headers: {
'app_id': '[id]',
'app_key': '[key]'
},
json: true
}
return new Promise (() => {
console.log(options)
request(options)
.then(function(repos) {
console.log('API call succeeded');
console.log('Kairos response: ' + repos);
const apiResult = repos['images']['transaction']['subject_id']
console.log("Transaction " + JSON.stringify(apiResult))
})
.catch(function(err) {
console.log('API call failed')
})
});
})
// Delete app instance (to prevent concurrency leaks)
const deleteApp = () => app.delete().catch(() => null);
deleteApp.call
})
/**
 * Reads a file synchronously and returns its contents base64-encoded.
 *
 * @param {string} file Path of the file to read.
 * @returns {string} Base64 representation of the file's bytes.
 * @throws {Error} When the file cannot be read.
 */
function base64_encode(file) {
  // readFileSync already returns a Buffer, so the extra copy through the
  // deprecated `new Buffer()` constructor is unnecessary.
  return fs.readFileSync(file).toString('base64');
}
Image Output:

Resources