I am following the tutorial to extract text from images at:
https://cloud.google.com/functions/docs/tutorials/ocr?authuser=1
But I do not wish to translate the text; I only wish to detect and save it.
The tutorial implements 3 functions:
gcloud beta functions deploy ocr-extract --trigger-bucket [YOUR_IMAGE_BUCKET_NAME] --entry-point processImage
gcloud beta functions deploy ocr-translate --trigger-topic [YOUR_TRANSLATE_TOPIC_NAME] --entry-point translateText
gcloud beta functions deploy ocr-save --trigger-topic [YOUR_RESULT_TOPIC_NAME] --entry-point saveResult
I just wish to detect and save the text, but I could not work out how to remove the translation portion of the code below:
/**
* Detects the text in an image using the Google Vision API.
*
* @param {string} bucketName Cloud Storage bucket name.
* @param {string} filename Cloud Storage file name.
* @returns {Promise}
*/
function detectText (bucketName, filename) {
let text;
console.log(`Looking for text in image ${filename}`);
return vision.textDetection({ source: { imageUri: `gs://${bucketName}/${filename}` } })
.then(([detections]) => {
const annotation = detections.textAnnotations[0];
text = annotation ? annotation.description : '';
console.log(`Extracted text from image (${text.length} chars)`);
return translate.detect(text);
})
.then(([detection]) => {
if (Array.isArray(detection)) {
detection = detection[0];
}
console.log(`Detected language "${detection.language}" for ${filename}`);
// Submit a message to the bus for each language we're going to translate to
const tasks = config.TO_LANG.map((lang) => {
let topicName = config.TRANSLATE_TOPIC;
if (detection.language === lang) {
topicName = config.RESULT_TOPIC;
}
const messageData = {
text: text,
filename: filename,
lang: lang,
from: detection.language
};
return publishResult(topicName, messageData);
});
return Promise.all(tasks);
});
}
After that, I just wish to save the detected text to a file, as the code below shows:
/**
* Saves the data packet to a file in GCS. Triggered from a message on a Pub/Sub
* topic.
*
* @param {object} event The Cloud Functions event.
* @param {object} event.data The Cloud Pub/Sub Message object.
* @param {string} event.data.data The "data" property of the Cloud Pub/Sub
* Message. This property will be a base64-encoded string that you must decode.
*/
exports.saveResult = (event) => {
const pubsubMessage = event.data;
const jsonStr = Buffer.from(pubsubMessage.data, 'base64').toString();
const payload = JSON.parse(jsonStr);
return Promise.resolve()
.then(() => {
if (!payload.text) {
throw new Error('Text not provided. Make sure you have a "text" property in your request');
}
if (!payload.filename) {
throw new Error('Filename not provided. Make sure you have a "filename" property in your request');
}
if (!payload.lang) {
throw new Error('Language not provided. Make sure you have a "lang" property in your request');
}
console.log(`Received request to save file ${payload.filename}`);
const bucketName = config.RESULT_BUCKET;
const filename = renameImageForSave(payload.filename, payload.lang);
const file = storage.bucket(bucketName).file(filename);
console.log(`Saving result to ${filename} in bucket ${bucketName}`);
return file.save(payload.text);
})
.then(() => {
console.log(`File saved.`);
});
};
So, the tutorial there is based on a much more complex setup (it also uses Pub/Sub and Translate), and you only want to extract the text, so the following should be enough:
'use strict';
const Storage = require('@google-cloud/storage');
const Vision = require('@google-cloud/vision');
const bucketName = 'YOUR_BUCKET';
const srcFilename = 'YOUR_IMAGE.jpg';
const projectId = 'YOUR_PROJECT_ID';
const storage = new Storage({
projectId: projectId
});
const vision = new Vision.ImageAnnotatorClient({
projectId: projectId
});
exports.processImage = (req, res) => {
  vision.textDetection(`gs://${bucketName}/${srcFilename}`)
    .then(([detections]) => {
      const annotation = detections.textAnnotations[0];
      const text = annotation ? annotation.description : '';
      console.log(`Extracted text: ${text}`);
      console.log(`Extracted text from image (${text.length} chars)`);
      // Respond only after the detection finishes, so the function is not
      // terminated before the Vision call completes.
      res.status(200).send("OK");
    })
    .catch(vis_err => {
      console.error("Vision error:", vis_err);
      res.status(500).send("Vision error");
    });
};
My dependencies, in my package.json file:
"dependencies": {
"#google-cloud/vision": "0.21.0"
},
You can later extend this to save the text to Storage if you wish; there are other tutorials on how to do so.
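As a minimal sketch of that extension (not from the tutorial; YOUR_RESULT_BUCKET is a placeholder and storage is the client created above), you could write the detected text next to the image name:
function saveText(text, srcFilename) {
  // Hypothetical result bucket; replace YOUR_RESULT_BUCKET with your own bucket name.
  const resultFile = storage.bucket('YOUR_RESULT_BUCKET').file(`${srcFilename}.txt`);
  // file.save() returns a Promise that resolves once the object has been written.
  return resultFile.save(text)
    .then(() => console.log(`Saved text for ${srcFilename}`));
}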
Related
A frontend application is creating documents in Firestore with the following model:
fileRef is a string: "gs://bucket-location/folder/fileName.extension"
Now, after creation, I want to get the public URL of the file and update the document with that URL:
import * as functions from "firebase-functions";
import * as admin from "firebase-admin";
const firebase = admin.initializeApp();
interface DocumentDataType {
fileRef: string;
fileType: "image" | "video";
fileUrl: string;
timestamp: FirebaseFirestore.Timestamp;
location: FirebaseFirestore.GeoPoint;
}
exports.onDocumentCreated = functions.firestore
.document("db/{docId}")
.onCreate((snapshot, context) => {
const bucket = firebase.storage().bucket();
const { fileRef } = <DocumentDataType>snapshot.data();
const file = bucket.file(fileRef);
const fileUrl = file.publicUrl();
const batch = admin.firestore().batch();
batch.update(snapshot.ref, { ...snapshot.data(), fileUrl });
});
The function gets triggered, but the file URL does not update.
Is this the right approach for getting the file in Cloud Storage?
Also, is updating via a batch correct with SDK v9? I got confused reading the documentation and could not find a proper solution.
Batched writes are useful when you are trying to add/update/delete multiple documents and want to ensure all the operations either pass or fail. In the provided code you are not committing the batch. Calling commit() should update the document:
batch.commit().then(() => console.log("Document updated"));
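Applied to your handler, a minimal sketch (written here in plain JavaScript, returning the commit so Cloud Functions waits for the write) would be:
exports.onDocumentCreated = functions.firestore
  .document("db/{docId}")
  .onCreate((snapshot, context) => {
    const bucket = firebase.storage().bucket();
    const { fileRef } = snapshot.data();
    const fileUrl = bucket.file(fileRef).publicUrl();
    const batch = admin.firestore().batch();
    batch.update(snapshot.ref, { fileUrl });
    // Commit the batch and return the promise so the function waits for the write.
    return batch.commit();
  });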
However, if you just want to update a single document, I would prefer update() instead:
exports.onDocumentCreated = functions.firestore
.document("db/{docId}")
.onCreate(async (snapshot, context) => {
const bucket = firebase.storage().bucket();
const { fileRef } = <DocumentDataType>snapshot.data();
const file = bucket.file(fileRef);
const fileUrl = file.publicUrl();
return snapshot.ref.update({ ...snapshot.data(), fileUrl });
});
I have the following Lambda function. It receives an XML, looks through it, finds a base64-encoded PDF file, and tries to upload it to S3.
index.js
const AWS = require('aws-sdk');
const xml2js = require('xml2js');
const pdfUpload = require('./upload_pdf');
const s3 = new AWS.S3();
exports.handler = async (event, context, callback) => {
let attachment;
xml2js.parseString(event.body, function(err, result) {
attachment = result.Attachment[0].Data[0];
if (attachment) {
pdfUpload(attachment);
}
});
return {
statusCode: 200
}
};
upload_pdf.js
/**
*
* @param {string} base64 Data
* @return {string} Image url
*/
const pdfUpload = async (base64) => {
const AWS = require('aws-sdk');
const s3 = new AWS.S3();
const base64Data = Buffer.from(base64, 'base64');
// With this setup, each time your user uploads an image, will be overwritten.
// To prevent this, use a different Key each time.
// This won't be needed if they're uploading their avatar, hence the filename, userAvatar.js.
const params = {
Bucket: 'mu-bucket',
Key: `123.pdf`,
Body: base64Data,
ACL: 'public-read',
ContentEncoding: 'base64',
ContentType: `application/pdf`
}
let location = '';
let key = '';
try {
const { Location, Key } = await s3.upload(params).promise();
location = Location;
key = Key;
} catch (error) {
// console.log(error)
}
console.log(location, key);
return location;
}
module.exports = pdfUpload;
No matter what I do, the file does not get uploaded. I have checked the permissions, and the lambda has access to the bucket. Running the lambda I'm not receiving any errors either. Can anybody see what might be wrong here?
First, as a piece of advice, I think you should add more logs to see at which step the function is stuck or failing.
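For example, the catch block in upload_pdf.js currently swallows the error; logging it there (a small sketch of that change) would show in CloudWatch whether the upload itself is failing:
try {
  const { Location, Key } = await s3.upload(params).promise();
  location = Location;
  key = Key;
} catch (error) {
  // Log the failure instead of silently swallowing it.
  console.error('S3 upload failed:', error);
}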
The second thing you can try is to await the upload:
await pdfUpload(attachment);
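Note that await only has an effect if the enclosing function is async and the promise reaches the handler; inside the synchronous parseString callback it does not. One way to restructure the handler (a sketch, promisifying parseString with util.promisify, which is an assumption about how you want to handle the XML) could be:
const util = require('util');
const xml2js = require('xml2js');
const pdfUpload = require('./upload_pdf');

// Turn the callback-based parser into a promise-returning function.
const parseXml = util.promisify(xml2js.parseString);

exports.handler = async (event) => {
  const result = await parseXml(event.body);
  const attachment = result.Attachment[0].Data[0];
  if (attachment) {
    // Await the upload so the Lambda does not finish before S3 does.
    await pdfUpload(attachment);
  }
  return { statusCode: 200 };
};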
I am trying to use the examples provided by the AWS KMS team for client-node encryption in @aws-crypto to encrypt and decrypt files in Node.js with AWS KMS.
**AWS KMS TEAM EXAMPLE**
import {
KmsKeyringNode,
decryptStream,
encryptStream,
MessageHeader // eslint-disable-line no-unused-vars
} from '@aws-crypto/client-node'
import { finished } from 'stream'
import { createReadStream } from 'fs'
import { promisify } from 'util'
const finishedAsync = promisify(finished)
export async function kmsStreamTest (filename: string) {
/* A KMS CMK is required to generate the data key.
* You need kms:GenerateDataKey permission on the CMK in generatorKeyId.
*/
const generatorKeyId = 'arn:aws:kms:us-west-2:658956600833:alias/EncryptDecrypt'
/* The KMS keyring must be configured with the desired CMKs */
const keyring = new KmsKeyringNode({ generatorKeyId })
/* Encryption context is a *very* powerful tool for controlling and managing access.
* It is ***not*** secret!
* Encrypted data is opaque.
* You can use an encryption context to assert things about the encrypted data.
* Just because you can decrypt something does not mean it is what you expect.
* For example, if you are only expecting data from 'us-west-2',
* the origin can identify a malicious actor.
* See: https://docs.aws.amazon.com/encryption-sdk/latest/developer-guide/concepts.html#encryption-context
*/
const context = {
stage: 'demo',
purpose: 'simple demonstration app',
origin: 'us-west-2'
}
/* Create a simple pipeline to encrypt the package.json for this project. */
const stream = createReadStream(filename)
.pipe(encryptStream(keyring, { encryptionContext: context }))
.pipe(decryptStream(new KmsKeyringNode({ discovery: true })))
.on('MessageHeader', ({ encryptionContext }: MessageHeader) => {
/* Verify the encryption context.
* Depending on the Algorithm Suite, the `encryptionContext` _may_ contain additional values.
* In Signing Algorithm Suites the public verification key is serialized into the `encryptionContext`.
* Because the encryption context might contain additional key-value pairs,
* do not add a test that requires that all key-value pairs match.
* Instead, verify that the key-value pairs you expect match.
*/
Object
.entries(context)
.forEach(([key, value]) => {
if (encryptionContext[key] !== value) throw new Error('Encryption Context does not match expected values')
})
})
/* This is not strictly speaking part of the example.
* Streams need a place to drain.
* To test this code I just accumulate the stream.
* Then I can return that Buffer and verify.
* In a real world case you do not want to always buffer the whole stream.
*/
const buff: Buffer[] = []
stream.on('data', (chunk: Buffer) => {
buff.push(chunk)
})
await finishedAsync(stream)
return Buffer.concat(buff)
}
What I am stuck at is how to separate the logic for encryption and decryption. Here they are using pipes, and I did something like this:
**My Implementation**
const crypto = require('@aws-crypto/client-node');
const KmsKeyringNode = crypto.KmsKeyringNode;
const encryptStream = crypto.encryptStream;
const decryptStream = crypto.decryptStream;
const MessageHeader = crypto.MessageHeader;
const finished = require('stream');
const fs = require('fs');
const promisify = require('util');
const finishedAsync = promisify.promisify(finished)
kmsStreamTest = async (filename) => {
try {
const generatorKeyId = 'mykey'
/* The KMS keyring must be configured with the desired CMKs */
const keyring = new KmsKeyringNode({ generatorKeyId })
const context = {
stage: 'demo',
purpose: 'simple demonstration app',
origin: 'us-west-2'
}
/* Create a simple pipeline to encrypt the package.json for this project. */
const encryptedStream = fs.createReadStream(filename)
.pipe(encryptStream(keyring, { encryptionContext: context }))
const buff = []
encryptedStream.on('data', (chunk) => {
console.log(chunk.toString());
buff.push(chunk);
fs.writeFileSync(`${global.appRoot}/fileHandler/encrypted.json`, buff, { flag: 'wx' });
})
await finishedAsync(stream)
return Buffer.concat(buff)
}
catch (err) {
console.log("Errror----->>", Error);
}
}
kmsDecryptionTest =async (filename) => {
try {
const generatorKeyId = 'arn:aws:kms:us-east-2:362495994317:alias/data-key-encryptor';
/* The KMS keyring must be configured with the desired CMKs */
const keyring = new KmsKeyringNode({ generatorKeyId })
const context = {
stage: 'demo',
purpose: 'simple demonstration app',
origin: 'us-west-2'
}
const decryptedStream = fs.createReadStream(filename)
.pipe(decryptStream(new KmsKeyringNode({ discovery: true })))
const decryptedBuffer = []
decryptedStream.on('data', (chunk) => {
console.log("Decryption Buffer ------->".toString());
decryptedBuffer.push(chunk);
})
await finishedAsync(stream)
return Buffer.concat(buff)
}
catch (err) {
console.log("Error Data", err);
}
}
module.exports = { kmsStreamTest, kmsDecryptionTest }
When I try to call the encrypt function, the file gets created with some gibberish text (as expected). But when I try to read that file and call kmsDecryptionTest, it gives me the error `malformed Header`.
Can someone help me figure out why this is happening?
PS: My goal is to encrypt and decrypt a .zip file, but right now I am just trying this on JSON.
I'm trying to get the permanent (unsigned) download URL after uploading a file to Google Cloud Storage. I can get the signed download URL using file.createWriteStream() but file.createWriteStream() doesn't return the UploadResponse that includes the unsigned download URL. bucket.upload() includes the UploadResponse, and Get Download URL from file uploaded with Cloud Functions for Firebase has several answers explaining how to get the unsigned download URL from the UploadResponse. How do I change file.createWriteStream() in my code to bucket.upload()? Here's my code:
const {Storage} = require('@google-cloud/storage');
const storage = new Storage({ projectId: 'my-app' });
const bucket = storage.bucket('my-app.appspot.com');
var file = bucket.file('Audio/' + longLanguage + '/' + pronunciation + '/' + wordFileType);
const config = {
action: 'read',
expires: '03-17-2025',
content_type: 'audio/mp3'
};
function oedPromise() {
return new Promise(function(resolve, reject) {
http.get(oedAudioURL, function(response) {
response.pipe(file.createWriteStream(options))
.on('error', function(error) {
console.error(error);
reject(error);
})
.on('finish', function() {
file.getSignedUrl(config, function(err, url) {
if (err) {
console.error(err);
return;
} else {
resolve(url);
}
});
});
});
});
}
I tried this, but it didn't work:
function oedPromise() {
return new Promise(function(resolve, reject) {
http.get(oedAudioURL, function(response) {
bucket.upload(response, options)
.then(function(uploadResponse) {
console.log('Then do something with UploadResponse.');
})
.catch(error => console.error(error));
});
});
}
The error message was Path must be a string. In other words, bucket.upload() expects a local file path string, not a response stream.
I used the Google Cloud Text-to-Speech API to simulate what you are doing: getting the text to create the audio file from a text file. Once the file was created, I used the upload method to add it to my bucket and the makePublic method to get its public URL. I also used the async/await feature offered by Node.js instead of function chaining (using then) to avoid the 'No such object: ...' error, which is produced when the makePublic method executes before the file finishes uploading to the bucket.
// Imports the Google Cloud client library
const {Storage} = require('@google-cloud/storage');
// Creates a client using Application Default Credentials
const storage = new Storage();
// Imports the Google Cloud client library
const textToSpeech = require('@google-cloud/text-to-speech');
// Get the bucket
const myBucket = storage.bucket('my_bucket');
// Import other required libraries
const fs = require('fs');
const util = require('util');
// Create a client
const client = new textToSpeech.TextToSpeechClient();
// Create the variable to save the text to create the audio file
var text = "";
// Function that reads my_text.txt file (which contains the text that will be
// used to create my_audio.mp3) and saves its content in a variable.
function readFile() {
// This line opens the file as a readable stream
var readStream = fs.createReadStream('/home/usr/my_text.txt');
// Read and display the file data on console
readStream.on('data', function (data) {
text = data.toString();
});
// Execute the createAndUploadFile() function once the whole file has been read
readStream.on('end', function (data) {
createAndUploadFile();
});
}
// Function that uploads the file to the bucket and generates it public URL.
async function createAndUploadFile() {
// Construct the request
const request = {
input: {text: text},
// Select the language and SSML voice gender (optional)
voice: {languageCode: 'en-US', ssmlGender: 'NEUTRAL'},
// select the type of audio encoding
audioConfig: {audioEncoding: 'MP3'},
};
// Performs the text-to-speech request
const [response] = await client.synthesizeSpeech(request);
// Write the binary audio content to a local file
const writeFile = util.promisify(fs.writeFile);
await writeFile('my_audio.mp3', response.audioContent, 'binary');
console.log('Audio content written to file: my_audio.mp3');
// Wait for the myBucket.upload() function to complete before moving on to the
// next line to execute it
let res = await myBucket.upload('/home/usr/my_audio.mp3');
// If there is an error, it is printed
if (res.err) {
console.log('error');
}
// If not, the makePublic() function is executed
else {
// Get the file in the bucket
let file = myBucket.file('my_audio.mp3');
file.makePublic();
}
}
readFile();
bucket.upload() is a convenience wrapper around file.createWriteStream() that takes a local filesystem path and uploads the file into the bucket as an object:
bucket.upload("path/to/local/file.ext", options)
.then(() => {
// upload has completed
});
To generate a signed URL, you'll need to get a file object from the bucket:
const theFile = bucket.file('file_name');
The file name will either be that of your local file or, if you specified an alternate remote name with options.destination, that name for the file on GCS.
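For example (a small sketch; the local path and 'remote/name.ext' are placeholders):
const options = { destination: 'remote/name.ext' }; // placeholder remote name
bucket.upload('path/to/local/file.ext', options)
  .then(() => {
    // The object in GCS is now named after options.destination, not the local file.
    const theFile = bucket.file('remote/name.ext');
  });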
Then, use File.getSignedUrl() to get a signed URL:
bucket.upload("path/to/local/file.ext", options)
.then(() => {
const theFile = bucket.file('file.ext');
return theFile.getSignedUrl(signedUrlOptions); // getSignedUrl returns a Promise
})
.then((signedUrl) => {
// do something with the signedURL
});
See:
Bucket.upload() documentation
File.getSignedUrl() documentation
You can make a specific file in a bucket publicly readable with the method makePublic.
From the docs:
const {Storage} = require('@google-cloud/storage');
const storage = new Storage();
// 'my-bucket' is your bucket's name
const myBucket = storage.bucket('my-bucket');
// 'my-file' is the path to your file inside your bucket
const file = myBucket.file('my-file');
file.makePublic(function(err, apiResponse) {});
//-
// If the callback is omitted, we'll return a Promise.
//-
file.makePublic().then(function(data) {
const apiResponse = data[0];
});
Now the URI https://storage.googleapis.com/[BUCKET_NAME]/[OBJECT_NAME] is a public link to the file, as explained here.
The point is that you only need this minimal code to make an object public, for instance from a Cloud Function. You then know what the public link looks like and can use it directly in your app.
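As a minimal sketch (the bucket and object names are placeholders), building that link yourself looks like this:
const bucketName = 'my-bucket';   // placeholder
const objectName = 'my-file';     // placeholder
// Public objects are served at this well-known URL pattern.
const publicUrl = `https://storage.googleapis.com/${bucketName}/${objectName}`;
console.log(publicUrl);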
I am going to build a weather data pipeline that starts with an Internet of Things (IoT) device, utilizes a message queue to receive and deliver data, leverages a serverless function to move the data to a data warehouse, and then creates a dashboard that displays the information. I am getting an error in the function below.
/**
 * Background Cloud Function to be triggered by Pub/Sub.
 *
 * @param {object} event The Cloud Functions event.
 * @param {function} callback The callback function.
 */
exports.subscribe = function (event, callback) {
  const BigQuery = require('@google-cloud/bigquery');
  const projectId = "iot2analytics-ca4"; // Enter your project ID here
  const datasetId = "weatherData"; // Enter your BigQuery dataset name here
  const tableId = "weatherDatatable"; // Enter your BigQuery table name here -- make sure it is setup correctly
  const PubSubMessage = event.data;
  // Incoming data is in JSON format
  const incomingData = PubSubMessage.data ? Buffer.from(PubSubMessage.data, 'base64').toString() : "{'sensorID':'na','timecollected':'1/1/1970 00:00:00','zipcode':'00000','latitude':'0.0','longitude':'0.0','temperature':'-273','humidity':'-1','dewpoint':'-273','pressure':'0'}";
  const jsonData = JSON.parse(incomingData);
  var rows = [jsonData];
  console.log(`Uploading data: ${JSON.stringify(rows)}`);
  // Instantiates a client
  const bigquery = BigQuery({
    projectId: projectId
  });
  // Inserts data into a table
  bigquery
    .dataset(datasetId)
    .table(tableId)
    .insert(rows)
    .then((foundErrors) => {
      rows.forEach((row) => console.log('Inserted:', row));
      if (foundErrors && foundErrors.insertErrors != undefined) {
        foundErrors.forEach((err) => {
          console.log(' Error:', err);
        });
      }
    })
    .catch((err) => {
      console.error(' ERROR:', err);
    });
  // [END bigquery_insert_stream]
  callback();
};
You are using single quotes to wrap the strings in the JSON object, which is not valid JSON, so JSON.parse fails on that fallback string. Please replace the single quotes with double quotes and then stringify your object. Here, use this:
let temp = {
"sensorID":"na",
"timecollected":"1/1/1970 00:00:00",
"zipcode":"00000",
"latitude":"0.0",
"longitude":"0.0",
"temperature":"-273",
"humidity":"-1",
"dewpoint":"-273",
"pressure":"0"
}
temp = JSON.stringify(temp)
const incomingData = PubSubMessage.data
  ? Buffer.from(PubSubMessage.data, 'base64').toString()
  : temp;