Scheduling Node.js script on GCP using Cloud Functions - node.js

I've created a script that scrapes information from a webpage and writes it to a Google Sheet. This is working great on my local machine, but I'd like to schedule this on GCP.
It sounds like Cloud Functions are the way to go, but when I deploy my function I'm getting this error:
Function failed on loading user code. Error message: Node.js module defined by file working.js is expected to export function named run
I'm not sure what I should be using as the "Function to execute". Here's the function I've uploaded:
const puppeteer = require('puppeteer');
const jsonexport = require('jsonexport');
const GoogleSpreadsheet = require('google-spreadsheet');
const creds = require('./google-generated-creds.json');
const fs = require('fs');

var doc = new GoogleSpreadsheet('1qaFi0xnhaCZEduylUvGXWpyMJv00Rz6Y9qqyFR1E9oI');

function run() {
  return new Promise(async (resolve, reject) => {
    try {
      const browser = await puppeteer.launch({args: ['--no-sandbox', '--disable-setuid-sandbox']});
      const page = await browser.newPage();
      const urls = [
        "https://www.marksandspencer.com/pure-cotton-long-sleeve-jumpsuit/p/p60258655?image=SD_01_T42_6701_XB_X_EC_90&color=INDIGO&prevPage=plp",
        "https://www.marksandspencer.com/cotton-rich-striped-3-4-sleeve-t-shirt/p/p60210598?prevPage=plp",
        "https://www.marksandspencer.com/high-neck-long-sleeve-blouse/p/p60260040?image=SD_01_T43_5168_HD_X_EC_90&color=LIGHTDENIM&prevPage=plp",
        "https://www.marksandspencer.com/pure-cotton-printed-short-sleeve-t-shirt/p/p60263529?image=SD_01_T41_8030Z_Z4_X_EC_90&color=WHITEMIX&prevPage=plp",
        "https://www.marksandspencer.com/pure-cotton-button-detailed-denim-mini-skirt/p/p60260145?image=SD_01_T57_4004_QP_X_EC_90&color=DARKINDIGO&prevPage=plp",
        "https://www.marksandspencer.com/pure-cotton-long-sleeve-shirt-midi-dress/p/p60258654?image=SD_01_T42_6703_HP_X_EC_90&color=DENIM&prevPage=plp",
        "https://www.marksandspencer.com/mid-rise-skinny-leg-ankle-grazer-jeans/p/p60220155?prevPage=plp",
        "https://www.marksandspencer.com/pure-cotton-long-sleeve-shirt/p/p60260208?image=SD_01_T43_5181_HP_X_EC_90&color=DENIM&prevPage=plp",
        "https://www.marksandspencer.com/long-sleeve-shirt-mini-dress/p/p60258652?image=SD_01_T42_6704_HP_X_EC_90&color=DENIM&prevPage=plp",
        "https://www.marksandspencer.com/wide-fit-suede-lace-up-trainers/p/p60216277?prevPage=plp",
        "https://www.marksandspencer.com/suede-ankle-boots/p/p60226911?prevPage=plp",
        "https://www.marksandspencer.com/leather-buckle-hip-belt/p/p60186701?prevPage=plp",
        "https://www.marksandspencer.com/cross-body-bag/p/p60215352?prevPage=plp"
      ];
      const productsList = [];
      for (let i = 0; i < urls.length; i++) {
        const url = urls[i];
        await page.goto(url);
        let products = await page.evaluate(() => {
          let product = document.querySelector('h1[itemprop=name]').innerText;
          let results = [];
          let items = document.querySelectorAll('[data-ttip-id=sizeGridTooltip] tbody tr td label');
          items.forEach((element) => {
            let size = element.getAttribute('for');
            let stockLevel = "";
            let nearest_td = element.closest('td');
            if (nearest_td.classList.contains('low-stock')) {
              stockLevel = "Low stock";
            } else if (nearest_td.classList.contains('out-of-stock')) {
              stockLevel = "Out of stock";
            } else {
              stockLevel = "In stock";
            }
            results.push({
              product: product,
              size: size,
              stock: stockLevel
            });
          });
          return results;
        });
        productsList.push(products);
      }
      browser.close();
      function flatten(arr) {
        return arr.reduce(function(flat, toFlatten) {
          return flat.concat(Array.isArray(toFlatten) ? flatten(toFlatten) : toFlatten);
        }, []);
      }
      var flatProducts = flatten(productsList);
      flatProducts.forEach(function(row) {
        // Authenticate with the Google Spreadsheets API.
        doc.useServiceAccountAuth(creds, function(err) {
          // Add the row to the spreadsheet.
          doc.addRow(1, row, function(err, rows) {
            console.log(row);
          });
        });
      });
    } catch (e) {
      return reject(e);
    }
  });
}

run().then(console.log).catch(console.error);
I've never used Cloud Functions before, so I'm unsure how much I'd need to modify my script.

You can't just upload an arbitrary script and have it run. You have to define a function using either the Cloud tools (via gcloud) or the Firebase tools and SDK, and you'll also have to decide how you want to trigger it; your code runs only when the function is triggered.
I'd say that porting an existing script to Cloud Functions is mostly non-trivial. You'll need to take the time to learn how the system works in order to make effective use of it.

What that error is referring to is that Cloud Functions can't find a function to run in that file (working.js) because you haven't exported one. For example, if you create a Cloud Function named run, then you must export a function in the script by assigning it to exports.run in your module:
exports.run = (event, callback) => {
  callback(null, `Hello ${event.data.name || 'World'}!`);
};
There are more examples in the documentation, but other changes will likely be necessary in your script for authentication, etc., to work from GCP.
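For the script in the question, a minimal adaptation could look like the sketch below. It assumes run() from the script above is in scope and has been changed to actually resolve once the rows are written (as posted, it never calls resolve). The exported name run is what the "Function to execute" field should match:
// index.js — a minimal sketch, assuming an HTTP-triggered function and
// that run() (from the script above) resolves when the sheet is updated.
exports.run = async (req, res) => {
  try {
    const result = await run();
    res.status(200).send(result || 'done');
  } catch (err) {
    console.error(err);
    res.status(500).send(err.toString());
  }
};
With an HTTP trigger in place, a scheduler such as Cloud Scheduler (or any cron-style caller) can hit the function's URL to run the scrape on a schedule.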

Related

Getting 'Cannot use import statement outside a module' in my test

I've written a small program using Playwright and I'm trying to make it use worker_threads. Now I've written a test for it, but for some reason I'm getting the above error in my test.
I tried tweaking the tsconfig settings, but that didn't work. Most of what I read suggests using commonjs as the module, which I'm already doing. I also installed ts-node, but that didn't help either.
Below is the function I wrote to create a worker.
import { Worker } from "worker_threads";

const create_worker = (file: string, tasksData: {}) =>
  new Promise((resolve, reject) => {
    const worker = new Worker(file, {
      workerData: tasksData,
    });
    worker.on("message", (data) => {
      resolve(data);
    });
    worker.on("error", (msg) => {
      reject(`An error occurred: ${msg}`);
    });
  });

export default create_worker;
And below is my test.
const browser = await chromium.launch();
const context = await browser.newContext();
const page = await context.newPage();
const search_grid = await construct_search_grid();
const chunks = chunkify_array(search_grid, 4);

const workerPromises: unknown[] = [];
for (let i = 0; i < 5; i++) {
  const worker = await create_worker("./src/adapters/playwright.ts", {
    search_grid: chunks![i],
  });
  workerPromises.push(worker);
}
const thread_results = await Promise.all(workerPromises);
I believe everything checks out, so I'm not really sure what to look for now. Does anyone know?
It feels like you don't have tsconfig.json "module" configured?
I have mine set to "commonjs". Without it, I also get that same error.
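For reference, a minimal tsconfig.json along those lines (only "module" really matters for this error; the other options are common defaults, not requirements):
{
  "compilerOptions": {
    "module": "commonjs",
    "target": "es2019",
    "esModuleInterop": true,
    "strict": true
  }
}
If your test runner has its own TypeScript config (ts-jest, for example), make sure it picks up the same "module" setting.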
I was able to overcome this error by following this guide: https://wanago.io/2019/05/06/node-js-typescript-12-worker-threads/
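For anyone skipping the link, the usual shape of that workaround (a sketch — file names here are illustrative, and it assumes ts-node is installed; I can't vouch this is exactly what the guide does) is that worker_threads can only execute JavaScript, so you point the Worker at a small plain-JS bootstrap that registers ts-node and then loads the real TypeScript worker:
// worker-bootstrap.js — plain JS, so worker_threads can run it directly.
const { workerData } = require("worker_threads");
require("ts-node").register(); // compile TypeScript on the fly
require(workerData.path);      // load the actual .ts worker module

// Caller side (illustrative): point the Worker at the bootstrap and pass
// the real script path through workerData:
// new Worker("./worker-bootstrap.js", {
//   workerData: { path: "./src/adapters/playwright.ts", search_grid: chunks[i] },
// });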

get GCP cloud function logs label.executionid

I want to get the execution ID that GCP gave to the Cloud Function, in order to store it in a database.
This is what I want to get:
let currId = log.label.execution_id
In order to do that, I'm fetching the logs with this function (inside my Cloud Function):
const logging = new Logging();
console.log(`executed ${eId}`);
printEntryMetadata(eId, sId);

async function printEntryMetadata(eId, sId) {
  const options = {
    filter: `textPayload = "executed ${eId}"`
  };
  const [entries] = await logging.getEntries(options);
  console.log('Logs:');
  console.log(`textPayload = "executed ${eId}"`);
  console.log(JSON.stringify(entries));
  const metadata = entries[0].metadata; // was commented out, but the next line depends on it
  console.log(`${metadata.labels.execution_id}`);
}
But JSON.stringify(entries) returns an empty array, even though the same filter works when I run it manually...
Is the Cloud Function unable to fetch its own logs?
This is what I've done:
exports.LogMetadata = async (executionId, scopeId, projectId, Logging) => {
  // note: the client option key is case-sensitive — it must be projectId
  const logging = new Logging({ projectId });
  const options = {
    filter: `textPayload = "executed ${executionId}.${scopeId}"`
  };
  const [entries] = await logging.getEntries(options);
  console.log(JSON.stringify(entries));
  try {
    const metadata = entries[0].metadata;
    console.log(`${metadata.labels.execution_id}`);
  } catch (error) {
    console.log("can't find the log, because it's the first function executed...");
  }
};
The only thing that doesn't work is that I can't fetch the first log of the first executed function.
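Worth noting: that is most likely log-ingestion latency rather than a permissions problem. Entries written by an execution take a few seconds to become queryable, so a lookup made immediately afterwards finds nothing. A small retry loop is one way around it; this is a sketch, with the attempt count and delay chosen arbitrarily:
// Sketch: poll for the entry a few times before giving up.
async function getEntriesWithRetry(logging, filter, attempts = 5, delayMs = 2000) {
  for (let i = 0; i < attempts; i++) {
    const [entries] = await logging.getEntries({ filter, pageSize: 1 });
    if (entries.length > 0) return entries;
    await new Promise((resolve) => setTimeout(resolve, delayMs));
  }
  return [];
}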

Firebase Authentication causes cloud functions to return empty

I have a Firebase function that's supposed to return Items that are sold by a seller. I want to get the seller's profile picture via Firebase Authentication, but whenever I await the call
Edit: worth noting that mAuth is Firebase Authentication.
await mAuth.getUser(sellerData.UID);
the application returns an empty JSON response, [].
Here is the full code for the function; the error occurs on line 11 or somewhere around there.
export const getHottestItems = functions.region("asia-east2").https.onRequest(async (data, response) => {
  try {
    var arrayItem = new Array<Item>();
    let itemSeller: Seller;
    const sellerSnapshot = await db.collection("users").get();
    // this is the list of promises/awaitables for all items
    const promises = new Array<Promise<FirebaseFirestore.QuerySnapshot<FirebaseFirestore.DocumentData>>>();
    sellerSnapshot.forEach(async (sellerDoc) => {
      const sellerData = sellerDoc.data();
      // THIS PART CAUSES THE API TO RETURN []
      const sellerAuth = await mAuth.getUser(sellerData.UID);
      // check for non null / empty strings
      if (sellerData.Name as string && sellerData.UID as string) {
        // this is all the seller information we need
        itemSeller = new Seller(sellerData.Name, sellerData.UID, sellerAuth.photoURL); // placeholder profile picture
        const refItem = sellerDoc.ref.collection("Items");
        // push all the promises to a list so we can run all our queries in parallel
        promises.push(refItem.get());
      }
    });
    // wait for all promises to finish and get a list of snapshots
    const itemSnapshots = await Promise.all(promises);
    itemSnapshots.forEach((ItemSnapshot) => {
      ItemSnapshot.forEach((ItemDoc) => {
        // get the data
        const itemData = ItemDoc.data();
        // if title is not null, the rest of the fields are unlikely to be.
        if (itemData.Title as string) {
          // the rest of the logic to convert from database to model is in the constructor
          arrayItem.push(new Item(ItemDoc.id, itemData.Title, itemSeller, itemData.Likes, itemData.ListedTime, itemData.Rating, itemData.Description, itemData.TransactionInformation, itemData.ProcurementInformation, itemData.Category, itemData.Stock, itemData.Image1, itemData.Image2, itemData.Image3, itemData.Image4, itemData.AdvertisementPoints, itemData.isDiscounted, itemData.isRestocked));
        }
      });
    });
    // sort by performance level
    arrayItem = arrayItem.sort(x => x.Performance);
    if (data.body.userID) {
      arrayItem = await markLikedItems(data.body.userID, arrayItem);
    }
    // send the response after all the final modifications
    response.send(arrayItem);
  } catch (err) {
    // log the error
    console.log(err);
    response.status(500).send(err);
  }
});

Do I have to use google cloud storage for batch annotation?

I am trying to make an asyncBatchAnnotateImages() request to annotate a bunch of images using the Google Cloud Vision API.
Here is a snippet of my code:
My function to create a request for batching:
module.exports = createRequests

const LABEL_DETECTION = 'LABEL_DETECTION'
const WEB_DETECTION = 'WEB_DETECTION'

function createRequests(imageUris) {
  let resources = {
    requests: [],
    outputConfig
  }
  for (let i = 0; i < imageUris.length; i++) {
    let request = {
      image: {source: {imageUri: imageUris[i]}},
      features: [{type: LABEL_DETECTION}, {type: WEB_DETECTION}]
    }
    resources.requests.push(request)
  }
  console.log(resources)
  return resources
}
My function for making the request itself:
// Imports the Google Cloud Client Library
const vision = require('@google-cloud/vision')

// Creates a client
const client = new vision.ImageAnnotatorClient()

const getImageUrls = require('./get-image-urls.js')
const createRequests = require('./create-requests.js')

const BUCKET_NAME = 'creative-engine'

function detectLabelsFromImage() {
  return new Promise(async (resolve, reject) => {
    try {
      let imageUris = await getImageUrls(BUCKET_NAME)
      let resources = createRequests(imageUris)
      try {
        let responses = await client.asyncBatchAnnotateImages(resources)
        const imageResponses = responses[0].responses
        imageResponses.forEach(imageResponse => {
          console.log('LABELS: ')
          const labels = imageResponse.labelAnnotations
          labels.forEach(label => {
            console.log(`label: ${label.description} | score: ${label.score}`)
          });
          console.log('WEB ENTITIES: ')
          const webEntities = imageResponse.webDetection.webEntities
          webEntities.forEach(webEntity => {
            console.log(`label: ${webEntity.description} | score: ${webEntity.score}`)
          });
        })
      } catch (err) {
        console.error('ERROR: ', err)
      }
    } catch (e) {
      reject(e)
    }
  })
}
Here is the error I get:
ERROR: Error: 3 INVALID_ARGUMENT: OutputConfig is required.
When I look at the Google documentation here, it states I need to use Google Cloud Storage for the JSON output.
I don't want to create a billing account with my information for Google Cloud. Is there a way to do this where I write to a local JSON file?
Thank you for your help!
As the link you shared says, it is not possible to write a local JSON file using the Cloud Vision API's async batch request. You must use GCS to store the output file.
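For completeness, this is roughly the OutputConfig shape the error message is asking for; the bucket URI is a placeholder, and the call follows the client library's long-running-operation convention:
// Sketch: asyncBatchAnnotateImages with the required OutputConfig.
const outputConfig = {
  gcsDestination: { uri: 'gs://your-bucket/vision-output/' }, // placeholder bucket/prefix
  batchSize: 2 // responses per output JSON file
};
const request = { requests: resources.requests, outputConfig };

const [operation] = await client.asyncBatchAnnotateImages(request);
await operation.promise(); // results land as JSON files under the GCS prefix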

nodejs async/await nested API progress

I have an API that searches for the user-provided term, returns an array of results, then fires off async requests for each of the results and gets results for each of these second batch of requests. I'd like the API to report progress as it happens rather than just the final result. So, if I do the following request, I should get updates like so
$ curl 'http://server/?q=foobar'
searching for ${q}…
found 76… now getting images…
found 30 images… done
{
result
}
Most of the relevant code is shown below. FWIW, I am using hapijs for my application.
let imagesOfRecords = {};

const getImages = async function (q) {
  console.log(`searching for ${q}…`);
  const uri = `http://remoteserver/?q=${q}`;
  const {res, payload} = await Wreck.get(uri);
  const result = JSON.parse(payload.toString()).hits;
  const numOfFoundRecords = result.total;
  if (result.total) {
    console.log(`found ${result.total}… now getting images…`);
    const foundRecords = result.hits.map(getBuckets);
    Promise.all(foundRecords).then(function() {
      console.log(`found ${Object.keys(imagesOfRecords).length} images… done`);
      reply(imagesOfRecords).headers = res.headers;
    }).catch(error => {
      console.log(error)
    });
  }
  else {
    console.log('nothing found');
    reply(0).headers = res.headers;
  }
};

const getBuckets = async function(record) {
  const { res, payload } = await Wreck.get(record.links.self);
  const bucket = JSON.parse(payload.toString()).links.bucket;
  await getImageFiles(bucket, record.links.self);
};

const getImageFiles = async function(uri, record) {
  const { res, payload } = await Wreck.get(uri);
  const contents = JSON.parse(payload.toString()).contents;
  imagesOfRecords[record] = contents.map(function(el) {
    return el.links.self;
  });
};
Once I can implement this, my next task would be to implement this progressive update in a web app that uses the above API.
To report a result at each step of your requests on the backend, you can use an EventEmitter, which emits an event at each progress step. You can read about events here.
A simple implementation:
const events = require('events');
const eventEmitter = new events.EventEmitter();

// your request code
Promise.all(foundRecords).then(function() {
  console.log(`found ${Object.keys(imagesOfRecords).length} images… done`);
  eventEmitter.emit('progress');
  reply(imagesOfRecords).headers = res.headers;
});

const eventReaction = (e) => {
  // do something with the event, console.log for example
};

eventEmitter.on('progress', eventReaction);
You can find more examples here and here.
To push events to the client you can use the socket.io library. You can find pretty straightforward explanations of how socket.io works in its documentation.
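A minimal sketch of wiring the emitter above to socket.io (httpServer and the payload shape are illustrative assumptions):
const io = require('socket.io')(httpServer); // assumes an existing HTTP server

io.on('connection', (socket) => {
  // Relay each backend progress event to the connected client.
  const onProgress = (step) => socket.emit('progress', step);
  eventEmitter.on('progress', onProgress);
  socket.on('disconnect', () => eventEmitter.off('progress', onProgress));
});

// Elsewhere, emit with a payload describing the step:
eventEmitter.emit('progress', { message: 'found 76… now getting images…' });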
If you want to send events between servers or processes and go a little further, you can read more about 0MQ (ZeroMQ) and its Node implementation.
