Cloud Functions for Firebase BigQuery sync error - node.js

We're working on a cloud function that allows us to keep our bigquery and firebase database in sync. The function triggers when a place is created/updated/deleted.
Based on the trigger action (create/update/delete) we add a property called big_query_active to signal if the object exists or not. Same goes for the date.
Our current problem is that the call to big query sometimes returns an error. So that would mean that the data is not in sync anymore. How can this be prevented?
'use strict';
// Default imports.
const functions = require('firebase-functions');
const bigQuery = require('#google-cloud/bigquery');
// If you want to change the nodes to listen to REMEMBER TO change the constants below.
// The 'id' field is AUTOMATICALLY added to the values, so you CANNOT add it.
const ROOT_NODE = 'places';
const VALUES = [
'country_id',
'category_id',
'name',
'active',
'archived'
];
// This function listens to the supplied root node, but on child added/removed/changed.
// When an object is inserted/deleted/updated the appropriate action will be taken.
exports.children = functions.database.ref(ROOT_NODE + '/{id}').onWrite(event => {
const query = bigQuery();
const dataset = query.dataset('stampwallet');
const table = dataset.table(ROOT_NODE);
if (!event.data.exists() && !event.data.previous.exists()) {
return;
}
const item = event.data.exists() ? event.data.val() : event.data.previous.val();
const data = {};
data['id'] = event.params.id;
for (let index = 0; index < VALUES.length; index++) {
const key = VALUES[index];
data[key] = item[key] !== undefined ? item[key] : null;
}
data['big_query_date'] = new Date().getTime() / 1000;
data['big_query_active'] = event.data.exists();
return table.insert(data).then(() => {
return true;
}).catch((error) => {
if (error.name === 'PartialFailureError') {
console.log('A PartialFailureError happened while uploading to BigQuery...');
} else {
console.log(JSON.stringify(error));
console.log('Random error happened while uploading to BigQuery...');
}
});
});
This is the error that we (sometimes) receive
{"code":"ECONNRESET","errno":"ECONNRESET","syscall":"read"}
How could it be prevented that the data goes out of sync? Or is there a way to retry so that it always succeeds?

Related

how to save data in aws neptune db using node js?

Is there a way to save the data in amazon aws neptune db using node js?
I am running this code on a lambda.
I made the connection to neptune db using the below code.
const gremlin = require('gremlin');
const DriverRemoteConnection = gremlin.driver.DriverRemoteConnection;
const Graph = gremlin.structure.Graph;
// Declared with const: assigning to an undeclared name creates an implicit global
// and throws a ReferenceError in strict mode / ES modules.
const dc = new DriverRemoteConnection('endpoint',{});
const graph = new Graph();
// Traversal source bound to the remote connection; queries start from `g`.
const g = graph.traversal().withRemote(dc);
Here's a JavaScript Lambda function that writes data to Neptune (and wraps the write in a retry block in case of concurrent modifications). The function gets the Neptune endpoint and port from environment variables. The write query is in the query() method. It's a simple upsert example that tries to create a vertex using a randomly generated ID. If a vertex with that ID already exists, the query returns that vertex rather than creating a new one.
This example creates a single connection that persists for the lifetime of the Lambda container (rather than per invocation). There's some error checking in the retry code that recreates the connection in the case of an untoward network issue.
const gremlin = require('gremlin');
const async = require('async');
const traversal = gremlin.process.AnonymousTraversalSource.traversal;
const DriverRemoteConnection = gremlin.driver.DriverRemoteConnection;
// The connection and the traversal source are created once when the module loads and
// kept in module scope. Both are declared with `let` because the handler's errorFilter
// replaces them when it sees a 'WebSocket is not open' error.
let conn = createRemoteConnection();
let g = createGraphTraversalSource(conn);
// Shorthands used by query(): gremlin.process.t (as t.id) and
// gremlin.process.statics (as __.unfold() / __.addV()).
const t = gremlin.process.t;
const __ = gremlin.process.statics;
/**
 * Upsert of a 'User' vertex: looks the vertex up by id and, when the lookup is
 * empty, adds a new 'User' vertex carrying that id.
 *
 * @param {string} id Vertex id to look up or create.
 * @returns {Promise<object>} Whatever `.id().next()` resolves to on the traversal.
 */
async function query(id) {
  const folded = g.V(id).fold();
  const existingVertex = __.unfold();
  const createdVertex = __.addV('User').property(t.id, id);
  const upsert = folded.coalesce(existingVertex, createdVertex);
  return upsert.id().next();
}
// Lambda entry point. Picks a random id and runs query(id) through async.retry,
// returning the promise that async.retry produces.
exports.handler = async (event, context) => {
// Random integer in [0, 10000), converted to a string and used as the vertex id.
const id = Math.floor(Math.random() * 10000).toString();
return async.retry(
{
// Retry options: 5 attempts, interval of 1000 (presumably milliseconds; confirm
// against the async library documentation).
times: 5,
interval: 1000,
// Returns true when the failed attempt should be retried, false otherwise.
errorFilter: function (err) {
// Add filters here to determine whether error can be retried
console.warn('Determining whether retriable error: ' + err.message);
// Check for connection issues
if (err.message.startsWith('WebSocket is not open')){
console.warn('Reopening connection');
// Replace the module-level connection and traversal source before retrying.
// NOTE(review): the result of conn.close() is not awaited here.
conn.close();
conn = createRemoteConnection();
g = createGraphTraversalSource(conn);
return true;
}
// Check for ConcurrentModificationException
if (err.message.includes('ConcurrentModificationException')){
console.warn('Retrying query because of ConcurrentModificationException');
return true;
}
return false;
}
},
// The task being retried. `cb` is never invoked; presumably the async library
// treats an async function as a promise-returning task (confirm).
async function (cb) {
let result = await query(id);
return result['value'];
});
};
/**
 * Builds a new DriverRemoteConnection for the URL returned by connectionString().
 *
 * @returns {DriverRemoteConnection} A freshly constructed connection object.
 */
function createRemoteConnection() {
  const url = connectionString();
  const options = {
    mimeType: 'application/vnd.gremlin-v2.0+json',
    pingEnabled: false
  };
  return new DriverRemoteConnection(url, options);
}
/**
 * Creates a traversal source bound to the given remote connection.
 *
 * @param {object} conn Remote connection passed to withRemote().
 * @returns {object} The value returned by traversal().withRemote(conn).
 */
function createGraphTraversalSource(conn) {
  const source = traversal();
  return source.withRemote(conn);
}
/**
 * Assembles the Gremlin WebSocket URL from the `neptuneEndpoint` and `neptunePort`
 * environment variables.
 *
 * @returns {string} URL of the form wss://<endpoint>:<port>/gremlin.
 */
function connectionString() {
  const host = process.env['neptuneEndpoint'];
  const port = process.env['neptunePort'];
  return `wss://${host}:${port}/gremlin`;
}
Simple demo based off the TinkerPop documentation
// Demo handler: adds one 'person' vertex named 'stephen', then reads the 'name'
// property of every vertex and returns the list in a 201 response body.
const handler = async (event) => {
  // Add a person vertex whose name property is 'stephen'.
  const insertion = g.addV('person').property('name','stephen');
  await insertion.next();
  // Fetch all vertices and collect their name properties.
  const names = await g.V().values('name').toList();
  console.log(names);
  const payload = {message:"Testing Gremlin!", data:names};
  return {
    statusCode: 201,
    body: JSON.stringify(payload),
  };
}

Single Http Request to get multiple file data Parse.com

I'm using Back4app.
My Profile class schema has 4 File columns containing pictures.
So when I retrieve an object , I have to make an HTTP request for each file URL and get the byte data like this.
// Fetch one file over HTTP from its URL and return its bytes encoded as base64.
const data = await Parse.Cloud.httpRequest({url:profilePhoto.url()});
return data.buffer.toString('base64');
But for all four files I have to do 4 HTTP requests to the server.
Is there any way to do a batch HTTP request, so that I can get the data for all 4 files with just 1 request?
My main aim is to do the least amount of requests to the server as possible.
There is no out-of-the-box way to retrieve multiple files with one request in Parse Server.
You could implement your own Parse Cloud Code function to retrieve multiple files, but you would have to manually combine them server side and separate them client side.
As a starting point you could look at packages like multistream that allow you to combine multiple file streams into one to get some inspiration.
You might be able to do something similar to what I've done in cloud code.
I had to load up a bunch of information at the start of my application, requiring many round trips to the server.
So I wrote a function called getUserData().
This does many unrelated queries, and jams all of the results into one big object. I then return the object from the function.
Here is the entire function:
// Logged when this module is loaded.
console.log("startig getUserData");
// Incremented each time doGetUserData builds a result object.
var callCount = 0;
// Module-level timestamp: overwritten by getUserData on every request and read by
// the load* helpers to restrict queries to objects updated after it.
var lastLoadTime=0;
// Loads every friend of `user` through the "friends" relation. Each friend's
// objectId is flagged in ret.friends and the friend object itself is stored in
// ret.objects under the same id.
//
// The query is never restricted by date: the full friend list is always loaded,
// because otherwise removed friendships could not be recognised.
async function loadFriends(user, ret) {
  const relationQuery = user.relation("friends").query();
  const loaded = await findFully(relationQuery);
  for (const friend of loaded) {
    ret.friends[friend.id] = 1;
    ret.objects[friend.id] = friend;
  }
}
// Loads every PublicCell that `user` owns or is a member of and that was updated
// after lastLoadTime.
//
// For each cell that has an owner:
//   - the cell object is stored in ret.objects,
//   - its objectId is flagged in ret.ownedCells (user is the owner) or in
//     ret.joinedCells (someone else is the owner), never in both,
//   - its members are loaded; their ids are flagged in ret.memberMap[cell.id] and
//     the member objects are stored in ret.objects.
// Cells without an owner are skipped entirely.
//
// `memberIds` is accepted for compatibility with the caller but is not used here.
async function loadPublicCells(user, ret, memberIds) {
  const ownedCellQ = new Parse.Query('PublicCell');
  ownedCellQ.equalTo('owner', user);
  const joinedCellQ = new Parse.Query('PublicCell');
  joinedCellQ.equalTo('members', user);
  const publicCellQ = Parse.Query.or(ownedCellQ, joinedCellQ);
  publicCellQ.greaterThan("updatedAt", new Date(lastLoadTime));
  const publicCells = await findFully(publicCellQ);
  for (const cell of publicCells) {
    const owner = cell.get("owner");
    if (owner == null) {
      continue;
    }
    ret.objects[cell.id] = cell;
    // A cell is registered as owned only when the user really is its owner.
    // Registering every cell first would also list joined cells as owned.
    if (owner.id === user.id) {
      ret.ownedCells[cell.id] = 1;
    } else {
      ret.joinedCells[cell.id] = 1;
    }
    const memberQ = cell.relation("members").query();
    const members = await findFully(memberQ);
    if (ret.memberMap[cell.id] == null) {
      ret.memberMap[cell.id] = {};
    }
    const map = ret.memberMap[cell.id];
    for (const member of members) {
      map[member.id] = 1;
      ret.objects[member.id] = member;
    }
  }
}
// Loads the user objects for the given member ids and stores them in ret.objects.
// Which cell each member belongs to is not recorded here; that information lives
// in ret.memberMap.
//
// The ids are consumed in batches of up to 100: `memberIds` is emptied in place
// as the batches are taken from its front.
async function loadMembers(memberIds, ret) {
  const memberQ = new Parse.Query(Parse.User);
  while (memberIds.length > 0) {
    const batch = memberIds.splice(0, 100);
    memberQ.containedIn('objectId', batch);
    const found = await findFully(memberQ);
    found.forEach((member) => {
      ret.objects[member.id] = member;
    });
  }
}
// Flags, in ret.userSpams, the objectId of every _User whose "spamUsers" field
// matches `user`. Only ids are kept (nothing is added to ret.objects) because they
// are used solely for filtering.
//
// NOTE(review): the query is restricted to users updated after lastLoadTime, so
// this is not an exhaustive list on incremental loads; confirm that deletions are
// still detected as intended.
async function loadUserSpams(user, ret) {
  const spammedByQ = new Parse.Query("_User");
  spammedByQ.equalTo("spamUsers", user);
  spammedByQ.greaterThan("updatedAt", new Date(lastLoadTime));
  const matches = await findFully(spammedByQ);
  matches.forEach((match) => {
    ret.userSpams[match.id] = 1;
  });
}
// Flags, in ret.spamUsers, the objectId of every entry in the user's own
// "spamUsers" relation. Only ids are kept (nothing is added to ret.objects)
// because they are used solely for filtering.
//
// NOTE(review): the relation query is restricted to entries updated after
// lastLoadTime, so this is not an exhaustive list on incremental loads; confirm
// that deletions are still detected as intended.
async function loadSpamUsers(user, ret) {
  const relationQuery = user.relation('spamUsers').query();
  relationQuery.greaterThan("updatedAt", new Date(lastLoadTime));
  const matches = await findFully(relationQuery);
  matches.forEach((match) => {
    ret.spamUsers[match.id] = 1;
  });
}
// Loads every Request with status 'PENDING' in which `user` is either the owner
// or the recipient ("sentTo").
//   - Requests sent by the user: the recipient is stored in ret.pendingFriends
//     under the recipient's id.
//   - Requests sent to the user: the sender is stored in ret.friendingPends under
//     the sender's id.
// Requests missing either side are logged with console.warn and skipped.
async function loadPendingFriends(user, ret) {
  const outgoingQ = new Parse.Query('Request');
  outgoingQ.equalTo("owner", user);
  const incomingQ = new Parse.Query('Request');
  incomingQ.equalTo("sentTo", user);
  const pendingQ = Parse.Query.or(outgoingQ, incomingQ);
  pendingQ.equalTo("status", 'PENDING');
  const pending = await findFully(pendingQ);
  for (const request of pending) {
    const sentBy = request.get("owner");
    if (sentBy == null) {
      console.warn("sentBy==null");
      continue;
    }
    const sentTo = request.get("sentTo");
    if (sentTo == null) {
      console.warn("sentTo==null");
      continue;
    }
    console.dump({sentTo,sentBy});
    const userIsSender = sentBy.id == user.id;
    if (userIsSender) {
      ret.pendingFriends[sentTo.id] = sentTo;
    } else if (sentTo.id == user.id) {
      ret.friendingPends[sentBy.id] = sentBy;
    }
  }
}
// Loads every PrivateCell owned by `user` that was updated after lastLoadTime.
// Each cell is stored in both ret.objects and ret.privateCells; the ids of its
// members are flagged in ret.memberMap[cell.id] and the member objects are stored
// in ret.objects.
async function loadPrivateCells(user, ret) {
  const privateCellQ = new Parse.Query('PrivateCell');
  privateCellQ.equalTo("owner", user);
  privateCellQ.greaterThan("updatedAt", new Date(lastLoadTime));
  const privateCells = await findFully(privateCellQ);
  for (const cell of privateCells) {
    ret.objects[cell.id] = cell;
    ret.privateCells[cell.id] = cell;
    if (ret.memberMap[cell.id] == null) {
      ret.memberMap[cell.id] = {};
    }
    const cellMembers = ret.memberMap[cell.id];
    const members = await findFully(cell.relation("members").query());
    members.forEach((member) => {
      cellMembers[member.id] = 1;
      ret.objects[member.id] = member;
    });
  }
}
// Objects are used as maps while accumulating so that duplicates collapse onto
// one key. This replaces the map stored at ret[k] with an array of its values
// (the keys are dropped because they already exist inside the values) and records
// the array length in ret.counts[k].
function objToValueList(k, ret) {
  const source = ret[k];
  const values = [];
  for (const key in source) {
    values.push(source[key]);
  }
  ret[k] = values;
  ret.counts[k] = values.length;
}
// Replaces the map stored at ret[k] (an object used to accumulate ids as keys)
// with an array of those keys, and records the array length in ret.counts[k].
// k is the name of the list being converted.
function objToKeyList(k, ret) {
  const source = ret[k];
  const keys = [];
  for (const key in source) {
    keys.push(key);
  }
  ret[k] = keys;
  ret.counts[k] = keys.length;
}
// Checks whether `user` has consented to the most recently created PrivacyPolicy.
// Returns true when no policy exists at all, otherwise true only when the user's
// "lastConsent" is set and its id equals the id of the newest policy.
async function checkUserConsent(user) {
  const policyQ = new Parse.Query("PrivacyPolicy");
  policyQ.descending("createdAt");
  policyQ.limit(1);
  const policies = await policyQ.find();
  if (policies.length == 0) {
    return true;
  }
  const latest = policies[0];
  console.dump(latest);
  console.log(latest);
  const consent = user.get("lastConsent");
  if (consent == null) {
    return false;
  }
  return consent.id == latest.id;
}
// Loads the alerts relevant to `user`: alerts the user owns plus alerts that one
// of the user's Response objects points to through its "alert" key. Only alerts
// updated after the later of lastLoadTime and 24 hours ago are requested.
// Each alert's id is flagged in ret.alerts and the alert is stored in ret.objects.
async function loadAlerts(user, ret) {
  const ownedAlertQ = new Parse.Query("Alert");
  ownedAlertQ.equalTo("owner", user);
  const responseQ = new Parse.Query("Response");
  responseQ.equalTo("owner", user);
  const respondedAlertQ = new Parse.Query("Alert");
  respondedAlertQ.matchesKeyInQuery("objectId", "alert", responseQ);
  const alertQ = Parse.Query.or(ownedAlertQ, respondedAlertQ);
  const oneDayAgo = new Date().getTime() - 1000 * 86400;
  const candidate = Math.max(lastLoadTime, oneDayAgo);
  // Math.max yields NaN when lastLoadTime is not a valid time; fall back to the
  // one-day window rather than sending an invalid date.
  const since = Number.isNaN(candidate) ? oneDayAgo : candidate;
  // The constraint has to be in place before find() runs, and it is passed as a
  // Date like the other updatedAt filters in this file.
  alertQ.greaterThan("updatedAt", new Date(since));
  const list = await alertQ.find();
  for (const item of list) {
    ret.alerts[item.id] = 1;
    ret.objects[item.id] = item;
  }
}
// Builds the combined start-up payload for `user` by running all load* helpers in
// sequence and converting the accumulated maps into arrays.
//
// Returns {fatal: 'not logged in!'} when no user is given. Otherwise returns an
// object in which:
//   - owner is the user's id,
//   - friends, friendingPends, pendingFriends, privateCells, ownedCells,
//     joinedCells, userSpams, spamUsers and alerts are arrays of objectIds,
//   - objects is an array of the loaded objects (the user itself is excluded),
//   - memberMap maps a cell id to an array of member ids,
//   - loadTime is the module-level lastLoadTime at the time of the call.
// The bookkeeping object ret.counts is removed before returning.
async function doGetUserData(user) {
  if (!user)
    return {fatal: 'not logged in!' };
  const ret = {
    owner: {},
    privateCells: {},
    friends: {},
    alerts: {},
    objects: {},
    ownedCells: {},
    joinedCells: {},
    spamUsers: {},
    userSpams: {},
    pendingFriends: {},
    friendingPends: {},
    memberMap: {},
    loadTime: lastLoadTime,
    counts: {callCount: callCount++},
  };
  // Wait for the refresh so the helpers work with up-to-date user data and a
  // failed fetch surfaces here instead of as an unhandled rejection.
  await user.fetch();
  ret.owner = user.id;
  const memberIds = {};
  // Registered temporarily; removed again before the objects are listed.
  ret.objects[user.id] = user;
  console.log("loadFriends");
  await loadFriends(user, ret);
  console.log("loadPrivateCells");
  await loadPrivateCells(user, ret, memberIds);
  console.log("loadPublicCells");
  await loadPublicCells(user, ret, memberIds);
  console.log("loadPendingFriends");
  await loadPendingFriends(user, ret);
  console.log("loadUserSpams");
  await loadUserSpams(user, ret);
  console.log("loadSpamUsers");
  await loadSpamUsers(user, ret);
  console.log("loadAlerts");
  await loadAlerts(user, ret);
  const memberList = [];
  for (const id in memberIds) {
    console.log(ret.objects[id]);
    memberList.push(id);
  }
  console.log("loadMembers");
  await loadMembers(memberList, ret);
  // Turn each per-cell member map into a plain array of member ids.
  for (const cell in ret.memberMap) {
    ret.memberMap[cell] = Object.keys(ret.memberMap[cell]);
  }
  delete ret.objects[user.id];
  [
    'friends', "friendingPends", 'pendingFriends',
    'privateCells', 'ownedCells', 'joinedCells',
    'userSpams', 'spamUsers', "alerts"
  ].forEach((k) => {
    objToKeyList(k, ret);
  });
  objToValueList('objects', ret);
  delete ret.counts;
  return ret;
}
// Cloud function body: loads the combined user data for req.user.
//
// req.params.lastLoadTime (optional) is the load time returned by a previous
// call; when it is missing, 0 is used. The value is converted to a Date and stored
// in the module-level lastLoadTime, which the load* helpers read.
// NOTE(review): lastLoadTime is shared module state, so overlapping requests can
// overwrite each other's value; confirm whether that can happen in this deployment.
//
// Returns the object built by doGetUserData with loadTime set to the time (epoch
// milliseconds) at which this call started. On failure the error is logged and an
// Error with the message "error getting data: <original error>" is thrown.
async function getUserData(req) {
  try {
    const nextLoadTime = new Date().getTime();
    const user = req.user;
    console.log(user);
    lastLoadTime = req.params.lastLoadTime;
    if (lastLoadTime == null)
      lastLoadTime = 0;
    lastLoadTime = new Date(lastLoadTime);
    const ret = await doGetUserData(user);
    ret.loadTime = nextLoadTime;
    return ret;
  } catch (err) {
    console.log(err);
    // `stack` is a string property, not a method.
    if (err != null && err.stack) {
      console.log(err.stack);
    }
    // Throw a real Error (not a bare string) so callers get a stack trace.
    throw new Error(`error getting data: ${err}`);
  }
}
// Register getUserData as the cloud function named "getUserData".
Parse.Cloud.define("getUserData", getUserData);
Something like this could easily be done to get your data for you. Like this solution, it is unlikely to be entirely pretty, but it would probably work.

How to convert the auth response into array of objects?

I am trying to fetch all users with the Firebase Admin auth function and write them to an Excel sheet using the xlsx-populate library. I am able to convert each response into an array of objects, but because the limit is 1000 users per call, I end up with multiple arrays of objects, and I cannot figure out how to combine them. In the code below I simply fetch the results using auth and try to collect them into an array of objects. I also tried passing the objects into the Excel sheet, but the resulting sheet only contains the response of the last 1000-user query.
const admin = require("firebase-admin");
const momentTz = require("moment-timezone");
const XlsxPopulate = require("xlsx-populate");
momentTz.suppressDeprecationWarnings = true;
const {
alphabetsArray
} = require("./constant");
// `start` first holds a formatted timestamp string for the start of the day four
// days ago...
var start = momentTz().subtract(4, "days").startOf("day").format();
// ...and is then redeclared as the numeric value of that timestamp.
var start = momentTz(start).valueOf();
// Formatted timestamp string for the end of yesterday.
// NOTE(review): neither `start` nor `end` is referenced by the listing code shown
// below, which compares against a hard-coded timestamp instead; confirm whether
// they were meant to bound the report.
const end = momentTz().subtract(1, "days").endOf("day").format();
// Lists all Firebase Auth users, 1000 per page, starting at `nextPageToken`
// (undefined = first page), and writes those created at or after the cut-off
// timestamp to the "Signup Report" sheet of ./SignUp.xlsx.
//
// All pages are fetched sequentially inside one call, so there is exactly one
// workbook and one result array, and the file is written once after the last page.
//
// Returns a promise for a single array holding userRecord.toJSON() of every
// matching user across all pages. Errors from listUsers or from writing the file
// reject the returned promise.
const listAllUsers = async (nextPageToken) => {
  const workbook = await XlsxPopulate.fromBlankAsync();
  const reportSheet = workbook.addSheet("Signup Report");
  workbook.deleteSheet("Sheet1");
  reportSheet.row(1).style("bold", true);
  [
    "Date",
    "TIME",
    "Phone Number"
  ].forEach((field, index) => {
    reportSheet.cell(`${alphabetsArray[index]}1`).value(field);
  });
  const data = [];
  let pageToken = nextPageToken;
  do {
    // List batch of users, 1000 at a time.
    const listUsersResult = await admin.auth().listUsers(1000, pageToken);
    for (const userRecord of listUsersResult.users) {
      const time = userRecord.metadata.creationTime;
      const timestamp = momentTz(time).valueOf();
      if (timestamp >= 1585704530967) {
        // Row 1 holds the headers, so the n-th match goes to row n + 1.
        const row = data.length + 2;
        data.push(userRecord.toJSON());
        reportSheet.cell(`A${row}`).value(time);
        reportSheet.cell(`C${row}`).value(userRecord.phoneNumber);
      }
    }
    // A missing pageToken means the last page has been read.
    pageToken = listUsersResult.pageToken;
  } while (pageToken);
  console.log(JSON.stringify(data));
  await workbook.toFileAsync("./SignUp.xlsx");
  return data;
};
// Start listing users from the beginning, 1000 at a time.
listAllUsers();
and the output i am getting is like this
[]
[]
[]
[]
[]
i want to convert this into a single array of response
You have a race condition. When console.log(JSON.stringify(data)) runs, your listUsers query is still in progress (it runs asynchronously), so the answer has not arrived yet when you print the array. Thus the array is empty.
Try this (I'm not sure this is the optimal solution; I'm not a Node.js developer):
// Fragment meant to replace the listUsers call inside listAllUsers; it relies on
// `nextPageToken`, `count`, `data`, `reportSheet` and `workbook` from that function.
admin
  .auth()
  .listUsers(1000, nextPageToken)
  .then(async (listUsersResult) => {
    listUsersResult.users.forEach((userRecord) => {
      const time = userRecord.metadata.creationTime;
      const timestamp = momentTz(time).valueOf();
      // console.log(timestamp)
      if (timestamp >= 1585704530967 ) {
        console.log(time);
        let column = count+2;
        count++;
        data.push(userRecord.toJSON())
        reportSheet.cell(`A${column}`).value(time);
        reportSheet.cell(`C${column}`).value(userRecord.phoneNumber);
      }
    });
    console.log(JSON.stringify(data))//this is the array of the object and i am getting after 1000 response
    if (listUsersResult.pageToken) {
      // List next batch of users and wait for it before writing the file, so
      // the write does not race with the pages that are still loading.
      await listAllUsers(listUsersResult.pageToken);
      await workbook.toFileAsync("./SignUp.xlsx");
    }
  });

Google api SQL database dump via google cloud functions

With a Node.js script using googleapis I dump all the databases created on my Google Cloud SQL instance. The dump generates a single file for all databases, which I store in a bucket. My goal is to have one file per database instead of one file for all of them. The main problem is that if I run an export request for database A, I can't run another one for database B until the first one is done.
You may use Async with callback in order to run the exports sequentially, you may use the operations list method that will get you the status of the exports in order to be able to know when the export has finished and when to move to the next step using callback. For more information check this other post
My solution is to use a recursive function like this:
"use strict"
const { google } = require("googleapis");
const { auth } = require("google-auth-library");
const dateFormat = require('date-format');
// API client obtained from googleapis for version "v1beta4"; presumably the
// Cloud SQL Admin API (confirm).
var sqladmin = google.sql("v1beta4");
// Project and SQL instance whose databases are exported.
const project = "my-project-name";
const instanceName = "my-sql-instance-name";
// Names of the non-system databases to export; filled in by dumpDatabase.
const dbToDump = [];
exports.dumpDatabase = (_req, res) => {
async function dump() {
let count = 0;
let currentRequestName = '';
const authRes = await auth.getApplicationDefault();
var authClient = authRes.credential;
let databases = await sqladmin.databases.list({
project: project,
instance: instanceName,
auth: authClient
});
for (let i = 0; i < databases.data.items.length; i++) {
const element = databases.data.items[i];
// the system databases will be omitted
if (
element.name != "information_schema" &&
element.name != "sys" &&
element.name != "mysql" &&
element.name != "performance_schema"
) {
dbToDump.push(element.name);
}
}
async function recursiveCall() {
//exit condition
if (count >= dbToDump.length) {
res.status(200).send("Command complete");
return true;
}
// no request running
if (currentRequestName == '') {
// set data for next export call
var request = {
project: project,
instance: instanceName,
resource: {
exportContext: {
kind: "sql#exportContext",
fileType: "SQL",
uri: 'gs://my-gsc-bucket/${dbToDump[count]}-${dateFormat.asString('yyyyMMddhhmm', new Date())}.gz',
databases: [dbToDump[count]]
}
},
auth: authClient
};
let r = await sqladmin.instances.export(request); //dump start
currentRequestName = r.data.name;
}
// call to monitor request status
let requestStatus = await sqladmin.operations.get({ project: project, operation: currentRequestName, auth: authClient });
if (requestStatus.data.status == 'DONE') {
// the current request is completed, prepare for next call
count++;
currentRequestName = '';
recursiveCall();
} else {
// wait 10 seconds before check status
setTimeout(recursiveCall, 10000)
}
}
recoursiveCall();
}
dump();
};
This works for me; the only additional setting needed is to increase the function timeout beyond 60 s.
Thanks to Andres S for the support.

SyntaxError: Unexpected identifier in json function

I am going to build a weather data pipeline that starts with an Internet of Things (IoT) device, utilizes a message queue to receive and deliver data, leverages a serverless function to move the data to a data warehouse, and then creates a dashboard that displays the information. I am getting an error in the function.
/**
* Background Cloud Function to be triggered by PubSub.
*
* #param{
object
}event The Cloud Functions event.
* #param{
function
}callback The callback function.
*/
exports.subscribe = function (event,
callback){
const BigQuery = require('#google-cloud/bigquery');
const projectId = "iot2analytics-ca4"; //Enter your project ID here
const datasetId = "weatherData"; //Enter your BigQuery dataset name here
const tableId = "weatherDatatable"; //Enter your BigQuery table name here -- make sure it is setup correctly
const PubSubMessage = event.data;
// Incoming data is in JSON format
const incomingData = PubSubMessage.data ? Buffer.from(PubSubMessage.data,
'base64' ).toString():"{'sensorID':'na','timecollected':'1/1/1970 00:00:00','zipcode':'00000','latitude':'0.0','longitude':'0.0','temperature':'-273','humidity':'-1','dewpoint':'-273','pressure':'0'}" ;
const jsonData = JSON.parse(incomingData);
var rows = [
jsonData
] ;
console.log(`Uploading data:$ {
JSON.stringify(rows)
} ` );
// Instantiates a client
const bigquery = BigQuery( {
projectId:projectId
} );
// Inserts data into a table
bigquery
.dataset(datasetId)
.table(tableId)
.insert(rows)
.then((foundErrors) => {
rows.forEach((row) => console.log('Inserted:', row));
if (foundErrors && foundErrors.insertErrors != undefined) {
foundErrors.forEach((err) => {
console.log(' Error:', err);
})
}
})
.catch((err) => {
console.error(' ERROR:',
err);
} );
// [
END bigquery_insert_stream
] callback();
};
You are using single quotes to wrap the strings in the JSON text, which the JSON standard does not allow. Replace the single quotes with double quotes, or build a plain object and stringify it. For example:
// Placeholder reading, serialised with JSON.stringify so the fallback text is
// JSON with double-quoted keys and values.
const temp = JSON.stringify({
  "sensorID": "na",
  "timecollected": "1/1/1970 00:00:00",
  "zipcode": "00000",
  "latitude": "0.0",
  "longitude": "0.0",
  "temperature": "-273",
  "humidity": "-1",
  "dewpoint": "-273",
  "pressure": "0"
});
// Use the decoded message payload when there is one, otherwise the placeholder.
const incomingData = PubSubMessage.data
  ? Buffer.from(PubSubMessage.data, 'base64').toString()
  : temp;

Resources