Google Document AI - Inconsistent Long Running Operation's metadata JSON representation - node.js

While checking the status of a Document AI long-running operation (form processor), the JSON representation of decodedOperation.metadata seems to vary during execution.
I suspect that the operation response does not resolve straight away, despite using then() on checkBatchProcessDocumentsProgress(operation.name).
This behaviour does not happen with similar code for Google Speech's LROs.
Context:
At the first console.log in the code below, decodedOperation.metadata resolves, as expected, to
{
  "state": "RUNNING",
  "createTime": {
    "seconds": "1669278029",
    "nanos": 500249000
  },
  "updateTime": {
    "seconds": "1669278029",
    "nanos": 500249000
  }
}
Current behaviour:
At the second console.log, decodedOperation.metadata.state returns 2 (!?)
Expected behaviour:
decodedOperation.metadata.state should return "RUNNING".
More details of the output are in the code below.
Reproduction details:
Environment:
Node.js 12.02
package.json:
{
  "dependencies": {
    "@google-cloud/documentai": "latest"
  }
}
Code:
function run(callback) {
  const documentai = require('@google-cloud/documentai').v1;
  // Client options with service-account credentials
  const options = {
    credentials: {
      client_email: ** ** ** ,
      private_key: ** ** * ,
    },
    projectId: ** ** *
  };
  const client = new documentai.DocumentProcessorServiceClient(options);
  return client.checkBatchProcessDocumentsProgress(properties.operation)
    .then((decodedOperation) => {
      console.log("METADATA " + JSON.stringify(decodedOperation.metadata));
      /* logs to console:
      {
        "state": "RUNNING",
        "createTime": {
          "seconds": "1669278029",
          "nanos": 500249000
        },
        "updateTime": {
          "seconds": "1669278029",
          "nanos": 500249000
        }
      }
      /// then
      {
        "state": "SUCCEEDED",
        "createTime": {
          "seconds": "1669278029",
          "nanos": 500249000
        },
        "updateTime": {
          "seconds": "1669278048",
          "nanos": 758825000
        },
        "individualProcessStatuses": [
          {
            "inputGcsSource": "gs://bucket/intake-form.pdf",
            "status": {},
            "outputGcsDestination": "gs://bucket/ocr/7371120297544371692/0",
            "humanReviewStatus": {
              "state": "SKIPPED",
              "stateMessage": "HumanReviewConfig is DISABLED, skipping human review."
            }
          }
        ]
      }
      */
      console.log("STATE " + JSON.stringify(decodedOperation.metadata.state));
      /* logs to console: 2
         when above is "RUNNING" */
      /* logs to console: 3
         when above is "SUCCEEDED" */
      if (decodedOperation.metadata.state == "SUCCEEDED") {
        // Never triggers, as decodedOperation.metadata.state evaluates to an integer at this line
      }
      const response = {
        "operationStatus": decodedOperation.metadata.state
      };
      callback(undefined, response);
    })
    .catch((err) => {
      callback(err);
    });
}
Update on investigation
util.inspect(decodedOperation.metadata, { showHidden: false }) returns:
BatchProcessMetadata {
  individualProcessStatuses: [
    IndividualProcessStatus {
      inputGcsSource: 'gs://bucketxxx/intake-form.pdf',
      status: Status {},
      outputGcsDestination: 'gs://bucketxxx/ocr/7999521463088838887/0',
      humanReviewStatus: HumanReviewStatus {}
    }
  ],
  state: 3,
  createTime: Timestamp {
    seconds: Long { low: 1670011754, high: 0, unsigned: false },
    nanos: 105214000
  },
  updateTime: Timestamp {
    seconds: Long { low: 1670011773, high: 0, unsigned: false },
    nanos: 489028000
  }
}
util.inspect(decodedOperation.metadata, { showHidden: true }) returns (section of interest only):
[...] [root]: [Getter], [fullName]: [Getter] }, State: { STATE_UNSPECIFIED: 0, WAITING: 1, RUNNING: 2, SUCCEEDED: 3, CANCELLING: 4, CANCELLED: 5, FAILED: 6, '0': 'STATE_UNSPECIFIED', '1': 'WAITING', '2': 'RUNNING', '3': 'SUCCEEDED', '4': 'CANCELLING', '5': 'CANCELLED', '6': 'FAILED' }, encode: <ref *5> [Function: BatchProcessMetadata$encode] [...]

To fix this issue, you can access the string representation of the state enum value by using the documentai.v1.BatchProcessMetadata.State object. For example:
console.log("STATE " + documentai.v1.BatchProcessMetadata.State[decodedOperation.metadata.state]);
instead of
console.log("STATE " + JSON.stringify(decodedOperation.metadata.state));
Read more about it here:
https://cloud.google.com/php/docs/reference/cloud-document-ai/latest/V1.BatchProcessMetadata.State
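As a sketch, assuming the generated protos are exported under the client package's protos property (as in other generated Google Cloud libraries; verify the path against your client version), you can also compare against the enum constant directly instead of decoding the number:

// Hypothetical protos path - check it matches your @google-cloud/documentai version
const { BatchProcessMetadata } =
  require('@google-cloud/documentai').protos.google.cloud.documentai.v1;

if (decodedOperation.metadata.state === BatchProcessMetadata.State.SUCCEEDED) {
  // safe to read decodedOperation.metadata.individualProcessStatuses here
}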

Related

Adding additional spec files to an Angular project, not loading/defining correctly?

Caveat: I am not the author of this project. Whoever originally wrote this is no longer with the organization and I am seemingly the most knowledgeable on this topic at this point.
I know a little about JavaScript and unit tests, so I successfully added one .spec.js file. I tried adding a second one for another module, reusing a lot of the spec setup, and it immediately broke.
Project resources:
Nodejs 12.16.1
jasmine-node-karma: "^1.6.1"
karma: "^6.3.12"
Contents of ./karma.conf.js:
module.exports = function(config) {
  config.set({
    basePath: './public',
    frameworks: ['jasmine', 'jquery-3.2.1'],
    files: [
      "../node_modules/angular/angular.js",
      "../node_modules/angular-mocks/angular-mocks.js",
      "../node_modules/bootstrap/dist/js/bootstrap.js",
      "../public/**/*.js",
    ],
    exclude: [],
    preprocessors: {},
    client: {
      captureConsole: true
    },
    browserConsoleLogOptions: {
      terminal: true,
      level: ""
    },
    reporters: ['progress'],
    port: 9876,
    colors: true,
    logLevel: config.LOG_INFO,
    autoWatch: true,
    browsers: ['FirefoxHeadless', 'ChromeHeadlessNoSandbox', 'PhantomJS'],
    customLaunchers: {
      ChromeHeadlessNoSandbox: {
        base: 'ChromeHeadless',
        flags: ['--no-sandbox']
      },
      FirefoxHeadless: {
        base: 'Firefox',
        flags: ['-headless'],
      }
    },
    singleRun: false,
    concurrency: Infinity
  })
}
Originally I added ./public/controllers.spec.js to match the existing ./public/controllers.js. These unit tests pass and continue to do so.
Yesterday I added ./public/backups/backupcontrollers.spec.js to match ./public/backups/backupcontrollers.js.
Contents of ./public/backups/backupcontrollers.js:
/**
 * Angular controller.
 */
'use strict'
const backupApp = angular.module('backup', [])
const backupTypePath = 'elasticsearch'

backupApp.controller('BackupFormController', ['$scope', '$filter', '$http', function ($scope, $filter, $http) {
  console.log('Started BackupFormController')
  $scope.itemInstances = []
  $scope.fetchStatus = 'Ready!'

  $scope.processSelection = function (item, backupType = backupTypePath) {
    $scope.currentItem = item.metadata.name
    $scope.getBackup(backupType)
    console.log('currentItem after selecting from dropdown: ' + $scope.currentItem)
  }

  $scope.init = function (backupType = backupTypePath) {
    $scope.refreshItemInstances(backupType)
    console.log('currentItem after loading page for first time: ' + $scope.currentItem)
  }

  $scope.getBackup = function (backupType = backupTypePath) {
    const path = `/v1/backup/${backupType}`
    $scope.fetchStatus = `Fetching Backups for Item ${$scope.currentItem}...`
    console.log(`Fetching backups for item from ${path}`)
    $http.get('/api', { headers: { path: path, item: $scope.currentItem } })
      .success(function (data, status, headers, config) {
        console.log(`Got data from GET on path ${path}, HTTP status ${status}: ${JSON.stringify(data)}`)
        if (typeof data === 'string' || data instanceof String) {
          $scope.backups = data.split(/\r?\n/)
        } else {
          $scope.backups = data
        }
        $scope.fetchStatus = 'Ready!'
        console.log('Done fetching backup list for item:' + $scope.currentItem + '!')
      })
      .error(function (data, status, header, config) {
        console.log(data)
        $scope.fetchStatus = 'Ready!'
      })
  }

  // Refresh the list of displayed Item instances
  $scope.refreshItemInstances = function (backupType = backupTypePath) {
    console.log('Fetching list of all items in the system ...')
    $scope.fetchStatus = 'Fetching Items ... '
    $http.get('/env')
      .success(function (data, status, headers, config) {
        console.log(data)
        for (let i = 0; i < data.length; i++) {
          $scope.itemInstances.push(data[i])
        }
        $scope.currentItem = $scope.itemInstances[0].metadata.name
        console.log('Done fetching list of all items!')
        console.log('currentItem after fetching list of all items: ' + $scope.currentItem)
        $scope.fetchStatus = 'Ready!'
        $scope.getBackup(backupType)
      })
      .error(function (data, status, header, config) {
        console.log(data)
        $scope.fetchStatus = 'Ready!'
      })
  }
}])
Contents of ./public/backups/backupcontrollers.spec.js:
describe('BackupFormController', function () {
  let $controller, $rootScope, $httpBackend

  beforeEach(module('backup'))

  const mockBackupString = 'string of backup data'
  const mockBackupData = {
    body: mockBackupString
  }
  const mockItemsUnsorted = [
    {
      metadata: {
        name: 'prod-mock-1',
        spec: 'asdf',
        status: 'ok'
      },
      notes: []
    },
    {
      metadata: {
        name: 'dev-mock-1',
        spec: 'asdf',
        status: 'ok'
      },
      notes: []
    },
    {
      metadata: {
        name: 'integ-mock-1',
        spec: 'asdf',
        status: 'ok'
      },
      notes: []
    }
  ]

  beforeEach(inject(function ($injector) {
    $rootScope = $injector.get('$rootScope')
    const $controller = $injector.get('$controller')
    $httpBackend = $injector.get('$httpBackend')
    const mockEnv = $httpBackend.when('GET', '/env')
      .respond(mockItemsUnsorted)
    const mockAPI = $httpBackend.when('GET', '/api')
      .respond(mockBackupString)
    const createController = function () {
      return $controller('BackupFormController', { '$scope': $rootScope })
    }
  }))

  describe('$scope.getBackup', function () {
    beforeEach(function () {
      spyOn(console, 'log')
    })

    it('should GET /api and set $scope.backups', function () {
      controller = createController()
      console.log('Dumping fetchStatus: ', $rootScope.fetchStatus)
      $rootScope.init()
      $httpBackend.flush()
      expect($rootScope.backups).toEqual(mockBackupString)
      expect(console.log).toHaveBeenCalled()
    })
  })
})
It seems like this new spec isn't working correctly at all; when I run npm test I see the normal successful tests from ./public/controllers.spec.js but also:
Chrome Headless 105.0.5195.125 (Mac OS 10.15.7) BackupFormController $scope.getBackup should GET /api and set $scope.backups FAILED
ReferenceError: createController is not defined
at UserContext.<anonymous> (backup/backupcontrollers.spec.js:51:7)
at <Jasmine>
This is the only output concerning ./public/backups/backupcontrollers.spec.js.
Has anybody run into this before? I found some posts regarding including angular-mocks, but as you can see in karma.conf.js, it's being included.
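For what it's worth, that ReferenceError looks like a plain JavaScript scoping issue rather than a Karma or angular-mocks problem: both createController and the inner $controller are declared with const inside the beforeEach callback, so neither is visible inside the it block. A minimal sketch of the usual arrangement (hoisting the declarations; this is illustrative, not the original project's code):

describe('BackupFormController', function () {
  let $controller, $rootScope, $httpBackend, createController

  beforeEach(module('backup'))

  beforeEach(inject(function ($injector) {
    $rootScope = $injector.get('$rootScope')
    $controller = $injector.get('$controller') // assign the outer variable, no shadowing `const`
    $httpBackend = $injector.get('$httpBackend')
    $httpBackend.when('GET', '/env').respond(mockItemsUnsorted)
    $httpBackend.when('GET', '/api').respond(mockBackupString)
    createController = function () {
      return $controller('BackupFormController', { '$scope': $rootScope })
    }
  }))

  it('should GET /api and set $scope.backups', function () {
    const controller = createController() // now resolvable from this scope
    $rootScope.init()
    $httpBackend.flush()
  })
})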

Why is displayStart (DataTables 1.10) not working for me?

I am using DataTables (1.10.3), and whatever value I set in the displayStart field, the start parameter of the server request always goes as 0.
Here is my code:
this.table = $('#table').DataTable({
  displayStart: 100,
  order: [[0, 'desc']],
  processing: true,
  serverSide: true,
  searching: true,
  pageLength: 50,
  searchDelay: 1000,
  language: {
    lengthMenu: 'Show _MENU_ records per page'
  },
  dom: '<"top"il>rt<"bottom"p><"clear">',
  ajax: {
    url: <url>,
    type: 'POST',
    headers: {
      authorization: <token>
    },
    data: function (d) {
      // setting request data
    },
    dataSrc: (json) => {
      return json.data;
    },
    error: function (xhr, error, thrown) {
      if (xhr.status + '' === '401') {
        location.href = '/';
      }
    }
  },
  columns: this.getColumns(),
  drawCallback: function () {
    // some operations
  }
});
It seems to work fine if I initialise the table in the older style, like this:
this.table = $('#table').dataTable({...
But this initialisation breaks other preexisting function calls (like search and row) in the code.
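Side note: if only the old-style initialiser honours the start position, the 1.10 API is still reachable from it via .api(), which may spare those preexisting call sites. A sketch, assuming this.table holds the jQuery object returned by dataTable():

this.table = $('#table').dataTable({ displayStart: 100 /* , ...rest of the options */ });
var api = this.table.api(); // bridge from the legacy wrapper to the 1.10 API
api.search('term').draw();  // new-style calls keep working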
Can anyone suggest where I am going wrong and how can I fix this?
I am not sure if displayStart works with server-side processing.
I realize this is not an ideal solution, but if you don't find any other, you can override the pipeline method, forcing it to use whatever start value you want:
$.fn.dataTable.pipeline = function ( opts ) {
  return function ( request, drawCallback, settings ) {
    request.start = 20;
    return $.ajax( {
      "type": opts.method,
      "url": opts.url,
      "data": request,
      "dataType": "json",
      "success": drawCallback
    } );
  };
};
The example is taken from: https://datatables.net/examples/server_side/pipeline.html
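To wire the override in, the pipeline function is passed as the table's ajax option, as in the linked example (the url here is a placeholder):

this.table = $('#table').DataTable({
  serverSide: true,
  ajax: $.fn.dataTable.pipeline({
    url: '/your/data/endpoint', // placeholder
    method: 'POST'
  })
  // ...rest of the options as above
});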

Return a node js response inside session.withTransaction

I am using session.withTransaction() to execute multiple updates in MongoDB. Please note that promiseArray contains multiple Stock.update statements that update stock quantities.
await session.withTransaction(
  async () => {
    promiseResults = await Promise.all(promiseArray);
    for (const result of promiseResults) {
      recordCounter++;
      if (result.nModified === 1) {
        stockItemsNoUpdate.push(goodReturnSummary[recordCounter]);
      }
    }
    if (stockItemsNoUpdate.length > 0) {
      return res.status(200).send(response);
    }
    existingGoodReturnSummary = GoodReturn.build({
      _id: sheetId,
      goodReturnSummary,
      agency,
      createdBy,
    });
    await existingGoodReturnSummary.save({ session: session });
    existingGoodReturnSummary = await GoodReturn.calculateTotalGoodReturnAmount(
      existingGoodReturnSummary,
      session
    );
  },
  {
    readPreference: 'primary',
    readConcern: { level: 'local' },
    writeConcern: { w: 'majority' },
  }
);
If stockItemsNoUpdate.length > 0, I need to abort this transaction and send the response, which is done by the code segment below.
if (stockItemsNoUpdate.length > 0) {
  return res.status(200).send(response);
}
But I cannot do this because of the below error.
Any idea on how to resolve this?
Cheers
See Nodejs mongodb's Transaction API `withTransaction` always return null and https://jira.mongodb.org/browse/NODE-2014.
https://jira.mongodb.org/browse/NODE-2014?focusedCommentId=2420255&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-2420255 provides a workaround.
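A minimal sketch of that workaround, adapted to the code above (EARLY_EXIT and earlyExit are illustrative names, and transactionOptions stands for the read/write-concern object from the original call): reject the callback with a sentinel to abort the transaction, catch it outside withTransaction, and only touch res after the transaction has finished.

const EARLY_EXIT = Symbol('early exit');
let earlyExit = false;
try {
  await session.withTransaction(async () => {
    // ...run the Stock updates and collect stockItemsNoUpdate as above...
    if (stockItemsNoUpdate.length > 0) {
      throw EARLY_EXIT; // a rejected callback aborts the transaction
    }
    // ...build and save the GoodReturn summary as above...
  }, transactionOptions);
} catch (err) {
  if (err !== EARLY_EXIT) throw err; // real errors still propagate
  earlyExit = true;
}
if (earlyExit) {
  return res.status(200).send(response);
}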

Elasticsearch node js point in time search_phase_execution_exception

const body = {
  query: {
    geo_shape: {
      geometry: {
        relation: 'within',
        shape: {
          type: 'polygon',
          coordinates: [$polygon],
        },
      },
    },
  },
  pit: {
    id: "t_yxAwEPZXNyaS1wYzYtMjAxN3IxFjZxU2RBTzNyUXhTUV9XbzhHSk9IZ3cAFjhlclRmRGFLUU5TVHZKNXZReUc3SWcAAAAAAAALmpMWQkNwYmVSeGVRaHU2aDFZZExFRjZXZwEWNnFTZEFPM3JReFNRX1dvOEdKT0hndwAA",
    keep_alive: "1m",
  },
};
The query fails with search_phase_execution_exception at onBody.
Without pit the query works fine, but PIT is needed to retrieve more than 10,000 hits.
Well, using PIT in the NodeJS Elasticsearch client is not clear, or at least is not well documented. You can create a PIT using the client like:
const pitRes = await elastic.openPointInTime({
  index: index,
  keep_alive: "1m"
});
pit_id = pitRes.body.id;
But there is no way to use that pit_id in the search method, and it's not documented properly :S
BUT, you can use the scroll API as follows:
const scrollSearch = await elastic.helpers.scrollSearch({
  index: index,
  body: {
    "size": 10000,
    "query": {
      "query_string": {
        "fields": [ "vm_ref", "org", "vm" ],
        "query": organization + moreQuery
      }
    },
    "sort": [
      { "utc_date": "desc" }
    ]
  }
});
And then read the results as follows:
let res = [];
try {
  for await (const result of scrollSearch) {
    res.push(...result.body.hits.hits);
  }
} catch (e) {
  console.log(e);
}
I know that's not the exact answer to your question, but I hope it helps ;)
The usage of point-in-time for pagination of search results is now documented in Elasticsearch. You can find a more or less detailed explanation here: Paginate search results
I prepared an example that may give an idea about how to implement the workflow, described in the documentation:
const { Client } = require("@elastic/elasticsearch");

async function searchWithPointInTime(cluster, index, chunkSize, keepAlive) {
  if (!chunkSize) {
    chunkSize = 5000;
  }
  if (!keepAlive) {
    keepAlive = "1m";
  }
  const client = new Client({ node: cluster });
  let pointInTimeId = null;
  let searchAfter = null;
  try {
    // Open point in time
    pointInTimeId = (await client.openPointInTime({ index, keep_alive: keepAlive })).body.id;
    // Query next chunk of data
    while (true) {
      const response = await client.search({
        // Pay attention: no index here (because it will come from the point-in-time)
        body: {
          size: chunkSize,
          track_total_hits: false, // This will make the query faster
          query: {
            // (1) TODO: put any filter you need here (instead of match_all)
            match_all: {},
          },
          pit: {
            id: pointInTimeId,
            keep_alive: keepAlive,
          },
          // Sorting should be by _shard_doc or at least include _shard_doc
          sort: [{ _shard_doc: "desc" }],
          // The next parameter is very important - it tells Elastic to bring us the next portion
          ...(searchAfter !== null && { search_after: [searchAfter] }),
        },
      });
      const { hits } = response.body.hits;
      if (!hits || !hits.length) {
        break; // No more data
      }
      for (const hit of hits) {
        // (2) TODO: Do whatever you need with the results
      }
      // Check if we are done reading the data
      if (hits.length < chunkSize) {
        break; // We finished reading all the data
      }
      // Get the next value for the 'search after' position
      // by extracting the _shard_doc from the sort key of the last hit
      searchAfter = hits[hits.length - 1].sort[0];
    }
  } catch (ex) {
    console.error(ex);
  } finally {
    // Close the point in time
    if (pointInTimeId) {
      await client.closePointInTime({ body: { id: pointInTimeId } });
    }
  }
}
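A hypothetical invocation, with placeholder cluster URL and index name:

searchWithPointInTime('http://localhost:9200', 'my-index', 5000, '1m')
  .then(() => console.log('done'))
  .catch(console.error);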

How to return unauthorized response from before hook in feathersJS

I have parts of the app (modules) that are going to be forbidden for certain people, so I want to check that in a before hook and send an unauthorized response if needed.
I'm successfully throwing the error on the backend, but on my frontend I still get a successful response, as if there was no error.
Here is what my code looks like:
1. Function that checks if the app is forbidden for the user that sent the request:
function isAppForbidden(hook) {
  let forbiddenApps = [];
  hook.app.services.settings.find({
    query: {
      $limit: 1,
      $sort: {
        createdAt: -1
      }
    }
  }).then(res => {
    let array = hook.params.user.hiddenApps;
    if (array.indexOf('qualitydocs') >= 0 || res.data[0].forbiddenApps.indexOf('qualitydocs') >= 0) {
      hook.response = Promise.reject({error: '401 Unauthorized'});
      // this part is important, the rest not so much
      // what I'm expecting to do here is just to return an unauthorized response
    }
  });
  return hook;
}
But for now this just throws an error on the backend like:
"error: Unhandled Rejection at: Promise Promise {
{ error: { code: '401', message: 'Unauthorized' } } } code=401, message=Unauthorized"
And the frontend still gets a successful response (200 with the requested data).
And I just call this function in before hooks:
before: {
  all: [
    authenticate('jwt'),
    hook => includeBefore(hook),
    hook => isAppForbidden(hook) // here, the rest is not important
  ],
  find: [],
  get: [],
  create: [(hook) => {
    hook.data.authorId = hook.params.user.id;
  }],
  update: [],
  patch: [],
  remove: []
},
The response I'm expecting to get looks something like this:
Found the solution... the key was to wrap the content of the function in a promise, so it now looks like this:
// assumes: const errors = require('@feathersjs/errors');
function isAppForbidden(hook) {
  return new Promise((resolve, reject) => {
    hook.app.services.settings.find({
      query: {
        $limit: 1,
        $sort: {
          createdAt: -1
        }
      }
    }).then(res => {
      if (hook.params.user.hiddenApps.indexOf('qualitydocs') >= 0 || res.data[0].forbiddenApps.indexOf('qualitydocs') >= 0) {
        reject(new errors.NotAuthenticated());
      } else {
        resolve();
      }
    })
  })
}
and it works like a charm.
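For reference, the same check reads a bit more naturally as an async hook, since Feathers treats a thrown (rejected) hook as an error response. A sketch under that assumption, using @feathersjs/errors (not the original code):

const { NotAuthenticated } = require('@feathersjs/errors');

async function isAppForbidden(context) {
  const res = await context.app.service('settings').find({
    query: { $limit: 1, $sort: { createdAt: -1 } }
  });
  const hidden = context.params.user.hiddenApps;
  if (hidden.indexOf('qualitydocs') >= 0 ||
      res.data[0].forbiddenApps.indexOf('qualitydocs') >= 0) {
    throw new NotAuthenticated();
  }
  return context;
}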
