My transform pipeline does not wait until the message buffer loop has finished, and the destination does not receive the message. It only sends out when another message triggers it, and then the two messages go out together, so the destination receives both at once.
How can I make it wait until the message buffer has been read to the end, so that individual messages are sent out, please?
Start.js
import Proxy from "./Proxy.js";
const proxy = Proxy();
proxy.listen('4000');
Proxy.js
import net from "net";
import { pipeline } from "stream";
import { filter } from "../utils/transformStream.js";

export default () =>
  net.createServer((con1) => {
    const con2 = net.connect(1234, "127.0.0.1");

    pipeline(con1, con2, filter(), con1, (err) => {
      console.log("pipeline closed", err);
    });

    con2.on("data", (data) => {
      console.log("sent from con2:", data);
    });

    con1.on("data", (data) => {
      console.log("sent from con1:", data);
    });
  });
transformStream.js
import { Transform } from 'stream';
import { messageBuffer2 } from './msgBuffer2.js';
const filter = () => {
  return new Transform({
    async transform(chunk, encoding, cb) {
      if (chunk.toString("utf-8").includes("Unprocessed")) {
        cb(null, '');
      } else {
        messageBuffer2.push(chunk.toString("utf-8"));
        while (!messageBuffer2.isFinished()) {
          const message = messageBuffer2.handleData();
          cb(null, message);
        }
      }
    }
  });
};
export { filter };
Thanks.
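For reference, one likely cause of the "two messages arrive together" symptom is that the transform callback cb is invoked inside the while loop, i.e. more than once per chunk, which a Transform stream does not allow. Below is a minimal sketch of an alternative, assuming the messageBuffer2 API shown above (push/isFinished/handleData are taken from the question, not verified):

import { Transform } from 'stream';
import { messageBuffer2 } from './msgBuffer2.js';

// Sketch only: push every buffered message individually, then signal
// completion of this chunk exactly once via cb().
const filter = () => {
  return new Transform({
    transform(chunk, encoding, cb) {
      const text = chunk.toString('utf-8');
      if (text.includes('Unprocessed')) {
        return cb(null, '');
      }
      messageBuffer2.push(text);
      while (!messageBuffer2.isFinished()) {
        // this.push() may be called any number of times per chunk
        this.push(messageBuffer2.handleData());
      }
      cb(); // called once, after the buffer has been drained
    }
  });
};

export { filter };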
What I'm basically trying to achieve is to get all items of a MongoDB collection on a Node.js server, stream these items (in JSON format) over REST to another Node.js server, and pipe the incoming read stream into stream-json so the parsed objects can be persisted in another MongoDB afterwards.
(I need to use streams because my items can be deeply nested objects, which would consume a lot of memory. Additionally, I'm unable to access the first MongoDB from the second server directly due to strict network segmentation.)
The code I have so far already works for smaller amounts of data, but one collection holds about 1.2 GB, and for that one the processing on the receiving side keeps failing.
Here's the code of the sending server:
export const streamData = async (res: Response) => {
  try {
    res.type('json');
    const amountOfItems = await MyModel.count();
    if (JSON.stringify(amountOfItems) !== '0') {
      const cursor = MyModel.find().cursor();
      let first = true;
      cursor.on('error', (err) => {
        logger.error(err);
      });
      cursor.on('data', (doc) => {
        if (first) {
          // open json array
          res.write('[');
          first = false;
        } else {
          // add the delimiter before every object that isn't the first
          res.write(',');
        }
        // add json object
        res.write(`${JSON.stringify(doc)}`);
      });
      cursor.on('end', () => {
        // close json array
        res.write(']');
        res.end();
        logger.info('REST-API-Call to fetchAllItems: Streamed all items to the receiver.');
      });
    } else {
      res.write('[]');
      res.end();
      logger.info('REST-API-Call to fetchAllItems: Streamed an empty response to the receiver.');
    }
  } catch (err) {
    logger.error(err);
    return [];
  }
};
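With roughly 1.2 GB of data it may also matter that res.write()'s backpressure signal is ignored here. A hedged sketch of an alternative (assuming the same Mongoose cursor, which should be a Readable in object mode) pipes the cursor through a small Transform that frames the documents as a JSON array, so backpressure propagates back to the cursor automatically:

import { Transform, pipeline } from 'stream';

// Sketch only: frame object-mode documents as one JSON array.
const toJsonArray = () => {
  let first = true;
  return new Transform({
    writableObjectMode: true,
    transform(doc, _enc, cb) {
      const prefix = first ? '[' : ',';
      first = false;
      cb(null, prefix + JSON.stringify(doc));
    },
    flush(cb) {
      cb(null, first ? '[]' : ']'); // an empty result set still yields valid JSON
    },
  });
};

// Hypothetical usage inside the route handler:
// pipeline(MyModel.find().cursor(), toJsonArray(), res, (err) => {
//   if (err) logger.error(err);
// });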
And that's the receiving side:
import { MyModel } from '../models/my-model';
import axios from 'axios';
import { logger } from '../services/logger';
import StreamArray from 'stream-json';
import { streamArray } from 'stream-json/streamers/StreamArray';
import { pipeline } from 'stream';
const persistItems = async (items: Item[], ip: string) => {
  try {
    await MyModel.bulkWrite(items.map(item => {
      return {
        updateOne: {
          filter: { 'itemId': item.itemId },
          update: item,
          upsert: true,
        },
      };
    }));
    logger.info(`${ip}: Successfully upserted items to mongoDB`);
  } catch (err) {
    logger.error(`${ip}: Upserting items to mongoDB failed due to the following error: ${err}`);
  }
};

const getAndPersistDataStream = async (ip: string) => {
  try {
    const axiosStream = await axios(`http://${ip}:${process.env.PORT}/api/items`, { responseType: 'stream' });
    const jsonStream = StreamArray.parser({ jsonStreaming: true });
    let items: Item[] = [];
    const stream = pipeline(axiosStream.data, jsonStream, streamArray(),
      (error) => {
        if (error) {
          logger.error(`Error: ${error}`);
        } else {
          logger.info('Pipeline successful');
        }
      },
    );
    stream.on('data', (i: any) => {
      items.push(<Item>i.value);
      // wait until the array contains 500 objects, then bulkWrite them to the database
      if (items.length === 500) {
        persistItems(items, ip);
        items = [];
      }
    });
    stream.on('end', () => {
      // bulkWrite the last items to the mongodb
      persistItems(items, ip);
    });
    stream.on('error', (err: any) => {
      logger.error(err);
    });
    await new Promise(fulfill => stream.on('finish', fulfill));
  } catch (err) {
    if (err) {
      logger.error(err);
    }
  }
};
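One more thing worth noting on the receiving side: persistItems() is not awaited in the 'data' handler, so new rows keep arriving while bulk writes are still in flight. A hedged sketch of a variant that pauses the stream during each bulk write (same stream and persistItems as above):

// Sketch only: apply backpressure manually while the bulk write runs.
stream.on('data', async (i: any) => {
  items.push(<Item>i.value);
  if (items.length === 500) {
    stream.pause();              // stop 'data' events while writing
    await persistItems(items, ip);
    items = [];
    stream.resume();             // continue consuming the JSON stream
  }
});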
As I said, the problem only occurs with the bigger collection holding about 1.2 GB of data. It seems to happen a few seconds after the sending server closes the stream.
This is the error message I get on the receiving server:
ERROR: Premature close
err: {
"type": "NodeError",
"message": "Premature close",
"stack":
Error [ERR_STREAM_PREMATURE_CLOSE]: Premature close
at IncomingMessage.onclose (internal/streams/end-of-stream.js:75:15)
at IncomingMessage.emit (events.js:314:20)
at Socket.socketCloseListener (_http_client.js:384:11)
at Socket.emit (events.js:326:22)
at TCP.<anonymous> (net.js:676:12)
"code": "ERR_STREAM_PREMATURE_CLOSE"
}
Can I somehow prevent the read stream from closing too early?
The only workaround I can imagine right now is to save the stream to a local file first, then create a new read stream from that file, process/persist the data, and delete the file afterwards, although I would prefer not to do that. Additionally, I'm not quite sure whether that will work out or whether the premature-close issue will remain if I try to save a large dataset to a file.
Edit: Well, as I guessed, this approach results in the same error.
Is there a better approach I'm not aware of?
Thanks in advance!
Found a solution using a combination of WebSockets (via the ws stream API) and websocket-express to trigger the streaming over WebSockets via routes.
Backend
app.ts
import router from './router/router';
import WebSocketExpress from 'websocket-express';
const app = new WebSocketExpress();
const port = `${process.env.APPLICATION_PORT}`;
app.use(router);
app.listen(port, () => {
console.log(`App listening on port ${port}!`);
});
router.ts
import { Router } from 'websocket-express';
import streamData from './streamData';
const router = new Router();
router.ws('/my/api/path', streamData);
export default router;
streamData.ts (did some refactoring to the above version)
import { MyModel } from '../models/my-model';
import { createWebSocketStream } from 'ws';
export const streamData = async (res: Response) => {
  const ws = await res.accept();
  try {
    const duplex = createWebSocketStream(ws, { encoding: 'utf8' });
    duplex.write('[');
    let prevDoc: any = null;
    // ignore _id since it's going to be upserted into another database
    const cursor = MyModel.find({}, { _id: 0 }).cursor();
    cursor.on('data', (doc) => {
      if (prevDoc) {
        duplex.write(`${JSON.stringify(prevDoc)},`);
      }
      prevDoc = doc;
    });
    cursor.on('end', () => {
      if (prevDoc) {
        duplex.write(`${JSON.stringify(prevDoc)}`);
      }
      duplex.end(']');
    });
    cursor.on('error', (err) => {
      ws.close();
    });
    duplex.on('error', (err) => {
      ws.close();
      cursor.close();
    });
  } catch (err) {
    ws.close();
  }
};
Client (or the receiving server)
import { MyModel } from '../models/my-model';
import StreamArray from 'stream-json';
import { streamArray } from 'stream-json/streamers/StreamArray';
import { pipeline } from 'stream';
import WebSocket, { createWebSocketStream } from 'ws';
export const getAndPersistDataStream = async (ip: string) => {
  try {
    const ws = new WebSocket(`ws://${ip}:${process.env.PORT}/my/api/path`);
    try {
      const duplex = createWebSocketStream(ws, { encoding: 'utf8' });
      const jsonStream = StreamArray.parser({ jsonStreaming: true });
      let items: Items[] = [];
      const stream = pipeline(duplex, jsonStream, streamArray(), error => {
        if (error) {
          ws.close();
        }
      });
      stream.on('data', (i: any) => {
        items.push(<Items>i.value);
        if (items.length === 500) {
          persistItems(items, ip);
          items = [];
        }
      });
      stream.on('end', () => {
        persistItems(items, ip);
        ws.close();
      });
      stream.on('error', (err: any) => {
        ws.close();
      });
      await new Promise(fulfill => stream.on('finish', fulfill));
    } catch (err) {
      ws.close();
    }
  } catch (err) {
  }
};
(I removed a lot of the error-logging, which is why the catch block is empty.)
I am trying to figure out how to write unit tests for a WebSocket server that uses the ws library.
I did look at jest-websocket-mock, but I think that is meant for browser-based APIs, and I want to test the server itself using Jest.
Basic Code:
Server.js
import { createServer } from 'https';
import { WebSocketServer } from 'ws';
import { readFileSync } from 'fs';
const server = createServer({
  cert: readFileSync(config.certs.sslCertPath),
  key: readFileSync(config.certs.sslKeyPath),
});
const wss = new WebSocketServer({ noServer: true });

server.on('upgrade', (request, socket, head) => {
  const origin = request && request.headers && request.headers.origin;
  const corsRegex = <regex>;
  if (origin.match(corsRegex) != null) {
    wss.handleUpgrade(request, socket, head, (ws) => {
      wss.emit('connection', ws, request);
    });
  } else {
    socket.destroy();
  }
});

wss.on('connection', (ws, req) => {
  ws.on('message', (message) => {
    try {
      console.log(message);
    } catch (error) {
      console.log(error);
    }
  });
  ws.on('close', () => {
    console.log('close');
  });
  ws.on('error', (error) => {
    console.log(error);
  });
});
Can someone please help me with how I can test the original server?
You need to create some kind of dependency injection mechanism here. For example, let's move all the socket initialization logic into a separate function:
function initSocketEvents(wss) {
  wss.on('connection', (ws, req) => {
    ws.on('message', (message) => {
      try {
        console.log(message);
      } catch (error) {
        console.log(error);
      }
    });
    ws.on('close', () => {
      console.log('close');
    });
    ws.on('error', (error) => {
      console.log(error);
    });
  });
  return wss;
}
Now, at server initialization, just call the function from a different file:
...
const { initSocketEvents } = require("./socket-handler")
const wss = new WebSocketServer({ noServer: true });
initSocketEvents(wss);
...
Everything stays the same, except that it is now much easier to test.
In the test file:
const {initSocketEvents} = require("./socket-handler")
const { assert } = require('console');
const { EventEmitter } = require('events');
class MyTestWebSocket extends EventEmitter { }
const mockWSS = new MyTestWebSocket()
initSocketEvents(mockWSS)
mockWSS.emit('connection', mockWSS)
assert(mockWSS.listenerCount('connection')===1)
assert(mockWSS.listenerCount('message')===1)
assert(mockWSS.listenerCount('close')===1)
assert(mockWSS.listenerCount('error')===1)
Now it should be straightforward to separate each listener's logic, inject it from outside the function, and then assert the desired behaviour.
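Since the question specifically asks about Jest, the same check could also be expressed as a Jest test. A minimal sketch (same mock idea as above, using Jest's expect API; the extra request argument is illustrative):

// socket-handler.test.js — sketch only; assumes initSocketEvents is exported as shown above
const { initSocketEvents } = require('./socket-handler');
const { EventEmitter } = require('events');

test('registers connection and per-socket listeners', () => {
  const mockWSS = new EventEmitter();
  initSocketEvents(mockWSS);
  expect(mockWSS.listenerCount('connection')).toBe(1);

  // reuse the mock as the "ws" argument, as in the console.assert version
  mockWSS.emit('connection', mockWSS, { headers: {} });
  expect(mockWSS.listenerCount('message')).toBe(1);
  expect(mockWSS.listenerCount('close')).toBe(1);
  expect(mockWSS.listenerCount('error')).toBe(1);
});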
How can I fetch data from MQTT into a Vue app? I've established a properly working connection and I can console.log the data, but I'm not able to load the data into the component's data property.
created() {
  client.on("connect", function() {
    console.log("MQTT Connected");
    client.subscribe("#", function(err) {
      console.log(err);
    });
  });
  client.on("message", (topic, message) => {
    console.log("topic:", topic);
    console.log(message.toString());
    this.mqttData = JSON.parse(message.toString());
  });
},
data() {
  return {
    mqttData: {}
  }
};
Whenever I try to log mqttData to the console it appears to be an empty object. When I printed this inside the client.on callback I got the correct Vue instance with all of its fields and methods. This really bothers me, because I can access the Vue object but I cannot modify its contents.
Maybe try this in the "mounted" lifecycle hook. Here's an example of something I use that listens to a WebSocket; the implementation should be similar for your application.
mounted() {
  let connection = new WebSocket('wss://somesocket.net/ws')
  connection.onmessage = (event) => {
    console.log("Data received!")
    console.log(event.data)
    const data = JSON.parse(event.data)
    this.ws_data = data
  }
  connection.onopen = (event) => {
    console.log("Connected! Waiting for data...")
  }
},
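Applied to the MQTT client from the question, the same pattern might look like this (a sketch only, assuming the same client instance and mqtt.js-style connect/message events):

mounted() {
  client.on("connect", () => {
    console.log("MQTT Connected");
    client.subscribe("#", (err) => {
      if (err) console.log(err);
    });
  });
  client.on("message", (topic, message) => {
    // arrow functions keep `this` bound to the component instance
    this.mqttData = JSON.parse(message.toString());
  });
},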
This is how I did it using vue-mqtt package.
export default {
  data () {
    return {
      sensor: ''
    }
  },
  mqtt: {
    /** Read incoming messages from topic test */
    'test' (data) {
      this.sensor = data
      console.log(data)
    }
  },
  created () {
  },
  async mounted () {
    this.$mqtt = await this.$mqtt
    this.$mqtt.publish('test', 'hello from vuemqtt yo !!!!')
    this.$mqtt.subscribe('test')
  }
}
I am learning Node.js and databases. I am trying to stream heavy data, about 7,700,000 rows and 96 columns, from Oracle to the client; later I use that data for a virtual table. But the client shows only one row, and then the Node console displays the error "Cannot set headers after they are sent to the client". How do I stream the data to the client? Please help.
var oracledb = require('oracledb');
const cors = require('cors');
var express = require('express');
var app = express();
app.use(cors());

oracledb.outFormat = oracledb.ARRAY;

oracledb.getConnection({
    user: 'user',
    password: 'password',
    connectString: 'some string'
  },
  (err, connection) => {
    if (err) {
      console.error(err.message);
      return;
    }
    var rowsProcessed = 0;
    var startTime = Date.now();
    var dataSize = 0;
    var stream = connection.queryStream(
      'SELECT * FROM table',
    );
    // stream.on('data', function (data) {
    //   rowsProcessed++;
    //   // console.log(JSON.stringify(data));
    //   // console.log(data);
    //   dataSize = dataSize + data.length;
    //   // oracleData.push(data);
    //   // console.log("pushing");
    //   // console.log(oracleData);
    //   // app.get('/data', (req, res) => {
    //   //   res.send(data);
    //   // })
    //   // console.log(data);
    // });
    app.get('/data', (req, res) => {
      stream.on('data', (data) => {
        rowsProcessed++;
        dataSize = dataSize + data.length;
        res.send(JSON.stringify(data));
      })
    })
    stream.on('end', function () {
      var t = ((Date.now() - startTime) / 1000);
      console.log('queryStream(): rows: ' + rowsProcessed +
        ', seconds: ' + t);
      // console.log(dataSize + ' bytes');
      connection.close(
        function (err) {
          if (err) {
            console.error(err.message);
          } else {
            console.log("connection closed")
          }
        }
      )
    })
  }
);

app.listen(5000, () => {
  console.log('Listening at 5000')
})
I tried the approach above, but it is failing. How can I achieve the desired output?
The browser freezes if I output the entire dataset at once, which is why I'm trying to use streaming, and the Node command prompt shows an out-of-memory error if I load everything at once.
Thank you.
The first thing you'll want to do is organize your app a little better. Separation of concerns is important; you should have a connection pool, etc. Have a look at this series for some ideas: https://jsao.io/2018/03/creating-a-rest-api-with-node-js-and-oracle-database/
Once you get the organization figured out, incorporate this example of streaming a large result set out.
const oracledb = require('oracledb');

async function get(req, res, next) {
  try {
    const conn = await oracledb.getConnection();
    const stream = await conn.queryStream('select * from employees', [], {outFormat: oracledb.OBJECT});

    res.writeHead(200, {'Content-Type': 'application/json'});
    res.write('[');

    stream.on('data', (row) => {
      res.write(JSON.stringify(row));
      res.write(',');
    });

    stream.on('end', () => {
      res.end(']');
    });

    stream.on('close', async () => {
      try {
        await conn.close();
      } catch (err) {
        console.log(err);
      }
    });

    stream.on('error', async (err) => {
      next(err);
      try {
        await conn.close();
      } catch (err) {
        console.log(err);
      }
    });
  } catch (err) {
    next(err);
  }
}

module.exports.get = get;
If you find you're doing this a lot, simplify things by creating a reusable transform stream:
const oracledb = require('oracledb');
const { Transform } = require('stream');

class ToJSONArray extends Transform {
  constructor() {
    super({objectMode: true});
    this.push('[');
  }

  _transform (row, encoding, callback) {
    if (this._prevRow) {
      this.push(JSON.stringify(this._prevRow));
      this.push(',');
    }
    this._prevRow = row;
    callback(null);
  }

  _flush (done) {
    if (this._prevRow) {
      this.push(JSON.stringify(this._prevRow));
    }
    this.push(']');
    delete this._prevRow;
    done();
  }
}

async function get(req, res, next) {
  try {
    const toJSONArray = new ToJSONArray();
    const conn = await oracledb.getConnection();
    const stream = await conn.queryStream('select * from employees', [], {outFormat: oracledb.OBJECT});

    res.writeHead(200, {'Content-Type': 'application/json'});

    stream.pipe(toJSONArray).pipe(res);

    stream.on('close', async () => {
      try {
        await conn.close();
      } catch (err) {
        console.log(err);
      }
    });

    stream.on('error', async (err) => {
      next(err);
      try {
        await conn.close();
      } catch (err) {
        console.log(err);
      }
    });
  } catch (err) {
    next(err);
  }
}

module.exports.get = get;
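For completeness, here is a sketch of how such a handler might be wired up in Express (the router path and file names are illustrative, not taken from the answer):

// router sketch — illustrative names only
const express = require('express');
const employees = require('./controllers/employees'); // the module exporting get() above

const router = express.Router();
router.get('/employees', employees.get);

module.exports = router;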
I wrote this script to pull a few S3 objects and import them into a database. When I run it against a local DB it imports all 3265 records that I know should be there. When I run it against my AWS Postgres instance, it hangs a little partway through, exits cleanly, and only imports about 50 or so records. I figured it was a timeout of some sort, but haven't been able to find anything to that effect. It also ignores the custom process.exit. I've combed through the Postgres instance logs too, and nothing popped out at me there. I'm a little at a loss, and I think I may just be missing a subtlety of Node and/or the way I wrote this code.
const { NODE_ENV } = process.env

import config from '../config'
config()
import AWS from 'aws-sdk';
import db from './db/sequelize/models/db_connection'

process.on('uncaughtException', function (exception, p) {
  console.log(p)
  console.log(exception);
});

class FailedImport extends Error {
  constructor(message) {
    Error.captureStackTrace(this, this.constructor);
    this.name = this.constructor.name;
    this.message = message;
  }
}

var s3 = new AWS.S3()

async function listObjects() {
  return await s3.listObjects({Bucket: process.env.S3_BUCKET_NAME}).promise()
}

function importData(objectList) {
  return objectList.Contents.map( async (obj) => {
    try {
      let data = await s3.getObject({ Bucket: process.env.S3_BUCKET_NAME, Key: obj.Key}).promise()
      let body = data.Body
      let dataLines = body.toString().split('\n')
      return Promise.all(dataLines.map( async (line) => {
        try {
          let jsonifiedLine = JSON.parse(line)
          return await db.Site.upsert({ url: jsonifiedLine['api_url'], quantcast_rank: 0})
        } catch(e) {
          console.error(e)
        }
      }))
    }
    catch(err) {
      console.log(err)
    }
  })
}

export function runImport() {
  listObjects().then((objects) => {
    return Promise.all(importData(objects))
      .then(() => console.log('Finished import.'))
      .catch((err) => console.log(err))
  }).catch((err) => {
    console.log(err)
    throw new FailedImport(err)
  })
}

runImport()
It turns out that I was mixing two very different styles of promise architecture, and on top of that the way I was using map was not async-safe. I wound up switching to for...of loops and try/catch with async/await, and that fixed the code right up. New code below to compare:
class FailedImport extends Error {
  constructor(message) {
    Error.captureStackTrace(this, this.constructor);
    this.name = this.constructor.name;
    this.message = message;
  }
}

var s3 = new AWS.S3()

async function listObjects() {
  return await s3.listObjects({Bucket: process.env.S3_BUCKET_NAME, Prefix: "datasets/cleaned.data/"}).promise()
}

async function importData(objectList) {
  try {
    for (let obj of objectList.Contents) {
      let data = await s3.getObject({ Bucket: process.env.S3_BUCKET_NAME, Key: obj.Key}).promise()
      console.log(obj.Key)
      let body = data.Body
      let dataLines = body.toString().split('\n')
      dataLines.pop()
      let jsonLines = dataLines.map((row) => JSON.parse(row.trim()))
      for (let line of jsonLines) {
        try {
          await db.Site.upsert({ url: line['api_url'], quantcast_rank: 0})
        }
        catch(err) {
          console.log(err)
        }
      }
    }
  }
  catch(err) {
    console.log(err)
  }
}

export function runImport() {
  listObjects().then((objects) => {
    importData(objects)
      .then(() => console.log('Finished.'))
      .catch((err) => console.error(err))
  }).catch((err) => {
    console.error(err)
  })
}
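For comparison, the entry point itself could also be written entirely with async/await instead of mixing .then() chains; a sketch using the same listObjects/importData functions as above:

// Sketch only: fully async/await entry point
export async function runImport() {
  try {
    const objects = await listObjects()
    await importData(objects)
    console.log('Finished.')
  } catch (err) {
    console.error(err)
  }
}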