How to fix encoding in x-ray (NodeJS scraping library) response? - node.js

The following script is working perfectly in my NodeJS server, but rarely it returns response like this, when I'm trying to scrape some Cyrillic websites.
Script
x(url, {
name: 'title',
ogDescription: 'meta[property="og:description"]#content',
metaDescription: 'meta[name="description"]#content',
ogImage: 'meta[property="og:image"]#content',
twitterImage: 'meta[name="name="twitter:image:src""]#content',
metaImage: 'meta[name="image"]#content',
headImage: 'head img#src',
contentImage_1: '.content img#src',
contentImage_2: '.image img#src'
})
(function (err, obj) {
var firstData = {
name: [
obj.name
],
description: [
obj.metaDescription,
obj.ogDescription,
],
image: [
obj.ogImage,
obj.twitterImage,
obj.metaImage,
obj.headImage,
obj.contentImage_1,
obj.contentImage_2
]
}
Example of response with incorrect encoding
firstData { name: [ '(Rock, Pop) [15LP] [24/96] Queen - Studio Collection - 2015,
FLAC (tracks) :: RuTracker.org' ],
description:
[ 'RuTracker.org » ���������� ��� (����������� ���������) »
������� ������� (Rock, Pop) [15LP] [24/96] Queen -
Studio Collection - 2015, FLAC (tracks)',
undefined ],
image: [ undefined, undefined, undefined, undefined, undefined, undefined ] }
How do I fix this?

you can use request as x-ray's driver and iconv the body in it like this:
var options = {};
var conv = null;
options.encoding = 'binary';
iconv = new require('iconv').Iconv('Windows-1251', 'utf8');
conv = function(body) {
if (!body) return body;
body = new Buffer.from(body, 'binary');
return iconv.convert(body).toString();
}
var request = require('request').defaults(options);
var driver = function driver(context, callback) {
var url = context.url;
request(url, function(err, response, body) {
if (!err && conv) body = conv(body);
return callback(err, body);
})
};
x.driver(driver);
x(url, {
name: 'title',
ogDescription: 'meta[property="og:description"]#content',
metaDescription: 'meta[name="description"]#content',
ogImage: 'meta[property="og:image"]#content',
twitterImage: 'meta[name="name="twitter:image:src""]#content',
metaImage: 'meta[name="image"]#content',
headImage: 'head img#src',
contentImage_1: '.content img#src',
contentImage_2: '.image img#src'
})
(function (err, obj) {
var firstData = {
name: [
obj.name
],
description: [
obj.metaDescription,
obj.ogDescription,
],
image: [
obj.ogImage,
obj.twitterImage,
obj.metaImage,
obj.headImage,
obj.contentImage_1,
obj.contentImage_2
]
}
console.log(firstData);
});

Related

Adding additional spec files to an angular project, not loading/defining correctly?

Caveat: I am not the author of this project. Whoever originally wrote this is no longer with the organization and I am seemingly the most knowledgeable on this topic at this point.
I know a little about javascript and unit tests, so I successfully added one .spec.js file. I tried adding a second one for another module, reusing a lot of the spec setup, and it immediately broke.
Project resources:
Nodejs 12.16.1
jasmine-node-karma: "^1.6.1"
karma: "^6.3.12"
Contents of ./karma.conf.js:
module.exports = function(config) {
config.set({
basePath: './public',
frameworks: ['jasmine', 'jquery-3.2.1'],
files: [
"../node_modules/angular/angular.js",
"../node_modules/angular-mocks/angular-mocks.js",
"../node_modules/bootstrap/dist/js/bootstrap.js",
"../public/**/*.js",
],
exclude: [
],
preprocessors: {
},
client: {
captureConsole: true
},
browserConsoleLogOptions: {
terminal: true,
level: ""
},
reporters: ['progress'],
port: 9876,
colors: true,
logLevel: config.LOG_INFO,
autoWatch: true,
browsers: ['FirefoxHeadless', 'ChromeHeadlessNoSandbox', 'PhantomJS'],
customLaunchers: {
ChromeHeadlessNoSandbox: {
base: 'ChromeHeadless',
flags: ['--no-sandbox']
},
FirefoxHeadless: {
base: 'Firefox',
flags: ['-headless'],
}
},
singleRun: false,
concurrency: Infinity
})
}
Originally I added ./public/controllers.spec.js to match the existing ./public/controllers.js. These unit tests pass and continue to do so.
Yesterday I added ./public/backups/backupcontrollers.spec.js to match ./public/backups/backupcontrollers.js.
Contents of ./public/backups/backupcontrollers.js:
/**
* Angular controller.
*/
'use strict'
const backupApp = angular.module('backup', [])
const backupTypePath = 'elasticsearch'
backupApp.controller('BackupFormController', ['$scope', '$filter', '$http', function ($scope, $filter, $http) {
console.log('Started BackupFormController')
$scope.itemInstances = []
$scope.fetchStatus = 'Ready!'
$scope.processSelection = function (item, backupType = backupTypePath) {
$scope.currentItem = item.metadata.name
$scope.getBackup(backupType)
console.log('currentItem after selecting from dropdown: ' + $scope.currentItem)
}
$scope.init = function (backupType = backupTypePath) {
$scope.refreshItemInstances(backupType)
console.log('currentItem after loading page for first time: ' + $scope.currentItem)
}
$scope.getBackup = function (backupType = backupTypePath) {
const path = `/v1/backup/${backupType}`
$scope.fetchStatus = `Fetching Backups for Item ${$scope.currentItem}...`
console.log(`Fetching backups for item from ${path}`)
$http.get('/api', { headers: { path: path, item: $scope.currentItem } })
.success(function (data, status, headers, config) {
console.log(`Got data from GET on path ${path}, HTTP status ${status}: ${JSON.stringify(data)}`)
if (typeof data === 'string' || data instanceof String) {
$scope.backups = data.split(/\r?\n/)
} else {
$scope.backups = data
}
$scope.fetchStatus = 'Ready!'
console.log('Done fetching backup list for item:' + $scope.currentItem + '!')
})
.error(function (data, status, header, config) {
console.log(data)
$scope.fetchStatus = 'Ready!'
})
}
// Refresh the list of displayed Item instances
$scope.refreshItemInstances = function (backupType = backupTypePath) {
console.log('Fetching list of all items in the system ...')
$scope.fetchStatus = 'Fetching Items ... '
$http.get('/env')
.success(function (data, status, headers, config) {
console.log(data)
for (let i = 0; i < data.length; i++) {
$scope.itemInstances.push(data[i])
}
$scope.currentItem = $scope.itemInstances[0].metadata.name
console.log('Done fetching list of all items!')
console.log('currentItem after fetching list of all items: ' + $scope.currentItem)
$scope.fetchStatus = 'Ready!'
$scope.getBackup(backupType)
})
.error(function (data, status, header, config) {
console.log(data)
$scope.fetchStatus = 'Ready!'
})
}
}])
Contents of ./public/backups/backupcontrollers.spec.js:
describe('BackupFormController', function () {
let $controller, $rootScope, $httpBackend
beforeEach(module('backup'))
const mockBackupString = 'string of backup data'
const mockBackupData = {
body: mockBackupString
}
const mockItemsUnsorted = [
{
metadata: {
name: 'prod-mock-1',
spec: 'asdf',
status: 'ok'
},
notes: []
},
{
metadata: {
name: 'dev-mock-1',
spec: 'asdf',
status: 'ok'
},
notes: []
},
{
metadata: {
name: 'integ-mock-1',
spec: 'asdf',
status: 'ok'
},
notes: []
}
]
beforeEach(inject(function ($injector) {
$rootScope = $injector.get('$rootScope')
const $controller = $injector.get('$controller')
$httpBackend = $injector.get('$httpBackend')
const mockEnv = $httpBackend.when('GET', '/env')
.respond(mockItemsUnsorted)
const mockAPI = $httpBackend.when('GET', '/api')
.respond(mockBackupString)
const createController = function () {
return $controller('BackupFormController', { '$scope': $rootScope })
}
}))
describe('$scope.getBackup', function () {
beforeEach(function () {
spyOn(console, 'log')
})
it('should GET /api and set $scope.backups', function () {
controller = createController()
console.log('Dumping fetchStatus: ', $rootScope.fetchStatus)
$rootScope.init()
$httpBackend.flush()
expect($rootScope.backups).toEqual(mockBackupString)
expect(console.log).toHaveBeenCalled()
})
})
})
It seems like this new spec isn't working correctly at all; when I run npm test I see the normal successful tests from ./public/controllers.spec.js but also:
Chrome Headless 105.0.5195.125 (Mac OS 10.15.7) BackupFormController $scope.getBackup should GET /api and set $scope.backups FAILED
ReferenceError: createController is not defined
at UserContext.<anonymous> (backup/backupcontrollers.spec.js:51:7)
at <Jasmine>
This is the only output concerning ./public/backups/backupcontrollers.spec.js.
Has anybody run into this before? I found some posts regarding including angular-mocks, but as you can see in karma.conf.js, it's being included.

CLIENT_ERROR when uploading Video To LinkedIn API

I followed the LinkedIn docs Here to upload a Video to their Vector Assets API
The video uploaded, but When I checked the status of the Video I keep getting CLIENT_ERROR under recipes.
This is the code I used:
let fileName = 'redacted.mp4'; //This sample file is about 24MB
let fileStream = fs.createReadStream(fileName);
let organisationId = 'redacted';
let isVideo = module.exports.isVideo(fileName);
let fileStats = fs.statSync(fileName);
let fileSizeBytes = fileStats.size;
let mediaUploadInitAction = `https://api.linkedin.com/v2/assets?action=registerUpload`;
let registerUploadRequest = {
"registerUploadRequest": {
"owner": `urn:li:organization:${organisationId}`,
"recipes": [
`urn:li:digitalmediaRecipe:feedshare-${isVideo ? 'video' : 'image'}`
],
"serviceRelationships": [
{
"identifier": "urn:li:userGeneratedContent",
"relationshipType": "OWNER"
}
]
}
};
if (!isVideo) {
registerUploadRequest['registerUploadRequest']["supportedUploadMechanism"] = [
"SYNCHRONOUS_UPLOAD"
]
} else {
if (fileSizeBytes > 52428800) { //I was told if the file length is more than 52428800, I should use multipart upload mechanism. My current file size is less than that though.
registerUploadRequest['registerUploadRequest']["supportedUploadMechanism"] = [
"MULTIPART_UPLOAD"
];
registerUploadRequest['registerUploadRequest']['fileSize'] = fileSizeBytes;
}
}
let { data: mediaUploadRegisterResponse } = await axios.post(mediaUploadInitAction, registerUploadRequest, { headers: headers });
let uploadRegisterData = mediaUploadRegisterResponse.value;
let uploadMechanism = uploadRegisterData['uploadMechanism'];
let singleUploadRequest = uploadMechanism['com.linkedin.digitalmedia.uploading.MediaUploadHttpRequest'];
let uploadUrl = singleUploadRequest ? singleUploadRequest['uploadUrl'] : '';
let uploadHeaders = singleUploadRequest ? singleUploadRequest['headers'] : '';
let multipartUpload = uploadMechanism['com.linkedin.digitalmedia.uploading.MultipartUpload'];
if (singleUploadRequest) { //This always work flawlessly for image uploads but not for video uploads
await axios.put(uploadUrl, fileStream, {
headers: {
'Accept': '*/*',
...uploadHeaders,
...headers,
'Content-Type': 'application/octet-stream'
},
maxContentLength: Infinity,
maxBodyLength: Infinity,
});
fs.unlink(fileName, () => { });
}
The above code works flawlessly for Image Uploads. But for Video Uploads, I keep getting back CLIENT_ERROR.
This is the full status message I keep getting back:
{
"recipes": [
{
"recipe": "urn:li:digitalmediaRecipe:feedshare-video",
"status": "CLIENT_ERROR"
}
],
"serviceRelationships": [
{
"relationshipType": "OWNER",
"identifier": "urn:li:userGeneratedContent"
}
],
"mediaTypeFamily": "VIDEO",
"created": 1641646213127,
"id": "C4adhdhahahhdKJZw",
"lastModified": 1641646215307,
"status": "ALLOWED"
}
Please, what can I do to resolve this?
Thank you
For anyone who might experience similar issue in the future. After brute-forcing all possible options, here is what worked for me.
If the video file is less than 52MB don't use fs.createReadStream. Use the code below instead:
fs.readFile(fileName, async (err, data) => {
if (err) {
console.log(`Error Reading LinkedIn Video File ${fileName}`, err);
return;
}
await axios.put(uploadUrl, data, {
headers: singleFileUploadHeaders,
maxContentLength: Infinity,
maxBodyLength: Infinity,
});
});
And the video finally uploaded without any CLIENT_ERROR again!!!

aws Nodejs sdk:: autoscaling.describeAutoScalingGroups

I need to get the status of the autoscaling group processes (whether they're suspended or resumed). I've written the below script which returns the properties for the given ASG but the SuspendedProcesses: value is "[Object]". How do I expand the object. The ASG I'm querying has the Terminate process suspended, so I'd expect to see this in the output of the script:
var AWS = require('aws-sdk');
var uuid = require('uuid');
AWS.config.update({ region: 'eu-west-1' });
AWS.config.apiVersions = {
autoscaling: '2011-01-01',
};
var autoscaling = new AWS.AutoScaling();
var params = {
AutoScalingGroupNames: ["myAutoScalingGroup"]
};
function status() {
autoscaling.describeAutoScalingGroups(params, function(err, data) {
if (err) console.log(err, err.stack); // an error occurred
else console.log(data); // successful response
});
}
status();
This is the response from the above script:
C:\Node.js\NodeJsSamples\package01>node SuspendProcess.js
{ ResponseMetadata: { RequestId: 'myRequestId' },
AutoScalingGroups:
[ { AutoScalingGroupName: 'myAutoScalingGroupName',
AutoScalingGroupARN: 'arn:aws:autoscaling:eu-west-
1:myAccNumber:autoScalingGroup:myAutoScalingGroupName',
LaunchConfigurationName: 'myLaunchConfigurationName',
MinSize: 1,
MaxSize: 1,
DesiredCapacity: 1,
DefaultCooldown: 300,
AvailabilityZones: [Object],
LoadBalancerNames: [Object],
TargetGroupARNs: [],
HealthCheckType: 'ELB',
HealthCheckGracePeriod: 300,
Instances: [Object],
CreatedTime: 2017-11-02T08:08:31.364Z,
SuspendedProcesses: [Object],
VPCZoneIdentifier: 'subnet-########,subnet-########,subnet-########',
EnabledMetrics: [],
Tags: [Object],
TerminationPolicies: [Object],
NewInstancesProtectedFromScaleIn: false } ] }
How can I expand the [Object] values?
thanks.
Use this snipped. Is native.
console.log('string', require('util').inspect(<variable>, 1, 10, 1));
in your code:
function status() {
autoscaling.describeAutoScalingGroups(params, function(err, data) {
if (err) console.log(err, err.stack); // an error occurred
else console.log('output', require('util').inspect(data, 1, 10, 1)); // successful response
});
}
You need to JSON.stringify(data) instead of printing only data in else clause.
If you print console.log(JSON.stringify(data)) in else condition, you will get proper response.
Use JSON.stringify -
var obj = { "name":"John", "age":function () {return 30;}, "city":"New York"};
console.log(JSON.stringify(obj));

Node xmlbuilder mod TypeError: Converting circular structure to JSON

I'm trying to use the node xmlbuilder module, and copied / pasted their code from here but I get a
Converting circular structure to JSON error.
I have no clue why this is happening, here is the code:
Route:
app.get('/api/qb', function(req, res) {
qbwc.test(req, function(result){
res.send(result);
});
});
Module:
exports.test = function(data, next) {
var obj = {
person: {
name: "John",
'#age': 35,
address: {
city: "Istanbul"
},
phone: [
{
'#text': "555-1234",
'#type': 'home'
}, {
'#text': "555-1235",
'#type': 'mobile'
}
],
id: function() {
return 42;
}
}
};
var root = builder.create(obj);
return next(root);
}
EDIT:
I also tried it with something very simple to test, same issue:
var obj = { name: 'smith'};
var root = builder.create(obj);
return next(root);
Ok so after a lot of hair pulling, it seems you need to call .end() on the process, I have no idea why they don't have this in the example.
Here is what you need to do:
...
var root = builder.create(obj);
root = root.end({pretty: false});
return next(root);

Write a file keeping tabs and EOL

I am trying to write the content of a string in a file in node.js
I have some raml files and I am able to join them. If I print the variable in console, I see it well parsed but as soon as I save in on a file, the file just contains one single line:
var raml = require('raml-parser');
var fs = require('fs');
var path = require('path');
var os = require('os')
path.join(__dirname, './')
raml.loadFile('schema.raml').then( function(data) {
console.log(data);
var filePath = "schema.raml";
fs.unlinkSync(filePath);
fs.writeFile("./new.raml", JSON.stringify(data).replace('/\n', os.EOL), function(err) {
if(err) {
return console.log(err);
}
console.log("The file was saved!");
});
}, function(error) {
console.log('Error parsing: ' + error);
});
I added a replace EOL to change all "\n" in file. If I delete that, file will contain "\n" in each end of line.
On console, this is the output:
{ title: 'RAML Flattener',
baseUri: 'http://github.com/joeledwards/node-flat-raml',
version: '1',
mediaType: 'application/json',
protocols: [ 'HTTP' ],
resources:
[ { relativeUri: '/base',
methods: [Object],
resources: [Object],
relativeUriPathSegments: [Object] } ] }
data is a Javascript object; how that is being displayed when you console.log() it doesn't have much to do with how it will end up in the file you are writing.
The problem is that you are using JSON.stringify(), which, by default, will not pretty-print the output string.
Instead, try this:
JSON.stringify(data, null, 2)
This will make your output look like this:
{
"title": "RAML Flattener",
"baseUri": "http://github.com/joeledwards/node-flat-raml",
"version": "1",
"mediaType": "application/json",
"protocols": [
"HTTP"
],
"resources": [
{
"relativeUri": "/base",
"methods": { ... },
"resources": { ... },
"relativeUriPathSegments": { ... }
}
]
}
You may or may not need to call .replace() on its output. If you do, use this (the one you're using isn't valid):
.replace(/\n/, os.EOL)

Resources