Download an image from Facebook using Node.js - node.js

My requirement is to read and store a simple image resource from Facebook.
Image URL: https://badge.facebook.com/badge/1.png
The following code snippet (which I used to read an image from Google) is not working as per my requirement; it just stores an invalid image on my local disk.
var fs = require('fs'),
    request = require('request');

var download = function(uri, filename, callback){
  request.get(uri, function(err, res, body){
    console.log('content-type:', res.headers['content-type']);
    console.log('content-length:', res.headers['content-length']);
    var r = request(uri).pipe(fs.createWriteStream(filename));
    r.on('close', callback);
    r.on('error', function(err) { console.log(err); });
  });
};

download('https://badge.facebook.com/badge/1.png', 'google.png', function(){
  console.log('Done downloading..');
});
Please help me with this.

It's not a problem with Node.js or request, but with Facebook.
If you open google.png in a text editor, you'll see it's an HTML file.
So Facebook probably has some anti-crawler protection.
Just set the headers, for example:
var fs = require('fs'),
    request = require('request');

var download = function(uri, filename, callback){
  var r = request({
    url: uri,
    headers: {
      'accept': 'image/*',
      'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.118 Safari/537.36',
    }
  }).pipe(fs.createWriteStream(filename));
  r.on('error', function(err) { console.log(err); });
  r.on('close', callback);
};

download('https://badge.facebook.com/badge/1.png', 'google.png', function(){
  console.log('Done downloading..');
});
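To confirm the fix worked, here is a minimal sketch that reads the first four bytes of the saved file and compares them against the PNG signature (89 50 4E 47); an HTML error page would start with "<!DO" instead:
var fs = require('fs');

// Read the first four bytes of the downloaded file and compare them against
// the PNG signature; the filename is the one used in the example above.
function looksLikePng(filename) {
  var buf = Buffer.alloc(4);
  var fd = fs.openSync(filename, 'r');
  fs.readSync(fd, buf, 0, 4, 0);
  fs.closeSync(fd);
  return buf.toString('hex') === '89504e47';
}

console.log('PNG signature present:', looksLikePng('google.png'));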

Related

Sending HTTP/2 GET requests with cookies in Node.js

I'm trying to send an HTTP/2 GET request using the Node.js http2 module, but I'm having trouble sending cookies.
I have to read the cookies from a JSON file and send them with the request.
I tried this:
const http2 = require("http2");
const client = http2.connect("http://192.168.1.50");
const fs = require('fs');

var COOKIE = fs.readFileSync('cookies.json', 'utf8');

const req = client.request({
  ':path': '/cookies.php',
  ':method': 'GET',
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:108.0) Gecko/20100101 Firefox/108.0',
  'Cookie': COOKIE,
});

let data = "";
req.on("response", (headers, flags) => {
  for (const name in headers) {
    console.log(`${name}: ${headers[name]}`);
  }
});
req.on("data", chunk => {
  data += chunk;
});
req.on("end", () => {
  console.log(data);
  client.close();
});
req.end();
cookies.json:
[{"name":"test","value":"12345","domain":"192.168.1.50","path":"/"}]
But it doesn't work: I made a quick PHP page that prints $_COOKIE, and it shows an empty array.
I'm not an expert on HTTP headers; how can I do this?
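One likely cause (an assumption, since the snippet sends the raw JSON text): the Cookie header expects name=value pairs joined with "; ", not a JSON array. A minimal sketch of building that header from the cookies.json format shown above:
const fs = require('fs');

// Build a standard Cookie header ("name=value; other=value") from the JSON
// array shown above, instead of sending the raw JSON text as the header value.
const cookies = JSON.parse(fs.readFileSync('cookies.json', 'utf8'));
const COOKIE = cookies.map(c => `${c.name}=${c.value}`).join('; ');

// With the example file this yields "test=12345", which can be passed as the
// 'Cookie' value in client.request() exactly as in the snippet above.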

I am having issues properly sending headers to a SOAP endpoint

I have a SOAP endpoint I need to gather data from, and I have a working prototype using Python 3. However, I would like this to be a Node.js project to diversify my portfolio. When sending headers to the endpoint I noticed an error (Media Type not supported), so I looked at the headers and saw something odd: some of the keys are in quotes and others are not, and I believe this may be the source of the issue. Any help would be appreciated.
Headers when the request is made:
{
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36',
  'Content-Type': 'text/xml; charset=utf-8',
  SOAPAction: 'http://api.neatoscan.com/RequestOrdersReport',
  Host: 'desktopbridge.neatoscan.com',
  'Content-Length': '486',
  Expect: '100-continue',
  Connection: 'Keep-Alive'
}
Request code (all variables in the headers are pulled from a config file to keep authentication separate from the main file):
import {Client_User_Name, Token, start_date_time, end_date_time, SOAP_Header_CONTENT_TYPE,
        SOAP_Header_Host_Config, USER_AGENT, SOAP_actions, SOAP_Content_len, SOAP_urls} from './config.js';
import { createRequire } from 'module';
const require = createRequire(import.meta.url);
const axios = require('axios');
const soapRequest = require('easy-soap-request');
var fs = require('fs'),
    parseString = require("xml2js").parseString,
    xml2js = require("xml2js");

var Request_Book_Order_report_XML = '/home/malachi/Desktop/Projects/Daily_Reports_Goodwill/NeatoScan/xml_files/Request_Book_Order_Report.xml';
var Report_Status_Books_XML = '/home/malachi/Desktop/Projects/Daily_Reports_Goodwill/NeatoScan/xml_files/Request_Report_Status.xml';

const url = SOAP_urls['Books_Url'];

function req(xml, headers) {
  axios.post(url, xml, headers)
    .then(response => { console.log(response.data); })
    .catch(err => { console.log(err); });
}

var SA = 'Book_Report_Request';
var CL = 'Book_Report_Request_Content_Len';
var url_Headers = {
  "User-Agent": USER_AGENT,
  'Content-Type': SOAP_Header_CONTENT_TYPE['Books_Content_Type'],
  'SOAPAction': SOAP_actions[SA],
  'Host': SOAP_Header_Host_Config['Books_Host'],
  'Content-Length': SOAP_Content_len[CL],
  'Expect': '100-continue',
  'Connection': 'Keep-Alive'
};

fs.readFile(Request_Book_Order_report_XML, "utf-8", function(err, data) {
  parseString(data, function(err, result) {
    var json = result;
    var auth_filter = json['soap:Envelope']['soap:Body'][0]['RequestOrdersReport'][0];
    var auth_Client_User = auth_filter['username'] = Client_User_Name;
    var auth_token = auth_filter['token'] = Token;
    var date_start_filter = auth_filter['startDate'] = start_date_time;
    var date_end_filter = auth_filter['endDate'] = end_date_time;
    var builder = new xml2js.Builder();
    var xml = builder.buildObject(json);
    console.log(xml);
    console.log(url_Headers);
    // req(xml, url_Headers)
  });
});
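For what it's worth, the quoting difference in the logged headers is cosmetic: in a JavaScript object literal a key like 'User-Agent' needs quotes only because of the hyphen, so quoted and unquoted keys are equivalent. A more likely suspect (an assumption, not something confirmed above) is how the headers reach axios: axios.post(url, data, config) expects them under a headers property of the config object. A minimal sketch of that call shape, reusing the names from the snippet above:
function req(xml, headers) {
  // axios.post(url, data, config): headers must be nested under `headers`
  // inside the third argument, not passed as the config object itself.
  axios.post(url, xml, { headers: headers })
    .then(response => { console.log(response.data); })
    .catch(err => { console.log(err); });
}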

Scrape and store Shopify ecommerce websites using Node.js

I wrote code to scrape an array of Shopify ecommerce websites using the website-scraper npm module in Node.js, but it returns a 403 error, while the same code works for other websites.
How can I get around this problem?
My scraperTest.js file is:
var scrape = require('website-scraper');
let test = require('./test');

let urls = [];
urlList = ['1500.academy'];

urlList.forEach(url => {
  test.checkRedirect(url)
    .then(domain => {
      urls.push('https://' + domain);
      console.log(urls);
      var options = {
        urls: urls,
        directory: './autochat/',
        'User-Agent': 'request',
      };
      // with promise
      scrape(options).then((result) => {
        /* some code here */
      }).catch((err) => {
        /* some code here */
      });
      // or with callback
      scrape(options, (error, result) => {
        /* some code here */
      });
    });
});
and the test.js file is:
const request = require('request');
const extractDomain = require('extract-domain');
//var link = 'oneplustwocase.com';

function checkRedirect(link) {
  return new Promise((resolve, reject) => {
    var url = "http://" + link;
    var options = {
      url: url,
      headers: {
        'User-Agent': 'request'
      }
    };
    request(options, function (error, response, body) {
      if (response !== undefined) {
        let redirectedDomain = extractDomain(response.request.uri.href);
        if (response.statusCode === 200 && link !== redirectedDomain) {
          resolve(redirectedDomain);
        } else {
          resolve(link);
        }
      } else {
        resolve(link);
      }
    });
  });
}

module.exports.checkRedirect = checkRedirect;
I got the solution.
We are able to fetch the HTML data of the domain using request(); response.body contains the HTML data.
The solution I got uses the following code:
const request = require('request');
const extractDomain = require('extract-domain');
let fs = require('fs');

function checkRedirect(link) {
  var url = "http://" + link;
  var options = {
    url: url,
    headers: {
      'User-Agent': 'request'
    }
  };
  request(options, function (error, response, body) {
    if (response !== undefined) {
      let redirectedDomain = extractDomain(response.request.uri.href);
      let writeStream = fs.createWriteStream(redirectedDomain + '.html');
      writeStream.write(response.body);
      writeStream.end();
    }
  });
}

module.exports.checkRedirect = checkRedirect;

//checkRedirect('oneplustwocase.com')
/*
var r = request(url, function (e, resp) {
  r.uri
  resp.request.uri
})*/
Since you are interested in the data, save yourself the headache of scraping and simply download the site's XML sitemap. It contains all the products and other interesting information, and it is exactly what Google and the other search engines consume.
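For example, Shopify stores normally expose an XML sitemap at /sitemap.xml (the exact path is an assumption about the target shop). A minimal sketch of fetching it with the same request module used above:
const request = require('request');

// Fetch the XML sitemap that Shopify stores typically expose; it links to the
// product and collection sitemaps, which can be parsed instead of scraping HTML.
request({
  url: 'https://1500.academy/sitemap.xml', // assumed path, for the shop from the question
  headers: { 'User-Agent': 'Mozilla/5.0' }
}, function (error, response, body) {
  if (!error && response.statusCode === 200) {
    console.log(body); // raw sitemap XML
  }
});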
It seems that the website http://1500.academy returns 403 if it doesn't like the user-agent header. I suggest trying a user-agent that looks like a browser.
According to the website-scraper documentation (https://www.npmjs.com/package/website-scraper#request) you should pass request headers in the request property, not at the root level.
So the options should look like:
const options = {
  urls: [{ url: 'http://1500.academy/' }],
  directory: './autochat/',
  request: {
    headers: {
      'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
    }
  }
};
By the way, website-scraper follows redirects by default, so you can skip checking redirects.
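For completeness, a sketch of invoking the scraper with those options, using the same promise API as the question:
const scrape = require('website-scraper');

// options is the object defined above
scrape(options)
  .then((result) => { console.log('scraping finished', result); })
  .catch((err) => { console.log(err); });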

Invalid magic number of a PNG file

I am trying to verify whether a link is a valid image using its magic number. Most image links work fine, but a set of images on Trump's site does not produce the correct magic numbers, even though they appear to work fine in a browser. The magic number they produce is 3c21444f.
Below is my code; any help would be appreciated:
var request = require('request');

var magic = {
  jpg: 'ffd8ffe0',
  jpg1: 'ffd8ffe1',
  png: '89504e47',
  gif: '47494638'
};

var options = {
  method: 'GET',
  url: 'https://assets.donaldjtrump.com/gallery/4749/screen_shot_2016-10-30_at_1.39.54_pm.png',
  encoding: null // keeps the body as a buffer
};

request(options, function (error, response, body) {
  if (!error) {
    var magicNumberInBody = body.toString('hex', 0, 4);
    if (magicNumberInBody == magic.jpg ||
        magicNumberInBody == magic.jpg1 ||
        magicNumberInBody == magic.png ||
        magicNumberInBody == magic.gif) {
      console.log('Valid image');
    } else {
      console.log('Invalid Image', magicNumberInBody);
    }
  }
});
So apparently the issue was Cloudflare blocking my requests to the image (3c21444f is the hex for "<!DO", the start of an HTML error page rather than an image). I fixed it by sending a User-Agent header when requesting those images.
var options = {
  method: 'GET',
  url: 'https://assets.donaldjtrump.com/gallery/4749/screen_shot_2016-10-30_at_1.39.54_pm.png',
  headers: {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'
  },
  encoding: null // keeps the body as a buffer
};

How to connect to the GitHub API using Node.js from a secure network?

I am using the code below to get repositories from GitHub. When I use it from my home network I am able to retrieve the list of repositories, but if I try to fetch the repos from another network it gives me this error: 'connect ECONNREFUSED'. I am new to Node.js, so I am still wondering how to go about solving this issue.
Any ideas?
var https = require("https");

var userName = 'xyz';
var options = {
  host: "api.github.com",
  path: '/users/' + userName + '/repos',
  method: 'GET'
};

var request = https.request(options, function(response){
  var body = '';
  response.on('data', function(chunk){
    body += chunk;
  });
  response.on('end', function(){
    var json = JSON.parse(body);
    var repos = [];
    json.forEach(function(repo){
      repos.push({
        name: repo.name,
        description: repo.description
      });
    });
    console.log('the repos are ' + JSON.stringify(repos));
  });
});

request.on('error', function(e) {
  console.error('and the error is ' + e);
});
request.end();
Change your options as below; the GitHub API requires a User-Agent header on every request:
var options = {
  host: 'api.github.com',
  path: '/users/' + userName + '/repos',
  method: 'GET',
  headers: {'user-agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)'}
};
I think this would be OK (the same options, with the conventional 'User-Agent' capitalization):
var options = {
  host: 'api.github.com',
  path: '/users/' + userName + '/repos',
  method: 'GET',
  headers: {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)'}
};
