Reddit API only returning one post - node.js

I am trying to get all of the links in a subreddit using the API, but it is only returning one url. Here is the code I have:
var request = require('request');
webpage = 'http://www.reddit.com/r/AmazonUnder5/top.json?limit=100';
//login
request.post('http://www.reddit.com/api/login',{form:{api_type:'json', passwd:'password', rem:true, user:'username'}});
//get urls
request({uri : webpage, json:true, headers:{useragent: 'mybot v. 0.0.1'}}, function(error, response, body) {
if(!error && response.statusCode == 200) {
for(var key in body.data.children) {
var url = body.data.children[key].data.url;
console.log(url);
}
}
});
When I visit the json link in my browser, it returns all 100 posts.

Thats because only 1 exists in the top
http://www.reddit.com/r/AmazonUnder5/top
You could use hot instead
http://www.reddit.com/r/AmazonUnder5/hot.json
Also, you don't need to log in to do public get requests
Edit: You are getting so few results because you are not logged in properly
When logging in, use the
"op" => "login"
Parameter and test what cookies and data is returned.
I also recommend using the ssl login url since that works for me
https://ssl.reddit.com/api/login/

Related

Is there an effective way to validate an image source url in node?

I am using Node to get Instagram images and have come across an edge case I am interested in solving. While using the oembed API call, I can then get thumbnail_url.
In my service, I return the image url and move on. The issue here is, for certain permalinks (carousel/albums), this thumbnail_url loads with a 5xx Instagram Error.
What I would like to do is to verify the image url loads an image, and if not, do something else instead of returning it.
I know what the "something else" is. My problem is that the url I get back from the oembed call is indeed a valid url, so I don't need to validate that. I need to validate the url loads as expected.
My initial thought process was to do something like but I have never tried to verify an image source url before, I've always just tried to get the url in the first place:
function urlTester(url) {
request(url, (error, response, body) => {
if (error) {
console.log('URL does not load');
} else {
console.log('URL loads image!');
}
})
}
If the urls are always valid, and you only need to check for the actual response, you are correct.
(Vanilla) Request Example :
request(url,function(error, response, body){
if(!error && response.statusCode == 200){
/* RESPONSE SUCCESS */
}else{
/* RESPONSE ERROR */
}
})
If you want to go on further and verify that the response indeed contains an image you can use : response.headers['content-type']
if(((response.headers['content-type']).match(/(image)+\//g)).length != 0){
/* It contains 'image/' as the content type */
}else{
/* no match with 'image/' */
}
You can also try using request-image-size
Detects image dimensions via request instead of Node.js native
http/https, allowing for options and following redirects by default.
It reduces network traffic by aborting requests as soon as image-size
is able to obtain the image size.
It will return an error if the response is not a valid image :
Since version 2.0.0 it returns an ES6 native Promise that resolves
with the size object or rejects with an Error. Requires Node.js v4+.
As #EMX pointed out, I was headed in the right direction. Here is what I ended up doing:
function testUrl(url) {
return new Promise((resolve, reject) => {
request(url, function(error, response, body) {
if (error) {
reject(error);
}
resolve(response.statusCode);
})
})
}
If the statusCode !== 200, then I can move on as initially required.

Insert var into url and follow in node js with cheerio/request

I am trying to take a variable from the original url, insert it into a second url, visit that url and then access variable from it. I have two problems:
Problem 1: 'myurl' variable returns value of
"http://api.trove.nla.gov.au/work/undefined?key=6k6oagt6ott4ohno&reclevel=full"
That is, it is not taking the 'myid' variable.
Problem 2: How do I then follow the 'myurl' url as I want to access the DOM? Do I make another 'request' for 'myurl'?
Here is my code so far:
var request = require('request'),
cheerio = require('cheerio');
request('http://api.trove.nla.gov.au/result?key=6k6oagt6ott4ohno&zone=book&l-advformat=Thesis&sortby=dateDesc&q=+date%3A[2000+TO+2014]&l-availability=y&l-australian=y&n=0&s=0', function(error, response, html) {
if (!error && response.statusCode == 200) {
var $ = cheerio.load(html, {
xmlMode: true
});
var myid = ($('work').attr('id'))
var myurl = "http://api.trove.nla.gov.au/work/" +(myid)+ "?key=6k6oagt6ott4ohno&reclevel=full"
console.log(myurl)
}
});
Your selector is probably wrong, did you mean '#work' or '.work' perhaps?
Yes, you'll have to make another request to myurl using request() (since that's what you're using)

Node js, Request body empty for certain websites

I'm experimenting with Node.js and web scraping. In this case, I'm trying to scrape the most recent songs from a local radio station for display. With this particular website, body returns nothing. When I try using google or any other website, body has a value.
Is this a feature of the website I'm trying to scrape?
Here's my code:
var request = require('request');
var url = "http://www.radiomilwaukee.org";
request(url, function(err,resp,body) {
if (!err && resp.statusCode == 200) {
console.log(body);
}
else
{
console.log(err);
}
});
That's weird, the website you're requesting doesn't seem to return anything unless the accept-encoding header is set to gzip. With that in mind, using this gist will work: https://gist.github.com/nickfishman/5515364
I ran the code within that gist, replacing the URL with "http://www.radiomilwaukee.org" and see the content within the sample.html file once the code has completed.
If you'd rather have access to the web page's content within the code, you could do something like this:
// ...
req.on('response', function(res) {
var body, encoding, unzipped;
if (res.statusCode !== 200) throw new Error('Status not 200');
encoding = res.headers['content-encoding'];
if (encoding == 'gzip') {
unzipped = res.pipe(zlib.createGunzip());
unzipped.on("readable", function() {
// collect the content in the body variable
body += unzipped.read().toString();
});
}
// ...

596 Service not found while using ESPN API

I am using this API to consume a ESPN API:
http://api.espn.com/v1/now?apikey=d4skkma8kt2ac8tqbusz38w6
In Node.js, using node-curl library, my snippet looks like this:
var Curl = require('node-curl/lib/Curl')
curl.setopt('URL', url);
curl.setopt('CONNECTTIMEOUT', 2);
curl.on('data', function(chunk) {
console.log(chunk);
});
But everytime when I run this code I keep on getting response as:
<h1>596 Service Not Found</h1>
Strange is, same URL if I hit from the browser I get the correct response so URL is not invalid. This happens only if I try to call from Node.js. Can anyone guide me how to resolve this error? I tried encoding/decoding url, still I get same response.
Also basically I am avoiding any vendor specific libraries as much as possible, since we should have generic api calling framework
You could use request, which is a very popular module. Here's an example of how you could do it:
var request = require('request');
var url = 'http://api.espn.com/v1/now?apikey=d4skkma8kt2ac8tqbusz38w6';
request(url, function (error, response, body) {
if (!error && response.statusCode == 200) {
console.log(JSON.parse(body));
}
});

check on server side if youtube video exist

How to check if youtube video exists on node.js app server side:
var youtubeId = "adase268_";
// pseudo code
youtubeVideoExist = function (youtubeId){
return true; // if youtube video exists
}
You don't need to use the youtube API per-se, you can look for the thumbnail image:
Valid video = 200 - OK:
http://img.youtube.com/vi/gC4j-V585Ug/0.jpg
Invalid video = 404 - Not found:
http://img.youtube.com/vi/gC4j-V58xxx/0.jpg
I thought I could make this work from the browser since you can load images from a third-party site without security problems. But testing it, it's failing to report the 404 as an error, probably because the content body is still a valid image. Since you're using node, you should be able to look at the HTTP response code directly.
I can't think of an approach that doesn't involve making a separate HTTP request to the video link to see if it exists or not unless you know beforehand of a set of video IDs that are inactive,dead, or wrong.
Here's an example of something that might work for you. I can't readily tell if you're using this as a standalone script or as part of a web server. The example below assumes the latter, assuming you call a web server on /video?123videoId and have it respond or do something depending on whether or not the video with that ID exists. It uses Node's request library, which you can install with npm install request:
var request = require('request');
// Your route here. Example on what route may look like if called on /video?id=123videoId
app.get('/video', function(req, response, callback){
var videoId = 'adase268_'; // Could change to something like request.params['id']
request.get('https://www.youtube.com/watch?v='+videoId, function(error, response, body){
if(response.statusCode === 404){
// Video doesn't exist. Do what you need to do here.
}
else{
// Video exists.
// Can handle other HTTP response codes here if you like.
}
});
});
// You could refactor the above to take out the 'request.get()', wrap it in a function
// that takes a callback and re-use in multiple routes, depending on your problem.
#rodrigomartell is on the right track, in that your check function will need to make an HTTP call; however, just checking the youtube.com URL won't work in most cases. You'll get back a 404 if the videoID is a malformed ID (i.e. less than 11 characters or using characters not valid in their scheme), but if it's a properly formed videoID that just happens to not correspond to a video, you'll still get back a 200. It would be better to use an API request, like this (note that it might be easier to use the request-json library instead of just the request library):
request = require('request-json');
var client = request.newClient('https://www.googleapis.com/youtube/v3/');
youtubeVideoExist = function (youtubeId){
var apikey ='YOUR_API_KEY'; // register for a javascript API key at the Google Developer's Console ... https://console.developers.google.com/
client.get('videos/?part=id&id='+youtubeId+'&key='+apikey, function(err, res, body) {
if (body.items.length) {
return true; // if youtube video exists
}
else {
return false;
}
});
};
Using youtube-feeds module. Works fast (~200ms) and no need API_KEY
youtube = require("youtube-feeds");
existsFunc = function(youtubeId, callback) {
youtube.video(youtubeId, function(err, result) {
var exists;
exists = result.id === youtubeId;
console.log("youtubeId");
console.log(youtubeId);
console.log("exists");
console.log(exists);
callback (exists);
});
};
var notExistentYoutubeId = "y0srjasdkfjcKC4eY"
existsFunc (notExistentYoutubeId, console.log)
var existentYoutubeId = "y0srjcKC4eY"
existsFunc (existentYoutubeId, console.log)
output:
❯ node /pathToFileWithCodeAbove/FileWithCodeAbove.js
youtubeId
y0srjcKC4eY
exists
true
true
youtubeId
y0srjasdkfjcKC4eY
exists
false
false
All you need is to look for the thumbnail image. In NodeJS it would be something like
var http = require('http');
function isValidYoutubeID(youtubeID) {
var options = {
method: 'HEAD',
host: 'img.youtube.com',
path: '/vi/' + youtubeID + '/0.jpg'
};
var req = http.request(options, function(res) {
if (res.statusCode == 200){
console.log("Valid Youtube ID");
} else {
console.log("Invalid Youtube ID");
}
});
req.end();
}
API_KEY is not needed. It is quite fast because there is only header check for statusCode 200/404 and image is not loaded.

Resources