Check on the server side if a YouTube video exists - node.js

How can I check, on the server side of a node.js app, whether a YouTube video exists?

var youtubeId = "adase268_";

// pseudocode
youtubeVideoExist = function (youtubeId) {
    return true; // if the YouTube video exists
};

You don't need to use the YouTube API per se; you can look for the thumbnail image:
Valid video = 200 - OK:
http://img.youtube.com/vi/gC4j-V585Ug/0.jpg
Invalid video = 404 - Not found:
http://img.youtube.com/vi/gC4j-V58xxx/0.jpg
I thought I could make this work from the browser, since you can load images from a third-party site without security problems. But in testing, it fails to report the 404 as an error, probably because the content body is still a valid image. Since you're using node, you should be able to look at the HTTP response code directly.
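For example, a minimal sketch of that server-side check (not from the original answer), using Node's built-in http module and the example IDs above; checkThumbnail is just an illustrative helper name:

var http = require('http');

function checkThumbnail(youtubeId) {
    http.get('http://img.youtube.com/vi/' + youtubeId + '/0.jpg', function (res) {
        // 200 means the video exists; 404 means it doesn't.
        console.log(youtubeId + ': ' + res.statusCode);
        res.resume(); // discard the image body; only the status code matters
    }).on('error', console.error);
}

checkThumbnail('gC4j-V585Ug'); // 200
checkThumbnail('gC4j-V58xxx'); // 404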

I can't think of an approach that doesn't involve making a separate HTTP request to the video link to see if it exists, unless you already know a set of video IDs that are inactive, dead, or wrong.
Here's an example of something that might work for you. I can't readily tell if you're using this as a standalone script or as part of a web server. The example below assumes the latter, assuming you call a web server on /video?id=123videoId and have it respond or do something depending on whether or not the video with that ID exists. It uses the request library, which you can install with npm install request:
var request = require('request');

// Your route here. Example of what the route might look like if called as /video?id=123videoId
app.get('/video', function (req, res) {
    var videoId = req.query.id || 'adase268_'; // falls back to a sample ID
    request.get('https://www.youtube.com/watch?v=' + videoId, function (error, ytRes, body) {
        if (error) {
            // The request itself failed (network error, etc.). Handle as needed.
        }
        else if (ytRes.statusCode === 404) {
            // Video doesn't exist. Do what you need to do here.
        }
        else {
            // Video exists.
            // Can handle other HTTP response codes here if you like.
        }
    });
});
// You could refactor the above to take out the 'request.get()', wrap it in a function
// that takes a callback and re-use it in multiple routes, depending on your problem;
// see the sketch below.
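For instance, the refactoring mentioned in that last comment might look like this (a sketch; videoExists is a helper name introduced here, not from the original answer):

var request = require('request');

// Reusable helper: reports via callback whether the watch page returned a 404.
function videoExists(videoId, callback) {
    request.get('https://www.youtube.com/watch?v=' + videoId, function (error, res, body) {
        if (error) {
            return callback(error);
        }
        callback(null, res.statusCode !== 404);
    });
}

// Re-use it in any route:
app.get('/video', function (req, res) {
    videoExists(req.query.id, function (error, exists) {
        if (error) {
            return res.sendStatus(500);
        }
        res.send(exists ? 'Video exists' : 'Video does not exist');
    });
});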

@rodrigomartell is on the right track, in that your check function will need to make an HTTP call; however, just checking the youtube.com URL won't work in most cases. You'll get back a 404 if the videoId is malformed (i.e., fewer than 11 characters, or using characters not valid in their scheme), but if it's a properly formed videoId that just happens not to correspond to a video, you'll still get back a 200. It would be better to use an API request, like this (note that it might be easier to use the request-json library instead of just the request library):
var request = require('request-json');
var client = request.newClient('https://www.googleapis.com/youtube/v3/');

// Register for an API key at the Google Developers Console:
// https://console.developers.google.com/
var apiKey = 'YOUR_API_KEY';

// The HTTP call is asynchronous, so the result has to be delivered through
// a callback; returning true/false inside client.get() would never reach
// the caller.
var youtubeVideoExist = function (youtubeId, callback) {
    client.get('videos/?part=id&id=' + youtubeId + '&key=' + apiKey, function (err, res, body) {
        if (err) {
            return callback(err);
        }
        // The API returns an empty `items` array when no video matches the ID.
        callback(null, !!(body.items && body.items.length));
    });
};
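Because the API call is asynchronous, the result arrives through the callback rather than as a return value, e.g.:

youtubeVideoExist('gC4j-V585Ug', function (err, exists) {
    if (err) throw err;
    console.log(exists); // true if the video exists
});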

Using the youtube-feeds module. It works fast (~200 ms) and needs no API key:
var youtube = require("youtube-feeds");

var existsFunc = function(youtubeId, callback) {
    youtube.video(youtubeId, function(err, result) {
        // Only report existence when the lookup succeeded and the
        // returned id matches the one we asked for.
        var exists = !err && result.id === youtubeId;
        console.log("youtubeId");
        console.log(youtubeId);
        console.log("exists");
        console.log(exists);
        callback(exists);
    });
};

var notExistentYoutubeId = "y0srjasdkfjcKC4eY";
existsFunc(notExistentYoutubeId, console.log);

var existentYoutubeId = "y0srjcKC4eY";
existsFunc(existentYoutubeId, console.log);
output:
❯ node /pathToFileWithCodeAbove/FileWithCodeAbove.js
youtubeId
y0srjcKC4eY
exists
true
true
youtubeId
y0srjasdkfjcKC4eY
exists
false
false

All you need is to look for the thumbnail image. In Node.js it would be something like:
var http = require('http');

function isValidYoutubeID(youtubeID) {
    var options = {
        method: 'HEAD',
        host: 'img.youtube.com',
        path: '/vi/' + youtubeID + '/0.jpg'
    };
    var req = http.request(options, function(res) {
        if (res.statusCode === 200) {
            console.log("Valid YouTube ID");
        } else {
            console.log("Invalid YouTube ID");
        }
    });
    req.on('error', console.error);
    req.end();
}
No API key is needed. It is quite fast because only the response headers are checked for a 200/404 status code; the image itself is never downloaded.
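For example, with the valid and invalid example IDs from the first answer:

isValidYoutubeID('gC4j-V585Ug'); // logs "Valid YouTube ID"
isValidYoutubeID('gC4j-V58xxx'); // logs "Invalid YouTube ID"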

Related

AWS CloudFront + Lambda@Edge: modify HTML content (making all links absolute -> relative)

I (maybe falsely) assumed Lambda@Edge could modify the origin response content,
so I wrote a Lambda function like this:
/* this does not work: response.Body is not defined */
'use strict';

exports.handler = (event, context, callback) => {
    var response = event.Records[0].cf.response;
    var data = response.Body.replace(/OLDTEXT/g, 'NEWTEXT');
    response.Body = data;
    callback(null, response);
};
This fails because you cannot reference the origin response body with this syntax.
Can I modify this script to make it work as I intended, or should I consider using another AWS service?
My background:
We are trying to set up an AWS CloudFront distribution that consolidates access to several websites, like this:
ttp://foo.com/ -> https:/newsite.com/foo/
ttp://bar.com/ -> https:/newsite.com/bar/
ttp://boo.com/ -> https:/newsite.com/boo/
The sites are currently managed by external parties. We want to disable direct public access to foo/bar/boo and have newsite.com be the only site visible on the internet.
Mapping the origins into a single CloudFront distribution is relatively simple.
However, doing so will break HTML content that references files with an absolute URL
once the current domain names are removed from the web:
ttp://foo.com/images/1.jpg
-> (disable foo.com dns)
-> image not found
To benefit from CloudFront caching and its other merits,
I want to modify/rewrite all absolute file references in the HTML files to relative URLs,
so
<img src="ttp://foo.com/images/1.jpg">
becomes
<img src="/foo/images/1.jpg">
//(accessed as https:/newsite.com/foo/images/1.jpg from a user)
//(maybe I should make it an absolute URL for SEO purposes)
(http is changed to ttp above, due to a restriction on posting the banned domain name foo.com)
(edit)
I found this AWS blog post, which may be a great hint but feels a little more convoluted than I expected (it sets up a Linux container so you can use sed to process the HTML files, maybe using S3 as temporary storage).
I hope I can find a simpler way:
https://aws.amazon.com/blogs/networking-and-content-delivery/resizing-images-with-amazon-cloudfront-lambdaedge-aws-cdn-blog/
From what I have just learnt myself, you unfortunately cannot modify the response body within Lambda@Edge. You can only wipe it out or totally replace it. I was hoping to clean up all responses from a legacy site, but a CloudFront Lambda@Edge function will not allow this to be done.
As the AWS documentation states:
When you're working with the HTTP response, Lambda@Edge does not expose the body that is returned by the origin server to the origin-response trigger. You can generate a static content body by setting it to the desired value, or remove the body inside the function by setting the value to be empty. If you don't update the body field in your function, the original body returned by the origin server is returned back to viewer.
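In other words, a trigger can only replace the body wholesale. A minimal sketch of that documented capability (the replacement HTML is just an illustration):

'use strict';

exports.handler = (event, context, callback) => {
    const response = event.Records[0].cf.response;
    // The origin's body is not exposed here; we can only overwrite it outright.
    // Note the field is `body` (lowercase), not `Body` as in the question.
    response.body = '<html><body>Static replacement content</body></html>';
    callback(null, response);
};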
I ran into the same issue, and have been able to pull some info out of the request headers to piece together a URL from which I can fetch the original body.
Beware: I haven't yet been able to confirm that this is a "safe" method (maybe it relies on undocumented behaviour, etc.), but for now it DOES fetch the original body properly for me. Of course, it also takes another request/round trip, possibly incurring extra transfer costs, execution time, etc.
const https = require('https');

const fetchOriginalBody = (request) => {
    const host = request['headers']['host'][0]['value']; // xxxx.yyy.com
    const uri = request['uri'];
    const fetchOriginalBodyUrl = 'https://' + host + uri;
    return httpsRequest(fetchOriginalBodyUrl);
};
// Helper that turns https.request into a promise
function httpsRequest(options) {
    return new Promise((resolve, reject) => {
        const req = https.request(options, (res) => {
            if (res.statusCode < 200 || res.statusCode >= 300) {
                return reject(new Error('statusCode=' + res.statusCode));
            }
            var body = [];
            res.on('data', function(chunk) {
                body.push(chunk);
            });
            res.on('end', function() {
                try {
                    body = Buffer.concat(body).toString();
                    // body = JSON.parse(Buffer.concat(body).toString());
                } catch (e) {
                    reject(e);
                }
                resolve(body);
            });
        });
        req.on('error', (e) => {
            reject(e.message);
        });
        req.end();
    });
}
exports.handler = async (event, context, callback) => {
    const records = event.Records;
    if (records && records.length > 0) {
        const request = records[0].cf.request;
        const body = await fetchOriginalBody(request);
    }
...
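Building on that helper, here is a sketch (my own, not from the original answer) of how the fetched body might be rewritten and returned as a generated response from an origin-request trigger, instead of forwarding the request to the origin:

exports.handler = async (event) => {
    const request = event.Records[0].cf.request;
    const body = await fetchOriginalBody(request);

    // Hypothetical rewrite: turn absolute foo.com URLs into relative ones.
    const rewritten = body.replace(/https?:\/\/foo\.com\//g, '/foo/');

    // Returning a response object from a request trigger generates the
    // response directly instead of going to the origin.
    return {
        status: '200',
        statusDescription: 'OK',
        headers: {
            'content-type': [{ key: 'Content-Type', value: 'text/html' }]
        },
        body: rewritten
    };
};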

How to use the full request URL in AWS Lambda to execute logic only on certain pages

I have a website running on www.mywebsite.com. The files are hosted in an S3 bucket in combination with CloudFront. Recently, I added a new part to the site that is supposed to be for private access only, so I wanted to put some form of protection on it. The rest of the site, however, should remain public. My goal is for the site to be accessible to everyone, but as soon as someone gets to the new part, they should not see any source files and should be prompted for a username/password combination.
The URL of the new part would be, for example, www.mywebsite.com/private/index.html, ...
I found that an AWS Lambda function (with node.js) is a good fit for this, and it kind of works. I have managed to put authentication on the entire website, but I can't figure out how to make it apply only to pages whose full URL contains, for example, '/private/*'. The Lambda function I wrote looks like this:
'use strict';

exports.handler = (event, context, callback) => {
    // Get request and request headers
    const request = event.Records[0].cf.request;
    const headers = request.headers;

    if (!request.uri.toLowerCase().indexOf("/private/") > -1) {
        // Continue request processing if authentication passed
        callback(null, request);
        return;
    }

    // Configure authentication
    const authUser = 'USER';
    const authPass = 'PASS';

    // Construct the Basic Auth string
    const authString = 'Basic ' + new Buffer(authUser + ':' + authPass).toString('base64');

    // Require Basic authentication
    if (typeof headers.authorization == 'undefined' || headers.authorization[0].value != authString) {
        const body = 'Unauthorized';
        const response = {
            status: '401',
            statusDescription: 'Unauthorized',
            body: body,
            headers: {
                'www-authenticate': [{key: 'WWW-Authenticate', value: 'Basic'}]
            },
        };
        callback(null, response);
    }

    // Continue request processing if authentication passed
    callback(null, request);
};
The part that doesn't work is the following part:
if (!request.uri.toLowerCase().indexOf("/private/") > -1) {
    // Continue request processing if authentication passed
    callback(null, request);
    return;
}
My guess is that the request.uri does not contain what I expected it to contain, but I can't seem to figure out what does contain what I need.
If you're using a Lambda@Edge function (it appears you are), you can view the request event structure here: https://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/lambda-event-structure.html#lambda-event-structure-request
You can see the actual value of the request URI field by using console.log and checking the respective logs in CloudWatch.
The problem might be this line:
if (!request.uri.toLowerCase().indexOf("/private/") > -1) {
If you're strictly looking to check whether a JavaScript string contains another string, you probably want to do this instead:
if (request.uri.toLowerCase().indexOf("/private/") === -1) {
Or better yet, using more modern JS:
if (!request.uri.toLowerCase().includes("/private/")) {
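Putting the corrected check back into the original guard (the rest of the handler stays the same):

// Skip authentication entirely for anything outside /private/:
if (!request.uri.toLowerCase().includes("/private/")) {
    callback(null, request);
    return;
}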

Generating zip archive on-the-fly using Express and node-archiver

I'm trying to generate a zip archive of icons on-the-fly and stream the response to the user to download, via a JSON POST request.
The zip itself is created and the response is returned, but the client-side is not prompted to download the file, and the response is garbled (which I assume is the contents of the archive).
var fs = require('fs');
var Archiver = require('archiver');

app.post('/download', function(request, response) {
    var icons = request.body;
    var filename = 'icons.zip';
    response.attachment(filename);

    var zip = Archiver('zip');
    zip.on('finish', function(error) {
        return response.end();
    });
    zip.pipe(response);

    for (var i = 0; i < icons.length; i++) {
        var icon = getIcon(icons[i]);
        zip.append(fs.createReadStream('public/' + icon.svg), { name: icon.title + '.svg' });
    }
    zip.finalize();
});
I'm wondering if there's anything missing from the server-side code that's preventing the download on the client-side, but from the example I've followed (https://github.com/archiverjs/node-archiver/blob/master/examples/express.js), it doesn't seem to be the case.
(Screenshots of the request made and the response received were attached here.)
AJAX calls don't trigger file downloads in a browser, so you need to work around that.
One possibility is to change the request from a POST to a GET and put the names of the icons in the URL as parameters.
Your Express route would look like this:
app.get('/download/*?', function(request, response) {
    // Make sure icon names were provided:
    if (!request.params[0]) {
        return response.sendStatus(400);
    }

    // Split on `/`, which is used as separator between icon names:
    var icons = request.params[0].split(/\//);

    // The rest can be the same
    ...
});
Client-side, you would use this:
location.href = 'http://your-server/download/Chevron%20Down/Close/Trash';
(obviously you can also generate that URL dynamically based on user input, as long as you make sure that the icon names are properly URL-encoded)
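Client-side, a small sketch of building that URL dynamically (the server name and icon list are placeholders):

var icons = ['Chevron Down', 'Close', 'Trash'];

// encodeURIComponent escapes spaces and other special characters in each
// icon name before they are joined into the path.
location.href = 'http://your-server/download/' +
    icons.map(encodeURIComponent).join('/');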

Node.js - Stream Binary Data Straight from Request to Remote server

I've been trying to stream binary data (PDF, images, other resources) directly from a request to a remote server but have had no luck so far. To be clear, I don't want to write the document to any filesystem. The client (browser) will make a request to my node process which will subsequently make a GET request to a remote server and directly stream that data back to the client.
var request = require('request');

app.get('/message/:id', function(req, res) {
    // db call for specific id, etc.
    var options = {
        url: 'https://example.com/document.pdf',
        encoding: null
    };

    // First try - unsuccessful
    request(options).pipe(res);

    // Second try - unsuccessful
    request(options, function (err, response, body) {
        var binaryData = body.toString('binary');
        res.header('content-type', 'application/pdf');
        res.send(binaryData);
    });
});
Logging both body and binaryData shows that the proper data is there, but the PDF that is subsequently downloaded is corrupt. I can't figure out why.
Wow, never mind. It turned out that Postman (the Chrome app) was hijacking the request and response somehow. The // First try example in my code excerpt works properly in the browser.
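For reference, a minimal sketch of the working first approach, with an explicit Content-Type set before piping (same hypothetical URL as above):

var request = require('request');

app.get('/message/:id', function(req, res) {
    res.type('application/pdf');
    // Stream the upstream response straight through to the client;
    // no buffering and no lossy string conversion of the binary body.
    request({ url: 'https://example.com/document.pdf', encoding: null }).pipe(res);
});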

how to publish a page using node.js

I have just begun to learn node.js. Over the last two days, I've been working on a project that accepts user input and publishes an ICS file. I have all of that working. Now consider when I have to show this data. I use router.get to see if I am at the /cal page and...
router.get('/cal', function(req, res, next) {
    var db = req.db;
    var ical = new icalendar.iCalendar();
    db.find({
        evauthor: 'mykey'
    }, function(err, docs) {
        docs.forEach(function(obj) {
            var event2 = ical.addComponent('VEVENT');
            event2.setSummary(obj.evics.evtitle);
            event2.setDate(new Date(obj.evics.evdatestart), new Date(obj.evics.evdateend));
            event2.setLocation(obj.evics.evlocation);
            //console.log(ical.toString());
        });
    });
    res.send(ical.toString());
    // res.render('index', {
    //     title: 'Cal View'
    // })
});
So when /cal is requested, it loops through my db and creates an ICS calendar, ical. If I do console.log(ical.toString()) within the loop, it gives me a properly formatted calendar following the protocol.
However, I'd like to END the response with this. At the end I do a res.send just to see what gets published on the page. This is what gets published:
BEGIN:VCALENDAR
VERSION:2.0
PRODID:calendar//EN
END:VCALENDAR
Now the reason is pretty obvious: it's the nature of node.js. The response gets sent to the browser before the callback function finishes adding each individual VEVENT to the calendar object.
I have two related questions:
1) What's the proper way to "wait" until the callback is done?
2) How do I use res to send out a dynamic .ics link with ical.toString() as the content? Do I need to create a new view for this?
edit: I guess for number 2 I'd have to set the HTTP headers like so:
//set correct content-type-header
header('Content-type: text/calendar; charset=utf-8');
header('Content-Disposition: inline; filename=calendar.ics');
but how do I do this when using views?
Simply send the response once you've got the necessary data! You are not required to end or send directly in your route; you can do it in a nested callback as well:
router.get('/cal', function(req, res, next) {
    var db = req.db;
    var ical = new icalendar.iCalendar();
    db.find({
        evauthor: 'mykey'
    }, function(err, docs) {
        docs.forEach(function(obj) {
            var event2 = ical.addComponent('VEVENT');
            event2.setSummary(obj.evics.evtitle);
            event2.setDate(new Date(obj.evics.evdatestart), new Date(obj.evics.evdateend));
            event2.setLocation(obj.evics.evlocation);
        });
        res.type('ics');
        res.send(ical.toString());
    });
});
I also included sending the proper Content-Type by using res.type.
Also: don't forget to add proper error handling. You can, for example, use res.sendStatus(500) if an error occurred while retrieving the documents.
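A quick sketch of that error handling in the same route:

db.find({
    evauthor: 'mykey'
}, function(err, docs) {
    if (err) {
        // The query failed; report a server error instead of sending a calendar.
        return res.sendStatus(500);
    }
    // ... build the calendar and send it as shown above
});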
