Finding the coordinates of an image in every HTML file in a directory? - node.js

I have a library of flat HTML files with similar image tags. How should I go through all of them and find the specific x, y coordinates on the page of a specific image tag?
I'm thinking that I'll need to either render each page as an image (replacing the image tag that I'm looking for with a specific color that I can then match on), or I could headlessly render the page with something like phantom.js and find the coordinates that way (though I don't know if that will work). Any thoughts on which will be easier?
I'd prefer to use either a LAMP stack or Node.js.
Thanks!

I think using PhantomJS will be the easiest. No need for node.js.
You can combine examples/scandir.js and examples/phantomwebintro.js to get what you want.
var system = require('system');
var fs = require('fs');

if (system.args.length !== 2) {
    console.log("Usage: phantomjs img.js DIRECTORY_TO_SCAN");
    phantom.exit(1);
}

// Walk the directory tree and invoke cb for every regular file found.
function scanDirectory(path, cb) {
    if (fs.exists(path) && fs.isFile(path)) {
        cb(path);
    } else if (fs.isDirectory(path)) {
        fs.list(path).forEach(function (e) {
            if (e !== "." && e !== "..") {
                scanDirectory(path + '/' + e, cb);
            }
        });
    }
}

// Open a page, inject jQuery, and log the src and position of every <img>.
function parsePage(path) {
    var page = require('webpage').create();
    page.open(path, function (status) {
        if (status === "success") {
            page.includeJs("http://code.jquery.com/jquery-latest.js", function () {
                var images = page.evaluate(function () {
                    var images = [];
                    $('img').each(function () {
                        images.push({ src: $(this).attr('src'), pos: $(this).position() });
                    });
                    return images;
                });
                console.log(JSON.stringify(images)); // stringify, or you'll only see [object Object]
            });
        }
    });
}

scanDirectory(system.args[1], parsePage);
This script (run as phantomjs img.js kittens) will scan the given directory for files, load every file in that directory (and its subdirectories; you can modify this behavior in scanDirectory), find all <img> tags on each page, and print an array with their src attributes and .position().
Took me about 20 minutes to get this to work, so I think this is the easiest way.
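If you'd rather not fetch jQuery from the network for every file, a variant of the page.evaluate call using plain DOM APIs should work just as well (a sketch along the same lines, not something I've run):

var images = page.evaluate(function () {
    // Turn the NodeList into a real array so we can use .map().
    return Array.prototype.slice.call(document.getElementsByTagName('img'))
        .map(function (img) {
            var rect = img.getBoundingClientRect(); // position relative to the viewport
            return { src: img.getAttribute('src'), x: rect.left, y: rect.top };
        });
});

That also removes the page.includeJs step, so the script works offline.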

Related

Copy specified files with condition using gulp

I am trying to copy my vendor files to my dev folder using gulp. In development mode, I want to copy only the unminified files; if an unminified file is not present, copy the minified one. In production mode, I want to copy the minified files; if a minified file is not present, minify the normal file.
My folder structure:
js
    app.js
jquery
    jquery.min.js
    jquery.js
fontawesome
    fontawesome.min.js
    fontawesome.min.css
    fonts.ttf...
Here is the basic task I have written:
var scriptsPath = '../vendor/';

function getFolders(dir) {
    return fs.readdirSync(dir)
        .filter(function(file) {
            return fs.statSync(path.join(dir, file)).isDirectory();
        });
}

gulp.task('vendor', function() {
    var folders = getFolders(scriptsPath);
    var cssFilter = $.filter('**/*.css');
    var tasks = folders.map(function(folder) {
        var jsFilter;
        if (isProduction) {
            jsFilter = $.filter('**/*.min.js');
        } else {
            jsFilter = $.filter(['**/*.js', '!**/*.min.js']);
        }
        return gulp.src(path.join(scriptsPath, '**/'))
            .pipe(jsFilter)
            .pipe($.if(useSourceMaps, $.sourcemaps.init()))
            .pipe($.if(isProduction, $.uglify({preserveComments: 'some'})))
            .on('error', handleError)
            .pipe(jsFilter.restore())
            .pipe(cssFilter)
            .pipe($.if(isProduction, $.minifyCss()))
            .on('error', handleError)
            .pipe(cssFilter.restore())
            .on('error', handleError)
            .pipe(gulp.dest(build.vendor.js));
    });
    return es.concat.apply(null, tasks);
});
I have been trying for the last two days using gulp-if and some other methods, but I haven't found a solution yet. Thanks in advance.
You are trying to cram way too much into your vendor task. The stuff you do with your JS files is completely unrelated to the stuff you do with your CSS files, and the result is hard to read.
Instead of using gulp-filter, try splitting vendor up into smaller tasks like vendor-js, vendor-css, etc., and then declare them as dependencies of your vendor task:
gulp.task('vendor', ['vendor-js', 'vendor-css' /* etc ... */]);
Your vendor-js task could then look like this:
var glob = require('glob');

gulp.task('vendor-js', function () {
    var js = glob.sync('../vendor/**/*.js');
    if (isProduction) {
        // use <file>.min.js, unless there is only <file>.js
        js = js.filter(function(file) {
            return file.match(/\.min\.js$/) ||
                js.indexOf(file.replace(/\.js$/, '.min.js')) < 0;
        });
    } else {
        // use <file>.js, unless there is only <file>.min.js
        js = js.filter(function(file) {
            return !file.match(/\.min\.js$/) ||
                js.indexOf(file.replace(/\.min\.js$/, '.js')) < 0;
        });
    }
    return gulp.src(js, { base: '../vendor' })      // return the stream so gulp knows when the task is done
        .pipe($.if(isProduction,                    // only minify for prod and when
            $.if('!**/*.min.js', $.uglify())))      // the file isn't minified already
        .pipe(gulp.dest('build'));
});
Adapting this to your specific needs should be fairly trivial from here on.
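For completeness, a vendor-css task could follow exactly the same pattern (a sketch only; I'm reusing the glob module required above and assuming the $.minifyCss plugin from your original task):

gulp.task('vendor-css', function () {
    var css = glob.sync('../vendor/**/*.css');
    if (isProduction) {
        // use <file>.min.css, unless there is only <file>.css
        css = css.filter(function(file) {
            return file.match(/\.min\.css$/) ||
                css.indexOf(file.replace(/\.css$/, '.min.css')) < 0;
        });
    } else {
        // use <file>.css, unless there is only <file>.min.css
        css = css.filter(function(file) {
            return !file.match(/\.min\.css$/) ||
                css.indexOf(file.replace(/\.min\.css$/, '.css')) < 0;
        });
    }
    return gulp.src(css, { base: '../vendor' })
        .pipe($.if(isProduction, $.if('!**/*.min.css', $.minifyCss())))
        .pipe(gulp.dest('build'));
});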

Write to same file from multiple tasks (Gulp, Node, gulp-json)

I've just started using Gulp (and NodeJs)... Obviously I ran into my first wall.
Here it is:
I have a large project that uses themes. Each theme has its own assets (scss and js files). Here is my gulpfile.js:
// < require block here (not included, to keep this short)
var themes = ["theme1", "theme2", "theme3"];
// Since I can have up to 20 different themes, I use the 'themes' array so I can create tasks dynamically, like this:
themes.forEach(function (theme) {
    gulp.task('css:' + theme, function () {
        setVersion([theme], 'css'); // write asset version into a json file
        gulp.src('../themes/frontend/' + theme + '/assets/css/style.scss')
            .pipe(sourcemaps.init())
            .pipe(sass({outputStyle: 'compressed'}).on('error', sass.logError))
            .pipe(sourcemaps.write('./'))
            .pipe(gulp.dest('../themes/frontend/' + theme + '/assets/css'));
    });
});
// Of course, I need an "all" task to build all CSS in rare ocasions I need to do so:
gulp.task('css:all', ("css:" + themes.join(",css:")).split(","));
// ("css:" + themes.join(",css:")).split(",") => results in the needed ['css:theme1', 'css:theme2'] tasks array
// The same logic as above for JS files
// but added the use of gulp-concat and gulp-uglify
// Having scripts = { "theme1" : ['script1', 'script2'], "theme2": ['script1', 'script2'] }
// ...
// And "per theme" both "css and js"
themes.forEach(function (theme) {
    gulp.task('theme:' + theme, ['css:' + theme, 'js:' + theme]);
});
// Next I need to set versions for each asset
// I'm writing all the versions into a json file
assetsVersion = someRandomGeneratedNumber;

function setVersion(themes, assetType) {
    /**
     * themes: array
     * assetType: 'all', 'css' or 'js'
     */
    var fs = require('fs'),
        path = require("path");
    var versionsFilePath = path.normalize(__dirname + '/../protected/config/theme/frontend/');
    var versionsFileName = '_assets-version.json';
    if (!fs.existsSync(versionsFilePath + versionsFileName)) {
        // Create file if it doesn't exist
        fs.writeFile(versionsFilePath + versionsFileName, "{}", function (err) {
            if (err) {
                return console.log(err);
            }
        });
    }
    gulp.src(versionsFilePath + versionsFileName)
        .pipe(jeditor(function (json) {
            themes.forEach(function(theme) {
                if ("undefined" == typeof (json[theme])) {
                    json[theme] = {};
                }
                if ('css' == assetType) {
                    json[theme]['css'] = assetsVersion;
                } else if ('js' == assetType) {
                    json[theme]['js'] = assetsVersion;
                } else {
                    json[theme] = {"css": assetsVersion, "js": assetsVersion};
                }
                if ("undefined" == typeof(json[theme]['css'])) {
                    // if we're missing the 'css' key (i.e. we've just created the json file), add that too
                    json[theme]['css'] = assetsVersion;
                }
                if ("undefined" == typeof(json[theme]['js'])) {
                    // if we're missing the 'js' key (i.e. we've just created the json file), add that too
                    json[theme]['js'] = assetsVersion;
                }
            });
            return json;
        }))
        .pipe(gulp.dest(versionsFilePath));
}
The assets versioning json should look like this:
{
    "theme1": {
        "css": "20150928163236",
        "js": "20150928163236"
    },
    "theme2": {
        "css": "20150928163236",
        "js": "20150928163236"
    },
    "theme3": {
        "css": "20150928163236",
        "js": "20150928163236"
    }
}
Running 'gulp css:theme#' works fine...
BUT running 'gulp css:all' produces a messy json.
Of course, this happens because all the css:theme# (or js:theme#) tasks run asynchronously, and more often than not multiple tasks end up writing to my json file simultaneously.
I've read about tasks depending on other tasks, but that doesn't really fit into my whole "dynamic tasks" flow (or I don't know how to fit it in).
I mean I don't think that this:
gulp.task('css:theme1', ['versioning'], function() {
    // do stuff after the 'versioning' task is done.
});
would help me. So what if it waits for the version to be written? Multiple tasks would still write to the file at the same time. Also, for this to work, I would need to pass parameters to the task, which I also don't know how to do... something like:
gulp.task('css:' + theme, ['versioning --theme ' + theme], function() {
    // do stuff after the 'versioning' task is done.
});
That's how I could make it work on the command line. I know this isn't valid, but it would be really useful in some cases if it were somehow possible to send parameters to a task through the task name.
runSequence() { ... done(); } doesn't help me either; I really don't see how I could make it work within my flow...
Please, anybody... help a newb...
How can I solve this, while:
Having tasks created dynamically;
Having one versioning json file for all themes.
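For reference, here's the direction I've been experimenting with (an untested sketch; it assumes versionsFilePath and versionsFileName are moved to module scope): record the versions in memory inside each dynamic task, and have a single dedicated task write the json file once:

var versions = {};

function recordVersion(theme, assetType) {
    // No file I/O here, so concurrent css:/js: tasks can't clobber each other.
    versions[theme] = versions[theme] || {};
    if (assetType === 'css' || assetType === 'all') versions[theme].css = assetsVersion;
    if (assetType === 'js' || assetType === 'all') versions[theme].js = assetsVersion;
}

gulp.task('write-versions', function (done) {
    var fs = require('fs');
    // Single writer: serialize the accumulated object in one go.
    fs.writeFile(versionsFilePath + versionsFileName,
        JSON.stringify(versions, null, 2), done);
});

Each css:theme# task would call recordVersion(theme, 'css') instead of setVersion, and css:all could be chained with write-versions (e.g. runSequence('css:all', 'write-versions')). But I'm not sure this is the idiomatic gulp way, hence the question.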

Brunch plugin pipeline issues

I'm writing a plugin for Brunch to 'filter' files from a code library. The basic idea is to:
check my source files (in src\ folder, or any watched folders that don't match library pattern),
build a list of imported/required modules from code library (in lib\ folder, outside src\, somewhere on disk)
check files against this list and 'approve' or 'reject' them
compile only what's 'approved', so I don't end up with huge files that have all modules/components from my library, but only what I use in particular project
When I work only with JavaScript files (this.pattern = /.*(js|jsx)$/;), everything works fine. The next step is to include more files, since many modules/components in the library have some sort of template or stylesheet; for example, this is one AngularJS module:
lib\
    modules\
        pager\
            controller.jsx
            directive.jsx
            template.html
            pager.styl
            README.md
But when I expand the pattern to include other files (this.pattern = /.*/;), I run into all sorts of issues. Most have to do with the pipeline - these are the kinds of errors I'm getting. For example:
jshint-brunch doesn't like README.md
html-brunch won't wrap template.html
stylus-brunch and sass-brunch are also unhappy
I've tried solving these problems individually. For example, if I disable html-brunch with config.plugins.off: ['html-brunch'] and add this code inside the compile function, it kinda works:
if (params.path.match(/\.html$/)) {
    params.data = "module.exports = function() { return " + JSON.stringify(params.data) + ";};";
    return callback(null, this.config.modules.wrapper(params.path, params.data));
}
...but I couldn't resolve all the issues. Pretty much all the problems have to do with this line in the compile function: return callback(null, null);. When I 'reject' a file, the next plugin gets something undefined and breaks...
Any ideas how to solve this?
I'd like to eventually expand plugin's functionality to handle static assets too, for example copy lib\images\placeholder-1.jpg (but not placeholder-2.jpg) from library if it's used in html files, but I'm stuck at this point...
Here's the code of the plugin:
var CodeLibrary;
module.exports = CodeLibrary = (function() {
    var required = [];

    CodeLibrary.prototype.brunchPlugin = true;

    function CodeLibrary(config) {
        this.config = config;
        this.pattern = /.*/;
        this.watched = this.config.paths.watched.filter(function(path) {
            return !path.match( config.plugins.library.pattern );
        });
    }

    function is_required(path) {
        var name = this.config.modules.nameCleaner(path);
        return required.some(function(e, i, a) { return name.match(e); });
    }

    function in_library(path) {
        return Boolean(path.match( this.config.plugins.library.pattern ));
    }

    function is_watched(path) {
        return this.watched.some(function(e, i, a) { return path.match( e ); });
    }

    CodeLibrary.prototype.lint = function(data, path, callback) {
        if( !is_watched.apply(this, [path]) &&
            !is_required.apply(this, [path]) )
            return callback();
        var es6_pattern = /import .*'(.*)'/gm;
        var commonjs_pattern = /require\('(.*)'\)/gm;
        var match = es6_pattern.exec(data) || commonjs_pattern.exec(data);
        while( match != null ) {
            if( required.indexOf(match[1]) === -1 )
                required.push( match[1] );
            match = es6_pattern.exec(data) || commonjs_pattern.exec(data);
        }
        callback();
    }

    CodeLibrary.prototype.compile = function(params, callback) {
        if( is_required.apply(this, [params.path]) ||
            !in_library.apply(this, [params.path]) )
            return callback(null, params);
        return callback(null, null);
    };

    return CodeLibrary;
})();
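One idea I haven't fully tested (a hypothetical workaround, based on the same params shape my html snippet above manipulates): instead of passing null for rejected files, pass through a well-formed params object whose data is an empty module, so downstream plugins never see undefined:

CodeLibrary.prototype.compile = function(params, callback) {
    if( is_required.apply(this, [params.path]) ||
        !in_library.apply(this, [params.path]) )
        return callback(null, params);
    // Hypothetical: hand the next plugin an empty module instead of null.
    params.data = 'module.exports = {};';
    return callback(null, params);
};

But that still leaves non-JS files (README.md, .styl, etc.) flowing into compilers that can't handle them, so I suspect it's not the right approach.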

Smart CSS compressor that moves @imports to top

I've written a little script that gathers up all my CSS files and compresses them:
var cssSources = [];
bundle.input.stylesheets.forEach(function(filename) {
    if (isLocalPath(filename)) {
        var fileContents = FileSystem.readFileSync(filename, {encoding: 'utf8'});
        if (/\.css$/.test(filename)) {
            cssSources.push(fileContents);
        } else if (/\.less$/.test(filename)) {
            var parser = new Less.Parser({
                paths: [staticDir],
                filename: filename
            });
            parser.parse(fileContents, function(e, tree) {
                cssSources.push(tree.toCSS());
            });
        }
    }
});
var cssCode = UglifyCSS.processString(cssSources.join(''));
This was working well until I tried @importing a font in a CSS file that was not included first. The @import wound up somewhere in the middle of my compressed CSS, which of course doesn't work (@import rules are only valid before all other rules).
I'm currently using UglifyCSS, as you may have noticed; is there a better alternative that will move all the @imports to the top? Preferably available as an npm package.
Update: clean-css has the same problem.
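The workaround I'm considering in the meantime (a quick sketch of my own, untested against edge cases) is to hoist the @import rules to the top myself before compressing:

function hoistImports(css) {
    var imports = [];
    // Strip every @import rule out of the concatenated source...
    var body = css.replace(/@import[^;]+;/g, function (match) {
        imports.push(match);
        return '';
    });
    // ...and prepend them so they precede all other rules.
    return imports.join('\n') + '\n' + body;
}

var cssCode = UglifyCSS.processString(hoistImports(cssSources.join('')));

But this naive regex ignores @charset (which must come even earlier) and @imports inside comments, so I'd much rather use a package that handles this properly.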

Meteor/Node writeFile crashes server

I have the following code:
Meteor.methods({
    saveFile: function(blob, name, path, encoding) {
        var path = cleanPath(path), fs = __meteor_bootstrap__.require('fs'),
            name = cleanName(name || 'file'), encoding = encoding || 'binary',
            chroot = Meteor.chroot || 'public';
        // Clean up the path. Remove any initial and final '/' -we prefix them-,
        // any sort of attempt to go to the parent directory '..' and any empty directories in
        // between '/////' - which may happen after removing '..'
        path = chroot + (path ? '/' + path + '/' : '/');
        // TODO Add file existence checks, etc...
        fs.writeFile(path + name, blob, encoding, function(err) {
            if (err) {
                throw (new Meteor.Error(500, 'Failed to save file.', err));
            } else {
                console.log('The file ' + name + ' (' + encoding + ') was saved to ' + path);
            }
        });
        function cleanPath(str) {
            if (str) {
                return str.replace(/\.\./g,'').replace(/\/+/g,'').
                    replace(/^\/+/,'').replace(/\/+$/,'');
            }
        }
        function cleanName(str) {
            return str.replace(/\.\./g,'').replace(/\//g,'');
        }
    }
});
Which I took from this project
https://gist.github.com/dariocravero/3922137
The code works fine and it saves the file; however, it repeats the call several times, and each time it causes Meteor to reset (I'm using the Windows version, 0.5.4). The F12 console ends up full of 503 errors, and the Meteor console loops over the startup code each time a 503 happens, repeating the console logs from the saveFile function.
Furthermore, in the target directory the image thumbnail keeps alternating between a valid thumbnail and a broken one, as if fs were writing the file multiple times.
Here is the code that calls the function:
"click .savePhoto":function(e, template){
e.preventDefault();
var MAX_WIDTH = 400;
var MAX_HEIGHT = 300;
var id = e.srcElement.id;
var item = Session.get("employeeItem");
var file = template.find('input[name='+id+']').files[0];
// $(template).append("Loading...");
var dataURL = '/.bgimages/'+file.name;
Meteor.saveFile(file, file.name, "/.bgimages/", function(){
if(id=="goodPhoto"){
EmployeeCollection.update(item._id, { $set: { good_photo: dataURL }});
}else{
EmployeeCollection.update(item._id, { $set: { bad_photo: dataURL }});
}
// Update an image on the page with the data
$(template.find('img.'+id)).delay(1000).attr('src', dataURL);
});
},
What's causing the server to reset?
My guess would be that since Meteor has built-in automatic directory scanning that watches for file changes (in order to auto-relaunch the application on the newest code base), the file you are creating is actually what's causing the server reset.
Meteor doesn't scan directories beginning with a dot (so called "hidden" directories) such as .git for example, so you could use this behaviour to your advantage by setting the path of your files to a .directory of your own.
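For example (my illustration, adapting the chroot default from your saveFile method; the directory name is arbitrary):

var chroot = Meteor.chroot || '.uploads'; // hidden directory, ignored by Meteor's file watcher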
You should also consider using writeFileSync, insofar as Meteor methods are intended to run synchronously (inside node fibers), contrary to the usual node way of asynchronous calls. In this code it's no big deal, but, for example, you couldn't use any Meteor mechanics inside the writeFile callback:
asynchronousCall(function(error, result) {
    if (error) {
        // handle error
    } else {
        // do something with result
        Collection.update(id, result); // error! Meteor code must run inside a fiber
    }
});

var result = synchronousCall();
Collection.update(id, result); // good to go!
Of course there is a way to turn any asynchronous call into a synchronous one using fibers/future, but that's beyond the scope of this question: I recommend the EventedMind episode on node Futures to understand this specific area.
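For reference, a minimal sketch of that fibers/future approach applied to your saveFile method (my adaptation, assuming the same 0.5.x-era __meteor_bootstrap__.require is available):

var Future = __meteor_bootstrap__.require('fibers/future');

var fut = new Future();
fs.writeFile(path + name, blob, encoding, function (err) {
    // Resolve the future from the async callback...
    if (err) fut.throw(err);
    else fut.return(true);
});
fut.wait(); // ...and block this method's fiber (not the whole server) until the write finishes.
// From here on it's safe to call Meteor APIs again.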
