I am new in Cheerio js just want to iterate a specific li from the website li looks like the following
<li class="webcam">
<a href="/en/webcam/italia/lazio/roma/roma-colosseo.html">
<span class="inner-wrapper">
<span class="img-wrapper"><span class="label label-info lb_sm" style="position:absolute;">World
Wonder</span>
<img src="https://static.skylinewebcams.com/live1151.jpg"
data-original="https://static.skylinewebcams.com/live1151.jpg" alt="Italy - Rome - Colosseum"
class="lazy" style="display: inline;" width="318">
</span>
<span class="title">Italy - Rome - Colosseum</span>
<span class="description">Rome, view of the Colosseum and the ruins of the gladiator gymnasium</span>
</span>
</a>
</li>
I want to get href from a tag, data-original from img tag and .title from span tag.
Here is what I tried so far but didn't get any success,
this is the example of finding only with specific tag,
I didn't know how to find all my required thing in one go using cheerio.
request(url, (err, body) => {
if (err) { console.log(err); return; }
$ = cheerio.load(body);
links = $('img[class=lazy]'); //jquery get all hyperlinks
$(links).each(function (i, link) {
console.log(i, link.attribs.alt);
console.log(i, link.attribs.data-original);
});
})
Any help will be appreciated thanks
You want to iterate the lis not the imgs:
let data = $('li.webcam').get().map(li => {
return {
href: $(li) .find('a').attr('href'),
'data-original': $(li).find('img').attr('data-original'),
title: $(li).find('span.title').text()
}
})
Related
How do I using Cheerio get the number in a li tag that is wrapped by UL and class tags.
I have this html snippet of the dynamic content:
<ul class="numbers">
<li class="ball winNum1">21</li>
<li class="ball winNum2">6</li>
<li class="ball winNum3">32</li>
<li class="ball winNum4">14</li>
</ul>
let $ = cheerio.load(body);
let msg = [];
$('li.ball').each( (i, elm)=>{
msg[i] = $(elm).toString().trim();
});
console.log(msg);
I expect to print ['21', '6', '32', '14'] instead I get this:
['<li class="ball winNum1"></li>',
'<li class="ball winNum2"></li>',
'<li class="ball winNum3"></li>',
'<li class="ball winNum4"></li>']
and there are no numbers 21, 6, 32 or 14 in the li tag.
I'am answering my own question after 2 days of struggle
Looks like for dynamic content cheerio may alone will not be sufficient, by using puppeteer and cheerio in combination, able to get the result I was looking for.
npm install puppeteer cheerio --save
Here is my working code after using puppeteer and cheerio
puppeteer
.launch()
.then(function(browser) {
return browser.newPage();
})
.then(function(page) {
return page.goto(url).then(function() {
return page.content();
});
})
.then(function(html) {
$('li.ball', html).each(function() {
console.log($(this).text());
});
})
.catch(function(err) {
//handle error
});
I found "The Ultimate Guide to Web Scraping with Node.js" article very helpful to achieve my result:
https://www.freecodecamp.org/news/the-ultimate-guide-to-web-scraping-with-node-js-daa2027dcd3/
I have project where we need to create a live search with knockout, map with some titles, when the title enter in search field other titles should disappear, I try to use different code but non of them work, and there is no error to help me figure out.
my code for html :
Filter:
<ul data-bind="foreach: locations">
<li data-bind="text: title"></li>
</ul>
</div>
And for js:
function ViewModel(){
var self =this;
this.filter = ko.observable();
this.locations = ko.observableArray([{ title:'Safa Bridge'},{ title:'Holy Mosque'},{ title:'Diamond Tower'},{ title:'Albaik Resturant'},{ title:'Zamzam'}]);
this.visibleLocations = ko.computed(function(){
return this.locations().filter(function(location){
if(!self.filter() || location.title.toLowerCase().indexOf(self.filter().toLowerCase()) !== -1)
return location;
});
},this);
}
ko.applyBindings(new ViewModel());
I definitely know that something is wrong with this snippet but can't figure out the right way to get it done. I want to paginate the page where students are displayed. If I put it plainly like this return SchoolStudents.find();, it works perfectly by returning all the students but this defeats the main purpose of pagination. I'm either not sure where the problem is, either in the publish function or the helper function. What I want to achieve is that the records in SchoolStudents colleciton should be paginated to display 2 records on a page.
This is the autorun
Session.setDefault('skip', 0);
Template.view.onCreated(function () {
Session.setPersistent('ReceivedSlug', FlowRouter.getParam('myslug'));
this.autorun(function () {
Meteor.subscribe('SchoolStudents', Session.get('skip'));
});
});
this is the helper method
students(){
let myslug = trimInput(Session.get('ReceivedSlug'));
if (myslug) {
let mySchoolDoc = SchoolDb.findOne({slug: myslug});
if (mySchoolDoc) {
let arrayModuleSchool = StudentSchool.find({schoolId: mySchoolDoc._id});
if (arrayModuleSchool) {
var arrayStudentIds = [];
arrayModuleSchool.forEach(function(studentSchool){
arrayStudentIds.push(studentSchool.studentId);
});
let subReadiness = SchoolStudents.find({_id: {$in: arrayStudentIds}}).fetch();
if (subReadiness) {
return subReadiness;
}
}
}
}
}
This is the publish method
Meteor.publish('SchoolStudents', function (skipCount) {
check(skipCount, Number);
user = Meteor.users.findOne({_id:this.userId})
if(user) {
if(user.emails[0].verified) {
return SchoolStudents.find({userId: this.userId}, {limit: 2, skip: skipCount});
} else {
throw new Meteor.Error('Not authorized');
return false;
}
}
});
Blaze template
<section class="tab-section" id="content4">
{{#each student in students}}
<div class="row" style="margin-top: -20px;">
<!-- Begin Listing: 609 W GRAVERS LN-->
<div class="brdr bgc-fff pad-10 box-shad btm-mrg-20 property-listing card-1">
<div class="media">
<div class="media-body fnt-smaller">
<h4 class="media-heading">{{student.firstname}} {{student.lastname}}</h4>
<p class="hidden-xs" style="margin-bottom: 5px; margin-top: -10px;">{{trimString student.useremail 0 110}}</p><span class="fnt-smaller fnt-lighter fnt-arial">{{course.createdAt}}</span>
</div>
</div>
</div><!-- End Listing-->
</div>
{{/each}}
<ul class="pager">
<li class="studentprevious">Previous </li>
<li class="studentnext">Next </li>
</ul>
</section>
the pagination event
'click .studentprevious': function () {
if (Session.get('skip') > 0 ) {
Session.set('skip', Session.get('skip') - 2 );
}
},
'click .studentnext': function () {
Session.set('skip', Session.get('skip') + 2 );
}
I have a list like
<ul>
<li>Name1</li>
<li>Name2 </li>
</ul>
Using npm cheerio how do I add each list element text as lowercase id to the element itself?
So the outcome would be
<ul>
<li id="name1">Name1</li>
<li id="name2">Name2 </li>
</ul>
At the moment I'm adding a static id using
var cheerio = require('cheerio'),
$ = cheerio.load('<ul><li>Hello world</li></ul>');
$('li').attr('id', 'new-id')
console.log( $.html() )
Thanks
This should do the trick..
$('li').each( function(i, elem) {
$(this).attr('id', $(this).text().toLowerCase().replace(/\s/g, '') );
})
On a page I need to scrape (with node.js and cheerio), I have this pattern:
<h2>
<span id="2015"></span>
<span class="ignore-me"></span>
</h2>
<div>
<ol>
<li>
<a title="TITLE1" href="HREF1"></a>
<a class="image" title="ignore-me-1" href="ignore-me-1"></a>
</li>
...
<li>
<a title="TITLE2" href="HREF2"></a>
<a class="image" title="ignore-me-2" href="ignore-me-2"></a>
</li>
</ol>
</div>
I would like to extract a list with TITLEs an HREFs.
I am trying something like this:
$('h2 > span[id="2015"]').next('ol > li > a').each(function(index, element) {
console.log('title:', element.attr('title'), 'href:', element.attr('href'));
});
without success (each loop is never entered...).
Any suggestion?
The ol element isn't actually the next element of span#2015. The ol element is inside a div which is the next element of h2. The right tree traversal is :
$('h2 > span[id="2015"]')
.parent()
.next('div')
.find('ol > li > a:not([class])')
.each(function() {
var $el = $(this);
console.log('title:', $el.attr('title'), 'href:', $el.attr('href'));
});
The h2 tag does not have an ID, thus your selector finds no results, nothing to loop over.
You could easily do it by looping anchor tags.
$("a").each(function(i, e) {
if (e.attr('title') && e.attr('href')) console.log("... stuff ...");
});
Or you can give your h2 an id, or remove the id from your selector. Many ways to loop.