I use Node.js and I want to fetch the page http://myanimelist.net/includes/ajax.inc.php?t=64&id=1
and extract some data from it. I wasn't able to do it with cheerio, because I've never encountered this kind of page before. I'd be glad if someone could tell me how to parse such pages and which Node module to use for it; I wasn't able to figure it out with Google, although I understand that it should be easy and I'm probably just asking a silly question.
Here is a sample of the output that my code extracts from the page's HTML:
{
"description": "In the year 2071, humanity has colonized several of the planets and moons of the solar system leaving the now uninhabitable surface of planet Earth behind. The Inter Solar System Police attempts to ke...",
"genres": "Action, Adventure, Comedy, Drama, Sci-Fi, Space",
"status": "Finished Airing",
"type": "TV",
"episodes": "26",
"score": "8.83",
"ranked": "#22",
"popularity": "#31",
"members": "419,197"
}
Below is the code that extracts the info from the page and saves it in an object as key/value pairs (i.e., like the one above):
// Browser-console scraper: run on the ajax.inc.php page with jQuery available as `$`.
// NOTE: it empties elements in the live DOM as it goes, so reload the page before re-running.
var $body = $('body');

// Empty every child element of the <div> so .text() returns only the div's own text.
$('div').children().empty();
var description = $('div').text().trim();

// The span texts are expected to concatenate to "Label:Label:...:", so the
// final item produced by split(':') is an empty leftover; drop it.
var keys = $('body span').text().split(':');
keys.splice(-1, 1);

// Once every child of <body> is emptied, the remaining body text is just the
// loose values, expected one per line and in the same order as the labels.
$body.children().empty();
var values = $body.text().trim().split('\n');

var result = {
  description: description
};
for (var j = 0; j < keys.length; j++) {
  // Use '' when the page has fewer value lines than labels instead of
  // throwing a TypeError on values[j].trim().
  var value = values[j];
  result[keys[j].toLowerCase().trim()] = typeof value === 'string' ? value.trim() : '';
}
console.log('result', result);
To test the above code, open http://myanimelist.net/includes/ajax.inc.php?t=64&id=1 and paste the script into the Dev Tools inspector -> Console. Running it as-is will throw an error because jQuery isn't loaded on that page, so first add jQuery to the page manually as described here: https://stackoverflow.com/a/7474394/5228251
To parse the page from Node.js, you need to port the code above to cheerio.
Use the request and cheerio npm modules.
Install the modules:
$ npm install request --save
$ npm install cheerio --save
Then use this script:
var cheerio = require('cheerio'),
request = require('request');
/**
 * Parse the HTML fragment returned by the details endpoint into a flat object.
 *
 * The fragment is wrapped in <body> so the 'body ...' selectors below have a
 * root element to match against.
 *
 * @param {string} html - Raw response body of the endpoint.
 * @returns {Object} Object with a `description` key plus one lower-cased key
 *   per "Label:" span; a label with no matching value line maps to ''.
 */
function parseAnimeDetails(html) {
  var $ = cheerio.load('<body>' + (html == null ? '' : html) + '</body>');
  var $body = $('body');

  // Empty the div's child elements so .text() returns only the div's own text.
  $('body div').children().empty();
  var description = $('body div').text().trim();

  // Span texts are expected to concatenate to "Label:Label:...:", so the last
  // item produced by split(':') is an empty leftover; drop it.
  var keys = $('body span').text().split(':');
  keys.splice(-1, 1);

  // With every child of <body> emptied, the remaining text is the loose
  // values, expected one per line in the same order as the labels.
  $body.children().empty();
  var values = $body.text().trim().split('\n');

  var result = {
    description: description
  };
  for (var j = 0; j < keys.length; j++) {
    // Guard against fewer value lines than labels (would otherwise throw).
    var value = values[j];
    result[keys[j].toLowerCase().trim()] = typeof value === 'string' ? value.trim() : '';
  }
  return result;
}

/**
 * Fetch the details page and hand the parsed result to `callback`.
 *
 * @param {function(?Object, ?Error)} callback - Called exactly once. The first
 *   argument is the parsed object, or null on failure (unchanged from the
 *   original contract). The second argument is new and optional for callers:
 *   null on success, otherwise the Error describing why the scrape failed.
 * @param {string} [url] - Page to scrape; defaults to the original hard-coded
 *   endpoint so existing `scrapePage(callback)` calls keep working.
 */
function scrapePage(callback, url) {
  var targetUrl = url || 'http://myanimelist.net/includes/ajax.inc.php?t=64&id=1';

  request(targetUrl, function (error, response, body) {
    if (error) {
      callback(null, error);
      return;
    }
    if (!response || response.statusCode !== 200) {
      callback(null, new Error(
        'Unexpected HTTP status: ' + (response ? response.statusCode : 'no response')
      ));
      return;
    }

    var result;
    try {
      result = parseAnimeDetails(body);
    } catch (parseError) {
      // Report parse failures to the caller instead of crashing the process
      // from inside the asynchronous request callback.
      callback(null, parseError);
      return;
    }
    // Invoked outside the try block so exceptions thrown by the caller's own
    // callback are not mistaken for parse errors.
    callback(result, null);
  });
}
Usage:
// Run the scraper and print whatever it hands back.
function printScraped(scraped) {
  console.log('result', scraped);
}
scrapePage(printScraped);
Hope this helps.
Collected from the Internet
Please contact [email protected] to delete if infringement.
Comments