javascript - Getting news description with Cheerio -
I'm trying to get the text of the 'a' tag in every entry of this webpage:
https://hn.algolia.com/?query=apple&sort=bypopularity&prefix&page=0&daterange=all&type=story
I have parsed other webpages before, but I'm having issues with this one. Here's my code:
var cheerio = require('cheerio');
var request = require('request');

/*
 * Fetches the Hacker News search results page and prints the link text of
 * every entry whose link URL contains a '/' after its first character.
 *
 * NOTE(review): if the page fills in its results client-side, the raw HTML
 * fetched here contains no entries and nothing is printed.
 */
request({
  method: 'get',
  url: 'https://hn.algolia.com/?query=apple&sort=bypopularity&prefix&page=0&daterange=all&type=story'
}, function (err, response, body) {
  if (err) return console.error(err);

  // Tell cheerio to load the HTML. Declared locally so `$` does not leak
  // as an implicit global.
  var $ = cheerio.load(body);

  // list = [];
  // $('div[id="item-main"]').each(function(){
  //   var href = $(this).find('div > div').attr('h2');
  //   list.push(h2);
  // });

  // A bare 'item-title-and-infos' is a tag selector and matches nothing.
  // Assumes it is a class on each entry container - TODO confirm against
  // the rendered markup.
  $('.item-title-and-infos').each(function () {
    // The href lives on the anchor inside the heading, not on the <h2>
    // itself (presumably <h2><a href="...">; verify against the page).
    var href = $('h2 a', this).attr('href');

    // attr() returns undefined when no anchor matched; guard before
    // calling a string method on it.
    if (typeof href === 'string' && href.lastIndexOf('/') > 0) {
      console.log($('a', this).text());
    }
  });
});
thanks.
The problem is that the content is loaded asynchronously: first an empty page is loaded, and only afterwards is the content you are looking for loaded.
Just console.log the body in the request callback and you will see something like:
<!doctype html>
<html ng-app='hnsearch'>
<head ng-controller='headctrl'>
  <!-- ... links, meta ... -->
</head>
<body>
  <div id='main' ng-cloak role='main' ui-view>
    <!-- No content here! It is loaded afterwards. -->
  </div>
  <script src="https://d3nb9u6x572n0.cloudfront.net/assets/application-70dfa2f5ecb75bc8dfaa8729257bcbf1.js"></script>
</body>
</html>
If you inspect the page with Google Chrome, you can see this link being called afterwards:
https://uj5wyc0l7x-dsn.algolia.net/1/indexes/item_production/query?x-algolia-api-key=8ece23f8eb07cd25d40262a1764599b1&x-algolia-application-id=uj5wyc0l7x&x-algolia-agent=algolia%20for%20angularjs%203.7.5
So I found a solution that may help you:
/*
 * Queries the Algolia search endpoint that the Hacker News search page
 * calls after it loads, and prints the raw JSON response body.
 *
 * Expects `request` (the `request` package) to already be in scope.
 */

// Algolia search parameter names are case-sensitive (hitsPerPage,
// tagFilters, ...), so they are spelled in their documented camelCase form.
var searchParams = [
  'query=apple',
  'hitsPerPage=20',
  'minWordSizefor1Typo=5',
  'minWordSizefor2Typos=9',
  'advancedSyntax=true',
  'ignorePlurals=false',
  'tagFilters=%5b%22story%22%5d',
  'numericFilters=%5b%5d',
  'page=0',
  'queryType=prefixLast',
  'typoTolerance=true',
  'restrictSearchableAttributes=%5b%5d'
].join('&');

request.post({
  // NOTE(review): the application id appears lower-cased in this URL;
  // confirm the exact casing the service expects.
  url: 'https://uj5wyc0l7x-dsn.algolia.net/1/indexes/item_production/query?x-algolia-api-key=8ece23f8eb07cd25d40262a1764599b1&x-algolia-application-id=uj5wyc0l7x&x-algolia-agent=algolia%20for%20angularjs%203.7.5',
  // The endpoint receives a JSON object whose `params` field is the
  // URL-encoded query string.
  body: JSON.stringify({ params: searchParams }),
  // Lets `request` advertise and transparently decode gzip/deflate. No
  // explicit accept-encoding header is sent: advertising `br` could yield
  // a response this option cannot decode.
  gzip: true,
  // content-length and host are deliberately omitted; `request` derives
  // them from the body and URL, so they cannot drift out of sync.
  headers: {
    accept: 'application/json',
    'accept-language': 'es-es,es;q=0.8',
    'cache-control': 'no-cache',
    connection: 'keep-alive',
    'content-type': 'application/x-www-form-urlencoded',
    origin: 'https://hn.algolia.com',
    pragma: 'no-cache',
    referer: 'https://hn.algolia.com/?query=apple&sort=bypopularity&prefix&page=0&daterange=all&type=story',
    'user-agent': 'mozilla/5.0 (windows nt 6.1; wow64) applewebkit/537.36 (khtml, gecko) chrome/55.0.2883.87 safari/537.36'
  }
}, function (err, res, body) {
  // Report transport failures instead of printing an undefined body.
  if (err) return console.error(err);
  console.log(body);
});
Now body is a huge JSON document containing the data. Hope this helps you.
Comments
Post a Comment