I want to get the flights table for every city from a web page. Before I can do that, I have to set the search fields and hit the search button, and before that I have to enter a captcha on the first page. The web site is built with .NET. I enter the captcha myself and want to do the rest with Node.js. Here is what I'm doing. First, I enter the captcha and, once the page has loaded, I look at Firebug, set the request headers and request URL, and send them using the http.request method to scrape the page again with Node.js. After getting the view state of the page, again using Firebug, I set the request headers, the "post data" and the request URL, and send them using http.request to scrape the final page. The information I set using Firebug is fixed; I mean the URLs, header options and post data. I only have to change the city name in the post data. But the page I get back has an empty table. Is this possible, and if so, what should I do? (Sorry for my bad English :) ) The URL (in Persian): http://sepehr.iranhrc.ir. Thanks in advance.
// Scrapes the flight-results markup from sepehr.iranhrc.ir in two steps:
//   1. GET the search page and read the view state hidden field from it.
//   2. POST the search form together with that view state, using the same
//      headers the browser's AJAX call sends, and print the response.
var http = require('follow-redirects').http;
var querystring = require('querystring');
var cheerio = require('cheerio');

// Form fields posted with the search request. `__viewstate` is filled in
// from the search page before the POST is sent.
var datatoattach = {
    'scriptmanager1': 'uplflightsearch|btnsubmit37756070715319',
    '__asyncpost': true,
    '__eventargument': '',
    '__eventtarget': '',
    '__lastfocus': '',
    '__viewstate': '',
    '__viewstategenerator': 'e4cf65f9',
    'btnsubmit37756070715319': '?????',
    'dplfrom': 'thr',
    'dplreservationroutetype': 'roundtrip_fixeddate',
    'dplto': '0',
    'dplflightadults': '1',
    'dplflightchilds': '0',
    'dplflightinfants': '0',
    'txtcountup': '00:26',
    'txtdeparturedate': '1394/04/02',
    'txtreturningdate': '1394/04/04'
};

// Request options for step 1 (loading the search page).
var flightssearchpageros = {
    hostname: 'sepehr.iranhrc.ir',
    path: '/systems/fa/reservation/flight_newreservation_search.aspx?qry=sbv7wbdq4b7yek1yv0opvmofqkdkbwh49wjk6uimgiw95zdjdgo0/sswjh8wjv1d',
    method: 'get',
    headers: {
        'user-agent': 'mozilla/5.0 (windows nt 6.1; wow64; rv:38.0) gecko/20100101 firefox/38.0',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'accept-language': 'en-us,en;q=0.5',
        'connection': 'keep-alive',
        'cache-control': 'no-cache',
        'cookie': 'asp.net_sessionid=2iexj4pfxld4mdilfwttka2q;',
        'content-type': 'text/html; charset=utf-8',
        'host': 'sepehr.iranhrc.ir',
        'referer': 'sepehr.iranhrc.ir'
    }
};

// Request options for step 2 (posting the search form).
// NOTE(review): this request sends a different asp.net_sessionid than the
// search-page request above; presumably both should belong to the same
// session - confirm against a working capture.
var resultspageros = {
    hostname: 'sepehr.iranhrc.ir',
    path: '/systems/fa/reservation/flight_newreservation_search.aspx?action=display&rnd=2378726045210585',
    method: 'post',
    headers: {
        'user-agent': 'mozilla/5.0 (windows nt 6.1; wow64; rv:38.0) gecko/20100101 firefox/38.0',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'accept-language': 'en-us,en;q=0.5',
        'cache-control': 'no-cache',
        'connection': 'keep-alive',
        'cookie': 'asp.net_sessionid=o1ipad335qahuaahc25ngalr;',
        // Placeholder; overwritten with the real byte length just before sending.
        'content-length': '',
        'content-type': 'application/x-www-form-urlencoded; charset=utf-8',
        'referer': 'http://sepehr.iranhrc.ir/systems/fa/reservation/flight_newreservation_search.aspx',
        'host': 'sepehr.iranhrc.ir',
        'pragma': 'no-cache',
        'x-microsoftajax': 'delta=true',
        'x-requested-with': 'xmlhttprequest'
    }
};

// Step 1: fetch the search page and extract the view state from it.
var flightssearchpage = http.request(flightssearchpageros, function(response){
    // Decode as text up front so multi-byte characters split across chunks
    // are not corrupted by per-chunk string conversion.
    response.setEncoding('utf8');
    var datastream = '';
    response.on('data', function(chunk){
        datastream += chunk;
    });
    response.on('end', function(){
        var htmlcode = cheerio.load(datastream);
        // An id selector is required here; a bare "__viewstate" would be
        // treated as a tag name and match nothing. The upper-case id is the
        // standard ASP.NET hidden-field name - verify against the page source.
        var viewstate = htmlcode('#__VIEWSTATE').val();
        if (viewstate === undefined) {
            console.log("error0: __VIEWSTATE field not found in search page");
            return;
        }
        // Store the value on the object that is actually posted.
        datatoattach.__viewstate = viewstate;
        resultspagerequest();
    });
});
flightssearchpage.on('error', function(e){
    console.log("error0: " + e.message);
});
flightssearchpage.end();

/**
 * Step 2: posts `datatoattach` as a URL-encoded form using `resultspageros`
 * and logs the response status and the parsed response markup.
 */
function resultspagerequest(){
    // Serialize once so the declared length and the sent body always agree.
    var body = querystring.stringify(datatoattach);
    // Content-Length is a byte count, not a character count.
    resultspageros.headers['content-length'] = Buffer.byteLength(body);

    var changingcitiesboxresponse = http.request(resultspageros, function(response){
        response.setEncoding('utf8');
        var datastream = '';
        response.on('data', function(chunk){
            datastream += chunk;
        });
        response.on('end', function(){
            var htmlcode = cheerio.load(datastream);
            console.log(htmlcode.html());
        });
        console.log('status: ' + response.statusCode);
    });
    changingcitiesboxresponse.on('error', function(e){
        console.log("error1: " + e.message);
    });
    changingcitiesboxresponse.end(body);
}
// edit
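One thing I forgot to mention is that this has already been done in PHP with cURL. I looked at the cURL part of that code and did the same thing in Node with http.request. The PHP version returns the correct answer, but mine does not. I also tried PhantomJS. This is the code: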
// Second attempt: send the same search POST through PhantomJS and print the
// resulting page body.
// Assumes `phantom`, `querystring` and `seconddatatoattach` are defined
// earlier in the script - they are not shown in this snippet.
var url = "http://sepehr.iranhrc.ir/systems/fa/reservation/flight_newreservation_search.aspx?action=display&rnd=4565721642440773";

// Settings passed to page.open: HTTP operation, encoding, headers and body.
var settings = {
    operation: "post",
    encoding: "utf8",
    weak: false,
    headers: {
        'user-agent': 'mozilla/5.0 (windows nt 6.1; wow64; rv:38.0) gecko/20100101 firefox/38.0',
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'accept-language': 'en-us,en;q=0.5',
        'cache-control': 'no-cache',
        'connection': 'keep-alive',
        'cookie': 'asp.net_sessionid=2iexj4pfxld4mdilfwttka2q;',
        // No 'content-length' entry: an empty string is not a valid value for
        // that header, so it is omitted rather than sent blank.
        'content-type': 'application/x-www-form-urlencoded; charset=utf-8',
        'referer': 'http://sepehr.iranhrc.ir/systems/fa/reservation/flight_newreservation_search.aspx?qry=sbv7wbdq4b7yek1yv0opvmofqkdkbwh49wjk6uimgiw95zdjdgo0/sswjh8wjv1d',
        'host': 'sepehr.iranhrc.ir',
        'pragma': 'no-cache',
        'x-microsoftajax': 'delta=true',
        'x-requested-with': 'xmlhttprequest'
    },
    data: querystring.stringify(seconddatatoattach)
};

phantom.create(function (ph){
    ph.createPage(function (page){
        // Open the URL declared above (the original referenced an undefined `url2`).
        page.open(url, settings, function(status){
            console.log(status);
            // Runs inside the page and hands its body markup back to Node.
            page.evaluate(function(){
                return document.body.innerHTML;
            }, function(result){
                console.log('content ' + result);
                ph.exit();
            });
        });
    });
}, {dnodeOpts: {weak: false} });
// But that didn't give me the answer either. And yes, the site uses AJAX to
// send the request. According to Firebug:
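The first request sends the post data; the second one, I think, is a redirect that gives the link to the result page (I used that link, but with no luck); and the third is the result page with the flight details, which I can't get. I may have used them wrongly. How can I use these three requests to get the flights table?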
I think what's happening is that the page you want the results from loads them through a separate AJAX request, and a regular request isn't going to trigger that. You'll need to either figure out that separate request and make it yourself, or scrape the page with PhantomJS, which can execute the page's JavaScript.
First, though, open the page in your browser as usual, watch the Network tab, and locate the request that loads the data.
Comments
Post a Comment