import com.jaunt.*; public class jauntcrawler{ public static void main(string[] args){ try{ useragent useragent = new useragent(); //create new useragent (headless browser) useragent.visit("http://google.de"); //visit google useragent.doc.apply("schmetterlinge"); //apply form input (starting @ first editable field) useragent.doc.submit(); //click submit button labelled "google search" elements links = useragent.doc.findevery("<h3 class=r>").findevery("<a>"); //find search result links for(element link : links) system.out.println(link.getat("href")); //print results if(useragent.doc.nextpagelinkexists()) { useragent.visit(useragent.doc.nextpagelink().gethref()); elements newlinks = useragent.doc.findevery("<h3 class=r>").findevery("<a>"); system.out.println("\npage 2:"); for(element link : newlinks) system.out.println(link.getat("href")); } } catch(jauntexception e){ //if http/connection error occurs, handle jauntexception. system.err.println(e); } } } i want return more search results google first page. second for-loop should return results of next page, doesn't. idea why?
I came across the same problem: the user agent was not going to the next page. I found a way to achieve this:
// Workaround for paging: instead of following a next-page link, request each
// further result page directly through the "start" offset in the search URL.
// NOTE(review): this fragment assumes the enclosing code declares
// `UserAgent userAgent`, `Elements links` and a `String search_string`
// holding the query, and that JauntException is handled there — confirm.
Elements nextLinks = userAgent.doc.findEvery("<a class=fl>");
for (int i = 0; i < nextLinks.size(); i++) {
    // one request per <a class=fl> element found; offsets are 10, 20, 30, ...
    userAgent.visit("http://google.co.in/search?q=" + search_string + "&start=" + (i + 1) * 10);
    links = userAgent.doc.findEvery("<h3 class=r>").findEvery("<a>");
    for (Element link : links) {
        System.out.println(link.getAt("href"));
    }
}
Comments
Post a Comment