jsoup获取文档类示例

 

 1 import java.io.IOException;  2 
 3 import org.jsoup.Jsoup;  4 import org.jsoup.nodes.Document;  5 import org.jsoup.nodes.Element;  6 import org.jsoup.select.Elements;  7 
 8 
 9 public class WyCrawler { 10     public static void main(String[] args) { 11         try { 12             Document document = Jsoup.connect("http://某网页").timeout(3000).get(); 13             String selector = "li>div[class=titleBar clearfix]>h3>a"; 14             Elements elements = document.select(selector); 15             for(Element element:elements){ 16 // System.out.println(element.text());
17                 String url = element.absUrl("href"); 18                 Document document2 = Jsoup.connect(url).get(); 19                 Elements elements2 = document2.select("#endText"); 20                 for(Element element2:elements2){ 21  System.out.println(element2.text()); 22  } 23  } 24         } catch (IOException e) { 25  e.printStackTrace(); 26  } 27  } 28 }

上面是如何爬取超链接里的内容，下面的 Java 示例比较简单：

 1 import java.io.IOException;  2 
 3 import org.jsoup.Jsoup;  4 import org.jsoup.nodes.Document;  5 import org.jsoup.nodes.Element;  6 import org.jsoup.select.Elements;  7 
 8 
 9 public class Test { 10     public static void main(String[] args) { 11         try { 12             Document document = Jsoup.connect("http://www.某网页.com/").get(); 13             //获取内容 14 // String selector = "div[class=panel panel20 post-item post-box]>div[class=item-detail]>div[class=item-content]"; 15 // Elements elements = document.select(selector); 16 // for(Element element:elements){ 17 // System.out.println(element.text()); 18 // } 19             
20             //获取标题 21 // String selector2 = "div[class=panel panel20 post-item post-box]>div[class=item-detail]>h2[class=item-title]"; 22 // Elements elements = document.select(selector2); 23 // for(Element element:elements){ 24 // System.out.println(element.text()); 25 // } 26             
27             //综合写法,标题内容一块儿获取
28             String selector = "div[class=panel panel20 post-item post-box]>div[class=item-detail]"; 29             Elements elements = document.select(selector); 30             for(Element element:elements){ 31                 Elements titles = element.select("div[class=item-title]"); 32                 Elements content = element.select("h2[class=item-content]"); 33                 System.out.println(titles.text()+"\n"+content.text()); 34  } 35             
36             
37             
38         } catch (IOException e) { 39  e.printStackTrace(); 40  } 41  } 42 }
相关文章
相关标签/搜索