网页抓取又称网络爬虫,我们用它来获取目标网站、目标模块的最新动态,可以直接把页面内容抓取过来。我们提供抓取目标的 URL 地址,然后写一个方法,抓取该 URL 下的内容。
<!-- lang: c# -->
/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns its body as text.
/// </summary>
/// <param name="url">Address of the resource to fetch.</param>
/// <returns>The complete response body decoded to a string.</returns>
/// <remarks>
/// For HTTP responses whose Content-Type header explicitly declares a charset that this
/// runtime supports, that charset is used to decode the body. In every other case the
/// body is decoded with <c>Encoding.Default</c>.
/// </remarks>
public string getContextByUrl(string url)
{
    WebRequest request = WebRequest.Create(url);

    // The using blocks release the response and its stream even when reading throws.
    using (WebResponse response = request.GetResponse())
    using (Stream dataStream = response.GetResponseStream())
    {
        Encoding encoding = Encoding.Default;

        // Only trust the charset when the header really names one; an absent charset
        // must not override the default encoding.
        HttpWebResponse httpResponse = response as HttpWebResponse;
        if (httpResponse != null)
        {
            string contentType = httpResponse.ContentType;
            bool declaresCharset = contentType != null
                && contentType.IndexOf("charset=", System.StringComparison.OrdinalIgnoreCase) >= 0;

            if (declaresCharset && !string.IsNullOrEmpty(httpResponse.CharacterSet))
            {
                try
                {
                    encoding = Encoding.GetEncoding(httpResponse.CharacterSet);
                }
                catch (System.ArgumentException)
                {
                    // Unknown or unsupported charset name: keep Encoding.Default.
                }
            }
        }

        using (StreamReader reader = new StreamReader(dataStream, encoding))
        {
            return reader.ReadToEnd();
        }
    }
}
然后我们调用此方法:
<!-- lang: c# -->
// Fetch the raw page text for the target URL.
string result = getContextByUrl(url);

// Returns at most maxLength characters of text, starting at the first occurrence of
// marker. Yields null when text is null or the marker is absent, and a shorter string
// when fewer than maxLength characters remain, instead of throwing
// ArgumentOutOfRangeException from Substring.
System.Func<string, string, int, string> sliceFrom = (text, marker, maxLength) =>
{
    if (text == null)
    {
        return null;
    }

    int start = text.IndexOf(marker, System.StringComparison.Ordinal);
    if (start < 0)
    {
        return null;
    }

    return text.Substring(start, System.Math.Min(maxLength, text.Length - start));
};

// Narrow the search to the text following "tb-detail-hd", then take the text that
// starts at the first "<h3>" inside that window.
string name = sliceFrom(result, "tb-detail-hd", 450);
string aa = sliceFrom(name, "<h3>", 50);

// The marker searched for the price depends on which kind of URL was requested.
// NOTE(review): "<strong class>" is kept exactly as written; confirm it matches the real page markup.
string price = null;
if (url.Contains("detail"))
{
    price = sliceFrom(result, "<strong>", 50);
}
else if (url.Contains("item"))
{
    price = sliceFrom(result, "<strong class>", 50);
}

// Show the extracted fragments; a null value means the marker was not found.
Label1.Text = aa;
Label2.Text = price;