---恢复内容开始---
因为工作需要,需要做一个小程序,实现网站监控:当浏览器访问到目标网站的时候,爬取页面数据。由于HTTPS存在证书验证,需要用别的方式来处理,暂时还没研究,但必须保证网站能正常访问。
我在官方Demo的基础上进行了修改。官方Demo地址:http://fiddler.wikidot.com/fiddlercore-demo
下面我来介绍一下我的Demo。
首先要去下载FiddlerCore4.dll,百度上能找到很多。
先上代码:
主界面代码:
using System;
using System.Collections.Generic;
using System.IO;
using System.Reflection;
using System.Threading;
using Fiddler;
using System.Net;
using System.Text.RegularExpressions;
using System.Text;
using System.Diagnostics;
using System.Runtime.InteropServices;

namespace FiddlerDemo
{
    /// <summary>
    /// Console entry point of the demo. Installs a native console control handler so
    /// that FiddlerCore is shut down when the console is interrupted or closed, then
    /// hands control to <see cref="FiddlerHelp.StartSession"/>.
    /// </summary>
    class Program
    {
        /// <summary>Optional extra proxy endpoint; disposed by <see cref="DoQuit"/> when set.</summary>
        public static Proxy oProxy;

        #region Clean up when the console is interrupted or closed

        // Control type values passed by Windows to a console HandlerRoutine callback.
        private const int CtrlCEvent = 0;        // Ctrl+C
        private const int CtrlBreakEvent = 1;    // Ctrl+Break
        private const int CtrlCloseEvent = 2;    // console window close button
        private const int CtrlLogoffEvent = 5;   // user is logging off
        private const int CtrlShutdownEvent = 6; // system is shutting down

        /// <summary>Managed signature of the native console control handler callback.</summary>
        /// <param name="CtrlType">Control signal identifier supplied by Windows.</param>
        /// <returns><c>true</c> if the signal was handled; <c>false</c> to let the next handler run.</returns>
        public delegate bool ControlCtrlDelegate(int CtrlType);

        [DllImport("kernel32.dll")]
        private static extern bool SetConsoleCtrlHandler(ControlCtrlDelegate HandlerRoutine, bool Add);

        // Kept in a static field so the delegate handed to native code is never garbage collected.
        private static ControlCtrlDelegate cancelHandler = new ControlCtrlDelegate(HandlerRoutine);

        /// <summary>
        /// Console control callback. Runs <see cref="DoQuit"/> for every signal that
        /// terminates the process, so FiddlerCore is shut down before the process exits.
        /// </summary>
        /// <param name="CtrlType">Control signal identifier supplied by Windows.</param>
        /// <returns>Always <c>false</c>, so the next registered handler still runs.</returns>
        public static bool HandlerRoutine(int CtrlType)
        {
            switch (CtrlType)
            {
                case CtrlCEvent:
                case CtrlBreakEvent:
                case CtrlCloseEvent:
                case CtrlLogoffEvent:
                case CtrlShutdownEvent:
                    // Previously only Ctrl+C (0) and the close button (2) cleaned up;
                    // the remaining terminating signals skipped the shutdown entirely.
                    DoQuit();
                    break;
            }
            return false;
        }

        #endregion

        static void Main(string[] args)
        {
            SetConsoleCtrlHandler(cancelHandler, true);
            FiddlerHelp fh = new FiddlerHelp();
            fh.StartSession();
        }

        /// <summary>Handler compatible with <see cref="Console.CancelKeyPress"/>; shuts FiddlerCore down.</summary>
        static void Console_CancelKeyPress(object sender, ConsoleCancelEventArgs e)
        {
            DoQuit();
        }

        /// <summary>
        /// Disposes <see cref="oProxy"/> when it is set, shuts FiddlerCore down and waits
        /// briefly before returning.
        /// </summary>
        public static void DoQuit()
        {
            WriteCommandResponse("Shutting down...");
            if (null != oProxy)
            {
                oProxy.Dispose();
            }
            Fiddler.FiddlerApplication.Shutdown();
            Thread.Sleep(500);
        }

        /// <summary>Writes <paramref name="s"/> to the console in yellow, then restores the previous color.</summary>
        /// <param name="s">Text to print.</param>
        public static void WriteCommandResponse(string s)
        {
            ConsoleColor oldColor = Console.ForegroundColor;
            Console.ForegroundColor = ConsoleColor.Yellow;
            Console.WriteLine(s);
            Console.ForegroundColor = oldColor;
        }
    }
}
处理类:
using Fiddler;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using System.Configuration;
using System.Xml;

namespace FiddlerDemo
{
    /// <summary>
    /// One watch rule read from WatchHtml.xml: which URLs to watch and which regular
    /// expression to run against their response bodies.
    /// </summary>
    public class HtmlConfig
    {
        /// <summary>Substring that a session's full URL must contain to be watched.</summary>
        public string sWeb { get; set; }

        /// <summary>Regular expression applied to the response body of a watched URL.</summary>
        public string sRegex { get; set; }
    }

    /// <summary>
    /// Starts FiddlerCore, prints the regex match for every response whose URL matches a
    /// configured watch rule, and runs a small interactive console command loop.
    /// </summary>
    public class FiddlerHelp
    {
        /// <summary>Optional extra proxy endpoint; disposed by <see cref="DoQuit"/> when set.</summary>
        public static Proxy oProxy;

        /// <summary>Watch rules loaded by <see cref="LoadHtmlConfig"/>.</summary>
        public static List<HtmlConfig> dicHtml = new List<HtmlConfig> { };

        /// <summary>
        /// Loads the watch rules, wires up the FiddlerCore event handlers, starts the proxy
        /// on port 8877 and blocks in the console command loop until the user presses Q.
        /// </summary>
        public void StartSession()
        {
            LoadHtmlConfig();

            // Set the application display name.
            Fiddler.FiddlerApplication.SetAppDisplayName("FiddlerCoreDemoApp");

            List<Fiddler.Session> oAllSessions = new List<Fiddler.Session>();
            // The FiddlerCore handlers below and the console loop all touch oAllSessions,
            // so every access goes through this gate.
            object oSessionsLock = new object();

            Fiddler.FiddlerApplication.OnNotification += delegate(object sender, NotificationEventArgs oNEA)
            {
                Console.WriteLine("** NotifyUser: " + oNEA.NotifyString);
            };

            Fiddler.FiddlerApplication.BeforeRequest += delegate(Fiddler.Session oS)
            {
                // Buffering must be enabled so the complete response is available in the
                // BeforeResponse handler instead of being streamed straight to the client.
                oS.bBufferResponse = true;
                lock (oSessionsLock)
                {
                    oAllSessions.Add(oS);
                }
            };

            // OnReadResponseBuffer exposes the raw response bytes (not decompressed,
            // headers and body mixed) and is only useful to very few applications.
            //Fiddler.FiddlerApplication.OnReadResponseBuffer += new EventHandler<RawReadEventArgs>(FiddlerApplication_OnReadResponseBuffer);

            Fiddler.FiddlerApplication.BeforeResponse += delegate(Fiddler.Session oS)
            {
                bool bDecoded = false;
                foreach (var item in dicHtml)
                {
                    if (!oS.fullUrl.Contains(item.sWeb))
                    {
                        continue;
                    }

                    // Decode only responses we actually inspect, and only once per
                    // session, instead of rewriting every response that passes through.
                    if (!bDecoded)
                    {
                        oS.utilDecodeResponse();
                        bDecoded = true;
                    }

                    Console.WriteLine("{0}:HTTP {1} for {2}", oS.id, oS.responseCode, oS.fullUrl);
                    string sHtmlBody = oS.GetResponseBodyAsString();
                    if (!string.IsNullOrEmpty(sHtmlBody))
                    {
                        Console.Write("获取的内容为:" + MatchRegex(sHtmlBody, item.sRegex) + "\n");
                    }
                }
                // The session was already recorded in BeforeRequest; adding it again here
                // stored every session twice and doubled the reported count.
            };

            Fiddler.FiddlerApplication.AfterSessionComplete += delegate(Fiddler.Session oS)
            {
                int iCount;
                lock (oSessionsLock)
                {
                    iCount = oAllSessions.Count;
                    // Drop the recorded sessions once more than 1000 have accumulated.
                    if (iCount > 1000)
                    {
                        oAllSessions.Clear();
                    }
                }
                Console.Title = ("Session list contains: " + iCount.ToString() + " sessions");
            };

            Console.CancelKeyPress += new ConsoleCancelEventHandler(Console_CancelKeyPress);

            FiddlerApplication.Prefs.SetBoolPref("fiddler.network.streaming.abortifclientaborts", true);

            // Startup options.
            //FiddlerCoreStartupFlags oFCSF = FiddlerCoreStartupFlags.Default;
            Fiddler.CONFIG.IgnoreServerCertErrors = false;
            int iPort = 8877;
            //Fiddler.FiddlerApplication.Startup(iPort, oFCSF);
            Fiddler.FiddlerApplication.Startup(iPort, true, false, true);

            bool bDone = false;

            #region Console commands
            do
            {
                Console.WriteLine("\nEnter a command [C=Clear; L=List; G=Collect Garbage; W=write SAZ; R=read SAZ;\n\tS=Toggle Forgetful Streaming; T=Trust Root Certificate; Q=Quit]:>");
                ConsoleKeyInfo cki = Console.ReadKey();
                Console.WriteLine();
                // Invariant lowering keeps command matching independent of the current culture.
                switch (Char.ToLowerInvariant(cki.KeyChar))
                {
                    case 'c':
                        lock (oSessionsLock)
                        {
                            oAllSessions.Clear();
                        }
                        WriteCommandResponse("Clear...");
                        FiddlerApplication.Log.LogString("Cleared session list.");
                        break;

                    case 'd':
                        FiddlerApplication.Log.LogString("FiddlerApplication::Shutdown.");
                        FiddlerApplication.Shutdown();
                        break;

                    //case 'l':
                    //    WriteSessionList(oAllSessions);
                    //    break;

                    case 'g':
                        Console.WriteLine("Working Set:\t" + Environment.WorkingSet.ToString("n0"));
                        Console.WriteLine("Begin GC...");
                        GC.Collect();
                        Console.WriteLine("GC Done.\nWorking Set:\t" + Environment.WorkingSet.ToString("n0"));
                        break;

                    case 'q':
                        bDone = true;
                        DoQuit();
                        break;

                    case 'r':
                        WriteCommandResponse("This demo was compiled without SAZ_SUPPORT defined");
                        break;

                    case 'w':
                        WriteCommandResponse("This demo was compiled without SAZ_SUPPORT defined");
                        break;

                    case 't':
                        try
                        {
                            WriteCommandResponse("Result: " + Fiddler.CertMaker.trustRootCert().ToString());
                        }
                        catch (Exception eX)
                        {
                            WriteCommandResponse("Failed: " + eX.ToString());
                        }
                        break;

                    // Forgetful streaming
                    case 's':
                        bool bForgetful = !FiddlerApplication.Prefs.GetBoolPref("fiddler.network.streaming.ForgetStreamedData", false);
                        FiddlerApplication.Prefs.SetBoolPref("fiddler.network.streaming.ForgetStreamedData", bForgetful);
                        Console.WriteLine(bForgetful ? "FiddlerCore will immediately dump streaming response data." : "FiddlerCore will keep a copy of streamed response data.");
                        break;
                }
            } while (!bDone);
            #endregion
        }

        /// <summary>
        /// Reads the watched sites and their extraction regular expressions from
        /// WatchHtml.xml and appends them to <see cref="dicHtml"/>. Every child element of
        /// the <c>Root</c> node contributes one rule through its <c>Web</c> and
        /// <c>Regex</c> attributes.
        /// </summary>
        /// <exception cref="InvalidOperationException">The file has no <c>Root</c> element.</exception>
        private void LoadHtmlConfig()
        {
            // Prefer the project directory (when run from bin\Debug); otherwise fall back
            // to the directory the program runs in.
            string sPath = File.Exists(@"..\..\WatchHtml.xml")
                ? @"..\..\WatchHtml.xml"
                : @"WatchHtml.xml";

            XmlDocument xmlDoc = new XmlDocument();
            // The document was previously never loaded, so the Root lookup below always
            // returned null and the method failed with a NullReferenceException.
            xmlDoc.Load(sPath);

            XmlNode xn = xmlDoc.SelectSingleNode("Root");
            if (xn == null)
            {
                throw new InvalidOperationException("WatchHtml.xml does not contain a <Root> element: " + sPath);
            }

            // OfType skips comments and other non-element nodes, which the former
            // unconditional cast to XmlElement would have thrown on.
            foreach (XmlElement xe in xn.ChildNodes.OfType<XmlElement>())
            {
                HtmlConfig htmlConfig = new HtmlConfig
                {
                    sWeb = xe.GetAttribute("Web"),
                    sRegex = xe.GetAttribute("Regex")
                };
                dicHtml.Add(htmlConfig);
            }
        }

        /// <summary>Handler for <see cref="Console.CancelKeyPress"/>; shuts FiddlerCore down.</summary>
        public void Console_CancelKeyPress(object sender, ConsoleCancelEventArgs e)
        {
            DoQuit();
        }

        /// <summary>
        /// Quit: disposes <see cref="oProxy"/> when it is set, shuts FiddlerCore down and
        /// waits briefly before returning.
        /// </summary>
        public void DoQuit()
        {
            if (null != oProxy)
            {
                oProxy.Dispose();
            }
            Fiddler.FiddlerApplication.Shutdown();
            Thread.Sleep(500);
        }

        /// <summary>Writes <paramref name="s"/> to the console in yellow, then restores the previous color.</summary>
        /// <param name="s">Text to print.</param>
        public static void WriteCommandResponse(string s)
        {
            ConsoleColor oldColor = Console.ForegroundColor;
            Console.ForegroundColor = ConsoleColor.Yellow;
            Console.WriteLine(s);
            Console.ForegroundColor = oldColor;
        }

        /// <summary>
        /// Returns the first match of <paramref name="sRegex"/> in <paramref name="sHtml"/>.
        /// Line breaks are replaced by spaces and the text is trimmed before matching, so
        /// a pattern can span what were originally several lines.
        /// </summary>
        /// <param name="sHtml">The HTML page that was captured.</param>
        /// <param name="sRegex">The regular expression to apply (matched case-insensitively).</param>
        /// <returns>
        /// The matched text; an empty string when nothing matches; <c>null</c> when
        /// <paramref name="sHtml"/> is null or empty, or when <paramref name="sRegex"/> is
        /// null or not a valid pattern.
        /// </returns>
        public static string MatchRegex(string sHtml, string sRegex)
        {
            if (string.IsNullOrEmpty(sHtml))
            {
                return null;
            }

            try
            {
                string sFlattened = sHtml.Replace('\r', ' ').Replace('\n', ' ').Trim();
                Match result = Regex.Match(sFlattened, sRegex, RegexOptions.IgnoreCase | RegexOptions.Multiline);
                return result.Success ? result.Value : string.Empty;
            }
            catch (ArgumentException)
            {
                // A null or malformed pattern comes from the config file; report "no
                // result" rather than failing inside the FiddlerCore response handler.
                return null;
            }
        }
    }
}
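XML文件内容(WatchHtml.xml):根节点为Root,每个子节点用Web属性指定要监听的网址片段,用Regex属性指定提取数据的正则表达式(子节点名称不限)。例如:<Root><Item Web="example.com" Regex="正则表达式" /></Root>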
实现效果:
---恢复内容结束---