首页 > 学院 > 开发设计 > 正文

支持Cookie并开放了一些特殊设置项的HttpWebClient

2019-11-14 13:52:05
字体:
来源:转载
供稿:网友
  using System;
  using System.Collections.Generic;
  using System.Linq;
  using System.Text;
  using System.Net;
  using System.IO;
  using System.Collections.Specialized;
  using System.Web;

  namespace Common.Helpers
  {
      /// <summary>
      /// Network access helper: a <see cref="WebClient"/> that carries a
      /// <see cref="System.Net.CookieContainer"/> across requests and exposes a few
      /// request-level settings (User-Agent, timeout, 100-Continue, auto-redirect).
      /// </summary>
      public class HttpWebClient : WebClient
      {
          #region Public properties
          /// <summary>
          /// User-Agent string applied to every request. Both constructors initialise it to
          /// <see cref="UserAgentValues.FireFox"/>. When null or empty, the header is left
          /// exactly as it is in <see cref="WebClient.Headers"/>.
          /// </summary>
          public string UserAgent { get; set; }

          /// <summary>
          /// Cookie container assigned to every <see cref="HttpWebRequest"/> this client creates.
          /// </summary>
          public CookieContainer CookieContainer { get; set; }

          /// <summary>
          /// true if POST requests should wait for a 100-Continue response; otherwise false.
          /// Both constructors initialise it to false.
          /// </summary>
          public bool Expect100Continue { get; set; }

          private WebResponse m_LastWebResponse = null;
          /// <summary>
          /// The response object returned by the most recent call to the synchronous
          /// <see cref="GetWebResponse(WebRequest)"/> override; null until a response is received.
          /// </summary>
          public WebResponse LastWebResponse { get { return this.m_LastWebResponse; } }

          private int m_Timeout = 120000;
          /// <summary>
          /// Request timeout in milliseconds. Defaults to 120000 (120 seconds).
          /// </summary>
          public int Timeout
          {
              get { return m_Timeout; }
              set { m_Timeout = value; }
          }

          private HttpWebClientSetting m_HttpWebClientSetting = null;
          /// <summary>
          /// Extra settings for this client. Never returns null: the getter creates a default
          /// instance on demand and the setter replaces a null value with a default instance.
          /// </summary>
          public HttpWebClientSetting HttpWebClientSetting
          {
              get
              {
                  if (m_HttpWebClientSetting == null)
                  {
                      m_HttpWebClientSetting = new HttpWebClientSetting();
                  }
                  return m_HttpWebClientSetting;
              }
              set
              {
                  m_HttpWebClientSetting = value ?? new HttpWebClientSetting();
              }
          }

          /// <summary>
          /// Optional callback invoked with each <see cref="HttpWebRequest"/> after this class has
          /// applied its own settings to it. Defaults to null (no callback).
          /// </summary>
          public Action<HttpWebRequest> PrepareProcessWebRequest { get; set; }
          #endregion

          #region Constructors
          /// <summary>
          /// Creates a client with a fresh, empty <see cref="System.Net.CookieContainer"/>.
          /// </summary>
          public HttpWebClient()
              : this(new CookieContainer())
          {
          }

          /// <summary>
          /// Creates a client that uses the supplied cookie container.
          /// </summary>
          /// <param name="cookieContainer">Cookie container assigned to every HTTP request.</param>
          public HttpWebClient(CookieContainer cookieContainer)
          {
              this.CookieContainer = cookieContainer;
              this.UserAgent = UserAgentValues.FireFox;
              this.Expect100Continue = false;
          }
          #endregion

          #region Overrides: add CookieContainer support
          /// <summary>
          /// Creates the request through the base class, then applies the timeout and, for HTTP
          /// requests, the cookie container, 100-Continue flag and auto-redirect setting before
          /// handing the request to <see cref="PrepareProcessWebRequest"/>.
          /// </summary>
          /// <param name="address">The URI to request.</param>
          /// <returns>The configured request.</returns>
          protected override WebRequest GetWebRequest(Uri address)
          {
              if (!string.IsNullOrEmpty(this.UserAgent))
              {
                  // Assign instead of Add: if a User-Agent value is already present in the
                  // collection, Add would append a second value rather than replace it.
                  this.Headers[HttpRequestHeader.UserAgent] = this.UserAgent;
              }

              WebRequest request = base.GetWebRequest(address);
              request.Timeout = this.Timeout;

              HttpWebRequest httpRequest = request as HttpWebRequest;
              if (httpRequest != null)
              {
                  httpRequest.CookieContainer = this.CookieContainer;
                  httpRequest.ServicePoint.Expect100Continue = this.Expect100Continue;

                  // The HttpWebClientSetting getter never returns null, so no null check is needed.
                  httpRequest.AllowAutoRedirect = this.HttpWebClientSetting.AllowAutoRedirect;

                  // Let the caller make final adjustments to the request.
                  if (this.PrepareProcessWebRequest != null)
                  {
                      this.PrepareProcessWebRequest(httpRequest);
                  }
              }

              return request;
          }
          #endregion

          #region Overrides: expose the response object
          /// <summary>
          /// Obtains the response through the base class and remembers it in
          /// <see cref="LastWebResponse"/>.
          /// </summary>
          /// <param name="request">The request to get the response for.</param>
          /// <returns>The response returned by the base implementation.</returns>
          protected override WebResponse GetWebResponse(WebRequest request)
          {
              WebResponse response = base.GetWebResponse(request);
              this.m_LastWebResponse = response;
              return response;
          }
          #endregion

          #region (public) POST form data to a URL and return the response body - PostData (NameValueCollection)
          /// <summary>
          /// POSTs the given name/value pairs to a URL as
          /// <c>application/x-www-form-urlencoded</c> and returns the response body as a string.
          /// </summary>
          /// <remarks>
          /// History: ZhangQingFeng 2014-12-14 added; ZhangQingFeng 2015-05-12 edited.
          /// The form body is URL-encoded here and sent with UploadData rather than UploadValues
          /// because, per the author's reading of Microsoft's WebClient source
          /// (UploadValuesInternal), UploadValues always URL-encodes as UTF-8 regardless of
          /// <see cref="WebClient.Encoding"/>.
          /// As a side effect this method sets <see cref="WebClient.Encoding"/> to the request encoding.
          /// </remarks>
          /// <param name="url">The URL to post to.</param>
          /// <param name="data">The values to submit; null is treated as an empty collection.</param>
          /// <param name="encoding">Encoding used to URL-encode the request; null means UTF-8.</param>
          /// <param name="responseEncoding">Encoding used to decode the response; null means the request encoding.</param>
          /// <returns>The decoded response body.</returns>
          public string PostData(string url, NameValueCollection data, Encoding encoding, Encoding responseEncoding)
          {
              // Resolve the effective encoding once and use it everywhere; passing a null
              // Encoding straight to HttpUtility.UrlEncode would fail.
              Encoding requestEncoding = encoding ?? Encoding.UTF8;
              this.Encoding = requestEncoding;

              // Assign instead of Add so an existing Content-Type value is replaced, not appended to.
              this.Headers[HttpRequestHeader.ContentType] = "application/x-www-form-urlencoded";

              StringBuilder values = new StringBuilder();
              if (data != null)
              {
                  string delimiter = String.Empty;
                  foreach (string name in data.AllKeys)
                  {
                      values.Append(delimiter);
                      values.Append(HttpUtility.UrlEncode(name, requestEncoding));
                      values.Append("=");
                      values.Append(HttpUtility.UrlEncode(data[name], requestEncoding));
                      delimiter = "&";
                  }
              }

              // The URL-encoded body is built by HttpUtility.UrlEncode and sent as ASCII bytes.
              byte[] arrData = this.UploadData(url, "POST", Encoding.ASCII.GetBytes(values.ToString()));
              return (responseEncoding ?? requestEncoding).GetString(arrData);
          }

          /// <summary>
          /// POSTs the given name/value pairs to a URL and returns the response body, using the
          /// same encoding for the request and the response.
          /// </summary>
          /// <remarks>History: ZhangQingFeng 2014-12-14 added.</remarks>
          /// <param name="url">The URL to post to.</param>
          /// <param name="data">The values to submit.</param>
          /// <param name="encoding">Encoding used for both the request and the response.</param>
          /// <returns>The decoded response body.</returns>
          public string PostData(string url, NameValueCollection data, Encoding encoding)
          {
              return PostData(url, data, encoding, null);
          }

          /// <summary>
          /// POSTs the given name/value pairs to a URL and returns the response body, using
          /// <see cref="WebClient.Encoding"/> for both the request and the response.
          /// </summary>
          /// <remarks>History: ZhangQingFeng 2014-12-14 added.</remarks>
          /// <param name="url">The URL to post to.</param>
          /// <param name="data">The values to submit.</param>
          /// <returns>The decoded response body.</returns>
          public string PostData(string url, NameValueCollection data)
          {
              return PostData(url, data, this.Encoding);
          }
          #endregion

          #region (public) POST form data to a URL and return the response body - PostData (Dictionary)
          /// <summary>
          /// POSTs the given dictionary entries to a URL and returns the response body.
          /// </summary>
          /// <remarks>History: ZhangQingFeng 2014-12-14 added.</remarks>
          /// <param name="url">The URL to post to.</param>
          /// <param name="data">The values to submit; null is treated as empty.</param>
          /// <param name="encoding">Encoding used to URL-encode the request; null means UTF-8.</param>
          /// <param name="responseEncoding">Encoding used to decode the response; null means the request encoding.</param>
          /// <returns>The decoded response body.</returns>
          public string PostData(string url, Dictionary<string, string> data, Encoding encoding, Encoding responseEncoding)
          {
              NameValueCollection postData = new NameValueCollection();
              if (data != null)
              {
                  foreach (var item in data)
                  {
                      postData.Add(item.Key, item.Value);
                  }
              }
              return PostData(url, postData, encoding, responseEncoding);
          }

          /// <summary>
          /// POSTs the given dictionary entries to a URL and returns the response body, using the
          /// same encoding for the request and the response.
          /// </summary>
          /// <remarks>History: ZhangQingFeng 2014-12-14 added.</remarks>
          /// <param name="url">The URL to post to.</param>
          /// <param name="data">The values to submit.</param>
          /// <param name="encoding">Encoding used for both the request and the response.</param>
          /// <returns>The decoded response body.</returns>
          public string PostData(string url, Dictionary<string, string> data, Encoding encoding)
          {
              return PostData(url, data, encoding, null);
          }

          /// <summary>
          /// POSTs the given dictionary entries to a URL and returns the response body, using
          /// <see cref="WebClient.Encoding"/> for both the request and the response.
          /// </summary>
          /// <remarks>History: ZhangQingFeng 2014-12-14 added.</remarks>
          /// <param name="url">The URL to post to.</param>
          /// <param name="data">The values to submit.</param>
          /// <returns>The decoded response body.</returns>
          public string PostData(string url, Dictionary<string, string> data)
          {
              return PostData(url, data, this.Encoding);
          }
          #endregion

          #region Helper types
          /// <summary>
          /// Predefined browser User-Agent strings.
          /// </summary>
          public class UserAgentValues
          {
              public static readonly string FireFox = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0";
              public static readonly string Chrome = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36";
              public static readonly string IE8 = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2;)";
          }
          #endregion
      }

      /// <summary>
      /// Settings object for <see cref="HttpWebClient"/>.
      /// </summary>
      public class HttpWebClientSetting
      {
          private bool m_AllowAutoRedirect = true;
          /// <summary>
          /// Whether the client follows redirect responses automatically. When true, the
          /// response obtained is that of the redirect target; when false, it is the redirect
          /// response itself. Defaults to true.
          /// </summary>
          public bool AllowAutoRedirect
          {
              get { return m_AllowAutoRedirect; }
              set { m_AllowAutoRedirect = value; }
          }
      }
  }
HttpWebClient

在做页面抓取的过程中,发现自带的WebClient不够灵活,因此做了一个实现。

 

关于在PostData方法中不使用UploadValues()方法的原因:

1.查看微软的源代码实现时发现,无论请求的Encoding设置为何种编码(例如GB2312),WebClient的UploadValues()在上传内容时,内部始终使用UTF-8编码进行UrlEncode。因此当服务端按GB2312等非UTF-8编码解码时,提交的数据中若包含中文就会乱码。为规避此问题,PostData改为自行按指定编码进行UrlEncode,再通过UploadData()提交。

 

关于HttpWebClientSetting中的AllowAutoRedirect属性:

在WebClient发起请求时,若响应为重定向,WebClient默认会自动跟随重定向,因此该类提供此设置项以控制访问时是否自动重定向。(自动重定向时,访问跳转后地址的第二次请求会将Referer头置空;将AllowAutoRedirect设置为false,然后手动从响应的Headers中取出Location地址,设置Referer头后再访问,则可更真实地模拟浏览器访问,从而避开一些网站的防抓取设置。)

 

关于HttpWebClient中的LastWebResponse属性:

当存在多次重定向时,该属性记录的是最后一次返回的响应对象,通过其ResponseUri属性可以取到最终返回响应的页面的真实地址,从而为下一次请求设置Referer头作准备。

 

大约就是如此,后期如有Bug会继续更新。


发表评论 共有条评论
用户名: 密码:
验证码: 匿名发表