1.python爬虫浏览器伪装
#导入urllib.request模块import urllib.request#设置请求头headers=("User-Agent","Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0")#创建一个openeropener=urllib.request.build_opener()#将headers添加到opener中opener.addheaders=[headers]#将opener安装为全局urllib.request.install_opener(opener)#用urlopen打开网页data=urllib.request.urlopen(url).read().decode('utf-8','ignore')
2.设置代理
#定义代理ipproxy_addr="122.241.72.191:808"#设置代理proxy=urllib.request.ProxyHandle({'http':proxy_addr})#创建一个openeropener=urllib.request.build_opener(proxy,urllib.request.HTTPHandle)#将opener安装为全局urllib.request.install_opener(opener)#用urlopen打开网页data=urllib.request.urlopen(url).read().decode('utf-8','ignore')
3.同时设置用代理和模拟浏览器访问
#定义代理ipproxy_addr="122.241.72.191:808"#创建一个请求req=urllib.request.Request(url)#添加headersreq.add_header("User-Agent","Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)#设置代理proxy=urllib.request.ProxyHandle("http":proxy_addr)#创建一个openeropener=urllib.request.build_opener(proxy,urllib.request.HTTPHandle)#将opener安装为全局urllib.request.install_opener(opener)#用urlopen打开网页data=urllib.request.urlopen(req).read().decode('utf-8','ignore')
4.在请求头中添加多个信息
import urllib.requestpage_headers={"User-Agent":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0", "Host":"www.baidu.com", "Cookie":"xxxxxxxx" }req=urllib.request.Request(url,headers=page_headers)data=urllib.request.urlopen(req).read().decode('utf-8','ignore')
5.添加post请求参数
import urllib.requestimport urllib.parse#设置post参数page_data=urllib.parse.urlencode([ ('pn',page_num), ('kd',keywords) ])#设置headerspage_headers={ 'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0', 'Connection':'keep-alive', 'Host':'www.lagou.com', 'Origin':'https://www.lagou.com', 'Cookie':'JSESSIONID=ABAAABAABEEAAJA8F28C00A88DC4D771796BB5C6FFA2DDA; user_trace_token=20170715131136-d58c1f22f6434e9992fc0b35819a572b', 'Accept':'application/json, text/javascript, */*; q=0.01', 'Content-Type':'application/x-www-form-urlencoded; charset=UTF-8', 'Referer':'https://www.lagou.com/jobs/list_%E6%95%B0%E6%8D%AE%E6%8C%96%E6%8E%98?labelWords=&fromSearch=true&suginput=', 'X-Anit-Forge-Token':'None', 'X-Requested-With':'XMLHttpRequest' }#打开网页req=urllib.request.Request(url,headers=page_headers)data=urllib.request.urlopen(req,data=page_data.encode('utf-8')).read().decode('utf-8')
新闻热点
疑难解答