首页 > 编程 > Python > 正文

python登录并爬取淘宝信息代码示例

2020-02-16 11:00:37
字体:
来源:转载
供稿:网友

本文主要分享使用 Python 登录并爬取淘宝信息的相关代码,还是挺不错的,大家可以了解一下。

#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Crawl link texts from Taobao pages with a Selenium-driven browser.

Tasks are read from ``1.txt``, one ``filename,url`` pair per line.  For every
task the URL is opened in the browser, the text of the matching anchors is
collected and written (UTF-8, one entry per line) to ``filename``.  Tasks that
yield no text are appended to ``error.txt`` as ``filename,url``.
"""
import codecs
import datetime
import logging
import os
import time
import traceback

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains


def login(driver, site, username=u"yourusername", password=u"yourpsd"):
    """Open *site* and submit the login form found on it.

    Args:
        driver: Selenium WebDriver instance used to drive the browser.
        site: URL to load before looking for the login link.
        username: Text typed into the ``TPL_username_1`` field.
        password: Text typed into the ``TPL_password_1`` field.

    Any error while locating or driving the form is reported ("failure" plus
    a traceback) and swallowed, so a failed login never aborts the crawl.
    """
    driver.get(site)
    time.sleep(5)
    try:
        # Click the "please log in" link to reach the login form.
        driver.find_element_by_class_name("h").click()
        time.sleep(5)
        # Fill in account name and password.
        driver.find_element_by_id("TPL_username_1").send_keys(username)
        time.sleep(5)
        driver.find_element_by_id("TPL_password_1").send_keys(password)
        time.sleep(5)
        # Submit the form and give the site time to finish the login.
        driver.find_element_by_id("J_SubmitStatic").click()
        time.sleep(30)
    except Exception:
        # Best effort: ``Exception`` (not a bare ``except``) keeps Ctrl-C
        # working, and the traceback shows which step actually failed.
        print(u"failure")
        traceback.print_exc()


def _save_link_texts(links, filename, site):
    """Write the non-empty texts of *links* to *filename*, one per line.

    When no text was collected, ``filename,site`` is appended to
    ``error.txt`` instead so the task can be retried later.
    """
    lines = []
    for link in links:
        text = link.text
        if text != "":
            entry = text.strip() + '\n'
            print(entry)
            lines.append(entry)

    if lines:
        with codecs.open(filename, 'w', 'utf-8') as out:
            out.write("".join(lines))
    else:
        record = filename + "," + site
        print(record)
        with codecs.open("error.txt", 'a', 'utf-8') as err:
            # Terminate the record explicitly so entries never run together.
            err.write(record + "\n")


def crawlmarket(driver, filename, site):
    """Save the texts of all ``J_ItemLink`` anchors on a market page.

    Args:
        driver: Selenium WebDriver instance; it is left open for the caller.
        filename: Output file for the collected texts.
        site: URL of the page to crawl.

    If the page shows no matching links, a login is attempted and the links
    are looked up again afterwards.
    """
    link_xpath = "//a[@class='J_ItemLink']"
    driver.get(site)
    driver.maximize_window()
    time.sleep(10)
    driver.refresh()
    time.sleep(10)
    # No links at all is taken as "not logged in".
    if not driver.find_elements_by_xpath(link_xpath):
        login(driver, site)
    time.sleep(30)
    # Query (again) only after the wait: the elements found before a login
    # belong to the old page and would be empty or stale.
    # NOTE(review): assumes the browser is back on the listing page after a
    # successful login - confirm against the live site.
    links = driver.find_elements_by_xpath(link_xpath)
    _save_link_texts(links, filename, site)


def crawltaobaosousuo(driver, filename, site):
    """Save the texts of all ``J_ClickStat`` anchors on a search page.

    Args:
        driver: Selenium WebDriver instance; it is left open for the caller.
        filename: Output file for the collected texts.
        site: URL of the page to crawl.
    """
    driver.get(site)
    driver.maximize_window()
    time.sleep(10)
    driver.get(site)
    time.sleep(30)
    driver.refresh()
    links = driver.find_elements_by_xpath("//a[@class='J_ClickStat']")
    _save_link_texts(links, filename, site)


def jiexi(driver):
    """Read the task list from ``1.txt`` and crawl every entry.

    Each line is split on commas; the first field is the output filename and
    the second the URL.  URLs containing ``markets`` go to ``crawlmarket``,
    everything else to ``crawltaobaosousuo``.  Lines with fewer than two
    fields are skipped.
    """
    # Decode while reading so names and URLs are text on Python 2 and 3.
    with codecs.open("1.txt", "r", "utf-8") as tasks:
        for line in tasks:
            info = line.split(",")
            if len(info) < 2:
                continue
            # Pause between tasks before touching the site again.
            time.sleep(60)
            filename = info[0]
            # Drop the line terminator that would otherwise end up in the URL.
            href = info[1].strip()
            print(filename)
            if "markets" in href:
                crawlmarket(driver, filename, href)
            else:
                crawltaobaosousuo(driver, filename, href)


if __name__ == '__main__':
    driver = webdriver.Firefox()
    try:
        jiexi(driver)
    finally:
        # One browser serves every task, so it is closed exactly once here.
        driver.quit()

小结

如有改进策略,欢迎一起探讨。这段代码可以抓取淘宝部分网页内容,大家可以根据自己的需求修改;需要注意的是,抓取时可能会被淘宝风控。个人觉得不登录的效果更好。

发表评论 共有条评论
用户名: 密码:
验证码: 匿名发表