首页 > 学院 > 开发设计 > 正文

[python]沪深龙虎榜数据进一步处理,计算日后5日的涨跌幅

2019-11-14 16:58:51
字体:
来源:转载
供稿:网友

沪深龙虎榜数据进一步处理,计算日后5日的涨跌幅

事前数据:

前面处理得到的csv文件

文件名前加入“[wait]”等待程序处理

python代码从雅虎股票历史数据api获取数据,计算后面5日的涨跌幅

雅虎数据api格式:

priceUrl = 'http://table.finance.yahoo.com/table.csv?s={%1}&d={%2}&e={%3}&f={%4}&g=d&a={%5}&b={%6}&c={%7}&ignore=.csv'
# %1:000001.sz
# END: %2:月-1 %3:日 %4:年
# START: %5:月-1 %6:日 %7:年

事前数据截图:

计算后,再用Excel的条件格式并另存为excel文件后截图:

代码:

  # coding=utf-8
"""Append follow-up price changes to "[wait]*.csv" files.

Every file in ``path`` whose name starts with ``[wait]`` and ends with
``.csv`` is read.  The first line gets five extra column titles, and each
remaining row with at most nine comma-separated fields is extended with the
percentage change (and closing price) of up to five later trading days,
computed from the daily price CSV downloaded via ``priceUrl``.  The result is
written next to the input with ``[wait]`` replaced by ``[处理完了]``.
"""
# Reads the CSV files whose names start with '[wait]'.
# copyright @ WangXinsheng
# http://www.VEVb.com/wangxinsheng/
import os
import gzip
import re
import zlib
import http.client
import http.cookiejar
import urllib.request
import urllib.parse
import time
import datetime


def getOpener(head):
    """Return a cookie-aware URL opener that sends the headers in *head*.

    *head* is a mapping of header name to header value; the pairs are
    installed as the opener's ``addheaders`` in the mapping's order.
    """
    # Keep cookies between requests made through this opener.
    cj = http.cookiejar.CookieJar()
    pro = urllib.request.HTTPCookieProcessor(cj)
    opener = urllib.request.build_opener(pro)
    opener.addheaders = list(head.items())
    return opener


def ungzip(data):
    """Return *data* gunzipped, or unchanged when it is not valid gzip."""
    try:
        # Try to decompress; plain payloads fall through to the handler.
        print('正在解压.....')
        data = gzip.decompress(data)
        print('解压完毕!')
    except (OSError, EOFError, zlib.error):
        print('未经压缩, 无需解压')
    return data


# Constants
header = {
    # 'Connection': 'Keep-Alive',
    'Accept': '*/*',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.111 Safari/537.36',
    'Accept-Encoding': 'gzip, deflate',
    'Host': 'yahoo.com',
    'Referer': 'http://www.yahoo.com',
}
# URL template; the placeholders are filled in by _build_price_url():
# {%1}: ticker with exchange suffix, e.g. 000001.sz
# END:   {%2}: month-1  {%3}: day  {%4}: year
# START: {%5}: month-1  {%6}: day  {%7}: year
priceUrl = (
    'http://table.finance.yahoo.com/table.csv?'
    's={%1}&d={%2}&e={%3}'
    '&f={%4}&g=d&a={%5}&b={%6}&c={%7}&ignore=.csv'
)

# Directory that is scanned for input files and receives the output files.
path = r'.'

# Column titles appended to the first line of every processed file.
_EXTRA_TITLES = ',"一天后","二天后","三天后","四天后","五天后"\n'


def _query_window(date_text):
    """Return ``(from_day, end_day)`` datetimes for the price query.

    *date_text* is ``YYYY-MM-DD`` or ``YYYY/MM/DD`` (surrounding quotes and
    whitespace are ignored).  Raises ValueError when it matches neither.
    """
    cleaned = date_text.strip().strip('"\'')
    try:
        parsed = time.strptime(cleaned, "%Y-%m-%d")
    except ValueError:
        parsed = time.strptime(cleaned, "%Y/%m/%d")
    stamp = int(time.mktime(parsed))
    # NOTE(review): mktime() reads the date as local midnight while the
    # conversion back is done in UTC, so the calendar day of from_day depends
    # on the machine's UTC offset (east of UTC the "+1 day" lands back on the
    # date in the row).  Kept as in the original; confirm the intended start
    # day before changing it.
    from_day = datetime.datetime.fromtimestamp(stamp, datetime.timezone.utc)
    from_day = from_day + datetime.timedelta(days=1)
    end_day = from_day + datetime.timedelta(days=15)
    return from_day, end_day


def _build_price_url(code, from_day, end_day):
    """Fill ``priceUrl`` for stock *code* and the given date window.

    Codes starting with '6' get the '.ss' suffix, all others '.sz'.
    Months are sent zero-based (month - 1), days and years as they are.
    """
    symbol = code + ('.ss' if code.startswith('6') else '.sz')
    values = (
        ('{%1}', symbol),
        ('{%2}', str(end_day.month - 1)),
        ('{%3}', str(end_day.day)),
        ('{%4}', str(end_day.year)),
        ('{%5}', str(from_day.month - 1)),
        ('{%6}', str(from_day.day)),
        ('{%7}', str(from_day.year)),
    )
    url = priceUrl
    for placeholder, value in values:
        url = url.replace(placeholder, value)
    return url


def _fetch_price_csv(url):
    """Download *url* and return the (gunzipped) body decoded as text."""
    opener = getOpener(header)
    with opener.open(url) as op:
        data = op.read()
    return ungzip(data).decode()


def _followup_changes(csv_text, days=5):
    """Return the formatted follow-up columns computed from *csv_text*.

    *csv_text* is a price table with one title line followed by rows ordered
    newest first, where field 4 is the closing price and field 5 the volume.
    The oldest row supplies the base price; every later row with a non-zero
    volume yields one ``"<pct>%[<close>]"`` entry, up to *days* entries.
    Rows that cannot be parsed are skipped, and an empty list is returned
    when the base price is zero (no percentage can be computed).
    """
    columns = []
    start_price = None
    # Skip the title line and walk from the oldest row to the newest.
    for line in reversed(csv_text.splitlines()[1:]):
        fields = line.split(',')
        try:
            close = float(fields[4])
            # The base row is used regardless of its volume.
            volume = None if start_price is None else int(fields[5])
        except (IndexError, ValueError):
            continue
        if start_price is None:
            if close == 0:
                return []
            start_price = close
            continue
        if volume == 0:
            # No trading that day: it does not count as a follow-up day.
            continue
        change = round((close - start_price) / start_price * 100, 2)
        columns.append('"' + str(change) + '%[' + str(close) + ']"')
        if len(columns) >= days:
            break
    return columns


def _extend_row(info):
    """Return data row *info* with the follow-up columns appended.

    The row is returned unchanged when it has no usable date/code fields,
    when the download fails, or when no follow-up value could be computed,
    so that no input row is ever lost from the output.
    """
    tmp = info.split(',')
    if len(tmp) < 2:
        return info
    try:
        # Work out the date range that has to be requested.
        from_day, end_day = _query_window(tmp[0])
    except (ValueError, OverflowError):
        return info
    code = tmp[1].replace('"', '').replace("'", "").strip()
    url = _build_price_url(code, from_day, end_day)
    print('抓取URL: ' + url)

    # Fetch the prices from Yahoo.
    try:
        dd = _fetch_price_csv(url)
    except (OSError, http.client.HTTPException, ValueError):
        print('网络抓取失败')
        return info

    # Compute the percentage changes.
    columns = _followup_changes(dd)
    if not columns:
        return info
    return info.replace('\n', '') + ',' + ','.join(columns) + '\n'


def _process_file(name):
    """Process the input file *name* inside ``path`` and write its output."""
    source = path + '/' + name
    # Read the file.
    print('读取文件:' + source)
    with open(source, 'rt') as fin:
        infos = fin.readlines()

    # A fresh list per file, so one file's rows never leak into the next.
    out = []
    for index, info in enumerate(infos):
        if index == 0:
            out.append(info.replace('\n', '') + _EXTRA_TITLES)
        elif len(info.split(',')) > 9:
            # More than nine fields: passed through untouched.
            out.append(info)
        else:
            out.append(_extend_row(info))

    # Write the output.
    target = path + '/' + name.replace('[wait]', '[处理完了]')
    with open(target, 'w') as fout:
        fout.writelines(out)
    print('处理完了\n文件地址:' + target)


files = os.listdir(path)
files.sort()

for f in files:
    if f.startswith('[wait]') and f.endswith('.csv'):
        _process_file(f)

 


发表评论 共有条评论
用户名: 密码:
验证码: 匿名发表