# -*- coding: utf-8 -*-
"""Daily A-share data collector.

Pulls the stock list and company details from tushare, scrapes the
Eastmoney "stock changes" feed and two Tencent quote endpoints, and stores
everything in a MySQL database through pymysql.

The tushare client is expected in the module-level name ``pro``; it is
created in the ``__main__`` section at the bottom of this file.
"""
import json
import re
import time

import requests
from bs4 import BeautifulSoup
import lxml
import tushare as ts
import pandas as pd
import pymysql
from sqlalchemy import create_engine

pymysql.install_as_MySQLdb()

# Seconds to wait for any single HTTP response before giving up.
REQUEST_TIMEOUT = 10

EM_HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (Khtml, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3676.400 QQBrowser/10.5.3738.400"}
TX_HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3676.400 QQBrowser/10.5.3738.400"}

EM_CHANGES_URL = "http://push2ex.eastmoney.com/getAllStockChanges?type=8201,8202,8193,4,32,64,8207,8209,8211,8213,8215,8204,8203,8194,8,16,128,8208,8210,8212,8214,8216"
EM_MAX_PAGES = 200
TX_PRICE_URL = "http://stock.gtimg.cn/data/index.php?appn=price&c="
TX_DADAN_URL = "http://stock.finance.qq.com/sstock/list/view/dadan.php?"
TX_DADAN_MAX_PAGES = 50

# Prefix/suffix wrapped around the '^'-separated records in Tencent replies.
_TXFJ_START = re.compile(r'v\wp.{10}\d+\,\d+\,\d+\,"')
_TXFJ_END = re.compile(r'"\W+')
_TXDD_START = re.compile(r"var v_dadan_data_.{12}'")
_TXDD_END = re.compile(r"'\]\;")

INSERT_FAILED_MSG = "新增失败,数据已回滚"
UPDATE_FAILED_MSG = "更新失败,数据已回滚"


def _to_db_value(value):
    """Return *value* in a form pymysql can bind (NaN/None -> NULL)."""
    if pd.isna(value):
        return None
    if hasattr(value, "item"):
        # NumPy scalars expose .item(), which yields the plain Python value.
        return value.item()
    return value


def _execute_rows(conn, sql, rows, fail_msg):
    """Run the parameterized statement *sql* once per row, then commit.

    Args:
        conn: Open pymysql connection.
        sql: Statement using ``%s`` placeholders.
        rows: Iterable of parameter tuples, one per execution.
        fail_msg: Text printed (followed by the error) when a row fails.

    Returns:
        The number of rows that executed without a database error.
    """
    succeeded = 0
    with conn.cursor() as cursor:
        for row in rows:
            try:
                # Values are bound by the driver, never formatted into the
                # SQL text, so quotes/commas in scraped data are harmless.
                cursor.execute(sql, row)
            except pymysql.MySQLError as exc:
                # Only the failing row is skipped; rows already accepted in
                # this transaction are kept and committed below.
                print(fail_msg, exc)
            else:
                succeeded += 1
    conn.commit()
    return succeeded


def _split_records(text, start_pattern, end_pattern):
    """Strip the matched prefix/suffix from *text* and split the records.

    Records are separated by ``^`` and their fields by ``~``.

    Returns:
        A list of field lists; empty when either pattern does not match.
    """
    start = start_pattern.search(text)
    end = end_pattern.search(text)
    if start is None or end is None:
        return []
    payload = text.replace(start.group(), '').replace(end.group(), '')
    return [record.split('~') for record in payload.split('^') if record]


def EMydSpider(conn, current):
    """Scrape the Eastmoney stock-changes feed into ``emyd_data``.

    Source page: http://quote.eastmoney.com/changes

    Pages are requested until the API answers with ``data`` set to null or
    ``EM_MAX_PAGES`` pages have been read.

    Args:
        conn: Open pymysql connection.
        current: Value stored in the ``current`` column of every row.

    Raises:
        requests.RequestException: If an HTTP request fails or times out.
    """
    sql = ("insert into emyd_data(current,stk_code,stk_name,chg_time,chg_type,chg_value) "
           "values(%s,%s,%s,%s,%s,%s)")
    for page in range(EM_MAX_PAGES):
        param = {"pageindex": page, "pagesize": '64', "ut": '7eea3edcaed734bea9cbfc24409ed989', "dpt": 'wzchanges'}
        response = requests.get(url=EM_CHANGES_URL, params=param, headers=EM_HEADERS,
                                timeout=REQUEST_TIMEOUT)
        data = json.loads(response.text).get('data')
        if data is None:
            print("东方财富:共爬取%d页数据" % page)
            print("东方财富:个股盘口异动数据已抓取完成")
            break
        rows = [
            (current, stock['c'], stock['n'], stock['tm'], stock['t'], stock['i'])
            for stock in data.get('allstock') or []
        ]
        _execute_rows(conn, sql, rows, INSERT_FAILED_MSG)


def TXfjSpider(conn, current, code_list):
    """Scrape the Tencent per-price volume table into ``txfj_data``.

    Args:
        conn: Open pymysql connection.
        current: Value stored in the ``current`` column of every row.
        code_list: Stock codes appended to the Tencent URL.

    Raises:
        requests.RequestException: If an HTTP request fails or times out.
    """
    sql = "insert into txfj_data(current,stk_code,price,volumn) values(%s,%s,%s,%s)"
    for code in code_list:
        html = requests.get(url=TX_PRICE_URL + code, headers=TX_HEADERS,
                            timeout=REQUEST_TIMEOUT).text
        # Field 0 is stored as the price and field 2 as the volume; records
        # with fewer than three fields are ignored.
        rows = [
            (current, code, fields[0], fields[2])
            for fields in _split_records(html, _TXFJ_START, _TXFJ_END)
            if len(fields) > 2
        ]
        _execute_rows(conn, sql, rows, INSERT_FAILED_MSG)


def TXddSpider(conn, current, code_list):
    """Scrape Tencent large-order data (over 1 million) into ``txdd_data``.

    Args:
        conn: Open pymysql connection.
        current: Value stored in the ``current`` column of every row.
        code_list: Stock codes sent as the ``c`` query parameter.

    Raises:
        requests.RequestException: If an HTTP request fails or times out.
    """
    sql = "insert into txdd_data(current,stk_code,ddprice,volumn) values(%s,%s,%s,%s)"
    # NOTE(review): every code is requested for every page even after a code
    # stops returning data; whether page numbering starts at 0 is not visible
    # here, so no early stop is attempted.
    for page in range(TX_DADAN_MAX_PAGES):
        for code in code_list:
            param = {"t": "js", "c": code, "max": 80, "p": page, "opt": 10, "o": 0}
            html = requests.get(url=TX_DADAN_URL, params=param, headers=TX_HEADERS,
                                timeout=REQUEST_TIMEOUT).text
            rows = [
                (current, code, fields[0], fields[2])
                for fields in _split_records(html, _TXDD_START, _TXDD_END)
                if len(fields) > 2
            ]
            _execute_rows(conn, sql, rows, INSERT_FAILED_MSG)


def Tstockbasic(conn):
    """Sync ``stk_basic_data`` with tushare ``stock_basic``.

    Fetches ts_code, symbol, name, industry, list date and exchange for
    listed stocks. Codes already stored get their industry updated; new
    codes are inserted. The connection is left open for the caller.

    Args:
        conn: Open pymysql connection.

    Returns:
        A list with one entry per stock, built as
        ``ts_code[7:] + ts_code[:6]`` (presumably '000001.SZ' ->
        'SZ000001'; confirm against the tushare code format).
    """
    # Codes already present in the database.
    stored = pd.read_sql("select ts_code from stk_basic_data", con=conn)
    known_codes = set(stored['ts_code'])

    stk_basic = pro.stock_basic(list_status='L', fields='ts_code,symbol,name,industry,list_date,exchange')
    code_list = []
    updates = []
    inserts = []
    for _, row in stk_basic.iterrows():
        new_code = row['ts_code']
        industry = _to_db_value(row['industry'])
        code_list.append(new_code[7:] + new_code[:6])
        if new_code in known_codes:
            updates.append((industry, new_code))
        else:
            inserts.append((
                new_code,
                _to_db_value(row['name']),
                industry,
                _to_db_value(row['list_date']),
                _to_db_value(row['exchange']),
            ))

    _execute_rows(conn, "update stk_basic_data set industry=%s where ts_code=%s",
                  updates, UPDATE_FAILED_MSG)
    _execute_rows(conn,
                  "insert into stk_basic_data(ts_code,stk_name,industry,list_date,exchange) "
                  "values(%s,%s,%s,%s,%s)",
                  inserts, INSERT_FAILED_MSG)
    return code_list


def Tstockcompany(conn):
    """Sync ``stk_company_data`` with tushare ``stock_company``.

    Fetches province, city, setup date and employee count for both
    exchanges (SZSE and SSE). Codes already stored get their employee count
    updated; new codes are inserted. The connection is left open for the
    caller.

    Args:
        conn: Open pymysql connection.
    """
    # Codes already present in the database.
    stored = pd.read_sql("select ts_code from stk_company_data", con=conn)
    known_codes = set(stored['ts_code'])

    # Exchange codes: SSE = Shanghai, SZSE = Shenzhen.
    df1 = pro.stock_company(exchange='SZSE', fields='ts_code, province, city, setup_date, employees')
    df2 = pro.stock_company(exchange='SSE', fields='ts_code, province, city, setup_date, employees')
    stk_company = pd.concat([df1, df2], axis=0)

    updates = []
    inserts = []
    for _, row in stk_company.iterrows():
        new_code = row['ts_code']
        employees = _to_db_value(row['employees'])
        if new_code in known_codes:
            updates.append((employees, new_code))
        else:
            inserts.append((
                new_code,
                _to_db_value(row['province']),
                _to_db_value(row['city']),
                _to_db_value(row['setup_date']),
                employees,
            ))

    _execute_rows(conn, "update stk_company_data set employees=%s where ts_code=%s",
                  updates, UPDATE_FAILED_MSG)
    # NOTE(review): assumes stk_company_data has a `city` column matching the
    # fields fetched above; confirm against the table definition.
    _execute_rows(conn,
                  "insert into stk_company_data(ts_code,province,city,setup_date,employees) "
                  "values(%s,%s,%s,%s,%s)",
                  inserts, INSERT_FAILED_MSG)


def Tstockconcept(ts_code=None):
    """Fetch concept-classification details for one stock from tushare.

    Args:
        ts_code: tushare stock code to look up.

    Returns:
        The DataFrame returned by ``pro.concept_detail`` with the columns
        ts_code, concept_name and in_date requested.

    Raises:
        ValueError: If *ts_code* is not given.
    """
    if ts_code is None:
        raise ValueError("ts_code is required")
    return pro.concept_detail(ts_code=ts_code, fields='ts_code,concept_name,in_date')


if __name__ == "__main__":
    # NOTE(review): the tushare token and database password are hard-coded;
    # consider loading them from the environment or a config file.
    # Set the tushare community token.
    ts.set_token('ac16b470869c5d82db5033ae9288f77b282d2b5519507d6d2c72fdd7')
    pro = ts.pro_api()
    # Create the database connection.
    conn = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='123456', database='quantitative_trading_data', charset='utf8')
    # Not used by the functions in this file.
    engine = create_engine('mysql://root:123456@127.0.0.1/quantitative_trading_data?charset=utf8')
    # Current date (YYYYMMDD), passed to the spiders.
    current = time.strftime("%Y%m%d", time.localtime())
    try:
        # Refresh the stock list and get the codes to scrape.
        code_list = Tstockbasic(conn)
        # Eastmoney stock-changes data.
        EMydSpider(conn, current)
        # Tencent per-price volume data for today.
        TXfjSpider(conn, current, code_list)
        # Tencent large-order data for today.
        TXddSpider(conn, current, code_list)
        # Refresh company information.
        Tstockcompany(conn)
    finally:
        # The connection is shared by every step, so close it exactly once.
        conn.close()