http://casey.blog.51cto.com/9994043/1707905
各种股票软件,例如通达信、同花顺、大智慧,都可以实时查看股票价格和走势,做一些简单的选股和定量分析,但是如果你想做更复杂的分析,例如回归分析、关联分析等就有点捉襟见肘,所以最好能够获取股票历史及实时数据并存储到数据库,然后再通过其他工具,例如SPSS、SAS、EXCEL或者其他高级编程语言连接数据库获取股票数据进行定量分析,这样就能实现更多目的了。
为此,首先需要找到可以获取股票数据的接口,新浪、雅虎、腾讯等都有接口可以实时获取股票数据,历史数据选择了雅虎接口,收盘数据选择了腾讯接口。
(1)项目结构
(2)数据库连接池
connectionpool.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
# -*- coding: UTF-8 -*-
'''
Create a MySQL connection pool shared by the stock collectors.
'''
from DBUtils import PooledDB
import MySQLdb
import string

maxconn = 30    # maximum number of connections
mincached = 10  # minimum number of idle connections
maxcached = 20  # maximum number of idle connections
maxshared = 30  # maximum number of shared connections
# database address, '#'-separated: user#password#host#port#database#charset
connstring = "root#root#127.0.0.1#3307#pystock#utf8"
dbtype = "mysql"  # MySQL is used as the storage database

# number of '#'-separated fields createConnectionPool reads from connstring
_CONN_FIELD_COUNT = 6


def createConnectionPool(connstring, dbtype):
    '''
    Build a PooledDB pool from a '#'-separated connection string.

    connstring -- "user#password#host#port#database#charset"
    dbtype     -- only 'mysql' is handled; any other value yields None

    Returns the PooledDB pool, or None for an unhandled dbtype.
    Raises ValueError when connstring does not have six fields, and
    Exception (with the host and the underlying error text) when the pool
    cannot be created.
    '''
    if dbtype != 'mysql':
        return None

    db_conn = connstring.split("#")
    # Validate up front: otherwise a short string fails with an IndexError
    # inside the error handler below, hiding the real problem.
    if len(db_conn) != _CONN_FIELD_COUNT:
        raise ValueError(
            'connstring must have %d "#"-separated fields, got %d'
            % (_CONN_FIELD_COUNT, len(db_conn)))

    try:
        return PooledDB.PooledDB(
            MySQLdb,
            user=db_conn[0],
            passwd=db_conn[1],
            host=db_conn[2],
            port=int(db_conn[3]),  # int() replaces the deprecated string.atoi
            db=db_conn[4],
            charset=db_conn[5],
            mincached=mincached,
            maxcached=maxcached,
            maxshared=maxshared,
            maxconnections=maxconn)
    except Exception as e:
        raise Exception('conn datasource Excepts,%s!!!(%s).' % (db_conn[2], str(e)))


# module-level pool, created at import time and used by DBOperator.connDB
pool = createConnectionPool(connstring, dbtype)
(3)数据库操作
DBOperator.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
# -*- coding: UTF-8 -*-
'''
Created on 2015-3-13
@author: Casey
'''
import MySQLdb
from stockmining.stocks.setting import LoggerFactory
import connectionpool


class DBOperator(object):
    '''
    Thin helper around one pooled MySQL connection.

    Call connDB() before any query method and closeDB() when done. Query
    methods log MySQLdb errors and return None instead of raising them.
    '''

    def __init__(self):
        self.logger = LoggerFactory.getLogger('DBOperator')
        # Initialised here so closeDB() and the query methods can be called
        # safely before connDB().
        self.conn = None

    def connDB(self):
        '''Take a connection from the pool, remember it and return it.'''
        self.conn = connectionpool.pool.connection()
        return self.conn

    def closeDB(self):
        '''Close the current connection, if any.'''
        if self.conn is not None:
            self.conn.close()
            self.conn = None

    def _cursor(self):
        '''Return a new cursor; raise MySQLdb.Error when not connected.'''
        if self.conn is None:
            raise MySQLdb.Error('No connection')
        return self.conn.cursor()

    def _logError(self, e):
        '''Log a MySQLdb error whether it carries (code, message) or just a message.'''
        if len(e.args) >= 2 and isinstance(e.args[0], int):
            self.logger.error("Mysql Error %d: %s" % (e.args[0], e.args[1]))
        else:
            # e.g. the single-argument 'No connection' error raised by
            # _cursor(); indexing e.args[1] here would raise IndexError.
            self.logger.error("Mysql Error: %s" % (e,))

    def insertIntoDB(self, table, dict):
        '''
        Insert one row into `table` and commit.

        dict -- mapping of column name -> value; values are passed as query
                parameters. Table and column names are concatenated into the
                statement, so they must come from trusted code.

        Returns None. MySQLdb errors are logged and the transaction is
        rolled back.
        '''
        if not dict:
            # Nothing to insert; building the statement would yield invalid SQL.
            return
        cursor = None
        try:
            cursor = self._cursor()
            columns = list(dict)
            param = tuple(dict[column] for column in columns)
            sql = "insert into %s(%s) values(%s)" % (
                table, ",".join(columns), ",".join(["%s"] * len(columns)))
            self.logger.debug("%s -- %r", sql, param)
            cursor.execute(sql, param)
            self.conn.commit()
        except MySQLdb.Error as e:
            self._logError(e)
            if self.conn is not None:
                self.conn.rollback()
        finally:
            if cursor is not None:
                cursor.close()

    def execute(self, sql):
        '''
        Execute `sql` and return the value of cursor.execute().

        Returns None when a MySQLdb error occurred (the error is logged).
        This method does not commit.
        '''
        cursor = None
        try:
            cursor = self._cursor()
            return cursor.execute(sql)
        except MySQLdb.Error as e:
            self._logError(e)
        finally:
            if cursor is not None:
                cursor.close()

    def findBySQL(self, sql):
        '''
        Execute `sql` and return all rows (cursor.fetchall()).

        Returns None when a MySQLdb error occurred (the error is logged).
        '''
        cursor = None
        try:
            cursor = self._cursor()
            cursor.execute(sql)
            return cursor.fetchall()
        except MySQLdb.Error as e:
            self._logError(e)
        finally:
            if cursor is not None:
                cursor.close()

    def findByCondition(self, table, fields, wheres):
        '''
        Select `fields` from `table` where every condition in `wheres` holds.

        fields -- iterable of column names
        wheres -- iterable of objects exposing `.key` (column name) and
                  `.value`; conditions are combined with AND. An empty
                  iterable selects every row.

        Returns all matching rows, or None when a MySQLdb error occurred.
        Values are passed as query parameters rather than interpolated into
        the SQL text.
        '''
        cursor = None
        try:
            cursor = self._cursor()
            wheres = list(wheres)
            sql = "select " + ",".join(fields) + " from " + table
            if wheres:
                sql += " where " + " and ".join(
                    where.key + "=%s" for where in wheres)
            param = tuple(where.value for where in wheres)
            self.logger.debug(sql)
            cursor.execute(sql, param)
            rows = cursor.fetchall()
            # Kept from the original: commit after the select.
            self.conn.commit()
            return rows
        except MySQLdb.Error as e:
            self._logError(e)
        finally:
            if cursor is not None:
                cursor.close()
(4)日志
LoggerFactory.py
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
# -*- coding: UTF-8 -*-
'''
Created on 2015-3-11
@author: Casey
'''
import logging
import time

# Default log file; same value as the original literal.
# NOTE(review): this looks like "d:\stocks\stock.log" with a backslash lost
# somewhere - confirm the intended path.
DEFAULT_LOG_FILE = r"d:\stocksstock.log"


def getLogger(name, logfile=None):
    '''
    Return the logger called `name`, logging to a file and to the console.

    name    -- logger name, shown in every record
    logfile -- optional path of the log file; defaults to DEFAULT_LOG_FILE.
               logging.basicConfig only configures the root logger once, so
               only the first call's logfile takes effect.
    '''
    # %(asctime)s and %(name)s are resolved per record. Baking a timestamp
    # and the name into the format string would freeze both at the values
    # seen by the first call.
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s : %(name)s LINE %(lineno)-4d %(levelname)-8s %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S',
        filename=logfile or DEFAULT_LOG_FILE,
        filemode='w')

    logger = logging.getLogger(name)
    # logging.getLogger returns the same object for the same name; attach the
    # console handler only once so repeated calls do not duplicate output.
    if not logger.handlers:
        console = logging.StreamHandler()
        console.setLevel(logging.DEBUG)
        console.setFormatter(logging.Formatter(
            name + ': LINE %(lineno)-4d : %(levelname)-8s %(message)s'))
        logger.addHandler(console)
    return logger


if __name__ == '__main__':
    getLogger("www").debug("www")
(5)获取股票历史数据
采用雅虎的接口:http://ichart.yahoo.com/table.csv?s=<string>&a=<int>&b=<int>&c=<int>&d=<int>&e=<int>&f=<int>&g=d&ignore=.csv
参 数:s — 股票名称
a — 起始时间,月
b — 起始时间,日
c — 起始时间,年
d — 结束时间,月
e — 结束时间,日
f — 结束时间,年
g — 时间周期(下面示例中的 d 表示日线)。
(一定注意月份参数,其值比真实数据-1。如需要9月数据,则写为08。)
示例 查询浦发银行2010.09.25 – 2010.10.8之间日线数据
http://ichart.yahoo.com/table.csv?s=600000.SS&a=08&b=25&c=2010&d=09&e=8&f=2010&g=d
返回:
Date,Open,High,Low,Close,Volume,Adj Close
2010-09-30,12.37,12.99,12.32,12.95,76420500,12.95
2010-09-29,12.20,12.69,12.12,12.48,79916400,12.48
2010-09-28,12.92,12.92,12.57,12.58,63988100,12.58
2010-09-27,13.00,13.02,12.89,12.94,43203600,12.94
因为数据量比较大,单线程需要跑很久,也可以考虑用多线程模式来获取相关数据。下面先给出单线程模式的实现:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
# -*- coding: UTF-8 -*-
'''
Created on 2015-3-1
@author: Casey

Download daily history quotes from the Yahoo CSV interface and store them
in the stock_quote_yahoo table.
'''
import urllib
import re
import sys
from setting import params
import urllib2
from db import *

dbOperator = DBOperator()
table = "stock_quote_yahoo"

# A data row starts with an ISO date. This rejects the "Date,..." header and
# any non-CSV payload, and keeps the date safe to embed in the SQL text built
# by isStockExitsInDate.
_DATE_PATTERN = re.compile(r'^\d{4}-\d{2}-\d{2}$')

# (stock codes, URL template) for each market segment, in download order.
# NOTE(review): Yahoo's month parameters are zero-based, so a=01/d=01 means
# February - confirm the intended date range.
_HISTORY_SOURCES = (
    # Shanghai market, 2005-2015 history
    (range(601999, 602100),
     "http://ichart.yahoo.com/table.csv?s=%d.SS&a=01&b=01&c=2005&d=01&e=01&f=2015&g=d"),
    # Shenzhen market, 2005-2015 history
    (range(1, 1999),
     "http://ichart.yahoo.com/table.csv?s=%06d.SZ&a=01&b=01&c=2005&d=01&e=01&f=2015&g=d"),
    # SME board stocks
    (range(2001, 2999),
     "http://ichart.yahoo.com/table.csv?s=%06d.SZ&a=01&b=01&c=2005&d=01&e=01&f=2015&g=d"),
    # ChiNext (growth enterprise board) stocks
    (range(300001, 300400),
     "http://ichart.yahoo.com/table.csv?s=%d.SZ&a=01&b=01&c=2005&d=01&e=01&f=2015&g=d"),
)


def isStockExitsInDate(table, stock, date):
    '''
    Return True when `table` already holds a row for `stock` on `date`.

    stock -- integer stock code
    date  -- date string as stored in the table

    Returns False when no row exists or when the query failed
    (dbOperator.execute returns None in that case).
    '''
    sql = "select * from " + table + " where code = '%d' and date='%s'" % (stock, date)
    n = dbOperator.execute(sql)
    return n is not None and n >= 1


def getHistoryStockData(code, dataurl):
    '''
    Download the CSV at `dataurl` and insert every new daily row for `code`.

    Returns the downloaded text (double quotes removed), or None when the
    download or the processing failed; failures are printed, not raised.
    '''
    try:
        stdout = urllib2.urlopen(urllib2.Request(dataurl), data=None, timeout=3)
    except Exception as e:
        print(">>>>>> Exception: " + str(e))
        return None

    try:
        try:
            rawData = stdout.read()
        finally:
            stdout.close()
        tempData = rawData.decode(params.codingtype).encode('utf-8').replace('"', '')

        # Rows are validated individually rather than by searching the whole
        # payload for '404', which also matches ordinary numbers in the data.
        for stockQuote in tempData.split("\n"):
            stockInfo = stockQuote.strip().split(",")
            if len(stockInfo) != 7 or not _DATE_PATTERN.match(stockInfo[0]):
                continue
            if isStockExitsInDate(table, code, stockInfo[0]):
                continue
            stockDetail = {
                "date": stockInfo[0],
                "open": stockInfo[1],       # opening price
                "high": stockInfo[2],       # highest price
                "low": stockInfo[3],        # lowest price
                "close": stockInfo[4],      # closing price
                "volume": stockInfo[5],     # trading volume
                "adj_close": stockInfo[6],  # adjusted closing price
                "code": code,               # stock code
            }
            dbOperator.insertIntoDB(table, stockDetail)
    except Exception as err:
        print(">>>>>> Exception: " + str(dataurl) + " " + str(err))
        return None
    return tempData


def get_stock_history():
    '''Download and store history for every code range in _HISTORY_SOURCES.'''
    for codes, urlTemplate in _HISTORY_SOURCES:
        for code in codes:
            print(getHistoryStockData(code, urlTemplate % code))


def main():
    "main function"
    dbOperator.connDB()
    get_stock_history()
    dbOperator.closeDB()


if __name__ == '__main__':
    main()
(6)获取实时价格和现金流数据
A:实时价格数据采用腾讯的接口:沪市:http://qt.gtimg.cn/q=sh<int>,深市:http://qt.gtimg.cn/q=sz<int>
如获取平安银行的股票实时数据:http://qt.gtimg.cn/q=sz000001,会返回一个包含股票数据的字符串:
v_sz000001="51~平安银行~000001~11.27~11.27~11.30~316703~151512~165192~11.27~93~11.26~4352~11.25~4996~11.24~1037~11.23~1801~11.28~1181~11.29~2108~11.30~1075~11.31~1592~11.32~1118~15:00:24/11.27/3146/S/3545407/17948|14:56:59/11.26/15/S/16890/17787|14:56:56/11.25/404/S/454693/17783|14:56:54/11.26/173/B/194674/17780|14:56:51/11.26/306/B/344526/17777|14:56:47/11.26/16/B/18016/17773~20151029150142~0.00~0.00~11.36~11.25~11.26/313557/354285045~316703~35783~0.27~7.38~~11.36~11.25~0.98~1330.32~1612.59~1.03~12.40~10.14~";
数据比较多(以“~”分隔,下标从0开始),比较有用的是:1-名称;2-代码;3-价格;4-昨日收盘;5-今日开盘;6-交易量(手);7-外盘;8-内盘;9-买一;10-买一量;11-买二;12-买二量;13-买三;14-买三量;15-买四;16-买四量;17-买五;18-买五量;19-卖一;20-卖一量;21-卖二;22-卖二量;23-卖三;24-卖三量;25-卖四;26-卖四量;27-卖五;28-卖五量;30-时间;31-涨跌;32-涨跌率;33-最高价;34-最低价;35-价格/成交量(手)/成交额;38-换手率;39-市盈率;43-振幅;44-流通市值;45-总市值;46-市净率(第40项为空,第41、42项重复最高价和最低价,因此振幅从第43项开始)
B:现金流数据仍然采用腾讯接口:沪市:http://qt.gtimg.cn/q=ff_sh<int>,深市:http://qt.gtimg.cn/q=ff_sz<int>
例如平安银行的现金流数据http://qt.gtimg.cn/q=ff_sz000001:
v_ff_sz000001="sz000001~21162.20~24136.40~-2974.20~-8.31~14620.87~11646.65~2974.22~8.31~35783.07~261502.0~261158.3~平安银行~20151029~20151028^37054.20^39358.20~20151027^39713.50^42230.70~20151026^82000.80^83689.90~20151023^81571.30^71743.10";
比较重要的:1-主力流入;2-主力流出;3-主力净流量;4-主力流入/主力总资金;5-散户流入;6-散户流出;7-散户净流量;8-散户流入/散户总资金;9-总资金流量;12-名字;13-日期
采用多线程、数据库连接池实现股票实时价格和现金流数据的获取:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 |
# -*- coding: UTF-8 -*-
'''
Created on 2015-3-2
@author: Casey
'''
import time
import threading
'''
Shanghai codes: '600001' .. '602100'
Shenzhen codes: '000001' .. '001999'
SME board:      '002001' .. '002999'
ChiNext:        '300001' .. '300400'
'''
import urllib2
from datetime import date
from db import *
from setting import *


class StockTencent(object):
    '''
    Collect cash-flow and quotation data from the Tencent interface
    (qt.gtimg.cn) and store it through DBOperator.
    '''

    # database tables
    __stockTables = {'cash': 'stock_cash_tencent', 'quotation': 'stock_quotation_tencent'}

    # (stock codes, symbol format) for each market segment, in sweep order
    __codeRanges = (
        (range(600001, 602100), "sh%d"),   # Shanghai market
        (range(1, 1999), "sz%06d"),        # Shenzhen market
        (range(2001, 2999), "sz%06d"),     # SME board
        (range(300001, 300400), "sz%d"),   # ChiNext
    )

    def __init__(self):
        '''Create the logger and the database helper.'''
        self.__logger = LoggerFactory.getLogger('StockTencent')
        self.__dbOperator = DBOperator()

    def main(self):
        '''Run both collectors in parallel threads, then close the connection.'''
        self.__dbOperator.connDB()
        # NOTE(review): both threads use the same DBOperator connection -
        # confirm the pooled connection may be shared between threads.
        workers = [
            threading.Thread(target=self.getStockCash),
            threading.Thread(target=self.getStockQuotation),
        ]
        try:
            for worker in workers:
                worker.start()
            # Wait for the collectors; closing the connection right after
            # start() would pull it away from the running threads.
            for worker in workers:
                worker.join()
        finally:
            self.__dbOperator.closeDB()

    def __isStockExitsInDate(self, table, stock, date):
        '''
        Return True when `table` already holds a row for `stock` on `date`.

        Returns False when no row exists or when the query failed
        (DBOperator.execute returns None in that case).
        '''
        sql = "select * from " + table + " where code = '%s' and date='%s'" % (stock, date)
        n = self.__dbOperator.execute(sql)
        return n is not None and n >= 1

    def __fetchWithRetry(self, dataUrl):
        '''Fetch `dataUrl`; on failure wait 10 seconds and try once more.'''
        tempData = self.__getDataFromUrl(dataUrl)
        if tempData is None:
            time.sleep(10)
            tempData = self.__getDataFromUrl(dataUrl)
        return tempData

    def __getStockCashDetail(self, dataUrl):
        '''
        Fetch the cash-flow record at `dataUrl` and insert it if it is new.

        Returns False when the data could not be downloaded, None otherwise.
        '''
        # read the data
        tempData = self.__fetchWithRetry(dataUrl)
        if tempData is None:
            return False

        # parse the cash-flow data
        stockInfo = tempData.split('~')
        # Fields 0..13 are read below, so at least 14 are required.
        if len(stockInfo) < 14 or stockInfo[0].find('pv_none') != -1:
            return
        header = stockInfo[0].split('=')
        if len(header) < 2:
            return

        table = self.__stockTables['cash']
        code = header[1][2:]  # drop the two-letter market prefix (sh/sz)
        date = stockInfo[13]
        # Both values end up inside SQL text built by __isStockExitsInDate,
        # so only plain digit strings are accepted.
        if not (code.isdigit() and date.isdigit()):
            self.__logger.error("unexpected code/date in: " + dataUrl)
            return
        if self.__isStockExitsInDate(table, code, date):
            return

        stockCash = {
            'code': code,
            'main_in_cash': stockInfo[1],
            'main_out_cash': stockInfo[2],
            'main_net_cash': stockInfo[3],
            'main_net_rate': stockInfo[4],
            'private_in_cash': stockInfo[5],
            'private_out_cash': stockInfo[6],
            'private_net_cash': stockInfo[7],
            'private_net_rate': stockInfo[8],
            'total_cash': stockInfo[9],
            'name': stockInfo[12].decode('utf8'),
            'date': date,
        }
        # insert into the database
        self.__dbOperator.insertIntoDB(table, stockCash)

    def getStockQuotationDetail(self, dataUrl):
        '''
        Fetch the quotation record at `dataUrl` and insert it if it is new.

        Records with a zero (or unparsable) price are skipped. Returns False
        when the data could not be downloaded, None otherwise.
        '''
        tempData = self.__fetchWithRetry(dataUrl)
        if tempData is None:
            return False

        stockInfo = tempData.split('~')
        # Fields 0..46 are read below, so at least 47 are required.
        if len(stockInfo) < 47 or stockInfo[0].find('pv_none') != -1:
            return
        # Compare the price numerically: a substring test for '0.00' would
        # also drop valid prices such as 10.00.
        try:
            if float(stockInfo[3]) == 0:
                return
        except ValueError:
            return
        dealSummary = stockInfo[35].split('/')
        if len(dealSummary) < 3:
            return

        table = self.__stockTables['quotation']
        code = stockInfo[2]
        date = stockInfo[30]
        # Both values end up inside SQL text built by __isStockExitsInDate,
        # so only plain digit strings are accepted.
        if not (code.isdigit() and date.isdigit()):
            self.__logger.error("unexpected code/date in: " + dataUrl)
            return
        # NOTE(review): the existence check filters on a `date` column while
        # the row stores this value under `datetime` - confirm the schema.
        if self.__isStockExitsInDate(table, code, date):
            return

        stockQuotation = {
            'code': code,
            'name': stockInfo[1].decode('utf8'),
            'price': stockInfo[3],
            'yesterday_close': stockInfo[4],
            'today_open': stockInfo[5],
            'volume': stockInfo[6],
            'outer_sell': stockInfo[7],
            'inner_buy': stockInfo[8],
            'buy_one': stockInfo[9],
            'buy_one_volume': stockInfo[10],
            'buy_two': stockInfo[11],
            'buy_two_volume': stockInfo[12],
            'buy_three': stockInfo[13],
            'buy_three_volume': stockInfo[14],
            'buy_four': stockInfo[15],
            'buy_four_volume': stockInfo[16],
            'buy_five': stockInfo[17],
            'buy_five_volume': stockInfo[18],
            'sell_one': stockInfo[19],
            'sell_one_volume': stockInfo[20],
            'sell_two': stockInfo[21],  # price is field 21; 22 is its volume
            'sell_two_volume': stockInfo[22],
            'sell_three': stockInfo[23],
            'sell_three_volume': stockInfo[24],
            'sell_four': stockInfo[25],
            'sell_four_volume': stockInfo[26],
            'sell_five': stockInfo[27],
            'sell_five_volume': stockInfo[28],
            'datetime': date,
            'updown': stockInfo[31],
            'updown_rate': stockInfo[32],
            'heighest_price': stockInfo[33],
            'lowest_price': stockInfo[34],
            'volume_amout': dealSummary[2],
            'turnover_rate': stockInfo[38],
            'pe_rate': stockInfo[39],
            # Field 40 is empty and 41/42 repeat the high/low prices, so the
            # four values below sit at 43..46 rather than 42..45.
            'viberation_rate': stockInfo[43],
            'circulated_stock': stockInfo[44],
            'total_stock': stockInfo[45],
            'pb_rate': stockInfo[46],
        }
        self.__dbOperator.insertIntoDB(table, stockQuotation)

    def __getDataFromUrl(self, dataUrl):
        '''
        Download `dataUrl` and return its text as UTF-8 with double quotes
        removed, or None when the request failed (the error is logged).
        '''
        r = urllib2.Request(dataUrl)
        try:
            stdout = urllib2.urlopen(r, data=None, timeout=3)
        except Exception as e:
            self.__logger.error(">>>>>> Exception: " + str(e))
            return None
        try:
            rawData = stdout.read()
        finally:
            stdout.close()
        tempData = rawData.decode(params.codingtype).encode('utf-8').replace('"', '')
        self.__logger.debug(tempData)
        return tempData

    def __collect(self, urlPrefix, handler):
        '''
        Call handler(url) for every stock code in __codeRanges.

        A failure on one code is logged and the sweep continues with the
        next code.
        '''
        for codes, symbolFormat in self.__codeRanges:
            for code in codes:
                try:
                    handler(urlPrefix + symbolFormat % code)
                except Exception as err:
                    self.__logger.error(">>>>>> Exception: " + str(code) + " " + str(err))

    def getStockCash(self):
        '''Collect cash-flow data for every known stock code.'''
        self.__logger.debug("开始:收集股票现金流信息")
        self.__collect("http://qt.gtimg.cn/q=ff_", self.__getStockCashDetail)
        self.__logger.debug("结束:股票现金流收集")

    def getStockQuotation(self):
        '''Collect quotation data for every known stock code.'''
        self.__logger.debug("开始:收集股票交易行情数据")
        self.__collect("http://qt.gtimg.cn/q=", self.getStockQuotationDetail)
        self.__logger.debug("结束:收集股票交易行情数据")


if __name__ == '__main__':
    StockTencent().main()
(7)加入到系统任务计划中收集盘后数据
(8)收集后的数据可以用以分析了,例如:
求取10月28日主力净流入最大的股票:select * from stock_cash_tencent where date = '20151028' and main_net_cash = (select max(main_net_cash) from stock_cash_tencent where date = '20151028' )(外层查询也要限定日期,否则会把其他日期中数值相同的记录一并查出)
原来是“兴蓉环境”,当日放量上涨,次日收跌,连续多日有主力资金流入。
excel中做分析:
平安银行的资金流量分析