[Code snippet] Copying IB data to Pandas for efficiency
-
There's a small inefficiency issue when optimizing parameters using Interactive Brokers historical data, whether it's with backtrader's optimization module or an external optimization library such as Optunity. The issue is that a new connection is made to download the data from IB for each iteration of each permutation in parameter ranges.
To speed things up, I use the IbPy library to copy the data into a pandas dataframe then pass this over to backtrader as a data feed. In this way, the IB data is read once from TWS, but is made available to the optimizer via a data frame in memory as often as the optimization engine requires it.
There may be a simpler way of doing this with IBStore, but if there is, I wasn't able to figure it out.
Code snippet for the IbPy/pandas solution is below using backtrader's optimization example code as a base...
"""Download IB historical bars once into pandas, then optimize with backtrader.

The bars are requested a single time through IbPy, collected into a pandas
DataFrame, and handed to backtrader as a ``PandasData`` feed, so the
optimization run works from the in-memory frame.
"""
import os.path  # To manage paths
import sys  # To find out the script name (in argv[0])
import threading
from datetime import datetime
from time import sleep, strftime, localtime

import backtrader as bt
import pandas as pd
import pytz
import tzlocal
from ib.ext.Contract import Contract
from ib.opt import ibConnection, message

# Column layout of the frame handed to backtrader.
COLUMNS = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'OpenInterest']

# Upper bound (seconds) to wait for IB to deliver the complete history.
DOWNLOAD_TIMEOUT = 60

# Filled in by historical_data_handler once the download has finished.
df = pd.DataFrame(columns=COLUMNS)

# Bars are buffered here and turned into a frame in one go; growing a
# DataFrame row by row re-allocates on every insert.
_rows = []

# Set by the handler when IB signals the end of the historical data.
_download_finished = threading.Event()


def historical_data_handler(msg):
    """Collect IB historical-data messages into the global ``df``.

    Every bar message is buffered as one row. The terminating message
    (its ``date`` contains the text ``'finished'``) triggers construction
    of the final frame, indexed by ``'Date'``, and releases the main
    script, which is waiting on ``_download_finished``.

    Args:
        msg: IbPy ``historicalData`` message; its ``date``, ``open``,
            ``high``, ``low``, ``close`` and ``volume`` attributes are read.
    """
    global df
    # print (msg.reqId, msg.date, msg.open, msg.close, msg.high, msg.low)
    if 'finished' not in str(msg.date):
        # NOTE(review): fromtimestamp() yields naive *local* time while the
        # feed below is created with tz=US/Eastern - confirm this is the
        # intended interpretation of the bar timestamps.
        _rows.append([datetime.fromtimestamp(int(msg.date)), msg.open,
                      msg.high, msg.low, msg.close, msg.volume, 0])
        return

    df = pd.DataFrame(_rows, columns=COLUMNS).set_index('Date')
    _download_finished.set()


con = ibConnection(host='127.0.0.1', port=7496, clientId=77)
con.register(historical_data_handler, message.historicalData)
con.connect()

# IBpy - set up contract details and historical data request
qqq = Contract()
qqq.m_symbol = 'ES'
qqq.m_secType = 'FUT'
qqq.m_exchange = 'GLOBEX'
qqq.m_currency = 'USD'
qqq.m_expiry = '201709'
print(qqq.m_symbol)

con.reqHistoricalData(0, qqq, '', '3 W', '1 hour', 'TRADES', 1, 2)

# Wait for the 'finished' message instead of sleeping a fixed time: a fixed
# sleep is either too long or, worse, too short (frame incomplete and not
# yet indexed by date).
finished = _download_finished.wait(DOWNLOAD_TIMEOUT)
con.disconnect()  # the data now lives in memory; the connection is not needed
if not finished:
    raise RuntimeError(
        'IB historical data download did not finish within %d seconds'
        % DOWNLOAD_TIMEOUT)

print('---------------')
print(df)

# assign our newly created dataframe to a bt.feed
data = bt.feeds.PandasData(dataname=df, tz=pytz.timezone('US/Eastern'))


# Create a Strategy
class TestStrategy(bt.Strategy):
    """Price-vs-SMA strategy taken from backtrader's optimization example.

    Buys when the close is above the simple moving average while flat, and
    sells when the close is below it while in the market.

    Params:
        maperiod: period of the simple moving average.
        printlog: when True, every ``log`` call is printed.
    """

    params = (
        ('maperiod', 15),
        ('printlog', False),
    )

    def log(self, txt, dt=None, doprint=False):
        """Logging function for this strategy.

        Prints ``txt`` prefixed with ``dt`` (default: date of the current
        bar of the first data feed) if ``printlog`` or ``doprint`` is set.
        """
        if self.params.printlog or doprint:
            dt = dt or self.datas[0].datetime.date(0)
            print('%s, %s' % (dt.isoformat(), txt))

    def __init__(self):
        # Keep a reference to the "close" line in the data[0] dataseries
        self.dataclose = self.datas[0].close

        # To keep track of pending orders and buy price/commission
        self.order = None
        self.buyprice = None
        self.buycomm = None

        # Add a MovingAverageSimple indicator
        self.sma = bt.indicators.SimpleMovingAverage(
            self.datas[0], period=self.params.maperiod)

    def notify_order(self, order):
        """Log order executions/failures and clear the pending-order marker."""
        if order.status in [order.Submitted, order.Accepted]:
            # Buy/Sell order submitted/accepted to/by broker - Nothing to do
            return

        # Check if an order has been completed
        # Attention: broker could reject order if not enough cash
        if order.status in [order.Completed]:
            if order.isbuy():
                self.log(
                    'BUY EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f' %
                    (order.executed.price,
                     order.executed.value,
                     order.executed.comm))

                self.buyprice = order.executed.price
                self.buycomm = order.executed.comm
            else:  # Sell
                self.log('SELL EXECUTED, Price: %.2f, Cost: %.2f, Comm %.2f' %
                         (order.executed.price,
                          order.executed.value,
                          order.executed.comm))

            self.bar_executed = len(self)

        elif order.status in [order.Canceled, order.Margin, order.Rejected]:
            self.log('Order Canceled/Margin/Rejected')

        # Write down: no pending order
        self.order = None

    def notify_trade(self, trade):
        """Log gross and net profit once a trade has been closed."""
        if not trade.isclosed:
            return

        self.log('OPERATION PROFIT, GROSS %.2f, NET %.2f' %
                 (trade.pnl, trade.pnlcomm))

    def next(self):
        """Evaluate the current bar and create a buy or sell order if due."""
        # Simply log the closing price of the series from the reference
        self.log('Close, %.2f' % self.dataclose[0])

        # Check if an order is pending ... if yes, we cannot send a 2nd one
        if self.order:
            return

        # Check if we are in the market
        if not self.position:

            # Not yet ... we MIGHT BUY if ...
            if self.dataclose[0] > self.sma[0]:

                # BUY, BUY, BUY!!! (with all possible default parameters)
                self.log('BUY CREATE, %.2f' % self.dataclose[0])

                # Keep track of the created order to avoid a 2nd order
                self.order = self.buy()

        else:

            if self.dataclose[0] < self.sma[0]:
                # SELL, SELL, SELL!!! (with all possible default parameters)
                self.log('SELL CREATE, %.2f' % self.dataclose[0])

                # Keep track of the created order to avoid a 2nd order
                self.order = self.sell()

    def stop(self):
        """Always print the final portfolio value for this ``maperiod``."""
        self.log('(MA Period %2d) Ending Value %.2f' %
                 (self.params.maperiod, self.broker.getvalue()), doprint=True)


# Create a cerebro entity
cerebro = bt.Cerebro(maxcpus=1)

# Add the Data Feed to Cerebro
cerebro.adddata(data)

# Add a strategy
strats = cerebro.optstrategy(
    TestStrategy,
    maperiod=range(5, 10))

# Run over everything
cerebro.run()
-
I found this post helpful when creating a class that caches IB data. You can find that example here.
-
@d416 said in [Code snippet] Copying IB data to Pandas for efficiency:
The issue is that a new connection is made to download the data from IB for each iter
@brettelliot The limitation that @d416 was referring to back in May 2017 has (hopefully) been removed, and the data is no longer re-downloaded for each of the different parameter combinations in the optimization.
In any case thanks for contributing the script. There will for sure be users that will find it useful.