For code/output blocks: Use ``` (aka backtick or grave accent) in a single line before and after the block. See: http://commonmark.org/help/

alpaca data store live backfill_start sometimes not working reliably



  • I tried to understand the documentation on the Alpaca Data Store and followed the example:
    alpaca-backtrader-api.

    Problems in live mode:

    • on backfill_start=True at minute data resolution, the datastore apparently attempts to download the complete minute history ---> this sometimes causes REST API timeouts or freezing when launching
    • tickdata and bardata must be added separately: just cerebro.resampledata(data0...) does not work in live mode (as suggested in the documentation)

    What I tried so far but did not solve my problems:

    • 'backfill_from' with an additional DataFeed
    • 'backfill_start' = False : causes error because indicators cannot be calculated

    See example code to reproduce (c&p):

    import alpaca_backtrader_api
    import backtrader as bt
    from datetime import datetime
    import logging
    import matplotlib
    import logging
    import pandas as pd
    import numpy as np
    import pdb
    import pytz
    import pendulum
    import pickle
    from alpaca_keys_paper import *
    
    #======global settings
    IS_BACKTEST = True        # True: run against historical data; False: live/paper mode
    IS_LIVE = False           # False selects the paper endpoint (AlpacaStore paper=not IS_LIVE)
    BACKTEST_CASH = 100000    # starting cash for the simulated broker
    TIMEFRAME = bt.TimeFrame.Minutes   # base resolution for data and resampling
    FROMDATE = datetime( 2017, 1, 3)   # backtest start (naive datetime; run comment says all UTC)
    TODATE = datetime( 2017, 1, 4)     # backtest end (exclusive per backtrader convention -- TODO confirm)
    COMPRESSION=1 # minutes
    CHEAT_ON_CLOSE = True     # backtest only: fill market orders at the triggering bar's close
    LOGGING = True            # master switch for StrategyStoploss.log output
    SLIPPAGE = 0.0001         # fractional slippage applied by the simulated broker
    SYMB = 'SPY'              # traded symbol
    
    #======params=======
    
    
    class StrategyStoploss(bt.Strategy):
        """Minute-resolution RSI demo strategy for the Alpaca store example.

        Expects two feeds per symbol: the raw feed added under the name SYMB
        (tick data in live mode) and the resampled bar feed added under
        SYMB + '_bar'.
        """

        params = dict(
        )

        def __init__(self):
            self.live_bars = False   # becomes True once the feed reports LIVE
            self.datas_mapping = {}  # symbol name -> data feed object
            self.dt_previous_bar = 0
            self.rsi = bt.indicators.RSI_SMA(self.datas[0].close, period=14)
            self.rsi2 = bt.indicators.RSI_SMA(self.datas[0].close, period=21)
            self.last_bar = 0        # backtrader datetime float of the last completed bar
            # FIX: notify_trade reads these dicts but they were never
            # initialized, causing AttributeError on the first trade event.
            self.buy_prices = {}
            self.sell_prices = {}
            self.startdate = pendulum.now('UTC').to_iso8601_string().replace(':','-').split('.')[0]

        def log(self, txt, *args):
            """Print a timestamped log line (portfolio value in backtests only)."""
            if LOGGING:                       # can be deactivated for wrapper optimizer
                dt = self.data.datetime[0]
                dt = bt.num2date(dt)
                if IS_BACKTEST:
                    # only print portfolio value in BACKTEST, otherwise API call limit reached
                    print("_dt=" + dt.isoformat(), f"portfolio={'%.0f' % self.broker.getvalue()}", txt, *args)
                else:
                    print("_dt=" + dt.isoformat(), txt, *args)

        def notify_store(self, msg, *args, **kwargs):
            """Forward store notifications to the strategy log."""
            super().notify_store(msg, *args, **kwargs)
            self.log(msg)

        def stop(self):
            """Print starting/ending portfolio value when the run finishes."""
            print('==================================================')
            print('Starting Value - %.2f' % self.broker.startingcash)
            print('Ending   Value - %.2f' % self.broker.getvalue())
            print('==================================================')

        def calculate_size(self, money_available, last_price, size_down_on_frac=0.98):
            """Return the whole number of shares affordable with money_available.

            If the whole-share count uses at least size_down_on_frac of the
            exact fractional count, size down by one share to leave headroom
            (presumably for slippage/fees).
            """
            # FIX: the original truncated money/price *before* computing the
            # ratio, so whole/frac was always exactly 1.0 and the size was
            # unconditionally reduced by one share.
            frac = money_available / last_price
            whole = np.trunc(frac)
            if frac > 0 and (whole / frac) >= size_down_on_frac:
                whole = whole - 1
            return whole

        def notify_data(self, data, status, *args, **kwargs):
            """Track LIVE status and register feeds by name for datas_get()."""
            super().notify_data(data, status, *args, **kwargs)
            print('event=notify_data', data._getstatusname(status), *args)
            if data._getstatusname(status) == "LIVE":
                self.live_bars = True

            # map each feed's name to the feed object itself
            # (replaces the original eval()-based "self.datas[i]" strings)
            for d in self.datas:
                self.datas_mapping.setdefault(d._name, d)

        def datas_get(self, symbol):
            """Return the data feed registered under *symbol*."""
            # FIX: direct dict lookup instead of eval() on a stored code
            # string -- eval is slow and unsafe.
            return self.datas_mapping[symbol]

        def __bt_to_pandas__(self, btdata, length):
            """Convert the last *length* bars of a backtrader feed to a DataFrame."""
            get = lambda mydata: mydata.get(ago=0, size=length)

            fields = {
                'open': get(btdata.open),
                'high': get(btdata.high),
                'low': get(btdata.low),
                'close': get(btdata.close),
                'volume': get(btdata.volume)
            }
            time = [btdata.num2date(x) for x in get(btdata.datetime)]
            return pd.DataFrame(data=fields, index=time)

        def notify_order(self, order):
            """Log completed/canceled orders; ignore intermediate states."""
            if order.getstatusname() not in ['Completed', 'Canceled']:
                return

            # ordertype (backtrader orders are exactly one of buy/sell)
            side = 'buy' if order.isbuy() else 'sell'

            # logging the order
            self.log("event=order",f"type={order.getordername()}",f"side={side}",f"price={'%.2f' % order.executed.price}",f"size={str(int(order.executed.size))}",f"margin={str(order.executed.margin)}",f"status={str(order.getstatusname())}",f"ref={str(order.ref)}")

        def notify_trade(self, trade):
            """Log trade updates and remember the last buy/sell price per asset."""
            ttype = 'long' if trade.long else 'short'
            asset = trade.getdataname()
            price = trade.price
            size = trade.size
            status = trade.status
            side = 'buy' if size > 0 else 'sell'
            if size == 0:
                side = 'unclear'
            self.log("event=trade",f"type={ttype}",f"asset={asset}",f"price={'%.2f' % price}",f"size={str(size)}",f"commission={str(trade.commission)}",f"tradeid={str(trade.tradeid)}",f"ref={str(trade.ref)}",f"status={str(status)}")

            # remember execution price per asset and side
            if side == 'buy':
                self.buy_prices[asset] = price
            if side == 'sell':
                self.sell_prices[asset] = price

        def next(self):
            """Per-tick driver: detect completed resampled bars and log prices."""
            # in live mode do nothing until the feed is actually LIVE
            # (ignored when backtesting)
            if not self.live_bars and not IS_BACKTEST:
                return

            # tick feed and resampled bar feed (see module-level adddata calls)
            databar = self.datas_get(SYMB + '_bar')
            datatick = self.datas_get(SYMB)

            # current open/close for tickdata and bardata
            p_open_tick = datatick.open[0]
            p_close_tick = datatick.close[0]
            p_open_bar = databar.open[0]
            p_close_bar = databar.close[0]

            # check if a new bar is reached with the new tick (live!)
            new_bar = False
            if databar.datetime[0] > self.last_bar:
                self.last_bar = databar.datetime[0]
                new_bar = True
                self.log("event=new_bar",f"asset={SYMB}",f"=======================================")

            # logging info
            if new_bar:
                self.log("event=tick",f"asset={SYMB}",f"open={str(p_open_tick)}",f"close={str(p_close_tick)}")
                self.log("event=bar",f"asset={SYMB}",f"open={str(p_open_bar)}",f"close={str(p_close_bar)}")
    
    cerebro = bt.Cerebro()
    cerebro.addstrategy(StrategyStoploss)
    # Store wraps the Alpaca REST/websocket connection; paper=not IS_LIVE
    # selects the paper-trading endpoint. Keys come from alpaca_keys_paper.
    store = alpaca_backtrader_api.AlpacaStore(
        key_id=ALPACA_API_KEY,
        secret_key=ALPACA_SECRET_KEY,
        paper=not IS_LIVE,
        usePolygon=False
    )
    
    #===========configuring broker
    if IS_BACKTEST:
        # simulated broker: percent slippage, cheat-on-close fills, starting cash
        cerebro.broker.set_slippage_perc(SLIPPAGE,slip_open=True,slip_limit=True,slip_match=True,slip_out=False)
        cerebro.broker.set_coc(CHEAT_ON_CLOSE)
        cerebro.broker.setcash(BACKTEST_CASH)
    else:
        # live/paper: route orders through the Alpaca broker instead
        broker = store.getbroker()
        cerebro.setbroker(broker)
    
    #===========configuring data
    DataFactory = store.getdata  # or use alpaca_backtrader_api.AlpacaData 
    # in backtest, data is minute-bar data
    # in live, tickdata is fed via websocket! next() is called by tick, so data need to be resampled to higher resolution!
    
    if IS_BACKTEST:
        # bounded historical minute bars for [FROMDATE, TODATE)
        data0 = DataFactory(dataname=SYMB,historical=True,fromdate=FROMDATE,todate=TODATE,timeframe=TIMEFRAME,compression=COMPRESSION)
    else:
        # NOTE(review): backfill_start=True at minute resolution appears to pull
        # the entire minute history (reported REST timeouts/freezes at launch) --
        # confirm whether a fromdate/backfill bound is supported here.
        data0 = DataFactory(dataname=SYMB,historical=False,timeframe=TIMEFRAME,compression=1,backfill_start=True,backfill=True,qcheck=0.9)
    
    cerebro.adddata(data0,name=SYMB) # tickdata in live mode
    cerebro.resampledata(data0,name=SYMB + '_bar',timeframe=TIMEFRAME,compression=COMPRESSION) # bardata in live mode
    
    # risk
    cerebro.addobserver(bt.observers.DrawDown)
    
    # ad-hoc result buckets attached to the broker -- presumably read by an
    # external wrapper/optimizer; verify before removing
    cerebro.broker.RETURNS = []
    cerebro.broker.BENCHMARK = []
    results=cerebro.run() # timezone not needed, everything is in UTC
    


  • Does anyone have ideas on how to make the minute data work in a scalable way?


Log in to reply
 

});