For code/output blocks: Use ``` (aka backtick or grave accent) in a single line before and after the block. See: http://commonmark.org/help/

Reducing run time



  • Hello,

    I am running a very simple script that I use to plot the tick data I have.

    When I run one stock through it individually, it takes 5 seconds. However, when I run a list of two stocks through it, it takes ~75 seconds.

    The lists containing the stock names are in CSV form with the date in one column and the ticker name in the other.

    Does anyone have an idea of why this might be happening?

    I have the script below.

    import datetime 
    from datetime import timedelta
    from datetime import date
    import os.path  
    import sys 
    from pathlib import Path
    import pandas as pd
    import backtrader.feeds as btfeeds
    import pandas_datareader.data as web
    import backtrader as bt 
    import backtrader.indicators as btind
    import backtrader.analyzers as btanalyzers
    import re
    import time
    import matplotlib
    import backtrader.utils.flushfile
    import subprocess
    import sys
    
    """
    Charter
    
    """
    
    class Inputs:
        """Static configuration, evaluated once when the class body runs.

        Attributes:
            path: Location of the CSV that lists what to chart.
            data_columns: Column names applied to that CSV. The file is read
                with ``names=``, so every row in it is treated as data.
            data: The CSV loaded as a DataFrame.
            Stocklist: One ``[ticker, date]`` pair per CSV row.
            params: Starting cash plus the bar type and compression used when
                the tick feeds are resampled.
        """

        # Raw string: in a plain literal the "\N" of "\Names_dates_small.csv"
        # is a malformed named-unicode escape (a SyntaxError on Python 3) and
        # the remaining backslashes are invalid escape sequences.
        path = r'C:\Python27\.vscode\Excel Files\Stock_lists\Names_dates_small.csv'
        data_columns = ['Date', "Ticker"]
        with open(path) as csvfile:
            data = pd.read_csv(csvfile, names=data_columns)

        # Column-wise extraction instead of a row-by-row iterrows() loop.
        Stocklist = data[['Ticker', 'Date']].values.tolist()

        params = {
            'starting_cash': 100000,
            'Bar_compression': 1,
            'Bar_type': bt.TimeFrame.Seconds,
        }
    
    class BuyySell(bt.observers.BuySell):
        """BuySell observer subclass with its own marker size and bar settings."""

        # Both the buy and the sell plot lines are drawn with markersize 6.0.
        plotlines = {
            'buy': {'markersize': 6.0},
            'sell': {'markersize': 6.0},
        }

        # Values for the observer's `barplot` and `bardist` params.
        params = (
            ('barplot', True),
            ('bardist', 0.0003),
        )
    
    class Algo(bt.Strategy):
        """Strategy that only reads values from its data feeds; it places no orders."""

        def next(self):
            """Read the lines, name, clock accessors and position of each non-empty feed."""
            for feed in self.datas:
                # Nothing to read from a feed that has not delivered a bar yet.
                if len(feed) <= 0:
                    continue

                # Trade variables (bound locally; not used further in this method).
                open_line = feed.open
                high_line = feed.high
                low_line = feed.low
                close_line = feed.close
                volume_line = feed.volume
                symbol = feed._name
                time_of = feed.datetime.time
                datetime_of = feed.datetime.datetime
                position = self.broker.getposition(data=feed)
                       
    class run:
        """Load the tick files named by ``Inputs.Stocklist`` into Cerebro, run, and plot."""

        @staticmethod
        def _load_tick_window(path, minutes=10):
            """Return the opening window of a trades CSV as an OHLC + volume frame.

            Args:
                path: Headerless CSV whose columns are read as
                    day, time, ticker, price, volume.
                minutes: Length of the window, measured from the first row's
                    timestamp (the end point is inclusive).

            Returns:
                DataFrame indexed by timestamp (index name ``date``) with the
                columns Open, High, Low, Close (all equal to the tick price)
                and volume.
            """
            with open(path) as csvfile:
                ticks = pd.read_csv(
                    csvfile, names=["day", "time", "ticker", "price", "volume"])

            # Build "day time" with vectorised string concatenation and parse it
            # once, instead of a per-row apply() plus two to_datetime() passes.
            # NOTE(review): the window is now cut on the full timestamp rather
            # than on time-of-day only; this assumes one file holds a single
            # day's ticks - confirm against the data layout.
            stamps = pd.to_datetime(
                ticks["day"].astype(str) + ' ' + ticks["time"].astype(str)
            ).astype('datetime64[ns]')

            price = ticks["price"]
            bars = pd.DataFrame(
                {
                    "Open": price,
                    "High": price,
                    "Low": price,
                    "Close": price,
                    "volume": ticks["volume"],
                },
                columns=["Open", "High", "Low", "Close", "volume"],
            )
            bars.index = pd.DatetimeIndex(stamps, name='date')

            # Label-based slice: keeps every row up to and including the cutoff.
            cutoff = bars.index[0] + timedelta(minutes=minutes)
            return bars.loc[:cutoff]

        def runstrat(self):
            """Create Cerebro, add one resampled feed per available file, run and plot."""
            # Algo Cerebro start, add strat, multiobserver
            cerebro = bt.Cerebro()
            cerebro.addstrategy(Algo)
            cerebro.addobservermulti(BuyySell)

            for entry in Inputs.Stocklist:
                ticker = entry[0]
                day = datetime.datetime.strptime(entry[1], "%m/%d/%Y")
                path = ("Q:/data/equity_prints_quotes/csv/%s/%s_trades.csv"
                        % (day.strftime("%Y%m%d"), ticker))

                # Entries without a trades file are skipped.
                if not Path(path).exists():
                    continue

                feed = btfeeds.PandasData(
                    dataname=self._load_tick_window(path),
                    timeframe=bt.TimeFrame.Ticks,
                )
                cerebro.resampledata(
                    feed,
                    name=('{}, {}'.format(ticker, day.strftime("%Y-%m-%d"))),
                    timeframe=Inputs.params['Bar_type'],
                    compression=Inputs.params['Bar_compression'],
                )

            # Cerebro: Broker, Sizer, Plot
            cerebro.broker.setcash(Inputs.params['starting_cash'])
            cerebro.addsizer(bt.sizers.FixedSize, stake=300)
            cerebro.broker.setcommission(commission=0.0001)
            cerebro.run()
            cerebro.plot(style='Tick', bardown='black', fmt_x_data=('%H:%M:%S'), volume=True)
           
    if __name__ == '__main__':
        # Script entry point: build the runner and execute the charting run.
        run().runstrat()
    
    


  • @backtrader I meant to put this in the help section. Can you give me authorization to delete it so I can repost it there?


  • administrators

    @rstrong said in Reducing run time:

    Does anyone have an idea of why this might be happening?

    Because the data feeds have different sizes. You would ideally first filter the data you want to backtest, rather than loading it all into the platform and having the platform filter it bar by bar for you.



  • @backtrader could you provide a bit more color on what you mean?

    In this script I filter the data to only put 10 minutes worth of data into backtrader for each data feed.

    How else would I filter the data?


  • administrators

    @rstrong said in Reducing run time:

    data2 = btfeeds.PandasData(dataname = data, timeframe=bt.TimeFrame.Ticks,

    10 minutes of ticks can be a huge amount of information, especially if you also resample it.

    If you already filter, there is not much more you can do.