For code/output blocks: Use ``` (aka backtick or grave accent) in a single line before and after the block. See: http://commonmark.org/help/

How can adding and preloading data be made almost 100 times faster?



  • If you don't work with a lot of data, you can ignore this post.

    As we know, a backtest runs faster with preloaded data than without it. However, the preload itself takes a lot of time, so it is worth looking for a way to speed up the preload step.

    When I load 5000+ futures contracts, every preload takes 62.5 seconds. Terrible!
    每次preload的时间.png
    But if we pickle self.datas after cerebro has preloaded it, and read it back from the pickle on later runs, loading takes just 0.66 seconds.
    从pickle进行pre_load数据.png

    The first time, run it once to save self.datas:

    # First run: preloads the feeds as usual, pickles self.datas to
    # normal_future_data.pkl and then aborts with an AssertionError.
    cerebro.run(save_my_data=True)
    

    After that, you can load the saved data to speed things up:

    # Later runs: self.datas is replaced by the objects unpickled from
    # normal_future_data.pkl, so the per-feed preload pass is skipped.
    cerebro.run(load_my_data=True)
    

    How to implement it?

    Add a function load_my_data_from_pickle to cerebro, modify the function runstrategies, and add two params.

    1. Add two params
    params = (
            # Existing Cerebro params (names and defaults unchanged)
            ('preload', True),
            ('runonce', True),
            ('maxcpus', None),
            ('stdstats', True),
            ('oldbuysell', False),
            ('oldtrades', False),
            ('lookahead', 0),
            ('exactbars', False),
            ('optdatas', True),
            ('optreturn', True),
            ('objcache', False),
            ('live', False),
            ('writer', False),
            ('tradehistory', False),
            ('oldsync', False),
            ('tz', None),
            ('cheat_on_open', False),
            ('broker_coo', True),
            ('quicknotify', False),

            # The two new switches read by ``runstrategies``: take the datas
            # from a pickle file / write the preloaded datas to a pickle file
            ('load_my_data', False),
            ('save_my_data', False),
        )
    
    2. Add the new function and modify runstrategies
    def load_my_data_from_pickle(self, path="normal_future_data.pkl"):
            '''
            Return the object stored in the pickle file at ``path``

            The file is opened in binary mode, fully unpickled and closed
            again before returning.
            '''
            from pickle import load as unpickle

            with open(path, mode="rb") as stream:
                return unpickle(stream)
    
        def runstrategies(self, iterstrat, predata=False):
            '''
            Internal method invoked by ``run`` to run a set of strategies

            Starts the stores, the broker and the feeds, prepares the datas
            (unless ``predata`` is true), instantiates every strategy delivered
            by ``iterstrat``, runs them and finally stops everything again.

            The data preparation honours two extra params:

              - ``load_my_data``: ``self.datas`` is replaced by whatever
                ``load_my_data_from_pickle`` returns and the regular
                reset/extend/_start/preload pass is skipped. It takes
                precedence over ``save_my_data``.

              - ``save_my_data``: the regular pass is executed, ``self.datas``
                is pickled to ``normal_future_data.pkl`` and the run is then
                aborted on purpose with an ``AssertionError``.

            Returns the list of strategy instances that were run or, when
            optimizing with ``optreturn`` enabled, a list of ``OptReturn``
            objects.
            '''
            self._init_stcount()

            self.runningstrats = runstrats = list()
            for store in self.stores:
                store.start()

            if self.p.cheat_on_open and self.p.broker_coo:
                # try to activate in broker
                if hasattr(self._broker, 'set_coo'):
                    self._broker.set_coo(True)

            if self._fhistory is not None:
                self._broker.set_fund_history(self._fhistory)

            for orders, onotify in self._ohistory:
                self._broker.add_order_history(orders, onotify)

            self._broker.start()

            for feed in self.feeds:
                feed.start()

            if self.writers_csv:
                # Gather the headers of the datas flagged for csv output and
                # pass them on to every writer which does csv
                wheaders = list()
                for data in self.datas:
                    if data.csv:
                        wheaders.extend(data.getwriterheaders())

                for writer in self.runwriters:
                    if writer.p.csv:
                        writer.addheaders(wheaders)

            # self._plotfillers = [list() for d in self.datas]
            # self._plotfillers2 = [list() for d in self.datas]

            if not predata:
                if self.p.load_my_data:
                    # NOTE(security): unpickling can execute arbitrary code,
                    # only load pickle files written by a previous
                    # ``save_my_data`` run of your own
                    self.datas = self.load_my_data_from_pickle()
                else:
                    # Local import (like ``pickle`` below): nothing in the
                    # visible code imports ``time`` at module level
                    import time

                    # Single preparation pass shared by the plain run and the
                    # ``save_my_data`` run
                    begin_time = time.time()
                    for data in self.datas:
                        data.reset()
                        if self._exactbars < 1:  # datas can be full length
                            data.extend(size=self.params.lookahead)
                        data._start()
                        if self._dopreload:
                            data.preload()
                    end_time = time.time()
                    print("every time pre_load consume time :{}".format(end_time-begin_time))

                    if self.p.save_my_data:
                        import pickle
                        with open("normal_future_data.pkl", 'wb') as f:
                            pickle.dump(self.datas, f)

                        # The save run ends here on purpose. An explicit raise
                        # keeps the exception type of the former ``assert 0``
                        # but is not stripped when running with ``python -O``
                        raise AssertionError(
                            'datas saved to normal_future_data.pkl, '
                            'run again with load_my_data=True')

            for stratcls, sargs, skwargs in iterstrat:
                # the datas are always the leading positional arguments
                sargs = self.datas + list(sargs)
                try:
                    strat = stratcls(*sargs, **skwargs)
                except bt.errors.StrategySkipError:
                    continue  # do not add strategy to the mix

                if self.p.oldsync:
                    strat._oldsync = True  # tell strategy to use old clock update
                if self.p.tradehistory:
                    strat.set_tradehistory()
                runstrats.append(strat)

            # an integer ``tz`` is an index into the datas, anything else is
            # handed over to ``tzparse``
            tz = self.p.tz
            if isinstance(tz, integer_types):
                tz = self.datas[tz]._tz
            else:
                tz = tzparse(tz)

            if runstrats:
                # loop separated for clarity
                defaultsizer = self.sizers.get(None, (None, None, None))
                for idx, strat in enumerate(runstrats):
                    if self.p.stdstats:
                        strat._addobserver(False, observers.Broker)
                        if self.p.oldbuysell:
                            strat._addobserver(True, observers.BuySell)
                        else:
                            strat._addobserver(True, observers.BuySell,
                                               barplot=True)

                        if self.p.oldtrades or len(self.datas) == 1:
                            strat._addobserver(False, observers.Trades)
                        else:
                            strat._addobserver(False, observers.DataTrades)

                    for multi, obscls, obsargs, obskwargs in self.observers:
                        strat._addobserver(multi, obscls, *obsargs, **obskwargs)

                    for indcls, indargs, indkwargs in self.indicators:
                        strat._addindicator(indcls, *indargs, **indkwargs)

                    for ancls, anargs, ankwargs in self.analyzers:
                        strat._addanalyzer(ancls, *anargs, **ankwargs)

                    # a sizer registered for this index wins over the default
                    sizer, sargs, skwargs = self.sizers.get(idx, defaultsizer)
                    if sizer is not None:
                        strat._addsizer(sizer, *sargs, **skwargs)

                    strat._settz(tz)
                    strat._start()

                    for writer in self.runwriters:
                        if writer.p.csv:
                            writer.addheaders(strat.getwriterheaders())

                if not predata:
                    for strat in runstrats:
                        strat.qbuffer(self._exactbars, replaying=self._doreplay)

                for writer in self.runwriters:
                    writer.start()

                # Prepare timers
                self._timers = []
                self._timerscheat = []
                for timer in self._pretimers:
                    # preprocess tzdata if needed
                    timer.start(self.datas[0])

                    if timer.params.cheat:
                        self._timerscheat.append(timer)
                    else:
                        self._timers.append(timer)

                # runonce is only chosen together with preload, any other
                # combination goes through the runnext variants
                if self._dopreload and self._dorunonce:
                    if self.p.oldsync:
                        self._runonce_old(runstrats)
                    else:
                        self._runonce(runstrats)
                else:
                    if self.p.oldsync:
                        self._runnext_old(runstrats)
                    else:
                        self._runnext(runstrats)

                for strat in runstrats:
                    strat._stop()

            self._broker.stop()

            if not predata:
                for data in self.datas:
                    data.stop()

            for feed in self.feeds:
                feed.stop()

            for store in self.stores:
                store.stop()

            self.stop_writers(runstrats)

            if self._dooptimize and self.p.optreturn:
                # Results can be optimized
                results = list()
                for strat in runstrats:
                    # clear the analyzers' references to the strategy, the
                    # parent and any ``data*`` attribute before packing them
                    for a in strat.analyzers:
                        a.strategy = None
                        a._parent = None
                        for attrname in dir(a):
                            if attrname.startswith('data'):
                                setattr(a, attrname, None)

                    oreturn = OptReturn(strat.params, analyzers=strat.analyzers, strategycls=type(strat))
                    results.append(oreturn)

                return results

            return runstrats
    

    very good job!!!



  • @tianjixuetu Thanks for sharing. It helped me save over 140sec per run.


Log in to reply
 

});