For code/output blocks: Use ``` (aka backtick or grave accent) in a single line before and after the block. See: http://commonmark.org/help/

Four ways to load data and how their load times differ



  • backtrader load data time.png
    If your data is large, PandasDirectData may suit you better.

    from __future__ import (absolute_import, division, print_function,
                            unicode_literals)
    
    import time 
    import datetime  # For datetime objects
    import os.path  # To manage paths
    import sys  # To find out the script name (in argv[0])
    import pickle
    import inspect
    import pandas as pd 
    # Import the backtrader platform
    import backtrader as bt
    from backtrader.comminfo import ComminfoFuturesPercent,ComminfoFuturesFixed
    from backtrader.metabase import findbases,MetaBase
    # Create a Strategy
    class YunStrategy(bt.Strategy):
        """Placeholder strategy used only to drive the data-loading benchmark.

        It counts the bars delivered to ``next`` and instantiates one SMA
        indicator. It never places an order; the "entry day" and "exit day"
        branches are empty hooks where debug logging can be re-enabled.

        Params:
            look_back_days: SMA period, also the modulus for the entry-day check.
            hold_days: modulus for the exit-day check.
        """

        author = "yunjinqi"
        params = (
            ("look_back_days", 60),
            ("hold_days", 30),
        )

        def log(self, txt, dt=None):
            """Print ``txt`` prefixed with an ISO-formatted date.

            When ``dt`` is falsy, the date of the current bar of the first
            data feed is used.
            """
            if not dt:
                dt = self.datas[0].datetime.date(0)
            stamp = dt.isoformat()
            print('%s, %s' % (stamp, txt))

        def __init__(self):
            # Number of times next() has been called so far.
            self.bar_num = 0
            # SMA over the look-back window; next() below never reads it.
            window = self.p.look_back_days
            self.ma = bt.indicators.SMA(period=window)

        def next(self):
            """Advance the bar counter and evaluate the (empty) entry/exit hooks."""
            self.bar_num += 1
            seen = self.bar_num

            is_entry_day = seen % self.p.look_back_days == 0
            is_exit_day = seen % self.p.hold_days == 0

            if is_entry_day:
                # Entry-day hook: nothing is done here. Debug logging of the
                # close price, data length, data name and writer info can be
                # added via self.log(...) when inspecting the feed.
                pass
            if is_exit_day:
                # Exit-day hook: nothing is done here either.
                pass
    
    def run_CSVData(datapath='E:/orcl-1995-2014.txt'):
        """Time one complete backtest fed by ``bt.feeds.YahooFinanceCSVData``.

        The measured interval covers building the Cerebro engine, creating the
        CSV feed and executing ``cerebro.run()``.

        Args:
            datapath: Path of the CSV file handed to the feed. Defaults to the
                location used by the original benchmark.

        Returns:
            float: elapsed time in seconds.
        """
        # perf_counter() is monotonic and high resolution; time.time() can jump
        # when the system clock is adjusted and is coarse on some platforms.
        begin_time = time.perf_counter()

        cerebro = bt.Cerebro()
        cerebro.addstrategy(YunStrategy)

        data = bt.feeds.YahooFinanceCSVData(
            dataname=datapath,
            # Do not pass values before this date
            fromdate=datetime.datetime(1995, 1, 1),
            # Do not pass values after this date
            todate=datetime.datetime(2014, 12, 31),
            # NOTE(review): presumably means the file is already in ascending
            # date order - confirm against the feed documentation.
            reverse=False)
        cerebro.adddata(data)

        # Same broker setup as the other run_* benchmarks so timings compare.
        comm = ComminfoFuturesPercent(commission=0.0002, margin=0.1, mult=10)
        cerebro.broker.addcommissioninfo(comm)
        cerebro.broker.setcash(100000.0)

        cerebro.run()

        return time.perf_counter() - begin_time
    
    def run_PandasData(datapath='E:/orcl-1995-2014.txt'):
        """Time one complete backtest fed by ``bt.feeds.PandasData``.

        The measured interval covers reading the CSV with pandas, building the
        feed and executing ``cerebro.run()``.

        Args:
            datapath: Path of the CSV file read with ``pd.read_csv``. The file
                must contain a ``Date`` column. Defaults to the location used
                by the original benchmark.

        Returns:
            float: elapsed time in seconds.
        """
        # perf_counter() is monotonic and high resolution; time.time() can jump
        # when the system clock is adjusted and is coarse on some platforms.
        begin_time = time.perf_counter()

        cerebro = bt.Cerebro()
        cerebro.addstrategy(YunStrategy)

        df = pd.read_csv(datapath)
        # Index the frame by the parsed 'Date' column before handing it over.
        df.index = pd.to_datetime(df['Date'])
        data = bt.feeds.PandasData(dataname=df)
        cerebro.adddata(data)

        # Same broker setup as the other run_* benchmarks so timings compare.
        comm = ComminfoFuturesPercent(commission=0.0002, margin=0.1, mult=10)
        cerebro.broker.addcommissioninfo(comm)
        cerebro.broker.setcash(100000.0)

        cerebro.run()

        return time.perf_counter() - begin_time
        
    def run_PandasDirectData(datapath='E:/orcl-1995-2014.txt'):
        """Time one complete backtest fed by ``bt.feeds.PandasDirectData``.

        The measured interval covers reading the CSV with pandas, reshaping the
        frame, building the feed and executing ``cerebro.run()``.

        Args:
            datapath: Path of the CSV file read with ``pd.read_csv``. The file
                must have exactly seven columns, because they are renamed
                positionally. Defaults to the location used by the original
                benchmark.

        Returns:
            float: elapsed time in seconds.
        """
        # perf_counter() is monotonic and high resolution; time.time() can jump
        # when the system clock is adjusted and is coarse on some platforms.
        begin_time = time.perf_counter()

        cerebro = bt.Cerebro()
        cerebro.addstrategy(YunStrategy)

        df = pd.read_csv(datapath)
        # Rename the seven file columns positionally, then reorder so that
        # 'volume' comes before 'openinterest'.
        # NOTE(review): presumably PandasDirectData reads columns by position,
        # which is why the order matters - confirm against the feed docs.
        df.columns = ['datetime', 'open', 'high', 'low', 'close',
                      'openinterest', 'volume']
        df = df[['datetime', 'open', 'high', 'low', 'close',
                 'volume', 'openinterest']]
        # Move the timestamps into the index and drop the now-redundant column.
        df.index = pd.to_datetime(df['datetime'])
        del df['datetime']
        data = bt.feeds.PandasDirectData(dataname=df)
        cerebro.adddata(data)

        # Same broker setup as the other run_* benchmarks so timings compare.
        comm = ComminfoFuturesPercent(commission=0.0002, margin=0.1, mult=10)
        cerebro.broker.addcommissioninfo(comm)
        cerebro.broker.setcash(100000.0)

        cerebro.run()

        return time.perf_counter() - begin_time
    
    def run_pickle(datapath="E:/test_backtrader_load_data.pkl"):
        """Time one complete backtest fed by a pickled DataFrame.

        The unpickled object is passed unchanged to
        ``bt.feeds.PandasDirectData``. The measured interval covers
        unpickling, building the feed and executing ``cerebro.run()``.

        Args:
            datapath: Path of the pickle file. Defaults to the location used
                by the original benchmark.

        Returns:
            float: elapsed time in seconds.
        """
        # perf_counter() is monotonic and high resolution; time.time() can jump
        # when the system clock is adjusted and is coarse on some platforms.
        begin_time = time.perf_counter()

        cerebro = bt.Cerebro()
        cerebro.addstrategy(YunStrategy)

        # SECURITY: pickle.load executes arbitrary code embedded in the file.
        # Only point this at pickle files you created yourself.
        with open(datapath, 'rb') as f:
            df = pickle.load(f)
        data = bt.feeds.PandasDirectData(dataname=df)
        cerebro.adddata(data)

        # Same broker setup as the other run_* benchmarks so timings compare.
        comm = ComminfoFuturesPercent(commission=0.0002, margin=0.1, mult=10)
        cerebro.broker.addcommissioninfo(comm)
        cerebro.broker.setcash(100000.0)

        cerebro.run()

        return time.perf_counter() - begin_time
        
    if __name__ == '__main__':
        # Benchmark driver: run each loader n_trials times and plot the
        # per-trial elapsed seconds. Everything, including plotting, lives
        # under this guard so importing the module neither runs the benchmark
        # nor fails on the result lists being undefined.
        import matplotlib.pyplot as plt

        n_trials = 100

        csvdata_list = []
        PandasData_list = []
        PandasDirectData_list = []
        pickle_list = []
        # The four loaders are interleaved within each trial, so all of them
        # are timed at roughly the same points of the overall run.
        for _ in range(n_trials):
            csvdata_list.append(run_CSVData())
            PandasData_list.append(run_PandasData())
            PandasDirectData_list.append(run_PandasDirectData())
            pickle_list.append(run_pickle())

        trials = range(n_trials)
        plt.plot(trials, csvdata_list, color='green', label='csv load data')
        plt.plot(trials, PandasData_list, color='blue', label='PandasData load data')
        plt.plot(trials, PandasDirectData_list, color='red', label='PandasDirectData load data')
        plt.plot(trials, pickle_list, color='black', label='pickle df and load data')
        plt.legend(loc='best')  # let matplotlib choose where to place the legend
        plt.title("backtrader load data time")
        plt.xlabel("different trial")
        plt.ylabel("seconds")
        plt.show()
    


  • Wow, thanks for doing the hard work!


Log in to reply
 

});