Zero records while loading Data from CSV
-
I tried both the GenericCSVData and the PandasData methods; both result in zero records.
GenericCSVData
The CSV data is as shown; while checking len(data), it shows ZERO records.
I tried to use the PandasData method,
defined as below, and used the following code.
I checked the dataframe and it has the data as shown here, but when I check the length of the records it shows ZERO again.
I would appreciate it if anyone could let me know where the issue is...
-
@kk Please do not post the code as an image. Mind the top of the page:
"For code/output blocks: Use ``` (aka backtick or grave accent) in a single line before and after the block. See: http://commonmark.org/help/"
For better support, it is usually required to copy/paste part or all of your code to test it locally, which is not feasible with images.
-
@vladisld Got it. I will take care of that from next time onwards.
-
@vladisld Here is the complete code (it's the same as in the Momentum Strategy article):
import os
os.getcwd()
os.chdir("C:\KK\Data\sectors")from numpy import cumsum, log, polyfit, sqrt, std, subtract
import numpy as np
import backtrader as btdef Momentum_func(the_array):
r = np.log(the_array)
slope, _, rvalue, _, _ = linregress(np.arrange(len(r)), r)
annulized = (1 + slope) ** 252
return annulized * (rvalue ** 2)class Momentum(bt.ind.OperationN):
lines = ('trend',)
params = dict(period = 50)
func = Momentum_funcimport collections
class ClenowStrategy(bt.Strategy):
params = dict(
momentum = Momentum,
mom_period = 90,
movav = bt.ind.SMA,
idx_period = 200,
stock_period= 100,
reserve = 0.05,
selcperc = 0.01,
)def log(self, arg): print('{} {}'.format(self.datetime.date(), arg)) def __init__(self): self.inds = collections.defaultdict(dict) self.stocks = self.datas[1:] self.d_with_len = [] #for fixed size positioning self.selnum = int(len(self.datas) * self.p.selcperc) self.perctarget = (1.0 - self.p.reserve)/self.selnum self.add_timer( when = Timer.SESSION_START, weekdays = [self.p.rebal_weekday], weekcarry = True, ) self.idx_mav = self.p.movav(self.data0, period=self.p.idx_period) for d in self.stocks: self.inds[d]['mom'] = self.p.momentum(d, period = self.mom_period) self.inds[d]['mav'] = self.p.movav(d, period = self.stock_period) self.inds[d]['vol'] = self.p.volatr(d, period=self.p.vol_period) def prenext(self): self.d_with_len = [d for d in self.datas if len(d)] self.next() def nextstart(self): self.d_with_len = self.datas self(next) def next(self): notify_timer() def notify_timer(self, timer, when, *args, **kwargs): self.rankings = list(filter(lambda d: len(d) > self.p.stock_period, self.stocks)) self.rankings.sort(key=lambda d: self.inds[d]["mom"][0]) num_stocks = len(self.rankings) # sell stocks based on criteria for i, d in enumerate(self.rankings): if self.getposition(self.data).size: if i > num_stocks * self.p.selcperc or d < self.inds[d]["mav"]: self.close(d) if self.spy < self.idx_mav: return # buy stocks with remaining cash for i, d in enumerate(self.rankings[:int(num_stocks * self.p.selcperc)]): cash = self.broker.get_cash() value = self.broker.get_value() if cash <= 0: break if not self.getposition(self.data).size: size = value * self.perctarget / self.inds[d]["vol"] self.buy(d, size=size) def stop(self): print('Final Portfolio Value %.2f' %(cerebro.broker.getvalue()))
import backtrader.feeds as btfeed
class PandasData(btfeed.PandasData):
params = (
('dtformat', '%Y-%m-%d'),
('datetime', None),
('time', None),
('open', 'Open'),
('high', 'High'),
('low', 'Low'),
('close', 'Close'),
('volume', 'Volume'),
('openinterest', None),
)import pandas as pd
import datetimefromdate = datetime.datetime(2011, 1, 1)
todate = datetime.datetime(2021, 5, 31)if name == "main":
cerebro = bt.Cerebro(stdstats=False)
cerebro.broker.set_coc(True)''' spy = bt.feeds.YahooFinanceData(dataname='^NSEI', fromdate=fromdate, todate=todate, plot=False) cerebro.adddata(spy) # add S&P 500 Index ''' directory = "C:\\KK\\Data\\sectors" for filename in os.listdir(directory): datapath = os.path.join(directory, filename) dataframe = pd.read_csv(datapath, parse_dates=True, index_col=0) df = PandasData(dataname=dataframe) print('Stock: {} - Records {:.2f}'.format(filename, len(df))) if len(df) > 100: cerebro.adddata(df, name = filename) ''' for filename in os.listdir(directory): f = os.path.join(directory, filename) if(os.path.isfile(f)): data = bt.feeds.GenericCSVData( dataname = f, datetime = 0, time = -1, open = 1, high = 2, low = 3, close = 4, volume = 5, openinterest = -1, nullvalue = 0.0, dtformat = ('%d-%m-%Y'), fromdate = fromdate, todate = todate ) print('Stock: {} - Records {:.2f}'.format(f, len(data))) if len(data) > 100: cerebro.adddata(data, name = filename) ''' #set desired cash cerebro.broker.setcash(500000.0) cerebro.broker.setcommission(0.001) #set commission cerebro.addobserver(bt.observers.Value) cerebro.addanalyzer(bt.analyzers.SharpeRatio, riskfreerate=0.0) cerebro.addanalyzer(bt.analyzers.Returns) cerebro.addanalyzer(bt.analyzers.DrawDown) cerebro.addanalyzer(bt.analyzers.PyFolio, _name='pyfolio') cerebro.addstrategy(ClenowStrategy) ''' stop = 5 cerebro.optstrategy(Strategy, #momentum_period=range(50, 300, 50), num_positions=range(1, 4)) ''' print('Starting Portfolio Value: %.2f' % cerebro.broker.getvalue()) results = cerebro.run()
-
@vladisld I posted the complete code, but it looks like there was some issue with the way I pasted it, so it was split into multiple blocks. Sorry about that. I am unable to delete it after posting.
-
Inline code
import os os.getcwd() os.chdir("C:\\KK\\Data\\sectors") from numpy import cumsum, log, polyfit, sqrt, std, subtract import numpy as np import backtrader as bt def Momentum_func(the_array): r = np.log(the_array) slope, _, rvalue, _, _ = linregress(np.arrange(len(r)), r) annulized = (1 + slope) ** 252 return annulized * (rvalue ** 2) class Momentum(bt.ind.OperationN): lines = ('trend',) params = dict(period = 50) func = Momentum_func import collections class ClenowStrategy(bt.Strategy): params = dict( momentum = Momentum, mom_period = 90, movav = bt.ind.SMA, idx_period = 200, stock_period= 100, reserve = 0.05, selcperc = 0.01, ) def __init__(self): self.inds = collections.defaultdict(dict) self.stocks = self.datas[1:] self.d_with_len = [] #for fixed size positioning self.selnum = int(len(self.datas) * self.p.selcperc) self.perctarget = (1.0 - self.p.reserve)/self.selnum self.add_timer( when = Timer.SESSION_START, weekdays = [self.p.rebal_weekday], weekcarry = True, ) self.idx_mav = self.p.movav(self.data0, period=self.p.idx_period) for d in self.stocks: self.inds[d]['mom'] = self.p.momentum(d, period = self.mom_period) self.inds[d]['mav'] = self.p.movav(d, period = self.stock_period) self.inds[d]['vol'] = self.p.volatr(d, period=self.p.vol_period) def prenext(self): self.d_with_len = [d for d in self.datas if len(d)] self.next() def nextstart(self): self.d_with_len = self.datas self(next) def next(self): notify_timer() def notify_timer(self, timer, when, *args, **kwargs): self.rankings = list(filter(lambda d: len(d) > self.p.stock_period, self.stocks)) self.rankings.sort(key=lambda d: self.inds[d]["mom"][0]) num_stocks = len(self.rankings) # sell stocks based on criteria for i, d in enumerate(self.rankings): if self.getposition(self.data).size: if i > num_stocks * self.p.selcperc or d < self.inds[d]["mav"]: self.close(d) if self.spy < self.idx_mav: return # buy stocks with remaining cash for i, d in enumerate(self.rankings[:int(num_stocks * self.p.selcperc)]): cash = 
self.broker.get_cash() value = self.broker.get_value() if cash <= 0: break if not self.getposition(self.data).size: size = value * self.perctarget / self.inds[d]["vol"] self.buy(d, size=size) import backtrader.feeds as btfeed class PandasData(btfeed.PandasData): params = ( ('dtformat', '%Y-%m-%d'), ('datetime', None), ('time', None), ('open', 'Open'), ('high', 'High'), ('low', 'Low'), ('close', 'Close'), ('volume', 'Volume'), ('openinterest', None), ) import pandas as pd import datetime fromdate = datetime.datetime(2011, 1, 1) todate = datetime.datetime(2021, 5, 31) if __name__ == "__main__": cerebro = bt.Cerebro(stdstats=False) cerebro.broker.set_coc(True) spy = bt.feeds.YahooFinanceData(dataname='^NSEI', fromdate=fromdate, todate=todate, plot=False) cerebro.adddata(spy) # add S&P 500 Index directory = "C:\\KK\\Data\\sectors" for filename in os.listdir(directory): datapath = os.path.join(directory, filename) dataframe = pd.read_csv(datapath, parse_dates=True, index_col=0) df = PandasData(dataname=dataframe) print('Stock: {} - Records {:.2f}'.format(filename, len(df))) if len(df) > 100: cerebro.adddata(df, name = filename) ''' for filename in os.listdir(directory): f = os.path.join(directory, filename) if(os.path.isfile(f)): data = bt.feeds.GenericCSVData( dataname = f, datetime = 0, time = -1, open = 1, high = 2, low = 3, close = 4, volume = 5, openinterest = -1, nullvalue = 0.0, dtformat = ('%d-%m-%Y'), fromdate = fromdate, todate = todate ) print('Stock: {} - Records {:.2f}'.format(f, len(data))) if len(data) > 100: cerebro.adddata(data, name = filename) ''' #set desired cash cerebro.broker.setcash(500000.0) cerebro.broker.setcommission(0.001) #set commission cerebro.addobserver(bt.observers.Value) cerebro.addanalyzer(bt.analyzers.SharpeRatio, riskfreerate=0.0) cerebro.addanalyzer(bt.analyzers.Returns) cerebro.addanalyzer(bt.analyzers.DrawDown) cerebro.addanalyzer(bt.analyzers.PyFolio, _name='pyfolio') cerebro.addstrategy(ClenowStrategy) ''' stop = 5 
cerebro.optstrategy(Strategy, #momentum_period=range(50, 300, 50), num_positions=range(1, 4)) ''' print('Starting Portfolio Value: %.2f' % cerebro.broker.getvalue()) results = cerebro.run() print(f"Sharpe: {results[0].analyzers.sharperatio.get_analysis()['sharperatio']:.2f}") print(f"Norm. Annual Return: {results[0].analyzers.returns.get_analysis()['rnorm100']:.2f}%") print(f"Max Drawdown: {results[0].analyzers.drawdown.get_analysis()['max']['drawdown']:.2f}%")
-
@kk said in Zero records while loading Data from CSV:
data = bt.feeds.GenericCSVData(...
Few points:
-
AFAIU, creating the data feed, be it GenericCSVData or PandasData, will not load the actual data (this may differ for other data feeds). Data is loaded during the Cerebro engine run. It can be loaded all at once (preloaded) if Cerebro's
preload
parameter is set to 'True' (the default), or on the go. -
len(datafeed)
returns the length of the iterated part of the data feed - even if the data feed's data was fully preloaded. So even after the datafeed.preload()
method is called, len(datafeed) will still return zero (since it hasn't been iterated yet). -
one may use
datafeed.buflen()
to get the actual number of loaded bars. Once again, the buflen
method will return zero upon data feed creation, since no attempt has been made to load the data at this point - at least for the CSV and Pandas data feeds.
Please correct me if you know/experience otherwise
-
-
remove index_col=.. from read_csv.
-
-
@vladisld You are right: the data is not loaded at this point in time, so checking the length will always give zero. I tried with buflen(), but the issue is still the same. Thanks for the help.
-
So, what's the solution?
-
@asdasda Load the data into pandas and check the number of records there; if it is more than the required minimum, then load the data into bt.feeds.