compression
-
Hi,
I implemented the data feed for sqlite from here :
https://community.backtrader.com/topic/2853/sqlite-exampleI am confused where should I implement the compression bit.
I want to be able to have 2 lines datas[0] being 1 minutes and datas[1] being 5 minutes compression in a specific strategy.
Even when looking in the lib, I can't find where it's handled.
I have read these referred from the sqlite thread: https://www.backtrader.com/docu/datafeed-develop-general/datafeed-develop-general/ -
@yancoupro Please share what you have tried so far.
-
Hi, thanks for the interest,
currently only working with minutes.
I also decided to use actual time instead of a number of items in the buffer to avoid gaps in data
here are some concrete questions :Is it ok, to have np.nan returned while the compression is not reached ?
Should I return some partial data at 9:35 if the data feed start date was 9:33 ?
any improvements welcomed.
import traceback import numpy as np import datetime as dt from backtrader import TimeFrame from backtrader.feed import DataBase from backtrader import date2num from sqlalchemy import create_engine import u class SqliteDATA(DataBase): params = ( ('database', None), ('symbol', 'XBTUSD'), ('tick_value', 0.01), ('timeframe ', TimeFrame.Minutes), ('compression', 1), ('fromdate', dt.datetime(1900, 1, 1)), ('todate', dt.datetime.max), ('echo', False) ) def __init__(self): self.started = False self.last_line = None try: if self.p.echo: print(f"from db: {self.p.database}") self.engine = create_engine(self.p.database, echo=self.p.echo) except Exception as ex: print('Unable to open database {0} - {1}'.format(self.p.database, ex)) if self.p.timeframe == TimeFrame.Minutes: self._timeframe = self.p.timeframe else: raise ValueError(f" timeframe : {self._timeframe} not handled") self._dataname = '{0}'.format(self.p.symbol) self.buffer = [] def start(self): try: self.conn = self.engine.connect() sql_query = "SELECT `dt`,`o`,`l`,`h`,`c`,`v` FROM `stocks` WHERE `symbol` = '" + self.p.symbol + "' AND `dt` between '" + self.p.fromdate.strftime( "%Y-%m-%d %H:%M:%S") + "' and '" + self.p.todate.strftime("%Y-%m-%d %H:%M:%S") + "' ORDER BY `dt` ASC" if self.p.echo: print(sql_query) self.result = self.conn.execute(sql_query) except Exception as ex: print('Unable to read table stocks from database {0} - {1}'.format(self.p.database, ex)) return def stop(self): self.engine.dispose() def compress(self, sdt): # latest date parsed. ret = [date2num(sdt)] # first open ret.append(float(self.buffer[0][1])) # min low ret.append(float(min([r[2] for r in self.buffer]))) # max high ret.append(float(max([r[3] for r in self.buffer]))) # last closed ret.append(float(self.buffer[-1][4])) # sum volume ret.append(int(sum([r[5] for r in self.buffer]))) # openinterested not processed ret.append(-1) return ret def _load(self): new_row = self.result.fetchone() if new_row is None: return False if self.p.echo: print(f"in", new_row) sdt = dt.datetime.strptime(str(new_row[0]), '%Y-%m-%d %H:%M:%S.%f') if self.started: self.buffer.append(new_row) # get minutes m = dt.datetime.strftime(u.to_date(new_row[0]), "%M") step = float(m) % float(self.p.compression) if step == 0: self.last_line = self.compress(sdt) self.buffer = [] else: self.last_line = [date2num(sdt), np.nan, np.nan, np.nan, np.nan, np.nan, np.nan] self.started = True; r = self.last_line if self.p.echo: print(f"buffered rows ", u.s_date(new_row[0]), r) self.lines.datetime[0] = r[0] self.lines.open[0] = r[1] self.lines.low[0] = r[2] self.lines.high[0] = r[3] self.lines.close[0] = r[4] self.lines.volume[0] = r[5] self.lines.openinterest[0] = -1 return True
results for VNET from 9:30 to 9:39
in ('2020-11-02 09:30:00.000000', 23.0, 22.74, 23.0, 22.74, 4699.0) #matches time but buffering not started. buffered rows 2020-11-02 09:30:00 [737731.3958333334, nan, nan, nan, nan, nan, nan] in ('2020-11-02 09:31:00.000000', 22.99, 22.95, 23.19, 23.18, 3721.0) in ('2020-11-02 09:33:00.000000', 23.35, 22.99, 23.35, 23.09, 17426.0) in ('2020-11-02 09:34:00.000000', 23.0, 22.93, 23.02, 23.02, 33806.0) in ('2020-11-02 09:35:00.000000', 22.8599, 22.8599, 22.8599, 22.8599, 4524.0) #computed with data from 9:31 to 9:35 included buffered rows 2020-11-02 09:35:00 [737731.3993055555, 22.99, 22.8599, 23.35, 22.8599, 59477, -1] in ('2020-11-02 09:37:00.000000', 22.95, 22.95, 22.95, 22.95, 111.0) in ('2020-11-02 09:38:00.000000', 22.95, 22.95, 22.95, 22.95, 200.0) in ('2020-11-02 09:39:00.000000', 22.82, 22.82, 22.82, 22.82, 100.0)
-
It looks like you have everything you need. I don't believe backtrader starts until it has all of the data it needs, but I would just check where there is some delayed signal and see if any indicators are correspondingly delayed. Not sure what to add. It looks good.
That said, one thing to consider is just creating your minute data in datas[0] and then once the class is instatiated into datas[0], resample that data using the built in resampler. This might save you headaches.