Hi, thanks for the interest,
currently only working with minutes.
I also decided to use actual time instead of a number of items in the buffer to avoid gaps in data
here are some concrete questions :
Is it ok, to have np.nan returned while the compression is not reached ?
Should I return some partial data at 9:35 if the data feed start date was 9:33 ?
any improvements welcomed.
import traceback
import numpy as np
import datetime as dt
from backtrader import TimeFrame
from backtrader.feed import DataBase
from backtrader import date2num
from sqlalchemy import create_engine
import u
class SqliteDATA(DataBase):
params = (
('database', None),
('symbol', 'XBTUSD'),
('tick_value', 0.01),
('timeframe ', TimeFrame.Minutes),
('compression', 1),
('fromdate', dt.datetime(1900, 1, 1)),
('todate', dt.datetime.max),
('echo', False)
)
def __init__(self):
self.started = False
self.last_line = None
try:
if self.p.echo:
print(f"from db: {self.p.database}")
self.engine = create_engine(self.p.database, echo=self.p.echo)
except Exception as ex:
print('Unable to open database {0} - {1}'.format(self.p.database, ex))
if self.p.timeframe == TimeFrame.Minutes:
self._timeframe = self.p.timeframe
else:
raise ValueError(f" timeframe : {self._timeframe} not handled")
self._dataname = '{0}'.format(self.p.symbol)
self.buffer = []
def start(self):
try:
self.conn = self.engine.connect()
sql_query = "SELECT `dt`,`o`,`l`,`h`,`c`,`v` FROM `stocks` WHERE `symbol` = '" + self.p.symbol + "' AND `dt` between '" + self.p.fromdate.strftime(
"%Y-%m-%d %H:%M:%S") + "' and '" + self.p.todate.strftime("%Y-%m-%d %H:%M:%S") + "' ORDER BY `dt` ASC"
if self.p.echo:
print(sql_query)
self.result = self.conn.execute(sql_query)
except Exception as ex:
print('Unable to read table stocks from database {0} - {1}'.format(self.p.database, ex))
return
def stop(self):
self.engine.dispose()
def compress(self, sdt):
# latest date parsed.
ret = [date2num(sdt)]
# first open
ret.append(float(self.buffer[0][1]))
# min low
ret.append(float(min([r[2] for r in self.buffer])))
# max high
ret.append(float(max([r[3] for r in self.buffer])))
# last closed
ret.append(float(self.buffer[-1][4]))
# sum volume
ret.append(int(sum([r[5] for r in self.buffer])))
# openinterested not processed
ret.append(-1)
return ret
def _load(self):
new_row = self.result.fetchone()
if new_row is None:
return False
if self.p.echo:
print(f"in", new_row)
sdt = dt.datetime.strptime(str(new_row[0]), '%Y-%m-%d %H:%M:%S.%f')
if self.started:
self.buffer.append(new_row)
# get minutes
m = dt.datetime.strftime(u.to_date(new_row[0]), "%M")
step = float(m) % float(self.p.compression)
if step == 0:
self.last_line = self.compress(sdt)
self.buffer = []
else:
self.last_line = [date2num(sdt), np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]
self.started = True;
r = self.last_line
if self.p.echo:
print(f"buffered rows ", u.s_date(new_row[0]), r)
self.lines.datetime[0] = r[0]
self.lines.open[0] = r[1]
self.lines.low[0] = r[2]
self.lines.high[0] = r[3]
self.lines.close[0] = r[4]
self.lines.volume[0] = r[5]
self.lines.openinterest[0] = -1
return True
results for VNET from 9:30 to 9:39
in ('2020-11-02 09:30:00.000000', 23.0, 22.74, 23.0, 22.74, 4699.0)
#matches time but buffering not started.
buffered rows 2020-11-02 09:30:00 [737731.3958333334, nan, nan, nan, nan, nan, nan]
in ('2020-11-02 09:31:00.000000', 22.99, 22.95, 23.19, 23.18, 3721.0)
in ('2020-11-02 09:33:00.000000', 23.35, 22.99, 23.35, 23.09, 17426.0)
in ('2020-11-02 09:34:00.000000', 23.0, 22.93, 23.02, 23.02, 33806.0)
in ('2020-11-02 09:35:00.000000', 22.8599, 22.8599, 22.8599, 22.8599, 4524.0)
#computed with data from 9:31 to 9:35 included
buffered rows 2020-11-02 09:35:00 [737731.3993055555, 22.99, 22.8599, 23.35, 22.8599, 59477, -1]
in ('2020-11-02 09:37:00.000000', 22.95, 22.95, 22.95, 22.95, 111.0)
in ('2020-11-02 09:38:00.000000', 22.95, 22.95, 22.95, 22.95, 200.0)
in ('2020-11-02 09:39:00.000000', 22.82, 22.82, 22.82, 22.82, 100.0)