For code/output blocks: Use ``` (aka backtick or grave accent) in a single line before and after the block. See: http://commonmark.org/help/
Why does optstrategy not run the strategy in the 2nd cycle?
-
Here is my code. I can see the "FactorStrategy next" log in the first run but not in the 2nd run. I tried other demos and found that the `next` method should be called in the 2nd run as well. Is there anything wrong with my code?
#!/usr/bin/env python3 # -*- coding: UTF-8 -*- from __future__ import (absolute_import, division, print_function, unicode_literals) from datetime import datetime from datetime import timedelta import backtrader as bt import warnings import math import numpy as np import pandas as pd from feed import tdx_feed from factor.factor import get_factors from enum import IntEnum, unique import alphalens import logging warnings.filterwarnings('ignore') logging.basicConfig(format="[%(asctime)s %(levelname)s %(thread)d %(filename)s:%(lineno)d] %(message)s", level=logging.DEBUG, ) @unique class Universe(IntEnum): HS300 = 0 ZZ500 = 1 ZZ800 = 2 ZZ1000 = 3 @unique class Period(IntEnum): LAST_WEEK = 1 LAST_MONTH = 2 LAST_HALF_YEAR = 3 @unique class Portfolio(IntEnum): PURE_LONG_1 = 1 PURE_LONG_5 = 5 LONG_SHORT_I = 3 LONG_SHORT_II = 4 @unique class FeeSlippage(IntEnum): NONE = 1 FEEONLY = 2 FEESLIPPAGE = 3 @unique class FilterStop(IntEnum): NO = 1 YES = 2 @unique class RebalancePeriod(IntEnum): ONE_WEEK = 1 class input_param: universe = 0 period = 0 portfolio = 0 feeSlippage = 1 filterStop = 0 rebancePeriod = 0 factors = [] customize_universe = [] modname = "" # fromdate = 0 # todate = 0 fromdate = None todate = None start_cash = 100000000.0 data_length = 365 code = '000300' setcode = 1 # factorCls = MyIndicator reblance_period = 1 group_size = 5 debug = True def set(self, js): self.universe = js["universe"] self.period = js["period"] self.portfolio = js["portfolio"] self.feeSlippage = js["feeSlippage"] self.filterStop = js["filterStop"] self.rebancePeriod = js["rebancePeriod"] def parse(self): # Get the dates from the args if self.period: fromdate = (datetime.today() - timedelta(days=180)).date() todate = datetime.today().date() elif self.fromdate: fromdate = datetime.strptime(self.fromdate, '%Y-%m-%d') todate = datetime.strptime(self.todate, '%Y-%m-%d') else: fromdate = (datetime.today() - timedelta(days=30)).date() todate = datetime.today().date() self.todate = todate.year * 
10000 + todate.month * 100 + todate.day class FactorStrategy(bt.Strategy): ''' group factors ''' params = dict(period=5, # factorCls=MyIndicator, reblance_period=1, group_size=5, portfolio=Portfolio.LONG_SHORT_I, ) def __init__(self): self.headers = [d._name for d in self.datas] self.factor_data = pd.DataFrame() self.turnover = pd.DataFrame() logging.debug('FactorStrategy __init__') def log(self, txt): dt = self.datas[0].datetime.datetime(0) logging.debug(f'{dt.isoformat()}, {txt}') def get_quantile_index(self, portfolio): p_index, short_index = None, None if portfolio == Portfolio.PURE_LONG_1: p_index, short_index = 0, None elif portfolio == Portfolio.PURE_LONG_5: p_index, short_index = 4, None elif portfolio == Portfolio.LONG_SHORT_I: p_index, short_index = [0, 4], 0 elif portfolio == Portfolio.LONG_SHORT_II: p_index, short_index = [0, 4], 4 return p_index, short_index def get_factor_quantile(self, dt, p_index, column='factor_quantile'): if isinstance(p_index, int): p_index = [p_index] for index, row in dt.iterrows(): if row[column] not in p_index: dt.loc[index, column] = math.nan return dt.dropna(subset=[column]) def next(self): logging.debug('FactorStrategy next') self.get_pd_data() if len(self.data0) % self.p.reblance_period == 0: p_index, short_index = None, None if self.p.portfolio == Portfolio.PURE_LONG_1: p_index, short_index = 0, None elif self.p.portfolio == Portfolio.PURE_LONG_5: p_index, short_index = 4, None elif self.p.portfolio == Portfolio.LONG_SHORT_I: p_index, short_index = [0,4], 0 elif self.p.portfolio == Portfolio.LONG_SHORT_II: p_index, short_index = [0,4], 4 self.trade_portfolio(p_index, short_index) def get_pd_data(self): factor_row = [] price_row = [] tradeable_row = [] dt0 = bt.num2date(self.data0.datetime[0]) for d in self.datas: factor = d.l.factor[0] factor_row.append(factor) price = d.l.open[0] price_row.append(price) trade = d.l.tradeable[0] tradeable_row.append(trade) dates = list(np.array([dt0] * len(self.datas))) df_factor = 
pd.DataFrame(factor_row, index=[dates, self.headers], columns=["factor"]) df_factor.index.names = ["date", "asset"] df_quantile = pd.DataFrame(pd.qcut(factor_row, self.p.group_size, labels=False, duplicates='drop'), index=[dates, self.headers], columns=["factor_quantile"]) df_price = pd.DataFrame(price_row, index=[dates, self.headers], columns=["price"]) df_tradeable = pd.DataFrame(tradeable_row, index=[dates, self.headers], columns=["tradeable"]) df = pd.concat([df_factor, df_quantile, df_price, df_tradeable], axis=1) self.factor_data = self.factor_data.append(df) def trade_portfolio(self, p_index, short_index): """ trade portfolio every period :param p_index: :param short_index: :return: """ try: trade_count = 0 group_quantile_last = pd.DataFrame() dt0 = bt.num2date(self.data0.datetime[0]) dt_last = bt.num2date(self.data.datetime[-self.p.reblance_period]) dt = self.factor_data.loc[dt_last, :] group_quantile_last = self.get_factor_quantile(dt, p_index) except Exception as e: logging.error(str(e)) dt = self.factor_data.loc[dt0, :] group_quantile = self.get_factor_quantile(dt, p_index) # sell these holdings if group_quantile_last.size > 0: sell_hold = set(group_quantile_last.index) - set(group_quantile.index) for s in sell_hold: t = group_quantile_last.loc[s, :] if t.tradeable: self.close(s) trade_count = trade_count + 1 buy_target = list(set(group_quantile.index) - set(group_quantile_last.index)) logging.debug('close prev: %s', str(sell_hold)) else: buy_target = list(set(group_quantile.index)) # buy these portfolio can_trade = sum(group_quantile['tradeable'].loc[buy_target]) if can_trade > 0.5: value = self.broker.getcash() / can_trade for s in buy_target: t = group_quantile.loc[s, :] if t.tradeable and int(t.factor_quantile) != short_index: ss = int((value / 100) / t.price) * 100 self.buy(size=ss) trade_count = trade_count + 1 elif t.tradeable: ss = int((value / 100) / t.price) * 100 self.buy(size=-ss) trade_count = trade_count + 1 logging.debug('buy today: %s', 
str(buy_target)) else: self.log("can't trade this day") pass # compute turnover if group_quantile_last.size > 0: turnover = trade_count / group_quantile_last.size else: turnover = math.nan turnover_dt = pd.DataFrame(turnover, index=[dt0], columns=['turnover']) self.turnover = self.turnover.append(turnover_dt) def stop(self): self.value = self.broker.get_value() logging.debug("finnal value: %f", self.value) logging.debug("final factors:") logging.debug(self.factor_data) def get_code_result(results): ''' fetch all results from mysql one time. :param results: :return: map, key=code, value=list(record) ''' dict_code_result = {} c0 = '' start = end = 0 for i, r in enumerate(results): if c0 != r['GpCode']: if c0 != '': dict_code_result[c0] = results[start:end + 1] c0 = r['GpCode'] start = i dict_code_result[r['GpCode']] = [] else: end = i if c0 != '': dict_code_result[c0] = results[start:] # todo...make sure every code in result has the same length. return dict_code_result def filter_codes(dict_code_result): ''' :param dict_code_result: :return: ''' for key in list(dict_code_result.keys()): if len(dict_code_result[key]) < 3*250: del dict_code_result[key] return dict_code_result def get_benchmark_returns(bench): dt0 = bench[0]['close'] dt1 = bench[-1]['close'] return (dt1 - dt0) / dt0 # ignore ret = [i['close'] for i in bench] dt = [datetime.strptime(str(i['datetime']), "%Y%m%d") for i in bench] a = pd.DataFrame(ret, index=dt, columns=['returns']) return a def get_statistics(factor_data, turnover, net_profit_rate, bench_returns_rate, data_length, point_number=2): """ get statistics :param factor_data: :param turnover: :param net_profit_rate: :param bench_returns_rate: :param data_length: :param point_number: :return: """ annual_net_profit_rate = round(net_profit_rate / data_length * 365, point_number) alpha_returns_rate = round(net_profit_rate - bench_returns_rate, point_number) annual_alpha_returns_rate = round(alpha_returns_rate * 365 / data_length, point_number) ic = 
alphalens.performance.factor_information_coefficient(factor_data) IC = round(ic.mean()[0], point_number) IR = round((ic.mean() / ic.std())[0], point_number) return dict({ "annulReturn":annual_net_profit_rate, "alpha": annual_alpha_returns_rate, "turnover":round(turnover.mean(), point_number), "IC":IC, "IR":IR, }) def get_analysis(analyzer): if analyzer: pss = analyzer.get_analysis() # ps = [[k] + v[-2:] for k, v in iteritems(pss)] ps = [[k] + v for k, v in iter(pss.items())] cols = ps.pop(0) # headers are in the first entry positions = pd.DataFrame.from_records(ps[1:], columns=cols) positions.index = pd.to_datetime(positions['Datetime']) del positions['Datetime'] # positions.index = positions.index.tz_localize('UTC') positions = positions.stack() positions.index.set_names(['date', 'asset'], inplace=True) positions.columns = ['returns'] return positions return None def cb(strategies): for strategy in strategies: logging.debug(strategy.p.portfolio) logging.debug(strategy.value) logging.debug(strategy.factor_data) def run_once(args, factors, map_data, bench_returns_rate): logging.debug('run_once run_once run_once run_once') groups = factors.groupby("asset") cerebro = bt.Cerebro(optreturn=False, stdstats=False) for k, v in map_data.items(): f = groups.get_group(k) data = tdx_feed.TdxFeed( result=v, factors= f, plot=False, ) cerebro.adddata(data, name=k) if args.portfolio == 0: cerebro.optstrategy(FactorStrategy, group_size=args.group_size, reblance_period=args.reblance_period, portfolio=(Portfolio.PURE_LONG_1, Portfolio.PURE_LONG_5), ) elif args.portfolio == 1: cerebro.optstrategy(FactorStrategy, group_size=args.group_size, reblance_period=args.reblance_period, portfolio=(Portfolio.LONG_SHORT_I,), ) elif args.portfolio == 2: cerebro.optstrategy(FactorStrategy, group_size=args.group_size, reblance_period=args.reblance_period, portfolio=(Portfolio.LONG_SHORT_II,), ) else: logging.error("can't be here") cerebro.broker.setcash(args.start_cash) 
cerebro.broker.set_slippage_fixed(0.001) result_dict = dict() cerebro.optcallback(cb) # Execute cerebro.run(maxcpus=1) return result_dict def runstrat(input): args = input_param() args.set(input) args.parse() logging.debug('loading from pickle...') # for debug use only: import pickle with open("E:/code/src/tdx_factor/last_year.pickle", 'rb') as ff: map_data = pickle.load(ff) with open("E:/code/src/tdx_factor/000300_last_year.pickle", 'rb') as ff: bench = pickle.load(ff) bench_returns_rate = get_benchmark_returns(bench) logging.debug('bench_returns_rate %f', bench_returns_rate) factor_all = get_factors(map_data, bench) logging.debug(len(factor_all)) result_statistics = list() for k, v in factor_all.items(): statis = run_once(args, v, map_data, bench_returns_rate) statis["factorid"] = k[0] statis["factorName"] = k[1] result_statistics.append(statis) return result_statistics if __name__ == '__main__': input = {"universe": 0,"period": 1,"portfolio": 0,"feeSlippage": 1,"filterStop": 0,"rebancePeriod": 0,"factors": [],"customize_universe": [],"modname": "mod_pypf64.dll"} runstrat(input) logging.debug('===========================')
Here is the log:
[2021-03-23 18:14:02,057 DEBUG 5040 factor_trade.py:576] loading from pickle... [2021-03-23 18:14:02,195 DEBUG 5040 factor_trade.py:590] bench_returns_rate 0.338562 [2021-03-23 18:14:11,536 INFO 5040 utils.py:129] Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8. [2021-03-23 18:14:11,536 INFO 5040 utils.py:141] NumExpr defaulting to 8 threads. [2021-03-23 18:14:14,232 DEBUG 5040 factor_trade.py:592] 1 [2021-03-23 18:14:14,232 DEBUG 5040 factor_trade.py:452] run_once run_once run_once run_once [2021-03-23 18:14:16,579 DEBUG 5040 factor_trade.py:185] FactorStrategy __init__ [2021-03-23 18:14:16,583 DEBUG 5040 factor_trade.py:216] FactorStrategy next [2021-03-23 18:14:16,591 ERROR 5040 factor_trade.py:277] datetime.datetime(2020, 12, 31, 0, 0) [2021-03-23 18:14:16,597 DEBUG 5040 factor_trade.py:193] 2019-07-05T00:00:00, can't trade this day . . . [2021-03-23 18:14:24,715 DEBUG 5040 factor_trade.py:343] finnal value: 118570513.235212 [2021-03-23 18:14:24,715 DEBUG 5040 factor_trade.py:344] final factors: [2021-03-23 18:14:24,715 DEBUG 5040 factor_trade.py:345] factor factor_quantile price tradeable date asset 2019-07-05 000001 NaN NaN 13.730000 1.0 000002 NaN NaN 27.389999 1.0 000063 NaN NaN 33.090000 1.0 000066 NaN NaN 9.860000 1.0 000069 NaN NaN 7.170000 1.0 ... ... ... ... 2020-12-31 000768 1.514446 4.0 34.790001 1.0 000776 -0.080605 1.0 15.850000 1.0 000783 0.642726 2.0 8.210000 1.0 000786 0.607996 2.0 37.900002 1.0 000858 1.026616 3.0 288.010010 1.0 [10950 rows x 4 columns] [2021-03-23 18:14:24,725 DEBUG 5040 factor_trade.py:520] **************************************************************** [2021-03-23 18:14:24,725 DEBUG 5040 factor_trade.py:521] Portfolio.PURE_LONG_1 Dropped 16.4% entries from factor data: 16.4% in forward returns computation and 0.0% in binning phase (set max_loss=0 to see potentially suppressed Exceptions). max_loss is 35.0%, not exceeded: OK! 
[2021-03-23 18:14:26,075 DEBUG 5040 factor_trade.py:531] {'annulReturn': 0.27, 'alpha': -0.22, 'turnover': 0.07, 'IC': 0.19, 'IR': 0.77} [2021-03-23 18:14:26,078 DEBUG 5040 factor_trade.py:185] FactorStrategy __init__ [2021-03-23 18:14:26,079 DEBUG 5040 factor_trade.py:343] finnal value: 100000000.000000 [2021-03-23 18:14:26,079 DEBUG 5040 factor_trade.py:344] final factors: [2021-03-23 18:14:26,079 DEBUG 5040 factor_trade.py:345] Empty DataFrame Columns: [] Index: [] [2021-03-23 18:14:26,080 DEBUG 5040 factor_trade.py:520] **************************************************************** [2021-03-23 18:14:26,080 DEBUG 5040 factor_trade.py:521] Portfolio.PURE_LONG_5
-
I found that self.buflen() returns 0 in the 2nd run when the advance_peek method is called. buflen returns 202 in the first run (the data contains exactly 202 days). So why does buflen return 0 in the 2nd run? I will check...
-
d.advance_peek() returns inf in the function _runonce in cerebro.py. Strange! It returns a value in the first run but inf in the 2nd run.
def _runonce(self, runstrats): ''' Actual implementation of run in vector mode. Strategies are still invoked on a pseudo-event mode in which ``next`` is called for each data arrival ''' for strat in runstrats: strat._once() strat.reset() # strat called next by next - reset lines # The default once for strategies does nothing and therefore # has not moved forward all datas/indicators/observers that # were homed before calling once, Hence no "need" to do it # here again, because pointers are at 0 datas = sorted(self.datas, key=lambda x: (x._timeframe, x._compression)) while True: # Check next incoming date in the datas dts = [d.advance_peek() for d in datas] dt0 = min(dts) if dt0 == float('inf'): break # no data delivers anything
-
It was a data feed issue. Changing the data feed fixes the problem.