Navigation

    Backtrader Community

    • Register
    • Login
    • Search
    • Categories
    • Recent
    • Tags
    • Popular
    • Users
    • Groups
    • Search
    For code/output blocks: Use ``` (aka backtick or grave accent) in a single line before and after the block. See: http://commonmark.org/help/

    Error with script to download historical intraday data

    General Discussion
    2
    4
    42
    Loading More Posts
    • Oldest to Newest
    • Newest to Oldest
    • Most Votes
    Reply
    • Reply as topic
    Log in to reply
    This topic has been deleted. Only users with topic management privileges can see it.
    • Q
      quake004 last edited by

      I've written a script to download free intraday data. The API from which I'm downloading offers two years of data in csv files divided in 24 slices with 30 days divisions begging from the current day. So it's not full months. The problem I have is that after downloading all files for all stocks in all timeframes for a given slice I go to download the next slice and some files aren't being written correctly. Some files have the combined information from both time slices but others have only the headers.

      Here is the script, the error is probably around line 66.

      import os
      import io
      import math
      import time
      import os.path
      import sys
      import glob
      import asyncio
      import requests
      import traceback
      import urllib.request
      import logging as log
      import pandas as pd
      
      from time import sleep
      from random import randrange
      from datetime import datetime
      from typing import List, Dict, Tuple
      from pathlib import Path
      from proxybroker import Broker
      from itertools import cycle
      
      log.basicConfig(
          level=log.DEBUG,
          format=('%(asctime)s.%(msecs)03d:'
                  '%(levelname)s:'
                  '%(filename)s - %(module)s - %(funcName)s:\t'
                  '%(message)s'),
          datefmt='%Y-%m-%d %H:%M:%S',
          handlers=[
              log.FileHandler("debug.log"),
              log.StreamHandler()
          ]
      )
      
      apikey = 'XXXXXXXXXXXXXXX'
      delay = 2 + 0.001
      
      BASE_URL = 'https://www.alphavantage.co/'
      # To download the data in a subdirectory where the script is located
      modpath = os.path.dirname(os.path.abspath(sys.argv[0]))
      
      
      def download_previous_data(
          file: str,
          ticker: str,
          timeframe: str,
          _slice: str,
      ):
          global apikey, delay
          url = f'{BASE_URL}query?function=TIME_SERIES_INTRADAY_EXTENDED&symbol={ticker}&interval={timeframe}&slice={_slice}&apikey={apikey}&datatype=csv'
          log.info(f'Downloading {_slice} of {timeframe} for {ticker}...')
          try:
              while True:
                  t = time.process_time()
                  df = pd.read_csv(url).iloc[::-1]
                  elapsed_time = time.process_time() - t
                  if delay > elapsed_time:
                      sleep(delay - elapsed_time)
                  if len(df.columns) == 6:
                      break
                  sleep(1)
              if os.path.exists(file):
                  pd.read_csv(file).append(df).drop_duplicates().to_csv(file, index=False, encoding='utf-8-sig')
              else:
                  df.to_csv(file, index=False, mode='w', encoding='utf-8-sig')
          except Exception as e:
              log.info(f"Couldn't download data for {ticker} from {url}")
              log.error(e, exc_info=True)
      
      
      def get_tickers(filepath) -> List[str]:
          '''Get a list of all ticker symbols
          '''
          df = pd.read_csv(filepath)
          #tickers = df.loc[df['exchange'] == 'NYSE']['symbol'].tolist()
          tickers = df[(df.symbol == 'AN').idxmax():] \
              .loc[df['exchange'] == 'NYSE'] \
              .loc[df['assetType'] == 'Stock']['symbol'].tolist()
      
          return df, tickers
      
      
      def create_download_folders(timeframes: List[str]):
          for timeframe in timeframes:
              download_path = f'{modpath}/{timeframe}'
              #download_path = f'/media/user/Portable Drive/Trading/data/{timeframe}'
              Path(download_path).mkdir(parents=True, exist_ok=True)
      
      
      def use_stocks_from_file(filepath: str) -> List[str]:
          filepath = f'{modpath}/{filepath}'
          with open(filepath) as f:
              tickers = f.read().replace('\n', '').split(',')
          return tickers
      
      
      def get_data():
          filepath = f'{modpath}/stocks_alphavantage.csv'
          df, tickers = get_tickers(filepath)
          timeframes = ['1min', '5min', '15min', '30min', '60min']
      
          create_download_folders(timeframes)
      
          slices = ['year2month10',
                      'year2month9', 'year2month8', 'year2month7',
                      'year2month6', 'year2month5', 'year2month4',
                      'year2month3', 'year2month2', 'year2month1',
                      'year1month12', 'year1month11', 'year1month10',
                      'year1month9', 'year1month8', 'year1month7',
                      'year1month6', 'year1month5', 'year1month4',
                      'year1month3', 'year1month2', 'year1month1']
      
          for _slice in slices:
              for ticker in tickers:
                  if ticker not in df.values:
                      log.info(f'{ticker} not available. Skiping...')
                      continue
                  name = df.loc[df['symbol'] == ticker, 'name'].iat[0]
                  log.info(f'Downloading data for {ticker}: {name}...')
                  for timeframe in timeframes:
                      download_path = f'{modpath}/{timeframe}'
                      filepath = f'{download_path}/{ticker}.csv'
      
                      download_previous_data(filepath, ticker, timeframe, _slice)
      
      
      def main():
          get_data()
      
      
      if __name__ == '__main__':
          main()
      
      1 Reply Last reply Reply Quote 0
      • A
        ab_trader last edited by

        how to debug the code :)

        1 Reply Last reply Reply Quote 0
        • Q
          quake004 last edited by

          But how do I reproduce the bug if I don't know why or when it happens.

          1 Reply Last reply Reply Quote 0
          • A
            ab_trader last edited by

            you already have a script which has potential bug, now you need to figure out why it happens. use more logging or debugging tools, check what goes in, if data received correct, in other words check all links in the chain.

            1 Reply Last reply Reply Quote 0
            • 1 / 1
            • First post
              Last post
            Copyright © 2016, 2017, 2018 NodeBB Forums | Contributors
            $(document).ready(function () { app.coldLoad(); }); }