Load Stock Data from AlphaVantage

Ulf Hamster 3 min.
python alphavantage

Stock data in data analysis never gets old — the (false) hope of beating the market with some voodoo finance. Here is a way to get some daily adjusted data. Keep in mind that for a model with daily, weekly, or lower frequency, you need to adjust for jumps caused by corporate actions such as dividends and stock splits.

Load packages

import requests
from io import BytesIO, StringIO
import pandas as pd
import numpy as np
import csv
from itertools import zip_longest
import json

Manage the API Key

import pathlib
import configparser

def apikey_save(service, apikey, filename=None):
    """Persist an API key for *service* in an INI-style key store.

    Parameters
    ----------
    service : str
        Section name under which the key is stored, e.g. "alphavantage".
    apikey : str
        The API key to store.
    filename : str, optional
        Path of the key-store file; defaults to ``~/.apikeystore``.

    Returns
    -------
    None
    """
    if filename is None:
        filename = f"{pathlib.Path.home()}/.apikeystore"
    config = configparser.ConfigParser()
    # Load any existing entries first so that saving a key for one service
    # does not clobber keys previously stored for other services.
    # (configparser.read silently ignores a missing file.)
    config.read(filename)
    config[service] = {}
    config[service]['api-key'] = apikey
    with open(filename, 'w') as fileptr:
        config.write(fileptr)
    return None

def apikey_read(service, filename=None):
    """Return the API key stored for *service* in the INI-style key store.

    Parameters
    ----------
    service : str
        Section name to look up, e.g. "alphavantage".
    filename : str, optional
        Path of the key-store file; defaults to ``~/.apikeystore``.

    Returns
    -------
    str
        The stored API key. Raises ``KeyError`` if the section or the
        'api-key' option is absent.
    """
    store = filename if filename is not None else f"{pathlib.Path.home()}/.apikeystore"
    parser = configparser.ConfigParser()
    parser.read(store)
    return parser[service]['api-key']
#apikey_save("alphavantage", "mysuperdupersecretkey")

CSV as response

# Build the Alpha Vantage query URL for a CSV response.
apikey = apikey_read("alphavantage")  # key previously stored in ~/.apikeystore
ticker = "MSFT"
tstype = "TIME_SERIES_DAILY_ADJUSTED"  # daily OHLCV adjusted for splits/dividends
outsz = "compact"  # compact: last 100 days, full: all
dtype = "csv"  # ask the API for CSV instead of JSON
# NOTE: the API key travels as a URL query parameter -- avoid printing/logging this URL.
FILE_URL = f"https://www.alphavantage.co/query?function={tstype}&symbol={ticker}&apikey={apikey}&outputsize={outsz}&datatype={dtype}"
%time response = requests.get(FILE_URL)
CPU times: user 46.7 ms, sys: 11.8 ms, total: 58.5 ms
Wall time: 1.02 s

Store CSV file locally

Open a file and write the bytes into it.

#open("data123.csv", "wb").write(response.content)

Parse bytes response to DataFrame

%time df = pd.read_csv(BytesIO(response.content))
df.head()
CPU times: user 6.74 ms, sys: 807 µs, total: 7.54 ms
Wall time: 7.95 ms
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}

Parse string response to DataFrame

%time df = pd.read_csv(StringIO(response.text))
df.head()
CPU times: user 4.54 ms, sys: 1.22 ms, total: 5.76 ms
Wall time: 5.8 ms
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}

Parse to Numpy array

%time x = np.recfromcsv(StringIO(response.text), delimiter=',', encoding='utf-8')
CPU times: user 3.17 ms, sys: 204 µs, total: 3.37 ms
Wall time: 3.31 ms
x[:5]
rec.array([('2019-11-29', 152.1 , 152.3 , 151.28, 151.38, 151.38, 11977300, 0., 1.),
           ('2019-11-27', 152.33, 152.5 , 151.52, 152.32, 152.32, 15201293, 0., 1.),
           ('2019-11-26', 151.36, 152.42, 151.32, 152.03, 152.03, 24635100, 0., 1.),
           ('2019-11-25', 150.  , 151.35, 149.92, 151.23, 151.23, 22428585, 0., 1.),
           ('2019-11-22', 150.07, 150.3 , 148.82, 149.59, 149.59, 15841680, 0., 1.)],
          dtype=[('timestamp', '<U10'), ('open', '<f8'), ('high', '<f8'), ('low', '<f8'), ('close', '<f8'), ('adjusted_close', '<f8'), ('volume', '<i8'), ('dividend_amount', '<f8'), ('split_coefficient', '<f8')])

Extract columns (features) as list

%%time
rows = csv.reader(StringIO(response.text))
cols = list(zip_longest(*rows))
CPU times: user 984 µs, sys: 27 µs, total: 1.01 ms
Wall time: 1.02 ms
for col in cols:
    print(col[:4])
('timestamp', '2019-11-29', '2019-11-27', '2019-11-26')
('open', '152.1000', '152.3300', '151.3600')
('high', '152.3000', '152.5000', '152.4200')
('low', '151.2800', '151.5200', '151.3200')
('close', '151.3800', '152.3200', '152.0300')
('adjusted_close', '151.3800', '152.3200', '152.0300')
('volume', '11977300', '15201293', '24635100')
('dividend_amount', '0.0000', '0.0000', '0.0000')
('split_coefficient', '1.0000', '1.0000', '1.0000')

Extract rows (examples, observations) as list

%%time
rows = csv.reader(StringIO(response.text))
rows = list(rows)
CPU times: user 684 µs, sys: 77 µs, total: 761 µs
Wall time: 787 µs
rows[:3]
[['timestamp',
  'open',
  'high',
  'low',
  'close',
  'adjusted_close',
  'volume',
  'dividend_amount',
  'split_coefficient'],
 ['2019-11-29',
  '152.1000',
  '152.3000',
  '151.2800',
  '151.3800',
  '151.3800',
  '11977300',
  '0.0000',
  '1.0000'],
 ['2019-11-27',
  '152.3300',
  '152.5000',
  '151.5200',
  '152.3200',
  '152.3200',
  '15201293',
  '0.0000',
  '1.0000']]

JSON as response

# Build the Alpha Vantage query URL again, this time requesting JSON.
apikey = apikey_read("alphavantage")  # key previously stored in ~/.apikeystore
ticker = "MSFT"
tstype = "TIME_SERIES_DAILY_ADJUSTED"  # daily OHLCV adjusted for splits/dividends
outsz = "compact"  # compact: last 100 days, full: all
dtype = "json"  # only the datatype differs from the CSV example above
# NOTE: the API key travels as a URL query parameter -- avoid printing/logging this URL.
FILE_URL = f"https://www.alphavantage.co/query?function={tstype}&symbol={ticker}&apikey={apikey}&outputsize={outsz}&datatype={dtype}"
%time response = requests.get(FILE_URL)
CPU times: user 33.2 ms, sys: 5.63 ms, total: 38.9 ms
Wall time: 877 ms

Parse JSON as DataFrame

%%time
jsdat = json.load(BytesIO(response.content))
df = pd.DataFrame.from_dict(jsdat['Time Series (Daily)'], orient='index')
CPU times: user 4.76 ms, sys: 228 µs, total: 4.99 ms
Wall time: 4.9 ms
df.head()
.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}

Links