Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
remove dead code from features and labels add batches to hourly crypt…
…o data
  • Loading branch information
somefreestring committed Jan 13, 2020
commit 674d006ee0153b76e761ab32f496683be72c5445
1 change: 1 addition & 0 deletions pandas_ml_utils/datafetching/fetch_cryptocompare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# TODO: needs to be implemented, analogous to fetch yahoo
50 changes: 46 additions & 4 deletions pandas_ml_utils/extern/cryptocompare.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
# slightly enhanced version of https://github.com/lagerfeuer/cryptocompare/blob/f940fab908a9b79ce9069cc6dea9cc5a3f2e2eee/cryptocompare/cryptocompare.py
import requests
import time

import datetime
import logging
import time

import requests

_log = logging.getLogger(__name__)


# API
URL_COIN_LIST = 'https://www.cryptocompare.com/api/data/coinlist/'
Expand All @@ -10,11 +16,15 @@
URL_PRICE_MULTI_FULL = 'https://min-api.cryptocompare.com/data/pricemultifull?fsyms={}&tsyms={}'
URL_HIST_PRICE = 'https://min-api.cryptocompare.com/data/pricehistorical?fsym={}&tsyms={}&ts={}&e={}'
URL_HIST_PRICE_DAY = 'https://min-api.cryptocompare.com/data/histoday?fsym={}&tsym={}&limit={}&allData={}'
# Placeholders: coin, currency, limit, toTs. The hourly helper always supplies
# all four (toTs is what lets it page backwards through the history), so only
# the four-placeholder template is kept; the older three-placeholder
# assignment was dead code that this line immediately overwrote.
URL_HIST_PRICE_HOUR = 'https://min-api.cryptocompare.com/data/histohour?fsym={}&tsym={}&limit={}&toTs={}'
URL_HIST_PRICE_MINUTE = 'https://min-api.cryptocompare.com/data/histominute?fsym={}&tsym={}&limit={}'
URL_AVG = 'https://min-api.cryptocompare.com/data/generateAvg?fsym={}&tsym={}&e={}'
URL_EXCHANGES = 'https://www.cryptocompare.com/api/data/exchanges'

# MAX
# Row count requested per call when get_historical_price_hour downloads in batches.
MAX_LIMIT = 2000
# Largest signed 32-bit integer; get_historical_price_hour passes it as the
# toTs value of its first (or only) request, which it logs as "now".
MAX_INT = 2**31 - 1

# FIELDS
PRICE = 'PRICE'
HIGH = 'HIGH24HOUR'
Expand All @@ -23,14 +33,18 @@
CHANGE = 'CHANGE24HOUR'
CHANGE_PERCENT = 'CHANGEPCT24HOUR'
MARKETCAP = 'MKTCAP'
DATA = 'Data'
TIME = 'time'

# DEFAULTS
# Default quote currency for the `curr` parameter of the query helpers.
CURR = 'USD'
# Default `limit` for the historical day/hour/minute helpers.
LIMIT = 1440
###############################################################################


def query_cryptocompare(url,errorCheck=True):
try:
_log.debug(url)
response = requests.get(url).json()
except Exception as e:
print('Error getting coin information. %s' % str(e))
Expand All @@ -40,6 +54,7 @@ def query_cryptocompare(url,errorCheck=True):
return None
return response


def format_parameter(parameter):
if isinstance(parameter, list):
return ','.join(parameter)
Expand All @@ -48,13 +63,15 @@ def format_parameter(parameter):

###############################################################################


def get_coin_list(format=False):
    """Return the coin list published by CryptoCompare.

    Args:
        format: when true, return only the keys of the response's ``'Data'``
            mapping as a list; otherwise return the mapping itself. (The name
            shadows the builtin but is kept because callers may pass it by
            keyword.)

    Returns:
        The ``'Data'`` payload (or a list of its keys), or ``None`` when the
        request failed or the response carries no ``'Data'`` entry.
    """
    response = query_cryptocompare(URL_COIN_LIST, False)
    if not response or 'Data' not in response:
        # query_cryptocompare signals failure by returning None; propagate
        # that instead of raising TypeError on the subscript, matching the
        # guard used by get_avg.
        return None

    coins = response['Data']
    return list(coins) if format else coins


# TODO: add option to filter json response according to a list of fields
def get_price(coin, curr=CURR, full=False):
if full:
Expand All @@ -66,12 +83,14 @@ def get_price(coin, curr=CURR, full=False):
else:
return query_cryptocompare(URL_PRICE.format(coin, format_parameter(curr)))


def get_historical_price(coin, curr=CURR, timestamp=None, exchange='CCCAGG'):
    """Query the price of *coin* in *curr* at a given point in time.

    Args:
        coin: symbol inserted as ``fsym`` into the request URL.
        curr: one currency or a list of currencies (joined by
            ``format_parameter``).
        timestamp: epoch seconds, a ``datetime.datetime``, or ``None`` for
            "now". The default used to be ``time.time()`` in the signature,
            which Python evaluates once at import time, so a long-running
            process kept querying the moment the module was loaded. ``None``
            is now resolved on every call instead.
        exchange: one exchange name or a list of names.

    Returns:
        Whatever ``query_cryptocompare`` returns for the built URL.
    """
    if timestamp is None:
        timestamp = time.time()
    elif isinstance(timestamp, datetime.datetime):
        # NOTE(review): time.mktime interprets the struct_time as local time,
        # so naive datetimes are presumably expected to be local - confirm.
        timestamp = time.mktime(timestamp.timetuple())

    url = URL_HIST_PRICE.format(coin, format_parameter(curr),
                                int(timestamp), format_parameter(exchange))
    return query_cryptocompare(url)


def get_historical_price_day(coin, curr=CURR, limit=LIMIT):
all_data = "false"

Expand All @@ -81,17 +100,40 @@ def get_historical_price_day(coin, curr=CURR, limit=LIMIT):

return query_cryptocompare(URL_HIST_PRICE_DAY.format(coin, format_parameter(curr), limit, all_data))


def get_historical_price_hour(coin, curr=CURR, limit=LIMIT):
    """Fetch hourly historical prices of *coin* quoted in *curr*.

    A request for at most ``MAX_LIMIT`` rows is answered with a single query.
    A larger ``limit`` (or ``limit=None`` for "everything available") is
    served by paging backwards: each further request asks for the
    ``MAX_LIMIT`` rows ending just before the oldest row collected so far.

    Args:
        coin: symbol inserted as ``fsym`` into the request URL.
        curr: one currency or a list of currencies (joined by
            ``format_parameter``).
        limit: number of hourly rows wanted, or ``None`` to download until the
            service stops returning rows.

    Returns:
        The response dict of the first request, whose ``DATA`` list holds all
        collected rows oldest-first (assuming each response is itself
        oldest-first, as the use of its first row as the paging cursor
        implies), cut to the most recent ``limit`` rows when batching. The
        raw result of ``query_cryptocompare`` (possibly ``None``) is returned
        when the first request fails or has no rows.
    """
    symbol, quote = coin, format_parameter(curr)

    if limit is not None and limit <= MAX_LIMIT:
        # Fits into one request; MAX_INT as toTs means "up to now".
        return query_cryptocompare(URL_HIST_PRICE_HOUR.format(symbol, quote, limit, MAX_INT))

    _log.info("batch download < now")
    data = query_cryptocompare(URL_HIST_PRICE_HOUR.format(symbol, quote, MAX_LIMIT, MAX_INT))
    if not data or not data.get(DATA):
        # Nothing to page from: hand back the failure / empty response as is.
        return data

    while limit is None or len(data[DATA]) < limit:
        oldest_ts = data[DATA][0][TIME]
        _log.info("batch download < %s", oldest_ts)
        batch = query_cryptocompare(URL_HIST_PRICE_HOUR.format(symbol, quote, MAX_LIMIT, oldest_ts - 1))
        rows = batch.get(DATA) if batch else None
        if not rows:
            # A failed request or an empty page both mean the history is exhausted.
            break
        # Older rows go in front so the combined list stays chronological.
        data[DATA] = rows + data[DATA]

    if limit is not None:
        # Keep the newest rows; cutting from the front would leave a time gap.
        data[DATA] = data[DATA][-limit:]
    return data


def get_historical_price_minute(coin, curr=CURR, limit=LIMIT):
    """Fetch minute-resolution historical prices of *coin* quoted in *curr*.

    ``curr`` may be a single currency or a list (joined by
    ``format_parameter``); ``limit`` is inserted into the URL unchanged.
    Returns whatever ``query_cryptocompare`` yields for the built URL.
    """
    quote = format_parameter(curr)
    url = URL_HIST_PRICE_MINUTE.format(coin, quote, limit)
    return query_cryptocompare(url)


def get_avg(coin, curr=CURR, exchange='CCCAGG'):
    """Return the ``'RAW'`` part of the generateAvg response for *coin*.

    ``exchange`` may be a single name or a list (joined by
    ``format_parameter``); ``curr`` is inserted into the URL as given.
    Returns ``None`` when the query produced no (or an empty) response.
    """
    url = URL_AVG.format(coin, curr, format_parameter(exchange))
    result = query_cryptocompare(url)
    if not result:
        return None
    return result['RAW']


def get_exchanges():
response = query_cryptocompare(URL_EXCHANGES)
if response:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,14 +143,6 @@ def with_kwargs(self, **kwargs):
copy.kwargs = {**self.kwargs, **kwargs}
return copy

def __getitem__(self, item):
    """Look up an entry of ``self.kwargs``, optionally with a fallback.

    ``obj[name]`` returns ``self.kwargs[name]`` and raises ``KeyError`` when
    the name is absent. ``obj[name, default]`` (any 2-tuple) returns
    ``default`` instead of raising.
    """
    is_pair = isinstance(item, tuple) and len(item) == 2
    if is_pair:
        key, fallback = item[0], item[1]
        if key in self.kwargs:
            return self.kwargs[key]
        return fallback

    if item not in self.kwargs:
        raise KeyError(f"key not found {item}")
    return self.kwargs[item]

def __repr__(self):
return f'FeaturesAndLabels({self.features},{self.labels},{self.targets},' \
f'{self.feature_lags},{self.feature_rescaling}{self.lag_smoothing}) ' \
Expand Down
20 changes: 18 additions & 2 deletions test/extern/test__cryptocompare.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,29 @@
import unittest
import logging
import pandas as pd
import pandas_ml_utils.extern.cryptocompare as cc

# Lower the root logger's threshold to DEBUG so debug records (such as the
# request URLs logged by the cryptocompare module) are not filtered out
# while these tests run.
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)


class TestCryptoCompare(unittest.TestCase):
    """Download checks for the cryptocompare client.

    Both tests call the remote service and take very long, so they are
    skipped by default and meant to be enabled by hand when investigating a
    problem.
    """

    @unittest.skip("only test if error, takes very long")
    def test_download_day(self):
        """Daily history without a limit has more than 2000 unique rows."""
        # when
        data = cc.get_historical_price_day("BTC", limit=None)["Data"]

        # then
        print(f"\n{len(data)}")
        self.assertGreater(len(data), 2000)
        self.assertEqual(pd.DataFrame(data).duplicated("time").astype(int).sum(), 0)

    @unittest.skip("only test if error, takes very long")
    def test_bach_load_hour(self):
        """Batched hourly download has more than 300100 unique rows."""
        # when
        data = cc.get_historical_price_hour("BTC", limit=None)["Data"]

        # then
        print(f"\n{len(data)}")
        self.assertGreater(len(data), 300100)
        self.assertEqual(pd.DataFrame(data).duplicated("time").astype(int).sum(), 0)