85 lines
3.6 KiB
Python
85 lines
3.6 KiB
Python
from __future__ import annotations
|
||
from typing import Dict, List
|
||
import time, logging
|
||
import pandas as pd
|
||
import yfinance as yf
|
||
|
||
log = logging.getLogger("data")
|
||
|
||
# Canonical output columns, in the fixed order callers receive them.
_OHLC_CORE = ["Open", "High", "Low", "Close", "Volume"]

# Lower-cased column spellings -> canonical column name.
_OHLC_ALIASES = {
    "open": "Open", "op": "Open", "o": "Open",
    "high": "High", "hi": "High", "h": "High",
    "low": "Low", "lo": "Low", "l": "Low",
    "close": "Close", "cl": "Close", "c": "Close",
    "adj close": "Adj Close", "adjclose": "Adj Close", "adjustedclose": "Adj Close",
    "volume": "Volume", "vol": "Volume", "v": "Volume",
}


def _normalize_ohlc(df: pd.DataFrame) -> pd.DataFrame:
    """Normalise a price frame to the columns Open, High, Low, Close, Volume.

    Handles lower-case / abbreviated column names, surrounding whitespace in
    labels, MultiIndex columns and an ``Adj Close`` column standing in for a
    missing ``Close``.

    Args:
        df: Raw price frame; may be ``None`` or empty.

    Returns:
        A new DataFrame with exactly the five core columns in fixed order.
        Columns absent from the input are filled with ``pd.NA``. A
        timezone-aware DatetimeIndex is made naive. ``None`` or an empty
        input yields an empty frame with the five core columns.
    """
    if df is None or len(df) == 0:
        return pd.DataFrame(columns=list(_OHLC_CORE))

    df = df.copy()

    # Flatten MultiIndex columns: keep the first non-empty level of each
    # tuple (e.g. ('Close', 'EURUSD=X') -> 'Close').
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = [
            next((part for part in map(str, col) if part), "")
            for col in df.columns
        ]

    # Drop timezone information from the index.
    if isinstance(df.index, pd.DatetimeIndex) and df.index.tz is not None:
        df.index = df.index.tz_localize(None)

    # Build the rename map keyed by the ORIGINAL labels so that labels with
    # padding whitespace (or non-string labels) are actually renamed.
    # Unknown columns (e.g. 'Dividends', 'Stock Splits') keep their stripped
    # name; they are dropped by the final selection anyway.
    renames = {}
    for col in df.columns:
        label = str(col).strip()
        renames[col] = _OHLC_ALIASES.get(label.lower(), label)
    df = df.rename(columns=renames)

    # Several aliases may collapse onto one canonical name; keep the first
    # occurrence so each core column is a single Series.
    if df.columns.has_duplicates:
        df = df.loc[:, ~df.columns.duplicated()].copy()

    # If Close is missing but Adj Close is present, use it instead.
    if "Close" not in df.columns and "Adj Close" in df.columns:
        df["Close"] = df["Adj Close"]

    # Make sure every core column exists (add an empty one when missing).
    for need in _OHLC_CORE:
        if need not in df.columns:
            df[need] = pd.NA

    # Keep only the core columns, in fixed order.
    return df[list(_OHLC_CORE)]
|
||
|
||
def _fetch_single(ticker: str, period: str, interval: str, tries: int = 3, sleep_s: float = 0.7) -> pd.DataFrame:
    """Fetch OHLC bars for one ticker (with retries) and normalise the columns.

    Args:
        ticker: Symbol passed to ``yf.Ticker``.
        period: ``period`` argument forwarded to ``Ticker.history``.
        interval: ``interval`` argument forwarded to ``Ticker.history``.
        tries: Maximum number of attempts; only attempts that raise are retried.
        sleep_s: Pause in seconds between a failed attempt and the next one.

    Returns:
        The normalised frame from the first attempt that does not raise (it
        may be empty), or an empty frame with the five core columns when all
        attempts raise.
    """
    for attempt in range(1, tries + 1):
        log.info("Yahoo: get %s (try %d/%d) period=%s interval=%s", ticker, attempt, tries, period, interval)
        try:
            raw = yf.Ticker(ticker).history(period=period, interval=interval, auto_adjust=False, prepost=False)
            df = _normalize_ohlc(raw)
        except Exception as e:
            log.warning("Yahoo: error %s (try %d/%d): %s", ticker, attempt, tries, e)
            # Only pause when another attempt follows; sleeping after the
            # last failure would just delay the empty return.
            if attempt < tries:
                time.sleep(sleep_s)
            continue

        # The normaliser always creates a Close column, so also treat an
        # all-NA Close as "no Close" to make this warning meaningful.
        if len(df) == 0 or "Close" not in df.columns or df["Close"].isna().to_numpy().all():
            log.warning("Yahoo: %s -> EMPTY or no Close (cols=%s)", ticker, list(df.columns))
        return df

    # Every attempt failed: return an empty core frame.
    return pd.DataFrame(columns=["Open", "High", "Low", "Close", "Volume"])
|
||
|
||
def fetch_batch(tickers: List[str], period: str, interval: str) -> Dict[str, pd.DataFrame]:
    """Fetch data for each ticker in *tickers* and return the frames keyed by ticker.

    Tickers are fetched one at a time via ``_fetch_single``; after each fetch
    the bar count, first/last index value and column list are logged.
    """
    results: Dict[str, pd.DataFrame] = {}
    for symbol in tickers:
        frame = _fetch_single(symbol, period, interval)
        results[symbol] = frame

        n_bars = len(frame)
        # Use "-" placeholders when there are no bars to take a range from.
        span_start, span_end = (
            (str(frame.index[0]), str(frame.index[-1])) if n_bars else ("-", "-")
        )
        log.info(
            "Yahoo: %s -> bars=%d %s..%s cols=%s",
            symbol, n_bars, span_start, span_end, list(frame.columns),
        )
    return results
|