stock/data.py
2025-08-15 12:19:07 +02:00

85 lines
3.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from __future__ import annotations
from typing import Dict, List
import time, logging
import pandas as pd
import yfinance as yf
# Module-level logger used by the functions in this file; registered under the fixed name "data".
log = logging.getLogger("data")
def _normalize_ohlc(df: pd.DataFrame) -> pd.DataFrame:
"""Ujednolica kolumny do: Open, High, Low, Close, Volume. Obsługa lowercase, MultiIndex, Adj Close."""
if df is None or len(df) == 0:
return pd.DataFrame(columns=["Open","High","Low","Close","Volume"])
df = df.copy()
# Flatten MultiIndex -> zwykłe nazwy
if isinstance(df.columns, pd.MultiIndex):
df.columns = [str(tuple(filter(None, map(str, c)))).strip("()").replace("'", "").replace(" ", "") if isinstance(c, tuple) else str(c) for c in df.columns]
# po flatten często nazwy są np. 'Close,EURUSD=X' weź pierwszy człon przed przecinkiem
df.columns = [c.split(",")[0] for c in df.columns]
# Zrzuć TZ
if isinstance(df.index, pd.DatetimeIndex) and df.index.tz is not None:
df.index = df.index.tz_localize(None)
# Ujednolicenie do TitleCase
norm = {c: str(c).strip() for c in df.columns}
# mapuj najczęstsze warianty
mapping = {}
for c in norm.values():
lc = c.lower()
if lc in ("open", "op", "o"): mapping[c] = "Open"
elif lc in ("high", "hi", "h"): mapping[c] = "High"
elif lc in ("low", "lo", "l"): mapping[c] = "Low"
elif lc in ("close", "cl", "c"): mapping[c] = "Close"
elif lc in ("adj close","adjclose","adjustedclose"): mapping[c] = "Adj Close"
elif lc in ("volume","vol","v"): mapping[c] = "Volume"
else:
# zostaw jak jest (np. 'Dividends', 'Stock Splits')
mapping[c] = c
df.rename(columns=mapping, inplace=True)
# Jeśli brak Close, ale jest Adj Close -> użyj go
if "Close" not in df.columns and "Adj Close" in df.columns:
df["Close"] = df["Adj Close"]
# Upewnij się, że są wszystkie podstawowe kolumny (dodaj puste jeśli brak)
for need in ["Open","High","Low","Close","Volume"]:
if need not in df.columns:
df[need] = pd.NA
# Pozostaw tylko rdzeń (kolejność stała)
df = df[["Open","High","Low","Close","Volume"]]
return df
def _fetch_single(ticker: str, period: str, interval: str, tries: int = 3, sleep_s: float = 0.7) -> pd.DataFrame:
"""Pobierz OHLC dla 1 tickera (z retry) i znormalizuj kolumny."""
for i in range(1, tries + 1):
try:
log.info("Yahoo: get %s (try %d/%d) period=%s interval=%s", ticker, i, tries, period, interval)
df = yf.Ticker(ticker).history(period=period, interval=interval, auto_adjust=False, prepost=False)
df = _normalize_ohlc(df)
if len(df) == 0 or "Close" not in df.columns:
log.warning("Yahoo: %s -> EMPTY or no Close (cols=%s)", ticker, list(df.columns))
return df
except Exception as e:
log.warning("Yahoo: error %s (try %d/%d): %s", ticker, i, tries, e)
time.sleep(sleep_s)
# po niepowodzeniu zwróć pusty rdzeń
return pd.DataFrame(columns=["Open","High","Low","Close","Volume"])
def fetch_batch(tickers: List[str], period: str, interval: str) -> Dict[str, pd.DataFrame]:
    """Download data for each ticker in *tickers* and return the frames keyed by ticker.

    Tickers are fetched one at a time via ``_fetch_single``; after each fetch a
    summary line (bar count, first and last index value, columns) is logged.
    """
    frames: Dict[str, pd.DataFrame] = {}
    for symbol in tickers:
        frame = _fetch_single(symbol, period, interval)
        frames[symbol] = frame
        bar_count = len(frame)
        # An empty frame has no index values to report; log "-" for both ends.
        first, last = (str(frame.index[0]), str(frame.index[-1])) if bar_count else ("-", "-")
        log.info("Yahoo: %s -> bars=%d %s..%s cols=%s", symbol, bar_count, first, last, list(frame.columns))
    return frames