from __future__ import annotations

import logging
import time
from typing import Dict, List

import pandas as pd
import yfinance as yf

log = logging.getLogger("data")

# Canonical output columns, in the fixed order every returned frame uses.
_CORE_COLUMNS = ["Open", "High", "Low", "Close", "Volume"]

# Lower-cased, whitespace-free column spellings -> canonical column name.
_COLUMN_ALIASES: Dict[str, str] = {
    "open": "Open", "op": "Open", "o": "Open",
    "high": "High", "hi": "High", "h": "High",
    "low": "Low", "lo": "Low", "l": "Low",
    "close": "Close", "cl": "Close", "c": "Close",
    "adjclose": "Adj Close", "adjustedclose": "Adj Close",
    "volume": "Volume", "vol": "Volume", "v": "Volume",
}


def _empty_ohlc() -> pd.DataFrame:
    """Return an empty frame that has only the canonical OHLCV columns."""
    return pd.DataFrame(columns=_CORE_COLUMNS)


def _canonical_column(label: object) -> str:
    """Translate one raw column label into its canonical name.

    Tuple labels (as produced by a ``MultiIndex``) are reduced to their first
    non-empty level, and anything after a comma is discarded so that an
    already-flattened label such as ``'Close,EURUSD=X'`` resolves to
    ``'Close'``. Labels that match no known alias are returned as cleaned-up
    text (e.g. ``'Dividends'``, ``'Stock Splits'``).
    """
    if isinstance(label, tuple):
        text = next((part for part in map(str, label) if part), "")
    else:
        # str() first: labels are not guaranteed to be strings.
        text = str(label)
    text = text.split(",")[0].strip()
    # Ignore case and inner whitespace so 'Adj Close' and 'adjclose' agree.
    key = "".join(text.lower().split())
    return _COLUMN_ALIASES.get(key, text)


def _normalize_ohlc(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize a price frame to the columns Open, High, Low, Close, Volume.

    Handles lower-case and abbreviated column names, ``MultiIndex`` columns
    and ``Adj Close``. The input frame is never modified.

    Args:
        df: Raw price frame; ``None`` or an empty frame is accepted.

    Returns:
        A new frame with exactly the five core columns in fixed order. Core
        columns missing from the input are filled with ``pd.NA``; if ``Close``
        is missing but ``Adj Close`` is present, the latter is used. A
        timezone-aware ``DatetimeIndex`` is made naive.
    """
    if df is None or len(df) == 0:
        return _empty_ohlc()

    out = df.copy()
    # Assign names positionally so labels with stray whitespace or a
    # different case are renamed reliably.
    out.columns = [_canonical_column(col) for col in out.columns]

    # Two raw columns may collapse onto one canonical name (e.g. 'close' and
    # 'Close'); keep the first so that each core column is a single Series.
    if out.columns.has_duplicates:
        out = out.loc[:, ~out.columns.duplicated()].copy()

    # Drop the timezone; the wall-clock timestamps are kept as they are.
    if isinstance(out.index, pd.DatetimeIndex) and out.index.tz is not None:
        out.index = out.index.tz_localize(None)

    # Fall back to the adjusted close when no plain close is available.
    if "Close" not in out.columns and "Adj Close" in out.columns:
        out["Close"] = out["Adj Close"]

    # Guarantee every core column exists (all-NA when absent from the input).
    for need in _CORE_COLUMNS:
        if need not in out.columns:
            out[need] = pd.NA

    # Keep only the core columns, in a stable order.
    return out[_CORE_COLUMNS]


def _fetch_single(ticker: str, period: str, interval: str, tries: int = 3, sleep_s: float = 0.7) -> pd.DataFrame:
    """Download OHLC data for one ticker (with retries) and normalize it.

    Args:
        ticker: Yahoo Finance symbol.
        period: Passed through to ``yfinance.Ticker.history``.
        interval: Passed through to ``yfinance.Ticker.history``.
        tries: Maximum number of download attempts.
        sleep_s: Pause in seconds between a failed attempt and the next one.

    Returns:
        The normalized frame from the first attempt that does not raise (it
        may be empty; an empty result is not retried), or an empty core frame
        when every attempt raised.
    """
    for attempt in range(1, tries + 1):
        try:
            log.info("Yahoo: get %s (try %d/%d) period=%s interval=%s", ticker, attempt, tries, period, interval)
            raw = yf.Ticker(ticker).history(period=period, interval=interval, auto_adjust=False, prepost=False)
            df = _normalize_ohlc(raw)
        except Exception as e:
            # Deliberately broad: this is a best-effort fetch and any failure
            # of an attempt should only trigger the next retry.
            log.warning("Yahoo: error %s (try %d/%d): %s", ticker, attempt, tries, e)
            if attempt < tries:
                # No point in waiting after the final attempt.
                time.sleep(sleep_s)
            continue

        # Normalization always adds a Close column, so "no Close" has to be
        # detected by the column holding no data at all.
        if len(df) == 0 or df["Close"].isna().all():
            log.warning("Yahoo: %s -> EMPTY or no Close (cols=%s)", ticker, list(df.columns))
        return df

    # Every attempt failed: return an empty core frame.
    return _empty_ohlc()


def fetch_batch(tickers: List[str], period: str, interval: str) -> Dict[str, pd.DataFrame]:
    """Download data for a list of tickers.

    Args:
        tickers: Yahoo Finance symbols to download, one request per symbol.
        period: Passed through to ``_fetch_single`` for every ticker.
        interval: Passed through to ``_fetch_single`` for every ticker.

    Returns:
        A mapping from ticker to its normalized frame. Every requested ticker
        has an entry; tickers without data map to an empty frame.
    """
    out: Dict[str, pd.DataFrame] = {}
    for tk in tickers:
        df = _fetch_single(tk, period, interval)
        out[tk] = df
        if len(df):
            first = str(df.index[0])
            last = str(df.index[-1])
        else:
            first = last = "-"
        log.info("Yahoo: %s -> bars=%d %s..%s cols=%s", tk, len(df), first, last, list(df.columns))
    return out