|
|
|
|
@@ -31,6 +31,138 @@ except ImportError:
|
|
|
|
|
XGB_AVAILABLE = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_ffd_weights(d, threshold=1e-4, max_len=100):
    """
    Build the truncated binomial weight vector used for fractional differencing.

    Starting from w_0 = 1, every further weight follows the recursion
    w_k = -w_{k-1} / k * (d - k + 1).  Generation stops as soon as a freshly
    computed weight has magnitude below ``threshold`` (that weight is dropped)
    or once ``max_len`` weights have been collected.

    Returns a NumPy array ordered from the largest lag to lag zero, so the
    final element is always w_0 = 1.0.
    """
    coeffs = [1.0]
    lag = 1
    while lag < max_len:
        candidate = -coeffs[-1] / lag * (d - lag + 1)
        if abs(candidate) < threshold:
            # Everything past this lag is treated as negligible memory.
            break
        coeffs.append(candidate)
        lag += 1

    # Put the lag-0 weight last so the vector lines up with a chronological window.
    coeffs.reverse()
    return np.array(coeffs)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def fractional_differentiation_ffd(series, d, threshold=1e-4):
    """
    Apply Fixed-Width Fractional Differentiation (FFD) to a series.

    A single weight vector is obtained from ``get_ffd_weights(d, threshold)``
    and the same fixed-width window is slid over the whole series, so every
    output value is the dot product of the most recent ``width`` observations
    with those weights.

    Args:
        series: pandas Series of observations in chronological order.
        d: fractional differencing order.
        threshold: weight-magnitude cut-off forwarded to ``get_ffd_weights``.

    Returns:
        A float pandas Series indexed by ``series.index[width - 1:]``; the first
        ``width - 1`` observations are dropped because their window is
        incomplete.  The result is empty (float dtype) when the series is
        shorter than the window.
    """
    weights = get_ffd_weights(d, threshold)
    width = len(weights)
    out_index = series.index[width - 1:]

    if len(series) < width:
        # np.convolve(mode="valid") swaps its operands when the signal is
        # shorter than the kernel, so the short case is handled explicitly.
        # The empty result is typed as float rather than left to pandas'
        # object-dtype default for empty input.
        return pd.Series(np.empty(0, dtype=float), index=out_index)

    # Convert once instead of slicing the Series with .iloc on every row.
    values = series.to_numpy(dtype=float)

    # `weights` is ordered oldest-lag first; np.convolve flips its second
    # argument, so it is passed lag-0 first to reproduce the per-window dot.
    res = np.convolve(values, weights[::-1], mode="valid")
    return pd.Series(res, index=out_index)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class KlaassenMSGJRGARCH:
    """
    Stub for the discrete Markov-Switching GJR-GARCH model
    incorporating Klaassen path consolidation.

    No GARCH parameters are estimated.  The class holds a fixed 3-state
    transition matrix and runs a forward filter whose "likelihoods" are
    hand-picked constants selected by the size of each absolute return.
    Regime indices: 0 = Low Vol, 1 = Normal Vol, 2 = High/Crisis Vol.
    """

    # Number of regimes the hard-coded matrix and likelihood vectors describe.
    _SUPPORTED_REGIMES = 3

    def __init__(self, n_regimes=3):
        """
        Args:
            n_regimes: number of regimes.  Only 3 is usable with the built-in
                transition matrix; other values are rejected by
                ``fit_regimes``.
        """
        self.n_regimes = n_regimes
        # Transition state matrix (routing matrix).
        # Row: from state (0=Low Vol, 1=Normal Vol, 2=High/Crisis Vol)
        # Col: to state
        self.transition_matrix = np.array([
            [0.90, 0.08, 0.02],  # Low Vol regime state transitions
            [0.05, 0.85, 0.10],  # Normal Vol regime state transitions
            [0.01, 0.19, 0.80],  # High Vol regime state transitions
        ])

    @staticmethod
    def _pseudo_likelihood(abs_return):
        """Return the placeholder per-regime likelihood for one |return|."""
        if abs_return < 0.01:
            return np.array([0.8, 0.15, 0.05])
        if abs_return < 0.03:
            return np.array([0.15, 0.7, 0.15])
        return np.array([0.05, 0.15, 0.8])

    def fit_regimes(self, returns):
        """
        Filter regime probabilities through the fixed transition matrix.

        For each observation after the first, the previous probabilities are
        propagated through ``transition_matrix`` and multiplied by a
        placeholder likelihood chosen from the absolute return.  A NaN return
        carries no evidence, so the propagated prior is kept as-is instead of
        being scored as a crisis observation.

        Args:
            returns: 1-D array-like of returns (pandas Series, NumPy array or
                list).

        Returns:
            ``(states, regime_probs)``: ``states`` is the per-observation
            argmax regime index, ``regime_probs`` has shape
            ``(n_obs, n_regimes)``.  The first row is the uniform seed.

        Raises:
            ValueError: if ``n_regimes`` or ``transition_matrix`` does not
                match the 3-state configuration this stub implements.
        """
        transition = np.asarray(self.transition_matrix, dtype=float)
        k = self._SUPPORTED_REGIMES
        if self.n_regimes != k or transition.shape != (k, k):
            raise ValueError(
                f"KlaassenMSGJRGARCH stub supports exactly {k} regimes with a "
                f"{k}x{k} transition matrix; got n_regimes={self.n_regimes}, "
                f"transition_matrix shape {transition.shape}"
            )

        # Work on a plain float array so Series, ndarrays and lists all behave
        # the same and the index of a Series is irrelevant.
        abs_returns = np.abs(np.asarray(returns, dtype=float))
        n_obs = len(abs_returns)

        # Seed regime probabilities initialized uniformly.
        regime_probs = np.ones((n_obs, self.n_regimes)) / self.n_regimes

        for t in range(1, n_obs):
            # Prior state probabilities updated by the routing matrix.
            prior = regime_probs[t - 1] @ transition

            vol_proxy = abs_returns[t]
            if np.isnan(vol_proxy):
                # Missing observation: no likelihood update.
                posterior = prior
            else:
                posterior = prior * self._pseudo_likelihood(vol_proxy)

            # The small constant guards the division against a zero sum.
            regime_probs[t] = posterior / (np.sum(posterior) + 1e-9)

        states = np.argmax(regime_probs, axis=1)
        return states, regime_probs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ULSIFDensityRatioEstimator:
    """
    Unconstrained Least-Squares Importance Fitting (uLSIF)
    density ratio estimator: w(x) = p_test(x) / p_train(x).

    Used to counter covariate shift: evaluating the fitted ratio on training
    rows gives importance weights that up-weight the rows resembling the test
    distribution.  The ratio is modelled as a non-negative combination of
    Gaussian kernels centred on (a subset of) the training samples.
    """

    def __init__(self, kernel_sigma=1.0, regularization_lambda=0.1,
                 n_centers=100, random_state=None):
        """
        Args:
            kernel_sigma: Gaussian kernel bandwidth.
            regularization_lambda: ridge penalty added to the diagonal of H.
            n_centers: maximum number of kernel centres drawn from the
                training set.
            random_state: optional seed for centre selection.  ``None`` keeps
                drawing from NumPy's global random state.
        """
        self.kernel_sigma = kernel_sigma
        self.regularization_lambda = regularization_lambda
        self.n_centers = n_centers
        self.random_state = random_state
        self.weights = None
        self.centers = None

    @staticmethod
    def _as_2d(x):
        """Coerce input to a float array of shape (n_samples, n_features)."""
        arr = np.asarray(x, dtype=float)
        if arr.ndim == 1:
            # A flat vector is read as n samples of a single feature.
            arr = arr.reshape(-1, 1)
        return arr

    def _gaussian_kernel(self, x, y):
        """
        Return the Gaussian kernel matrix between the rows of ``x`` and ``y``.

        x shape: (n_samples_x, n_features), y shape: (n_samples_y, n_features);
        the result has shape (n_samples_x, n_samples_y).
        """
        x = self._as_2d(x)
        y = self._as_2d(y)
        # ||x - y||^2 = ||x||^2 + ||y||^2 - 2 x.y, which avoids materialising
        # an (n_x, n_y, n_features) intermediate.
        sq_dist = (
            np.sum(x ** 2, axis=1)[:, np.newaxis]
            + np.sum(y ** 2, axis=1)[np.newaxis, :]
            - 2.0 * (x @ y.T)
        )
        # Round-off can push near-zero distances slightly negative.
        np.maximum(sq_dist, 0.0, out=sq_dist)
        return np.exp(-sq_dist / (2 * (self.kernel_sigma ** 2)))

    def fit(self, x_train, x_test):
        r"""
        Computes the closed-form solution for the uLSIF coefficients (theta):
        theta = (H + lambda * I) \ h
        where H is the second-moment matrix of the kernel features over the
        denominator (training) samples and h is the mean kernel feature vector
        over the numerator (test) samples.  Negative coefficients are clipped
        to zero afterwards.

        Args:
            x_train: training features, shape (n_train, n_features).
            x_test: test features, shape (n_test, n_features).

        Raises:
            ValueError: if either sample set is empty.
        """
        x_train = self._as_2d(x_train)
        x_test = self._as_2d(x_test)
        n_train = len(x_train)
        n_test = len(x_test)
        if n_train == 0 or n_test == 0:
            raise ValueError(
                "uLSIF needs at least one training and one test sample "
                f"(got n_train={n_train}, n_test={n_test})"
            )

        # Select kernel centers from training set.
        rng = (
            np.random
            if self.random_state is None
            else np.random.RandomState(self.random_state)
        )
        indices = rng.choice(n_train, min(n_train, self.n_centers), replace=False)
        centers = x_train[indices]

        # Calculate kernels.
        phi_train = self._gaussian_kernel(x_train, centers)  # (n_train, n_centers)
        phi_test = self._gaussian_kernel(x_test, centers)    # (n_test, n_centers)

        # H comes from the denominator density (train), h from the numerator
        # density (test), so the fitted function approximates p_test / p_train.
        H = (phi_train.T @ phi_train) / n_train  # (n_centers, n_centers)
        h = np.mean(phi_test, axis=0)            # (n_centers,)

        # Solve for weights (theta) via regularized least squares.
        reg_matrix = self.regularization_lambda * np.eye(len(centers))
        theta = np.linalg.solve(H + reg_matrix, h)

        # Publish centres and weights together, only after the solve succeeded,
        # so a failed fit never leaves the two out of sync.
        self.centers = centers
        self.weights = np.maximum(0, theta)  # non-negativity constraint

    def estimate_ratio(self, x):
        """
        Returns estimated density ratios w(x) for target features x.

        Falls back to a vector of ones (no re-weighting) when the estimator
        has not been fitted.
        """
        if self.weights is None or self.centers is None:
            return np.ones(len(x))
        phi = self._gaussian_kernel(x, self.centers)
        return phi @ self.weights
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def compute_stationary_features(df):
|
|
|
|
|
"""
|
|
|
|
|
Transforms raw OHLCV price history into an absolute stationary feature matrix.
|
|
|
|
|
@@ -41,6 +173,10 @@ def compute_stationary_features(df):
|
|
|
|
|
high = df['High']
|
|
|
|
|
low = df['Low']
|
|
|
|
|
|
|
|
|
|
# TODO: Integrate Fixed-Width Fractional Differentiation (FFD) based on memory retention bounds
|
|
|
|
|
# Example: features['close_ffd'] = fractional_differentiation_ffd(close, d=0.4)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# 1. Log-Returns (1, 3, 7 days)
|
|
|
|
|
features['log_ret_1'] = np.log(close / close.shift(1))
|
|
|
|
|
features['log_ret_3'] = np.log(close / close.shift(3))
|
|
|
|
|
@@ -176,6 +312,17 @@ def train_and_forecast():
|
|
|
|
|
# Compute features
|
|
|
|
|
features = compute_stationary_features(df)
|
|
|
|
|
|
|
|
|
|
# --- Two-Stage Engine: Unsupervised Regime & Covariate Shift Checks (Placeholders) ---
|
|
|
|
|
try:
|
|
|
|
|
# 1. Unsupervised MS-GJR-GARCH Regime Classification
|
|
|
|
|
returns_vol = features['log_ret_1']
|
|
|
|
|
ms_garch = KlaassenMSGJRGARCH(n_regimes=3)
|
|
|
|
|
regimes, regime_probs = ms_garch.fit_regimes(returns_vol)
|
|
|
|
|
active_regime = regimes[-1]
|
|
|
|
|
print(f"Two-Stage Engine: Active Regime identified as {active_regime} (probs: {regime_probs[-1]})")
|
|
|
|
|
except Exception as regime_err:
|
|
|
|
|
print(f"Two-Stage Engine: Regime classification stub failed: {regime_err}")
|
|
|
|
|
|
|
|
|
|
# Horizons setup
|
|
|
|
|
horizons = {1: 'T1', 5: 'T5', 10: 'T10'}
|
|
|
|
|
estimators = {
|
|
|
|
|
@@ -219,6 +366,17 @@ def train_and_forecast():
|
|
|
|
|
X_test = features.iloc[[latest_idx]]
|
|
|
|
|
X_test_scaled = scaler.transform(X_test)
|
|
|
|
|
|
|
|
|
|
# 2. Covariate Shift Weighting via uLSIF (Unconstrained Least-Squares Importance Fitting)
|
|
|
|
|
try:
|
|
|
|
|
ulsif = ULSIFDensityRatioEstimator(kernel_sigma=1.0, regularization_lambda=0.1)
|
|
|
|
|
ulsif.fit(X_train_scaled, X_test_scaled)
|
|
|
|
|
sample_ratios = ulsif.estimate_ratio(X_train_scaled)
|
|
|
|
|
# Placeholder for importance-weighted learning:
|
|
|
|
|
# e.g., clf.fit(X_train_scaled, y_train, sample_weight=sample_ratios)
|
|
|
|
|
print(f"uLSIF Covariate Shift ({h_label}): Computed {len(sample_ratios)} density ratios. Range: [{sample_ratios.min():.4f}, {sample_ratios.max():.4f}]")
|
|
|
|
|
except Exception as ulsif_err:
|
|
|
|
|
print(f"uLSIF Density Ratio Estimation stub failed: {ulsif_err}")
|
|
|
|
|
|
|
|
|
|
# Feature selection gateway for SVM and MLP models (#ISSUE-025-CORE)
|
|
|
|
|
X_train_scaled_selected = X_train_scaled
|
|
|
|
|
X_test_scaled_selected = X_test_scaled
|
|
|
|
|
|