Add LSTM and TCN models for predicting hysteretic curves

- Implemented LSTM model with enhanced training features including mixed precision, early stopping, and validation via DataLoader.
- Introduced a new 1D Temporal Convolutional Network (TCN) model for predicting hysteretic curves.
- Added functionality for creating sliding windows from the dataset for both training and testing.
- Integrated scaling for features and targets using StandardScaler.
- Enhanced data handling with explicit cleanup and memory management for GPU.
- Updated main function to include model training and prediction for both LSTM and TCN.
- Saved predictions to CSV files for further analysis.
parent cf040013
"""
LSTM tuning + retrain + predict for hysteretic curves (PyTorch)
VRAM PATCHES:
- Validation via DataLoader (no full x_val tensor on GPU)
- Test prediction via DataLoader (no full x_test tensor on GPU)
- AMP (mixed precision) on GPU using torch.amp.* (positional device arg)
- optimizer.zero_grad(set_to_none=True)
- explicit cleanup + empty_cache between trials
"""
from typing import cast
import gc
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
# -------------------------
# Configuration
# -------------------------
SEED = 123
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
torch.backends.cudnn.benchmark = True
W = 5   # Number of windows
B = 34  # Width identifier
H = 30
if W == 2:
    H = 30  # height identifier for the two-window case
elif W == 3:
    H = 45  # height identifier for the three-window case
elif W == 5:
    H = 60  # height identifier for the five-window case
else:
print("Warning: H not set for W != 2, 3, or 5")
DATA_FILE = \
f"../../data/hysteretic_curves/{W}W/H{H}_B{B}/merged_dataset_points.csv"
PREDICT_CASE = 0
if W == 2:
PREDICT_CASE = 2
elif W == 3:
PREDICT_CASE = 6
elif W == 5:
PREDICT_CASE = 13
else:
print("Warning: PREDICT_CASE not set for W != 2, 3, or 5")
print("Training cases:", df_train["Case"].unique())
print("Test case:", df_test["Case"].unique())
N_TRIALS = 25
MAX_EPOCHS_TUNE = 60
PATIENCE_TUNE = 8
MAX_EPOCHS_FINAL = 300
PATIENCE_FINAL = 20
# -------------------------
# Utilities
# -------------------------
def rmse(y_true, y_pred): # type: ignore
y_true = np.asarray(y_true).ravel()
y_pred = np.asarray(y_pred).ravel()
return float(np.sqrt(np.mean((y_true - y_pred) ** 2)))
def make_windows_for_case(df_case, feature_cols, target_col, window_size):  # type: ignore
    """Create sliding windows for one case (PyTorch-style)."""
df_case = df_case.sort_index()
x = df_case[feature_cols].values
y = df_case[target_col].values
idx = df_case.index.to_numpy()
if len(df_case) < window_size:
return np.empty((0, window_size, len(feature_cols))), np.empty((0,)), np.array([])
x_windows, y_windows, idx_windows = [], [], []
for end in range(window_size - 1, len(df_case)):
start = end - window_size + 1
x_windows.append(x[start:end + 1])
y_windows.append(y[end])
idx_windows.append(idx[end])
return np.array(x_windows), np.array(y_windows), np.array(idx_windows)
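# Shape example (illustrative, not part of the pipeline): a case with 100 rows,
# F features, and window_size=20 yields
#   x:   (81, 20, F)  -- one window per end position 19..99
#   y:   (81,)        -- the Force value at each window's last row
#   idx: (81,)        -- the original DataFrame index of that last row
# so each window is labelled by the target at its final time step.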
def build_windows(df_all, cases, feature_cols, target_col, window_size): # type: ignore
x_list, y_list = [], []
for case in sorted(cases):
df_case = df_all[df_all["Case"] == case]
xc, yc, _ = make_windows_for_case(df_case, feature_cols, target_col, window_size)
if len(xc) > 0:
x_list.append(xc)
y_list.append(yc)
if len(x_list) == 0:
return None, None
return np.concatenate(x_list, axis=0), np.concatenate(y_list, axis=0)
class WindowDataset(Dataset):
    def __init__(self, x, y):
        self.x = torch.tensor(x, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):  # type: ignore
        return self.x[idx], self.y[idx]
class LSTMRegressor(nn.Module):
def __init__(self, input_dim, hidden_dim=64, dense_dim=32, num_layers=1, dropout=0.0):
super().__init__()
self.lstm = nn.LSTM(
input_dim,
hidden_dim,
num_layers=num_layers,
dropout=(dropout if num_layers > 1 else 0.0),
batch_first=True
)
self.fc1 = nn.Linear(hidden_dim, dense_dim)
self.fc2 = nn.Linear(dense_dim, 1)
self.relu = nn.ReLU()
self.drop = nn.Dropout(dropout)
def forward(self, x):
_, (h, _) = self.lstm(x)
h_last = h[-1]
z = self.drop(self.relu(self.fc1(h_last)))
out = self.fc2(z).squeeze(-1)
return out
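# Tensor shapes through the model (batch N, window length L, features F),
# noted here for reference:
#   x: (N, L, F) -> LSTM final hidden state h: (num_layers, N, hidden_dim)
#   h[-1]: (N, hidden_dim) -> fc1 + ReLU + dropout: (N, dense_dim)
#   fc2 + squeeze(-1): (N,), matching the (N,) target batches from the loader.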
def choose_val_cases(train_pool): # type: ignore
train_pool = sorted(train_pool)
if len(train_pool) < 20:
return [train_pool[-1]]
return train_pool[-3:]
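# E.g. (illustrative): an 11-case pool holds out only its last case for
# validation; a pool of 20 or more cases holds out its last three.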
def _make_grad_scaler(use_amp: bool, device: str):
"""
Compatible across torch versions:
- Prefer torch.amp.GradScaler('cuda', ...) on GPU when available
- Fallback to torch.cuda.amp.GradScaler(...) if needed
"""
if not use_amp:
return None
# Try new API (positional device arg)
try:
return torch.amp.GradScaler("cuda", enabled=True)
except Exception:
# Fallback older cuda.amp
return torch.cuda.amp.GradScaler(enabled=True)
def _autocast_ctx(use_amp: bool, device: str):
"""
Returns the right autocast context manager for the torch version.
"""
if not use_amp:
return torch.no_grad() # dummy; we won't use it in training sections
try:
# Newer API (positional)
return torch.amp.autocast("cuda", enabled=True)
except Exception:
# Older API
return torch.cuda.amp.autocast(enabled=True)
def train_one_trial( # type: ignore
df_all,
feature_cols,
target_col,
train_cases,
val_cases,
window_size,
hidden_dim,
dense_dim,
num_layers,
dropout,
lr,
weight_decay,
batch_size,
huber_beta,
max_epochs,
patience,
grad_clip,
device,
):
x_train, y_train = build_windows(df_all, train_cases, feature_cols, target_col, window_size)
x_val, y_val = build_windows(df_all, val_cases, feature_cols, target_col, window_size)
if x_train is None or x_val is None:
return np.inf, None, None
n_features = x_train.shape[-1]
scaler_x = StandardScaler()
scaler_x.fit(x_train.reshape(-1, n_features))
x_train_scaled = scaler_x.transform(x_train.reshape(-1, n_features)).reshape(x_train.shape)
x_val_scaled = scaler_x.transform(x_val.reshape(-1, n_features)).reshape(x_val.shape)
scaler_y = StandardScaler()
y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1)).ravel()
pin = (device == "cuda")
train_ds = WindowDataset(x_train_scaled, y_train_scaled)
val_ds = WindowDataset(x_val_scaled, np.zeros(len(x_val_scaled), dtype=np.float32))
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, pin_memory=pin)
val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False, pin_memory=pin)
model = LSTMRegressor(
input_dim=n_features,
hidden_dim=hidden_dim,
dense_dim=dense_dim,
num_layers=num_layers,
dropout=dropout
).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
criterion = nn.SmoothL1Loss(beta=huber_beta)
use_amp = (device == "cuda")
scaler_amp = _make_grad_scaler(use_amp, device)
best_val_rmse = np.inf
best_state = None
best_scalers = None
wait = 0
for _ in range(max_epochs):
model.train()
for xb, yb in train_loader:
xb = xb.to(device, non_blocking=True)
yb = yb.to(device, non_blocking=True)
optimizer.zero_grad(set_to_none=True)
if use_amp and scaler_amp is not None:
# autocast + GradScaler
try:
with torch.amp.autocast("cuda", enabled=True):
pred = model(xb)
loss = criterion(pred, yb)
except Exception:
with torch.cuda.amp.autocast(enabled=True):
pred = model(xb)
loss = criterion(pred, yb)
scaler_amp.scale(loss).backward()
if grad_clip is not None:
scaler_amp.unscale_(optimizer)
torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
scaler_amp.step(optimizer)
scaler_amp.update()
else:
# CPU / no AMP
pred = model(xb)
loss = criterion(pred, yb)
loss.backward()
if grad_clip is not None:
torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
optimizer.step()
# Validation (BATCHED)
model.eval()
preds_scaled = []
with torch.no_grad():
for xb, _ in val_loader:
xb = xb.to(device, non_blocking=True)
if use_amp:
try:
with torch.amp.autocast("cuda", enabled=True):
pb = model(xb)
except Exception:
with torch.cuda.amp.autocast(enabled=True):
pb = model(xb)
else:
pb = model(xb)
preds_scaled.append(pb.detach().float().cpu().numpy())
y_val_pred_scaled = np.concatenate(preds_scaled, axis=0).ravel()
y_val_pred = scaler_y.inverse_transform(y_val_pred_scaled.reshape(-1, 1)).ravel()
val_rmse = rmse(y_val, y_val_pred)
if val_rmse < best_val_rmse:
best_val_rmse = val_rmse
best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
best_scalers = (scaler_x, scaler_y)
wait = 0
else:
wait += 1
if wait >= patience:
break
# cleanup
del model, optimizer, criterion, train_loader, val_loader, train_ds, val_ds
gc.collect()
if torch.cuda.is_available():
torch.cuda.empty_cache()
return best_val_rmse, best_state, best_scalers
def sample_params(device: str = "cpu"):
window_size = int(np.random.choice([40, 60, 80]))
hidden_dim = int(np.random.choice([32, 64, 128, 256], p=[0.25, 0.35, 0.30, 0.10]))
dense_dim = int(np.random.choice([16, 32, 64, 128], p=[0.20, 0.40, 0.30, 0.10]))
num_layers = int(np.random.choice([1, 2, 3], p=[0.45, 0.40, 0.15]))
dropout = float(np.random.choice([0.0, 0.1, 0.2, 0.3]))
lr = float(10 ** np.random.uniform(-4.0, -2.6))
weight_decay = float(10 ** np.random.uniform(-6.0, -3.5))
huber_beta = float(np.random.choice([0.5, 1.0, 2.0]))
cost = window_size * hidden_dim * num_layers
if device == "cuda":
if cost >= 80 * 256 * 2:
batch_size = 16
elif cost >= 60 * 256 * 2 or cost >= 80 * 128 * 2:
batch_size = 32
else:
batch_size = int(np.random.choice([32, 64], p=[0.55, 0.45]))
if hidden_dim >= 256 and num_layers >= 2 and dense_dim > 64:
dense_dim = 64
else:
batch_size = int(np.random.choice([32, 64, 128]))
return {
"window_size": window_size,
"hidden_dim": hidden_dim,
"dense_dim": dense_dim,
"num_layers": num_layers,
"dropout": dropout,
"lr": lr,
"weight_decay": weight_decay,
"batch_size": batch_size,
"huber_beta": huber_beta
}
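# A single draw from this search space might look like (hypothetical values):
#   {"window_size": 60, "hidden_dim": 128, "dense_dim": 32, "num_layers": 2,
#    "dropout": 0.1, "lr": 3.2e-4, "weight_decay": 1.4e-5, "batch_size": 32,
#    "huber_beta": 1.0}
# lr is log-uniform over [1e-4, 10**-2.6 ~ 2.5e-3]; the cost heuristic above
# caps batch_size (and dense_dim) for the largest window/hidden/layer combos
# to keep VRAM usage bounded on GPU.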
def main():
df = pd.read_csv(DATA_FILE)
thickness_cols = [c for c in df.columns if c.startswith("tw")]
feature_cols = ["Displ", "CumDispl", "LoadDir", "CycleNum", "MaxAmpl"] + thickness_cols
target_col = "Force"
all_cases = sorted(df["Case"].unique())
train_pool = [c for c in all_cases if c != PREDICT_CASE]
val_cases = choose_val_cases(train_pool)
train_cases = [c for c in train_pool if c not in val_cases]
print("============================================================")
print("DATA")
print("Predict (test) case:", PREDICT_CASE)
print("Train pool cases:", train_pool)
print("Validation cases:", val_cases)
print("Train cases:", train_cases)
print("n_cases total:", len(all_cases), "| train_pool:", len(train_pool))
print("============================================================")
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", device)
best = {"rmse": np.inf, "params": None, "state": None, "scalers": None}
for t in range(1, N_TRIALS + 1):
params = sample_params(device=device)
try:
val_rmse, best_state, best_scalers = train_one_trial(
df_all=df,
feature_cols=feature_cols,
target_col=target_col,
train_cases=train_cases,
val_cases=val_cases,
max_epochs=MAX_EPOCHS_TUNE,
patience=PATIENCE_TUNE,
grad_clip=1.0,
device=device,
**params
)
except torch.cuda.OutOfMemoryError:
print(f"[Trial {t:02d}] OOM -> skipping trial | {params}")
if torch.cuda.is_available():
torch.cuda.empty_cache()
continue
print(f"[Trial {t:02d}] val RMSE={val_rmse:.4f} | {params}")
if val_rmse < best["rmse"]:
best.update(rmse=val_rmse, params=params, state=best_state, scalers=best_scalers)
del best_state, best_scalers
gc.collect()
if torch.cuda.is_available():
torch.cuda.empty_cache()
print("\n====================")
print("BEST (TUNING)")
print("val RMSE:", best["rmse"])
print("params:", best["params"])
print("====================\n")
if best["params"] is None:
raise RuntimeError("No valid trial found. Try lowering window sizes or check your data.")
best_trial_params = cast(dict, best["params"])
train_pool = [c for c in all_cases if c != PREDICT_CASE]
final_val_cases = choose_val_cases(train_pool)
final_train_cases = [c for c in train_pool if c not in final_val_cases]
print("============================================================")
print("FINAL RETRAIN")
print("Predict (test) case:", PREDICT_CASE)
print("Final validation cases:", final_val_cases)
print("Final train cases:", final_train_cases)
print("Using best parameters:", best["params"])
print("============================================================")
best_params_dict = dict(best_trial_params)
final_val_rmse, final_state, final_scalers = train_one_trial(
df_all=df,
feature_cols=feature_cols,
target_col=target_col,
train_cases=final_train_cases,
val_cases=final_val_cases,
max_epochs=MAX_EPOCHS_FINAL,
patience=PATIENCE_FINAL,
grad_clip=1.0,
device=device,
**best_params_dict
)
print("Predictions for Case 8:", y_pred[:10])
print(f"Final retrain val RMSE = {final_val_rmse:.4f}")
if final_scalers is None or final_state is None:
raise RuntimeError("Final training failed unexpectedly.")
scaler_x, scaler_y = final_scalers
df_test = df[df["Case"] == PREDICT_CASE].copy()
OUT_FILE = "/home/cimne/MLCivilEng/RESILINK/data/width_optimization/2W/H30_B29/case_8_with_rnn_predictions_pytorch.csv"
df_test_pred.to_csv(OUT_FILE, index=False)
window_size_test = best_params_dict["window_size"]
x_test, y_test, test_indices = make_windows_for_case(
df_test, feature_cols, target_col, window_size_test
)
print(f"\nSaved predictions to {OUT_FILE}")
if x_test.shape[0] == 0:
raise RuntimeError(
f"Case {PREDICT_CASE} has fewer points ({len(df_test)}) than "
f"window_size ({window_size_test})."
)
n_features = x_test.shape[-1]
x_test_scaled = scaler_x.transform(x_test.reshape(-1, n_features)).reshape(x_test.shape)
final_model = LSTMRegressor(
input_dim=n_features,
hidden_dim=best_params_dict["hidden_dim"],
dense_dim=best_params_dict["dense_dim"],
num_layers=best_params_dict["num_layers"],
dropout=best_params_dict["dropout"]
).to(device)
final_model.load_state_dict(final_state)
final_model.eval()
pin = (device == "cuda")
test_ds = WindowDataset(x_test_scaled, np.zeros(len(x_test_scaled), dtype=np.float32))
test_loader = DataLoader(test_ds, batch_size=best_params_dict["batch_size"], shuffle=False, pin_memory=pin)
use_amp = (device == "cuda")
preds = []
with torch.no_grad():
for xb, _ in test_loader:
xb = xb.to(device, non_blocking=True)
if use_amp:
try:
with torch.amp.autocast("cuda", enabled=True):
pb = final_model(xb)
except Exception:
with torch.cuda.amp.autocast(enabled=True):
pb = final_model(xb)
else:
pb = final_model(xb)
preds.append(pb.detach().float().cpu().numpy())
y_pred_scaled = np.concatenate(preds, axis=0).ravel()
y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()
test_rmse = rmse(y_test, y_pred)
print(f"Test RMSE on Case {PREDICT_CASE} = {test_rmse:.4f}")
df_test_pred = df_test.copy()
df_test_pred["Force_RNN"] = np.nan
for idx, pred in zip(test_indices, y_pred):
df_test_pred.loc[idx, "Force_RNN"] = pred
out_file = f"../../data/hysteretic_curves/{W}W/H{H}_B{B}/case_{PREDICT_CASE}_with_rnn_preds.csv"
df_test_pred.to_csv(out_file, index=False)
print(f"\nSaved predictions to: {out_file}")
del final_model, test_loader, test_ds
gc.collect()
if torch.cuda.is_available():
torch.cuda.empty_cache()
if __name__ == "__main__":
main()
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
# Use the modern weight_norm parametrization (no deprecation warning):
from torch.nn.utils.parametrizations import weight_norm
# ============================================================
# 0) Config
# ============================================================
SEED = 123
np.random.seed(SEED)
torch.manual_seed(SEED)
# --- Geometry identifiers (as in your workflow) ---
W = 2 # Number of windows
B = 29 # Width identifier
H = 30
if W == 2:
H = 30
elif W == 3:
H = 45
elif W == 5:
H = 60
else:
print("Warning: H not set for W != 2, 3, or 5")
DATA_FILE = f"../../data/hysteretic_curves/{W}W/H{H}_B{B}/merged_dataset_points.csv"
# Choose test case (held out completely)
PREDICT_CASE = 0
if W == 2:
PREDICT_CASE = 2
elif W == 3:
PREDICT_CASE = 6
elif W == 5:
PREDICT_CASE = 13
else:
print("Warning: PREDICT_CASE not set for W != 2, 3, or 5")
WINDOW_SIZE = 60  # typically 40-80
BATCH_SIZE = 128
LR = 1e-3
WEIGHT_DECAY = 1e-4
MAX_EPOCHS = 200
PATIENCE = 15
GRAD_CLIP = 1.0
# Huber beta (try values from 0.5 to 2.0)
HUBER_BETA = 1.0
# Toggle weight_norm on/off easily (disable it to simplify)
USE_WEIGHT_NORM = True
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE)
# ============================================================
# Utils
# ============================================================
def rmse(y_true, y_pred):
y_true = np.asarray(y_true).ravel()
y_pred = np.asarray(y_pred).ravel()
return float(np.sqrt(np.mean((y_true - y_pred) ** 2)))
def choose_val_cases(train_pool):
"""<20 -> 1 caso fijo, >=20 -> 3 casos fijos."""
train_pool = sorted(train_pool)
if len(train_pool) < 20:
return [train_pool[-1]]
else:
return train_pool[-3:]
# ============================================================
# 1) Data + features
# ============================================================
df = pd.read_csv(DATA_FILE)
thickness_cols = [c for c in df.columns if c.startswith("tw")]
feature_cols = ["Displ", "CumDispl", "LoadDir", "CycleNum", "MaxAmpl"] + thickness_cols
TARGET_COL = "Force"
df_train_all = df[df["Case"] != PREDICT_CASE].copy()
df_test = df[df["Case"] == PREDICT_CASE].copy()
train_pool_cases = sorted(df_train_all["Case"].unique())
val_cases = choose_val_cases(train_pool_cases)
train_cases = [c for c in train_pool_cases if c not in val_cases]
print("Train pool cases:", train_pool_cases)
print("Validation cases:", val_cases)
print("Final train cases:", train_cases)
print("Test case:", sorted(df_test["Case"].unique()))
# ============================================================
# 2) Sliding windows
# ============================================================
def make_windows_for_case(df_case, feature_cols, target_col, window_size):
df_case = df_case.sort_index()
X = df_case[feature_cols].values
y = df_case[target_col].values
idx = df_case.index.to_numpy()
if len(df_case) < window_size:
return np.empty((0, window_size, len(feature_cols))), np.empty((0,)), np.array([])
Xw, yw, iw = [], [], []
for end in range(window_size - 1, len(df_case)):
start = end - window_size + 1
Xw.append(X[start:end+1])
yw.append(y[end])
iw.append(idx[end])
return np.array(Xw), np.array(yw), np.array(iw)
def build_windows_for_cases(df_all, cases, feature_cols, target_col, window_size):
X_list, y_list = [], []
for case in sorted(cases):
df_case = df_all[df_all["Case"] == case]
Xc, yc, _ = make_windows_for_case(df_case, feature_cols, target_col, window_size)
if len(Xc) > 0:
X_list.append(Xc)
y_list.append(yc)
print(f"Case {case}: {Xc.shape[0]} windows")
else:
print(f"Case {case}: 0 windows (len<{window_size})")
if len(X_list) == 0:
return None, None
return np.concatenate(X_list, axis=0), np.concatenate(y_list, axis=0)
print("\n--- Building TRAIN windows ---")
X_train, y_train = build_windows_for_cases(
df_train_all, train_cases, feature_cols, TARGET_COL, WINDOW_SIZE)
print("\n--- Building VAL windows ---")
X_val, y_val = build_windows_for_cases(
df_train_all, val_cases, feature_cols, TARGET_COL, WINDOW_SIZE)
print("\n--- Building TEST windows ---")
X_test, y_test, test_indices = make_windows_for_case(
df_test, feature_cols, TARGET_COL, WINDOW_SIZE)
print("Test windows:", X_test.shape)
if X_train is None or X_val is None:
raise RuntimeError(
"No windows created for train/val. Check WINDOW_SIZE or data length per case."
)
# ============================================================
# 3) Scaling (fit only on TRAIN)
# ============================================================
n_features = X_train.shape[-1]
scaler_X = StandardScaler()
scaler_X.fit(X_train.reshape(-1, n_features))
X_train_scaled = scaler_X.transform(X_train.reshape(-1, n_features)).reshape(X_train.shape)
X_val_scaled = scaler_X.transform(X_val.reshape(-1, n_features)).reshape(X_val.shape)
X_test_scaled = scaler_X.transform(X_test.reshape(-1, n_features)).reshape(X_test.shape)
scaler_y = StandardScaler()
y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1)).ravel()
y_val_scaled = scaler_y.transform(y_val.reshape(-1, 1)).ravel()
y_test_scaled = scaler_y.transform(y_test.reshape(-1, 1)).ravel()
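# Scalers are fit on TRAIN windows only, so validation/test statistics never
# leak into the model; scaled predictions are mapped back to force units with
#   scaler_y.inverse_transform(pred_scaled.reshape(-1, 1)).ravel()
# as done in sections 6 and 7 below.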
# ============================================================
# 4) PyTorch Dataset
# ============================================================
class WindowDataset(Dataset):
def __init__(self, X, y):
self.X = torch.tensor(X, dtype=torch.float32)
self.y = torch.tensor(y, dtype=torch.float32)
def __len__(self):
return len(self.X)
def __getitem__(self, idx):
return self.X[idx], self.y[idx]
train_loader = DataLoader(WindowDataset(X_train_scaled, y_train_scaled),
batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(WindowDataset(X_val_scaled, y_val_scaled),
batch_size=BATCH_SIZE, shuffle=False)
# ============================================================
# 5) Deep Temporal CNN (TCN) - Residual Dilated Causal Conv
# ============================================================
class Chomp1d(nn.Module):
"""Remove padding on the right to keep causality and correct length."""
def __init__(self, chomp_size: int):
super().__init__()
self.chomp_size = chomp_size
def forward(self, x):
if self.chomp_size == 0:
return x
return x[:, :, :-self.chomp_size]
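# Why chomp: nn.Conv1d with padding=(kernel_size - 1) * dilation pads both
# ends, so the output is `padding` steps too long, and the surplus right-hand
# steps would let position t see inputs later than t. Trimming exactly
# `padding` steps from the right restores length L and keeps the convolution
# causal.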
class TemporalBlock(nn.Module):
def __init__(self, in_ch, out_ch, kernel_size, dilation, dropout, use_weight_norm=True):
super().__init__()
padding = (kernel_size - 1) * dilation
conv1 = nn.Conv1d(in_ch, out_ch, kernel_size, padding=padding, dilation=dilation)
conv2 = nn.Conv1d(out_ch, out_ch, kernel_size, padding=padding, dilation=dilation)
if use_weight_norm:
conv1 = weight_norm(conv1)
conv2 = weight_norm(conv2)
self.conv1 = conv1
self.chomp1 = Chomp1d(padding)
self.relu1 = nn.ReLU()
self.drop1 = nn.Dropout(dropout)
self.conv2 = conv2
self.chomp2 = Chomp1d(padding)
self.relu2 = nn.ReLU()
self.drop2 = nn.Dropout(dropout)
self.downsample = nn.Conv1d(in_ch, out_ch, 1) if in_ch != out_ch else None
self.relu = nn.ReLU()
        # stable initialization (also works with parametrizations)
nn.init.kaiming_normal_(self.conv1.weight)
nn.init.kaiming_normal_(self.conv2.weight)
if self.downsample is not None:
nn.init.kaiming_normal_(self.downsample.weight)
def forward(self, x):
out = self.drop1(self.relu1(self.chomp1(self.conv1(x))))
out = self.drop2(self.relu2(self.chomp2(self.conv2(out))))
res = x if self.downsample is None else self.downsample(x)
return self.relu(out + res)
class DeepTCNRegressor(nn.Module):
def __init__(self, n_features, channels=(64, 64, 128, 128, 256),
kernel_size=5, dropout=0.15, head_dim=128, use_weight_norm=True):
super().__init__()
layers = []
in_ch = n_features
dilation = 1
for out_ch in channels:
layers.append(
TemporalBlock(in_ch, out_ch, kernel_size, dilation=dilation,
dropout=dropout, use_weight_norm=use_weight_norm)
)
in_ch = out_ch
dilation *= 2
self.tcn = nn.Sequential(*layers)
self.pool = nn.AdaptiveAvgPool1d(1)
self.head = nn.Sequential(
nn.Linear(in_ch, head_dim),
nn.ReLU(),
nn.Dropout(dropout),
nn.Linear(head_dim, 1)
)
def forward(self, x):
x = x.transpose(1, 2) # (N, L, F) -> (N, F, L)
z = self.tcn(x) # (N, C, L)
z = self.pool(z).squeeze(-1) # (N, C)
y = self.head(z).squeeze(-1) # (N,)
return y
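# Receptive field check (illustrative): each TemporalBlock has two causal
# convolutions with dilation d, adding 2*(kernel_size - 1)*d steps of history.
# With kernel_size=5 and dilations 1, 2, 4, 8, 16 across the five blocks, the
# last output step sees 1 + 2*4*(1+2+4+8+16) = 249 input steps, comfortably
# covering WINDOW_SIZE = 60.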
model = DeepTCNRegressor(
n_features=n_features,
channels=(64, 64, 128, 128, 256),
kernel_size=5,
dropout=0.15,
head_dim=128,
use_weight_norm=USE_WEIGHT_NORM
).to(DEVICE)
criterion = nn.SmoothL1Loss(beta=HUBER_BETA) # Huber
optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
# ============================================================
# 6) Training loop with early stopping on VAL RMSE (original units)
# ============================================================
best_val_rmse = np.inf
BEST_STATE = None
WAIT = 0
for epoch in range(MAX_EPOCHS):
# ---- train ----
model.train()
train_losses = []
for Xb, yb in train_loader:
Xb = Xb.to(DEVICE)
yb = yb.to(DEVICE)
optimizer.zero_grad()
pred = model(Xb)
loss = criterion(pred, yb)
loss.backward()
if GRAD_CLIP is not None:
torch.nn.utils.clip_grad_norm_(model.parameters(), GRAD_CLIP)
optimizer.step()
train_losses.append(loss.item())
avg_train_loss = float(np.mean(train_losses))
# ---- validate (RMSE in original units) ----
model.eval()
val_preds_scaled = []
val_true_scaled = []
with torch.no_grad():
for Xb, yb in val_loader:
Xb = Xb.to(DEVICE)
pred = model(Xb).detach().cpu().numpy().ravel()
val_preds_scaled.append(pred)
val_true_scaled.append(yb.detach().cpu().numpy().ravel())
val_preds_scaled = np.concatenate(val_preds_scaled, axis=0)
val_true_scaled = np.concatenate(val_true_scaled, axis=0)
val_pred = scaler_y.inverse_transform(val_preds_scaled.reshape(-1, 1)).ravel()
val_true = scaler_y.inverse_transform(val_true_scaled.reshape(-1, 1)).ravel()
val_rmse = rmse(val_true, val_pred)
print(f"Epoch {epoch+1:03d} | Train loss={avg_train_loss:.6f} | Val RMSE={val_rmse:.4f}")
# early stopping on val RMSE
if val_rmse < best_val_rmse:
best_val_rmse = val_rmse
BEST_STATE = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}
WAIT = 0
else:
WAIT += 1
if WAIT >= PATIENCE:
print("Early stopping triggered.")
break
model.load_state_dict(BEST_STATE)
print(f"\nBest Val RMSE: {best_val_rmse:.4f}")
# ============================================================
# 7) Predict test case
# ============================================================
model.eval()
with torch.no_grad():
X_test_t = torch.tensor(X_test_scaled, dtype=torch.float32).to(DEVICE)
y_pred_scaled = model(X_test_t).detach().cpu().numpy().ravel()
y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()
# Test RMSE (optional, but useful)
test_rmse = rmse(y_test, y_pred)
print(f"Test RMSE on Case {PREDICT_CASE}: {test_rmse:.4f}")
# ============================================================
# 8) Insert predictions back into df_test and save
# ============================================================
df_test_pred = df_test.copy()
df_test_pred["Force_TCN"] = np.nan
for idx, pred in zip(test_indices, y_pred):
df_test_pred.loc[idx, "Force_TCN"] = pred
OUT_FILE = f"../../data/hysteretic_curves/{W}W/H{H}_B{B}/case_{PREDICT_CASE}_with_tcn_preds.csv"
df_test_pred.to_csv(OUT_FILE, index=False)
print(f"\nSaved predictions to {OUT_FILE}")
print("First predictions:", y_pred[:10])