feat(run): Add script to execute LSTM predictions with specified parameters

parent 2c5cf557
"""
FAST + STABLE (no "too many open files")
========================================
LSTM tuning (CV by CASE) + final retrain on 100% train_pool + predict PREDICT_CASE (PyTorch)
LSTM tuning (CV by CASE) + final retrain on 100% train_pool + predict predict_case (PyTorch)
CV rules:
- Use ONLY the first N_TRAIN_CASES cases as train_pool (cases <= N_TRAIN_CASES)
- If N_TRAIN_CASES < 20 -> Leave-One-Out CV by CASE
- Use ONLY the first n_train_case cases as train_pool (cases <= n_train_case)
- If n_train_case < 20 -> Leave-One-Out CV by CASE
- Else -> 5-fold CV by CASE (KFold on case IDs)
After CV tuning:
- Train FINAL model on 100% train_pool (NO early stopping; keep best by TRAIN loss)
- Predict PREDICT_CASE (assumed not in train_pool) and save CSV with Force_RNN column
- Predict predict_case (assumed not in train_pool) and save CSV with Force_RNN column
Speed-ups:
- Vectorized window creation via sliding_window_view
......@@ -26,8 +26,8 @@ Stability fix:
from __future__ import annotations
import argparse
from typing import Dict, Tuple, List, Optional, cast
import os
import gc
import numpy as np
......@@ -56,59 +56,6 @@ torch.manual_seed(SEED)
if torch.cuda.is_available():
torch.backends.cudnn.benchmark = True
W = 2
B = 29
H = 30
if W == 2:
H = 30
elif W == 3:
H = 45
elif W == 5:
H = 60
else:
print("Warning: H not set for W != 2, 3, or 5")
DATA_FILE = f"../../data/hysteretic_curves/{W}W/H{H}_B{B}/merged_dataset_points.csv"
N_TRAIN_CASES = 0
PREDICT_CASE = 0
if W == 2:
N_TRAIN_CASES = 8
if B == 29:
PREDICT_CASE = 12
elif B == 34:
PREDICT_CASE = 11
else:
print(f"Warning: No PREDICT_CASE set for W={W}, B={B}")
elif W == 3:
N_TRAIN_CASES = 16
if B == 29:
PREDICT_CASE = 22
elif B == 34:
PREDICT_CASE = 21
else:
print(f"Warning: No PREDICT_CASE set for W={W}, B={B}")
elif W == 5:
N_TRAIN_CASES = 64
PREDICT_CASE = 13
else:
print("Warning: N_TRAIN_CASES and PREDICT_CASE not set for W != 2, 3, or 5")
N_TRIALS = 15
MAX_EPOCHS_TUNE = 40
PATIENCE_TUNE = 5
MAX_EPOCHS_FINAL = 200
# DataLoader knobs (kept safe)
PIN_MEMORY_ON_CUDA = True
# torch.compile knobs (stable choice)
# - OFF for CV (many recompiles, can be slower + more resources)
# - ON for FINAL (single compile)
COMPILE_IN_CV = False
COMPILE_IN_FINAL = True
# -------------------------
# Utils
# -------------------------
......@@ -651,40 +598,103 @@ def sample_params(device: str = "cpu"):
# Main
# -------------------------
def main():
df = pd.read_csv(DATA_FILE)
"""
Main execution pipeline: train the LSTM and predict hysteretic curves.
"""
# Parse command-line arguments (--W, --B, --it) and derive the run configuration
parser = argparse.ArgumentParser()
parser.add_argument("--W", type=int, required=True)
parser.add_argument("--B", type=int, required=True)
parser.add_argument("--it", type=int, required=True)
args = parser.parse_args()
w_val = args.W
b_val = args.B
h_val = 30
if w_val == 2:
h_val = 30
elif w_val == 3:
h_val = 45
elif w_val == 5:
h_val = 60
else:
print("Warning: H not set for W != 2, 3, or 5")
data_file = f"../../data/hysteretic_curves/{w_val}W/H{h_val}_B{b_val}/merged_dataset_points.csv"
n_train_case = 0
predict_case = 0
if w_val == 2:
n_train_case = 8
if b_val == 29:
predict_case = 12
elif b_val == 34:
predict_case = 11
else:
print(f"Warning: No predict_case set for W={w_val}, B={b_val}")
elif w_val == 3:
n_train_case = 16
if b_val == 29:
predict_case = 22
elif b_val == 34:
predict_case = 21
else:
print(f"Warning: No predict_case set for W={w_val}, B={b_val}")
elif w_val == 5:
n_train_case = 64
predict_case = 13
else:
print("Warning: n_train_case and predict_case not set for W != 2, 3, or 5")
n_trials = 15
max_epachs_tune = 40
patience_tune = 5
max_epachs_final = 200
# DataLoader knobs (kept safe)
PIN_MEMORY_ON_CUDA = True
# torch.compile knobs (stable choice)
# - OFF for CV (many recompiles, can be slower + more resources)
# - ON for FINAL (single compile)
compile_in_cv = False
compile_in_final = True
df = pd.read_csv(data_file)
thickness_cols = [c for c in df.columns if c.startswith("tw")]
feature_cols = ["Displ", "CumDispl", "LoadDir", "CycleNum", "MaxAmpl"] + thickness_cols
target_col = "Force"
all_cases = sorted(df["Case"].unique())
train_pool = [int(c) for c in all_cases if c <= N_TRAIN_CASES]
train_pool = [int(c) for c in all_cases if c <= n_train_case]
if int(PREDICT_CASE) in train_pool:
raise RuntimeError(f"PREDICT_CASE={PREDICT_CASE} is in train_pool. This must not happen.")
if int(predict_case) in train_pool:
raise RuntimeError(f"predict_case={predict_case} is in train_pool. This must not happen.")
device = "cuda" if torch.cuda.is_available() else "cpu"
print("============================================================")
print("DATA")
print("Device:", device)
print("Predict (test) case:", PREDICT_CASE)
print("Predict (test) case:", predict_case)
print("Train pool cases (for CV):", train_pool)
print("n_cases total:", len(all_cases), "| train_pool:", len(train_pool))
print("torch.compile CV:", "ON" if (device == "cuda" and COMPILE_IN_CV) else "OFF")
print("torch.compile FINAL:", "ON" if (device == "cuda" and COMPILE_IN_FINAL) else "OFF")
print("torch.compile CV:", "ON" if (device == "cuda" and compile_in_cv) else "OFF")
print("torch.compile FINAL:", "ON" if (device == "cuda" and compile_in_final) else "OFF")
print("============================================================")
folds, strategy = make_case_folds(train_pool, seed=SEED, n_splits=5)
print(f"CV strategy: {strategy} | n_folds={len(folds)}")
# Cache cases needed (train_pool for CV, plus test case for prediction)
cases_to_cache_all = sorted(set(train_pool + [int(PREDICT_CASE)]))
cases_to_cache_all = sorted(set(train_pool + [int(predict_case)]))
# ---- CV Tuning ----
best = {"mean": np.inf, "std": np.inf, "params": None}
for t in range(1, N_TRIALS + 1):
for t in range(1, n_trials + 1):
params = sample_params(device=device)
# Separate window_size from model params (cached funcs must NOT receive window_size)
......@@ -717,11 +727,11 @@ def main():
feature_dim=feature_dim,
folds=folds,
params_model=params_model,
max_epochs=MAX_EPOCHS_TUNE,
patience=PATIENCE_TUNE,
max_epochs=max_epachs_tune,
patience=patience_tune,
grad_clip=1.0,
device=device,
compile_model=(COMPILE_IN_CV and device == "cuda"),
compile_model=(compile_in_cv and device == "cuda"),
)
if fold_rmses is None:
......@@ -784,22 +794,22 @@ def main():
cache_ws=cache_ws_best,
feature_dim=feature_dim,
train_cases=train_pool,
max_epochs=MAX_EPOCHS_FINAL,
max_epochs=max_epachs_final,
grad_clip=1.0,
device=device,
compile_model=(COMPILE_IN_FINAL and device == "cuda"),
compile_model=(compile_in_final and device == "cuda"),
**best_params_model
)
scaler_x, scaler_y = final_scalers
# ---- Predict PREDICT_CASE ----
df_test = df[df["Case"] == PREDICT_CASE].copy()
# ---- Predict predict_case ----
df_test = df[df["Case"] == predict_case].copy()
x_test, y_test, test_indices = cache_ws_best[int(PREDICT_CASE)]
x_test, y_test, test_indices = cache_ws_best[int(predict_case)]
if x_test.shape[0] == 0:
raise RuntimeError(
f"Case {PREDICT_CASE} has fewer points ({len(df_test)}) than window_size ({ws_best})."
f"Case {predict_case} has fewer points ({len(df_test)}) than window_size ({ws_best})."
)
x_test_scaled = scaler_x.transform(x_test.reshape(-1, feature_dim)).reshape(x_test.shape).astype(np.float32, copy=False)
......@@ -839,7 +849,7 @@ def main():
y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()
test_rmse = rmse(y_test, y_pred)
print(f"Test RMSE on Case {PREDICT_CASE} = {test_rmse:.4f}")
print(f"Test RMSE on Case {predict_case} = {test_rmse:.4f}")
# ---- Save results ----
df_test_pred = df_test.copy()
......@@ -847,7 +857,7 @@ def main():
for idx, pred in zip(test_indices, y_pred):
df_test_pred.loc[idx, "Force_RNN"] = pred
out_file = f"../../data/hysteretic_curves/{W}W/H{H}_B{B}/case_{PREDICT_CASE}_with_rnn_preds.csv"
out_file = f"../../data/hysteretic_curves/{w_val}W/H{h_val}_B{b_val}/case_{predict_case}_with_rnn_preds.csv"
df_test_pred.to_csv(out_file, index=False)
print(f"\nSaved predictions to: {out_file}")
......
#!/bin/bash
# Run predict_hysteretic_curves.py once for each (W, B) pair listed below,
# one after another. There is no `set -e`, so a failing run does not prevent
# the later runs from starting.
#
# NOTE(review): the argparse setup shown in this change declares --W, --B and
# --it as required, but none of these invocations pass --it. If that parser
# belongs to predict_hysteretic_curves.py, every call below exits with a usage
# error -- confirm the intended --it value and add it to each line.
python predict_hysteretic_curves.py --W 2 --B 29
python predict_hysteretic_curves.py --W 2 --B 34
python predict_hysteretic_curves.py --W 3 --B 29
python predict_hysteretic_curves.py --W 3 --B 34
python predict_hysteretic_curves.py --W 5 --B 34
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment