feat(run): Add script to execute LSTM predictions with specified parameters

parent 2c5cf557
""" """
FAST + STABLE (no "too many open files") FAST + STABLE (no "too many open files")
======================================== ========================================
LSTM tuning (CV by CASE) + final retrain on 100% train_pool + predict PREDICT_CASE (PyTorch) LSTM tuning (CV by CASE) + final retrain on 100% train_pool + predict predict_case (PyTorch)
CV rules: CV rules:
- Use ONLY the first N_TRAIN_CASES cases as train_pool (cases <= N_TRAIN_CASES) - Use ONLY the first n_train_case cases as train_pool (cases <= n_train_case)
- If N_TRAIN_CASES < 20 -> Leave-One-Out CV by CASE - If n_train_case < 20 -> Leave-One-Out CV by CASE
- Else -> 5-fold CV by CASE (KFold on case IDs) - Else -> 5-fold CV by CASE (KFold on case IDs)
After CV tuning: After CV tuning:
- Train FINAL model on 100% train_pool (NO early stopping; keep best by TRAIN loss) - Train FINAL model on 100% train_pool (NO early stopping; keep best by TRAIN loss)
- Predict PREDICT_CASE (assumed not in train_pool) and save CSV with Force_RNN column - Predict predict_case (assumed not in train_pool) and save CSV with Force_RNN column
Speed-ups: Speed-ups:
- Vectorized window creation via sliding_window_view - Vectorized window creation via sliding_window_view
...@@ -26,8 +26,8 @@ Stability fix: ...@@ -26,8 +26,8 @@ Stability fix:
from __future__ import annotations from __future__ import annotations
import argparse
from typing import Dict, Tuple, List, Optional, cast from typing import Dict, Tuple, List, Optional, cast
import os
import gc import gc
import numpy as np import numpy as np
...@@ -56,59 +56,6 @@ torch.manual_seed(SEED) ...@@ -56,59 +56,6 @@ torch.manual_seed(SEED)
if torch.cuda.is_available(): if torch.cuda.is_available():
torch.backends.cudnn.benchmark = True torch.backends.cudnn.benchmark = True
# -------------------------
# Experiment configuration
# -------------------------
# W and B select which dataset sub-folder ({W}W/H{H}_B{B}) is read.
# (Exact physical meaning of W/B/H is not shown in this file.)
W = 2
B = 29

# H is derived from W; it stays at the default 30 for unrecognized W.
_H_FOR_W = {2: 30, 3: 45, 5: 60}
H = _H_FOR_W.get(W, 30)
if W not in _H_FOR_W:
    print("Warning: H not set for W != 2, 3, or 5")

DATA_FILE = f"../../data/hysteretic_curves/{W}W/H{H}_B{B}/merged_dataset_points.csv"

# Size of the training pool (cases <= N_TRAIN_CASES) and the held-out case
# to predict; both remain 0 when the (W, B) combination is unrecognized.
N_TRAIN_CASES = 0
PREDICT_CASE = 0
if W == 2:
    N_TRAIN_CASES = 8
    PREDICT_CASE = {29: 12, 34: 11}.get(B, 0)
    if PREDICT_CASE == 0:
        print(f"Warning: No PREDICT_CASE set for W={W}, B={B}")
elif W == 3:
    N_TRAIN_CASES = 16
    PREDICT_CASE = {29: 22, 34: 21}.get(B, 0)
    if PREDICT_CASE == 0:
        print(f"Warning: No PREDICT_CASE set for W={W}, B={B}")
elif W == 5:
    N_TRAIN_CASES = 64
    PREDICT_CASE = 13
else:
    print("Warning: N_TRAIN_CASES and PREDICT_CASE not set for W != 2, 3, or 5")

# Hyper-parameter search budget and training schedules.
N_TRIALS = 15
MAX_EPOCHS_TUNE = 40
PATIENCE_TUNE = 5
MAX_EPOCHS_FINAL = 200
# DataLoader knobs (kept safe)
PIN_MEMORY_ON_CUDA = True
# torch.compile knobs (stable choice)
# - OFF for CV (many recompiles, can be slower + more resources)
# - ON for FINAL (single compile)
COMPILE_IN_CV = False
COMPILE_IN_FINAL = True
# ------------------------- # -------------------------
# Utils # Utils
# ------------------------- # -------------------------
...@@ -651,40 +598,103 @@ def sample_params(device: str = "cpu"): ...@@ -651,40 +598,103 @@ def sample_params(device: str = "cpu"):
# Main # Main
# ------------------------- # -------------------------
def main(): def main():
df = pd.read_csv(DATA_FILE) """
Main execution pipeline for LSTM train and predict hysteretic curves.
"""
# Configuration constants and bounds
parser = argparse.ArgumentParser()
parser.add_argument("--W", type=int, required=True)
parser.add_argument("--B", type=int, required=True)
parser.add_argument("--it", type=int, required=True)
args = parser.parse_args()
w_val = args.W
b_val = args.B
h_val = 30
if w_val == 2:
h_val = 30
elif w_val == 3:
h_val = 45
elif w_val == 5:
h_val = 60
else:
print("Warning: H not set for W != 2, 3, or 5")
data_file = f"../../data/hysteretic_curves/{w_val}W/H{h_val}_B{b_val}/merged_dataset_points.csv"
n_train_case = 0
predict_case = 0
if w_val == 2:
n_train_case = 8
if b_val == 29:
predict_case = 12
elif b_val == 34:
predict_case = 11
else:
print(f"Warning: No predict_case set for W={w_val}, B={b_val}")
elif w_val == 3:
n_train_case = 16
if b_val == 29:
predict_case = 22
elif b_val == 34:
predict_case = 21
else:
print(f"Warning: No predict_case set for W={w_val}, B={b_val}")
elif w_val == 5:
n_train_case = 64
predict_case = 13
else:
print("Warning: n_train_case and predict_case not set for W != 2, 3, or 5")
n_trials = 15
max_epachs_tune = 40
patience_tune = 5
max_epachs_final = 200
# DataLoader knobs (kept safe)
PIN_MEMORY_ON_CUDA = True
# torch.compile knobs (stable choice)
# - OFF for CV (many recompiles, can be slower + more resources)
# - ON for FINAL (single compile)
compile_in_cv = False
compile_in_final = True
df = pd.read_csv(data_file)
thickness_cols = [c for c in df.columns if c.startswith("tw")] thickness_cols = [c for c in df.columns if c.startswith("tw")]
feature_cols = ["Displ", "CumDispl", "LoadDir", "CycleNum", "MaxAmpl"] + thickness_cols feature_cols = ["Displ", "CumDispl", "LoadDir", "CycleNum", "MaxAmpl"] + thickness_cols
target_col = "Force" target_col = "Force"
all_cases = sorted(df["Case"].unique()) all_cases = sorted(df["Case"].unique())
train_pool = [int(c) for c in all_cases if c <= N_TRAIN_CASES] train_pool = [int(c) for c in all_cases if c <= n_train_case]
if int(PREDICT_CASE) in train_pool: if int(predict_case) in train_pool:
raise RuntimeError(f"PREDICT_CASE={PREDICT_CASE} is in train_pool. This must not happen.") raise RuntimeError(f"predict_case={predict_case} is in train_pool. This must not happen.")
device = "cuda" if torch.cuda.is_available() else "cpu" device = "cuda" if torch.cuda.is_available() else "cpu"
print("============================================================") print("============================================================")
print("DATA") print("DATA")
print("Device:", device) print("Device:", device)
print("Predict (test) case:", PREDICT_CASE) print("Predict (test) case:", predict_case)
print("Train pool cases (for CV):", train_pool) print("Train pool cases (for CV):", train_pool)
print("n_cases total:", len(all_cases), "| train_pool:", len(train_pool)) print("n_cases total:", len(all_cases), "| train_pool:", len(train_pool))
print("torch.compile CV:", "ON" if (device == "cuda" and COMPILE_IN_CV) else "OFF") print("torch.compile CV:", "ON" if (device == "cuda" and compile_in_cv) else "OFF")
print("torch.compile FINAL:", "ON" if (device == "cuda" and COMPILE_IN_FINAL) else "OFF") print("torch.compile FINAL:", "ON" if (device == "cuda" and compile_in_final) else "OFF")
print("============================================================") print("============================================================")
folds, strategy = make_case_folds(train_pool, seed=SEED, n_splits=5) folds, strategy = make_case_folds(train_pool, seed=SEED, n_splits=5)
print(f"CV strategy: {strategy} | n_folds={len(folds)}") print(f"CV strategy: {strategy} | n_folds={len(folds)}")
# Cache cases needed (train_pool for CV, plus test case for prediction) # Cache cases needed (train_pool for CV, plus test case for prediction)
cases_to_cache_all = sorted(set(train_pool + [int(PREDICT_CASE)])) cases_to_cache_all = sorted(set(train_pool + [int(predict_case)]))
# ---- CV Tuning ---- # ---- CV Tuning ----
best = {"mean": np.inf, "std": np.inf, "params": None} best = {"mean": np.inf, "std": np.inf, "params": None}
for t in range(1, N_TRIALS + 1): for t in range(1, n_trials + 1):
params = sample_params(device=device) params = sample_params(device=device)
# Separate window_size from model params (cached funcs must NOT receive window_size) # Separate window_size from model params (cached funcs must NOT receive window_size)
...@@ -717,11 +727,11 @@ def main(): ...@@ -717,11 +727,11 @@ def main():
feature_dim=feature_dim, feature_dim=feature_dim,
folds=folds, folds=folds,
params_model=params_model, params_model=params_model,
max_epochs=MAX_EPOCHS_TUNE, max_epochs=max_epachs_tune,
patience=PATIENCE_TUNE, patience=patience_tune,
grad_clip=1.0, grad_clip=1.0,
device=device, device=device,
compile_model=(COMPILE_IN_CV and device == "cuda"), compile_model=(compile_in_cv and device == "cuda"),
) )
if fold_rmses is None: if fold_rmses is None:
...@@ -784,22 +794,22 @@ def main(): ...@@ -784,22 +794,22 @@ def main():
cache_ws=cache_ws_best, cache_ws=cache_ws_best,
feature_dim=feature_dim, feature_dim=feature_dim,
train_cases=train_pool, train_cases=train_pool,
max_epochs=MAX_EPOCHS_FINAL, max_epochs=max_epachs_final,
grad_clip=1.0, grad_clip=1.0,
device=device, device=device,
compile_model=(COMPILE_IN_FINAL and device == "cuda"), compile_model=(compile_in_final and device == "cuda"),
**best_params_model **best_params_model
) )
scaler_x, scaler_y = final_scalers scaler_x, scaler_y = final_scalers
# ---- Predict PREDICT_CASE ---- # ---- Predict predict_case ----
df_test = df[df["Case"] == PREDICT_CASE].copy() df_test = df[df["Case"] == predict_case].copy()
x_test, y_test, test_indices = cache_ws_best[int(PREDICT_CASE)] x_test, y_test, test_indices = cache_ws_best[int(predict_case)]
if x_test.shape[0] == 0: if x_test.shape[0] == 0:
raise RuntimeError( raise RuntimeError(
f"Case {PREDICT_CASE} has fewer points ({len(df_test)}) than window_size ({ws_best})." f"Case {predict_case} has fewer points ({len(df_test)}) than window_size ({ws_best})."
) )
x_test_scaled = scaler_x.transform(x_test.reshape(-1, feature_dim)).reshape(x_test.shape).astype(np.float32, copy=False) x_test_scaled = scaler_x.transform(x_test.reshape(-1, feature_dim)).reshape(x_test.shape).astype(np.float32, copy=False)
...@@ -839,7 +849,7 @@ def main(): ...@@ -839,7 +849,7 @@ def main():
y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel() y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()
test_rmse = rmse(y_test, y_pred) test_rmse = rmse(y_test, y_pred)
print(f"Test RMSE on Case {PREDICT_CASE} = {test_rmse:.4f}") print(f"Test RMSE on Case {predict_case} = {test_rmse:.4f}")
# ---- Save results ---- # ---- Save results ----
df_test_pred = df_test.copy() df_test_pred = df_test.copy()
...@@ -847,7 +857,7 @@ def main(): ...@@ -847,7 +857,7 @@ def main():
for idx, pred in zip(test_indices, y_pred): for idx, pred in zip(test_indices, y_pred):
df_test_pred.loc[idx, "Force_RNN"] = pred df_test_pred.loc[idx, "Force_RNN"] = pred
out_file = f"../../data/hysteretic_curves/{W}W/H{H}_B{B}/case_{PREDICT_CASE}_with_rnn_preds.csv" out_file = f"../../data/hysteretic_curves/{w_val}W/H{h_val}_B{b_val}/case_{predict_case}_with_rnn_preds.csv"
df_test_pred.to_csv(out_file, index=False) df_test_pred.to_csv(out_file, index=False)
print(f"\nSaved predictions to: {out_file}") print(f"\nSaved predictions to: {out_file}")
......
#!/bin/bash
# Run the LSTM hysteretic-curve prediction script once per (W, B) dataset
# variant. Each run reads merged_dataset_points.csv for that variant and
# writes case_<predict_case>_with_rnn_preds.csv next to it.
#
# NOTE(review): predict_hysteretic_curves.py declares --it as a required
# argparse argument, so every invocation below will exit with an argparse
# error as written — confirm the intended iteration value and add --it.
# NOTE(review): the (W=5, B=29) combination is not run — verify whether
# that omission is intentional.
python predict_hysteretic_curves.py --W 2 --B 29
python predict_hysteretic_curves.py --W 2 --B 34
python predict_hysteretic_curves.py --W 3 --B 29
python predict_hysteretic_curves.py --W 3 --B 34
python predict_hysteretic_curves.py --W 5 --B 34
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment