feat(run): Add script to execute LSTM predictions with specified parameters

0dd8cedc · Joaquín Irazábal González · 2c5cf557 · 0dd8cedc · 0dd8cedc
Commit 0dd8cedc authored Feb 16, 2026 by Joaquín Irazábal González
Hide whitespace changes
Inline Side-by-side

Showing with 94 additions and 78 deletions

predict_hysteretic_curves.py Code/src/hysteretic_curves/predict_hysteretic_curves.py +88 -78

run.sh Code/src/hysteretic_curves/run.sh +6 -0

No files found.
--- a/Code/src/hysteretic_curves/predict_hysteretic_curves.py
+++ b/Code/src/hysteretic_curves/predict_hysteretic_curves.py
 """
 FAST + STABLE (no "too many open files")
 ========================================
-LSTM tuning (CV by CASE) + final retrain on 100% train_pool + predict PREDICT_CASE (PyTorch)
+LSTM tuning (CV by CASE) + final retrain on 100% train_pool + predict predict_case (PyTorch)

 CV rules:
- Use ONLY the first N_TRAIN_CASES cases as train_pool (cases <= N_TRAIN_CASES)
- If N_TRAIN_CASES < 20  -> Leave-One-Out CV by CASE
+- Use ONLY the first n_train_case cases as train_pool (cases <= n_train_case)
+- If n_train_case < 20  -> Leave-One-Out CV by CASE
 - Else                   -> 5-fold CV by CASE (KFold on case IDs)

 After CV tuning:
 - Train FINAL model on 100% train_pool (NO early stopping; keep best by TRAIN loss)
- Predict PREDICT_CASE (assumed not in train_pool) and save CSV with Force_RNN column
+- Predict predict_case (assumed not in train_pool) and save CSV with Force_RNN column

 Speed-ups:
 - Vectorized window creation via sliding_window_view
@@ -26,8 +26,8 @@ Stability fix:

 from __future__ import annotations

+import argparse
 from typing import Dict, Tuple, List, Optional, cast
-import os
 import gc

 import numpy as np
@@ -56,59 +56,6 @@ torch.manual_seed(SEED)
 if torch.cuda.is_available():
    torch.backends.cudnn.benchmark = True

-W = 2
-B = 29
-H = 30
-if W == 2:
-    H = 30
-elif W == 3:
-    H = 45
-elif W == 5:
-    H = 60
-else:
-    print("Warning: H not set for W != 2, 3, or 5")
-
-DATA_FILE = f"../../data/hysteretic_curves/{W}W/H{H}_B{B}/merged_dataset_points.csv"
-
-N_TRAIN_CASES = 0
-PREDICT_CASE = 0
-if W == 2:
-    N_TRAIN_CASES = 8
-    if B == 29:
-        PREDICT_CASE = 12
-    elif B == 34:
-        PREDICT_CASE = 11
-    else:
-        print(f"Warning: No PREDICT_CASE set for W={W}, B={B}")
-elif W == 3:
-    N_TRAIN_CASES = 16
-    if B == 29:
-        PREDICT_CASE = 22
-    elif B == 34:
-        PREDICT_CASE = 21
-    else:
-        print(f"Warning: No PREDICT_CASE set for W={W}, B={B}")
-elif W == 5:
-    N_TRAIN_CASES = 64
-    PREDICT_CASE = 13
-else:
-    print("Warning: N_TRAIN_CASES and PREDICT_CASE not set for W != 2, 3, or 5")
-
-N_TRIALS = 15
-MAX_EPOCHS_TUNE = 40
-PATIENCE_TUNE = 5
-MAX_EPOCHS_FINAL = 200
-
-# DataLoader knobs (kept safe)
-PIN_MEMORY_ON_CUDA = True
-
-# torch.compile knobs (stable choice)
-# - OFF for CV (many recompiles, can be slower + more resources)
-# - ON for FINAL (single compile)
-COMPILE_IN_CV = False
-COMPILE_IN_FINAL = True
-
-
 # -------------------------
 # Utils
 # -------------------------
@@ -651,40 +598,103 @@ def sample_params(device: str = "cpu"):
 # Main
 # -------------------------
 def main():
-    df = pd.read_csv(DATA_FILE)
+    """
+    Main execution pipeline: tune, train, and predict hysteretic curves with an LSTM.
+    """
+    # Command-line arguments, followed by the configuration constants derived from them
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--W", type=int, required=True)
+    parser.add_argument("--B", type=int, required=True)
+    parser.add_argument("--it", type=int, required=True)
+    args = parser.parse_args()
+
+    w_val = args.W
+    b_val = args.B
+
+    h_val = 30
+    if w_val == 2:
+        h_val = 30
+    elif w_val == 3:
+        h_val = 45
+    elif w_val == 5:
+        h_val = 60
+    else:
+        print("Warning: H not set for W != 2, 3, or 5")
+
+    data_file = f"../../data/hysteretic_curves/{w_val}W/H{h_val}_B{b_val}/merged_dataset_points.csv"
+
+    n_train_case = 0
+    predict_case = 0
+    if w_val == 2:
+        n_train_case = 8
+        if b_val == 29:
+            predict_case = 12
+        elif b_val == 34:
+            predict_case = 11
+        else:
+            print(f"Warning: No predict_case set for W={w_val}, B={b_val}")
+    elif w_val == 3:
+        n_train_case = 16
+        if b_val == 29:
+            predict_case = 22
+        elif b_val == 34:
+            predict_case = 21
+        else:
+            print(f"Warning: No predict_case set for W={w_val}, B={b_val}")
+    elif w_val == 5:
+        n_train_case = 64
+        predict_case = 13
+    else:
+        print("Warning: n_train_case and predict_case not set for W != 2, 3, or 5")
+
+    n_trials = 15
+    max_epachs_tune = 40
+    patience_tune = 5
+    max_epachs_final = 200
+
+    # DataLoader knobs (kept safe)
+    PIN_MEMORY_ON_CUDA = True
+
+    # torch.compile knobs (stable choice)
+    # - OFF for CV (many recompiles, can be slower + more resources)
+    # - ON for FINAL (single compile)
+    compile_in_cv = False
+    compile_in_final = True
+
+    df = pd.read_csv(data_file)

    thickness_cols = [c for c in df.columns if c.startswith("tw")]
    feature_cols = ["Displ", "CumDispl", "LoadDir", "CycleNum", "MaxAmpl"] + thickness_cols
    target_col = "Force"

    all_cases = sorted(df["Case"].unique())
-    train_pool = [int(c) for c in all_cases if c <= N_TRAIN_CASES]
+    train_pool = [int(c) for c in all_cases if c <= n_train_case]

-    if int(PREDICT_CASE) in train_pool:
-        raise RuntimeError(f"PREDICT_CASE={PREDICT_CASE} is in train_pool. This must not happen.")
+    if int(predict_case) in train_pool:
+        raise RuntimeError(f"predict_case={predict_case} is in train_pool. This must not happen.")

    device = "cuda" if torch.cuda.is_available() else "cpu"

    print("============================================================")
    print("DATA")
    print("Device:", device)
-    print("Predict (test) case:", PREDICT_CASE)
+    print("Predict (test) case:", predict_case)
    print("Train pool cases (for CV):", train_pool)
    print("n_cases total:", len(all_cases), "| train_pool:", len(train_pool))
-    print("torch.compile CV:", "ON" if (device == "cuda" and COMPILE_IN_CV) else "OFF")
-    print("torch.compile FINAL:", "ON" if (device == "cuda" and COMPILE_IN_FINAL) else "OFF")
+    print("torch.compile CV:", "ON" if (device == "cuda" and compile_in_cv) else "OFF")
+    print("torch.compile FINAL:", "ON" if (device == "cuda" and compile_in_final) else "OFF")
    print("============================================================")

    folds, strategy = make_case_folds(train_pool, seed=SEED, n_splits=5)
    print(f"CV strategy: {strategy} | n_folds={len(folds)}")

    # Cache cases needed (train_pool for CV, plus test case for prediction)
-    cases_to_cache_all = sorted(set(train_pool + [int(PREDICT_CASE)]))
+    cases_to_cache_all = sorted(set(train_pool + [int(predict_case)]))

    # ---- CV Tuning ----
    best = {"mean": np.inf, "std": np.inf, "params": None}

-    for t in range(1, N_TRIALS + 1):
+    for t in range(1, n_trials + 1):
        params = sample_params(device=device)

        # Separate window_size from model params (cached funcs must NOT receive window_size)
@@ -717,11 +727,11 @@ def main():
            feature_dim=feature_dim,
            folds=folds,
            params_model=params_model,
-            max_epochs=MAX_EPOCHS_TUNE,
-            patience=PATIENCE_TUNE,
+            max_epochs=max_epachs_tune,
+            patience=patience_tune,
            grad_clip=1.0,
            device=device,
-            compile_model=(COMPILE_IN_CV and device == "cuda"),
+            compile_model=(compile_in_cv and device == "cuda"),
        )

        if fold_rmses is None:
@@ -784,22 +794,22 @@ def main():
        cache_ws=cache_ws_best,
        feature_dim=feature_dim,
        train_cases=train_pool,
-        max_epochs=MAX_EPOCHS_FINAL,
+        max_epochs=max_epachs_final,
        grad_clip=1.0,
        device=device,
-        compile_model=(COMPILE_IN_FINAL and device == "cuda"),
+        compile_model=(compile_in_final and device == "cuda"),
        **best_params_model
    )

    scaler_x, scaler_y = final_scalers

-    # ---- Predict PREDICT_CASE ----
-    df_test = df[df["Case"] == PREDICT_CASE].copy()
+    # ---- Predict predict_case ----
+    df_test = df[df["Case"] == predict_case].copy()

-    x_test, y_test, test_indices = cache_ws_best[int(PREDICT_CASE)]
+    x_test, y_test, test_indices = cache_ws_best[int(predict_case)]
    if x_test.shape[0] == 0:
        raise RuntimeError(
-            f"Case {PREDICT_CASE} has fewer points ({len(df_test)}) than window_size ({ws_best})."
+            f"Case {predict_case} has fewer points ({len(df_test)}) than window_size ({ws_best})."
        )

    x_test_scaled = scaler_x.transform(x_test.reshape(-1, feature_dim)).reshape(x_test.shape).astype(np.float32, copy=False)
@@ -839,7 +849,7 @@ def main():
    y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()

    test_rmse = rmse(y_test, y_pred)
-    print(f"Test RMSE on Case {PREDICT_CASE} = {test_rmse:.4f}")
+    print(f"Test RMSE on Case {predict_case} = {test_rmse:.4f}")

    # ---- Save results ----
    df_test_pred = df_test.copy()
@@ -847,7 +857,7 @@ def main():
    for idx, pred in zip(test_indices, y_pred):
        df_test_pred.loc[idx, "Force_RNN"] = pred

-    out_file = f"../../data/hysteretic_curves/{W}W/H{H}_B{B}/case_{PREDICT_CASE}_with_rnn_preds.csv"
+    out_file = f"../../data/hysteretic_curves/{w_val}W/H{h_val}_B{b_val}/case_{predict_case}_with_rnn_preds.csv"
    df_test_pred.to_csv(out_file, index=False)
    print(f"\nSaved predictions to: {out_file}")


--- a/Code/src/hysteretic_curves/run.sh
+++ b/Code/src/hysteretic_curves/run.sh
+#!/bin/bash
+python predict_hysteretic_curves.py --W 2 --B 29 --it 1  # --it is declared required=True by the script's argparse setup; 1 is a placeholder value -- TODO confirm the intended run index
+python predict_hysteretic_curves.py --W 2 --B 34 --it 1
+python predict_hysteretic_curves.py --W 3 --B 29 --it 1
+python predict_hysteretic_curves.py --W 3 --B 34 --it 1
+python predict_hysteretic_curves.py --W 5 --B 34 --it 1