Refactor hysteretic curves processing and prediction scripts

- Added a new function `load_tfdmap` to load TFDMap data from files, ensuring proper error handling for missing or malformed data.
- Updated the hysteretic curves processing script to include TFDMap data in the point-level DataFrame, maintaining alignment with case folders.
- Enhanced the prediction script to handle multiple target columns dynamically, improving flexibility for model training and evaluation.
- Modified the caching mechanism for window sizes to use a tuple key for better organization and retrieval (see the sketch below).
- Improved logging and error messages for better debugging and user feedback.
- Updated the run script to ensure consistent execution of prediction tasks with specified parameters.
parent f2ad8555
This source diff could not be displayed because it is too large. You can view the blob instead.
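The predict_hysteretic_curves.py diff is not shown inline here, so the snippet below is only an illustrative sketch of the tuple-key window-size caching described in the commit message; the names _window_size_cache, get_window_sizes, and compute_fn are assumptions, not identifiers from the actual script.

# Illustrative sketch only: names and structure are assumptions, not the script's real code.
_window_size_cache = {}

def get_window_sizes(W, B, compute_fn):
    # One cache entry per (number of windows, width identifier) pair.
    key = (W, B)
    if key not in _window_size_cache:
        _window_size_cache[key] = compute_fn(W, B)
    return _window_size_cache[key]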
import os

import numpy as np
import pandas as pd

# import matplotlib.pyplot as plt

# Configuration constants and bounds
W = 2  # Number of windows
B = 29  # Width identifier
H = 30
if W == 2:
    H = 30  # Adjust height identifier for two window case
@@ -40,22 +42,56 @@ print(f"Number of cases: {N_CASES}")
print(f"Number of windows per case: {n_windows}")
# Column names for tw1, tw2, ...
thickness_cols = [f"tw{i+1}" for i in range(n_windows)]
thickness_cols = [f"tw{i + 1}" for i in range(n_windows)]
all_rows = []
def load_tfdmap(case_folder_path):
    """Load the per-case TFDMap.txt file and return it as a numeric DataFrame."""
    tfdmap_file = os.path.join(case_folder_path, "TFDMap.txt")
    if not os.path.isfile(tfdmap_file):
        raise FileNotFoundError(f"TFDMap file not found: {tfdmap_file}")
    df_tfdmap = pd.read_csv(
        tfdmap_file,
        comment="#",
        sep=r"\s+",
        engine="python",
        header=None,
    )
    # Some files contain an extra non-comment header row such as
    # "Tiempo promW1 ... promF0". Keep only fully numeric rows.
    df_tfdmap = df_tfdmap.apply(pd.to_numeric, errors="coerce")
    df_tfdmap = df_tfdmap.dropna(axis=0, how="any").reset_index(drop=True)
    if df_tfdmap.empty:
        raise ValueError(f"TFDMap file has no numeric data rows: {tfdmap_file}")
    if df_tfdmap.shape[1] < 2:
        raise ValueError(
            f"TFDMap file must contain at least two columns after ignoring headers: {tfdmap_file}"
        )
    # Drop the first (time) column; keep the window and force columns.
    df_tfdmap = df_tfdmap.iloc[:, 1:].copy()
    n_cols = df_tfdmap.shape[1]
    if n_cols == 1:
        df_tfdmap.columns = ["TFDMapF"]
    else:
        df_tfdmap.columns = [f"TFDMapW{i + 1}" for i in range(n_cols - 1)] + ["TFDMapF"]
    return df_tfdmap
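# Example of the TFDMap.txt layout assumed by load_tfdmap (illustrative values only,
# not taken from the dataset). The optional header row and the first (time) column
# are discarded; the remaining columns become TFDMapW1, ..., TFDMapF:
#
#   Tiempo   promW1   promW2   promF0
#   0.00     0.0012   0.0009   0.0000
#   0.01     0.0034   0.0021   0.1250
#
# Hypothetical call for case folder "1":
# df_map = load_tfdmap(os.path.join(CURVES_DIR, "1"))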
# Detect all numeric folders automatically
case_folders = sorted([f for f in os.listdir(CURVES_DIR) if f.isdigit()], key=int)
case_folders_sorted = sorted(case_folders, key=lambda x: int(x))
# thickness_data must have the SAME length and order as case_folders_sorted
thickness_by_case = {
    int(folder): thickness_data[idx] for idx, folder in enumerate(case_folders_sorted)
}
# === Loop ===
@@ -67,7 +103,13 @@ for folder_name in case_folders_sorted:
        print(f"⚠️ Folder not found: {folder_path}, skipping.")
        continue
    files = [
        f
        for f in os.listdir(folder_path)
        if f.endswith((".txt", ".dat", ".csv"))
        and f != "TFDMap.txt"
        and not f.startswith("merged_dataset_")
    ]
    if not files:
        print(f"⚠️ No hysteresis file in {folder_path}.")
        continue
@@ -76,8 +118,12 @@ for folder_name in case_folders_sorted:
    print(f"Processing Case {i}: {datafile}")
    df_curve = pd.read_csv(
        datafile,
        comment="#",
        sep=r"\s+",
        engine="python",
        names=["Displ", "Force"],
        header=None,
    )
    df_curve["Case"] = i
@@ -116,9 +162,8 @@ def compute_load_dir(displ):
df["LoadDir"] = df.groupby("Case")["Displ"].transform(compute_load_dir)
# CumDispl: cumulative |ΔDispl| per case
df["CumDispl"] = df.groupby("Case")["Displ"].transform(
    lambda s: s.diff().abs().fillna(0).cumsum()
)
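# Worked example for the CumDispl transform above (illustrative values only):
# Displ    = [0.0, 2.0, -1.0, 1.5]
# |ΔDispl| = [0.0, 2.0, 3.0, 2.5]   (the first diff is NaN and is filled with 0)
# CumDispl = [0.0, 2.0, 5.0, 7.5]   (running sum of absolute displacement increments)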
@@ -143,18 +188,47 @@ df["MaxAmpl"] = df.groupby("Case")["Displ"].transform(lambda x: x.abs().max())
# 4) Reorder point-level columns (Force last)
# ------------------------------------------------------------------
ordered_cols_point = (
    ["Case"]
    + thickness_cols
    + ["Displ", "CumDispl", "LoadDir", "CycleNum", "MaxAmpl", "Force"]
)
df = df[ordered_cols_point]
print("\n=== Point-level DataFrame (df) ===")
print(df.head())
# Point-level export: keep 1 out of every 5 rows within each case
keep_mask = df.groupby("Case", sort=False).cumcount() % 5 == 0
df_points = df.loc[keep_mask].reset_index(drop=True)
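# With cumcount restarting at 0 within each case, rows 0, 5, 10, ... of every case are
# kept, so each case always contributes its first point to the downsampled set.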
# Add TFDMap data from the corresponding file in each case folder
tfdmap_frames = []
for folder_name in case_folders_sorted:
    case_id = int(folder_name)
    folder_path = os.path.join(CURVES_DIR, folder_name)
    df_case_points = df_points[df_points["Case"] == case_id].reset_index(drop=True)
    if df_case_points.empty:
        continue
    df_tfdmap = load_tfdmap(folder_path)
    if len(df_tfdmap) != len(df_case_points):
        raise ValueError(
            f"TFDMap length mismatch for case {case_id}: "
            f"{len(df_tfdmap)} rows in TFDMap.txt vs {len(df_case_points)} sampled rows."
        )
    df_case_points = pd.concat(
        [df_case_points, df_tfdmap.reset_index(drop=True)],
        axis=1,
    )
    tfdmap_frames.append(df_case_points)
df_points = pd.concat(tfdmap_frames, ignore_index=True)
print("\n=== Point-level DataFrame (df_points) ===")
# Save point-level dataset
output_file_points = os.path.join(CURVES_DIR, "merged_dataset_points.csv")
df_points.to_csv(output_file_points, index=False)
print(f"\nSaved point-level dataset to: {output_file_points}")
# ==================================================================
@@ -238,21 +312,25 @@ df_cycles["StiffnessRatio"] = df_cycles.groupby("Case")["SecantStiffness"].trans
# Order cycle-level columns nicely
ordered_cols_cycles = (
    ["Case", "CycleNum"]
    + thickness_cols
    + [
        "CycleMaxAmpl",
        "PosEnvDispl",
        "PosEnvForce",
        "NegEnvDispl",
        "NegEnvForce",
        "LoopEnergySigned",
        "LoopEnergyAbs",
        "CumLoopEnergyAbs",
        "SecantStiffness",
        "StiffnessRatio",
    ]
)
df_cycles = df_cycles[ordered_cols_cycles]
print("\n=== Cycle-level DataFrame (df_cycles) ===")
print(df_cycles.head())
# Save cycle-level summary
output_file_cycles = os.path.join(CURVES_DIR, "merged_dataset_cycles.csv")
#!/bin/bash
# python predict_hysteretic_curves.py --W 2 --B 29 --n-trials 25
python predict_hysteretic_curves.py --W 2 --B 29 --n-trials 25
python predict_hysteretic_curves.py --W 2 --B 34 --n-trials 25
python predict_hysteretic_curves.py --W 3 --B 29 --n-trials 25
python predict_hysteretic_curves.py --W 3 --B 34 --n-trials 25