Refactor hysteretic curves processing and prediction scripts

- Added a new function `load_tfdmap` to load TFDMap data from files, ensuring proper error handling for missing or malformed data.
- Updated the hysteretic curves processing script to include TFDMap data in the point-level DataFrame, maintaining alignment with case folders.
- Enhanced the prediction script to handle multiple target columns dynamically, improving flexibility for model training and evaluation.
- Modified the caching mechanism for window sizes to use a tuple key for better organization and retrieval (see the sketch below).
- Improved logging and error messages for better debugging and user feedback.
- Updated the run script to ensure consistent execution of prediction tasks with specified parameters.
parent f2ad8555
This source diff could not be displayed because it is too large. You can view the blob instead.
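The predict_hysteretic_curves.py diff is not shown inline here, so the snippet below is only an illustrative sketch of the tuple-key window-size caching described in the commit message; the names _window_size_cache, get_window_sizes, and compute_fn are assumptions, not identifiers from the actual script.

# Illustrative sketch only: names and structure are assumptions, not the script's real code.
_window_size_cache = {}

def get_window_sizes(W, B, compute_fn):
    # One cache entry per (number of windows, width identifier) pair.
    key = (W, B)
    if key not in _window_size_cache:
        _window_size_cache[key] = compute_fn(W, B)
    return _window_size_cache[key]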
import os

import numpy as np
import pandas as pd

# import matplotlib.pyplot as plt

# Configuration constants and bounds
W = 2  # Number of windows
B = 29  # Width identifier
H = 30
if W == 2:
    H = 30  # Adjust height identifier for two window case
@@ -40,22 +42,56 @@ print(f"Number of cases: {N_CASES}")
print(f"Number of windows per case: {n_windows}")
# Column names for tw1, tw2, ...
thickness_cols = [f"tw{i+1}" for i in range(n_windows)]
thickness_cols = [f"tw{i + 1}" for i in range(n_windows)]
all_rows = []
def load_tfdmap(case_folder_path):
    """Load the per-case TFDMap.txt file and return it as a numeric DataFrame."""
    tfdmap_file = os.path.join(case_folder_path, "TFDMap.txt")
    if not os.path.isfile(tfdmap_file):
        raise FileNotFoundError(f"TFDMap file not found: {tfdmap_file}")
    df_tfdmap = pd.read_csv(
        tfdmap_file,
        comment="#",
        sep=r"\s+",
        engine="python",
        header=None,
    )
    # Some files contain an extra non-comment header row such as
    # "Tiempo promW1 ... promF0". Keep only fully numeric rows.
    df_tfdmap = df_tfdmap.apply(pd.to_numeric, errors="coerce")
    df_tfdmap = df_tfdmap.dropna(axis=0, how="any").reset_index(drop=True)
    if df_tfdmap.empty:
        raise ValueError(f"TFDMap file has no numeric data rows: {tfdmap_file}")
    if df_tfdmap.shape[1] < 2:
        raise ValueError(
            f"TFDMap file must contain at least two columns after ignoring headers: {tfdmap_file}"
        )
    # Drop the first (time) column; keep the window and force columns.
    df_tfdmap = df_tfdmap.iloc[:, 1:].copy()
    n_cols = df_tfdmap.shape[1]
    if n_cols == 1:
        df_tfdmap.columns = ["TFDMapF"]
    else:
        df_tfdmap.columns = [f"TFDMapW{i + 1}" for i in range(n_cols - 1)] + ["TFDMapF"]
    return df_tfdmap
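# Example of the TFDMap.txt layout assumed by load_tfdmap (illustrative values only,
# not taken from the dataset). The optional header row and the first (time) column
# are discarded; the remaining columns become TFDMapW1, ..., TFDMapF:
#
#   Tiempo   promW1   promW2   promF0
#   0.00     0.0012   0.0009   0.0000
#   0.01     0.0034   0.0021   0.1250
#
# Hypothetical call for case folder "1":
# df_map = load_tfdmap(os.path.join(CURVES_DIR, "1"))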
# Detect all numeric folders automatically
case_folders = sorted([f for f in os.listdir(CURVES_DIR) if f.isdigit()], key=int)
case_folders_sorted = sorted(case_folders, key=lambda x: int(x))
# thickness_data must have the SAME length and order as case_folders_sorted
thickness_by_case = {
    int(folder): thickness_data[idx] for idx, folder in enumerate(case_folders_sorted)
}
# === Loop ===
@@ -67,7 +103,13 @@ for folder_name in case_folders_sorted:
        print(f"⚠️ Folder not found: {folder_path}, skipping.")
        continue
    files = [
        f
        for f in os.listdir(folder_path)
        if f.endswith((".txt", ".dat", ".csv"))
        and f != "TFDMap.txt"
        and not f.startswith("merged_dataset_")
    ]
    if not files:
        print(f"⚠️ No hysteresis file in {folder_path}.")
        continue
@@ -76,8 +118,12 @@ for folder_name in case_folders_sorted:
    print(f"Processing Case {i}: {datafile}")
    df_curve = pd.read_csv(
        datafile,
        comment="#",
        sep=r"\s+",
        engine="python",
        names=["Displ", "Force"],
        header=None,
    )
    df_curve["Case"] = i
@@ -116,9 +162,8 @@ def compute_load_dir(displ):
df["LoadDir"] = df.groupby("Case")["Displ"].transform(compute_load_dir)
# CumDispl: cumulative |ΔDispl| per case
df["CumDispl"] = df.groupby("Case")["Displ"].transform(
    lambda s: s.diff().abs().fillna(0).cumsum()
)
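# Worked example for the CumDispl transform above (illustrative values only):
# Displ    = [0.0, 2.0, -1.0, 1.5]
# |ΔDispl| = [0.0, 2.0, 3.0, 2.5]   (the first diff is NaN and is filled with 0)
# CumDispl = [0.0, 2.0, 5.0, 7.5]   (running sum of absolute displacement increments)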
@@ -143,18 +188,47 @@ df["MaxAmpl"] = df.groupby("Case")["Displ"].transform(lambda x: x.abs().max())
# 4) Reorder point-level columns (Force last)
# ------------------------------------------------------------------
ordered_cols_point = (
    ["Case"]
    + thickness_cols
    + ["Displ", "CumDispl", "LoadDir", "CycleNum", "MaxAmpl", "Force"]
)
df = df[ordered_cols_point]
print("\n=== Point-level DataFrame (df) ===")
print(df.head())
# Point-level export: keep 1 out of every 5 rows within each case
keep_mask = df.groupby("Case", sort=False).cumcount() % 5 == 0
df_points = df.loc[keep_mask].reset_index(drop=True)
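# With cumcount restarting at 0 within each case, rows 0, 5, 10, ... of every case are
# kept, so each case always contributes its first point to the downsampled set.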
# Add TFDMap data from the corresponding file in each case folder
tfdmap_frames = []
for folder_name in case_folders_sorted:
    case_id = int(folder_name)
    folder_path = os.path.join(CURVES_DIR, folder_name)
    df_case_points = df_points[df_points["Case"] == case_id].reset_index(drop=True)
    if df_case_points.empty:
        continue
    df_tfdmap = load_tfdmap(folder_path)
    if len(df_tfdmap) != len(df_case_points):
        raise ValueError(
            f"TFDMap length mismatch for case {case_id}: "
            f"{len(df_tfdmap)} rows in TFDMap.txt vs {len(df_case_points)} sampled rows."
        )
    df_case_points = pd.concat(
        [df_case_points, df_tfdmap.reset_index(drop=True)],
        axis=1,
    )
    tfdmap_frames.append(df_case_points)
df_points = pd.concat(tfdmap_frames, ignore_index=True)
print("\n=== Point-level DataFrame (df_points) ===")
# Save point-level dataset
output_file_points = os.path.join(CURVES_DIR, "merged_dataset_points.csv")
df_points.to_csv(output_file_points, index=False)
print(f"\nSaved point-level dataset to: {output_file_points}")
# ==================================================================
@@ -238,21 +312,25 @@ df_cycles["StiffnessRatio"] = df_cycles.groupby("Case")["SecantStiffness"].trans
# Order cycle-level columns nicely
ordered_cols_cycles = (
    ["Case", "CycleNum"]
    + thickness_cols
    + [
        "CycleMaxAmpl",
        "PosEnvDispl",
        "PosEnvForce",
        "NegEnvDispl",
        "NegEnvForce",
        "LoopEnergySigned",
        "LoopEnergyAbs",
        "CumLoopEnergyAbs",
        "SecantStiffness",
        "StiffnessRatio",
    ]
)
df_cycles = df_cycles[ordered_cols_cycles]
print("\n=== Cycle-level DataFrame (df_cycles) ===")
print(df_cycles.head())
# Save cycle-level summary
output_file_cycles = os.path.join(CURVES_DIR, "merged_dataset_cycles.csv")
#!/bin/bash
# python predict_hysteretic_curves.py --W 2 --B 29 --n-trials 25
python predict_hysteretic_curves.py --W 2 --B 29 --n-trials 25
python predict_hysteretic_curves.py --W 2 --B 34 --n-trials 25
python predict_hysteretic_curves.py --W 3 --B 29 --n-trials 25
python predict_hysteretic_curves.py --W 3 --B 34 --n-trials 25