feat(main): Implement model saving functionality and improve DataLoader stability

parent 0dd8cedc
......@@ -29,6 +29,8 @@ from __future__ import annotations
import argparse
from typing import Dict, Tuple, List, Optional, cast
import gc
import os
import joblib
import numpy as np
import pandas as pd
......@@ -56,6 +58,10 @@ torch.manual_seed(SEED)
if torch.cuda.is_available():
torch.backends.cudnn.benchmark = True
# DataLoader knobs (kept safe)
PIN_MEMORY_ON_CUDA = True
# -------------------------
# Utils
# -------------------------
......@@ -104,7 +110,9 @@ def make_windows_for_case_fast(
xw = sliding_window_view(x, window_shape=(window_size, x.shape[1]))[:, 0, :, :]
else:
# fallback
xw = np.stack([x[i - window_size + 1:i + 1] for i in range(window_size - 1, n)], axis=0).astype(np.float32)
xw = np.stack(
[x[i - window_size + 1:i + 1] for i in range(window_size - 1, n)],
axis=0).astype(np.float32)
yw = y[window_size - 1:]
iw = idx[window_size - 1:]
......@@ -247,7 +255,13 @@ def make_case_folds(train_pool: List[int], seed=123, n_splits=5):
# -------------------------
# DataLoader: STABLE (no multiprocessing in CV)
# -------------------------
def make_loader(ds: Dataset, batch_size: int, shuffle: bool, device: str, use_workers: bool) -> DataLoader:
def make_loader(
ds: Dataset,
batch_size: int,
shuffle: bool,
device: str,
use_workers: bool
) -> DataLoader:
pin = (device == "cuda") and PIN_MEMORY_ON_CUDA
# Critical stability choice:
......@@ -296,18 +310,23 @@ def train_one_fold_cached( # type: ignore
# --- scalers on TRAIN only ---
scaler_x = StandardScaler()
scaler_x.fit(x_train.reshape(-1, feature_dim))
x_train_scaled = scaler_x.transform(x_train.reshape(-1, feature_dim)).reshape(x_train.shape).astype(np.float32, copy=False)
x_val_scaled = scaler_x.transform(x_val.reshape(-1, feature_dim)).reshape(x_val.shape).astype(np.float32, copy=False)
x_train_scaled = scaler_x.transform(
x_train.reshape(-1, feature_dim)).reshape(x_train.shape).astype(np.float32, copy=False)
x_val_scaled = scaler_x.transform(
x_val.reshape(-1, feature_dim)).reshape(x_val.shape).astype(np.float32, copy=False)
scaler_y = StandardScaler()
y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1)).ravel().astype(np.float32, copy=False)
y_train_scaled = scaler_y.fit_transform(
y_train.reshape(-1, 1)).ravel().astype(np.float32, copy=False)
train_ds = WindowDataset(x_train_scaled, y_train_scaled)
val_ds = WindowDataset(x_val_scaled, np.zeros((x_val_scaled.shape[0],), dtype=np.float32))
# CV: use_workers=False (avoid too many open files)
train_loader = make_loader(train_ds, batch_size=batch_size, shuffle=True, device=device, use_workers=False)
val_loader = make_loader(val_ds, batch_size=batch_size, shuffle=False, device=device, use_workers=False)
train_loader = make_loader(
train_ds, batch_size=batch_size, shuffle=True, device=device, use_workers=False)
val_loader = make_loader(
val_ds, batch_size=batch_size, shuffle=False, device=device, use_workers=False)
model = LSTMRegressor(
input_dim=feature_dim,
......@@ -470,15 +489,18 @@ def train_final_full_trainpool_cached( # type: ignore
scaler_x = StandardScaler()
scaler_x.fit(x_train.reshape(-1, feature_dim))
x_train_scaled = scaler_x.transform(x_train.reshape(-1, feature_dim)).reshape(x_train.shape).astype(np.float32, copy=False)
x_train_scaled = scaler_x.transform(
x_train.reshape(-1, feature_dim)).reshape(x_train.shape).astype(np.float32, copy=False)
scaler_y = StandardScaler()
y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1)).ravel().astype(np.float32, copy=False)
y_train_scaled = scaler_y.fit_transform(
y_train.reshape(-1, 1)).ravel().astype(np.float32, copy=False)
train_ds = WindowDataset(x_train_scaled, y_train_scaled)
# Final: still keep workers OFF for stability; you can try True later.
train_loader = make_loader(train_ds, batch_size=batch_size, shuffle=True, device=device, use_workers=False)
train_loader = make_loader(
train_ds, batch_size=batch_size, shuffle=True, device=device, use_workers=False)
model = LSTMRegressor(
input_dim=feature_dim,
......@@ -605,7 +627,6 @@ def main():
parser = argparse.ArgumentParser()
parser.add_argument("--W", type=int, required=True)
parser.add_argument("--B", type=int, required=True)
parser.add_argument("--it", type=int, required=True)
args = parser.parse_args()
w_val = args.W
......@@ -623,6 +644,10 @@ def main():
data_file = f"../../data/hysteretic_curves/{w_val}W/H{h_val}_B{b_val}/merged_dataset_points.csv"
model_dir = f"../../models/hysteretic_curves/{w_val}W/models"
os.makedirs(model_dir, exist_ok=True)
model_path = os.path.join(model_dir, f"B{b_val}_H{h_val}.joblib")
n_train_case = 0
predict_case = 0
if w_val == 2:
......@@ -652,9 +677,6 @@ def main():
patience_tune = 5
max_epachs_final = 200
# DataLoader knobs (kept safe)
PIN_MEMORY_ON_CUDA = True
# torch.compile knobs (stable choice)
# - OFF for CV (many recompiles, can be slower + more resources)
# - ON for FINAL (single compile)
......@@ -739,10 +761,14 @@ def main():
continue
fold_str = ", ".join([f"{v:.3f}" for v in fold_rmses.tolist()])
print(f"[Trial {t:02d}] CV mean RMSE={mean_rmse:.4f} | std={std_rmse:.4f} | folds=[{fold_str}]")
print(
f"[Trial {t:02d}]"
f" CV mean RMSE={mean_rmse:.4f} | std={std_rmse:.4f} | folds=[{fold_str}]")
print(f" params: {params}")
if (mean_rmse < best["mean"]) or (np.isclose(mean_rmse, best["mean"]) and std_rmse < best["std"]):
if (
mean_rmse < best["mean"]) or (
np.isclose(mean_rmse, best["mean"]) and std_rmse < best["std"]):
best.update(mean=mean_rmse, std=std_rmse, params=params)
gc.collect()
......@@ -803,6 +829,32 @@ def main():
scaler_x, scaler_y = final_scalers
# -------------------------
# Save best model bundle (joblib)
# -------------------------
bundle = {
"framework": "pytorch",
"model_class": "LSTMRegressor",
"state_dict": final_state, # already CPU tensors
"params": {**best_params_model, "window_size": ws_best},
"feature_cols": feature_cols,
"target_col": target_col,
"scaler_x": scaler_x,
"scaler_y": scaler_y,
"meta": {
"W": w_val,
"B": b_val,
"H": h_val,
"n_train_case": n_train_case,
"predict_case": predict_case,
"seed": SEED,
"device_trained": device,
},
}
joblib.dump(bundle, model_path, compress=3)
print(f"Saved best model bundle to: {model_path}")
# ---- Predict predict_case ----
df_test = df[df["Case"] == predict_case].copy()
......@@ -812,7 +864,8 @@ def main():
f"Case {predict_case} has fewer points ({len(df_test)}) than window_size ({ws_best})."
)
x_test_scaled = scaler_x.transform(x_test.reshape(-1, feature_dim)).reshape(x_test.shape).astype(np.float32, copy=False)
x_test_scaled = scaler_x.transform(
x_test.reshape(-1, feature_dim)).reshape(x_test.shape).astype(np.float32, copy=False)
final_model = LSTMRegressor(
input_dim=feature_dim,
......@@ -822,12 +875,18 @@ def main():
dropout=best_params_model["dropout"]
).to(device)
# compile not necessary for predict, but harmless if final was compiled; keep OFF to avoid overhead
# compile not necessary for predict, but harmless if final was compiled; keep OFF to avoid
# overhead
final_model.load_state_dict(final_state)
final_model.eval()
test_ds = WindowDataset(x_test_scaled, np.zeros((x_test_scaled.shape[0],), dtype=np.float32))
test_loader = make_loader(test_ds, batch_size=best_params_model["batch_size"], shuffle=False, device=device, use_workers=False)
test_loader = make_loader(
test_ds,
batch_size=best_params_model["batch_size"],
shuffle=False,
device=device,
use_workers=False)
use_amp = (device == "cuda")
preds = []
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment