feat(main): Implement model saving functionality and improve DataLoader stability

parent 0dd8cedc
...@@ -29,6 +29,8 @@ from __future__ import annotations ...@@ -29,6 +29,8 @@ from __future__ import annotations
import argparse import argparse
from typing import Dict, Tuple, List, Optional, cast from typing import Dict, Tuple, List, Optional, cast
import gc import gc
import os
import joblib
import numpy as np import numpy as np
import pandas as pd import pandas as pd
...@@ -56,6 +58,10 @@ torch.manual_seed(SEED) ...@@ -56,6 +58,10 @@ torch.manual_seed(SEED)
if torch.cuda.is_available(): if torch.cuda.is_available():
torch.backends.cudnn.benchmark = True torch.backends.cudnn.benchmark = True
# DataLoader knobs (kept safe)
PIN_MEMORY_ON_CUDA = True
# ------------------------- # -------------------------
# Utils # Utils
# ------------------------- # -------------------------
...@@ -104,7 +110,9 @@ def make_windows_for_case_fast( ...@@ -104,7 +110,9 @@ def make_windows_for_case_fast(
xw = sliding_window_view(x, window_shape=(window_size, x.shape[1]))[:, 0, :, :] xw = sliding_window_view(x, window_shape=(window_size, x.shape[1]))[:, 0, :, :]
else: else:
# fallback # fallback
xw = np.stack([x[i - window_size + 1:i + 1] for i in range(window_size - 1, n)], axis=0).astype(np.float32) xw = np.stack(
[x[i - window_size + 1:i + 1] for i in range(window_size - 1, n)],
axis=0).astype(np.float32)
yw = y[window_size - 1:] yw = y[window_size - 1:]
iw = idx[window_size - 1:] iw = idx[window_size - 1:]
...@@ -247,7 +255,13 @@ def make_case_folds(train_pool: List[int], seed=123, n_splits=5): ...@@ -247,7 +255,13 @@ def make_case_folds(train_pool: List[int], seed=123, n_splits=5):
# ------------------------- # -------------------------
# DataLoader: STABLE (no multiprocessing in CV) # DataLoader: STABLE (no multiprocessing in CV)
# ------------------------- # -------------------------
def make_loader(ds: Dataset, batch_size: int, shuffle: bool, device: str, use_workers: bool) -> DataLoader: def make_loader(
ds: Dataset,
batch_size: int,
shuffle: bool,
device: str,
use_workers: bool
) -> DataLoader:
pin = (device == "cuda") and PIN_MEMORY_ON_CUDA pin = (device == "cuda") and PIN_MEMORY_ON_CUDA
# Critical stability choice: # Critical stability choice:
...@@ -296,18 +310,23 @@ def train_one_fold_cached( # type: ignore ...@@ -296,18 +310,23 @@ def train_one_fold_cached( # type: ignore
# --- scalers on TRAIN only --- # --- scalers on TRAIN only ---
scaler_x = StandardScaler() scaler_x = StandardScaler()
scaler_x.fit(x_train.reshape(-1, feature_dim)) scaler_x.fit(x_train.reshape(-1, feature_dim))
x_train_scaled = scaler_x.transform(x_train.reshape(-1, feature_dim)).reshape(x_train.shape).astype(np.float32, copy=False) x_train_scaled = scaler_x.transform(
x_val_scaled = scaler_x.transform(x_val.reshape(-1, feature_dim)).reshape(x_val.shape).astype(np.float32, copy=False) x_train.reshape(-1, feature_dim)).reshape(x_train.shape).astype(np.float32, copy=False)
x_val_scaled = scaler_x.transform(
x_val.reshape(-1, feature_dim)).reshape(x_val.shape).astype(np.float32, copy=False)
scaler_y = StandardScaler() scaler_y = StandardScaler()
y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1)).ravel().astype(np.float32, copy=False) y_train_scaled = scaler_y.fit_transform(
y_train.reshape(-1, 1)).ravel().astype(np.float32, copy=False)
train_ds = WindowDataset(x_train_scaled, y_train_scaled) train_ds = WindowDataset(x_train_scaled, y_train_scaled)
val_ds = WindowDataset(x_val_scaled, np.zeros((x_val_scaled.shape[0],), dtype=np.float32)) val_ds = WindowDataset(x_val_scaled, np.zeros((x_val_scaled.shape[0],), dtype=np.float32))
# CV: use_workers=False (avoid too many open files) # CV: use_workers=False (avoid too many open files)
train_loader = make_loader(train_ds, batch_size=batch_size, shuffle=True, device=device, use_workers=False) train_loader = make_loader(
val_loader = make_loader(val_ds, batch_size=batch_size, shuffle=False, device=device, use_workers=False) train_ds, batch_size=batch_size, shuffle=True, device=device, use_workers=False)
val_loader = make_loader(
val_ds, batch_size=batch_size, shuffle=False, device=device, use_workers=False)
model = LSTMRegressor( model = LSTMRegressor(
input_dim=feature_dim, input_dim=feature_dim,
...@@ -470,15 +489,18 @@ def train_final_full_trainpool_cached( # type: ignore ...@@ -470,15 +489,18 @@ def train_final_full_trainpool_cached( # type: ignore
scaler_x = StandardScaler() scaler_x = StandardScaler()
scaler_x.fit(x_train.reshape(-1, feature_dim)) scaler_x.fit(x_train.reshape(-1, feature_dim))
x_train_scaled = scaler_x.transform(x_train.reshape(-1, feature_dim)).reshape(x_train.shape).astype(np.float32, copy=False) x_train_scaled = scaler_x.transform(
x_train.reshape(-1, feature_dim)).reshape(x_train.shape).astype(np.float32, copy=False)
scaler_y = StandardScaler() scaler_y = StandardScaler()
y_train_scaled = scaler_y.fit_transform(y_train.reshape(-1, 1)).ravel().astype(np.float32, copy=False) y_train_scaled = scaler_y.fit_transform(
y_train.reshape(-1, 1)).ravel().astype(np.float32, copy=False)
train_ds = WindowDataset(x_train_scaled, y_train_scaled) train_ds = WindowDataset(x_train_scaled, y_train_scaled)
# Final: still keep workers OFF for stability; you can try True later. # Final: still keep workers OFF for stability; you can try True later.
train_loader = make_loader(train_ds, batch_size=batch_size, shuffle=True, device=device, use_workers=False) train_loader = make_loader(
train_ds, batch_size=batch_size, shuffle=True, device=device, use_workers=False)
model = LSTMRegressor( model = LSTMRegressor(
input_dim=feature_dim, input_dim=feature_dim,
...@@ -605,7 +627,6 @@ def main(): ...@@ -605,7 +627,6 @@ def main():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--W", type=int, required=True) parser.add_argument("--W", type=int, required=True)
parser.add_argument("--B", type=int, required=True) parser.add_argument("--B", type=int, required=True)
parser.add_argument("--it", type=int, required=True)
args = parser.parse_args() args = parser.parse_args()
w_val = args.W w_val = args.W
...@@ -623,6 +644,10 @@ def main(): ...@@ -623,6 +644,10 @@ def main():
data_file = f"../../data/hysteretic_curves/{w_val}W/H{h_val}_B{b_val}/merged_dataset_points.csv" data_file = f"../../data/hysteretic_curves/{w_val}W/H{h_val}_B{b_val}/merged_dataset_points.csv"
model_dir = f"../../models/hysteretic_curves/{w_val}W/models"
os.makedirs(model_dir, exist_ok=True)
model_path = os.path.join(model_dir, f"B{b_val}_H{h_val}.joblib")
n_train_case = 0 n_train_case = 0
predict_case = 0 predict_case = 0
if w_val == 2: if w_val == 2:
...@@ -652,9 +677,6 @@ def main(): ...@@ -652,9 +677,6 @@ def main():
patience_tune = 5 patience_tune = 5
max_epachs_final = 200 max_epachs_final = 200
# DataLoader knobs (kept safe)
PIN_MEMORY_ON_CUDA = True
# torch.compile knobs (stable choice) # torch.compile knobs (stable choice)
# - OFF for CV (many recompiles, can be slower + more resources) # - OFF for CV (many recompiles, can be slower + more resources)
# - ON for FINAL (single compile) # - ON for FINAL (single compile)
...@@ -739,10 +761,14 @@ def main(): ...@@ -739,10 +761,14 @@ def main():
continue continue
fold_str = ", ".join([f"{v:.3f}" for v in fold_rmses.tolist()]) fold_str = ", ".join([f"{v:.3f}" for v in fold_rmses.tolist()])
print(f"[Trial {t:02d}] CV mean RMSE={mean_rmse:.4f} | std={std_rmse:.4f} | folds=[{fold_str}]") print(
f"[Trial {t:02d}]"
f" CV mean RMSE={mean_rmse:.4f} | std={std_rmse:.4f} | folds=[{fold_str}]")
print(f" params: {params}") print(f" params: {params}")
if (mean_rmse < best["mean"]) or (np.isclose(mean_rmse, best["mean"]) and std_rmse < best["std"]): if (
mean_rmse < best["mean"]) or (
np.isclose(mean_rmse, best["mean"]) and std_rmse < best["std"]):
best.update(mean=mean_rmse, std=std_rmse, params=params) best.update(mean=mean_rmse, std=std_rmse, params=params)
gc.collect() gc.collect()
...@@ -803,6 +829,32 @@ def main(): ...@@ -803,6 +829,32 @@ def main():
scaler_x, scaler_y = final_scalers scaler_x, scaler_y = final_scalers
# -------------------------
# Save best model bundle (joblib)
# -------------------------
bundle = {
"framework": "pytorch",
"model_class": "LSTMRegressor",
"state_dict": final_state, # already CPU tensors
"params": {**best_params_model, "window_size": ws_best},
"feature_cols": feature_cols,
"target_col": target_col,
"scaler_x": scaler_x,
"scaler_y": scaler_y,
"meta": {
"W": w_val,
"B": b_val,
"H": h_val,
"n_train_case": n_train_case,
"predict_case": predict_case,
"seed": SEED,
"device_trained": device,
},
}
joblib.dump(bundle, model_path, compress=3)
print(f"Saved best model bundle to: {model_path}")
# ---- Predict predict_case ---- # ---- Predict predict_case ----
df_test = df[df["Case"] == predict_case].copy() df_test = df[df["Case"] == predict_case].copy()
...@@ -812,7 +864,8 @@ def main(): ...@@ -812,7 +864,8 @@ def main():
f"Case {predict_case} has fewer points ({len(df_test)}) than window_size ({ws_best})." f"Case {predict_case} has fewer points ({len(df_test)}) than window_size ({ws_best})."
) )
x_test_scaled = scaler_x.transform(x_test.reshape(-1, feature_dim)).reshape(x_test.shape).astype(np.float32, copy=False) x_test_scaled = scaler_x.transform(
x_test.reshape(-1, feature_dim)).reshape(x_test.shape).astype(np.float32, copy=False)
final_model = LSTMRegressor( final_model = LSTMRegressor(
input_dim=feature_dim, input_dim=feature_dim,
...@@ -822,12 +875,18 @@ def main(): ...@@ -822,12 +875,18 @@ def main():
dropout=best_params_model["dropout"] dropout=best_params_model["dropout"]
).to(device) ).to(device)
# compile not necessary for predict, but harmless if final was compiled; keep OFF to avoid overhead # compile not necessary for predict, but harmless if final was compiled; keep OFF to avoid
# overhead
final_model.load_state_dict(final_state) final_model.load_state_dict(final_state)
final_model.eval() final_model.eval()
test_ds = WindowDataset(x_test_scaled, np.zeros((x_test_scaled.shape[0],), dtype=np.float32)) test_ds = WindowDataset(x_test_scaled, np.zeros((x_test_scaled.shape[0],), dtype=np.float32))
test_loader = make_loader(test_ds, batch_size=best_params_model["batch_size"], shuffle=False, device=device, use_workers=False) test_loader = make_loader(
test_ds,
batch_size=best_params_model["batch_size"],
shuffle=False,
device=device,
use_workers=False)
use_amp = (device == "cuda") use_amp = (device == "cuda")
preds = [] preds = []
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment