Skip to content
Snippets Groups Projects
Commit 138d5fd4 authored by Andri Joos's avatar Andri Joos :blush:
Browse files

add model evaluation

parent 59d83d93
No related branches found
No related tags found
No related merge requests found
......@@ -47,3 +47,16 @@ multi_feature_regression_manual_features:
script:
- vqcfim multi-feature-regression --train-data dataset/InjectionMolding_Train.csv --out out --target 'mass'
--features Inj1PosVolAct_Var Inj1PrsAct_meanOfInjPhase ClpFceAct_1stPCscore
model_evaluation:
extends: .run_script
needs:
- job: multi_feature_regression_p_value
artifacts: true
artifacts:
expire_in: 1d
paths:
- out/
script:
- vqcfim model-evaluation --model out/multi_feature_regression_model.pickle --test-data dataset/InjectionMolding_Test.csv --target mass --out out
--features Inj1PosVolAct_Var Inj1PrsAct_meanOfInjPhase Inj1HtgEd3Act_1stPCscore ClpFceAct_1stPCscore
......@@ -75,6 +75,29 @@
"Inj1PrsAct_meanOfInjPhase",
"ClpFceAct_1stPCscore"
]
},
{
"name": "Python Debugger: Model Evaluation",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/src/app.py",
"console": "integratedTerminal",
"args": [
"model-evaluation",
"-m",
"out/multi_feature_regression_model.pickle",
"--test-data",
"dataset/InjectionMolding_Test.csv",
"--target",
"mass",
"-o",
"out",
"-f",
"Inj1PosVolAct_Var",
"Inj1PrsAct_meanOfInjPhase",
"Inj1HtgEd3Act_1stPCscore",
"ClpFceAct_1stPCscore"
]
}
]
}
......@@ -7,6 +7,7 @@ dependencies = [
"seaborn >= 0.13.2, < 1.0.0",
"matplotlib >= 3.9.2, < 4.0.0",
"statsmodels >= 0.14.4, < 1.0.0",
"scikit-learn >= 1.5.2, < 2.0.0",
]
maintainers = [
{name = "Andri Joos"},
......
......@@ -6,6 +6,7 @@ import matplotlib.pyplot as plt
import math
from typing import List, Tuple
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error # is not deprecated, only squared param
TRAIN_DATA_ARG = '--train-data'
TRAIN_DATA_ARG_SHORT = '-t'
......@@ -20,6 +21,9 @@ P_VALUE_THRESHOLD_ARG = '--p-value-threshold'
DEFAULT_P_VALUE_THRESHOLD = 0.05
FEATURES_ARG = '--features'
FEATURES_ARG_SHORT = '-f'
MODEL_ARG = '--model'
MODEL_ARG_SHORT = '-m'
TEST_DATA_ARG = '--test-data'
PVALUE_COLUMN_NAME = 'p-value'
RSQUARED_COLUMN_NAME = 'R^2'
......@@ -40,7 +44,7 @@ def get_possible_features_from_p_value(data: pd.DataFrame, target: str, p_value_
return possible_features.where(possible_features[PVALUE_COLUMN_NAME] < p_value_threshold).dropna()
def multi_feature_regression_model(train_data: pd.DataFrame, selected_features: List[str] | None, p_value_threshold: float | None, target: str) -> sm.OLS:
def multi_feature_regression_model(train_data: pd.DataFrame, selected_features: List[str] | None, p_value_threshold: float | None, target: str) -> Tuple[sm.OLS, List[str]]:
features: List[str] = None
if selected_features is not None and p_value_threshold is None:
features = selected_features
......@@ -149,14 +153,37 @@ R^2: {best_feature[RSQUARED_COLUMN_NAME]}
def multi_feature_regression(train_data_file: Path, target: str, selected_features: List[str] | None, p_value_threshold: float | None, out_dir: Path):
train_data = pd.read_csv(train_data_file)
y = train_data[target]
model, features = multi_feature_regression_model(train_data, selected_features, p_value_threshold, target)
print(model.summary())
ensure_directory(out_dir)
multi_feature_regression_results_file = out_dir / 'multi_feature_regression_results.txt'
multi_feature_regression_results_file = out_dir / 'multi_feature_regression_train_results.txt'
with open(multi_feature_regression_results_file, 'w') as f:
f.write(f'''features: {features}
rsquared: {model.rsquared}
rsquared: {model.rsquared},
train_MSE: {mean_squared_error(y, model.fittedvalues)}
''')
ensure_directory(out_dir)
model_file = out_dir / 'multi_feature_regression_model.pickle'
model.save(model_file)
def model_evaluation(model_file: Path, test_data_file: Path, features: List[str], target: str, out_dir: Path):
test_data = pd.read_csv(test_data_file)
model: sm.OLS = sm.load(model_file)
X_test = sm.add_constant(test_data[features])
y_test = test_data[target]
y_pred = model.predict(X_test)
test_mse = mean_squared_error(y_test, y_pred)
print(f'Test MSE: {test_mse}')
ensure_directory(out_dir)
model_evaluation_file = out_dir / 'model_evaluation.txt'
with open(model_evaluation_file, 'w') as f:
f.write(f'''test_MSE: {test_mse}
''')
def main():
......@@ -185,6 +212,14 @@ def main():
multi_feature_regression_features_group.add_argument(P_VALUE_THRESHOLD_ARG, action='store', type=float, required=False, default=None)
multi_feature_regression_subparser.set_defaults(func=lambda train_data, target, out, features, p_value_threshold, func: multi_feature_regression(train_data, target, features, p_value_threshold, out))
model_evaluation_subparser = subparsers.add_parser('model-evaluation', aliases=['me'], description='Evaluates a model')
model_evaluation_subparser.add_argument(MODEL_ARG, MODEL_ARG_SHORT, action='store', type=Path, required=True)
model_evaluation_subparser.add_argument(TEST_DATA_ARG, action='store', type=Path, required=True)
model_evaluation_subparser.add_argument(TARGET_ARG, action='store', type=str, required=True)
model_evaluation_subparser.add_argument(OUT_DIR_ARG, OUT_DIR_ARG_SHORT, action='store', type=Path, required=False, default=DEFAULT_OUT_DIR)
model_evaluation_subparser.add_argument(FEATURES_ARG, FEATURES_ARG_SHORT, action='store', type=str, required=True, nargs='+')
model_evaluation_subparser.set_defaults(func=lambda model, test_data, target, out, features, func: model_evaluation(model, test_data, features, target, out))
parsed_args = argument_parser.parse_args()
args = vars(parsed_args)
parsed_args.func(**args)
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment