Compare revisions: ost/ml/virtual-quality-control-for-injection-molding/project
Commits on Source (3)
@@ -3,9 +3,9 @@ stages:
 
 .run_script:
   stage: run_scripts
-  image: python:3-alpine
+  image: python:3-bookworm
   tags:
-    - amd64 # needed for matplotlib
+    - amd64 # needed for matplotlib & scikit-learn
   variables:
     PIP_ROOT_USER_ACTION: ignore
   before_script:
@@ -47,3 +47,16 @@ multi_feature_regression_manual_features:
   script:
     - vqcfim multi-feature-regression --train-data dataset/InjectionMolding_Train.csv --out out --target 'mass'
       --features Inj1PosVolAct_Var Inj1PrsAct_meanOfInjPhase ClpFceAct_1stPCscore
+
+model_evaluation:
+  extends: .run_script
+  needs:
+    - job: multi_feature_regression_p_value
+      artifacts: true
+  artifacts:
+    expire_in: 1d
+    paths:
+      - out/
+  script:
+    - vqcfim model-evaluation --model out/multi_feature_regression_model.pickle --test-data dataset/InjectionMolding_Test.csv --target mass --out out
+      --features Inj1PosVolAct_Var Inj1PrsAct_meanOfInjPhase Inj1HtgEd3Act_1stPCscore ClpFceAct_1stPCscore
@@ -75,6 +75,29 @@
                 "Inj1PrsAct_meanOfInjPhase",
                 "ClpFceAct_1stPCscore"
             ]
-        }
+        },
+        {
+            "name": "Python Debugger: Model Evaluation",
+            "type": "debugpy",
+            "request": "launch",
+            "program": "${workspaceFolder}/src/app.py",
+            "console": "integratedTerminal",
+            "args": [
+                "model-evaluation",
+                "-m",
+                "out/multi_feature_regression_model.pickle",
+                "--test-data",
+                "dataset/InjectionMolding_Test.csv",
+                "--target",
+                "mass",
+                "-o",
+                "out",
+                "-f",
+                "Inj1PosVolAct_Var",
+                "Inj1PrsAct_meanOfInjPhase",
+                "Inj1HtgEd3Act_1stPCscore",
+                "ClpFceAct_1stPCscore"
+            ]
+        }
     ]
 }
@@ -7,6 +7,7 @@ dependencies = [
     "seaborn >= 0.13.2, < 1.0.0",
     "matplotlib >= 3.9.2, < 4.0.0",
     "statsmodels >= 0.14.4, < 1.0.0",
+    "scikit-learn >= 1.5.2, < 2.0.0",
 ]
 maintainers = [
     {name = "Andri Joos"},
@@ -6,6 +6,7 @@ import matplotlib.pyplot as plt
 import math
 from typing import List, Tuple
 import statsmodels.api as sm
+from sklearn.metrics import mean_squared_error  # not deprecated; only the squared param is
 
 TRAIN_DATA_ARG = '--train-data'
 TRAIN_DATA_ARG_SHORT = '-t'
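
A note on the inline comment above: newer scikit-learn releases deprecate only the squared keyword of mean_squared_error (its RMSE use moved to the separate root_mean_squared_error helper, available in the versions pinned here); the function itself remains supported. A minimal sketch with made-up numbers:

import numpy as np
from sklearn.metrics import mean_squared_error, root_mean_squared_error

y_true = np.array([3.0, 2.5, 4.0])
y_pred = np.array([2.8, 2.7, 4.2])

mse = mean_squared_error(y_true, y_pred)        # fine: the deprecated squared= argument is not used
rmse = root_mean_squared_error(y_true, y_pred)  # replacement for mean_squared_error(..., squared=False)
print(mse, rmse)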
@@ -20,6 +21,9 @@ P_VALUE_THRESHOLD_ARG = '--p-value-threshold'
 DEFAULT_P_VALUE_THRESHOLD = 0.05
 FEATURES_ARG = '--features'
 FEATURES_ARG_SHORT = '-f'
+MODEL_ARG = '--model'
+MODEL_ARG_SHORT = '-m'
+TEST_DATA_ARG = '--test-data'
 
 PVALUE_COLUMN_NAME = 'p-value'
 RSQUARED_COLUMN_NAME = 'R^2'
@@ -40,7 +44,7 @@ def get_possible_features_from_p_value(data: pd.DataFrame, target: str, p_value
     return possible_features.where(possible_features[PVALUE_COLUMN_NAME] < p_value_threshold).dropna()
 
 
-def multi_feature_regression_model(train_data: pd.DataFrame, selected_features: List[str] | None, p_value_threshold: float | None, target: str) -> sm.OLS:
+def multi_feature_regression_model(train_data: pd.DataFrame, selected_features: List[str] | None, p_value_threshold: float | None, target: str) -> Tuple[sm.OLS, List[str]]:
     features: List[str] = None
     if selected_features is not None and p_value_threshold is None:
         features = selected_features
@@ -149,14 +153,37 @@ R^2: {best_feature[RSQUARED_COLUMN_NAME]}
 def multi_feature_regression(train_data_file: Path, target: str, selected_features: List[str] | None, p_value_threshold: float | None, out_dir: Path):
     train_data = pd.read_csv(train_data_file)
+    y = train_data[target]
 
-    model = multi_feature_regression_model(train_data, selected_features, p_value_threshold, target)
+    model, features = multi_feature_regression_model(train_data, selected_features, p_value_threshold, target)
     print(model.summary())
 
     ensure_directory(out_dir)
-    multi_feature_regression_results_file = out_dir / 'multi_feature_regression_results.txt'
+    multi_feature_regression_results_file = out_dir / 'multi_feature_regression_train_results.txt'
     with open(multi_feature_regression_results_file, 'w') as f:
         f.write(f'''features: {features}
-rsquared: {model.rsquared}
+rsquared: {model.rsquared},
+train_MSE: {mean_squared_error(y, model.fittedvalues)}
 ''')
 
     ensure_directory(out_dir)
     model_file = out_dir / 'multi_feature_regression_model.pickle'
     model.save(model_file)
+
+
+def model_evaluation(model_file: Path, test_data_file: Path, features: List[str], target: str, out_dir: Path):
+    test_data = pd.read_csv(test_data_file)
+    model: sm.OLS = sm.load(model_file)
+
+    X_test = sm.add_constant(test_data[features])
+    y_test = test_data[target]
+    y_pred = model.predict(X_test)
+
+    test_mse = mean_squared_error(y_test, y_pred)
+    print(f'Test MSE: {test_mse}')
+
+    ensure_directory(out_dir)
+    model_evaluation_file = out_dir / 'model_evaluation.txt'
+    with open(model_evaluation_file, 'w') as f:
+        f.write(f'''test_MSE: {test_mse}
+''')
 
 
 def main():
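
Since the pickled statsmodels results are reused across jobs, the evaluation step must rebuild the same design matrix used at fit time, including the intercept column that sm.add_constant prepends. A minimal, self-contained sketch of that save/load/predict round trip on synthetic data (all names below are illustrative, not from the project):

import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error

rng = np.random.default_rng(0)
X = pd.DataFrame(rng.normal(size=(50, 2)), columns=['x1', 'x2'])
y = 1.0 + 2.0 * X['x1'] - 0.5 * X['x2'] + rng.normal(scale=0.1, size=50)

results = sm.OLS(y, sm.add_constant(X)).fit()   # fit with an intercept column
results.save('model.pickle')                    # same persistence calls as in the diff
loaded = sm.load('model.pickle')

y_pred = loaded.predict(sm.add_constant(X))     # the constant must be re-added before predicting
print(mean_squared_error(y, y_pred))            # in-sample MSE, analogous to train_MSE above

Note that sm.load returns the fitted results object (with predict and fittedvalues), rather than the bare sm.OLS model the annotation in the diff suggests.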
@@ -185,6 +212,14 @@ def main():
     multi_feature_regression_features_group.add_argument(P_VALUE_THRESHOLD_ARG, action='store', type=float, required=False, default=None)
     multi_feature_regression_subparser.set_defaults(func=lambda train_data, target, out, features, p_value_threshold, func: multi_feature_regression(train_data, target, features, p_value_threshold, out))
 
+    model_evaluation_subparser = subparsers.add_parser('model-evaluation', aliases=['me'], description='Evaluates a model')
+    model_evaluation_subparser.add_argument(MODEL_ARG, MODEL_ARG_SHORT, action='store', type=Path, required=True)
+    model_evaluation_subparser.add_argument(TEST_DATA_ARG, action='store', type=Path, required=True)
+    model_evaluation_subparser.add_argument(TARGET_ARG, action='store', type=str, required=True)
+    model_evaluation_subparser.add_argument(OUT_DIR_ARG, OUT_DIR_ARG_SHORT, action='store', type=Path, required=False, default=DEFAULT_OUT_DIR)
+    model_evaluation_subparser.add_argument(FEATURES_ARG, FEATURES_ARG_SHORT, action='store', type=str, required=True, nargs='+')
+    model_evaluation_subparser.set_defaults(func=lambda model, test_data, target, out, features, func: model_evaluation(model, test_data, features, target, out))
+
     parsed_args = argument_parser.parse_args()
     args = vars(parsed_args)
     parsed_args.func(**args)
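
For readers unfamiliar with the dispatch style in main(): set_defaults stores one callable per subcommand, and because vars(parsed_args) also contains the func entry itself, each lambda accepts and discards a func parameter. A minimal sketch with a hypothetical demo subcommand:

import argparse

parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(required=True)

demo_subparser = subparsers.add_parser('demo')
demo_subparser.add_argument('--name', required=True)
# 'func' ends up in the parsed namespace too, so the lambda must accept (and ignore) it
demo_subparser.set_defaults(func=lambda name, func: print(f'hello {name}'))

parsed_args = parser.parse_args(['demo', '--name', 'world'])
parsed_args.func(**vars(parsed_args))  # prints: hello world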