Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • ost/ml/virtual-quality-control-for-injection-molding/project
1 result
Show changes
Commits on Source (2)
...@@ -3,9 +3,9 @@ stages: ...@@ -3,9 +3,9 @@ stages:
.run_script: .run_script:
stage: run_scripts stage: run_scripts
image: python:3-alpine image: python:3-bookworm
tags: tags:
- amd64 # needed for matplotlib - amd64 # needed for matplotlib & scikit-learn
variables: variables:
PIP_ROOT_USER_ACTION: ignore PIP_ROOT_USER_ACTION: ignore
before_script: before_script:
...@@ -47,3 +47,16 @@ multi_feature_regression_manual_features: ...@@ -47,3 +47,16 @@ multi_feature_regression_manual_features:
script: script:
- vqcfim multi-feature-regression --train-data dataset/InjectionMolding_Train.csv --out out --target 'mass' - vqcfim multi-feature-regression --train-data dataset/InjectionMolding_Train.csv --out out --target 'mass'
--features Inj1PosVolAct_Var Inj1PrsAct_meanOfInjPhase ClpFceAct_1stPCscore --features Inj1PosVolAct_Var Inj1PrsAct_meanOfInjPhase ClpFceAct_1stPCscore
# CI job added in this revision: evaluates the pickled regression model on the test set.
# NOTE(review): leading indentation was lost in this page dump; in the real
# .gitlab-ci.yml the keys below are nested under the job — confirm against the repo.
model_evaluation:
extends: .run_script
needs:
# Consumes the model artifact produced by the p-value feature-selection job.
- job: multi_feature_regression_p_value
artifacts: true
artifacts:
# Keep the evaluation report for one day only.
expire_in: 1d
paths:
- out/
script:
- vqcfim model-evaluation --model out/multi_feature_regression_model.pickle --test-data dataset/InjectionMolding_Test.csv --target mass --out out
--features Inj1PosVolAct_Var Inj1PrsAct_meanOfInjPhase Inj1HtgEd3Act_1stPCscore ClpFceAct_1stPCscore
...@@ -75,6 +75,29 @@ ...@@ -75,6 +75,29 @@
"Inj1PrsAct_meanOfInjPhase", "Inj1PrsAct_meanOfInjPhase",
"ClpFceAct_1stPCscore" "ClpFceAct_1stPCscore"
] ]
},
{
"name": "Python Debugger: Model Evaluation",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/src/app.py",
"console": "integratedTerminal",
"args": [
"model-evaluation",
"-m",
"out/multi_feature_regression_model.pickle",
"--test-data",
"dataset/InjectionMolding_Test.csv",
"--target",
"mass",
"-o",
"out",
"-f",
"Inj1PosVolAct_Var",
"Inj1PrsAct_meanOfInjPhase",
"Inj1HtgEd3Act_1stPCscore",
"ClpFceAct_1stPCscore"
]
} }
] ]
} }
...@@ -7,6 +7,7 @@ dependencies = [ ...@@ -7,6 +7,7 @@ dependencies = [
"seaborn >= 0.13.2, < 1.0.0", "seaborn >= 0.13.2, < 1.0.0",
"matplotlib >= 3.9.2, < 4.0.0", "matplotlib >= 3.9.2, < 4.0.0",
"statsmodels >= 0.14.4, < 1.0.0", "statsmodels >= 0.14.4, < 1.0.0",
"scikit-learn >= 1.5.2, < 2.0.0",
] ]
maintainers = [ maintainers = [
{name = "Andri Joos"}, {name = "Andri Joos"},
......
...@@ -6,6 +6,7 @@ import matplotlib.pyplot as plt ...@@ -6,6 +6,7 @@ import matplotlib.pyplot as plt
import math import math
from typing import List, Tuple from typing import List, Tuple
import statsmodels.api as sm import statsmodels.api as sm
from sklearn.metrics import mean_squared_error # is not deprecated, only squared param
TRAIN_DATA_ARG = '--train-data' TRAIN_DATA_ARG = '--train-data'
TRAIN_DATA_ARG_SHORT = '-t' TRAIN_DATA_ARG_SHORT = '-t'
...@@ -20,6 +21,9 @@ P_VALUE_THRESHOLD_ARG = '--p-value-threshold' ...@@ -20,6 +21,9 @@ P_VALUE_THRESHOLD_ARG = '--p-value-threshold'
DEFAULT_P_VALUE_THRESHOLD = 0.05 DEFAULT_P_VALUE_THRESHOLD = 0.05
FEATURES_ARG = '--features' FEATURES_ARG = '--features'
FEATURES_ARG_SHORT = '-f' FEATURES_ARG_SHORT = '-f'
MODEL_ARG = '--model'
MODEL_ARG_SHORT = '-m'
TEST_DATA_ARG = '--test-data'
PVALUE_COLUMN_NAME = 'p-value' PVALUE_COLUMN_NAME = 'p-value'
RSQUARED_COLUMN_NAME = 'R^2' RSQUARED_COLUMN_NAME = 'R^2'
...@@ -40,7 +44,7 @@ def get_possible_features_from_p_value(data: pd.DataFrame, target: str, p_value_ ...@@ -40,7 +44,7 @@ def get_possible_features_from_p_value(data: pd.DataFrame, target: str, p_value_
return possible_features.where(possible_features[PVALUE_COLUMN_NAME] < p_value_threshold).dropna() return possible_features.where(possible_features[PVALUE_COLUMN_NAME] < p_value_threshold).dropna()
def multi_feature_regression_model(train_data: pd.DataFrame, selected_features: List[str] | None, p_value_threshold: float | None, target: str) -> sm.OLS: def multi_feature_regression_model(train_data: pd.DataFrame, selected_features: List[str] | None, p_value_threshold: float | None, target: str) -> Tuple[sm.OLS, List[str]]:
features: List[str] = None features: List[str] = None
if selected_features is not None and p_value_threshold is None: if selected_features is not None and p_value_threshold is None:
features = selected_features features = selected_features
...@@ -149,14 +153,37 @@ R^2: {best_feature[RSQUARED_COLUMN_NAME]} ...@@ -149,14 +153,37 @@ R^2: {best_feature[RSQUARED_COLUMN_NAME]}
def multi_feature_regression(train_data_file: Path, target: str, selected_features: List[str] | None, p_value_threshold: float | None, out_dir: Path): def multi_feature_regression(train_data_file: Path, target: str, selected_features: List[str] | None, p_value_threshold: float | None, out_dir: Path):
train_data = pd.read_csv(train_data_file) train_data = pd.read_csv(train_data_file)
y = train_data[target]
model, features = multi_feature_regression_model(train_data, selected_features, p_value_threshold, target) model, features = multi_feature_regression_model(train_data, selected_features, p_value_threshold, target)
print(model.summary()) print(model.summary())
ensure_directory(out_dir) ensure_directory(out_dir)
multi_feature_regression_results_file = out_dir / 'multi_feature_regression_results.txt' multi_feature_regression_results_file = out_dir / 'multi_feature_regression_train_results.txt'
with open(multi_feature_regression_results_file, 'w') as f: with open(multi_feature_regression_results_file, 'w') as f:
f.write(f'''features: {features} f.write(f'''features: {features}
rsquared: {model.rsquared} rsquared: {model.rsquared},
train_MSE: {mean_squared_error(y, model.fittedvalues)}
''')
ensure_directory(out_dir)
model_file = out_dir / 'multi_feature_regression_model.pickle'
model.save(model_file)
def model_evaluation(model_file: Path, test_data_file: Path, features: List[str], target: str, out_dir: Path) -> None:
    """Evaluate a saved OLS regression model on held-out test data.

    Loads the pickled statsmodels results from ``model_file``, predicts the
    ``target`` column of ``test_data_file`` from the given ``features``,
    prints the test MSE, and writes it to ``<out_dir>/model_evaluation.txt``.

    Args:
        model_file: Pickle written by ``multi_feature_regression`` (model.save).
        test_data_file: CSV file containing the feature columns and the target.
        features: Feature column names; must match the columns the model was
            trained on — TODO confirm ordering matters for statsmodels predict.
        target: Name of the ground-truth column in the test data.
        out_dir: Directory the report is written to (created if missing).
    """
    test_data = pd.read_csv(test_data_file)

    # SECURITY: sm.load unpickles arbitrary objects — only ever load model
    # files produced by this pipeline, never untrusted input.
    model: sm.OLS = sm.load(model_file)

    # add_constant mirrors the intercept term added at training time.
    X_test = sm.add_constant(test_data[features])
    y_test = test_data[target]
    y_pred = model.predict(X_test)

    test_mse = mean_squared_error(y_test, y_pred)
    print(f'Test MSE: {test_mse}')

    ensure_directory(out_dir)
    model_evaluation_file = out_dir / 'model_evaluation.txt'
    with open(model_evaluation_file, 'w') as f:
        f.write(f'''test_MSE: {test_mse}
''')
def main(): def main():
...@@ -185,6 +212,14 @@ def main(): ...@@ -185,6 +212,14 @@ def main():
multi_feature_regression_features_group.add_argument(P_VALUE_THRESHOLD_ARG, action='store', type=float, required=False, default=None) multi_feature_regression_features_group.add_argument(P_VALUE_THRESHOLD_ARG, action='store', type=float, required=False, default=None)
multi_feature_regression_subparser.set_defaults(func=lambda train_data, target, out, features, p_value_threshold, func: multi_feature_regression(train_data, target, features, p_value_threshold, out)) multi_feature_regression_subparser.set_defaults(func=lambda train_data, target, out, features, p_value_threshold, func: multi_feature_regression(train_data, target, features, p_value_threshold, out))
model_evaluation_subparser = subparsers.add_parser('model-evaluation', aliases=['me'], description='Evaluates a model')
model_evaluation_subparser.add_argument(MODEL_ARG, MODEL_ARG_SHORT, action='store', type=Path, required=True)
model_evaluation_subparser.add_argument(TEST_DATA_ARG, action='store', type=Path, required=True)
model_evaluation_subparser.add_argument(TARGET_ARG, action='store', type=str, required=True)
model_evaluation_subparser.add_argument(OUT_DIR_ARG, OUT_DIR_ARG_SHORT, action='store', type=Path, required=False, default=DEFAULT_OUT_DIR)
model_evaluation_subparser.add_argument(FEATURES_ARG, FEATURES_ARG_SHORT, action='store', type=str, required=True, nargs='+')
model_evaluation_subparser.set_defaults(func=lambda model, test_data, target, out, features, func: model_evaluation(model, test_data, features, target, out))
parsed_args = argument_parser.parse_args() parsed_args = argument_parser.parse_args()
args = vars(parsed_args) args = vars(parsed_args)
parsed_args.func(**args) parsed_args.func(**args)
......