Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • ost/ml/virtual-quality-control-for-injection-molding/project
1 result
Show changes
Commits on Source (2)
......@@ -60,3 +60,13 @@ model_evaluation:
script:
- vqcfim model-evaluation --model out/multi_feature_regression_model.pickle --test-data dataset/InjectionMolding_Test.csv --target mass --out out
--features Inj1PosVolAct_Var Inj1PrsAct_meanOfInjPhase Inj1HtgEd3Act_1stPCscore ClpFceAct_1stPCscore
quadratic_term:
extends: .run_script
artifacts:
expire_in: 1d
paths:
- out/
script:
- vqcfim higher-order-terms --train-data dataset/InjectionMolding_Train.csv --test-data dataset/InjectionMolding_Test.csv --target mass --out out
--features Inj1PosVolAct_Var Inj1PrsAct_meanOfInjPhase Inj1HtgEd3Act_1stPCscore ClpFceAct_1stPCscore --degree 2
......@@ -98,6 +98,58 @@
"Inj1HtgEd3Act_1stPCscore",
"ClpFceAct_1stPCscore"
]
},
{
"name": "Python Debugger: Higher Order Terms",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/src/app.py",
"console": "integratedTerminal",
"args": [
"higher-order-terms",
"--train-data",
"dataset/InjectionMolding_Train.csv",
"--test-data",
"dataset/InjectionMolding_Test.csv",
"--target",
"mass",
"-o",
"out",
"-f",
"Inj1PosVolAct_Var",
"Inj1PrsAct_meanOfInjPhase",
"Inj1HtgEd3Act_1stPCscore",
"ClpFceAct_1stPCscore",
"--degree",
"2"
]
},
{
"name": "Python Debugger: Best Higher Order Terms",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/src/app.py",
"console": "integratedTerminal",
"args": [
"best-higher-order-terms",
"--train-data",
"dataset/InjectionMolding_Train.csv",
"--test-data",
"dataset/InjectionMolding_Test.csv",
"--target",
"mass",
"-o",
"out/higher-order-terms",
"-f",
"Inj1PosVolAct_Var",
"Inj1PrsAct_meanOfInjPhase",
"Inj1HtgEd3Act_1stPCscore",
"ClpFceAct_1stPCscore",
"--degree-bottom-boundary",
"1",
"--degree-top-boundary",
"50"
]
}
]
}
......@@ -7,6 +7,9 @@ import math
from typing import List, Tuple
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error # is not deprecated, only squared param
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
TRAIN_DATA_ARG = '--train-data'
TRAIN_DATA_ARG_SHORT = '-t'
......@@ -24,9 +27,17 @@ FEATURES_ARG_SHORT = '-f'
MODEL_ARG = '--model'
MODEL_ARG_SHORT = '-m'
TEST_DATA_ARG = '--test-data'
DEGREE_ARG = '--degree'
INTERACTION_ONLY_ARG = '--interaction-only'
DEGREE_BOTTOM_BOUNDARY_ARG = '--degree-bottom-boundary'
DEGREE_TOP_ARG = '--degree-top-boundary'
PVALUE_COLUMN_NAME = 'p-value'
RSQUARED_COLUMN_NAME = 'R^2'
DEGREE_COLUMN_NAME = 'degree'
INTERACTION_ONLY_COLUMN_NAME = 'interaction_only'
TRAIN_MSE_COLUMN_NAME = 'train_mse'
TEST_MSE_COLUMN_NAME = 'test_mse'
def ensure_directory(directory: Path):
directory.mkdir(parents=True, exist_ok=True)
......@@ -185,6 +196,79 @@ def model_evaluation(model_file: Path, test_data_file: Path, features: List[str]
with open(model_evaluation_file, 'w') as f:
f.write(f'''test_MSE: {test_mse}
''')
def higher_order_terms(train_data_file: Path, test_data_file: Path, features: List[str], target: str, interaction_only: bool, degree: int, out_dir: Path):
train_data = pd.read_csv(train_data_file)
test_data = pd.read_csv(test_data_file)
X_train = train_data[features]
y_train = train_data[target]
X_test = test_data[features]
y_test = test_data[target]
pipeline = Pipeline([
('poly', PolynomialFeatures(degree=degree, include_bias=False, interaction_only=interaction_only)),
('linear', LinearRegression())
])
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_train)
train_mse = mean_squared_error(y_train, y_pred)
rsquared = pipeline.score(X_train, y_train)
y_pred = pipeline.predict(X_test)
test_mse = mean_squared_error(y_test, y_pred)
print(f'train MSE: {train_mse}, test mse: {test_mse}, R^2: {rsquared}')
ensure_directory(out_dir)
output_file = out_dir / f'higher_order_terms_{degree}_{interaction_only}.txt'
with open(output_file, 'w') as f:
f.write(f'''train MSE: {train_mse}
test MSE: {test_mse}
rsquared: {rsquared}
''')
return train_mse, test_mse, rsquared
def best_higher_order_terms(train_data_file: Path, test_data_file: Path, features: List[str], target: str, degree_bottom_boundary: int, degree_top_boundary: int, out_dir: Path):
if degree_bottom_boundary > degree_top_boundary:
raise ValueError('degree bottom boundary must be smaller or equal than degree top boundary')
configurations = pd.DataFrame({
DEGREE_COLUMN_NAME: pd.Series(dtype=int),
INTERACTION_ONLY_COLUMN_NAME: pd.Series(dtype=bool),
TRAIN_MSE_COLUMN_NAME: pd.Series(dtype=float),
TEST_MSE_COLUMN_NAME: pd.Series(dtype=float),
RSQUARED_COLUMN_NAME: pd.Series(dtype='float'),
})
for degree in range(degree_bottom_boundary, degree_top_boundary + 1):
interaction_only = False
train_mse, test_mse, rsquared = higher_order_terms(train_data_file, test_data_file, features, target, interaction_only, degree, out_dir)
configurations.loc[-1] = {
DEGREE_COLUMN_NAME: degree,
INTERACTION_ONLY_COLUMN_NAME: interaction_only,
TRAIN_MSE_COLUMN_NAME: train_mse,
TEST_MSE_COLUMN_NAME: test_mse,
RSQUARED_COLUMN_NAME: rsquared,
}
configurations.index = configurations.index + 1
interaction_only = True
train_mse, test_mse, rsquared = higher_order_terms(train_data_file, test_data_file, features, target, interaction_only, degree, out_dir)
configurations.loc[-1] = {
DEGREE_COLUMN_NAME: degree,
INTERACTION_ONLY_COLUMN_NAME: interaction_only,
TRAIN_MSE_COLUMN_NAME: train_mse,
TEST_MSE_COLUMN_NAME: test_mse,
RSQUARED_COLUMN_NAME: rsquared,
}
configurations.index = configurations.index + 1
configurations = configurations.sort_values(RSQUARED_COLUMN_NAME, ascending=False)
print(configurations)
def main():
argument_parser = ArgumentParser('vqcfim', description='Virtual Quality Control for Injection Molding')
......@@ -220,6 +304,26 @@ def main():
model_evaluation_subparser.add_argument(FEATURES_ARG, FEATURES_ARG_SHORT, action='store', type=str, required=True, nargs='+')
model_evaluation_subparser.set_defaults(func=lambda model, test_data, target, out, features, func: model_evaluation(model, test_data, features, target, out))
higher_order_terms_subparser = subparsers.add_parser('higher-order-terms', aliases=['hot'], description='Creates and evaluates a model with higher order terms')
higher_order_terms_subparser.add_argument(TRAIN_DATA_ARG, TRAIN_DATA_ARG_SHORT, action='store', type=Path, required=True)
higher_order_terms_subparser.add_argument(TEST_DATA_ARG, action='store', type=Path, required=True)
higher_order_terms_subparser.add_argument(FEATURES_ARG, FEATURES_ARG_SHORT, action='store', type=str, required=True, nargs='+')
higher_order_terms_subparser.add_argument(TARGET_ARG, action='store', type=str, required=True)
higher_order_terms_subparser.add_argument(OUT_DIR_ARG, OUT_DIR_ARG_SHORT, action='store', type=Path, required=False, default=DEFAULT_OUT_DIR)
higher_order_terms_subparser.add_argument(DEGREE_ARG, action='store', type=int, required=True)
higher_order_terms_subparser.add_argument(INTERACTION_ONLY_ARG, action='store_true', required=False)
higher_order_terms_subparser.set_defaults(func=lambda train_data, test_data, features, target, degree, interaction_only, out, func: higher_order_terms(train_data, test_data, features, target, interaction_only, degree, out))
best_higher_order_terms_subparser = subparsers.add_parser('best-higher-order-terms', aliases=['bhot'], description='Creates and evaluates models with higher order terms')
best_higher_order_terms_subparser.add_argument(TRAIN_DATA_ARG, TRAIN_DATA_ARG_SHORT, action='store', type=Path, required=True)
best_higher_order_terms_subparser.add_argument(TEST_DATA_ARG, action='store', type=Path, required=True)
best_higher_order_terms_subparser.add_argument(FEATURES_ARG, FEATURES_ARG_SHORT, action='store', type=str, required=True, nargs='+')
best_higher_order_terms_subparser.add_argument(TARGET_ARG, action='store', type=str, required=True)
best_higher_order_terms_subparser.add_argument(OUT_DIR_ARG, OUT_DIR_ARG_SHORT, action='store', type=Path, required=False, default=DEFAULT_OUT_DIR)
best_higher_order_terms_subparser.add_argument(DEGREE_BOTTOM_BOUNDARY_ARG, action='store', type=int, required=True)
best_higher_order_terms_subparser.add_argument(DEGREE_TOP_ARG, action='store', type=int, required=True)
best_higher_order_terms_subparser.set_defaults(func=lambda train_data, test_data, features, degree_bottom_boundary, degree_top_boundary, target, out, func: best_higher_order_terms(train_data, test_data, features, target, degree_bottom_boundary, degree_top_boundary, out))
parsed_args = argument_parser.parse_args()
args = vars(parsed_args)
parsed_args.func(**args)
......