Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • ost/ml/virtual-quality-control-for-injection-molding/project
1 result
Show changes
Commits on Source (2)
......@@ -28,3 +28,22 @@ best_single_feature_regression:
- out/
script:
- vqcfim best-single-feature-regression --train-data dataset/InjectionMolding_Train.csv --out out --target 'mass' --p-value-threshold 0.05
multi_feature_regression_p_value:
extends: .run_script
artifacts:
expire_in: 1d
paths:
- out/
script:
- vqcfim multi-feature-regression --train-data dataset/InjectionMolding_Train.csv --out out --target 'mass' --p-value-threshold 0.05
multi_feature_regression_manual_features:
extends: .run_script
artifacts:
expire_in: 1d
paths:
- out/
script:
- vqcfim multi-feature-regression --train-data dataset/InjectionMolding_Train.csv --out out --target 'mass'
--features Inj1PosVolAct_Var Inj1PrsAct_meanOfInjPhase ClpFceAct_1stPCscore
......@@ -37,6 +37,44 @@
"--p-value-threshold",
"0.05"
]
},
{
"name": "Python Debugger: Multi Feature Regression with p-value",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/src/app.py",
"console": "integratedTerminal",
"args": [
"multi-feature-regression",
"-t",
"dataset/InjectionMolding_Train.csv",
"--target",
"mass",
"-o",
"out",
"--p-value-threshold",
"0.05"
]
},
{
"name": "Python Debugger: Multi Feature Regression with features",
"type": "debugpy",
"request": "launch",
"program": "${workspaceFolder}/src/app.py",
"console": "integratedTerminal",
"args": [
"multi-feature-regression",
"-t",
"dataset/InjectionMolding_Train.csv",
"--target",
"mass",
"-o",
"out",
"-f",
"Inj1PosVolAct_Var",
"Inj1PrsAct_meanOfInjPhase",
"ClpFceAct_1stPCscore"
]
}
]
}
......@@ -18,6 +18,8 @@ DEFAULT_CORRELATION_THRESHOLD = 0.9
TARGET_ARG = '--target'
P_VALUE_THRESHOLD_ARG = '--p-value-threshold'
DEFAULT_P_VALUE_THRESHOLD = 0.05
FEATURES_ARG = '--features'
FEATURES_ARG_SHORT = '-f'
PVALUE_COLUMN_NAME = 'p-value'
RSQUARED_COLUMN_NAME = 'R^2'
......@@ -25,6 +27,34 @@ RSQUARED_COLUMN_NAME = 'R^2'
def ensure_directory(directory: Path):
directory.mkdir(parents=True, exist_ok=True)
def get_possible_features_from_p_value(data: pd.DataFrame, target: str, p_value_threshold: float) -> pd.DataFrame:
features = data.columns
features = features.drop(target)
possible_features = pd.DataFrame({
PVALUE_COLUMN_NAME: pd.Series(dtype='float'),
RSQUARED_COLUMN_NAME: pd.Series(dtype='float'),
})
for feature in features:
pvalue, rsquared = single_feature_regression(data, feature, target)
possible_features.loc[feature] = {PVALUE_COLUMN_NAME: pvalue, RSQUARED_COLUMN_NAME: rsquared}
return possible_features.where(possible_features[PVALUE_COLUMN_NAME] < p_value_threshold).dropna()
def multi_feature_regression_model(train_data: pd.DataFrame, selected_features: List[str] | None, p_value_threshold: float | None, target: str) -> sm.OLS:
features: List[str] = None
if selected_features is not None and p_value_threshold is None:
features = selected_features
elif p_value_threshold is not None and selected_features is None:
features = list(get_possible_features_from_p_value(train_data, target, p_value_threshold).index)
else:
raise ValueError(f'selected_features is {selected_features} and p_value_threshold is {p_value_threshold}, but expected exactly one to be set')
X = sm.add_constant(train_data[features])
y = train_data[target]
model = sm.OLS(y, X).fit()
return model, features
def correlation_analysis(train_data_file: Path, out_dir: Path, correlation_threshold: float):
# Load training and test data
train_data = pd.read_csv(train_data_file)
......@@ -62,7 +92,7 @@ def correlation_analysis(train_data_file: Path, out_dir: Path, correlation_thres
with open(correlations_file, 'w') as f:
f.writelines(correlations)
def single_feature_regression(data: pd.DataFrame, feature: str, target: str):
def single_feature_regression(data: pd.DataFrame, feature: str, target: str) -> Tuple[float, float]:
X = sm.add_constant(data[[feature]]) # Add constant for intercept
y = data[target]
model = sm.OLS(y, X).fit()
......@@ -70,16 +100,7 @@ def single_feature_regression(data: pd.DataFrame, feature: str, target: str):
def best_single_feature_regression(train_data_file: Path, target: str, p_value_threshold: float, out_dir: Path):
train_data = pd.read_csv(train_data_file)
features = train_data.columns
features = features.drop(target)
evaluated_features = pd.DataFrame({
PVALUE_COLUMN_NAME: pd.Series(dtype='float'),
RSQUARED_COLUMN_NAME: pd.Series(dtype='float'),
})
for feature in features:
pvalue, rsquared = single_feature_regression(train_data, feature, target)
evaluated_features.loc[feature] = {PVALUE_COLUMN_NAME: pvalue, RSQUARED_COLUMN_NAME: rsquared}
evaluated_features = get_possible_features_from_p_value(train_data, target, p_value_threshold)
print('Evaluated features')
print(evaluated_features)
......@@ -125,6 +146,18 @@ def best_single_feature_regression(train_data_file: Path, target: str, p_value_t
p-value: {best_feature[PVALUE_COLUMN_NAME]}
R^2: {best_feature[RSQUARED_COLUMN_NAME]}
''')
def multi_feature_regression(train_data_file: Path, target: str, selected_features: List[str] | None, p_value_threshold: float | None, out_dir: Path):
train_data = pd.read_csv(train_data_file)
model, features = multi_feature_regression_model(train_data, selected_features, p_value_threshold, target)
print(model.summary())
ensure_directory(out_dir)
multi_feature_regression_results_file = out_dir / 'multi_feature_regression_results.txt'
with open(multi_feature_regression_results_file, 'w') as f:
f.write(f'''features: {features}
rsquared: {model.rsquared}
''')
def main():
argument_parser = ArgumentParser('vqcfim', description='Virtual Quality Control for Injection Molding')
......@@ -143,6 +176,15 @@ def main():
best_single_feature_regression_subparser.add_argument(P_VALUE_THRESHOLD_ARG, action='store', type=float, required=False, default=DEFAULT_P_VALUE_THRESHOLD)
best_single_feature_regression_subparser.set_defaults(func=lambda train_data, target, out, p_value_threshold, func: best_single_feature_regression(train_data, target, p_value_threshold, out))
multi_feature_regression_subparser = subparsers.add_parser('multi-feature-regression', aliases=['mfr'], description='Performs a linear regression using multiple features')
multi_feature_regression_subparser.add_argument(TRAIN_DATA_ARG, TRAIN_DATA_ARG_SHORT, action='store', type=Path, required=True)
multi_feature_regression_subparser.add_argument(TARGET_ARG, action='store', type=str, required=True)
multi_feature_regression_subparser.add_argument(OUT_DIR_ARG, OUT_DIR_ARG_SHORT, action='store', type=Path, required=False, default=DEFAULT_OUT_DIR)
multi_feature_regression_features_group = multi_feature_regression_subparser.add_mutually_exclusive_group(required=True)
multi_feature_regression_features_group.add_argument(FEATURES_ARG, FEATURES_ARG_SHORT, action='store', type=str, required=False, nargs='+')
multi_feature_regression_features_group.add_argument(P_VALUE_THRESHOLD_ARG, action='store', type=float, required=False, default=None)
multi_feature_regression_subparser.set_defaults(func=lambda train_data, target, out, features, p_value_threshold, func: multi_feature_regression(train_data, target, features, p_value_threshold, out))
parsed_args = argument_parser.parse_args()
args = vars(parsed_args)
parsed_args.func(**args)
......