Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
P
project
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
OST
ML
Virtual Quality Control for Injection Molding
project
Compare revisions
0ba9e975d608fe628a0b7a3a71c06514b6ee8428 to 59d83d93171525607329d45595668f9f827cd0d5
Compare revisions
Changes are shown as if the
source
revision was being merged into the
target
revision.
Learn more about comparing revisions.
Source
ost/ml/virtual-quality-control-for-injection-molding/project
Select target project
No results found
59d83d93171525607329d45595668f9f827cd0d5
Select Git revision
Swap
Target
ost/ml/virtual-quality-control-for-injection-molding/project
Select target project
ost/ml/virtual-quality-control-for-injection-molding/project
1 result
0ba9e975d608fe628a0b7a3a71c06514b6ee8428
Select Git revision
Show changes
Only incoming changes from source
Include changes to target since source was created
Compare
Commits on Source (2)
add multi feature regression
· 17598523
Andri Joos
authored
6 months ago
17598523
Merge branch 'multi_feature_regression'
· 59d83d93
Andri Joos
authored
6 months ago
59d83d93
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
.gitlab-ci.yml
+19
-0
19 additions, 0 deletions
.gitlab-ci.yml
.vscode/launch.json
+38
-0
38 additions, 0 deletions
.vscode/launch.json
src/app.py
+53
-11
53 additions, 11 deletions
src/app.py
with
110 additions
and
11 deletions
.gitlab-ci.yml
View file @
59d83d93
...
...
@@ -28,3 +28,22 @@ best_single_feature_regression:
-
out/
script
:
-
vqcfim best-single-feature-regression --train-data dataset/InjectionMolding_Train.csv --out out --target 'mass' --p-value-threshold
0.05
multi_feature_regression_p_value
:
extends
:
.run_script
artifacts
:
expire_in
:
1d
paths
:
-
out/
script
:
-
vqcfim multi-feature-regression --train-data dataset/InjectionMolding_Train.csv --out out --target 'mass' --p-value-threshold
0.05
multi_feature_regression_manual_features
:
extends
:
.run_script
artifacts
:
expire_in
:
1d
paths
:
-
out/
script
:
-
vqcfim multi-feature-regression --train-data dataset/InjectionMolding_Train.csv --out out --target 'mass'
--features Inj1PosVolAct_Var Inj1PrsAct_meanOfInjPhase ClpFceAct_1stPCscore
This diff is collapsed.
Click to expand it.
.vscode/launch.json
View file @
59d83d93
...
...
@@ -37,6 +37,44 @@
"--p-value-threshold"
,
"0.05"
]
},
{
"name"
:
"Python Debugger: Multi Feature Regression with p-value"
,
"type"
:
"debugpy"
,
"request"
:
"launch"
,
"program"
:
"${workspaceFolder}/src/app.py"
,
"console"
:
"integratedTerminal"
,
"args"
:
[
"multi-feature-regression"
,
"-t"
,
"dataset/InjectionMolding_Train.csv"
,
"--target"
,
"mass"
,
"-o"
,
"out"
,
"--p-value-threshold"
,
"0.05"
]
},
{
"name"
:
"Python Debugger: Multi Feature Regression with features"
,
"type"
:
"debugpy"
,
"request"
:
"launch"
,
"program"
:
"${workspaceFolder}/src/app.py"
,
"console"
:
"integratedTerminal"
,
"args"
:
[
"multi-feature-regression"
,
"-t"
,
"dataset/InjectionMolding_Train.csv"
,
"--target"
,
"mass"
,
"-o"
,
"out"
,
"-f"
,
"Inj1PosVolAct_Var"
,
"Inj1PrsAct_meanOfInjPhase"
,
"ClpFceAct_1stPCscore"
]
}
]
}
This diff is collapsed.
Click to expand it.
src/app.py
View file @
59d83d93
...
...
@@ -18,6 +18,8 @@ DEFAULT_CORRELATION_THRESHOLD = 0.9
TARGET_ARG
=
'
--target
'
P_VALUE_THRESHOLD_ARG
=
'
--p-value-threshold
'
DEFAULT_P_VALUE_THRESHOLD
=
0.05
FEATURES_ARG
=
'
--features
'
FEATURES_ARG_SHORT
=
'
-f
'
PVALUE_COLUMN_NAME
=
'
p-value
'
RSQUARED_COLUMN_NAME
=
'
R^2
'
...
...
@@ -25,6 +27,34 @@ RSQUARED_COLUMN_NAME = 'R^2'
def ensure_directory(directory: Path):
    """Create ``directory`` if it is missing, including any missing parents.

    Calling this on a directory that already exists is a no-op.
    """
    directory.mkdir(exist_ok=True, parents=True)
def get_possible_features_from_p_value(data: pd.DataFrame, target: str, p_value_threshold: float) -> pd.DataFrame:
    """Score every non-target column and keep those below the p-value threshold.

    Each column of ``data`` other than ``target`` is passed to
    ``single_feature_regression``; its p-value and R^2 are stored in a row
    indexed by the column name.

    Returns:
        A DataFrame indexed by feature name with the p-value and R^2 columns,
        restricted to rows whose p-value is strictly below
        ``p_value_threshold``.
    """
    candidate_columns = data.columns.drop(target)

    # Start from an empty, float-typed frame so the result has both columns
    # even when no candidate exists.
    scores = pd.DataFrame({
        PVALUE_COLUMN_NAME: pd.Series(dtype='float'),
        RSQUARED_COLUMN_NAME: pd.Series(dtype='float'),
    })

    for column in candidate_columns:
        p_value, r_squared = single_feature_regression(data, column, target)
        scores.loc[column] = {
            PVALUE_COLUMN_NAME: p_value,
            RSQUARED_COLUMN_NAME: r_squared,
        }

    # Rows failing the threshold become NaN in `where` and are then dropped.
    is_significant = scores[PVALUE_COLUMN_NAME] < p_value_threshold
    return scores.where(is_significant).dropna()
def
multi_feature_regression_model
(
train_data
:
pd
.
DataFrame
,
selected_features
:
List
[
str
]
|
None
,
p_value_threshold
:
float
|
None
,
target
:
str
)
->
sm
.
OLS
:
features
:
List
[
str
]
=
None
if
selected_features
is
not
None
and
p_value_threshold
is
None
:
features
=
selected_features
elif
p_value_threshold
is
not
None
and
selected_features
is
None
:
features
=
list
(
get_possible_features_from_p_value
(
train_data
,
target
,
p_value_threshold
).
index
)
else
:
raise
ValueError
(
f
'
selected_features is
{
selected_features
}
and p_value_threshold is
{
p_value_threshold
}
, but expected exactly one to be set
'
)
X
=
sm
.
add_constant
(
train_data
[
features
])
y
=
train_data
[
target
]
model
=
sm
.
OLS
(
y
,
X
).
fit
()
return
model
,
features
def
correlation_analysis
(
train_data_file
:
Path
,
out_dir
:
Path
,
correlation_threshold
:
float
):
# Load training and test data
train_data
=
pd
.
read_csv
(
train_data_file
)
...
...
@@ -62,7 +92,7 @@ def correlation_analysis(train_data_file: Path, out_dir: Path, correlation_thres
with
open
(
correlations_file
,
'
w
'
)
as
f
:
f
.
writelines
(
correlations
)
def
single_feature_regression
(
data
:
pd
.
DataFrame
,
feature
:
str
,
target
:
str
):
def
single_feature_regression
(
data
:
pd
.
DataFrame
,
feature
:
str
,
target
:
str
)
->
Tuple
[
float
,
float
]
:
X
=
sm
.
add_constant
(
data
[[
feature
]])
# Add constant for intercept
y
=
data
[
target
]
model
=
sm
.
OLS
(
y
,
X
).
fit
()
...
...
@@ -70,16 +100,7 @@ def single_feature_regression(data: pd.DataFrame, feature: str, target: str):
def
best_single_feature_regression
(
train_data_file
:
Path
,
target
:
str
,
p_value_threshold
:
float
,
out_dir
:
Path
):
train_data
=
pd
.
read_csv
(
train_data_file
)
features
=
train_data
.
columns
features
=
features
.
drop
(
target
)
evaluated_features
=
pd
.
DataFrame
({
PVALUE_COLUMN_NAME
:
pd
.
Series
(
dtype
=
'
float
'
),
RSQUARED_COLUMN_NAME
:
pd
.
Series
(
dtype
=
'
float
'
),
})
for
feature
in
features
:
pvalue
,
rsquared
=
single_feature_regression
(
train_data
,
feature
,
target
)
evaluated_features
.
loc
[
feature
]
=
{
PVALUE_COLUMN_NAME
:
pvalue
,
RSQUARED_COLUMN_NAME
:
rsquared
}
evaluated_features
=
get_possible_features_from_p_value
(
train_data
,
target
,
p_value_threshold
)
print
(
'
Evaluated features
'
)
print
(
evaluated_features
)
...
...
@@ -125,6 +146,18 @@ def best_single_feature_regression(train_data_file: Path, target: str, p_value_t
p-value:
{
best_feature
[
PVALUE_COLUMN_NAME
]
}
R^2:
{
best_feature
[
RSQUARED_COLUMN_NAME
]
}
'''
)
def
multi_feature_regression
(
train_data_file
:
Path
,
target
:
str
,
selected_features
:
List
[
str
]
|
None
,
p_value_threshold
:
float
|
None
,
out_dir
:
Path
):
train_data
=
pd
.
read_csv
(
train_data_file
)
model
,
features
=
multi_feature_regression_model
(
train_data
,
selected_features
,
p_value_threshold
,
target
)
print
(
model
.
summary
())
ensure_directory
(
out_dir
)
multi_feature_regression_results_file
=
out_dir
/
'
multi_feature_regression_results.txt
'
with
open
(
multi_feature_regression_results_file
,
'
w
'
)
as
f
:
f
.
write
(
f
'''
features:
{
features
}
rsquared:
{
model
.
rsquared
}
'''
)
def
main
():
argument_parser
=
ArgumentParser
(
'
vqcfim
'
,
description
=
'
Virtual Quality Control for Injection Molding
'
)
...
...
@@ -143,6 +176,15 @@ def main():
best_single_feature_regression_subparser
.
add_argument
(
P_VALUE_THRESHOLD_ARG
,
action
=
'
store
'
,
type
=
float
,
required
=
False
,
default
=
DEFAULT_P_VALUE_THRESHOLD
)
best_single_feature_regression_subparser
.
set_defaults
(
func
=
lambda
train_data
,
target
,
out
,
p_value_threshold
,
func
:
best_single_feature_regression
(
train_data
,
target
,
p_value_threshold
,
out
))
multi_feature_regression_subparser
=
subparsers
.
add_parser
(
'
multi-feature-regression
'
,
aliases
=
[
'
mfr
'
],
description
=
'
Performs a linear regression using multiple features
'
)
multi_feature_regression_subparser
.
add_argument
(
TRAIN_DATA_ARG
,
TRAIN_DATA_ARG_SHORT
,
action
=
'
store
'
,
type
=
Path
,
required
=
True
)
multi_feature_regression_subparser
.
add_argument
(
TARGET_ARG
,
action
=
'
store
'
,
type
=
str
,
required
=
True
)
multi_feature_regression_subparser
.
add_argument
(
OUT_DIR_ARG
,
OUT_DIR_ARG_SHORT
,
action
=
'
store
'
,
type
=
Path
,
required
=
False
,
default
=
DEFAULT_OUT_DIR
)
multi_feature_regression_features_group
=
multi_feature_regression_subparser
.
add_mutually_exclusive_group
(
required
=
True
)
multi_feature_regression_features_group
.
add_argument
(
FEATURES_ARG
,
FEATURES_ARG_SHORT
,
action
=
'
store
'
,
type
=
str
,
required
=
False
,
nargs
=
'
+
'
)
multi_feature_regression_features_group
.
add_argument
(
P_VALUE_THRESHOLD_ARG
,
action
=
'
store
'
,
type
=
float
,
required
=
False
,
default
=
None
)
multi_feature_regression_subparser
.
set_defaults
(
func
=
lambda
train_data
,
target
,
out
,
features
,
p_value_threshold
,
func
:
multi_feature_regression
(
train_data
,
target
,
features
,
p_value_threshold
,
out
))
parsed_args
=
argument_parser
.
parse_args
()
args
=
vars
(
parsed_args
)
parsed_args
.
func
(
**
args
)
...
...
This diff is collapsed.
Click to expand it.