Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
P
project
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
OST
ML
Virtual Quality Control for Injection Molding
project
Commits
28f554b1
Commit
28f554b1
authored
6 months ago
by
Andri Joos
Browse files
Options
Downloads
Patches
Plain Diff
add single feature regression
parent
dc6265c2
Loading
Loading
No related merge requests found
Pipeline
#6715
passed with stage
in 2 minutes and 23 seconds
Changes
4
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
.gitlab-ci.yml
+9
-0
9 additions, 0 deletions
.gitlab-ci.yml
.vscode/launch.json
+18
-0
18 additions, 0 deletions
.vscode/launch.json
pyproject.toml
+1
-0
1 addition, 0 deletions
pyproject.toml
src/app.py
+72
-0
72 additions, 0 deletions
src/app.py
with
100 additions
and
0 deletions
.gitlab-ci.yml
+
9
−
0
View file @
28f554b1
...
...
@@ -19,3 +19,12 @@ correlation_analysis:
-
out/
script
:
-
vqcfim correlation-analysis --train-data dataset/InjectionMolding_Train.csv --out out --correlation-threshold
0.9
# CI job: runs the `best-single-feature-regression` action of the vqcfim CLI on
# the training data set with 'mass' as the target column and a p-value
# threshold of 0.05. The `out/` directory it writes to is kept as a job
# artifact for one day.
best_single_feature_regression:
  extends: .run_script
  artifacts:
    expire_in: 1d
    paths:
      - out/
  script:
    - vqcfim best-single-feature-regression --train-data dataset/InjectionMolding_Train.csv --out out --target 'mass' --p-value-threshold 0.05
This diff is collapsed.
Click to expand it.
.vscode/launch.json
+
18
−
0
View file @
28f554b1
...
...
@@ -19,6 +19,24 @@
"-c"
,
"0.9"
]
},
{
"name"
:
"Python Debugger: Best Single Feature Regression"
,
"type"
:
"debugpy"
,
"request"
:
"launch"
,
"program"
:
"${workspaceFolder}/src/app.py"
,
"console"
:
"integratedTerminal"
,
"args"
:
[
"best-single-feature-regression"
,
"-t"
,
"dataset/InjectionMolding_Train.csv"
,
"--target"
,
"mass"
,
"-o"
,
"out"
,
"--p-value-threshold"
,
"0.05"
]
}
]
}
This diff is collapsed.
Click to expand it.
pyproject.toml
+
1
−
0
View file @
28f554b1
...
...
@@ -6,6 +6,7 @@ dependencies = [
"pandas >= 2.2.3, < 3.0.0"
,
"seaborn >= 0.13.2, < 1.0.0"
,
"matplotlib >= 3.9.2, < 4.0.0"
,
"statsmodels >= 0.14.4, < 1.0.0"
,
]
maintainers
=
[
{name
=
"Andri Joos"
}
,
...
...
This diff is collapsed.
Click to expand it.
src/app.py
+
72
−
0
View file @
28f554b1
...
...
@@ -5,6 +5,7 @@ import seaborn as sns
import
matplotlib.pyplot
as
plt
import
math
from
typing
import
List
,
Tuple
import
statsmodels.api
as
sm
# Long and short command-line flags for the path of the training-data CSV file.
TRAIN_DATA_ARG = '--train-data'
TRAIN_DATA_ARG_SHORT = '-t'
...
...
@@ -14,6 +15,12 @@ DEFAULT_OUT_DIR = 'out/'
# Long/short command-line flags and default value for the correlation
# threshold of the correlation-analysis action.
CORRELATION_THRESHOLD_ARG = '--correlation-threshold'
CORRELATION_THRESHOLD_ARG_SHORT = '-c'
DEFAULT_CORRELATION_THRESHOLD = 0.9

# Command-line flag naming the target column of the
# best-single-feature-regression action.
TARGET_ARG = '--target'

# Command-line flag and default value for the p-value threshold of the
# best-single-feature-regression action.
P_VALUE_THRESHOLD_ARG = '--p-value-threshold'
DEFAULT_P_VALUE_THRESHOLD = 0.05

# Column labels of the per-feature regression results table.
PVALUE_COLUMN_NAME = 'p-value'
RSQUARED_COLUMN_NAME = 'R^2'
def ensure_directory(directory: Path):
    """Create ``directory`` together with any missing parent directories.

    Calling this for a directory that already exists is a no-op.

    Args:
        directory: Path of the directory that must exist afterwards.
    """
    directory.mkdir(exist_ok=True, parents=True)
...
...
@@ -55,6 +62,64 @@ def correlation_analysis(train_data_file: Path, out_dir: Path, correlation_thres
with
open
(
correlations_file
,
'
w
'
)
as
f
:
f
.
writelines
(
correlations
)
def single_feature_regression(data: pd.DataFrame, feature: str, target: str) -> Tuple[float, float]:
    """Fit an ordinary-least-squares model of ``target`` on one feature plus an intercept.

    Args:
        data: Table containing both the ``feature`` and the ``target`` column.
        feature: Name of the single explanatory column.
        target: Name of the response column.

    Returns:
        A ``(p_value, r_squared)`` tuple: the p-value of the feature's
        coefficient and the R^2 of the fitted model.
    """
    # has_constant='add' forces the intercept column to be prepended even when
    # the feature column is itself detected as a constant. With the default
    # ('skip') no intercept column is added in that case, the design matrix
    # has a single column and ``iloc[1]`` below raises IndexError.
    X = sm.add_constant(data[[feature]], has_constant='add')
    y = data[target]

    model = sm.OLS(y, X).fit()

    # Position 0 is the intercept, position 1 is the feature's coefficient.
    return model.pvalues.iloc[1], model.rsquared
def _save_single_column_heatmap(values: pd.DataFrame, file_path: Path) -> None:
    """Render a one-column table as an annotated heatmap (scale 0..1) and save it to ``file_path``."""
    figure = plt.figure(figsize=(1.75, 4.8))
    sns.heatmap(values, annot=True, cmap='coolwarm', vmin=0, vmax=1)
    plt.savefig(file_path, bbox_inches='tight')
    # pyplot keeps every figure it created alive until it is closed explicitly.
    plt.close(figure)


def _select_best_feature(evaluated_features: pd.DataFrame, p_value_threshold: float) -> pd.Series:
    """Return the row with the highest R^2 among the features whose p-value is below the threshold."""
    p_values = evaluated_features[PVALUE_COLUMN_NAME]
    # Comparisons against NaN are False, so features without a usable p-value
    # are excluded here as well.
    significant = evaluated_features[p_values < p_value_threshold]
    significant = significant.dropna(subset=[RSQUARED_COLUMN_NAME])
    if significant.empty:
        raise ValueError(
            f'No feature has a p-value below the threshold {p_value_threshold}'
        )

    # Positional lookup keeps the result a single row (pd.Series) whose name is
    # the feature; on ties the first feature with the maximal R^2 wins.
    best_position = significant[RSQUARED_COLUMN_NAME].to_numpy().argmax()
    return significant.iloc[best_position]


def best_single_feature_regression(train_data_file: Path, target: str, p_value_threshold: float, out_dir: Path):
    """Regress the target on every other column separately and report the best feature.

    Every column of the training data except ``target`` is used on its own as
    the explanatory variable of a single-feature regression. The resulting
    p-values and R^2 values are printed and written to ``out_dir`` as heatmaps
    (``evaluated_pvalues.png``, ``evaluated_rsquares.png``). The best feature
    is the one with the highest R^2 among those whose p-value is below
    ``p_value_threshold``; it is printed and written to ``best_feature.txt``.

    Args:
        train_data_file: CSV file with the training data.
        target: Name of the column to predict.
        p_value_threshold: Features with a p-value at or above this value are
            not eligible as best feature.
        out_dir: Directory receiving the output files; created if missing.

    Raises:
        KeyError: If ``target`` is not a column of the training data.
        ValueError: If there is no feature column, or no feature has a p-value
            below ``p_value_threshold``.
    """
    train_data = pd.read_csv(train_data_file)
    features = train_data.columns.drop(target)
    if features.empty:
        raise ValueError(f'{train_data_file} has no feature columns besides {target!r}')

    # One (p-value, R^2) row per feature, indexed by the feature name.
    evaluated_features = pd.DataFrame(
        [single_feature_regression(train_data, feature, target) for feature in features],
        index=features,
        columns=[PVALUE_COLUMN_NAME, RSQUARED_COLUMN_NAME],
        dtype='float',
    )

    print('Evaluated features')
    print(evaluated_features)

    ensure_directory(out_dir)
    _save_single_column_heatmap(
        evaluated_features[[PVALUE_COLUMN_NAME]],
        out_dir / "evaluated_pvalues.png",
    )
    _save_single_column_heatmap(
        evaluated_features[[RSQUARED_COLUMN_NAME]],
        out_dir / "evaluated_rsquares.png",
    )

    # Selected after the plots are written so the diagnostics exist even when
    # no feature passes the threshold.
    best_feature = _select_best_feature(evaluated_features, p_value_threshold)

    print()
    print('Best Feature')
    print(best_feature)

    best_feature_file = out_dir / 'best_feature.txt'
    with open(best_feature_file, 'w') as f:
        f.write(
            f'Name: {best_feature.name}\n'
            f'p-value: {best_feature[PVALUE_COLUMN_NAME]}\n'
            f'R^2: {best_feature[RSQUARED_COLUMN_NAME]}\n'
        )
def
main
():
argument_parser
=
ArgumentParser
(
'
vqcfim
'
,
description
=
'
Virtual Quality Control for Injection Molding
'
)
subparsers
=
argument_parser
.
add_subparsers
(
title
=
'
action
'
)
...
...
@@ -65,6 +130,13 @@ def main():
correlation_analysis_subparser
.
add_argument
(
CORRELATION_THRESHOLD_ARG
,
CORRELATION_THRESHOLD_ARG_SHORT
,
action
=
'
store
'
,
type
=
float
,
required
=
False
,
default
=
DEFAULT_CORRELATION_THRESHOLD
)
correlation_analysis_subparser
.
set_defaults
(
func
=
lambda
train_data
,
out
,
correlation_threshold
,
func
:
correlation_analysis
(
train_data
,
out
,
correlation_threshold
))
best_single_feature_regression_subparser
=
subparsers
.
add_parser
(
'
best-single-feature-regression
'
,
aliases
=
[
'
bsfr
'
],
description
=
'
Evaluates the best single feature regression feature from the dataset
'
)
best_single_feature_regression_subparser
.
add_argument
(
TRAIN_DATA_ARG
,
TRAIN_DATA_ARG_SHORT
,
action
=
'
store
'
,
type
=
Path
,
required
=
True
)
best_single_feature_regression_subparser
.
add_argument
(
TARGET_ARG
,
action
=
'
store
'
,
type
=
str
,
required
=
True
)
best_single_feature_regression_subparser
.
add_argument
(
OUT_DIR_ARG
,
OUT_DIR_ARG_SHORT
,
action
=
'
store
'
,
type
=
Path
,
required
=
False
,
default
=
DEFAULT_OUT_DIR
)
best_single_feature_regression_subparser
.
add_argument
(
P_VALUE_THRESHOLD_ARG
,
action
=
'
store
'
,
type
=
float
,
required
=
False
,
default
=
DEFAULT_P_VALUE_THRESHOLD
)
best_single_feature_regression_subparser
.
set_defaults
(
func
=
lambda
train_data
,
target
,
out
,
p_value_threshold
,
func
:
best_single_feature_regression
(
train_data
,
target
,
p_value_threshold
,
out
))
parsed_args
=
argument_parser
.
parse_args
()
args
=
vars
(
parsed_args
)
parsed_args
.
func
(
**
args
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment