Skip to content
Snippets Groups Projects
Commit 82b588ed authored by Andri Joos's avatar Andri Joos :blush:
Browse files

comment out correlation-based predictor selection

parent 3fa701ef
No related branches found
No related tags found
No related merge requests found
...@@ -178,16 +178,16 @@ def _remove_unimportant_predictors(train_files: List[Path], all_files: List[Path ...@@ -178,16 +178,16 @@ def _remove_unimportant_predictors(train_files: List[Path], all_files: List[Path
columns_to_analyze = [col for col in df.columns if col not in columns_to_keep] columns_to_analyze = [col for col in df.columns if col not in columns_to_keep]
columns_to_keep.update([col for col in columns_to_analyze if np.var(df[col]) > VARIANCE_THRESHOLD]) columns_to_keep.update([col for col in columns_to_analyze if np.var(df[col]) > VARIANCE_THRESHOLD])
df: pd.DataFrame = df.drop(columns=columns_to_keep) # df: pd.DataFrame = df.drop(columns=columns_to_keep)
correlation_matrix = df.corr().abs() # correlation_matrix = df.corr().abs()
# Select the upper triangle of the correlation matrix # # Select the upper triangle of the correlation matrix
upper_tri = correlation_matrix.where( # upper_tri = correlation_matrix.where(
np.triu(np.ones(correlation_matrix.shape), k=1).astype(bool) # np.triu(np.ones(correlation_matrix.shape), k=1).astype(bool)
) # )
columns_to_keep.update([col for col in upper_tri.columns if all(upper_tri[col] <= CORRELATION_THRESHOLD)]) # columns_to_keep.update([col for col in upper_tri.columns if all(upper_tri[col] <= CORRELATION_THRESHOLD)])
for file in all_files: for file in all_files:
print(f'Removing not important predictors from {file.name}') print(f'Removing not important predictors from {file.name}')
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment