From 82b588edef23722b70307b086038cc1c6db14124 Mon Sep 17 00:00:00 2001
From: Andri Joos <andri@joos.io>
Date: Fri, 15 Nov 2024 13:24:15 +0100
Subject: [PATCH] comment out covariance predictor selection

---
 app/preprocessing/transform_dataset.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/app/preprocessing/transform_dataset.py b/app/preprocessing/transform_dataset.py
index bc44285..702f927 100644
--- a/app/preprocessing/transform_dataset.py
+++ b/app/preprocessing/transform_dataset.py
@@ -178,16 +178,16 @@ def _remove_unimportant_predictors(train_files: List[Path], all_files: List[Path
     columns_to_analyze = [col for col in df.columns if col not in columns_to_keep]
     columns_to_keep.update([col for col in columns_to_analyze if np.var(df[col]) > VARIANCE_THRESHOLD])
 
-    df: pd.DataFrame = df.drop(columns=columns_to_keep)
+    # df: pd.DataFrame = df.drop(columns=columns_to_keep)
 
-    correlation_matrix = df.corr().abs()
+    # correlation_matrix = df.corr().abs()
 
-    # Select the upper triangle of the correlation matrix
-    upper_tri = correlation_matrix.where(
-        np.triu(np.ones(correlation_matrix.shape), k=1).astype(bool)
-    )
+    # # Select the upper triangle of the correlation matrix
+    # upper_tri = correlation_matrix.where(
+    #     np.triu(np.ones(correlation_matrix.shape), k=1).astype(bool)
+    # )
 
-    columns_to_keep.update([col for col in upper_tri.columns if all(upper_tri[col] <= CORRELATION_THRESHOLD)])
+    # columns_to_keep.update([col for col in upper_tri.columns if all(upper_tri[col] <= CORRELATION_THRESHOLD)])
 
     for file in all_files:
         print(f'Removing not important predictors from {file.name}')
-- 
GitLab