From 82b588edef23722b70307b086038cc1c6db14124 Mon Sep 17 00:00:00 2001
From: Andri Joos <andri@joos.io>
Date: Fri, 15 Nov 2024 13:24:15 +0100
Subject: [PATCH] comment out correlation-based predictor selection

---
 app/preprocessing/transform_dataset.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/app/preprocessing/transform_dataset.py b/app/preprocessing/transform_dataset.py
index bc44285..702f927 100644
--- a/app/preprocessing/transform_dataset.py
+++ b/app/preprocessing/transform_dataset.py
@@ -178,16 +178,16 @@ def _remove_unimportant_predictors(train_files: List[Path], all_files: List[Path
         columns_to_analyze = [col for col in df.columns if col not in columns_to_keep]
         columns_to_keep.update([col for col in columns_to_analyze if np.var(df[col]) > VARIANCE_THRESHOLD])
 
-        df: pd.DataFrame = df.drop(columns=columns_to_keep)
+        # df: pd.DataFrame = df.drop(columns=columns_to_keep)
 
-        correlation_matrix = df.corr().abs()
+        # correlation_matrix = df.corr().abs()
 
-        # Select the upper triangle of the correlation matrix
-        upper_tri = correlation_matrix.where(
-            np.triu(np.ones(correlation_matrix.shape), k=1).astype(bool)
-        )
+        # # Select the upper triangle of the correlation matrix
+        # upper_tri = correlation_matrix.where(
+        #     np.triu(np.ones(correlation_matrix.shape), k=1).astype(bool)
+        # )
 
-        columns_to_keep.update([col for col in upper_tri.columns if all(upper_tri[col] <= CORRELATION_THRESHOLD)])
+        # columns_to_keep.update([col for col in upper_tri.columns if all(upper_tri[col] <= CORRELATION_THRESHOLD)])
 
     for file in all_files:
         print(f'Removing not important predictors from {file.name}')
-- 
GitLab