diff --git a/app/preprocessing/transform_dataset.py b/app/preprocessing/transform_dataset.py index 914be5ad0cb0839ebcf8a132fbf960bf3ee4dd11..bc44285e9b19c9f0ac22236b60f13833a6a289cb 100644 --- a/app/preprocessing/transform_dataset.py +++ b/app/preprocessing/transform_dataset.py @@ -18,7 +18,7 @@ from .json_maneuver_data import JsonManeuverData DOUBLE_PATTERN = r'Double(\d+)' MAX_DATASET_MEMORY_SIZE = 16602933278 MIN_JOBS = 2 -VARIANCE_THRESHOLD = 0.01 +VARIANCE_THRESHOLD = 1e-10 CORRELATION_THRESHOLD = 0.9 Y_CLASS_COLUMN = 'Maneuver' MANUALLY_EXCLUDED_COLUMNS = [ @@ -176,7 +176,7 @@ def _remove_unimportant_predictors(train_files: List[Path], all_files: List[Path df = pd.read_parquet(file) columns_to_analyze = [col for col in df.columns if col not in columns_to_keep] - columns_to_keep.update([col for col in columns_to_analyze if np.std(df[col]) >= VARIANCE_THRESHOLD]) + columns_to_keep.update([col for col in columns_to_analyze if np.var(df[col]) > VARIANCE_THRESHOLD]) df: pd.DataFrame = df.drop(columns=columns_to_keep)