Skip to content
Snippets Groups Projects
Commit 3fa701ef authored by Andri Joos
Browse files

set variance threshold to ~0

parent 9a45dc8b
No related branches found
No related tags found
No related merge requests found
...@@ -18,7 +18,7 @@ from .json_maneuver_data import JsonManeuverData ...@@ -18,7 +18,7 @@ from .json_maneuver_data import JsonManeuverData
DOUBLE_PATTERN = r'Double(\d+)' DOUBLE_PATTERN = r'Double(\d+)'
MAX_DATASET_MEMORY_SIZE = 16602933278 MAX_DATASET_MEMORY_SIZE = 16602933278
MIN_JOBS = 2 MIN_JOBS = 2
VARIANCE_THRESHOLD = 0.01 VARIANCE_THRESHOLD = 1e-10
CORRELATION_THRESHOLD = 0.9 CORRELATION_THRESHOLD = 0.9
Y_CLASS_COLUMN = 'Maneuver' Y_CLASS_COLUMN = 'Maneuver'
MANUALLY_EXCLUDED_COLUMNS = [ MANUALLY_EXCLUDED_COLUMNS = [
...@@ -176,7 +176,7 @@ def _remove_unimportant_predictors(train_files: List[Path], all_files: List[Path ...@@ -176,7 +176,7 @@ def _remove_unimportant_predictors(train_files: List[Path], all_files: List[Path
df = pd.read_parquet(file) df = pd.read_parquet(file)
columns_to_analyze = [col for col in df.columns if col not in columns_to_keep] columns_to_analyze = [col for col in df.columns if col not in columns_to_keep]
columns_to_keep.update([col for col in columns_to_analyze if np.std(df[col]) >= VARIANCE_THRESHOLD]) columns_to_keep.update([col for col in columns_to_analyze if np.var(df[col]) > VARIANCE_THRESHOLD])
df: pd.DataFrame = df.drop(columns=columns_to_keep) df: pd.DataFrame = df.drop(columns=columns_to_keep)
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment