Skip to content
Snippets Groups Projects
Commit b9fbb709 authored by Andri Joos
Browse files

allow nan values in column casting

parent afa046ae
No related branches found
No related tags found
No related merge requests found
......@@ -32,20 +32,22 @@ def _cast_columns(df: pd.DataFrame, column_type_mapping: Dict[str | int, str]) -
double_match = re.match(DOUBLE_PATTERN, column_type)
if column_type == 'Double':
df[column] = df[column].astype(np.float64)
df[column] = df[column].astype('Float64')
elif column_type == 'Int32':
df[column] = df[column].astype(np.int32)
df[column] = df[column].astype('Int32')
elif column_type == 'Boolean':
df[column] = df[column].astype(np.int32)
df[column] = df[column].astype('Int32')
elif column_type == 'String':
df[column] = df[column].astype(str)
elif double_match:
vector_rows = int(double_match.group(1)) # it is certain that this is an int because of the pattern
df[column] = df[column].apply(lambda vec: np.array(vec, dtype=np.float64)).apply(lambda arr: _ensure_shape(arr, (vector_rows,)))
df[column] = df[column] \
.apply(lambda vec: np.array(vec, dtype=np.float64)) \
.apply(lambda arr: _ensure_shape(arr, (vector_rows,)) if isinstance(arr, np.ndarray) else arr) # if it is not instance of np.ndarray, it is NaN (empty cell)
else:
raise ValueError(f'Unexpected type {column_type}')
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment