From 42e5520596acc42186204734e4eb364665ae65f2 Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Wed, 11 Sep 2024 16:55:18 -0700 Subject: [PATCH 1/2] fixed: Count DF was being validated even without count data for score set updates --- src/mavedb/routers/score_sets.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/mavedb/routers/score_sets.py b/src/mavedb/routers/score_sets.py index 24c602ca6..44fd2a307 100644 --- a/src/mavedb/routers/score_sets.py +++ b/src/mavedb/routers/score_sets.py @@ -854,9 +854,13 @@ async def update_score_set( scores_data = pd.DataFrame( variants_to_csv_rows(item.variants, columns=score_columns, dtype="score_data") ).replace("NA", pd.NA) - count_data = pd.DataFrame( - variants_to_csv_rows(item.variants, columns=count_columns, dtype="count_data") - ).replace("NA", pd.NA) + + if item.dataset_columns["count_columns"]: + count_data = pd.DataFrame( + variants_to_csv_rows(item.variants, columns=count_columns, dtype="count_data") + ).replace("NA", pd.NA) + else: + count_data = None # Although this is also updated within the variant creation job, update it here # as well so that we can display the proper UI components (queue invocation delay From fa984bb212b1af43bd38b9492a63e49f124c1bca Mon Sep 17 00:00:00 2001 From: Ben Capodanno Date: Wed, 11 Sep 2024 16:56:15 -0700 Subject: [PATCH 2/2] Add handling for NA values during CSV validation --- src/mavedb/lib/mave/utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/mavedb/lib/mave/utils.py b/src/mavedb/lib/mave/utils.py index f59a3fca0..dd6b75916 100644 --- a/src/mavedb/lib/mave/utils.py +++ b/src/mavedb/lib/mave/utils.py @@ -1,5 +1,7 @@ import re +import pandas as pd + NA_VALUE = "NA" NULL_VALUES = ("", "na", "nan", "nil", "none", "null", "n/a", "undefined", NA_VALUE) @@ -22,6 +24,9 @@ def is_csv_null(value): """Return True if a string from a CSV file represents a NULL value.""" + # Avoid any boolean miscasts from comparisons by handling NA types up front. + if pd.isna(value): + return True # Number 0 is treated as False so that all 0 will be converted to NA value. if value == 0: return value