SciCompMod · MariamaJ · Jul 24, 2024 · Feb 28, 2024 · Feb 29, 2024 · Feb 29, 2024
diff --git a/pycode/memilio-epidata/memilio/epidata/defaultDict.py b/pycode/memilio-epidata/memilio/epidata/defaultDict.py
@@ -51,7 +51,7 @@
     'file_format': 'json_timeasstring',
     'no_raw': False,
     'rep_date': False,
-    'sanitize_data': 1
+    'sanitize_data': 1,
 }
 
 # The following dict EngEng makes sure that for all

diff --git a/pycode/memilio-epidata/memilio/epidata/download_config.conf b/pycode/memilio-epidata/memilio/epidata/download_config.conf
@@ -32,3 +32,6 @@ no_raw = False
 
 # matplotlib backend to use
 mpl_backend = QtAgg
+
+# to_dataset: if True, do not generate json files but return python objects instead
+to_dataset = False
diff --git a/pycode/memilio-epidata/memilio/epidata/getCaseData.py b/pycode/memilio-epidata/memilio/epidata/getCaseData.py
diff --git a/pycode/memilio-epidata/memilio/epidata/getDIVIData.py b/pycode/memilio-epidata/memilio/epidata/getDIVIData.py
diff --git a/pycode/memilio-epidata/memilio/epidata/getDataIntoPandasDataFrame.py b/pycode/memilio-epidata/memilio/epidata/getDataIntoPandasDataFrame.py
@@ -84,7 +84,6 @@ def __init__(self, out_folder, **kwargs):
 
         # activate CoW for more predictable behaviour of pandas DataFrames
         pd.options.mode.copy_on_write = True
-
         # read in config file
         # if no config file is given, use default values
         if os.path.exists(path):
@@ -105,12 +104,17 @@ def __init__(self, out_folder, **kwargs):
                 if key not in kwargs:
                     kwargs.update({key: parser['SETTINGS'][key]})
 
-            Conf.show_progr = True if kwargs['show_progress'] == 'True' else False
+            Conf.show_progr = True if str(
+                kwargs['show_progress']) == 'True' else False
             Conf.v_level = str(kwargs['verbosity_level'])
-            self.checks = True if kwargs['run_checks'] == 'True' else False
-            self.interactive = True if kwargs['interactive'] == 'True' else False
-            self.plot = True if kwargs['make_plot'] == 'True' else False
-            self.no_raw = True if kwargs['no_raw'] == 'True' else False
+            self.checks = True if str(
+                kwargs['run_checks']) == 'True' else False
+            self.interactive = True if str(
+                kwargs['interactive']) == 'True' else False
+            self.plot = True if str(kwargs['make_plot']) == 'True' else False
+            self.no_raw = True if str(kwargs['no_raw']) == 'True' else False
+            self.to_dataset = True if str(
+                kwargs['to_dataset']) == 'True' else False
         else:
             # default values:
             Conf.show_progr = kwargs['show_progress'] if 'show_progress' in kwargs.keys(
@@ -126,6 +130,8 @@ def __init__(self, out_folder, **kwargs):
             self.no_raw = kwargs['no_raw'] if 'no_raw' in kwargs.keys(
             ) else dd.defaultDict['no_raw']
             self.path_to_use = out_folder
+            self.to_dataset = kwargs['to_dataset'] if 'to_dataset' in kwargs.keys(
+            ) else False
 
         # suppress Future & DepricationWarnings
         if VerbosityLevel[Conf.v_level].value <= 2:
@@ -354,6 +360,7 @@ def cli(what):
     - no_raw
     - username
     - password
+    - to_dataset
 
     @param what Defines what packages calls and thus what kind of command line arguments should be defined.
     """
@@ -493,6 +500,13 @@ def cli(what):
         parser.add_argument(
             '--password', type=str
         )
+    if '--to-dataset' in sys.argv:
+        parser.add_argument(
+            '--to-dataset', dest='to_dataset',
+            help="To return saved dataframes as objects.",
+            action='store_true'
+        )
+
     args = vars(parser.parse_args())
 
     return args

diff --git a/pycode/memilio-epidata/memilio/epidata/getPopulationData.py b/pycode/memilio-epidata/memilio/epidata/getPopulationData.py
@@ -42,15 +42,15 @@
 
 
 def read_population_data(username, password):
-    '''! Reads Population data from regionalstatistik.de
+    """! Reads Population data from regionalstatistik.de
 
     Username and Password are required to sign in on regionalstatistik.de.
     A request is made to regionalstatistik.de and the StringIO is read in as a csv into the dataframe format.
 
     @param username Username to sign in at regionalstatistik.de. 
     @param password Password to sign in at regionalstatistik.de.
     @return DataFrame
-    '''
+    """
 
     download_url = 'https://www.regionalstatistik.de/genesis/online?operation=download&code=12411-02-03-4&option=csv'
     req = requests.get(download_url, auth=(username, password))
@@ -63,14 +63,14 @@ def read_population_data(username, password):
 
 
 def path_to_credential_file():
-    '''Returns path to .ini file where credentials are stored.
+    """! Returns path to .ini file where credentials are stored.
     The Path can be changed if neccessary.
-    '''
+    """
     return os.path.join(os.path.dirname(os.path.abspath(__file__)), 'CredentialsRegio.ini')
 
 
 def manage_credentials(interactive):
-    '''! Manages credentials for regionalstatistik.de (needed for dowload).
+    """! Manages credentials for regionalstatistik.de (needed for dowload).
 
     A connfig file inside the epidata folder is either written (if not existent yet)
     with input from user or read with following format:
@@ -79,7 +79,7 @@ def manage_credentials(interactive):
     Password = XXXXX
 
     @return Username and password to sign in at regionalstatistik.de. 
-    '''
+    """
     # path where ini file is found
     path = path_to_credential_file()
 
@@ -118,8 +118,8 @@ def manage_credentials(interactive):
     return username, password
 
 
-def export_population_dataframe(df_pop, directory, file_format, merge_eisenach):
-    '''! Writes population dataframe into directory with new column names and age groups
+def export_population_dataframe(df_pop: pd.DataFrame, directory: str, file_format: str, merge_eisenach: bool):
+    """! Writes population dataframe into directory with new column names and age groups
 
     @param df_pop Population data DataFrame to be exported
     @param directory Directory where data is written to.
@@ -128,7 +128,7 @@ def export_population_dataframe(df_pop, directory, file_format, merge_eisenach):
         and 'Eisenach' are listed separately or
         combined as one entity 'Wartburgkreis'.
     @return exported DataFrame
-    '''
+    """
 
     new_cols = [
         dd.EngEng['idCounty'],
@@ -194,7 +194,7 @@ def export_population_dataframe(df_pop, directory, file_format, merge_eisenach):
 
 
 def assign_population_data(df_pop_raw, counties, age_cols, idCounty_idx):
-    '''! Assigns population data of all counties of old dataframe in new created dataframe
+    """! Assigns population data of all counties of old dataframe in new created dataframe
 
     In df_pop_raw there might be additional information like federal states, 
     governing regions etc. which is not necessary for the dataframe.
@@ -205,7 +205,7 @@ def assign_population_data(df_pop_raw, counties, age_cols, idCounty_idx):
     @param age_cols Age groups in old DataFrame
     @param idCountyidx indexes in old DataFrame where data of corresponding county starts
     @return new DataFrame
-    '''
+    """
 
     new_cols = {dd.EngEng['idCounty']: counties[:, 1],
                 dd.EngEng['county']: counties[:, 0]}
@@ -283,45 +283,25 @@ def test_total_population(df_pop, age_cols):
         raise gd.DataError('Total Population does not match expectation.')
 
 
-def get_population_data(read_data=dd.defaultDict['read_data'],
-                        file_format=dd.defaultDict['file_format'],
-                        out_folder=dd.defaultDict['out_folder'],
-                        merge_eisenach=True,
-                        username='',
-                        password='',
-                        **kwargs):
-    """! Download age-stratified population data for the German counties.
-
-    The data we use is:
-    Official 'Bevölkerungsfortschreibung' 12411-02-03-4:
-    'Bevölkerung nach Geschlecht und Altersgruppen (17)' 
-    of regionalstatistik.de. 
-    ATTENTION: The raw file cannot be downloaded 
-    automatically by our scripts without an Genesis Online account. In order to
-    work on this dataset, please enter your username and password or manually download it from:
-
-    https://www.regionalstatistik.de/genesis/online -> "1: Gebiet, Bevölkerung,
-    Arbeitsmarkt, Wahlen" -> "12: Bevölkerung" -> "12411 Fortschreibung des
-    Bevölkerungsstandes" ->  "12411-02-03-4: Bevölkerung nach Geschlecht und 
-    Altersgruppen (17) - Stichtag 31.12. - regionale Tiefe: Kreise und
-    krfr. Städte". 
-
-    Download the xlsx or csv file and put it under dd.defaultDict['out_folder'], 
-    this normally is Memilio/data/pydata/Germany. 
-    The folders 'pydata/Germany' have to be created if they do not exist yet. 
-    Then this script can be run.
+def fetch_population_data(read_data: bool = dd.defaultDict['read_data'],
+                          out_folder: str = dd.defaultDict['out_folder'],
+                          username='',
+                          password='',
+                          **kwargs
+                          ) -> pd.DataFrame:
+    """! Downloads or reads the population data.
+    If it does not already exist, the folder Germany is generated in the given out_folder.
+    If read_data == True and the file "FullData_population.json" exists, the data is read from this file
+    and stored in a pandas dataframe. If read_data == True and the file does not exist, the program is stopped.
+    The downloaded dataframe is written to the file "FullData_population".
 
     @param read_data False or True. Defines if data is read from file or
         downloaded. Default defined in defaultDict.
-    @param file_format File format which is used for writing the data.
-        Default defined in defaultDict.
     @param out_folder Path to folder where data is written in folder
         out_folder/Germany. Default defined in defaultDict.
-    @param merge_eisenach [Default: True] or False. Defines whether the
-        counties 'Wartburgkreis' and 'Eisenach' are listed separately or
-        combined as one entity 'Wartburgkreis'.
-    @param username Username to sign in at regionalstatistik.de. 
+    @param username Username to sign in at regionalstatistik.de.
     @param password Password to sign in at regionalstatistik.de.
+
     @return DataFrame with adjusted population data for all ages to current level.
     """
     conf = gd.Conf(out_folder, **kwargs)
@@ -341,6 +321,22 @@ def get_population_data(read_data=dd.defaultDict['read_data'],
 
     df_pop_raw = read_population_data(username, password)
 
+    return df_pop_raw
+
+
+def preprocess_population_data(df_pop_raw: pd.DataFrame,
+                               merge_eisenach: bool = True,
+                               ) -> pd.DataFrame:
+    """! Processing of the downloaded data
+        * the columns are renamed to English and the state and county names are added.
+
+    @param df_pop_raw pd.DataFrame. A Dataframe containing input population data
+    @param merge_eisenach [Default: True] or False. Defines whether the
+     counties 'Wartburgkreis' and 'Eisenach' are listed separately or
+     combined as one entity 'Wartburgkreis'.
+
+    @return df pd.DataFrame. Processed population data
+    """
     column_names = list(df_pop_raw.columns)
     # rename columns
     rename_columns = {
@@ -381,12 +377,96 @@ def get_population_data(read_data=dd.defaultDict['read_data'],
 
     df_pop = assign_population_data(
         df_pop_raw, counties, age_cols, idCounty_idx)
-
     test_total_population(df_pop, age_cols)
+    return df_pop
 
+
+def write_population_data(df_pop: pd.DataFrame,
+                          out_folder: str = dd.defaultDict['out_folder'],
+                          file_format: str = dd.defaultDict['file_format'],
+                          merge_eisenach: bool = True
+                          ) -> pd.DataFrame:
+    """! Exports the processed population data and returns the exported frame.
+
+    The target directory is the subfolder 'Germany' of out_folder. Renaming
+    of columns, regrouping of age groups and the actual file writing are
+    delegated to export_population_dataframe.
+
+    @param df_pop pd.DataFrame. A Dataframe containing processed population data
+    @param out_folder str. Folder where data is written to. Default defined in defaultDict.
+    @param file_format str. File format which is used for writing the data. Default defined in defaultDict.
+    @param merge_eisenach [Default: True] or False. Defines whether the
+        counties 'Wartburgkreis' and 'Eisenach' are listed separately or
+        combined as one entity 'Wartburgkreis'.
+
+    @return pd.DataFrame. The exported DataFrame as returned by export_population_dataframe.
+    """
+    directory = os.path.join(out_folder, 'Germany')
     df_pop_export = export_population_dataframe(
         df_pop, directory, file_format, merge_eisenach)
+    return df_pop_export
+
+
+def get_population_data(read_data: bool = dd.defaultDict['read_data'],
+                        file_format: str = dd.defaultDict['file_format'],
+                        out_folder: str = dd.defaultDict['out_folder'],
+                        merge_eisenach: bool = True,
+                        username='',
+                        password='',
+                        **kwargs
+                        ):
+    """! Download age-stratified population data for the German counties.
 
+    The data we use is:
+    Official 'Bevölkerungsfortschreibung' 12411-02-03-4:
+    'Bevölkerung nach Geschlecht und Altersgruppen (17)'
+    of regionalstatistik.de.
+    ATTENTION: The raw file cannot be downloaded
+    automatically by our scripts without a Genesis Online account. In order to
+    work on this dataset, please enter your username and password or manually download it from:
+
+    https://www.regionalstatistik.de/genesis/online -> "1: Gebiet, Bevölkerung,
+    Arbeitsmarkt, Wahlen" -> "12: Bevölkerung" -> "12411 Fortschreibung des
+    Bevölkerungsstandes" ->  "12411-02-03-4: Bevölkerung nach Geschlecht und
+    Altersgruppen (17) - Stichtag 31.12. - regionale Tiefe: Kreise und
+    krfr. Städte".
+
+    Download the xlsx or csv file and put it under dd.defaultDict['out_folder'],
+    this normally is Memilio/data/pydata/Germany.
+    The folders 'pydata/Germany' have to be created if they do not exist yet.
+    Then this script can be run.
+
+    @param read_data False or True. Defines if data is read from file or
+        downloaded. Default defined in defaultDict.
+    @param file_format File format which is used for writing the data.
+        Default defined in defaultDict.
+    @param out_folder Path to folder where data is written in folder
+        out_folder/Germany. Default defined in defaultDict.
+    @param merge_eisenach [Default: True] or False. Defines whether the
+        counties 'Wartburgkreis' and 'Eisenach' are listed separately or
+        combined as one entity 'Wartburgkreis'.
+    @param username str. Username to sign in at regionalstatistik.de.
+    @param password str. Password to sign in at regionalstatistik.de.
+    @return DataFrame with adjusted population data for all ages to current level.
+    """
+    raw_df = fetch_population_data(
+        read_data=read_data,
+        out_folder=out_folder,
+        file_format=file_format,
+        username=username,
+        password=password,
+        **kwargs
+    )
+    preprocess_df = preprocess_population_data(
+        df_pop_raw=raw_df,
+        merge_eisenach=merge_eisenach
+    )
+    # forward the caller's merge_eisenach instead of a hard-coded True
+    df_pop_export = write_population_data(
+        df_pop=preprocess_df,
+        file_format=file_format,
+        out_folder=out_folder,
+        merge_eisenach=merge_eisenach)
     return df_pop_export