SciCompMod · patricklnz · Apr 25, 2024 · Apr 18, 2024 · Apr 18, 2024 · Apr 18, 2024
diff --git a/.github/actions/test-py/action.yml b/.github/actions/test-py/action.yml
@@ -81,7 +81,6 @@ runs:
           getdividata --help
           getsimdata --help
           cleandata --help
-          getcasesestimation --help
           getcommutermobility --help
           getvaccinationdata --help
           gethospitalizationdata --help

diff --git a/pycode/memilio-epidata/README.rst b/pycode/memilio-epidata/README.rst
@@ -66,8 +66,7 @@ After installation the scripts can be run via the following entry points.
   - getjhdata (get case data from john hopkins university, see Results: JH)
   - getdividata (get ICU data from DIVI, see Results: DIVI)
   - getsimdata (get simulation data including case and vaccination data from rki, population data and ICU data, see Results: RKI-C, RKI-V, P, DIVI)
-  - cleandata (deletes written files)
-  - getcasesestimation (get case data with estimations from rki, see Results: RKI-Estimation)
+  - cleandata (deletes written files) 
   - getcommutermobility (get data about commuter mobility, see Results: BAA)
   - gettestingdata (get data about number of tests, see Results: RKI-T)
   - gethospitalizationdata (get hospitalization data from RKI, see Results: RKI-H)

diff --git a/pycode/memilio-epidata/memilio/epidata/README.rst b/pycode/memilio-epidata/memilio/epidata/README.rst
@@ -31,7 +31,7 @@ Sources
 
   - Testing Data (RKI-T)
 
-    https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Daten/Testzahlen-gesamt.xlsx
+    https://github.com/robert-koch-institut/SARS-CoV-2-PCR-Testungen_in_Deutschland/raw/main/SARS-CoV-2-PCR-Testungen_in_Deutschland.csv
 
   - Hospitalization data (RKI-H)
 
@@ -191,16 +191,6 @@ RKI-H          Germany     hospit_germany_age                  hospitalizations
 RKI-H          Germany     hospit_state_age                    hospitalizations per day for different states
 RKI-H          Germany     hospit_germany                      hospitalizations per day in germany
 
-RKI-Estimation Germany     cases_all_germany_estimated         infected, deaths, recovered, recovered_estimated, deaths_estimated over time for whole Germany
-RKI-Estimation Germany     cases_all_state_estimated           infected, deaths, recovered, recovered_estimated, deaths_estimated over time for different states    (Bundesländer)
-RKI-Estimation Germany     cases_all_county_estimated          infected, deaths, recovered, recovered_estimated, deaths_estimated over time for different counties   (Landkreise)
-RKI-Estimation Germany     cases_all_gender_estimated          infected, deaths, recovered, recovered_estimated, deaths_estimated over time for different gender
-RKI-Estimation Germany     cases_all_age_estimated             infected, deaths, recovered, recovered_estimated, deaths_estimated over time for different age ranges
-RKI-Estimation Germany     cases_all_state_age_estimated       infected, deaths, recovered, recovered_estimated, deaths_estimated over time for different age ranges and states
-RKI-Estimation Germany     cases_all_state_gender_estimated    infected, deaths, recovered, recovered_estimated, deaths_estimated over time for different genders and states
-RKI-Estimation Germany     cases_all_county_age_estimated      infected, deaths, recovered, recovered_estimated, deaths_estimated over time for different age ranges and counties
-RKI-Estimation Germany     cases_all_county_gender_estimated   infected, deaths, recovered, recovered_estimated, deaths_estimated over time for different genders and counties
-
 P              Germany     county_current_population[_dim401]  population for different age groups from the 2011 census, extrapolated to the current level [with Wartburgkreis and Eisenach separated]
 P              Germany     county_population[_dim401]          population for different age groups from the 2011 census [with Wartburgkreis and Eisenach separated]
 P              Germany     county_table                        raw information on the German counties and its population sizes

diff --git a/pycode/memilio-epidata/memilio/epidata/defaultDict.py b/pycode/memilio-epidata/memilio/epidata/defaultDict.py
@@ -98,6 +98,7 @@
     'vaccNotComplete': "Vacc_not_completed",
     # test data
     'positiveRate': 'Positive_rate',
+    'testPositiveRatio': 'Positive_rate',
     # NPI data
     'npiCode': 'NPI_code',
     # mobility data refs

diff --git a/pycode/memilio-epidata/memilio/epidata/getCaseDatawithEstimations.py b/pycode/memilio-epidata/memilio/epidata/getCaseDatawithEstimations.py
diff --git a/pycode/memilio-epidata/memilio/epidata/getDataIntoPandasDataFrame.py b/pycode/memilio-epidata/memilio/epidata/getDataIntoPandasDataFrame.py
@@ -366,7 +366,6 @@ def cli(what):
 
     cli_dict = {"divi": ['Downloads data from DIVI', 'start_date', 'end_date', 'impute_dates', 'moving_average'],
                 "cases": ['Download case data from RKI', 'start_date', 'end_date', 'impute_dates', 'moving_average', 'split_berlin', 'rep_date', 'files'],
-                "cases_est": ['Download case data from RKI and JHU and estimate recovered and deaths', 'start_date', 'end_date', 'impute_dates', 'moving_average', 'split_berlin', 'rep_date'],
                 "population": ['Download population data from official sources', 'username'],
                 "commuter_official": ['Download commuter data from official sources'],
                 "vaccination": ['Download vaccination data', 'start_date', 'end_date', 'impute_dates', 'moving_average', 'sanitize_data'],

diff --git a/pycode/memilio-epidata/memilio/epidata/getTestingData.py b/pycode/memilio-epidata/memilio/epidata/getTestingData.py
@@ -48,21 +48,10 @@ def download_testing_data():
 
     # get country-wide testing data without resolution per federal state
     # but from much more laboratories
-    url = 'https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Daten/Testzahlen-gesamt.xlsx?__blob=publicationFile'
-    header = {'User-Agent': 'Mozilla/5.0'}
-    r = requests.get(url, headers=header)
-    if r.status_code != 200:  # e.g. 404
-        raise requests.exceptions.HTTPError("HTTPError: "+str(r.status_code))
-    with io.BytesIO(r.content) as fh:
-        df = pd.io.excel.ExcelFile(fh, engine=gd.Conf.excel_engine)
-        sheet_names = df.sheet_names
-        df_test[0] = pd.read_excel(
-            df, sheet_name=sheet_names[1],
-            dtype={'Positivenanteil (%)': float})
-        # start on calender week 12/2020 as in federal states sheet,
-        # below and remove sum at bottom
-        df_test[0] = df_test[0][2:-1].reset_index()
-        df_test[0] = df_test[0].drop(columns='index')
+    url = "https://github.com/robert-koch-institut/SARS-CoV-2-PCR-Testungen_in_Deutschland/raw/main/SARS-CoV-2-PCR-Testungen_in_Deutschland.csv"
+    df_test[0] = gd.get_file(url=url, read_data=False)
+    # start on calendar week 12/2020 as in federal states sheet, below
+    df_test[0] = df_test[0].iloc[2:, :].reset_index(drop=True)
 
     # get testing data on federal state level (from only a subset of
     # laboratories)
@@ -97,7 +86,7 @@ def transform_weeks_to_dates(df_test):
         # use %G insteaf of %Y (for year) and %V instead of %W (for month)
         # to get ISO week definition
         df_test[0].loc[i, dd.EngEng['date']] = datetime.strftime(datetime.strptime(
-            df_test[0].loc[i, dd.EngEng['date']] + '-4', "%V/%G-%w"), "%Y-%m-%d")
+            str(df_test[0].loc[i, dd.EngEng['date']]).replace('W', '') + '-4', "%G-%V-%u"), "%Y-%m-%d")
 
     # federal state-based data
     df_test[1].rename(columns={df_test[1].columns[1]: dd.EngEng['date']}, inplace=True)
@@ -237,18 +226,18 @@ def get_testing_data(read_data=dd.defaultDict['read_data'],
 
     # drop columns
     df_test[0].drop(
-        columns=['Anzahl Testungen', 'Positiv getestet',
-                 'Anzahl übermittelnder Labore'], inplace=True)
+        columns=['tests_total', 'tests_total_accumulated', 'tests_positive',
+                 'tests_positive_accumulated',
+                 'laboratories_tests', 'capacities_daily',
+                 'capacities_weekly_theoretically', 'capacities_weeklyweek_actually',
+                 'laboratories_capacities', 'laboratories_samplebacklog',
+                 'samplebacklog'], inplace=True)
     df_test[1].drop(columns='Anzahl Gesamt', inplace=True)
 
     # remove unknown locations
     df_test[1] = df_test[1][df_test[1].State != 'unbekannt']
     df_test[1].reset_index(drop=True, inplace=True)
 
-    # correct positive rate to percentage
-    df_test[0][dd.EngEng['positiveRate']
-               ] = df_test[0][dd.EngEng['positiveRate']]/100
-
     # replace state names with IDs
     df_test[1].rename(
         columns={dd.EngEng['state']: dd.EngEng['idState']}, inplace=True)
@@ -260,7 +249,7 @@ def get_testing_data(read_data=dd.defaultDict['read_data'],
     df_test[0] = mdfs.impute_and_reduce_df(
         df_test[0],
         {},
-        [dd.EngEng['positiveRate']],
+        [dd.EngEng['testPositiveRatio']],
         impute='forward', moving_average=moving_average,
         min_date=start_date, max_date=end_date)
 
@@ -274,7 +263,7 @@ def get_testing_data(read_data=dd.defaultDict['read_data'],
         # make plot
         customPlot.plot_multiple_series(
             df_test[0][dd.EngEng['date']],
-            [df_test[0][dd.EngEng['positiveRate']]],
+            [df_test[0][dd.EngEng['testPositiveRatio']]],
             ["Germany"],
             title='Positive rate for Sars-CoV-2 testing', xlabel='Date', ylabel='Positive rate',
             fig_name="Germany_Testing_positive_rate")
@@ -283,7 +272,7 @@ def get_testing_data(read_data=dd.defaultDict['read_data'],
     df_test[1] = mdfs.impute_and_reduce_df(
         df_test[1],
         {dd.EngEng["idState"]: [k for k in geoger.get_state_ids()]},
-        [dd.EngEng['positiveRate']],
+        [dd.EngEng['testPositiveRatio']],
         impute='forward', moving_average=moving_average,
         min_date=start_date, max_date=end_date)
     # store positive rates for the all federal states
@@ -298,7 +287,7 @@ def get_testing_data(read_data=dd.defaultDict['read_data'],
             df_test[0][dd.EngEng['date']],
             [df_test[1].loc
              [df_test[1][dd.EngEng['idState']] == stateID,
-              [dd.EngEng['positiveRate']]] for stateID in geoger.get_state_ids()],
+              [dd.EngEng['testPositiveRatio']]] for stateID in geoger.get_state_ids()],
             [stateName for stateName in geoger.get_state_names()],
             title='Positive rate for Sars-CoV-2 testing', xlabel='Date', ylabel='Positive rate',
             fig_name='FederalStates_Testing_positive_rate')
@@ -321,7 +310,7 @@ def get_testing_data(read_data=dd.defaultDict['read_data'],
             columns=({dd.EngEng['idState']: dd.EngEng['idCounty']}),
             inplace=True)
         df_local[dd.EngEng['idCounty']] = county
-        df_test_counties.append(df_test_counties, df_local)
+        df_test_counties.append(df_local)
 
     df_test_counties = pd.concat(df_test_counties)