Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 19 additions & 6 deletions pycode/memilio-epidata/memilio/epidata/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -89,39 +89,52 @@ optional arguments working for all are:
+---------------------------------------------+-----------------------------------------------------------+
| -n, --no-raw | Defines if raw data will be stored for further use. |
+---------------------------------------------+-----------------------------------------------------------+
| --no-progress-indicators | Disables all progress indicators (used for downloads etc.)|
+---------------------------------------------+-----------------------------------------------------------+
| --interactive | Interactive download (Handle warnings, passwords etc.). |
+---------------------------------------------+-----------------------------------------------------------+
| -v, --verbose | Increases verbosity level. |
+---------------------------------------------+-----------------------------------------------------------+
| --skip-checks | Skips sanity checks etc. |
+---------------------------------------------+-----------------------------------------------------------+

optional arguments working for some are:

+---------------------------------------------+-----------------------------------------------------------+
| -p, --make-plot | Plots the data. |
+---------------------------------------------+-----------------------------------------------------------+
| -ed, --end-date | Changes date for which data collection is stopped [divi] |
| -ed, --end-date | Changes date for which data collection is stopped |
+---------------------------------------------+-----------------------------------------------------------+
| -sd, --start-date | Changes date for which data collection is started [divi] |
| -sd, --start-date | Changes date for which data collection is started |
+---------------------------------------------+-----------------------------------------------------------+
| -i, --impute-dates | Returns dataframes with all dates instead of only dates |
| | where new cases have been reported. |
| | |
| | Note that this option will have a negative impact |
| | on performance as well as on the storage space needed. |
| | [cases] |
| | |
+---------------------------------------------+-----------------------------------------------------------+
| -m N, --moving-average N | The central N days moving average is computed for the |
| | data. |
| | |
| | Note that the --impute-dates option will be implicitly |
| | turned on, as computing the moving average requires all |
| | dates to be available. [cases] |
| | dates to be available. |
+---------------------------------------------+-----------------------------------------------------------+
| -sb, --split-berlin | Berlin data is split into different counties, |
| | instead of having only one county for Berlin. [cases] |
+---------------------------------------------+-----------------------------------------------------------+
| -- rep-date | The reporting date will be prefered over possibly given |
| --rep-date | The reporting date will be preferred over possibly given |
| | dates of disease onset. [cases] |
+---------------------------------------------+-----------------------------------------------------------+
| -- sanitize-data | Different ways to distribute vaccinations to home |
| --sanitize-data | Different ways to distribute vaccinations to home |
| | locations of vaccinated persons [vaccination] |
+---------------------------------------------+-----------------------------------------------------------+
| --username | Username for regionalstatistik.de [population] |
+---------------------------------------------+-----------------------------------------------------------+
| --password | Password for regionalstatistik.de [population] |
+---------------------------------------------+-----------------------------------------------------------+


Hint:
When using the "--make-plot" option, close each figure window to get the next one.
Expand Down
33 changes: 19 additions & 14 deletions pycode/memilio-epidata/memilio/epidata/cleanData.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import os

from memilio.epidata import defaultDict as dd
from memilio.epidata import getDataIntoPandasDataFrame as gd


def clean_data(
Expand Down Expand Up @@ -87,15 +88,16 @@ def clean_data(

for item in files:
if item.endswith(".json") or item.endswith(".h5"):
print("Deleting file ", os.path.join(directory, item))
gd.default_print("Info", "Deleting file " +
os.path.join(directory, item))
os.remove(os.path.join(directory, item))

# delete directories if empty
try:
os.rmdir(directory)
except OSError:
continue
print("Deleting directory ", directory)
gd.default_print("Info", "Deleting directory " + directory)

# delete further jh files
files = []
Expand All @@ -106,7 +108,8 @@ def clean_data(

for item in files:
if item.endswith(".json") or item.endswith(".h5"):
print("Deleting file ", os.path.join(out_path, item))
gd.default_print("Info", "Deleting file " +
os.path.join(out_path, item))
os.remove(os.path.join(out_path, item))

else:
Expand All @@ -128,8 +131,8 @@ def clean_data(

for item in files:
if item.endswith(ending) and "_jh" in item:
print("Deleting file ",
os.path.join(directory, item))
gd.default_print("Info", "Deleting file " +
os.path.join(directory, item))
os.remove(os.path.join(directory, item))

# delete directories
Expand All @@ -138,7 +141,7 @@ def clean_data(
except OSError:
continue

print("Deleting directory ", directory)
gd.default_print("Info", "Deleting directory " + directory)

# delete further jh files
files = []
Expand All @@ -150,7 +153,8 @@ def clean_data(
for item in files:
if item.endswith(ending):
if "_jh" in item or "JohnHopkins" in item:
print("Deleting file ", os.path.join(out_path, item))
gd.default_print(
"Info", "Deleting file " + os.path.join(out_path, item))
os.remove(os.path.join(out_path, item))

# other data is stored in the same folder
Expand Down Expand Up @@ -193,20 +197,21 @@ def clean_data(

for file in filenames:
if file in item:
print(
"Deleting file ", os.path.join(
directory, item))
gd.default_print("Info",
"Deleting file " + os.path.join(
directory, item))
os.remove(os.path.join(directory, item))

# delete directory if empty
try:
os.rmdir(directory)
print("Deleting directory ", directory)
gd.default_print("Info", "Deleting directory " + directory)
except OSError:
pass

if filenames == []:
print("Please specify what should be deleted. See --help for details.")
gd.default_print(
"Info", "Please specify what should be deleted. See --help for details.")


def cli():
Expand All @@ -220,8 +225,8 @@ def cli():
- choose file format: json or hdf5
- define path to files
"""

out_path_default = dd.defaultDict['out_folder']
conf = gd.Conf(dd.defaultDict['out_folder'])
out_path_default = conf.path_to_use

parser = argparse.ArgumentParser()

Expand Down
2 changes: 1 addition & 1 deletion pycode/memilio-epidata/memilio/epidata/customPlot.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,4 +89,4 @@ def plot_multiple_series(

gd.check_dir(path_rel)
plt.savefig(path_rel + fig_name + '.png', bbox_inches='tight', dpi=dpi)
print('Plot saved to ' + path_rel + fig_name + '.png')
gd.default_print("Info", 'Plot saved to ' + path_rel + fig_name + '.png')
34 changes: 34 additions & 0 deletions pycode/memilio-epidata/memilio/epidata/download_config.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
[SETTINGS]
# Set verbosity level
# Off, Critical, Error, Warning, Info, Debug, Trace
# Default: Info
verbosity_level = Info

# Whether to show Progress Indicator (Spinner, download bars etc.)
# True or False
show_progress = True

# Whether to run sanity checks etc.
# Disabling the checks will improve performance but may lead to unpredictable behaviour if something in the file format has changed
# True or False
run_checks = True

# Interactivity of the download functions
# Program will exit with an error if a user choice is needed and interactive is False.
# True or False
interactive = False

# Defines if plots are generated with matplotlib
# True or False
make_plot = False

# Whether the default out_folder or a different path should be used.
# "default" uses the out_folder defined in defaultDict
path_to_use = default

# If raw data from source should be prevented from being written into the download directory
# True or False
no_raw = False

# matplotlib backend to use
mpl_backend = TkAgg
17 changes: 10 additions & 7 deletions pycode/memilio-epidata/memilio/epidata/geoModificationGermany.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,16 +210,19 @@ def check_for_all_counties(
missing = len(get_county_ids(merge_berlin, merge_eisenach)
)-len(unique_county_list)
if missing != 0:
print("Downloaded data is not complete. Missing " +
str(missing) + " counties.")
if missing < 0:
# Returning True if source data file contains more counties than list
print('Source data frame contains more counties than official '
'county list. This could be OK, please verify yourself.')
gd.default_print('Warning', 'Source data frame contains ' + str(abs(missing)) +
' more counties than official county list. '
'This could be OK, please verify yourself.')
return True
elif missing < 10:
print('Missing counties: ' + str(list(set(get_county_ids(merge_berlin,
merge_eisenach)).difference(unique_county_list).difference(set({11000})))))
else:
gd.default_print('Error', "Downloaded data is not complete. Missing " +
str(missing) + " counties.")
if missing < 10:
gd.default_print('Info', 'Missing counties: ' +
str(list(set(get_county_ids(merge_berlin,
merge_eisenach)).difference(unique_county_list).difference(set({11000})))))
# Returning False if source data file lacks at least one county
return False

Expand Down
Loading