Skip to content

Commit 71ba46a

Browse files
committed
Update engine argument for read_df_parquet and read_ddf_parquet
1 parent 2c3322b commit 71ba46a

File tree

3 files changed

+15
-6
lines changed

3 files changed

+15
-6
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
## [2.3.0] 2019-05-06
2+
### Changes
3+
- `read_df_parquet` and `read_ddf_parquet` now take an optional `engine` argument, allowing use of either the `pyarrow` or the `fastparquet` engine for reading parquet files.
4+
15
## [2.2.0] 2019-05-06
26
### Changes
37
- Updates `slackclient` dependency to `2.0.1` and handles migration of api to v2 (https://github.com/slackapi/python-slackclient/wiki/Migrating-to-2.x)

sd_utils/sd_load.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -156,15 +156,18 @@ def read_df_excel(file_path, *,
156156

157157

158158
@sd_log.log_func
159-
def read_df_parquet(file_path: str, columns: Optional[Iterable[str]]=None,
160-
use_threads: bool=True, **pyarrow_kwargs) -> T_DF:
159+
def read_df_parquet(file_path: str,
160+
columns: Optional[Iterable[str]]=None,
161+
use_threads: bool = True,
162+
engine: str='pyarrow',
163+
**pyarrow_kwargs) -> T_DF:
161164
assert isinstance(file_path, str), '{} does not exist'.format(file_path)
162165
assert os.path.exists(file_path), 'file does not exist at {}'.format(file_path)
163166
assert columns is None or isinstance(columns, (list, tuple))
164167
assert columns is None or all(isinstance(c, str) for c in columns)
165168
assert isinstance(use_threads, bool)
166169

167-
df = pandas.read_parquet(file_path, engine='pyarrow', use_threads=use_threads, columns=columns,
170+
df = pandas.read_parquet(file_path, engine=engine, use_threads=use_threads, columns=columns,
168171
**pyarrow_kwargs)
169172

170173
# df = pyarrow.parquet.read_table(file_path, nthreads=n_threads, columns=columns,
@@ -175,14 +178,16 @@ def read_df_parquet(file_path: str, columns: Optional[Iterable[str]]=None,
175178

176179

177180
@sd_log.log_func
178-
def read_ddf_parquet(file_path: str, columns: Optional[Iterable[str]]=None,
181+
def read_ddf_parquet(file_path: str,
182+
columns: Optional[Iterable[str]] = None,
183+
engine: str='pyarrow',
179184
**dd_kwargs) -> T_DDF:
180185
assert isinstance(file_path, str), '{} does not exist'.format(file_path)
181186
assert os.path.exists(file_path), 'file does not exist at {}'.format(file_path)
182187
assert columns is None or isinstance(columns, (list, tuple))
183188
assert columns is None or all(isinstance(c, str) for c in columns)
184189

185-
return dask.dataframe.read_parquet(path=file_path, columns=columns, engine='arrow',
190+
return dask.dataframe.read_parquet(path=file_path, columns=columns, engine=engine,
186191
**dd_kwargs)
187192

188193

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
setup(
1515
name='SDUtils',
16-
version='2.2.0',
16+
version='2.3.0',
1717
packages=['sd_utils'],
1818
license='(c) 2017- StratoDem Analytics. All rights reserved.',
1919
description='StratoDem utilities',

0 commit comments

Comments
 (0)