From a3dcf11ca56d68b0306c3b352fb69fdd12da504a Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 2 Aug 2017 17:30:53 -0400 Subject: [PATCH 01/15] Add 'QueryJob.query_plan' property. --- bigquery/google/cloud/bigquery/job.py | 17 ++++++++++++++ bigquery/tests/unit/test_job.py | 33 +++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py index 953a2c265580..d8caedf51555 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -14,6 +14,7 @@ """Define API Jobs.""" +import copy import threading import six @@ -1277,6 +1278,22 @@ def from_api_repr(cls, resource, client): job._set_properties(resource) return job + @property + def query_plan(self): + """Return query plan from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.queryPlan + + :rtype: list of dict + :returns: mappings describing the query plan, or an empty list + if the query has not yet completed. + """ + statistics = self._properties.get('statistics', {}) + query_stats = statistics.get('query', {}) + plan_entries = query_stats.get('queryPlan', ()) + return [copy.deepcopy(entry) for entry in plan_entries] + def query_results(self): """Construct a QueryResults instance, bound to this job. 
diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py index 46326441a5e1..d8c3c24e477a 100644 --- a/bigquery/tests/unit/test_job.py +++ b/bigquery/tests/unit/test_job.py @@ -1632,6 +1632,39 @@ def test_cancelled(self): self.assertTrue(job.cancelled()) + def test_query_plan(self): + plan_entries = [{ + 'name': 'NAME', + 'id': 1234, + 'waitRatioAvg': 2.71828, + 'waitRatioMax': 3.14159, + 'readRatioAvg': 1.41421, + 'readRatioMax': 1.73205, + 'computeRatioAvg': 0.69315, + 'computeRatioMax': 1.09861, + 'writeRatioAvg': 3.32193, + 'writeRatioMax': 2.30258, + 'recordsRead': 100, + 'recordsWritten': 1, + 'status': 'STATUS', + 'steps': [{ + 'kind': 'KIND', + 'substeps': ['SUBSTEP1', 'SUBSTEP2'], + }], + }] + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertEqual(job.query_plan, []) + + statistics = job._properties['statistics'] = {} + self.assertEqual(job.query_plan, []) + + query_stats = statistics['query'] = {} + self.assertEqual(job.query_plan, []) + + query_stats['queryPlan'] = plan_entries + self.assertEqual(job.query_plan, plan_entries) + def test_query_results(self): from google.cloud.bigquery.query import QueryResults From 691f221844c12b9c14a3b7705bb760f5851603ac Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 2 Aug 2017 17:36:13 -0400 Subject: [PATCH 02/15] Add 'QueryJob.total_bytes_processed' property. 
--- bigquery/google/cloud/bigquery/job.py | 22 ++++++++++++++++++++-- bigquery/tests/unit/test_job.py | 15 +++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py index d8caedf51555..4bb196d048dc 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -1278,6 +1278,11 @@ def from_api_repr(cls, resource, client): job._set_properties(resource) return job + def _query_statistics(self): + """Helper for properties derived from query statistics.""" + statistics = self._properties.get('statistics', {}) + return statistics.get('query', {}) + @property def query_plan(self): """Return query plan from job statistics, if present. @@ -1289,11 +1294,24 @@ def query_plan(self): :returns: mappings describing the query plan, or an empty list if the query has not yet completed. """ - statistics = self._properties.get('statistics', {}) - query_stats = statistics.get('query', {}) + query_stats = self._query_statistics() plan_entries = query_stats.get('queryPlan', ()) return [copy.deepcopy(entry) for entry in plan_entries] + @property + def total_bytes_processed(self): + """Return total bytes processed from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.totalBytesProcessed + + :rtype: int or None + :returns: total bytes processed by the job, or None if job is not + yet complete. + """ + query_stats = self._query_statistics() + return query_stats.get('totalBytesProcessed') + def query_results(self): """Construct a QueryResults instance, bound to this job. 
diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py index d8c3c24e477a..ee099230e246 100644 --- a/bigquery/tests/unit/test_job.py +++ b/bigquery/tests/unit/test_job.py @@ -1665,6 +1665,21 @@ def test_query_plan(self): query_stats['queryPlan'] = plan_entries self.assertEqual(job.query_plan, plan_entries) + def test_total_bytes_processed(self): + total_bytes = 1234 + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertIsNone(job.total_bytes_processed) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.total_bytes_processed) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.total_bytes_processed) + + query_stats['totalBytesProcessed'] = total_bytes + self.assertEqual(job.total_bytes_processed, total_bytes) + def test_query_results(self): from google.cloud.bigquery.query import QueryResults From 913db1cc08239f86b70396cc1a9c97c2655cfbcb Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 2 Aug 2017 17:38:29 -0400 Subject: [PATCH 03/15] Add 'QueryJob.total_bytes_billed' property. --- bigquery/google/cloud/bigquery/job.py | 14 ++++++++++++++ bigquery/tests/unit/test_job.py | 15 +++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py index 4bb196d048dc..8ce30c542949 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -1312,6 +1312,20 @@ def total_bytes_processed(self): query_stats = self._query_statistics() return query_stats.get('totalBytesProcessed') + @property + def total_bytes_billed(self): + """Return total bytes billed from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.totalBytesBilled + + :rtype: int or None + :returns: total bytes processed by the job, or None if job is not + yet complete. 
+ """ + query_stats = self._query_statistics() + return query_stats.get('totalBytesBilled') + def query_results(self): """Construct a QueryResults instance, bound to this job. diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py index ee099230e246..256eab5b9828 100644 --- a/bigquery/tests/unit/test_job.py +++ b/bigquery/tests/unit/test_job.py @@ -1680,6 +1680,21 @@ def test_total_bytes_processed(self): query_stats['totalBytesProcessed'] = total_bytes self.assertEqual(job.total_bytes_processed, total_bytes) + def test_total_bytes_billed(self): + total_bytes = 1234 + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertIsNone(job.total_bytes_billed) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.total_bytes_billed) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.total_bytes_billed) + + query_stats['totalBytesBilled'] = total_bytes + self.assertEqual(job.total_bytes_billed, total_bytes) + def test_query_results(self): from google.cloud.bigquery.query import QueryResults From 9e88976655cc6c5addf81183556b9da17a3a3c83 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 2 Aug 2017 17:40:10 -0400 Subject: [PATCH 04/15] Add 'QueryJob.billing_tier' property. --- bigquery/google/cloud/bigquery/job.py | 14 ++++++++++++++ bigquery/tests/unit/test_job.py | 15 +++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py index 8ce30c542949..0e40437b490d 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -1326,6 +1326,20 @@ def total_bytes_billed(self): query_stats = self._query_statistics() return query_stats.get('totalBytesBilled') + @property + def billing_tier(self): + """Return billing tier from job statistics, if present. 
+ + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.billingTier + + :rtype: int or None + :returns: total bytes processed by the job, or None if job is not + yet complete. + """ + query_stats = self._query_statistics() + return query_stats.get('billingTier') + def query_results(self): """Construct a QueryResults instance, bound to this job. diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py index 256eab5b9828..222877526410 100644 --- a/bigquery/tests/unit/test_job.py +++ b/bigquery/tests/unit/test_job.py @@ -1695,6 +1695,21 @@ def test_total_bytes_billed(self): query_stats['totalBytesBilled'] = total_bytes self.assertEqual(job.total_bytes_billed, total_bytes) + def test_billing_tier(self): + billing_tier = 1 + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertIsNone(job.billing_tier) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.billing_tier) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.billing_tier) + + query_stats['billingTier'] = billing_tier + self.assertEqual(job.billing_tier, billing_tier) + def test_query_results(self): from google.cloud.bigquery.query import QueryResults From 53ed9146244fe3dee767f55c83e3e6082c176bbd Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 2 Aug 2017 17:42:50 -0400 Subject: [PATCH 05/15] Add 'QueryJob.cache_hit' property. 
--- bigquery/google/cloud/bigquery/job.py | 16 +++++++++++++++- bigquery/tests/unit/test_job.py | 14 ++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py index 0e40437b490d..a7d733027254 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -1334,12 +1334,26 @@ def billing_tier(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.billingTier :rtype: int or None - :returns: total bytes processed by the job, or None if job is not + :returns: billing tier used by the job, or None if job is not yet complete. """ query_stats = self._query_statistics() return query_stats.get('billingTier') + @property + def cache_hit(self): + """Return cache-hit flag from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.cacheHit + + :rtype: bool or None + :returns: whether the query results were returned from cache, or None + if job is not yet complete. + """ + query_stats = self._query_statistics() + return query_stats.get('cacheHit') + def query_results(self): """Construct a QueryResults instance, bound to this job.
diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py index 222877526410..73a7b31d9526 100644 --- a/bigquery/tests/unit/test_job.py +++ b/bigquery/tests/unit/test_job.py @@ -1710,6 +1710,20 @@ def test_billing_tier(self): query_stats['billingTier'] = billing_tier self.assertEqual(job.billing_tier, billing_tier) + def test_cache_hit(self): + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertIsNone(job.cache_hit) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.cache_hit) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.cache_hit) + + query_stats['cacheHit'] = True + self.assertTrue(job.cache_hit) + def test_query_results(self): from google.cloud.bigquery.query import QueryResults From 941ad929d88a147214d4094bdd36c33d9c846e6c Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 2 Aug 2017 18:20:11 -0400 Subject: [PATCH 06/15] Add 'Client._clone' helper. Eases construction of a client with a new project, but the same credentials / http implementation. --- bigquery/google/cloud/bigquery/client.py | 11 +++++++++++ bigquery/tests/unit/test_client.py | 12 ++++++++++++ 2 files changed, 23 insertions(+) diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py index f36d80978efd..e24f41afd6b0 100644 --- a/bigquery/google/cloud/bigquery/client.py +++ b/bigquery/google/cloud/bigquery/client.py @@ -83,6 +83,17 @@ def __init__(self, project=None, credentials=None, _http=None): project=project, credentials=credentials, _http=_http) self._connection = Connection(self) + def _clone(self, project): + """Create a new client for another project. + + Helper for creating dataset / table instances in remote projects. + + :rtype: :class:`Client` + :returns: a new instance, bound to the supplied project, using + the same credentials / http object as this instance. 
+ """ + return self.__class__(project, self._credentials, self._http) + def list_projects(self, max_results=None, page_token=None): """List projects for the project associated with this client. diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py index e71f3b99fbe0..962432498946 100644 --- a/bigquery/tests/unit/test_client.py +++ b/bigquery/tests/unit/test_client.py @@ -45,6 +45,18 @@ def test_ctor(self): self.assertIs(client._connection.credentials, creds) self.assertIs(client._connection.http, http) + def test_clone(self): + PROJECT = 'PROJECT' + OTHER_PROJECT = 'OTHER-PROJECT' + creds = _make_credentials() + http = object() + client = self._make_one(project=PROJECT, credentials=creds, _http=http) + + cloned = client._clone(OTHER_PROJECT) + self.assertEqual(cloned.project, OTHER_PROJECT) + self.assertIs(cloned._credentials, creds) + self.assertIs(cloned._http, http) + def test_list_projects_defaults(self): import six from google.cloud.bigquery.client import Project From 1e34f2dae70e4dcc77e7a53bb36adddb8497e8da Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 2 Aug 2017 18:20:22 -0400 Subject: [PATCH 07/15] Add 'QueryJob.referenced_tables' property. --- bigquery/google/cloud/bigquery/job.py | 36 ++++++++++++++++++++ bigquery/tests/unit/test_job.py | 47 ++++++++++++++++++++++++++- 2 files changed, 82 insertions(+), 1 deletion(-) diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py index a7d733027254..0ddfb35a9b17 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -1354,6 +1354,42 @@ def cache_hit(self): query_stats = self._query_statistics() return query_stats.get('cacheHit') + @property + def referenced_tables(self): + """Return referenced tables from job statistics, if present. 
+ + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.referencedTables + + :rtype: list of :class:`~google.cloud.bigquery.table.Table` + :returns: tables referenced by the query, or an empty list + if the query has not yet completed. + """ + tables = [] + client = self._require_client(None) + query_stats = self._query_statistics() + clients_by_project = {client.project: client} + datasets_by_project_name = {} + + for table in query_stats.get('referencedTables', ()): + + t_project = table['projectId'] + t_client = clients_by_project.get(t_project) + if t_client is None: + t_client = client._clone(t_project) + clients_by_project[t_project] = t_client + + ds_name = table['datasetId'] + t_dataset = datasets_by_project_name.get((t_project, ds_name)) + if t_dataset is None: + t_dataset = t_client.dataset(ds_name) + datasets_by_project_name[(t_project, ds_name)] = t_dataset + + t_name = table['tableId'] + tables.append(t_dataset.table(t_name)) + + return tables + def query_results(self): """Construct a QueryResults instance, bound to this job.
diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py index 73a7b31d9526..457ac9073fa1 100644 --- a/bigquery/tests/unit/test_job.py +++ b/bigquery/tests/unit/test_job.py @@ -42,7 +42,7 @@ def test_missing_reason(self): class _Base(object): PROJECT = 'project' SOURCE1 = 'http://example.com/source1.csv' - DS_NAME = 'datset_name' + DS_NAME = 'dataset_name' TABLE_NAME = 'table_name' JOB_NAME = 'job_name' @@ -1724,6 +1724,48 @@ def test_cache_hit(self): query_stats['cacheHit'] = True self.assertTrue(job.cache_hit) + def test_referenced_tables(self): + from google.cloud.bigquery.dataset import Dataset + from google.cloud.bigquery.table import Table + + referenced_tables = [{ + 'projectId': self.PROJECT, + 'datasetId': 'dataset', + 'tableId': 'table', + }, { + + 'projectId': 'other-project-123', + 'datasetId': 'other-dataset', + 'tableId': 'other-table', + }] + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertEqual(job.referenced_tables, []) + + statistics = job._properties['statistics'] = {} + self.assertEqual(job.referenced_tables, []) + + query_stats = statistics['query'] = {} + self.assertEqual(job.referenced_tables, []) + + query_stats['referencedTables'] = referenced_tables + + local, remote = job.referenced_tables + + self.assertIsInstance(local, Table) + self.assertEqual(local.name, 'table') + self.assertIsInstance(local._dataset, Dataset) + self.assertEqual(local._dataset.name, 'dataset') + self.assertIs(local._dataset._client, client) + + self.assertIsInstance(remote, Table) + self.assertEqual(remote.name, 'other-table') + self.assertIsInstance(remote._dataset, Dataset) + self.assertEqual(remote._dataset.name, 'other-dataset') + self.assertIsNot(remote._dataset._client, client) + self.assertIsInstance(remote._dataset._client, _Client) + self.assertEqual(remote._dataset._client.project, 'other-project-123') + def test_query_results(self): from google.cloud.bigquery.query import 
QueryResults @@ -2167,6 +2209,9 @@ def __init__(self, project='project', connection=None): self.project = project self._connection = connection + def _clone(self, project): + return self.__class__(project, connection=self._connection) + def dataset(self, name): from google.cloud.bigquery.dataset import Dataset From 3042da1ee781bb33e29f27b770b9845203ec0695 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 2 Aug 2017 18:23:35 -0400 Subject: [PATCH 08/15] Add 'QueryJob.num_dml_affected_rows' property. --- bigquery/google/cloud/bigquery/job.py | 14 ++++++++++++++ bigquery/tests/unit/test_job.py | 15 +++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py index 0ddfb35a9b17..4aeb11a7b407 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -1390,6 +1390,20 @@ def referenced_tables(self): return tables + @property + def num_dml_affected_rows(self): + """Return total bytes billed from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.numDmlAffectedRows + + :rtype: int or None + :returns: number of DML rows affectd by the job, or None if job is not + yet complete. + """ + query_stats = self._query_statistics() + return query_stats.get('numDmlAffectedRows') + def query_results(self): """Construct a QueryResults instance, bound to this job. 
diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py index 457ac9073fa1..787b54891441 100644 --- a/bigquery/tests/unit/test_job.py +++ b/bigquery/tests/unit/test_job.py @@ -1766,6 +1766,21 @@ def test_referenced_tables(self): self.assertIsInstance(remote._dataset._client, _Client) self.assertEqual(remote._dataset._client.project, 'other-project-123') + def test_num_dml_affected_rows(self): + num_rows = 1234 + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertIsNone(job.num_dml_affected_rows) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.num_dml_affected_rows) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.num_dml_affected_rows) + + query_stats['numDmlAffectedRows'] = num_rows + self.assertEqual(job.num_dml_affected_rows, num_rows) + def test_query_results(self): from google.cloud.bigquery.query import QueryResults From da1112ab3e34e86941eb347252ff3b07a4fd9de4 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 2 Aug 2017 18:43:28 -0400 Subject: [PATCH 09/15] Add 'QueryJob.schema' property. --- bigquery/google/cloud/bigquery/job.py | 14 ++++++++ bigquery/tests/unit/test_job.py | 50 +++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py index 4aeb11a7b407..7bc3d3fa1e95 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -1390,6 +1390,20 @@ def referenced_tables(self): return tables + @property + def schema(self): + """Return schema from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.schema + + :rtype: list of :class:`~google.cloud.bigquery.schema.SchemaField + :returns: fields describing the query's result set, or an empty list + if the query has not yet completed. 
+ """ + query_stats = self._query_statistics() + return _parse_schema_resource(query_stats.get('schema', {})) + @property def num_dml_affected_rows(self): """Return total bytes billed from job statistics, if present. diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py index 787b54891441..bc272ad674e7 100644 --- a/bigquery/tests/unit/test_job.py +++ b/bigquery/tests/unit/test_job.py @@ -1766,6 +1766,56 @@ def test_referenced_tables(self): self.assertIsInstance(remote._dataset._client, _Client) self.assertEqual(remote._dataset._client.project, 'other-project-123') + def test_schema(self): + from google.cloud.bigquery.table import _parse_schema_resource + + schema = { + 'fields': [{ + 'name': 'full_name', + 'type': 'STRING', + 'mode': 'NULLABLE', + 'description': 'DESCRIPTION' + }, { + 'name': 'phone_number', + 'type': 'STRING', + 'mode': 'REPEATED', + }, { + 'name': 'address', + 'type': 'RECORD', + 'mode': 'REPEATED', + 'fields': [{ + 'name': 'street_address', + 'type': 'STRING', + 'mode': 'NULLABLE', + }, { + 'name': 'city', + 'type': 'STRING', + 'mode': 'NULLABLE', + }, { + 'name': 'state', + 'type': 'STRING', + 'mode': 'NULLABLE', + }, { + 'name': 'zip', + 'type': 'STRING', + 'mode': 'NULLABLE', + }], + }], + } + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertEqual(job.schema, ()) + + statistics = job._properties['statistics'] = {} + self.assertEqual(job.schema, ()) + + query_stats = statistics['query'] = {} + self.assertEqual(job.schema, ()) + + query_stats['schema'] = schema + + self.assertEqual(job.schema, _parse_schema_resource(schema)) + def test_num_dml_affected_rows(self): num_rows = 1234 client = _Client(self.PROJECT) From 6009e30be9ae1be48f89fb246e70fc0723245133 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 2 Aug 2017 18:51:21 -0400 Subject: [PATCH 10/15] Add 'QueryJob.undeclared_query_parameters' property.
--- bigquery/google/cloud/bigquery/job.py | 15 ++++++++++++ bigquery/tests/unit/test_job.py | 35 +++++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py index 7bc3d3fa1e95..24e95c4de7c8 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -1418,6 +1418,21 @@ def num_dml_affected_rows(self): query_stats = self._query_statistics() return query_stats.get('numDmlAffectedRows') + @property + def undeclared_query_paramters(self): + """Return undeclared query parameters from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.undeclaredQueryParamters + + :rtype: list of dict + :returns: mappings describing the undeclared parameters, or an empty + list if the query has not yet completed. + """ + query_stats = self._query_statistics() + undeclared = query_stats.get('undeclaredQueryParamters', ()) + return [copy.deepcopy(parameter) for parameter in undeclared] + def query_results(self): """Construct a QueryResults instance, bound to this job. 
diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py index bc272ad674e7..813ca1dc694c 100644 --- a/bigquery/tests/unit/test_job.py +++ b/bigquery/tests/unit/test_job.py @@ -1831,6 +1831,41 @@ def test_num_dml_affected_rows(self): query_stats['numDmlAffectedRows'] = num_rows self.assertEqual(job.num_dml_affected_rows, num_rows) + def test_undeclared_query_paramters(self): + undeclared = [{ + "name": 'my_scalar', + "parameterType": { + "type": 'STRING', + }, + }, { + "name": 'my_array', + "parameterType": { + "type": 'ARRAY', + "arrayType": 'INT64', + }, + }, { + "name": 'my_struct', + "parameterType": { + "type": 'STRUCT', + "structTypes": [{ + "name": 'count', + "type": 'INT64', + }], + }, + }] + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertEqual(job.undeclared_query_paramters, []) + + statistics = job._properties['statistics'] = {} + self.assertEqual(job.undeclared_query_paramters, []) + + query_stats = statistics['query'] = {} + self.assertEqual(job.undeclared_query_paramters, []) + + query_stats['undeclaredQueryParamters'] = undeclared + self.assertEqual(job.undeclared_query_paramters, undeclared) + def test_query_results(self): from google.cloud.bigquery.query import QueryResults From 419c8b66d69ebc2fa5bd5d653b661ce43768169b Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 2 Aug 2017 18:53:54 -0400 Subject: [PATCH 11/15] Add 'QueryJob.statement_type' property. 
--- bigquery/google/cloud/bigquery/job.py | 14 ++++++++++++++ bigquery/tests/unit/test_job.py | 15 +++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py index 24e95c4de7c8..331878eb5a2e 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -1433,6 +1433,20 @@ def undeclared_query_paramters(self): undeclared = query_stats.get('undeclaredQueryParamters', ()) return [copy.deepcopy(parameter) for parameter in undeclared] + @property + def statement_type(self): + """Return statement type from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.statementType + + :rtype: str or None + :returns: type of statement used by the job, or None if job is not + yet complete. + """ + query_stats = self._query_statistics() + return query_stats.get('statementType') + def query_results(self): """Construct a QueryResults instance, bound to this job. 
diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py index 813ca1dc694c..c94eee602324 100644 --- a/bigquery/tests/unit/test_job.py +++ b/bigquery/tests/unit/test_job.py @@ -1866,6 +1866,21 @@ def test_undeclared_query_paramters(self): query_stats['undeclaredQueryParamters'] = undeclared self.assertEqual(job.undeclared_query_paramters, undeclared) + def test_statement_type(self): + statement_type = 'SELECT' + client = _Client(self.PROJECT) + job = self._make_one(self.JOB_NAME, self.QUERY, client) + self.assertIsNone(job.statement_type) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.statement_type) + + query_stats = statistics['query'] = {} + self.assertIsNone(job.statement_type) + + query_stats['statementType'] = statement_type + self.assertEqual(job.statement_type, statement_type) + def test_query_results(self): from google.cloud.bigquery.query import QueryResults From 0f443eaecc149ae0e46d626644094d1334de2834 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 2 Aug 2017 18:59:52 -0400 Subject: [PATCH 12/15] Add 'ExtractTableToStorageJob.destination_uri_file_counts' property. 
--- bigquery/google/cloud/bigquery/job.py | 19 +++++++++++++++++++ bigquery/tests/unit/test_job.py | 17 +++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py index 331878eb5a2e..f4da130dccd6 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -965,6 +965,25 @@ def __init__(self, name, source, destination_uris, client): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.printHeader """ + def _extract_statistics(self): + """Helper for properties derived from extract job statistics.""" + statistics = self._properties.get('statistics', {}) + return statistics.get('extract', {}) + + @property + def destination_uri_file_counts(self): + """Return file counts from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.extract.destinationUriFileCounts + + :rtype: int or None + :returns: number of DML rows affectd by the job, or None if job is not + yet complete. 
+ """ + query_stats = self._extract_statistics() + return query_stats.get('destinationUriFileCounts') + def _populate_config_resource(self, configuration): """Helper for _build_resource: copy config properties to resource""" if self.compression is not None: diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py index c94eee602324..201a6579c020 100644 --- a/bigquery/tests/unit/test_job.py +++ b/bigquery/tests/unit/test_job.py @@ -1182,6 +1182,23 @@ def test_ctor(self): self.assertIsNone(job.field_delimiter) self.assertIsNone(job.print_header) + def test_destination_uri_file_counts(self): + file_counts = 23 + client = _Client(self.PROJECT) + source = _Table(self.SOURCE_TABLE) + job = self._make_one(self.JOB_NAME, source, [self.DESTINATION_URI], + client) + self.assertIsNone(job.destination_uri_file_counts) + + statistics = job._properties['statistics'] = {} + self.assertIsNone(job.destination_uri_file_counts) + + extract_stats = statistics['extract'] = {} + self.assertIsNone(job.destination_uri_file_counts) + + extract_stats['destinationUriFileCounts'] = file_counts + self.assertEqual(job.destination_uri_file_counts, file_counts) + def test_from_api_repr_missing_identity(self): self._setUpConstants() client = _Client(self.PROJECT) From ed3aacc8c037e5f8612c3b23b608503042942ad6 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 2 Aug 2017 19:02:59 -0400 Subject: [PATCH 13/15] Factor out job-specific stats helper. 
--- bigquery/google/cloud/bigquery/job.py | 37 ++++++++++++--------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py index f4da130dccd6..d7d6d4857e49 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -334,6 +334,11 @@ def _set_properties(self, api_response): # For Future interface self._set_future_result() + def _job_statistics(self): + """Helper for properties derived from job statistics.""" + statistics = self._properties.get('statistics', {}) + return statistics.get(self._JOB_TYPE, {}) + @classmethod def _get_resource_config(cls, resource): """Helper for :meth:`from_api_repr` @@ -965,11 +970,6 @@ def __init__(self, name, source, destination_uris, client): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.extract.printHeader """ - def _extract_statistics(self): - """Helper for properties derived from extract job statistics.""" - statistics = self._properties.get('statistics', {}) - return statistics.get('extract', {}) - @property def destination_uri_file_counts(self): """Return file counts from job statistics, if present. @@ -981,7 +981,7 @@ def destination_uri_file_counts(self): :returns: number of DML rows affectd by the job, or None if job is not yet complete. """ - query_stats = self._extract_statistics() + query_stats = self._job_statistics() return query_stats.get('destinationUriFileCounts') def _populate_config_resource(self, configuration): @@ -1297,11 +1297,6 @@ def from_api_repr(cls, resource, client): job._set_properties(resource) return job - def _query_statistics(self): - """Helper for properties derived from query statistics.""" - statistics = self._properties.get('statistics', {}) - return statistics.get('query', {}) - @property def query_plan(self): """Return query plan from job statistics, if present. 
@@ -1313,7 +1308,7 @@ def query_plan(self): :returns: mappings describing the query plan, or an empty list if the query has not yet completed. """ - query_stats = self._query_statistics() + query_stats = self._job_statistics() plan_entries = query_stats.get('queryPlan', ()) return [copy.deepcopy(entry) for entry in plan_entries] @@ -1328,7 +1323,7 @@ def total_bytes_processed(self): :returns: total bytes processed by the job, or None if job is not yet complete. """ - query_stats = self._query_statistics() + query_stats = self._job_statistics() return query_stats.get('totalBytesProcessed') @property @@ -1342,7 +1337,7 @@ def total_bytes_billed(self): :returns: total bytes processed by the job, or None if job is not yet complete. """ - query_stats = self._query_statistics() + query_stats = self._job_statistics() return query_stats.get('totalBytesBilled') @property @@ -1356,7 +1351,7 @@ def billing_tier(self): :returns: billing tier used by the job, or None if job is not yet complete. """ - query_stats = self._query_statistics() + query_stats = self._job_statistics() return query_stats.get('billingTier') @property @@ -1370,7 +1365,7 @@ def cache_hit(self): :returns: whether the query results were returned from cache, or None if job is not yet complete. """ - query_stats = self._query_statistics() + query_stats = self._job_statistics() return query_stats.get('cacheHit') @property @@ -1386,7 +1381,7 @@ def referenced_tables(self): """ tables = [] client = self._require_client(None) - query_stats = self._query_statistics() + query_stats = self._job_statistics() clients_by_project = {client.project: client} datasets_by_project_name = {} @@ -1420,7 +1415,7 @@ def schema(self): :returns: fields describing the query's result set, or an empty list if the query has not yet completed. 
""" - query_stats = self._query_statistics() + query_stats = self._job_statistics() return _parse_schema_resource(query_stats.get('schema', {})) @property @@ -1434,7 +1429,7 @@ def num_dml_affected_rows(self): :returns: number of DML rows affectd by the job, or None if job is not yet complete. """ - query_stats = self._query_statistics() + query_stats = self._job_statistics() return query_stats.get('numDmlAffectedRows') @property @@ -1448,7 +1443,7 @@ def undeclared_query_paramters(self): :returns: mappings describing the undeclared parameters, or an empty list if the query has not yet completed. """ - query_stats = self._query_statistics() + query_stats = self._job_statistics() undeclared = query_stats.get('undeclaredQueryParamters', ()) return [copy.deepcopy(parameter) for parameter in undeclared] @@ -1463,7 +1458,7 @@ def statement_type(self): :returns: type of statement used by the job, or None if job is not yet complete. """ - query_stats = self._query_statistics() + query_stats = self._job_statistics() return query_stats.get('statementType') def query_results(self): From 6380fb91a95e639957da54b39cc534e5c93326c7 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 2 Aug 2017 19:23:00 -0400 Subject: [PATCH 14/15] Coverage. 
--- bigquery/tests/unit/test_job.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/bigquery/tests/unit/test_job.py b/bigquery/tests/unit/test_job.py index 201a6579c020..d0b7194b60fd 100644 --- a/bigquery/tests/unit/test_job.py +++ b/bigquery/tests/unit/test_job.py @@ -1748,12 +1748,16 @@ def test_referenced_tables(self): referenced_tables = [{ 'projectId': self.PROJECT, 'datasetId': 'dataset', - 'tableId': 'table', + 'tableId': 'local1', + }, { + 'projectId': self.PROJECT, + 'datasetId': 'dataset', + 'tableId': 'local2', }, { 'projectId': 'other-project-123', 'datasetId': 'other-dataset', - 'tableId': 'other-table', + 'tableId': 'remote', }] client = _Client(self.PROJECT) job = self._make_one(self.JOB_NAME, self.QUERY, client) @@ -1767,16 +1771,20 @@ def test_referenced_tables(self): query_stats['referencedTables'] = referenced_tables - local, remote = job.referenced_tables + local1, local2, remote = job.referenced_tables + + self.assertIsInstance(local1, Table) + self.assertEqual(local1.name, 'local1') + self.assertIsInstance(local1._dataset, Dataset) + self.assertEqual(local1._dataset.name, 'dataset') + self.assertIs(local1._dataset._client, client) - self.assertIsInstance(local, Table) - self.assertEqual(local.name, 'table') - self.assertIsInstance(local._dataset, Dataset) - self.assertEqual(local._dataset.name, 'dataset') - self.assertIs(local._dataset._client, client) + self.assertIsInstance(local2, Table) + self.assertEqual(local2.name, 'local2') + self.assertIs(local2._dataset, local1._dataset) self.assertIsInstance(remote, Table) - self.assertEqual(remote.name, 'other-table') + self.assertEqual(remote.name, 'remote') self.assertIsInstance(remote._dataset, Dataset) self.assertEqual(remote._dataset.name, 'other-dataset') self.assertIsNot(remote._dataset._client, client) From 0cd8fa5128a54ec2cd6f0c3bde591849902d4f58 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Thu, 3 Aug 2017 11:28:17 -0400 Subject: [PATCH 
15/15] Fix docs build. --- bigquery/google/cloud/bigquery/job.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/google/cloud/bigquery/job.py b/bigquery/google/cloud/bigquery/job.py index d7d6d4857e49..3a945e8728ee 100644 --- a/bigquery/google/cloud/bigquery/job.py +++ b/bigquery/google/cloud/bigquery/job.py @@ -1411,7 +1411,7 @@ def schema(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#statistics.query.schema - :rtype: list of :class:`~google.cloud.bigquery.schema.SchemaField + :rtype: list of :class:`~google.cloud.bigquery.schema.SchemaField` :returns: fields describing the query's result set, or an empty list if the query has not yet completed. """