From fe54cb72d114ebbab06df4b50f39b3cf2cb4df63 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Fri, 25 Sep 2020 11:36:05 -0700 Subject: [PATCH 001/341] chore: add CI secrets manager (via synth) (#271) This PR was generated using Autosynth. :rainbow: Synth log will be available here: https://source.cloud.google.com/results/invocations/5da1d2aa-a963-44d1-952a-3ed428de6719/targets - [ ] To automatically regenerate this PR, check this box. Source-Link: https://github.com/googleapis/synthtool/commit/27f4406999b1eee29e04b09b2423a8e4646c7e24 Source-Link: https://github.com/googleapis/synthtool/commit/dba48bb9bc6959c232bec9150ac6313b608fe7bd Source-Link: https://github.com/googleapis/synthtool/commit/257fda18168bedb76985024bd198ed1725485488 Source-Link: https://github.com/googleapis/synthtool/commit/ffcee7952b74f647cbb3ef021d95422f10816fca Source-Link: https://github.com/googleapis/synthtool/commit/d302f93d7f47e2852e585ac35ab2d15585717ec0 --- .kokoro/populate-secrets.sh | 43 +++++++++++++++++++++++++++++++ .kokoro/release/common.cfg | 50 ++++++++++--------------------------- .kokoro/trampoline.sh | 15 +++++++---- docs/conf.py | 3 ++- scripts/decrypt-secrets.sh | 15 ++++++++++- synth.metadata | 7 +++--- 6 files changed, 86 insertions(+), 47 deletions(-) create mode 100755 .kokoro/populate-secrets.sh diff --git a/.kokoro/populate-secrets.sh b/.kokoro/populate-secrets.sh new file mode 100755 index 000000000..f52514257 --- /dev/null +++ b/.kokoro/populate-secrets.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# Copyright 2020 Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +set -eo pipefail + +function now { date +"%Y-%m-%d %H:%M:%S" | tr -d '\n' ;} +function msg { println "$*" >&2 ;} +function println { printf '%s\n' "$(now) $*" ;} + + +# Populates requested secrets set in SECRET_MANAGER_KEYS from service account: +# kokoro-trampoline@cloud-devrel-kokoro-resources.iam.gserviceaccount.com +SECRET_LOCATION="${KOKORO_GFILE_DIR}/secret_manager" +msg "Creating folder on disk for secrets: ${SECRET_LOCATION}" +mkdir -p ${SECRET_LOCATION} +for key in $(echo ${SECRET_MANAGER_KEYS} | sed "s/,/ /g") +do + msg "Retrieving secret ${key}" + docker run --entrypoint=gcloud \ + --volume=${KOKORO_GFILE_DIR}:${KOKORO_GFILE_DIR} \ + gcr.io/google.com/cloudsdktool/cloud-sdk \ + secrets versions access latest \ + --project cloud-devrel-kokoro-resources \ + --secret ${key} > \ + "${SECRET_LOCATION}/${key}" + if [[ $? == 0 ]]; then + msg "Secret written to ${SECRET_LOCATION}/${key}" + else + msg "Error retrieving secret ${key}" + fi +done diff --git a/.kokoro/release/common.cfg b/.kokoro/release/common.cfg index 661a04481..18b417709 100644 --- a/.kokoro/release/common.cfg +++ b/.kokoro/release/common.cfg @@ -23,42 +23,18 @@ env_vars: { value: "github/python-bigquery/.kokoro/release.sh" } -# Fetch the token needed for reporting release status to GitHub -before_action { - fetch_keystore { - keystore_resource { - keystore_config_id: 73713 - keyname: "yoshi-automation-github-key" - } - } -} - -# Fetch PyPI password -before_action { - fetch_keystore { - keystore_resource { - keystore_config_id: 73713 - keyname: "google_cloud_pypi_password" - } - } -} - -# Fetch magictoken to use with Magic Github Proxy -before_action { - fetch_keystore { - keystore_resource { - keystore_config_id: 73713 - keyname: "releasetool-magictoken" - } - } +# Fetch PyPI password +before_action { + fetch_keystore { + keystore_resource { + keystore_config_id: 73713 + keyname: 
"google_cloud_pypi_password" + } + } } -# Fetch api key to use with Magic Github Proxy -before_action { - fetch_keystore { - keystore_resource { - keystore_config_id: 73713 - keyname: "magic-github-proxy-api-key" - } - } -} +# Tokens needed to report release status back to GitHub +env_vars: { + key: "SECRET_MANAGER_KEYS" + value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem" +} \ No newline at end of file diff --git a/.kokoro/trampoline.sh b/.kokoro/trampoline.sh index e8c4251f3..f39236e94 100755 --- a/.kokoro/trampoline.sh +++ b/.kokoro/trampoline.sh @@ -15,9 +15,14 @@ set -eo pipefail -python3 "${KOKORO_GFILE_DIR}/trampoline_v1.py" || ret_code=$? +# Always run the cleanup script, regardless of the success of bouncing into +# the container. +function cleanup() { + chmod +x ${KOKORO_GFILE_DIR}/trampoline_cleanup.sh + ${KOKORO_GFILE_DIR}/trampoline_cleanup.sh + echo "cleanup"; +} +trap cleanup EXIT -chmod +x ${KOKORO_GFILE_DIR}/trampoline_cleanup.sh -${KOKORO_GFILE_DIR}/trampoline_cleanup.sh || true - -exit ${ret_code} +$(dirname $0)/populate-secrets.sh # Secret Manager secrets. +python3 "${KOKORO_GFILE_DIR}/trampoline_v1.py" \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index 155606c97..b38bdd1ff 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -29,7 +29,7 @@ # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. -needs_sphinx = "1.6.3" +needs_sphinx = "1.5.5" # Add any Sphinx extension module names here, as strings. 
They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom @@ -39,6 +39,7 @@ "sphinx.ext.autosummary", "sphinx.ext.intersphinx", "sphinx.ext.coverage", + "sphinx.ext.doctest", "sphinx.ext.napoleon", "sphinx.ext.todo", "sphinx.ext.viewcode", diff --git a/scripts/decrypt-secrets.sh b/scripts/decrypt-secrets.sh index ff599eb2a..21f6d2a26 100755 --- a/scripts/decrypt-secrets.sh +++ b/scripts/decrypt-secrets.sh @@ -20,14 +20,27 @@ ROOT=$( dirname "$DIR" ) # Work from the project root. cd $ROOT +# Prevent it from overriding files. +# We recommend that sample authors use their own service account files and cloud project. +# In that case, they are supposed to prepare these files by themselves. +if [[ -f "testing/test-env.sh" ]] || \ + [[ -f "testing/service-account.json" ]] || \ + [[ -f "testing/client-secrets.json" ]]; then + echo "One or more target files exist, aborting." + exit 1 +fi + # Use SECRET_MANAGER_PROJECT if set, fallback to cloud-devrel-kokoro-resources. PROJECT_ID="${SECRET_MANAGER_PROJECT:-cloud-devrel-kokoro-resources}" gcloud secrets versions access latest --secret="python-docs-samples-test-env" \ + --project="${PROJECT_ID}" \ > testing/test-env.sh gcloud secrets versions access latest \ --secret="python-docs-samples-service-account" \ + --project="${PROJECT_ID}" \ > testing/service-account.json gcloud secrets versions access latest \ --secret="python-docs-samples-client-secrets" \ - > testing/client-secrets.json \ No newline at end of file + --project="${PROJECT_ID}" \ + > testing/client-secrets.json diff --git a/synth.metadata b/synth.metadata index efee17785..e786fb364 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "a125160696d1453b04a66c967819f90e70e03a52" + "sha": "b716e1c8ecd90142b498b95e7f8830835529cf4a" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", 
- "sha": "32c758f11b8c578f515a746c9d263b82a615a77c" + "sha": "27f4406999b1eee29e04b09b2423a8e4646c7e24" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "32c758f11b8c578f515a746c9d263b82a615a77c" + "sha": "27f4406999b1eee29e04b09b2423a8e4646c7e24" } } ], @@ -60,6 +60,7 @@ ".kokoro/docs/common.cfg", ".kokoro/docs/docs-presubmit.cfg", ".kokoro/docs/docs.cfg", + ".kokoro/populate-secrets.sh", ".kokoro/presubmit/common.cfg", ".kokoro/presubmit/presubmit.cfg", ".kokoro/presubmit/system-2.7.cfg", From fbbe0cb0ea22161d81f1e5504bb89b55e4198634 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Fri, 25 Sep 2020 13:20:06 -0700 Subject: [PATCH 002/341] chore: update protoc-generated comments (via synth) (#270) This PR was generated using Autosynth. :rainbow: Synth log will be available here: https://source.cloud.google.com/results/invocations/5da1d2aa-a963-44d1-952a-3ed428de6719/targets - [ ] To automatically regenerate this PR, check this box. PiperOrigin-RevId: 327026955 Source-Link: https://github.com/googleapis/googleapis/commit/0dc0a6c0f1a9f979bc0690f0caa5fbafa3000c2c --- google/cloud/bigquery_v2/proto/encryption_config_pb2.py | 2 +- google/cloud/bigquery_v2/proto/model_pb2.py | 2 +- google/cloud/bigquery_v2/proto/model_reference_pb2.py | 2 +- google/cloud/bigquery_v2/proto/standard_sql_pb2.py | 2 +- synth.metadata | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/google/cloud/bigquery_v2/proto/encryption_config_pb2.py b/google/cloud/bigquery_v2/proto/encryption_config_pb2.py index 5147743b6..5ae21ea6f 100644 --- a/google/cloud/bigquery_v2/proto/encryption_config_pb2.py +++ b/google/cloud/bigquery_v2/proto/encryption_config_pb2.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: google/cloud/bigquery_v2/proto/encryption_config.proto - +"""Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection diff --git a/google/cloud/bigquery_v2/proto/model_pb2.py b/google/cloud/bigquery_v2/proto/model_pb2.py index f485c4568..7b66be8f7 100644 --- a/google/cloud/bigquery_v2/proto/model_pb2.py +++ b/google/cloud/bigquery_v2/proto/model_pb2.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: google/cloud/bigquery_v2/proto/model.proto - +"""Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection diff --git a/google/cloud/bigquery_v2/proto/model_reference_pb2.py b/google/cloud/bigquery_v2/proto/model_reference_pb2.py index 07d7e4c4b..2411c4863 100644 --- a/google/cloud/bigquery_v2/proto/model_reference_pb2.py +++ b/google/cloud/bigquery_v2/proto/model_reference_pb2.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: google/cloud/bigquery_v2/proto/model_reference.proto - +"""Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection diff --git a/google/cloud/bigquery_v2/proto/standard_sql_pb2.py b/google/cloud/bigquery_v2/proto/standard_sql_pb2.py index 15f6715a2..bfe77f934 100644 --- a/google/cloud/bigquery_v2/proto/standard_sql_pb2.py +++ b/google/cloud/bigquery_v2/proto/standard_sql_pb2.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: google/cloud/bigquery_v2/proto/standard_sql.proto - +"""Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import message as _message from google.protobuf import reflection as _reflection diff --git a/synth.metadata b/synth.metadata index e786fb364..7fdc4fb28 100644 --- a/synth.metadata +++ b/synth.metadata @@ -11,8 +11,8 @@ "git": { "name": "googleapis", "remote": "https://github.com/googleapis/googleapis.git", - "sha": "868615a5c1c1059c636bb3d82a555edb1d5a251e", - "internalRef": "324294521" + "sha": "0dc0a6c0f1a9f979bc0690f0caa5fbafa3000c2c", + "internalRef": "327026955" } }, { From cbcb4b86d2d01fb983c20271958b11665e8f7d03 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 30 Sep 2020 23:47:22 +0200 Subject: [PATCH 003/341] chore: Prepare for 2.0 release (#278) * Remove BQ Storage v1beta1 compatibility code * Adjust code to new BQ Storage 2.0 * Remove Python 2/3 compatibility code * Bump test coverage to 100% * Update supported Python versions in README * Add UPGRADING guide. * Regenerate bigquery_v2 code with microgenerator * Adjust hand-written unit tests to regened BQ v2 * Adjust samples to BQ v2 regenerated code * Adjust system tests to regenerated BQ v2 * Skip failing generated unit test The assertion seems to fail for a banal reason, i.e. an extra newline in the string representation. * Delete Kokoro config for Python 2.7 * Fix docs build * Undelete failing test, but mark as skipped * Fix namespace name in docstrings and comments * Define minimum dependency versions for Python 3.6 * Exclude autogenerated docs from docs index * Exclude generated services from the library There are currently no public API endpoints for the ModelServiceClient, thus there is no point in generating that code in the first place. * Bump minimum proto-plus version to 1.10.0 The old pin (1.4.0) does not work, tests detected some problem. 
* Include generated types in the docs and rebuild * Ignore skipped test in coverage check * Explain moved enums in UPGRADING guide --- .kokoro/presubmit/presubmit.cfg | 8 +- .kokoro/presubmit/system-2.7.cfg | 7 - .kokoro/samples/python3.6/common.cfg | 6 + .kokoro/samples/python3.7/common.cfg | 6 + .kokoro/samples/python3.8/common.cfg | 6 + CONTRIBUTING.rst | 19 - README.rst | 11 +- UPGRADING.md | 59 ++ docs/UPGRADING.md | 1 + docs/bigquery_v2/services.rst | 6 + docs/bigquery_v2/types.rst | 5 + docs/conf.py | 1 + docs/gapic/v2/enums.rst | 8 - docs/gapic/v2/types.rst | 6 - docs/index.rst | 10 + docs/reference.rst | 4 +- google/cloud/bigquery/_pandas_helpers.py | 77 +- google/cloud/bigquery/client.py | 12 +- google/cloud/bigquery/dbapi/_helpers.py | 5 +- google/cloud/bigquery/dbapi/connection.py | 2 +- google/cloud/bigquery/dbapi/cursor.py | 61 +- google/cloud/bigquery/enums.py | 6 +- google/cloud/bigquery/magics/magics.py | 8 +- google/cloud/bigquery/model.py | 13 +- google/cloud/bigquery/routine.py | 18 +- google/cloud/bigquery/schema.py | 52 +- google/cloud/bigquery/table.py | 102 +- google/cloud/bigquery_v2/__init__.py | 41 +- google/cloud/bigquery_v2/gapic/__init__.py | 0 google/cloud/bigquery_v2/gapic/enums.py | 171 ---- google/cloud/bigquery_v2/proto/__init__.py | 0 .../proto/encryption_config_pb2_grpc.py | 3 - .../proto/location_metadata_pb2.py | 98 -- .../proto/location_metadata_pb2_grpc.py | 2 - .../cloud/bigquery_v2/proto/model_pb2_grpc.py | 214 ---- .../proto/model_reference_pb2_grpc.py | 3 - .../proto/standard_sql_pb2_grpc.py | 3 - google/cloud/bigquery_v2/py.typed | 2 + google/cloud/bigquery_v2/types.py | 58 -- google/cloud/bigquery_v2/types/__init__.py | 47 + .../bigquery_v2/types/encryption_config.py | 44 + google/cloud/bigquery_v2/types/model.py | 966 ++++++++++++++++++ .../bigquery_v2/types/model_reference.py | 49 + .../cloud/bigquery_v2/types/standard_sql.py | 106 ++ noxfile.py | 33 +- samples/create_routine.py | 2 +- samples/tests/conftest.py | 2 
+- samples/tests/test_routine_samples.py | 8 +- scripts/fixup_bigquery_v2_keywords.py | 181 ++++ setup.py | 32 +- synth.metadata | 105 +- synth.py | 80 +- testing/constraints-2.7.txt | 9 - testing/constraints-3.5.txt | 12 - testing/constraints-3.6.txt | 16 + tests/system.py | 107 +- .../enums/test_standard_sql_data_types.py | 7 +- tests/unit/model/test_model.py | 6 +- tests/unit/routine/test_routine.py | 9 +- tests/unit/routine/test_routine_argument.py | 6 +- tests/unit/test__pandas_helpers.py | 40 +- tests/unit/test_client.py | 111 +- tests/unit/test_dbapi_connection.py | 36 +- tests/unit/test_dbapi_cursor.py | 108 +- tests/unit/test_job.py | 24 +- tests/unit/test_magics.py | 48 +- tests/unit/test_opentelemetry_tracing.py | 2 +- tests/unit/test_schema.py | 42 +- tests/unit/test_table.py | 304 ++---- 69 files changed, 1974 insertions(+), 1682 deletions(-) delete mode 100644 .kokoro/presubmit/system-2.7.cfg create mode 100644 UPGRADING.md create mode 120000 docs/UPGRADING.md create mode 100644 docs/bigquery_v2/services.rst create mode 100644 docs/bigquery_v2/types.rst delete mode 100644 docs/gapic/v2/enums.rst delete mode 100644 docs/gapic/v2/types.rst delete mode 100644 google/cloud/bigquery_v2/gapic/__init__.py delete mode 100644 google/cloud/bigquery_v2/gapic/enums.py delete mode 100644 google/cloud/bigquery_v2/proto/__init__.py delete mode 100644 google/cloud/bigquery_v2/proto/encryption_config_pb2_grpc.py delete mode 100644 google/cloud/bigquery_v2/proto/location_metadata_pb2.py delete mode 100644 google/cloud/bigquery_v2/proto/location_metadata_pb2_grpc.py delete mode 100644 google/cloud/bigquery_v2/proto/model_pb2_grpc.py delete mode 100644 google/cloud/bigquery_v2/proto/model_reference_pb2_grpc.py delete mode 100644 google/cloud/bigquery_v2/proto/standard_sql_pb2_grpc.py create mode 100644 google/cloud/bigquery_v2/py.typed delete mode 100644 google/cloud/bigquery_v2/types.py create mode 100644 google/cloud/bigquery_v2/types/__init__.py create mode 100644 
google/cloud/bigquery_v2/types/encryption_config.py create mode 100644 google/cloud/bigquery_v2/types/model.py create mode 100644 google/cloud/bigquery_v2/types/model_reference.py create mode 100644 google/cloud/bigquery_v2/types/standard_sql.py create mode 100644 scripts/fixup_bigquery_v2_keywords.py delete mode 100644 testing/constraints-2.7.txt delete mode 100644 testing/constraints-3.5.txt diff --git a/.kokoro/presubmit/presubmit.cfg b/.kokoro/presubmit/presubmit.cfg index b158096f0..8f43917d9 100644 --- a/.kokoro/presubmit/presubmit.cfg +++ b/.kokoro/presubmit/presubmit.cfg @@ -1,7 +1 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Disable system tests. -env_vars: { - key: "RUN_SYSTEM_TESTS" - value: "false" -} +# Format: //devtools/kokoro/config/proto/build.proto \ No newline at end of file diff --git a/.kokoro/presubmit/system-2.7.cfg b/.kokoro/presubmit/system-2.7.cfg deleted file mode 100644 index 3b6523a19..000000000 --- a/.kokoro/presubmit/system-2.7.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Only run this nox session. -env_vars: { - key: "NOX_SESSION" - value: "system-2.7" -} \ No newline at end of file diff --git a/.kokoro/samples/python3.6/common.cfg b/.kokoro/samples/python3.6/common.cfg index a56768eae..f3b930960 100644 --- a/.kokoro/samples/python3.6/common.cfg +++ b/.kokoro/samples/python3.6/common.cfg @@ -13,6 +13,12 @@ env_vars: { value: "py-3.6" } +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-py36" +} + env_vars: { key: "TRAMPOLINE_BUILD_FILE" value: "github/python-bigquery/.kokoro/test-samples.sh" diff --git a/.kokoro/samples/python3.7/common.cfg b/.kokoro/samples/python3.7/common.cfg index c93747180..fc0654565 100644 --- a/.kokoro/samples/python3.7/common.cfg +++ b/.kokoro/samples/python3.7/common.cfg @@ -13,6 +13,12 @@ env_vars: { value: "py-3.7" } +# Declare build specific Cloud project. 
+env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-py37" +} + env_vars: { key: "TRAMPOLINE_BUILD_FILE" value: "github/python-bigquery/.kokoro/test-samples.sh" diff --git a/.kokoro/samples/python3.8/common.cfg b/.kokoro/samples/python3.8/common.cfg index 9808f15e3..2b0bf59b3 100644 --- a/.kokoro/samples/python3.8/common.cfg +++ b/.kokoro/samples/python3.8/common.cfg @@ -13,6 +13,12 @@ env_vars: { value: "py-3.8" } +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-py38" +} + env_vars: { key: "TRAMPOLINE_BUILD_FILE" value: "github/python-bigquery/.kokoro/test-samples.sh" diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 3366287d6..b3b802b49 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -80,25 +80,6 @@ We use `nox `__ to instrument our tests. .. nox: https://pypi.org/project/nox/ -Note on Editable Installs / Develop Mode -======================================== - -- As mentioned previously, using ``setuptools`` in `develop mode`_ - or a ``pip`` `editable install`_ is not possible with this - library. This is because this library uses `namespace packages`_. - For context see `Issue #2316`_ and the relevant `PyPA issue`_. - - Since ``editable`` / ``develop`` mode can't be used, packages - need to be installed directly. Hence your changes to the source - tree don't get incorporated into the **already installed** - package. - -.. _namespace packages: https://www.python.org/dev/peps/pep-0420/ -.. _Issue #2316: https://github.com/GoogleCloudPlatform/google-cloud-python/issues/2316 -.. _PyPA issue: https://github.com/pypa/packaging-problems/issues/12 -.. _develop mode: https://setuptools.readthedocs.io/en/latest/setuptools.html#development-mode -.. _editable install: https://pip.pypa.io/en/stable/reference/pip_install/#editable-installs - ***************************************** I'm getting weird errors... Can you help? 
***************************************** diff --git a/README.rst b/README.rst index c6bc17834..c7d50d729 100644 --- a/README.rst +++ b/README.rst @@ -52,11 +52,14 @@ dependencies. Supported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^ -Python >= 3.5 +Python >= 3.6 -Deprecated Python Versions -^^^^^^^^^^^^^^^^^^^^^^^^^^ -Python == 2.7. Python 2.7 support will be removed on January 1, 2020. +Unsupported Python Versions +^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Python == 2.7, Python == 3.5. + +The last version of this library compatible with Python 2.7 and 3.5 is +`google-cloud-bigquery==1.28.0`. Mac/Linux diff --git a/UPGRADING.md b/UPGRADING.md new file mode 100644 index 000000000..a4ba0efd2 --- /dev/null +++ b/UPGRADING.md @@ -0,0 +1,59 @@ + + + +# 2.0.0 Migration Guide + +The 2.0 release of the `google-cloud-bigquery` client drops support for Python +versions below 3.6. The client surface itself has not changed, but the 1.x series +will not be receiving any more feature updates or bug fixes. You are thus +encouraged to upgrade to the 2.x series. + +If you experience issues or have questions, please file an +[issue](https://github.com/googleapis/python-bigquery/issues). + + +## Supported Python Versions + +> **WARNING**: Breaking change + +The 2.0.0 release requires Python 3.6+. + + +## Supported BigQuery Storage Clients + +The 2.0.0 release requires BigQuery Storage `>= 2.0.0`, which dropped support +for `v1beta1` and `v1beta2` versions of the BigQuery Storage API. If you want to +use a BigQuery Storage client, it must be the one supporting the `v1` API version. + + +## Changed GAPIC Enums Path + +> **WARNING**: Breaking change + +Generated GAPIC enum types have been moved under `types`. Import paths need to be +adjusted. 
+ +**Before:** +```py +from google.cloud.bigquery_v2.gapic import enums + +distance_type = enums.Model.DistanceType.COSINE +``` + +**After:** +```py +from google.cloud.bigquery_v2 import types + +distance_type = types.Model.DistanceType.COSINE +``` \ No newline at end of file diff --git a/docs/UPGRADING.md b/docs/UPGRADING.md new file mode 120000 index 000000000..01097c8c0 --- /dev/null +++ b/docs/UPGRADING.md @@ -0,0 +1 @@ +../UPGRADING.md \ No newline at end of file diff --git a/docs/bigquery_v2/services.rst b/docs/bigquery_v2/services.rst new file mode 100644 index 000000000..65fbb438c --- /dev/null +++ b/docs/bigquery_v2/services.rst @@ -0,0 +1,6 @@ +Services for Google Cloud Bigquery v2 API +========================================= + +.. automodule:: google.cloud.bigquery_v2.services.model_service + :members: + :inherited-members: diff --git a/docs/bigquery_v2/types.rst b/docs/bigquery_v2/types.rst new file mode 100644 index 000000000..f43809958 --- /dev/null +++ b/docs/bigquery_v2/types.rst @@ -0,0 +1,5 @@ +Types for Google Cloud Bigquery v2 API +====================================== + +.. automodule:: google.cloud.bigquery_v2.types + :members: diff --git a/docs/conf.py b/docs/conf.py index b38bdd1ff..ee59f3492 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -100,6 +100,7 @@ "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "samples/snippets/README.rst", + "bigquery_v2/services.rst", # generated by the code generator ] # The reST default role (used for this markup: `text`) to use for all diff --git a/docs/gapic/v2/enums.rst b/docs/gapic/v2/enums.rst deleted file mode 100644 index 0e0f05ada..000000000 --- a/docs/gapic/v2/enums.rst +++ /dev/null @@ -1,8 +0,0 @@ -Enums for BigQuery API Client -============================= - -.. autoclass:: google.cloud.bigquery_v2.gapic.enums.Model - :members: - -.. 
autoclass:: google.cloud.bigquery_v2.gapic.enums.StandardSqlDataType - :members: diff --git a/docs/gapic/v2/types.rst b/docs/gapic/v2/types.rst deleted file mode 100644 index 99b954eca..000000000 --- a/docs/gapic/v2/types.rst +++ /dev/null @@ -1,6 +0,0 @@ -Types for BigQuery API Client -============================= - -.. automodule:: google.cloud.bigquery_v2.types - :members: - :noindex: \ No newline at end of file diff --git a/docs/index.rst b/docs/index.rst index 62a82e0e9..3f8ba2304 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -27,6 +27,16 @@ API Reference reference dbapi +Migration Guide +--------------- + +See the guide below for instructions on migrating to the 2.x release of this library. + +.. toctree:: + :maxdepth: 2 + + UPGRADING + Changelog --------- diff --git a/docs/reference.rst b/docs/reference.rst index 981059de5..21dd8e43d 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -182,6 +182,7 @@ Encryption Configuration encryption_configuration.EncryptionConfiguration + Additional Types ================ @@ -190,5 +191,4 @@ Protocol buffer classes for working with the Models API. .. 
toctree:: :maxdepth: 2 - gapic/v2/enums - gapic/v2/types + bigquery_v2/types diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 953b7d0fe..57c8f95f6 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -22,11 +22,6 @@ import six from six.moves import queue -try: - from google.cloud import bigquery_storage_v1 -except ImportError: # pragma: NO COVER - bigquery_storage_v1 = None - try: import pandas except ImportError: # pragma: NO COVER @@ -287,14 +282,6 @@ def dataframe_to_bq_schema(dataframe, bq_schema): """ if bq_schema: bq_schema = schema._to_schema_fields(bq_schema) - if six.PY2: - for field in bq_schema: - if field.field_type in schema._STRUCT_TYPES: - raise ValueError( - "Uploading dataframes with struct (record) column types " - "is not supported under Python2. See: " - "https://github.com/googleapis/python-bigquery/issues/21" - ) bq_schema_index = {field.name: field for field in bq_schema} bq_schema_unused = set(bq_schema_index.keys()) else: @@ -578,19 +565,7 @@ def _bqstorage_page_to_dataframe(column_names, dtypes, page): def _download_table_bqstorage_stream( download_state, bqstorage_client, session, stream, worker_queue, page_to_item ): - # Passing a BQ Storage client in implies that the BigQuery Storage library - # is available and can be imported. - from google.cloud import bigquery_storage_v1beta1 - - # We want to preserve comaptibility with the v1beta1 BQ Storage clients, - # thus adjust constructing the rowstream if needed. - # The assumption is that the caller provides a BQ Storage `session` that is - # compatible with the version of the BQ Storage client passed in. 
- if isinstance(bqstorage_client, bigquery_storage_v1beta1.BigQueryStorageClient): - position = bigquery_storage_v1beta1.types.StreamPosition(stream=stream) - rowstream = bqstorage_client.read_rows(position).rows(session) - else: - rowstream = bqstorage_client.read_rows(stream.name).rows(session) + rowstream = bqstorage_client.read_rows(stream.name).rows(session) for page in rowstream.pages: if download_state.done: @@ -625,8 +600,7 @@ def _download_table_bqstorage( # Passing a BQ Storage client in implies that the BigQuery Storage library # is available and can be imported. - from google.cloud import bigquery_storage_v1 - from google.cloud import bigquery_storage_v1beta1 + from google.cloud import bigquery_storage if "$" in table.table_id: raise ValueError( @@ -637,41 +611,18 @@ def _download_table_bqstorage( requested_streams = 1 if preserve_order else 0 - # We want to preserve comaptibility with the v1beta1 BQ Storage clients, - # thus adjust the session creation if needed. - if isinstance(bqstorage_client, bigquery_storage_v1beta1.BigQueryStorageClient): - warnings.warn( - "Support for BigQuery Storage v1beta1 clients is deprecated, please " - "consider upgrading the client to BigQuery Storage v1 stable version.", - category=DeprecationWarning, - ) - read_options = bigquery_storage_v1beta1.types.TableReadOptions() - - if selected_fields is not None: - for field in selected_fields: - read_options.selected_fields.append(field.name) - - session = bqstorage_client.create_read_session( - table.to_bqstorage(v1beta1=True), - "projects/{}".format(project_id), - format_=bigquery_storage_v1beta1.enums.DataFormat.ARROW, - read_options=read_options, - requested_streams=requested_streams, - ) - else: - requested_session = bigquery_storage_v1.types.ReadSession( - table=table.to_bqstorage(), - data_format=bigquery_storage_v1.enums.DataFormat.ARROW, - ) - if selected_fields is not None: - for field in selected_fields: - 
requested_session.read_options.selected_fields.append(field.name) - - session = bqstorage_client.create_read_session( - parent="projects/{}".format(project_id), - read_session=requested_session, - max_stream_count=requested_streams, - ) + requested_session = bigquery_storage.types.ReadSession( + table=table.to_bqstorage(), data_format=bigquery_storage.types.DataFormat.ARROW + ) + if selected_fields is not None: + for field in selected_fields: + requested_session.read_options.selected_fields.append(field.name) + + session = bqstorage_client.create_read_session( + parent="projects/{}".format(project_id), + read_session=requested_session, + max_stream_count=requested_streams, + ) _LOGGER.debug( "Started reading table '{}.{}.{}' with BQ Storage API session '{}'.".format( diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index d2aa45999..fcb18385d 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -17,11 +17,7 @@ from __future__ import absolute_import from __future__ import division -try: - from collections import abc as collections_abc -except ImportError: # Python 2.7 - import collections as collections_abc - +from collections import abc as collections_abc import copy import functools import gzip @@ -435,11 +431,11 @@ def _create_bqstorage_client(self): warning and return ``None``. Returns: - Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]: + Optional[google.cloud.bigquery_storage.BigQueryReadClient]: A BigQuery Storage API client. 
""" try: - from google.cloud import bigquery_storage_v1 + from google.cloud import bigquery_storage except ImportError: warnings.warn( "Cannot create BigQuery Storage client, the dependency " @@ -447,7 +443,7 @@ def _create_bqstorage_client(self): ) return None - return bigquery_storage_v1.BigQueryReadClient(credentials=self._credentials) + return bigquery_storage.BigQueryReadClient(credentials=self._credentials) def create_dataset( self, dataset, exists_ok=False, retry=DEFAULT_RETRY, timeout=None diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 1bcf45f31..fdf4e17c3 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -12,11 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -try: - from collections import abc as collections_abc -except ImportError: # Python 2.7 - import collections as collections_abc +from collections import abc as collections_abc import datetime import decimal import functools diff --git a/google/cloud/bigquery/dbapi/connection.py b/google/cloud/bigquery/dbapi/connection.py index 464b0fd06..300c77dc9 100644 --- a/google/cloud/bigquery/dbapi/connection.py +++ b/google/cloud/bigquery/dbapi/connection.py @@ -73,7 +73,7 @@ def close(self): if self._owns_bqstorage_client: # There is no close() on the BQ Storage client itself. 
- self._bqstorage_client.transport.channel.close() + self._bqstorage_client._transport.grpc_channel.close() for cursor_ in self._cursors_created: cursor_.close() diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py index 7a10637f0..63264e9ab 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -15,14 +15,8 @@ """Cursor for the Google BigQuery DB-API.""" import collections +from collections import abc as collections_abc import copy -import warnings - -try: - from collections import abc as collections_abc -except ImportError: # Python 2.7 - import collections as collections_abc - import logging import six @@ -267,54 +261,27 @@ def _bqstorage_fetch(self, bqstorage_client): A sequence of rows, represented as dictionaries. """ # Hitting this code path with a BQ Storage client instance implies that - # bigquery_storage_v1* can indeed be imported here without errors. - from google.cloud import bigquery_storage_v1 - from google.cloud import bigquery_storage_v1beta1 + # bigquery_storage can indeed be imported here without errors. + from google.cloud import bigquery_storage table_reference = self._query_job.destination - is_v1beta1_client = isinstance( - bqstorage_client, bigquery_storage_v1beta1.BigQueryStorageClient + requested_session = bigquery_storage.types.ReadSession( + table=table_reference.to_bqstorage(), + data_format=bigquery_storage.types.DataFormat.ARROW, + ) + read_session = bqstorage_client.create_read_session( + parent="projects/{}".format(table_reference.project), + read_session=requested_session, + # a single stream only, as DB API is not well-suited for multithreading + max_stream_count=1, ) - - # We want to preserve compatibility with the v1beta1 BQ Storage clients, - # thus adjust the session creation if needed. 
- if is_v1beta1_client: - warnings.warn( - "Support for BigQuery Storage v1beta1 clients is deprecated, please " - "consider upgrading the client to BigQuery Storage v1 stable version.", - category=DeprecationWarning, - ) - read_session = bqstorage_client.create_read_session( - table_reference.to_bqstorage(v1beta1=True), - "projects/{}".format(table_reference.project), - # a single stream only, as DB API is not well-suited for multithreading - requested_streams=1, - format_=bigquery_storage_v1beta1.enums.DataFormat.ARROW, - ) - else: - requested_session = bigquery_storage_v1.types.ReadSession( - table=table_reference.to_bqstorage(), - data_format=bigquery_storage_v1.enums.DataFormat.ARROW, - ) - read_session = bqstorage_client.create_read_session( - parent="projects/{}".format(table_reference.project), - read_session=requested_session, - # a single stream only, as DB API is not well-suited for multithreading - max_stream_count=1, - ) if not read_session.streams: return iter([]) # empty table, nothing to read - if is_v1beta1_client: - read_position = bigquery_storage_v1beta1.types.StreamPosition( - stream=read_session.streams[0], - ) - read_rows_stream = bqstorage_client.read_rows(read_position) - else: - stream_name = read_session.streams[0].name - read_rows_stream = bqstorage_client.read_rows(stream_name) + stream_name = read_session.streams[0].name + read_rows_stream = bqstorage_client.read_rows(stream_name) rows_iterable = read_rows_stream.rows(read_session) return rows_iterable diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 29fe543f6..3247372e3 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -17,7 +17,7 @@ import enum import six -from google.cloud.bigquery_v2.gapic import enums as gapic_enums +from google.cloud.bigquery_v2 import types as gapic_types _SQL_SCALAR_TYPES = frozenset( @@ -46,13 +46,13 @@ def _make_sql_scalars_enum(): "StandardSqlDataTypes", ( (member.name, member.value) - for 
member in gapic_enums.StandardSqlDataType.TypeKind + for member in gapic_types.StandardSqlDataType.TypeKind if member.name in _SQL_SCALAR_TYPES ), ) # make sure the docstring for the new enum is also correct - orig_doc = gapic_enums.StandardSqlDataType.TypeKind.__doc__ + orig_doc = gapic_types.StandardSqlDataType.TypeKind.__doc__ skip_pattern = re.compile( "|".join(_SQL_NONSCALAR_TYPES) + "|because a JSON object" # the second description line of STRUCT member diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 4842c7680..22175ee45 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -637,7 +637,7 @@ def _make_bqstorage_client(use_bqstorage_api, credentials): return None try: - from google.cloud import bigquery_storage_v1 + from google.cloud import bigquery_storage except ImportError as err: customized_error = ImportError( "The default BigQuery Storage API client cannot be used, install " @@ -655,7 +655,7 @@ def _make_bqstorage_client(use_bqstorage_api, credentials): ) six.raise_from(customized_error, err) - return bigquery_storage_v1.BigQueryReadClient( + return bigquery_storage.BigQueryReadClient( credentials=credentials, client_info=gapic_client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT), ) @@ -670,10 +670,10 @@ def _close_transports(client, bqstorage_client): Args: client (:class:`~google.cloud.bigquery.client.Client`): bqstorage_client - (Optional[:class:`~google.cloud.bigquery_storage_v1.BigQueryReadClient`]): + (Optional[:class:`~google.cloud.bigquery_storage.BigQueryReadClient`]): A client for the BigQuery Storage API. 
""" client.close() if bqstorage_client is not None: - bqstorage_client.transport.channel.close() + bqstorage_client._transport.grpc_channel.close() diff --git a/google/cloud/bigquery/model.py b/google/cloud/bigquery/model.py index d3fe8a937..092d98c2e 100644 --- a/google/cloud/bigquery/model.py +++ b/google/cloud/bigquery/model.py @@ -55,7 +55,7 @@ class Model(object): def __init__(self, model_ref): # Use _proto on read-only properties to use it's built-in type # conversion. - self._proto = types.Model() + self._proto = types.Model()._pb # Use _properties on read-write properties to match the REST API # semantics. The BigQuery API makes a distinction between an unset @@ -151,13 +151,13 @@ def modified(self): @property def model_type(self): - """google.cloud.bigquery_v2.gapic.enums.Model.ModelType: Type of the + """google.cloud.bigquery_v2.types.Model.ModelType: Type of the model resource. Read-only. The value is one of elements of the - :class:`~google.cloud.bigquery_v2.gapic.enums.Model.ModelType` + :class:`~google.cloud.bigquery_v2.types.Model.ModelType` enumeration. """ return self._proto.model_type @@ -306,7 +306,7 @@ def from_api_repr(cls, resource): training_run["startTime"] = datetime_helpers.to_rfc3339(start_time) this._proto = json_format.ParseDict( - resource, types.Model(), ignore_unknown_fields=True + resource, types.Model()._pb, ignore_unknown_fields=True ) return this @@ -326,7 +326,7 @@ class ModelReference(object): """ def __init__(self): - self._proto = types.ModelReference() + self._proto = types.ModelReference()._pb self._properties = {} @property @@ -370,8 +370,9 @@ def from_api_repr(cls, resource): # field values. 
ref._properties = resource ref._proto = json_format.ParseDict( - resource, types.ModelReference(), ignore_unknown_fields=True + resource, types.ModelReference()._pb, ignore_unknown_fields=True ) + return ref @classmethod diff --git a/google/cloud/bigquery/routine.py b/google/cloud/bigquery/routine.py index 03423c01b..6a0ed9fb0 100644 --- a/google/cloud/bigquery/routine.py +++ b/google/cloud/bigquery/routine.py @@ -189,14 +189,17 @@ def return_type(self): resource = self._properties.get(self._PROPERTY_TO_API_FIELD["return_type"]) if not resource: return resource + output = google.cloud.bigquery_v2.types.StandardSqlDataType() - output = json_format.ParseDict(resource, output, ignore_unknown_fields=True) - return output + raw_protobuf = json_format.ParseDict( + resource, output._pb, ignore_unknown_fields=True + ) + return type(output).wrap(raw_protobuf) @return_type.setter def return_type(self, value): if value: - resource = json_format.MessageToDict(value) + resource = json_format.MessageToDict(value._pb) else: resource = None self._properties[self._PROPERTY_TO_API_FIELD["return_type"]] = resource @@ -357,14 +360,17 @@ def data_type(self): resource = self._properties.get(self._PROPERTY_TO_API_FIELD["data_type"]) if not resource: return resource + output = google.cloud.bigquery_v2.types.StandardSqlDataType() - output = json_format.ParseDict(resource, output, ignore_unknown_fields=True) - return output + raw_protobuf = json_format.ParseDict( + resource, output._pb, ignore_unknown_fields=True + ) + return type(output).wrap(raw_protobuf) @data_type.setter def data_type(self, value): if value: - resource = json_format.MessageToDict(value) + resource = json_format.MessageToDict(value._pb) else: resource = None self._properties[self._PROPERTY_TO_API_FIELD["data_type"]] = resource diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index c1b2588be..8ae0a3a85 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ 
-25,22 +25,22 @@ # https://cloud.google.com/bigquery/data-types#legacy_sql_data_types # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types LEGACY_TO_STANDARD_TYPES = { - "STRING": types.StandardSqlDataType.STRING, - "BYTES": types.StandardSqlDataType.BYTES, - "INTEGER": types.StandardSqlDataType.INT64, - "INT64": types.StandardSqlDataType.INT64, - "FLOAT": types.StandardSqlDataType.FLOAT64, - "FLOAT64": types.StandardSqlDataType.FLOAT64, - "NUMERIC": types.StandardSqlDataType.NUMERIC, - "BOOLEAN": types.StandardSqlDataType.BOOL, - "BOOL": types.StandardSqlDataType.BOOL, - "GEOGRAPHY": types.StandardSqlDataType.GEOGRAPHY, - "RECORD": types.StandardSqlDataType.STRUCT, - "STRUCT": types.StandardSqlDataType.STRUCT, - "TIMESTAMP": types.StandardSqlDataType.TIMESTAMP, - "DATE": types.StandardSqlDataType.DATE, - "TIME": types.StandardSqlDataType.TIME, - "DATETIME": types.StandardSqlDataType.DATETIME, + "STRING": types.StandardSqlDataType.TypeKind.STRING, + "BYTES": types.StandardSqlDataType.TypeKind.BYTES, + "INTEGER": types.StandardSqlDataType.TypeKind.INT64, + "INT64": types.StandardSqlDataType.TypeKind.INT64, + "FLOAT": types.StandardSqlDataType.TypeKind.FLOAT64, + "FLOAT64": types.StandardSqlDataType.TypeKind.FLOAT64, + "NUMERIC": types.StandardSqlDataType.TypeKind.NUMERIC, + "BOOLEAN": types.StandardSqlDataType.TypeKind.BOOL, + "BOOL": types.StandardSqlDataType.TypeKind.BOOL, + "GEOGRAPHY": types.StandardSqlDataType.TypeKind.GEOGRAPHY, + "RECORD": types.StandardSqlDataType.TypeKind.STRUCT, + "STRUCT": types.StandardSqlDataType.TypeKind.STRUCT, + "TIMESTAMP": types.StandardSqlDataType.TypeKind.TIMESTAMP, + "DATE": types.StandardSqlDataType.TypeKind.DATE, + "TIME": types.StandardSqlDataType.TypeKind.TIME, + "DATETIME": types.StandardSqlDataType.TypeKind.DATETIME, # no direct conversion from ARRAY, the latter is represented by mode="REPEATED" } """String names of the legacy SQL types to integer codes of Standard SQL types.""" @@ -209,26 +209,34 @@ 
def to_standard_sql(self): sql_type = types.StandardSqlDataType() if self.mode == "REPEATED": - sql_type.type_kind = types.StandardSqlDataType.ARRAY + sql_type.type_kind = types.StandardSqlDataType.TypeKind.ARRAY else: sql_type.type_kind = LEGACY_TO_STANDARD_TYPES.get( - self.field_type, types.StandardSqlDataType.TYPE_KIND_UNSPECIFIED + self.field_type, + types.StandardSqlDataType.TypeKind.TYPE_KIND_UNSPECIFIED, ) - if sql_type.type_kind == types.StandardSqlDataType.ARRAY: # noqa: E721 + if sql_type.type_kind == types.StandardSqlDataType.TypeKind.ARRAY: # noqa: E721 array_element_type = LEGACY_TO_STANDARD_TYPES.get( - self.field_type, types.StandardSqlDataType.TYPE_KIND_UNSPECIFIED + self.field_type, + types.StandardSqlDataType.TypeKind.TYPE_KIND_UNSPECIFIED, ) sql_type.array_element_type.type_kind = array_element_type # ARRAY cannot directly contain other arrays, only scalar types and STRUCTs # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#array-type - if array_element_type == types.StandardSqlDataType.STRUCT: # noqa: E721 + if ( + array_element_type + == types.StandardSqlDataType.TypeKind.STRUCT # noqa: E721 + ): sql_type.array_element_type.struct_type.fields.extend( field.to_standard_sql() for field in self.fields ) - elif sql_type.type_kind == types.StandardSqlDataType.STRUCT: # noqa: E721 + elif ( + sql_type.type_kind + == types.StandardSqlDataType.TypeKind.STRUCT # noqa: E721 + ): sql_type.struct_type.fields.extend( field.to_standard_sql() for field in self.fields ) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index d9e5f7773..902a7040a 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -26,12 +26,6 @@ import six -try: - # Needed for the to_bqstorage() method. 
- from google.cloud import bigquery_storage_v1beta1 -except ImportError: # pragma: NO COVER - bigquery_storage_v1beta1 = None - try: import pandas except ImportError: # pragma: NO COVER @@ -228,7 +222,7 @@ def to_api_repr(self): "tableId": self._table_id, } - def to_bqstorage(self, v1beta1=False): + def to_bqstorage(self): """Construct a BigQuery Storage API representation of this table. Install the ``google-cloud-bigquery-storage`` package to use this @@ -237,41 +231,21 @@ def to_bqstorage(self, v1beta1=False): If the ``table_id`` contains a partition identifier (e.g. ``my_table$201812``) or a snapshot identifier (e.g. ``mytable@1234567890``), it is ignored. Use - :class:`google.cloud.bigquery_storage_v1.types.ReadSession.TableReadOptions` + :class:`google.cloud.bigquery_storage.types.ReadSession.TableReadOptions` to filter rows by partition. Use - :class:`google.cloud.bigquery_storage_v1.types.ReadSession.TableModifiers` + :class:`google.cloud.bigquery_storage.types.ReadSession.TableModifiers` to select a specific snapshot to read from. - Args: - v1beta1 (Optiona[bool]): - If :data:`True`, return representation compatible with BigQuery - Storage ``v1beta1`` version. Defaults to :data:`False`. - Returns: - Union[str, google.cloud.bigquery_storage_v1beta1.types.TableReference:]: - A reference to this table in the BigQuery Storage API. - - Raises: - ValueError: - If ``v1beta1`` compatibility is requested, but the - :mod:`google.cloud.bigquery_storage_v1beta1` module cannot be imported. + str: A reference to this table in the BigQuery Storage API. 
""" - if v1beta1 and bigquery_storage_v1beta1 is None: - raise ValueError(_NO_BQSTORAGE_ERROR) table_id, _, _ = self._table_id.partition("@") table_id, _, _ = table_id.partition("$") - if v1beta1: - table_ref = bigquery_storage_v1beta1.types.TableReference( - project_id=self._project, - dataset_id=self._dataset_id, - table_id=table_id, - ) - else: - table_ref = "projects/{}/datasets/{}/tables/{}".format( - self._project, self._dataset_id, table_id, - ) + table_ref = "projects/{}/datasets/{}/tables/{}".format( + self._project, self._dataset_id, table_id, + ) return table_ref @@ -876,19 +850,13 @@ def to_api_repr(self): """ return copy.deepcopy(self._properties) - def to_bqstorage(self, v1beta1=False): + def to_bqstorage(self): """Construct a BigQuery Storage API representation of this table. - Args: - v1beta1 (Optiona[bool]): - If :data:`True`, return representation compatible with BigQuery - Storage ``v1beta1`` version. Defaults to :data:`False`. - Returns: - Union[str, google.cloud.bigquery_storage_v1beta1.types.TableReference:]: - A reference to this table in the BigQuery Storage API. + str: A reference to this table in the BigQuery Storage API. """ - return self.reference.to_bqstorage(v1beta1=v1beta1) + return self.reference.to_bqstorage() def _build_resource(self, filter_fields): """Generate a resource for ``update``.""" @@ -1096,19 +1064,13 @@ def from_string(cls, full_table_id): {"tableReference": TableReference.from_string(full_table_id).to_api_repr()} ) - def to_bqstorage(self, v1beta1=False): + def to_bqstorage(self): """Construct a BigQuery Storage API representation of this table. - Args: - v1beta1 (Optiona[bool]): - If :data:`True`, return representation compatible with BigQuery - Storage ``v1beta1`` version. Defaults to :data:`False`. - Returns: - Union[str, google.cloud.bigquery_storage_v1beta1.types.TableReference:]: - A reference to this table in the BigQuery Storage API. + str: A reference to this table in the BigQuery Storage API. 
""" - return self.reference.to_bqstorage(v1beta1=v1beta1) + return self.reference.to_bqstorage() def _row_from_mapping(mapping, schema): @@ -1559,7 +1521,7 @@ def to_arrow( progress_bar.close() finally: if owns_bqstorage_client: - bqstorage_client.transport.channel.close() + bqstorage_client._transport.grpc_channel.close() if record_batches: return pyarrow.Table.from_batches(record_batches) @@ -1731,28 +1693,22 @@ def to_dataframe( # When converting timestamp values to nanosecond precision, the result # can be out of pyarrow bounds. To avoid the error when converting to # Pandas, we set the timestamp_as_object parameter to True, if necessary. - # - # NOTE: Python 3+ only, as timestamp_as_object parameter is only supported - # in pyarrow>=1.0, but the latter is not compatible with Python 2. - if six.PY2: - extra_kwargs = {} + types_to_check = { + pyarrow.timestamp("us"), + pyarrow.timestamp("us", tz=pytz.UTC), + } + + for column in record_batch: + if column.type in types_to_check: + try: + column.cast("timestamp[ns]") + except pyarrow.lib.ArrowInvalid: + timestamp_as_object = True + break else: - types_to_check = { - pyarrow.timestamp("us"), - pyarrow.timestamp("us", tz=pytz.UTC), - } - - for column in record_batch: - if column.type in types_to_check: - try: - column.cast("timestamp[ns]") - except pyarrow.lib.ArrowInvalid: - timestamp_as_object = True - break - else: - timestamp_as_object = False - - extra_kwargs = {"timestamp_as_object": timestamp_as_object} + timestamp_as_object = False + + extra_kwargs = {"timestamp_as_object": timestamp_as_object} df = record_batch.to_pandas(date_as_object=date_as_object, **extra_kwargs) diff --git a/google/cloud/bigquery_v2/__init__.py b/google/cloud/bigquery_v2/__init__.py index e58221432..c1989c3b0 100644 --- a/google/cloud/bigquery_v2/__init__.py +++ b/google/cloud/bigquery_v2/__init__.py @@ -1,33 +1,44 @@ # -*- coding: utf-8 -*- -# -# Copyright 2018 Google LLC + +# Copyright 2020 Google LLC # # Licensed under the Apache 
License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# https://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# -from __future__ import absolute_import - -import pkg_resources - -__version__ = pkg_resources.get_distribution("google-cloud-bigquery").version # noqa -from google.cloud.bigquery_v2 import types -from google.cloud.bigquery_v2.gapic import enums +from .types.encryption_config import EncryptionConfiguration +from .types.model import DeleteModelRequest +from .types.model import GetModelRequest +from .types.model import ListModelsRequest +from .types.model import ListModelsResponse +from .types.model import Model +from .types.model import PatchModelRequest +from .types.model_reference import ModelReference +from .types.standard_sql import StandardSqlDataType +from .types.standard_sql import StandardSqlField +from .types.standard_sql import StandardSqlStructType __all__ = ( - # google.cloud.bigquery_v2 - "__version__", - "types", - # google.cloud.bigquery_v2 - "enums", + "DeleteModelRequest", + "EncryptionConfiguration", + "GetModelRequest", + "ListModelsRequest", + "ListModelsResponse", + "Model", + "ModelReference", + "PatchModelRequest", + "StandardSqlDataType", + "StandardSqlField", + "StandardSqlStructType", ) diff --git a/google/cloud/bigquery_v2/gapic/__init__.py b/google/cloud/bigquery_v2/gapic/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/google/cloud/bigquery_v2/gapic/enums.py b/google/cloud/bigquery_v2/gapic/enums.py deleted file mode 100644 index 10d7c2517..000000000 --- 
a/google/cloud/bigquery_v2/gapic/enums.py +++ /dev/null @@ -1,171 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Wrappers for protocol buffer enum types.""" - -import enum - - -class Model(object): - class DataSplitMethod(enum.IntEnum): - """ - Indicates the method to split input data into multiple tables. - - Attributes: - DATA_SPLIT_METHOD_UNSPECIFIED (int) - RANDOM (int): Splits data randomly. - CUSTOM (int): Splits data with the user provided tags. - SEQUENTIAL (int): Splits data sequentially. - NO_SPLIT (int): Data split will be skipped. - AUTO_SPLIT (int): Splits data automatically: Uses NO_SPLIT if the data size is small. - Otherwise uses RANDOM. - """ - - DATA_SPLIT_METHOD_UNSPECIFIED = 0 - RANDOM = 1 - CUSTOM = 2 - SEQUENTIAL = 3 - NO_SPLIT = 4 - AUTO_SPLIT = 5 - - class DistanceType(enum.IntEnum): - """ - Distance metric used to compute the distance between two points. - - Attributes: - DISTANCE_TYPE_UNSPECIFIED (int) - EUCLIDEAN (int): Eculidean distance. - COSINE (int): Cosine distance. - """ - - DISTANCE_TYPE_UNSPECIFIED = 0 - EUCLIDEAN = 1 - COSINE = 2 - - class LearnRateStrategy(enum.IntEnum): - """ - Indicates the learning rate optimization strategy to use. - - Attributes: - LEARN_RATE_STRATEGY_UNSPECIFIED (int) - LINE_SEARCH (int): Use line search to determine learning rate. - CONSTANT (int): Use a constant learning rate. 
- """ - - LEARN_RATE_STRATEGY_UNSPECIFIED = 0 - LINE_SEARCH = 1 - CONSTANT = 2 - - class LossType(enum.IntEnum): - """ - Loss metric to evaluate model training performance. - - Attributes: - LOSS_TYPE_UNSPECIFIED (int) - MEAN_SQUARED_LOSS (int): Mean squared loss, used for linear regression. - MEAN_LOG_LOSS (int): Mean log loss, used for logistic regression. - """ - - LOSS_TYPE_UNSPECIFIED = 0 - MEAN_SQUARED_LOSS = 1 - MEAN_LOG_LOSS = 2 - - class ModelType(enum.IntEnum): - """ - Indicates the type of the Model. - - Attributes: - MODEL_TYPE_UNSPECIFIED (int) - LINEAR_REGRESSION (int): Linear regression model. - LOGISTIC_REGRESSION (int): Logistic regression based classification model. - KMEANS (int): K-means clustering model. - TENSORFLOW (int): [Beta] An imported TensorFlow model. - """ - - MODEL_TYPE_UNSPECIFIED = 0 - LINEAR_REGRESSION = 1 - LOGISTIC_REGRESSION = 2 - KMEANS = 3 - TENSORFLOW = 6 - - class OptimizationStrategy(enum.IntEnum): - """ - Indicates the optimization strategy used for training. - - Attributes: - OPTIMIZATION_STRATEGY_UNSPECIFIED (int) - BATCH_GRADIENT_DESCENT (int): Uses an iterative batch gradient descent algorithm. - NORMAL_EQUATION (int): Uses a normal equation to solve linear regression problem. - """ - - OPTIMIZATION_STRATEGY_UNSPECIFIED = 0 - BATCH_GRADIENT_DESCENT = 1 - NORMAL_EQUATION = 2 - - class KmeansEnums(object): - class KmeansInitializationMethod(enum.IntEnum): - """ - Indicates the method used to initialize the centroids for KMeans - clustering algorithm. - - Attributes: - KMEANS_INITIALIZATION_METHOD_UNSPECIFIED (int) - RANDOM (int): Initializes the centroids randomly. - CUSTOM (int): Initializes the centroids using data specified in - kmeans_initialization_column. - """ - - KMEANS_INITIALIZATION_METHOD_UNSPECIFIED = 0 - RANDOM = 1 - CUSTOM = 2 - - -class StandardSqlDataType(object): - class TypeKind(enum.IntEnum): - """ - Attributes: - TYPE_KIND_UNSPECIFIED (int): Invalid type. 
- INT64 (int): Encoded as a string in decimal format. - BOOL (int): Encoded as a boolean "false" or "true". - FLOAT64 (int): Encoded as a number, or string "NaN", "Infinity" or "-Infinity". - STRING (int): Encoded as a string value. - BYTES (int): Encoded as a base64 string per RFC 4648, section 4. - TIMESTAMP (int): Encoded as an RFC 3339 timestamp with mandatory "Z" time zone string: - 1985-04-12T23:20:50.52Z - DATE (int): Encoded as RFC 3339 full-date format string: 1985-04-12 - TIME (int): Encoded as RFC 3339 partial-time format string: 23:20:50.52 - DATETIME (int): Encoded as RFC 3339 full-date "T" partial-time: 1985-04-12T23:20:50.52 - GEOGRAPHY (int): Encoded as WKT - NUMERIC (int): Encoded as a decimal string. - ARRAY (int): Encoded as a list with types matching Type.array_type. - STRUCT (int): Encoded as a list with fields of type Type.struct_type[i]. List is - used because a JSON object cannot have duplicate field names. - """ - - TYPE_KIND_UNSPECIFIED = 0 - INT64 = 2 - BOOL = 5 - FLOAT64 = 7 - STRING = 8 - BYTES = 9 - TIMESTAMP = 19 - DATE = 10 - TIME = 20 - DATETIME = 21 - GEOGRAPHY = 22 - NUMERIC = 23 - ARRAY = 16 - STRUCT = 17 diff --git a/google/cloud/bigquery_v2/proto/__init__.py b/google/cloud/bigquery_v2/proto/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/google/cloud/bigquery_v2/proto/encryption_config_pb2_grpc.py b/google/cloud/bigquery_v2/proto/encryption_config_pb2_grpc.py deleted file mode 100644 index 8a9393943..000000000 --- a/google/cloud/bigquery_v2/proto/encryption_config_pb2_grpc.py +++ /dev/null @@ -1,3 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! 
-"""Client and server classes corresponding to protobuf-defined services.""" -import grpc diff --git a/google/cloud/bigquery_v2/proto/location_metadata_pb2.py b/google/cloud/bigquery_v2/proto/location_metadata_pb2.py deleted file mode 100644 index 6dd9da52e..000000000 --- a/google/cloud/bigquery_v2/proto/location_metadata_pb2.py +++ /dev/null @@ -1,98 +0,0 @@ -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: google/cloud/bigquery_v2/proto/location_metadata.proto - -import sys - -_b = sys.version_info[0] < 3 and (lambda x: x) or (lambda x: x.encode("latin1")) -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/bigquery_v2/proto/location_metadata.proto", - package="google.cloud.bigquery.v2", - syntax="proto3", - serialized_options=_b( - "\n\034com.google.cloud.bigquery.v2B\025LocationMetadataProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery" - ), - serialized_pb=_b( - '\n6google/cloud/bigquery_v2/proto/location_metadata.proto\x12\x18google.cloud.bigquery.v2\x1a\x1cgoogle/api/annotations.proto".\n\x10LocationMetadata\x12\x1a\n\x12legacy_location_id\x18\x01 \x01(\tBw\n\x1c\x63om.google.cloud.bigquery.v2B\x15LocationMetadataProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3' - ), - dependencies=[google_dot_api_dot_annotations__pb2.DESCRIPTOR], -) - - -_LOCATIONMETADATA = _descriptor.Descriptor( - name="LocationMetadata", - full_name="google.cloud.bigquery.v2.LocationMetadata", - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - 
name="legacy_location_id", - full_name="google.cloud.bigquery.v2.LocationMetadata.legacy_location_id", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=_b("").decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - ) - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=114, - serialized_end=160, -) - -DESCRIPTOR.message_types_by_name["LocationMetadata"] = _LOCATIONMETADATA -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -LocationMetadata = _reflection.GeneratedProtocolMessageType( - "LocationMetadata", - (_message.Message,), - dict( - DESCRIPTOR=_LOCATIONMETADATA, - __module__="google.cloud.bigquery_v2.proto.location_metadata_pb2", - __doc__="""BigQuery-specific metadata about a location. This will be set on - google.cloud.location.Location.metadata in Cloud Location API responses. - - - Attributes: - legacy_location_id: - The legacy BigQuery location ID, e.g. ``EU`` for the ``europe`` - location. This is for any API consumers that need the legacy - ``US`` and ``EU`` locations. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.LocationMetadata) - ), -) -_sym_db.RegisterMessage(LocationMetadata) - - -DESCRIPTOR._options = None -# @@protoc_insertion_point(module_scope) diff --git a/google/cloud/bigquery_v2/proto/location_metadata_pb2_grpc.py b/google/cloud/bigquery_v2/proto/location_metadata_pb2_grpc.py deleted file mode 100644 index 07cb78fe0..000000000 --- a/google/cloud/bigquery_v2/proto/location_metadata_pb2_grpc.py +++ /dev/null @@ -1,2 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! 
-import grpc diff --git a/google/cloud/bigquery_v2/proto/model_pb2_grpc.py b/google/cloud/bigquery_v2/proto/model_pb2_grpc.py deleted file mode 100644 index 13db95717..000000000 --- a/google/cloud/bigquery_v2/proto/model_pb2_grpc.py +++ /dev/null @@ -1,214 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc - -from google.cloud.bigquery_v2.proto import ( - model_pb2 as google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2, -) -from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 - - -class ModelServiceStub(object): - """Missing associated documentation comment in .proto file.""" - - def __init__(self, channel): - """Constructor. - - Args: - channel: A grpc.Channel. - """ - self.GetModel = channel.unary_unary( - "/google.cloud.bigquery.v2.ModelService/GetModel", - request_serializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.GetModelRequest.SerializeToString, - response_deserializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.Model.FromString, - ) - self.ListModels = channel.unary_unary( - "/google.cloud.bigquery.v2.ModelService/ListModels", - request_serializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.ListModelsRequest.SerializeToString, - response_deserializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.ListModelsResponse.FromString, - ) - self.PatchModel = channel.unary_unary( - "/google.cloud.bigquery.v2.ModelService/PatchModel", - request_serializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.PatchModelRequest.SerializeToString, - response_deserializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.Model.FromString, - ) - self.DeleteModel = channel.unary_unary( - "/google.cloud.bigquery.v2.ModelService/DeleteModel", - 
request_serializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.DeleteModelRequest.SerializeToString, - response_deserializer=google_dot_protobuf_dot_empty__pb2.Empty.FromString, - ) - - -class ModelServiceServicer(object): - """Missing associated documentation comment in .proto file.""" - - def GetModel(self, request, context): - """Gets the specified model resource by model ID. - """ - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") - - def ListModels(self, request, context): - """Lists all models in the specified dataset. Requires the READER dataset - role. - """ - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") - - def PatchModel(self, request, context): - """Patch specific fields in the specified model. - """ - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") - - def DeleteModel(self, request, context): - """Deletes the model specified by modelId from the dataset. 
- """ - context.set_code(grpc.StatusCode.UNIMPLEMENTED) - context.set_details("Method not implemented!") - raise NotImplementedError("Method not implemented!") - - -def add_ModelServiceServicer_to_server(servicer, server): - rpc_method_handlers = { - "GetModel": grpc.unary_unary_rpc_method_handler( - servicer.GetModel, - request_deserializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.GetModelRequest.FromString, - response_serializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.Model.SerializeToString, - ), - "ListModels": grpc.unary_unary_rpc_method_handler( - servicer.ListModels, - request_deserializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.ListModelsRequest.FromString, - response_serializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.ListModelsResponse.SerializeToString, - ), - "PatchModel": grpc.unary_unary_rpc_method_handler( - servicer.PatchModel, - request_deserializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.PatchModelRequest.FromString, - response_serializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.Model.SerializeToString, - ), - "DeleteModel": grpc.unary_unary_rpc_method_handler( - servicer.DeleteModel, - request_deserializer=google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.DeleteModelRequest.FromString, - response_serializer=google_dot_protobuf_dot_empty__pb2.Empty.SerializeToString, - ), - } - generic_handler = grpc.method_handlers_generic_handler( - "google.cloud.bigquery.v2.ModelService", rpc_method_handlers - ) - server.add_generic_rpc_handlers((generic_handler,)) - - -# This class is part of an EXPERIMENTAL API. 
-class ModelService(object): - """Missing associated documentation comment in .proto file.""" - - @staticmethod - def GetModel( - request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return grpc.experimental.unary_unary( - request, - target, - "/google.cloud.bigquery.v2.ModelService/GetModel", - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.GetModelRequest.SerializeToString, - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.Model.FromString, - options, - channel_credentials, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) - - @staticmethod - def ListModels( - request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return grpc.experimental.unary_unary( - request, - target, - "/google.cloud.bigquery.v2.ModelService/ListModels", - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.ListModelsRequest.SerializeToString, - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.ListModelsResponse.FromString, - options, - channel_credentials, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) - - @staticmethod - def PatchModel( - request, - target, - options=(), - channel_credentials=None, - call_credentials=None, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return grpc.experimental.unary_unary( - request, - target, - "/google.cloud.bigquery.v2.ModelService/PatchModel", - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.PatchModelRequest.SerializeToString, - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.Model.FromString, - options, - channel_credentials, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) - - @staticmethod - def DeleteModel( - request, - target, - 
options=(), - channel_credentials=None, - call_credentials=None, - compression=None, - wait_for_ready=None, - timeout=None, - metadata=None, - ): - return grpc.experimental.unary_unary( - request, - target, - "/google.cloud.bigquery.v2.ModelService/DeleteModel", - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__pb2.DeleteModelRequest.SerializeToString, - google_dot_protobuf_dot_empty__pb2.Empty.FromString, - options, - channel_credentials, - call_credentials, - compression, - wait_for_ready, - timeout, - metadata, - ) diff --git a/google/cloud/bigquery_v2/proto/model_reference_pb2_grpc.py b/google/cloud/bigquery_v2/proto/model_reference_pb2_grpc.py deleted file mode 100644 index 8a9393943..000000000 --- a/google/cloud/bigquery_v2/proto/model_reference_pb2_grpc.py +++ /dev/null @@ -1,3 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc diff --git a/google/cloud/bigquery_v2/proto/standard_sql_pb2_grpc.py b/google/cloud/bigquery_v2/proto/standard_sql_pb2_grpc.py deleted file mode 100644 index 8a9393943..000000000 --- a/google/cloud/bigquery_v2/proto/standard_sql_pb2_grpc.py +++ /dev/null @@ -1,3 +0,0 @@ -# Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! -"""Client and server classes corresponding to protobuf-defined services.""" -import grpc diff --git a/google/cloud/bigquery_v2/py.typed b/google/cloud/bigquery_v2/py.typed new file mode 100644 index 000000000..e73777993 --- /dev/null +++ b/google/cloud/bigquery_v2/py.typed @@ -0,0 +1,2 @@ +# Marker file for PEP 561. +# The google-cloud-bigquery package uses inline types. 
diff --git a/google/cloud/bigquery_v2/types.py b/google/cloud/bigquery_v2/types.py deleted file mode 100644 index 7d4f9b732..000000000 --- a/google/cloud/bigquery_v2/types.py +++ /dev/null @@ -1,58 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -from __future__ import absolute_import -import sys - -from google.api_core.protobuf_helpers import get_messages - -from google.cloud.bigquery_v2.proto import encryption_config_pb2 -from google.cloud.bigquery_v2.proto import model_pb2 -from google.cloud.bigquery_v2.proto import model_reference_pb2 -from google.cloud.bigquery_v2.proto import standard_sql_pb2 -from google.protobuf import empty_pb2 -from google.protobuf import timestamp_pb2 -from google.protobuf import wrappers_pb2 - - -_shared_modules = [ - empty_pb2, - timestamp_pb2, - wrappers_pb2, -] - -_local_modules = [ - encryption_config_pb2, - model_pb2, - model_reference_pb2, - standard_sql_pb2, -] - -names = [] - -for module in _shared_modules: # pragma: NO COVER - for name, message in get_messages(module).items(): - setattr(sys.modules[__name__], name, message) - names.append(name) -for module in _local_modules: - for name, message in get_messages(module).items(): - message.__module__ = "google.cloud.bigquery_v2.types" - setattr(sys.modules[__name__], name, message) - names.append(name) - - -__all__ = tuple(sorted(names)) diff --git a/google/cloud/bigquery_v2/types/__init__.py 
b/google/cloud/bigquery_v2/types/__init__.py new file mode 100644 index 000000000..a8839c74e --- /dev/null +++ b/google/cloud/bigquery_v2/types/__init__.py @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from .encryption_config import EncryptionConfiguration +from .model_reference import ModelReference +from .standard_sql import ( + StandardSqlDataType, + StandardSqlField, + StandardSqlStructType, +) +from .model import ( + Model, + GetModelRequest, + PatchModelRequest, + DeleteModelRequest, + ListModelsRequest, + ListModelsResponse, +) + + +__all__ = ( + "EncryptionConfiguration", + "ModelReference", + "StandardSqlDataType", + "StandardSqlField", + "StandardSqlStructType", + "Model", + "GetModelRequest", + "PatchModelRequest", + "DeleteModelRequest", + "ListModelsRequest", + "ListModelsResponse", +) diff --git a/google/cloud/bigquery_v2/types/encryption_config.py b/google/cloud/bigquery_v2/types/encryption_config.py new file mode 100644 index 000000000..6fb90f340 --- /dev/null +++ b/google/cloud/bigquery_v2/types/encryption_config.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import proto # type: ignore + + +from google.protobuf import wrappers_pb2 as wrappers # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.bigquery.v2", manifest={"EncryptionConfiguration",}, +) + + +class EncryptionConfiguration(proto.Message): + r""" + + Attributes: + kms_key_name (~.wrappers.StringValue): + Optional. Describes the Cloud KMS encryption + key that will be used to protect destination + BigQuery table. The BigQuery Service Account + associated with your project requires access to + this encryption key. + """ + + kms_key_name = proto.Field(proto.MESSAGE, number=1, message=wrappers.StringValue,) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery_v2/types/model.py b/google/cloud/bigquery_v2/types/model.py new file mode 100644 index 000000000..a00720d48 --- /dev/null +++ b/google/cloud/bigquery_v2/types/model.py @@ -0,0 +1,966 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import proto # type: ignore + + +from google.cloud.bigquery_v2.types import encryption_config +from google.cloud.bigquery_v2.types import model_reference as gcb_model_reference +from google.cloud.bigquery_v2.types import standard_sql +from google.protobuf import timestamp_pb2 as timestamp # type: ignore +from google.protobuf import wrappers_pb2 as wrappers # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.bigquery.v2", + manifest={ + "Model", + "GetModelRequest", + "PatchModelRequest", + "DeleteModelRequest", + "ListModelsRequest", + "ListModelsResponse", + }, +) + + +class Model(proto.Message): + r""" + + Attributes: + etag (str): + Output only. A hash of this resource. + model_reference (~.gcb_model_reference.ModelReference): + Required. Unique identifier for this model. + creation_time (int): + Output only. The time when this model was + created, in millisecs since the epoch. + last_modified_time (int): + Output only. The time when this model was + last modified, in millisecs since the epoch. + description (str): + Optional. A user-friendly description of this + model. + friendly_name (str): + Optional. A descriptive name for this model. + labels (Sequence[~.gcb_model.Model.LabelsEntry]): + The labels associated with this model. You + can use these to organize and group your models. + Label keys and values can be no longer than 63 + characters, can only contain lowercase letters, + numeric characters, underscores and dashes. + International characters are allowed. Label + values are optional. Label keys must start with + a letter and each label in the list must have a + different key. + expiration_time (int): + Optional. The time when this model expires, + in milliseconds since the epoch. If not present, + the model will persist indefinitely. Expired + models will be deleted and their storage + reclaimed. 
The defaultTableExpirationMs + property of the encapsulating dataset can be + used to set a default expirationTime on newly + created models. + location (str): + Output only. The geographic location where + the model resides. This value is inherited from + the dataset. + encryption_configuration (~.encryption_config.EncryptionConfiguration): + Custom encryption configuration (e.g., Cloud + KMS keys). This shows the encryption + configuration of the model data while stored in + BigQuery storage. + model_type (~.gcb_model.Model.ModelType): + Output only. Type of the model resource. + training_runs (Sequence[~.gcb_model.Model.TrainingRun]): + Output only. Information for all training runs in increasing + order of start_time. + feature_columns (Sequence[~.standard_sql.StandardSqlField]): + Output only. Input feature columns that were + used to train this model. + label_columns (Sequence[~.standard_sql.StandardSqlField]): + Output only. Label columns that were used to train this + model. The output of the model will have a `predicted_` + prefix to these columns. + """ + + class ModelType(proto.Enum): + r"""Indicates the type of the Model.""" + MODEL_TYPE_UNSPECIFIED = 0 + LINEAR_REGRESSION = 1 + LOGISTIC_REGRESSION = 2 + KMEANS = 3 + TENSORFLOW = 6 + + class LossType(proto.Enum): + r"""Loss metric to evaluate model training performance.""" + LOSS_TYPE_UNSPECIFIED = 0 + MEAN_SQUARED_LOSS = 1 + MEAN_LOG_LOSS = 2 + + class DistanceType(proto.Enum): + r"""Distance metric used to compute the distance between two + points. + """ + DISTANCE_TYPE_UNSPECIFIED = 0 + EUCLIDEAN = 1 + COSINE = 2 + + class DataSplitMethod(proto.Enum): + r"""Indicates the method to split input data into multiple + tables. 
+ """ + DATA_SPLIT_METHOD_UNSPECIFIED = 0 + RANDOM = 1 + CUSTOM = 2 + SEQUENTIAL = 3 + NO_SPLIT = 4 + AUTO_SPLIT = 5 + + class LearnRateStrategy(proto.Enum): + r"""Indicates the learning rate optimization strategy to use.""" + LEARN_RATE_STRATEGY_UNSPECIFIED = 0 + LINE_SEARCH = 1 + CONSTANT = 2 + + class OptimizationStrategy(proto.Enum): + r"""Indicates the optimization strategy used for training.""" + OPTIMIZATION_STRATEGY_UNSPECIFIED = 0 + BATCH_GRADIENT_DESCENT = 1 + NORMAL_EQUATION = 2 + + class KmeansEnums(proto.Message): + r"""""" + + class KmeansInitializationMethod(proto.Enum): + r"""Indicates the method used to initialize the centroids for + KMeans clustering algorithm. + """ + KMEANS_INITIALIZATION_METHOD_UNSPECIFIED = 0 + RANDOM = 1 + CUSTOM = 2 + + class RegressionMetrics(proto.Message): + r"""Evaluation metrics for regression and explicit feedback type + matrix factorization models. + + Attributes: + mean_absolute_error (~.wrappers.DoubleValue): + Mean absolute error. + mean_squared_error (~.wrappers.DoubleValue): + Mean squared error. + mean_squared_log_error (~.wrappers.DoubleValue): + Mean squared log error. + median_absolute_error (~.wrappers.DoubleValue): + Median absolute error. + r_squared (~.wrappers.DoubleValue): + R^2 score. + """ + + mean_absolute_error = proto.Field( + proto.MESSAGE, number=1, message=wrappers.DoubleValue, + ) + + mean_squared_error = proto.Field( + proto.MESSAGE, number=2, message=wrappers.DoubleValue, + ) + + mean_squared_log_error = proto.Field( + proto.MESSAGE, number=3, message=wrappers.DoubleValue, + ) + + median_absolute_error = proto.Field( + proto.MESSAGE, number=4, message=wrappers.DoubleValue, + ) + + r_squared = proto.Field(proto.MESSAGE, number=5, message=wrappers.DoubleValue,) + + class AggregateClassificationMetrics(proto.Message): + r"""Aggregate metrics for classification/classifier models. For + multi-class models, the metrics are either macro-averaged or + micro-averaged. 
When macro-averaged, the metrics are calculated + for each label and then an unweighted average is taken of those + values. When micro-averaged, the metric is calculated globally + by counting the total number of correctly predicted rows. + + Attributes: + precision (~.wrappers.DoubleValue): + Precision is the fraction of actual positive + predictions that had positive actual labels. For + multiclass this is a macro-averaged metric + treating each class as a binary classifier. + recall (~.wrappers.DoubleValue): + Recall is the fraction of actual positive + labels that were given a positive prediction. + For multiclass this is a macro-averaged metric. + accuracy (~.wrappers.DoubleValue): + Accuracy is the fraction of predictions given + the correct label. For multiclass this is a + micro-averaged metric. + threshold (~.wrappers.DoubleValue): + Threshold at which the metrics are computed. + For binary classification models this is the + positive class threshold. For multi-class + classfication models this is the confidence + threshold. + f1_score (~.wrappers.DoubleValue): + The F1 score is an average of recall and + precision. For multiclass this is a macro- + averaged metric. + log_loss (~.wrappers.DoubleValue): + Logarithmic Loss. For multiclass this is a + macro-averaged metric. + roc_auc (~.wrappers.DoubleValue): + Area Under a ROC Curve. For multiclass this + is a macro-averaged metric. 
+ """ + + precision = proto.Field(proto.MESSAGE, number=1, message=wrappers.DoubleValue,) + + recall = proto.Field(proto.MESSAGE, number=2, message=wrappers.DoubleValue,) + + accuracy = proto.Field(proto.MESSAGE, number=3, message=wrappers.DoubleValue,) + + threshold = proto.Field(proto.MESSAGE, number=4, message=wrappers.DoubleValue,) + + f1_score = proto.Field(proto.MESSAGE, number=5, message=wrappers.DoubleValue,) + + log_loss = proto.Field(proto.MESSAGE, number=6, message=wrappers.DoubleValue,) + + roc_auc = proto.Field(proto.MESSAGE, number=7, message=wrappers.DoubleValue,) + + class BinaryClassificationMetrics(proto.Message): + r"""Evaluation metrics for binary classification/classifier + models. + + Attributes: + aggregate_classification_metrics (~.gcb_model.Model.AggregateClassificationMetrics): + Aggregate classification metrics. + binary_confusion_matrix_list (Sequence[~.gcb_model.Model.BinaryClassificationMetrics.BinaryConfusionMatrix]): + Binary confusion matrix at multiple + thresholds. + positive_label (str): + Label representing the positive class. + negative_label (str): + Label representing the negative class. + """ + + class BinaryConfusionMatrix(proto.Message): + r"""Confusion matrix for binary classification models. + + Attributes: + positive_class_threshold (~.wrappers.DoubleValue): + Threshold value used when computing each of + the following metric. + true_positives (~.wrappers.Int64Value): + Number of true samples predicted as true. + false_positives (~.wrappers.Int64Value): + Number of false samples predicted as true. + true_negatives (~.wrappers.Int64Value): + Number of true samples predicted as false. + false_negatives (~.wrappers.Int64Value): + Number of false samples predicted as false. + precision (~.wrappers.DoubleValue): + The fraction of actual positive predictions + that had positive actual labels. + recall (~.wrappers.DoubleValue): + The fraction of actual positive labels that + were given a positive prediction. 
+ f1_score (~.wrappers.DoubleValue): + The equally weighted average of recall and + precision. + accuracy (~.wrappers.DoubleValue): + The fraction of predictions given the correct + label. + """ + + positive_class_threshold = proto.Field( + proto.MESSAGE, number=1, message=wrappers.DoubleValue, + ) + + true_positives = proto.Field( + proto.MESSAGE, number=2, message=wrappers.Int64Value, + ) + + false_positives = proto.Field( + proto.MESSAGE, number=3, message=wrappers.Int64Value, + ) + + true_negatives = proto.Field( + proto.MESSAGE, number=4, message=wrappers.Int64Value, + ) + + false_negatives = proto.Field( + proto.MESSAGE, number=5, message=wrappers.Int64Value, + ) + + precision = proto.Field( + proto.MESSAGE, number=6, message=wrappers.DoubleValue, + ) + + recall = proto.Field(proto.MESSAGE, number=7, message=wrappers.DoubleValue,) + + f1_score = proto.Field( + proto.MESSAGE, number=8, message=wrappers.DoubleValue, + ) + + accuracy = proto.Field( + proto.MESSAGE, number=9, message=wrappers.DoubleValue, + ) + + aggregate_classification_metrics = proto.Field( + proto.MESSAGE, number=1, message="Model.AggregateClassificationMetrics", + ) + + binary_confusion_matrix_list = proto.RepeatedField( + proto.MESSAGE, + number=2, + message="Model.BinaryClassificationMetrics.BinaryConfusionMatrix", + ) + + positive_label = proto.Field(proto.STRING, number=3) + + negative_label = proto.Field(proto.STRING, number=4) + + class MultiClassClassificationMetrics(proto.Message): + r"""Evaluation metrics for multi-class classification/classifier + models. + + Attributes: + aggregate_classification_metrics (~.gcb_model.Model.AggregateClassificationMetrics): + Aggregate classification metrics. + confusion_matrix_list (Sequence[~.gcb_model.Model.MultiClassClassificationMetrics.ConfusionMatrix]): + Confusion matrix at different thresholds. + """ + + class ConfusionMatrix(proto.Message): + r"""Confusion matrix for multi-class classification models. 
+ + Attributes: + confidence_threshold (~.wrappers.DoubleValue): + Confidence threshold used when computing the + entries of the confusion matrix. + rows (Sequence[~.gcb_model.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row]): + One row per actual label. + """ + + class Entry(proto.Message): + r"""A single entry in the confusion matrix. + + Attributes: + predicted_label (str): + The predicted label. For confidence_threshold > 0, we will + also add an entry indicating the number of items under the + confidence threshold. + item_count (~.wrappers.Int64Value): + Number of items being predicted as this + label. + """ + + predicted_label = proto.Field(proto.STRING, number=1) + + item_count = proto.Field( + proto.MESSAGE, number=2, message=wrappers.Int64Value, + ) + + class Row(proto.Message): + r"""A single row in the confusion matrix. + + Attributes: + actual_label (str): + The original label of this row. + entries (Sequence[~.gcb_model.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry]): + Info describing predicted label distribution. + """ + + actual_label = proto.Field(proto.STRING, number=1) + + entries = proto.RepeatedField( + proto.MESSAGE, + number=2, + message="Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry", + ) + + confidence_threshold = proto.Field( + proto.MESSAGE, number=1, message=wrappers.DoubleValue, + ) + + rows = proto.RepeatedField( + proto.MESSAGE, + number=2, + message="Model.MultiClassClassificationMetrics.ConfusionMatrix.Row", + ) + + aggregate_classification_metrics = proto.Field( + proto.MESSAGE, number=1, message="Model.AggregateClassificationMetrics", + ) + + confusion_matrix_list = proto.RepeatedField( + proto.MESSAGE, + number=2, + message="Model.MultiClassClassificationMetrics.ConfusionMatrix", + ) + + class ClusteringMetrics(proto.Message): + r"""Evaluation metrics for clustering models. + + Attributes: + davies_bouldin_index (~.wrappers.DoubleValue): + Davies-Bouldin index. 
+ mean_squared_distance (~.wrappers.DoubleValue): + Mean of squared distances between each sample + to its cluster centroid. + clusters (Sequence[~.gcb_model.Model.ClusteringMetrics.Cluster]): + [Beta] Information for all clusters. + """ + + class Cluster(proto.Message): + r"""Message containing the information about one cluster. + + Attributes: + centroid_id (int): + Centroid id. + feature_values (Sequence[~.gcb_model.Model.ClusteringMetrics.Cluster.FeatureValue]): + Values of highly variant features for this + cluster. + count (~.wrappers.Int64Value): + Count of training data rows that were + assigned to this cluster. + """ + + class FeatureValue(proto.Message): + r"""Representative value of a single feature within the cluster. + + Attributes: + feature_column (str): + The feature column name. + numerical_value (~.wrappers.DoubleValue): + The numerical feature value. This is the + centroid value for this feature. + categorical_value (~.gcb_model.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue): + The categorical feature value. + """ + + class CategoricalValue(proto.Message): + r"""Representative value of a categorical feature. + + Attributes: + category_counts (Sequence[~.gcb_model.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount]): + Counts of all categories for the categorical feature. If + there are more than ten categories, we return top ten (by + count) and return one more CategoryCount with category + "*OTHER*" and count as aggregate counts of remaining + categories. + """ + + class CategoryCount(proto.Message): + r"""Represents the count of a single category within the cluster. + + Attributes: + category (str): + The name of category. + count (~.wrappers.Int64Value): + The count of training samples matching the + category within the cluster. 
+ """ + + category = proto.Field(proto.STRING, number=1) + + count = proto.Field( + proto.MESSAGE, number=2, message=wrappers.Int64Value, + ) + + category_counts = proto.RepeatedField( + proto.MESSAGE, + number=1, + message="Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount", + ) + + feature_column = proto.Field(proto.STRING, number=1) + + numerical_value = proto.Field( + proto.MESSAGE, + number=2, + oneof="value", + message=wrappers.DoubleValue, + ) + + categorical_value = proto.Field( + proto.MESSAGE, + number=3, + oneof="value", + message="Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue", + ) + + centroid_id = proto.Field(proto.INT64, number=1) + + feature_values = proto.RepeatedField( + proto.MESSAGE, + number=2, + message="Model.ClusteringMetrics.Cluster.FeatureValue", + ) + + count = proto.Field(proto.MESSAGE, number=3, message=wrappers.Int64Value,) + + davies_bouldin_index = proto.Field( + proto.MESSAGE, number=1, message=wrappers.DoubleValue, + ) + + mean_squared_distance = proto.Field( + proto.MESSAGE, number=2, message=wrappers.DoubleValue, + ) + + clusters = proto.RepeatedField( + proto.MESSAGE, number=3, message="Model.ClusteringMetrics.Cluster", + ) + + class EvaluationMetrics(proto.Message): + r"""Evaluation metrics of a model. These are either computed on + all training data or just the eval data based on whether eval + data was used during training. These are not present for + imported models. + + Attributes: + regression_metrics (~.gcb_model.Model.RegressionMetrics): + Populated for regression models and explicit + feedback type matrix factorization models. + binary_classification_metrics (~.gcb_model.Model.BinaryClassificationMetrics): + Populated for binary + classification/classifier models. + multi_class_classification_metrics (~.gcb_model.Model.MultiClassClassificationMetrics): + Populated for multi-class + classification/classifier models. 
+ clustering_metrics (~.gcb_model.Model.ClusteringMetrics): + Populated for clustering models. + """ + + regression_metrics = proto.Field( + proto.MESSAGE, number=1, oneof="metrics", message="Model.RegressionMetrics", + ) + + binary_classification_metrics = proto.Field( + proto.MESSAGE, + number=2, + oneof="metrics", + message="Model.BinaryClassificationMetrics", + ) + + multi_class_classification_metrics = proto.Field( + proto.MESSAGE, + number=3, + oneof="metrics", + message="Model.MultiClassClassificationMetrics", + ) + + clustering_metrics = proto.Field( + proto.MESSAGE, number=4, oneof="metrics", message="Model.ClusteringMetrics", + ) + + class TrainingRun(proto.Message): + r"""Information about a single training query run for the model. + + Attributes: + training_options (~.gcb_model.Model.TrainingRun.TrainingOptions): + Options that were used for this training run, + includes user specified and default options that + were used. + start_time (~.timestamp.Timestamp): + The start time of this training run. + results (Sequence[~.gcb_model.Model.TrainingRun.IterationResult]): + Output of each iteration run, results.size() <= + max_iterations. + evaluation_metrics (~.gcb_model.Model.EvaluationMetrics): + The evaluation metrics over training/eval + data that were computed at the end of training. + """ + + class TrainingOptions(proto.Message): + r""" + + Attributes: + max_iterations (int): + The maximum number of iterations in training. + Used only for iterative training algorithms. + loss_type (~.gcb_model.Model.LossType): + Type of loss function used during training + run. + learn_rate (float): + Learning rate in training. Used only for + iterative training algorithms. + l1_regularization (~.wrappers.DoubleValue): + L1 regularization coefficient. + l2_regularization (~.wrappers.DoubleValue): + L2 regularization coefficient. 
+ min_relative_progress (~.wrappers.DoubleValue): + When early_stop is true, stops training when accuracy + improvement is less than 'min_relative_progress'. Used only + for iterative training algorithms. + warm_start (~.wrappers.BoolValue): + Whether to train a model from the last + checkpoint. + early_stop (~.wrappers.BoolValue): + Whether to stop early when the loss doesn't improve + significantly any more (compared to min_relative_progress). + Used only for iterative training algorithms. + input_label_columns (Sequence[str]): + Name of input label columns in training data. + data_split_method (~.gcb_model.Model.DataSplitMethod): + The data split type for training and + evaluation, e.g. RANDOM. + data_split_eval_fraction (float): + The fraction of evaluation data over the + whole input data. The rest of data will be used + as training data. The format should be double. + Accurate to two decimal places. + Default value is 0.2. + data_split_column (str): + The column to split data with. This column won't be used as + a feature. + + 1. When data_split_method is CUSTOM, the corresponding + column should be boolean. The rows with true value tag + are eval data, and the false are training data. + 2. When data_split_method is SEQ, the first + DATA_SPLIT_EVAL_FRACTION rows (from smallest to largest) + in the corresponding column are used as training data, + and the rest are eval data. It respects the order in + Orderable data types: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties + learn_rate_strategy (~.gcb_model.Model.LearnRateStrategy): + The strategy to determine learn rate for the + current iteration. + initial_learn_rate (float): + Specifies the initial learning rate for the + line search learn rate strategy. + label_class_weights (Sequence[~.gcb_model.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry]): + Weights associated with each label class, for + rebalancing the training data. 
Only applicable + for classification models. + distance_type (~.gcb_model.Model.DistanceType): + Distance type for clustering models. + num_clusters (int): + Number of clusters for clustering models. + model_uri (str): + [Beta] Google Cloud Storage URI from which the model was + imported. Only applicable for imported models. + optimization_strategy (~.gcb_model.Model.OptimizationStrategy): + Optimization strategy for training linear + regression models. + kmeans_initialization_method (~.gcb_model.Model.KmeansEnums.KmeansInitializationMethod): + The method used to initialize the centroids + for kmeans algorithm. + kmeans_initialization_column (str): + The column used to provide the initial centroids for kmeans + algorithm when kmeans_initialization_method is CUSTOM. + """ + + max_iterations = proto.Field(proto.INT64, number=1) + + loss_type = proto.Field(proto.ENUM, number=2, enum="Model.LossType",) + + learn_rate = proto.Field(proto.DOUBLE, number=3) + + l1_regularization = proto.Field( + proto.MESSAGE, number=4, message=wrappers.DoubleValue, + ) + + l2_regularization = proto.Field( + proto.MESSAGE, number=5, message=wrappers.DoubleValue, + ) + + min_relative_progress = proto.Field( + proto.MESSAGE, number=6, message=wrappers.DoubleValue, + ) + + warm_start = proto.Field( + proto.MESSAGE, number=7, message=wrappers.BoolValue, + ) + + early_stop = proto.Field( + proto.MESSAGE, number=8, message=wrappers.BoolValue, + ) + + input_label_columns = proto.RepeatedField(proto.STRING, number=9) + + data_split_method = proto.Field( + proto.ENUM, number=10, enum="Model.DataSplitMethod", + ) + + data_split_eval_fraction = proto.Field(proto.DOUBLE, number=11) + + data_split_column = proto.Field(proto.STRING, number=12) + + learn_rate_strategy = proto.Field( + proto.ENUM, number=13, enum="Model.LearnRateStrategy", + ) + + initial_learn_rate = proto.Field(proto.DOUBLE, number=16) + + label_class_weights = proto.MapField(proto.STRING, proto.DOUBLE, number=17) + + distance_type = 
proto.Field( + proto.ENUM, number=20, enum="Model.DistanceType", + ) + + num_clusters = proto.Field(proto.INT64, number=21) + + model_uri = proto.Field(proto.STRING, number=22) + + optimization_strategy = proto.Field( + proto.ENUM, number=23, enum="Model.OptimizationStrategy", + ) + + kmeans_initialization_method = proto.Field( + proto.ENUM, + number=33, + enum="Model.KmeansEnums.KmeansInitializationMethod", + ) + + kmeans_initialization_column = proto.Field(proto.STRING, number=34) + + class IterationResult(proto.Message): + r"""Information about a single iteration of the training run. + + Attributes: + index (~.wrappers.Int32Value): + Index of the iteration, 0 based. + duration_ms (~.wrappers.Int64Value): + Time taken to run the iteration in + milliseconds. + training_loss (~.wrappers.DoubleValue): + Loss computed on the training data at the end + of iteration. + eval_loss (~.wrappers.DoubleValue): + Loss computed on the eval data at the end of + iteration. + learn_rate (float): + Learn rate used for this iteration. + cluster_infos (Sequence[~.gcb_model.Model.TrainingRun.IterationResult.ClusterInfo]): + Information about top clusters for clustering + models. + """ + + class ClusterInfo(proto.Message): + r"""Information about a single cluster for clustering model. + + Attributes: + centroid_id (int): + Centroid id. + cluster_radius (~.wrappers.DoubleValue): + Cluster radius, the average distance from + centroid to each point assigned to the cluster. + cluster_size (~.wrappers.Int64Value): + Cluster size, the total number of points + assigned to the cluster. 
+ """ + + centroid_id = proto.Field(proto.INT64, number=1) + + cluster_radius = proto.Field( + proto.MESSAGE, number=2, message=wrappers.DoubleValue, + ) + + cluster_size = proto.Field( + proto.MESSAGE, number=3, message=wrappers.Int64Value, + ) + + index = proto.Field(proto.MESSAGE, number=1, message=wrappers.Int32Value,) + + duration_ms = proto.Field( + proto.MESSAGE, number=4, message=wrappers.Int64Value, + ) + + training_loss = proto.Field( + proto.MESSAGE, number=5, message=wrappers.DoubleValue, + ) + + eval_loss = proto.Field( + proto.MESSAGE, number=6, message=wrappers.DoubleValue, + ) + + learn_rate = proto.Field(proto.DOUBLE, number=7) + + cluster_infos = proto.RepeatedField( + proto.MESSAGE, + number=8, + message="Model.TrainingRun.IterationResult.ClusterInfo", + ) + + training_options = proto.Field( + proto.MESSAGE, number=1, message="Model.TrainingRun.TrainingOptions", + ) + + start_time = proto.Field(proto.MESSAGE, number=8, message=timestamp.Timestamp,) + + results = proto.RepeatedField( + proto.MESSAGE, number=6, message="Model.TrainingRun.IterationResult", + ) + + evaluation_metrics = proto.Field( + proto.MESSAGE, number=7, message="Model.EvaluationMetrics", + ) + + etag = proto.Field(proto.STRING, number=1) + + model_reference = proto.Field( + proto.MESSAGE, number=2, message=gcb_model_reference.ModelReference, + ) + + creation_time = proto.Field(proto.INT64, number=5) + + last_modified_time = proto.Field(proto.INT64, number=6) + + description = proto.Field(proto.STRING, number=12) + + friendly_name = proto.Field(proto.STRING, number=14) + + labels = proto.MapField(proto.STRING, proto.STRING, number=15) + + expiration_time = proto.Field(proto.INT64, number=16) + + location = proto.Field(proto.STRING, number=13) + + encryption_configuration = proto.Field( + proto.MESSAGE, number=17, message=encryption_config.EncryptionConfiguration, + ) + + model_type = proto.Field(proto.ENUM, number=7, enum=ModelType,) + + training_runs = 
proto.RepeatedField(proto.MESSAGE, number=9, message=TrainingRun,) + + feature_columns = proto.RepeatedField( + proto.MESSAGE, number=10, message=standard_sql.StandardSqlField, + ) + + label_columns = proto.RepeatedField( + proto.MESSAGE, number=11, message=standard_sql.StandardSqlField, + ) + + +class GetModelRequest(proto.Message): + r""" + + Attributes: + project_id (str): + Required. Project ID of the requested model. + dataset_id (str): + Required. Dataset ID of the requested model. + model_id (str): + Required. Model ID of the requested model. + """ + + project_id = proto.Field(proto.STRING, number=1) + + dataset_id = proto.Field(proto.STRING, number=2) + + model_id = proto.Field(proto.STRING, number=3) + + +class PatchModelRequest(proto.Message): + r""" + + Attributes: + project_id (str): + Required. Project ID of the model to patch. + dataset_id (str): + Required. Dataset ID of the model to patch. + model_id (str): + Required. Model ID of the model to patch. + model (~.gcb_model.Model): + Required. Patched model. + Follows RFC5789 patch semantics. Missing fields + are not updated. To clear a field, explicitly + set to default value. + """ + + project_id = proto.Field(proto.STRING, number=1) + + dataset_id = proto.Field(proto.STRING, number=2) + + model_id = proto.Field(proto.STRING, number=3) + + model = proto.Field(proto.MESSAGE, number=4, message=Model,) + + +class DeleteModelRequest(proto.Message): + r""" + + Attributes: + project_id (str): + Required. Project ID of the model to delete. + dataset_id (str): + Required. Dataset ID of the model to delete. + model_id (str): + Required. Model ID of the model to delete. + """ + + project_id = proto.Field(proto.STRING, number=1) + + dataset_id = proto.Field(proto.STRING, number=2) + + model_id = proto.Field(proto.STRING, number=3) + + +class ListModelsRequest(proto.Message): + r""" + + Attributes: + project_id (str): + Required. Project ID of the models to list. + dataset_id (str): + Required. 
Dataset ID of the models to list. + max_results (~.wrappers.UInt32Value): + The maximum number of results to return in a + single response page. Leverage the page tokens + to iterate through the entire collection. + page_token (str): + Page token, returned by a previous call to + request the next page of results + """ + + project_id = proto.Field(proto.STRING, number=1) + + dataset_id = proto.Field(proto.STRING, number=2) + + max_results = proto.Field(proto.MESSAGE, number=3, message=wrappers.UInt32Value,) + + page_token = proto.Field(proto.STRING, number=4) + + +class ListModelsResponse(proto.Message): + r""" + + Attributes: + models (Sequence[~.gcb_model.Model]): + Models in the requested dataset. Only the following fields + are populated: model_reference, model_type, creation_time, + last_modified_time and labels. + next_page_token (str): + A token to request the next page of results. + """ + + @property + def raw_page(self): + return self + + models = proto.RepeatedField(proto.MESSAGE, number=1, message=Model,) + + next_page_token = proto.Field(proto.STRING, number=2) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery_v2/types/model_reference.py b/google/cloud/bigquery_v2/types/model_reference.py new file mode 100644 index 000000000..e3891d6c1 --- /dev/null +++ b/google/cloud/bigquery_v2/types/model_reference.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +import proto # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.bigquery.v2", manifest={"ModelReference",}, +) + + +class ModelReference(proto.Message): + r"""Id path of a model. + + Attributes: + project_id (str): + Required. The ID of the project containing + this model. + dataset_id (str): + Required. The ID of the dataset containing + this model. + model_id (str): + Required. The ID of the model. The ID must contain only + letters (a-z, A-Z), numbers (0-9), or underscores (_). The + maximum length is 1,024 characters. + """ + + project_id = proto.Field(proto.STRING, number=1) + + dataset_id = proto.Field(proto.STRING, number=2) + + model_id = proto.Field(proto.STRING, number=3) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery_v2/types/standard_sql.py b/google/cloud/bigquery_v2/types/standard_sql.py new file mode 100644 index 000000000..72f12f284 --- /dev/null +++ b/google/cloud/bigquery_v2/types/standard_sql.py @@ -0,0 +1,106 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import proto # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.bigquery.v2", + manifest={"StandardSqlDataType", "StandardSqlField", "StandardSqlStructType",}, +) + + +class StandardSqlDataType(proto.Message): + r"""The type of a variable, e.g., a function argument. Examples: INT64: + {type_kind="INT64"} ARRAY: {type_kind="ARRAY", + array_element_type="STRING"} STRUCT: + {type_kind="STRUCT", struct_type={fields=[ {name="x", + type={type_kind="STRING"}}, {name="y", type={type_kind="ARRAY", + array_element_type="DATE"}} ]}} + + Attributes: + type_kind (~.standard_sql.StandardSqlDataType.TypeKind): + Required. The top level type of this field. + Can be any standard SQL data type (e.g., + "INT64", "DATE", "ARRAY"). + array_element_type (~.standard_sql.StandardSqlDataType): + The type of the array's elements, if type_kind = "ARRAY". + struct_type (~.standard_sql.StandardSqlStructType): + The fields of this struct, in order, if type_kind = + "STRUCT". + """ + + class TypeKind(proto.Enum): + r"""""" + TYPE_KIND_UNSPECIFIED = 0 + INT64 = 2 + BOOL = 5 + FLOAT64 = 7 + STRING = 8 + BYTES = 9 + TIMESTAMP = 19 + DATE = 10 + TIME = 20 + DATETIME = 21 + GEOGRAPHY = 22 + NUMERIC = 23 + ARRAY = 16 + STRUCT = 17 + + type_kind = proto.Field(proto.ENUM, number=1, enum=TypeKind,) + + array_element_type = proto.Field( + proto.MESSAGE, number=2, oneof="sub_type", message="StandardSqlDataType", + ) + + struct_type = proto.Field( + proto.MESSAGE, number=3, oneof="sub_type", message="StandardSqlStructType", + ) + + +class StandardSqlField(proto.Message): + r"""A field or a column. + + Attributes: + name (str): + Optional. The name of this field. Can be + absent for struct fields. + type (~.standard_sql.StandardSqlDataType): + Optional. The type of this parameter. Absent + if not explicitly specified (e.g., CREATE + FUNCTION statement can omit the return type; in + this case the output parameter does not have + this "type" field). 
+ """ + + name = proto.Field(proto.STRING, number=1) + + type = proto.Field(proto.MESSAGE, number=2, message=StandardSqlDataType,) + + +class StandardSqlStructType(proto.Message): + r""" + + Attributes: + fields (Sequence[~.standard_sql.StandardSqlField]): + + """ + + fields = proto.RepeatedField(proto.MESSAGE, number=1, message=StandardSqlField,) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/noxfile.py b/noxfile.py index 90f023add..42d8f9356 100644 --- a/noxfile.py +++ b/noxfile.py @@ -49,16 +49,10 @@ def default(session): constraints_path, ) - if session.python == "2.7": - # The [all] extra is not installable on Python 2.7. - session.install("-e", ".[pandas,pyarrow]", "-c", constraints_path) - elif session.python == "3.5": - session.install("-e", ".[all]", "-c", constraints_path) - else: - # fastparquet is not included in .[all] because, in general, it's - # redundant with pyarrow. We still want to run some unit tests with - # fastparquet serialization, though. - session.install("-e", ".[all,fastparquet]", "-c", constraints_path) + # fastparquet is not included in .[all] because, in general, it's + # redundant with pyarrow. We still want to run some unit tests with + # fastparquet serialization, though. + session.install("-e", ".[all,fastparquet]", "-c", constraints_path) session.install("ipython", "-c", constraints_path) @@ -77,13 +71,13 @@ def default(session): ) -@nox.session(python=["2.7", "3.5", "3.6", "3.7", "3.8"]) +@nox.session(python=["3.6", "3.7", "3.8"]) def unit(session): """Run the unit test suite.""" default(session) -@nox.session(python=["2.7", "3.8"]) +@nox.session(python=["3.8"]) def system(session): """Run the system test suite.""" @@ -108,12 +102,7 @@ def system(session): ) session.install("google-cloud-storage", "-c", constraints_path) - if session.python == "2.7": - # The [all] extra is not installable on Python 2.7. 
- session.install("-e", ".[pandas]", "-c", constraints_path) - else: - session.install("-e", ".[all]", "-c", constraints_path) - + session.install("-e", ".[all]", "-c", constraints_path) session.install("ipython", "-c", constraints_path) # Run py.test against the system tests. @@ -122,7 +111,7 @@ def system(session): ) -@nox.session(python=["2.7", "3.8"]) +@nox.session(python=["3.8"]) def snippets(session): """Run the snippets test suite.""" @@ -139,11 +128,7 @@ def snippets(session): session.install("google-cloud-storage", "-c", constraints_path) session.install("grpcio", "-c", constraints_path) - if session.python == "2.7": - # The [all] extra is not installable on Python 2.7. - session.install("-e", ".[pandas]", "-c", constraints_path) - else: - session.install("-e", ".[all]", "-c", constraints_path) + session.install("-e", ".[all]", "-c", constraints_path) # Run py.test against the snippets tests. # Skip tests in samples/snippets, as those are run in a different session diff --git a/samples/create_routine.py b/samples/create_routine.py index d9b221a4f..012c7927a 100644 --- a/samples/create_routine.py +++ b/samples/create_routine.py @@ -34,7 +34,7 @@ def create_routine(routine_id): bigquery.RoutineArgument( name="x", data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 ), ) ], diff --git a/samples/tests/conftest.py b/samples/tests/conftest.py index d80085dd3..0fdacaaec 100644 --- a/samples/tests/conftest.py +++ b/samples/tests/conftest.py @@ -126,7 +126,7 @@ def routine_id(client, dataset_id): bigquery.RoutineArgument( name="x", data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 ), ) ] diff --git a/samples/tests/test_routine_samples.py b/samples/tests/test_routine_samples.py index a4467c59a..59ec1fae9 100644 --- 
a/samples/tests/test_routine_samples.py +++ b/samples/tests/test_routine_samples.py @@ -39,21 +39,21 @@ def test_create_routine_ddl(capsys, random_routine_id, client): bigquery.RoutineArgument( name="arr", data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.ARRAY, + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.ARRAY, array_element_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.STRUCT, + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.STRUCT, struct_type=bigquery_v2.types.StandardSqlStructType( fields=[ bigquery_v2.types.StandardSqlField( name="name", type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.STRING + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.STRING ), ), bigquery_v2.types.StandardSqlField( name="val", type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 ), ), ] diff --git a/scripts/fixup_bigquery_v2_keywords.py b/scripts/fixup_bigquery_v2_keywords.py new file mode 100644 index 000000000..82b46d64e --- /dev/null +++ b/scripts/fixup_bigquery_v2_keywords.py @@ -0,0 +1,181 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import argparse +import os +import libcst as cst +import pathlib +import sys +from typing import (Any, Callable, Dict, List, Sequence, Tuple) + + +def partition( + predicate: Callable[[Any], bool], + iterator: Sequence[Any] +) -> Tuple[List[Any], List[Any]]: + """A stable, out-of-place partition.""" + results = ([], []) + + for i in iterator: + results[int(predicate(i))].append(i) + + # Returns trueList, falseList + return results[1], results[0] + + +class bigqueryCallTransformer(cst.CSTTransformer): + CTRL_PARAMS: Tuple[str] = ('retry', 'timeout', 'metadata') + METHOD_TO_PARAMS: Dict[str, Tuple[str]] = { + 'delete_model': ('project_id', 'dataset_id', 'model_id', ), + 'get_model': ('project_id', 'dataset_id', 'model_id', ), + 'list_models': ('project_id', 'dataset_id', 'max_results', 'page_token', ), + 'patch_model': ('project_id', 'dataset_id', 'model_id', 'model', ), + + } + + def leave_Call(self, original: cst.Call, updated: cst.Call) -> cst.CSTNode: + try: + key = original.func.attr.value + kword_params = self.METHOD_TO_PARAMS[key] + except (AttributeError, KeyError): + # Either not a method from the API or too convoluted to be sure. + return updated + + # If the existing code is valid, keyword args come after positional args. + # Therefore, all positional args must map to the first parameters. + args, kwargs = partition(lambda a: not bool(a.keyword), updated.args) + if any(k.keyword.value == "request" for k in kwargs): + # We've already fixed this file, don't fix it again. 
+ return updated + + kwargs, ctrl_kwargs = partition( + lambda a: not a.keyword.value in self.CTRL_PARAMS, + kwargs + ) + + args, ctrl_args = args[:len(kword_params)], args[len(kword_params):] + ctrl_kwargs.extend(cst.Arg(value=a.value, keyword=cst.Name(value=ctrl)) + for a, ctrl in zip(ctrl_args, self.CTRL_PARAMS)) + + request_arg = cst.Arg( + value=cst.Dict([ + cst.DictElement( + cst.SimpleString("'{}'".format(name)), + cst.Element(value=arg.value) + ) + # Note: the args + kwargs looks silly, but keep in mind that + # the control parameters had to be stripped out, and that + # those could have been passed positionally or by keyword. + for name, arg in zip(kword_params, args + kwargs)]), + keyword=cst.Name("request") + ) + + return updated.with_changes( + args=[request_arg] + ctrl_kwargs + ) + + +def fix_files( + in_dir: pathlib.Path, + out_dir: pathlib.Path, + *, + transformer=bigqueryCallTransformer(), +): + """Duplicate the input dir to the output dir, fixing file method calls. + + Preconditions: + * in_dir is a real directory + * out_dir is a real, empty directory + """ + pyfile_gen = ( + pathlib.Path(os.path.join(root, f)) + for root, _, files in os.walk(in_dir) + for f in files if os.path.splitext(f)[1] == ".py" + ) + + for fpath in pyfile_gen: + with open(fpath, 'r') as f: + src = f.read() + + # Parse the code and insert method call fixes. + tree = cst.parse_module(src) + updated = tree.visit(transformer) + + # Create the path and directory structure for the new file. + updated_path = out_dir.joinpath(fpath.relative_to(in_dir)) + updated_path.parent.mkdir(parents=True, exist_ok=True) + + # Generate the updated source file at the corresponding path. + with open(updated_path, 'w') as f: + f.write(updated.code) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description="""Fix up source that uses the bigquery client library. + +The existing sources are NOT overwritten but are copied to output_dir with changes made. 
+ +Note: This tool operates at a best-effort level at converting positional + parameters in client method calls to keyword based parameters. + Cases where it WILL FAIL include + A) * or ** expansion in a method call. + B) Calls via function or method alias (includes free function calls) + C) Indirect or dispatched calls (e.g. the method is looked up dynamically) + + These all constitute false negatives. The tool will also detect false + positives when an API method shares a name with another method. +""") + parser.add_argument( + '-d', + '--input-directory', + required=True, + dest='input_dir', + help='the input directory to walk for python files to fix up', + ) + parser.add_argument( + '-o', + '--output-directory', + required=True, + dest='output_dir', + help='the directory to output files fixed via un-flattening', + ) + args = parser.parse_args() + input_dir = pathlib.Path(args.input_dir) + output_dir = pathlib.Path(args.output_dir) + if not input_dir.is_dir(): + print( + f"input directory '{input_dir}' does not exist or is not a directory", + file=sys.stderr, + ) + sys.exit(-1) + + if not output_dir.is_dir(): + print( + f"output directory '{output_dir}' does not exist or is not a directory", + file=sys.stderr, + ) + sys.exit(-1) + + if os.listdir(output_dir): + print( + f"output directory '{output_dir}' is not empty", + file=sys.stderr, + ) + sys.exit(-1) + + fix_files(input_dir, output_dir) diff --git a/setup.py b/setup.py index 73d9a03ca..2cb57aad2 100644 --- a/setup.py +++ b/setup.py @@ -22,22 +22,23 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "1.28.0" +version = "2.0.0" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' # 'Development Status :: 5 - Production/Stable' release_status = "Development Status :: 5 - Production/Stable" dependencies = [ - 'enum34; python_version < "3.4"', - "google-api-core >= 1.21.0, < 2.0dev", + "google-api-core[grpc] >= 1.22.2, < 
2.0.0dev", + "proto-plus >= 1.10.0", + "libcst >= 0.2.5", "google-cloud-core >= 1.4.1, < 2.0dev", "google-resumable-media >= 0.6.0, < 2.0dev", "six >=1.13.0,< 2.0.0dev", ] extras = { "bqstorage": [ - "google-cloud-bigquery-storage >= 1.0.0, <2.0.0dev", + "google-cloud-bigquery-storage >= 2.0.0, <3.0.0dev", # Due to an issue in pip's dependency resolver, the `grpc` extra is not # installed, even though `google-cloud-bigquery-storage` specifies it # as `google-api-core[grpc]`. We thus need to explicitly specify it here. @@ -50,19 +51,10 @@ "pandas": ["pandas>=0.23.0"], "pyarrow": [ # pyarrow 1.0.0 is required for the use of timestamp_as_object keyword. - "pyarrow >= 1.0.0, < 2.0de ; python_version>='3.5'", - "pyarrow >= 0.16.0, < 0.17.0dev ; python_version<'3.5'", + "pyarrow >= 1.0.0, < 2.0dev", ], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], - "fastparquet": [ - "fastparquet", - "python-snappy", - # llvmlite >= 0.32.0 cannot be installed on Python 3.5 and below - # (building the wheel fails), thus needs to be restricted. - # See: https://github.com/googleapis/python-bigquery/issues/78 - "llvmlite<=0.34.0;python_version>='3.6'", - "llvmlite<=0.31.0;python_version<'3.6'", - ], + "fastparquet": ["fastparquet", "python-snappy", "llvmlite>=0.34.0"], "opentelemetry": [ "opentelemetry-api==0.9b0", "opentelemetry-sdk==0.9b0", @@ -95,7 +87,9 @@ # Only include packages under the 'google' namespace. Do not include tests, # benchmarks, etc. packages = [ - package for package in setuptools.find_packages() if package.startswith("google") + package + for package in setuptools.PEP420PackageFinder.find() + if package.startswith("google") ] # Determine which namespaces are needed. 
@@ -118,10 +112,7 @@ "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", - "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", @@ -133,7 +124,8 @@ namespace_packages=namespaces, install_requires=dependencies, extras_require=extras, - python_requires=">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*", + python_requires=">=3.6", + scripts=["scripts/fixup_bigquery_v2_keywords.py"], include_package_data=True, zip_safe=False, ) diff --git a/synth.metadata b/synth.metadata index 7fdc4fb28..c47ff1e51 100644 --- a/synth.metadata +++ b/synth.metadata @@ -3,30 +3,15 @@ { "git": { "name": ".", - "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "b716e1c8ecd90142b498b95e7f8830835529cf4a" - } - }, - { - "git": { - "name": "googleapis", - "remote": "https://github.com/googleapis/googleapis.git", - "sha": "0dc0a6c0f1a9f979bc0690f0caa5fbafa3000c2c", - "internalRef": "327026955" + "remote": "git@github.com:plamut/python-bigquery.git", + "sha": "64d666033446f9af669bb8eb9170b8f62d6308e4" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "27f4406999b1eee29e04b09b2423a8e4646c7e24" - } - }, - { - "git": { - "name": "synthtool", - "remote": "https://github.com/googleapis/synthtool.git", - "sha": "27f4406999b1eee29e04b09b2423a8e4646c7e24" + "sha": "8a7a3021fe97aa0a3641db642fe2b767f1c8110f" } } ], @@ -40,89 +25,5 @@ "generator": "bazel" } } - ], - "generatedFiles": [ - ".coveragerc", - ".flake8", - ".github/CONTRIBUTING.md", - ".github/ISSUE_TEMPLATE/bug_report.md", - ".github/ISSUE_TEMPLATE/feature_request.md", - ".github/ISSUE_TEMPLATE/support_request.md", - ".github/PULL_REQUEST_TEMPLATE.md", - ".github/release-please.yml", 
- ".github/snippet-bot.yml", - ".gitignore", - ".kokoro/build.sh", - ".kokoro/continuous/common.cfg", - ".kokoro/continuous/continuous.cfg", - ".kokoro/docker/docs/Dockerfile", - ".kokoro/docker/docs/fetch_gpg_keys.sh", - ".kokoro/docs/common.cfg", - ".kokoro/docs/docs-presubmit.cfg", - ".kokoro/docs/docs.cfg", - ".kokoro/populate-secrets.sh", - ".kokoro/presubmit/common.cfg", - ".kokoro/presubmit/presubmit.cfg", - ".kokoro/presubmit/system-2.7.cfg", - ".kokoro/presubmit/system-3.8.cfg", - ".kokoro/publish-docs.sh", - ".kokoro/release.sh", - ".kokoro/release/common.cfg", - ".kokoro/release/release.cfg", - ".kokoro/samples/lint/common.cfg", - ".kokoro/samples/lint/continuous.cfg", - ".kokoro/samples/lint/periodic.cfg", - ".kokoro/samples/lint/presubmit.cfg", - ".kokoro/samples/python3.6/common.cfg", - ".kokoro/samples/python3.6/continuous.cfg", - ".kokoro/samples/python3.6/periodic.cfg", - ".kokoro/samples/python3.6/presubmit.cfg", - ".kokoro/samples/python3.7/common.cfg", - ".kokoro/samples/python3.7/continuous.cfg", - ".kokoro/samples/python3.7/periodic.cfg", - ".kokoro/samples/python3.7/presubmit.cfg", - ".kokoro/samples/python3.8/common.cfg", - ".kokoro/samples/python3.8/continuous.cfg", - ".kokoro/samples/python3.8/periodic.cfg", - ".kokoro/samples/python3.8/presubmit.cfg", - ".kokoro/test-samples.sh", - ".kokoro/trampoline.sh", - ".kokoro/trampoline_v2.sh", - ".trampolinerc", - "CODE_OF_CONDUCT.md", - "CONTRIBUTING.rst", - "LICENSE", - "MANIFEST.in", - "docs/_static/custom.css", - "docs/_templates/layout.html", - "docs/conf.py", - "google/cloud/bigquery_v2/gapic/enums.py", - "google/cloud/bigquery_v2/proto/encryption_config.proto", - "google/cloud/bigquery_v2/proto/encryption_config_pb2.py", - "google/cloud/bigquery_v2/proto/encryption_config_pb2_grpc.py", - "google/cloud/bigquery_v2/proto/model.proto", - "google/cloud/bigquery_v2/proto/model_pb2.py", - "google/cloud/bigquery_v2/proto/model_pb2_grpc.py", - 
"google/cloud/bigquery_v2/proto/model_reference.proto", - "google/cloud/bigquery_v2/proto/model_reference_pb2.py", - "google/cloud/bigquery_v2/proto/model_reference_pb2_grpc.py", - "google/cloud/bigquery_v2/proto/standard_sql.proto", - "google/cloud/bigquery_v2/proto/standard_sql_pb2.py", - "google/cloud/bigquery_v2/proto/standard_sql_pb2_grpc.py", - "google/cloud/bigquery_v2/types.py", - "renovate.json", - "samples/AUTHORING_GUIDE.md", - "samples/CONTRIBUTING.md", - "samples/snippets/README.rst", - "samples/snippets/noxfile.py", - "scripts/decrypt-secrets.sh", - "scripts/readme-gen/readme_gen.py", - "scripts/readme-gen/templates/README.tmpl.rst", - "scripts/readme-gen/templates/auth.tmpl.rst", - "scripts/readme-gen/templates/auth_api_key.tmpl.rst", - "scripts/readme-gen/templates/install_deps.tmpl.rst", - "scripts/readme-gen/templates/install_portaudio.tmpl.rst", - "setup.cfg", - "testing/.gitignore" ] } \ No newline at end of file diff --git a/synth.py b/synth.py index ac20c9aec..501380be2 100644 --- a/synth.py +++ b/synth.py @@ -20,56 +20,73 @@ gapic = gcp.GAPICBazel() common = gcp.CommonTemplates() -version = 'v2' +version = "v2" library = gapic.py_library( - service='bigquery', + service="bigquery", version=version, bazel_target=f"//google/cloud/bigquery/{version}:bigquery-{version}-py", include_protos=True, ) s.move( - [ - library / "google/cloud/bigquery_v2/gapic/enums.py", - library / "google/cloud/bigquery_v2/types.py", - library / "google/cloud/bigquery_v2/proto/location*", - library / "google/cloud/bigquery_v2/proto/encryption_config*", - library / "google/cloud/bigquery_v2/proto/model*", - library / "google/cloud/bigquery_v2/proto/standard_sql*", + library, + excludes=[ + "docs/index.rst", + "README.rst", + "noxfile.py", + "setup.py", + library / f"google/cloud/bigquery/__init__.py", + library / f"google/cloud/bigquery/py.typed", + # There are no public API endpoints for the generated ModelServiceClient, + # thus there's no point in generating it and 
its tests. + library / f"google/cloud/bigquery_{version}/services/**", + library / f"tests/unit/gapic/bigquery_{version}/**", ], ) -# Fix up proto docs that are missing summary line. -s.replace( - "google/cloud/bigquery_v2/proto/model_pb2.py", - '"""Attributes:', - '"""Protocol buffer.\n\n Attributes:', -) -s.replace( - "google/cloud/bigquery_v2/proto/encryption_config_pb2.py", - '"""Attributes:', - '"""Encryption configuration.\n\n Attributes:', -) - -# Remove non-ascii characters from docstrings for Python 2.7. -# Format quoted strings as plain text. -s.replace("google/cloud/bigquery_v2/proto/*.py", "[“”]", '``') - # ---------------------------------------------------------------------------- # Add templated files # ---------------------------------------------------------------------------- -templated_files = common.py_library(cov_level=100, samples=True, split_system_tests=True) +templated_files = common.py_library( + cov_level=100, + samples=True, + microgenerator=True, + split_system_tests=True, +) # BigQuery has a custom multiprocessing note -s.move(templated_files, excludes=["noxfile.py", "docs/multiprocessing.rst"]) +s.move( + templated_files, + excludes=["noxfile.py", "docs/multiprocessing.rst", ".coveragerc"] +) # ---------------------------------------------------------------------------- # Samples templates # ---------------------------------------------------------------------------- -python.py_samples() +# python.py_samples() # TODO: why doesn't this work here with Bazel? + +# Do not expose ModelServiceClient, as there is no public API endpoint for the +# models service. +s.replace( + "google/cloud/bigquery_v2/__init__.py", + r"from \.services\.model_service import ModelServiceClient", + "", +) +s.replace( + "google/cloud/bigquery_v2/__init__.py", + r"""["']ModelServiceClient["'],""", + "", +) +# Adjust Model docstring so that Sphinx does not think that "predicted_" is +# a reference to something, issuing a false warning. 
+s.replace( + "google/cloud/bigquery_v2/types/model.py", + r'will have a "predicted_"', + "will have a `predicted_`", +) s.replace( "docs/conf.py", @@ -77,4 +94,11 @@ '{"members": True, "inherited-members": True}' ) +# Tell Sphinx to ingore autogenerated docs files. +s.replace( + "docs/conf.py", + r'"samples/snippets/README\.rst",', + '\g<0>\n "bigquery_v2/services.rst", # generated by the code generator', +) + s.shell.run(["nox", "-s", "blacken"], hide_output=False) diff --git a/testing/constraints-2.7.txt b/testing/constraints-2.7.txt deleted file mode 100644 index fafbaa27f..000000000 --- a/testing/constraints-2.7.txt +++ /dev/null @@ -1,9 +0,0 @@ -google-api-core==1.21.0 -google-cloud-core==1.4.1 -google-cloud-storage==1.30.0 -google-resumable-media==0.6.0 -ipython==5.5 -pandas==0.23.0 -pyarrow==0.16.0 -six==1.13.0 -tqdm==4.7.4 \ No newline at end of file diff --git a/testing/constraints-3.5.txt b/testing/constraints-3.5.txt deleted file mode 100644 index a262dbe5f..000000000 --- a/testing/constraints-3.5.txt +++ /dev/null @@ -1,12 +0,0 @@ -google-api-core==1.21.0 -google-cloud-bigquery-storage==1.0.0 -google-cloud-core==1.4.1 -google-resumable-media==0.6.0 -google-cloud-storage==1.30.0 -grpcio==1.32.0 -ipython==5.5 -# pandas 0.23.0 is the first version to work with pyarrow to_pandas. -pandas==0.23.0 -pyarrow==1.0.0 -six==1.13.0 -tqdm==4.7.4 \ No newline at end of file diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index e69de29bb..a9f4faa92 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -0,0 +1,16 @@ +fastparquet==0.4.1 +google-api-core==1.22.2 +google-cloud-bigquery-storage==2.0.0 +google-cloud-core==1.4.1 +google-resumable-media==0.6.0 +grpcio==1.32.0 +ipython==5.5 +libcst==0.2.5 +llvmlite==0.34.0 +# pandas 0.23.0 is the first version to work with pyarrow to_pandas. 
+pandas==0.23.0 +proto-plus==1.10.0 +pyarrow==1.0.0 +python-snappy==0.5.4 +six==1.13.0 +tqdm==4.7.4 diff --git a/tests/system.py b/tests/system.py index 02cc8e139..68fcb918c 100644 --- a/tests/system.py +++ b/tests/system.py @@ -34,11 +34,9 @@ import pkg_resources try: - from google.cloud import bigquery_storage_v1 - from google.cloud import bigquery_storage_v1beta1 + from google.cloud import bigquery_storage except ImportError: # pragma: NO COVER - bigquery_storage_v1 = None - bigquery_storage_v1beta1 = None + bigquery_storage = None try: import fastavro # to parse BQ storage client results @@ -1793,57 +1791,11 @@ def test_dbapi_fetchall(self): self.assertEqual(row_tuples, [(1, 2), (3, 4), (5, 6)]) @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_dbapi_fetch_w_bqstorage_client_large_result_set(self): - bqstorage_client = bigquery_storage_v1.BigQueryReadClient( - credentials=Config.CLIENT._credentials - ) - cursor = dbapi.connect(Config.CLIENT, bqstorage_client).cursor() - - cursor.execute( - """ - SELECT id, `by`, time_ts - FROM `bigquery-public-data.hacker_news.comments` - ORDER BY `id` ASC - LIMIT 100000 - """ - ) - - result_rows = [cursor.fetchone(), cursor.fetchone(), cursor.fetchone()] - - field_name = operator.itemgetter(0) - fetched_data = [sorted(row.items(), key=field_name) for row in result_rows] - - # Since DB API is not thread safe, only a single result stream should be - # requested by the BQ storage client, meaning that results should arrive - # in the sorted order. 
- expected_data = [ - [ - ("by", "sama"), - ("id", 15), - ("time_ts", datetime.datetime(2006, 10, 9, 19, 51, 1, tzinfo=UTC)), - ], - [ - ("by", "pg"), - ("id", 17), - ("time_ts", datetime.datetime(2006, 10, 9, 19, 52, 45, tzinfo=UTC)), - ], - [ - ("by", "pg"), - ("id", 22), - ("time_ts", datetime.datetime(2006, 10, 10, 2, 18, 22, tzinfo=UTC)), - ], - ] - self.assertEqual(fetched_data, expected_data) - - @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_dbapi_fetch_w_bqstorage_client_v1beta1_large_result_set(self): - bqstorage_client = bigquery_storage_v1beta1.BigQueryStorageClient( + bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=Config.CLIENT._credentials ) cursor = dbapi.connect(Config.CLIENT, bqstorage_client).cursor() @@ -1901,7 +1853,7 @@ def test_dbapi_dry_run_query(self): self.assertEqual(list(rows), []) @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_dbapi_connection_does_not_leak_sockets(self): current_process = psutil.Process() @@ -2331,7 +2283,7 @@ def test_query_results_to_dataframe(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_query_results_to_dataframe_w_bqstorage(self): query = """ @@ -2340,40 +2292,7 @@ def test_query_results_to_dataframe_w_bqstorage(self): LIMIT 10 """ - bqstorage_client = bigquery_storage_v1.BigQueryReadClient( - credentials=Config.CLIENT._credentials - ) - - df = Config.CLIENT.query(query).result().to_dataframe(bqstorage_client) - - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 10) # verify the number of rows - column_names = ["id", "author", "time_ts", 
"dead"] - self.assertEqual(list(df), column_names) - exp_datatypes = { - "id": int, - "author": six.text_type, - "time_ts": pandas.Timestamp, - "dead": bool, - } - for index, row in df.iterrows(): - for col in column_names: - # all the schema fields are nullable, so None is acceptable - if not row[col] is None: - self.assertIsInstance(row[col], exp_datatypes[col]) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" - ) - def test_query_results_to_dataframe_w_bqstorage_v1beta1(self): - query = """ - SELECT id, author, time_ts, dead - FROM `bigquery-public-data.hacker_news.comments` - LIMIT 10 - """ - - bqstorage_client = bigquery_storage_v1beta1.BigQueryStorageClient( + bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=Config.CLIENT._credentials ) @@ -2569,7 +2488,7 @@ def test_create_routine(self): routine_name = "test_routine" dataset = self.temp_dataset(_make_dataset_id("create_routine")) float64_type = bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.FLOAT64 + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.FLOAT64 ) routine = bigquery.Routine( dataset.routine(routine_name), @@ -2584,7 +2503,7 @@ def test_create_routine(self): bigquery.RoutineArgument( name="arr", data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.ARRAY, + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.ARRAY, array_element_type=float64_type, ), ) @@ -2663,7 +2582,7 @@ def _fetch_dataframe(self, query): @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_nested_table_to_arrow(self): from google.cloud.bigquery.job import SourceFormat @@ -2699,7 +2618,7 @@ def 
test_nested_table_to_arrow(self): job_config.schema = schema # Load a table using a local JSON file from memory. Config.CLIENT.load_table_from_file(body, table, job_config=job_config).result() - bqstorage_client = bigquery_storage_v1.BigQueryReadClient( + bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=Config.CLIENT._credentials ) @@ -2855,13 +2774,13 @@ def test_list_rows_page_size(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_list_rows_max_results_w_bqstorage(self): table_ref = DatasetReference("bigquery-public-data", "utility_us").table( "country_code_iso" ) - bqstorage_client = bigquery_storage_v1.BigQueryReadClient( + bqstorage_client = bigquery_storage.BigQueryReadClient( credentials=Config.CLIENT._credentials ) diff --git a/tests/unit/enums/test_standard_sql_data_types.py b/tests/unit/enums/test_standard_sql_data_types.py index 6fa4f057f..7f62c46fd 100644 --- a/tests/unit/enums/test_standard_sql_data_types.py +++ b/tests/unit/enums/test_standard_sql_data_types.py @@ -32,7 +32,7 @@ def enum_under_test(): @pytest.fixture def gapic_enum(): """The referential autogenerated enum the enum under test is based on.""" - from google.cloud.bigquery_v2.gapic.enums import StandardSqlDataType + from google.cloud.bigquery_v2.types import StandardSqlDataType return StandardSqlDataType.TypeKind @@ -61,7 +61,10 @@ def test_standard_sql_types_enum_members(enum_under_test, gapic_enum): assert name not in enum_under_test.__members__ -def test_standard_sql_types_enum_docstring(enum_under_test, gapic_enum): +@pytest.mark.skip(reason="Code generator issue, the docstring is not generated.") +def test_standard_sql_types_enum_docstring( + enum_under_test, gapic_enum +): # pragma: NO COVER assert "STRUCT (int):" not in enum_under_test.__doc__ assert "BOOL (int):" in 
enum_under_test.__doc__ assert "TIME (int):" in enum_under_test.__doc__ diff --git a/tests/unit/model/test_model.py b/tests/unit/model/test_model.py index 90fc09e66..2c0079429 100644 --- a/tests/unit/model/test_model.py +++ b/tests/unit/model/test_model.py @@ -19,7 +19,7 @@ import pytest import google.cloud._helpers -from google.cloud.bigquery_v2.gapic import enums +from google.cloud.bigquery_v2 import types KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1" @@ -117,7 +117,7 @@ def test_from_api_repr(target_class): assert got.expires == expiration_time assert got.description == u"A friendly description." assert got.friendly_name == u"A friendly name." - assert got.model_type == enums.Model.ModelType.LOGISTIC_REGRESSION + assert got.model_type == types.Model.ModelType.LOGISTIC_REGRESSION assert got.labels == {"greeting": u"こんにちは"} assert got.encryption_configuration.kms_key_name == KMS_KEY_NAME assert got.training_runs[0].training_options.initial_learn_rate == 1.0 @@ -162,7 +162,7 @@ def test_from_api_repr_w_minimal_resource(target_class): assert got.expires is None assert got.description is None assert got.friendly_name is None - assert got.model_type == enums.Model.ModelType.MODEL_TYPE_UNSPECIFIED + assert got.model_type == types.Model.ModelType.MODEL_TYPE_UNSPECIFIED assert got.labels == {} assert got.encryption_configuration is None assert len(got.training_runs) == 0 diff --git a/tests/unit/routine/test_routine.py b/tests/unit/routine/test_routine.py index 02f703535..b02ace1db 100644 --- a/tests/unit/routine/test_routine.py +++ b/tests/unit/routine/test_routine.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # Copyright 2019 Google LLC # @@ -63,14 +62,14 @@ def test_ctor_w_properties(target_class): RoutineArgument( name="x", data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 ), ) ] body = "x * 3" language = "SQL" return_type 
= bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 ) type_ = "SCALAR_FUNCTION" description = "A routine description." @@ -141,14 +140,14 @@ def test_from_api_repr(target_class): RoutineArgument( name="x", data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 ), ) ] assert actual_routine.body == "42" assert actual_routine.language == "SQL" assert actual_routine.return_type == bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 ) assert actual_routine.type_ == "SCALAR_FUNCTION" assert actual_routine._properties["someNewField"] == "someValue" diff --git a/tests/unit/routine/test_routine_argument.py b/tests/unit/routine/test_routine_argument.py index 7d17b5fc7..e3bda9539 100644 --- a/tests/unit/routine/test_routine_argument.py +++ b/tests/unit/routine/test_routine_argument.py @@ -28,7 +28,7 @@ def target_class(): def test_ctor(target_class): data_type = bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 ) actual_arg = target_class( name="field_name", kind="FIXED_TYPE", mode="IN", data_type=data_type @@ -51,7 +51,7 @@ def test_from_api_repr(target_class): assert actual_arg.kind == "FIXED_TYPE" assert actual_arg.mode == "IN" assert actual_arg.data_type == bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 ) @@ -72,7 +72,7 @@ def test_from_api_repr_w_unknown_fields(target_class): def test_eq(target_class): data_type = bigquery_v2.types.StandardSqlDataType( - 
type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 ) arg = target_class( name="field_name", kind="FIXED_TYPE", mode="IN", data_type=data_type diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index f4355072a..c1073066d 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -20,7 +20,6 @@ import warnings import mock -import six try: import pandas @@ -300,10 +299,7 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): ) ) assert pyarrow.types.is_struct(actual) - try: - assert actual.num_fields == len(fields) - except AttributeError: # py27 - assert actual.num_children == len(fields) + assert actual.num_fields == len(fields) assert actual.equals(expected) @@ -348,10 +344,7 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): ) assert pyarrow.types.is_list(actual) assert pyarrow.types.is_struct(actual.value_type) - try: - assert actual.value_type.num_fields == len(fields) - except AttributeError: # py27 - assert actual.value_type.num_children == len(fields) + assert actual.value_type.num_fields == len(fields) assert actual.value_type.equals(expected_value_type) @@ -553,12 +546,9 @@ def test_bq_to_arrow_schema_w_unknown_type(module_under_test): actual = module_under_test.bq_to_arrow_schema(fields) assert actual is None - if six.PY3: - assert len(warned) == 1 - warning = warned[0] - assert "field3" in str(warning) - else: - assert len(warned) == 0 + assert len(warned) == 1 + warning = warned[0] + assert "field3" in str(warning) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @@ -773,26 +763,6 @@ def test_dataframe_to_bq_schema_dict_sequence(module_under_test): assert returned_schema == expected_schema -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(not six.PY2, reason="Requires Python 2.7") -def 
test_dataframe_to_bq_schema_w_struct_raises_py27(module_under_test): - dataframe = pandas.DataFrame( - data=[{"struct_field": {"int_col": 1}}, {"struct_field": {"int_col": 2}}] - ) - bq_schema = [ - schema.SchemaField( - "struct_field", - field_type="STRUCT", - fields=[schema.SchemaField("int_col", field_type="INT64")], - ), - ] - - with pytest.raises(ValueError) as excinfo: - module_under_test.dataframe_to_bq_schema(dataframe, bq_schema=bq_schema) - - assert "struct (record) column types is not supported" in str(excinfo.value) - - @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_dataframe_to_arrow_with_multiindex(module_under_test): diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index c4c604ed0..f44201ab8 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -48,7 +48,7 @@ from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( InMemorySpanExporter, ) -except (ImportError, AttributeError): +except (ImportError, AttributeError): # pragma: NO COVER opentelemetry = None try: import pyarrow @@ -62,9 +62,9 @@ from google.cloud.bigquery.dataset import DatasetReference try: - from google.cloud import bigquery_storage_v1 + from google.cloud import bigquery_storage except (ImportError, AttributeError): # pragma: NO COVER - bigquery_storage_v1 = None + bigquery_storage = None from test_utils.imports import maybe_fail_import from tests.unit.helpers import make_connection @@ -794,17 +794,17 @@ def test_get_dataset(self): self.assertEqual(dataset.dataset_id, self.DS_ID) @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_create_bqstorage_client(self): - mock_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) 
mock_client_instance = object() mock_client.return_value = mock_client_instance creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) with mock.patch( - "google.cloud.bigquery_storage_v1.BigQueryReadClient", mock_client + "google.cloud.bigquery_storage.BigQueryReadClient", mock_client ): bqstorage_client = client._create_bqstorage_client() @@ -817,8 +817,8 @@ def test_create_bqstorage_client_missing_dependency(self): def fail_bqstorage_import(name, globals, locals, fromlist, level): # NOTE: *very* simplified, assuming a straightforward absolute import - return "bigquery_storage_v1" in name or ( - fromlist is not None and "bigquery_storage_v1" in fromlist + return "bigquery_storage" in name or ( + fromlist is not None and "bigquery_storage" in fromlist ) no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) @@ -2499,7 +2499,7 @@ def test_update_routine(self): RoutineArgument( name="x", data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.enums.StandardSqlDataType.TypeKind.INT64 + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 ), ) ] @@ -8032,49 +8032,35 @@ def test_load_table_from_dataframe_struct_fields(self): "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True ) - if six.PY2: - with pytest.raises(ValueError) as exc_info, load_patch: - client.load_table_from_dataframe( - dataframe, - self.TABLE_REF, - job_config=job_config, - location=self.LOCATION, - ) - - err_msg = str(exc_info.value) - assert "struct" in err_msg - assert "not support" in err_msg - - else: - get_table_patch = mock.patch( - "google.cloud.bigquery.client.Client.get_table", - autospec=True, - side_effect=google.api_core.exceptions.NotFound("Table not found"), - ) - with load_patch as load_table_from_file, get_table_patch: - client.load_table_from_dataframe( - dataframe, - self.TABLE_REF, - job_config=job_config, - location=self.LOCATION, - ) - - load_table_from_file.assert_called_once_with( - 
client, - mock.ANY, + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=google.api_core.exceptions.NotFound("Table not found"), + ) + with load_patch as load_table_from_file, get_table_patch: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, - num_retries=_DEFAULT_NUM_RETRIES, - rewind=True, - job_id=mock.ANY, - job_id_prefix=None, + job_config=job_config, location=self.LOCATION, - project=None, - job_config=mock.ANY, ) - sent_config = load_table_from_file.mock_calls[0][2]["job_config"] - assert sent_config.source_format == job.SourceFormat.PARQUET - assert sent_config.schema == schema + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + job_id=mock.ANY, + job_id_prefix=None, + location=self.LOCATION, + project=None, + job_config=mock.ANY, + ) + + sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.PARQUET + assert sent_config.schema == schema @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @@ -8671,14 +8657,9 @@ def test_schema_from_json_with_file_path(self): client = self._make_client() mock_file_path = "/mocked/file.json" - if six.PY2: - open_patch = mock.patch( - "__builtin__.open", mock.mock_open(read_data=file_content) - ) - else: - open_patch = mock.patch( - "builtins.open", new=mock.mock_open(read_data=file_content) - ) + open_patch = mock.patch( + "builtins.open", new=mock.mock_open(read_data=file_content) + ) with open_patch as _mock_file: actual = client.schema_from_json(mock_file_path) @@ -8720,12 +8701,7 @@ def test_schema_from_json_with_file_object(self): ] client = self._make_client() - - if six.PY2: - fake_file = io.BytesIO(file_content) - else: - fake_file = io.StringIO(file_content) - + fake_file = io.StringIO(file_content) actual = 
client.schema_from_json(fake_file) assert expected == actual @@ -8762,11 +8738,7 @@ def test_schema_to_json_with_file_path(self): client = self._make_client() mock_file_path = "/mocked/file.json" - - if six.PY2: - open_patch = mock.patch("__builtin__.open", mock.mock_open()) - else: - open_patch = mock.patch("builtins.open", mock.mock_open()) + open_patch = mock.patch("builtins.open", mock.mock_open()) with open_patch as mock_file, mock.patch("json.dump") as mock_dump: client.schema_to_json(schema_list, mock_file_path) @@ -8808,10 +8780,7 @@ def test_schema_to_json_with_file_object(self): SchemaField("sales", "FLOAT", "NULLABLE", "total sales"), ] - if six.PY2: - fake_file = io.BytesIO() - else: - fake_file = io.StringIO() + fake_file = io.StringIO() client = self._make_client() diff --git a/tests/unit/test_dbapi_connection.py b/tests/unit/test_dbapi_connection.py index 0f1be45ee..30fb1292e 100644 --- a/tests/unit/test_dbapi_connection.py +++ b/tests/unit/test_dbapi_connection.py @@ -19,9 +19,9 @@ import six try: - from google.cloud import bigquery_storage_v1 + from google.cloud import bigquery_storage except ImportError: # pragma: NO COVER - bigquery_storage_v1 = None + bigquery_storage = None class TestConnection(unittest.TestCase): @@ -41,29 +41,26 @@ def _mock_client(self): return mock_client def _mock_bqstorage_client(self): - if bigquery_storage_v1 is None: - return None - mock_client = mock.create_autospec( - bigquery_storage_v1.client.BigQueryReadClient - ) - mock_client.transport = mock.Mock(spec=["channel"]) - mock_client.transport.channel = mock.Mock(spec=["close"]) + # Assumption: bigquery_storage exists. It's the test's responisbility to + # not use this helper or skip itself if bqstroage is not installed. 
+ mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + mock_client._transport = mock.Mock(spec=["channel"]) + mock_client._transport.grpc_channel = mock.Mock(spec=["close"]) return mock_client def test_ctor_wo_bqstorage_client(self): from google.cloud.bigquery.dbapi import Connection mock_client = self._mock_client() - mock_bqstorage_client = self._mock_bqstorage_client() - mock_client._create_bqstorage_client.return_value = mock_bqstorage_client + mock_client._create_bqstorage_client.return_value = None connection = self._make_one(client=mock_client) self.assertIsInstance(connection, Connection) self.assertIs(connection._client, mock_client) - self.assertIs(connection._bqstorage_client, mock_bqstorage_client) + self.assertIs(connection._bqstorage_client, None) @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_ctor_w_bqstorage_client(self): from google.cloud.bigquery.dbapi import Connection @@ -87,6 +84,9 @@ def test_connect_wo_client(self, mock_client): self.assertIsNotNone(connection._client) self.assertIsNotNone(connection._bqstorage_client) + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) def test_connect_w_client(self): from google.cloud.bigquery.dbapi import connect from google.cloud.bigquery.dbapi import Connection @@ -101,7 +101,7 @@ def test_connect_w_client(self): self.assertIs(connection._bqstorage_client, mock_bqstorage_client) @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_connect_w_both_clients(self): from google.cloud.bigquery.dbapi import connect @@ -130,7 +130,7 @@ def test_raises_error_if_closed(self): getattr(connection, method)() @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + 
bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_close_closes_all_created_bigquery_clients(self): client = self._mock_client() @@ -150,10 +150,10 @@ def test_close_closes_all_created_bigquery_clients(self): connection.close() self.assertTrue(client.close.called) - self.assertTrue(bqstorage_client.transport.channel.close.called) + self.assertTrue(bqstorage_client._transport.grpc_channel.close.called) @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_close_does_not_close_bigquery_clients_passed_to_it(self): client = self._mock_client() @@ -163,7 +163,7 @@ def test_close_does_not_close_bigquery_clients_passed_to_it(self): connection.close() self.assertFalse(client.close.called) - self.assertFalse(bqstorage_client.transport.channel.called) + self.assertFalse(bqstorage_client._transport.grpc_channel.close.called) def test_close_closes_all_created_cursors(self): connection = self._make_one(client=self._mock_client()) diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index bd1d9dc0a..9a1a6b1e8 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -14,7 +14,6 @@ import operator as op import unittest -import warnings import mock import six @@ -27,11 +26,9 @@ from google.api_core import exceptions try: - from google.cloud import bigquery_storage_v1 - from google.cloud import bigquery_storage_v1beta1 + from google.cloud import bigquery_storage except ImportError: # pragma: NO COVER - bigquery_storage_v1 = None - bigquery_storage_v1beta1 = None + bigquery_storage = None from tests.unit.helpers import _to_pyarrow @@ -78,32 +75,17 @@ def _mock_client( return mock_client - def _mock_bqstorage_client(self, rows=None, stream_count=0, v1beta1=False): - from google.cloud.bigquery_storage_v1 import client - from google.cloud.bigquery_storage_v1 import types - from 
google.cloud.bigquery_storage_v1beta1 import types as types_v1beta1 - + def _mock_bqstorage_client(self, rows=None, stream_count=0): if rows is None: rows = [] - if v1beta1: - mock_client = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) - mock_read_session = mock.MagicMock( - streams=[ - types_v1beta1.Stream(name="streams/stream_{}".format(i)) - for i in range(stream_count) - ] - ) - else: - mock_client = mock.create_autospec(client.BigQueryReadClient) - mock_read_session = mock.MagicMock( - streams=[ - types.ReadStream(name="streams/stream_{}".format(i)) - for i in range(stream_count) - ] - ) + mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + mock_read_session = mock.MagicMock( + streams=[ + bigquery_storage.types.ReadStream(name="streams/stream_{}".format(i)) + for i in range(stream_count) + ] + ) mock_client.create_read_session.return_value = mock_read_session @@ -291,7 +273,7 @@ def test_fetchall_w_row(self): self.assertEqual(rows[0], (1,)) @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_fetchall_w_bqstorage_client_fetch_success(self): @@ -345,71 +327,7 @@ def test_fetchall_w_bqstorage_client_fetch_success(self): self.assertEqual(sorted_row_data, expected_row_data) @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_fetchall_w_bqstorage_client_v1beta1_fetch_success(self): - from google.cloud.bigquery import dbapi - from google.cloud.bigquery import table - - # use unordered data to also test any non-determenistic key order in dicts - row_data = [ - table.Row([1.4, 1.1, 1.3, 1.2], {"bar": 3, "baz": 2, "foo": 1, "quux": 0}), - table.Row([2.4, 2.1, 2.3, 2.2], {"bar": 3, "baz": 2, "foo": 1, "quux": 0}), - ] - 
bqstorage_streamed_rows = [ - { - "bar": _to_pyarrow(1.2), - "foo": _to_pyarrow(1.1), - "quux": _to_pyarrow(1.4), - "baz": _to_pyarrow(1.3), - }, - { - "bar": _to_pyarrow(2.2), - "foo": _to_pyarrow(2.1), - "quux": _to_pyarrow(2.4), - "baz": _to_pyarrow(2.3), - }, - ] - - mock_client = self._mock_client(rows=row_data) - mock_bqstorage_client = self._mock_bqstorage_client( - stream_count=1, rows=bqstorage_streamed_rows, v1beta1=True - ) - - connection = dbapi.connect( - client=mock_client, bqstorage_client=mock_bqstorage_client, - ) - cursor = connection.cursor() - cursor.execute("SELECT foo, bar FROM some_table") - - with warnings.catch_warnings(record=True) as warned: - rows = cursor.fetchall() - - # a deprecation warning should have been emitted - expected_warnings = [ - warning - for warning in warned - if issubclass(warning.category, DeprecationWarning) - and "v1beta1" in str(warning) - ] - self.assertEqual(len(expected_warnings), 1, "Deprecation warning not raised.") - - # the default client was not used - mock_client.list_rows.assert_not_called() - - # check the data returned - field_value = op.itemgetter(1) - sorted_row_data = [sorted(row.items(), key=field_value) for row in rows] - expected_row_data = [ - [("foo", 1.1), ("bar", 1.2), ("baz", 1.3), ("quux", 1.4)], - [("foo", 2.1), ("bar", 2.2), ("baz", 2.3), ("quux", 2.4)], - ] - - self.assertEqual(sorted_row_data, expected_row_data) - - @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_fetchall_w_bqstorage_client_fetch_no_rows(self): from google.cloud.bigquery import dbapi @@ -432,7 +350,7 @@ def test_fetchall_w_bqstorage_client_fetch_no_rows(self): self.assertEqual(rows, []) @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def 
test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self): from google.cloud.bigquery import dbapi diff --git a/tests/unit/test_job.py b/tests/unit/test_job.py index fb6a46bd6..fb042e18c 100644 --- a/tests/unit/test_job.py +++ b/tests/unit/test_job.py @@ -35,9 +35,9 @@ except ImportError: # pragma: NO COVER pyarrow = None try: - from google.cloud import bigquery_storage_v1 + from google.cloud import bigquery_storage except (ImportError, AttributeError): # pragma: NO COVER - bigquery_storage_v1 = None + bigquery_storage = None try: from tqdm import tqdm except (ImportError, AttributeError): # pragma: NO COVER @@ -5667,7 +5667,7 @@ def test_to_dataframe_ddl_query(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_dataframe_bqstorage(self): query_resource = { @@ -5685,8 +5685,8 @@ def test_to_dataframe_bqstorage(self): client = _make_client(self.PROJECT, connection=connection) resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) - session = bigquery_storage_v1.types.ReadSession() + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() session.avro_schema.schema = json.dumps( { "type": "record", @@ -5704,9 +5704,9 @@ def test_to_dataframe_bqstorage(self): destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( **resource["configuration"]["query"]["destinationTable"] ) - expected_session = bigquery_storage_v1.types.ReadSession( + expected_session = bigquery_storage.types.ReadSession( table=destination_table, - data_format=bigquery_storage_v1.enums.DataFormat.ARROW, + data_format=bigquery_storage.types.DataFormat.ARROW, ) 
bqstorage_client.create_read_session.assert_called_once_with( parent="projects/{}".format(self.PROJECT), @@ -6259,7 +6259,7 @@ def test__contains_order_by(query, expected): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif( - bigquery_storage_v1 is None, reason="Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) @pytest.mark.parametrize( "query", @@ -6295,8 +6295,8 @@ def test_to_dataframe_bqstorage_preserve_order(query): connection = _make_connection(get_query_results_resource, job_resource) client = _make_client(connection=connection) job = target_class.from_api_repr(job_resource, client) - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) - session = bigquery_storage_v1.types.ReadSession() + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() session.avro_schema.schema = json.dumps( { "type": "record", @@ -6314,8 +6314,8 @@ def test_to_dataframe_bqstorage_preserve_order(query): destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( **job_resource["configuration"]["query"]["destinationTable"] ) - expected_session = bigquery_storage_v1.types.ReadSession( - table=destination_table, data_format=bigquery_storage_v1.enums.DataFormat.ARROW, + expected_session = bigquery_storage.types.ReadSession( + table=destination_table, data_format=bigquery_storage.types.DataFormat.ARROW, ) bqstorage_client.create_read_session.assert_called_once_with( parent="projects/test-project", diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index c4527c837..20be6b755 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -41,7 +41,7 @@ io = pytest.importorskip("IPython.utils.io") tools = pytest.importorskip("IPython.testing.tools") interactiveshell = pytest.importorskip("IPython.terminal.interactiveshell") 
-bigquery_storage_v1 = pytest.importorskip("google.cloud.bigquery_storage_v1") +bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") @pytest.fixture(scope="session") @@ -83,8 +83,8 @@ def missing_bq_storage(): def fail_if(name, globals, locals, fromlist, level): # NOTE: *very* simplified, assuming a straightforward absolute import - return "bigquery_storage_v1" in name or ( - fromlist is not None and "bigquery_storage_v1" in fromlist + return "bigquery_storage" in name or ( + fromlist is not None and "bigquery_storage" in fromlist ) return maybe_fail_import(predicate=fail_if) @@ -314,14 +314,14 @@ def test__make_bqstorage_client_false(): @pytest.mark.skipif( - bigquery_storage_v1 is None, reason="Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) def test__make_bqstorage_client_true(): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) got = magics._make_bqstorage_client(True, credentials_mock) - assert isinstance(got, bigquery_storage_v1.BigQueryReadClient) + assert isinstance(got, bigquery_storage.BigQueryReadClient) def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage): @@ -338,7 +338,7 @@ def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage): @pytest.mark.skipif( - bigquery_storage_v1 is None, reason="Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test__make_bqstorage_client_true_missing_gapic(missing_grpcio_lib): @@ -396,7 +396,7 @@ def test_extension_load(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif( - bigquery_storage_v1 is None, reason="Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, reason="Requires 
`google-cloud-bigquery-storage`" ) def test_bigquery_magic_without_optional_arguments(monkeypatch): ip = IPython.get_ipython() @@ -410,14 +410,14 @@ def test_bigquery_magic_without_optional_arguments(monkeypatch): monkeypatch.setattr(magics.context, "_credentials", mock_credentials) # Mock out the BigQuery Storage API. - bqstorage_mock = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + bqstorage_mock = mock.create_autospec(bigquery_storage.BigQueryReadClient) bqstorage_instance_mock = mock.create_autospec( - bigquery_storage_v1.BigQueryReadClient, instance=True + bigquery_storage.BigQueryReadClient, instance=True ) - bqstorage_instance_mock.transport = mock.Mock() + bqstorage_instance_mock._transport = mock.Mock() bqstorage_mock.return_value = bqstorage_instance_mock bqstorage_client_patch = mock.patch( - "google.cloud.bigquery_storage_v1.BigQueryReadClient", bqstorage_mock + "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock ) sql = "SELECT 17 AS num" @@ -559,7 +559,7 @@ def test_bigquery_magic_clears_display_in_verbose_mode(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif( - bigquery_storage_v1 is None, reason="Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): ip = IPython.get_ipython() @@ -573,14 +573,14 @@ def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch): monkeypatch.setattr(magics.context, "_credentials", mock_credentials) # Mock out the BigQuery Storage API. 
- bqstorage_mock = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + bqstorage_mock = mock.create_autospec(bigquery_storage.BigQueryReadClient) bqstorage_instance_mock = mock.create_autospec( - bigquery_storage_v1.BigQueryReadClient, instance=True + bigquery_storage.BigQueryReadClient, instance=True ) - bqstorage_instance_mock.transport = mock.Mock() + bqstorage_instance_mock._transport = mock.Mock() bqstorage_mock.return_value = bqstorage_instance_mock bqstorage_client_patch = mock.patch( - "google.cloud.bigquery_storage_v1.BigQueryReadClient", bqstorage_mock + "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock ) sql = "SELECT 17 AS num" @@ -623,7 +623,7 @@ def warning_match(warning): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif( - bigquery_storage_v1 is None, reason="Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) def test_bigquery_magic_with_rest_client_requested(monkeypatch): ip = IPython.get_ipython() @@ -637,9 +637,9 @@ def test_bigquery_magic_with_rest_client_requested(monkeypatch): monkeypatch.setattr(magics.context, "_credentials", mock_credentials) # Mock out the BigQuery Storage API. 
- bqstorage_mock = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + bqstorage_mock = mock.create_autospec(bigquery_storage.BigQueryReadClient) bqstorage_client_patch = mock.patch( - "google.cloud.bigquery_storage_v1.BigQueryReadClient", bqstorage_mock + "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock ) sql = "SELECT 17 AS num" @@ -841,7 +841,7 @@ def test_bigquery_magic_w_table_id_and_destination_var(ipython_ns_cleanup): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif( - bigquery_storage_v1 is None, reason="Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_bigquery_magic_w_table_id_and_bqstorage_client(): @@ -864,14 +864,14 @@ def test_bigquery_magic_w_table_id_and_bqstorage_client(): "google.cloud.bigquery.magics.magics.bigquery.Client", autospec=True ) - bqstorage_mock = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + bqstorage_mock = mock.create_autospec(bigquery_storage.BigQueryReadClient) bqstorage_instance_mock = mock.create_autospec( - bigquery_storage_v1.BigQueryReadClient, instance=True + bigquery_storage.BigQueryReadClient, instance=True ) - bqstorage_instance_mock.transport = mock.Mock() + bqstorage_instance_mock._transport = mock.Mock() bqstorage_mock.return_value = bqstorage_instance_mock bqstorage_client_patch = mock.patch( - "google.cloud.bigquery_storage_v1.BigQueryReadClient", bqstorage_mock + "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock ) table_id = "bigquery-public-data.samples.shakespeare" diff --git a/tests/unit/test_opentelemetry_tracing.py b/tests/unit/test_opentelemetry_tracing.py index 1c35b0a82..09afa7531 100644 --- a/tests/unit/test_opentelemetry_tracing.py +++ b/tests/unit/test_opentelemetry_tracing.py @@ -25,7 +25,7 @@ from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( 
InMemorySpanExporter, ) -except ImportError: +except ImportError: # pragma: NO COVER opentelemetry = None import pytest from six.moves import reload_module diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 9f7ee7bb3..71bf6b5ae 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -206,15 +206,15 @@ def test_to_standard_sql_simple_type(self): sql_type = self._get_standard_sql_data_type_class() examples = ( # a few legacy types - ("INTEGER", sql_type.INT64), - ("FLOAT", sql_type.FLOAT64), - ("BOOLEAN", sql_type.BOOL), - ("DATETIME", sql_type.DATETIME), + ("INTEGER", sql_type.TypeKind.INT64), + ("FLOAT", sql_type.TypeKind.FLOAT64), + ("BOOLEAN", sql_type.TypeKind.BOOL), + ("DATETIME", sql_type.TypeKind.DATETIME), # a few standard types - ("INT64", sql_type.INT64), - ("FLOAT64", sql_type.FLOAT64), - ("BOOL", sql_type.BOOL), - ("GEOGRAPHY", sql_type.GEOGRAPHY), + ("INT64", sql_type.TypeKind.INT64), + ("FLOAT64", sql_type.TypeKind.FLOAT64), + ("BOOL", sql_type.TypeKind.BOOL), + ("GEOGRAPHY", sql_type.TypeKind.GEOGRAPHY), ) for legacy_type, standard_type in examples: field = self._make_one("some_field", legacy_type) @@ -258,26 +258,26 @@ def test_to_standard_sql_struct_type(self): # level 2 fields sub_sub_field_date = types.StandardSqlField( - name="date_field", type=sql_type(type_kind=sql_type.DATE) + name="date_field", type=sql_type(type_kind=sql_type.TypeKind.DATE) ) sub_sub_field_time = types.StandardSqlField( - name="time_field", type=sql_type(type_kind=sql_type.TIME) + name="time_field", type=sql_type(type_kind=sql_type.TypeKind.TIME) ) # level 1 fields sub_field_struct = types.StandardSqlField( - name="last_used", type=sql_type(type_kind=sql_type.STRUCT) + name="last_used", type=sql_type(type_kind=sql_type.TypeKind.STRUCT) ) sub_field_struct.type.struct_type.fields.extend( [sub_sub_field_date, sub_sub_field_time] ) sub_field_bytes = types.StandardSqlField( - name="image_content", type=sql_type(type_kind=sql_type.BYTES) + 
name="image_content", type=sql_type(type_kind=sql_type.TypeKind.BYTES) ) # level 0 (top level) expected_result = types.StandardSqlField( - name="image_usage", type=sql_type(type_kind=sql_type.STRUCT) + name="image_usage", type=sql_type(type_kind=sql_type.TypeKind.STRUCT) ) expected_result.type.struct_type.fields.extend( [sub_field_bytes, sub_field_struct] @@ -304,8 +304,8 @@ def test_to_standard_sql_array_type_simple(self): sql_type = self._get_standard_sql_data_type_class() # construct expected result object - expected_sql_type = sql_type(type_kind=sql_type.ARRAY) - expected_sql_type.array_element_type.type_kind = sql_type.INT64 + expected_sql_type = sql_type(type_kind=sql_type.TypeKind.ARRAY) + expected_sql_type.array_element_type.type_kind = sql_type.TypeKind.INT64 expected_result = types.StandardSqlField( name="valid_numbers", type=expected_sql_type ) @@ -323,19 +323,19 @@ def test_to_standard_sql_array_type_struct(self): # define person STRUCT name_field = types.StandardSqlField( - name="name", type=sql_type(type_kind=sql_type.STRING) + name="name", type=sql_type(type_kind=sql_type.TypeKind.STRING) ) age_field = types.StandardSqlField( - name="age", type=sql_type(type_kind=sql_type.INT64) + name="age", type=sql_type(type_kind=sql_type.TypeKind.INT64) ) person_struct = types.StandardSqlField( - name="person_info", type=sql_type(type_kind=sql_type.STRUCT) + name="person_info", type=sql_type(type_kind=sql_type.TypeKind.STRUCT) ) person_struct.type.struct_type.fields.extend([name_field, age_field]) # define expected result - an ARRAY of person structs expected_sql_type = sql_type( - type_kind=sql_type.ARRAY, array_element_type=person_struct.type + type_kind=sql_type.TypeKind.ARRAY, array_element_type=person_struct.type ) expected_result = types.StandardSqlField( name="known_people", type=expected_sql_type @@ -358,7 +358,9 @@ def test_to_standard_sql_unknown_type(self): standard_field = field.to_standard_sql() self.assertEqual(standard_field.name, "weird_field") - 
self.assertEqual(standard_field.type.type_kind, sql_type.TYPE_KIND_UNSPECIFIED) + self.assertEqual( + standard_field.type.type_kind, sql_type.TypeKind.TYPE_KIND_UNSPECIFIED + ) def test___eq___wrong_type(self): field = self._make_one("test", "STRING") diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 10bedfee1..12169658e 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -13,7 +13,6 @@ # limitations under the License. import datetime as dt -import itertools import logging import time import unittest @@ -26,19 +25,13 @@ import google.api_core.exceptions try: - from google.cloud import bigquery_storage_v1 - from google.cloud import bigquery_storage_v1beta1 - from google.cloud.bigquery_storage_v1.gapic.transports import ( - big_query_read_grpc_transport, - ) - from google.cloud.bigquery_storage_v1beta1.gapic.transports import ( - big_query_storage_grpc_transport as big_query_storage_grpc_transport_v1beta1, + from google.cloud import bigquery_storage + from google.cloud.bigquery_storage_v1.services.big_query_read.transports import ( + grpc as big_query_read_grpc_transport, ) except ImportError: # pragma: NO COVER - bigquery_storage_v1 = None - bigquery_storage_v1beta1 = None + bigquery_storage = None big_query_read_grpc_transport = None - big_query_storage_grpc_transport_v1beta1 = None try: import pandas @@ -1846,7 +1839,7 @@ def test_to_arrow_w_empty_table(self): @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_arrow_max_results_w_create_bqstorage_warning(self): from google.cloud.bigquery.schema import SchemaField @@ -1886,15 +1879,15 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" 
+ bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_arrow_w_bqstorage(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut from google.cloud.bigquery_storage_v1 import reader - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) - bqstorage_client.transport = mock.create_autospec( + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + bqstorage_client._transport = mock.create_autospec( big_query_read_grpc_transport.BigQueryReadGrpcTransport ) streams = [ @@ -1902,7 +1895,7 @@ def test_to_arrow_w_bqstorage(self): {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, ] - session = bigquery_storage_v1.types.ReadSession(streams=streams) + session = bigquery_storage.types.ReadSession(streams=streams) arrow_schema = pyarrow.schema( [ pyarrow.field("colA", pyarrow.int64()), @@ -1963,23 +1956,23 @@ def test_to_arrow_w_bqstorage(self): self.assertEqual(actual_tbl.num_rows, total_rows) # Don't close the client if it was passed in. 
- bqstorage_client.transport.channel.close.assert_not_called() + bqstorage_client._transport.grpc_channel.close.assert_not_called() @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_arrow_w_bqstorage_creates_client(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut mock_client = _mock_client() - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) - bqstorage_client.transport = mock.create_autospec( + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + bqstorage_client._transport = mock.create_autospec( big_query_read_grpc_transport.BigQueryReadGrpcTransport ) mock_client._create_bqstorage_client.return_value = bqstorage_client - session = bigquery_storage_v1.types.ReadSession() + session = bigquery_storage.types.ReadSession() bqstorage_client.create_read_session.return_value = session row_iterator = mut.RowIterator( mock_client, @@ -1994,7 +1987,7 @@ def test_to_arrow_w_bqstorage_creates_client(self): ) row_iterator.to_arrow(create_bqstorage_client=True) mock_client._create_bqstorage_client.assert_called_once() - bqstorage_client.transport.channel.close.assert_called_once() + bqstorage_client._transport.grpc_channel.close.assert_called_once() @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow_create_bqstorage_client_wo_bqstorage(self): @@ -2025,14 +2018,14 @@ def test_to_arrow_create_bqstorage_client_wo_bqstorage(self): @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_arrow_w_bqstorage_no_streams(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as 
mut - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) - session = bigquery_storage_v1.types.ReadSession() + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() arrow_schema = pyarrow.schema( [ pyarrow.field("colA", pyarrow.string()), @@ -2157,7 +2150,7 @@ def test_to_dataframe_iterable(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_iterable_w_bqstorage(self): @@ -2173,8 +2166,8 @@ def test_to_dataframe_iterable_w_bqstorage(self): ] arrow_schema = pyarrow.schema(arrow_fields) - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) - bqstorage_client.transport = mock.create_autospec( + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + bqstorage_client._transport = mock.create_autospec( big_query_read_grpc_transport.BigQueryReadGrpcTransport ) streams = [ @@ -2182,7 +2175,7 @@ def test_to_dataframe_iterable_w_bqstorage(self): {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, ] - session = bigquery_storage_v1.types.ReadSession( + session = bigquery_storage.types.ReadSession( streams=streams, arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, ) @@ -2225,7 +2218,7 @@ def test_to_dataframe_iterable_w_bqstorage(self): self.assertEqual(len(got), total_pages) # Don't close the client if it was passed in. 
- bqstorage_client.transport.channel.close.assert_not_called() + bqstorage_client._transport.grpc_channel.close.assert_not_called() @mock.patch("google.cloud.bigquery.table.pandas", new=None) def test_to_dataframe_iterable_error_if_pandas_is_none(self): @@ -2790,19 +2783,19 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_dataframe_w_bqstorage_creates_client(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut mock_client = _mock_client() - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) - bqstorage_client.transport = mock.create_autospec( + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + bqstorage_client._transport = mock.create_autospec( big_query_read_grpc_transport.BigQueryReadGrpcTransport ) mock_client._create_bqstorage_client.return_value = bqstorage_client - session = bigquery_storage_v1.types.ReadSession() + session = bigquery_storage.types.ReadSession() bqstorage_client.create_read_session.return_value = session row_iterator = mut.RowIterator( mock_client, @@ -2817,18 +2810,18 @@ def test_to_dataframe_w_bqstorage_creates_client(self): ) row_iterator.to_dataframe(create_bqstorage_client=True) mock_client._create_bqstorage_client.assert_called_once() - bqstorage_client.transport.channel.close.assert_called_once() + bqstorage_client._transport.grpc_channel.close.assert_called_once() @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_dataframe_w_bqstorage_no_streams(self): from google.cloud.bigquery import schema from 
google.cloud.bigquery import table as mut - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) - session = bigquery_storage_v1.types.ReadSession() + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() bqstorage_client.create_read_session.return_value = session row_iterator = mut.RowIterator( @@ -2848,55 +2841,16 @@ def test_to_dataframe_w_bqstorage_no_streams(self): self.assertEqual(list(got), column_names) self.assertTrue(got.empty) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" - ) - def test_to_dataframe_w_bqstorage_v1beta1_no_streams(self): - from google.cloud.bigquery import schema - from google.cloud.bigquery import table as mut - - bqstorage_client = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) - session = bigquery_storage_v1beta1.types.ReadSession() - bqstorage_client.create_read_session.return_value = session - - row_iterator = mut.RowIterator( - _mock_client(), - api_request=None, - path=None, - schema=[ - schema.SchemaField("colA", "INTEGER"), - schema.SchemaField("colC", "FLOAT"), - schema.SchemaField("colB", "STRING"), - ], - table=mut.TableReference.from_string("proj.dset.tbl"), - ) - - with warnings.catch_warnings(record=True) as warned: - got = row_iterator.to_dataframe(bqstorage_client) - - column_names = ["colA", "colC", "colB"] - self.assertEqual(list(got), column_names) - self.assertTrue(got.empty) - - self.assertEqual(len(warned), 1) - warning = warned[0] - self.assertTrue( - "Support for BigQuery Storage v1beta1 clients is deprecated" in str(warning) - ) - @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, 
"Requires `pyarrow`") def test_to_dataframe_w_bqstorage_logs_session(self): from google.cloud.bigquery.table import Table - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) - session = bigquery_storage_v1.types.ReadSession() + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() session.name = "projects/test-proj/locations/us/sessions/SOMESESSION" bqstorage_client.create_read_session.return_value = session mock_logger = mock.create_autospec(logging.Logger) @@ -2914,7 +2868,7 @@ def test_to_dataframe_w_bqstorage_logs_session(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_empty_streams(self): @@ -2930,8 +2884,8 @@ def test_to_dataframe_w_bqstorage_empty_streams(self): ] arrow_schema = pyarrow.schema(arrow_fields) - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) - session = bigquery_storage_v1.types.ReadSession( + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession( streams=[{"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}], arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, ) @@ -2969,7 +2923,7 @@ def test_to_dataframe_w_bqstorage_empty_streams(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_nonempty(self): @@ -2985,8 +2939,8 @@ def test_to_dataframe_w_bqstorage_nonempty(self): ] arrow_schema = 
pyarrow.schema(arrow_fields) - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) - bqstorage_client.transport = mock.create_autospec( + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + bqstorage_client._transport = mock.create_autospec( big_query_read_grpc_transport.BigQueryReadGrpcTransport ) streams = [ @@ -2994,7 +2948,7 @@ def test_to_dataframe_w_bqstorage_nonempty(self): {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, ] - session = bigquery_storage_v1.types.ReadSession( + session = bigquery_storage.types.ReadSession( streams=streams, arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, ) @@ -3045,103 +2999,11 @@ def test_to_dataframe_w_bqstorage_nonempty(self): self.assertEqual(len(got.index), total_rows) # Don't close the client if it was passed in. - bqstorage_client.transport.channel.close.assert_not_called() - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_to_dataframe_w_bqstorage_v1beta1_nonempty(self): - from google.cloud.bigquery import schema - from google.cloud.bigquery import table as mut - from google.cloud.bigquery_storage_v1beta1 import reader - - arrow_fields = [ - pyarrow.field("colA", pyarrow.int64()), - # Not alphabetical to test column order. - pyarrow.field("colC", pyarrow.float64()), - pyarrow.field("colB", pyarrow.utf8()), - ] - arrow_schema = pyarrow.schema(arrow_fields) - - bqstorage_client = mock.create_autospec( - bigquery_storage_v1beta1.BigQueryStorageClient - ) - bqstorage_client.transport = mock.create_autospec( - big_query_storage_grpc_transport_v1beta1.BigQueryStorageGrpcTransport - ) - streams = [ - # Use two streams we want to check frames are read from each stream. 
- {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, - {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, - ] - session = bigquery_storage_v1beta1.types.ReadSession( - streams=streams, - arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, - ) - bqstorage_client.create_read_session.return_value = session - - mock_rowstream = mock.create_autospec(reader.ReadRowsStream) - bqstorage_client.read_rows.return_value = mock_rowstream - - mock_rows = mock.create_autospec(reader.ReadRowsIterable) - mock_rowstream.rows.return_value = mock_rows - page_items = [ - pyarrow.array([1, -1]), - pyarrow.array([2.0, 4.0]), - pyarrow.array(["abc", "def"]), - ] - page_record_batch = pyarrow.RecordBatch.from_arrays( - page_items, schema=arrow_schema - ) - mock_page = mock.create_autospec(reader.ReadRowsPage) - mock_page.to_arrow.return_value = page_record_batch - mock_pages = (mock_page, mock_page, mock_page) - type(mock_rows).pages = mock.PropertyMock(return_value=mock_pages) - - schema = [ - schema.SchemaField("colA", "IGNORED"), - schema.SchemaField("colC", "IGNORED"), - schema.SchemaField("colB", "IGNORED"), - ] - - row_iterator = mut.RowIterator( - _mock_client(), - None, # api_request: ignored - None, # path: ignored - schema, - table=mut.TableReference.from_string("proj.dset.tbl"), - selected_fields=schema, - ) - - with warnings.catch_warnings(record=True) as warned: - got = row_iterator.to_dataframe(bqstorage_client=bqstorage_client) - - # Was a deprecation warning emitted? - expected_warnings = [ - warning - for warning in warned - if issubclass(warning.category, DeprecationWarning) - and "v1beta1" in str(warning) - ] - self.assertEqual(len(expected_warnings), 1, "Deprecation warning not raised.") - - # Are the columns in the expected order? - column_names = ["colA", "colC", "colB"] - self.assertEqual(list(got), column_names) - - # Have expected number of rows? 
- total_pages = len(streams) * len(mock_pages) - total_rows = len(page_items[0]) * total_pages - self.assertEqual(len(got.index), total_rows) - - # Don't close the client if it was passed in. - bqstorage_client.transport.channel.close.assert_not_called() + bqstorage_client._transport.grpc_channel.close.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): @@ -3156,12 +3018,12 @@ def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, ] - session = bigquery_storage_v1.types.ReadSession( + session = bigquery_storage.types.ReadSession( streams=streams, arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, ) - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) bqstorage_client.create_read_session.return_value = session mock_rowstream = mock.create_autospec(reader.ReadRowsStream) @@ -3195,7 +3057,7 @@ def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(tqdm is None, "Requires `tqdm`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @@ -3211,14 +3073,14 @@ def test_to_dataframe_w_bqstorage_updates_progress_bar(self, tqdm_mock): arrow_fields = [pyarrow.field("testcol", pyarrow.int64())] arrow_schema = 
pyarrow.schema(arrow_fields) - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) streams = [ # Use two streams we want to check that progress bar updates are # sent from each stream. {"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}, {"name": "/projects/proj/dataset/dset/tables/tbl/streams/5678"}, ] - session = bigquery_storage_v1.types.ReadSession( + session = bigquery_storage.types.ReadSession( streams=streams, arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, ) @@ -3274,7 +3136,7 @@ def blocking_to_arrow(*args, **kwargs): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_w_bqstorage_exits_on_keyboardinterrupt(self): @@ -3293,8 +3155,8 @@ def test_to_dataframe_w_bqstorage_exits_on_keyboardinterrupt(self): ] arrow_schema = pyarrow.schema(arrow_fields) - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) - session = bigquery_storage_v1.types.ReadSession( + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession( streams=[ # Use multiple streams because one will fail with a # KeyboardInterrupt, and we want to check that the other streams @@ -3393,12 +3255,12 @@ def test_to_dataframe_tabledata_list_w_multiple_pages_return_unique_index(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_dataframe_w_bqstorage_raises_auth_error(self): from google.cloud.bigquery import table as mut - bqstorage_client = 
mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) bqstorage_client.create_read_session.side_effect = google.api_core.exceptions.Forbidden( "TEST BigQuery Storage API not enabled. TEST" ) @@ -3412,13 +3274,13 @@ def test_to_dataframe_w_bqstorage_raises_auth_error(self): row_iterator.to_dataframe(bqstorage_client=bqstorage_client) @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_dataframe_w_bqstorage_partition(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) row_iterator = mut.RowIterator( _mock_client(), @@ -3432,13 +3294,13 @@ def test_to_dataframe_w_bqstorage_partition(self): row_iterator.to_dataframe(bqstorage_client) @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) def test_to_dataframe_w_bqstorage_snapshot(self): from google.cloud.bigquery import schema from google.cloud.bigquery import table as mut - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) row_iterator = mut.RowIterator( _mock_client(), @@ -3453,7 +3315,7 @@ def test_to_dataframe_w_bqstorage_snapshot(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( - bigquery_storage_v1 is None, "Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): @@ -3472,11 +3334,11 @@ 
def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): arrow_schema = pyarrow.schema(arrow_fields) # create a mock BQ storage client - bqstorage_client = mock.create_autospec(bigquery_storage_v1.BigQueryReadClient) - bqstorage_client.transport = mock.create_autospec( + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + bqstorage_client._transport = mock.create_autospec( big_query_read_grpc_transport.BigQueryReadGrpcTransport ) - session = bigquery_storage_v1.types.ReadSession( + session = bigquery_storage.types.ReadSession( streams=[{"name": "/projects/proj/dataset/dset/tables/tbl/streams/1234"}], arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, ) @@ -3560,7 +3422,7 @@ def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): ) # Don't close the client if it was passed in. - bqstorage_client.transport.channel.close.assert_not_called() + bqstorage_client._transport.grpc_channel.close.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_concat_categorical_dtype_wo_pyarrow(self): @@ -4003,7 +3865,7 @@ def test_set_expiration_w_none(self): @pytest.mark.skipif( - bigquery_storage_v1 is None, reason="Requires `google-cloud-bigquery-storage`" + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) @pytest.mark.parametrize( "table_path", @@ -4022,43 +3884,3 @@ def test_table_reference_to_bqstorage_v1_stable(table_path): for klass in (mut.TableReference, mut.Table, mut.TableListItem): got = klass.from_string(table_path).to_bqstorage() assert got == expected - - -@pytest.mark.skipif( - bigquery_storage_v1beta1 is None, reason="Requires `google-cloud-bigquery-storage`" -) -def test_table_reference_to_bqstorage_v1beta1(): - from google.cloud.bigquery import table as mut - - # Can't use parametrized pytest because bigquery_storage_v1beta1 may not be - # available. 
- expected = bigquery_storage_v1beta1.types.TableReference( - project_id="my-project", dataset_id="my_dataset", table_id="my_table" - ) - cases = ( - "my-project.my_dataset.my_table", - "my-project.my_dataset.my_table$20181225", - "my-project.my_dataset.my_table@1234567890", - "my-project.my_dataset.my_table$20181225@1234567890", - ) - - classes = (mut.TableReference, mut.Table, mut.TableListItem) - - for case, cls in itertools.product(cases, classes): - got = cls.from_string(case).to_bqstorage(v1beta1=True) - assert got == expected - - -@unittest.skipIf( - bigquery_storage_v1beta1 is None, "Requires `google-cloud-bigquery-storage`" -) -def test_table_reference_to_bqstorage_v1beta1_raises_import_error(): - from google.cloud.bigquery import table as mut - - classes = (mut.TableReference, mut.Table, mut.TableListItem) - for cls in classes: - with mock.patch.object(mut, "bigquery_storage_v1beta1", None), pytest.raises( - ValueError - ) as exc_context: - cls.from_string("my-project.my_dataset.my_table").to_bqstorage(v1beta1=True) - assert mut._NO_BQSTORAGE_ERROR in str(exc_context.value) From 114489e5119c1bcc3b6508b7d87714b5b725b74f Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 1 Oct 2020 00:22:05 +0200 Subject: [PATCH 004/341] chore: Release v2.0.0 (#284) This pull request was generated using releasetool. --- CHANGELOG.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e8d367f73..3dac7a0f1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,24 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 2.0.0 + +09-30-2020 14:51 PDT + + +### Implementation Changes + +- Transition the library to microgenerator. ([#278](https://github.com/googleapis/python-bigquery/pull/278)) + This is a **breaking change** that **drops support for Python 2.7 and 3.5** and brings a few other changes. + See [migration guide](https://googleapis.dev/python/bigquery/latest/UPGRADING.html) for more info. 
+ + + +### Internal / Testing Changes + +- Update protoc-generated comments (via synth). ([#270](https://github.com/googleapis/python-bigquery/pull/270)) +- Add CI secrets manager (via synth). ([#271](https://github.com/googleapis/python-bigquery/pull/271)) + ## [1.28.0](https://www.github.com/googleapis/python-bigquery/compare/v1.27.2...v1.28.0) (2020-09-22) From 2779586ef96a1c5cfdada429345248c430896140 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 1 Oct 2020 18:49:59 +0200 Subject: [PATCH 005/341] chore(deps): update dependency google-cloud-bigquery to v2 (#287) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 7fe839119..6edca4f10 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery[pandas,bqstorage,pyarrow]==1.26.1 +google-cloud-bigquery[pandas,bqstorage,pyarrow]==2.0.0 google-auth-oauthlib==0.4.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' From 9da28e52f0837f06fedf1819595b44bc2606211d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 6 Oct 2020 11:58:59 -0500 Subject: [PATCH 006/341] test: update tests to support latest google-cloud-core (#276) `google-cloud-core` version 1.4.2 populates `prettyPrint=false` by default. Update the connection tests to expect a value for `prettyPrint`. 
--- tests/unit/test__http.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/tests/unit/test__http.py b/tests/unit/test__http.py index 4da805d48..691c4c802 100644 --- a/tests/unit/test__http.py +++ b/tests/unit/test__http.py @@ -35,15 +35,33 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_build_api_url_no_extra_query_params(self): + from six.moves.urllib.parse import parse_qsl + from six.moves.urllib.parse import urlsplit + conn = self._make_one(object()) - URI = "/".join([conn.DEFAULT_API_ENDPOINT, "bigquery", conn.API_VERSION, "foo"]) - self.assertEqual(conn.build_api_url("/foo"), URI) + uri = conn.build_api_url("/foo") + scheme, netloc, path, qs, _ = urlsplit(uri) + self.assertEqual("%s://%s" % (scheme, netloc), conn.API_BASE_URL) + self.assertEqual(path, "/".join(["", "bigquery", conn.API_VERSION, "foo"])) + parms = dict(parse_qsl(qs)) + pretty_print = parms.pop("prettyPrint", "false") + self.assertEqual(pretty_print, "false") + self.assertEqual(parms, {}) def test_build_api_url_w_custom_endpoint(self): - custom_endpoint = "https://www.foo-googleapis.com" + from six.moves.urllib.parse import parse_qsl + from six.moves.urllib.parse import urlsplit + + custom_endpoint = "https://foo-bigquery.googleapis.com" conn = self._make_one(object(), api_endpoint=custom_endpoint) - URI = "/".join([custom_endpoint, "bigquery", conn.API_VERSION, "foo"]) - self.assertEqual(conn.build_api_url("/foo"), URI) + uri = conn.build_api_url("/foo") + scheme, netloc, path, qs, _ = urlsplit(uri) + self.assertEqual("%s://%s" % (scheme, netloc), custom_endpoint) + self.assertEqual(path, "/".join(["", "bigquery", conn.API_VERSION, "foo"])) + parms = dict(parse_qsl(qs)) + pretty_print = parms.pop("prettyPrint", "false") + self.assertEqual(pretty_print, "false") + self.assertEqual(parms, {}) def test_build_api_url_w_extra_query_params(self): from six.moves.urllib.parse import parse_qsl From 
9090e1ccd8825a97835325b4829f6e7ecfd9ea88 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 6 Oct 2020 12:57:37 -0500 Subject: [PATCH 007/341] feat: add constants for MONTH and YEAR time partitioning types (#283) Co-authored-by: Steffany Brown <30247553+steffnay@users.noreply.github.com> --- google/cloud/bigquery/table.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 902a7040a..a72bacb74 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1980,6 +1980,12 @@ class TimePartitioningType(object): HOUR = "HOUR" """str: Generates one partition per hour.""" + MONTH = "MONTH" + """str: Generates one partition per month.""" + + YEAR = "YEAR" + """str: Generates one partition per year.""" + class TimePartitioning(object): """Configures time-based partitioning for a table. @@ -1987,13 +1993,24 @@ class TimePartitioning(object): Args: type_ (Optional[google.cloud.bigquery.table.TimePartitioningType]): Specifies the type of time partitioning to perform. Defaults to - :attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY`, - which is the only currently supported type. + :attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY`. + + Supported values are: + + * :attr:`~google.cloud.bigquery.table.TimePartitioningType.HOUR` + * :attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY` + * :attr:`~google.cloud.bigquery.table.TimePartitioningType.MONTH` + * :attr:`~google.cloud.bigquery.table.TimePartitioningType.YEAR` + field (Optional[str]): If set, the table is partitioned by this field. If not set, the table is partitioned by pseudo column ``_PARTITIONTIME``. The field - must be a top-level ``TIMESTAMP`` or ``DATE`` field. Its mode must - be ``NULLABLE`` or ``REQUIRED``. + must be a top-level ``TIMESTAMP``, ``DATETIME``, or ``DATE`` + field. Its mode must be ``NULLABLE`` or ``REQUIRED``. 
+ + See the `time-unit column-partitioned tables guide + `_ + in the BigQuery documentation. expiration_ms(Optional[int]): Number of milliseconds for which to keep the storage for a partition. From d1eb8b3dcc789916c5d3ba8464f62b1f8bef35ff Mon Sep 17 00:00:00 2001 From: Ryan Yuan Date: Wed, 7 Oct 2020 06:00:18 +1100 Subject: [PATCH 008/341] docs(samples): add create_table_clustered code snippet (#291) * docs(samples): add create_table_clustered code snippet * docs(samples): add create_table_clustered code snippet * fix unit test and lint Co-authored-by: Tim Swast --- docs/usage/tables.rst | 9 +++++ samples/create_table_clustered.py | 42 ++++++++++++++++++++ samples/tests/test_create_table_clustered.py | 22 ++++++++++ 3 files changed, 73 insertions(+) create mode 100644 samples/create_table_clustered.py create mode 100644 samples/tests/test_create_table_clustered.py diff --git a/docs/usage/tables.rst b/docs/usage/tables.rst index 27af7c7df..7afca05e2 100644 --- a/docs/usage/tables.rst +++ b/docs/usage/tables.rst @@ -58,6 +58,15 @@ Create an empty table with the :start-after: [START bigquery_create_table] :end-before: [END bigquery_create_table] +Create a clustered table with the +:func:`~google.cloud.bigquery.client.Client.create_table` method: + +.. literalinclude:: ../samples/create_table_clustered.py + :language: python + :dedent: 4 + :start-after: [START bigquery_create_table_clustered] + :end-before: [END bigquery_create_table_clustered] + Create an integer range partitioned table with the :func:`~google.cloud.bigquery.client.Client.create_table` method: diff --git a/samples/create_table_clustered.py b/samples/create_table_clustered.py new file mode 100644 index 000000000..2b45b747e --- /dev/null +++ b/samples/create_table_clustered.py @@ -0,0 +1,42 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_table_clustered(table_id): + + # [START bigquery_create_table_clustered] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name" + + schema = [ + bigquery.SchemaField("full_name", "STRING"), + bigquery.SchemaField("city", "STRING"), + bigquery.SchemaField("zipcode", "INTEGER"), + ] + + table = bigquery.Table(table_id, schema=schema) + table.clustering_fields = ["city", "zipcode"] + table = client.create_table(table) # Make an API request. + print( + "Created clustered table {}.{}.{}".format( + table.project, table.dataset_id, table.table_id + ) + ) + # [END bigquery_create_table_clustered] + return table diff --git a/samples/tests/test_create_table_clustered.py b/samples/tests/test_create_table_clustered.py new file mode 100644 index 000000000..8eab5d48b --- /dev/null +++ b/samples/tests/test_create_table_clustered.py @@ -0,0 +1,22 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import create_table_clustered + + +def test_create_table_clustered(capsys, random_table_id): + table = create_table_clustered.create_table_clustered(random_table_id) + out, _ = capsys.readouterr() + assert "Created clustered table {}".format(random_table_id) in out + assert table.clustering_fields == ["city", "zipcode"] From b54f86769c982ce5c8fcbf3889f82450428bb40c Mon Sep 17 00:00:00 2001 From: Avihay Kain <2963806+grooveygr@users.noreply.github.com> Date: Wed, 7 Oct 2020 17:38:43 +0300 Subject: [PATCH 009/341] perf: remove redundant array deepcopy (#26) * perf(bigquery): remove redundant array deepcopy deepcopy can be a very costly operation when considering large arrays with complex nested objects. refactor helpers to allow recursive conversion without copying arrays. * add check to ignore REPEATED mode * Update google/cloud/bigquery/_helpers.py Co-authored-by: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Co-authored-by: Tres Seaver Co-authored-by: Tim Swast Co-authored-by: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> --- google/cloud/bigquery/_helpers.py | 39 +++++++++++++++++++++++-------- tests/unit/test__helpers.py | 35 +++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 10 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 47851d42c..b59b3d794 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -15,7 +15,6 @@ """Shared helper functions for BigQuery API classes.""" import base64 -import copy import datetime import decimal import re @@ -397,13 +396,9 @@ def _repeated_field_to_json(field, row_value): Returns: List[Any]: A list of JSON-serializable objects. """ - # Remove the REPEATED, but keep the other fields. This allows us to process - # each item as if it were a top-level field. 
- item_field = copy.deepcopy(field) - item_field._mode = "NULLABLE" values = [] for item in row_value: - values.append(_field_to_json(item_field, item)) + values.append(_single_field_to_json(field, item)) return values @@ -462,6 +457,33 @@ def _record_field_to_json(fields, row_value): return record +def _single_field_to_json(field, row_value): + """Convert a single field into JSON-serializable values. + + Ignores mode so that this can function for ARRAY / REPEATING fields + without requiring a deepcopy of the field. See: + https://github.com/googleapis/python-bigquery/issues/6 + + Args: + field (google.cloud.bigquery.schema.SchemaField): + The SchemaField to use for type conversion and field name. + + row_value (Any): + Scalar or Struct to be inserted. The type + is inferred from the SchemaField's field_type. + + Returns: + Any: A JSON-serializable object. + """ + if row_value is None: + return None + + if field.field_type == "RECORD": + return _record_field_to_json(field.fields, row_value) + + return _scalar_field_to_json(field, row_value) + + def _field_to_json(field, row_value): """Convert a field into JSON-serializable values. 
@@ -483,10 +505,7 @@ def _field_to_json(field, row_value): if field.mode == "REPEATED": return _repeated_field_to_json(field, row_value) - if field.field_type == "RECORD": - return _record_field_to_json(field.fields, row_value) - - return _scalar_field_to_json(field, row_value) + return _single_field_to_json(field, row_value) def _snake_to_camel_case(value): diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 28ebe8144..16c4fb8a5 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -806,6 +806,41 @@ def test_w_known_field_type(self): self.assertEqual(converted, str(original)) +class Test_single_field_to_json(unittest.TestCase): + def _call_fut(self, field, value): + from google.cloud.bigquery._helpers import _single_field_to_json + + return _single_field_to_json(field, value) + + def test_w_none(self): + field = _make_field("INT64") + original = None + converted = self._call_fut(field, original) + self.assertIsNone(converted) + + def test_w_record(self): + subfields = [ + _make_field("INT64", name="one"), + _make_field("STRING", name="two"), + ] + field = _make_field("RECORD", fields=subfields) + original = {"one": 42, "two": "two"} + converted = self._call_fut(field, original) + self.assertEqual(converted, {"one": "42", "two": "two"}) + + def test_w_scalar(self): + field = _make_field("INT64") + original = 42 + converted = self._call_fut(field, original) + self.assertEqual(converted, str(original)) + + def test_w_scalar_ignores_mode(self): + field = _make_field("STRING", mode="REPEATED") + original = "hello world" + converted = self._call_fut(field, original) + self.assertEqual(converted, original) + + class Test_repeated_field_to_json(unittest.TestCase): def _call_fut(self, field, value): from google.cloud.bigquery._helpers import _repeated_field_to_json From 31644d380b35a76a9147801a4b6b0271c246fd0c Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Wed, 7 Oct 2020 10:25:05 -0700 Subject: [PATCH 010/341] 
chore(python): skip reporting coverage for namespace package (#279) * chore(python): remove note about editable installs `pip install -e .` is supported and is how we install the library for tests. Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Tue Sep 22 12:06:12 2020 -0600 Source-Repo: googleapis/synthtool Source-Sha: a651c5fb763c69a921aecdd3e1d8dc51dbf20f8d Source-Link: https://github.com/googleapis/synthtool/commit/a651c5fb763c69a921aecdd3e1d8dc51dbf20f8d * chore(python): skip reporting coverage for namespace package Source-Author: Tres Seaver Source-Date: Wed Sep 23 10:58:13 2020 -0400 Source-Repo: googleapis/synthtool Source-Sha: f3c04883d6c43261ff13db1f52d03a283be06871 Source-Link: https://github.com/googleapis/synthtool/commit/f3c04883d6c43261ff13db1f52d03a283be06871 Co-authored-by: Tim Swast --- .coveragerc | 5 ++++- synth.metadata | 14 +++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/.coveragerc b/.coveragerc index dd39c8546..0d8e6297d 100644 --- a/.coveragerc +++ b/.coveragerc @@ -17,6 +17,8 @@ # Generated by synthtool. DO NOT EDIT! 
[run] branch = True +omit = + google/cloud/__init__.py [report] fail_under = 100 @@ -32,4 +34,5 @@ omit = */gapic/*.py */proto/*.py */core/*.py - */site-packages/*.py \ No newline at end of file + */site-packages/*.py + google/cloud/__init__.py diff --git a/synth.metadata b/synth.metadata index c47ff1e51..d40e66dac 100644 --- a/synth.metadata +++ b/synth.metadata @@ -3,15 +3,23 @@ { "git": { "name": ".", - "remote": "git@github.com:plamut/python-bigquery.git", - "sha": "64d666033446f9af669bb8eb9170b8f62d6308e4" + "remote": "https://github.com/googleapis/python-bigquery.git", + "sha": "fbbe0cb0ea22161d81f1e5504bb89b55e4198634" + } + }, + { + "git": { + "name": "googleapis", + "remote": "https://github.com/googleapis/googleapis.git", + "sha": "0dc0a6c0f1a9f979bc0690f0caa5fbafa3000c2c", + "internalRef": "327026955" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "8a7a3021fe97aa0a3641db642fe2b767f1c8110f" + "sha": "f3c04883d6c43261ff13db1f52d03a283be06871" } } ], From c05593094c1405f752b2c51b15202a6dbb5cb83f Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 8 Oct 2020 10:19:04 -0500 Subject: [PATCH 011/341] fix: remove unnecessary dependency on libcst (#308) * fix: remove unnecessary dependency on libcst * remove scripts Co-authored-by: Steffany Brown <30247553+steffnay@users.noreply.github.com> --- scripts/fixup_bigquery_v2_keywords.py | 181 -------------------------- setup.py | 2 - synth.py | 1 + 3 files changed, 1 insertion(+), 183 deletions(-) delete mode 100644 scripts/fixup_bigquery_v2_keywords.py diff --git a/scripts/fixup_bigquery_v2_keywords.py b/scripts/fixup_bigquery_v2_keywords.py deleted file mode 100644 index 82b46d64e..000000000 --- a/scripts/fixup_bigquery_v2_keywords.py +++ /dev/null @@ -1,181 +0,0 @@ -# -*- coding: utf-8 -*- - -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import argparse -import os -import libcst as cst -import pathlib -import sys -from typing import (Any, Callable, Dict, List, Sequence, Tuple) - - -def partition( - predicate: Callable[[Any], bool], - iterator: Sequence[Any] -) -> Tuple[List[Any], List[Any]]: - """A stable, out-of-place partition.""" - results = ([], []) - - for i in iterator: - results[int(predicate(i))].append(i) - - # Returns trueList, falseList - return results[1], results[0] - - -class bigqueryCallTransformer(cst.CSTTransformer): - CTRL_PARAMS: Tuple[str] = ('retry', 'timeout', 'metadata') - METHOD_TO_PARAMS: Dict[str, Tuple[str]] = { - 'delete_model': ('project_id', 'dataset_id', 'model_id', ), - 'get_model': ('project_id', 'dataset_id', 'model_id', ), - 'list_models': ('project_id', 'dataset_id', 'max_results', 'page_token', ), - 'patch_model': ('project_id', 'dataset_id', 'model_id', 'model', ), - - } - - def leave_Call(self, original: cst.Call, updated: cst.Call) -> cst.CSTNode: - try: - key = original.func.attr.value - kword_params = self.METHOD_TO_PARAMS[key] - except (AttributeError, KeyError): - # Either not a method from the API or too convoluted to be sure. - return updated - - # If the existing code is valid, keyword args come after positional args. - # Therefore, all positional args must map to the first parameters. - args, kwargs = partition(lambda a: not bool(a.keyword), updated.args) - if any(k.keyword.value == "request" for k in kwargs): - # We've already fixed this file, don't fix it again. 
- return updated - - kwargs, ctrl_kwargs = partition( - lambda a: not a.keyword.value in self.CTRL_PARAMS, - kwargs - ) - - args, ctrl_args = args[:len(kword_params)], args[len(kword_params):] - ctrl_kwargs.extend(cst.Arg(value=a.value, keyword=cst.Name(value=ctrl)) - for a, ctrl in zip(ctrl_args, self.CTRL_PARAMS)) - - request_arg = cst.Arg( - value=cst.Dict([ - cst.DictElement( - cst.SimpleString("'{}'".format(name)), - cst.Element(value=arg.value) - ) - # Note: the args + kwargs looks silly, but keep in mind that - # the control parameters had to be stripped out, and that - # those could have been passed positionally or by keyword. - for name, arg in zip(kword_params, args + kwargs)]), - keyword=cst.Name("request") - ) - - return updated.with_changes( - args=[request_arg] + ctrl_kwargs - ) - - -def fix_files( - in_dir: pathlib.Path, - out_dir: pathlib.Path, - *, - transformer=bigqueryCallTransformer(), -): - """Duplicate the input dir to the output dir, fixing file method calls. - - Preconditions: - * in_dir is a real directory - * out_dir is a real, empty directory - """ - pyfile_gen = ( - pathlib.Path(os.path.join(root, f)) - for root, _, files in os.walk(in_dir) - for f in files if os.path.splitext(f)[1] == ".py" - ) - - for fpath in pyfile_gen: - with open(fpath, 'r') as f: - src = f.read() - - # Parse the code and insert method call fixes. - tree = cst.parse_module(src) - updated = tree.visit(transformer) - - # Create the path and directory structure for the new file. - updated_path = out_dir.joinpath(fpath.relative_to(in_dir)) - updated_path.parent.mkdir(parents=True, exist_ok=True) - - # Generate the updated source file at the corresponding path. - with open(updated_path, 'w') as f: - f.write(updated.code) - - -if __name__ == '__main__': - parser = argparse.ArgumentParser( - description="""Fix up source that uses the bigquery client library. - -The existing sources are NOT overwritten but are copied to output_dir with changes made. 
- -Note: This tool operates at a best-effort level at converting positional - parameters in client method calls to keyword based parameters. - Cases where it WILL FAIL include - A) * or ** expansion in a method call. - B) Calls via function or method alias (includes free function calls) - C) Indirect or dispatched calls (e.g. the method is looked up dynamically) - - These all constitute false negatives. The tool will also detect false - positives when an API method shares a name with another method. -""") - parser.add_argument( - '-d', - '--input-directory', - required=True, - dest='input_dir', - help='the input directory to walk for python files to fix up', - ) - parser.add_argument( - '-o', - '--output-directory', - required=True, - dest='output_dir', - help='the directory to output files fixed via un-flattening', - ) - args = parser.parse_args() - input_dir = pathlib.Path(args.input_dir) - output_dir = pathlib.Path(args.output_dir) - if not input_dir.is_dir(): - print( - f"input directory '{input_dir}' does not exist or is not a directory", - file=sys.stderr, - ) - sys.exit(-1) - - if not output_dir.is_dir(): - print( - f"output directory '{output_dir}' does not exist or is not a directory", - file=sys.stderr, - ) - sys.exit(-1) - - if os.listdir(output_dir): - print( - f"output directory '{output_dir}' is not empty", - file=sys.stderr, - ) - sys.exit(-1) - - fix_files(input_dir, output_dir) diff --git a/setup.py b/setup.py index 2cb57aad2..109fcb10c 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,6 @@ dependencies = [ "google-api-core[grpc] >= 1.22.2, < 2.0.0dev", "proto-plus >= 1.10.0", - "libcst >= 0.2.5", "google-cloud-core >= 1.4.1, < 2.0dev", "google-resumable-media >= 0.6.0, < 2.0dev", "six >=1.13.0,< 2.0.0dev", @@ -125,7 +124,6 @@ install_requires=dependencies, extras_require=extras, python_requires=">=3.6", - scripts=["scripts/fixup_bigquery_v2_keywords.py"], include_package_data=True, zip_safe=False, ) diff --git a/synth.py b/synth.py index 
501380be2..97466d0f4 100644 --- a/synth.py +++ b/synth.py @@ -36,6 +36,7 @@ "README.rst", "noxfile.py", "setup.py", + "scripts/fixup_bigquery_v2_keywords.py", library / f"google/cloud/bigquery/__init__.py", library / f"google/cloud/bigquery/py.typed", # There are no public API endpoints for the generated ModelServiceClient, From 1a4dc12e8d6ab80bea72fa8db83ee1916ead6e56 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 8 Oct 2020 15:44:05 +0000 Subject: [PATCH 012/341] chore: release 2.1.0 (#301) :robot: I have created a release \*beep\* \*boop\* --- ## [2.1.0](https://www.github.com/googleapis/python-bigquery/compare/v2.0.0...v2.1.0) (2020-10-08) ### Features * add constants for MONTH and YEAR time partitioning types ([#283](https://www.github.com/googleapis/python-bigquery/issues/283)) ([9090e1c](https://www.github.com/googleapis/python-bigquery/commit/9090e1ccd8825a97835325b4829f6e7ecfd9ea88)) ### Bug Fixes * remove unnecessary dependency on libcst ([#308](https://www.github.com/googleapis/python-bigquery/issues/308)) ([c055930](https://www.github.com/googleapis/python-bigquery/commit/c05593094c1405f752b2c51b15202a6dbb5cb83f)) ### Performance Improvements * remove redundant array deepcopy ([#26](https://www.github.com/googleapis/python-bigquery/issues/26)) ([b54f867](https://www.github.com/googleapis/python-bigquery/commit/b54f86769c982ce5c8fcbf3889f82450428bb40c)) ### Documentation * **samples:** add create_table_clustered code snippet ([#291](https://www.github.com/googleapis/python-bigquery/issues/291)) ([d1eb8b3](https://www.github.com/googleapis/python-bigquery/commit/d1eb8b3dcc789916c5d3ba8464f62b1f8bef35ff)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). 
--- CHANGELOG.md | 22 ++++++++++++++++++++++ setup.py | 2 +- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3dac7a0f1..ad6c9551f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,28 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.1.0](https://www.github.com/googleapis/python-bigquery/compare/v2.0.0...v2.1.0) (2020-10-08) + + +### Features + +* add constants for MONTH and YEAR time partitioning types ([#283](https://www.github.com/googleapis/python-bigquery/issues/283)) ([9090e1c](https://www.github.com/googleapis/python-bigquery/commit/9090e1ccd8825a97835325b4829f6e7ecfd9ea88)) + + +### Bug Fixes + +* remove unnecessary dependency on libcst ([#308](https://www.github.com/googleapis/python-bigquery/issues/308)) ([c055930](https://www.github.com/googleapis/python-bigquery/commit/c05593094c1405f752b2c51b15202a6dbb5cb83f)) + + +### Performance Improvements + +* remove redundant array deepcopy ([#26](https://www.github.com/googleapis/python-bigquery/issues/26)) ([b54f867](https://www.github.com/googleapis/python-bigquery/commit/b54f86769c982ce5c8fcbf3889f82450428bb40c)) + + +### Documentation + +* **samples:** add create_table_clustered code snippet ([#291](https://www.github.com/googleapis/python-bigquery/issues/291)) ([d1eb8b3](https://www.github.com/googleapis/python-bigquery/commit/d1eb8b3dcc789916c5d3ba8464f62b1f8bef35ff)) + ## 2.0.0 09-30-2020 14:51 PDT diff --git a/setup.py b/setup.py index 109fcb10c..14b38b63e 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "2.0.0" +version = "2.1.0" # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' From 61634be9bf9e3df7589fc1bfdbda87288859bb13 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 9 Oct 2020 11:57:40 -0500 Subject: [PATCH 013/341] docs: update snippets samples to support version 2.0 (#309) * docs: 
update snippets samples to support version 2.0 For some reason, old versions of the google-cloud-bigquery-storage library were still getting used. This pins those dependencies directly, instead. Also, updates the samples to remove warnings about `client.dataset`. * blacken --- samples/snippets/authorized_view_tutorial.py | 8 ++++++-- samples/snippets/authorized_view_tutorial_test.py | 10 +++++++--- samples/snippets/natality_tutorial.py | 4 +++- samples/snippets/natality_tutorial_test.py | 4 ++-- samples/snippets/quickstart.py | 4 ++-- samples/snippets/requirements.txt | 6 +++++- 6 files changed, 25 insertions(+), 11 deletions(-) diff --git a/samples/snippets/authorized_view_tutorial.py b/samples/snippets/authorized_view_tutorial.py index 6b5cc378f..b6a20c6ec 100644 --- a/samples/snippets/authorized_view_tutorial.py +++ b/samples/snippets/authorized_view_tutorial.py @@ -27,16 +27,18 @@ def run_authorized_view_tutorial(override_values={}): client = bigquery.Client() source_dataset_id = "github_source_data" + source_dataset_id_full = "{}.{}".format(client.project, source_dataset_id) # [END bigquery_authorized_view_tutorial] # [END bigquery_avt_create_source_dataset] # To facilitate testing, we replace values with alternatives # provided by the testing harness. source_dataset_id = override_values.get("source_dataset_id", source_dataset_id) + source_dataset_id_full = "{}.{}".format(client.project, source_dataset_id) # [START bigquery_authorized_view_tutorial] # [START bigquery_avt_create_source_dataset] - source_dataset = bigquery.Dataset(client.dataset(source_dataset_id)) + source_dataset = bigquery.Dataset(source_dataset_id_full) # Specify the geographic location where the dataset should reside. 
source_dataset.location = "US" source_dataset = client.create_dataset(source_dataset) # API request @@ -66,16 +68,18 @@ def run_authorized_view_tutorial(override_values={}): # Create a separate dataset to store your view # [START bigquery_avt_create_shared_dataset] shared_dataset_id = "shared_views" + shared_dataset_id_full = "{}.{}".format(client.project, shared_dataset_id) # [END bigquery_authorized_view_tutorial] # [END bigquery_avt_create_shared_dataset] # To facilitate testing, we replace values with alternatives # provided by the testing harness. shared_dataset_id = override_values.get("shared_dataset_id", shared_dataset_id) + shared_dataset_id_full = "{}.{}".format(client.project, shared_dataset_id) # [START bigquery_authorized_view_tutorial] # [START bigquery_avt_create_shared_dataset] - shared_dataset = bigquery.Dataset(client.dataset(shared_dataset_id)) + shared_dataset = bigquery.Dataset(shared_dataset_id_full) shared_dataset.location = "US" shared_dataset = client.create_dataset(shared_dataset) # API request # [END bigquery_avt_create_shared_dataset] diff --git a/samples/snippets/authorized_view_tutorial_test.py b/samples/snippets/authorized_view_tutorial_test.py index 4c74020bd..eb247c5eb 100644 --- a/samples/snippets/authorized_view_tutorial_test.py +++ b/samples/snippets/authorized_view_tutorial_test.py @@ -30,7 +30,7 @@ def datasets_to_delete(client): doomed = [] yield doomed for item in doomed: - client.delete_dataset(item, delete_contents=True) + client.delete_dataset(item, delete_contents=True, not_found_ok=True) def test_authorized_view_tutorial(client, datasets_to_delete): @@ -42,8 +42,12 @@ def test_authorized_view_tutorial(client, datasets_to_delete): str(uuid.uuid4()).replace("-", "_") ), } - source_dataset_ref = client.dataset(override_values["source_dataset_id"]) - shared_dataset_ref = client.dataset(override_values["shared_dataset_id"]) + source_dataset_ref = "{}.{}".format( + client.project, override_values["source_dataset_id"] + ) + 
shared_dataset_ref = "{}.{}".format( + client.project, override_values["shared_dataset_id"] + ) datasets_to_delete.extend( [override_values["source_dataset_id"], override_values["shared_dataset_id"]] ) diff --git a/samples/snippets/natality_tutorial.py b/samples/snippets/natality_tutorial.py index b2b607b0d..a8d90501a 100644 --- a/samples/snippets/natality_tutorial.py +++ b/samples/snippets/natality_tutorial.py @@ -38,13 +38,15 @@ def run_natality_tutorial(override_values={}): # Prepare a reference to a new dataset for storing the query results. dataset_id = "natality_regression" + dataset_id_full = "{}.{}".format(client.project, dataset_id) # [END bigquery_query_natality_tutorial] # To facilitate testing, we replace values with alternatives # provided by the testing harness. dataset_id = override_values.get("dataset_id", dataset_id) + dataset_id_full = "{}.{}".format(client.project, dataset_id) # [START bigquery_query_natality_tutorial] - dataset = bigquery.Dataset(client.dataset(dataset_id)) + dataset = bigquery.Dataset(dataset_id_full) # Create the new BigQuery dataset. 
dataset = client.create_dataset(dataset) diff --git a/samples/snippets/natality_tutorial_test.py b/samples/snippets/natality_tutorial_test.py index fae72fa46..d9c89bef2 100644 --- a/samples/snippets/natality_tutorial_test.py +++ b/samples/snippets/natality_tutorial_test.py @@ -43,8 +43,8 @@ def test_natality_tutorial(client, datasets_to_delete): natality_tutorial.run_natality_tutorial(override_values) - table_ref = bigquery.Dataset(client.dataset(override_values["dataset_id"])).table( - "regression_input" + table_ref = "{}.{}.{}".format( + client.project, override_values["dataset_id"], "regression_input" ) table = client.get_table(table_ref) assert table.num_rows > 0 diff --git a/samples/snippets/quickstart.py b/samples/snippets/quickstart.py index 56d6fd843..1b0ef5b3a 100644 --- a/samples/snippets/quickstart.py +++ b/samples/snippets/quickstart.py @@ -33,8 +33,8 @@ def run_quickstart(override_values={}): # [START bigquery_quickstart] # Prepares a reference to the new dataset - dataset_ref = bigquery_client.dataset(dataset_id) - dataset = bigquery.Dataset(dataset_ref) + dataset_id_full = "{}.{}".format(bigquery_client.project, dataset_id) + dataset = bigquery.Dataset(dataset_id_full) # Creates the new dataset dataset = bigquery_client.create_dataset(dataset) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 6edca4f10..76c333b46 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,6 +1,10 @@ -google-cloud-bigquery[pandas,bqstorage,pyarrow]==2.0.0 +google-cloud-bigquery==2.0.0 +google-cloud-bigquery-storage==2.0.0 google-auth-oauthlib==0.4.1 +grpcio==1.32.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.1 +pandas==1.1.3 +pyarrow==1.0.1 pytz==2020.1 From 06830814e3328edadd7728e4271ced52233be0b1 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 9 Oct 2020 20:10:31 +0200 Subject: [PATCH 014/341] chore(deps): update dependency 
google-cloud-bigquery to v2.1.0 (#312) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 76c333b46..daabdf745 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.0.0 +google-cloud-bigquery==2.1.0 google-cloud-bigquery-storage==2.0.0 google-auth-oauthlib==0.4.1 grpcio==1.32.0 From 35627d145a41d57768f19d4392ef235928e00f72 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Fri, 9 Oct 2020 13:04:02 -0700 Subject: [PATCH 015/341] chore: start tracking obsolete files (#310) This PR was generated using Autosynth. :rainbow: Synth log will be available here: https://source.cloud.google.com/results/invocations/4563ab42-f1d6-4a7f-8e48-cc92dfba56b5/targets - [ ] To automatically regenerate this PR, check this box. PiperOrigin-RevId: 334645418 Source-Link: https://github.com/googleapis/googleapis/commit/c941026e5e3d600817a20e9ab4d4be03dff21a68 --- .kokoro/presubmit/presubmit.cfg | 8 ++- .kokoro/samples/python3.6/common.cfg | 6 -- .kokoro/samples/python3.7/common.cfg | 6 -- .kokoro/samples/python3.8/common.cfg | 6 -- mypy.ini | 3 + synth.metadata | 94 +++++++++++++++++++++++++++- 6 files changed, 101 insertions(+), 22 deletions(-) create mode 100644 mypy.ini diff --git a/.kokoro/presubmit/presubmit.cfg b/.kokoro/presubmit/presubmit.cfg index 8f43917d9..b158096f0 100644 --- a/.kokoro/presubmit/presubmit.cfg +++ b/.kokoro/presubmit/presubmit.cfg @@ -1 +1,7 @@ -# Format: //devtools/kokoro/config/proto/build.proto \ No newline at end of file +# Format: //devtools/kokoro/config/proto/build.proto + +# Disable system tests. 
+env_vars: { + key: "RUN_SYSTEM_TESTS" + value: "false" +} diff --git a/.kokoro/samples/python3.6/common.cfg b/.kokoro/samples/python3.6/common.cfg index f3b930960..a56768eae 100644 --- a/.kokoro/samples/python3.6/common.cfg +++ b/.kokoro/samples/python3.6/common.cfg @@ -13,12 +13,6 @@ env_vars: { value: "py-3.6" } -# Declare build specific Cloud project. -env_vars: { - key: "BUILD_SPECIFIC_GCLOUD_PROJECT" - value: "python-docs-samples-tests-py36" -} - env_vars: { key: "TRAMPOLINE_BUILD_FILE" value: "github/python-bigquery/.kokoro/test-samples.sh" diff --git a/.kokoro/samples/python3.7/common.cfg b/.kokoro/samples/python3.7/common.cfg index fc0654565..c93747180 100644 --- a/.kokoro/samples/python3.7/common.cfg +++ b/.kokoro/samples/python3.7/common.cfg @@ -13,12 +13,6 @@ env_vars: { value: "py-3.7" } -# Declare build specific Cloud project. -env_vars: { - key: "BUILD_SPECIFIC_GCLOUD_PROJECT" - value: "python-docs-samples-tests-py37" -} - env_vars: { key: "TRAMPOLINE_BUILD_FILE" value: "github/python-bigquery/.kokoro/test-samples.sh" diff --git a/.kokoro/samples/python3.8/common.cfg b/.kokoro/samples/python3.8/common.cfg index 2b0bf59b3..9808f15e3 100644 --- a/.kokoro/samples/python3.8/common.cfg +++ b/.kokoro/samples/python3.8/common.cfg @@ -13,12 +13,6 @@ env_vars: { value: "py-3.8" } -# Declare build specific Cloud project. 
-env_vars: { - key: "BUILD_SPECIFIC_GCLOUD_PROJECT" - value: "python-docs-samples-tests-py38" -} - env_vars: { key: "TRAMPOLINE_BUILD_FILE" value: "github/python-bigquery/.kokoro/test-samples.sh" diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 000000000..4505b4854 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,3 @@ +[mypy] +python_version = 3.6 +namespace_packages = True diff --git a/synth.metadata b/synth.metadata index d40e66dac..008810630 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,15 +4,22 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "fbbe0cb0ea22161d81f1e5504bb89b55e4198634" + "sha": "31644d380b35a76a9147801a4b6b0271c246fd0c" } }, { "git": { "name": "googleapis", "remote": "https://github.com/googleapis/googleapis.git", - "sha": "0dc0a6c0f1a9f979bc0690f0caa5fbafa3000c2c", - "internalRef": "327026955" + "sha": "c941026e5e3d600817a20e9ab4d4be03dff21a68", + "internalRef": "334645418" + } + }, + { + "git": { + "name": "synthtool", + "remote": "https://github.com/googleapis/synthtool.git", + "sha": "f3c04883d6c43261ff13db1f52d03a283be06871" } }, { @@ -33,5 +40,86 @@ "generator": "bazel" } } + ], + "generatedFiles": [ + ".flake8", + ".github/CONTRIBUTING.md", + ".github/ISSUE_TEMPLATE/bug_report.md", + ".github/ISSUE_TEMPLATE/feature_request.md", + ".github/ISSUE_TEMPLATE/support_request.md", + ".github/PULL_REQUEST_TEMPLATE.md", + ".github/release-please.yml", + ".github/snippet-bot.yml", + ".gitignore", + ".kokoro/build.sh", + ".kokoro/continuous/common.cfg", + ".kokoro/continuous/continuous.cfg", + ".kokoro/docker/docs/Dockerfile", + ".kokoro/docker/docs/fetch_gpg_keys.sh", + ".kokoro/docs/common.cfg", + ".kokoro/docs/docs-presubmit.cfg", + ".kokoro/docs/docs.cfg", + ".kokoro/populate-secrets.sh", + ".kokoro/presubmit/common.cfg", + ".kokoro/presubmit/presubmit.cfg", + ".kokoro/presubmit/system-3.8.cfg", + ".kokoro/publish-docs.sh", + ".kokoro/release.sh", + ".kokoro/release/common.cfg", + 
".kokoro/release/release.cfg", + ".kokoro/samples/lint/common.cfg", + ".kokoro/samples/lint/continuous.cfg", + ".kokoro/samples/lint/periodic.cfg", + ".kokoro/samples/lint/presubmit.cfg", + ".kokoro/samples/python3.6/common.cfg", + ".kokoro/samples/python3.6/continuous.cfg", + ".kokoro/samples/python3.6/periodic.cfg", + ".kokoro/samples/python3.6/presubmit.cfg", + ".kokoro/samples/python3.7/common.cfg", + ".kokoro/samples/python3.7/continuous.cfg", + ".kokoro/samples/python3.7/periodic.cfg", + ".kokoro/samples/python3.7/presubmit.cfg", + ".kokoro/samples/python3.8/common.cfg", + ".kokoro/samples/python3.8/continuous.cfg", + ".kokoro/samples/python3.8/periodic.cfg", + ".kokoro/samples/python3.8/presubmit.cfg", + ".kokoro/test-samples.sh", + ".kokoro/trampoline.sh", + ".kokoro/trampoline_v2.sh", + ".trampolinerc", + "CODE_OF_CONDUCT.md", + "CONTRIBUTING.rst", + "LICENSE", + "MANIFEST.in", + "docs/_static/custom.css", + "docs/_templates/layout.html", + "docs/bigquery_v2/services.rst", + "docs/bigquery_v2/types.rst", + "docs/conf.py", + "google/cloud/bigquery_v2/__init__.py", + "google/cloud/bigquery_v2/proto/encryption_config.proto", + "google/cloud/bigquery_v2/proto/model.proto", + "google/cloud/bigquery_v2/proto/model_reference.proto", + "google/cloud/bigquery_v2/proto/standard_sql.proto", + "google/cloud/bigquery_v2/py.typed", + "google/cloud/bigquery_v2/types/__init__.py", + "google/cloud/bigquery_v2/types/encryption_config.py", + "google/cloud/bigquery_v2/types/model.py", + "google/cloud/bigquery_v2/types/model_reference.py", + "google/cloud/bigquery_v2/types/standard_sql.py", + "mypy.ini", + "renovate.json", + "samples/AUTHORING_GUIDE.md", + "samples/CONTRIBUTING.md", + "scripts/decrypt-secrets.sh", + "scripts/fixup_bigquery_v2_keywords.py", + "scripts/readme-gen/readme_gen.py", + "scripts/readme-gen/templates/README.tmpl.rst", + "scripts/readme-gen/templates/auth.tmpl.rst", + "scripts/readme-gen/templates/auth_api_key.tmpl.rst", + 
"scripts/readme-gen/templates/install_deps.tmpl.rst", + "scripts/readme-gen/templates/install_portaudio.tmpl.rst", + "setup.cfg", + "testing/.gitignore" ] } \ No newline at end of file From b8f502b14f21d1815697e4d57cf1225dfb4a7c5e Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Mon, 12 Oct 2020 10:24:16 -0400 Subject: [PATCH 016/341] fix: use version.py instead of pkg_resources.get_distribution (#307) * fix: use version.py instead of pkg_resources.get_distribution * fix: nit --- google/cloud/bigquery/__init__.py | 4 ++-- google/cloud/bigquery/version.py | 15 +++++++++++++++ setup.py | 7 ++++++- 3 files changed, 23 insertions(+), 3 deletions(-) create mode 100644 google/cloud/bigquery/version.py diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 89c5a3624..e83e70084 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -28,9 +28,9 @@ """ -from pkg_resources import get_distribution +from google.cloud.bigquery import version as bigquery_version -__version__ = get_distribution("google-cloud-bigquery").version +__version__ = bigquery_version.__version__ from google.cloud.bigquery.client import Client from google.cloud.bigquery.dataset import AccessEntry diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py new file mode 100644 index 000000000..8b5d3328c --- /dev/null +++ b/google/cloud/bigquery/version.py @@ -0,0 +1,15 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +__version__ = "2.1.0" diff --git a/setup.py b/setup.py index 14b38b63e..be7296081 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ name = "google-cloud-bigquery" description = "Google BigQuery API client library" -version = "2.1.0" + # Should be one of: # 'Development Status :: 3 - Alpha' # 'Development Status :: 4 - Beta' @@ -83,6 +83,11 @@ with io.open(readme_filename, encoding="utf-8") as readme_file: readme = readme_file.read() +version = {} +with open(os.path.join(package_root, "google/cloud/bigquery/version.py")) as fp: + exec(fp.read(), version) +version = version["__version__"] + # Only include packages under the 'google' namespace. Do not include tests, # benchmarks, etc. packages = [ From 801e4c0574b7e421aa3a28cafec6fd6bcce940dd Mon Sep 17 00:00:00 2001 From: Carlos de la Guardia Date: Mon, 12 Oct 2020 17:10:07 -0500 Subject: [PATCH 017/341] deps: require pyarrow for pandas support (#314) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [X] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [X] Ensure the tests and linter pass - [X] Code coverage does not decrease (if any source code was changed) - [X] Appropriate docs were updated (if necessary) Fixes #265 🦕 --- docs/snippets.py | 4 - google/cloud/bigquery/__init__.py | 3 - google/cloud/bigquery/client.py | 43 ++---- google/cloud/bigquery/exceptions.py | 17 -- google/cloud/bigquery/table.py | 92 ++++------- noxfile.py | 5 +- setup.py | 12 +- testing/constraints-3.6.txt | 1 - tests/unit/test__pandas_helpers.py | 8 + tests/unit/test_client.py | 144 +---------------- tests/unit/test_job.py | 39 ----- tests/unit/test_table.py | 232 ++++++---------------------- 12 files changed, 97 insertions(+), 503 deletions(-) delete mode 100644 google/cloud/bigquery/exceptions.py diff --git a/docs/snippets.py b/docs/snippets.py index bc6b58020..8c106e63d 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -26,10 +26,6 @@ import pytest -try: - import fastparquet -except (ImportError, AttributeError): - fastparquet = None try: import pandas except (ImportError, AttributeError): diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index e83e70084..b8d1cc4d7 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -38,7 +38,6 @@ from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import enums from google.cloud.bigquery.enums import StandardSqlDataTypes -from google.cloud.bigquery.exceptions import PyarrowMissingWarning from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions from google.cloud.bigquery.external_config import BigtableColumnFamily @@ -143,8 +142,6 @@ "WriteDisposition", # EncryptionConfiguration "EncryptionConfiguration", - # Errors and warnings - "PyarrowMissingWarning", ] diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py 
index fcb18385d..2afffab80 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -58,7 +58,6 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery.exceptions import PyarrowMissingWarning from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job from google.cloud.bigquery.model import Model @@ -2135,29 +2134,31 @@ def load_table_from_dataframe( [Beta] The compression method to use if intermittently serializing ``dataframe`` to a parquet file. - If ``pyarrow`` and job config schema are used, the argument - is directly passed as the ``compression`` argument to the - underlying ``pyarrow.parquet.write_table()`` method (the - default value "snappy" gets converted to uppercase). + The argument is directly passed as the ``compression`` + argument to the underlying ``pyarrow.parquet.write_table()`` + method (the default value "snappy" gets converted to uppercase). https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html#pyarrow-parquet-write-table - If either ``pyarrow`` or job config schema are missing, the - argument is directly passed as the ``compression`` argument - to the underlying ``DataFrame.to_parquet()`` method. + If the job config schema is missing, the argument is directly + passed as the ``compression`` argument to the underlying + ``DataFrame.to_parquet()`` method. https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_parquet.html#pandas.DataFrame.to_parquet Returns: google.cloud.bigquery.job.LoadJob: A new load job. Raises: - ImportError: + ValueError: If a usable parquet engine cannot be found. This method - requires :mod:`pyarrow` or :mod:`fastparquet` to be - installed. + requires :mod:`pyarrow` to be installed. 
TypeError: If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig` class. """ + if pyarrow is None: + # pyarrow is now the only supported parquet engine. + raise ValueError("This method requires pyarrow to be installed") + job_id = _make_job_id(job_id, job_id_prefix) if job_config: @@ -2222,7 +2223,7 @@ def load_table_from_dataframe( os.close(tmpfd) try: - if pyarrow and job_config.schema: + if job_config.schema: if parquet_compression == "snappy": # adjust the default value parquet_compression = parquet_compression.upper() @@ -2233,24 +2234,6 @@ def load_table_from_dataframe( parquet_compression=parquet_compression, ) else: - if not pyarrow: - warnings.warn( - "Loading dataframe data without pyarrow installed is " - "deprecated and will become unsupported in the future. " - "Please install the pyarrow package.", - PyarrowMissingWarning, - stacklevel=2, - ) - - if job_config.schema: - warnings.warn( - "job_config.schema is set, but not used to assist in " - "identifying correct types for data serialization. " - "Please install the pyarrow package.", - PendingDeprecationWarning, - stacklevel=2, - ) - dataframe.to_parquet(tmppath, compression=parquet_compression) with open(tmppath, "rb") as parquet_file: diff --git a/google/cloud/bigquery/exceptions.py b/google/cloud/bigquery/exceptions.py deleted file mode 100644 index 93490ef97..000000000 --- a/google/cloud/bigquery/exceptions.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - - -class PyarrowMissingWarning(DeprecationWarning): - pass diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index a72bacb74..01e8815da 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -50,7 +50,6 @@ from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields -from google.cloud.bigquery.exceptions import PyarrowMissingWarning from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration @@ -1679,75 +1678,38 @@ def to_dataframe( create_bqstorage_client = False bqstorage_client = None - if pyarrow is not None: - # If pyarrow is available, calling to_arrow, then converting to a - # pandas dataframe is about 2x faster. This is because pandas.concat is - # rarely no-copy, whereas pyarrow.Table.from_batches + to_pandas is - # usually no-copy. - record_batch = self.to_arrow( - progress_bar_type=progress_bar_type, - bqstorage_client=bqstorage_client, - create_bqstorage_client=create_bqstorage_client, - ) + record_batch = self.to_arrow( + progress_bar_type=progress_bar_type, + bqstorage_client=bqstorage_client, + create_bqstorage_client=create_bqstorage_client, + ) + + # When converting timestamp values to nanosecond precision, the result + # can be out of pyarrow bounds. To avoid the error when converting to + # Pandas, we set the timestamp_as_object parameter to True, if necessary. + types_to_check = { + pyarrow.timestamp("us"), + pyarrow.timestamp("us", tz=pytz.UTC), + } - # When converting timestamp values to nanosecond precision, the result - # can be out of pyarrow bounds. To avoid the error when converting to - # Pandas, we set the timestamp_as_object parameter to True, if necessary. 
- types_to_check = { - pyarrow.timestamp("us"), - pyarrow.timestamp("us", tz=pytz.UTC), - } - - for column in record_batch: - if column.type in types_to_check: - try: - column.cast("timestamp[ns]") - except pyarrow.lib.ArrowInvalid: - timestamp_as_object = True - break - else: - timestamp_as_object = False - - extra_kwargs = {"timestamp_as_object": timestamp_as_object} - - df = record_batch.to_pandas(date_as_object=date_as_object, **extra_kwargs) - - for column in dtypes: - df[column] = pandas.Series(df[column], dtype=dtypes[column]) - return df + for column in record_batch: + if column.type in types_to_check: + try: + column.cast("timestamp[ns]") + except pyarrow.lib.ArrowInvalid: + timestamp_as_object = True + break else: - warnings.warn( - "Converting to a dataframe without pyarrow installed is " - "often slower and will become unsupported in the future. " - "Please install the pyarrow package.", - PyarrowMissingWarning, - stacklevel=2, - ) + timestamp_as_object = False - # The bqstorage_client is only used if pyarrow is available, so the - # rest of this method only needs to account for tabledata.list. - progress_bar = self._get_progress_bar(progress_bar_type) + extra_kwargs = {"timestamp_as_object": timestamp_as_object} - frames = [] - for frame in self.to_dataframe_iterable(dtypes=dtypes): - frames.append(frame) + df = record_batch.to_pandas(date_as_object=date_as_object, **extra_kwargs) - if progress_bar is not None: - # In some cases, the number of total rows is not populated - # until the first page of rows is fetched. Update the - # progress bar's total to keep an accurate count. - progress_bar.total = progress_bar.total or self.total_rows - progress_bar.update(len(frame)) - - if progress_bar is not None: - # Indicate that the download has finished. - progress_bar.close() - - # Avoid concatting an empty list. 
- if not frames: - column_names = [field.name for field in self._schema] - return pandas.DataFrame(columns=column_names) - return pandas.concat(frames, ignore_index=True) + for column in dtypes: + df[column] = pandas.Series(df[column], dtype=dtypes[column]) + + return df class _EmptyRowIterator(object): diff --git a/noxfile.py b/noxfile.py index 42d8f9356..db1dcffde 100644 --- a/noxfile.py +++ b/noxfile.py @@ -49,10 +49,7 @@ def default(session): constraints_path, ) - # fastparquet is not included in .[all] because, in general, it's - # redundant with pyarrow. We still want to run some unit tests with - # fastparquet serialization, though. - session.install("-e", ".[all,fastparquet]", "-c", constraints_path) + session.install("-e", ".[all]", "-c", constraints_path) session.install("ipython", "-c", constraints_path) diff --git a/setup.py b/setup.py index be7296081..abd5cef95 100644 --- a/setup.py +++ b/setup.py @@ -47,13 +47,12 @@ "grpcio >= 1.32.0, < 2.0dev", "pyarrow >= 1.0.0, < 2.0dev", ], - "pandas": ["pandas>=0.23.0"], - "pyarrow": [ + "pandas": [ + "pandas>=0.23.0", # pyarrow 1.0.0 is required for the use of timestamp_as_object keyword. "pyarrow >= 1.0.0, < 2.0dev", ], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], - "fastparquet": ["fastparquet", "python-snappy", "llvmlite>=0.34.0"], "opentelemetry": [ "opentelemetry-api==0.9b0", "opentelemetry-sdk==0.9b0", @@ -64,13 +63,6 @@ all_extras = [] for extra in extras: - if extra in ( - # Skip fastparquet from "all" because it is redundant with pyarrow and - # creates a dependency on pre-release versions of numpy. 
See: - # https://github.com/googleapis/google-cloud-python/issues/8549 - "fastparquet", - ): - continue all_extras.extend(extras[extra]) extras["all"] = all_extras diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index a9f4faa92..798804941 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -1,4 +1,3 @@ -fastparquet==0.4.1 google-api-core==1.22.2 google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index c1073066d..bdb1c56ea 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -1329,3 +1329,11 @@ def test_download_dataframe_tabledata_list_dict_sequence_schema(module_under_tes ) ) assert result.equals(expected_result) + + with pytest.raises(StopIteration): + result = next(results_gen) + + +def test_table_data_listpage_to_dataframe_skips_stop_iteration(module_under_test): + dataframe = module_under_test._tabledata_list_page_to_dataframe([], [], {}) + assert isinstance(dataframe, pandas.DataFrame) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index f44201ab8..737c1aef7 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -32,10 +32,6 @@ import pytz import pkg_resources -try: - import fastparquet -except (ImportError, AttributeError): # pragma: NO COVER - fastparquet = None try: import pandas except (ImportError, AttributeError): # pragma: NO COVER @@ -7838,80 +7834,6 @@ def test_load_table_from_dataframe_unknown_table(self): job_config=mock.ANY, ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(fastparquet is None, "Requires `fastparquet`") - def test_load_table_from_dataframe_no_pyarrow_warning(self): - from google.cloud.bigquery.client import PyarrowMissingWarning - - client = self._make_client() - - # Pick at least one column type that translates to Pandas dtype - # "object". A string column matches that. 
- records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}] - dataframe = pandas.DataFrame(records) - - get_table_patch = mock.patch( - "google.cloud.bigquery.client.Client.get_table", - autospec=True, - side_effect=google.api_core.exceptions.NotFound("Table not found"), - ) - load_patch = mock.patch( - "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True - ) - pyarrow_patch = mock.patch("google.cloud.bigquery.client.pyarrow", None) - pyarrow_patch_helpers = mock.patch( - "google.cloud.bigquery._pandas_helpers.pyarrow", None - ) - catch_warnings = warnings.catch_warnings(record=True) - - with get_table_patch, load_patch, pyarrow_patch, pyarrow_patch_helpers, catch_warnings as warned: - client.load_table_from_dataframe( - dataframe, self.TABLE_REF, location=self.LOCATION - ) - - matches = [ - warning for warning in warned if warning.category is PyarrowMissingWarning - ] - assert matches, "A missing pyarrow deprecation warning was not raised." - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(fastparquet is None, "Requires `fastparquet`") - def test_load_table_from_dataframe_no_schema_warning_wo_pyarrow(self): - client = self._make_client() - - # Pick at least one column type that translates to Pandas dtype - # "object". A string column matches that. 
- records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}] - dataframe = pandas.DataFrame(records) - - get_table_patch = mock.patch( - "google.cloud.bigquery.client.Client.get_table", - autospec=True, - side_effect=google.api_core.exceptions.NotFound("Table not found"), - ) - load_patch = mock.patch( - "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True - ) - pyarrow_patch = mock.patch("google.cloud.bigquery.client.pyarrow", None) - pyarrow_patch_helpers = mock.patch( - "google.cloud.bigquery._pandas_helpers.pyarrow", None - ) - catch_warnings = warnings.catch_warnings(record=True) - - with get_table_patch, load_patch, pyarrow_patch, pyarrow_patch_helpers, catch_warnings as warned: - client.load_table_from_dataframe( - dataframe, self.TABLE_REF, location=self.LOCATION - ) - - matches = [ - warning - for warning in warned - if warning.category in (DeprecationWarning, PendingDeprecationWarning) - and "could not be detected" in str(warning) - and "please provide a schema" in str(warning) - ] - assert matches, "A missing schema deprecation warning was not raised." 
- @unittest.skipIf( pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, "Only `pandas version >=1.0.0` supported", @@ -8182,7 +8104,6 @@ def test_load_table_from_dataframe_w_partial_schema_extra_types(self): assert "unknown_col" in message @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(fastparquet is None, "Requires `fastparquet`") def test_load_table_from_dataframe_w_partial_schema_missing_types(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job @@ -8236,55 +8157,6 @@ def test_load_table_from_dataframe_w_partial_schema_missing_types(self): assert sent_config.source_format == job.SourceFormat.PARQUET assert sent_config.schema is None - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_w_schema_wo_pyarrow(self): - from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES - from google.cloud.bigquery import job - from google.cloud.bigquery.schema import SchemaField - - client = self._make_client() - records = [{"name": u"Monty", "age": 100}, {"name": u"Python", "age": 60}] - dataframe = pandas.DataFrame(records, columns=["name", "age"]) - schema = (SchemaField("name", "STRING"), SchemaField("age", "INTEGER")) - job_config = job.LoadJobConfig(schema=schema) - - load_patch = mock.patch( - "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True - ) - pyarrow_patch = mock.patch("google.cloud.bigquery.client.pyarrow", None) - - with load_patch as load_table_from_file, pyarrow_patch, warnings.catch_warnings( - record=True - ) as warned: - client.load_table_from_dataframe( - dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION - ) - - assert warned # there should be at least one warning - for warning in warned: - assert "pyarrow" in str(warning) - assert issubclass( - warning.category, (DeprecationWarning, PendingDeprecationWarning) - ) - 
- load_table_from_file.assert_called_once_with( - client, - mock.ANY, - self.TABLE_REF, - num_retries=_DEFAULT_NUM_RETRIES, - rewind=True, - job_id=mock.ANY, - job_id_prefix=None, - location=self.LOCATION, - project=None, - job_config=mock.ANY, - ) - - sent_config = load_table_from_file.mock_calls[0][2]["job_config"] - assert sent_config.source_format == job.SourceFormat.PARQUET - assert tuple(sent_config.schema) == schema - @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): @@ -8320,7 +8192,7 @@ def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_wo_pyarrow_custom_compression(self): + def test_load_table_from_dataframe_wo_pyarrow_raises_error(self): client = self._make_client() records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] dataframe = pandas.DataFrame(records) @@ -8338,8 +8210,8 @@ def test_load_table_from_dataframe_wo_pyarrow_custom_compression(self): dataframe, "to_parquet", wraps=dataframe.to_parquet ) - with load_patch, get_table_patch, pyarrow_patch, to_parquet_patch as to_parquet_spy: - with warnings.catch_warnings(record=True) as warned: + with load_patch, get_table_patch, pyarrow_patch, to_parquet_patch: + with pytest.raises(ValueError): client.load_table_from_dataframe( dataframe, self.TABLE_REF, @@ -8347,16 +8219,6 @@ def test_load_table_from_dataframe_wo_pyarrow_custom_compression(self): parquet_compression="gzip", ) - call_args = to_parquet_spy.call_args - assert call_args is not None - assert call_args.kwargs.get("compression") == "gzip" - - assert len(warned) == 2 - warning = warned[0] - assert "Loading dataframe data without pyarrow" in str(warning) - warning = warned[1] - assert "Please install the pyarrow package" in str(warning) - 
@unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nulls(self): diff --git a/tests/unit/test_job.py b/tests/unit/test_job.py index fb042e18c..d21489616 100644 --- a/tests/unit/test_job.py +++ b/tests/unit/test_job.py @@ -5802,45 +5802,6 @@ def test_to_dataframe_column_date_dtypes(self): self.assertEqual(df.date.dtype.name, "datetime64[ns]") - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe_column_date_dtypes_wo_pyarrow(self): - begun_resource = self._make_resource() - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "totalRows": "1", - "schema": {"fields": [{"name": "date", "type": "DATE"}]}, - } - row_data = [ - ["1999-12-01"], - ] - rows = [{"f": [{"v": field} for field in row]} for row in row_data] - query_resource["rows"] = rows - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection( - begun_resource, query_resource, done_resource, query_resource - ) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - - with mock.patch("google.cloud.bigquery.table.pyarrow", None): - with warnings.catch_warnings(record=True) as warned: - df = job.to_dataframe( - date_as_object=False, create_bqstorage_client=False - ) - - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 1) # verify the number of rows - exp_columns = [field["name"] for field in query_resource["schema"]["fields"]] - self.assertEqual(list(df), exp_columns) # verify the column names - - self.assertEqual(df.date.dtype.name, "object") - - assert len(warned) == 1 - warning = warned[0] - assert "without pyarrow" in str(warning) - @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(tqdm is None, "Requires `tqdm`") @mock.patch("tqdm.tqdm") diff --git 
a/tests/unit/test_table.py b/tests/unit/test_table.py index 12169658e..fe17d2852 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2148,6 +2148,49 @@ def test_to_dataframe_iterable(self): self.assertEqual(df_2["name"][0], "Sven") self.assertEqual(df_2["age"][0], 33) + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_iterable_with_dtypes(self): + from google.cloud.bigquery.schema import SchemaField + import types + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + + path = "/foo" + api_request = mock.Mock( + side_effect=[ + { + "rows": [{"f": [{"v": "Bengt"}, {"v": "32"}]}], + "pageToken": "NEXTPAGE", + }, + {"rows": [{"f": [{"v": "Sven"}, {"v": "33"}]}]}, + ] + ) + + row_iterator = self._make_one( + _mock_client(), api_request, path, schema, page_size=1, max_results=5 + ) + dfs = row_iterator.to_dataframe_iterable(dtypes={"age": "int32"}) + + self.assertIsInstance(dfs, types.GeneratorType) + + df_1 = next(dfs) + self.assertIsInstance(df_1, pandas.DataFrame) + self.assertEqual(df_1.name.dtype.name, "object") + self.assertEqual(df_1.age.dtype.name, "int32") + self.assertEqual(len(df_1), 1) # verify the number of rows + self.assertEqual( + df_1["name"][0], "Bengt" + ) # verify the first value of 'name' column + self.assertEqual(df_1["age"][0], 32) # verify the first value of 'age' column + + df_2 = next(dfs) + self.assertEqual(len(df_2), 1) # verify the number of rows + self.assertEqual(df_2["name"][0], "Sven") + self.assertEqual(df_2["age"][0], 33) + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" @@ -2327,38 +2370,6 @@ def test_to_dataframe_datetime_out_of_pyarrow_bounds(self): [dt.datetime(4567, 1, 1), dt.datetime(9999, 12, 31)], ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe_warning_wo_pyarrow(self): - from 
google.cloud.bigquery.client import PyarrowMissingWarning - from google.cloud.bigquery.schema import SchemaField - - schema = [ - SchemaField("name", "STRING", mode="REQUIRED"), - SchemaField("age", "INTEGER", mode="REQUIRED"), - ] - rows = [ - {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, - {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, - ] - path = "/foo" - api_request = mock.Mock(return_value={"rows": rows}) - row_iterator = self._make_one(_mock_client(), api_request, path, schema) - - no_pyarrow_patch = mock.patch("google.cloud.bigquery.table.pyarrow", new=None) - catch_warnings = warnings.catch_warnings(record=True) - - with no_pyarrow_patch, catch_warnings as warned: - df = row_iterator.to_dataframe() - - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 2) - matches = [ - warning for warning in warned if warning.category is PyarrowMissingWarning - ] - self.assertTrue( - matches, msg="A missing pyarrow deprecation warning was not raised." - ) - @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(tqdm is None, "Requires `tqdm`") @mock.patch("tqdm.tqdm_gui") @@ -2399,50 +2410,6 @@ def test_to_dataframe_progress_bar( progress_bar_mock().close.assert_called_once() self.assertEqual(len(df), 4) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(tqdm is None, "Requires `tqdm`") - @mock.patch("tqdm.tqdm_gui") - @mock.patch("tqdm.tqdm_notebook") - @mock.patch("tqdm.tqdm") - def test_to_dataframe_progress_bar_wo_pyarrow( - self, tqdm_mock, tqdm_notebook_mock, tqdm_gui_mock - ): - from google.cloud.bigquery.schema import SchemaField - - schema = [ - SchemaField("name", "STRING", mode="REQUIRED"), - SchemaField("age", "INTEGER", mode="REQUIRED"), - ] - rows = [ - {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, - {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, - {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, - {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, - ] - path = "/foo" - api_request = 
mock.Mock(return_value={"rows": rows}) - - progress_bars = ( - ("tqdm", tqdm_mock), - ("tqdm_notebook", tqdm_notebook_mock), - ("tqdm_gui", tqdm_gui_mock), - ) - - for progress_bar_type, progress_bar_mock in progress_bars: - row_iterator = self._make_one(_mock_client(), api_request, path, schema) - with mock.patch("google.cloud.bigquery.table.pyarrow", None): - with warnings.catch_warnings(record=True) as warned: - df = row_iterator.to_dataframe(progress_bar_type=progress_bar_type) - - progress_bar_mock.assert_called() - progress_bar_mock().update.assert_called() - progress_bar_mock().close.assert_called_once() - self.assertEqual(len(df), 4) - - self.assertEqual(len(warned), 1) - warning = warned[0] - self.assertTrue("without pyarrow" in str(warning)) - @unittest.skipIf(pandas is None, "Requires `pandas`") @mock.patch("google.cloud.bigquery.table.tqdm", new=None) def test_to_dataframe_no_tqdm_no_progress_bar(self): @@ -2557,57 +2524,6 @@ def test_to_dataframe_w_empty_results(self): self.assertEqual(len(df), 0) # verify the number of rows self.assertEqual(list(df), ["name", "age"]) # verify the column names - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe_w_empty_results_wo_pyarrow(self): - from google.cloud.bigquery.schema import SchemaField - - with mock.patch("google.cloud.bigquery.table.pyarrow", None): - schema = [ - SchemaField("name", "STRING", mode="REQUIRED"), - SchemaField("age", "INTEGER", mode="REQUIRED"), - ] - api_request = mock.Mock(return_value={"rows": []}) - row_iterator = self._make_one(_mock_client(), api_request, schema=schema) - - with warnings.catch_warnings(record=True) as warned: - df = row_iterator.to_dataframe() - - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 0) # verify the number of rows - self.assertEqual(list(df), ["name", "age"]) # verify the column names - - self.assertEqual(len(warned), 1) - warning = warned[0] - self.assertTrue("without pyarrow" in str(warning)) - - 
@unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe_w_no_results_wo_pyarrow(self): - from google.cloud.bigquery.schema import SchemaField - - with mock.patch("google.cloud.bigquery.table.pyarrow", None): - schema = [ - SchemaField("name", "STRING", mode="REQUIRED"), - SchemaField("age", "INTEGER", mode="REQUIRED"), - ] - api_request = mock.Mock(return_value={"rows": []}) - row_iterator = self._make_one(_mock_client(), api_request, schema=schema) - - def empty_iterable(dtypes=None): - return [] - - row_iterator.to_dataframe_iterable = empty_iterable - - with warnings.catch_warnings(record=True) as warned: - df = row_iterator.to_dataframe() - - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 0) # verify the number of rows - self.assertEqual(list(df), ["name", "age"]) # verify the column names - - self.assertEqual(len(warned), 1) - warning = warned[0] - self.assertTrue("without pyarrow" in str(warning)) - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_w_various_types_nullable(self): import datetime @@ -3424,68 +3340,6 @@ def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): # Don't close the client if it was passed in. 
bqstorage_client._transport.grpc_channel.close.assert_not_called() - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe_concat_categorical_dtype_wo_pyarrow(self): - from google.cloud.bigquery.schema import SchemaField - - schema = [ - SchemaField("col_str", "STRING"), - SchemaField("col_category", "STRING"), - ] - row_data = [ - [u"foo", u"low"], - [u"bar", u"medium"], - [u"baz", u"low"], - [u"foo_page2", u"medium"], - [u"bar_page2", u"high"], - [u"baz_page2", u"low"], - ] - path = "/foo" - - rows = [{"f": [{"v": field} for field in row]} for row in row_data[:3]] - rows_page2 = [{"f": [{"v": field} for field in row]} for row in row_data[3:]] - api_request = mock.Mock( - side_effect=[{"rows": rows, "pageToken": "NEXTPAGE"}, {"rows": rows_page2}] - ) - - row_iterator = self._make_one(_mock_client(), api_request, path, schema) - - mock_pyarrow = mock.patch("google.cloud.bigquery.table.pyarrow", None) - catch_warnings = warnings.catch_warnings(record=True) - - with mock_pyarrow, catch_warnings as warned: - got = row_iterator.to_dataframe( - dtypes={ - "col_category": pandas.core.dtypes.dtypes.CategoricalDtype( - categories=["low", "medium", "high"], ordered=False, - ), - }, - ) - - self.assertIsInstance(got, pandas.DataFrame) - self.assertEqual(len(got), 6) # verify the number of rows - expected_columns = [field.name for field in schema] - self.assertEqual(list(got), expected_columns) # verify the column names - - # Are column types correct? - expected_dtypes = [ - pandas.core.dtypes.dtypes.np.dtype("O"), # the default for string data - pandas.core.dtypes.dtypes.CategoricalDtype( - categories=["low", "medium", "high"], ordered=False, - ), - ] - self.assertEqual(list(got.dtypes), expected_dtypes) - - # And the data in the categorical column? 
- self.assertEqual( - list(got["col_category"]), - ["low", "medium", "low", "medium", "high", "low"], - ) - - self.assertEqual(len(warned), 1) - warning = warned[0] - self.assertTrue("without pyarrow" in str(warning)) - class TestPartitionRange(unittest.TestCase): def _get_target_class(self): From 07c70f0292f9212f0c968cd5c9206e8b0409c0da Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Tue, 13 Oct 2020 02:53:19 -0400 Subject: [PATCH 018/341] feat: add method api_repr for table list item (#299) --- google/cloud/bigquery/table.py | 8 ++++++++ tests/unit/test_table.py | 11 +++++++++++ 2 files changed, 19 insertions(+) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 01e8815da..2214d0172 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1071,6 +1071,14 @@ def to_bqstorage(self): """ return self.reference.to_bqstorage() + def to_api_repr(self): + """Constructs the API resource of this table + + Returns: + Dict[str, object]: Table represented as an API resource + """ + return copy.deepcopy(self._properties) + def _row_from_mapping(mapping, schema): """Convert a mapping to a row tuple using the schema. 
diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index fe17d2852..376605521 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1433,6 +1433,17 @@ def test_labels_update_in_place(self): labels["foo"] = "bar" # update in place self.assertEqual(table.labels, {"foo": "bar"}) + def test_to_api_repr(self): + resource = { + "tableReference": { + "projectId": "testproject", + "datasetId": "testdataset", + "tableId": "testtable", + } + } + table = self._make_one(resource) + self.assertEqual(table.to_api_repr(), resource) + class TestRow(unittest.TestCase): def test_row(self): From 5ea1ece2d911cdd1f3d9549ee01559ce8ed8269a Mon Sep 17 00:00:00 2001 From: Ryan Yuan Date: Wed, 14 Oct 2020 08:02:02 +1100 Subject: [PATCH 019/341] docs: update clustering field docstrings (#286) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #285 🦕 --- google/cloud/bigquery/job.py | 8 ++++---- google/cloud/bigquery/table.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigquery/job.py b/google/cloud/bigquery/job.py index 20bce597a..70db69e71 100644 --- a/google/cloud/bigquery/job.py +++ b/google/cloud/bigquery/job.py @@ -1073,8 +1073,8 @@ def clustering_fields(self): .. note:: - As of 2018-06-29, clustering fields cannot be set on a table - which does not also have time partioning defined. 
+ BigQuery supports clustering for both partitioned and + non-partitioned tables. """ prop = self._get_sub_prop("clustering") if prop is not None: @@ -2554,8 +2554,8 @@ def clustering_fields(self): .. note:: - As of 2018-06-29, clustering fields cannot be set on a table - which does not also have time partioning defined. + BigQuery supports clustering for both partitioned and + non-partitioned tables. """ prop = self._get_sub_prop("clustering") if prop is not None: diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 2214d0172..5474f643e 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -625,8 +625,8 @@ def clustering_fields(self): .. note:: - As of 2018-06-29, clustering fields cannot be set on a table - which does not also have time partioning defined. + BigQuery supports clustering for both partitioned and + non-partitioned tables. """ prop = self._properties.get("clustering") if prop is not None: @@ -1030,8 +1030,8 @@ def clustering_fields(self): .. note:: - As of 2018-06-29, clustering fields cannot be set on a table - which does not also have time partioning defined. + BigQuery supports clustering for both partitioned and + non-partitioned tables. 
""" prop = self._properties.get("clustering") if prop is not None: From 20f473bfff5ae98377f5d9cdf18bfe5554d86ff4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20BAPTISTE?= Date: Tue, 13 Oct 2020 23:26:05 +0200 Subject: [PATCH 020/341] fix: make TimePartitioning repr evaluable (#110) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #109 🦕 --- google/cloud/bigquery/table.py | 15 ++++++++++++++- tests/unit/test_table.py | 4 ++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 5474f643e..d6d966eee 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -2114,7 +2114,20 @@ def to_api_repr(self): return self._properties def _key(self): - return tuple(sorted(self._properties.items())) + # because we are only "renaming" top level keys shallow copy is sufficient here. + properties = self._properties.copy() + # calling repr for non built-in type objects. + properties["type_"] = repr(properties.pop("type")) + if "field" in properties: + # calling repr for non built-in type objects. 
+ properties["field"] = repr(properties["field"]) + if "requirePartitionFilter" in properties: + properties["require_partition_filter"] = properties.pop( + "requirePartitionFilter" + ) + if "expirationMs" in properties: + properties["expiration_ms"] = properties.pop("expirationMs") + return tuple(sorted(properties.items())) def __eq__(self, other): if not isinstance(other, TimePartitioning): diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 376605521..e21453b9f 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -3711,7 +3711,7 @@ def test___hash__not_equals(self): def test___repr___minimal(self): time_partitioning = self._make_one() - expected = "TimePartitioning(type=DAY)" + expected = "TimePartitioning(type_='DAY')" self.assertEqual(repr(time_partitioning), expected) def test___repr___explicit(self): @@ -3720,7 +3720,7 @@ def test___repr___explicit(self): time_partitioning = self._make_one( type_=TimePartitioningType.DAY, field="name", expiration_ms=10000 ) - expected = "TimePartitioning(" "expirationMs=10000," "field=name," "type=DAY)" + expected = "TimePartitioning(expiration_ms=10000,field='name',type_='DAY')" self.assertEqual(repr(time_partitioning), expected) def test_set_expiration_w_none(self): From c69cd50914c0676645b04d44ede9392a3d6dd5b1 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 13 Oct 2020 23:41:46 +0200 Subject: [PATCH 021/341] chore(deps): update dependency matplotlib to v3.3.2 (#260) Co-authored-by: Tim Swast --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index daabdf745..7d001fa2f 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -4,7 +4,7 @@ google-auth-oauthlib==0.4.1 grpcio==1.32.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' -matplotlib==3.3.1 +matplotlib==3.3.2 pandas==1.1.3 
pyarrow==1.0.1 pytz==2020.1 From 3be78b737add7111e24e912cd02fc6df75a07de6 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Wed, 14 Oct 2020 01:42:40 -0400 Subject: [PATCH 022/341] perf: add size parameter for load table from dataframe and json methods (#280) * feat: add size parameter for load from dataframe and json * pref: calculate length of encoded string --- google/cloud/bigquery/client.py | 7 +++++-- tests/unit/test_client.py | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 2afffab80..b7e082daa 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -2237,11 +2237,13 @@ def load_table_from_dataframe( dataframe.to_parquet(tmppath, compression=parquet_compression) with open(tmppath, "rb") as parquet_file: + file_size = os.path.getsize(tmppath) return self.load_table_from_file( parquet_file, destination, num_retries=num_retries, rewind=True, + size=file_size, job_id=job_id, job_id_prefix=job_id_prefix, location=location, @@ -2343,11 +2345,12 @@ def load_table_from_json( destination = _table_arg_to_table_ref(destination, default_project=self.project) data_str = u"\n".join(json.dumps(item) for item in json_rows) - data_file = io.BytesIO(data_str.encode()) - + encoded_str = data_str.encode() + data_file = io.BytesIO(encoded_str) return self.load_table_from_file( data_file, destination, + size=len(encoded_str), num_retries=num_retries, job_id=job_id, job_id_prefix=job_id_prefix, diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 737c1aef7..52e00d7c7 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -7482,6 +7482,7 @@ def test_load_table_from_dataframe(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=None, @@ -7525,6 +7526,7 @@ def 
test_load_table_from_dataframe_w_client_location(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -7577,6 +7579,7 @@ def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(sel self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -7631,6 +7634,7 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -7723,6 +7727,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -7782,6 +7787,7 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -7827,6 +7833,7 @@ def test_load_table_from_dataframe_unknown_table(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=None, @@ -7867,6 +7874,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -7913,6 +7921,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -7973,6 +7982,7 @@ def test_load_table_from_dataframe_struct_fields(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, 
job_id_prefix=None, location=self.LOCATION, @@ -8046,6 +8056,7 @@ def test_load_table_from_dataframe_w_partial_schema(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -8139,6 +8150,7 @@ def test_load_table_from_dataframe_w_partial_schema_missing_types(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -8251,6 +8263,7 @@ def test_load_table_from_dataframe_w_nulls(self): self.TABLE_REF, num_retries=_DEFAULT_NUM_RETRIES, rewind=True, + size=mock.ANY, job_id=mock.ANY, job_id_prefix=None, location=self.LOCATION, @@ -8302,6 +8315,7 @@ def test_load_table_from_json_basic_use(self): client, mock.ANY, self.TABLE_REF, + size=mock.ANY, num_retries=_DEFAULT_NUM_RETRIES, job_id=mock.ANY, job_id_prefix=None, @@ -8353,6 +8367,7 @@ def test_load_table_from_json_non_default_args(self): client, mock.ANY, self.TABLE_REF, + size=mock.ANY, num_retries=_DEFAULT_NUM_RETRIES, job_id=mock.ANY, job_id_prefix=None, From fb401bd94477323bba68cf252dd88166495daf54 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Wed, 14 Oct 2020 10:05:18 -0400 Subject: [PATCH 023/341] feat: add to_api_repr method to Model (#326) --- google/cloud/bigquery/model.py | 8 +++++++ tests/unit/model/test_model.py | 44 ++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/google/cloud/bigquery/model.py b/google/cloud/bigquery/model.py index 092d98c2e..1143b71f9 100644 --- a/google/cloud/bigquery/model.py +++ b/google/cloud/bigquery/model.py @@ -317,6 +317,14 @@ def _build_resource(self, filter_fields): def __repr__(self): return "Model(reference={})".format(repr(self.reference)) + def to_api_repr(self): + """Construct the API resource representation of this model. 
+ + Returns: + Dict[str, object]: Model reference represented as an API resource + """ + return json_format.MessageToDict(self._proto) + class ModelReference(object): """ModelReferences are pointers to models. diff --git a/tests/unit/model/test_model.py b/tests/unit/model/test_model.py index 2c0079429..9fa29a496 100644 --- a/tests/unit/model/test_model.py +++ b/tests/unit/model/test_model.py @@ -318,3 +318,47 @@ def test_repr(target_class): "Model(reference=ModelReference(" "project_id='my-proj', dataset_id='my_dset', model_id='my_model'))" ) + + +def test_to_api_repr(target_class): + from google.protobuf import json_format + + model = target_class("my-proj.my_dset.my_model") + resource = { + "etag": "abcdefg", + "modelReference": { + "projectId": "my-project", + "datasetId": "my_dataset", + "modelId": "my_model", + }, + "creationTime": "1274284800000", + "lastModifiedTime": "1317484800000", + "modelType": "LOGISTIC_REGRESSION", + "trainingRuns": [ + { + "trainingOptions": {"initialLearnRate": 1.0}, + "startTime": "2010-05-19T16:00:00Z", + }, + { + "trainingOptions": {"initialLearnRate": 0.5}, + "startTime": "2011-10-01T16:00:00Z", + }, + { + "trainingOptions": {"initialLearnRate": 0.25}, + "startTime": "2012-12-21T16:00:00Z", + }, + ], + "description": "A friendly description.", + "location": "US", + "friendlyName": "A friendly name.", + "labels": {"greeting": "こんにちは"}, + "expirationTime": "1356105600000", + "encryptionConfiguration": { + "kmsKeyName": "projects/1/locations/us/keyRings/1/cryptoKeys/1" + }, + } + model._proto = json_format.ParseDict( + resource, types.Model()._pb, ignore_unknown_fields=True + ) + got = model.to_api_repr() + assert got == resource From 5178b55682f5e264bfc082cde26acb1fdc953a18 Mon Sep 17 00:00:00 2001 From: Carlos de la Guardia Date: Wed, 14 Oct 2020 13:42:39 -0500 Subject: [PATCH 024/341] feat: allow client options to be set in magics context (#322) * feat: allow client options to be set in magics context * add separate client 
options for storage client --- google/cloud/bigquery/magics/magics.py | 98 ++++++++++++++++++++++++-- tests/unit/test_magics.py | 98 ++++++++++++++++++++++++-- 2 files changed, 188 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 22175ee45..5645a84a5 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -139,6 +139,7 @@ import re import ast +import copy import functools import sys import time @@ -155,6 +156,7 @@ import six from google.api_core import client_info +from google.api_core import client_options from google.api_core.exceptions import NotFound import google.auth from google.cloud import bigquery @@ -178,11 +180,13 @@ def __init__(self): self._project = None self._connection = None self._default_query_job_config = bigquery.QueryJobConfig() + self._bigquery_client_options = client_options.ClientOptions() + self._bqstorage_client_options = client_options.ClientOptions() @property def credentials(self): """google.auth.credentials.Credentials: Credentials to use for queries - performed through IPython magics + performed through IPython magics. Note: These credentials do not need to be explicitly defined if you are @@ -217,7 +221,7 @@ def credentials(self, value): @property def project(self): """str: Default project to use for queries performed through IPython - magics + magics. Note: The project does not need to be explicitly defined if you have an @@ -239,6 +243,54 @@ def project(self): def project(self, value): self._project = value + @property + def bigquery_client_options(self): + """google.api_core.client_options.ClientOptions: client options to be + used through IPython magics. + + Note:: + The client options do not need to be explicitly defined if no + special network connections are required. Normally you would be + using the https://bigquery.googleapis.com/ end point. 
+ + Example: + Manually setting the endpoint: + + >>> from google.cloud.bigquery import magics + >>> client_options = {} + >>> client_options['api_endpoint'] = "https://some.special.url" + >>> magics.context.bigquery_client_options = client_options + """ + return self._bigquery_client_options + + @bigquery_client_options.setter + def bigquery_client_options(self, value): + self._bigquery_client_options = value + + @property + def bqstorage_client_options(self): + """google.api_core.client_options.ClientOptions: client options to be + used through IPython magics for the storage client. + + Note:: + The client options do not need to be explicitly defined if no + special network connections are required. Normally you would be + using the https://bigquerystorage.googleapis.com/ end point. + + Example: + Manually setting the endpoint: + + >>> from google.cloud.bigquery import magics + >>> client_options = {} + >>> client_options['api_endpoint'] = "https://some.special.url" + >>> magics.context.bqstorage_client_options = client_options + """ + return self._bqstorage_client_options + + @bqstorage_client_options.setter + def bqstorage_client_options(self, value): + self._bqstorage_client_options = value + @property def default_query_job_config(self): """google.cloud.bigquery.job.QueryJobConfig: Default job @@ -410,6 +462,24 @@ def _create_dataset_if_necessary(client, dataset_id): "Standard SQL if this argument is not used." ), ) +@magic_arguments.argument( + "--bigquery_api_endpoint", + type=str, + default=None, + help=( + "The desired API endpoint, e.g., bigquery.googlepis.com. Defaults to this " + "option's value in the context bigquery_client_options." + ), +) +@magic_arguments.argument( + "--bqstorage_api_endpoint", + type=str, + default=None, + help=( + "The desired API endpoint, e.g., bigquerystorage.googlepis.com. Defaults to " + "this option's value in the context bqstorage_client_options." 
+ ), +) @magic_arguments.argument( "--use_bqstorage_api", action="store_true", @@ -511,15 +581,34 @@ def _cell_magic(line, query): params = _helpers.to_query_parameters(ast.literal_eval(params_option_value)) project = args.project or context.project + + bigquery_client_options = copy.deepcopy(context.bigquery_client_options) + if args.bigquery_api_endpoint: + if isinstance(bigquery_client_options, dict): + bigquery_client_options["api_endpoint"] = args.bigquery_api_endpoint + else: + bigquery_client_options.api_endpoint = args.bigquery_api_endpoint + client = bigquery.Client( project=project, credentials=context.credentials, default_query_job_config=context.default_query_job_config, client_info=client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT), + client_options=bigquery_client_options, ) if context._connection: client._connection = context._connection - bqstorage_client = _make_bqstorage_client(use_bqstorage_api, context.credentials) + + bqstorage_client_options = copy.deepcopy(context.bqstorage_client_options) + if args.bqstorage_api_endpoint: + if isinstance(bqstorage_client_options, dict): + bqstorage_client_options["api_endpoint"] = args.bqstorage_api_endpoint + else: + bqstorage_client_options.api_endpoint = args.bqstorage_api_endpoint + + bqstorage_client = _make_bqstorage_client( + use_bqstorage_api, context.credentials, bqstorage_client_options, + ) close_transports = functools.partial(_close_transports, client, bqstorage_client) @@ -632,7 +721,7 @@ def _split_args_line(line): return params_option_value, rest_of_args -def _make_bqstorage_client(use_bqstorage_api, credentials): +def _make_bqstorage_client(use_bqstorage_api, credentials, client_options): if not use_bqstorage_api: return None @@ -658,6 +747,7 @@ def _make_bqstorage_client(use_bqstorage_api, credentials): return bigquery_storage.BigQueryReadClient( credentials=credentials, client_info=gapic_client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT), + client_options=client_options, ) diff --git 
a/tests/unit/test_magics.py b/tests/unit/test_magics.py index 20be6b755..30ca4d70c 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -309,7 +309,7 @@ def test__make_bqstorage_client_false(): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) - got = magics._make_bqstorage_client(False, credentials_mock) + got = magics._make_bqstorage_client(False, credentials_mock, {}) assert got is None @@ -320,7 +320,7 @@ def test__make_bqstorage_client_true(): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) - got = magics._make_bqstorage_client(True, credentials_mock) + got = magics._make_bqstorage_client(True, credentials_mock, {}) assert isinstance(got, bigquery_storage.BigQueryReadClient) @@ -330,7 +330,7 @@ def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage): ) with pytest.raises(ImportError) as exc_context, missing_bq_storage: - magics._make_bqstorage_client(True, credentials_mock) + magics._make_bqstorage_client(True, credentials_mock, {}) error_msg = str(exc_context.value) assert "google-cloud-bigquery-storage" in error_msg @@ -347,7 +347,7 @@ def test__make_bqstorage_client_true_missing_gapic(missing_grpcio_lib): ) with pytest.raises(ImportError) as exc_context, missing_grpcio_lib: - magics._make_bqstorage_client(True, credentials_mock) + magics._make_bqstorage_client(True, credentials_mock, {}) assert "grpcio" in str(exc_context.value) @@ -1180,6 +1180,96 @@ def test_bigquery_magic_with_project(): assert magics.context.project == "general-project" +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_with_bigquery_api_endpoint(ipython_ns_cleanup): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._connection = None + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + with run_query_patch as 
run_query_mock: + ip.run_cell_magic( + "bigquery", + "--bigquery_api_endpoint=https://bigquery_api.endpoint.com", + "SELECT 17 as num", + ) + + connection_used = run_query_mock.call_args_list[0][0][0]._connection + assert connection_used.API_BASE_URL == "https://bigquery_api.endpoint.com" + # context client options should not change + assert magics.context.bigquery_client_options.api_endpoint is None + + +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_with_bigquery_api_endpoint_context_dict(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._connection = None + magics.context.bigquery_client_options = {} + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + with run_query_patch as run_query_mock: + ip.run_cell_magic( + "bigquery", + "--bigquery_api_endpoint=https://bigquery_api.endpoint.com", + "SELECT 17 as num", + ) + + connection_used = run_query_mock.call_args_list[0][0][0]._connection + assert connection_used.API_BASE_URL == "https://bigquery_api.endpoint.com" + # context client options should not change + assert magics.context.bigquery_client_options == {} + + +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_with_bqstorage_api_endpoint(ipython_ns_cleanup): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._connection = None + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + with run_query_patch as run_query_mock: + ip.run_cell_magic( + "bigquery", + "--bqstorage_api_endpoint=https://bqstorage_api.endpoint.com", + "SELECT 17 as num", + ) + + client_used = run_query_mock.mock_calls[1][2]["bqstorage_client"] + assert client_used._transport._host == "https://bqstorage_api.endpoint.com" + # context client options should not change + assert magics.context.bqstorage_client_options.api_endpoint 
is None + + +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_with_bqstorage_api_endpoint_context_dict(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._connection = None + magics.context.bqstorage_client_options = {} + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + with run_query_patch as run_query_mock: + ip.run_cell_magic( + "bigquery", + "--bqstorage_api_endpoint=https://bqstorage_api.endpoint.com", + "SELECT 17 as num", + ) + + client_used = run_query_mock.mock_calls[1][2]["bqstorage_client"] + assert client_used._transport._host == "https://bqstorage_api.endpoint.com" + # context client options should not change + assert magics.context.bqstorage_client_options == {} + + @pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_with_multiple_options(): ip = IPython.get_ipython() From d093cd4ee255bbcbe5a6a198e819b876b4aa51f9 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 16 Oct 2020 04:55:34 -0500 Subject: [PATCH 025/341] refactor: update Job classes to use common _properties pattern (#323) Instead of mixing _properties and plain-old Python objects, always use _properties as the source of truth. This has the side-effect of properly reloading the whole job resource. Previously some properties were not reloaded. 
--- google/cloud/bigquery/job.py | 364 ++++++++++++++++++----------------- tests/unit/test_client.py | 69 ++++++- tests/unit/test_job.py | 62 ++---- 3 files changed, 259 insertions(+), 236 deletions(-) diff --git a/google/cloud/bigquery/job.py b/google/cloud/bigquery/job.py index 70db69e71..766db1d42 100644 --- a/google/cloud/bigquery/job.py +++ b/google/cloud/bigquery/job.py @@ -35,6 +35,7 @@ from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import HivePartitioningOptions from google.cloud.bigquery import _helpers +from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.query import _query_param_from_api_repr from google.cloud.bigquery.query import ArrayQueryParameter from google.cloud.bigquery.query import ScalarQueryParameter @@ -47,8 +48,9 @@ from google.cloud.bigquery.table import _EmptyRowIterator from google.cloud.bigquery.table import RangePartitioning from google.cloud.bigquery.table import _table_arg_to_table_ref -from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import Table +from google.cloud.bigquery.table import TableListItem +from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import TimePartitioning _DONE_STATE = "DONE" @@ -461,11 +463,11 @@ def created(self): Optional[datetime.datetime]: the creation time (None until set from the server). """ - statistics = self._properties.get("statistics") - if statistics is not None: - millis = statistics.get("creationTime") - if millis is not None: - return _helpers._datetime_from_microseconds(millis * 1000.0) + millis = _helpers._get_sub_prop( + self._properties, ["statistics", "creationTime"] + ) + if millis is not None: + return _helpers._datetime_from_microseconds(millis * 1000.0) @property def started(self): @@ -475,11 +477,9 @@ def started(self): Optional[datetime.datetime]: the start time (None until set from the server). 
""" - statistics = self._properties.get("statistics") - if statistics is not None: - millis = statistics.get("startTime") - if millis is not None: - return _helpers._datetime_from_microseconds(millis * 1000.0) + millis = _helpers._get_sub_prop(self._properties, ["statistics", "startTime"]) + if millis is not None: + return _helpers._datetime_from_microseconds(millis * 1000.0) @property def ended(self): @@ -489,11 +489,9 @@ def ended(self): Optional[datetime.datetime]: the end time (None until set from the server). """ - statistics = self._properties.get("statistics") - if statistics is not None: - millis = statistics.get("endTime") - if millis is not None: - return _helpers._datetime_from_microseconds(millis * 1000.0) + millis = _helpers._get_sub_prop(self._properties, ["statistics", "endTime"]) + if millis is not None: + return _helpers._datetime_from_microseconds(millis * 1000.0) def _job_statistics(self): """Helper for job-type specific statistics-based properties.""" @@ -535,14 +533,6 @@ def state(self): if status is not None: return status.get("state") - def _scrub_local_properties(self, cleaned): - """Helper: handle subclass properties in cleaned.""" - pass - - def _copy_configuration_properties(self, configuration): - """Helper: assign subclass configuration properties in cleaned.""" - raise NotImplementedError("Abstract") - def _set_properties(self, api_response): """Update properties from resource in body of ``api_response`` @@ -550,7 +540,6 @@ def _set_properties(self, api_response): api_response (Dict): response returned from an API call. """ cleaned = api_response.copy() - self._scrub_local_properties(cleaned) statistics = cleaned.get("statistics", {}) if "creationTime" in statistics: @@ -560,25 +549,24 @@ def _set_properties(self, api_response): if "endTime" in statistics: statistics["endTime"] = float(statistics["endTime"]) + # Save configuration to keep reference same in self._configuration. 
+ cleaned_config = cleaned.pop("configuration", {}) + configuration = self._properties.pop("configuration", {}) self._properties.clear() self._properties.update(cleaned) - self._copy_configuration_properties(cleaned.get("configuration", {})) + self._properties["configuration"] = configuration + self._properties["configuration"].update(cleaned_config) # For Future interface self._set_future_result() @classmethod - def _get_resource_config(cls, resource): + def _check_resource_config(cls, resource): """Helper for :meth:`from_api_repr` Args: resource (Dict): resource for the job. - Returns: - (str, Dict): - tuple (string, dict), where the first element is the - job ID and the second contains job-specific configuration. - Raises: KeyError: If the resource has no identifier, or @@ -589,7 +577,6 @@ def _get_resource_config(cls, resource): "Resource lacks required identity information: " '["jobReference"]["jobId"]' ) - job_id = resource["jobReference"]["jobId"] if ( "configuration" not in resource or cls._JOB_TYPE not in resource["configuration"] @@ -598,7 +585,6 @@ def _get_resource_config(cls, resource): "Resource lacks required configuration: " '["configuration"]["%s"]' % cls._JOB_TYPE ) - return job_id, resource["configuration"] def to_api_repr(self): """Generate a resource for the job.""" @@ -1002,15 +988,15 @@ def from_api_repr(cls, resource): Args: resource (Dict): - An extract job configuration in the same representation as is - returned from the API. + A job configuration in the same representation as is returned + from the API. Returns: google.cloud.bigquery.job._JobConfig: Configuration parsed from ``resource``. 
""" - config = cls() - config._properties = copy.deepcopy(resource) - return config + job_config = cls() + job_config._properties = resource + return job_config class LoadJobConfig(_JobConfig): @@ -1450,12 +1436,23 @@ class LoadJob(_AsyncJob): def __init__(self, job_id, source_uris, destination, client, job_config=None): super(LoadJob, self).__init__(job_id, client) - if job_config is None: + if not job_config: job_config = LoadJobConfig() - self.source_uris = source_uris - self._destination = destination self._configuration = job_config + self._properties["configuration"] = job_config._properties + + if source_uris is not None: + _helpers._set_sub_prop( + self._properties, ["configuration", "load", "sourceUris"], source_uris + ) + + if destination is not None: + _helpers._set_sub_prop( + self._properties, + ["configuration", "load", "destinationTable"], + destination.to_api_repr(), + ) @property def destination(self): @@ -1464,7 +1461,20 @@ def destination(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.destination_table """ - return self._destination + dest_config = _helpers._get_sub_prop( + self._properties, ["configuration", "load", "destinationTable"] + ) + return TableReference.from_api_repr(dest_config) + + @property + def source_uris(self): + """Optional[Sequence[str]]: URIs of data files to be loaded. See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_uris + for supported URI formats. None for jobs that load from a file. 
+ """ + return _helpers._get_sub_prop( + self._properties, ["configuration", "load", "sourceUris"] + ) @property def allow_jagged_rows(self): @@ -1687,24 +1697,12 @@ def output_rows(self): def to_api_repr(self): """Generate a resource for :meth:`_begin`.""" - configuration = self._configuration.to_api_repr() - if self.source_uris is not None: - _helpers._set_sub_prop( - configuration, ["load", "sourceUris"], self.source_uris - ) - _helpers._set_sub_prop( - configuration, ["load", "destinationTable"], self.destination.to_api_repr() - ) - + # Exclude statistics, if set. return { "jobReference": self._properties["jobReference"], - "configuration": configuration, + "configuration": self._properties["configuration"], } - def _copy_configuration_properties(self, configuration): - """Helper: assign subclass configuration properties in cleaned.""" - self._configuration._properties = copy.deepcopy(configuration) - @classmethod def from_api_repr(cls, resource, client): """Factory: construct a job given its API representation @@ -1724,16 +1722,9 @@ def from_api_repr(cls, resource, client): Returns: google.cloud.bigquery.job.LoadJob: Job parsed from ``resource``. """ - config_resource = resource.get("configuration", {}) - config = LoadJobConfig.from_api_repr(config_resource) - # A load job requires a destination table. - dest_config = config_resource["load"]["destinationTable"] - ds_ref = DatasetReference(dest_config["projectId"], dest_config["datasetId"]) - destination = TableReference(ds_ref, dest_config["tableId"]) - # sourceUris will be absent if this is a file upload. 
- source_uris = _helpers._get_sub_prop(config_resource, ["load", "sourceUris"]) + cls._check_resource_config(resource) job_ref = _JobReference._from_api_repr(resource["jobReference"]) - job = cls(job_ref, source_uris, destination, client, config) + job = cls(job_ref, None, None, client) job._set_properties(resource) return job @@ -1824,12 +1815,59 @@ class CopyJob(_AsyncJob): def __init__(self, job_id, sources, destination, client, job_config=None): super(CopyJob, self).__init__(job_id, client) - if job_config is None: + if not job_config: job_config = CopyJobConfig() - self.destination = destination - self.sources = sources self._configuration = job_config + self._properties["configuration"] = job_config._properties + + if destination: + _helpers._set_sub_prop( + self._properties, + ["configuration", "copy", "destinationTable"], + destination.to_api_repr(), + ) + + if sources: + source_resources = [source.to_api_repr() for source in sources] + _helpers._set_sub_prop( + self._properties, + ["configuration", "copy", "sourceTables"], + source_resources, + ) + + @property + def destination(self): + """google.cloud.bigquery.table.TableReference: Table into which data + is to be loaded. + """ + return TableReference.from_api_repr( + _helpers._get_sub_prop( + self._properties, ["configuration", "copy", "destinationTable"], + ) + ) + + @property + def sources(self): + """List[google.cloud.bigquery.table.TableReference]): Table(s) from + which data is to be loaded. 
+ """ + source_configs = _helpers._get_sub_prop( + self._properties, ["configuration", "copy", "sourceTables"] + ) + if source_configs is None: + single = _helpers._get_sub_prop( + self._properties, ["configuration", "copy", "sourceTable"] + ) + if single is None: + raise KeyError("Resource missing 'sourceTables' / 'sourceTable'") + source_configs = [single] + + sources = [] + for source_config in source_configs: + table_ref = TableReference.from_api_repr(source_config) + sources.append(table_ref) + return sources @property def create_disposition(self): @@ -1860,40 +1898,15 @@ def destination_encryption_configuration(self): def to_api_repr(self): """Generate a resource for :meth:`_begin`.""" - - source_refs = [ - { - "projectId": table.project, - "datasetId": table.dataset_id, - "tableId": table.table_id, - } - for table in self.sources - ] - - configuration = self._configuration.to_api_repr() - _helpers._set_sub_prop(configuration, ["copy", "sourceTables"], source_refs) - _helpers._set_sub_prop( - configuration, - ["copy", "destinationTable"], - { - "projectId": self.destination.project, - "datasetId": self.destination.dataset_id, - "tableId": self.destination.table_id, - }, - ) - + # Exclude statistics, if set. return { "jobReference": self._properties["jobReference"], - "configuration": configuration, + "configuration": self._properties["configuration"], } - def _copy_configuration_properties(self, configuration): - """Helper: assign subclass configuration properties in cleaned.""" - self._configuration._properties = copy.deepcopy(configuration) - @classmethod def from_api_repr(cls, resource, client): - """Factory: construct a job given its API representation + """Factory: construct a job given its API representation .. 
note: @@ -1902,7 +1915,6 @@ def from_api_repr(cls, resource, client): Args: resource (Dict): dataset job representation returned from the API - client (google.cloud.bigquery.client.Client): Client which holds credentials and project configuration for the dataset. @@ -1910,22 +1922,9 @@ def from_api_repr(cls, resource, client): Returns: google.cloud.bigquery.job.CopyJob: Job parsed from ``resource``. """ - job_id, config_resource = cls._get_resource_config(resource) - config = CopyJobConfig.from_api_repr(config_resource) - # Copy required fields to the job. - copy_resource = config_resource["copy"] - destination = TableReference.from_api_repr(copy_resource["destinationTable"]) - sources = [] - source_configs = copy_resource.get("sourceTables") - if source_configs is None: - single = copy_resource.get("sourceTable") - if single is None: - raise KeyError("Resource missing 'sourceTables' / 'sourceTable'") - source_configs = [single] - for source_config in source_configs: - table_ref = TableReference.from_api_repr(source_config) - sources.append(table_ref) - job = cls(job_id, sources, destination, client=client, job_config=config) + cls._check_resource_config(resource) + job_ref = _JobReference._from_api_repr(resource["jobReference"]) + job = cls(job_ref, None, None, client=client) job._set_properties(resource) return job @@ -2038,10 +2037,61 @@ def __init__(self, job_id, source, destination_uris, client, job_config=None): if job_config is None: job_config = ExtractJobConfig() - self.source = source - self.destination_uris = destination_uris + self._properties["configuration"] = job_config._properties self._configuration = job_config + if source: + source_ref = { + "projectId": source.project, + "datasetId": source.dataset_id, + } + + if isinstance(source, (Table, TableListItem, TableReference)): + source_ref["tableId"] = source.table_id + source_key = "sourceTable" + else: + source_ref["modelId"] = source.model_id + source_key = "sourceModel" + + 
_helpers._set_sub_prop( + self._properties, ["configuration", "extract", source_key], source_ref + ) + + if destination_uris: + _helpers._set_sub_prop( + self._properties, + ["configuration", "extract", "destinationUris"], + destination_uris, + ) + + @property + def source(self): + """Union[ \ + google.cloud.bigquery.table.TableReference, \ + google.cloud.bigquery.model.ModelReference \ + ]: Table or Model from which data is to be loaded or extracted. + """ + source_config = _helpers._get_sub_prop( + self._properties, ["configuration", "extract", "sourceTable"] + ) + if source_config: + return TableReference.from_api_repr(source_config) + else: + source_config = _helpers._get_sub_prop( + self._properties, ["configuration", "extract", "sourceModel"] + ) + return ModelReference.from_api_repr(source_config) + + @property + def destination_uris(self): + """List[str]: URIs describing where the extracted data will be + written in Cloud Storage, using the format + ``gs://<bucket_name>/<object_name_or_glob>``. + """ + return _helpers._get_sub_prop( + self._properties, ["configuration", "extract", "destinationUris"] + ) + @property def compression(self): """See @@ -2092,34 +2142,12 @@ def destination_uri_file_counts(self): def to_api_repr(self): """Generate a resource for :meth:`_begin`.""" - - configuration = self._configuration.to_api_repr() - source_ref = { - "projectId": self.source.project, - "datasetId": self.source.dataset_id, - } - - source = "sourceTable" - if isinstance(self.source, TableReference): - source_ref["tableId"] = self.source.table_id - else: - source_ref["modelId"] = self.source.model_id - source = "sourceModel" - - _helpers._set_sub_prop(configuration, ["extract", source], source_ref) - _helpers._set_sub_prop( - configuration, ["extract", "destinationUris"], self.destination_uris - ) - + # Exclude statistics, if set.
return { "jobReference": self._properties["jobReference"], - "configuration": configuration, + "configuration": self._properties["configuration"], } - def _copy_configuration_properties(self, configuration): - """Helper: assign subclass configuration properties in cleaned.""" - self._configuration._properties = copy.deepcopy(configuration) - @classmethod def from_api_repr(cls, resource, client): """Factory: construct a job given its API representation @@ -2139,30 +2167,9 @@ def from_api_repr(cls, resource, client): Returns: google.cloud.bigquery.job.ExtractJob: Job parsed from ``resource``. """ - job_id, config_resource = cls._get_resource_config(resource) - config = ExtractJobConfig.from_api_repr(config_resource) - source_config = _helpers._get_sub_prop( - config_resource, ["extract", "sourceTable"] - ) - if source_config: - dataset = DatasetReference( - source_config["projectId"], source_config["datasetId"] - ) - source = dataset.table(source_config["tableId"]) - else: - source_config = _helpers._get_sub_prop( - config_resource, ["extract", "sourceModel"] - ) - dataset = DatasetReference( - source_config["projectId"], source_config["datasetId"] - ) - source = dataset.model(source_config["modelId"]) - - destination_uris = _helpers._get_sub_prop( - config_resource, ["extract", "destinationUris"] - ) - - job = cls(job_id, source, destination_uris, client=client, job_config=config) + cls._check_resource_config(resource) + job_ref = _JobReference._from_api_repr(resource["jobReference"]) + job = cls(job_ref, None, None, client=client) job._set_properties(resource) return job @@ -2631,11 +2638,14 @@ def __init__(self, job_id, query, client, job_config=None): if job_config.use_legacy_sql is None: job_config.use_legacy_sql = False - _helpers._set_sub_prop( - self._properties, ["configuration", "query", "query"], query - ) - + self._properties["configuration"] = job_config._properties self._configuration = job_config + + if query: + _helpers._set_sub_prop( + 
self._properties, ["configuration", "query", "query"], query + ) + self._query_results = None self._done_timeout = None self._transport_timeout = None @@ -2799,19 +2809,13 @@ def schema_update_options(self): def to_api_repr(self): """Generate a resource for :meth:`_begin`.""" + # Use to_api_repr to allow for some configuration properties to be set + # automatically. configuration = self._configuration.to_api_repr() - - resource = { + return { "jobReference": self._properties["jobReference"], "configuration": configuration, } - configuration["query"]["query"] = self.query - - return resource - - def _copy_configuration_properties(self, configuration): - """Helper: assign subclass configuration properties in cleaned.""" - self._configuration._properties = copy.deepcopy(configuration) @classmethod def from_api_repr(cls, resource, client): @@ -2827,9 +2831,9 @@ def from_api_repr(cls, resource, client): Returns: google.cloud.bigquery.job.QueryJob: Job parsed from ``resource``. """ - job_id, config = cls._get_resource_config(resource) - query = _helpers._get_sub_prop(config, ["query", "query"]) - job = cls(job_id, query, client=client) + cls._check_resource_config(resource) + job_ref = _JobReference._from_api_repr(resource["jobReference"]) + job = cls(job_ref, None, client=client) job._set_properties(resource) return job diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 52e00d7c7..bc2658961 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -4266,7 +4266,7 @@ def test_load_table_from_uri(self): self.assertIs(job._client, client) self.assertEqual(job.job_id, JOB) self.assertEqual(list(job.source_uris), [SOURCE_URI]) - self.assertIs(job.destination, destination) + self.assertEqual(job.destination, destination) conn = client._connection = make_connection(RESOURCE) @@ -4275,7 +4275,7 @@ def test_load_table_from_uri(self): self.assertIs(job._client, client) self.assertEqual(job.job_id, JOB) self.assertEqual(list(job.source_uris), 
[SOURCE_URI]) - self.assertIs(job.destination, destination) + self.assertEqual(job.destination, destination) def test_load_table_from_uri_w_explicit_project(self): job_id = "this-is-a-job-id" @@ -4576,16 +4576,67 @@ def test_copy_table(self): self.assertIs(job._client, client) self.assertEqual(job.job_id, JOB) self.assertEqual(list(job.sources), [source]) - self.assertIs(job.destination, destination) + self.assertEqual(job.destination, destination) - conn = client._connection = make_connection(RESOURCE) - source2 = dataset.table(SOURCE + "2") - job = client.copy_table([source, source2], destination, job_id=JOB) + def test_copy_table_w_multiple_sources(self): + from google.cloud.bigquery.job import CopyJob + from google.cloud.bigquery.table import TableReference + + job_id = "job_name" + source_id = "my-project.my_dataset.source_table" + source_id2 = "my-project.my_dataset.source_table2" + destination_id = "my-other-project.another_dataset.destination_table" + expected_resource = { + "jobReference": {"projectId": self.PROJECT, "jobId": job_id}, + "configuration": { + "copy": { + "sourceTables": [ + { + "projectId": "my-project", + "datasetId": "my_dataset", + "tableId": "source_table", + }, + { + "projectId": "my-project", + "datasetId": "my_dataset", + "tableId": "source_table2", + }, + ], + "destinationTable": { + "projectId": "my-other-project", + "datasetId": "another_dataset", + "tableId": "destination_table", + }, + } + }, + } + returned_resource = expected_resource.copy() + returned_resource["statistics"] = {} + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(returned_resource) + + job = client.copy_table([source_id, source_id2], destination_id, job_id=job_id) + + # Check that copy_table actually starts the job. 
+ conn.api_request.assert_called_once_with( + method="POST", + path="/projects/%s/jobs" % self.PROJECT, + data=expected_resource, + timeout=None, + ) self.assertIsInstance(job, CopyJob) self.assertIs(job._client, client) - self.assertEqual(job.job_id, JOB) - self.assertEqual(list(job.sources), [source, source2]) - self.assertIs(job.destination, destination) + self.assertEqual(job.job_id, job_id) + self.assertEqual( + list(sorted(job.sources, key=lambda tbl: tbl.table_id)), + [ + TableReference.from_string(source_id), + TableReference.from_string(source_id2), + ], + ) + self.assertEqual(job.destination, TableReference.from_string(destination_id)) def test_copy_table_w_explicit_project(self): job_id = "this-is-a-job-id" diff --git a/tests/unit/test_job.py b/tests/unit/test_job.py index d21489616..75212ae95 100644 --- a/tests/unit/test_job.py +++ b/tests/unit/test_job.py @@ -455,28 +455,9 @@ def test_state(self): status["state"] = state self.assertEqual(job.state, state) - def test__scrub_local_properties(self): - before = {"foo": "bar"} - resource = before.copy() - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - job._scrub_local_properties(resource) # no raise - self.assertEqual(resource, before) - - def test__copy_configuration_properties(self): - before = {"foo": "bar"} - resource = before.copy() - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - with self.assertRaises(NotImplementedError): - job._copy_configuration_properties(resource) - self.assertEqual(resource, before) - def _set_properties_job(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) - job._scrub_local_properties = mock.Mock() - job._copy_configuration_properties = mock.Mock() job._set_future_result = mock.Mock() job._properties = { "jobReference": job._properties["jobReference"], @@ -493,9 +474,6 @@ def test__set_properties_no_stats(self): self.assertEqual(job._properties, 
resource) - job._scrub_local_properties.assert_called_once_with(resource) - job._copy_configuration_properties.assert_called_once_with(config) - def test__set_properties_w_creation_time(self): now, millis = self._datetime_and_millis() config = {"test": True} @@ -509,9 +487,6 @@ def test__set_properties_w_creation_time(self): cleaned["statistics"]["creationTime"] = float(millis) self.assertEqual(job._properties, cleaned) - job._scrub_local_properties.assert_called_once_with(resource) - job._copy_configuration_properties.assert_called_once_with(config) - def test__set_properties_w_start_time(self): now, millis = self._datetime_and_millis() config = {"test": True} @@ -525,9 +500,6 @@ def test__set_properties_w_start_time(self): cleaned["statistics"]["startTime"] = float(millis) self.assertEqual(job._properties, cleaned) - job._scrub_local_properties.assert_called_once_with(resource) - job._copy_configuration_properties.assert_called_once_with(config) - def test__set_properties_w_end_time(self): now, millis = self._datetime_and_millis() config = {"test": True} @@ -541,38 +513,35 @@ def test__set_properties_w_end_time(self): cleaned["statistics"]["endTime"] = float(millis) self.assertEqual(job._properties, cleaned) - job._scrub_local_properties.assert_called_once_with(resource) - job._copy_configuration_properties.assert_called_once_with(config) - - def test__get_resource_config_missing_job_ref(self): + def test__check_resource_config_missing_job_ref(self): resource = {} klass = self._make_derived_class() with self.assertRaises(KeyError): - klass._get_resource_config(resource) + klass._check_resource_config(resource) - def test__get_resource_config_missing_job_id(self): + def test__check_resource_config_missing_job_id(self): resource = {"jobReference": {}} klass = self._make_derived_class() with self.assertRaises(KeyError): - klass._get_resource_config(resource) + klass._check_resource_config(resource) - def test__get_resource_config_missing_configuration(self): + def 
test__check_resource_config_missing_configuration(self): resource = {"jobReference": {"jobId": self.JOB_ID}} klass = self._make_derived_class() with self.assertRaises(KeyError): - klass._get_resource_config(resource) + klass._check_resource_config(resource) - def test__get_resource_config_missing_config_type(self): + def test__check_resource_config_missing_config_type(self): resource = {"jobReference": {"jobId": self.JOB_ID}, "configuration": {}} klass = self._make_derived_class() with self.assertRaises(KeyError): - klass._get_resource_config(resource) + klass._check_resource_config(resource) - def test__get_resource_config_ok(self): + def test__check_resource_config_ok(self): derived_config = {"foo": "bar"} resource = { "jobReference": {"jobId": self.JOB_ID}, @@ -580,10 +549,8 @@ def test__get_resource_config_ok(self): } klass = self._make_derived_class() - job_id, config = klass._get_resource_config(resource) - - self.assertEqual(job_id, self.JOB_ID) - self.assertEqual(config, {"derived": derived_config}) + # Should not throw. 
+ klass._check_resource_config(resource) def test__build_resource(self): client = _make_client(project=self.PROJECT) @@ -2093,7 +2060,7 @@ def _verifyResourceProperties(self, job, resource): def test_ctor(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - self.assertIs(job.destination, self.TABLE_REF) + self.assertEqual(job.destination, self.TABLE_REF) self.assertEqual(list(job.source_uris), [self.SOURCE1]) self.assertIs(job._client, client) self.assertEqual(job.job_type, self.JOB_TYPE) @@ -2907,7 +2874,7 @@ def test_ctor(self): source = self._table_ref(self.SOURCE_TABLE) destination = self._table_ref(self.DESTINATION_TABLE) job = self._make_one(self.JOB_ID, [source], destination, client) - self.assertIs(job.destination, destination) + self.assertEqual(job.destination, destination) self.assertEqual(job.sources, [source]) self.assertIs(job._client, client) self.assertEqual(job.job_type, self.JOB_TYPE) @@ -3041,8 +3008,9 @@ def test_from_api_repr_wo_sources(self): }, } klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) + _ = job.sources def test_from_api_repr_w_properties(self): from google.cloud.bigquery.job import CreateDisposition From cebb5e0e911e8c9059bc8c9e7fce4440e518bff3 Mon Sep 17 00:00:00 2001 From: Kumar Anirudha <5357586+anistark@users.noreply.github.com> Date: Fri, 16 Oct 2020 20:02:03 +0530 Subject: [PATCH 026/341] deps: add protobuf dependency (#306) Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #305 --- setup.py | 1 + testing/constraints-3.6.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/setup.py b/setup.py index abd5cef95..c7410601e 100644 --- a/setup.py +++ b/setup.py @@ -34,6 +34,7 @@ "google-cloud-core >= 1.4.1, < 2.0dev", "google-resumable-media >= 0.6.0, < 2.0dev", "six >=1.13.0,< 2.0.0dev", + "protobuf >= 3.12.0", ] extras = { "bqstorage": [ diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index 798804941..cea0ed84e 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -8,6 +8,7 @@ libcst==0.2.5 llvmlite==0.34.0 # pandas 0.23.0 is the first version to work with pyarrow to_pandas. pandas==0.23.0 +protobuf == 3.12.0 proto-plus==1.10.0 pyarrow==1.0.0 python-snappy==0.5.4 From 29dd573729102606b6fb3119602faafeb6aa81e7 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Fri, 16 Oct 2020 19:04:02 -0400 Subject: [PATCH 027/341] tests: split out snippets builds (#219) @tmatsuo Emulating PR #207. I don't know if I'm missing anything: e.g., I don't quite understand what the `split_system_tests=True` does in the `synth.py` there. 
Toward #191 --- .kokoro/presubmit/presubmit.cfg | 4 ++++ .kokoro/presubmit/snippets-2.7.cfg | 7 +++++++ .kokoro/presubmit/snippets-3.8.cfg | 7 +++++++ noxfile.py | 10 +++++++--- 4 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 .kokoro/presubmit/snippets-2.7.cfg create mode 100644 .kokoro/presubmit/snippets-3.8.cfg diff --git a/.kokoro/presubmit/presubmit.cfg b/.kokoro/presubmit/presubmit.cfg index b158096f0..17d071cae 100644 --- a/.kokoro/presubmit/presubmit.cfg +++ b/.kokoro/presubmit/presubmit.cfg @@ -5,3 +5,7 @@ env_vars: { key: "RUN_SYSTEM_TESTS" value: "false" } +env_vars: { + key: "RUN_SNIPPETS_TESTS" + value: "false" +} diff --git a/.kokoro/presubmit/snippets-2.7.cfg b/.kokoro/presubmit/snippets-2.7.cfg new file mode 100644 index 000000000..3bd6134d2 --- /dev/null +++ b/.kokoro/presubmit/snippets-2.7.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "snippets-2.7" +} diff --git a/.kokoro/presubmit/snippets-3.8.cfg b/.kokoro/presubmit/snippets-3.8.cfg new file mode 100644 index 000000000..840d9e716 --- /dev/null +++ b/.kokoro/presubmit/snippets-3.8.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "snippets-3.8" +} diff --git a/noxfile.py b/noxfile.py index db1dcffde..441782583 100644 --- a/noxfile.py +++ b/noxfile.py @@ -112,14 +112,18 @@ def system(session): def snippets(session): """Run the snippets test suite.""" - constraints_path = str( - CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" - ) + # Check the value of `RUN_SNIPPETS_TESTS` env var. It defaults to true. + if os.environ.get("RUN_SNIPPETS_TESTS", "true") == "false": + session.skip("RUN_SNIPPETS_TESTS is set to false, skipping") # Sanity check: Only run snippets tests if the environment variable is set. 
if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""): session.skip("Credentials must be set via environment variable.") + constraints_path = str( + CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" + ) + # Install all test dependencies, then install local packages in place. session.install("mock", "pytest", "google-cloud-testutils", "-c", constraints_path) session.install("google-cloud-storage", "-c", constraints_path) From b0dd892176e31ac25fddd15554b5bfa054299d4d Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Mon, 19 Oct 2020 09:45:12 -0400 Subject: [PATCH 028/341] feat: add timeout paramter to load_table_from_file and it dependent methods (#327) --- google/cloud/bigquery/client.py | 71 +++++++++++++++++++++++++++------ tests/unit/test_client.py | 45 +++++++++++++++------ 2 files changed, 92 insertions(+), 24 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index b7e082daa..cce393d6c 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -1591,7 +1591,7 @@ def job_from_resource(self, resource): return job.QueryJob.from_api_repr(resource, self) return job.UnknownJob.from_api_repr(resource, self) - def create_job(self, job_config, retry=DEFAULT_RETRY): + def create_job(self, job_config, retry=DEFAULT_RETRY, timeout=None): """Create a new job. Args: job_config (dict): configuration job representation returned from the API. @@ -1599,6 +1599,9 @@ def create_job(self, job_config, retry=DEFAULT_RETRY): Keyword Arguments: retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. 
Returns: Union[ \ @@ -1617,7 +1620,11 @@ def create_job(self, job_config, retry=DEFAULT_RETRY): destination = _get_sub_prop(job_config, ["load", "destinationTable"]) source_uris = _get_sub_prop(job_config, ["load", "sourceUris"]) return self.load_table_from_uri( - source_uris, destination, job_config=load_job_config, retry=retry + source_uris, + destination, + job_config=load_job_config, + retry=retry, + timeout=timeout, ) elif "copy" in job_config: copy_job_config = google.cloud.bigquery.job.CopyJobConfig.from_api_repr( @@ -1633,7 +1640,11 @@ def create_job(self, job_config, retry=DEFAULT_RETRY): table_ref = TableReference.from_api_repr(source_config) sources.append(table_ref) return self.copy_table( - sources, destination, job_config=copy_job_config, retry=retry + sources, + destination, + job_config=copy_job_config, + retry=retry, + timeout=timeout, ) elif "extract" in job_config: extract_job_config = google.cloud.bigquery.job.ExtractJobConfig.from_api_repr( @@ -1650,6 +1661,7 @@ def create_job(self, job_config, retry=DEFAULT_RETRY): destination_uris, job_config=extract_job_config, retry=retry, + timeout=timeout, source_type=source_type, ) elif "query" in job_config: @@ -1659,7 +1671,9 @@ def create_job(self, job_config, retry=DEFAULT_RETRY): copy_config ) query = _get_sub_prop(copy_config, ["query", "query"]) - return self.query(query, job_config=query_job_config, retry=retry) + return self.query( + query, job_config=query_job_config, retry=retry, timeout=timeout + ) else: raise TypeError("Invalid job configuration received.") @@ -1981,6 +1995,7 @@ def load_table_from_file( location=None, project=None, job_config=None, + timeout=None, ): """Upload the contents of this table from a file-like object. @@ -2020,6 +2035,9 @@ def load_table_from_file( to the client's project. job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]): Extra configuration options for the job. 
+ timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: google.cloud.bigquery.job.LoadJob: A new load job. @@ -2058,11 +2076,11 @@ def load_table_from_file( try: if size is None or size >= _MAX_MULTIPART_SIZE: response = self._do_resumable_upload( - file_obj, job_resource, num_retries + file_obj, job_resource, num_retries, timeout ) else: response = self._do_multipart_upload( - file_obj, job_resource, size, num_retries + file_obj, job_resource, size, num_retries, timeout ) except resumable_media.InvalidResponse as exc: raise exceptions.from_http_response(exc.response) @@ -2080,6 +2098,7 @@ def load_table_from_dataframe( project=None, job_config=None, parquet_compression="snappy", + timeout=None, ): """Upload the contents of a table from a pandas DataFrame. @@ -2143,6 +2162,9 @@ def load_table_from_dataframe( passed as the ``compression`` argument to the underlying ``DataFrame.to_parquet()`` method. https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_parquet.html#pandas.DataFrame.to_parquet + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: google.cloud.bigquery.job.LoadJob: A new load job. @@ -2249,6 +2271,7 @@ def load_table_from_dataframe( location=location, project=project, job_config=job_config, + timeout=timeout, ) finally: @@ -2264,6 +2287,7 @@ def load_table_from_json( location=None, project=None, job_config=None, + timeout=None, ): """Upload the contents of a table from a JSON string or dict. @@ -2313,6 +2337,9 @@ def load_table_from_json( Extra configuration options for the job. The ``source_format`` setting is always set to :attr:`~google.cloud.bigquery.job.SourceFormat.NEWLINE_DELIMITED_JSON`. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. Returns: google.cloud.bigquery.job.LoadJob: A new load job. 
@@ -2357,9 +2384,10 @@ def load_table_from_json( location=location, project=project, job_config=job_config, + timeout=timeout, ) - def _do_resumable_upload(self, stream, metadata, num_retries): + def _do_resumable_upload(self, stream, metadata, num_retries, timeout): """Perform a resumable upload. Args: @@ -2371,13 +2399,17 @@ def _do_resumable_upload(self, stream, metadata, num_retries): Number of upload retries. (Deprecated: This argument will be removed in a future release.) + timeout (float): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + Returns: requests.Response: The "200 OK" response object returned after the final chunk is uploaded. """ upload, transport = self._initiate_resumable_upload( - stream, metadata, num_retries + stream, metadata, num_retries, timeout ) while not upload.finished: @@ -2385,7 +2417,7 @@ def _do_resumable_upload(self, stream, metadata, num_retries): return response - def _initiate_resumable_upload(self, stream, metadata, num_retries): + def _initiate_resumable_upload(self, stream, metadata, num_retries, timeout): """Initiate a resumable upload. Args: @@ -2397,6 +2429,10 @@ def _initiate_resumable_upload(self, stream, metadata, num_retries): Number of upload retries. (Deprecated: This argument will be removed in a future release.) + timeout (float): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + Returns: Tuple: Pair of @@ -2419,12 +2455,17 @@ def _initiate_resumable_upload(self, stream, metadata, num_retries): ) upload.initiate( - transport, stream, metadata, _GENERIC_CONTENT_TYPE, stream_final=False + transport, + stream, + metadata, + _GENERIC_CONTENT_TYPE, + stream_final=False, + timeout=timeout, ) return upload, transport - def _do_multipart_upload(self, stream, metadata, size, num_retries): + def _do_multipart_upload(self, stream, metadata, size, num_retries, timeout): """Perform a multipart upload. 
Args: @@ -2441,6 +2482,10 @@ def _do_multipart_upload(self, stream, metadata, size, num_retries): Number of upload retries. (Deprecated: This argument will be removed in a future release.) + timeout (float): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + Returns: requests.Response: The "200 OK" response object returned after the multipart @@ -2466,7 +2511,9 @@ def _do_multipart_upload(self, stream, metadata, size, num_retries): max_retries=num_retries ) - response = upload.transmit(self._http, data, metadata, _GENERIC_CONTENT_TYPE) + response = upload.transmit( + self._http, data, metadata, _GENERIC_CONTENT_TYPE, timeout=timeout + ) return response diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index bc2658961..2001ad42b 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -4425,7 +4425,7 @@ def _initiate_resumable_upload_helper(self, num_retries=None): job = LoadJob(None, None, self.TABLE_REF, client, job_config=config) metadata = job.to_api_repr() upload, transport = client._initiate_resumable_upload( - stream, metadata, num_retries + stream, metadata, num_retries, None ) # Check the returned values. @@ -4492,7 +4492,9 @@ def _do_multipart_upload_success_helper(self, get_boundary, num_retries=None): job = LoadJob(None, None, self.TABLE_REF, client, job_config=config) metadata = job.to_api_repr() size = len(data) - response = client._do_multipart_upload(stream, metadata, size, num_retries) + response = client._do_multipart_upload( + stream, metadata, size, num_retries, None + ) # Check the mocks and the returned value. 
self.assertIs(response, fake_transport.request.return_value) @@ -7251,7 +7253,7 @@ def test_load_table_from_file_resumable(self): ) do_upload.assert_called_once_with( - file_obj, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES + file_obj, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES, None ) # the original config object should not have been modified @@ -7280,7 +7282,7 @@ def test_load_table_from_file_w_explicit_project(self): expected_resource["jobReference"]["location"] = self.LOCATION expected_resource["jobReference"]["projectId"] = "other-project" do_upload.assert_called_once_with( - file_obj, expected_resource, _DEFAULT_NUM_RETRIES + file_obj, expected_resource, _DEFAULT_NUM_RETRIES, None ) def test_load_table_from_file_w_client_location(self): @@ -7310,7 +7312,7 @@ def test_load_table_from_file_w_client_location(self): expected_resource["jobReference"]["location"] = self.LOCATION expected_resource["jobReference"]["projectId"] = "other-project" do_upload.assert_called_once_with( - file_obj, expected_resource, _DEFAULT_NUM_RETRIES + file_obj, expected_resource, _DEFAULT_NUM_RETRIES, None ) def test_load_table_from_file_resumable_metadata(self): @@ -7368,7 +7370,7 @@ def test_load_table_from_file_resumable_metadata(self): ) do_upload.assert_called_once_with( - file_obj, expected_config, _DEFAULT_NUM_RETRIES + file_obj, expected_config, _DEFAULT_NUM_RETRIES, None ) def test_load_table_from_file_multipart(self): @@ -7392,7 +7394,11 @@ def test_load_table_from_file_multipart(self): ) do_upload.assert_called_once_with( - file_obj, self.EXPECTED_CONFIGURATION, file_obj_size, _DEFAULT_NUM_RETRIES + file_obj, + self.EXPECTED_CONFIGURATION, + file_obj_size, + _DEFAULT_NUM_RETRIES, + None, ) def test_load_table_from_file_with_retries(self): @@ -7413,7 +7419,7 @@ def test_load_table_from_file_with_retries(self): ) do_upload.assert_called_once_with( - file_obj, self.EXPECTED_CONFIGURATION, num_retries + file_obj, self.EXPECTED_CONFIGURATION, num_retries, None ) def 
test_load_table_from_file_with_rewind(self): @@ -7446,7 +7452,7 @@ def test_load_table_from_file_with_readable_gzip(self): ) do_upload.assert_called_once_with( - gzip_file, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES + gzip_file, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES, None ) def test_load_table_from_file_with_writable_gzip(self): @@ -7539,6 +7545,7 @@ def test_load_table_from_dataframe(self): location=None, project=None, job_config=mock.ANY, + timeout=None, ) sent_file = load_table_from_file.mock_calls[0][1][1] @@ -7583,6 +7590,7 @@ def test_load_table_from_dataframe_w_client_location(self): location=self.LOCATION, project=None, job_config=mock.ANY, + timeout=None, ) sent_file = load_table_from_file.mock_calls[0][1][1] @@ -7636,6 +7644,7 @@ def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(sel location=self.LOCATION, project=None, job_config=mock.ANY, + timeout=None, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7691,6 +7700,7 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): location=self.LOCATION, project=None, job_config=mock.ANY, + timeout=None, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7784,6 +7794,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self): location=self.LOCATION, project=None, job_config=mock.ANY, + timeout=None, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7844,6 +7855,7 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): location=self.LOCATION, project=None, job_config=mock.ANY, + timeout=None, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7890,6 +7902,7 @@ def test_load_table_from_dataframe_unknown_table(self): location=None, project=None, job_config=mock.ANY, + timeout=None, ) @unittest.skipIf( @@ -7931,6 +7944,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self): location=self.LOCATION, project=None, 
job_config=mock.ANY, + timeout=None, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7978,6 +7992,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se location=self.LOCATION, project=None, job_config=mock.ANY, + timeout=None, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -8039,6 +8054,7 @@ def test_load_table_from_dataframe_struct_fields(self): location=self.LOCATION, project=None, job_config=mock.ANY, + timeout=None, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -8113,6 +8129,7 @@ def test_load_table_from_dataframe_w_partial_schema(self): location=self.LOCATION, project=None, job_config=mock.ANY, + timeout=None, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -8207,6 +8224,7 @@ def test_load_table_from_dataframe_w_partial_schema_missing_types(self): location=self.LOCATION, project=None, job_config=mock.ANY, + timeout=None, ) assert warned # there should be at least one warning @@ -8320,6 +8338,7 @@ def test_load_table_from_dataframe_w_nulls(self): location=self.LOCATION, project=None, job_config=mock.ANY, + timeout=None, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -8373,6 +8392,7 @@ def test_load_table_from_json_basic_use(self): location=client.location, project=client.project, job_config=mock.ANY, + timeout=None, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -8425,6 +8445,7 @@ def test_load_table_from_json_non_default_args(self): location="EU", project="project-x", job_config=mock.ANY, + timeout=None, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -8499,7 +8520,7 @@ def test__do_resumable_upload(self): client = self._make_client(transport) result = client._do_resumable_upload( - file_obj, self.EXPECTED_CONFIGURATION, None + file_obj, self.EXPECTED_CONFIGURATION, None, None ) content = result.content.decode("utf-8") @@ -8522,7 +8543,7 @@ def 
test__do_multipart_upload(self): file_obj_len = len(file_obj.getvalue()) client._do_multipart_upload( - file_obj, self.EXPECTED_CONFIGURATION, file_obj_len, None + file_obj, self.EXPECTED_CONFIGURATION, file_obj_len, None, None ) # Verify that configuration data was passed in with the initial @@ -8550,7 +8571,7 @@ def test__do_multipart_upload_wrong_size(self): file_obj_len = len(file_obj.getvalue()) with pytest.raises(ValueError): - client._do_multipart_upload(file_obj, {}, file_obj_len + 1, None) + client._do_multipart_upload(file_obj, {}, file_obj_len + 1, None, None) def test_schema_from_json_with_file_path(self): from google.cloud.bigquery.schema import SchemaField From 502a0926018abf058cb84bd18043c25eba15a2cc Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Mon, 19 Oct 2020 07:51:53 -0700 Subject: [PATCH 029/341] feat: add support for listing arima, automl, boosted tree, DNN, and matrix factorization models (#328) * changes without context autosynth cannot find the source of changes triggered by earlier changes in this repository, or by version upgrades to tools such as linters. * chore: update proto definitions for bigquery/v2 to support BQML statistics PiperOrigin-RevId: 337113354 Source-Author: Google APIs Source-Date: Wed Oct 14 10:04:20 2020 -0700 Source-Repo: googleapis/googleapis Source-Sha: 215c12ade72d9d9616457d9b8b2f8a37f38e79f3 Source-Link: https://github.com/googleapis/googleapis/commit/215c12ade72d9d9616457d9b8b2f8a37f38e79f3 * fix: manually revert `type` to `type_` breaking change This should allow us to merge the fixes for `list_models` and avoid a breaking change until `proto-plus` becomes acceptable for our use. 
* feat: add BIGNUMERIC to data type enums Co-authored-by: Tim Swast --- google/cloud/bigquery/enums.py | 2 + google/cloud/bigquery_v2/__init__.py | 2 + .../bigquery_v2/proto/encryption_config.proto | 3 +- google/cloud/bigquery_v2/proto/model.proto | 576 ++++++++++++++++- .../bigquery_v2/proto/model_reference.proto | 3 +- .../bigquery_v2/proto/standard_sql.proto | 6 +- .../bigquery_v2/proto/table_reference.proto | 39 ++ google/cloud/bigquery_v2/types/__init__.py | 2 + google/cloud/bigquery_v2/types/model.py | 603 +++++++++++++++++- .../cloud/bigquery_v2/types/standard_sql.py | 1 + .../bigquery_v2/types/table_reference.py | 51 ++ synth.metadata | 9 +- 12 files changed, 1282 insertions(+), 15 deletions(-) create mode 100644 google/cloud/bigquery_v2/proto/table_reference.proto create mode 100644 google/cloud/bigquery_v2/types/table_reference.py diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 3247372e3..eb33e4276 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -33,6 +33,7 @@ "DATETIME", "GEOGRAPHY", "NUMERIC", + "BIGNUMERIC", ) ) @@ -81,6 +82,7 @@ class SqlTypeNames(str, enum.Enum): FLOAT = "FLOAT" FLOAT64 = "FLOAT" NUMERIC = "NUMERIC" + BIGNUMERIC = "BIGNUMERIC" BOOLEAN = "BOOLEAN" BOOL = "BOOLEAN" GEOGRAPHY = "GEOGRAPHY" # NOTE: not available in legacy types diff --git a/google/cloud/bigquery_v2/__init__.py b/google/cloud/bigquery_v2/__init__.py index c1989c3b0..ebcc26bef 100644 --- a/google/cloud/bigquery_v2/__init__.py +++ b/google/cloud/bigquery_v2/__init__.py @@ -27,6 +27,7 @@ from .types.standard_sql import StandardSqlDataType from .types.standard_sql import StandardSqlField from .types.standard_sql import StandardSqlStructType +from .types.table_reference import TableReference __all__ = ( @@ -41,4 +42,5 @@ "StandardSqlDataType", "StandardSqlField", "StandardSqlStructType", + "TableReference", ) diff --git a/google/cloud/bigquery_v2/proto/encryption_config.proto 
b/google/cloud/bigquery_v2/proto/encryption_config.proto index 54445f0fa..1c0512a17 100644 --- a/google/cloud/bigquery_v2/proto/encryption_config.proto +++ b/google/cloud/bigquery_v2/proto/encryption_config.proto @@ -1,4 +1,4 @@ -// Copyright 2019 Google LLC. +// Copyright 2020 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -11,7 +11,6 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -// syntax = "proto3"; diff --git a/google/cloud/bigquery_v2/proto/model.proto b/google/cloud/bigquery_v2/proto/model.proto index 13d980774..2d400dddd 100644 --- a/google/cloud/bigquery_v2/proto/model.proto +++ b/google/cloud/bigquery_v2/proto/model.proto @@ -1,4 +1,4 @@ -// Copyright 2019 Google LLC. +// Copyright 2020 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -11,7 +11,6 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -// syntax = "proto3"; @@ -22,6 +21,7 @@ import "google/api/field_behavior.proto"; import "google/cloud/bigquery/v2/encryption_config.proto"; import "google/cloud/bigquery/v2/model_reference.proto"; import "google/cloud/bigquery/v2/standard_sql.proto"; +import "google/cloud/bigquery/v2/table_reference.proto"; import "google/protobuf/empty.proto"; import "google/protobuf/timestamp.proto"; import "google/protobuf/wrappers.proto"; @@ -62,6 +62,32 @@ service ModelService { } message Model { + message SeasonalPeriod { + enum SeasonalPeriodType { + SEASONAL_PERIOD_TYPE_UNSPECIFIED = 0; + + // No seasonality + NO_SEASONALITY = 1; + + // Daily period, 24 hours. 
+ DAILY = 2; + + // Weekly period, 7 days. + WEEKLY = 3; + + // Monthly period, 30 days or irregular. + MONTHLY = 4; + + // Quarterly period, 90 days or irregular. + QUARTERLY = 5; + + // Yearly period, 365 days or irregular. + YEARLY = 6; + } + + + } + message KmeansEnums { // Indicates the method used to initialize the centroids for KMeans // clustering algorithm. @@ -74,6 +100,9 @@ message Model { // Initializes the centroids using data specified in // kmeans_initialization_column. CUSTOM = 2; + + // Initializes with kmeans++. + KMEANS_PLUS_PLUS = 3; } @@ -280,6 +309,73 @@ message Model { repeated Cluster clusters = 3; } + // Evaluation metrics used by weighted-ALS models specified by + // feedback_type=implicit. + message RankingMetrics { + // Calculates a precision per user for all the items by ranking them and + // then averages all the precisions across all the users. + google.protobuf.DoubleValue mean_average_precision = 1; + + // Similar to the mean squared error computed in regression and explicit + // recommendation models except instead of computing the rating directly, + // the output from evaluate is computed against a preference which is 1 or 0 + // depending on if the rating exists or not. + google.protobuf.DoubleValue mean_squared_error = 2; + + // A metric to determine the goodness of a ranking calculated from the + // predicted confidence by comparing it to an ideal rank measured by the + // original ratings. + google.protobuf.DoubleValue normalized_discounted_cumulative_gain = 3; + + // Determines the goodness of a ranking by computing the percentile rank + // from the predicted confidence and dividing it by the original rank. + google.protobuf.DoubleValue average_rank = 4; + } + + // Model evaluation metrics for ARIMA forecasting models. + message ArimaForecastingMetrics { + // Model evaluation metrics for a single ARIMA forecasting model. + message ArimaSingleModelForecastingMetrics { + // Non-seasonal order. 
+ ArimaOrder non_seasonal_order = 1; + + // Arima fitting metrics. + ArimaFittingMetrics arima_fitting_metrics = 2; + + // Is arima model fitted with drift or not. It is always false when d + // is not 1. + bool has_drift = 3; + + // The id to indicate different time series. + string time_series_id = 4; + + // Seasonal periods. Repeated because multiple periods are supported + // for one time series. + repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 5; + } + + // Non-seasonal order. + repeated ArimaOrder non_seasonal_order = 1; + + // Arima model fitting metrics. + repeated ArimaFittingMetrics arima_fitting_metrics = 2; + + // Seasonal periods. Repeated because multiple periods are supported for one + // time series. + repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 3; + + // Whether Arima model fitted with drift or not. It is always false when d + // is not 1. + repeated bool has_drift = 4; + + // Id to differentiate different time series for the large-scale case. + repeated string time_series_id = 5; + + // Repeated as there can be many metric sets (one for each model) in + // auto-arima and the large-scale case. + repeated ArimaSingleModelForecastingMetrics arima_single_model_forecasting_metrics = 6; + } + // Evaluation metrics of a model. These are either computed on all training // data or just the eval data based on whether eval data was used during // training. These are not present for imported models. @@ -297,7 +393,71 @@ message Model { // Populated for clustering models. ClusteringMetrics clustering_metrics = 4; + + // Populated for implicit feedback type matrix factorization models. + RankingMetrics ranking_metrics = 5; + + // Populated for ARIMA models. + ArimaForecastingMetrics arima_forecasting_metrics = 6; + } + } + + // Data split result. This contains references to the training and evaluation + // data tables that were used to train the model. 
+ message DataSplitResult { + // Table reference of the training data after split. + TableReference training_table = 1; + + // Table reference of the evaluation data after split. + TableReference evaluation_table = 2; + } + + // Arima order, can be used for both non-seasonal and seasonal parts. + message ArimaOrder { + // Order of the autoregressive part. + int64 p = 1; + + // Order of the differencing part. + int64 d = 2; + + // Order of the moving-average part. + int64 q = 3; + } + + // ARIMA model fitting metrics. + message ArimaFittingMetrics { + // Log-likelihood. + double log_likelihood = 1; + + // AIC. + double aic = 2; + + // Variance. + double variance = 3; + } + + // Global explanations containing the top most important features + // after training. + message GlobalExplanation { + // Explanation for a single feature. + message Explanation { + // Full name of the feature. For non-numerical features, will be + // formatted like .. Overall size of + // feature name will always be truncated to first 120 characters. + string feature_name = 1; + + // Attribution of feature. + google.protobuf.DoubleValue attribution = 2; } + + // A list of the top global explanations. Sorted by absolute value of + // attribution in descending order. + repeated Explanation explanations = 1; + + // Class label for this set of global explanations. Will be empty/null for + // binary logistic and linear regression models. Sorted alphabetically in + // descending order. + string class_label = 2; } // Information about a single training query run for the model. @@ -367,6 +527,12 @@ message Model { // training data. Only applicable for classification models. map label_class_weights = 17; + // User column specified for matrix factorization models. + string user_column = 18; + + // Item column specified for matrix factorization models. + string item_column = 19; + // Distance type for clustering models. 
DistanceType distance_type = 20; @@ -380,12 +546,83 @@ message Model { // Optimization strategy for training linear regression models. OptimizationStrategy optimization_strategy = 23; + // Hidden units for dnn models. + repeated int64 hidden_units = 24; + + // Batch size for dnn models. + int64 batch_size = 25; + + // Dropout probability for dnn models. + google.protobuf.DoubleValue dropout = 26; + + // Maximum depth of a tree for boosted tree models. + int64 max_tree_depth = 27; + + // Subsample fraction of the training data to grow tree to prevent + // overfitting for boosted tree models. + double subsample = 28; + + // Minimum split loss for boosted tree models. + google.protobuf.DoubleValue min_split_loss = 29; + + // Num factors specified for matrix factorization models. + int64 num_factors = 30; + + // Feedback type that specifies which algorithm to run for matrix + // factorization. + FeedbackType feedback_type = 31; + + // Hyperparameter for matrix factoration when implicit feedback type is + // specified. + google.protobuf.DoubleValue wals_alpha = 32; + // The method used to initialize the centroids for kmeans algorithm. KmeansEnums.KmeansInitializationMethod kmeans_initialization_method = 33; // The column used to provide the initial centroids for kmeans algorithm // when kmeans_initialization_method is CUSTOM. string kmeans_initialization_column = 34; + + // Column to be designated as time series timestamp for ARIMA model. + string time_series_timestamp_column = 35; + + // Column to be designated as time series data for ARIMA model. + string time_series_data_column = 36; + + // Whether to enable auto ARIMA or not. + bool auto_arima = 37; + + // A specification of the non-seasonal part of the ARIMA model: the three + // components (p, d, q) are the AR order, the degree of differencing, and + // the MA order. + ArimaOrder non_seasonal_order = 38; + + // The data frequency of a time series. 
+ DataFrequency data_frequency = 39; + + // Include drift when fitting an ARIMA model. + bool include_drift = 41; + + // The geographical region based on which the holidays are considered in + // time series modeling. If a valid value is specified, then holiday + // effects modeling is enabled. + HolidayRegion holiday_region = 42; + + // The id column that will be used to indicate different time series to + // forecast in parallel. + string time_series_id_column = 43; + + // The number of periods ahead that need to be forecasted. + int64 horizon = 44; + + // Whether to preserve the input structs in output feature names. + // Suppose there is a struct A with field b. + // When false (default), the output feature name is A_b. + // When true, the output feature name is A.b. + bool preserve_input_structs = 45; + + // The max value of non-seasonal p and q. + int64 auto_arima_max_order = 46; } // Information about a single iteration of the training run. @@ -403,6 +640,53 @@ message Model { google.protobuf.Int64Value cluster_size = 3; } + // (Auto-)arima fitting result. Wrap everything in ArimaResult for easier + // refactoring if we want to use model-specific iteration results. + message ArimaResult { + // Arima coefficients. + message ArimaCoefficients { + // Auto-regressive coefficients, an array of double. + repeated double auto_regressive_coefficients = 1; + + // Moving-average coefficients, an array of double. + repeated double moving_average_coefficients = 2; + + // Intercept coefficient, just a double not an array. + double intercept_coefficient = 3; + } + + // Arima model information. + message ArimaModelInfo { + // Non-seasonal order. + ArimaOrder non_seasonal_order = 1; + + // Arima coefficients. + ArimaCoefficients arima_coefficients = 2; + + // Arima fitting metrics. + ArimaFittingMetrics arima_fitting_metrics = 3; + + // Whether Arima model fitted with drift or not. It is always false + // when d is not 1. 
+ bool has_drift = 4; + + // The id to indicate different time series. + string time_series_id = 5; + + // Seasonal periods. Repeated because multiple periods are supported + // for one time series. + repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 6; + } + + // This message is repeated because there are multiple arima models + // fitted in auto-arima. For non-auto-arima model, its size is one. + repeated ArimaModelInfo arima_model_info = 1; + + // Seasonal periods. Repeated because multiple periods are supported for + // one time series. + repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 2; + } + // Index of the iteration, 0 based. google.protobuf.Int32Value index = 1; @@ -420,6 +704,8 @@ message Model { // Information about top clusters for clustering models. repeated ClusterInfo cluster_infos = 8; + + ArimaResult arima_result = 9; } // Options that were used for this training run, includes @@ -435,6 +721,15 @@ message Model { // The evaluation metrics over training/eval data that were computed at the // end of training. EvaluationMetrics evaluation_metrics = 7; + + // Data split result of the training run. Only set when the input data is + // actually split. + DataSplitResult data_split_result = 9; + + // Global explanations for important features of the model. For multi-class + // models, there is one entry for each label class. For other models, there + // is only one entry in the list. + repeated GlobalExplanation global_explanations = 10; } // Indicates the type of the Model. @@ -450,8 +745,32 @@ message Model { // K-means clustering model. KMEANS = 3; + // Matrix factorization model. + MATRIX_FACTORIZATION = 4; + + // [Beta] DNN classifier model. + DNN_CLASSIFIER = 5; + // [Beta] An imported TensorFlow model. TENSORFLOW = 6; + + // [Beta] DNN regressor model. + DNN_REGRESSOR = 7; + + // [Beta] Boosted tree regressor model. + BOOSTED_TREE_REGRESSOR = 9; + + // [Beta] Boosted tree classifier model. 
+ BOOSTED_TREE_CLASSIFIER = 10; + + // [Beta] ARIMA model. + ARIMA = 11; + + // [Beta] AutoML Tables regression model. + AUTOML_REGRESSOR = 12; + + // [Beta] AutoML Tables classification model. + AUTOML_CLASSIFIER = 13; } // Loss metric to evaluate model training performance. @@ -497,6 +816,243 @@ message Model { AUTO_SPLIT = 5; } + // Type of supported data frequency for time series forecasting models. + enum DataFrequency { + DATA_FREQUENCY_UNSPECIFIED = 0; + + // Automatically inferred from timestamps. + AUTO_FREQUENCY = 1; + + // Yearly data. + YEARLY = 2; + + // Quarterly data. + QUARTERLY = 3; + + // Monthly data. + MONTHLY = 4; + + // Weekly data. + WEEKLY = 5; + + // Daily data. + DAILY = 6; + + // Hourly data. + HOURLY = 7; + } + + // Type of supported holiday regions for time series forecasting models. + enum HolidayRegion { + // Holiday region unspecified. + HOLIDAY_REGION_UNSPECIFIED = 0; + + // Global. + GLOBAL = 1; + + // North America. + NA = 2; + + // Japan and Asia Pacific: Korea, Greater China, India, Australia, and New + // Zealand. + JAPAC = 3; + + // Europe, the Middle East and Africa. + EMEA = 4; + + // Latin America and the Caribbean. 
+ LAC = 5; + + // United Arab Emirates + AE = 6; + + // Argentina + AR = 7; + + // Austria + AT = 8; + + // Australia + AU = 9; + + // Belgium + BE = 10; + + // Brazil + BR = 11; + + // Canada + CA = 12; + + // Switzerland + CH = 13; + + // Chile + CL = 14; + + // China + CN = 15; + + // Colombia + CO = 16; + + // Czechoslovakia + CS = 17; + + // Czech Republic + CZ = 18; + + // Germany + DE = 19; + + // Denmark + DK = 20; + + // Algeria + DZ = 21; + + // Ecuador + EC = 22; + + // Estonia + EE = 23; + + // Egypt + EG = 24; + + // Spain + ES = 25; + + // Finland + FI = 26; + + // France + FR = 27; + + // Great Britain (United Kingdom) + GB = 28; + + // Greece + GR = 29; + + // Hong Kong + HK = 30; + + // Hungary + HU = 31; + + // Indonesia + ID = 32; + + // Ireland + IE = 33; + + // Israel + IL = 34; + + // India + IN = 35; + + // Iran + IR = 36; + + // Italy + IT = 37; + + // Japan + JP = 38; + + // Korea (South) + KR = 39; + + // Latvia + LV = 40; + + // Morocco + MA = 41; + + // Mexico + MX = 42; + + // Malaysia + MY = 43; + + // Nigeria + NG = 44; + + // Netherlands + NL = 45; + + // Norway + NO = 46; + + // New Zealand + NZ = 47; + + // Peru + PE = 48; + + // Philippines + PH = 49; + + // Pakistan + PK = 50; + + // Poland + PL = 51; + + // Portugal + PT = 52; + + // Romania + RO = 53; + + // Serbia + RS = 54; + + // Russian Federation + RU = 55; + + // Saudi Arabia + SA = 56; + + // Sweden + SE = 57; + + // Singapore + SG = 58; + + // Slovenia + SI = 59; + + // Slovakia + SK = 60; + + // Thailand + TH = 61; + + // Turkey + TR = 62; + + // Taiwan + TW = 63; + + // Ukraine + UA = 64; + + // United States + US = 65; + + // Venezuela + VE = 66; + + // Viet Nam + VN = 67; + + // South Africa + ZA = 68; + } + // Indicates the learning rate optimization strategy to use. enum LearnRateStrategy { LEARN_RATE_STRATEGY_UNSPECIFIED = 0; @@ -519,6 +1075,17 @@ message Model { NORMAL_EQUATION = 2; } + // Indicates the training algorithm to use for matrix factorization models. 
+ enum FeedbackType { + FEEDBACK_TYPE_UNSPECIFIED = 0; + + // Use weighted-als for implicit feedback problems. + IMPLICIT = 1; + + // Use nonweighted-als for explicit feedback problems. + EXPLICIT = 2; + } + // Output only. A hash of this resource. string etag = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; @@ -558,8 +1125,9 @@ message Model { // Custom encryption configuration (e.g., Cloud KMS keys). This shows the // encryption configuration of the model data while stored in BigQuery - // storage. - google.cloud.bigquery.v2.EncryptionConfiguration encryption_configuration = 17; + // storage. This field can be used with PatchModel to update encryption key + // for an already encrypted model. + EncryptionConfiguration encryption_configuration = 17; // Output only. Type of the model resource. ModelType model_type = 7 [(google.api.field_behavior) = OUTPUT_ONLY]; diff --git a/google/cloud/bigquery_v2/proto/model_reference.proto b/google/cloud/bigquery_v2/proto/model_reference.proto index fadd17514..c3d1a49a8 100644 --- a/google/cloud/bigquery_v2/proto/model_reference.proto +++ b/google/cloud/bigquery_v2/proto/model_reference.proto @@ -1,4 +1,4 @@ -// Copyright 2019 Google LLC. +// Copyright 2020 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -11,7 +11,6 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -// syntax = "proto3"; diff --git a/google/cloud/bigquery_v2/proto/standard_sql.proto b/google/cloud/bigquery_v2/proto/standard_sql.proto index ff69dfc4e..1514eccbb 100644 --- a/google/cloud/bigquery_v2/proto/standard_sql.proto +++ b/google/cloud/bigquery_v2/proto/standard_sql.proto @@ -1,4 +1,4 @@ -// Copyright 2019 Google LLC. 
+// Copyright 2020 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -11,7 +11,6 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -// syntax = "proto3"; @@ -73,6 +72,9 @@ message StandardSqlDataType { // Encoded as a decimal string. NUMERIC = 23; + // Encoded as a decimal string. + BIGNUMERIC = 24; + // Encoded as a list with types matching Type.array_type. ARRAY = 16; diff --git a/google/cloud/bigquery_v2/proto/table_reference.proto b/google/cloud/bigquery_v2/proto/table_reference.proto new file mode 100644 index 000000000..ba02f80c4 --- /dev/null +++ b/google/cloud/bigquery_v2/proto/table_reference.proto @@ -0,0 +1,39 @@ +// Copyright 2020 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package google.cloud.bigquery.v2; + +import "google/api/field_behavior.proto"; +import "google/api/annotations.proto"; + +option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; +option java_outer_classname = "TableReferenceProto"; +option java_package = "com.google.cloud.bigquery.v2"; + +message TableReference { + // Required. The ID of the project containing this table. + string project_id = 1 [(google.api.field_behavior) = REQUIRED]; + + // Required. 
The ID of the dataset containing this table. + string dataset_id = 2 [(google.api.field_behavior) = REQUIRED]; + + // Required. The ID of the table. The ID must contain only + // letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum + // length is 1,024 characters. Certain operations allow + // suffixing of the table ID with a partition decorator, such as + // `sample_table$20190123`. + string table_id = 3 [(google.api.field_behavior) = REQUIRED]; +} diff --git a/google/cloud/bigquery_v2/types/__init__.py b/google/cloud/bigquery_v2/types/__init__.py index a8839c74e..1e354641a 100644 --- a/google/cloud/bigquery_v2/types/__init__.py +++ b/google/cloud/bigquery_v2/types/__init__.py @@ -22,6 +22,7 @@ StandardSqlField, StandardSqlStructType, ) +from .table_reference import TableReference from .model import ( Model, GetModelRequest, @@ -38,6 +39,7 @@ "StandardSqlDataType", "StandardSqlField", "StandardSqlStructType", + "TableReference", "Model", "GetModelRequest", "PatchModelRequest", diff --git a/google/cloud/bigquery_v2/types/model.py b/google/cloud/bigquery_v2/types/model.py index a00720d48..3a7bbf43b 100644 --- a/google/cloud/bigquery_v2/types/model.py +++ b/google/cloud/bigquery_v2/types/model.py @@ -21,6 +21,7 @@ from google.cloud.bigquery_v2.types import encryption_config from google.cloud.bigquery_v2.types import model_reference as gcb_model_reference from google.cloud.bigquery_v2.types import standard_sql +from google.cloud.bigquery_v2.types import table_reference from google.protobuf import timestamp_pb2 as timestamp # type: ignore from google.protobuf import wrappers_pb2 as wrappers # type: ignore @@ -84,7 +85,9 @@ class Model(proto.Message): Custom encryption configuration (e.g., Cloud KMS keys). This shows the encryption configuration of the model data while stored in - BigQuery storage. + BigQuery storage. This field can be used with + PatchModel to update encryption key for an + already encrypted model. 
model_type (~.gcb_model.Model.ModelType): Output only. Type of the model resource. training_runs (Sequence[~.gcb_model.Model.TrainingRun]): @@ -105,7 +108,15 @@ class ModelType(proto.Enum): LINEAR_REGRESSION = 1 LOGISTIC_REGRESSION = 2 KMEANS = 3 + MATRIX_FACTORIZATION = 4 + DNN_CLASSIFIER = 5 TENSORFLOW = 6 + DNN_REGRESSOR = 7 + BOOSTED_TREE_REGRESSOR = 9 + BOOSTED_TREE_CLASSIFIER = 10 + ARIMA = 11 + AUTOML_REGRESSOR = 12 + AUTOML_CLASSIFIER = 13 class LossType(proto.Enum): r"""Loss metric to evaluate model training performance.""" @@ -132,6 +143,93 @@ class DataSplitMethod(proto.Enum): NO_SPLIT = 4 AUTO_SPLIT = 5 + class DataFrequency(proto.Enum): + r"""Type of supported data frequency for time series forecasting + models. + """ + DATA_FREQUENCY_UNSPECIFIED = 0 + AUTO_FREQUENCY = 1 + YEARLY = 2 + QUARTERLY = 3 + MONTHLY = 4 + WEEKLY = 5 + DAILY = 6 + HOURLY = 7 + + class HolidayRegion(proto.Enum): + r"""Type of supported holiday regions for time series forecasting + models. + """ + HOLIDAY_REGION_UNSPECIFIED = 0 + GLOBAL = 1 + NA = 2 + JAPAC = 3 + EMEA = 4 + LAC = 5 + AE = 6 + AR = 7 + AT = 8 + AU = 9 + BE = 10 + BR = 11 + CA = 12 + CH = 13 + CL = 14 + CN = 15 + CO = 16 + CS = 17 + CZ = 18 + DE = 19 + DK = 20 + DZ = 21 + EC = 22 + EE = 23 + EG = 24 + ES = 25 + FI = 26 + FR = 27 + GB = 28 + GR = 29 + HK = 30 + HU = 31 + ID = 32 + IE = 33 + IL = 34 + IN = 35 + IR = 36 + IT = 37 + JP = 38 + KR = 39 + LV = 40 + MA = 41 + MX = 42 + MY = 43 + NG = 44 + NL = 45 + NO = 46 + NZ = 47 + PE = 48 + PH = 49 + PK = 50 + PL = 51 + PT = 52 + RO = 53 + RS = 54 + RU = 55 + SA = 56 + SE = 57 + SG = 58 + SI = 59 + SK = 60 + TH = 61 + TR = 62 + TW = 63 + UA = 64 + US = 65 + VE = 66 + VN = 67 + ZA = 68 + class LearnRateStrategy(proto.Enum): r"""Indicates the learning rate optimization strategy to use.""" LEARN_RATE_STRATEGY_UNSPECIFIED = 0 @@ -144,6 +242,27 @@ class OptimizationStrategy(proto.Enum): BATCH_GRADIENT_DESCENT = 1 NORMAL_EQUATION = 2 + class FeedbackType(proto.Enum): + 
r"""Indicates the training algorithm to use for matrix + factorization models. + """ + FEEDBACK_TYPE_UNSPECIFIED = 0 + IMPLICIT = 1 + EXPLICIT = 2 + + class SeasonalPeriod(proto.Message): + r"""""" + + class SeasonalPeriodType(proto.Enum): + r"""""" + SEASONAL_PERIOD_TYPE_UNSPECIFIED = 0 + NO_SEASONALITY = 1 + DAILY = 2 + WEEKLY = 3 + MONTHLY = 4 + QUARTERLY = 5 + YEARLY = 6 + class KmeansEnums(proto.Message): r"""""" @@ -154,6 +273,7 @@ class KmeansInitializationMethod(proto.Enum): KMEANS_INITIALIZATION_METHOD_UNSPECIFIED = 0 RANDOM = 1 CUSTOM = 2 + KMEANS_PLUS_PLUS = 3 class RegressionMetrics(proto.Message): r"""Evaluation metrics for regression and explicit feedback type @@ -529,6 +649,129 @@ class CategoryCount(proto.Message): proto.MESSAGE, number=3, message="Model.ClusteringMetrics.Cluster", ) + class RankingMetrics(proto.Message): + r"""Evaluation metrics used by weighted-ALS models specified by + feedback_type=implicit. + + Attributes: + mean_average_precision (~.wrappers.DoubleValue): + Calculates a precision per user for all the + items by ranking them and then averages all the + precisions across all the users. + mean_squared_error (~.wrappers.DoubleValue): + Similar to the mean squared error computed in + regression and explicit recommendation models + except instead of computing the rating directly, + the output from evaluate is computed against a + preference which is 1 or 0 depending on if the + rating exists or not. + normalized_discounted_cumulative_gain (~.wrappers.DoubleValue): + A metric to determine the goodness of a + ranking calculated from the predicted confidence + by comparing it to an ideal rank measured by the + original ratings. + average_rank (~.wrappers.DoubleValue): + Determines the goodness of a ranking by + computing the percentile rank from the predicted + confidence and dividing it by the original rank. 
+ """ + + mean_average_precision = proto.Field( + proto.MESSAGE, number=1, message=wrappers.DoubleValue, + ) + + mean_squared_error = proto.Field( + proto.MESSAGE, number=2, message=wrappers.DoubleValue, + ) + + normalized_discounted_cumulative_gain = proto.Field( + proto.MESSAGE, number=3, message=wrappers.DoubleValue, + ) + + average_rank = proto.Field( + proto.MESSAGE, number=4, message=wrappers.DoubleValue, + ) + + class ArimaForecastingMetrics(proto.Message): + r"""Model evaluation metrics for ARIMA forecasting models. + + Attributes: + non_seasonal_order (Sequence[~.gcb_model.Model.ArimaOrder]): + Non-seasonal order. + arima_fitting_metrics (Sequence[~.gcb_model.Model.ArimaFittingMetrics]): + Arima model fitting metrics. + seasonal_periods (Sequence[~.gcb_model.Model.SeasonalPeriod.SeasonalPeriodType]): + Seasonal periods. Repeated because multiple + periods are supported for one time series. + has_drift (Sequence[bool]): + Whether Arima model fitted with drift or not. + It is always false when d is not 1. + time_series_id (Sequence[str]): + Id to differentiate different time series for + the large-scale case. + arima_single_model_forecasting_metrics (Sequence[~.gcb_model.Model.ArimaForecastingMetrics.ArimaSingleModelForecastingMetrics]): + Repeated as there can be many metric sets + (one for each model) in auto-arima and the + large-scale case. + """ + + class ArimaSingleModelForecastingMetrics(proto.Message): + r"""Model evaluation metrics for a single ARIMA forecasting + model. + + Attributes: + non_seasonal_order (~.gcb_model.Model.ArimaOrder): + Non-seasonal order. + arima_fitting_metrics (~.gcb_model.Model.ArimaFittingMetrics): + Arima fitting metrics. + has_drift (bool): + Is arima model fitted with drift or not. It + is always false when d is not 1. + time_series_id (str): + The id to indicate different time series. + seasonal_periods (Sequence[~.gcb_model.Model.SeasonalPeriod.SeasonalPeriodType]): + Seasonal periods. 
Repeated because multiple + periods are supported for one time series. + """ + + non_seasonal_order = proto.Field( + proto.MESSAGE, number=1, message="Model.ArimaOrder", + ) + + arima_fitting_metrics = proto.Field( + proto.MESSAGE, number=2, message="Model.ArimaFittingMetrics", + ) + + has_drift = proto.Field(proto.BOOL, number=3) + + time_series_id = proto.Field(proto.STRING, number=4) + + seasonal_periods = proto.RepeatedField( + proto.ENUM, number=5, enum="Model.SeasonalPeriod.SeasonalPeriodType", + ) + + non_seasonal_order = proto.RepeatedField( + proto.MESSAGE, number=1, message="Model.ArimaOrder", + ) + + arima_fitting_metrics = proto.RepeatedField( + proto.MESSAGE, number=2, message="Model.ArimaFittingMetrics", + ) + + seasonal_periods = proto.RepeatedField( + proto.ENUM, number=3, enum="Model.SeasonalPeriod.SeasonalPeriodType", + ) + + has_drift = proto.RepeatedField(proto.BOOL, number=4) + + time_series_id = proto.RepeatedField(proto.STRING, number=5) + + arima_single_model_forecasting_metrics = proto.RepeatedField( + proto.MESSAGE, + number=6, + message="Model.ArimaForecastingMetrics.ArimaSingleModelForecastingMetrics", + ) + class EvaluationMetrics(proto.Message): r"""Evaluation metrics of a model. These are either computed on all training data or just the eval data based on whether eval @@ -547,6 +790,11 @@ class EvaluationMetrics(proto.Message): classification/classifier models. clustering_metrics (~.gcb_model.Model.ClusteringMetrics): Populated for clustering models. + ranking_metrics (~.gcb_model.Model.RankingMetrics): + Populated for implicit feedback type matrix + factorization models. + arima_forecasting_metrics (~.gcb_model.Model.ArimaForecastingMetrics): + Populated for ARIMA models. 
""" regression_metrics = proto.Field( @@ -571,6 +819,116 @@ class EvaluationMetrics(proto.Message): proto.MESSAGE, number=4, oneof="metrics", message="Model.ClusteringMetrics", ) + ranking_metrics = proto.Field( + proto.MESSAGE, number=5, oneof="metrics", message="Model.RankingMetrics", + ) + + arima_forecasting_metrics = proto.Field( + proto.MESSAGE, + number=6, + oneof="metrics", + message="Model.ArimaForecastingMetrics", + ) + + class DataSplitResult(proto.Message): + r"""Data split result. This contains references to the training + and evaluation data tables that were used to train the model. + + Attributes: + training_table (~.table_reference.TableReference): + Table reference of the training data after + split. + evaluation_table (~.table_reference.TableReference): + Table reference of the evaluation data after + split. + """ + + training_table = proto.Field( + proto.MESSAGE, number=1, message=table_reference.TableReference, + ) + + evaluation_table = proto.Field( + proto.MESSAGE, number=2, message=table_reference.TableReference, + ) + + class ArimaOrder(proto.Message): + r"""Arima order, can be used for both non-seasonal and seasonal + parts. + + Attributes: + p (int): + Order of the autoregressive part. + d (int): + Order of the differencing part. + q (int): + Order of the moving-average part. + """ + + p = proto.Field(proto.INT64, number=1) + + d = proto.Field(proto.INT64, number=2) + + q = proto.Field(proto.INT64, number=3) + + class ArimaFittingMetrics(proto.Message): + r"""ARIMA model fitting metrics. + + Attributes: + log_likelihood (float): + Log-likelihood. + aic (float): + AIC. + variance (float): + Variance. + """ + + log_likelihood = proto.Field(proto.DOUBLE, number=1) + + aic = proto.Field(proto.DOUBLE, number=2) + + variance = proto.Field(proto.DOUBLE, number=3) + + class GlobalExplanation(proto.Message): + r"""Global explanations containing the top most important + features after training. 
+ + Attributes: + explanations (Sequence[~.gcb_model.Model.GlobalExplanation.Explanation]): + A list of the top global explanations. Sorted + by absolute value of attribution in descending + order. + class_label (str): + Class label for this set of global + explanations. Will be empty/null for binary + logistic and linear regression models. Sorted + alphabetically in descending order. + """ + + class Explanation(proto.Message): + r"""Explanation for a single feature. + + Attributes: + feature_name (str): + Full name of the feature. For non-numerical features, will + be formatted like <column_name>.<encoded_feature_name>. + Overall size of feature name will always be truncated to + first 120 characters. + attribution (~.wrappers.DoubleValue): + Attribution of feature. + """ + + feature_name = proto.Field(proto.STRING, number=1) + + attribution = proto.Field( + proto.MESSAGE, number=2, message=wrappers.DoubleValue, + ) + + explanations = proto.RepeatedField( + proto.MESSAGE, number=1, message="Model.GlobalExplanation.Explanation", + ) + + class_label = proto.Field(proto.STRING, number=2) + class TrainingRun(proto.Message): r"""Information about a single training query run for the model. @@ -587,6 +945,14 @@ class TrainingRun(proto.Message): evaluation_metrics (~.gcb_model.Model.EvaluationMetrics): The evaluation metrics over training/eval data that were computed at the end of training. + data_split_result (~.gcb_model.Model.DataSplitResult): + Data split result of the training run. Only + set when the input data is actually split. + global_explanations (Sequence[~.gcb_model.Model.GlobalExplanation]): + Global explanations for important features of + the model. For multi-class models, there is one + entry for each label class. For other models, + there is only one entry in the list. """ class TrainingOptions(proto.Message): @@ -651,6 +1017,12 @@ class TrainingOptions(proto.Message): Weights associated with each label class, for rebalancing the training data. Only applicable for classification models.
+ user_column (str): + User column specified for matrix + factorization models. + item_column (str): + Item column specified for matrix + factorization models. distance_type (~.gcb_model.Model.DistanceType): Distance type for clustering models. num_clusters (int): @@ -661,12 +1033,71 @@ class TrainingOptions(proto.Message): optimization_strategy (~.gcb_model.Model.OptimizationStrategy): Optimization strategy for training linear regression models. + hidden_units (Sequence[int]): + Hidden units for dnn models. + batch_size (int): + Batch size for dnn models. + dropout (~.wrappers.DoubleValue): + Dropout probability for dnn models. + max_tree_depth (int): + Maximum depth of a tree for boosted tree + models. + subsample (float): + Subsample fraction of the training data to + grow tree to prevent overfitting for boosted + tree models. + min_split_loss (~.wrappers.DoubleValue): + Minimum split loss for boosted tree models. + num_factors (int): + Num factors specified for matrix + factorization models. + feedback_type (~.gcb_model.Model.FeedbackType): + Feedback type that specifies which algorithm + to run for matrix factorization. + wals_alpha (~.wrappers.DoubleValue): + Hyperparameter for matrix factoration when + implicit feedback type is specified. kmeans_initialization_method (~.gcb_model.Model.KmeansEnums.KmeansInitializationMethod): The method used to initialize the centroids for kmeans algorithm. kmeans_initialization_column (str): The column used to provide the initial centroids for kmeans algorithm when kmeans_initialization_method is CUSTOM. + time_series_timestamp_column (str): + Column to be designated as time series + timestamp for ARIMA model. + time_series_data_column (str): + Column to be designated as time series data + for ARIMA model. + auto_arima (bool): + Whether to enable auto ARIMA or not. 
+ non_seasonal_order (~.gcb_model.Model.ArimaOrder): + A specification of the non-seasonal part of + the ARIMA model: the three components (p, d, q) + are the AR order, the degree of differencing, + and the MA order. + data_frequency (~.gcb_model.Model.DataFrequency): + The data frequency of a time series. + include_drift (bool): + Include drift when fitting an ARIMA model. + holiday_region (~.gcb_model.Model.HolidayRegion): + The geographical region based on which the + holidays are considered in time series modeling. + If a valid value is specified, then holiday + effects modeling is enabled. + time_series_id_column (str): + The id column that will be used to indicate + different time series to forecast in parallel. + horizon (int): + The number of periods ahead that need to be + forecasted. + preserve_input_structs (bool): + Whether to preserve the input structs in output feature + names. Suppose there is a struct A with field b. When false + (default), the output feature name is A_b. When true, the + output feature name is A.b. + auto_arima_max_order (int): + The max value of non-seasonal p and q. 
""" max_iterations = proto.Field(proto.INT64, number=1) @@ -713,6 +1144,10 @@ class TrainingOptions(proto.Message): label_class_weights = proto.MapField(proto.STRING, proto.DOUBLE, number=17) + user_column = proto.Field(proto.STRING, number=18) + + item_column = proto.Field(proto.STRING, number=19) + distance_type = proto.Field( proto.ENUM, number=20, enum="Model.DistanceType", ) @@ -725,6 +1160,32 @@ class TrainingOptions(proto.Message): proto.ENUM, number=23, enum="Model.OptimizationStrategy", ) + hidden_units = proto.RepeatedField(proto.INT64, number=24) + + batch_size = proto.Field(proto.INT64, number=25) + + dropout = proto.Field( + proto.MESSAGE, number=26, message=wrappers.DoubleValue, + ) + + max_tree_depth = proto.Field(proto.INT64, number=27) + + subsample = proto.Field(proto.DOUBLE, number=28) + + min_split_loss = proto.Field( + proto.MESSAGE, number=29, message=wrappers.DoubleValue, + ) + + num_factors = proto.Field(proto.INT64, number=30) + + feedback_type = proto.Field( + proto.ENUM, number=31, enum="Model.FeedbackType", + ) + + wals_alpha = proto.Field( + proto.MESSAGE, number=32, message=wrappers.DoubleValue, + ) + kmeans_initialization_method = proto.Field( proto.ENUM, number=33, @@ -733,6 +1194,34 @@ class TrainingOptions(proto.Message): kmeans_initialization_column = proto.Field(proto.STRING, number=34) + time_series_timestamp_column = proto.Field(proto.STRING, number=35) + + time_series_data_column = proto.Field(proto.STRING, number=36) + + auto_arima = proto.Field(proto.BOOL, number=37) + + non_seasonal_order = proto.Field( + proto.MESSAGE, number=38, message="Model.ArimaOrder", + ) + + data_frequency = proto.Field( + proto.ENUM, number=39, enum="Model.DataFrequency", + ) + + include_drift = proto.Field(proto.BOOL, number=41) + + holiday_region = proto.Field( + proto.ENUM, number=42, enum="Model.HolidayRegion", + ) + + time_series_id_column = proto.Field(proto.STRING, number=43) + + horizon = proto.Field(proto.INT64, number=44) + + 
preserve_input_structs = proto.Field(proto.BOOL, number=45) + + auto_arima_max_order = proto.Field(proto.INT64, number=46) + class IterationResult(proto.Message): r"""Information about a single iteration of the training run. @@ -753,6 +1242,8 @@ class IterationResult(proto.Message): cluster_infos (Sequence[~.gcb_model.Model.TrainingRun.IterationResult.ClusterInfo]): Information about top clusters for clustering models. + arima_result (~.gcb_model.Model.TrainingRun.IterationResult.ArimaResult): + """ class ClusterInfo(proto.Message): @@ -779,6 +1270,102 @@ class ClusterInfo(proto.Message): proto.MESSAGE, number=3, message=wrappers.Int64Value, ) + class ArimaResult(proto.Message): + r"""(Auto-)arima fitting result. Wrap everything in ArimaResult + for easier refactoring if we want to use model-specific + iteration results. + + Attributes: + arima_model_info (Sequence[~.gcb_model.Model.TrainingRun.IterationResult.ArimaResult.ArimaModelInfo]): + This message is repeated because there are + multiple arima models fitted in auto-arima. For + non-auto-arima model, its size is one. + seasonal_periods (Sequence[~.gcb_model.Model.SeasonalPeriod.SeasonalPeriodType]): + Seasonal periods. Repeated because multiple + periods are supported for one time series. + """ + + class ArimaCoefficients(proto.Message): + r"""Arima coefficients. + + Attributes: + auto_regressive_coefficients (Sequence[float]): + Auto-regressive coefficients, an array of + double. + moving_average_coefficients (Sequence[float]): + Moving-average coefficients, an array of + double. + intercept_coefficient (float): + Intercept coefficient, just a double not an + array. + """ + + auto_regressive_coefficients = proto.RepeatedField( + proto.DOUBLE, number=1 + ) + + moving_average_coefficients = proto.RepeatedField( + proto.DOUBLE, number=2 + ) + + intercept_coefficient = proto.Field(proto.DOUBLE, number=3) + + class ArimaModelInfo(proto.Message): + r"""Arima model information. 
+ + Attributes: + non_seasonal_order (~.gcb_model.Model.ArimaOrder): + Non-seasonal order. + arima_coefficients (~.gcb_model.Model.TrainingRun.IterationResult.ArimaResult.ArimaCoefficients): + Arima coefficients. + arima_fitting_metrics (~.gcb_model.Model.ArimaFittingMetrics): + Arima fitting metrics. + has_drift (bool): + Whether Arima model fitted with drift or not. + It is always false when d is not 1. + time_series_id (str): + The id to indicate different time series. + seasonal_periods (Sequence[~.gcb_model.Model.SeasonalPeriod.SeasonalPeriodType]): + Seasonal periods. Repeated because multiple + periods are supported for one time series. + """ + + non_seasonal_order = proto.Field( + proto.MESSAGE, number=1, message="Model.ArimaOrder", + ) + + arima_coefficients = proto.Field( + proto.MESSAGE, + number=2, + message="Model.TrainingRun.IterationResult.ArimaResult.ArimaCoefficients", + ) + + arima_fitting_metrics = proto.Field( + proto.MESSAGE, number=3, message="Model.ArimaFittingMetrics", + ) + + has_drift = proto.Field(proto.BOOL, number=4) + + time_series_id = proto.Field(proto.STRING, number=5) + + seasonal_periods = proto.RepeatedField( + proto.ENUM, + number=6, + enum="Model.SeasonalPeriod.SeasonalPeriodType", + ) + + arima_model_info = proto.RepeatedField( + proto.MESSAGE, + number=1, + message="Model.TrainingRun.IterationResult.ArimaResult.ArimaModelInfo", + ) + + seasonal_periods = proto.RepeatedField( + proto.ENUM, + number=2, + enum="Model.SeasonalPeriod.SeasonalPeriodType", + ) + index = proto.Field(proto.MESSAGE, number=1, message=wrappers.Int32Value,) duration_ms = proto.Field( @@ -801,6 +1388,12 @@ class ClusterInfo(proto.Message): message="Model.TrainingRun.IterationResult.ClusterInfo", ) + arima_result = proto.Field( + proto.MESSAGE, + number=9, + message="Model.TrainingRun.IterationResult.ArimaResult", + ) + training_options = proto.Field( proto.MESSAGE, number=1, message="Model.TrainingRun.TrainingOptions", ) @@ -815,6 +1408,14 @@ class 
ClusterInfo(proto.Message): proto.MESSAGE, number=7, message="Model.EvaluationMetrics", ) + data_split_result = proto.Field( + proto.MESSAGE, number=9, message="Model.DataSplitResult", + ) + + global_explanations = proto.RepeatedField( + proto.MESSAGE, number=10, message="Model.GlobalExplanation", + ) + etag = proto.Field(proto.STRING, number=1) model_reference = proto.Field( diff --git a/google/cloud/bigquery_v2/types/standard_sql.py b/google/cloud/bigquery_v2/types/standard_sql.py index 72f12f284..1a32a3c75 100644 --- a/google/cloud/bigquery_v2/types/standard_sql.py +++ b/google/cloud/bigquery_v2/types/standard_sql.py @@ -58,6 +58,7 @@ class TypeKind(proto.Enum): DATETIME = 21 GEOGRAPHY = 22 NUMERIC = 23 + BIGNUMERIC = 24 ARRAY = 16 STRUCT = 17 diff --git a/google/cloud/bigquery_v2/types/table_reference.py b/google/cloud/bigquery_v2/types/table_reference.py new file mode 100644 index 000000000..d213e8bb6 --- /dev/null +++ b/google/cloud/bigquery_v2/types/table_reference.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import proto # type: ignore + + +__protobuf__ = proto.module( + package="google.cloud.bigquery.v2", manifest={"TableReference",}, +) + + +class TableReference(proto.Message): + r""" + + Attributes: + project_id (str): + Required. The ID of the project containing + this table. + dataset_id (str): + Required. The ID of the dataset containing + this table. 
+ table_id (str): + Required. The ID of the table. The ID must contain only + letters (a-z, A-Z), numbers (0-9), or underscores (_). The + maximum length is 1,024 characters. Certain operations allow + suffixing of the table ID with a partition decorator, such + as ``sample_table$20190123``. + """ + + project_id = proto.Field(proto.STRING, number=1) + + dataset_id = proto.Field(proto.STRING, number=2) + + table_id = proto.Field(proto.STRING, number=3) + + +__all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/synth.metadata b/synth.metadata index 008810630..db77e463d 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,15 +4,15 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "31644d380b35a76a9147801a4b6b0271c246fd0c" + "sha": "5178b55682f5e264bfc082cde26acb1fdc953a18" } }, { "git": { "name": "googleapis", "remote": "https://github.com/googleapis/googleapis.git", - "sha": "c941026e5e3d600817a20e9ab4d4be03dff21a68", - "internalRef": "334645418" + "sha": "215c12ade72d9d9616457d9b8b2f8a37f38e79f3", + "internalRef": "337113354" } }, { @@ -101,18 +101,19 @@ "google/cloud/bigquery_v2/proto/model.proto", "google/cloud/bigquery_v2/proto/model_reference.proto", "google/cloud/bigquery_v2/proto/standard_sql.proto", + "google/cloud/bigquery_v2/proto/table_reference.proto", "google/cloud/bigquery_v2/py.typed", "google/cloud/bigquery_v2/types/__init__.py", "google/cloud/bigquery_v2/types/encryption_config.py", "google/cloud/bigquery_v2/types/model.py", "google/cloud/bigquery_v2/types/model_reference.py", "google/cloud/bigquery_v2/types/standard_sql.py", + "google/cloud/bigquery_v2/types/table_reference.py", "mypy.ini", "renovate.json", "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "scripts/decrypt-secrets.sh", - "scripts/fixup_bigquery_v2_keywords.py", "scripts/readme-gen/readme_gen.py", "scripts/readme-gen/templates/README.tmpl.rst", "scripts/readme-gen/templates/auth.tmpl.rst", From 
82290c365e6b18e9d5c3a94a312f0326df9354bc Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 19 Oct 2020 15:04:03 +0000 Subject: [PATCH 030/341] chore: release 2.2.0 (#321) :robot: I have created a release \*beep\* \*boop\* --- ## [2.2.0](https://www.github.com/googleapis/python-bigquery/compare/v2.1.0...v2.2.0) (2020-10-19) ### Features * add method api_repr for table list item ([#299](https://www.github.com/googleapis/python-bigquery/issues/299)) ([07c70f0](https://www.github.com/googleapis/python-bigquery/commit/07c70f0292f9212f0c968cd5c9206e8b0409c0da)) * add support for listing arima, automl, boosted tree, DNN, and matrix factorization models ([#328](https://www.github.com/googleapis/python-bigquery/issues/328)) ([502a092](https://www.github.com/googleapis/python-bigquery/commit/502a0926018abf058cb84bd18043c25eba15a2cc)) * add timeout paramter to load_table_from_file and it dependent methods ([#327](https://www.github.com/googleapis/python-bigquery/issues/327)) ([b0dd892](https://www.github.com/googleapis/python-bigquery/commit/b0dd892176e31ac25fddd15554b5bfa054299d4d)) * add to_api_repr method to Model ([#326](https://www.github.com/googleapis/python-bigquery/issues/326)) ([fb401bd](https://www.github.com/googleapis/python-bigquery/commit/fb401bd94477323bba68cf252dd88166495daf54)) * allow client options to be set in magics context ([#322](https://www.github.com/googleapis/python-bigquery/issues/322)) ([5178b55](https://www.github.com/googleapis/python-bigquery/commit/5178b55682f5e264bfc082cde26acb1fdc953a18)) ### Bug Fixes * make TimePartitioning repr evaluable ([#110](https://www.github.com/googleapis/python-bigquery/issues/110)) ([20f473b](https://www.github.com/googleapis/python-bigquery/commit/20f473bfff5ae98377f5d9cdf18bfe5554d86ff4)), closes [#109](https://www.github.com/googleapis/python-bigquery/issues/109) * use version.py instead of pkg_resources.get_distribution 
([#307](https://www.github.com/googleapis/python-bigquery/issues/307)) ([b8f502b](https://www.github.com/googleapis/python-bigquery/commit/b8f502b14f21d1815697e4d57cf1225dfb4a7c5e)) ### Performance Improvements * add size parameter for load table from dataframe and json methods ([#280](https://www.github.com/googleapis/python-bigquery/issues/280)) ([3be78b7](https://www.github.com/googleapis/python-bigquery/commit/3be78b737add7111e24e912cd02fc6df75a07de6)) ### Documentation * update clustering field docstrings ([#286](https://www.github.com/googleapis/python-bigquery/issues/286)) ([5ea1ece](https://www.github.com/googleapis/python-bigquery/commit/5ea1ece2d911cdd1f3d9549ee01559ce8ed8269a)), closes [#285](https://www.github.com/googleapis/python-bigquery/issues/285) * update snippets samples to support version 2.0 ([#309](https://www.github.com/googleapis/python-bigquery/issues/309)) ([61634be](https://www.github.com/googleapis/python-bigquery/commit/61634be9bf9e3df7589fc1bfdbda87288859bb13)) ### Dependencies * add protobuf dependency ([#306](https://www.github.com/googleapis/python-bigquery/issues/306)) ([cebb5e0](https://www.github.com/googleapis/python-bigquery/commit/cebb5e0e911e8c9059bc8c9e7fce4440e518bff3)), closes [#305](https://www.github.com/googleapis/python-bigquery/issues/305) * require pyarrow for pandas support ([#314](https://www.github.com/googleapis/python-bigquery/issues/314)) ([801e4c0](https://www.github.com/googleapis/python-bigquery/commit/801e4c0574b7e421aa3a28cafec6fd6bcce940dd)), closes [#265](https://www.github.com/googleapis/python-bigquery/issues/265) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). 
--- CHANGELOG.md | 34 ++++++++++++++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ad6c9551f..384704bbf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,40 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.2.0](https://www.github.com/googleapis/python-bigquery/compare/v2.1.0...v2.2.0) (2020-10-19) + + +### Features + +* add method api_repr for table list item ([#299](https://www.github.com/googleapis/python-bigquery/issues/299)) ([07c70f0](https://www.github.com/googleapis/python-bigquery/commit/07c70f0292f9212f0c968cd5c9206e8b0409c0da)) +* add support for listing arima, automl, boosted tree, DNN, and matrix factorization models ([#328](https://www.github.com/googleapis/python-bigquery/issues/328)) ([502a092](https://www.github.com/googleapis/python-bigquery/commit/502a0926018abf058cb84bd18043c25eba15a2cc)) +* add timeout paramter to load_table_from_file and it dependent methods ([#327](https://www.github.com/googleapis/python-bigquery/issues/327)) ([b0dd892](https://www.github.com/googleapis/python-bigquery/commit/b0dd892176e31ac25fddd15554b5bfa054299d4d)) +* add to_api_repr method to Model ([#326](https://www.github.com/googleapis/python-bigquery/issues/326)) ([fb401bd](https://www.github.com/googleapis/python-bigquery/commit/fb401bd94477323bba68cf252dd88166495daf54)) +* allow client options to be set in magics context ([#322](https://www.github.com/googleapis/python-bigquery/issues/322)) ([5178b55](https://www.github.com/googleapis/python-bigquery/commit/5178b55682f5e264bfc082cde26acb1fdc953a18)) + + +### Bug Fixes + +* make TimePartitioning repr evaluable ([#110](https://www.github.com/googleapis/python-bigquery/issues/110)) ([20f473b](https://www.github.com/googleapis/python-bigquery/commit/20f473bfff5ae98377f5d9cdf18bfe5554d86ff4)), closes [#109](https://www.github.com/googleapis/python-bigquery/issues/109) +* use version.py 
instead of pkg_resources.get_distribution ([#307](https://www.github.com/googleapis/python-bigquery/issues/307)) ([b8f502b](https://www.github.com/googleapis/python-bigquery/commit/b8f502b14f21d1815697e4d57cf1225dfb4a7c5e)) + + +### Performance Improvements + +* add size parameter for load table from dataframe and json methods ([#280](https://www.github.com/googleapis/python-bigquery/issues/280)) ([3be78b7](https://www.github.com/googleapis/python-bigquery/commit/3be78b737add7111e24e912cd02fc6df75a07de6)) + + +### Documentation + +* update clustering field docstrings ([#286](https://www.github.com/googleapis/python-bigquery/issues/286)) ([5ea1ece](https://www.github.com/googleapis/python-bigquery/commit/5ea1ece2d911cdd1f3d9549ee01559ce8ed8269a)), closes [#285](https://www.github.com/googleapis/python-bigquery/issues/285) +* update snippets samples to support version 2.0 ([#309](https://www.github.com/googleapis/python-bigquery/issues/309)) ([61634be](https://www.github.com/googleapis/python-bigquery/commit/61634be9bf9e3df7589fc1bfdbda87288859bb13)) + + +### Dependencies + +* add protobuf dependency ([#306](https://www.github.com/googleapis/python-bigquery/issues/306)) ([cebb5e0](https://www.github.com/googleapis/python-bigquery/commit/cebb5e0e911e8c9059bc8c9e7fce4440e518bff3)), closes [#305](https://www.github.com/googleapis/python-bigquery/issues/305) +* require pyarrow for pandas support ([#314](https://www.github.com/googleapis/python-bigquery/issues/314)) ([801e4c0](https://www.github.com/googleapis/python-bigquery/commit/801e4c0574b7e421aa3a28cafec6fd6bcce940dd)), closes [#265](https://www.github.com/googleapis/python-bigquery/issues/265) + ## [2.1.0](https://www.github.com/googleapis/python-bigquery/compare/v2.0.0...v2.1.0) (2020-10-08) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 8b5d3328c..bd0f8e5c7 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the 
License for the specific language governing permissions and # limitations under the License. -__version__ = "2.1.0" +__version__ = "2.2.0" From 155bacc156f181384ca6dba699ab83d0398176d1 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Tue, 20 Oct 2020 15:07:55 -0400 Subject: [PATCH 031/341] fix: create_job method accepts dictionary arguments (#300) * fix: broken create_job method * fix: changes in unit tests * fix: fix sourceTable thing * fix: handle sourceTable passed in job resource * fix: remove delete destination table from query * fix: revert destination table for query --- google/cloud/bigquery/client.py | 12 ++++--- tests/unit/test_client.py | 57 ++++++++++++++++----------------- 2 files changed, 36 insertions(+), 33 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index cce393d6c..e4b5b22ab 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -48,11 +48,11 @@ from google.cloud import exceptions from google.cloud.client import ClientWithProject +from google.cloud.bigquery._helpers import _del_sub_prop from google.cloud.bigquery._helpers import _get_sub_prop from google.cloud.bigquery._helpers import _record_field_to_json from google.cloud.bigquery._helpers import _str_or_none from google.cloud.bigquery._helpers import _verify_job_config_type -from google.cloud.bigquery._helpers import _del_sub_prop from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _pandas_helpers from google.cloud.bigquery.dataset import Dataset @@ -1619,6 +1619,7 @@ def create_job(self, job_config, retry=DEFAULT_RETRY, timeout=None): ) destination = _get_sub_prop(job_config, ["load", "destinationTable"]) source_uris = _get_sub_prop(job_config, ["load", "sourceUris"]) + destination = TableReference.from_api_repr(destination) return self.load_table_from_uri( source_uris, destination, @@ -1631,9 +1632,9 @@ def create_job(self, 
job_config, retry=DEFAULT_RETRY, timeout=None): job_config ) destination = _get_sub_prop(job_config, ["copy", "destinationTable"]) + destination = TableReference.from_api_repr(destination) sources = [] source_configs = _get_sub_prop(job_config, ["copy", "sourceTables"]) - if source_configs is None: source_configs = [_get_sub_prop(job_config, ["copy", "sourceTable"])] for source_config in source_configs: @@ -1651,10 +1652,13 @@ def create_job(self, job_config, retry=DEFAULT_RETRY, timeout=None): job_config ) source = _get_sub_prop(job_config, ["extract", "sourceTable"]) - source_type = "Table" - if not source: + if source: + source_type = "Table" + source = TableReference.from_api_repr(source) + else: source = _get_sub_prop(job_config, ["extract", "sourceModel"]) source_type = "Model" + source = ModelReference.from_api_repr(source) destination_uris = _get_sub_prop(job_config, ["extract", "destinationUris"]) return self.extract_table( source, diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 2001ad42b..e507834f6 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -3573,21 +3573,28 @@ def test_delete_table_w_not_found_ok_true(self): conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) - def _create_job_helper(self, job_config, client_method): + def _create_job_helper(self, job_config): + from google.cloud.bigquery import _helpers + creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - client._connection = make_connection() - rf1 = mock.Mock() - get_config_patch = mock.patch( - "google.cloud.bigquery.job._JobConfig.from_api_repr", return_value=rf1, - ) - load_patch = mock.patch(client_method, autospec=True) + RESOURCE = { + "jobReference": {"projectId": self.PROJECT, "jobId": mock.ANY}, + "configuration": job_config, + } + conn = client._connection = make_connection(RESOURCE) + client.create_job(job_config=job_config) + if "query" 
in job_config: + _helpers._del_sub_prop(job_config, ["query", "destinationTable"]) - with load_patch as client_method, get_config_patch: - client.create_job(job_config=job_config) - client_method.assert_called_once() + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/%s/jobs" % self.PROJECT, + data=RESOURCE, + timeout=None, + ) def test_create_job_load_config(self): configuration = { @@ -3601,9 +3608,7 @@ def test_create_job_load_config(self): } } - self._create_job_helper( - configuration, "google.cloud.bigquery.client.Client.load_table_from_uri" - ) + self._create_job_helper(configuration) def test_create_job_copy_config(self): configuration = { @@ -3623,9 +3628,7 @@ def test_create_job_copy_config(self): } } - self._create_job_helper( - configuration, "google.cloud.bigquery.client.Client.copy_table", - ) + self._create_job_helper(configuration) def test_create_job_copy_config_w_single_source(self): configuration = { @@ -3643,9 +3646,7 @@ def test_create_job_copy_config_w_single_source(self): } } - self._create_job_helper( - configuration, "google.cloud.bigquery.client.Client.copy_table", - ) + self._create_job_helper(configuration) def test_create_job_extract_config(self): configuration = { @@ -3658,9 +3659,7 @@ def test_create_job_extract_config(self): "destinationUris": ["gs://test_bucket/dst_object*"], } } - self._create_job_helper( - configuration, "google.cloud.bigquery.client.Client.extract_table", - ) + self._create_job_helper(configuration) def test_create_job_extract_config_for_model(self): configuration = { @@ -3673,17 +3672,17 @@ def test_create_job_extract_config_for_model(self): "destinationUris": ["gs://test_bucket/dst_object*"], } } - self._create_job_helper( - configuration, "google.cloud.bigquery.client.Client.extract_table", - ) + self._create_job_helper(configuration) def test_create_job_query_config(self): configuration = { - "query": {"query": "query", "destinationTable": {"tableId": "table_id"}} + "query": { +
"query": "query", + "destinationTable": {"tableId": "table_id"}, + "useLegacySql": False, + } } - self._create_job_helper( - configuration, "google.cloud.bigquery.client.Client.query", - ) + self._create_job_helper(configuration) def test_create_job_query_config_w_rateLimitExceeded_error(self): from google.cloud.exceptions import Forbidden From fd082551f4018d6c31fa48922bd0c2e301411213 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 23 Oct 2020 20:47:13 +0200 Subject: [PATCH 032/341] chore(deps): update dependency grpcio to v1.33.1 (#338) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 7d001fa2f..544e92eb1 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.1.0 google-cloud-bigquery-storage==2.0.0 google-auth-oauthlib==0.4.1 -grpcio==1.32.0 +grpcio==1.33.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.2 From c9823d932205f128b673b05d6086ca783c85c354 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 26 Oct 2020 10:04:34 -0500 Subject: [PATCH 033/341] test: make `_AsyncJob` tests mock at a lower layer (#340) This is intended to make the `_AsyncJob` tests more robust to changes in retry behavior. It also more explicitly tests the retry behavior by observing API calls rather than calls to certain methods.
--- google/cloud/bigquery/client.py | 1 - google/cloud/bigquery/job.py | 7 +- tests/unit/test_job.py | 121 ++++++++++++++++++++++++-------- 3 files changed, 95 insertions(+), 34 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index e4b5b22ab..57df9455e 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -625,7 +625,6 @@ def create_table(self, table, exists_ok=False, retry=DEFAULT_RETRY, timeout=None def _call_api( self, retry, span_name=None, span_attributes=None, job_ref=None, **kwargs ): - call = functools.partial(self._connection.api_request, **kwargs) if retry: call = retry(call) diff --git a/google/cloud/bigquery/job.py b/google/cloud/bigquery/job.py index 766db1d42..6cb138acf 100644 --- a/google/cloud/bigquery/job.py +++ b/google/cloud/bigquery/job.py @@ -529,9 +529,8 @@ def state(self): Optional[str]: the state (None until set from the server). """ - status = self._properties.get("status") - if status is not None: - return status.get("state") + status = self._properties.get("status", {}) + return status.get("state") def _set_properties(self, api_response): """Update properties from resource in body of ``api_response`` @@ -588,7 +587,7 @@ def _check_resource_config(cls, resource): def to_api_repr(self): """Generate a resource for the job.""" - raise NotImplementedError("Abstract") + return copy.deepcopy(self._properties) _build_resource = to_api_repr # backward-compatibility alias diff --git a/tests/unit/test_job.py b/tests/unit/test_job.py index 75212ae95..f577b08bd 100644 --- a/tests/unit/test_job.py +++ b/tests/unit/test_job.py @@ -20,6 +20,8 @@ import warnings import freezegun +from google.api_core import exceptions +import google.api_core.retry import mock import pytest import requests @@ -70,6 +72,12 @@ def _make_connection(*responses): return mock_conn +def _make_retriable_exception(): + return exceptions.TooManyRequests( + "retriable exception", errors=[{"reason": 
"rateLimitExceeded"}] + ) + + def _make_job_resource( creation_time_ms=1437767599006, started_time_ms=1437767600007, @@ -84,6 +92,7 @@ def _make_job_resource( user_email="bq-user@example.com", ): resource = { + "status": {"state": "PENDING"}, "configuration": {job_type: {}}, "statistics": {"creationTime": creation_time_ms, job_type: {}}, "etag": etag, @@ -97,9 +106,11 @@ def _make_job_resource( if started or ended: resource["statistics"]["startTime"] = started_time_ms + resource["status"]["state"] = "RUNNING" if ended: resource["statistics"]["endTime"] = ended_time_ms + resource["status"]["state"] = "DONE" if job_type == "query": resource["configuration"]["query"]["destinationTable"] = { @@ -555,14 +566,14 @@ def test__check_resource_config_ok(self): def test__build_resource(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) - with self.assertRaises(NotImplementedError): - job._build_resource() + resource = job._build_resource() + assert resource["jobReference"]["jobId"] == self.JOB_ID def test_to_api_repr(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) - with self.assertRaises(NotImplementedError): - job.to_api_repr() + resource = job.to_api_repr() + assert resource["jobReference"]["jobId"] == self.JOB_ID def test__begin_already(self): job = self._set_properties_job() @@ -965,43 +976,95 @@ def test_done_already(self): self.assertTrue(job.done()) - @mock.patch("google.api_core.future.polling.PollingFuture.result") - def test_result_default_wo_state(self, result): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - client = _make_client(project=self.PROJECT) + def test_result_default_wo_state(self): + begun_job_resource = _make_job_resource( + job_id=self.JOB_ID, project_id=self.PROJECT, started=True + ) + done_job_resource = _make_job_resource( + job_id=self.JOB_ID, project_id=self.PROJECT, started=True, ended=True + ) + conn = _make_connection( + 
_make_retriable_exception(), + begun_job_resource, + _make_retriable_exception(), + done_job_resource, + ) + client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, client) - begin = job._begin = mock.Mock() - self.assertIs(job.result(), result.return_value) + self.assertIs(job.result(), job) - begin.assert_called_once_with(retry=DEFAULT_RETRY, timeout=None) - result.assert_called_once_with(timeout=None) + begin_call = mock.call( + method="POST", + path=f"/projects/{self.PROJECT}/jobs", + data={"jobReference": {"jobId": self.JOB_ID, "projectId": self.PROJECT}}, + timeout=None, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={}, + timeout=None, + ) + conn.api_request.assert_has_calls( + [begin_call, begin_call, reload_call, reload_call] + ) - @mock.patch("google.api_core.future.polling.PollingFuture.result") - def test_result_w_retry_wo_state(self, result): - client = _make_client(project=self.PROJECT) + def test_result_w_retry_wo_state(self): + begun_job_resource = _make_job_resource( + job_id=self.JOB_ID, project_id=self.PROJECT, started=True + ) + done_job_resource = _make_job_resource( + job_id=self.JOB_ID, project_id=self.PROJECT, started=True, ended=True + ) + conn = _make_connection( + exceptions.NotFound("not normally retriable"), + begun_job_resource, + # The call to done() / reload() does not get the custom retry + # policy passed to it, so we don't throw a non-retriable + # exception here. 
See: + # https://github.com/googleapis/python-bigquery/issues/24 + _make_retriable_exception(), + done_job_resource, + ) + client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, client) - begin = job._begin = mock.Mock() - retry = mock.Mock() + custom_predicate = mock.Mock() + custom_predicate.return_value = True + custom_retry = google.api_core.retry.Retry(predicate=custom_predicate) - self.assertIs(job.result(retry=retry), result.return_value) + self.assertIs(job.result(retry=custom_retry), job) - begin.assert_called_once_with(retry=retry, timeout=None) - result.assert_called_once_with(timeout=None) + begin_call = mock.call( + method="POST", + path=f"/projects/{self.PROJECT}/jobs", + data={"jobReference": {"jobId": self.JOB_ID, "projectId": self.PROJECT}}, + timeout=None, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={}, + timeout=None, + ) + conn.api_request.assert_has_calls( + [begin_call, begin_call, reload_call, reload_call] + ) - @mock.patch("google.api_core.future.polling.PollingFuture.result") - def test_result_explicit_w_state(self, result): - client = _make_client(project=self.PROJECT) + def test_result_explicit_w_state(self): + conn = _make_connection() + client = _make_client(project=self.PROJECT, connection=conn) job = self._make_one(self.JOB_ID, client) - job._properties["status"] = {"state": "DONE"} - begin = job._begin = mock.Mock() + # Use _set_properties() instead of directly modifying _properties so + # that the result state is set properly. 
+ job_resource = job._properties + job_resource["status"] = {"state": "DONE"} + job._set_properties(job_resource) timeout = 1 - self.assertIs(job.result(timeout=timeout), result.return_value) + self.assertIs(job.result(timeout=timeout), job) - begin.assert_not_called() - result.assert_called_once_with(timeout=timeout) + conn.api_request.assert_not_called() def test_cancelled_wo_error_result(self): client = _make_client(project=self.PROJECT) From dca2e4ca7c2ae183ac4bb60f653d425a43a86bea Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Tue, 27 Oct 2020 10:34:55 -0400 Subject: [PATCH 034/341] docs: add documents for QueryPlanEntry and QueryPlanEntryStep (#344) --- docs/reference.rst | 2 ++ google/cloud/bigquery/job.py | 8 ++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/reference.rst b/docs/reference.rst index 21dd8e43d..3643831cb 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -59,6 +59,8 @@ Job-Related Types job.CreateDisposition job.DestinationFormat job.Encoding + job.QueryPlanEntry + job.QueryPlanEntryStep job.QueryPriority job.SourceFormat job.WriteDisposition diff --git a/google/cloud/bigquery/job.py b/google/cloud/bigquery/job.py index 6cb138acf..977d7a559 100644 --- a/google/cloud/bigquery/job.py +++ b/google/cloud/bigquery/job.py @@ -2844,7 +2844,7 @@ def query_plan(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.query_plan Returns: - List[QueryPlanEntry]: + List[google.cloud.bigquery.job.QueryPlanEntry]: mappings describing the query plan, or an empty list if the query has not yet completed. """ @@ -3418,7 +3418,6 @@ class QueryPlanEntryStep(object): Args: kind (str): step type. - substeps (List): names of substeps. """ @@ -3434,7 +3433,8 @@ def from_api_repr(cls, resource): resource (Dict): JSON representation of the entry. Returns: - QueryPlanEntryStep: new instance built from the resource. 
+ google.cloud.bigquery.job.QueryPlanEntryStep: + New instance built from the resource. """ return cls(kind=resource.get("kind"), substeps=resource.get("substeps", ())) @@ -3464,7 +3464,7 @@ def from_api_repr(cls, resource): ExplainQueryStage representation returned from API. Returns: - google.cloud.bigquery.QueryPlanEntry: + google.cloud.bigquery.job.QueryPlanEntry: Query plan entry parsed from ``resource``. """ entry = cls() From e86aafe0258e45d2e9baa0fff9c47594db878a55 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 28 Oct 2020 18:56:58 +0100 Subject: [PATCH 035/341] chore(deps): update dependency google-auth-oauthlib to v0.4.2 (#349) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 544e92eb1..fab797494 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ google-cloud-bigquery==2.1.0 google-cloud-bigquery-storage==2.0.0 -google-auth-oauthlib==0.4.1 +google-auth-oauthlib==0.4.2 grpcio==1.33.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' From d2ffc22013ca776bfa99d046b0419a9666c9d18e Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 28 Oct 2020 19:16:06 +0100 Subject: [PATCH 036/341] chore(deps): update dependency grpcio to v1.33.2 (#350) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [grpcio](https://grpc.io) | patch | `==1.33.1` -> `==1.33.2` | --- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. 
--- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index fab797494..3bcab1ace 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.1.0 google-cloud-bigquery-storage==2.0.0 google-auth-oauthlib==0.4.2 -grpcio==1.33.1 +grpcio==1.33.2 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.2 From e8be4898d70303cc4dfdf952114bb7adef46e39a Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 28 Oct 2020 19:36:05 +0100 Subject: [PATCH 037/341] chore(deps): update dependency google-cloud-bigquery-storage to v2.0.1 (#337) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [google-cloud-bigquery-storage](https://togithub.com/googleapis/python-bigquery-storage) | patch | `==2.0.0` -> `==2.0.1` | --- ### Release Notes
googleapis/python-bigquery-storage ### [`v2.0.1`](https://togithub.com/googleapis/python-bigquery-storage/blob/master/CHANGELOG.md#​201-httpswwwgithubcomgoogleapispython-bigquery-storagecomparev200v201-2020-10-21) [Compare Source](https://togithub.com/googleapis/python-bigquery-storage/compare/v2.0.0...v2.0.1)
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 3bcab1ace..411a86dae 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.1.0 -google-cloud-bigquery-storage==2.0.0 +google-cloud-bigquery-storage==2.0.1 google-auth-oauthlib==0.4.2 grpcio==1.33.2 ipython==7.16.1; python_version < '3.7' From 5a925ec5b511a19aca1fc7640e54c55586078403 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 28 Oct 2020 19:48:03 +0100 Subject: [PATCH 038/341] chore(deps): update dependency google-cloud-bigquery to v2.2.0 (#333) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | minor | `==2.1.0` -> `==2.2.0` | --- ### Release Notes
googleapis/python-bigquery ### [`v2.2.0`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​220-httpswwwgithubcomgoogleapispython-bigquerycomparev210v220-2020-10-19) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.1.0...v2.2.0) ##### Features - add method api_repr for table list item ([#​299](https://www.github.com/googleapis/python-bigquery/issues/299)) ([07c70f0](https://www.github.com/googleapis/python-bigquery/commit/07c70f0292f9212f0c968cd5c9206e8b0409c0da)) - add support for listing arima, automl, boosted tree, DNN, and matrix factorization models ([#​328](https://www.github.com/googleapis/python-bigquery/issues/328)) ([502a092](https://www.github.com/googleapis/python-bigquery/commit/502a0926018abf058cb84bd18043c25eba15a2cc)) - add timeout paramter to load_table_from_file and it dependent methods ([#​327](https://www.github.com/googleapis/python-bigquery/issues/327)) ([b0dd892](https://www.github.com/googleapis/python-bigquery/commit/b0dd892176e31ac25fddd15554b5bfa054299d4d)) - add to_api_repr method to Model ([#​326](https://www.github.com/googleapis/python-bigquery/issues/326)) ([fb401bd](https://www.github.com/googleapis/python-bigquery/commit/fb401bd94477323bba68cf252dd88166495daf54)) - allow client options to be set in magics context ([#​322](https://www.github.com/googleapis/python-bigquery/issues/322)) ([5178b55](https://www.github.com/googleapis/python-bigquery/commit/5178b55682f5e264bfc082cde26acb1fdc953a18)) ##### Bug Fixes - make TimePartitioning repr evaluable ([#​110](https://www.github.com/googleapis/python-bigquery/issues/110)) ([20f473b](https://www.github.com/googleapis/python-bigquery/commit/20f473bfff5ae98377f5d9cdf18bfe5554d86ff4)), closes [#​109](https://www.github.com/googleapis/python-bigquery/issues/109) - use version.py instead of pkg_resources.get_distribution ([#​307](https://www.github.com/googleapis/python-bigquery/issues/307)) 
([b8f502b](https://www.github.com/googleapis/python-bigquery/commit/b8f502b14f21d1815697e4d57cf1225dfb4a7c5e)) ##### Performance Improvements - add size parameter for load table from dataframe and json methods ([#​280](https://www.github.com/googleapis/python-bigquery/issues/280)) ([3be78b7](https://www.github.com/googleapis/python-bigquery/commit/3be78b737add7111e24e912cd02fc6df75a07de6)) ##### Documentation - update clustering field docstrings ([#​286](https://www.github.com/googleapis/python-bigquery/issues/286)) ([5ea1ece](https://www.github.com/googleapis/python-bigquery/commit/5ea1ece2d911cdd1f3d9549ee01559ce8ed8269a)), closes [#​285](https://www.github.com/googleapis/python-bigquery/issues/285) - update snippets samples to support version 2.0 ([#​309](https://www.github.com/googleapis/python-bigquery/issues/309)) ([61634be](https://www.github.com/googleapis/python-bigquery/commit/61634be9bf9e3df7589fc1bfdbda87288859bb13)) ##### Dependencies - add protobuf dependency ([#​306](https://www.github.com/googleapis/python-bigquery/issues/306)) ([cebb5e0](https://www.github.com/googleapis/python-bigquery/commit/cebb5e0e911e8c9059bc8c9e7fce4440e518bff3)), closes [#​305](https://www.github.com/googleapis/python-bigquery/issues/305) - require pyarrow for pandas support ([#​314](https://www.github.com/googleapis/python-bigquery/issues/314)) ([801e4c0](https://www.github.com/googleapis/python-bigquery/commit/801e4c0574b7e421aa3a28cafec6fd6bcce940dd)), closes [#​265](https://www.github.com/googleapis/python-bigquery/issues/265)
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 411a86dae..c5ab3ef3d 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.1.0 +google-cloud-bigquery==2.2.0 google-cloud-bigquery-storage==2.0.1 google-auth-oauthlib==0.4.2 grpcio==1.33.2 From e51fd45fdb0481ac5d59cc0edbfa0750928b2596 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 28 Oct 2020 16:32:02 -0500 Subject: [PATCH 039/341] feat: add `reload` argument to `*Job.done()` functions (#341) This enables checking the job status without making an API call. It also fixes an inconsistency in `QueryJob`, where a job can be reported as "done" without having the results of a `getQueryResults` API call. 
Follow-up to https://github.com/googleapis/python-bigquery/pull/340 --- google/cloud/bigquery/job.py | 67 +++++++------- tests/unit/test_job.py | 101 +++++++++++++++++--- tests/unit/test_magics.py | 173 +++++++++++++++++++---------------- 3 files changed, 218 insertions(+), 123 deletions(-) diff --git a/google/cloud/bigquery/job.py b/google/cloud/bigquery/job.py index 977d7a559..204c5f774 100644 --- a/google/cloud/bigquery/job.py +++ b/google/cloud/bigquery/job.py @@ -767,7 +767,7 @@ def _set_future_result(self): # set, do not call set_result/set_exception again. # Note: self._result_set is set to True in set_result and # set_exception, in case those methods are invoked directly. - if self.state != _DONE_STATE or self._result_set: + if not self.done(reload=False) or self._result_set: return if self.error_result is not None: @@ -776,21 +776,24 @@ def _set_future_result(self): else: self.set_result(self) - def done(self, retry=DEFAULT_RETRY, timeout=None): - """Refresh the job and checks if it is complete. + def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): + """Checks if the job is complete. Args: retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + reload (Optional[bool]): + If ``True``, make an API call to refresh the job state of + unfinished jobs before checking. Default ``True``. Returns: bool: True if the job is complete, False otherwise. """ # Do not refresh is the state is already done, as the job will not # change once complete. 
- if self.state != _DONE_STATE: + if self.state != _DONE_STATE and reload: self.reload(retry=retry, timeout=timeout) return self.state == _DONE_STATE @@ -3073,7 +3076,7 @@ def estimated_bytes_processed(self): result = int(result) return result - def done(self, retry=DEFAULT_RETRY, timeout=None): + def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): """Refresh the job and checks if it is complete. Args: @@ -3082,10 +3085,25 @@ def done(self, retry=DEFAULT_RETRY, timeout=None): timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + reload (Optional[bool]): + If ``True``, make an API call to refresh the job state of + unfinished jobs before checking. Default ``True``. Returns: bool: True if the job is complete, False otherwise. """ + is_done = ( + # Only consider a QueryJob complete when we know we have the final + # query results available. + self._query_results is not None + and self._query_results.complete + and self.state == _DONE_STATE + ) + # Do not refresh if the state is already done, as the job will not + # change once complete. + if not reload or is_done: + return is_done + # Since the API to getQueryResults can hang up to the timeout value # (default of 10 seconds), set the timeout parameter to ensure that # the timeout from the futures API is respected. See: @@ -3103,23 +3121,20 @@ def done(self, retry=DEFAULT_RETRY, timeout=None): # stored in _blocking_poll() in the process of polling for job completion. transport_timeout = timeout if timeout is not None else self._transport_timeout - # Do not refresh if the state is already done, as the job will not - # change once complete. 
- if self.state != _DONE_STATE: - self._query_results = self._client._get_query_results( - self.job_id, - retry, - project=self.project, - timeout_ms=timeout_ms, - location=self.location, - timeout=transport_timeout, - ) + self._query_results = self._client._get_query_results( + self.job_id, + retry, + project=self.project, + timeout_ms=timeout_ms, + location=self.location, + timeout=transport_timeout, + ) - # Only reload the job once we know the query is complete. - # This will ensure that fields such as the destination table are - # correctly populated. - if self._query_results.complete: - self.reload(retry=retry, timeout=transport_timeout) + # Only reload the job once we know the query is complete. + # This will ensure that fields such as the destination table are + # correctly populated. + if self._query_results.complete and self.state != _DONE_STATE: + self.reload(retry=retry, timeout=transport_timeout) return self.state == _DONE_STATE @@ -3231,16 +3246,6 @@ def result( """ try: super(QueryJob, self).result(retry=retry, timeout=timeout) - - # Return an iterator instead of returning the job. 
- if not self._query_results: - self._query_results = self._client._get_query_results( - self.job_id, - retry, - project=self.project, - location=self.location, - timeout=timeout, - ) except exceptions.GoogleCloudError as exc: exc.message += self._format_for_exception(self.query, self.job_id) exc.query_job = self diff --git a/tests/unit/test_job.py b/tests/unit/test_job.py index f577b08bd..2d1e8fec8 100644 --- a/tests/unit/test_job.py +++ b/tests/unit/test_job.py @@ -45,6 +45,8 @@ except (ImportError, AttributeError): # pragma: NO COVER tqdm = None +import google.cloud.bigquery.query + def _make_credentials(): import google.auth.credentials @@ -3942,10 +3944,6 @@ def _make_resource(self, started=False, ended=False): resource = super(TestQueryJob, self)._make_resource(started, ended) config = resource["configuration"]["query"] config["query"] = self.QUERY - - if ended: - resource["status"] = {"state": "DONE"} - return resource def _verifyBooleanResourceProperties(self, job, config): @@ -4211,6 +4209,9 @@ def test_done(self): client = _make_client(project=self.PROJECT) resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) + job._query_results = google.cloud.bigquery.query._QueryResults.from_api_repr( + {"jobComplete": True, "jobReference": resource["jobReference"]} + ) self.assertTrue(job.done()) def test_done_w_timeout(self): @@ -4668,28 +4669,39 @@ def test_result(self): from google.cloud.bigquery.table import RowIterator query_resource = { + "jobComplete": False, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + } + query_resource_done = { "jobComplete": True, "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "2", } + job_resource = self._make_resource(started=True) + job_resource_done = self._make_resource(started=True, ended=True) + job_resource_done["configuration"]["query"]["destinationTable"] = { + 
"projectId": "dest-project", + "datasetId": "dest_dataset", + "tableId": "dest_table", + } tabledata_resource = { - # Explicitly set totalRows to be different from the query response. - # to test update during iteration. + # Explicitly set totalRows to be different from the initial + # response to test update during iteration. "totalRows": "1", "pageToken": None, "rows": [{"f": [{"v": "abc"}]}], } - connection = _make_connection(query_resource, tabledata_resource) - client = _make_client(self.PROJECT, connection=connection) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) + conn = _make_connection( + query_resource, query_resource_done, job_resource_done, tabledata_resource + ) + client = _make_client(self.PROJECT, connection=conn) + job = self._get_target_class().from_api_repr(job_resource, client) result = job.result() self.assertIsInstance(result, RowIterator) self.assertEqual(result.total_rows, 2) - rows = list(result) self.assertEqual(len(rows), 1) self.assertEqual(rows[0].col1, "abc") @@ -4697,6 +4709,70 @@ def test_result(self): # on the response from tabledata.list. 
self.assertEqual(result.total_rows, 1) + query_results_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", + query_params={"maxResults": 0}, + timeout=None, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={}, + timeout=None, + ) + tabledata_call = mock.call( + method="GET", + path="/projects/dest-project/datasets/dest_dataset/tables/dest_table/data", + query_params={}, + timeout=None, + ) + conn.api_request.assert_has_calls( + [query_results_call, query_results_call, reload_call, tabledata_call] + ) + + def test_result_with_done_job_calls_get_query_results(self): + query_resource_done = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "1", + } + job_resource = self._make_resource(started=True, ended=True) + job_resource["configuration"]["query"]["destinationTable"] = { + "projectId": "dest-project", + "datasetId": "dest_dataset", + "tableId": "dest_table", + } + tabledata_resource = { + "totalRows": "1", + "pageToken": None, + "rows": [{"f": [{"v": "abc"}]}], + } + conn = _make_connection(query_resource_done, tabledata_resource) + client = _make_client(self.PROJECT, connection=conn) + job = self._get_target_class().from_api_repr(job_resource, client) + + result = job.result() + + rows = list(result) + self.assertEqual(len(rows), 1) + self.assertEqual(rows[0].col1, "abc") + + query_results_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", + query_params={"maxResults": 0}, + timeout=None, + ) + tabledata_call = mock.call( + method="GET", + path="/projects/dest-project/datasets/dest_dataset/tables/dest_table/data", + query_params={}, + timeout=None, + ) + conn.api_request.assert_has_calls([query_results_call, tabledata_call]) + def test_result_with_max_results(self): from google.cloud.bigquery.table 
import RowIterator @@ -4938,6 +5014,9 @@ def test_result_error(self): "errors": [error_result], "state": "DONE", } + job._query_results = google.cloud.bigquery.query._QueryResults.from_api_repr( + {"jobComplete": True, "jobReference": job._properties["jobReference"]} + ) job._set_future_result() with self.assertRaises(exceptions.GoogleCloudError) as exc_info: diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index 30ca4d70c..b2877845a 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -19,7 +19,6 @@ import mock import pytest -import six try: import pandas @@ -101,27 +100,38 @@ def fail_if(name, globals, locals, fromlist, level): return maybe_fail_import(predicate=fail_if) -JOB_REFERENCE_RESOURCE = {"projectId": "its-a-project-eh", "jobId": "some-random-id"} +PROJECT_ID = "its-a-project-eh" +JOB_ID = "some-random-id" +JOB_REFERENCE_RESOURCE = {"projectId": PROJECT_ID, "jobId": JOB_ID} +DATASET_ID = "dest_dataset" +TABLE_ID = "dest_table" TABLE_REFERENCE_RESOURCE = { - "projectId": "its-a-project-eh", - "datasetId": "ds", - "tableId": "persons", + "projectId": PROJECT_ID, + "datasetId": DATASET_ID, + "tableId": TABLE_ID, } +QUERY_STRING = "SELECT 42 AS the_answer FROM `life.the_universe.and_everything`;" QUERY_RESOURCE = { "jobReference": JOB_REFERENCE_RESOURCE, "configuration": { "query": { "destinationTable": TABLE_REFERENCE_RESOURCE, - "query": "SELECT 42 FROM `life.the_universe.and_everything`;", + "query": QUERY_STRING, "queryParameters": [], "useLegacySql": False, } }, "status": {"state": "DONE"}, } +QUERY_RESULTS_RESOURCE = { + "jobReference": JOB_REFERENCE_RESOURCE, + "totalRows": 1, + "jobComplete": True, + "schema": {"fields": [{"name": "the_answer", "type": "INTEGER"}]}, +} -def test_context_credentials_auto_set_w_application_default_credentials(): +def test_context_with_default_credentials(): """When Application Default Credentials are set, the context credentials will be created the first time it is called """ @@ 
-142,6 +152,50 @@ def test_context_credentials_auto_set_w_application_default_credentials(): assert default_mock.call_count == 2 +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_context_with_default_connection(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._credentials = None + magics.context._project = None + magics.context._connection = None + + default_credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + credentials_patch = mock.patch( + "google.auth.default", return_value=(default_credentials, "project-from-env") + ) + default_conn = make_connection(QUERY_RESOURCE, QUERY_RESULTS_RESOURCE) + conn_patch = mock.patch("google.cloud.bigquery.client.Connection", autospec=True) + list_rows_patch = mock.patch( + "google.cloud.bigquery.client.Client.list_rows", + return_value=google.cloud.bigquery.table._EmptyRowIterator(), + ) + + with conn_patch as conn, credentials_patch, list_rows_patch as list_rows: + conn.return_value = default_conn + ip.run_cell_magic("bigquery", "", QUERY_STRING) + + # Check that query actually starts the job. 
+ conn.assert_called() + list_rows.assert_called() + begin_call = mock.call( + method="POST", + path="/projects/project-from-env/jobs", + data=mock.ANY, + timeout=None, + ) + query_results_call = mock.call( + method="GET", + path=f"/projects/{PROJECT_ID}/queries/{JOB_ID}", + query_params=mock.ANY, + timeout=mock.ANY, + ) + default_conn.api_request.assert_has_calls([begin_call, query_results_call]) + + def test_context_credentials_and_project_can_be_set_explicitly(): project1 = "one-project-55564" project2 = "other-project-52569" @@ -163,93 +217,47 @@ def test_context_credentials_and_project_can_be_set_explicitly(): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_context_connection_can_be_overriden(): +def test_context_with_custom_connection(): ip = IPython.get_ipython() ip.extension_manager.load_extension("google.cloud.bigquery") magics.context._project = None magics.context._credentials = None - - credentials_mock = mock.create_autospec( - google.auth.credentials.Credentials, instance=True - ) - project = "project-123" - default_patch = mock.patch( - "google.auth.default", return_value=(credentials_mock, project) - ) - job_reference = copy.deepcopy(JOB_REFERENCE_RESOURCE) - job_reference["projectId"] = project - - query = "select * from persons" - resource = copy.deepcopy(QUERY_RESOURCE) - resource["jobReference"] = job_reference - resource["configuration"]["query"]["query"] = query - data = {"jobReference": job_reference, "totalRows": 0, "rows": []} - - conn = magics.context._connection = make_connection(resource, data) - list_rows_patch = mock.patch( - "google.cloud.bigquery.client.Client.list_rows", - return_value=google.cloud.bigquery.table._EmptyRowIterator(), + context_conn = magics.context._connection = make_connection( + QUERY_RESOURCE, QUERY_RESULTS_RESOURCE ) - with list_rows_patch as list_rows, default_patch: - ip.run_cell_magic("bigquery", "", query) - # Check that query actually 
starts the job. - list_rows.assert_called() - assert len(conn.api_request.call_args_list) == 2 - _, req = conn.api_request.call_args_list[0] - assert req["method"] == "POST" - assert req["path"] == "/projects/{}/jobs".format(project) - sent = req["data"] - assert isinstance(sent["jobReference"]["jobId"], six.string_types) - sent_config = sent["configuration"]["query"] - assert sent_config["query"] == query - - -@pytest.mark.usefixtures("ipython_interactive") -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_context_no_connection(): - ip = IPython.get_ipython() - ip.extension_manager.load_extension("google.cloud.bigquery") - magics.context._project = None - magics.context._credentials = None - magics.context._connection = None - - credentials_mock = mock.create_autospec( + default_credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) - project = "project-123" - default_patch = mock.patch( - "google.auth.default", return_value=(credentials_mock, project) + credentials_patch = mock.patch( + "google.auth.default", return_value=(default_credentials, "project-from-env") ) - job_reference = copy.deepcopy(JOB_REFERENCE_RESOURCE) - job_reference["projectId"] = project - - query = "select * from persons" - resource = copy.deepcopy(QUERY_RESOURCE) - resource["jobReference"] = job_reference - resource["configuration"]["query"]["query"] = query - data = {"jobReference": job_reference, "totalRows": 0, "rows": []} - - conn_mock = make_connection(resource, data, data, data) + default_conn = make_connection() conn_patch = mock.patch("google.cloud.bigquery.client.Connection", autospec=True) list_rows_patch = mock.patch( "google.cloud.bigquery.client.Client.list_rows", return_value=google.cloud.bigquery.table._EmptyRowIterator(), ) - with conn_patch as conn, list_rows_patch as list_rows, default_patch: - conn.return_value = conn_mock - ip.run_cell_magic("bigquery", "", query) - # Check that query actually starts the job. 
+ with conn_patch as conn, credentials_patch, list_rows_patch as list_rows: + conn.return_value = default_conn + ip.run_cell_magic("bigquery", "", QUERY_STRING) + list_rows.assert_called() - assert len(conn_mock.api_request.call_args_list) == 2 - _, req = conn_mock.api_request.call_args_list[0] - assert req["method"] == "POST" - assert req["path"] == "/projects/{}/jobs".format(project) - sent = req["data"] - assert isinstance(sent["jobReference"]["jobId"], six.string_types) - sent_config = sent["configuration"]["query"] - assert sent_config["query"] == query + default_conn.api_request.assert_not_called() + begin_call = mock.call( + method="POST", + path="/projects/project-from-env/jobs", + data=mock.ANY, + timeout=None, + ) + query_results_call = mock.call( + method="GET", + path=f"/projects/{PROJECT_ID}/queries/{JOB_ID}", + query_params=mock.ANY, + timeout=mock.ANY, + ) + context_conn.api_request.assert_has_calls([begin_call, query_results_call]) def test__run_query(): @@ -1060,6 +1068,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_overrides_context(param_value, ex resource = copy.deepcopy(QUERY_RESOURCE) resource["jobReference"] = job_reference resource["configuration"]["query"]["query"] = query + query_results = {"jobReference": job_reference, "totalRows": 0, "jobComplete": True} data = {"jobReference": job_reference, "totalRows": 0, "rows": []} credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True @@ -1067,7 +1076,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_overrides_context(param_value, ex default_patch = mock.patch( "google.auth.default", return_value=(credentials_mock, "general-project") ) - conn = magics.context._connection = make_connection(resource, data) + conn = magics.context._connection = make_connection(resource, query_results, data) list_rows_patch = mock.patch( "google.cloud.bigquery.client.Client.list_rows", return_value=google.cloud.bigquery.table._EmptyRowIterator(), @@ -1098,6 +1107,7 @@ def 
test_bigquery_magic_w_maximum_bytes_billed_w_context_inplace(): resource = copy.deepcopy(QUERY_RESOURCE) resource["jobReference"] = job_reference resource["configuration"]["query"]["query"] = query + query_results = {"jobReference": job_reference, "totalRows": 0, "jobComplete": True} data = {"jobReference": job_reference, "totalRows": 0, "rows": []} credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True @@ -1105,7 +1115,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_inplace(): default_patch = mock.patch( "google.auth.default", return_value=(credentials_mock, "general-project") ) - conn = magics.context._connection = make_connection(resource, data) + conn = magics.context._connection = make_connection(resource, query_results, data) list_rows_patch = mock.patch( "google.cloud.bigquery.client.Client.list_rows", return_value=google.cloud.bigquery.table._EmptyRowIterator(), @@ -1136,6 +1146,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_setter(): resource = copy.deepcopy(QUERY_RESOURCE) resource["jobReference"] = job_reference resource["configuration"]["query"]["query"] = query + query_results = {"jobReference": job_reference, "totalRows": 0, "jobComplete": True} data = {"jobReference": job_reference, "totalRows": 0, "rows": []} credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True @@ -1143,7 +1154,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_setter(): default_patch = mock.patch( "google.auth.default", return_value=(credentials_mock, "general-project") ) - conn = magics.context._connection = make_connection(resource, data) + conn = magics.context._connection = make_connection(resource, query_results, data) list_rows_patch = mock.patch( "google.cloud.bigquery.client.Client.list_rows", return_value=google.cloud.bigquery.table._EmptyRowIterator(), From 5dd1a5e77f13b8e576e917069e247c5390a81900 Mon Sep 17 00:00:00 2001 From: HemangChothani 
<50404902+HemangChothani@users.noreply.github.com> Date: Thu, 29 Oct 2020 10:16:07 -0400 Subject: [PATCH 040/341] fix(dbapi): avoid running % format with no query parameters (#348) * fix: avoid running %format when no query params * fix: nit * fix: change in unit test --- google/cloud/bigquery/dbapi/cursor.py | 2 +- tests/unit/test_dbapi_cursor.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py index 63264e9ab..597313fd6 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -441,7 +441,7 @@ def _format_operation(operation, parameters=None): if a parameter used in the operation is not found in the ``parameters`` argument. """ - if parameters is None: + if parameters is None or len(parameters) == 0: return operation if isinstance(parameters, collections_abc.Mapping): diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index 9a1a6b1e8..5c3bfcae9 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -601,3 +601,9 @@ def test__format_operation_w_too_short_sequence(self): "SELECT %s, %s;", ("hello",), ) + + def test__format_operation_w_empty_dict(self): + from google.cloud.bigquery.dbapi import cursor + + formatted_operation = cursor._format_operation("SELECT '%f'", {}) + self.assertEqual(formatted_operation, "SELECT '%f'") From 8a8080ba04647291907e61eea5f21f649fadadb3 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Sat, 31 Oct 2020 00:28:57 +0100 Subject: [PATCH 041/341] chore(deps): update dependency pandas to v1.1.4 (#355) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index c5ab3ef3d..bf895a1ae 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -5,6 +5,6 @@ grpcio==1.33.2 ipython==7.16.1;
python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.2 -pandas==1.1.3 +pandas==1.1.4 pyarrow==1.0.1 pytz==2020.1 From 284e17a17adf6844a17db2c6fed54a649b1f997e Mon Sep 17 00:00:00 2001 From: Ilya Gurov Date: Tue, 3 Nov 2020 18:43:15 +0300 Subject: [PATCH 042/341] feat: pass retry from Job.result() to Job.done() (#41) * feat(bigquery): pass retry from Job.result() to Job.done(). * fix merge conflicts * drop the comment * use kwargs sentinel * check the mock retry * update dependencies * use kwargs pattern * feat: added unit test for retry * feat: added more exceptions Co-authored-by: Tim Swast Co-authored-by: HemangChothani --- google/cloud/bigquery/job.py | 16 ++++----- setup.py | 2 +- testing/constraints-3.6.txt | 2 +- tests/unit/test_job.py | 67 ++++++++++++++++++++++++++++++++---- 4 files changed, 69 insertions(+), 18 deletions(-) diff --git a/google/cloud/bigquery/job.py b/google/cloud/bigquery/job.py index 204c5f774..e2e7e839a 100644 --- a/google/cloud/bigquery/job.py +++ b/google/cloud/bigquery/job.py @@ -819,8 +819,9 @@ def result(self, retry=DEFAULT_RETRY, timeout=None): """ if self.state is None: self._begin(retry=retry, timeout=timeout) - # TODO: modify PollingFuture so it can pass a retry argument to done(). - return super(_AsyncJob, self).result(timeout=timeout) + + kwargs = {} if retry is DEFAULT_RETRY else {"retry": retry} + return super(_AsyncJob, self).result(timeout=timeout, **kwargs) def cancelled(self): """Check if the job has been cancelled. 
@@ -1845,7 +1846,7 @@ def destination(self): """ return TableReference.from_api_repr( _helpers._get_sub_prop( - self._properties, ["configuration", "copy", "destinationTable"], + self._properties, ["configuration", "copy", "destinationTable"] ) ) @@ -2043,10 +2044,7 @@ def __init__(self, job_id, source, destination_uris, client, job_config=None): self._configuration = job_config if source: - source_ref = { - "projectId": source.project, - "datasetId": source.dataset_id, - } + source_ref = {"projectId": source.project, "datasetId": source.dataset_id} if isinstance(source, (Table, TableListItem, TableReference)): source_ref["tableId"] = source.table_id @@ -3138,10 +3136,10 @@ def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): return self.state == _DONE_STATE - def _blocking_poll(self, timeout=None): + def _blocking_poll(self, timeout=None, **kwargs): self._done_timeout = timeout self._transport_timeout = timeout - super(QueryJob, self)._blocking_poll(timeout=timeout) + super(QueryJob, self)._blocking_poll(timeout=timeout, **kwargs) @staticmethod def _format_for_exception(query, job_id): diff --git a/setup.py b/setup.py index c7410601e..548ceac09 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ # 'Development Status :: 5 - Production/Stable' release_status = "Development Status :: 5 - Production/Stable" dependencies = [ - "google-api-core[grpc] >= 1.22.2, < 2.0.0dev", + "google-api-core[grpc] >= 1.23.0, < 2.0.0dev", "proto-plus >= 1.10.0", "google-cloud-core >= 1.4.1, < 2.0dev", "google-resumable-media >= 0.6.0, < 2.0dev", diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index cea0ed84e..91a507a5c 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -1,4 +1,4 @@ -google-api-core==1.22.2 +google-api-core==1.23.0 google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 google-resumable-media==0.6.0 diff --git a/tests/unit/test_job.py b/tests/unit/test_job.py index 2d1e8fec8..8590e0576 100644 --- 
a/tests/unit/test_job.py +++ b/tests/unit/test_job.py @@ -864,7 +864,7 @@ def test_cancel_w_custom_retry(self): job = self._set_properties_job() api_request_patcher = mock.patch.object( - job._client._connection, "api_request", side_effect=[ValueError, response], + job._client._connection, "api_request", side_effect=[ValueError, response] ) retry = DEFAULT_RETRY.with_deadline(1).with_predicate( lambda exc: isinstance(exc, ValueError) @@ -885,7 +885,7 @@ def test_cancel_w_custom_retry(self): [ mock.call(method="POST", path=api_path, query_params={}, timeout=7.5), mock.call( - method="POST", path=api_path, query_params={}, timeout=7.5, + method="POST", path=api_path, query_params={}, timeout=7.5 ), # was retried once ], ) @@ -1034,7 +1034,6 @@ def test_result_w_retry_wo_state(self): custom_predicate = mock.Mock() custom_predicate.return_value = True custom_retry = google.api_core.retry.Retry(predicate=custom_predicate) - self.assertIs(job.result(retry=custom_retry), job) begin_call = mock.call( @@ -2757,7 +2756,7 @@ def test_cancel_w_bound_client(self): final_attributes.assert_called_with({"path": PATH}, client, job) conn.api_request.assert_called_once_with( - method="POST", path=PATH, query_params={}, timeout=None, + method="POST", path=PATH, query_params={}, timeout=None ) self._verifyResourceProperties(job, RESOURCE) @@ -2779,7 +2778,7 @@ def test_cancel_w_alternate_client(self): conn1.api_request.assert_not_called() conn2.api_request.assert_called_once_with( - method="POST", path=PATH, query_params={}, timeout=None, + method="POST", path=PATH, query_params={}, timeout=None ) self._verifyResourceProperties(job, RESOURCE) @@ -3205,7 +3204,7 @@ def test_exists_miss_w_bound_client(self): final_attributes.assert_called_with({"path": PATH}, client, job) conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None, + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None ) def 
test_exists_hit_w_alternate_client(self): @@ -3620,7 +3619,7 @@ def test_exists_miss_w_bound_client(self): final_attributes.assert_called_with({"path": PATH}, client, job) conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None, + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None ) def test_exists_hit_w_alternate_client(self): @@ -4812,6 +4811,60 @@ def test_result_with_max_results(self): tabledata_list_request[1]["query_params"]["maxResults"], max_results ) + def test_result_w_retry(self): + from google.cloud.bigquery.table import RowIterator + + query_resource = { + "jobComplete": False, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + } + query_resource_done = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "2", + } + job_resource = self._make_resource(started=True) + job_resource_done = self._make_resource(started=True, ended=True) + job_resource_done["configuration"]["query"]["destinationTable"] = { + "projectId": "dest-project", + "datasetId": "dest_dataset", + "tableId": "dest_table", + } + + connection = _make_connection( + exceptions.NotFound("not normally retriable"), + query_resource, + exceptions.NotFound("not normally retriable"), + query_resource_done, + exceptions.NotFound("not normally retriable"), + job_resource_done, + ) + client = _make_client(self.PROJECT, connection=connection) + job = self._get_target_class().from_api_repr(job_resource, client) + + custom_predicate = mock.Mock() + custom_predicate.return_value = True + custom_retry = google.api_core.retry.Retry(predicate=custom_predicate) + + self.assertIsInstance(job.result(retry=custom_retry), RowIterator) + query_results_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", + query_params={"maxResults": 0}, + timeout=None, + ) + 
reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={}, + timeout=None, + ) + + connection.api_request.assert_has_calls( + [query_results_call, query_results_call, reload_call] + ) + def test_result_w_empty_schema(self): from google.cloud.bigquery.table import _EmptyRowIterator From 4f326b1ca4411cfbf5ded86955a963d3e05a409f Mon Sep 17 00:00:00 2001 From: Rickard von Essen Date: Tue, 3 Nov 2020 16:58:03 +0100 Subject: [PATCH 043/341] fix: add missing spaces in opentelemetry log message (#360) Currently this log message renders like this: ``` This service is instrumented using OpenTelemetry.OpenTelemetry could not be imported; pleaseadd opentelemetry-api and opentelemetry-instrumentationpackages in order to get BigQuery Tracing data. ``` where it should be ``` This service is instrumented using OpenTelemetry. OpenTelemetry could not be imported; please add opentelemetry-api and opentelemetry-instrumentation packages in order to get BigQuery Tracing data." ``` --- google/cloud/bigquery/opentelemetry_tracing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/opentelemetry_tracing.py b/google/cloud/bigquery/opentelemetry_tracing.py index f7375c346..b9d18efad 100644 --- a/google/cloud/bigquery/opentelemetry_tracing.py +++ b/google/cloud/bigquery/opentelemetry_tracing.py @@ -26,9 +26,9 @@ except ImportError: logger.info( - "This service is instrumented using OpenTelemetry." - "OpenTelemetry could not be imported; please" - "add opentelemetry-api and opentelemetry-instrumentation" + "This service is instrumented using OpenTelemetry. " + "OpenTelemetry could not be imported; please " + "add opentelemetry-api and opentelemetry-instrumentation " "packages in order to get BigQuery Tracing data." 
) From 2849e569d0423e6e40bda953b0e9d38157aaf2df Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 4 Nov 2020 09:09:35 -0600 Subject: [PATCH 044/341] refactor: break job into multiple modules (#361) Original paths are retained for backwards compatibility. --- google/cloud/bigquery/enums.py | 139 + google/cloud/bigquery/job.py | 3846 --------------- google/cloud/bigquery/job/__init__.py | 77 + google/cloud/bigquery/job/base.py | 912 ++++ google/cloud/bigquery/job/copy_.py | 223 + google/cloud/bigquery/job/extract.py | 266 + google/cloud/bigquery/job/load.py | 758 +++ google/cloud/bigquery/job/query.py | 1644 +++++++ tests/unit/job/__init__.py | 13 + tests/unit/job/helpers.py | 198 + tests/unit/job/test_base.py | 1105 +++++ tests/unit/job/test_copy.py | 477 ++ tests/unit/job/test_extract.py | 437 ++ tests/unit/job/test_load.py | 838 ++++ tests/unit/job/test_load_config.py | 710 +++ tests/unit/job/test_query.py | 1811 +++++++ tests/unit/job/test_query_config.py | 255 + tests/unit/job/test_query_pandas.py | 450 ++ tests/unit/job/test_query_stats.py | 356 ++ tests/unit/test_job.py | 6448 ------------------------- 20 files changed, 10669 insertions(+), 10294 deletions(-) delete mode 100644 google/cloud/bigquery/job.py create mode 100644 google/cloud/bigquery/job/__init__.py create mode 100644 google/cloud/bigquery/job/base.py create mode 100644 google/cloud/bigquery/job/copy_.py create mode 100644 google/cloud/bigquery/job/extract.py create mode 100644 google/cloud/bigquery/job/load.py create mode 100644 google/cloud/bigquery/job/query.py create mode 100644 tests/unit/job/__init__.py create mode 100644 tests/unit/job/helpers.py create mode 100644 tests/unit/job/test_base.py create mode 100644 tests/unit/job/test_copy.py create mode 100644 tests/unit/job/test_extract.py create mode 100644 tests/unit/job/test_load.py create mode 100644 tests/unit/job/test_load_config.py create mode 100644 tests/unit/job/test_query.py create mode 100644 tests/unit/job/test_query_config.py 
create mode 100644 tests/unit/job/test_query_pandas.py create mode 100644 tests/unit/job/test_query_stats.py delete mode 100644 tests/unit/test_job.py diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index eb33e4276..3f72333af 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -20,6 +20,124 @@ from google.cloud.bigquery_v2 import types as gapic_types +class Compression(object): + """The compression type to use for exported files. The default value is + :attr:`NONE`. + + :attr:`DEFLATE` and :attr:`SNAPPY` are + only supported for Avro. + """ + + GZIP = "GZIP" + """Specifies GZIP format.""" + + DEFLATE = "DEFLATE" + """Specifies DEFLATE format.""" + + SNAPPY = "SNAPPY" + """Specifies SNAPPY format.""" + + NONE = "NONE" + """Specifies no compression.""" + + +class CreateDisposition(object): + """Specifies whether the job is allowed to create new tables. The default + value is :attr:`CREATE_IF_NEEDED`. + + Creation, truncation and append actions occur as one atomic update + upon job completion. + """ + + CREATE_IF_NEEDED = "CREATE_IF_NEEDED" + """If the table does not exist, BigQuery creates the table.""" + + CREATE_NEVER = "CREATE_NEVER" + """The table must already exist. If it does not, a 'notFound' error is + returned in the job result.""" + + +class DestinationFormat(object): + """The exported file format. The default value is :attr:`CSV`. + + Tables with nested or repeated fields cannot be exported as CSV. + """ + + CSV = "CSV" + """Specifies CSV format.""" + + NEWLINE_DELIMITED_JSON = "NEWLINE_DELIMITED_JSON" + """Specifies newline delimited JSON format.""" + + AVRO = "AVRO" + """Specifies Avro format.""" + + +class Encoding(object): + """The character encoding of the data. The default is :attr:`UTF_8`. + + BigQuery decodes the data after the raw, binary data has been + split using the values of the quote and fieldDelimiter properties. 
+ """ + + UTF_8 = "UTF-8" + """Specifies UTF-8 encoding.""" + + ISO_8859_1 = "ISO-8859-1" + """Specifies ISO-8859-1 encoding.""" + + +class QueryPriority(object): + """Specifies a priority for the query. The default value is + :attr:`INTERACTIVE`. + """ + + INTERACTIVE = "INTERACTIVE" + """Specifies interactive priority.""" + + BATCH = "BATCH" + """Specifies batch priority.""" + + +class SchemaUpdateOption(object): + """Specifies an update to the destination table schema as a side effect of + a load job. + """ + + ALLOW_FIELD_ADDITION = "ALLOW_FIELD_ADDITION" + """Allow adding a nullable field to the schema.""" + + ALLOW_FIELD_RELAXATION = "ALLOW_FIELD_RELAXATION" + """Allow relaxing a required field in the original schema to nullable.""" + + +class SourceFormat(object): + """The format of the data files. The default value is :attr:`CSV`. + + Note that the set of allowed values for loading data is different + than the set used for external data sources (see + :class:`~google.cloud.bigquery.external_config.ExternalSourceFormat`). + """ + + CSV = "CSV" + """Specifies CSV format.""" + + DATASTORE_BACKUP = "DATASTORE_BACKUP" + """Specifies datastore backup format""" + + NEWLINE_DELIMITED_JSON = "NEWLINE_DELIMITED_JSON" + """Specifies newline delimited JSON format.""" + + AVRO = "AVRO" + """Specifies Avro format.""" + + PARQUET = "PARQUET" + """Specifies Parquet format.""" + + ORC = "ORC" + """Specifies Orc format.""" + + _SQL_SCALAR_TYPES = frozenset( ( "INT64", @@ -92,3 +210,24 @@ class SqlTypeNames(str, enum.Enum): DATE = "DATE" TIME = "TIME" DATETIME = "DATETIME" + + +class WriteDisposition(object): + """Specifies the action that occurs if destination table already exists. + + The default value is :attr:`WRITE_APPEND`. + + Each action is atomic and only occurs if BigQuery is able to complete + the job successfully. Creation, truncation and append actions occur as one + atomic update upon job completion. 
+ """ + + WRITE_APPEND = "WRITE_APPEND" + """If the table already exists, BigQuery appends the data to the table.""" + + WRITE_TRUNCATE = "WRITE_TRUNCATE" + """If the table already exists, BigQuery overwrites the table data.""" + + WRITE_EMPTY = "WRITE_EMPTY" + """If the table already exists and contains data, a 'duplicate' error is + returned in the job result.""" diff --git a/google/cloud/bigquery/job.py b/google/cloud/bigquery/job.py deleted file mode 100644 index e2e7e839a..000000000 --- a/google/cloud/bigquery/job.py +++ /dev/null @@ -1,3846 +0,0 @@ -# Copyright 2015 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -"""Define API Jobs.""" - -from __future__ import division - -import concurrent.futures -import copy -import re -import threading - -import requests -import six -from six.moves import http_client - -import google.api_core.future.polling -from google.cloud import exceptions -from google.cloud.exceptions import NotFound -from google.cloud.bigquery.dataset import Dataset -from google.cloud.bigquery.dataset import DatasetListItem -from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration -from google.cloud.bigquery.external_config import ExternalConfig -from google.cloud.bigquery.external_config import HivePartitioningOptions -from google.cloud.bigquery import _helpers -from google.cloud.bigquery.model import ModelReference -from google.cloud.bigquery.query import _query_param_from_api_repr -from google.cloud.bigquery.query import ArrayQueryParameter -from google.cloud.bigquery.query import ScalarQueryParameter -from google.cloud.bigquery.query import StructQueryParameter -from google.cloud.bigquery.query import UDFResource -from google.cloud.bigquery.retry import DEFAULT_RETRY -from google.cloud.bigquery.routine import RoutineReference -from google.cloud.bigquery.schema import SchemaField -from google.cloud.bigquery.schema import _to_schema_fields -from google.cloud.bigquery.table import _EmptyRowIterator -from google.cloud.bigquery.table import RangePartitioning -from google.cloud.bigquery.table import _table_arg_to_table_ref -from google.cloud.bigquery.table import Table -from google.cloud.bigquery.table import TableListItem -from google.cloud.bigquery.table import TableReference -from google.cloud.bigquery.table import TimePartitioning - -_DONE_STATE = "DONE" -_STOPPED_REASON = "stopped" -_TIMEOUT_BUFFER_SECS = 0.1 -_CONTAINS_ORDER_BY = re.compile(r"ORDER\s+BY", re.IGNORECASE) - -_ERROR_REASON_TO_EXCEPTION = { - "accessDenied": http_client.FORBIDDEN, - "backendError": 
http_client.INTERNAL_SERVER_ERROR, - "billingNotEnabled": http_client.FORBIDDEN, - "billingTierLimitExceeded": http_client.BAD_REQUEST, - "blocked": http_client.FORBIDDEN, - "duplicate": http_client.CONFLICT, - "internalError": http_client.INTERNAL_SERVER_ERROR, - "invalid": http_client.BAD_REQUEST, - "invalidQuery": http_client.BAD_REQUEST, - "notFound": http_client.NOT_FOUND, - "notImplemented": http_client.NOT_IMPLEMENTED, - "quotaExceeded": http_client.FORBIDDEN, - "rateLimitExceeded": http_client.FORBIDDEN, - "resourceInUse": http_client.BAD_REQUEST, - "resourcesExceeded": http_client.BAD_REQUEST, - "responseTooLarge": http_client.FORBIDDEN, - "stopped": http_client.OK, - "tableUnavailable": http_client.BAD_REQUEST, -} - - -def _error_result_to_exception(error_result): - """Maps BigQuery error reasons to an exception. - - The reasons and their matching HTTP status codes are documented on - the `troubleshooting errors`_ page. - - .. _troubleshooting errors: https://cloud.google.com/bigquery\ - /troubleshooting-errors - - Args: - error_result (Mapping[str, str]): The error result from BigQuery. - - Returns: - google.cloud.exceptions.GoogleCloudError: The mapped exception. - """ - reason = error_result.get("reason") - status_code = _ERROR_REASON_TO_EXCEPTION.get( - reason, http_client.INTERNAL_SERVER_ERROR - ) - return exceptions.from_http_status( - status_code, error_result.get("message", ""), errors=[error_result] - ) - - -def _contains_order_by(query): - """Do we need to preserve the order of the query results? - - This function has known false positives, such as with ordered window - functions: - - .. code-block:: sql - - SELECT SUM(x) OVER ( - window_name - PARTITION BY... - ORDER BY... - window_frame_clause) - FROM ... - - This false positive failure case means the behavior will be correct, but - downloading results with the BigQuery Storage API may be slower than it - otherwise would. 
This is preferable to the false negative case, where - results are expected to be in order but are not (due to parallel reads). - """ - return query and _CONTAINS_ORDER_BY.search(query) - - -class Compression(object): - """The compression type to use for exported files. The default value is - :attr:`NONE`. - - :attr:`DEFLATE` and :attr:`SNAPPY` are - only supported for Avro. - """ - - GZIP = "GZIP" - """Specifies GZIP format.""" - - DEFLATE = "DEFLATE" - """Specifies DEFLATE format.""" - - SNAPPY = "SNAPPY" - """Specifies SNAPPY format.""" - - NONE = "NONE" - """Specifies no compression.""" - - -class CreateDisposition(object): - """Specifies whether the job is allowed to create new tables. The default - value is :attr:`CREATE_IF_NEEDED`. - - Creation, truncation and append actions occur as one atomic update - upon job completion. - """ - - CREATE_IF_NEEDED = "CREATE_IF_NEEDED" - """If the table does not exist, BigQuery creates the table.""" - - CREATE_NEVER = "CREATE_NEVER" - """The table must already exist. If it does not, a 'notFound' error is - returned in the job result.""" - - -class DestinationFormat(object): - """The exported file format. The default value is :attr:`CSV`. - - Tables with nested or repeated fields cannot be exported as CSV. - """ - - CSV = "CSV" - """Specifies CSV format.""" - - NEWLINE_DELIMITED_JSON = "NEWLINE_DELIMITED_JSON" - """Specifies newline delimited JSON format.""" - - AVRO = "AVRO" - """Specifies Avro format.""" - - -class Encoding(object): - """The character encoding of the data. The default is :attr:`UTF_8`. - - BigQuery decodes the data after the raw, binary data has been - split using the values of the quote and fieldDelimiter properties. - """ - - UTF_8 = "UTF-8" - """Specifies UTF-8 encoding.""" - - ISO_8859_1 = "ISO-8859-1" - """Specifies ISO-8859-1 encoding.""" - - -class QueryPriority(object): - """Specifies a priority for the query. The default value is - :attr:`INTERACTIVE`. 
- """ - - INTERACTIVE = "INTERACTIVE" - """Specifies interactive priority.""" - - BATCH = "BATCH" - """Specifies batch priority.""" - - -class SourceFormat(object): - """The format of the data files. The default value is :attr:`CSV`. - - Note that the set of allowed values for loading data is different - than the set used for external data sources (see - :class:`~google.cloud.bigquery.external_config.ExternalSourceFormat`). - """ - - CSV = "CSV" - """Specifies CSV format.""" - - DATASTORE_BACKUP = "DATASTORE_BACKUP" - """Specifies datastore backup format""" - - NEWLINE_DELIMITED_JSON = "NEWLINE_DELIMITED_JSON" - """Specifies newline delimited JSON format.""" - - AVRO = "AVRO" - """Specifies Avro format.""" - - PARQUET = "PARQUET" - """Specifies Parquet format.""" - - ORC = "ORC" - """Specifies Orc format.""" - - -class WriteDisposition(object): - """Specifies the action that occurs if destination table already exists. - - The default value is :attr:`WRITE_APPEND`. - - Each action is atomic and only occurs if BigQuery is able to complete - the job successfully. Creation, truncation and append actions occur as one - atomic update upon job completion. - """ - - WRITE_APPEND = "WRITE_APPEND" - """If the table already exists, BigQuery appends the data to the table.""" - - WRITE_TRUNCATE = "WRITE_TRUNCATE" - """If the table already exists, BigQuery overwrites the table data.""" - - WRITE_EMPTY = "WRITE_EMPTY" - """If the table already exists and contains data, a 'duplicate' error is - returned in the job result.""" - - -class SchemaUpdateOption(object): - """Specifies an update to the destination table schema as a side effect of - a load job. - """ - - ALLOW_FIELD_ADDITION = "ALLOW_FIELD_ADDITION" - """Allow adding a nullable field to the schema.""" - - ALLOW_FIELD_RELAXATION = "ALLOW_FIELD_RELAXATION" - """Allow relaxing a required field in the original schema to nullable.""" - - -class _JobReference(object): - """A reference to a job. 
- - Args: - job_id (str): ID of the job to run. - project (str): ID of the project where the job runs. - location (str): Location of where the job runs. - """ - - def __init__(self, job_id, project, location): - self._properties = {"jobId": job_id, "projectId": project} - # The location field must not be populated if it is None. - if location: - self._properties["location"] = location - - @property - def job_id(self): - """str: ID of the job.""" - return self._properties.get("jobId") - - @property - def project(self): - """str: ID of the project where the job runs.""" - return self._properties.get("projectId") - - @property - def location(self): - """str: Location where the job runs.""" - return self._properties.get("location") - - def _to_api_repr(self): - """Returns the API resource representation of the job reference.""" - return copy.deepcopy(self._properties) - - @classmethod - def _from_api_repr(cls, resource): - """Returns a job reference for an API resource representation.""" - job_id = resource.get("jobId") - project = resource.get("projectId") - location = resource.get("location") - job_ref = cls(job_id, project, location) - return job_ref - - -class _AsyncJob(google.api_core.future.polling.PollingFuture): - """Base class for asynchronous jobs. - - Args: - job_id (Union[str, _JobReference]): - Job's ID in the project associated with the client or a - fully-qualified job reference. - client (google.cloud.bigquery.client.Client): - Client which holds credentials and project configuration. - """ - - def __init__(self, job_id, client): - super(_AsyncJob, self).__init__() - - # The job reference can be either a plain job ID or the full resource. - # Populate the properties dictionary consistently depending on what has - # been passed in. 
- job_ref = job_id - if not isinstance(job_id, _JobReference): - job_ref = _JobReference(job_id, client.project, None) - self._properties = {"jobReference": job_ref._to_api_repr()} - - self._client = client - self._result_set = False - self._completion_lock = threading.Lock() - - @property - def job_id(self): - """str: ID of the job.""" - return _helpers._get_sub_prop(self._properties, ["jobReference", "jobId"]) - - @property - def parent_job_id(self): - """Return the ID of the parent job. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics.FIELDS.parent_job_id - - Returns: - Optional[str]: parent job id. - """ - return _helpers._get_sub_prop(self._properties, ["statistics", "parentJobId"]) - - @property - def script_statistics(self): - resource = _helpers._get_sub_prop( - self._properties, ["statistics", "scriptStatistics"] - ) - if resource is None: - return None - return ScriptStatistics(resource) - - @property - def num_child_jobs(self): - """The number of child jobs executed. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics.FIELDS.num_child_jobs - - Returns: - int - """ - count = _helpers._get_sub_prop(self._properties, ["statistics", "numChildJobs"]) - return int(count) if count is not None else 0 - - @property - def project(self): - """Project bound to the job. - - Returns: - str: the project (derived from the client). - """ - return _helpers._get_sub_prop(self._properties, ["jobReference", "projectId"]) - - @property - def location(self): - """str: Location where the job runs.""" - return _helpers._get_sub_prop(self._properties, ["jobReference", "location"]) - - def _require_client(self, client): - """Check client or verify over-ride. - - Args: - client (Optional[google.cloud.bigquery.client.Client]): - the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. 
- - Returns: - google.cloud.bigquery.client.Client: - The client passed in or the currently bound client. - """ - if client is None: - client = self._client - return client - - @property - def job_type(self): - """Type of job. - - Returns: - str: one of 'load', 'copy', 'extract', 'query'. - """ - return self._JOB_TYPE - - @property - def path(self): - """URL path for the job's APIs. - - Returns: - str: the path based on project and job ID. - """ - return "/projects/%s/jobs/%s" % (self.project, self.job_id) - - @property - def labels(self): - """Dict[str, str]: Labels for the job.""" - return self._properties.setdefault("labels", {}) - - @property - def etag(self): - """ETag for the job resource. - - Returns: - Optional[str]: the ETag (None until set from the server). - """ - return self._properties.get("etag") - - @property - def self_link(self): - """URL for the job resource. - - Returns: - Optional[str]: the URL (None until set from the server). - """ - return self._properties.get("selfLink") - - @property - def user_email(self): - """E-mail address of user who submitted the job. - - Returns: - Optional[str]: the URL (None until set from the server). - """ - return self._properties.get("user_email") - - @property - def created(self): - """Datetime at which the job was created. - - Returns: - Optional[datetime.datetime]: - the creation time (None until set from the server). - """ - millis = _helpers._get_sub_prop( - self._properties, ["statistics", "creationTime"] - ) - if millis is not None: - return _helpers._datetime_from_microseconds(millis * 1000.0) - - @property - def started(self): - """Datetime at which the job was started. - - Returns: - Optional[datetime.datetime]: - the start time (None until set from the server). 
- """ - millis = _helpers._get_sub_prop(self._properties, ["statistics", "startTime"]) - if millis is not None: - return _helpers._datetime_from_microseconds(millis * 1000.0) - - @property - def ended(self): - """Datetime at which the job finished. - - Returns: - Optional[datetime.datetime]: - the end time (None until set from the server). - """ - millis = _helpers._get_sub_prop(self._properties, ["statistics", "endTime"]) - if millis is not None: - return _helpers._datetime_from_microseconds(millis * 1000.0) - - def _job_statistics(self): - """Helper for job-type specific statistics-based properties.""" - statistics = self._properties.get("statistics", {}) - return statistics.get(self._JOB_TYPE, {}) - - @property - def error_result(self): - """Error information about the job as a whole. - - Returns: - Optional[Mapping]: the error information (None until set from the server). - """ - status = self._properties.get("status") - if status is not None: - return status.get("errorResult") - - @property - def errors(self): - """Information about individual errors generated by the job. - - Returns: - Optional[List[Mapping]]: - the error information (None until set from the server). - """ - status = self._properties.get("status") - if status is not None: - return status.get("errors") - - @property - def state(self): - """Status of the job. - - Returns: - Optional[str]: - the state (None until set from the server). - """ - status = self._properties.get("status", {}) - return status.get("state") - - def _set_properties(self, api_response): - """Update properties from resource in body of ``api_response`` - - Args: - api_response (Dict): response returned from an API call. 
- """ - cleaned = api_response.copy() - - statistics = cleaned.get("statistics", {}) - if "creationTime" in statistics: - statistics["creationTime"] = float(statistics["creationTime"]) - if "startTime" in statistics: - statistics["startTime"] = float(statistics["startTime"]) - if "endTime" in statistics: - statistics["endTime"] = float(statistics["endTime"]) - - # Save configuration to keep reference same in self._configuration. - cleaned_config = cleaned.pop("configuration", {}) - configuration = self._properties.pop("configuration", {}) - self._properties.clear() - self._properties.update(cleaned) - self._properties["configuration"] = configuration - self._properties["configuration"].update(cleaned_config) - - # For Future interface - self._set_future_result() - - @classmethod - def _check_resource_config(cls, resource): - """Helper for :meth:`from_api_repr` - - Args: - resource (Dict): resource for the job. - - Raises: - KeyError: - If the resource has no identifier, or - is missing the appropriate configuration. - """ - if "jobReference" not in resource or "jobId" not in resource["jobReference"]: - raise KeyError( - "Resource lacks required identity information: " - '["jobReference"]["jobId"]' - ) - if ( - "configuration" not in resource - or cls._JOB_TYPE not in resource["configuration"] - ): - raise KeyError( - "Resource lacks required configuration: " - '["configuration"]["%s"]' % cls._JOB_TYPE - ) - - def to_api_repr(self): - """Generate a resource for the job.""" - return copy.deepcopy(self._properties) - - _build_resource = to_api_repr # backward-compatibility alias - - def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): - """API call: begin the job via a POST request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert - - Args: - client (Optional[google.cloud.bigquery.client.Client]): - The client to use. 
If not passed, falls back to the ``client`` - associated with the job object or``NoneType`` - retry (Optional[google.api_core.retry.Retry]): - How to retry the RPC. - timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - - Raises: - ValueError: - If the job has already begun. - """ - if self.state is not None: - raise ValueError("Job already begun.") - - client = self._require_client(client) - path = "/projects/%s/jobs" % (self.project,) - - # jobs.insert is idempotent because we ensure that every new - # job has an ID. - span_attributes = {"path": path} - api_response = client._call_api( - retry, - span_name="BigQuery.job.begin", - span_attributes=span_attributes, - job_ref=self, - method="POST", - path=path, - data=self.to_api_repr(), - timeout=timeout, - ) - self._set_properties(api_response) - - def exists(self, client=None, retry=DEFAULT_RETRY, timeout=None): - """API call: test for the existence of the job via a GET request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get - - Args: - client (Optional[google.cloud.bigquery.client.Client]): - the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. - timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - - Returns: - bool: Boolean indicating existence of the job. 
- """ - client = self._require_client(client) - - extra_params = {"fields": "id"} - if self.location: - extra_params["location"] = self.location - - try: - span_attributes = {"path": self.path} - - client._call_api( - retry, - span_name="BigQuery.job.exists", - span_attributes=span_attributes, - job_ref=self, - method="GET", - path=self.path, - query_params=extra_params, - timeout=timeout, - ) - except NotFound: - return False - else: - return True - - def reload(self, client=None, retry=DEFAULT_RETRY, timeout=None): - """API call: refresh job properties via a GET request. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get - - Args: - client (Optional[google.cloud.bigquery.client.Client]): - the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - - retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. - timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - """ - client = self._require_client(client) - - extra_params = {} - if self.location: - extra_params["location"] = self.location - span_attributes = {"path": self.path} - - api_response = client._call_api( - retry, - span_name="BigQuery.job.reload", - span_attributes=span_attributes, - job_ref=self, - method="GET", - path=self.path, - query_params=extra_params, - timeout=timeout, - ) - self._set_properties(api_response) - - def cancel(self, client=None, retry=DEFAULT_RETRY, timeout=None): - """API call: cancel job via a POST request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/cancel - - Args: - client (Optional[google.cloud.bigquery.client.Client]): - the client to use. If not passed, falls back to the - ``client`` stored on the current dataset. - retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. 
- timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry`` - - Returns: - bool: Boolean indicating that the cancel request was sent. - """ - client = self._require_client(client) - - extra_params = {} - if self.location: - extra_params["location"] = self.location - - path = "{}/cancel".format(self.path) - span_attributes = {"path": path} - - api_response = client._call_api( - retry, - span_name="BigQuery.job.cancel", - span_attributes=span_attributes, - job_ref=self, - method="POST", - path=path, - query_params=extra_params, - timeout=timeout, - ) - self._set_properties(api_response["job"]) - # The Future interface requires that we return True if the *attempt* - # to cancel was successful. - return True - - # The following methods implement the PollingFuture interface. Note that - # the methods above are from the pre-Future interface and are left for - # compatibility. The only "overloaded" method is :meth:`cancel`, which - # satisfies both interfaces. - - def _set_future_result(self): - """Set the result or exception from the job if it is complete.""" - # This must be done in a lock to prevent the polling thread - # and main thread from both executing the completion logic - # at the same time. - with self._completion_lock: - # If the operation isn't complete or if the result has already been - # set, do not call set_result/set_exception again. - # Note: self._result_set is set to True in set_result and - # set_exception, in case those methods are invoked directly. - if not self.done(reload=False) or self._result_set: - return - - if self.error_result is not None: - exception = _error_result_to_exception(self.error_result) - self.set_exception(exception) - else: - self.set_result(self) - - def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): - """Checks if the job is complete. - - Args: - retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. 
- timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - reload (Optional[bool]): - If ``True``, make an API call to refresh the job state of - unfinished jobs before checking. Default ``True``. - - Returns: - bool: True if the job is complete, False otherwise. - """ - # Do not refresh is the state is already done, as the job will not - # change once complete. - if self.state != _DONE_STATE and reload: - self.reload(retry=retry, timeout=timeout) - return self.state == _DONE_STATE - - def result(self, retry=DEFAULT_RETRY, timeout=None): - """Start the job and wait for it to complete and get the result. - - Args: - retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. - timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - If multiple requests are made under the hood, ``timeout`` - applies to each individual request. - - Returns: - _AsyncJob: This instance. - - Raises: - google.cloud.exceptions.GoogleCloudError: - if the job failed. - concurrent.futures.TimeoutError: - if the job did not complete in the given timeout. - """ - if self.state is None: - self._begin(retry=retry, timeout=timeout) - - kwargs = {} if retry is DEFAULT_RETRY else {"retry": retry} - return super(_AsyncJob, self).result(timeout=timeout, **kwargs) - - def cancelled(self): - """Check if the job has been cancelled. - - This always returns False. It's not possible to check if a job was - cancelled in the API. This method is here to satisfy the interface - for :class:`google.api_core.future.Future`. - - Returns: - bool: False - """ - return ( - self.error_result is not None - and self.error_result.get("reason") == _STOPPED_REASON - ) - - -class _JobConfig(object): - """Abstract base class for job configuration objects. - - Args: - job_type (str): The key to use for the job configuration. 
- """ - - def __init__(self, job_type, **kwargs): - self._job_type = job_type - self._properties = {job_type: {}} - for prop, val in kwargs.items(): - setattr(self, prop, val) - - @property - def labels(self): - """Dict[str, str]: Labels for the job. - - This method always returns a dict. To change a job's labels, - modify the dict, then call ``Client.update_job``. To delete a - label, set its value to :data:`None` before updating. - - Raises: - ValueError: If ``value`` type is invalid. - """ - return self._properties.setdefault("labels", {}) - - @labels.setter - def labels(self, value): - if not isinstance(value, dict): - raise ValueError("Pass a dict") - self._properties["labels"] = value - - def _get_sub_prop(self, key, default=None): - """Get a value in the ``self._properties[self._job_type]`` dictionary. - - Most job properties are inside the dictionary related to the job type - (e.g. 'copy', 'extract', 'load', 'query'). Use this method to access - those properties:: - - self._get_sub_prop('destinationTable') - - This is equivalent to using the ``_helpers._get_sub_prop`` function:: - - _helpers._get_sub_prop( - self._properties, ['query', 'destinationTable']) - - Args: - key (str): - Key for the value to get in the - ``self._properties[self._job_type]`` dictionary. - default (Optional[object]): - Default value to return if the key is not found. - Defaults to :data:`None`. - - Returns: - object: The value if present or the default. - """ - return _helpers._get_sub_prop( - self._properties, [self._job_type, key], default=default - ) - - def _set_sub_prop(self, key, value): - """Set a value in the ``self._properties[self._job_type]`` dictionary. - - Most job properties are inside the dictionary related to the job type - (e.g. 'copy', 'extract', 'load', 'query'). 
Use this method to set - those properties:: - - self._set_sub_prop('useLegacySql', False) - - This is equivalent to using the ``_helper._set_sub_prop`` function:: - - _helper._set_sub_prop( - self._properties, ['query', 'useLegacySql'], False) - - Args: - key (str): - Key to set in the ``self._properties[self._job_type]`` - dictionary. - value (object): Value to set. - """ - _helpers._set_sub_prop(self._properties, [self._job_type, key], value) - - def _del_sub_prop(self, key): - """Remove ``key`` from the ``self._properties[self._job_type]`` dict. - - Most job properties are inside the dictionary related to the job type - (e.g. 'copy', 'extract', 'load', 'query'). Use this method to clear - those properties:: - - self._del_sub_prop('useLegacySql') - - This is equivalent to using the ``_helper._del_sub_prop`` function:: - - _helper._del_sub_prop( - self._properties, ['query', 'useLegacySql']) - - Args: - key (str): - Key to remove in the ``self._properties[self._job_type]`` - dictionary. - """ - _helpers._del_sub_prop(self._properties, [self._job_type, key]) - - def to_api_repr(self): - """Build an API representation of the job config. - - Returns: - Dict: A dictionary in the format used by the BigQuery API. - """ - return copy.deepcopy(self._properties) - - def _fill_from_default(self, default_job_config): - """Merge this job config with a default job config. - - The keys in this object take precedence over the keys in the default - config. The merge is done at the top-level as well as for keys one - level below the job type. - - Args: - default_job_config (google.cloud.bigquery.job._JobConfig): - The default job config that will be used to fill in self. - - Returns: - google.cloud.bigquery.job._JobConfig: A new (merged) job config. 
- """ - if self._job_type != default_job_config._job_type: - raise TypeError( - "attempted to merge two incompatible job types: " - + repr(self._job_type) - + ", " - + repr(default_job_config._job_type) - ) - - new_job_config = self.__class__() - - default_job_properties = copy.deepcopy(default_job_config._properties) - for key in self._properties: - if key != self._job_type: - default_job_properties[key] = self._properties[key] - - default_job_properties[self._job_type].update(self._properties[self._job_type]) - new_job_config._properties = default_job_properties - - return new_job_config - - @classmethod - def from_api_repr(cls, resource): - """Factory: construct a job configuration given its API representation - - Args: - resource (Dict): - A job configuration in the same representation as is returned - from the API. - - Returns: - google.cloud.bigquery.job._JobConfig: Configuration parsed from ``resource``. - """ - job_config = cls() - job_config._properties = resource - return job_config - - -class LoadJobConfig(_JobConfig): - """Configuration options for load jobs. - - All properties in this class are optional. Values which are :data:`None` -> - server defaults. Set properties on the constructed configuration by using - the property name as the name of a keyword argument. - """ - - def __init__(self, **kwargs): - super(LoadJobConfig, self).__init__("load", **kwargs) - - @property - def allow_jagged_rows(self): - """Optional[bool]: Allow missing trailing optional columns (CSV only). - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.allow_jagged_rows - """ - return self._get_sub_prop("allowJaggedRows") - - @allow_jagged_rows.setter - def allow_jagged_rows(self, value): - self._set_sub_prop("allowJaggedRows", value) - - @property - def allow_quoted_newlines(self): - """Optional[bool]: Allow quoted data containing newline characters (CSV only). 
- - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.allow_quoted_newlines - """ - return self._get_sub_prop("allowQuotedNewlines") - - @allow_quoted_newlines.setter - def allow_quoted_newlines(self, value): - self._set_sub_prop("allowQuotedNewlines", value) - - @property - def autodetect(self): - """Optional[bool]: Automatically infer the schema from a sample of the data. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.autodetect - """ - return self._get_sub_prop("autodetect") - - @autodetect.setter - def autodetect(self, value): - self._set_sub_prop("autodetect", value) - - @property - def clustering_fields(self): - """Optional[List[str]]: Fields defining clustering for the table - - (Defaults to :data:`None`). - - Clustering fields are immutable after table creation. - - .. note:: - - BigQuery supports clustering for both partitioned and - non-partitioned tables. - """ - prop = self._get_sub_prop("clustering") - if prop is not None: - return list(prop.get("fields", ())) - - @clustering_fields.setter - def clustering_fields(self, value): - """Optional[List[str]]: Fields defining clustering for the table - - (Defaults to :data:`None`). - """ - if value is not None: - self._set_sub_prop("clustering", {"fields": value}) - else: - self._del_sub_prop("clustering") - - @property - def create_disposition(self): - """Optional[google.cloud.bigquery.job.CreateDisposition]: Specifies behavior - for creating tables. 
- - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.create_disposition - """ - return self._get_sub_prop("createDisposition") - - @create_disposition.setter - def create_disposition(self, value): - self._set_sub_prop("createDisposition", value) - - @property - def destination_encryption_configuration(self): - """Optional[google.cloud.bigquery.encryption_configuration.EncryptionConfiguration]: Custom - encryption configuration for the destination table. - - Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` - if using default encryption. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.destination_encryption_configuration - """ - prop = self._get_sub_prop("destinationEncryptionConfiguration") - if prop is not None: - prop = EncryptionConfiguration.from_api_repr(prop) - return prop - - @destination_encryption_configuration.setter - def destination_encryption_configuration(self, value): - api_repr = value - if value is not None: - api_repr = value.to_api_repr() - self._set_sub_prop("destinationEncryptionConfiguration", api_repr) - else: - self._del_sub_prop("destinationEncryptionConfiguration") - - @property - def destination_table_description(self): - """Optional[str]: Name given to destination table. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.description - """ - prop = self._get_sub_prop("destinationTableProperties") - if prop is not None: - return prop["description"] - - @destination_table_description.setter - def destination_table_description(self, value): - keys = [self._job_type, "destinationTableProperties", "description"] - if value is not None: - _helpers._set_sub_prop(self._properties, keys, value) - else: - _helpers._del_sub_prop(self._properties, keys) - - @property - def destination_table_friendly_name(self): - """Optional[str]: Name given to destination table. 
- - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.friendly_name - """ - prop = self._get_sub_prop("destinationTableProperties") - if prop is not None: - return prop["friendlyName"] - - @destination_table_friendly_name.setter - def destination_table_friendly_name(self, value): - keys = [self._job_type, "destinationTableProperties", "friendlyName"] - if value is not None: - _helpers._set_sub_prop(self._properties, keys, value) - else: - _helpers._del_sub_prop(self._properties, keys) - - @property - def encoding(self): - """Optional[google.cloud.bigquery.job.Encoding]: The character encoding of the - data. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.encoding - """ - return self._get_sub_prop("encoding") - - @encoding.setter - def encoding(self, value): - self._set_sub_prop("encoding", value) - - @property - def field_delimiter(self): - """Optional[str]: The separator for fields in a CSV file. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.field_delimiter - """ - return self._get_sub_prop("fieldDelimiter") - - @field_delimiter.setter - def field_delimiter(self, value): - self._set_sub_prop("fieldDelimiter", value) - - @property - def hive_partitioning(self): - """Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \ - it configures hive partitioning support. - - .. note:: - **Experimental**. This feature is experimental and might change or - have limited support. 
- - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.hive_partitioning_options - """ - prop = self._get_sub_prop("hivePartitioningOptions") - if prop is None: - return None - return HivePartitioningOptions.from_api_repr(prop) - - @hive_partitioning.setter - def hive_partitioning(self, value): - if value is not None: - if isinstance(value, HivePartitioningOptions): - value = value.to_api_repr() - else: - raise TypeError("Expected a HivePartitioningOptions instance or None.") - - self._set_sub_prop("hivePartitioningOptions", value) - - @property - def ignore_unknown_values(self): - """Optional[bool]: Ignore extra values not represented in the table schema. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.ignore_unknown_values - """ - return self._get_sub_prop("ignoreUnknownValues") - - @ignore_unknown_values.setter - def ignore_unknown_values(self, value): - self._set_sub_prop("ignoreUnknownValues", value) - - @property - def max_bad_records(self): - """Optional[int]: Number of invalid rows to ignore. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.max_bad_records - """ - return _helpers._int_or_none(self._get_sub_prop("maxBadRecords")) - - @max_bad_records.setter - def max_bad_records(self, value): - self._set_sub_prop("maxBadRecords", value) - - @property - def null_marker(self): - """Optional[str]: Represents a null value (CSV only). - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.null_marker - """ - return self._get_sub_prop("nullMarker") - - @null_marker.setter - def null_marker(self, value): - self._set_sub_prop("nullMarker", value) - - @property - def quote_character(self): - """Optional[str]: Character used to quote data sections (CSV only). 
- - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.quote - """ - return self._get_sub_prop("quote") - - @quote_character.setter - def quote_character(self, value): - self._set_sub_prop("quote", value) - - @property - def range_partitioning(self): - """Optional[google.cloud.bigquery.table.RangePartitioning]: - Configures range-based partitioning for destination table. - - .. note:: - **Beta**. The integer range partitioning feature is in a - pre-release state and might change or have limited support. - - Only specify at most one of - :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or - :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. - - Raises: - ValueError: - If the value is not - :class:`~google.cloud.bigquery.table.RangePartitioning` or - :data:`None`. - """ - resource = self._get_sub_prop("rangePartitioning") - if resource is not None: - return RangePartitioning(_properties=resource) - - @range_partitioning.setter - def range_partitioning(self, value): - resource = value - if isinstance(value, RangePartitioning): - resource = value._properties - elif value is not None: - raise ValueError( - "Expected value to be RangePartitioning or None, got {}.".format(value) - ) - self._set_sub_prop("rangePartitioning", resource) - - @property - def schema(self): - """Optional[Sequence[Union[ \ - :class:`~google.cloud.bigquery.schema.SchemaField`, \ - Mapping[str, Any] \ - ]]]: Schema of the destination table. 
- - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.schema - """ - schema = _helpers._get_sub_prop(self._properties, ["load", "schema", "fields"]) - if schema is None: - return - return [SchemaField.from_api_repr(field) for field in schema] - - @schema.setter - def schema(self, value): - if value is None: - self._del_sub_prop("schema") - return - - value = _to_schema_fields(value) - - _helpers._set_sub_prop( - self._properties, - ["load", "schema", "fields"], - [field.to_api_repr() for field in value], - ) - - @property - def schema_update_options(self): - """Optional[List[google.cloud.bigquery.job.SchemaUpdateOption]]: Specifies - updates to the destination table schema to allow as a side effect of - the load job. - """ - return self._get_sub_prop("schemaUpdateOptions") - - @schema_update_options.setter - def schema_update_options(self, values): - self._set_sub_prop("schemaUpdateOptions", values) - - @property - def skip_leading_rows(self): - """Optional[int]: Number of rows to skip when reading data (CSV only). - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.skip_leading_rows - """ - return _helpers._int_or_none(self._get_sub_prop("skipLeadingRows")) - - @skip_leading_rows.setter - def skip_leading_rows(self, value): - self._set_sub_prop("skipLeadingRows", str(value)) - - @property - def source_format(self): - """Optional[google.cloud.bigquery.job.SourceFormat]: File format of the data. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_format - """ - return self._get_sub_prop("sourceFormat") - - @source_format.setter - def source_format(self, value): - self._set_sub_prop("sourceFormat", value) - - @property - def time_partitioning(self): - """Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies time-based - partitioning for the destination table. 
- - Only specify at most one of - :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or - :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. - """ - prop = self._get_sub_prop("timePartitioning") - if prop is not None: - prop = TimePartitioning.from_api_repr(prop) - return prop - - @time_partitioning.setter - def time_partitioning(self, value): - api_repr = value - if value is not None: - api_repr = value.to_api_repr() - self._set_sub_prop("timePartitioning", api_repr) - else: - self._del_sub_prop("timePartitioning") - - @property - def use_avro_logical_types(self): - """Optional[bool]: For loads of Avro data, governs whether Avro logical types are - converted to their corresponding BigQuery types (e.g. TIMESTAMP) rather than - raw types (e.g. INTEGER). - """ - return self._get_sub_prop("useAvroLogicalTypes") - - @use_avro_logical_types.setter - def use_avro_logical_types(self, value): - self._set_sub_prop("useAvroLogicalTypes", bool(value)) - - @property - def write_disposition(self): - """Optional[google.cloud.bigquery.job.WriteDisposition]: Action that occurs if - the destination table already exists. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.write_disposition - """ - return self._get_sub_prop("writeDisposition") - - @write_disposition.setter - def write_disposition(self, value): - self._set_sub_prop("writeDisposition", value) - - -class LoadJob(_AsyncJob): - """Asynchronous job for loading data into a table. - - Can load from Google Cloud Storage URIs or from a file. - - Args: - job_id (str): the job's ID - - source_uris (Optional[Sequence[str]]): - URIs of one or more data files to be loaded. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_uris - for supported URI formats. Pass None for jobs that load from a file. 
- - destination (google.cloud.bigquery.table.TableReference): reference to table into which data is to be loaded. - - client (google.cloud.bigquery.client.Client): - A client which holds credentials and project configuration - for the dataset (which requires a project). - """ - - _JOB_TYPE = "load" - - def __init__(self, job_id, source_uris, destination, client, job_config=None): - super(LoadJob, self).__init__(job_id, client) - - if not job_config: - job_config = LoadJobConfig() - - self._configuration = job_config - self._properties["configuration"] = job_config._properties - - if source_uris is not None: - _helpers._set_sub_prop( - self._properties, ["configuration", "load", "sourceUris"], source_uris - ) - - if destination is not None: - _helpers._set_sub_prop( - self._properties, - ["configuration", "load", "destinationTable"], - destination.to_api_repr(), - ) - - @property - def destination(self): - """google.cloud.bigquery.table.TableReference: table where loaded rows are written - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.destination_table - """ - dest_config = _helpers._get_sub_prop( - self._properties, ["configuration", "load", "destinationTable"] - ) - return TableReference.from_api_repr(dest_config) - - @property - def source_uris(self): - """Optional[Sequence[str]]: URIs of data files to be loaded. See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_uris - for supported URI formats. None for jobs that load from a file. - """ - return _helpers._get_sub_prop( - self._properties, ["configuration", "load", "sourceUris"] - ) - - @property - def allow_jagged_rows(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.allow_jagged_rows`. - """ - return self._configuration.allow_jagged_rows - - @property - def allow_quoted_newlines(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.allow_quoted_newlines`. 
- """ - return self._configuration.allow_quoted_newlines - - @property - def autodetect(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.autodetect`. - """ - return self._configuration.autodetect - - @property - def create_disposition(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.create_disposition`. - """ - return self._configuration.create_disposition - - @property - def encoding(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.encoding`. - """ - return self._configuration.encoding - - @property - def field_delimiter(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.field_delimiter`. - """ - return self._configuration.field_delimiter - - @property - def ignore_unknown_values(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.ignore_unknown_values`. - """ - return self._configuration.ignore_unknown_values - - @property - def max_bad_records(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.max_bad_records`. - """ - return self._configuration.max_bad_records - - @property - def null_marker(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.null_marker`. - """ - return self._configuration.null_marker - - @property - def quote_character(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.quote_character`. - """ - return self._configuration.quote_character - - @property - def skip_leading_rows(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.skip_leading_rows`. - """ - return self._configuration.skip_leading_rows - - @property - def source_format(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.source_format`. - """ - return self._configuration.source_format - - @property - def write_disposition(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.write_disposition`. 
- """ - return self._configuration.write_disposition - - @property - def schema(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.schema`. - """ - return self._configuration.schema - - @property - def destination_encryption_configuration(self): - """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom - encryption configuration for the destination table. - - Custom encryption configuration (e.g., Cloud KMS keys) - or :data:`None` if using default encryption. - - See - :attr:`google.cloud.bigquery.job.LoadJobConfig.destination_encryption_configuration`. - """ - return self._configuration.destination_encryption_configuration - - @property - def destination_table_description(self): - """Optional[str] name given to destination table. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.description - """ - return self._configuration.destination_table_description - - @property - def destination_table_friendly_name(self): - """Optional[str] name given to destination table. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.friendly_name - """ - return self._configuration.destination_table_friendly_name - - @property - def range_partitioning(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. - """ - return self._configuration.range_partitioning - - @property - def time_partitioning(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.time_partitioning`. - """ - return self._configuration.time_partitioning - - @property - def use_avro_logical_types(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.use_avro_logical_types`. - """ - return self._configuration.use_avro_logical_types - - @property - def clustering_fields(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.clustering_fields`. 
- """ - return self._configuration.clustering_fields - - @property - def schema_update_options(self): - """See - :attr:`google.cloud.bigquery.job.LoadJobConfig.schema_update_options`. - """ - return self._configuration.schema_update_options - - @property - def input_file_bytes(self): - """Count of bytes loaded from source files. - - Returns: - Optional[int]: the count (None until set from the server). - - Raises: - ValueError: for invalid value types. - """ - return _helpers._int_or_none( - _helpers._get_sub_prop( - self._properties, ["statistics", "load", "inputFileBytes"] - ) - ) - - @property - def input_files(self): - """Count of source files. - - Returns: - Optional[int]: the count (None until set from the server). - """ - return _helpers._int_or_none( - _helpers._get_sub_prop( - self._properties, ["statistics", "load", "inputFiles"] - ) - ) - - @property - def output_bytes(self): - """Count of bytes saved to destination table. - - Returns: - Optional[int]: the count (None until set from the server). - """ - return _helpers._int_or_none( - _helpers._get_sub_prop( - self._properties, ["statistics", "load", "outputBytes"] - ) - ) - - @property - def output_rows(self): - """Count of rows saved to destination table. - - Returns: - Optional[int]: the count (None until set from the server). - """ - return _helpers._int_or_none( - _helpers._get_sub_prop( - self._properties, ["statistics", "load", "outputRows"] - ) - ) - - def to_api_repr(self): - """Generate a resource for :meth:`_begin`.""" - # Exclude statistics, if set. - return { - "jobReference": self._properties["jobReference"], - "configuration": self._properties["configuration"], - } - - @classmethod - def from_api_repr(cls, resource, client): - """Factory: construct a job given its API representation - - .. note: - - This method assumes that the project found in the resource matches - the client's project. 
- - Args: - resource (Dict): dataset job representation returned from the API - - client (google.cloud.bigquery.client.Client): - Client which holds credentials and project - configuration for the dataset. - - Returns: - google.cloud.bigquery.job.LoadJob: Job parsed from ``resource``. - """ - cls._check_resource_config(resource) - job_ref = _JobReference._from_api_repr(resource["jobReference"]) - job = cls(job_ref, None, None, client) - job._set_properties(resource) - return job - - -class CopyJobConfig(_JobConfig): - """Configuration options for copy jobs. - - All properties in this class are optional. Values which are :data:`None` -> - server defaults. Set properties on the constructed configuration by using - the property name as the name of a keyword argument. - """ - - def __init__(self, **kwargs): - super(CopyJobConfig, self).__init__("copy", **kwargs) - - @property - def create_disposition(self): - """google.cloud.bigquery.job.CreateDisposition: Specifies behavior - for creating tables. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.create_disposition - """ - return self._get_sub_prop("createDisposition") - - @create_disposition.setter - def create_disposition(self, value): - self._set_sub_prop("createDisposition", value) - - @property - def write_disposition(self): - """google.cloud.bigquery.job.WriteDisposition: Action that occurs if - the destination table already exists. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.write_disposition - """ - return self._get_sub_prop("writeDisposition") - - @write_disposition.setter - def write_disposition(self, value): - self._set_sub_prop("writeDisposition", value) - - @property - def destination_encryption_configuration(self): - """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom - encryption configuration for the destination table. 
- - Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` - if using default encryption. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.destination_encryption_configuration - """ - prop = self._get_sub_prop("destinationEncryptionConfiguration") - if prop is not None: - prop = EncryptionConfiguration.from_api_repr(prop) - return prop - - @destination_encryption_configuration.setter - def destination_encryption_configuration(self, value): - api_repr = value - if value is not None: - api_repr = value.to_api_repr() - self._set_sub_prop("destinationEncryptionConfiguration", api_repr) - - -class CopyJob(_AsyncJob): - """Asynchronous job: copy data into a table from other tables. - - Args: - job_id (str): the job's ID, within the project belonging to ``client``. - - sources (List[google.cloud.bigquery.table.TableReference]): Table from which data is to be loaded. - - destination (google.cloud.bigquery.table.TableReference): Table into which data is to be loaded. - - client (google.cloud.bigquery.client.Client): - A client which holds credentials and project configuration - for the dataset (which requires a project). - - job_config (Optional[google.cloud.bigquery.job.CopyJobConfig]): - Extra configuration options for the copy job. 
- """ - - _JOB_TYPE = "copy" - - def __init__(self, job_id, sources, destination, client, job_config=None): - super(CopyJob, self).__init__(job_id, client) - - if not job_config: - job_config = CopyJobConfig() - - self._configuration = job_config - self._properties["configuration"] = job_config._properties - - if destination: - _helpers._set_sub_prop( - self._properties, - ["configuration", "copy", "destinationTable"], - destination.to_api_repr(), - ) - - if sources: - source_resources = [source.to_api_repr() for source in sources] - _helpers._set_sub_prop( - self._properties, - ["configuration", "copy", "sourceTables"], - source_resources, - ) - - @property - def destination(self): - """google.cloud.bigquery.table.TableReference: Table into which data - is to be loaded. - """ - return TableReference.from_api_repr( - _helpers._get_sub_prop( - self._properties, ["configuration", "copy", "destinationTable"] - ) - ) - - @property - def sources(self): - """List[google.cloud.bigquery.table.TableReference]): Table(s) from - which data is to be loaded. - """ - source_configs = _helpers._get_sub_prop( - self._properties, ["configuration", "copy", "sourceTables"] - ) - if source_configs is None: - single = _helpers._get_sub_prop( - self._properties, ["configuration", "copy", "sourceTable"] - ) - if single is None: - raise KeyError("Resource missing 'sourceTables' / 'sourceTable'") - source_configs = [single] - - sources = [] - for source_config in source_configs: - table_ref = TableReference.from_api_repr(source_config) - sources.append(table_ref) - return sources - - @property - def create_disposition(self): - """See - :attr:`google.cloud.bigquery.job.CopyJobConfig.create_disposition`. - """ - return self._configuration.create_disposition - - @property - def write_disposition(self): - """See - :attr:`google.cloud.bigquery.job.CopyJobConfig.write_disposition`. 
- """ - return self._configuration.write_disposition - - @property - def destination_encryption_configuration(self): - """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom - encryption configuration for the destination table. - - Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` - if using default encryption. - - See - :attr:`google.cloud.bigquery.job.CopyJobConfig.destination_encryption_configuration`. - """ - return self._configuration.destination_encryption_configuration - - def to_api_repr(self): - """Generate a resource for :meth:`_begin`.""" - # Exclude statistics, if set. - return { - "jobReference": self._properties["jobReference"], - "configuration": self._properties["configuration"], - } - - @classmethod - def from_api_repr(cls, resource, client): - """Factory: construct a job given its API representation - - .. note: - - This method assumes that the project found in the resource matches - the client's project. - - Args: - resource (Dict): dataset job representation returned from the API - client (google.cloud.bigquery.client.Client): - Client which holds credentials and project - configuration for the dataset. - - Returns: - google.cloud.bigquery.job.CopyJob: Job parsed from ``resource``. - """ - cls._check_resource_config(resource) - job_ref = _JobReference._from_api_repr(resource["jobReference"]) - job = cls(job_ref, None, None, client=client) - job._set_properties(resource) - return job - - -class ExtractJobConfig(_JobConfig): - """Configuration options for extract jobs. - - All properties in this class are optional. Values which are :data:`None` -> - server defaults. Set properties on the constructed configuration by using - the property name as the name of a keyword argument. 
- """ - - def __init__(self, **kwargs): - super(ExtractJobConfig, self).__init__("extract", **kwargs) - - @property - def compression(self): - """google.cloud.bigquery.job.Compression: Compression type to use for - exported files. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.compression - """ - return self._get_sub_prop("compression") - - @compression.setter - def compression(self, value): - self._set_sub_prop("compression", value) - - @property - def destination_format(self): - """google.cloud.bigquery.job.DestinationFormat: Exported file format. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.destination_format - """ - return self._get_sub_prop("destinationFormat") - - @destination_format.setter - def destination_format(self, value): - self._set_sub_prop("destinationFormat", value) - - @property - def field_delimiter(self): - """str: Delimiter to use between fields in the exported data. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.field_delimiter - """ - return self._get_sub_prop("fieldDelimiter") - - @field_delimiter.setter - def field_delimiter(self, value): - self._set_sub_prop("fieldDelimiter", value) - - @property - def print_header(self): - """bool: Print a header row in the exported data. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.print_header - """ - return self._get_sub_prop("printHeader") - - @print_header.setter - def print_header(self, value): - self._set_sub_prop("printHeader", value) - - @property - def use_avro_logical_types(self): - """bool: For loads of Avro data, governs whether Avro logical types are - converted to their corresponding BigQuery types (e.g. TIMESTAMP) rather than - raw types (e.g. INTEGER). 
- """ - return self._get_sub_prop("useAvroLogicalTypes") - - @use_avro_logical_types.setter - def use_avro_logical_types(self, value): - self._set_sub_prop("useAvroLogicalTypes", bool(value)) - - -class ExtractJob(_AsyncJob): - """Asynchronous job: extract data from a table into Cloud Storage. - - Args: - job_id (str): the job's ID. - - source (Union[ \ - google.cloud.bigquery.table.TableReference, \ - google.cloud.bigquery.model.ModelReference \ - ]): - Table or Model from which data is to be loaded or extracted. - - destination_uris (List[str]): - URIs describing where the extracted data will be written in Cloud - Storage, using the format ``gs:///``. - - client (google.cloud.bigquery.client.Client): - A client which holds credentials and project configuration. - - job_config (Optional[google.cloud.bigquery.job.ExtractJobConfig]): - Extra configuration options for the extract job. - """ - - _JOB_TYPE = "extract" - - def __init__(self, job_id, source, destination_uris, client, job_config=None): - super(ExtractJob, self).__init__(job_id, client) - - if job_config is None: - job_config = ExtractJobConfig() - - self._properties["configuration"] = job_config._properties - self._configuration = job_config - - if source: - source_ref = {"projectId": source.project, "datasetId": source.dataset_id} - - if isinstance(source, (Table, TableListItem, TableReference)): - source_ref["tableId"] = source.table_id - source_key = "sourceTable" - else: - source_ref["modelId"] = source.model_id - source_key = "sourceModel" - - _helpers._set_sub_prop( - self._properties, ["configuration", "extract", source_key], source_ref - ) - - if destination_uris: - _helpers._set_sub_prop( - self._properties, - ["configuration", "extract", "destinationUris"], - destination_uris, - ) - - @property - def source(self): - """Union[ \ - google.cloud.bigquery.table.TableReference, \ - google.cloud.bigquery.model.ModelReference \ - ]: Table or Model from which data is to be loaded or extracted. 
- """ - source_config = _helpers._get_sub_prop( - self._properties, ["configuration", "extract", "sourceTable"] - ) - if source_config: - return TableReference.from_api_repr(source_config) - else: - source_config = _helpers._get_sub_prop( - self._properties, ["configuration", "extract", "sourceModel"] - ) - return ModelReference.from_api_repr(source_config) - - @property - def destination_uris(self): - """List[str]: URIs describing where the extracted data will be - written in Cloud Storage, using the format - ``gs:///``. - """ - return _helpers._get_sub_prop( - self._properties, ["configuration", "extract", "destinationUris"] - ) - - @property - def compression(self): - """See - :attr:`google.cloud.bigquery.job.ExtractJobConfig.compression`. - """ - return self._configuration.compression - - @property - def destination_format(self): - """See - :attr:`google.cloud.bigquery.job.ExtractJobConfig.destination_format`. - """ - return self._configuration.destination_format - - @property - def field_delimiter(self): - """See - :attr:`google.cloud.bigquery.job.ExtractJobConfig.field_delimiter`. - """ - return self._configuration.field_delimiter - - @property - def print_header(self): - """See - :attr:`google.cloud.bigquery.job.ExtractJobConfig.print_header`. - """ - return self._configuration.print_header - - @property - def destination_uri_file_counts(self): - """Return file counts from job statistics, if present. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics4.FIELDS.destination_uri_file_counts - - Returns: - List[int]: - A list of integer counts, each representing the number of files - per destination URI or URI pattern specified in the extract - configuration. These values will be in the same order as the URIs - specified in the 'destinationUris' field. Returns None if job is - not yet complete. 
- """ - counts = self._job_statistics().get("destinationUriFileCounts") - if counts is not None: - return [int(count) for count in counts] - return None - - def to_api_repr(self): - """Generate a resource for :meth:`_begin`.""" - # Exclude statistics, if set. - return { - "jobReference": self._properties["jobReference"], - "configuration": self._properties["configuration"], - } - - @classmethod - def from_api_repr(cls, resource, client): - """Factory: construct a job given its API representation - - .. note: - - This method assumes that the project found in the resource matches - the client's project. - - Args: - resource (Dict): dataset job representation returned from the API - - client (google.cloud.bigquery.client.Client): - Client which holds credentials and project - configuration for the dataset. - - Returns: - google.cloud.bigquery.job.ExtractJob: Job parsed from ``resource``. - """ - cls._check_resource_config(resource) - job_ref = _JobReference._from_api_repr(resource["jobReference"]) - job = cls(job_ref, None, None, client=client) - job._set_properties(resource) - return job - - -def _from_api_repr_query_parameters(resource): - return [_query_param_from_api_repr(mapping) for mapping in resource] - - -def _to_api_repr_query_parameters(value): - return [query_parameter.to_api_repr() for query_parameter in value] - - -def _from_api_repr_udf_resources(resource): - udf_resources = [] - for udf_mapping in resource: - for udf_type, udf_value in udf_mapping.items(): - udf_resources.append(UDFResource(udf_type, udf_value)) - return udf_resources - - -def _to_api_repr_udf_resources(value): - return [{udf_resource.udf_type: udf_resource.value} for udf_resource in value] - - -def _from_api_repr_table_defs(resource): - return {k: ExternalConfig.from_api_repr(v) for k, v in resource.items()} - - -def _to_api_repr_table_defs(value): - return {k: ExternalConfig.to_api_repr(v) for k, v in value.items()} - - -class QueryJobConfig(_JobConfig): - """Configuration options 
for query jobs. - - All properties in this class are optional. Values which are :data:`None` -> - server defaults. Set properties on the constructed configuration by using - the property name as the name of a keyword argument. - """ - - def __init__(self, **kwargs): - super(QueryJobConfig, self).__init__("query", **kwargs) - - @property - def destination_encryption_configuration(self): - """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom - encryption configuration for the destination table. - - Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` - if using default encryption. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.destination_encryption_configuration - """ - prop = self._get_sub_prop("destinationEncryptionConfiguration") - if prop is not None: - prop = EncryptionConfiguration.from_api_repr(prop) - return prop - - @destination_encryption_configuration.setter - def destination_encryption_configuration(self, value): - api_repr = value - if value is not None: - api_repr = value.to_api_repr() - self._set_sub_prop("destinationEncryptionConfiguration", api_repr) - - @property - def allow_large_results(self): - """bool: Allow large query results tables (legacy SQL, only) - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.allow_large_results - """ - return self._get_sub_prop("allowLargeResults") - - @allow_large_results.setter - def allow_large_results(self, value): - self._set_sub_prop("allowLargeResults", value) - - @property - def create_disposition(self): - """google.cloud.bigquery.job.CreateDisposition: Specifies behavior - for creating tables. 
- - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.create_disposition - """ - return self._get_sub_prop("createDisposition") - - @create_disposition.setter - def create_disposition(self, value): - self._set_sub_prop("createDisposition", value) - - @property - def default_dataset(self): - """google.cloud.bigquery.dataset.DatasetReference: the default dataset - to use for unqualified table names in the query or :data:`None` if not - set. - - The ``default_dataset`` setter accepts: - - - a :class:`~google.cloud.bigquery.dataset.Dataset`, or - - a :class:`~google.cloud.bigquery.dataset.DatasetReference`, or - - a :class:`str` of the fully-qualified dataset ID in standard SQL - format. The value must included a project ID and dataset ID - separated by ``.``. For example: ``your-project.your_dataset``. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.default_dataset - """ - prop = self._get_sub_prop("defaultDataset") - if prop is not None: - prop = DatasetReference.from_api_repr(prop) - return prop - - @default_dataset.setter - def default_dataset(self, value): - if value is None: - self._set_sub_prop("defaultDataset", None) - return - - if isinstance(value, six.string_types): - value = DatasetReference.from_string(value) - - if isinstance(value, (Dataset, DatasetListItem)): - value = value.reference - - resource = value.to_api_repr() - self._set_sub_prop("defaultDataset", resource) - - @property - def destination(self): - """google.cloud.bigquery.table.TableReference: table where results are - written or :data:`None` if not set. - - The ``destination`` setter accepts: - - - a :class:`~google.cloud.bigquery.table.Table`, or - - a :class:`~google.cloud.bigquery.table.TableReference`, or - - a :class:`str` of the fully-qualified table ID in standard SQL - format. The value must included a project ID, dataset ID, and table - ID, each separated by ``.``. 
For example: - ``your-project.your_dataset.your_table``. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.destination_table - """ - prop = self._get_sub_prop("destinationTable") - if prop is not None: - prop = TableReference.from_api_repr(prop) - return prop - - @destination.setter - def destination(self, value): - if value is None: - self._set_sub_prop("destinationTable", None) - return - - value = _table_arg_to_table_ref(value) - resource = value.to_api_repr() - self._set_sub_prop("destinationTable", resource) - - @property - def dry_run(self): - """bool: :data:`True` if this query should be a dry run to estimate - costs. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfiguration.FIELDS.dry_run - """ - return self._properties.get("dryRun") - - @dry_run.setter - def dry_run(self, value): - self._properties["dryRun"] = value - - @property - def flatten_results(self): - """bool: Flatten nested/repeated fields in results. (Legacy SQL only) - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.flatten_results - """ - return self._get_sub_prop("flattenResults") - - @flatten_results.setter - def flatten_results(self, value): - self._set_sub_prop("flattenResults", value) - - @property - def maximum_billing_tier(self): - """int: Deprecated. Changes the billing tier to allow high-compute - queries. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.maximum_billing_tier - """ - return self._get_sub_prop("maximumBillingTier") - - @maximum_billing_tier.setter - def maximum_billing_tier(self, value): - self._set_sub_prop("maximumBillingTier", value) - - @property - def maximum_bytes_billed(self): - """int: Maximum bytes to be billed for this job or :data:`None` if not set. 
- - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.maximum_bytes_billed - """ - return _helpers._int_or_none(self._get_sub_prop("maximumBytesBilled")) - - @maximum_bytes_billed.setter - def maximum_bytes_billed(self, value): - self._set_sub_prop("maximumBytesBilled", str(value)) - - @property - def priority(self): - """google.cloud.bigquery.job.QueryPriority: Priority of the query. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.priority - """ - return self._get_sub_prop("priority") - - @priority.setter - def priority(self, value): - self._set_sub_prop("priority", value) - - @property - def query_parameters(self): - """List[Union[google.cloud.bigquery.query.ArrayQueryParameter, \ - google.cloud.bigquery.query.ScalarQueryParameter, \ - google.cloud.bigquery.query.StructQueryParameter]]: list of parameters - for parameterized query (empty by default) - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.query_parameters - """ - prop = self._get_sub_prop("queryParameters", default=[]) - return _from_api_repr_query_parameters(prop) - - @query_parameters.setter - def query_parameters(self, values): - self._set_sub_prop("queryParameters", _to_api_repr_query_parameters(values)) - - @property - def range_partitioning(self): - """Optional[google.cloud.bigquery.table.RangePartitioning]: - Configures range-based partitioning for destination table. - - .. note:: - **Beta**. The integer range partitioning feature is in a - pre-release state and might change or have limited support. - - Only specify at most one of - :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or - :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. - - Raises: - ValueError: - If the value is not - :class:`~google.cloud.bigquery.table.RangePartitioning` or - :data:`None`. 
- """ - resource = self._get_sub_prop("rangePartitioning") - if resource is not None: - return RangePartitioning(_properties=resource) - - @range_partitioning.setter - def range_partitioning(self, value): - resource = value - if isinstance(value, RangePartitioning): - resource = value._properties - elif value is not None: - raise ValueError( - "Expected value to be RangePartitioning or None, got {}.".format(value) - ) - self._set_sub_prop("rangePartitioning", resource) - - @property - def udf_resources(self): - """List[google.cloud.bigquery.query.UDFResource]: user - defined function resources (empty by default) - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.user_defined_function_resources - """ - prop = self._get_sub_prop("userDefinedFunctionResources", default=[]) - return _from_api_repr_udf_resources(prop) - - @udf_resources.setter - def udf_resources(self, values): - self._set_sub_prop( - "userDefinedFunctionResources", _to_api_repr_udf_resources(values) - ) - - @property - def use_legacy_sql(self): - """bool: Use legacy SQL syntax. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.use_legacy_sql - """ - return self._get_sub_prop("useLegacySql") - - @use_legacy_sql.setter - def use_legacy_sql(self, value): - self._set_sub_prop("useLegacySql", value) - - @property - def use_query_cache(self): - """bool: Look for the query result in the cache. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.use_query_cache - """ - return self._get_sub_prop("useQueryCache") - - @use_query_cache.setter - def use_query_cache(self, value): - self._set_sub_prop("useQueryCache", value) - - @property - def write_disposition(self): - """google.cloud.bigquery.job.WriteDisposition: Action that occurs if - the destination table already exists. 
- - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.write_disposition - """ - return self._get_sub_prop("writeDisposition") - - @write_disposition.setter - def write_disposition(self, value): - self._set_sub_prop("writeDisposition", value) - - @property - def table_definitions(self): - """Dict[str, google.cloud.bigquery.external_config.ExternalConfig]: - Definitions for external tables or :data:`None` if not set. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.external_table_definitions - """ - prop = self._get_sub_prop("tableDefinitions") - if prop is not None: - prop = _from_api_repr_table_defs(prop) - return prop - - @table_definitions.setter - def table_definitions(self, values): - self._set_sub_prop("tableDefinitions", _to_api_repr_table_defs(values)) - - @property - def time_partitioning(self): - """Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies - time-based partitioning for the destination table. - - Only specify at most one of - :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or - :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. - - Raises: - ValueError: - If the value is not - :class:`~google.cloud.bigquery.table.TimePartitioning` or - :data:`None`. - """ - prop = self._get_sub_prop("timePartitioning") - if prop is not None: - prop = TimePartitioning.from_api_repr(prop) - return prop - - @time_partitioning.setter - def time_partitioning(self, value): - api_repr = value - if value is not None: - api_repr = value.to_api_repr() - self._set_sub_prop("timePartitioning", api_repr) - - @property - def clustering_fields(self): - """Optional[List[str]]: Fields defining clustering for the table - - (Defaults to :data:`None`). - - Clustering fields are immutable after table creation. - - .. note:: - - BigQuery supports clustering for both partitioned and - non-partitioned tables. 
- """ - prop = self._get_sub_prop("clustering") - if prop is not None: - return list(prop.get("fields", ())) - - @clustering_fields.setter - def clustering_fields(self, value): - """Optional[List[str]]: Fields defining clustering for the table - - (Defaults to :data:`None`). - """ - if value is not None: - self._set_sub_prop("clustering", {"fields": value}) - else: - self._del_sub_prop("clustering") - - @property - def schema_update_options(self): - """List[google.cloud.bigquery.job.SchemaUpdateOption]: Specifies - updates to the destination table schema to allow as a side effect of - the query job. - """ - return self._get_sub_prop("schemaUpdateOptions") - - @schema_update_options.setter - def schema_update_options(self, values): - self._set_sub_prop("schemaUpdateOptions", values) - - def to_api_repr(self): - """Build an API representation of the query job config. - - Returns: - Dict: A dictionary in the format used by the BigQuery API. - """ - resource = copy.deepcopy(self._properties) - - # Query parameters have an addition property associated with them - # to indicate if the query is using named or positional parameters. - query_parameters = resource["query"].get("queryParameters") - if query_parameters: - if query_parameters[0].get("name") is None: - resource["query"]["parameterMode"] = "POSITIONAL" - else: - resource["query"]["parameterMode"] = "NAMED" - - return resource - - -class QueryJob(_AsyncJob): - """Asynchronous job: query tables. - - Args: - job_id (str): the job's ID, within the project belonging to ``client``. - - query (str): SQL query string. - - client (google.cloud.bigquery.client.Client): - A client which holds credentials and project configuration - for the dataset (which requires a project). - - job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]): - Extra configuration options for the query job. 
- """ - - _JOB_TYPE = "query" - _UDF_KEY = "userDefinedFunctionResources" - - def __init__(self, job_id, query, client, job_config=None): - super(QueryJob, self).__init__(job_id, client) - - if job_config is None: - job_config = QueryJobConfig() - if job_config.use_legacy_sql is None: - job_config.use_legacy_sql = False - - self._properties["configuration"] = job_config._properties - self._configuration = job_config - - if query: - _helpers._set_sub_prop( - self._properties, ["configuration", "query", "query"], query - ) - - self._query_results = None - self._done_timeout = None - self._transport_timeout = None - - @property - def allow_large_results(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.allow_large_results`. - """ - return self._configuration.allow_large_results - - @property - def create_disposition(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.create_disposition`. - """ - return self._configuration.create_disposition - - @property - def default_dataset(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.default_dataset`. - """ - return self._configuration.default_dataset - - @property - def destination(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.destination`. - """ - return self._configuration.destination - - @property - def destination_encryption_configuration(self): - """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom - encryption configuration for the destination table. - - Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` - if using default encryption. - - See - :attr:`google.cloud.bigquery.job.QueryJobConfig.destination_encryption_configuration`. - """ - return self._configuration.destination_encryption_configuration - - @property - def dry_run(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.dry_run`. 
- """ - return self._configuration.dry_run - - @property - def flatten_results(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.flatten_results`. - """ - return self._configuration.flatten_results - - @property - def priority(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.priority`. - """ - return self._configuration.priority - - @property - def query(self): - """str: The query text used in this query job. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.query - """ - return _helpers._get_sub_prop( - self._properties, ["configuration", "query", "query"] - ) - - @property - def query_parameters(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.query_parameters`. - """ - return self._configuration.query_parameters - - @property - def udf_resources(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.udf_resources`. - """ - return self._configuration.udf_resources - - @property - def use_legacy_sql(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.use_legacy_sql`. - """ - return self._configuration.use_legacy_sql - - @property - def use_query_cache(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.use_query_cache`. - """ - return self._configuration.use_query_cache - - @property - def write_disposition(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.write_disposition`. - """ - return self._configuration.write_disposition - - @property - def maximum_billing_tier(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.maximum_billing_tier`. - """ - return self._configuration.maximum_billing_tier - - @property - def maximum_bytes_billed(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.maximum_bytes_billed`. 
- """ - return self._configuration.maximum_bytes_billed - - @property - def range_partitioning(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.range_partitioning`. - """ - return self._configuration.range_partitioning - - @property - def table_definitions(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.table_definitions`. - """ - return self._configuration.table_definitions - - @property - def time_partitioning(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.time_partitioning`. - """ - return self._configuration.time_partitioning - - @property - def clustering_fields(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.clustering_fields`. - """ - return self._configuration.clustering_fields - - @property - def schema_update_options(self): - """See - :attr:`google.cloud.bigquery.job.QueryJobConfig.schema_update_options`. - """ - return self._configuration.schema_update_options - - def to_api_repr(self): - """Generate a resource for :meth:`_begin`.""" - # Use to_api_repr to allow for some configuration properties to be set - # automatically. - configuration = self._configuration.to_api_repr() - return { - "jobReference": self._properties["jobReference"], - "configuration": configuration, - } - - @classmethod - def from_api_repr(cls, resource, client): - """Factory: construct a job given its API representation - - Args: - resource (Dict): dataset job representation returned from the API - - client (google.cloud.bigquery.client.Client): - Client which holds credentials and project - configuration for the dataset. - - Returns: - google.cloud.bigquery.job.QueryJob: Job parsed from ``resource``. - """ - cls._check_resource_config(resource) - job_ref = _JobReference._from_api_repr(resource["jobReference"]) - job = cls(job_ref, None, client=client) - job._set_properties(resource) - return job - - @property - def query_plan(self): - """Return query plan from job statistics, if present. 
- - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.query_plan - - Returns: - List[google.cloud.bigquery.job.QueryPlanEntry]: - mappings describing the query plan, or an empty list - if the query has not yet completed. - """ - plan_entries = self._job_statistics().get("queryPlan", ()) - return [QueryPlanEntry.from_api_repr(entry) for entry in plan_entries] - - @property - def timeline(self): - """List(TimelineEntry): Return the query execution timeline - from job statistics. - """ - raw = self._job_statistics().get("timeline", ()) - return [TimelineEntry.from_api_repr(entry) for entry in raw] - - @property - def total_bytes_processed(self): - """Return total bytes processed from job statistics, if present. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.total_bytes_processed - - Returns: - Optional[int]: - Total bytes processed by the job, or None if job is not - yet complete. - """ - result = self._job_statistics().get("totalBytesProcessed") - if result is not None: - result = int(result) - return result - - @property - def total_bytes_billed(self): - """Return total bytes billed from job statistics, if present. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.total_bytes_billed - - Returns: - Optional[int]: - Total bytes processed by the job, or None if job is not - yet complete. - """ - result = self._job_statistics().get("totalBytesBilled") - if result is not None: - result = int(result) - return result - - @property - def billing_tier(self): - """Return billing tier from job statistics, if present. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.billing_tier - - Returns: - Optional[int]: - Billing tier used by the job, or None if job is not - yet complete. 
- """ - return self._job_statistics().get("billingTier") - - @property - def cache_hit(self): - """Return whether or not query results were served from cache. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.cache_hit - - Returns: - Optional[bool]: - whether the query results were returned from cache, or None - if job is not yet complete. - """ - return self._job_statistics().get("cacheHit") - - @property - def ddl_operation_performed(self): - """Optional[str]: Return the DDL operation performed. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.ddl_operation_performed - - """ - return self._job_statistics().get("ddlOperationPerformed") - - @property - def ddl_target_routine(self): - """Optional[google.cloud.bigquery.routine.RoutineReference]: Return the DDL target routine, present - for CREATE/DROP FUNCTION/PROCEDURE queries. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.ddl_target_routine - """ - prop = self._job_statistics().get("ddlTargetRoutine") - if prop is not None: - prop = RoutineReference.from_api_repr(prop) - return prop - - @property - def ddl_target_table(self): - """Optional[google.cloud.bigquery.table.TableReference]: Return the DDL target table, present - for CREATE/DROP TABLE/VIEW queries. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.ddl_target_table - """ - prop = self._job_statistics().get("ddlTargetTable") - if prop is not None: - prop = TableReference.from_api_repr(prop) - return prop - - @property - def num_dml_affected_rows(self): - """Return the number of DML rows affected by the job. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.num_dml_affected_rows - - Returns: - Optional[int]: - number of DML rows affected by the job, or None if job is not - yet complete. 
- """ - result = self._job_statistics().get("numDmlAffectedRows") - if result is not None: - result = int(result) - return result - - @property - def slot_millis(self): - """Union[int, None]: Slot-milliseconds used by this query job.""" - return _helpers._int_or_none(self._job_statistics().get("totalSlotMs")) - - @property - def statement_type(self): - """Return statement type from job statistics, if present. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.statement_type - - Returns: - Optional[str]: - type of statement used by the job, or None if job is not - yet complete. - """ - return self._job_statistics().get("statementType") - - @property - def referenced_tables(self): - """Return referenced tables from job statistics, if present. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.referenced_tables - - Returns: - List[Dict]: - mappings describing the query plan, or an empty list - if the query has not yet completed. - """ - tables = [] - datasets_by_project_name = {} - - for table in self._job_statistics().get("referencedTables", ()): - - t_project = table["projectId"] - - ds_id = table["datasetId"] - t_dataset = datasets_by_project_name.get((t_project, ds_id)) - if t_dataset is None: - t_dataset = DatasetReference(t_project, ds_id) - datasets_by_project_name[(t_project, ds_id)] = t_dataset - - t_name = table["tableId"] - tables.append(t_dataset.table(t_name)) - - return tables - - @property - def undeclared_query_parameters(self): - """Return undeclared query parameters from job statistics, if present. 
- - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.undeclared_query_parameters - - Returns: - List[Union[ \ - google.cloud.bigquery.query.ArrayQueryParameter, \ - google.cloud.bigquery.query.ScalarQueryParameter, \ - google.cloud.bigquery.query.StructQueryParameter \ - ]]: - Undeclared parameters, or an empty list if the query has - not yet completed. - """ - parameters = [] - undeclared = self._job_statistics().get("undeclaredQueryParameters", ()) - - for parameter in undeclared: - p_type = parameter["parameterType"] - - if "arrayType" in p_type: - klass = ArrayQueryParameter - elif "structTypes" in p_type: - klass = StructQueryParameter - else: - klass = ScalarQueryParameter - - parameters.append(klass.from_api_repr(parameter)) - - return parameters - - @property - def estimated_bytes_processed(self): - """Return the estimated number of bytes processed by the query. - - See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.estimated_bytes_processed - - Returns: - Optional[int]: - number of DML rows affected by the job, or None if job is not - yet complete. - """ - result = self._job_statistics().get("estimatedBytesProcessed") - if result is not None: - result = int(result) - return result - - def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): - """Refresh the job and checks if it is complete. - - Args: - retry (Optional[google.api_core.retry.Retry]): - How to retry the call that retrieves query results. - timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - reload (Optional[bool]): - If ``True``, make an API call to refresh the job state of - unfinished jobs before checking. Default ``True``. - - Returns: - bool: True if the job is complete, False otherwise. - """ - is_done = ( - # Only consider a QueryJob complete when we know we have the final - # query results available. 
- self._query_results is not None - and self._query_results.complete - and self.state == _DONE_STATE - ) - # Do not refresh if the state is already done, as the job will not - # change once complete. - if not reload or is_done: - return is_done - - # Since the API to getQueryResults can hang up to the timeout value - # (default of 10 seconds), set the timeout parameter to ensure that - # the timeout from the futures API is respected. See: - # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4135 - timeout_ms = None - if self._done_timeout is not None: - # Subtract a buffer for context switching, network latency, etc. - api_timeout = self._done_timeout - _TIMEOUT_BUFFER_SECS - api_timeout = max(min(api_timeout, 10), 0) - self._done_timeout -= api_timeout - self._done_timeout = max(0, self._done_timeout) - timeout_ms = int(api_timeout * 1000) - - # If an explicit timeout is not given, fall back to the transport timeout - # stored in _blocking_poll() in the process of polling for job completion. - transport_timeout = timeout if timeout is not None else self._transport_timeout - - self._query_results = self._client._get_query_results( - self.job_id, - retry, - project=self.project, - timeout_ms=timeout_ms, - location=self.location, - timeout=transport_timeout, - ) - - # Only reload the job once we know the query is complete. - # This will ensure that fields such as the destination table are - # correctly populated. - if self._query_results.complete and self.state != _DONE_STATE: - self.reload(retry=retry, timeout=transport_timeout) - - return self.state == _DONE_STATE - - def _blocking_poll(self, timeout=None, **kwargs): - self._done_timeout = timeout - self._transport_timeout = timeout - super(QueryJob, self)._blocking_poll(timeout=timeout, **kwargs) - - @staticmethod - def _format_for_exception(query, job_id): - """Format a query for the output in exception message. - - Args: - query (str): The SQL query to format. 
- job_id (str): The ID of the job that ran the query. - - Returns: - str: A formatted query text. - """ - template = "\n\n(job ID: {job_id})\n\n{header}\n\n{ruler}\n{body}\n{ruler}" - - lines = query.splitlines() - max_line_len = max(len(line) for line in lines) - - header = "-----Query Job SQL Follows-----" - header = "{:^{total_width}}".format(header, total_width=max_line_len + 5) - - # Print out a "ruler" above and below the SQL so we can judge columns. - # Left pad for the line numbers (4 digits plus ":"). - ruler = " |" + " . |" * (max_line_len // 10) - - # Put line numbers next to the SQL. - body = "\n".join( - "{:4}:{}".format(n, line) for n, line in enumerate(lines, start=1) - ) - - return template.format(job_id=job_id, header=header, ruler=ruler, body=body) - - def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): - """API call: begin the job via a POST request - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert - - Args: - client (Optional[google.cloud.bigquery.client.Client]): - The client to use. If not passed, falls back to the ``client`` - associated with the job object or``NoneType``. - retry (Optional[google.api_core.retry.Retry]): - How to retry the RPC. - timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - - Raises: - ValueError: If the job has already begun. - """ - - try: - super(QueryJob, self)._begin(client=client, retry=retry, timeout=timeout) - except exceptions.GoogleCloudError as exc: - exc.message += self._format_for_exception(self.query, self.job_id) - exc.query_job = self - raise - - def result( - self, - page_size=None, - max_results=None, - retry=DEFAULT_RETRY, - timeout=None, - start_index=None, - ): - """Start the job and wait for it to complete and get the result. - - Args: - page_size (Optional[int]): - The maximum number of rows in each page of results from this - request. Non-positive values are ignored. 
- max_results (Optional[int]): - The maximum total number of rows from this request. - retry (Optional[google.api_core.retry.Retry]): - How to retry the call that retrieves rows. - timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - If multiple requests are made under the hood, ``timeout`` - applies to each individual request. - start_index (Optional[int]): - The zero-based index of the starting row to read. - - Returns: - google.cloud.bigquery.table.RowIterator: - Iterator of row data - :class:`~google.cloud.bigquery.table.Row`-s. During each - page, the iterator will have the ``total_rows`` attribute - set, which counts the total number of rows **in the result - set** (this is distinct from the total number of rows in the - current page: ``iterator.page.num_items``). - - If the query is a special query that produces no results, e.g. - a DDL query, an ``_EmptyRowIterator`` instance is returned. - - Raises: - google.cloud.exceptions.GoogleCloudError: - If the job failed. - concurrent.futures.TimeoutError: - If the job did not complete in the given timeout. - """ - try: - super(QueryJob, self).result(retry=retry, timeout=timeout) - except exceptions.GoogleCloudError as exc: - exc.message += self._format_for_exception(self.query, self.job_id) - exc.query_job = self - raise - except requests.exceptions.Timeout as exc: - six.raise_from(concurrent.futures.TimeoutError, exc) - - # If the query job is complete but there are no query results, this was - # special job, such as a DDL query. Return an empty result set to - # indicate success and avoid calling tabledata.list on a table which - # can't be read (such as a view table). 
- if self._query_results.total_rows is None: - return _EmptyRowIterator() - - schema = self._query_results.schema - dest_table_ref = self.destination - dest_table = Table(dest_table_ref, schema=schema) - dest_table._properties["numRows"] = self._query_results.total_rows - rows = self._client.list_rows( - dest_table, - page_size=page_size, - max_results=max_results, - start_index=start_index, - retry=retry, - timeout=timeout, - ) - rows._preserve_order = _contains_order_by(self.query) - return rows - - # If changing the signature of this method, make sure to apply the same - # changes to table.RowIterator.to_arrow() - def to_arrow( - self, - progress_bar_type=None, - bqstorage_client=None, - create_bqstorage_client=True, - ): - """[Beta] Create a class:`pyarrow.Table` by loading all pages of a - table or query. - - Args: - progress_bar_type (Optional[str]): - If set, use the `tqdm `_ library to - display a progress bar while the data downloads. Install the - ``tqdm`` package to use this feature. - - Possible values of ``progress_bar_type`` include: - - ``None`` - No progress bar. - ``'tqdm'`` - Use the :func:`tqdm.tqdm` function to print a progress bar - to :data:`sys.stderr`. - ``'tqdm_notebook'`` - Use the :func:`tqdm.tqdm_notebook` function to display a - progress bar as a Jupyter notebook widget. - ``'tqdm_gui'`` - Use the :func:`tqdm.tqdm_gui` function to display a - progress bar as a graphical dialog box. - bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): - A BigQuery Storage API client. If supplied, use the faster - BigQuery Storage API to fetch rows from BigQuery. This API - is a billable API. - - This method requires the ``pyarrow`` and - ``google-cloud-bigquery-storage`` libraries. - - Reading from a specific partition or snapshot is not - currently supported by this method. - create_bqstorage_client (Optional[bool]): - If ``True`` (default), create a BigQuery Storage API client - using the default API settings. 
The BigQuery Storage API - is a faster way to fetch rows from BigQuery. See the - ``bqstorage_client`` parameter for more information. - - This argument does nothing if ``bqstorage_client`` is supplied. - - ..versionadded:: 1.24.0 - - Returns: - pyarrow.Table - A :class:`pyarrow.Table` populated with row data and column - headers from the query results. The column headers are derived - from the destination table's schema. - - Raises: - ValueError: - If the :mod:`pyarrow` library cannot be imported. - - ..versionadded:: 1.17.0 - """ - return self.result().to_arrow( - progress_bar_type=progress_bar_type, - bqstorage_client=bqstorage_client, - create_bqstorage_client=create_bqstorage_client, - ) - - # If changing the signature of this method, make sure to apply the same - # changes to table.RowIterator.to_dataframe() - def to_dataframe( - self, - bqstorage_client=None, - dtypes=None, - progress_bar_type=None, - create_bqstorage_client=True, - date_as_object=True, - ): - """Return a pandas DataFrame from a QueryJob - - Args: - bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): - A BigQuery Storage API client. If supplied, use the faster - BigQuery Storage API to fetch rows from BigQuery. This - API is a billable API. - - This method requires the ``fastavro`` and - ``google-cloud-bigquery-storage`` libraries. - - Reading from a specific partition or snapshot is not - currently supported by this method. - - dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): - A dictionary of column names pandas ``dtype``s. The provided - ``dtype`` is used when constructing the series for the column - specified. Otherwise, the default pandas behavior is used. - - progress_bar_type (Optional[str]): - If set, use the `tqdm `_ library to - display a progress bar while the data downloads. Install the - ``tqdm`` package to use this feature. - - See - :func:`~google.cloud.bigquery.table.RowIterator.to_dataframe` - for details. 
- - ..versionadded:: 1.11.0 - create_bqstorage_client (Optional[bool]): - If ``True`` (default), create a BigQuery Storage API client - using the default API settings. The BigQuery Storage API - is a faster way to fetch rows from BigQuery. See the - ``bqstorage_client`` parameter for more information. - - This argument does nothing if ``bqstorage_client`` is supplied. - - ..versionadded:: 1.24.0 - - date_as_object (Optional[bool]): - If ``True`` (default), cast dates to objects. If ``False``, convert - to datetime64[ns] dtype. - - ..versionadded:: 1.26.0 - - Returns: - A :class:`~pandas.DataFrame` populated with row data and column - headers from the query results. The column headers are derived - from the destination table's schema. - - Raises: - ValueError: If the `pandas` library cannot be imported. - """ - return self.result().to_dataframe( - bqstorage_client=bqstorage_client, - dtypes=dtypes, - progress_bar_type=progress_bar_type, - create_bqstorage_client=create_bqstorage_client, - date_as_object=date_as_object, - ) - - def __iter__(self): - return iter(self.result()) - - -class QueryPlanEntryStep(object): - """Map a single step in a query plan entry. - - Args: - kind (str): step type. - substeps (List): names of substeps. - """ - - def __init__(self, kind, substeps): - self.kind = kind - self.substeps = list(substeps) - - @classmethod - def from_api_repr(cls, resource): - """Factory: construct instance from the JSON repr. - - Args: - resource (Dict): JSON representation of the entry. - - Returns: - google.cloud.bigquery.job.QueryPlanEntryStep: - New instance built from the resource. - """ - return cls(kind=resource.get("kind"), substeps=resource.get("substeps", ())) - - def __eq__(self, other): - if not isinstance(other, self.__class__): - return NotImplemented - return self.kind == other.kind and self.substeps == other.substeps - - -class QueryPlanEntry(object): - """QueryPlanEntry represents a single stage of a query execution plan. 
- - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#ExplainQueryStage - for the underlying API representation within query statistics. - """ - - def __init__(self): - self._properties = {} - - @classmethod - def from_api_repr(cls, resource): - """Factory: construct instance from the JSON repr. - - Args: - resource(Dict[str: object]): - ExplainQueryStage representation returned from API. - - Returns: - google.cloud.bigquery.job.QueryPlanEntry: - Query plan entry parsed from ``resource``. - """ - entry = cls() - entry._properties = resource - return entry - - @property - def name(self): - """Optional[str]: Human-readable name of the stage.""" - return self._properties.get("name") - - @property - def entry_id(self): - """Optional[str]: Unique ID for the stage within the plan.""" - return self._properties.get("id") - - @property - def start(self): - """Optional[Datetime]: Datetime when the stage started.""" - if self._properties.get("startMs") is None: - return None - return _helpers._datetime_from_microseconds( - int(self._properties.get("startMs")) * 1000.0 - ) - - @property - def end(self): - """Optional[Datetime]: Datetime when the stage ended.""" - if self._properties.get("endMs") is None: - return None - return _helpers._datetime_from_microseconds( - int(self._properties.get("endMs")) * 1000.0 - ) - - @property - def input_stages(self): - """List(int): Entry IDs for stages that were inputs for this stage.""" - if self._properties.get("inputStages") is None: - return [] - return [ - _helpers._int_or_none(entry) - for entry in self._properties.get("inputStages") - ] - - @property - def parallel_inputs(self): - """Optional[int]: Number of parallel input segments within - the stage. 
- """ - return _helpers._int_or_none(self._properties.get("parallelInputs")) - - @property - def completed_parallel_inputs(self): - """Optional[int]: Number of parallel input segments completed.""" - return _helpers._int_or_none(self._properties.get("completedParallelInputs")) - - @property - def wait_ms_avg(self): - """Optional[int]: Milliseconds the average worker spent waiting to - be scheduled. - """ - return _helpers._int_or_none(self._properties.get("waitMsAvg")) - - @property - def wait_ms_max(self): - """Optional[int]: Milliseconds the slowest worker spent waiting to - be scheduled. - """ - return _helpers._int_or_none(self._properties.get("waitMsMax")) - - @property - def wait_ratio_avg(self): - """Optional[float]: Ratio of time the average worker spent waiting - to be scheduled, relative to the longest time spent by any worker in - any stage of the overall plan. - """ - return self._properties.get("waitRatioAvg") - - @property - def wait_ratio_max(self): - """Optional[float]: Ratio of time the slowest worker spent waiting - to be scheduled, relative to the longest time spent by any worker in - any stage of the overall plan. - """ - return self._properties.get("waitRatioMax") - - @property - def read_ms_avg(self): - """Optional[int]: Milliseconds the average worker spent reading - input. - """ - return _helpers._int_or_none(self._properties.get("readMsAvg")) - - @property - def read_ms_max(self): - """Optional[int]: Milliseconds the slowest worker spent reading - input. - """ - return _helpers._int_or_none(self._properties.get("readMsMax")) - - @property - def read_ratio_avg(self): - """Optional[float]: Ratio of time the average worker spent reading - input, relative to the longest time spent by any worker in any stage - of the overall plan. 
- """ - return self._properties.get("readRatioAvg") - - @property - def read_ratio_max(self): - """Optional[float]: Ratio of time the slowest worker spent reading - to be scheduled, relative to the longest time spent by any worker in - any stage of the overall plan. - """ - return self._properties.get("readRatioMax") - - @property - def compute_ms_avg(self): - """Optional[int]: Milliseconds the average worker spent on CPU-bound - processing. - """ - return _helpers._int_or_none(self._properties.get("computeMsAvg")) - - @property - def compute_ms_max(self): - """Optional[int]: Milliseconds the slowest worker spent on CPU-bound - processing. - """ - return _helpers._int_or_none(self._properties.get("computeMsMax")) - - @property - def compute_ratio_avg(self): - """Optional[float]: Ratio of time the average worker spent on - CPU-bound processing, relative to the longest time spent by any - worker in any stage of the overall plan. - """ - return self._properties.get("computeRatioAvg") - - @property - def compute_ratio_max(self): - """Optional[float]: Ratio of time the slowest worker spent on - CPU-bound processing, relative to the longest time spent by any - worker in any stage of the overall plan. - """ - return self._properties.get("computeRatioMax") - - @property - def write_ms_avg(self): - """Optional[int]: Milliseconds the average worker spent writing - output data. - """ - return _helpers._int_or_none(self._properties.get("writeMsAvg")) - - @property - def write_ms_max(self): - """Optional[int]: Milliseconds the slowest worker spent writing - output data. - """ - return _helpers._int_or_none(self._properties.get("writeMsMax")) - - @property - def write_ratio_avg(self): - """Optional[float]: Ratio of time the average worker spent writing - output data, relative to the longest time spent by any worker in any - stage of the overall plan. 
- """ - return self._properties.get("writeRatioAvg") - - @property - def write_ratio_max(self): - """Optional[float]: Ratio of time the slowest worker spent writing - output data, relative to the longest time spent by any worker in any - stage of the overall plan. - """ - return self._properties.get("writeRatioMax") - - @property - def records_read(self): - """Optional[int]: Number of records read by this stage.""" - return _helpers._int_or_none(self._properties.get("recordsRead")) - - @property - def records_written(self): - """Optional[int]: Number of records written by this stage.""" - return _helpers._int_or_none(self._properties.get("recordsWritten")) - - @property - def status(self): - """Optional[str]: status of this stage.""" - return self._properties.get("status") - - @property - def shuffle_output_bytes(self): - """Optional[int]: Number of bytes written by this stage to - intermediate shuffle. - """ - return _helpers._int_or_none(self._properties.get("shuffleOutputBytes")) - - @property - def shuffle_output_bytes_spilled(self): - """Optional[int]: Number of bytes written by this stage to - intermediate shuffle and spilled to disk. - """ - return _helpers._int_or_none(self._properties.get("shuffleOutputBytesSpilled")) - - @property - def steps(self): - """List(QueryPlanEntryStep): List of step operations performed by - each worker in the stage. - """ - return [ - QueryPlanEntryStep.from_api_repr(step) - for step in self._properties.get("steps", []) - ] - - -class TimelineEntry(object): - """TimelineEntry represents progress of a query job at a particular - point in time. - - See - https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#querytimelinesample - for the underlying API representation within query statistics. - """ - - def __init__(self): - self._properties = {} - - @classmethod - def from_api_repr(cls, resource): - """Factory: construct instance from the JSON repr. 
- - Args: - resource(Dict[str: object]): - QueryTimelineSample representation returned from API. - - Returns: - google.cloud.bigquery.TimelineEntry: - Timeline sample parsed from ``resource``. - """ - entry = cls() - entry._properties = resource - return entry - - @property - def elapsed_ms(self): - """Optional[int]: Milliseconds elapsed since start of query - execution.""" - return _helpers._int_or_none(self._properties.get("elapsedMs")) - - @property - def active_units(self): - """Optional[int]: Current number of input units being processed - by workers, reported as largest value since the last sample.""" - return _helpers._int_or_none(self._properties.get("activeUnits")) - - @property - def pending_units(self): - """Optional[int]: Current number of input units remaining for - query stages active at this sample time.""" - return _helpers._int_or_none(self._properties.get("pendingUnits")) - - @property - def completed_units(self): - """Optional[int]: Current number of input units completed by - this query.""" - return _helpers._int_or_none(self._properties.get("completedUnits")) - - @property - def slot_millis(self): - """Optional[int]: Cumulative slot-milliseconds consumed by - this query.""" - return _helpers._int_or_none(self._properties.get("totalSlotMs")) - - -class UnknownJob(_AsyncJob): - """A job whose type cannot be determined.""" - - @classmethod - def from_api_repr(cls, resource, client): - """Construct an UnknownJob from the JSON representation. - - Args: - resource (Dict): JSON representation of a job. - client (google.cloud.bigquery.client.Client): - Client connected to BigQuery API. - - Returns: - UnknownJob: Job corresponding to the resource. 
- """ - job_ref_properties = resource.get("jobReference", {"projectId": client.project}) - job_ref = _JobReference._from_api_repr(job_ref_properties) - job = cls(job_ref, client) - # Populate the job reference with the project, even if it has been - # redacted, because we know it should equal that of the request. - resource["jobReference"] = job_ref_properties - job._properties = resource - return job - - -class ScriptStackFrame(object): - """Stack frame showing the line/column/procedure name where the current - evaluation happened. - - Args: - resource (Map[str, Any]): JSON representation of object. - """ - - def __init__(self, resource): - self._properties = resource - - @property - def procedure_id(self): - """Optional[str]: Name of the active procedure. - - Omitted if in a top-level script. - """ - return self._properties.get("procedureId") - - @property - def text(self): - """str: Text of the current statement/expression.""" - return self._properties.get("text") - - @property - def start_line(self): - """int: One-based start line.""" - return _helpers._int_or_none(self._properties.get("startLine")) - - @property - def start_column(self): - """int: One-based start column.""" - return _helpers._int_or_none(self._properties.get("startColumn")) - - @property - def end_line(self): - """int: One-based end line.""" - return _helpers._int_or_none(self._properties.get("endLine")) - - @property - def end_column(self): - """int: One-based end column.""" - return _helpers._int_or_none(self._properties.get("endColumn")) - - -class ScriptStatistics(object): - """Statistics for a child job of a script. - - Args: - resource (Map[str, Any]): JSON representation of object. - """ - - def __init__(self, resource): - self._properties = resource - - @property - def stack_frames(self): - """List[ScriptStackFrame]: Stack trace where the current evaluation - happened. - - Shows line/column/procedure name of each frame on the stack at the - point where the current evaluation happened. 
- - The leaf frame is first, the primary script is last. - """ - return [ - ScriptStackFrame(frame) for frame in self._properties.get("stackFrames", []) - ] - - @property - def evaluation_kind(self): - """str: Indicates the type of child job. - - Possible values include ``STATEMENT`` and ``EXPRESSION``. - """ - return self._properties.get("evaluationKind") diff --git a/google/cloud/bigquery/job/__init__.py b/google/cloud/bigquery/job/__init__.py new file mode 100644 index 000000000..26ecf8d3c --- /dev/null +++ b/google/cloud/bigquery/job/__init__.py @@ -0,0 +1,77 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Define API Jobs.""" + +from google.cloud.bigquery.job.base import _AsyncJob +from google.cloud.bigquery.job.base import _error_result_to_exception +from google.cloud.bigquery.job.base import _DONE_STATE +from google.cloud.bigquery.job.base import _JobConfig +from google.cloud.bigquery.job.base import _JobReference +from google.cloud.bigquery.job.base import ScriptStatistics +from google.cloud.bigquery.job.base import ScriptStackFrame +from google.cloud.bigquery.job.base import UnknownJob +from google.cloud.bigquery.job.copy_ import CopyJob +from google.cloud.bigquery.job.copy_ import CopyJobConfig +from google.cloud.bigquery.job.extract import ExtractJob +from google.cloud.bigquery.job.extract import ExtractJobConfig +from google.cloud.bigquery.job.load import LoadJob +from google.cloud.bigquery.job.load import LoadJobConfig +from google.cloud.bigquery.job.query import _contains_order_by +from google.cloud.bigquery.job.query import QueryJob +from google.cloud.bigquery.job.query import QueryJobConfig +from google.cloud.bigquery.job.query import QueryPlanEntry +from google.cloud.bigquery.job.query import QueryPlanEntryStep +from google.cloud.bigquery.job.query import TimelineEntry +from google.cloud.bigquery.enums import Compression +from google.cloud.bigquery.enums import CreateDisposition +from google.cloud.bigquery.enums import DestinationFormat +from google.cloud.bigquery.enums import Encoding +from google.cloud.bigquery.enums import QueryPriority +from google.cloud.bigquery.enums import SchemaUpdateOption +from google.cloud.bigquery.enums import SourceFormat +from google.cloud.bigquery.enums import WriteDisposition + + +# Include classes previously in job.py for backwards compatibility. 
+__all__ = [ + "_AsyncJob", + "_error_result_to_exception", + "_DONE_STATE", + "_JobConfig", + "_JobReference", + "ScriptStatistics", + "ScriptStackFrame", + "UnknownJob", + "CopyJob", + "CopyJobConfig", + "ExtractJob", + "ExtractJobConfig", + "LoadJob", + "LoadJobConfig", + "_contains_order_by", + "QueryJob", + "QueryJobConfig", + "QueryPlanEntry", + "QueryPlanEntryStep", + "TimelineEntry", + "Compression", + "CreateDisposition", + "DestinationFormat", + "Encoding", + "QueryPriority", + "SchemaUpdateOption", + "SourceFormat", + "WriteDisposition", +] diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py new file mode 100644 index 000000000..2f4ae1460 --- /dev/null +++ b/google/cloud/bigquery/job/base.py @@ -0,0 +1,912 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
class _JobReference(object):
    """Identify a job by its ID, project and (optionally) location.

    The reference is stored internally in the API's resource form, i.e. a
    dict with the keys ``jobId``, ``projectId`` and, when given,
    ``location``.

    Args:
        job_id (str): ID of the job to run.
        project (str): ID of the project where the job runs.
        location (str): Location of where the job runs. A falsy value
            (``None`` or an empty string) leaves the key out entirely.
    """

    def __init__(self, job_id, project, location):
        properties = {"jobId": job_id, "projectId": project}
        # The location key must be absent, not None, when no location
        # was supplied.
        if location:
            properties["location"] = location
        self._properties = properties

    @property
    def job_id(self):
        """str: ID of the job."""
        return self._properties.get("jobId")

    @property
    def project(self):
        """str: ID of the project where the job runs."""
        return self._properties.get("projectId")

    @property
    def location(self):
        """str: Location where the job runs (None when not set)."""
        return self._properties.get("location")

    def _to_api_repr(self):
        """Return a deep copy of the API resource for this reference."""
        return copy.deepcopy(self._properties)

    @classmethod
    def _from_api_repr(cls, resource):
        """Build a job reference from an API resource representation.

        Missing keys are read as ``None``.
        """
        return cls(
            resource.get("jobId"),
            resource.get("projectId"),
            resource.get("location"),
        )
@property
def num_child_jobs(self):
    """The number of child jobs executed.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics.FIELDS.num_child_jobs

    Returns:
        int: the ``statistics.numChildJobs`` value converted with
        ``int()``, or ``0`` when that value is absent.
    """
    raw_count = _helpers._get_sub_prop(
        self._properties, ["statistics", "numChildJobs"]
    )
    if raw_count is None:
        return 0
    return int(raw_count)
@property
def user_email(self):
    """E-mail address of the user who submitted the job.

    Returns:
        Optional[str]: the e-mail address, read from the ``user_email``
        key of the job resource (None when the key is not present).
    """
    properties = self._properties
    return properties.get("user_email")
def _set_properties(self, api_response):
    """Update properties from resource in body of ``api_response``.

    The ``statistics`` mapping is copied before its timestamp fields are
    converted to ``float``, so the caller's ``api_response`` is not
    modified. Other nested values are stored by reference, as before.

    The dict object currently stored under ``configuration`` is kept and
    updated in place rather than replaced, so anything holding a reference
    to it (see the inline comment below) observes the new values.

    Args:
        api_response (Dict): response returned from an API call.
    """
    cleaned = api_response.copy()

    if "statistics" in cleaned:
        # ``cleaned`` is only a shallow copy: work on a private copy of the
        # nested mapping so the float conversion does not leak back into
        # the dict passed in by the caller.
        statistics = dict(cleaned["statistics"])
        for key in ("creationTime", "startTime", "endTime"):
            if key in statistics:
                statistics[key] = float(statistics[key])
        cleaned["statistics"] = statistics

    # Save configuration to keep reference same in self._configuration.
    cleaned_config = cleaned.pop("configuration", {})
    configuration = self._properties.pop("configuration", {})
    self._properties.clear()
    self._properties.update(cleaned)
    self._properties["configuration"] = configuration
    self._properties["configuration"].update(cleaned_config)

    # For Future interface
    self._set_future_result()
If not passed, falls back to the ``client`` + associated with the job object or``NoneType`` + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + + Raises: + ValueError: + If the job has already begun. + """ + if self.state is not None: + raise ValueError("Job already begun.") + + client = self._require_client(client) + path = "/projects/%s/jobs" % (self.project,) + + # jobs.insert is idempotent because we ensure that every new + # job has an ID. + span_attributes = {"path": path} + api_response = client._call_api( + retry, + span_name="BigQuery.job.begin", + span_attributes=span_attributes, + job_ref=self, + method="POST", + path=path, + data=self.to_api_repr(), + timeout=timeout, + ) + self._set_properties(api_response) + + def exists(self, client=None, retry=DEFAULT_RETRY, timeout=None): + """API call: test for the existence of the job via a GET request + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get + + Args: + client (Optional[google.cloud.bigquery.client.Client]): + the client to use. If not passed, falls back to the + ``client`` stored on the current dataset. + + retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + + Returns: + bool: Boolean indicating existence of the job. 
def reload(self, client=None, retry=DEFAULT_RETRY, timeout=None):
    """API call: refresh job properties via a GET request.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get

    Args:
        client (Optional[google.cloud.bigquery.client.Client]):
            the client to use. If not passed, the value returned by
            ``self._require_client(None)`` is used.
        retry (Optional[google.api_core.retry.Retry]): How to retry the RPC.
        timeout (Optional[float]):
            The number of seconds to wait for the underlying HTTP transport
            before using ``retry``.
    """
    client = self._require_client(client)

    # The location is only sent as a query parameter when the job has one.
    location = self.location
    query_params = {"location": location} if location else {}
    path = self.path

    api_response = client._call_api(
        retry,
        span_name="BigQuery.job.reload",
        span_attributes={"path": path},
        job_ref=self,
        method="GET",
        path=path,
        query_params=query_params,
        timeout=timeout,
    )
    self._set_properties(api_response)
+ timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry`` + + Returns: + bool: Boolean indicating that the cancel request was sent. + """ + client = self._require_client(client) + + extra_params = {} + if self.location: + extra_params["location"] = self.location + + path = "{}/cancel".format(self.path) + span_attributes = {"path": path} + + api_response = client._call_api( + retry, + span_name="BigQuery.job.cancel", + span_attributes=span_attributes, + job_ref=self, + method="POST", + path=path, + query_params=extra_params, + timeout=timeout, + ) + self._set_properties(api_response["job"]) + # The Future interface requires that we return True if the *attempt* + # to cancel was successful. + return True + + # The following methods implement the PollingFuture interface. Note that + # the methods above are from the pre-Future interface and are left for + # compatibility. The only "overloaded" method is :meth:`cancel`, which + # satisfies both interfaces. + + def _set_future_result(self): + """Set the result or exception from the job if it is complete.""" + # This must be done in a lock to prevent the polling thread + # and main thread from both executing the completion logic + # at the same time. + with self._completion_lock: + # If the operation isn't complete or if the result has already been + # set, do not call set_result/set_exception again. + # Note: self._result_set is set to True in set_result and + # set_exception, in case those methods are invoked directly. + if not self.done(reload=False) or self._result_set: + return + + if self.error_result is not None: + exception = _error_result_to_exception(self.error_result) + self.set_exception(exception) + else: + self.set_result(self) + + def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): + """Checks if the job is complete. + + Args: + retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. 
def cancelled(self):
    """Check if the job has been cancelled.

    The job is reported as cancelled when it carries an ``error_result``
    whose ``reason`` equals ``_STOPPED_REASON``. This method is here to
    satisfy the interface for :class:`google.api_core.future.Future`.

    Returns:
        bool: True if the error result's reason is the "stopped" reason,
        False otherwise (including when there is no error result).
    """
    error_result = self.error_result
    if error_result is None:
        return False
    return error_result.get("reason") == _STOPPED_REASON
@property
def labels(self):
    """Dict[str, str]: Labels for the job.

    When no ``labels`` entry exists yet, an empty dict is created, stored
    and returned, so changes made to the returned dict are kept. To change
    a job's labels, modify the dict, then call ``Client.update_job``. To
    delete a label, set its value to :data:`None` before updating.

    Raises:
        ValueError: On assignment, if the new value is not a dict.
    """
    properties = self._properties
    if "labels" not in properties:
        properties["labels"] = {}
    return properties["labels"]


@labels.setter
def labels(self, value):
    # Guard clause: anything other than a dict is rejected outright.
    if isinstance(value, dict):
        self._properties["labels"] = value
        return
    raise ValueError("Pass a dict")
def _fill_from_default(self, default_job_config):
    """Merge this job config with a default job config.

    The keys in this object take precedence over the keys in the default
    config. The merge is done at the top-level as well as for keys one
    level below the job type.

    The defaults are deep-copied, so ``default_job_config`` is never
    modified. Top-level values taken from this object are stored in the
    result by reference, not copied.

    Args:
        default_job_config (google.cloud.bigquery.job._JobConfig):
            The default job config that will be used to fill in self.

    Returns:
        google.cloud.bigquery.job._JobConfig: A new (merged) job config.

    Raises:
        TypeError: If the two configs have different job types.
    """
    job_type = self._job_type
    default_type = default_job_config._job_type
    if job_type != default_type:
        raise TypeError(
            "attempted to merge two incompatible job types: "
            + repr(job_type)
            + ", "
            + repr(default_type)
        )

    merged_config = self.__class__()
    merged = copy.deepcopy(default_job_config._properties)

    # Top level: every key of self except the job-type sub-dict replaces
    # the default outright.
    for key, value in self._properties.items():
        if key != job_type:
            merged[key] = value

    # One level down: inside the job-type sub-dict, self wins key by key.
    merged[job_type].update(self._properties[job_type])

    merged_config._properties = merged
    return merged_config
class ScriptStatistics(object):
    """Statistics for a child job of a script.

    The resource passed in is stored as-is (not copied) and read lazily by
    the properties below.

    Args:
        resource (Map[str, Any]): JSON representation of object.
    """

    def __init__(self, resource):
        self._properties = resource

    @property
    def stack_frames(self):
        """List[ScriptStackFrame]: Stack trace where the current evaluation
        happened.

        Shows line/column/procedure name of each frame on the stack at the
        point where the current evaluation happened.

        The leaf frame is first, the primary script is last.

        A new list is built on every access; it is empty when the resource
        has no ``stackFrames`` entry.
        """
        raw_frames = self._properties.get("stackFrames", [])
        return [ScriptStackFrame(raw_frame) for raw_frame in raw_frames]

    @property
    def evaluation_kind(self):
        """str: Indicates the type of child job.

        Possible values include ``STATEMENT`` and ``EXPRESSION``.
        """
        properties = self._properties
        return properties.get("evaluationKind")
class CopyJobConfig(_JobConfig):
    """Configuration options for copy jobs.

    All properties in this class are optional. Values which are :data:`None` ->
    server defaults. Set properties on the constructed configuration by using
    the property name as the name of a keyword argument.
    """

    def __init__(self, **kwargs):
        # "copy" is the key under which the base class keeps the
        # job-type specific settings.
        super(CopyJobConfig, self).__init__("copy", **kwargs)

    @property
    def create_disposition(self):
        """google.cloud.bigquery.job.CreateDisposition: Specifies behavior
        for creating tables.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.create_disposition
        """
        return self._get_sub_prop("createDisposition")

    @create_disposition.setter
    def create_disposition(self, value):
        self._set_sub_prop("createDisposition", value)

    @property
    def write_disposition(self):
        """google.cloud.bigquery.job.WriteDisposition: Action that occurs if
        the destination table already exists.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.write_disposition
        """
        return self._get_sub_prop("writeDisposition")

    @write_disposition.setter
    def write_disposition(self, value):
        self._set_sub_prop("writeDisposition", value)

    @property
    def destination_encryption_configuration(self):
        """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom
        encryption configuration for the destination table.

        Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None`
        if using default encryption.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.destination_encryption_configuration
        """
        resource = self._get_sub_prop("destinationEncryptionConfiguration")
        if resource is None:
            return None
        return EncryptionConfiguration.from_api_repr(resource)

    @destination_encryption_configuration.setter
    def destination_encryption_configuration(self, value):
        # None is stored as None; anything else is stored in its API form.
        api_repr = None if value is None else value.to_api_repr()
        self._set_sub_prop("destinationEncryptionConfiguration", api_repr)
+ + Args: + job_id (str): the job's ID, within the project belonging to ``client``. + + sources (List[google.cloud.bigquery.table.TableReference]): Table from which data is to be loaded. + + destination (google.cloud.bigquery.table.TableReference): Table into which data is to be loaded. + + client (google.cloud.bigquery.client.Client): + A client which holds credentials and project configuration + for the dataset (which requires a project). + + job_config (Optional[google.cloud.bigquery.job.CopyJobConfig]): + Extra configuration options for the copy job. + """ + + _JOB_TYPE = "copy" + + def __init__(self, job_id, sources, destination, client, job_config=None): + super(CopyJob, self).__init__(job_id, client) + + if not job_config: + job_config = CopyJobConfig() + + self._configuration = job_config + self._properties["configuration"] = job_config._properties + + if destination: + _helpers._set_sub_prop( + self._properties, + ["configuration", "copy", "destinationTable"], + destination.to_api_repr(), + ) + + if sources: + source_resources = [source.to_api_repr() for source in sources] + _helpers._set_sub_prop( + self._properties, + ["configuration", "copy", "sourceTables"], + source_resources, + ) + + @property + def destination(self): + """google.cloud.bigquery.table.TableReference: Table into which data + is to be loaded. + """ + return TableReference.from_api_repr( + _helpers._get_sub_prop( + self._properties, ["configuration", "copy", "destinationTable"] + ) + ) + + @property + def sources(self): + """List[google.cloud.bigquery.table.TableReference]): Table(s) from + which data is to be loaded. 
def to_api_repr(self):
    """Generate a resource for :meth:`_begin`.

    Returns:
        Dict: A new dict holding only the ``jobReference`` and
        ``configuration`` entries of the job's properties. The values are
        the stored objects themselves, not copies.
    """
    properties = self._properties
    # Only these two entries are included; anything else (e.g. statistics)
    # is left out.
    return {key: properties[key] for key in ("jobReference", "configuration")}
+ + Args: + resource (Dict): dataset job representation returned from the API + client (google.cloud.bigquery.client.Client): + Client which holds credentials and project + configuration for the dataset. + + Returns: + google.cloud.bigquery.job.CopyJob: Job parsed from ``resource``. + """ + cls._check_resource_config(resource) + job_ref = _JobReference._from_api_repr(resource["jobReference"]) + job = cls(job_ref, None, None, client=client) + job._set_properties(resource) + return job diff --git a/google/cloud/bigquery/job/extract.py b/google/cloud/bigquery/job/extract.py new file mode 100644 index 000000000..a6e262a32 --- /dev/null +++ b/google/cloud/bigquery/job/extract.py @@ -0,0 +1,266 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Classes for extract (export) jobs.""" + +from google.cloud.bigquery import _helpers +from google.cloud.bigquery.model import ModelReference +from google.cloud.bigquery.table import Table +from google.cloud.bigquery.table import TableListItem +from google.cloud.bigquery.table import TableReference +from google.cloud.bigquery.job.base import _AsyncJob +from google.cloud.bigquery.job.base import _JobConfig +from google.cloud.bigquery.job.base import _JobReference + + +class ExtractJobConfig(_JobConfig): + """Configuration options for extract jobs. + + All properties in this class are optional. Values which are :data:`None` -> + server defaults. 
def __init__(self, **kwargs):
    """Initialize the configuration for the ``"extract"`` job type.

    Args:
        **kwargs: Forwarded unchanged to the base-class initializer.
    """
    super(ExtractJobConfig, self).__init__("extract", **kwargs)


@property
def compression(self):
    """google.cloud.bigquery.job.Compression: Compression type to use for
    exported files.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.compression
    """
    stored = self._get_sub_prop("compression")
    return stored


@compression.setter
def compression(self, value):
    # Stored as given; no validation or conversion happens here.
    self._set_sub_prop("compression", value)


@property
def destination_format(self):
    """google.cloud.bigquery.job.DestinationFormat: Exported file format.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.destination_format
    """
    stored = self._get_sub_prop("destinationFormat")
    return stored


@destination_format.setter
def destination_format(self, value):
    # Stored as given; no validation or conversion happens here.
    self._set_sub_prop("destinationFormat", value)


@property
def field_delimiter(self):
    """str: Delimiter to use between fields in the exported data.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.field_delimiter
    """
    stored = self._get_sub_prop("fieldDelimiter")
    return stored


@field_delimiter.setter
def field_delimiter(self, value):
    # Stored as given; no validation or conversion happens here.
    self._set_sub_prop("fieldDelimiter", value)


@property
def print_header(self):
    """bool: Print a header row in the exported data.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationExtract.FIELDS.print_header
    """
    stored = self._get_sub_prop("printHeader")
    return stored


@print_header.setter
def print_header(self, value):
    # Stored as given; no validation or conversion happens here.
    self._set_sub_prop("printHeader", value)
def __init__(self, job_id, source, destination_uris, client, job_config=None):
    """Create an extract job and record its source and destination URIs.

    Args:
        job_id (str): the job's ID, forwarded to the base-class initializer.

        source (Union[ \
            google.cloud.bigquery.table.TableReference, \
            google.cloud.bigquery.model.ModelReference \
        ]):
            Table or Model from which data is to be extracted. Nothing is
            recorded when this is falsy.

        destination_uris (List[str]):
            URIs describing where the extracted data will be written.
            Nothing is recorded when this is falsy.

        client (google.cloud.bigquery.client.Client):
            Client forwarded to the base-class initializer.

        job_config (Optional[google.cloud.bigquery.job.ExtractJobConfig]):
            Extra configuration options for the extract job. ``None`` is
            replaced by a fresh ``ExtractJobConfig``.
    """
    super(ExtractJob, self).__init__(job_id, client)

    config = ExtractJobConfig() if job_config is None else job_config
    # The very same dict object is referenced here; no copy is made.
    self._properties["configuration"] = config._properties
    self._configuration = config

    extract_path = ["configuration", "extract"]

    if source:
        resource = {"projectId": source.project, "datasetId": source.dataset_id}

        # Table-like sources are keyed by ``tableId``; anything else is
        # treated as a model and keyed by ``modelId``.
        if isinstance(source, (Table, TableListItem, TableReference)):
            resource["tableId"] = source.table_id
            resource_key = "sourceTable"
        else:
            resource["modelId"] = source.model_id
            resource_key = "sourceModel"

        _helpers._set_sub_prop(
            self._properties, extract_path + [resource_key], resource
        )

    if destination_uris:
        _helpers._set_sub_prop(
            self._properties,
            extract_path + ["destinationUris"],
            destination_uris,
        )
@property
def destination_uris(self):
    """List[str]: URIs describing where the extracted data will be
    written in Cloud Storage, using the format
    ``gs://<bucket_name>/<object_name_or_glob>``.
    """
    uris = _helpers._get_sub_prop(
        self._properties, ["configuration", "extract", "destinationUris"]
    )
    return uris


@property
def compression(self):
    """See
    :attr:`google.cloud.bigquery.job.ExtractJobConfig.compression`.
    """
    config = self._configuration
    return config.compression


@property
def destination_format(self):
    """See
    :attr:`google.cloud.bigquery.job.ExtractJobConfig.destination_format`.
    """
    config = self._configuration
    return config.destination_format


@property
def field_delimiter(self):
    """See
    :attr:`google.cloud.bigquery.job.ExtractJobConfig.field_delimiter`.
    """
    config = self._configuration
    return config.field_delimiter


@property
def print_header(self):
    """See
    :attr:`google.cloud.bigquery.job.ExtractJobConfig.print_header`.
    """
    config = self._configuration
    return config.print_header


@property
def destination_uri_file_counts(self):
    """Return file counts from job statistics, if present.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics4.FIELDS.destination_uri_file_counts

    Returns:
        List[int]:
            A list of integer counts, each representing the number of files
            per destination URI or URI pattern specified in the extract
            configuration. These values will be in the same order as the URIs
            specified in the 'destinationUris' field. Returns None when the
            statistics hold no ``destinationUriFileCounts`` entry.
    """
    raw_counts = self._job_statistics().get("destinationUriFileCounts")
    if raw_counts is None:
        return None
    # Each entry is passed through ``int`` before being returned.
    return [int(raw) for raw in raw_counts]


def to_api_repr(self):
    """Generate a resource for :meth:`_begin`.

    Only the job reference and the configuration are emitted; any other
    top-level entry of ``_properties`` (such as statistics) is left out.
    """
    return {key: self._properties[key] for key in ("jobReference", "configuration")}


@classmethod
def from_api_repr(cls, resource, client):
    """Factory: construct a job given its API representation

    .. note:

       This method assumes that the project found in the resource matches
       the client's project.

    Args:
        resource (Dict): job representation returned from the API

        client (google.cloud.bigquery.client.Client):
            Client which holds credentials and project
            configuration for the dataset.

    Returns:
        google.cloud.bigquery.job.ExtractJob: Job parsed from ``resource``.
    """
    cls._check_resource_config(resource)
    reference = _JobReference._from_api_repr(resource["jobReference"])
    # Source and destination start empty; they come from ``resource``.
    instance = cls(reference, None, None, client=client)
    instance._set_properties(resource)
    return instance
def __init__(self, **kwargs):
    """Initialize the configuration for the ``"load"`` job type.

    Args:
        **kwargs: Forwarded unchanged to the base-class initializer.
    """
    super(LoadJobConfig, self).__init__("load", **kwargs)


@property
def allow_jagged_rows(self):
    """Optional[bool]: Allow missing trailing optional columns (CSV only).

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.allow_jagged_rows
    """
    stored = self._get_sub_prop("allowJaggedRows")
    return stored


@allow_jagged_rows.setter
def allow_jagged_rows(self, value):
    # Stored as given; no validation or conversion happens here.
    self._set_sub_prop("allowJaggedRows", value)


@property
def allow_quoted_newlines(self):
    """Optional[bool]: Allow quoted data containing newline characters (CSV only).

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.allow_quoted_newlines
    """
    stored = self._get_sub_prop("allowQuotedNewlines")
    return stored


@allow_quoted_newlines.setter
def allow_quoted_newlines(self, value):
    # Stored as given; no validation or conversion happens here.
    self._set_sub_prop("allowQuotedNewlines", value)


@property
def autodetect(self):
    """Optional[bool]: Automatically infer the schema from a sample of the data.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.autodetect
    """
    stored = self._get_sub_prop("autodetect")
    return stored


@autodetect.setter
def autodetect(self, value):
    # Stored as given; no validation or conversion happens here.
    self._set_sub_prop("autodetect", value)


@property
def clustering_fields(self):
    """Optional[List[str]]: Fields defining clustering for the table

    (Defaults to :data:`None`).

    Clustering fields are immutable after table creation.

    .. note::

       BigQuery supports clustering for both partitioned and
       non-partitioned tables.
    """
    clustering = self._get_sub_prop("clustering")
    if clustering is None:
        return None
    # A new list is built on every access; a missing ``fields`` entry
    # yields an empty list.
    return list(clustering.get("fields", ()))


@clustering_fields.setter
def clustering_fields(self, value):
    """Optional[List[str]]: Fields defining clustering for the table

    (Defaults to :data:`None`).
    """
    if value is None:
        # ``None`` removes the whole ``clustering`` entry.
        self._del_sub_prop("clustering")
        return
    self._set_sub_prop("clustering", {"fields": value})
@property
def destination_encryption_configuration(self):
    """Optional[google.cloud.bigquery.encryption_configuration.EncryptionConfiguration]: Custom
    encryption configuration for the destination table.

    Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None`
    if using default encryption.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.destination_encryption_configuration
    """
    resource = self._get_sub_prop("destinationEncryptionConfiguration")
    if resource is None:
        return None
    return EncryptionConfiguration.from_api_repr(resource)


@destination_encryption_configuration.setter
def destination_encryption_configuration(self, value):
    if value is None:
        # ``None`` removes the entry instead of storing a null value.
        self._del_sub_prop("destinationEncryptionConfiguration")
        return
    self._set_sub_prop("destinationEncryptionConfiguration", value.to_api_repr())


@property
def destination_table_description(self):
    """Optional[str]: Description given to destination table.

    :data:`None` when no destination table properties are stored, or when
    they hold no ``description`` entry (for example because only the
    friendly name was set).

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.description
    """
    table_props = self._get_sub_prop("destinationTableProperties")
    if table_props is None:
        return None
    # ``.get`` rather than indexing: the sibling ``friendlyName`` key may be
    # the only one present, which must not raise ``KeyError``.
    return table_props.get("description")


@destination_table_description.setter
def destination_table_description(self, value):
    keys = [self._job_type, "destinationTableProperties", "description"]
    if value is None:
        _helpers._del_sub_prop(self._properties, keys)
    else:
        _helpers._set_sub_prop(self._properties, keys, value)


@property
def destination_table_friendly_name(self):
    """Optional[str]: Name given to destination table.

    :data:`None` when no destination table properties are stored, or when
    they hold no ``friendlyName`` entry (for example because only the
    description was set).

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.friendly_name
    """
    table_props = self._get_sub_prop("destinationTableProperties")
    if table_props is None:
        return None
    # ``.get`` rather than indexing: the sibling ``description`` key may be
    # the only one present, which must not raise ``KeyError``.
    return table_props.get("friendlyName")


@destination_table_friendly_name.setter
def destination_table_friendly_name(self, value):
    keys = [self._job_type, "destinationTableProperties", "friendlyName"]
    if value is None:
        _helpers._del_sub_prop(self._properties, keys)
    else:
        _helpers._set_sub_prop(self._properties, keys, value)


@property
def encoding(self):
    """Optional[google.cloud.bigquery.job.Encoding]: The character encoding of the
    data.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.encoding
    """
    return self._get_sub_prop("encoding")


@encoding.setter
def encoding(self, value):
    self._set_sub_prop("encoding", value)


@property
def field_delimiter(self):
    """Optional[str]: The separator for fields in a CSV file.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.field_delimiter
    """
    return self._get_sub_prop("fieldDelimiter")


@field_delimiter.setter
def field_delimiter(self, value):
    self._set_sub_prop("fieldDelimiter", value)
@property
def ignore_unknown_values(self):
    """Optional[bool]: Ignore extra values not represented in the table schema.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.ignore_unknown_values
    """
    stored = self._get_sub_prop("ignoreUnknownValues")
    return stored


@ignore_unknown_values.setter
def ignore_unknown_values(self, value):
    # Stored as given; no validation or conversion happens here.
    self._set_sub_prop("ignoreUnknownValues", value)


@property
def max_bad_records(self):
    """Optional[int]: Number of invalid rows to ignore.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.max_bad_records
    """
    stored = self._get_sub_prop("maxBadRecords")
    return _helpers._int_or_none(stored)


@max_bad_records.setter
def max_bad_records(self, value):
    # Stored as given; conversion only happens in the getter.
    self._set_sub_prop("maxBadRecords", value)


@property
def null_marker(self):
    """Optional[str]: Represents a null value (CSV only).

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.null_marker
    """
    stored = self._get_sub_prop("nullMarker")
    return stored


@null_marker.setter
def null_marker(self, value):
    # Stored as given; no validation or conversion happens here.
    self._set_sub_prop("nullMarker", value)


@property
def quote_character(self):
    """Optional[str]: Character used to quote data sections (CSV only).

    Stored under the ``quote`` key.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.quote
    """
    stored = self._get_sub_prop("quote")
    return stored


@quote_character.setter
def quote_character(self, value):
    # Stored as given; no validation or conversion happens here.
    self._set_sub_prop("quote", value)


@property
def range_partitioning(self):
    """Optional[google.cloud.bigquery.table.RangePartitioning]:
    Configures range-based partitioning for destination table.

    .. note::
        **Beta**. The integer range partitioning feature is in a
        pre-release state and might change or have limited support.

    Only specify at most one of
    :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or
    :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`.

    Raises:
        ValueError:
            If the value is not
            :class:`~google.cloud.bigquery.table.RangePartitioning` or
            :data:`None`.
    """
    stored = self._get_sub_prop("rangePartitioning")
    if stored is None:
        return None
    # The stored dict is handed to the wrapper as its ``_properties``.
    return RangePartitioning(_properties=stored)


@range_partitioning.setter
def range_partitioning(self, value):
    if isinstance(value, RangePartitioning):
        resource = value._properties
    elif value is None:
        resource = None
    else:
        raise ValueError(
            "Expected value to be RangePartitioning or None, got {}.".format(value)
        )
    self._set_sub_prop("rangePartitioning", resource)
@property
def schema_update_options(self):
    """Optional[List[google.cloud.bigquery.job.SchemaUpdateOption]]: Specifies
    updates to the destination table schema to allow as a side effect of
    the load job.
    """
    return self._get_sub_prop("schemaUpdateOptions")


@schema_update_options.setter
def schema_update_options(self, values):
    self._set_sub_prop("schemaUpdateOptions", values)


@property
def skip_leading_rows(self):
    """Optional[int]: Number of rows to skip when reading data (CSV only).

    The value is kept as a string in the resource and converted back to
    ``int`` on access. Assigning :data:`None` removes the entry.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.skip_leading_rows
    """
    return _helpers._int_or_none(self._get_sub_prop("skipLeadingRows"))


@skip_leading_rows.setter
def skip_leading_rows(self, value):
    if value is None:
        # ``str(None)`` would store the literal "None", which the getter
        # cannot convert back to an integer; drop the entry instead.
        self._del_sub_prop("skipLeadingRows")
        return
    self._set_sub_prop("skipLeadingRows", str(value))


@property
def source_format(self):
    """Optional[google.cloud.bigquery.job.SourceFormat]: File format of the data.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_format
    """
    return self._get_sub_prop("sourceFormat")


@source_format.setter
def source_format(self, value):
    self._set_sub_prop("sourceFormat", value)


@property
def time_partitioning(self):
    """Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies time-based
    partitioning for the destination table.

    Only specify at most one of
    :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or
    :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`.
    """
    resource = self._get_sub_prop("timePartitioning")
    if resource is None:
        return None
    return TimePartitioning.from_api_repr(resource)


@time_partitioning.setter
def time_partitioning(self, value):
    if value is None:
        # ``None`` removes the entry instead of storing a null value.
        self._del_sub_prop("timePartitioning")
        return
    self._set_sub_prop("timePartitioning", value.to_api_repr())


@property
def use_avro_logical_types(self):
    """Optional[bool]: For loads of Avro data, governs whether Avro logical types are
    converted to their corresponding BigQuery types (e.g. TIMESTAMP) rather than
    raw types (e.g. INTEGER).
    """
    return self._get_sub_prop("useAvroLogicalTypes")


@use_avro_logical_types.setter
def use_avro_logical_types(self, value):
    # Any value is coerced with ``bool`` before being stored.
    self._set_sub_prop("useAvroLogicalTypes", bool(value))


@property
def write_disposition(self):
    """Optional[google.cloud.bigquery.job.WriteDisposition]: Action that occurs if
    the destination table already exists.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.write_disposition
    """
    return self._get_sub_prop("writeDisposition")


@write_disposition.setter
def write_disposition(self, value):
    self._set_sub_prop("writeDisposition", value)
_JOB_TYPE = "load"


def __init__(self, job_id, source_uris, destination, client, job_config=None):
    """Create a load job and record its source URIs and destination table.

    Args:
        job_id (str): the job's ID, forwarded to the base-class initializer.

        source_uris (Optional[Sequence[str]]):
            URIs of one or more data files to be loaded. Nothing is
            recorded when this is ``None``.

        destination (google.cloud.bigquery.table.TableReference):
            Reference to the table into which data is to be loaded.
            Nothing is recorded when this is ``None``.

        client (google.cloud.bigquery.client.Client):
            Client forwarded to the base-class initializer.

        job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]):
            Extra configuration options. Any falsy value is replaced by a
            fresh ``LoadJobConfig``.
    """
    super(LoadJob, self).__init__(job_id, client)

    config = job_config or LoadJobConfig()
    self._configuration = config
    # The very same dict object is referenced here; no copy is made.
    self._properties["configuration"] = config._properties

    load_path = ["configuration", "load"]
    if source_uris is not None:
        _helpers._set_sub_prop(
            self._properties, load_path + ["sourceUris"], source_uris
        )
    if destination is not None:
        _helpers._set_sub_prop(
            self._properties,
            load_path + ["destinationTable"],
            destination.to_api_repr(),
        )


@property
def destination(self):
    """google.cloud.bigquery.table.TableReference: table where loaded rows are written

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.destination_table
    """
    table_resource = _helpers._get_sub_prop(
        self._properties, ["configuration", "load", "destinationTable"]
    )
    return TableReference.from_api_repr(table_resource)


@property
def source_uris(self):
    """Optional[Sequence[str]]: URIs of data files to be loaded. See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_uris
    for supported URI formats. None for jobs that load from a file.
    """
    uris = _helpers._get_sub_prop(
        self._properties, ["configuration", "load", "sourceUris"]
    )
    return uris


@property
def allow_jagged_rows(self):
    """See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.allow_jagged_rows`.
    """
    config = self._configuration
    return config.allow_jagged_rows
@property
def autodetect(self):
    """See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.autodetect`.
    """
    config = self._configuration
    return config.autodetect


@property
def create_disposition(self):
    """See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.create_disposition`.
    """
    config = self._configuration
    return config.create_disposition


@property
def encoding(self):
    """See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.encoding`.
    """
    config = self._configuration
    return config.encoding


@property
def field_delimiter(self):
    """See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.field_delimiter`.
    """
    config = self._configuration
    return config.field_delimiter


@property
def ignore_unknown_values(self):
    """See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.ignore_unknown_values`.
    """
    config = self._configuration
    return config.ignore_unknown_values


@property
def max_bad_records(self):
    """See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.max_bad_records`.
    """
    config = self._configuration
    return config.max_bad_records


@property
def null_marker(self):
    """See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.null_marker`.
    """
    config = self._configuration
    return config.null_marker


@property
def quote_character(self):
    """See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.quote_character`.
    """
    config = self._configuration
    return config.quote_character


@property
def skip_leading_rows(self):
    """See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.skip_leading_rows`.
    """
    config = self._configuration
    return config.skip_leading_rows


@property
def source_format(self):
    """See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.source_format`.
    """
    config = self._configuration
    return config.source_format


@property
def write_disposition(self):
    """See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.write_disposition`.
    """
    config = self._configuration
    return config.write_disposition


@property
def schema(self):
    """See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.schema`.
    """
    config = self._configuration
    return config.schema


@property
def destination_encryption_configuration(self):
    """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom
    encryption configuration for the destination table.

    Custom encryption configuration (e.g., Cloud KMS keys)
    or :data:`None` if using default encryption.

    See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.destination_encryption_configuration`.
    """
    config = self._configuration
    return config.destination_encryption_configuration


@property
def destination_table_description(self):
    """Optional[str]: Description given to destination table.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.description
    """
    config = self._configuration
    return config.destination_table_description


@property
def destination_table_friendly_name(self):
    """Optional[str]: Name given to destination table.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.friendly_name
    """
    config = self._configuration
    return config.destination_table_friendly_name


@property
def range_partitioning(self):
    """See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.range_partitioning`.
    """
    config = self._configuration
    return config.range_partitioning


@property
def time_partitioning(self):
    """See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.time_partitioning`.
    """
    config = self._configuration
    return config.time_partitioning


@property
def use_avro_logical_types(self):
    """See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.use_avro_logical_types`.
    """
    config = self._configuration
    return config.use_avro_logical_types


@property
def clustering_fields(self):
    """See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.clustering_fields`.
    """
    config = self._configuration
    return config.clustering_fields


@property
def schema_update_options(self):
    """See
    :attr:`google.cloud.bigquery.job.LoadJobConfig.schema_update_options`.
    """
    config = self._configuration
    return config.schema_update_options


@property
def input_file_bytes(self):
    """Count of bytes loaded from source files.

    Returns:
        Optional[int]: the count (None until set from the server).

    Raises:
        ValueError: for invalid value types.
    """
    raw = _helpers._get_sub_prop(
        self._properties, ["statistics", "load", "inputFileBytes"]
    )
    return _helpers._int_or_none(raw)


@property
def input_files(self):
    """Count of source files.

    Returns:
        Optional[int]: the count (None until set from the server).
    """
    raw = _helpers._get_sub_prop(
        self._properties, ["statistics", "load", "inputFiles"]
    )
    return _helpers._int_or_none(raw)


@property
def output_bytes(self):
    """Count of bytes saved to destination table.

    Returns:
        Optional[int]: the count (None until set from the server).
    """
    raw = _helpers._get_sub_prop(
        self._properties, ["statistics", "load", "outputBytes"]
    )
    return _helpers._int_or_none(raw)


@property
def output_rows(self):
    """Count of rows saved to destination table.

    Returns:
        Optional[int]: the count (None until set from the server).
    """
    raw = _helpers._get_sub_prop(
        self._properties, ["statistics", "load", "outputRows"]
    )
    return _helpers._int_or_none(raw)


def to_api_repr(self):
    """Generate a resource for :meth:`_begin`.

    Only the job reference and the configuration are emitted; any other
    top-level entry of ``_properties`` (such as statistics) is left out.
    """
    return {key: self._properties[key] for key in ("jobReference", "configuration")}
+ + Args: + resource (Dict): dataset job representation returned from the API + + client (google.cloud.bigquery.client.Client): + Client which holds credentials and project + configuration for the dataset. + + Returns: + google.cloud.bigquery.job.LoadJob: Job parsed from ``resource``. + """ + cls._check_resource_config(resource) + job_ref = _JobReference._from_api_repr(resource["jobReference"]) + job = cls(job_ref, None, None, client) + job._set_properties(resource) + return job diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py new file mode 100644 index 000000000..e25077360 --- /dev/null +++ b/google/cloud/bigquery/job/query.py @@ -0,0 +1,1644 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Classes for query jobs.""" + +import concurrent.futures +import copy +import re + +from google.api_core import exceptions +import requests +import six + +from google.cloud.bigquery.dataset import Dataset +from google.cloud.bigquery.dataset import DatasetListItem +from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +from google.cloud.bigquery.external_config import ExternalConfig +from google.cloud.bigquery import _helpers +from google.cloud.bigquery.query import _query_param_from_api_repr +from google.cloud.bigquery.query import ArrayQueryParameter +from google.cloud.bigquery.query import ScalarQueryParameter +from google.cloud.bigquery.query import StructQueryParameter +from google.cloud.bigquery.query import UDFResource +from google.cloud.bigquery.retry import DEFAULT_RETRY +from google.cloud.bigquery.routine import RoutineReference +from google.cloud.bigquery.table import _EmptyRowIterator +from google.cloud.bigquery.table import RangePartitioning +from google.cloud.bigquery.table import _table_arg_to_table_ref +from google.cloud.bigquery.table import Table +from google.cloud.bigquery.table import TableReference +from google.cloud.bigquery.table import TimePartitioning + +from google.cloud.bigquery.job.base import _AsyncJob +from google.cloud.bigquery.job.base import _DONE_STATE +from google.cloud.bigquery.job.base import _JobConfig +from google.cloud.bigquery.job.base import _JobReference + + +_CONTAINS_ORDER_BY = re.compile(r"ORDER\s+BY", re.IGNORECASE) +_TIMEOUT_BUFFER_SECS = 0.1 + + +def _contains_order_by(query): + """Do we need to preserve the order of the query results? + + This function has known false positives, such as with ordered window + functions: + + .. code-block:: sql + + SELECT SUM(x) OVER ( + window_name + PARTITION BY... + ORDER BY... + window_frame_clause) + FROM ... 
+ + This false positive failure case means the behavior will be correct, but + downloading results with the BigQuery Storage API may be slower than it + otherwise would. This is preferable to the false negative case, where + results are expected to be in order but are not (due to parallel reads). + """ + return query and _CONTAINS_ORDER_BY.search(query) + + +def _from_api_repr_query_parameters(resource): + return [_query_param_from_api_repr(mapping) for mapping in resource] + + +def _to_api_repr_query_parameters(value): + return [query_parameter.to_api_repr() for query_parameter in value] + + +def _from_api_repr_udf_resources(resource): + udf_resources = [] + for udf_mapping in resource: + for udf_type, udf_value in udf_mapping.items(): + udf_resources.append(UDFResource(udf_type, udf_value)) + return udf_resources + + +def _to_api_repr_udf_resources(value): + return [{udf_resource.udf_type: udf_resource.value} for udf_resource in value] + + +def _from_api_repr_table_defs(resource): + return {k: ExternalConfig.from_api_repr(v) for k, v in resource.items()} + + +def _to_api_repr_table_defs(value): + return {k: ExternalConfig.to_api_repr(v) for k, v in value.items()} + + +class QueryJobConfig(_JobConfig): + """Configuration options for query jobs. + + All properties in this class are optional. Values which are :data:`None` -> + server defaults. Set properties on the constructed configuration by using + the property name as the name of a keyword argument. + """ + + def __init__(self, **kwargs): + super(QueryJobConfig, self).__init__("query", **kwargs) + + @property + def destination_encryption_configuration(self): + """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom + encryption configuration for the destination table. + + Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` + if using default encryption. 
+ + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.destination_encryption_configuration + """ + prop = self._get_sub_prop("destinationEncryptionConfiguration") + if prop is not None: + prop = EncryptionConfiguration.from_api_repr(prop) + return prop + + @destination_encryption_configuration.setter + def destination_encryption_configuration(self, value): + api_repr = value + if value is not None: + api_repr = value.to_api_repr() + self._set_sub_prop("destinationEncryptionConfiguration", api_repr) + + @property + def allow_large_results(self): + """bool: Allow large query results tables (legacy SQL, only) + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.allow_large_results + """ + return self._get_sub_prop("allowLargeResults") + + @allow_large_results.setter + def allow_large_results(self, value): + self._set_sub_prop("allowLargeResults", value) + + @property + def create_disposition(self): + """google.cloud.bigquery.job.CreateDisposition: Specifies behavior + for creating tables. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.create_disposition + """ + return self._get_sub_prop("createDisposition") + + @create_disposition.setter + def create_disposition(self, value): + self._set_sub_prop("createDisposition", value) + + @property + def default_dataset(self): + """google.cloud.bigquery.dataset.DatasetReference: the default dataset + to use for unqualified table names in the query or :data:`None` if not + set. + + The ``default_dataset`` setter accepts: + + - a :class:`~google.cloud.bigquery.dataset.Dataset`, or + - a :class:`~google.cloud.bigquery.dataset.DatasetReference`, or + - a :class:`str` of the fully-qualified dataset ID in standard SQL + format. The value must include a project ID and dataset ID + separated by ``.``. For example: ``your-project.your_dataset``. 
+ + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.default_dataset + """ + prop = self._get_sub_prop("defaultDataset") + if prop is not None: + prop = DatasetReference.from_api_repr(prop) + return prop + + @default_dataset.setter + def default_dataset(self, value): + if value is None: + self._set_sub_prop("defaultDataset", None) + return + + if isinstance(value, six.string_types): + value = DatasetReference.from_string(value) + + if isinstance(value, (Dataset, DatasetListItem)): + value = value.reference + + resource = value.to_api_repr() + self._set_sub_prop("defaultDataset", resource) + + @property + def destination(self): + """google.cloud.bigquery.table.TableReference: table where results are + written or :data:`None` if not set. + + The ``destination`` setter accepts: + + - a :class:`~google.cloud.bigquery.table.Table`, or + - a :class:`~google.cloud.bigquery.table.TableReference`, or + - a :class:`str` of the fully-qualified table ID in standard SQL + format. The value must include a project ID, dataset ID, and table + ID, each separated by ``.``. For example: + ``your-project.your_dataset.your_table``. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.destination_table + """ + prop = self._get_sub_prop("destinationTable") + if prop is not None: + prop = TableReference.from_api_repr(prop) + return prop + + @destination.setter + def destination(self, value): + if value is None: + self._set_sub_prop("destinationTable", None) + return + + value = _table_arg_to_table_ref(value) + resource = value.to_api_repr() + self._set_sub_prop("destinationTable", resource) + + @property + def dry_run(self): + """bool: :data:`True` if this query should be a dry run to estimate + costs. 
+ + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfiguration.FIELDS.dry_run + """ + return self._properties.get("dryRun") + + @dry_run.setter + def dry_run(self, value): + self._properties["dryRun"] = value + + @property + def flatten_results(self): + """bool: Flatten nested/repeated fields in results. (Legacy SQL only) + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.flatten_results + """ + return self._get_sub_prop("flattenResults") + + @flatten_results.setter + def flatten_results(self, value): + self._set_sub_prop("flattenResults", value) + + @property + def maximum_billing_tier(self): + """int: Deprecated. Changes the billing tier to allow high-compute + queries. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.maximum_billing_tier + """ + return self._get_sub_prop("maximumBillingTier") + + @maximum_billing_tier.setter + def maximum_billing_tier(self, value): + self._set_sub_prop("maximumBillingTier", value) + + @property + def maximum_bytes_billed(self): + """int: Maximum bytes to be billed for this job or :data:`None` if not set. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.maximum_bytes_billed + """ + return _helpers._int_or_none(self._get_sub_prop("maximumBytesBilled")) + + @maximum_bytes_billed.setter + def maximum_bytes_billed(self, value): + self._set_sub_prop("maximumBytesBilled", str(value)) + + @property + def priority(self): + """google.cloud.bigquery.job.QueryPriority: Priority of the query. 
+ + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.priority + """ + return self._get_sub_prop("priority") + + @priority.setter + def priority(self, value): + self._set_sub_prop("priority", value) + + @property + def query_parameters(self): + """List[Union[google.cloud.bigquery.query.ArrayQueryParameter, \ + google.cloud.bigquery.query.ScalarQueryParameter, \ + google.cloud.bigquery.query.StructQueryParameter]]: list of parameters + for parameterized query (empty by default) + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.query_parameters + """ + prop = self._get_sub_prop("queryParameters", default=[]) + return _from_api_repr_query_parameters(prop) + + @query_parameters.setter + def query_parameters(self, values): + self._set_sub_prop("queryParameters", _to_api_repr_query_parameters(values)) + + @property + def range_partitioning(self): + """Optional[google.cloud.bigquery.table.RangePartitioning]: + Configures range-based partitioning for destination table. + + .. note:: + **Beta**. The integer range partitioning feature is in a + pre-release state and might change or have limited support. + + Only specify at most one of + :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or + :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. + + Raises: + ValueError: + If the value is not + :class:`~google.cloud.bigquery.table.RangePartitioning` or + :data:`None`. 
+ """ + resource = self._get_sub_prop("rangePartitioning") + if resource is not None: + return RangePartitioning(_properties=resource) + + @range_partitioning.setter + def range_partitioning(self, value): + resource = value + if isinstance(value, RangePartitioning): + resource = value._properties + elif value is not None: + raise ValueError( + "Expected value to be RangePartitioning or None, got {}.".format(value) + ) + self._set_sub_prop("rangePartitioning", resource) + + @property + def udf_resources(self): + """List[google.cloud.bigquery.query.UDFResource]: user + defined function resources (empty by default) + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.user_defined_function_resources + """ + prop = self._get_sub_prop("userDefinedFunctionResources", default=[]) + return _from_api_repr_udf_resources(prop) + + @udf_resources.setter + def udf_resources(self, values): + self._set_sub_prop( + "userDefinedFunctionResources", _to_api_repr_udf_resources(values) + ) + + @property + def use_legacy_sql(self): + """bool: Use legacy SQL syntax. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.use_legacy_sql + """ + return self._get_sub_prop("useLegacySql") + + @use_legacy_sql.setter + def use_legacy_sql(self, value): + self._set_sub_prop("useLegacySql", value) + + @property + def use_query_cache(self): + """bool: Look for the query result in the cache. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.use_query_cache + """ + return self._get_sub_prop("useQueryCache") + + @use_query_cache.setter + def use_query_cache(self, value): + self._set_sub_prop("useQueryCache", value) + + @property + def write_disposition(self): + """google.cloud.bigquery.job.WriteDisposition: Action that occurs if + the destination table already exists. 
+ + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.write_disposition + """ + return self._get_sub_prop("writeDisposition") + + @write_disposition.setter + def write_disposition(self, value): + self._set_sub_prop("writeDisposition", value) + + @property + def table_definitions(self): + """Dict[str, google.cloud.bigquery.external_config.ExternalConfig]: + Definitions for external tables or :data:`None` if not set. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.external_table_definitions + """ + prop = self._get_sub_prop("tableDefinitions") + if prop is not None: + prop = _from_api_repr_table_defs(prop) + return prop + + @table_definitions.setter + def table_definitions(self, values): + self._set_sub_prop("tableDefinitions", _to_api_repr_table_defs(values)) + + @property + def time_partitioning(self): + """Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies + time-based partitioning for the destination table. + + Only specify at most one of + :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or + :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`. + + Raises: + ValueError: + If the value is not + :class:`~google.cloud.bigquery.table.TimePartitioning` or + :data:`None`. + """ + prop = self._get_sub_prop("timePartitioning") + if prop is not None: + prop = TimePartitioning.from_api_repr(prop) + return prop + + @time_partitioning.setter + def time_partitioning(self, value): + api_repr = value + if value is not None: + api_repr = value.to_api_repr() + self._set_sub_prop("timePartitioning", api_repr) + + @property + def clustering_fields(self): + """Optional[List[str]]: Fields defining clustering for the table + + (Defaults to :data:`None`). + + Clustering fields are immutable after table creation. + + .. note:: + + BigQuery supports clustering for both partitioned and + non-partitioned tables. 
+ """ + prop = self._get_sub_prop("clustering") + if prop is not None: + return list(prop.get("fields", ())) + + @clustering_fields.setter + def clustering_fields(self, value): + """Optional[List[str]]: Fields defining clustering for the table + + (Defaults to :data:`None`). + """ + if value is not None: + self._set_sub_prop("clustering", {"fields": value}) + else: + self._del_sub_prop("clustering") + + @property + def schema_update_options(self): + """List[google.cloud.bigquery.job.SchemaUpdateOption]: Specifies + updates to the destination table schema to allow as a side effect of + the query job. + """ + return self._get_sub_prop("schemaUpdateOptions") + + @schema_update_options.setter + def schema_update_options(self, values): + self._set_sub_prop("schemaUpdateOptions", values) + + def to_api_repr(self): + """Build an API representation of the query job config. + + Returns: + Dict: A dictionary in the format used by the BigQuery API. + """ + resource = copy.deepcopy(self._properties) + + # Query parameters have an additional property associated with them + # to indicate if the query is using named or positional parameters. + query_parameters = resource["query"].get("queryParameters") + if query_parameters: + if query_parameters[0].get("name") is None: + resource["query"]["parameterMode"] = "POSITIONAL" + else: + resource["query"]["parameterMode"] = "NAMED" + + return resource + + +class QueryJob(_AsyncJob): + """Asynchronous job: query tables. + + Args: + job_id (str): the job's ID, within the project belonging to ``client``. + + query (str): SQL query string. + + client (google.cloud.bigquery.client.Client): + A client which holds credentials and project configuration + for the dataset (which requires a project). + + job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]): + Extra configuration options for the query job. 
+ """ + + _JOB_TYPE = "query" + _UDF_KEY = "userDefinedFunctionResources" + + def __init__(self, job_id, query, client, job_config=None): + super(QueryJob, self).__init__(job_id, client) + + if job_config is None: + job_config = QueryJobConfig() + if job_config.use_legacy_sql is None: + job_config.use_legacy_sql = False + + self._properties["configuration"] = job_config._properties + self._configuration = job_config + + if query: + _helpers._set_sub_prop( + self._properties, ["configuration", "query", "query"], query + ) + + self._query_results = None + self._done_timeout = None + self._transport_timeout = None + + @property + def allow_large_results(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.allow_large_results`. + """ + return self._configuration.allow_large_results + + @property + def create_disposition(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.create_disposition`. + """ + return self._configuration.create_disposition + + @property + def default_dataset(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.default_dataset`. + """ + return self._configuration.default_dataset + + @property + def destination(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.destination`. + """ + return self._configuration.destination + + @property + def destination_encryption_configuration(self): + """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom + encryption configuration for the destination table. + + Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` + if using default encryption. + + See + :attr:`google.cloud.bigquery.job.QueryJobConfig.destination_encryption_configuration`. + """ + return self._configuration.destination_encryption_configuration + + @property + def dry_run(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.dry_run`. 
+ """ + return self._configuration.dry_run + + @property + def flatten_results(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.flatten_results`. + """ + return self._configuration.flatten_results + + @property + def priority(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.priority`. + """ + return self._configuration.priority + + @property + def query(self): + """str: The query text used in this query job. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.query + """ + return _helpers._get_sub_prop( + self._properties, ["configuration", "query", "query"] + ) + + @property + def query_parameters(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.query_parameters`. + """ + return self._configuration.query_parameters + + @property + def udf_resources(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.udf_resources`. + """ + return self._configuration.udf_resources + + @property + def use_legacy_sql(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.use_legacy_sql`. + """ + return self._configuration.use_legacy_sql + + @property + def use_query_cache(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.use_query_cache`. + """ + return self._configuration.use_query_cache + + @property + def write_disposition(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.write_disposition`. + """ + return self._configuration.write_disposition + + @property + def maximum_billing_tier(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.maximum_billing_tier`. + """ + return self._configuration.maximum_billing_tier + + @property + def maximum_bytes_billed(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.maximum_bytes_billed`. 
+ """ + return self._configuration.maximum_bytes_billed + + @property + def range_partitioning(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.range_partitioning`. + """ + return self._configuration.range_partitioning + + @property + def table_definitions(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.table_definitions`. + """ + return self._configuration.table_definitions + + @property + def time_partitioning(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.time_partitioning`. + """ + return self._configuration.time_partitioning + + @property + def clustering_fields(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.clustering_fields`. + """ + return self._configuration.clustering_fields + + @property + def schema_update_options(self): + """See + :attr:`google.cloud.bigquery.job.QueryJobConfig.schema_update_options`. + """ + return self._configuration.schema_update_options + + def to_api_repr(self): + """Generate a resource for :meth:`_begin`.""" + # Use to_api_repr to allow for some configuration properties to be set + # automatically. + configuration = self._configuration.to_api_repr() + return { + "jobReference": self._properties["jobReference"], + "configuration": configuration, + } + + @classmethod + def from_api_repr(cls, resource, client): + """Factory: construct a job given its API representation + + Args: + resource (Dict): dataset job representation returned from the API + + client (google.cloud.bigquery.client.Client): + Client which holds credentials and project + configuration for the dataset. + + Returns: + google.cloud.bigquery.job.QueryJob: Job parsed from ``resource``. + """ + cls._check_resource_config(resource) + job_ref = _JobReference._from_api_repr(resource["jobReference"]) + job = cls(job_ref, None, client=client) + job._set_properties(resource) + return job + + @property + def query_plan(self): + """Return query plan from job statistics, if present. 
+ + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.query_plan + + Returns: + List[google.cloud.bigquery.job.QueryPlanEntry]: + mappings describing the query plan, or an empty list + if the query has not yet completed. + """ + plan_entries = self._job_statistics().get("queryPlan", ()) + return [QueryPlanEntry.from_api_repr(entry) for entry in plan_entries] + + @property + def timeline(self): + """List[TimelineEntry]: Return the query execution timeline + from job statistics. + """ + raw = self._job_statistics().get("timeline", ()) + return [TimelineEntry.from_api_repr(entry) for entry in raw] + + @property + def total_bytes_processed(self): + """Return total bytes processed from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.total_bytes_processed + + Returns: + Optional[int]: + Total bytes processed by the job, or None if job is not + yet complete. + """ + result = self._job_statistics().get("totalBytesProcessed") + if result is not None: + result = int(result) + return result + + @property + def total_bytes_billed(self): + """Return total bytes billed from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.total_bytes_billed + + Returns: + Optional[int]: + Total bytes billed for the job, or None if job is not + yet complete. + """ + result = self._job_statistics().get("totalBytesBilled") + if result is not None: + result = int(result) + return result + + @property + def billing_tier(self): + """Return billing tier from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.billing_tier + + Returns: + Optional[int]: + Billing tier used by the job, or None if job is not + yet complete. 
+ """ + return self._job_statistics().get("billingTier") + + @property + def cache_hit(self): + """Return whether or not query results were served from cache. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.cache_hit + + Returns: + Optional[bool]: + whether the query results were returned from cache, or None + if job is not yet complete. + """ + return self._job_statistics().get("cacheHit") + + @property + def ddl_operation_performed(self): + """Optional[str]: Return the DDL operation performed. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.ddl_operation_performed + + """ + return self._job_statistics().get("ddlOperationPerformed") + + @property + def ddl_target_routine(self): + """Optional[google.cloud.bigquery.routine.RoutineReference]: Return the DDL target routine, present + for CREATE/DROP FUNCTION/PROCEDURE queries. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.ddl_target_routine + """ + prop = self._job_statistics().get("ddlTargetRoutine") + if prop is not None: + prop = RoutineReference.from_api_repr(prop) + return prop + + @property + def ddl_target_table(self): + """Optional[google.cloud.bigquery.table.TableReference]: Return the DDL target table, present + for CREATE/DROP TABLE/VIEW queries. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.ddl_target_table + """ + prop = self._job_statistics().get("ddlTargetTable") + if prop is not None: + prop = TableReference.from_api_repr(prop) + return prop + + @property + def num_dml_affected_rows(self): + """Return the number of DML rows affected by the job. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.num_dml_affected_rows + + Returns: + Optional[int]: + number of DML rows affected by the job, or None if job is not + yet complete. 
+ """ + result = self._job_statistics().get("numDmlAffectedRows") + if result is not None: + result = int(result) + return result + + @property + def slot_millis(self): + """Union[int, None]: Slot-milliseconds used by this query job.""" + return _helpers._int_or_none(self._job_statistics().get("totalSlotMs")) + + @property + def statement_type(self): + """Return statement type from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.statement_type + + Returns: + Optional[str]: + type of statement used by the job, or None if job is not + yet complete. + """ + return self._job_statistics().get("statementType") + + @property + def referenced_tables(self): + """Return referenced tables from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.referenced_tables + + Returns: + List[google.cloud.bigquery.table.TableReference]: + references to the tables used by the query, or an empty list + if the query has not yet completed. + """ + tables = [] + datasets_by_project_name = {} + + for table in self._job_statistics().get("referencedTables", ()): + + t_project = table["projectId"] + + ds_id = table["datasetId"] + t_dataset = datasets_by_project_name.get((t_project, ds_id)) + if t_dataset is None: + t_dataset = DatasetReference(t_project, ds_id) + datasets_by_project_name[(t_project, ds_id)] = t_dataset + + t_name = table["tableId"] + tables.append(t_dataset.table(t_name)) + + return tables + + @property + def undeclared_query_parameters(self): + """Return undeclared query parameters from job statistics, if present. 
+ + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.undeclared_query_parameters + + Returns: + List[Union[ \ + google.cloud.bigquery.query.ArrayQueryParameter, \ + google.cloud.bigquery.query.ScalarQueryParameter, \ + google.cloud.bigquery.query.StructQueryParameter \ + ]]: + Undeclared parameters, or an empty list if the query has + not yet completed. + """ + parameters = [] + undeclared = self._job_statistics().get("undeclaredQueryParameters", ()) + + for parameter in undeclared: + p_type = parameter["parameterType"] + + if "arrayType" in p_type: + klass = ArrayQueryParameter + elif "structTypes" in p_type: + klass = StructQueryParameter + else: + klass = ScalarQueryParameter + + parameters.append(klass.from_api_repr(parameter)) + + return parameters + + @property + def estimated_bytes_processed(self): + """Return the estimated number of bytes processed by the query. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.estimated_bytes_processed + + Returns: + Optional[int]: + estimated bytes processed by the query, or None if job is not + yet complete. + """ + result = self._job_statistics().get("estimatedBytesProcessed") + if result is not None: + result = int(result) + return result + + def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): + """Refresh the job and check if it is complete. + + Args: + retry (Optional[google.api_core.retry.Retry]): + How to retry the call that retrieves query results. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + reload (Optional[bool]): + If ``True``, make an API call to refresh the job state of + unfinished jobs before checking. Default ``True``. + + Returns: + bool: True if the job is complete, False otherwise. + """ + is_done = ( + # Only consider a QueryJob complete when we know we have the final + # query results available. 
+ self._query_results is not None + and self._query_results.complete + and self.state == _DONE_STATE + ) + # Do not refresh if the state is already done, as the job will not + # change once complete. + if not reload or is_done: + return is_done + + # Since the API to getQueryResults can hang up to the timeout value + # (default of 10 seconds), set the timeout parameter to ensure that + # the timeout from the futures API is respected. See: + # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4135 + timeout_ms = None + if self._done_timeout is not None: + # Subtract a buffer for context switching, network latency, etc. + api_timeout = self._done_timeout - _TIMEOUT_BUFFER_SECS + api_timeout = max(min(api_timeout, 10), 0) + self._done_timeout -= api_timeout + self._done_timeout = max(0, self._done_timeout) + timeout_ms = int(api_timeout * 1000) + + # If an explicit timeout is not given, fall back to the transport timeout + # stored in _blocking_poll() in the process of polling for job completion. + transport_timeout = timeout if timeout is not None else self._transport_timeout + + self._query_results = self._client._get_query_results( + self.job_id, + retry, + project=self.project, + timeout_ms=timeout_ms, + location=self.location, + timeout=transport_timeout, + ) + + # Only reload the job once we know the query is complete. + # This will ensure that fields such as the destination table are + # correctly populated. + if self._query_results.complete and self.state != _DONE_STATE: + self.reload(retry=retry, timeout=transport_timeout) + + return self.state == _DONE_STATE + + def _blocking_poll(self, timeout=None, **kwargs): + self._done_timeout = timeout + self._transport_timeout = timeout + super(QueryJob, self)._blocking_poll(timeout=timeout, **kwargs) + + @staticmethod + def _format_for_exception(query, job_id): + """Format a query for the output in exception message. + + Args: + query (str): The SQL query to format. 
+ job_id (str): The ID of the job that ran the query. + + Returns: + str: A formatted query text. + """ + template = "\n\n(job ID: {job_id})\n\n{header}\n\n{ruler}\n{body}\n{ruler}" + + lines = query.splitlines() + max_line_len = max(len(line) for line in lines) + + header = "-----Query Job SQL Follows-----" + header = "{:^{total_width}}".format(header, total_width=max_line_len + 5) + + # Print out a "ruler" above and below the SQL so we can judge columns. + # Left pad for the line numbers (4 digits plus ":"). + ruler = " |" + " . |" * (max_line_len // 10) + + # Put line numbers next to the SQL. + body = "\n".join( + "{:4}:{}".format(n, line) for n, line in enumerate(lines, start=1) + ) + + return template.format(job_id=job_id, header=header, ruler=ruler, body=body) + + def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): + """API call: begin the job via a POST request + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert + + Args: + client (Optional[google.cloud.bigquery.client.Client]): + The client to use. If not passed, falls back to the ``client`` + associated with the job object or``NoneType``. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + + Raises: + ValueError: If the job has already begun. + """ + + try: + super(QueryJob, self)._begin(client=client, retry=retry, timeout=timeout) + except exceptions.GoogleAPICallError as exc: + exc.message += self._format_for_exception(self.query, self.job_id) + exc.query_job = self + raise + + def result( + self, + page_size=None, + max_results=None, + retry=DEFAULT_RETRY, + timeout=None, + start_index=None, + ): + """Start the job and wait for it to complete and get the result. + + Args: + page_size (Optional[int]): + The maximum number of rows in each page of results from this + request. Non-positive values are ignored. 
+ max_results (Optional[int]): + The maximum total number of rows from this request. + retry (Optional[google.api_core.retry.Retry]): + How to retry the call that retrieves rows. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + If multiple requests are made under the hood, ``timeout`` + applies to each individual request. + start_index (Optional[int]): + The zero-based index of the starting row to read. + + Returns: + google.cloud.bigquery.table.RowIterator: + Iterator of row data + :class:`~google.cloud.bigquery.table.Row`-s. During each + page, the iterator will have the ``total_rows`` attribute + set, which counts the total number of rows **in the result + set** (this is distinct from the total number of rows in the + current page: ``iterator.page.num_items``). + + If the query is a special query that produces no results, e.g. + a DDL query, an ``_EmptyRowIterator`` instance is returned. + + Raises: + google.cloud.exceptions.GoogleAPICallError: + If the job failed. + concurrent.futures.TimeoutError: + If the job did not complete in the given timeout. + """ + try: + super(QueryJob, self).result(retry=retry, timeout=timeout) + except exceptions.GoogleAPICallError as exc: + exc.message += self._format_for_exception(self.query, self.job_id) + exc.query_job = self + raise + except requests.exceptions.Timeout as exc: + six.raise_from(concurrent.futures.TimeoutError, exc) + + # If the query job is complete but there are no query results, this was + # special job, such as a DDL query. Return an empty result set to + # indicate success and avoid calling tabledata.list on a table which + # can't be read (such as a view table). 
+ if self._query_results.total_rows is None: + return _EmptyRowIterator() + + schema = self._query_results.schema + dest_table_ref = self.destination + dest_table = Table(dest_table_ref, schema=schema) + dest_table._properties["numRows"] = self._query_results.total_rows + rows = self._client.list_rows( + dest_table, + page_size=page_size, + max_results=max_results, + start_index=start_index, + retry=retry, + timeout=timeout, + ) + rows._preserve_order = _contains_order_by(self.query) + return rows + + # If changing the signature of this method, make sure to apply the same + # changes to table.RowIterator.to_arrow() + def to_arrow( + self, + progress_bar_type=None, + bqstorage_client=None, + create_bqstorage_client=True, + ): + """[Beta] Create a class:`pyarrow.Table` by loading all pages of a + table or query. + + Args: + progress_bar_type (Optional[str]): + If set, use the `tqdm `_ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + Possible values of ``progress_bar_type`` include: + + ``None`` + No progress bar. + ``'tqdm'`` + Use the :func:`tqdm.tqdm` function to print a progress bar + to :data:`sys.stderr`. + ``'tqdm_notebook'`` + Use the :func:`tqdm.tqdm_notebook` function to display a + progress bar as a Jupyter notebook widget. + ``'tqdm_gui'`` + Use the :func:`tqdm.tqdm_gui` function to display a + progress bar as a graphical dialog box. + bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): + A BigQuery Storage API client. If supplied, use the faster + BigQuery Storage API to fetch rows from BigQuery. This API + is a billable API. + + This method requires the ``pyarrow`` and + ``google-cloud-bigquery-storage`` libraries. + + Reading from a specific partition or snapshot is not + currently supported by this method. + create_bqstorage_client (Optional[bool]): + If ``True`` (default), create a BigQuery Storage API client + using the default API settings. 
The BigQuery Storage API + is a faster way to fetch rows from BigQuery. See the + ``bqstorage_client`` parameter for more information. + + This argument does nothing if ``bqstorage_client`` is supplied. + + ..versionadded:: 1.24.0 + + Returns: + pyarrow.Table + A :class:`pyarrow.Table` populated with row data and column + headers from the query results. The column headers are derived + from the destination table's schema. + + Raises: + ValueError: + If the :mod:`pyarrow` library cannot be imported. + + ..versionadded:: 1.17.0 + """ + return self.result().to_arrow( + progress_bar_type=progress_bar_type, + bqstorage_client=bqstorage_client, + create_bqstorage_client=create_bqstorage_client, + ) + + # If changing the signature of this method, make sure to apply the same + # changes to table.RowIterator.to_dataframe() + def to_dataframe( + self, + bqstorage_client=None, + dtypes=None, + progress_bar_type=None, + create_bqstorage_client=True, + date_as_object=True, + ): + """Return a pandas DataFrame from a QueryJob + + Args: + bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): + A BigQuery Storage API client. If supplied, use the faster + BigQuery Storage API to fetch rows from BigQuery. This + API is a billable API. + + This method requires the ``fastavro`` and + ``google-cloud-bigquery-storage`` libraries. + + Reading from a specific partition or snapshot is not + currently supported by this method. + + dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): + A dictionary of column names pandas ``dtype``s. The provided + ``dtype`` is used when constructing the series for the column + specified. Otherwise, the default pandas behavior is used. + + progress_bar_type (Optional[str]): + If set, use the `tqdm `_ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + See + :func:`~google.cloud.bigquery.table.RowIterator.to_dataframe` + for details. 
+ + ..versionadded:: 1.11.0 + create_bqstorage_client (Optional[bool]): + If ``True`` (default), create a BigQuery Storage API client + using the default API settings. The BigQuery Storage API + is a faster way to fetch rows from BigQuery. See the + ``bqstorage_client`` parameter for more information. + + This argument does nothing if ``bqstorage_client`` is supplied. + + ..versionadded:: 1.24.0 + + date_as_object (Optional[bool]): + If ``True`` (default), cast dates to objects. If ``False``, convert + to datetime64[ns] dtype. + + ..versionadded:: 1.26.0 + + Returns: + A :class:`~pandas.DataFrame` populated with row data and column + headers from the query results. The column headers are derived + from the destination table's schema. + + Raises: + ValueError: If the `pandas` library cannot be imported. + """ + return self.result().to_dataframe( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + ) + + def __iter__(self): + return iter(self.result()) + + +class QueryPlanEntryStep(object): + """Map a single step in a query plan entry. + + Args: + kind (str): step type. + substeps (List): names of substeps. + """ + + def __init__(self, kind, substeps): + self.kind = kind + self.substeps = list(substeps) + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct instance from the JSON repr. + + Args: + resource (Dict): JSON representation of the entry. + + Returns: + google.cloud.bigquery.job.QueryPlanEntryStep: + New instance built from the resource. + """ + return cls(kind=resource.get("kind"), substeps=resource.get("substeps", ())) + + def __eq__(self, other): + if not isinstance(other, self.__class__): + return NotImplemented + return self.kind == other.kind and self.substeps == other.substeps + + +class QueryPlanEntry(object): + """QueryPlanEntry represents a single stage of a query execution plan. 
+ + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#ExplainQueryStage + for the underlying API representation within query statistics. + """ + + def __init__(self): + self._properties = {} + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct instance from the JSON repr. + + Args: + resource(Dict[str: object]): + ExplainQueryStage representation returned from API. + + Returns: + google.cloud.bigquery.job.QueryPlanEntry: + Query plan entry parsed from ``resource``. + """ + entry = cls() + entry._properties = resource + return entry + + @property + def name(self): + """Optional[str]: Human-readable name of the stage.""" + return self._properties.get("name") + + @property + def entry_id(self): + """Optional[str]: Unique ID for the stage within the plan.""" + return self._properties.get("id") + + @property + def start(self): + """Optional[Datetime]: Datetime when the stage started.""" + if self._properties.get("startMs") is None: + return None + return _helpers._datetime_from_microseconds( + int(self._properties.get("startMs")) * 1000.0 + ) + + @property + def end(self): + """Optional[Datetime]: Datetime when the stage ended.""" + if self._properties.get("endMs") is None: + return None + return _helpers._datetime_from_microseconds( + int(self._properties.get("endMs")) * 1000.0 + ) + + @property + def input_stages(self): + """List(int): Entry IDs for stages that were inputs for this stage.""" + if self._properties.get("inputStages") is None: + return [] + return [ + _helpers._int_or_none(entry) + for entry in self._properties.get("inputStages") + ] + + @property + def parallel_inputs(self): + """Optional[int]: Number of parallel input segments within + the stage. 
+ """ + return _helpers._int_or_none(self._properties.get("parallelInputs")) + + @property + def completed_parallel_inputs(self): + """Optional[int]: Number of parallel input segments completed.""" + return _helpers._int_or_none(self._properties.get("completedParallelInputs")) + + @property + def wait_ms_avg(self): + """Optional[int]: Milliseconds the average worker spent waiting to + be scheduled. + """ + return _helpers._int_or_none(self._properties.get("waitMsAvg")) + + @property + def wait_ms_max(self): + """Optional[int]: Milliseconds the slowest worker spent waiting to + be scheduled. + """ + return _helpers._int_or_none(self._properties.get("waitMsMax")) + + @property + def wait_ratio_avg(self): + """Optional[float]: Ratio of time the average worker spent waiting + to be scheduled, relative to the longest time spent by any worker in + any stage of the overall plan. + """ + return self._properties.get("waitRatioAvg") + + @property + def wait_ratio_max(self): + """Optional[float]: Ratio of time the slowest worker spent waiting + to be scheduled, relative to the longest time spent by any worker in + any stage of the overall plan. + """ + return self._properties.get("waitRatioMax") + + @property + def read_ms_avg(self): + """Optional[int]: Milliseconds the average worker spent reading + input. + """ + return _helpers._int_or_none(self._properties.get("readMsAvg")) + + @property + def read_ms_max(self): + """Optional[int]: Milliseconds the slowest worker spent reading + input. + """ + return _helpers._int_or_none(self._properties.get("readMsMax")) + + @property + def read_ratio_avg(self): + """Optional[float]: Ratio of time the average worker spent reading + input, relative to the longest time spent by any worker in any stage + of the overall plan. 
+ """ + return self._properties.get("readRatioAvg") + + @property + def read_ratio_max(self): + """Optional[float]: Ratio of time the slowest worker spent reading + to be scheduled, relative to the longest time spent by any worker in + any stage of the overall plan. + """ + return self._properties.get("readRatioMax") + + @property + def compute_ms_avg(self): + """Optional[int]: Milliseconds the average worker spent on CPU-bound + processing. + """ + return _helpers._int_or_none(self._properties.get("computeMsAvg")) + + @property + def compute_ms_max(self): + """Optional[int]: Milliseconds the slowest worker spent on CPU-bound + processing. + """ + return _helpers._int_or_none(self._properties.get("computeMsMax")) + + @property + def compute_ratio_avg(self): + """Optional[float]: Ratio of time the average worker spent on + CPU-bound processing, relative to the longest time spent by any + worker in any stage of the overall plan. + """ + return self._properties.get("computeRatioAvg") + + @property + def compute_ratio_max(self): + """Optional[float]: Ratio of time the slowest worker spent on + CPU-bound processing, relative to the longest time spent by any + worker in any stage of the overall plan. + """ + return self._properties.get("computeRatioMax") + + @property + def write_ms_avg(self): + """Optional[int]: Milliseconds the average worker spent writing + output data. + """ + return _helpers._int_or_none(self._properties.get("writeMsAvg")) + + @property + def write_ms_max(self): + """Optional[int]: Milliseconds the slowest worker spent writing + output data. + """ + return _helpers._int_or_none(self._properties.get("writeMsMax")) + + @property + def write_ratio_avg(self): + """Optional[float]: Ratio of time the average worker spent writing + output data, relative to the longest time spent by any worker in any + stage of the overall plan. 
+ """ + return self._properties.get("writeRatioAvg") + + @property + def write_ratio_max(self): + """Optional[float]: Ratio of time the slowest worker spent writing + output data, relative to the longest time spent by any worker in any + stage of the overall plan. + """ + return self._properties.get("writeRatioMax") + + @property + def records_read(self): + """Optional[int]: Number of records read by this stage.""" + return _helpers._int_or_none(self._properties.get("recordsRead")) + + @property + def records_written(self): + """Optional[int]: Number of records written by this stage.""" + return _helpers._int_or_none(self._properties.get("recordsWritten")) + + @property + def status(self): + """Optional[str]: status of this stage.""" + return self._properties.get("status") + + @property + def shuffle_output_bytes(self): + """Optional[int]: Number of bytes written by this stage to + intermediate shuffle. + """ + return _helpers._int_or_none(self._properties.get("shuffleOutputBytes")) + + @property + def shuffle_output_bytes_spilled(self): + """Optional[int]: Number of bytes written by this stage to + intermediate shuffle and spilled to disk. + """ + return _helpers._int_or_none(self._properties.get("shuffleOutputBytesSpilled")) + + @property + def steps(self): + """List(QueryPlanEntryStep): List of step operations performed by + each worker in the stage. + """ + return [ + QueryPlanEntryStep.from_api_repr(step) + for step in self._properties.get("steps", []) + ] + + +class TimelineEntry(object): + """TimelineEntry represents progress of a query job at a particular + point in time. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#querytimelinesample + for the underlying API representation within query statistics. + """ + + def __init__(self): + self._properties = {} + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct instance from the JSON repr. 
+ + Args: + resource(Dict[str: object]): + QueryTimelineSample representation returned from API. + + Returns: + google.cloud.bigquery.TimelineEntry: + Timeline sample parsed from ``resource``. + """ + entry = cls() + entry._properties = resource + return entry + + @property + def elapsed_ms(self): + """Optional[int]: Milliseconds elapsed since start of query + execution.""" + return _helpers._int_or_none(self._properties.get("elapsedMs")) + + @property + def active_units(self): + """Optional[int]: Current number of input units being processed + by workers, reported as largest value since the last sample.""" + return _helpers._int_or_none(self._properties.get("activeUnits")) + + @property + def pending_units(self): + """Optional[int]: Current number of input units remaining for + query stages active at this sample time.""" + return _helpers._int_or_none(self._properties.get("pendingUnits")) + + @property + def completed_units(self): + """Optional[int]: Current number of input units completed by + this query.""" + return _helpers._int_or_none(self._properties.get("completedUnits")) + + @property + def slot_millis(self): + """Optional[int]: Cumulative slot-milliseconds consumed by + this query.""" + return _helpers._int_or_none(self._properties.get("totalSlotMs")) diff --git a/tests/unit/job/__init__.py b/tests/unit/job/__init__.py new file mode 100644 index 000000000..c6334245a --- /dev/null +++ b/tests/unit/job/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/unit/job/helpers.py b/tests/unit/job/helpers.py new file mode 100644 index 000000000..f928054f6 --- /dev/null +++ b/tests/unit/job/helpers.py @@ -0,0 +1,198 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import mock +from google.api_core import exceptions + + +def _make_credentials(): + import google.auth.credentials + + return mock.Mock(spec=google.auth.credentials.Credentials) + + +def _make_client(project="test-project", connection=None): + from google.cloud.bigquery.client import Client + + if connection is None: + connection = _make_connection() + + client = Client(project=project, credentials=_make_credentials(), _http=object()) + client._connection = connection + return client + + +def _make_connection(*responses): + import google.cloud.bigquery._http + from google.cloud.exceptions import NotFound + + mock_conn = mock.create_autospec(google.cloud.bigquery._http.Connection) + mock_conn.api_request.side_effect = list(responses) + [NotFound("miss")] + return mock_conn + + +def _make_retriable_exception(): + return exceptions.TooManyRequests( + "retriable exception", errors=[{"reason": "rateLimitExceeded"}] + ) + + +def _make_job_resource( + creation_time_ms=1437767599006, + started_time_ms=1437767600007, + ended_time_ms=1437767601008, + started=False, + ended=False, + etag="abc-def-hjk", + 
endpoint="https://bigquery.googleapis.com", + job_type="load", + job_id="a-random-id", + project_id="some-project", + user_email="bq-user@example.com", +): + resource = { + "status": {"state": "PENDING"}, + "configuration": {job_type: {}}, + "statistics": {"creationTime": creation_time_ms, job_type: {}}, + "etag": etag, + "id": "{}:{}".format(project_id, job_id), + "jobReference": {"projectId": project_id, "jobId": job_id}, + "selfLink": "{}/bigquery/v2/projects/{}/jobs/{}".format( + endpoint, project_id, job_id + ), + "user_email": user_email, + } + + if started or ended: + resource["statistics"]["startTime"] = started_time_ms + resource["status"]["state"] = "RUNNING" + + if ended: + resource["statistics"]["endTime"] = ended_time_ms + resource["status"]["state"] = "DONE" + + if job_type == "query": + resource["configuration"]["query"]["destinationTable"] = { + "projectId": project_id, + "datasetId": "_temp_dataset", + "tableId": "_temp_table", + } + + return resource + + +class _Base(unittest.TestCase): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.table import TableReference + + ENDPOINT = "https://bigquery.googleapis.com" + PROJECT = "project" + SOURCE1 = "http://example.com/source1.csv" + DS_ID = "dataset_id" + DS_REF = DatasetReference(PROJECT, DS_ID) + TABLE_ID = "table_id" + TABLE_REF = TableReference(DS_REF, TABLE_ID) + JOB_ID = "JOB_ID" + JOB_TYPE = "unknown" + KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1" + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def _setUpConstants(self): + import datetime + from google.cloud._helpers import UTC + + self.WHEN_TS = 1437767599.006 + self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC) + self.ETAG = "ETAG" + self.FULL_JOB_ID = "%s:%s" % (self.PROJECT, self.JOB_ID) + self.RESOURCE_URL = "{}/bigquery/v2/projects/{}/jobs/{}".format( + self.ENDPOINT, self.PROJECT, self.JOB_ID + ) + 
self.USER_EMAIL = "phred@example.com" + + def _table_ref(self, table_id): + from google.cloud.bigquery.table import TableReference + + return TableReference(self.DS_REF, table_id) + + def _make_resource(self, started=False, ended=False): + self._setUpConstants() + return _make_job_resource( + creation_time_ms=int(self.WHEN_TS * 1000), + started_time_ms=int(self.WHEN_TS * 1000), + ended_time_ms=int(self.WHEN_TS * 1000) + 1000000, + started=started, + ended=ended, + etag=self.ETAG, + endpoint=self.ENDPOINT, + job_type=self.JOB_TYPE, + job_id=self.JOB_ID, + project_id=self.PROJECT, + user_email=self.USER_EMAIL, + ) + + def _verifyInitialReadonlyProperties(self, job): + # root elements of resource + self.assertIsNone(job.etag) + self.assertIsNone(job.self_link) + self.assertIsNone(job.user_email) + + # derived from resource['statistics'] + self.assertIsNone(job.created) + self.assertIsNone(job.started) + self.assertIsNone(job.ended) + + # derived from resource['status'] + self.assertIsNone(job.error_result) + self.assertIsNone(job.errors) + self.assertIsNone(job.state) + + def _verifyReadonlyResourceProperties(self, job, resource): + from datetime import timedelta + + statistics = resource.get("statistics", {}) + + if "creationTime" in statistics: + self.assertEqual(job.created, self.WHEN) + else: + self.assertIsNone(job.created) + + if "startTime" in statistics: + self.assertEqual(job.started, self.WHEN) + else: + self.assertIsNone(job.started) + + if "endTime" in statistics: + self.assertEqual(job.ended, self.WHEN + timedelta(seconds=1000)) + else: + self.assertIsNone(job.ended) + + if "etag" in resource: + self.assertEqual(job.etag, self.ETAG) + else: + self.assertIsNone(job.etag) + + if "selfLink" in resource: + self.assertEqual(job.self_link, self.RESOURCE_URL) + else: + self.assertIsNone(job.self_link) + + if "user_email" in resource: + self.assertEqual(job.user_email, self.USER_EMAIL) + else: + self.assertIsNone(job.user_email) diff --git 
a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py new file mode 100644 index 000000000..90d4388b8 --- /dev/null +++ b/tests/unit/job/test_base.py @@ -0,0 +1,1105 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +import unittest + +from google.api_core import exceptions +import google.api_core.retry +import mock +from six.moves import http_client + +from .helpers import _make_client +from .helpers import _make_connection +from .helpers import _make_retriable_exception +from .helpers import _make_job_resource + + +class Test__error_result_to_exception(unittest.TestCase): + def _call_fut(self, *args, **kwargs): + from google.cloud.bigquery import job + + return job._error_result_to_exception(*args, **kwargs) + + def test_simple(self): + error_result = {"reason": "invalid", "message": "bad request"} + exception = self._call_fut(error_result) + self.assertEqual(exception.code, http_client.BAD_REQUEST) + self.assertTrue(exception.message.startswith("bad request")) + self.assertIn(error_result, exception.errors) + + def test_missing_reason(self): + error_result = {} + exception = self._call_fut(error_result) + self.assertEqual(exception.code, http_client.INTERNAL_SERVER_ERROR) + + +class Test_JobReference(unittest.TestCase): + JOB_ID = "job-id" + PROJECT = "test-project-123" + LOCATION = "us-central" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery import job + + return job._JobReference + + def 
_make_one(self, job_id, project, location): + return self._get_target_class()(job_id, project, location) + + def test_ctor(self): + job_ref = self._make_one(self.JOB_ID, self.PROJECT, self.LOCATION) + + self.assertEqual(job_ref.job_id, self.JOB_ID) + self.assertEqual(job_ref.project, self.PROJECT) + self.assertEqual(job_ref.location, self.LOCATION) + + def test__to_api_repr(self): + job_ref = self._make_one(self.JOB_ID, self.PROJECT, self.LOCATION) + + self.assertEqual( + job_ref._to_api_repr(), + { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": self.LOCATION, + }, + ) + + def test_from_api_repr(self): + api_repr = { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": self.LOCATION, + } + + job_ref = self._get_target_class()._from_api_repr(api_repr) + + self.assertEqual(job_ref.job_id, self.JOB_ID) + self.assertEqual(job_ref.project, self.PROJECT) + self.assertEqual(job_ref.location, self.LOCATION) + + +class Test_AsyncJob(unittest.TestCase): + JOB_ID = "job-id" + PROJECT = "test-project-123" + LOCATION = "us-central" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery import job + + return job._AsyncJob + + def _make_one(self, job_id, client): + return self._get_target_class()(job_id, client) + + def _make_derived_class(self): + class Derived(self._get_target_class()): + _JOB_TYPE = "derived" + + return Derived + + def _make_derived(self, job_id, client): + return self._make_derived_class()(job_id, client) + + @staticmethod + def _job_reference(job_id, project, location): + from google.cloud.bigquery import job + + return job._JobReference(job_id, project, location) + + def test_ctor_w_bare_job_id(self): + import threading + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + + self.assertEqual(job.job_id, self.JOB_ID) + self.assertEqual(job.project, self.PROJECT) + self.assertIsNone(job.location) + self.assertIs(job._client, client) + self.assertEqual( + 
job._properties, + {"jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}}, + ) + self.assertIsInstance(job._completion_lock, type(threading.Lock())) + self.assertEqual( + job.path, "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + ) + + def test_ctor_w_job_ref(self): + import threading + + other_project = "other-project-234" + client = _make_client(project=other_project) + job_ref = self._job_reference(self.JOB_ID, self.PROJECT, self.LOCATION) + job = self._make_one(job_ref, client) + + self.assertEqual(job.job_id, self.JOB_ID) + self.assertEqual(job.project, self.PROJECT) + self.assertEqual(job.location, self.LOCATION) + self.assertIs(job._client, client) + self.assertEqual( + job._properties, + { + "jobReference": { + "projectId": self.PROJECT, + "location": self.LOCATION, + "jobId": self.JOB_ID, + } + }, + ) + self.assertFalse(job._result_set) + self.assertIsInstance(job._completion_lock, type(threading.Lock())) + self.assertEqual( + job.path, "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + ) + + def test__require_client_w_none(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + + self.assertIs(job._require_client(None), client) + + def test__require_client_w_other(self): + client = _make_client(project=self.PROJECT) + other = object() + job = self._make_one(self.JOB_ID, client) + + self.assertIs(job._require_client(other), other) + + def test_job_type(self): + client = _make_client(project=self.PROJECT) + derived = self._make_derived(self.JOB_ID, client) + + self.assertEqual(derived.job_type, "derived") + + def test_parent_job_id(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + + self.assertIsNone(job.parent_job_id) + job._properties["statistics"] = {"parentJobId": "parent-job-123"} + self.assertEqual(job.parent_job_id, "parent-job-123") + + def test_script_statistics(self): + client = _make_client(project=self.PROJECT) + job = 
self._make_one(self.JOB_ID, client) + + self.assertIsNone(job.script_statistics) + job._properties["statistics"] = { + "scriptStatistics": { + "evaluationKind": "EXPRESSION", + "stackFrames": [ + { + "startLine": 5, + "startColumn": 29, + "endLine": 9, + "endColumn": 14, + "text": "QUERY TEXT", + } + ], + } + } + script_stats = job.script_statistics + self.assertEqual(script_stats.evaluation_kind, "EXPRESSION") + stack_frames = script_stats.stack_frames + self.assertEqual(len(stack_frames), 1) + stack_frame = stack_frames[0] + self.assertIsNone(stack_frame.procedure_id) + self.assertEqual(stack_frame.start_line, 5) + self.assertEqual(stack_frame.start_column, 29) + self.assertEqual(stack_frame.end_line, 9) + self.assertEqual(stack_frame.end_column, 14) + self.assertEqual(stack_frame.text, "QUERY TEXT") + + def test_num_child_jobs(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + + self.assertEqual(job.num_child_jobs, 0) + job._properties["statistics"] = {"numChildJobs": "17"} + self.assertEqual(job.num_child_jobs, 17) + + def test_labels_miss(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertEqual(job.labels, {}) + + def test_labels_update_in_place(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + labels = job.labels + labels["foo"] = "bar" # update in place + self.assertEqual(job.labels, {"foo": "bar"}) + + def test_labels_hit(self): + labels = {"foo": "bar"} + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties["labels"] = labels + self.assertEqual(job.labels, labels) + + def test_etag(self): + etag = "ETAG-123" + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.etag) + job._properties["etag"] = etag + self.assertEqual(job.etag, etag) + + def test_self_link(self): + self_link = 
"https://api.example.com/123" + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.self_link) + job._properties["selfLink"] = self_link + self.assertEqual(job.self_link, self_link) + + def test_user_email(self): + user_email = "user@example.com" + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.user_email) + job._properties["user_email"] = user_email + self.assertEqual(job.user_email, user_email) + + @staticmethod + def _datetime_and_millis(): + import datetime + import pytz + from google.cloud._helpers import _millis + + now = datetime.datetime.utcnow().replace( + microsecond=123000, tzinfo=pytz.UTC # stats timestamps have ms precision + ) + return now, _millis(now) + + def test_created(self): + now, millis = self._datetime_and_millis() + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.created) + stats = job._properties["statistics"] = {} + self.assertIsNone(job.created) + stats["creationTime"] = millis + self.assertEqual(job.created, now) + + def test_started(self): + now, millis = self._datetime_and_millis() + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.started) + stats = job._properties["statistics"] = {} + self.assertIsNone(job.started) + stats["startTime"] = millis + self.assertEqual(job.started, now) + + def test_ended(self): + now, millis = self._datetime_and_millis() + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.ended) + stats = job._properties["statistics"] = {} + self.assertIsNone(job.ended) + stats["endTime"] = millis + self.assertEqual(job.ended, now) + + def test__job_statistics(self): + statistics = {"foo": "bar"} + client = _make_client(project=self.PROJECT) + derived = self._make_derived(self.JOB_ID, client) + 
self.assertEqual(derived._job_statistics(), {}) + stats = derived._properties["statistics"] = {} + self.assertEqual(derived._job_statistics(), {}) + stats["derived"] = statistics + self.assertEqual(derived._job_statistics(), statistics) + + def test_error_result(self): + error_result = { + "debugInfo": "DEBUG INFO", + "location": "LOCATION", + "message": "MESSAGE", + "reason": "REASON", + } + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.error_result) + status = job._properties["status"] = {} + self.assertIsNone(job.error_result) + status["errorResult"] = error_result + self.assertEqual(job.error_result, error_result) + + def test_errors(self): + errors = [ + { + "debugInfo": "DEBUG INFO", + "location": "LOCATION", + "message": "MESSAGE", + "reason": "REASON", + } + ] + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.errors) + status = job._properties["status"] = {} + self.assertIsNone(job.errors) + status["errors"] = errors + self.assertEqual(job.errors, errors) + + def test_state(self): + state = "STATE" + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.state) + status = job._properties["status"] = {} + self.assertIsNone(job.state) + status["state"] = state + self.assertEqual(job.state, state) + + def _set_properties_job(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._set_future_result = mock.Mock() + job._properties = { + "jobReference": job._properties["jobReference"], + "foo": "bar", + } + return job + + def test__set_properties_no_stats(self): + config = {"test": True} + resource = {"configuration": config} + job = self._set_properties_job() + + job._set_properties(resource) + + self.assertEqual(job._properties, resource) + + def test__set_properties_w_creation_time(self): + now, millis = 
self._datetime_and_millis() + config = {"test": True} + stats = {"creationTime": str(millis)} + resource = {"configuration": config, "statistics": stats} + job = self._set_properties_job() + + job._set_properties(resource) + + cleaned = copy.deepcopy(resource) + cleaned["statistics"]["creationTime"] = float(millis) + self.assertEqual(job._properties, cleaned) + + def test__set_properties_w_start_time(self): + now, millis = self._datetime_and_millis() + config = {"test": True} + stats = {"startTime": str(millis)} + resource = {"configuration": config, "statistics": stats} + job = self._set_properties_job() + + job._set_properties(resource) + + cleaned = copy.deepcopy(resource) + cleaned["statistics"]["startTime"] = float(millis) + self.assertEqual(job._properties, cleaned) + + def test__set_properties_w_end_time(self): + now, millis = self._datetime_and_millis() + config = {"test": True} + stats = {"endTime": str(millis)} + resource = {"configuration": config, "statistics": stats} + job = self._set_properties_job() + + job._set_properties(resource) + + cleaned = copy.deepcopy(resource) + cleaned["statistics"]["endTime"] = float(millis) + self.assertEqual(job._properties, cleaned) + + def test__check_resource_config_missing_job_ref(self): + resource = {} + klass = self._make_derived_class() + + with self.assertRaises(KeyError): + klass._check_resource_config(resource) + + def test__check_resource_config_missing_job_id(self): + resource = {"jobReference": {}} + klass = self._make_derived_class() + + with self.assertRaises(KeyError): + klass._check_resource_config(resource) + + def test__check_resource_config_missing_configuration(self): + resource = {"jobReference": {"jobId": self.JOB_ID}} + klass = self._make_derived_class() + + with self.assertRaises(KeyError): + klass._check_resource_config(resource) + + def test__check_resource_config_missing_config_type(self): + resource = {"jobReference": {"jobId": self.JOB_ID}, "configuration": {}} + klass = 
self._make_derived_class() + + with self.assertRaises(KeyError): + klass._check_resource_config(resource) + + def test__check_resource_config_ok(self): + derived_config = {"foo": "bar"} + resource = { + "jobReference": {"jobId": self.JOB_ID}, + "configuration": {"derived": derived_config}, + } + klass = self._make_derived_class() + + # Should not throw. + klass._check_resource_config(resource) + + def test__build_resource(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + resource = job._build_resource() + assert resource["jobReference"]["jobId"] == self.JOB_ID + + def test_to_api_repr(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + resource = job.to_api_repr() + assert resource["jobReference"]["jobId"] == self.JOB_ID + + def test__begin_already(self): + job = self._set_properties_job() + job._properties["status"] = {"state": "WHATEVER"} + + with self.assertRaises(ValueError): + job._begin() + + def test__begin_defaults(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + resource = { + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + job = self._set_properties_job() + builder = job.to_api_repr = mock.Mock() + builder.return_value = resource + call_api = job._client._call_api = mock.Mock() + call_api.return_value = resource + path = "/projects/{}/jobs".format(self.PROJECT) + job._begin() + + call_api.assert_called_once_with( + DEFAULT_RETRY, + span_name="BigQuery.job.begin", + span_attributes={"path": path}, + job_ref=job, + method="POST", + path=path, + data=resource, + timeout=None, + ) + self.assertEqual(job._properties, resource) + + def test__begin_explicit(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + other_project = "other-project-234" + resource = { + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": 
None, + }, + "configuration": {"test": True}, + } + job = self._set_properties_job() + builder = job.to_api_repr = mock.Mock() + builder.return_value = resource + client = _make_client(project=other_project) + call_api = client._call_api = mock.Mock() + call_api.return_value = resource + retry = DEFAULT_RETRY.with_deadline(1) + path = "/projects/{}/jobs".format(self.PROJECT) + job._begin(client=client, retry=retry, timeout=7.5) + + call_api.assert_called_once_with( + retry, + span_name="BigQuery.job.begin", + span_attributes={"path": path}, + job_ref=job, + method="POST", + path=path, + data=resource, + timeout=7.5, + ) + self.assertEqual(job._properties, resource) + + def test_exists_defaults_miss(self): + from google.cloud.exceptions import NotFound + from google.cloud.bigquery.retry import DEFAULT_RETRY + + job = self._set_properties_job() + job._properties["jobReference"]["location"] = self.LOCATION + call_api = job._client._call_api = mock.Mock() + call_api.side_effect = NotFound("testing") + self.assertFalse(job.exists()) + + call_api.assert_called_once_with( + DEFAULT_RETRY, + span_name="BigQuery.job.exists", + span_attributes={ + "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + }, + job_ref=job, + method="GET", + path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), + query_params={"fields": "id", "location": self.LOCATION}, + timeout=None, + ) + + def test_exists_explicit_hit(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + other_project = "other-project-234" + resource = { + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + job = self._set_properties_job() + client = _make_client(project=other_project) + call_api = client._call_api = mock.Mock() + call_api.return_value = resource + retry = DEFAULT_RETRY.with_deadline(1) + self.assertTrue(job.exists(client=client, retry=retry)) + + call_api.assert_called_once_with( + retry, + 
span_name="BigQuery.job.exists", + span_attributes={ + "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + }, + job_ref=job, + method="GET", + path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), + query_params={"fields": "id"}, + timeout=None, + ) + + def test_exists_w_timeout(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + PATH = "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + job = self._set_properties_job() + call_api = job._client._call_api = mock.Mock() + job.exists(timeout=7.5) + + call_api.assert_called_once_with( + DEFAULT_RETRY, + span_name="BigQuery.job.exists", + span_attributes={"path": PATH}, + job_ref=job, + method="GET", + path=PATH, + query_params={"fields": "id"}, + timeout=7.5, + ) + + def test_reload_defaults(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + resource = { + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + job = self._set_properties_job() + job._properties["jobReference"]["location"] = self.LOCATION + call_api = job._client._call_api = mock.Mock() + call_api.return_value = resource + job.reload() + + call_api.assert_called_once_with( + DEFAULT_RETRY, + span_name="BigQuery.job.reload", + span_attributes={ + "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + }, + job_ref=job, + method="GET", + path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), + query_params={"location": self.LOCATION}, + timeout=None, + ) + self.assertEqual(job._properties, resource) + + def test_reload_explicit(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + other_project = "other-project-234" + resource = { + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + job = self._set_properties_job() + client = _make_client(project=other_project) + call_api = client._call_api = 
mock.Mock() + call_api.return_value = resource + retry = DEFAULT_RETRY.with_deadline(1) + job.reload(client=client, retry=retry, timeout=4.2) + + call_api.assert_called_once_with( + retry, + span_name="BigQuery.job.reload", + span_attributes={ + "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) + }, + job_ref=job, + method="GET", + path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), + query_params={}, + timeout=4.2, + ) + self.assertEqual(job._properties, resource) + + def test_cancel_defaults(self): + resource = { + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + response = {"job": resource} + job = self._set_properties_job() + job._properties["jobReference"]["location"] = self.LOCATION + connection = job._client._connection = _make_connection(response) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertTrue(job.cancel()) + + final_attributes.assert_called() + + connection.api_request.assert_called_once_with( + method="POST", + path="/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID), + query_params={"location": self.LOCATION}, + timeout=None, + ) + self.assertEqual(job._properties, resource) + + def test_cancel_explicit(self): + other_project = "other-project-234" + resource = { + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + response = {"job": resource} + job = self._set_properties_job() + client = _make_client(project=other_project) + connection = client._connection = _make_connection(response) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertTrue(job.cancel(client=client, timeout=7.5)) + + final_attributes.assert_called_with( + {"path": 
"/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID)}, + client, + job, + ) + + connection.api_request.assert_called_once_with( + method="POST", + path="/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID), + query_params={}, + timeout=7.5, + ) + self.assertEqual(job._properties, resource) + + def test_cancel_w_custom_retry(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + api_path = "/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID) + resource = { + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + response = {"job": resource} + job = self._set_properties_job() + + api_request_patcher = mock.patch.object( + job._client._connection, "api_request", side_effect=[ValueError, response] + ) + retry = DEFAULT_RETRY.with_deadline(1).with_predicate( + lambda exc: isinstance(exc, ValueError) + ) + + with api_request_patcher as fake_api_request: + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + result = job.cancel(retry=retry, timeout=7.5) + + final_attributes.assert_called() + + self.assertTrue(result) + self.assertEqual(job._properties, resource) + self.assertEqual( + fake_api_request.call_args_list, + [ + mock.call(method="POST", path=api_path, query_params={}, timeout=7.5), + mock.call( + method="POST", path=api_path, query_params={}, timeout=7.5 + ), # was retried once + ], + ) + + def test__set_future_result_wo_done(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + set_exception = job.set_exception = mock.Mock() + set_result = job.set_result = mock.Mock() + + job._set_future_result() + + set_exception.assert_not_called() + set_result.assert_not_called() + + def test__set_future_result_w_result_set(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + 
job._properties["status"] = {"state": "DONE"} + job._result_set = True + set_exception = job.set_exception = mock.Mock() + set_result = job.set_result = mock.Mock() + + job._set_future_result() + + set_exception.assert_not_called() + set_result.assert_not_called() + + def test__set_future_result_w_done_wo_result_set_w_error(self): + from google.cloud.exceptions import NotFound + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties["status"] = { + "state": "DONE", + "errorResult": {"reason": "notFound", "message": "testing"}, + } + set_exception = job.set_exception = mock.Mock() + set_result = job.set_result = mock.Mock() + + job._set_future_result() + + set_exception.assert_called_once() + args, kw = set_exception.call_args + (exception,) = args + self.assertIsInstance(exception, NotFound) + self.assertEqual(exception.message, "testing") + self.assertEqual(kw, {}) + set_result.assert_not_called() + + def test__set_future_result_w_done_wo_result_set_wo_error(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties["status"] = {"state": "DONE"} + set_exception = job.set_exception = mock.Mock() + set_result = job.set_result = mock.Mock() + + job._set_future_result() + + set_exception.assert_not_called() + set_result.assert_called_once_with(job) + + def test_done_defaults_wo_state(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + reload_ = job.reload = mock.Mock() + + self.assertFalse(job.done()) + + reload_.assert_called_once_with(retry=DEFAULT_RETRY, timeout=None) + + def test_done_explicit_wo_state(self): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + reload_ = job.reload = mock.Mock() + retry = DEFAULT_RETRY.with_deadline(1) + + 
self.assertFalse(job.done(retry=retry, timeout=7.5)) + + reload_.assert_called_once_with(retry=retry, timeout=7.5) + + def test_done_already(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties["status"] = {"state": "DONE"} + + self.assertTrue(job.done()) + + def test_result_default_wo_state(self): + begun_job_resource = _make_job_resource( + job_id=self.JOB_ID, project_id=self.PROJECT, started=True + ) + done_job_resource = _make_job_resource( + job_id=self.JOB_ID, project_id=self.PROJECT, started=True, ended=True + ) + conn = _make_connection( + _make_retriable_exception(), + begun_job_resource, + _make_retriable_exception(), + done_job_resource, + ) + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, client) + + self.assertIs(job.result(), job) + + begin_call = mock.call( + method="POST", + path=f"/projects/{self.PROJECT}/jobs", + data={"jobReference": {"jobId": self.JOB_ID, "projectId": self.PROJECT}}, + timeout=None, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={}, + timeout=None, + ) + conn.api_request.assert_has_calls( + [begin_call, begin_call, reload_call, reload_call] + ) + + def test_result_w_retry_wo_state(self): + begun_job_resource = _make_job_resource( + job_id=self.JOB_ID, project_id=self.PROJECT, started=True + ) + done_job_resource = _make_job_resource( + job_id=self.JOB_ID, project_id=self.PROJECT, started=True, ended=True + ) + conn = _make_connection( + exceptions.NotFound("not normally retriable"), + begun_job_resource, + # The call to done() / reload() does not get the custom retry + # policy passed to it, so we don't throw a non-retriable + # exception here. 
See: + # https://github.com/googleapis/python-bigquery/issues/24 + _make_retriable_exception(), + done_job_resource, + ) + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, client) + custom_predicate = mock.Mock() + custom_predicate.return_value = True + custom_retry = google.api_core.retry.Retry(predicate=custom_predicate) + self.assertIs(job.result(retry=custom_retry), job) + + begin_call = mock.call( + method="POST", + path=f"/projects/{self.PROJECT}/jobs", + data={"jobReference": {"jobId": self.JOB_ID, "projectId": self.PROJECT}}, + timeout=None, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={}, + timeout=None, + ) + conn.api_request.assert_has_calls( + [begin_call, begin_call, reload_call, reload_call] + ) + + def test_result_explicit_w_state(self): + conn = _make_connection() + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, client) + # Use _set_properties() instead of directly modifying _properties so + # that the result state is set properly. 
+ job_resource = job._properties + job_resource["status"] = {"state": "DONE"} + job._set_properties(job_resource) + timeout = 1 + + self.assertIs(job.result(timeout=timeout), job) + + conn.api_request.assert_not_called() + + def test_cancelled_wo_error_result(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + + self.assertFalse(job.cancelled()) + + def test_cancelled_w_error_result_not_stopped(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties["status"] = {"errorResult": {"reason": "other"}} + + self.assertFalse(job.cancelled()) + + def test_cancelled_w_error_result_w_stopped(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties["status"] = {"errorResult": {"reason": "stopped"}} + + self.assertTrue(job.cancelled()) + + +class Test_JobConfig(unittest.TestCase): + JOB_TYPE = "testing" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery import job + + return job._JobConfig + + def _make_one(self, job_type=JOB_TYPE): + return self._get_target_class()(job_type) + + def test_ctor(self): + job_config = self._make_one() + self.assertEqual(job_config._job_type, self.JOB_TYPE) + self.assertEqual(job_config._properties, {self.JOB_TYPE: {}}) + + def test_fill_from_default(self): + from google.cloud.bigquery import QueryJobConfig + + job_config = QueryJobConfig() + job_config.dry_run = True + job_config.maximum_bytes_billed = 1000 + + default_job_config = QueryJobConfig() + default_job_config.use_query_cache = True + default_job_config.maximum_bytes_billed = 2000 + + final_job_config = job_config._fill_from_default(default_job_config) + self.assertTrue(final_job_config.dry_run) + self.assertTrue(final_job_config.use_query_cache) + self.assertEqual(final_job_config.maximum_bytes_billed, 1000) + + def test_fill_from_default_conflict(self): + from google.cloud.bigquery import 
QueryJobConfig + + basic_job_config = QueryJobConfig() + conflicting_job_config = self._make_one("conflicting_job_type") + self.assertNotEqual( + basic_job_config._job_type, conflicting_job_config._job_type + ) + + with self.assertRaises(TypeError): + basic_job_config._fill_from_default(conflicting_job_config) + + @mock.patch("google.cloud.bigquery._helpers._get_sub_prop") + def test__get_sub_prop_wo_default(self, _get_sub_prop): + job_config = self._make_one() + key = "key" + self.assertIs(job_config._get_sub_prop(key), _get_sub_prop.return_value) + _get_sub_prop.assert_called_once_with( + job_config._properties, [self.JOB_TYPE, key], default=None + ) + + @mock.patch("google.cloud.bigquery._helpers._get_sub_prop") + def test__get_sub_prop_w_default(self, _get_sub_prop): + job_config = self._make_one() + key = "key" + default = "default" + self.assertIs( + job_config._get_sub_prop(key, default=default), _get_sub_prop.return_value + ) + _get_sub_prop.assert_called_once_with( + job_config._properties, [self.JOB_TYPE, key], default=default + ) + + @mock.patch("google.cloud.bigquery._helpers._set_sub_prop") + def test__set_sub_prop(self, _set_sub_prop): + job_config = self._make_one() + key = "key" + value = "value" + job_config._set_sub_prop(key, value) + _set_sub_prop.assert_called_once_with( + job_config._properties, [self.JOB_TYPE, key], value + ) + + def test_to_api_repr(self): + job_config = self._make_one() + expected = job_config._properties = {self.JOB_TYPE: {"foo": "bar"}} + found = job_config.to_api_repr() + self.assertEqual(found, expected) + self.assertIsNot(found, expected) # copied + + # 'from_api_repr' cannot be tested on '_JobConfig', because it presumes + # the ctor can be called w/o arguments + + def test_labels_miss(self): + job_config = self._make_one() + self.assertEqual(job_config.labels, {}) + + def test_labels_update_in_place(self): + job_config = self._make_one() + labels = job_config.labels + labels["foo"] = "bar" # update in place + 
self.assertEqual(job_config.labels, {"foo": "bar"}) + + def test_labels_hit(self): + labels = {"foo": "bar"} + job_config = self._make_one() + job_config._properties["labels"] = labels + self.assertEqual(job_config.labels, labels) + + def test_labels_setter_invalid(self): + labels = object() + job_config = self._make_one() + with self.assertRaises(ValueError): + job_config.labels = labels + + def test_labels_setter(self): + labels = {"foo": "bar"} + job_config = self._make_one() + job_config.labels = labels + self.assertEqual(job_config._properties["labels"], labels) diff --git a/tests/unit/job/test_copy.py b/tests/unit/job/test_copy.py new file mode 100644 index 000000000..fb0c87391 --- /dev/null +++ b/tests/unit/job/test_copy.py @@ -0,0 +1,477 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import mock + +from .helpers import _Base +from .helpers import _make_client +from .helpers import _make_connection + + +class TestCopyJobConfig(_Base): + JOB_TYPE = "copy" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import CopyJobConfig + + return CopyJobConfig + + def test_ctor_w_properties(self): + from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import WriteDisposition + + create_disposition = CreateDisposition.CREATE_NEVER + write_disposition = WriteDisposition.WRITE_TRUNCATE + config = self._get_target_class()( + create_disposition=create_disposition, write_disposition=write_disposition + ) + + self.assertEqual(config.create_disposition, create_disposition) + self.assertEqual(config.write_disposition, write_disposition) + + def test_to_api_repr_with_encryption(self): + from google.cloud.bigquery.encryption_configuration import ( + EncryptionConfiguration, + ) + + config = self._make_one() + config.destination_encryption_configuration = EncryptionConfiguration( + kms_key_name=self.KMS_KEY_NAME + ) + resource = config.to_api_repr() + self.assertEqual( + resource, + { + "copy": { + "destinationEncryptionConfiguration": { + "kmsKeyName": self.KMS_KEY_NAME + } + } + }, + ) + + def test_to_api_repr_with_encryption_none(self): + config = self._make_one() + config.destination_encryption_configuration = None + resource = config.to_api_repr() + self.assertEqual( + resource, {"copy": {"destinationEncryptionConfiguration": None}} + ) + + +class TestCopyJob(_Base): + JOB_TYPE = "copy" + SOURCE_TABLE = "source_table" + DESTINATION_TABLE = "destination_table" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import CopyJob + + return CopyJob + + def _make_resource(self, started=False, ended=False): + resource = super(TestCopyJob, self)._make_resource(started, ended) + config = resource["configuration"]["copy"] + config["sourceTables"] = [ + { + "projectId": self.PROJECT, 
+ "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + } + ] + config["destinationTable"] = { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, + } + + return resource + + def _verifyResourceProperties(self, job, resource): + self._verifyReadonlyResourceProperties(job, resource) + + config = resource.get("configuration", {}).get("copy") + + table_ref = config["destinationTable"] + self.assertEqual(job.destination.project, table_ref["projectId"]) + self.assertEqual(job.destination.dataset_id, table_ref["datasetId"]) + self.assertEqual(job.destination.table_id, table_ref["tableId"]) + + sources = config.get("sourceTables") + if sources is None: + sources = [config["sourceTable"]] + self.assertEqual(len(sources), len(job.sources)) + for table_ref, table in zip(sources, job.sources): + self.assertEqual(table.project, table_ref["projectId"]) + self.assertEqual(table.dataset_id, table_ref["datasetId"]) + self.assertEqual(table.table_id, table_ref["tableId"]) + + if "createDisposition" in config: + self.assertEqual(job.create_disposition, config["createDisposition"]) + else: + self.assertIsNone(job.create_disposition) + + if "writeDisposition" in config: + self.assertEqual(job.write_disposition, config["writeDisposition"]) + else: + self.assertIsNone(job.write_disposition) + + if "destinationEncryptionConfiguration" in config: + self.assertIsNotNone(job.destination_encryption_configuration) + self.assertEqual( + job.destination_encryption_configuration.kms_key_name, + config["destinationEncryptionConfiguration"]["kmsKeyName"], + ) + else: + self.assertIsNone(job.destination_encryption_configuration) + + def test_ctor(self): + client = _make_client(project=self.PROJECT) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_ID, [source], destination, client) + self.assertEqual(job.destination, destination) + self.assertEqual(job.sources, [source]) 
+ self.assertIs(job._client, client) + self.assertEqual(job.job_type, self.JOB_TYPE) + self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) + + self._verifyInitialReadonlyProperties(job) + + # set/read from resource['configuration']['copy'] + self.assertIsNone(job.create_disposition) + self.assertIsNone(job.write_disposition) + self.assertIsNone(job.destination_encryption_configuration) + + def test_from_api_repr_missing_identity(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = {} + klass = self._get_target_class() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_missing_config(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + } + klass = self._get_target_class() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_bare(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "copy": { + "sourceTables": [ + { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + } + ], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, + }, + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_with_encryption(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "copy": { + "sourceTables": [ + 
{ + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + } + ], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, + }, + "destinationEncryptionConfiguration": { + "kmsKeyName": self.KMS_KEY_NAME + }, + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_repr_w_sourcetable(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "copy": { + "sourceTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + }, + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, + }, + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_repr_wo_sources(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "copy": { + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, + } + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + with self.assertRaises(KeyError): + _ = job.sources + + def test_from_api_repr_w_properties(self): + from google.cloud.bigquery.job import CreateDisposition + + client = _make_client(project=self.PROJECT) + RESOURCE = self._make_resource() + copy_config = RESOURCE["configuration"]["copy"] + copy_config["createDisposition"] = 
CreateDisposition.CREATE_IF_NEEDED + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_bound_client(self): + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource() + # Ensure None for missing server-set props + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_ID, [source], destination, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "copy": { + "sourceTables": [ + { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + } + ], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, + }, + } + }, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_alternate_client(self): + from google.cloud.bigquery.job import CopyJobConfig + + from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import WriteDisposition + + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource(ended=True) + COPY_CONFIGURATION = { + "sourceTables": [ + { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + } + ], + "destinationTable": { + "projectId": self.PROJECT, + 
"datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, + }, + "createDisposition": CreateDisposition.CREATE_NEVER, + "writeDisposition": WriteDisposition.WRITE_TRUNCATE, + } + RESOURCE["configuration"]["copy"] = COPY_CONFIGURATION + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection(RESOURCE) + client2 = _make_client(project=self.PROJECT, connection=conn2) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + config = CopyJobConfig() + config.create_disposition = CreateDisposition.CREATE_NEVER + config.write_disposition = WriteDisposition.WRITE_TRUNCATE + job = self._make_one(self.JOB_ID, [source], destination, client1, config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin(client=client2) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"copy": COPY_CONFIGURATION}, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_exists_miss_w_bound_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + conn = _make_connection() + client = _make_client(project=self.PROJECT, connection=conn) + + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_ID, [source], destination, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertFalse(job.exists()) + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={"fields": "id"}, 
timeout=None + ) + + def test_exists_hit_w_alternate_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection({}) + client2 = _make_client(project=self.PROJECT, connection=conn2) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_ID, [source], destination, client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertTrue(job.exists(client=client2)) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None + ) + + def test_reload_w_bound_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + RESOURCE = self._make_resource() + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + source = self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_ID, [source], destination, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_reload_w_alternate_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + RESOURCE = self._make_resource() + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection(RESOURCE) + client2 = _make_client(project=self.PROJECT, connection=conn2) + source 
= self._table_ref(self.SOURCE_TABLE) + destination = self._table_ref(self.DESTINATION_TABLE) + job = self._make_one(self.JOB_ID, [source], destination, client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload(client=client2) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, RESOURCE) diff --git a/tests/unit/job/test_extract.py b/tests/unit/job/test_extract.py new file mode 100644 index 000000000..4c9411d0d --- /dev/null +++ b/tests/unit/job/test_extract.py @@ -0,0 +1,437 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import mock + +from .helpers import _Base +from .helpers import _make_client +from .helpers import _make_connection + + +class TestExtractJobConfig(_Base): + JOB_TYPE = "extract" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import ExtractJobConfig + + return ExtractJobConfig + + def test_ctor_w_properties(self): + config = self._get_target_class()(field_delimiter="\t", print_header=True) + + self.assertEqual(config.field_delimiter, "\t") + self.assertTrue(config.print_header) + + def test_to_api_repr(self): + from google.cloud.bigquery import job + + config = self._make_one() + config.compression = job.Compression.SNAPPY + config.destination_format = job.DestinationFormat.AVRO + config.field_delimiter = "ignored for avro" + config.print_header = False + config._properties["extract"]["someNewField"] = "some-value" + config.use_avro_logical_types = True + resource = config.to_api_repr() + self.assertEqual( + resource, + { + "extract": { + "compression": "SNAPPY", + "destinationFormat": "AVRO", + "fieldDelimiter": "ignored for avro", + "printHeader": False, + "someNewField": "some-value", + "useAvroLogicalTypes": True, + } + }, + ) + + def test_from_api_repr(self): + cls = self._get_target_class() + config = cls.from_api_repr( + { + "extract": { + "compression": "NONE", + "destinationFormat": "CSV", + "fieldDelimiter": "\t", + "printHeader": True, + "someNewField": "some-value", + "useAvroLogicalTypes": False, + } + } + ) + self.assertEqual(config.compression, "NONE") + self.assertEqual(config.destination_format, "CSV") + self.assertEqual(config.field_delimiter, "\t") + self.assertEqual(config.print_header, True) + self.assertEqual(config._properties["extract"]["someNewField"], "some-value") + self.assertEqual(config.use_avro_logical_types, False) + + +class TestExtractJob(_Base): + JOB_TYPE = "extract" + SOURCE_TABLE = "source_table" + DESTINATION_URI = "gs://bucket_name/object_name" + + @staticmethod + def _get_target_class(): + 
from google.cloud.bigquery.job import ExtractJob + + return ExtractJob + + def _make_resource(self, started=False, ended=False): + resource = super(TestExtractJob, self)._make_resource(started, ended) + config = resource["configuration"]["extract"] + config["sourceTable"] = { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + } + config["destinationUris"] = [self.DESTINATION_URI] + return resource + + def _verifyResourceProperties(self, job, resource): + self._verifyReadonlyResourceProperties(job, resource) + + config = resource.get("configuration", {}).get("extract") + + self.assertEqual(job.destination_uris, config["destinationUris"]) + + if "sourceTable" in config: + table_ref = config["sourceTable"] + self.assertEqual(job.source.project, table_ref["projectId"]) + self.assertEqual(job.source.dataset_id, table_ref["datasetId"]) + self.assertEqual(job.source.table_id, table_ref["tableId"]) + else: + model_ref = config["sourceModel"] + self.assertEqual(job.source.project, model_ref["projectId"]) + self.assertEqual(job.source.dataset_id, model_ref["datasetId"]) + self.assertEqual(job.source.model_id, model_ref["modelId"]) + + if "compression" in config: + self.assertEqual(job.compression, config["compression"]) + else: + self.assertIsNone(job.compression) + + if "destinationFormat" in config: + self.assertEqual(job.destination_format, config["destinationFormat"]) + else: + self.assertIsNone(job.destination_format) + + if "fieldDelimiter" in config: + self.assertEqual(job.field_delimiter, config["fieldDelimiter"]) + else: + self.assertIsNone(job.field_delimiter) + + if "printHeader" in config: + self.assertEqual(job.print_header, config["printHeader"]) + else: + self.assertIsNone(job.print_header) + + def test_ctor(self): + from google.cloud.bigquery.table import Table + + client = _make_client(project=self.PROJECT) + source = Table(self.TABLE_REF) + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) + 
self.assertEqual(job.source.project, self.PROJECT) + self.assertEqual(job.source.dataset_id, self.DS_ID) + self.assertEqual(job.source.table_id, self.TABLE_ID) + self.assertEqual(job.destination_uris, [self.DESTINATION_URI]) + self.assertIs(job._client, client) + self.assertEqual(job.job_type, self.JOB_TYPE) + self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) + + self._verifyInitialReadonlyProperties(job) + + # set/read from resource['configuration']['extract'] + self.assertIsNone(job.compression) + self.assertIsNone(job.destination_format) + self.assertIsNone(job.field_delimiter) + self.assertIsNone(job.print_header) + + def test_destination_uri_file_counts(self): + file_counts = 23 + client = _make_client(project=self.PROJECT) + job = self._make_one( + self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client + ) + self.assertIsNone(job.destination_uri_file_counts) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.destination_uri_file_counts) + + extract_stats = statistics["extract"] = {} + self.assertIsNone(job.destination_uri_file_counts) + + extract_stats["destinationUriFileCounts"] = [str(file_counts)] + self.assertEqual(job.destination_uri_file_counts, [file_counts]) + + def test_from_api_repr_missing_identity(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = {} + klass = self._get_target_class() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_missing_config(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + } + klass = self._get_target_class() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_bare(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + 
RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "extract": { + "sourceTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + }, + "destinationUris": [self.DESTINATION_URI], + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_repr_for_model(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "extract": { + "sourceModel": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "modelId": "model_id", + }, + "destinationUris": [self.DESTINATION_URI], + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_repr_w_properties(self): + from google.cloud.bigquery.job import Compression + + client = _make_client(project=self.PROJECT) + RESOURCE = self._make_resource() + extract_config = RESOURCE["configuration"]["extract"] + extract_config["compression"] = Compression.GZIP + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_bound_client(self): + from google.cloud.bigquery.dataset import DatasetReference + + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource() + # Ensure None for missing server-set props + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + 
source_dataset = DatasetReference(self.PROJECT, self.DS_ID) + source = source_dataset.table(self.SOURCE_TABLE) + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "extract": { + "sourceTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + }, + "destinationUris": [self.DESTINATION_URI], + } + }, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_alternate_client(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job import Compression + from google.cloud.bigquery.job import DestinationFormat + from google.cloud.bigquery.job import ExtractJobConfig + + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource(ended=True) + EXTRACT_CONFIGURATION = { + "sourceTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.SOURCE_TABLE, + }, + "destinationUris": [self.DESTINATION_URI], + "compression": Compression.GZIP, + "destinationFormat": DestinationFormat.NEWLINE_DELIMITED_JSON, + "fieldDelimiter": "|", + "printHeader": False, + } + RESOURCE["configuration"]["extract"] = EXTRACT_CONFIGURATION + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection(RESOURCE) + client2 = _make_client(project=self.PROJECT, connection=conn2) + source_dataset = DatasetReference(self.PROJECT, self.DS_ID) + source = source_dataset.table(self.SOURCE_TABLE) + config = ExtractJobConfig() + config.compression = Compression.GZIP + config.destination_format = 
DestinationFormat.NEWLINE_DELIMITED_JSON + config.field_delimiter = "|" + config.print_header = False + job = self._make_one( + self.JOB_ID, source, [self.DESTINATION_URI], client1, config + ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin(client=client2) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"extract": EXTRACT_CONFIGURATION}, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_exists_miss_w_bound_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + conn = _make_connection() + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one( + self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client + ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertFalse(job.exists()) + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None + ) + + def test_exists_hit_w_alternate_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection({}) + client2 = _make_client(project=self.PROJECT, connection=conn2) + job = self._make_one( + self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client1 + ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertTrue(job.exists(client=client2)) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + 
conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None + ) + + def test_reload_w_bound_client(self): + from google.cloud.bigquery.dataset import DatasetReference + + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + RESOURCE = self._make_resource() + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + source_dataset = DatasetReference(self.PROJECT, self.DS_ID) + source = source_dataset.table(self.SOURCE_TABLE) + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload() + + final_attributes.assert_called_with({"path": PATH}, client, job) + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_reload_w_alternate_client(self): + from google.cloud.bigquery.dataset import DatasetReference + + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + RESOURCE = self._make_resource() + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection(RESOURCE) + client2 = _make_client(project=self.PROJECT, connection=conn2) + source_dataset = DatasetReference(self.PROJECT, self.DS_ID) + source = source_dataset.table(self.SOURCE_TABLE) + job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload(client=client2) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, 
RESOURCE) diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py new file mode 100644 index 000000000..70e7860a7 --- /dev/null +++ b/tests/unit/job/test_load.py @@ -0,0 +1,838 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy + +import mock + +from .helpers import _Base +from .helpers import _make_client +from .helpers import _make_connection + + +class TestLoadJob(_Base): + JOB_TYPE = "load" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import LoadJob + + return LoadJob + + def _setUpConstants(self): + super(TestLoadJob, self)._setUpConstants() + self.INPUT_FILES = 2 + self.INPUT_BYTES = 12345 + self.OUTPUT_BYTES = 23456 + self.OUTPUT_ROWS = 345 + + def _make_resource(self, started=False, ended=False): + resource = super(TestLoadJob, self)._make_resource(started, ended) + config = resource["configuration"]["load"] + config["sourceUris"] = [self.SOURCE1] + config["destinationTable"] = { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + } + + if ended: + resource["status"] = {"state": "DONE"} + resource["statistics"]["load"]["inputFiles"] = self.INPUT_FILES + resource["statistics"]["load"]["inputFileBytes"] = self.INPUT_BYTES + resource["statistics"]["load"]["outputBytes"] = self.OUTPUT_BYTES + resource["statistics"]["load"]["outputRows"] = self.OUTPUT_ROWS + + return resource + + def _verifyBooleanConfigProperties(self, job, config): + if 
"allowJaggedRows" in config: + self.assertEqual(job.allow_jagged_rows, config["allowJaggedRows"]) + else: + self.assertIsNone(job.allow_jagged_rows) + if "allowQuotedNewlines" in config: + self.assertEqual(job.allow_quoted_newlines, config["allowQuotedNewlines"]) + else: + self.assertIsNone(job.allow_quoted_newlines) + if "autodetect" in config: + self.assertEqual(job.autodetect, config["autodetect"]) + else: + self.assertIsNone(job.autodetect) + if "ignoreUnknownValues" in config: + self.assertEqual(job.ignore_unknown_values, config["ignoreUnknownValues"]) + else: + self.assertIsNone(job.ignore_unknown_values) + if "useAvroLogicalTypes" in config: + self.assertEqual(job.use_avro_logical_types, config["useAvroLogicalTypes"]) + else: + self.assertIsNone(job.use_avro_logical_types) + + def _verifyEnumConfigProperties(self, job, config): + if "createDisposition" in config: + self.assertEqual(job.create_disposition, config["createDisposition"]) + else: + self.assertIsNone(job.create_disposition) + if "encoding" in config: + self.assertEqual(job.encoding, config["encoding"]) + else: + self.assertIsNone(job.encoding) + if "sourceFormat" in config: + self.assertEqual(job.source_format, config["sourceFormat"]) + else: + self.assertIsNone(job.source_format) + if "writeDisposition" in config: + self.assertEqual(job.write_disposition, config["writeDisposition"]) + else: + self.assertIsNone(job.write_disposition) + if "schemaUpdateOptions" in config: + self.assertEqual(job.schema_update_options, config["schemaUpdateOptions"]) + else: + self.assertIsNone(job.schema_update_options) + + def _verifyResourceProperties(self, job, resource): + self._verifyReadonlyResourceProperties(job, resource) + + config = resource.get("configuration", {}).get("load") + + self._verifyBooleanConfigProperties(job, config) + self._verifyEnumConfigProperties(job, config) + + self.assertEqual(job.source_uris, config["sourceUris"]) + + table_ref = config["destinationTable"] + 
self.assertEqual(job.destination.project, table_ref["projectId"]) + self.assertEqual(job.destination.dataset_id, table_ref["datasetId"]) + self.assertEqual(job.destination.table_id, table_ref["tableId"]) + + if "fieldDelimiter" in config: + self.assertEqual(job.field_delimiter, config["fieldDelimiter"]) + else: + self.assertIsNone(job.field_delimiter) + if "maxBadRecords" in config: + self.assertEqual(job.max_bad_records, config["maxBadRecords"]) + else: + self.assertIsNone(job.max_bad_records) + if "nullMarker" in config: + self.assertEqual(job.null_marker, config["nullMarker"]) + else: + self.assertIsNone(job.null_marker) + if "quote" in config: + self.assertEqual(job.quote_character, config["quote"]) + else: + self.assertIsNone(job.quote_character) + if "skipLeadingRows" in config: + self.assertEqual(str(job.skip_leading_rows), config["skipLeadingRows"]) + else: + self.assertIsNone(job.skip_leading_rows) + + if "destinationEncryptionConfiguration" in config: + self.assertIsNotNone(job.destination_encryption_configuration) + self.assertEqual( + job.destination_encryption_configuration.kms_key_name, + config["destinationEncryptionConfiguration"]["kmsKeyName"], + ) + else: + self.assertIsNone(job.destination_encryption_configuration) + + def test_ctor(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + self.assertEqual(job.destination, self.TABLE_REF) + self.assertEqual(list(job.source_uris), [self.SOURCE1]) + self.assertIs(job._client, client) + self.assertEqual(job.job_type, self.JOB_TYPE) + self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) + + self._verifyInitialReadonlyProperties(job) + + # derived from resource['statistics']['load'] + self.assertIsNone(job.input_file_bytes) + self.assertIsNone(job.input_files) + self.assertIsNone(job.output_bytes) + self.assertIsNone(job.output_rows) + + # set/read from resource['configuration']['load'] + 
self.assertIsNone(job.schema) + self.assertIsNone(job.allow_jagged_rows) + self.assertIsNone(job.allow_quoted_newlines) + self.assertIsNone(job.autodetect) + self.assertIsNone(job.create_disposition) + self.assertIsNone(job.encoding) + self.assertIsNone(job.field_delimiter) + self.assertIsNone(job.ignore_unknown_values) + self.assertIsNone(job.max_bad_records) + self.assertIsNone(job.null_marker) + self.assertIsNone(job.quote_character) + self.assertIsNone(job.skip_leading_rows) + self.assertIsNone(job.source_format) + self.assertIsNone(job.write_disposition) + self.assertIsNone(job.destination_encryption_configuration) + self.assertIsNone(job.destination_table_description) + self.assertIsNone(job.destination_table_friendly_name) + self.assertIsNone(job.range_partitioning) + self.assertIsNone(job.time_partitioning) + self.assertIsNone(job.use_avro_logical_types) + self.assertIsNone(job.clustering_fields) + self.assertIsNone(job.schema_update_options) + + def test_ctor_w_config(self): + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.job import LoadJobConfig + + client = _make_client(project=self.PROJECT) + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + age = SchemaField("age", "INTEGER", mode="REQUIRED") + config = LoadJobConfig() + config.schema = [full_name, age] + job = self._make_one( + self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client, config + ) + self.assertEqual(job.schema, [full_name, age]) + config.destination_table_description = "Description" + expected = {"description": "Description"} + self.assertEqual( + config._properties["load"]["destinationTableProperties"], expected + ) + friendly_name = "Friendly Name" + config._properties["load"]["destinationTableProperties"] = { + "friendlyName": friendly_name + } + self.assertEqual(config.destination_table_friendly_name, friendly_name) + + def test_ctor_w_job_reference(self): + from google.cloud.bigquery import job + + client = 
_make_client(project=self.PROJECT) + job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") + load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) + self.assertEqual(load_job.project, "alternative-project") + self.assertEqual(load_job.location, "US") + + def test_done(self): + client = _make_client(project=self.PROJECT) + resource = self._make_resource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + self.assertTrue(job.done()) + + def test_result(self): + client = _make_client(project=self.PROJECT) + resource = self._make_resource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + + result = job.result() + + self.assertIs(result, job) + + def test_result_invokes_begin(self): + begun_resource = self._make_resource() + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection(begun_resource, done_resource) + client = _make_client(self.PROJECT) + client._connection = connection + + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + job.result() + + self.assertEqual(len(connection.api_request.call_args_list), 2) + begin_request, reload_request = connection.api_request.call_args_list + self.assertEqual(begin_request[1]["method"], "POST") + self.assertEqual(reload_request[1]["method"], "GET") + + def test_schema_setter_non_list(self): + from google.cloud.bigquery.job import LoadJobConfig + + config = LoadJobConfig() + with self.assertRaises(TypeError): + config.schema = object() + + def test_schema_setter_invalid_field(self): + from google.cloud.bigquery.job import LoadJobConfig + from google.cloud.bigquery.schema import SchemaField + + config = LoadJobConfig() + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + with self.assertRaises(ValueError): + config.schema = [full_name, object()] + + def test_schema_setter(self): + from google.cloud.bigquery.job import 
LoadJobConfig + from google.cloud.bigquery.schema import SchemaField + + config = LoadJobConfig() + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + age = SchemaField("age", "INTEGER", mode="REQUIRED") + config.schema = [full_name, age] + self.assertEqual(config.schema, [full_name, age]) + + def test_props_set_by_server(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud._helpers import _millis + + CREATED = datetime.datetime(2015, 8, 11, 12, 13, 22, tzinfo=UTC) + STARTED = datetime.datetime(2015, 8, 11, 13, 47, 15, tzinfo=UTC) + ENDED = datetime.datetime(2015, 8, 11, 14, 47, 15, tzinfo=UTC) + FULL_JOB_ID = "%s:%s" % (self.PROJECT, self.JOB_ID) + URL = "http://example.com/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + EMAIL = "phred@example.com" + ERROR_RESULT = { + "debugInfo": "DEBUG", + "location": "LOCATION", + "message": "MESSAGE", + "reason": "REASON", + } + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + job._properties["etag"] = "ETAG" + job._properties["id"] = FULL_JOB_ID + job._properties["selfLink"] = URL + job._properties["user_email"] = EMAIL + + statistics = job._properties["statistics"] = {} + statistics["creationTime"] = _millis(CREATED) + statistics["startTime"] = _millis(STARTED) + statistics["endTime"] = _millis(ENDED) + + self.assertEqual(job.etag, "ETAG") + self.assertEqual(job.self_link, URL) + self.assertEqual(job.user_email, EMAIL) + + self.assertEqual(job.created, CREATED) + self.assertEqual(job.started, STARTED) + self.assertEqual(job.ended, ENDED) + + # running jobs have no load stats not yet set. 
+ self.assertIsNone(job.output_bytes) + + load_stats = statistics["load"] = {} + load_stats["inputFileBytes"] = 12345 + load_stats["inputFiles"] = 1 + load_stats["outputBytes"] = 23456 + load_stats["outputRows"] = 345 + + self.assertEqual(job.input_file_bytes, 12345) + self.assertEqual(job.input_files, 1) + self.assertEqual(job.output_bytes, 23456) + self.assertEqual(job.output_rows, 345) + + status = job._properties["status"] = {} + + self.assertIsNone(job.error_result) + self.assertIsNone(job.errors) + self.assertIsNone(job.state) + + status["errorResult"] = ERROR_RESULT + status["errors"] = [ERROR_RESULT] + status["state"] = "STATE" + + self.assertEqual(job.error_result, ERROR_RESULT) + self.assertEqual(job.errors, [ERROR_RESULT]) + self.assertEqual(job.state, "STATE") + + def test_from_api_repr_missing_identity(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = {} + klass = self._get_target_class() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_missing_config(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": "%s:%s" % (self.PROJECT, self.JOB_ID), + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + } + klass = self._get_target_class() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_bare(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.FULL_JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "load": { + "sourceUris": [self.SOURCE1], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + 
def test_from_api_with_encryption(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.FULL_JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "load": { + "sourceUris": [self.SOURCE1], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "destinationEncryptionConfiguration": { + "kmsKeyName": self.KMS_KEY_NAME + }, + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_repr_w_properties(self): + from google.cloud.bigquery.job import CreateDisposition + + client = _make_client(project=self.PROJECT) + RESOURCE = self._make_resource() + load_config = RESOURCE["configuration"]["load"] + load_config["createDisposition"] = CreateDisposition.CREATE_IF_NEEDED + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_already_running(self): + conn = _make_connection() + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + job._properties["status"] = {"state": "RUNNING"} + + with self.assertRaises(ValueError): + job._begin() + + def test_begin_w_bound_client(self): + RESOURCE = self._make_resource() + # Ensure None for missing server-set props + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + path = "/projects/{}/jobs".format(self.PROJECT) + with mock.patch( + 
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": path}, client, job) + + conn.api_request.assert_called_once_with( + method="POST", + path=path, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "load": { + "sourceUris": [self.SOURCE1], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + } + }, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_autodetect(self): + from google.cloud.bigquery.job import LoadJobConfig + + path = "/projects/{}/jobs".format(self.PROJECT) + resource = self._make_resource() + resource["configuration"]["load"]["autodetect"] = True + # Ensure None for missing server-set props + del resource["statistics"]["creationTime"] + del resource["etag"] + del resource["selfLink"] + del resource["user_email"] + conn = _make_connection(resource) + client = _make_client(project=self.PROJECT, connection=conn) + config = LoadJobConfig() + config.autodetect = True + job = self._make_one( + self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client, config + ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": path}, client, job) + + sent = { + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "load": { + "sourceUris": [self.SOURCE1], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "autodetect": True, + } + }, + } + conn.api_request.assert_called_once_with( + method="POST", path=path, data=sent, timeout=None + ) + self._verifyResourceProperties(job, resource) + + def test_begin_w_alternate_client(self): + from google.cloud.bigquery.job import CreateDisposition + 
from google.cloud.bigquery.job import LoadJobConfig + from google.cloud.bigquery.job import SchemaUpdateOption + from google.cloud.bigquery.job import WriteDisposition + from google.cloud.bigquery.schema import SchemaField + + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource(ended=True) + LOAD_CONFIGURATION = { + "sourceUris": [self.SOURCE1], + "destinationTable": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "allowJaggedRows": True, + "allowQuotedNewlines": True, + "createDisposition": CreateDisposition.CREATE_NEVER, + "encoding": "ISO-8559-1", + "fieldDelimiter": "|", + "ignoreUnknownValues": True, + "maxBadRecords": 100, + "nullMarker": r"\N", + "quote": "'", + "skipLeadingRows": "1", + "sourceFormat": "CSV", + "useAvroLogicalTypes": True, + "writeDisposition": WriteDisposition.WRITE_TRUNCATE, + "schema": { + "fields": [ + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, + }, + { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, + }, + ] + }, + "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], + } + RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection(RESOURCE) + client2 = _make_client(project=self.PROJECT, connection=conn2) + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + age = SchemaField("age", "INTEGER", mode="REQUIRED") + config = LoadJobConfig() + config.schema = [full_name, age] + job = self._make_one( + self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1, config + ) + config.allow_jagged_rows = True + config.allow_quoted_newlines = True + config.create_disposition = CreateDisposition.CREATE_NEVER + config.encoding = "ISO-8559-1" + config.field_delimiter = "|" + config.ignore_unknown_values = True + config.max_bad_records = 100 + config.null_marker = 
r"\N" + config.quote_character = "'" + config.skip_leading_rows = 1 + config.source_format = "CSV" + config.use_avro_logical_types = True + config.write_disposition = WriteDisposition.WRITE_TRUNCATE + config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin(client=client2) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + self.assertEqual(len(conn2.api_request.call_args_list), 1) + req = conn2.api_request.call_args_list[0] + self.assertEqual(req[1]["method"], "POST") + self.assertEqual(req[1]["path"], PATH) + SENT = { + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"load": LOAD_CONFIGURATION}, + } + self.maxDiff = None + self.assertEqual(req[1]["data"], SENT) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_job_reference(self): + from google.cloud.bigquery import job + + resource = self._make_resource() + resource["jobReference"]["projectId"] = "alternative-project" + resource["jobReference"]["location"] = "US" + job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") + conn = _make_connection(resource) + client = _make_client(project=self.PROJECT, connection=conn) + load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + load_job._begin() + final_attributes.assert_called_with( + {"path": "/projects/alternative-project/jobs"}, client, load_job + ) + + conn.api_request.assert_called_once() + _, request = conn.api_request.call_args + self.assertEqual(request["method"], "POST") + self.assertEqual(request["path"], "/projects/alternative-project/jobs") + self.assertEqual( + request["data"]["jobReference"]["projectId"], "alternative-project" + ) + 
self.assertEqual(request["data"]["jobReference"]["location"], "US") + self.assertEqual(request["data"]["jobReference"]["jobId"], self.JOB_ID) + + def test_exists_miss_w_bound_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + conn = _make_connection() + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertFalse(job.exists()) + + final_attributes.assert_called_with( + {"path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID)}, + client, + job, + ) + + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None + ) + + def test_exists_hit_w_alternate_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection({}) + client2 = _make_client(project=self.PROJECT, connection=conn2) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertTrue(job.exists(client=client2)) + + final_attributes.assert_called_with( + {"path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID)}, + client2, + job, + ) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None + ) + + def test_exists_miss_w_job_reference(self): + from google.cloud.bigquery import job + + job_ref = job._JobReference("my-job-id", "other-project", "US") + conn = _make_connection() + client = _make_client(project=self.PROJECT, connection=conn) + load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) + with 
mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertFalse(load_job.exists()) + + final_attributes.assert_called_with( + {"path": "/projects/other-project/jobs/my-job-id"}, client, load_job + ) + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/other-project/jobs/my-job-id", + query_params={"fields": "id", "location": "US"}, + timeout=None, + ) + + def test_reload_w_bound_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + RESOURCE = self._make_resource() + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_reload_w_alternate_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + RESOURCE = self._make_resource() + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection(RESOURCE) + client2 = _make_client(project=self.PROJECT, connection=conn2) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload(client=client2) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_reload_w_job_reference(self): + from 
google.cloud.bigquery import job + + resource = self._make_resource(ended=True) + resource["jobReference"]["projectId"] = "alternative-project" + resource["jobReference"]["location"] = "US" + job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") + conn = _make_connection(resource) + client = _make_client(project=self.PROJECT, connection=conn) + load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + load_job.reload() + + final_attributes.assert_called_with( + {"path": "/projects/alternative-project/jobs/{}".format(self.JOB_ID)}, + client, + load_job, + ) + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/alternative-project/jobs/{}".format(self.JOB_ID), + query_params={"location": "US"}, + timeout=None, + ) + + def test_cancel_w_bound_client(self): + PATH = "/projects/%s/jobs/%s/cancel" % (self.PROJECT, self.JOB_ID) + RESOURCE = self._make_resource(ended=True) + RESPONSE = {"job": RESOURCE} + conn = _make_connection(RESPONSE) + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.cancel() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="POST", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_cancel_w_alternate_client(self): + PATH = "/projects/%s/jobs/%s/cancel" % (self.PROJECT, self.JOB_ID) + RESOURCE = self._make_resource(ended=True) + RESPONSE = {"job": RESOURCE} + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection(RESPONSE) + client2 = _make_client(project=self.PROJECT, 
connection=conn2) + job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.cancel(client=client2) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="POST", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_cancel_w_job_reference(self): + from google.cloud.bigquery import job + + resource = self._make_resource(ended=True) + resource["jobReference"]["projectId"] = "alternative-project" + resource["jobReference"]["location"] = "US" + job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") + conn = _make_connection({"job": resource}) + client = _make_client(project=self.PROJECT, connection=conn) + load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + load_job.cancel() + + final_attributes.assert_called_with( + { + "path": "/projects/alternative-project/jobs/{}/cancel".format( + self.JOB_ID + ) + }, + client, + load_job, + ) + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/alternative-project/jobs/{}/cancel".format(self.JOB_ID), + query_params={"location": "US"}, + timeout=None, + ) diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py new file mode 100644 index 000000000..c18f51bff --- /dev/null +++ b/tests/unit/job/test_load_config.py @@ -0,0 +1,710 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import warnings + +import pytest + +from .helpers import _Base + + +class TestLoadJobConfig(_Base): + JOB_TYPE = "load" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import LoadJobConfig + + return LoadJobConfig + + def test_ctor_w_properties(self): + config = self._get_target_class()( + allow_jagged_rows=True, allow_quoted_newlines=True + ) + + self.assertTrue(config.allow_jagged_rows) + self.assertTrue(config.allow_quoted_newlines) + + def test_allow_jagged_rows_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.allow_jagged_rows) + + def test_allow_jagged_rows_hit(self): + config = self._get_target_class()() + config._properties["load"]["allowJaggedRows"] = True + self.assertTrue(config.allow_jagged_rows) + + def test_allow_jagged_rows_setter(self): + config = self._get_target_class()() + config.allow_jagged_rows = True + self.assertTrue(config._properties["load"]["allowJaggedRows"]) + + def test_allow_quoted_newlines_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.allow_quoted_newlines) + + def test_allow_quoted_newlines_hit(self): + config = self._get_target_class()() + config._properties["load"]["allowQuotedNewlines"] = True + self.assertTrue(config.allow_quoted_newlines) + + def test_allow_quoted_newlines_setter(self): + config = self._get_target_class()() + config.allow_quoted_newlines = True + self.assertTrue(config._properties["load"]["allowQuotedNewlines"]) + + def test_autodetect_missing(self): + config = self._get_target_class()() + 
self.assertIsNone(config.autodetect) + + def test_autodetect_hit(self): + config = self._get_target_class()() + config._properties["load"]["autodetect"] = True + self.assertTrue(config.autodetect) + + def test_autodetect_setter(self): + config = self._get_target_class()() + config.autodetect = True + self.assertTrue(config._properties["load"]["autodetect"]) + + def test_clustering_fields_miss(self): + config = self._get_target_class()() + self.assertIsNone(config.clustering_fields) + + def test_clustering_fields_hit(self): + config = self._get_target_class()() + fields = ["email", "postal_code"] + config._properties["load"]["clustering"] = {"fields": fields} + self.assertEqual(config.clustering_fields, fields) + + def test_clustering_fields_setter(self): + fields = ["email", "postal_code"] + config = self._get_target_class()() + config.clustering_fields = fields + self.assertEqual(config._properties["load"]["clustering"], {"fields": fields}) + + def test_clustering_fields_setter_w_none(self): + config = self._get_target_class()() + fields = ["email", "postal_code"] + config._properties["load"]["clustering"] = {"fields": fields} + config.clustering_fields = None + self.assertIsNone(config.clustering_fields) + self.assertNotIn("clustering", config._properties["load"]) + + def test_create_disposition_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.create_disposition) + + def test_create_disposition_hit(self): + from google.cloud.bigquery.job import CreateDisposition + + disposition = CreateDisposition.CREATE_IF_NEEDED + config = self._get_target_class()() + config._properties["load"]["createDisposition"] = disposition + self.assertEqual(config.create_disposition, disposition) + + def test_create_disposition_setter(self): + from google.cloud.bigquery.job import CreateDisposition + + disposition = CreateDisposition.CREATE_IF_NEEDED + config = self._get_target_class()() + config.create_disposition = disposition + 
self.assertEqual(config._properties["load"]["createDisposition"], disposition) + + def test_destination_encryption_configuration_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.destination_encryption_configuration) + + def test_destination_encryption_configuration_hit(self): + from google.cloud.bigquery.encryption_configuration import ( + EncryptionConfiguration, + ) + + kms_key_name = "kms-key-name" + encryption_configuration = EncryptionConfiguration(kms_key_name) + config = self._get_target_class()() + config._properties["load"]["destinationEncryptionConfiguration"] = { + "kmsKeyName": kms_key_name + } + self.assertEqual( + config.destination_encryption_configuration, encryption_configuration + ) + + def test_destination_encryption_configuration_setter(self): + from google.cloud.bigquery.encryption_configuration import ( + EncryptionConfiguration, + ) + + kms_key_name = "kms-key-name" + encryption_configuration = EncryptionConfiguration(kms_key_name) + config = self._get_target_class()() + config.destination_encryption_configuration = encryption_configuration + expected = {"kmsKeyName": kms_key_name} + self.assertEqual( + config._properties["load"]["destinationEncryptionConfiguration"], expected + ) + + def test_destination_encryption_configuration_setter_w_none(self): + kms_key_name = "kms-key-name" + config = self._get_target_class()() + config._properties["load"]["destinationEncryptionConfiguration"] = { + "kmsKeyName": kms_key_name + } + config.destination_encryption_configuration = None + self.assertIsNone(config.destination_encryption_configuration) + self.assertNotIn( + "destinationEncryptionConfiguration", config._properties["load"] + ) + + def test_destination_table_description_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.destination_table_description) + + def test_destination_table_description_hit(self): + description = "Description" + config = self._get_target_class()() + 
config._properties["load"]["destinationTableProperties"] = { + "description": description + } + self.assertEqual(config.destination_table_description, description) + + def test_destination_table_description_setter(self): + description = "Description" + config = self._get_target_class()() + config.destination_table_description = description + expected = {"description": description} + self.assertEqual( + config._properties["load"]["destinationTableProperties"], expected + ) + + def test_destination_table_description_setter_w_fn_already(self): + description = "Description" + friendly_name = "Friendly Name" + config = self._get_target_class()() + config._properties["load"]["destinationTableProperties"] = { + "friendlyName": friendly_name + } + config.destination_table_description = description + expected = {"friendlyName": friendly_name, "description": description} + self.assertEqual( + config._properties["load"]["destinationTableProperties"], expected + ) + + def test_destination_table_description_w_none(self): + description = "Description" + friendly_name = "Friendly Name" + config = self._get_target_class()() + config._properties["load"]["destinationTableProperties"] = { + "description": description, + "friendlyName": friendly_name, + } + config.destination_table_description = None + expected = {"friendlyName": friendly_name} + self.assertEqual( + config._properties["load"]["destinationTableProperties"], expected + ) + + def test_destination_table_friendly_name_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.destination_table_friendly_name) + + def test_destination_table_friendly_name_hit(self): + friendly_name = "Friendly Name" + config = self._get_target_class()() + config._properties["load"]["destinationTableProperties"] = { + "friendlyName": friendly_name + } + self.assertEqual(config.destination_table_friendly_name, friendly_name) + + def test_destination_table_friendly_name_setter(self): + friendly_name = "Friendly Name" + 
config = self._get_target_class()() + config.destination_table_friendly_name = friendly_name + expected = {"friendlyName": friendly_name} + self.assertEqual( + config._properties["load"]["destinationTableProperties"], expected + ) + + def test_destination_table_friendly_name_setter_w_descr_already(self): + friendly_name = "Friendly Name" + description = "Description" + config = self._get_target_class()() + config._properties["load"]["destinationTableProperties"] = { + "description": description + } + config.destination_table_friendly_name = friendly_name + expected = {"friendlyName": friendly_name, "description": description} + self.assertEqual( + config._properties["load"]["destinationTableProperties"], expected + ) + + def test_destination_table_friendly_name_w_none(self): + friendly_name = "Friendly Name" + description = "Description" + config = self._get_target_class()() + config._properties["load"]["destinationTableProperties"] = { + "description": description, + "friendlyName": friendly_name, + } + config.destination_table_friendly_name = None + expected = {"description": description} + self.assertEqual( + config._properties["load"]["destinationTableProperties"], expected + ) + + def test_encoding_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.encoding) + + def test_encoding_hit(self): + from google.cloud.bigquery.job import Encoding + + encoding = Encoding.UTF_8 + config = self._get_target_class()() + config._properties["load"]["encoding"] = encoding + self.assertEqual(config.encoding, encoding) + + def test_encoding_setter(self): + from google.cloud.bigquery.job import Encoding + + encoding = Encoding.UTF_8 + config = self._get_target_class()() + config.encoding = encoding + self.assertEqual(config._properties["load"]["encoding"], encoding) + + def test_field_delimiter_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.field_delimiter) + + def test_field_delimiter_hit(self): + field_delimiter = 
"|" + config = self._get_target_class()() + config._properties["load"]["fieldDelimiter"] = field_delimiter + self.assertEqual(config.field_delimiter, field_delimiter) + + def test_field_delimiter_setter(self): + field_delimiter = "|" + config = self._get_target_class()() + config.field_delimiter = field_delimiter + self.assertEqual(config._properties["load"]["fieldDelimiter"], field_delimiter) + + def test_hive_partitioning_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.hive_partitioning) + + def test_hive_partitioning_hit(self): + from google.cloud.bigquery.external_config import HivePartitioningOptions + + config = self._get_target_class()() + config._properties["load"]["hivePartitioningOptions"] = { + "sourceUriPrefix": "http://foo/bar", + "mode": "STRINGS", + } + result = config.hive_partitioning + self.assertIsInstance(result, HivePartitioningOptions) + self.assertEqual(result.source_uri_prefix, "http://foo/bar") + self.assertEqual(result.mode, "STRINGS") + + def test_hive_partitioning_setter(self): + from google.cloud.bigquery.external_config import HivePartitioningOptions + + hive_partitioning = HivePartitioningOptions() + hive_partitioning.source_uri_prefix = "http://foo/bar" + hive_partitioning.mode = "AUTO" + + config = self._get_target_class()() + config.hive_partitioning = hive_partitioning + self.assertEqual( + config._properties["load"]["hivePartitioningOptions"], + {"sourceUriPrefix": "http://foo/bar", "mode": "AUTO"}, + ) + + config.hive_partitioning = None + self.assertIsNone(config._properties["load"]["hivePartitioningOptions"]) + + def test_hive_partitioning_invalid_type(self): + config = self._get_target_class()() + + with self.assertRaises(TypeError): + config.hive_partitioning = {"mode": "AUTO"} + + def test_ignore_unknown_values_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.ignore_unknown_values) + + def test_ignore_unknown_values_hit(self): + config = 
self._get_target_class()() + config._properties["load"]["ignoreUnknownValues"] = True + self.assertTrue(config.ignore_unknown_values) + + def test_ignore_unknown_values_setter(self): + config = self._get_target_class()() + config.ignore_unknown_values = True + self.assertTrue(config._properties["load"]["ignoreUnknownValues"]) + + def test_max_bad_records_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.max_bad_records) + + def test_max_bad_records_hit(self): + max_bad_records = 13 + config = self._get_target_class()() + config._properties["load"]["maxBadRecords"] = max_bad_records + self.assertEqual(config.max_bad_records, max_bad_records) + + def test_max_bad_records_setter(self): + max_bad_records = 13 + config = self._get_target_class()() + config.max_bad_records = max_bad_records + self.assertEqual(config._properties["load"]["maxBadRecords"], max_bad_records) + + def test_null_marker_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.null_marker) + + def test_null_marker_hit(self): + null_marker = "XXX" + config = self._get_target_class()() + config._properties["load"]["nullMarker"] = null_marker + self.assertEqual(config.null_marker, null_marker) + + def test_null_marker_setter(self): + null_marker = "XXX" + config = self._get_target_class()() + config.null_marker = null_marker + self.assertEqual(config._properties["load"]["nullMarker"], null_marker) + + def test_quote_character_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.quote_character) + + def test_quote_character_hit(self): + quote_character = "'" + config = self._get_target_class()() + config._properties["load"]["quote"] = quote_character + self.assertEqual(config.quote_character, quote_character) + + def test_quote_character_setter(self): + quote_character = "'" + config = self._get_target_class()() + config.quote_character = quote_character + self.assertEqual(config._properties["load"]["quote"], 
quote_character) + + def test_schema_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.schema) + + def test_schema_hit(self): + from google.cloud.bigquery.schema import SchemaField + + config = self._get_target_class()() + all_props_repr = { + "mode": "REQUIRED", + "name": "foo", + "type": "INTEGER", + "description": "Foo", + } + minimal_repr = {"name": "bar", "type": "STRING"} + config._properties["load"]["schema"] = { + "fields": [all_props_repr, minimal_repr] + } + all_props, minimal = config.schema + self.assertEqual(all_props, SchemaField.from_api_repr(all_props_repr)) + self.assertEqual(minimal, SchemaField.from_api_repr(minimal_repr)) + + def test_schema_setter_fields(self): + from google.cloud.bigquery.schema import SchemaField + + config = self._get_target_class()() + full_name = SchemaField("full_name", "STRING", mode="REQUIRED") + age = SchemaField("age", "INTEGER", mode="REQUIRED") + config.schema = [full_name, age] + full_name_repr = { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, + } + age_repr = { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, + } + self.assertEqual( + config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} + ) + + def test_schema_setter_valid_mappings_list(self): + config = self._get_target_class()() + + schema = [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ] + config.schema = schema + + full_name_repr = { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, + } + age_repr = { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, + } + self.assertEqual( + config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} + ) + + def test_schema_setter_invalid_mappings_list(self): + config = self._get_target_class()() + + schema = [ + {"name": "full_name", 
"type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "typeoo": "INTEGER", "mode": "REQUIRED"}, + ] + + with self.assertRaises(Exception): + config.schema = schema + + def test_schema_setter_unsetting_schema(self): + from google.cloud.bigquery.schema import SchemaField + + config = self._get_target_class()() + config._properties["load"]["schema"] = [ + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + + config.schema = None + self.assertNotIn("schema", config._properties["load"]) + config.schema = None # no error, idempotent operation + + def test_schema_update_options_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.schema_update_options) + + def test_schema_update_options_hit(self): + from google.cloud.bigquery.job import SchemaUpdateOption + + options = [ + SchemaUpdateOption.ALLOW_FIELD_ADDITION, + SchemaUpdateOption.ALLOW_FIELD_RELAXATION, + ] + config = self._get_target_class()() + config._properties["load"]["schemaUpdateOptions"] = options + self.assertEqual(config.schema_update_options, options) + + def test_schema_update_options_setter(self): + from google.cloud.bigquery.job import SchemaUpdateOption + + options = [ + SchemaUpdateOption.ALLOW_FIELD_ADDITION, + SchemaUpdateOption.ALLOW_FIELD_RELAXATION, + ] + config = self._get_target_class()() + config.schema_update_options = options + self.assertEqual(config._properties["load"]["schemaUpdateOptions"], options) + + def test_skip_leading_rows_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.skip_leading_rows) + + def test_skip_leading_rows_hit_w_str(self): + skip_leading_rows = 1 + config = self._get_target_class()() + config._properties["load"]["skipLeadingRows"] = str(skip_leading_rows) + self.assertEqual(config.skip_leading_rows, skip_leading_rows) + + def test_skip_leading_rows_hit_w_integer(self): + skip_leading_rows = 1 + config = self._get_target_class()() + 
config._properties["load"]["skipLeadingRows"] = skip_leading_rows + self.assertEqual(config.skip_leading_rows, skip_leading_rows) + + def test_skip_leading_rows_setter(self): + skip_leading_rows = 1 + config = self._get_target_class()() + config.skip_leading_rows = skip_leading_rows + self.assertEqual( + config._properties["load"]["skipLeadingRows"], str(skip_leading_rows) + ) + + def test_source_format_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.source_format) + + def test_source_format_hit(self): + from google.cloud.bigquery.job import SourceFormat + + source_format = SourceFormat.CSV + config = self._get_target_class()() + config._properties["load"]["sourceFormat"] = source_format + self.assertEqual(config.source_format, source_format) + + def test_source_format_setter(self): + from google.cloud.bigquery.job import SourceFormat + + source_format = SourceFormat.CSV + config = self._get_target_class()() + config.source_format = source_format + self.assertEqual(config._properties["load"]["sourceFormat"], source_format) + + def test_range_partitioning_w_none(self): + object_under_test = self._get_target_class()() + assert object_under_test.range_partitioning is None + + def test_range_partitioning_w_value(self): + object_under_test = self._get_target_class()() + object_under_test._properties["load"]["rangePartitioning"] = { + "field": "column_one", + "range": {"start": 1, "end": 1000, "interval": 10}, + } + object_under_test.range_partitioning.field == "column_one" + object_under_test.range_partitioning.range_.start == 1 + object_under_test.range_partitioning.range_.end == 1000 + object_under_test.range_partitioning.range_.interval == 10 + + def test_range_partitioning_setter(self): + from google.cloud.bigquery.table import PartitionRange + from google.cloud.bigquery.table import RangePartitioning + + object_under_test = self._get_target_class()() + object_under_test.range_partitioning = RangePartitioning( + field="column_one", 
range_=PartitionRange(start=1, end=1000, interval=10) + ) + object_under_test.range_partitioning.field == "column_one" + object_under_test.range_partitioning.range_.start == 1 + object_under_test.range_partitioning.range_.end == 1000 + object_under_test.range_partitioning.range_.interval == 10 + + def test_range_partitioning_setter_w_none(self): + object_under_test = self._get_target_class()() + object_under_test.range_partitioning = None + assert object_under_test.range_partitioning is None + + def test_range_partitioning_setter_w_wrong_type(self): + object_under_test = self._get_target_class()() + with pytest.raises(ValueError, match="RangePartitioning"): + object_under_test.range_partitioning = object() + + def test_time_partitioning_miss(self): + config = self._get_target_class()() + self.assertIsNone(config.time_partitioning) + + def test_time_partitioning_hit(self): + from google.cloud.bigquery.table import TimePartitioning + from google.cloud.bigquery.table import TimePartitioningType + + field = "creation_date" + year_ms = 86400 * 1000 * 365 + config = self._get_target_class()() + config._properties["load"]["timePartitioning"] = { + "type": TimePartitioningType.DAY, + "field": field, + "expirationMs": str(year_ms), + "requirePartitionFilter": False, + } + with warnings.catch_warnings(record=True) as warned: + expected = TimePartitioning( + type_=TimePartitioningType.DAY, + field=field, + expiration_ms=year_ms, + require_partition_filter=False, + ) + self.assertEqual(config.time_partitioning, expected) + + assert len(warned) == 1 + warning = warned[0] + assert "TimePartitioning.require_partition_filter" in str(warning) + + def test_time_partitioning_setter(self): + from google.cloud.bigquery.table import TimePartitioning + from google.cloud.bigquery.table import TimePartitioningType + + field = "creation_date" + year_ms = 86400 * 1000 * 365 + + with warnings.catch_warnings(record=True) as warned: + time_partitioning = TimePartitioning( + 
type_=TimePartitioningType.DAY, + field=field, + expiration_ms=year_ms, + require_partition_filter=False, + ) + + config = self._get_target_class()() + config.time_partitioning = time_partitioning + expected = { + "type": TimePartitioningType.DAY, + "field": field, + "expirationMs": str(year_ms), + "requirePartitionFilter": False, + } + self.assertEqual(config._properties["load"]["timePartitioning"], expected) + + assert len(warned) == 1 + warning = warned[0] + assert "TimePartitioning.require_partition_filter" in str(warning) + + def test_time_partitioning_setter_w_none(self): + from google.cloud.bigquery.table import TimePartitioningType + + field = "creation_date" + year_ms = 86400 * 1000 * 365 + config = self._get_target_class()() + config._properties["load"]["timePartitioning"] = { + "type": TimePartitioningType.DAY, + "field": field, + "expirationMs": str(year_ms), + "requirePartitionFilter": False, + } + config.time_partitioning = None + self.assertIsNone(config.time_partitioning) + self.assertNotIn("timePartitioning", config._properties["load"]) + + def test_use_avro_logical_types(self): + config = self._get_target_class()() + self.assertIsNone(config.use_avro_logical_types) + + def test_use_avro_logical_types_setter(self): + config = self._get_target_class()() + config.use_avro_logical_types = True + self.assertTrue(config._properties["load"]["useAvroLogicalTypes"]) + + def test_write_disposition_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.write_disposition) + + def test_write_disposition_hit(self): + from google.cloud.bigquery.job import WriteDisposition + + write_disposition = WriteDisposition.WRITE_TRUNCATE + config = self._get_target_class()() + config._properties["load"]["writeDisposition"] = write_disposition + self.assertEqual(config.write_disposition, write_disposition) + + def test_write_disposition_setter(self): + from google.cloud.bigquery.job import WriteDisposition + + write_disposition = 
WriteDisposition.WRITE_TRUNCATE + config = self._get_target_class()() + config.write_disposition = write_disposition + self.assertEqual( + config._properties["load"]["writeDisposition"], write_disposition + ) diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py new file mode 100644 index 000000000..c0b90d8ea --- /dev/null +++ b/tests/unit/job/test_query.py @@ -0,0 +1,1811 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import concurrent +import copy +import textwrap + +import freezegun +from google.api_core import exceptions +import google.api_core.retry +import mock +import requests +from six.moves import http_client + +import google.cloud.bigquery.query +from .helpers import _Base +from .helpers import _make_client +from .helpers import _make_connection + + +class TestQueryJob(_Base): + JOB_TYPE = "query" + QUERY = "select count(*) from persons" + DESTINATION_TABLE = "destination_table" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import QueryJob + + return QueryJob + + def _make_resource(self, started=False, ended=False): + resource = super(TestQueryJob, self)._make_resource(started, ended) + config = resource["configuration"]["query"] + config["query"] = self.QUERY + return resource + + def _verifyBooleanResourceProperties(self, job, config): + + if "allowLargeResults" in config: + self.assertEqual(job.allow_large_results, config["allowLargeResults"]) + else: + 
self.assertIsNone(job.allow_large_results) + if "flattenResults" in config: + self.assertEqual(job.flatten_results, config["flattenResults"]) + else: + self.assertIsNone(job.flatten_results) + if "useQueryCache" in config: + self.assertEqual(job.use_query_cache, config["useQueryCache"]) + else: + self.assertIsNone(job.use_query_cache) + if "useLegacySql" in config: + self.assertEqual(job.use_legacy_sql, config["useLegacySql"]) + else: + self.assertIsNone(job.use_legacy_sql) + + def _verifyIntegerResourceProperties(self, job, config): + if "maximumBillingTier" in config: + self.assertEqual(job.maximum_billing_tier, config["maximumBillingTier"]) + else: + self.assertIsNone(job.maximum_billing_tier) + if "maximumBytesBilled" in config: + self.assertEqual( + str(job.maximum_bytes_billed), config["maximumBytesBilled"] + ) + self.assertIsInstance(job.maximum_bytes_billed, int) + else: + self.assertIsNone(job.maximum_bytes_billed) + + def _verify_udf_resources(self, job, config): + udf_resources = config.get("userDefinedFunctionResources", ()) + self.assertEqual(len(job.udf_resources), len(udf_resources)) + for found, expected in zip(job.udf_resources, udf_resources): + if "resourceUri" in expected: + self.assertEqual(found.udf_type, "resourceUri") + self.assertEqual(found.value, expected["resourceUri"]) + else: + self.assertEqual(found.udf_type, "inlineCode") + self.assertEqual(found.value, expected["inlineCode"]) + + def _verifyQueryParameters(self, job, config): + query_parameters = config.get("queryParameters", ()) + self.assertEqual(len(job.query_parameters), len(query_parameters)) + for found, expected in zip(job.query_parameters, query_parameters): + self.assertEqual(found.to_api_repr(), expected) + + def _verify_table_definitions(self, job, config): + table_defs = config.get("tableDefinitions") + if job.table_definitions is None: + self.assertIsNone(table_defs) + else: + self.assertEqual(len(job.table_definitions), len(table_defs)) + for found_key, found_ec in 
job.table_definitions.items(): + expected_ec = table_defs.get(found_key) + self.assertIsNotNone(expected_ec) + self.assertEqual(found_ec.to_api_repr(), expected_ec) + + def _verify_configuration_properties(self, job, configuration): + if "dryRun" in configuration: + self.assertEqual(job.dry_run, configuration["dryRun"]) + else: + self.assertIsNone(job.dry_run) + + def _verifyResourceProperties(self, job, resource): + self._verifyReadonlyResourceProperties(job, resource) + + configuration = resource.get("configuration", {}) + self._verify_configuration_properties(job, configuration) + + query_config = resource.get("configuration", {}).get("query") + self._verifyBooleanResourceProperties(job, query_config) + self._verifyIntegerResourceProperties(job, query_config) + self._verify_udf_resources(job, query_config) + self._verifyQueryParameters(job, query_config) + self._verify_table_definitions(job, query_config) + + self.assertEqual(job.query, query_config["query"]) + if "createDisposition" in query_config: + self.assertEqual(job.create_disposition, query_config["createDisposition"]) + else: + self.assertIsNone(job.create_disposition) + if "defaultDataset" in query_config: + ds_ref = job.default_dataset + ds_ref = {"projectId": ds_ref.project, "datasetId": ds_ref.dataset_id} + self.assertEqual(ds_ref, query_config["defaultDataset"]) + else: + self.assertIsNone(job.default_dataset) + if "destinationTable" in query_config: + table = job.destination + tb_ref = { + "projectId": table.project, + "datasetId": table.dataset_id, + "tableId": table.table_id, + } + self.assertEqual(tb_ref, query_config["destinationTable"]) + else: + self.assertIsNone(job.destination) + if "priority" in query_config: + self.assertEqual(job.priority, query_config["priority"]) + else: + self.assertIsNone(job.priority) + if "writeDisposition" in query_config: + self.assertEqual(job.write_disposition, query_config["writeDisposition"]) + else: + self.assertIsNone(job.write_disposition) + if 
"destinationEncryptionConfiguration" in query_config: + self.assertIsNotNone(job.destination_encryption_configuration) + self.assertEqual( + job.destination_encryption_configuration.kms_key_name, + query_config["destinationEncryptionConfiguration"]["kmsKeyName"], + ) + else: + self.assertIsNone(job.destination_encryption_configuration) + if "schemaUpdateOptions" in query_config: + self.assertEqual( + job.schema_update_options, query_config["schemaUpdateOptions"] + ) + else: + self.assertIsNone(job.schema_update_options) + + def test_ctor_defaults(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertEqual(job.query, self.QUERY) + self.assertIs(job._client, client) + self.assertEqual(job.job_type, self.JOB_TYPE) + self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) + + self._verifyInitialReadonlyProperties(job) + + self.assertFalse(job.use_legacy_sql) + + # set/read from resource['configuration']['query'] + self.assertIsNone(job.allow_large_results) + self.assertIsNone(job.create_disposition) + self.assertIsNone(job.default_dataset) + self.assertIsNone(job.destination) + self.assertIsNone(job.flatten_results) + self.assertIsNone(job.priority) + self.assertIsNone(job.use_query_cache) + self.assertIsNone(job.dry_run) + self.assertIsNone(job.write_disposition) + self.assertIsNone(job.maximum_billing_tier) + self.assertIsNone(job.maximum_bytes_billed) + self.assertIsNone(job.table_definitions) + self.assertIsNone(job.destination_encryption_configuration) + self.assertIsNone(job.range_partitioning) + self.assertIsNone(job.time_partitioning) + self.assertIsNone(job.clustering_fields) + self.assertIsNone(job.schema_update_options) + + def test_ctor_w_udf_resources(self): + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import UDFResource + + RESOURCE_URI = "gs://some-bucket/js/lib.js" + udf_resources = [UDFResource("resourceUri", 
RESOURCE_URI)] + client = _make_client(project=self.PROJECT) + config = QueryJobConfig() + config.udf_resources = udf_resources + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) + self.assertEqual(job.udf_resources, udf_resources) + + def test_ctor_w_query_parameters(self): + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import ScalarQueryParameter + + query_parameters = [ScalarQueryParameter("foo", "INT64", 123)] + client = _make_client(project=self.PROJECT) + config = QueryJobConfig(query_parameters=query_parameters) + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) + self.assertEqual(job.query_parameters, query_parameters) + + def test_from_api_repr_missing_identity(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = {} + klass = self._get_target_class() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_missing_config(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + } + klass = self._get_target_class() + with self.assertRaises(KeyError): + klass.from_api_repr(RESOURCE, client=client) + + def test_from_api_repr_bare(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"query": {"query": self.QUERY}}, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_repr_with_encryption(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": 
self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": { + "query": self.QUERY, + "destinationEncryptionConfiguration": { + "kmsKeyName": self.KMS_KEY_NAME + }, + } + }, + } + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_from_api_repr_w_properties(self): + from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import SchemaUpdateOption + from google.cloud.bigquery.job import WriteDisposition + + client = _make_client(project=self.PROJECT) + RESOURCE = self._make_resource() + query_config = RESOURCE["configuration"]["query"] + query_config["createDisposition"] = CreateDisposition.CREATE_IF_NEEDED + query_config["writeDisposition"] = WriteDisposition.WRITE_TRUNCATE + query_config["destinationTable"] = { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.DESTINATION_TABLE, + } + query_config["schemaUpdateOptions"] = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client=client) + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + + def test_cancelled(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + job._properties["status"] = { + "state": "DONE", + "errorResult": {"reason": "stopped"}, + } + + self.assertTrue(job.cancelled()) + + def test_done(self): + client = _make_client(project=self.PROJECT) + resource = self._make_resource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + job._query_results = google.cloud.bigquery.query._QueryResults.from_api_repr( + {"jobComplete": True, "jobReference": resource["jobReference"]} + ) + self.assertTrue(job.done()) + + def test_done_w_timeout(self): + client = _make_client(project=self.PROJECT) + resource = 
self._make_resource(ended=False) + job = self._get_target_class().from_api_repr(resource, client) + + with mock.patch.object( + client, "_get_query_results" + ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload: + job.done(timeout=42) + + fake_get_results.assert_called_once() + call_args = fake_get_results.call_args + self.assertEqual(call_args.kwargs.get("timeout"), 42) + + call_args = fake_reload.call_args + self.assertEqual(call_args.kwargs.get("timeout"), 42) + + def test_done_w_timeout_and_longer_internal_api_timeout(self): + client = _make_client(project=self.PROJECT) + resource = self._make_resource(ended=False) + job = self._get_target_class().from_api_repr(resource, client) + job._done_timeout = 8.8 + + with mock.patch.object( + client, "_get_query_results" + ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload: + job.done(timeout=5.5) + + # The expected timeout used is simply the given timeout, as the latter + # is shorter than the job's internal done timeout. 
+ expected_timeout = 5.5 + + fake_get_results.assert_called_once() + call_args = fake_get_results.call_args + self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) + + call_args = fake_reload.call_args + self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) + + def test_query_plan(self): + from google.cloud._helpers import _RFC3339_MICROS + from google.cloud.bigquery.job import QueryPlanEntry + from google.cloud.bigquery.job import QueryPlanEntryStep + + plan_entries = [ + { + "name": "NAME", + "id": "1234", + "inputStages": ["88", "101"], + "startMs": "1522540800000", + "endMs": "1522540804000", + "parallelInputs": "1000", + "completedParallelInputs": "5", + "waitMsAvg": "33", + "waitMsMax": "400", + "waitRatioAvg": 2.71828, + "waitRatioMax": 3.14159, + "readMsAvg": "45", + "readMsMax": "90", + "readRatioAvg": 1.41421, + "readRatioMax": 1.73205, + "computeMsAvg": "55", + "computeMsMax": "99", + "computeRatioAvg": 0.69315, + "computeRatioMax": 1.09861, + "writeMsAvg": "203", + "writeMsMax": "340", + "writeRatioAvg": 3.32193, + "writeRatioMax": 2.30258, + "recordsRead": "100", + "recordsWritten": "1", + "status": "STATUS", + "shuffleOutputBytes": "1024", + "shuffleOutputBytesSpilled": "1", + "steps": [{"kind": "KIND", "substeps": ["SUBSTEP1", "SUBSTEP2"]}], + } + ] + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertEqual(job.query_plan, []) + + statistics = job._properties["statistics"] = {} + self.assertEqual(job.query_plan, []) + + query_stats = statistics["query"] = {} + self.assertEqual(job.query_plan, []) + + query_stats["queryPlan"] = plan_entries + + self.assertEqual(len(job.query_plan), len(plan_entries)) + for found, expected in zip(job.query_plan, plan_entries): + self.assertIsInstance(found, QueryPlanEntry) + self.assertEqual(found.name, expected["name"]) + self.assertEqual(found.entry_id, expected["id"]) + self.assertEqual(len(found.input_stages), 
len(expected["inputStages"])) + for f_id in found.input_stages: + self.assertIn(f_id, [int(e) for e in expected["inputStages"]]) + self.assertEqual( + found.start.strftime(_RFC3339_MICROS), "2018-04-01T00:00:00.000000Z" + ) + self.assertEqual( + found.end.strftime(_RFC3339_MICROS), "2018-04-01T00:00:04.000000Z" + ) + self.assertEqual(found.parallel_inputs, int(expected["parallelInputs"])) + self.assertEqual( + found.completed_parallel_inputs, + int(expected["completedParallelInputs"]), + ) + self.assertEqual(found.wait_ms_avg, int(expected["waitMsAvg"])) + self.assertEqual(found.wait_ms_max, int(expected["waitMsMax"])) + self.assertEqual(found.wait_ratio_avg, expected["waitRatioAvg"]) + self.assertEqual(found.wait_ratio_max, expected["waitRatioMax"]) + self.assertEqual(found.read_ms_avg, int(expected["readMsAvg"])) + self.assertEqual(found.read_ms_max, int(expected["readMsMax"])) + self.assertEqual(found.read_ratio_avg, expected["readRatioAvg"]) + self.assertEqual(found.read_ratio_max, expected["readRatioMax"]) + self.assertEqual(found.compute_ms_avg, int(expected["computeMsAvg"])) + self.assertEqual(found.compute_ms_max, int(expected["computeMsMax"])) + self.assertEqual(found.compute_ratio_avg, expected["computeRatioAvg"]) + self.assertEqual(found.compute_ratio_max, expected["computeRatioMax"]) + self.assertEqual(found.write_ms_avg, int(expected["writeMsAvg"])) + self.assertEqual(found.write_ms_max, int(expected["writeMsMax"])) + self.assertEqual(found.write_ratio_avg, expected["writeRatioAvg"]) + self.assertEqual(found.write_ratio_max, expected["writeRatioMax"]) + self.assertEqual(found.records_read, int(expected["recordsRead"])) + self.assertEqual(found.records_written, int(expected["recordsWritten"])) + self.assertEqual(found.status, expected["status"]) + self.assertEqual( + found.shuffle_output_bytes, int(expected["shuffleOutputBytes"]) + ) + self.assertEqual( + found.shuffle_output_bytes_spilled, + int(expected["shuffleOutputBytesSpilled"]), + ) + + 
self.assertEqual(len(found.steps), len(expected["steps"])) + for f_step, e_step in zip(found.steps, expected["steps"]): + self.assertIsInstance(f_step, QueryPlanEntryStep) + self.assertEqual(f_step.kind, e_step["kind"]) + self.assertEqual(f_step.substeps, e_step["substeps"]) + + def test_total_bytes_processed(self): + total_bytes = 1234 + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.total_bytes_processed) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.total_bytes_processed) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.total_bytes_processed) + + query_stats["totalBytesProcessed"] = str(total_bytes) + self.assertEqual(job.total_bytes_processed, total_bytes) + + def test_total_bytes_billed(self): + total_bytes = 1234 + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.total_bytes_billed) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.total_bytes_billed) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.total_bytes_billed) + + query_stats["totalBytesBilled"] = str(total_bytes) + self.assertEqual(job.total_bytes_billed, total_bytes) + + def test_billing_tier(self): + billing_tier = 1 + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.billing_tier) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.billing_tier) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.billing_tier) + + query_stats["billingTier"] = billing_tier + self.assertEqual(job.billing_tier, billing_tier) + + def test_cache_hit(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.cache_hit) + + statistics = job._properties["statistics"] = {} + 
self.assertIsNone(job.cache_hit) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.cache_hit) + + query_stats["cacheHit"] = True + self.assertTrue(job.cache_hit) + + def test_ddl_operation_performed(self): + op = "SKIP" + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.ddl_operation_performed) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.ddl_operation_performed) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.ddl_operation_performed) + + query_stats["ddlOperationPerformed"] = op + self.assertEqual(job.ddl_operation_performed, op) + + def test_ddl_target_routine(self): + from google.cloud.bigquery.routine import RoutineReference + + ref_routine = { + "projectId": self.PROJECT, + "datasetId": "ddl_ds", + "routineId": "targetroutine", + } + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.ddl_target_routine) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.ddl_target_routine) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.ddl_target_routine) + + query_stats["ddlTargetRoutine"] = ref_routine + self.assertIsInstance(job.ddl_target_routine, RoutineReference) + self.assertEqual(job.ddl_target_routine.routine_id, "targetroutine") + self.assertEqual(job.ddl_target_routine.dataset_id, "ddl_ds") + self.assertEqual(job.ddl_target_routine.project, self.PROJECT) + + def test_ddl_target_table(self): + from google.cloud.bigquery.table import TableReference + + ref_table = { + "projectId": self.PROJECT, + "datasetId": "ddl_ds", + "tableId": "targettable", + } + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.ddl_target_table) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.ddl_target_table) + + query_stats = 
statistics["query"] = {} + self.assertIsNone(job.ddl_target_table) + + query_stats["ddlTargetTable"] = ref_table + self.assertIsInstance(job.ddl_target_table, TableReference) + self.assertEqual(job.ddl_target_table.table_id, "targettable") + self.assertEqual(job.ddl_target_table.dataset_id, "ddl_ds") + self.assertEqual(job.ddl_target_table.project, self.PROJECT) + + def test_num_dml_affected_rows(self): + num_rows = 1234 + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.num_dml_affected_rows) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.num_dml_affected_rows) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.num_dml_affected_rows) + + query_stats["numDmlAffectedRows"] = str(num_rows) + self.assertEqual(job.num_dml_affected_rows, num_rows) + + def test_slot_millis(self): + millis = 1234 + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.slot_millis) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.slot_millis) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.slot_millis) + + query_stats["totalSlotMs"] = millis + self.assertEqual(job.slot_millis, millis) + + def test_statement_type(self): + statement_type = "SELECT" + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.statement_type) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.statement_type) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.statement_type) + + query_stats["statementType"] = statement_type + self.assertEqual(job.statement_type, statement_type) + + def test_referenced_tables(self): + from google.cloud.bigquery.table import TableReference + + ref_tables_resource = [ + {"projectId": self.PROJECT, "datasetId": "dataset", "tableId": "local1"}, + 
{"projectId": self.PROJECT, "datasetId": "dataset", "tableId": "local2"}, + { + "projectId": "other-project-123", + "datasetId": "other-dataset", + "tableId": "other-table", + }, + ] + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertEqual(job.referenced_tables, []) + + statistics = job._properties["statistics"] = {} + self.assertEqual(job.referenced_tables, []) + + query_stats = statistics["query"] = {} + self.assertEqual(job.referenced_tables, []) + + query_stats["referencedTables"] = ref_tables_resource + + local1, local2, remote = job.referenced_tables + + self.assertIsInstance(local1, TableReference) + self.assertEqual(local1.table_id, "local1") + self.assertEqual(local1.dataset_id, "dataset") + self.assertEqual(local1.project, self.PROJECT) + + self.assertIsInstance(local2, TableReference) + self.assertEqual(local2.table_id, "local2") + self.assertEqual(local2.dataset_id, "dataset") + self.assertEqual(local2.project, self.PROJECT) + + self.assertIsInstance(remote, TableReference) + self.assertEqual(remote.table_id, "other-table") + self.assertEqual(remote.dataset_id, "other-dataset") + self.assertEqual(remote.project, "other-project-123") + + def test_timeline(self): + timeline_resource = [ + { + "elapsedMs": 1, + "activeUnits": 22, + "pendingUnits": 33, + "completedUnits": 44, + "totalSlotMs": 101, + } + ] + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertEqual(job.timeline, []) + + statistics = job._properties["statistics"] = {} + self.assertEqual(job.timeline, []) + + query_stats = statistics["query"] = {} + self.assertEqual(job.timeline, []) + + query_stats["timeline"] = timeline_resource + + self.assertEqual(len(job.timeline), len(timeline_resource)) + self.assertEqual(job.timeline[0].elapsed_ms, 1) + self.assertEqual(job.timeline[0].active_units, 22) + self.assertEqual(job.timeline[0].pending_units, 33) + 
self.assertEqual(job.timeline[0].completed_units, 44) + self.assertEqual(job.timeline[0].slot_millis, 101) + + def test_undeclared_query_parameters(self): + from google.cloud.bigquery.query import ArrayQueryParameter + from google.cloud.bigquery.query import ScalarQueryParameter + from google.cloud.bigquery.query import StructQueryParameter + + undeclared = [ + { + "name": "my_scalar", + "parameterType": {"type": "STRING"}, + "parameterValue": {"value": "value"}, + }, + { + "name": "my_array", + "parameterType": {"type": "ARRAY", "arrayType": {"type": "INT64"}}, + "parameterValue": { + "arrayValues": [{"value": "1066"}, {"value": "1745"}] + }, + }, + { + "name": "my_struct", + "parameterType": { + "type": "STRUCT", + "structTypes": [{"name": "count", "type": {"type": "INT64"}}], + }, + "parameterValue": {"structValues": {"count": {"value": "123"}}}, + }, + ] + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertEqual(job.undeclared_query_parameters, []) + + statistics = job._properties["statistics"] = {} + self.assertEqual(job.undeclared_query_parameters, []) + + query_stats = statistics["query"] = {} + self.assertEqual(job.undeclared_query_parameters, []) + + query_stats["undeclaredQueryParameters"] = undeclared + + scalar, array, struct = job.undeclared_query_parameters + + self.assertIsInstance(scalar, ScalarQueryParameter) + self.assertEqual(scalar.name, "my_scalar") + self.assertEqual(scalar.type_, "STRING") + self.assertEqual(scalar.value, "value") + + self.assertIsInstance(array, ArrayQueryParameter) + self.assertEqual(array.name, "my_array") + self.assertEqual(array.array_type, "INT64") + self.assertEqual(array.values, [1066, 1745]) + + self.assertIsInstance(struct, StructQueryParameter) + self.assertEqual(struct.name, "my_struct") + self.assertEqual(struct.struct_types, {"count": "INT64"}) + self.assertEqual(struct.struct_values, {"count": 123}) + + def test_estimated_bytes_processed(self): + 
est_bytes = 123456 + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertIsNone(job.estimated_bytes_processed) + + statistics = job._properties["statistics"] = {} + self.assertIsNone(job.estimated_bytes_processed) + + query_stats = statistics["query"] = {} + self.assertIsNone(job.estimated_bytes_processed) + + query_stats["estimatedBytesProcessed"] = str(est_bytes) + self.assertEqual(job.estimated_bytes_processed, est_bytes) + + def test_result(self): + from google.cloud.bigquery.table import RowIterator + + query_resource = { + "jobComplete": False, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + } + query_resource_done = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "2", + } + job_resource = self._make_resource(started=True) + job_resource_done = self._make_resource(started=True, ended=True) + job_resource_done["configuration"]["query"]["destinationTable"] = { + "projectId": "dest-project", + "datasetId": "dest_dataset", + "tableId": "dest_table", + } + tabledata_resource = { + # Explicitly set totalRows to be different from the initial + # response to test update during iteration. + "totalRows": "1", + "pageToken": None, + "rows": [{"f": [{"v": "abc"}]}], + } + conn = _make_connection( + query_resource, query_resource_done, job_resource_done, tabledata_resource + ) + client = _make_client(self.PROJECT, connection=conn) + job = self._get_target_class().from_api_repr(job_resource, client) + + result = job.result() + + self.assertIsInstance(result, RowIterator) + self.assertEqual(result.total_rows, 2) + rows = list(result) + self.assertEqual(len(rows), 1) + self.assertEqual(rows[0].col1, "abc") + # Test that the total_rows property has changed during iteration, based + # on the response from tabledata.list. 
+ self.assertEqual(result.total_rows, 1) + + query_results_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", + query_params={"maxResults": 0}, + timeout=None, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={}, + timeout=None, + ) + tabledata_call = mock.call( + method="GET", + path="/projects/dest-project/datasets/dest_dataset/tables/dest_table/data", + query_params={}, + timeout=None, + ) + conn.api_request.assert_has_calls( + [query_results_call, query_results_call, reload_call, tabledata_call] + ) + + def test_result_with_done_job_calls_get_query_results(self): + query_resource_done = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "1", + } + job_resource = self._make_resource(started=True, ended=True) + job_resource["configuration"]["query"]["destinationTable"] = { + "projectId": "dest-project", + "datasetId": "dest_dataset", + "tableId": "dest_table", + } + tabledata_resource = { + "totalRows": "1", + "pageToken": None, + "rows": [{"f": [{"v": "abc"}]}], + } + conn = _make_connection(query_resource_done, tabledata_resource) + client = _make_client(self.PROJECT, connection=conn) + job = self._get_target_class().from_api_repr(job_resource, client) + + result = job.result() + + rows = list(result) + self.assertEqual(len(rows), 1) + self.assertEqual(rows[0].col1, "abc") + + query_results_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", + query_params={"maxResults": 0}, + timeout=None, + ) + tabledata_call = mock.call( + method="GET", + path="/projects/dest-project/datasets/dest_dataset/tables/dest_table/data", + query_params={}, + timeout=None, + ) + conn.api_request.assert_has_calls([query_results_call, tabledata_call]) + + def test_result_with_max_results(self): + from 
google.cloud.bigquery.table import RowIterator + + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "5", + } + tabledata_resource = { + "totalRows": "5", + "pageToken": None, + "rows": [ + {"f": [{"v": "abc"}]}, + {"f": [{"v": "def"}]}, + {"f": [{"v": "ghi"}]}, + ], + } + connection = _make_connection(query_resource, tabledata_resource) + client = _make_client(self.PROJECT, connection=connection) + resource = self._make_resource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + + max_results = 3 + + result = job.result(max_results=max_results) + + self.assertIsInstance(result, RowIterator) + self.assertEqual(result.total_rows, 5) + + rows = list(result) + + self.assertEqual(len(rows), 3) + self.assertEqual(len(connection.api_request.call_args_list), 2) + tabledata_list_request = connection.api_request.call_args_list[1] + self.assertEqual( + tabledata_list_request[1]["query_params"]["maxResults"], max_results + ) + + def test_result_w_retry(self): + from google.cloud.bigquery.table import RowIterator + + query_resource = { + "jobComplete": False, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + } + query_resource_done = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "2", + } + job_resource = self._make_resource(started=True) + job_resource_done = self._make_resource(started=True, ended=True) + job_resource_done["configuration"]["query"]["destinationTable"] = { + "projectId": "dest-project", + "datasetId": "dest_dataset", + "tableId": "dest_table", + } + + connection = _make_connection( + exceptions.NotFound("not normally retriable"), + query_resource, + exceptions.NotFound("not normally retriable"), + query_resource_done, + exceptions.NotFound("not normally 
retriable"), + job_resource_done, + ) + client = _make_client(self.PROJECT, connection=connection) + job = self._get_target_class().from_api_repr(job_resource, client) + + custom_predicate = mock.Mock() + custom_predicate.return_value = True + custom_retry = google.api_core.retry.Retry( + initial=0.001, + maximum=0.001, + multiplier=1.0, + deadline=0.001, + predicate=custom_predicate, + ) + + self.assertIsInstance(job.result(retry=custom_retry), RowIterator) + query_results_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", + query_params={"maxResults": 0}, + timeout=None, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", + query_params={}, + timeout=None, + ) + + connection.api_request.assert_has_calls( + [query_results_call, query_results_call, reload_call] + ) + + def test_result_w_empty_schema(self): + from google.cloud.bigquery.table import _EmptyRowIterator + + # Destination table may have no schema for some DDL and DML queries. 
+ query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": []}, + } + connection = _make_connection(query_resource, query_resource) + client = _make_client(self.PROJECT, connection=connection) + resource = self._make_resource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + + result = job.result() + + self.assertIsInstance(result, _EmptyRowIterator) + self.assertEqual(list(result), []) + + def test_result_invokes_begins(self): + begun_resource = self._make_resource() + incomplete_resource = { + "jobComplete": False, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + } + query_resource = copy.deepcopy(incomplete_resource) + query_resource["jobComplete"] = True + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection( + begun_resource, + incomplete_resource, + query_resource, + done_resource, + query_resource, + ) + client = _make_client(project=self.PROJECT, connection=connection) + job = self._make_one(self.JOB_ID, self.QUERY, client) + + job.result() + + self.assertEqual(len(connection.api_request.call_args_list), 4) + begin_request = connection.api_request.call_args_list[0] + query_request = connection.api_request.call_args_list[2] + reload_request = connection.api_request.call_args_list[3] + self.assertEqual(begin_request[1]["method"], "POST") + self.assertEqual(query_request[1]["method"], "GET") + self.assertEqual(reload_request[1]["method"], "GET") + + def test_result_w_timeout(self): + begun_resource = self._make_resource() + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + } + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = 
_make_connection(begun_resource, query_resource, done_resource) + client = _make_client(project=self.PROJECT, connection=connection) + job = self._make_one(self.JOB_ID, self.QUERY, client) + + with freezegun.freeze_time("1970-01-01 00:00:00", tick=False): + job.result(timeout=1.0) + + self.assertEqual(len(connection.api_request.call_args_list), 3) + begin_request = connection.api_request.call_args_list[0] + query_request = connection.api_request.call_args_list[1] + reload_request = connection.api_request.call_args_list[2] + self.assertEqual(begin_request[1]["method"], "POST") + self.assertEqual(query_request[1]["method"], "GET") + self.assertEqual( + query_request[1]["path"], + "/projects/{}/queries/{}".format(self.PROJECT, self.JOB_ID), + ) + self.assertEqual(query_request[1]["query_params"]["timeoutMs"], 900) + self.assertEqual(reload_request[1]["method"], "GET") + + def test_result_w_page_size(self): + # Arrange + query_results_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "4", + } + job_resource = self._make_resource(started=True, ended=True) + q_config = job_resource["configuration"]["query"] + q_config["destinationTable"] = { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + } + tabledata_resource = { + "totalRows": 4, + "pageToken": "some-page-token", + "rows": [ + {"f": [{"v": "row1"}]}, + {"f": [{"v": "row2"}]}, + {"f": [{"v": "row3"}]}, + ], + } + tabledata_resource_page_2 = {"totalRows": 4, "rows": [{"f": [{"v": "row4"}]}]} + conn = _make_connection( + query_results_resource, tabledata_resource, tabledata_resource_page_2 + ) + client = _make_client(self.PROJECT, connection=conn) + job = self._get_target_class().from_api_repr(job_resource, client) + + # Act + result = job.result(page_size=3) + + # Assert + actual_rows = list(result) + self.assertEqual(len(actual_rows), 4) + + 
tabledata_path = "/projects/%s/datasets/%s/tables/%s/data" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + conn.api_request.assert_has_calls( + [ + mock.call( + method="GET", + path=tabledata_path, + query_params={"maxResults": 3}, + timeout=None, + ), + mock.call( + method="GET", + path=tabledata_path, + query_params={"pageToken": "some-page-token", "maxResults": 3}, + timeout=None, + ), + ] + ) + + def test_result_with_start_index(self): + from google.cloud.bigquery.table import RowIterator + + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "5", + } + tabledata_resource = { + "totalRows": "5", + "pageToken": None, + "rows": [ + {"f": [{"v": "abc"}]}, + {"f": [{"v": "def"}]}, + {"f": [{"v": "ghi"}]}, + {"f": [{"v": "jkl"}]}, + ], + } + connection = _make_connection(query_resource, tabledata_resource) + client = _make_client(self.PROJECT, connection=connection) + resource = self._make_resource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + + start_index = 1 + + result = job.result(start_index=start_index) + + self.assertIsInstance(result, RowIterator) + self.assertEqual(result.total_rows, 5) + + rows = list(result) + + self.assertEqual(len(rows), 4) + self.assertEqual(len(connection.api_request.call_args_list), 2) + tabledata_list_request = connection.api_request.call_args_list[1] + self.assertEqual( + tabledata_list_request[1]["query_params"]["startIndex"], start_index + ) + + def test_result_error(self): + from google.cloud import exceptions + + query = textwrap.dedent( + """ + SELECT foo, bar + FROM table_baz + WHERE foo == bar""" + ) + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, query, client) + error_result = { + "debugInfo": "DEBUG", + "location": "LOCATION", + "message": "MESSAGE", + "reason": "invalid", + } + job._properties["status"] = { + 
"errorResult": error_result, + "errors": [error_result], + "state": "DONE", + } + job._query_results = google.cloud.bigquery.query._QueryResults.from_api_repr( + {"jobComplete": True, "jobReference": job._properties["jobReference"]} + ) + job._set_future_result() + + with self.assertRaises(exceptions.GoogleCloudError) as exc_info: + job.result() + + self.assertIsInstance(exc_info.exception, exceptions.GoogleCloudError) + self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST) + + exc_job_instance = getattr(exc_info.exception, "query_job", None) + self.assertIs(exc_job_instance, job) + + full_text = str(exc_info.exception) + assert job.job_id in full_text + assert "Query Job SQL Follows" in full_text + + for i, line in enumerate(query.splitlines(), start=1): + expected_line = "{}:{}".format(i, line) + assert expected_line in full_text + + def test_result_transport_timeout_error(self): + query = textwrap.dedent( + """ + SELECT foo, bar + FROM table_baz + WHERE foo == bar""" + ) + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, query, client) + call_api_patch = mock.patch( + "google.cloud.bigquery.client.Client._call_api", + autospec=True, + side_effect=requests.exceptions.Timeout("Server response took too long."), + ) + + # Make sure that timeout errors get rebranded to concurrent futures timeout. 
+ with call_api_patch, self.assertRaises(concurrent.futures.TimeoutError): + job.result(timeout=1) + + def test__begin_error(self): + from google.cloud import exceptions + + query = textwrap.dedent( + """ + SELECT foo, bar + FROM table_baz + WHERE foo == bar""" + ) + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, query, client) + call_api_patch = mock.patch( + "google.cloud.bigquery.client.Client._call_api", + autospec=True, + side_effect=exceptions.BadRequest("Syntax error in SQL query"), + ) + + with call_api_patch, self.assertRaises(exceptions.GoogleCloudError) as exc_info: + job.result() + + self.assertIsInstance(exc_info.exception, exceptions.GoogleCloudError) + self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST) + + exc_job_instance = getattr(exc_info.exception, "query_job", None) + self.assertIs(exc_job_instance, job) + + full_text = str(exc_info.exception) + assert job.job_id in full_text + assert "Query Job SQL Follows" in full_text + + for i, line in enumerate(query.splitlines(), start=1): + expected_line = "{}:{}".format(i, line) + assert expected_line in full_text + + def test__begin_w_timeout(self): + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource() + + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, self.QUERY, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin(timeout=7.5) + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": {"query": self.QUERY, "useLegacySql": False} + }, + }, + timeout=7.5, + ) + + def test_begin_w_bound_client(self): + from google.cloud.bigquery.dataset import DatasetReference + 
from google.cloud.bigquery.job import QueryJobConfig + + PATH = "/projects/%s/jobs" % (self.PROJECT,) + DS_ID = "DATASET" + RESOURCE = self._make_resource() + # Ensure None for missing server-set props + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + + config = QueryJobConfig() + config.default_dataset = DatasetReference(self.PROJECT, DS_ID) + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + self.assertIsNone(job.default_dataset) + self.assertEqual(job.udf_resources, []) + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": { + "query": self.QUERY, + "useLegacySql": False, + "defaultDataset": { + "projectId": self.PROJECT, + "datasetId": DS_ID, + }, + } + }, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_alternate_client(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.job import QueryPriority + from google.cloud.bigquery.job import SchemaUpdateOption + from google.cloud.bigquery.job import WriteDisposition + + PATH = "/projects/%s/jobs" % (self.PROJECT,) + TABLE = "TABLE" + DS_ID = "DATASET" + RESOURCE = self._make_resource(ended=True) + QUERY_CONFIGURATION = { + "query": self.QUERY, + "allowLargeResults": True, + "createDisposition": CreateDisposition.CREATE_NEVER, + "defaultDataset": {"projectId": self.PROJECT, "datasetId": DS_ID}, 
+ "destinationTable": { + "projectId": self.PROJECT, + "datasetId": DS_ID, + "tableId": TABLE, + }, + "flattenResults": True, + "priority": QueryPriority.INTERACTIVE, + "useQueryCache": True, + "useLegacySql": True, + "writeDisposition": WriteDisposition.WRITE_TRUNCATE, + "maximumBillingTier": 4, + "maximumBytesBilled": "123456", + "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_RELAXATION], + } + RESOURCE["configuration"]["query"] = QUERY_CONFIGURATION + RESOURCE["configuration"]["dryRun"] = True + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection(RESOURCE) + client2 = _make_client(project=self.PROJECT, connection=conn2) + dataset_ref = DatasetReference(self.PROJECT, DS_ID) + table_ref = dataset_ref.table(TABLE) + + config = QueryJobConfig() + config.allow_large_results = True + config.create_disposition = CreateDisposition.CREATE_NEVER + config.default_dataset = dataset_ref + config.destination = table_ref + config.dry_run = True + config.flatten_results = True + config.maximum_billing_tier = 4 + config.priority = QueryPriority.INTERACTIVE + config.use_legacy_sql = True + config.use_query_cache = True + config.write_disposition = WriteDisposition.WRITE_TRUNCATE + config.maximum_bytes_billed = 123456 + config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_RELAXATION] + job = self._make_one(self.JOB_ID, self.QUERY, client1, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin(client=client2) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"dryRun": True, "query": QUERY_CONFIGURATION}, + }, + timeout=None, + ) + self._verifyResourceProperties(job, 
RESOURCE) + + def test_begin_w_udf(self): + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import UDFResource + + RESOURCE_URI = "gs://some-bucket/js/lib.js" + INLINE_UDF_CODE = 'var someCode = "here";' + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource() + # Ensure None for missing server-set props + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + RESOURCE["configuration"]["query"]["userDefinedFunctionResources"] = [ + {"resourceUri": RESOURCE_URI}, + {"inlineCode": INLINE_UDF_CODE}, + ] + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + udf_resources = [ + UDFResource("resourceUri", RESOURCE_URI), + UDFResource("inlineCode", INLINE_UDF_CODE), + ] + config = QueryJobConfig() + config.udf_resources = udf_resources + config.use_legacy_sql = True + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + self.assertEqual(job.udf_resources, udf_resources) + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": { + "query": self.QUERY, + "useLegacySql": True, + "userDefinedFunctionResources": [ + {"resourceUri": RESOURCE_URI}, + {"inlineCode": INLINE_UDF_CODE}, + ], + } + }, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_named_query_parameter(self): + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import ScalarQueryParameter + + query_parameters = [ScalarQueryParameter("foo", "INT64", 123)] + PATH = "/projects/%s/jobs" % (self.PROJECT,) + 
RESOURCE = self._make_resource() + # Ensure None for missing server-set props + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + config = RESOURCE["configuration"]["query"] + config["parameterMode"] = "NAMED" + config["queryParameters"] = [ + { + "name": "foo", + "parameterType": {"type": "INT64"}, + "parameterValue": {"value": "123"}, + } + ] + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + jconfig = QueryJobConfig() + jconfig.query_parameters = query_parameters + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=jconfig) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + self.assertEqual(job.query_parameters, query_parameters) + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": { + "query": self.QUERY, + "useLegacySql": False, + "parameterMode": "NAMED", + "queryParameters": config["queryParameters"], + } + }, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_positional_query_parameter(self): + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.query import ScalarQueryParameter + + query_parameters = [ScalarQueryParameter.positional("INT64", 123)] + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource() + # Ensure None for missing server-set props + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + config = RESOURCE["configuration"]["query"] + config["parameterMode"] = "POSITIONAL" + config["queryParameters"] = [ + {"parameterType": {"type": "INT64"}, "parameterValue": 
{"value": "123"}} + ] + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + jconfig = QueryJobConfig() + jconfig.query_parameters = query_parameters + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=jconfig) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + self.assertEqual(job.query_parameters, query_parameters) + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": { + "query": self.QUERY, + "useLegacySql": False, + "parameterMode": "POSITIONAL", + "queryParameters": config["queryParameters"], + } + }, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_begin_w_table_defs(self): + from google.cloud.bigquery.job import QueryJobConfig + from google.cloud.bigquery.external_config import ExternalConfig + from google.cloud.bigquery.external_config import BigtableColumn + from google.cloud.bigquery.external_config import BigtableColumnFamily + + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource() + # Ensure None for missing server-set props + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + + bt_config = ExternalConfig("BIGTABLE") + bt_config.ignore_unknown_values = True + bt_config.options.read_rowkey_as_string = True + cf = BigtableColumnFamily() + cf.family_id = "cf" + col = BigtableColumn() + col.field_name = "fn" + cf.columns = [col] + bt_config.options.column_families = [cf] + BT_CONFIG_RESOURCE = { + "sourceFormat": "BIGTABLE", + "ignoreUnknownValues": True, + "bigtableOptions": { + "readRowkeyAsString": True, + "columnFamilies": [ + {"familyId": "cf", "columns": [{"fieldName": 
"fn"}]} + ], + }, + } + CSV_CONFIG_RESOURCE = { + "sourceFormat": "CSV", + "maxBadRecords": 8, + "csvOptions": {"allowJaggedRows": True}, + } + csv_config = ExternalConfig("CSV") + csv_config.max_bad_records = 8 + csv_config.options.allow_jagged_rows = True + bt_table = "bigtable-table" + csv_table = "csv-table" + RESOURCE["configuration"]["query"]["tableDefinitions"] = { + bt_table: BT_CONFIG_RESOURCE, + csv_table: CSV_CONFIG_RESOURCE, + } + want_resource = copy.deepcopy(RESOURCE) + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + config = QueryJobConfig() + config.table_definitions = {bt_table: bt_config, csv_table: csv_config} + config.use_legacy_sql = True + job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": { + "query": self.QUERY, + "useLegacySql": True, + "tableDefinitions": { + bt_table: BT_CONFIG_RESOURCE, + csv_table: CSV_CONFIG_RESOURCE, + }, + } + }, + }, + timeout=None, + ) + self._verifyResourceProperties(job, want_resource) + + def test_dry_run_query(self): + from google.cloud.bigquery.job import QueryJobConfig + + PATH = "/projects/%s/jobs" % (self.PROJECT,) + RESOURCE = self._make_resource() + # Ensure None for missing server-set props + del RESOURCE["statistics"]["creationTime"] + del RESOURCE["etag"] + del RESOURCE["selfLink"] + del RESOURCE["user_email"] + RESOURCE["configuration"]["dryRun"] = True + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + config = QueryJobConfig() + config.dry_run = True + job = self._make_one(self.JOB_ID, self.QUERY, 
client, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job._begin() + + final_attributes.assert_called_with({"path": PATH}, client, job) + self.assertEqual(job.udf_resources, []) + conn.api_request.assert_called_once_with( + method="POST", + path=PATH, + data={ + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": { + "query": {"query": self.QUERY, "useLegacySql": False}, + "dryRun": True, + }, + }, + timeout=None, + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_exists_miss_w_bound_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + conn = _make_connection() + client = _make_client(project=self.PROJECT, connection=conn) + job = self._make_one(self.JOB_ID, self.QUERY, client) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertFalse(job.exists()) + + final_attributes.assert_called_with({"path": PATH}, client, job) + + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None + ) + + def test_exists_hit_w_alternate_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection({}) + client2 = _make_client(project=self.PROJECT, connection=conn2) + job = self._make_one(self.JOB_ID, self.QUERY, client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + self.assertTrue(job.exists(client=client2)) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={"fields": "id"}, timeout=None + ) + + def test_reload_w_bound_client(self): + 
from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job import QueryJobConfig + + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + DS_ID = "DATASET" + DEST_TABLE = "dest_table" + RESOURCE = self._make_resource() + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + dataset_ref = DatasetReference(self.PROJECT, DS_ID) + table_ref = dataset_ref.table(DEST_TABLE) + config = QueryJobConfig() + config.destination = table_ref + job = self._make_one(self.JOB_ID, None, client, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload() + + final_attributes.assert_called_with({"path": PATH}, client, job) + + self.assertNotEqual(job.destination, table_ref) + + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=None + ) + self._verifyResourceProperties(job, RESOURCE) + + def test_reload_w_alternate_client(self): + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + DS_ID = "DATASET" + DEST_TABLE = "dest_table" + RESOURCE = self._make_resource() + q_config = RESOURCE["configuration"]["query"] + q_config["destinationTable"] = { + "projectId": self.PROJECT, + "datasetId": DS_ID, + "tableId": DEST_TABLE, + } + conn1 = _make_connection() + client1 = _make_client(project=self.PROJECT, connection=conn1) + conn2 = _make_connection(RESOURCE) + client2 = _make_client(project=self.PROJECT, connection=conn2) + job = self._make_one(self.JOB_ID, self.QUERY, client1) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload(client=client2) + + final_attributes.assert_called_with({"path": PATH}, client2, job) + + conn1.api_request.assert_not_called() + conn2.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=None + ) + 
self._verifyResourceProperties(job, RESOURCE) + + def test_reload_w_timeout(self): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job import QueryJobConfig + + PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) + DS_ID = "DATASET" + DEST_TABLE = "dest_table" + RESOURCE = self._make_resource() + conn = _make_connection(RESOURCE) + client = _make_client(project=self.PROJECT, connection=conn) + dataset_ref = DatasetReference(self.PROJECT, DS_ID) + table_ref = dataset_ref.table(DEST_TABLE) + config = QueryJobConfig() + config.destination = table_ref + job = self._make_one(self.JOB_ID, None, client, job_config=config) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + job.reload(timeout=4.2) + + final_attributes.assert_called_with({"path": PATH}, client, job) + + self.assertNotEqual(job.destination, table_ref) + + conn.api_request.assert_called_once_with( + method="GET", path=PATH, query_params={}, timeout=4.2 + ) + + def test_iter(self): + import types + + begun_resource = self._make_resource() + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "totalRows": "0", + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + } + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection(begun_resource, query_resource, done_resource) + client = _make_client(project=self.PROJECT, connection=connection) + job = self._make_one(self.JOB_ID, self.QUERY, client) + + self.assertIsInstance(iter(job), types.GeneratorType) diff --git a/tests/unit/job/test_query_config.py b/tests/unit/job/test_query_config.py new file mode 100644 index 000000000..db03d6a3b --- /dev/null +++ b/tests/unit/job/test_query_config.py @@ -0,0 +1,255 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you 
may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from .helpers import _Base + + +class TestQueryJobConfig(_Base): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import QueryJobConfig + + return QueryJobConfig + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + config = self._make_one() + self.assertEqual(config._properties, {"query": {}}) + + def test_ctor_w_none(self): + config = self._make_one() + config.default_dataset = None + config.destination = None + self.assertIsNone(config.default_dataset) + self.assertIsNone(config.destination) + + def test_ctor_w_properties(self): + config = self._get_target_class()(use_query_cache=False, use_legacy_sql=True) + + self.assertFalse(config.use_query_cache) + self.assertTrue(config.use_legacy_sql) + + def test_ctor_w_string_default_dataset(self): + from google.cloud.bigquery import dataset + + default_dataset = "default-proj.default_dset" + config = self._get_target_class()(default_dataset=default_dataset) + expected = dataset.DatasetReference.from_string(default_dataset) + self.assertEqual(config.default_dataset, expected) + + def test_ctor_w_string_destinaton(self): + from google.cloud.bigquery import table + + destination = "dest-proj.dest_dset.dest_tbl" + config = self._get_target_class()(destination=destination) + expected = table.TableReference.from_string(destination) + self.assertEqual(config.destination, expected) + + def test_default_dataset_w_string(self): + from google.cloud.bigquery 
import dataset + + default_dataset = "default-proj.default_dset" + config = self._make_one() + config.default_dataset = default_dataset + expected = dataset.DatasetReference.from_string(default_dataset) + self.assertEqual(config.default_dataset, expected) + + def test_default_dataset_w_dataset(self): + from google.cloud.bigquery import dataset + + default_dataset = "default-proj.default_dset" + expected = dataset.DatasetReference.from_string(default_dataset) + config = self._make_one() + config.default_dataset = dataset.Dataset(expected) + self.assertEqual(config.default_dataset, expected) + + def test_destinaton_w_string(self): + from google.cloud.bigquery import table + + destination = "dest-proj.dest_dset.dest_tbl" + config = self._make_one() + config.destination = destination + expected = table.TableReference.from_string(destination) + self.assertEqual(config.destination, expected) + + def test_range_partitioning_w_none(self): + object_under_test = self._get_target_class()() + assert object_under_test.range_partitioning is None + + def test_range_partitioning_w_value(self): + object_under_test = self._get_target_class()() + object_under_test._properties["query"]["rangePartitioning"] = { + "field": "column_one", + "range": {"start": 1, "end": 1000, "interval": 10}, + } + object_under_test.range_partitioning.field == "column_one" + object_under_test.range_partitioning.range_.start == 1 + object_under_test.range_partitioning.range_.end == 1000 + object_under_test.range_partitioning.range_.interval == 10 + + def test_range_partitioning_setter(self): + from google.cloud.bigquery.table import PartitionRange + from google.cloud.bigquery.table import RangePartitioning + + object_under_test = self._get_target_class()() + object_under_test.range_partitioning = RangePartitioning( + field="column_one", range_=PartitionRange(start=1, end=1000, interval=10) + ) + object_under_test.range_partitioning.field == "column_one" + object_under_test.range_partitioning.range_.start == 
1 + object_under_test.range_partitioning.range_.end == 1000 + object_under_test.range_partitioning.range_.interval == 10 + + def test_range_partitioning_setter_w_none(self): + object_under_test = self._get_target_class()() + object_under_test.range_partitioning = None + assert object_under_test.range_partitioning is None + + def test_range_partitioning_setter_w_wrong_type(self): + object_under_test = self._get_target_class()() + with pytest.raises(ValueError, match="RangePartitioning"): + object_under_test.range_partitioning = object() + + def test_time_partitioning(self): + from google.cloud.bigquery import table + + time_partitioning = table.TimePartitioning( + type_=table.TimePartitioningType.DAY, field="name" + ) + config = self._make_one() + config.time_partitioning = time_partitioning + # TimePartitioning should be configurable after assigning + time_partitioning.expiration_ms = 10000 + + self.assertEqual(config.time_partitioning.type_, table.TimePartitioningType.DAY) + self.assertEqual(config.time_partitioning.field, "name") + self.assertEqual(config.time_partitioning.expiration_ms, 10000) + + config.time_partitioning = None + self.assertIsNone(config.time_partitioning) + + def test_clustering_fields(self): + fields = ["email", "postal_code"] + config = self._get_target_class()() + config.clustering_fields = fields + self.assertEqual(config.clustering_fields, fields) + + config.clustering_fields = None + self.assertIsNone(config.clustering_fields) + + def test_from_api_repr_empty(self): + klass = self._get_target_class() + config = klass.from_api_repr({}) + self.assertIsNone(config.dry_run) + self.assertIsNone(config.use_legacy_sql) + self.assertIsNone(config.default_dataset) + self.assertIsNone(config.destination) + self.assertIsNone(config.destination_encryption_configuration) + + def test_from_api_repr_normal(self): + from google.cloud.bigquery.dataset import DatasetReference + + resource = { + "query": { + "useLegacySql": True, + "query": "no property 
for me", + "defaultDataset": { + "projectId": "someproject", + "datasetId": "somedataset", + }, + "someNewProperty": "I should be saved, too.", + }, + "dryRun": True, + } + klass = self._get_target_class() + + config = klass.from_api_repr(resource) + + self.assertTrue(config.use_legacy_sql) + self.assertEqual( + config.default_dataset, DatasetReference("someproject", "somedataset") + ) + self.assertTrue(config.dry_run) + # Make sure unknown properties propagate. + self.assertEqual(config._properties["query"]["query"], "no property for me") + self.assertEqual( + config._properties["query"]["someNewProperty"], "I should be saved, too." + ) + + def test_to_api_repr_normal(self): + from google.cloud.bigquery.dataset import DatasetReference + + config = self._make_one() + config.use_legacy_sql = True + config.default_dataset = DatasetReference("someproject", "somedataset") + config.dry_run = False + config._properties["someNewProperty"] = "Woohoo, alpha stuff." + + resource = config.to_api_repr() + + self.assertFalse(resource["dryRun"]) + self.assertTrue(resource["query"]["useLegacySql"]) + self.assertEqual( + resource["query"]["defaultDataset"]["projectId"], "someproject" + ) + self.assertEqual( + resource["query"]["defaultDataset"]["datasetId"], "somedataset" + ) + # Make sure unknown properties propagate. 
+ self.assertEqual(resource["someNewProperty"], "Woohoo, alpha stuff.") + + def test_to_api_repr_with_encryption(self): + from google.cloud.bigquery.encryption_configuration import ( + EncryptionConfiguration, + ) + + config = self._make_one() + config.destination_encryption_configuration = EncryptionConfiguration( + kms_key_name=self.KMS_KEY_NAME + ) + resource = config.to_api_repr() + self.assertEqual( + resource, + { + "query": { + "destinationEncryptionConfiguration": { + "kmsKeyName": self.KMS_KEY_NAME + } + } + }, + ) + + def test_to_api_repr_with_encryption_none(self): + config = self._make_one() + config.destination_encryption_configuration = None + resource = config.to_api_repr() + self.assertEqual( + resource, {"query": {"destinationEncryptionConfiguration": None}} + ) + + def test_from_api_repr_with_encryption(self): + resource = { + "query": { + "destinationEncryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME} + } + } + klass = self._get_target_class() + config = klass.from_api_repr(resource) + self.assertEqual( + config.destination_encryption_configuration.kms_key_name, self.KMS_KEY_NAME + ) diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py new file mode 100644 index 000000000..37f4a6dec --- /dev/null +++ b/tests/unit/job/test_query_pandas.py @@ -0,0 +1,450 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import copy +import json + +import mock +import pytest + +try: + import pandas +except (ImportError, AttributeError): # pragma: NO COVER + pandas = None +try: + import pyarrow +except (ImportError, AttributeError): # pragma: NO COVER + pyarrow = None +try: + from google.cloud import bigquery_storage +except (ImportError, AttributeError): # pragma: NO COVER + bigquery_storage = None +try: + from tqdm import tqdm +except (ImportError, AttributeError): # pragma: NO COVER + tqdm = None + +from .helpers import _make_client +from .helpers import _make_connection +from .helpers import _make_job_resource + + +@pytest.mark.parametrize( + "query,expected", + ( + (None, False), + ("", False), + ("select name, age from table", False), + ("select name, age from table LIMIT 10;", False), + ("select name, age from table order by other_column;", True), + ("Select name, age From table Order By other_column", True), + ("SELECT name, age FROM table ORDER BY other_column;", True), + ("select name, age from table order\nby other_column", True), + ("Select name, age From table Order\nBy other_column;", True), + ("SELECT name, age FROM table ORDER\nBY other_column", True), + ("SelecT name, age froM table OrdeR \n\t BY other_column;", True), + ), +) +def test__contains_order_by(query, expected): + from google.cloud.bigquery import job as mut + + if expected: + assert mut._contains_order_by(query) + else: + assert not mut._contains_order_by(query) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +@pytest.mark.parametrize( + "query", + ( + "select name, age from table order by other_column;", + "Select name, age From table Order By other_column;", + "SELECT name, age FROM table ORDER BY other_column;", + "select name, age from table order\nby other_column;", + "Select name, age From table Order\nBy other_column;", + "SELECT name, age FROM table ORDER\nBY 
other_column;", + "SelecT name, age froM table OrdeR \n\t BY other_column;", + ), +) +def test_to_dataframe_bqstorage_preserve_order(query): + from google.cloud.bigquery.job import QueryJob as target_class + + job_resource = _make_job_resource( + project_id="test-project", job_type="query", ended=True + ) + job_resource["configuration"]["query"]["query"] = query + job_resource["status"] = {"state": "DONE"} + get_query_results_resource = { + "jobComplete": True, + "jobReference": {"projectId": "test-project", "jobId": "test-job"}, + "schema": { + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + ] + }, + "totalRows": "4", + } + connection = _make_connection(get_query_results_resource, job_resource) + client = _make_client(connection=connection) + job = target_class.from_api_repr(job_resource, client) + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() + session.avro_schema.schema = json.dumps( + { + "type": "record", + "name": "__root__", + "fields": [ + {"name": "name", "type": ["null", "string"]}, + {"name": "age", "type": ["null", "long"]}, + ], + } + ) + bqstorage_client.create_read_session.return_value = session + + job.to_dataframe(bqstorage_client=bqstorage_client) + + destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( + **job_resource["configuration"]["query"]["destinationTable"] + ) + expected_session = bigquery_storage.types.ReadSession( + table=destination_table, data_format=bigquery_storage.types.DataFormat.ARROW, + ) + bqstorage_client.create_read_session.assert_called_once_with( + parent="projects/test-project", + read_session=expected_session, + max_stream_count=1, # Use a single stream to preserve row order. 
+ ) + + +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +def test_to_arrow(): + from google.cloud.bigquery.job import QueryJob as target_class + + begun_resource = _make_job_resource(job_type="query") + query_resource = { + "jobComplete": True, + "jobReference": begun_resource["jobReference"], + "totalRows": "4", + "schema": { + "fields": [ + { + "name": "spouse_1", + "type": "RECORD", + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + ], + }, + { + "name": "spouse_2", + "type": "RECORD", + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + ], + }, + ] + }, + } + tabledata_resource = { + "rows": [ + { + "f": [ + {"v": {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}}, + {"v": {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}}, + ] + }, + { + "f": [ + {"v": {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}}, + {"v": {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}}, + ] + }, + ] + } + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection( + begun_resource, query_resource, done_resource, tabledata_resource + ) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, client) + + tbl = job.to_arrow(create_bqstorage_client=False) + + assert isinstance(tbl, pyarrow.Table) + assert tbl.num_rows == 2 + + # Check the schema. 
+ assert tbl.schema[0].name == "spouse_1" + assert tbl.schema[0].type[0].name == "name" + assert tbl.schema[0].type[1].name == "age" + assert pyarrow.types.is_struct(tbl.schema[0].type) + assert pyarrow.types.is_string(tbl.schema[0].type[0].type) + assert pyarrow.types.is_int64(tbl.schema[0].type[1].type) + assert tbl.schema[1].name == "spouse_2" + assert tbl.schema[1].type[0].name == "name" + assert tbl.schema[1].type[1].name == "age" + assert pyarrow.types.is_struct(tbl.schema[1].type) + assert pyarrow.types.is_string(tbl.schema[1].type[0].type) + assert pyarrow.types.is_int64(tbl.schema[1].type[1].type) + + # Check the data. + tbl_data = tbl.to_pydict() + spouse_1 = tbl_data["spouse_1"] + assert spouse_1 == [ + {"name": "Phred Phlyntstone", "age": 32}, + {"name": "Bhettye Rhubble", "age": 27}, + ] + spouse_2 = tbl_data["spouse_2"] + assert spouse_2 == [ + {"name": "Wylma Phlyntstone", "age": 29}, + {"name": "Bharney Rhubble", "age": 33}, + ] + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_to_dataframe(): + from google.cloud.bigquery.job import QueryJob as target_class + + begun_resource = _make_job_resource(job_type="query") + query_resource = { + "jobComplete": True, + "jobReference": begun_resource["jobReference"], + "totalRows": "4", + "schema": { + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + ] + }, + } + tabledata_resource = { + "rows": [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, + ] + } + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection( + begun_resource, query_resource, done_resource, tabledata_resource + ) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, 
client) + + df = job.to_dataframe(create_bqstorage_client=False) + + assert isinstance(df, pandas.DataFrame) + assert len(df) == 4 # verify the number of rows + assert list(df) == ["name", "age"] # verify the column names + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_to_dataframe_ddl_query(): + from google.cloud.bigquery.job import QueryJob as target_class + + # Destination table may have no schema for some DDL and DML queries. + resource = _make_job_resource(job_type="query", ended=True) + query_resource = { + "jobComplete": True, + "jobReference": resource["jobReference"], + "schema": {"fields": []}, + } + connection = _make_connection(query_resource) + client = _make_client(connection=connection) + job = target_class.from_api_repr(resource, client) + + df = job.to_dataframe() + + assert len(df) == 0 + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_to_dataframe_bqstorage(): + from google.cloud.bigquery.job import QueryJob as target_class + + resource = _make_job_resource(job_type="query", ended=True) + query_resource = { + "jobComplete": True, + "jobReference": resource["jobReference"], + "totalRows": "4", + "schema": { + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + ] + }, + } + connection = _make_connection(query_resource) + client = _make_client(connection=connection) + job = target_class.from_api_repr(resource, client) + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() + session.avro_schema.schema = json.dumps( + { + "type": "record", + "name": "__root__", + "fields": [ + {"name": "name", "type": ["null", "string"]}, + {"name": "age", "type": ["null", "long"]}, + ], + } + ) + bqstorage_client.create_read_session.return_value = session + + 
job.to_dataframe(bqstorage_client=bqstorage_client) + + destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( + **resource["configuration"]["query"]["destinationTable"] + ) + expected_session = bigquery_storage.types.ReadSession( + table=destination_table, data_format=bigquery_storage.types.DataFormat.ARROW, + ) + bqstorage_client.create_read_session.assert_called_once_with( + parent=f"projects/{client.project}", + read_session=expected_session, + max_stream_count=0, # Use default number of streams for best performance. + ) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_to_dataframe_column_dtypes(): + from google.cloud.bigquery.job import QueryJob as target_class + + begun_resource = _make_job_resource(job_type="query") + query_resource = { + "jobComplete": True, + "jobReference": begun_resource["jobReference"], + "totalRows": "4", + "schema": { + "fields": [ + {"name": "start_timestamp", "type": "TIMESTAMP"}, + {"name": "seconds", "type": "INT64"}, + {"name": "miles", "type": "FLOAT64"}, + {"name": "km", "type": "FLOAT64"}, + {"name": "payment_type", "type": "STRING"}, + {"name": "complete", "type": "BOOL"}, + {"name": "date", "type": "DATE"}, + ] + }, + } + row_data = [ + [ + "1.4338368E9", + "420", + "1.1", + "1.77", + "Cto_dataframeash", + "true", + "1999-12-01", + ], + ["1.3878117E9", "2580", "17.7", "28.5", "Cash", "false", "1953-06-14"], + ["1.3855653E9", "2280", "4.4", "7.1", "Credit", "true", "1981-11-04"], + ] + rows = [{"f": [{"v": field} for field in row]} for row in row_data] + query_resource["rows"] = rows + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection( + begun_resource, query_resource, done_resource, query_resource + ) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, client) + + df = job.to_dataframe(dtypes={"km": "float16"}, create_bqstorage_client=False) + 
+ assert isinstance(df, pandas.DataFrame) + assert len(df) == 3 # verify the number of rows + exp_columns = [field["name"] for field in query_resource["schema"]["fields"]] + assert list(df) == exp_columns # verify the column names + + assert df.start_timestamp.dtype.name == "datetime64[ns, UTC]" + assert df.seconds.dtype.name == "int64" + assert df.miles.dtype.name == "float64" + assert df.km.dtype.name == "float16" + assert df.payment_type.dtype.name == "object" + assert df.complete.dtype.name == "bool" + assert df.date.dtype.name == "object" + + +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_to_dataframe_column_date_dtypes(): + from google.cloud.bigquery.job import QueryJob as target_class + + begun_resource = _make_job_resource(job_type="query") + query_resource = { + "jobComplete": True, + "jobReference": begun_resource["jobReference"], + "totalRows": "1", + "schema": {"fields": [{"name": "date", "type": "DATE"}]}, + } + row_data = [ + ["1999-12-01"], + ] + rows = [{"f": [{"v": field} for field in row]} for row in row_data] + query_resource["rows"] = rows + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection( + begun_resource, query_resource, done_resource, query_resource + ) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, client) + df = job.to_dataframe(date_as_object=False, create_bqstorage_client=False) + + assert isinstance(df, pandas.DataFrame) + assert len(df) == 1 # verify the number of rows + exp_columns = [field["name"] for field in query_resource["schema"]["fields"]] + assert list(df) == exp_columns # verify the column names + assert df.date.dtype.name == "datetime64[ns]" + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") +@mock.patch("tqdm.tqdm") +def 
test_to_dataframe_with_progress_bar(tqdm_mock): + from google.cloud.bigquery.job import QueryJob as target_class + + begun_resource = _make_job_resource(job_type="query") + query_resource = { + "jobComplete": True, + "jobReference": begun_resource["jobReference"], + "totalRows": "4", + "schema": {"fields": [{"name": "name", "type": "STRING", "mode": "NULLABLE"}]}, + } + done_resource = copy.deepcopy(begun_resource) + done_resource["status"] = {"state": "DONE"} + connection = _make_connection( + begun_resource, query_resource, done_resource, query_resource, query_resource, + ) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, client) + + job.to_dataframe(progress_bar_type=None, create_bqstorage_client=False) + tqdm_mock.assert_not_called() + + job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False) + tqdm_mock.assert_called() diff --git a/tests/unit/job/test_query_stats.py b/tests/unit/job/test_query_stats.py new file mode 100644 index 000000000..09a0efc45 --- /dev/null +++ b/tests/unit/job/test_query_stats.py @@ -0,0 +1,356 @@ +# Copyright 2015 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from .helpers import _Base + + +class TestQueryPlanEntryStep(_Base): + KIND = "KIND" + SUBSTEPS = ("SUB1", "SUB2") + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import QueryPlanEntryStep + + return QueryPlanEntryStep + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + self.assertEqual(step.kind, self.KIND) + self.assertEqual(step.substeps, list(self.SUBSTEPS)) + + def test_from_api_repr_empty(self): + klass = self._get_target_class() + step = klass.from_api_repr({}) + self.assertIsNone(step.kind) + self.assertEqual(step.substeps, []) + + def test_from_api_repr_normal(self): + resource = {"kind": self.KIND, "substeps": self.SUBSTEPS} + klass = self._get_target_class() + step = klass.from_api_repr(resource) + self.assertEqual(step.kind, self.KIND) + self.assertEqual(step.substeps, list(self.SUBSTEPS)) + + def test___eq___mismatched_type(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + self.assertNotEqual(step, object()) + + def test___eq___mismatch_kind(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + other = self._make_one("OTHER", self.SUBSTEPS) + self.assertNotEqual(step, other) + + def test___eq___mismatch_substeps(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + other = self._make_one(self.KIND, ()) + self.assertNotEqual(step, other) + + def test___eq___hit(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + other = self._make_one(self.KIND, self.SUBSTEPS) + self.assertEqual(step, other) + + def test___eq___wrong_type(self): + step = self._make_one(self.KIND, self.SUBSTEPS) + self.assertFalse(step == "hello") + + +class TestQueryPlanEntry(_Base): + NAME = "NAME" + ENTRY_ID = 1234 + START_MS = 1522540800000 + END_MS = 1522540804000 + INPUT_STAGES = (88, 101) + PARALLEL_INPUTS = 1000 + COMPLETED_PARALLEL_INPUTS = 5 + WAIT_MS_AVG = 33 + WAIT_MS_MAX = 400 + WAIT_RATIO_AVG = 2.71828 + 
WAIT_RATIO_MAX = 3.14159 + READ_MS_AVG = 45 + READ_MS_MAX = 90 + READ_RATIO_AVG = 1.41421 + READ_RATIO_MAX = 1.73205 + COMPUTE_MS_AVG = 55 + COMPUTE_MS_MAX = 99 + COMPUTE_RATIO_AVG = 0.69315 + COMPUTE_RATIO_MAX = 1.09861 + WRITE_MS_AVG = 203 + WRITE_MS_MAX = 340 + WRITE_RATIO_AVG = 3.32193 + WRITE_RATIO_MAX = 2.30258 + RECORDS_READ = 100 + RECORDS_WRITTEN = 1 + STATUS = "STATUS" + SHUFFLE_OUTPUT_BYTES = 1024 + SHUFFLE_OUTPUT_BYTES_SPILLED = 1 + + START_RFC3339_MICROS = "2018-04-01T00:00:00.000000Z" + END_RFC3339_MICROS = "2018-04-01T00:00:04.000000Z" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import QueryPlanEntry + + return QueryPlanEntry + + def test_from_api_repr_empty(self): + klass = self._get_target_class() + + entry = klass.from_api_repr({}) + + self.assertIsNone(entry.name) + self.assertIsNone(entry.entry_id) + self.assertEqual(entry.input_stages, []) + self.assertIsNone(entry.start) + self.assertIsNone(entry.end) + self.assertIsNone(entry.parallel_inputs) + self.assertIsNone(entry.completed_parallel_inputs) + self.assertIsNone(entry.wait_ms_avg) + self.assertIsNone(entry.wait_ms_max) + self.assertIsNone(entry.wait_ratio_avg) + self.assertIsNone(entry.wait_ratio_max) + self.assertIsNone(entry.read_ms_avg) + self.assertIsNone(entry.read_ms_max) + self.assertIsNone(entry.read_ratio_avg) + self.assertIsNone(entry.read_ratio_max) + self.assertIsNone(entry.compute_ms_avg) + self.assertIsNone(entry.compute_ms_max) + self.assertIsNone(entry.compute_ratio_avg) + self.assertIsNone(entry.compute_ratio_max) + self.assertIsNone(entry.write_ms_avg) + self.assertIsNone(entry.write_ms_max) + self.assertIsNone(entry.write_ratio_avg) + self.assertIsNone(entry.write_ratio_max) + self.assertIsNone(entry.records_read) + self.assertIsNone(entry.records_written) + self.assertIsNone(entry.status) + self.assertIsNone(entry.shuffle_output_bytes) + self.assertIsNone(entry.shuffle_output_bytes_spilled) + self.assertEqual(entry.steps, []) + + def 
test_from_api_repr_normal(self): + from google.cloud.bigquery.job import QueryPlanEntryStep + + steps = [ + QueryPlanEntryStep( + kind=TestQueryPlanEntryStep.KIND, + substeps=TestQueryPlanEntryStep.SUBSTEPS, + ) + ] + resource = { + "name": self.NAME, + "id": self.ENTRY_ID, + "inputStages": self.INPUT_STAGES, + "startMs": self.START_MS, + "endMs": self.END_MS, + "waitMsAvg": self.WAIT_MS_AVG, + "waitMsMax": self.WAIT_MS_MAX, + "waitRatioAvg": self.WAIT_RATIO_AVG, + "waitRatioMax": self.WAIT_RATIO_MAX, + "readMsAvg": self.READ_MS_AVG, + "readMsMax": self.READ_MS_MAX, + "readRatioAvg": self.READ_RATIO_AVG, + "readRatioMax": self.READ_RATIO_MAX, + "computeMsAvg": self.COMPUTE_MS_AVG, + "computeMsMax": self.COMPUTE_MS_MAX, + "computeRatioAvg": self.COMPUTE_RATIO_AVG, + "computeRatioMax": self.COMPUTE_RATIO_MAX, + "writeMsAvg": self.WRITE_MS_AVG, + "writeMsMax": self.WRITE_MS_MAX, + "writeRatioAvg": self.WRITE_RATIO_AVG, + "writeRatioMax": self.WRITE_RATIO_MAX, + "recordsRead": self.RECORDS_READ, + "recordsWritten": self.RECORDS_WRITTEN, + "status": self.STATUS, + "shuffleOutputBytes": self.SHUFFLE_OUTPUT_BYTES, + "shuffleOutputBytesSpilled": self.SHUFFLE_OUTPUT_BYTES_SPILLED, + "steps": [ + { + "kind": TestQueryPlanEntryStep.KIND, + "substeps": TestQueryPlanEntryStep.SUBSTEPS, + } + ], + } + klass = self._get_target_class() + + entry = klass.from_api_repr(resource) + self.assertEqual(entry.name, self.NAME) + self.assertEqual(entry.entry_id, self.ENTRY_ID) + self.assertEqual(entry.wait_ratio_avg, self.WAIT_RATIO_AVG) + self.assertEqual(entry.wait_ratio_max, self.WAIT_RATIO_MAX) + self.assertEqual(entry.read_ratio_avg, self.READ_RATIO_AVG) + self.assertEqual(entry.read_ratio_max, self.READ_RATIO_MAX) + self.assertEqual(entry.compute_ratio_avg, self.COMPUTE_RATIO_AVG) + self.assertEqual(entry.compute_ratio_max, self.COMPUTE_RATIO_MAX) + self.assertEqual(entry.write_ratio_avg, self.WRITE_RATIO_AVG) + self.assertEqual(entry.write_ratio_max, self.WRITE_RATIO_MAX) + 
self.assertEqual(entry.records_read, self.RECORDS_READ) + self.assertEqual(entry.records_written, self.RECORDS_WRITTEN) + self.assertEqual(entry.status, self.STATUS) + self.assertEqual(entry.steps, steps) + + def test_start(self): + from google.cloud._helpers import _RFC3339_MICROS + + klass = self._get_target_class() + + entry = klass.from_api_repr({}) + self.assertEqual(entry.start, None) + + entry._properties["startMs"] = self.START_MS + self.assertEqual( + entry.start.strftime(_RFC3339_MICROS), self.START_RFC3339_MICROS + ) + + def test_end(self): + from google.cloud._helpers import _RFC3339_MICROS + + klass = self._get_target_class() + + entry = klass.from_api_repr({}) + self.assertEqual(entry.end, None) + + entry._properties["endMs"] = self.END_MS + self.assertEqual(entry.end.strftime(_RFC3339_MICROS), self.END_RFC3339_MICROS) + + +class TestScriptStackFrame(_Base): + def _make_one(self, resource): + from google.cloud.bigquery.job import ScriptStackFrame + + return ScriptStackFrame(resource) + + def test_procedure_id(self): + frame = self._make_one({"procedureId": "some-procedure"}) + self.assertEqual(frame.procedure_id, "some-procedure") + del frame._properties["procedureId"] + self.assertIsNone(frame.procedure_id) + + def test_start_line(self): + frame = self._make_one({"startLine": 5}) + self.assertEqual(frame.start_line, 5) + frame._properties["startLine"] = "5" + self.assertEqual(frame.start_line, 5) + + def test_start_column(self): + frame = self._make_one({"startColumn": 29}) + self.assertEqual(frame.start_column, 29) + frame._properties["startColumn"] = "29" + self.assertEqual(frame.start_column, 29) + + def test_end_line(self): + frame = self._make_one({"endLine": 9}) + self.assertEqual(frame.end_line, 9) + frame._properties["endLine"] = "9" + self.assertEqual(frame.end_line, 9) + + def test_end_column(self): + frame = self._make_one({"endColumn": 14}) + self.assertEqual(frame.end_column, 14) + frame._properties["endColumn"] = "14" + 
self.assertEqual(frame.end_column, 14) + + def test_text(self): + frame = self._make_one({"text": "QUERY TEXT"}) + self.assertEqual(frame.text, "QUERY TEXT") + + +class TestScriptStatistics(_Base): + def _make_one(self, resource): + from google.cloud.bigquery.job import ScriptStatistics + + return ScriptStatistics(resource) + + def test_evalutation_kind(self): + stats = self._make_one({"evaluationKind": "EXPRESSION"}) + self.assertEqual(stats.evaluation_kind, "EXPRESSION") + self.assertEqual(stats.stack_frames, []) + + def test_stack_frames(self): + stats = self._make_one( + { + "stackFrames": [ + { + "procedureId": "some-procedure", + "startLine": 5, + "startColumn": 29, + "endLine": 9, + "endColumn": 14, + "text": "QUERY TEXT", + }, + {}, + ] + } + ) + stack_frames = stats.stack_frames + self.assertEqual(len(stack_frames), 2) + stack_frame = stack_frames[0] + self.assertEqual(stack_frame.procedure_id, "some-procedure") + self.assertEqual(stack_frame.start_line, 5) + self.assertEqual(stack_frame.start_column, 29) + self.assertEqual(stack_frame.end_line, 9) + self.assertEqual(stack_frame.end_column, 14) + self.assertEqual(stack_frame.text, "QUERY TEXT") + stack_frame = stack_frames[1] + self.assertIsNone(stack_frame.procedure_id) + self.assertIsNone(stack_frame.start_line) + self.assertIsNone(stack_frame.start_column) + self.assertIsNone(stack_frame.end_line) + self.assertIsNone(stack_frame.end_column) + self.assertIsNone(stack_frame.text) + + +class TestTimelineEntry(_Base): + ELAPSED_MS = 101 + ACTIVE_UNITS = 50 + PENDING_UNITS = 98 + COMPLETED_UNITS = 520 + SLOT_MILLIS = 12029 + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import TimelineEntry + + return TimelineEntry + + def test_from_api_repr_empty(self): + klass = self._get_target_class() + entry = klass.from_api_repr({}) + self.assertIsNone(entry.elapsed_ms) + self.assertIsNone(entry.active_units) + self.assertIsNone(entry.pending_units) + 
self.assertIsNone(entry.completed_units) + self.assertIsNone(entry.slot_millis) + + def test_from_api_repr_normal(self): + resource = { + "elapsedMs": self.ELAPSED_MS, + "activeUnits": self.ACTIVE_UNITS, + "pendingUnits": self.PENDING_UNITS, + "completedUnits": self.COMPLETED_UNITS, + "totalSlotMs": self.SLOT_MILLIS, + } + klass = self._get_target_class() + + entry = klass.from_api_repr(resource) + self.assertEqual(entry.elapsed_ms, self.ELAPSED_MS) + self.assertEqual(entry.active_units, self.ACTIVE_UNITS) + self.assertEqual(entry.pending_units, self.PENDING_UNITS) + self.assertEqual(entry.completed_units, self.COMPLETED_UNITS) + self.assertEqual(entry.slot_millis, self.SLOT_MILLIS) diff --git a/tests/unit/test_job.py b/tests/unit/test_job.py deleted file mode 100644 index 8590e0576..000000000 --- a/tests/unit/test_job.py +++ /dev/null @@ -1,6448 +0,0 @@ -# Copyright 2015 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import concurrent -import copy -import json -import textwrap -import unittest -import warnings - -import freezegun -from google.api_core import exceptions -import google.api_core.retry -import mock -import pytest -import requests -from six.moves import http_client - -try: - import pandas -except (ImportError, AttributeError): # pragma: NO COVER - pandas = None - -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None -try: - from google.cloud import bigquery_storage -except (ImportError, AttributeError): # pragma: NO COVER - bigquery_storage = None -try: - from tqdm import tqdm -except (ImportError, AttributeError): # pragma: NO COVER - tqdm = None - -import google.cloud.bigquery.query - - -def _make_credentials(): - import google.auth.credentials - - return mock.Mock(spec=google.auth.credentials.Credentials) - - -def _make_client(project="test-project", connection=None): - from google.cloud.bigquery.client import Client - - if connection is None: - connection = _make_connection() - - client = Client(project=project, credentials=_make_credentials(), _http=object()) - client._connection = connection - return client - - -def _make_connection(*responses): - import google.cloud.bigquery._http - from google.cloud.exceptions import NotFound - - mock_conn = mock.create_autospec(google.cloud.bigquery._http.Connection) - mock_conn.api_request.side_effect = list(responses) + [NotFound("miss")] - return mock_conn - - -def _make_retriable_exception(): - return exceptions.TooManyRequests( - "retriable exception", errors=[{"reason": "rateLimitExceeded"}] - ) - - -def _make_job_resource( - creation_time_ms=1437767599006, - started_time_ms=1437767600007, - ended_time_ms=1437767601008, - started=False, - ended=False, - etag="abc-def-hjk", - endpoint="https://bigquery.googleapis.com", - job_type="load", - job_id="a-random-id", - project_id="some-project", - user_email="bq-user@example.com", -): - resource = { - "status": {"state": "PENDING"}, - 
"configuration": {job_type: {}}, - "statistics": {"creationTime": creation_time_ms, job_type: {}}, - "etag": etag, - "id": "{}:{}".format(project_id, job_id), - "jobReference": {"projectId": project_id, "jobId": job_id}, - "selfLink": "{}/bigquery/v2/projects/{}/jobs/{}".format( - endpoint, project_id, job_id - ), - "user_email": user_email, - } - - if started or ended: - resource["statistics"]["startTime"] = started_time_ms - resource["status"]["state"] = "RUNNING" - - if ended: - resource["statistics"]["endTime"] = ended_time_ms - resource["status"]["state"] = "DONE" - - if job_type == "query": - resource["configuration"]["query"]["destinationTable"] = { - "projectId": project_id, - "datasetId": "_temp_dataset", - "tableId": "_temp_table", - } - - return resource - - -class Test__error_result_to_exception(unittest.TestCase): - def _call_fut(self, *args, **kwargs): - from google.cloud.bigquery import job - - return job._error_result_to_exception(*args, **kwargs) - - def test_simple(self): - error_result = {"reason": "invalid", "message": "bad request"} - exception = self._call_fut(error_result) - self.assertEqual(exception.code, http_client.BAD_REQUEST) - self.assertTrue(exception.message.startswith("bad request")) - self.assertIn(error_result, exception.errors) - - def test_missing_reason(self): - error_result = {} - exception = self._call_fut(error_result) - self.assertEqual(exception.code, http_client.INTERNAL_SERVER_ERROR) - - -class Test_JobReference(unittest.TestCase): - JOB_ID = "job-id" - PROJECT = "test-project-123" - LOCATION = "us-central" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery import job - - return job._JobReference - - def _make_one(self, job_id, project, location): - return self._get_target_class()(job_id, project, location) - - def test_ctor(self): - job_ref = self._make_one(self.JOB_ID, self.PROJECT, self.LOCATION) - - self.assertEqual(job_ref.job_id, self.JOB_ID) - self.assertEqual(job_ref.project, self.PROJECT) 
- self.assertEqual(job_ref.location, self.LOCATION) - - def test__to_api_repr(self): - job_ref = self._make_one(self.JOB_ID, self.PROJECT, self.LOCATION) - - self.assertEqual( - job_ref._to_api_repr(), - { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": self.LOCATION, - }, - ) - - def test_from_api_repr(self): - api_repr = { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": self.LOCATION, - } - - job_ref = self._get_target_class()._from_api_repr(api_repr) - - self.assertEqual(job_ref.job_id, self.JOB_ID) - self.assertEqual(job_ref.project, self.PROJECT) - self.assertEqual(job_ref.location, self.LOCATION) - - -class Test_AsyncJob(unittest.TestCase): - JOB_ID = "job-id" - PROJECT = "test-project-123" - LOCATION = "us-central" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery import job - - return job._AsyncJob - - def _make_one(self, job_id, client): - return self._get_target_class()(job_id, client) - - def _make_derived_class(self): - class Derived(self._get_target_class()): - _JOB_TYPE = "derived" - - return Derived - - def _make_derived(self, job_id, client): - return self._make_derived_class()(job_id, client) - - @staticmethod - def _job_reference(job_id, project, location): - from google.cloud.bigquery import job - - return job._JobReference(job_id, project, location) - - def test_ctor_w_bare_job_id(self): - import threading - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - - self.assertEqual(job.job_id, self.JOB_ID) - self.assertEqual(job.project, self.PROJECT) - self.assertIsNone(job.location) - self.assertIs(job._client, client) - self.assertEqual( - job._properties, - {"jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}}, - ) - self.assertIsInstance(job._completion_lock, type(threading.Lock())) - self.assertEqual( - job.path, "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) - ) - - def test_ctor_w_job_ref(self): - import threading - - 
other_project = "other-project-234" - client = _make_client(project=other_project) - job_ref = self._job_reference(self.JOB_ID, self.PROJECT, self.LOCATION) - job = self._make_one(job_ref, client) - - self.assertEqual(job.job_id, self.JOB_ID) - self.assertEqual(job.project, self.PROJECT) - self.assertEqual(job.location, self.LOCATION) - self.assertIs(job._client, client) - self.assertEqual( - job._properties, - { - "jobReference": { - "projectId": self.PROJECT, - "location": self.LOCATION, - "jobId": self.JOB_ID, - } - }, - ) - self.assertFalse(job._result_set) - self.assertIsInstance(job._completion_lock, type(threading.Lock())) - self.assertEqual( - job.path, "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) - ) - - def test__require_client_w_none(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - - self.assertIs(job._require_client(None), client) - - def test__require_client_w_other(self): - client = _make_client(project=self.PROJECT) - other = object() - job = self._make_one(self.JOB_ID, client) - - self.assertIs(job._require_client(other), other) - - def test_job_type(self): - client = _make_client(project=self.PROJECT) - derived = self._make_derived(self.JOB_ID, client) - - self.assertEqual(derived.job_type, "derived") - - def test_parent_job_id(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - - self.assertIsNone(job.parent_job_id) - job._properties["statistics"] = {"parentJobId": "parent-job-123"} - self.assertEqual(job.parent_job_id, "parent-job-123") - - def test_script_statistics(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - - self.assertIsNone(job.script_statistics) - job._properties["statistics"] = { - "scriptStatistics": { - "evaluationKind": "EXPRESSION", - "stackFrames": [ - { - "startLine": 5, - "startColumn": 29, - "endLine": 9, - "endColumn": 14, - "text": "QUERY TEXT", - } - ], - } - } - 
script_stats = job.script_statistics - self.assertEqual(script_stats.evaluation_kind, "EXPRESSION") - stack_frames = script_stats.stack_frames - self.assertEqual(len(stack_frames), 1) - stack_frame = stack_frames[0] - self.assertIsNone(stack_frame.procedure_id) - self.assertEqual(stack_frame.start_line, 5) - self.assertEqual(stack_frame.start_column, 29) - self.assertEqual(stack_frame.end_line, 9) - self.assertEqual(stack_frame.end_column, 14) - self.assertEqual(stack_frame.text, "QUERY TEXT") - - def test_num_child_jobs(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - - self.assertEqual(job.num_child_jobs, 0) - job._properties["statistics"] = {"numChildJobs": "17"} - self.assertEqual(job.num_child_jobs, 17) - - def test_labels_miss(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertEqual(job.labels, {}) - - def test_labels_update_in_place(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - labels = job.labels - labels["foo"] = "bar" # update in place - self.assertEqual(job.labels, {"foo": "bar"}) - - def test_labels_hit(self): - labels = {"foo": "bar"} - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - job._properties["labels"] = labels - self.assertEqual(job.labels, labels) - - def test_etag(self): - etag = "ETAG-123" - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertIsNone(job.etag) - job._properties["etag"] = etag - self.assertEqual(job.etag, etag) - - def test_self_link(self): - self_link = "https://api.example.com/123" - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertIsNone(job.self_link) - job._properties["selfLink"] = self_link - self.assertEqual(job.self_link, self_link) - - def test_user_email(self): - user_email = "user@example.com" - client = 
_make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertIsNone(job.user_email) - job._properties["user_email"] = user_email - self.assertEqual(job.user_email, user_email) - - @staticmethod - def _datetime_and_millis(): - import datetime - import pytz - from google.cloud._helpers import _millis - - now = datetime.datetime.utcnow().replace( - microsecond=123000, tzinfo=pytz.UTC # stats timestamps have ms precision - ) - return now, _millis(now) - - def test_created(self): - now, millis = self._datetime_and_millis() - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertIsNone(job.created) - stats = job._properties["statistics"] = {} - self.assertIsNone(job.created) - stats["creationTime"] = millis - self.assertEqual(job.created, now) - - def test_started(self): - now, millis = self._datetime_and_millis() - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertIsNone(job.started) - stats = job._properties["statistics"] = {} - self.assertIsNone(job.started) - stats["startTime"] = millis - self.assertEqual(job.started, now) - - def test_ended(self): - now, millis = self._datetime_and_millis() - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertIsNone(job.ended) - stats = job._properties["statistics"] = {} - self.assertIsNone(job.ended) - stats["endTime"] = millis - self.assertEqual(job.ended, now) - - def test__job_statistics(self): - statistics = {"foo": "bar"} - client = _make_client(project=self.PROJECT) - derived = self._make_derived(self.JOB_ID, client) - self.assertEqual(derived._job_statistics(), {}) - stats = derived._properties["statistics"] = {} - self.assertEqual(derived._job_statistics(), {}) - stats["derived"] = statistics - self.assertEqual(derived._job_statistics(), statistics) - - def test_error_result(self): - error_result = { - "debugInfo": "DEBUG INFO", - "location": 
"LOCATION", - "message": "MESSAGE", - "reason": "REASON", - } - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertIsNone(job.error_result) - status = job._properties["status"] = {} - self.assertIsNone(job.error_result) - status["errorResult"] = error_result - self.assertEqual(job.error_result, error_result) - - def test_errors(self): - errors = [ - { - "debugInfo": "DEBUG INFO", - "location": "LOCATION", - "message": "MESSAGE", - "reason": "REASON", - } - ] - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertIsNone(job.errors) - status = job._properties["status"] = {} - self.assertIsNone(job.errors) - status["errors"] = errors - self.assertEqual(job.errors, errors) - - def test_state(self): - state = "STATE" - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - self.assertIsNone(job.state) - status = job._properties["status"] = {} - self.assertIsNone(job.state) - status["state"] = state - self.assertEqual(job.state, state) - - def _set_properties_job(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - job._set_future_result = mock.Mock() - job._properties = { - "jobReference": job._properties["jobReference"], - "foo": "bar", - } - return job - - def test__set_properties_no_stats(self): - config = {"test": True} - resource = {"configuration": config} - job = self._set_properties_job() - - job._set_properties(resource) - - self.assertEqual(job._properties, resource) - - def test__set_properties_w_creation_time(self): - now, millis = self._datetime_and_millis() - config = {"test": True} - stats = {"creationTime": str(millis)} - resource = {"configuration": config, "statistics": stats} - job = self._set_properties_job() - - job._set_properties(resource) - - cleaned = copy.deepcopy(resource) - cleaned["statistics"]["creationTime"] = float(millis) - self.assertEqual(job._properties, 
cleaned) - - def test__set_properties_w_start_time(self): - now, millis = self._datetime_and_millis() - config = {"test": True} - stats = {"startTime": str(millis)} - resource = {"configuration": config, "statistics": stats} - job = self._set_properties_job() - - job._set_properties(resource) - - cleaned = copy.deepcopy(resource) - cleaned["statistics"]["startTime"] = float(millis) - self.assertEqual(job._properties, cleaned) - - def test__set_properties_w_end_time(self): - now, millis = self._datetime_and_millis() - config = {"test": True} - stats = {"endTime": str(millis)} - resource = {"configuration": config, "statistics": stats} - job = self._set_properties_job() - - job._set_properties(resource) - - cleaned = copy.deepcopy(resource) - cleaned["statistics"]["endTime"] = float(millis) - self.assertEqual(job._properties, cleaned) - - def test__check_resource_config_missing_job_ref(self): - resource = {} - klass = self._make_derived_class() - - with self.assertRaises(KeyError): - klass._check_resource_config(resource) - - def test__check_resource_config_missing_job_id(self): - resource = {"jobReference": {}} - klass = self._make_derived_class() - - with self.assertRaises(KeyError): - klass._check_resource_config(resource) - - def test__check_resource_config_missing_configuration(self): - resource = {"jobReference": {"jobId": self.JOB_ID}} - klass = self._make_derived_class() - - with self.assertRaises(KeyError): - klass._check_resource_config(resource) - - def test__check_resource_config_missing_config_type(self): - resource = {"jobReference": {"jobId": self.JOB_ID}, "configuration": {}} - klass = self._make_derived_class() - - with self.assertRaises(KeyError): - klass._check_resource_config(resource) - - def test__check_resource_config_ok(self): - derived_config = {"foo": "bar"} - resource = { - "jobReference": {"jobId": self.JOB_ID}, - "configuration": {"derived": derived_config}, - } - klass = self._make_derived_class() - - # Should not throw. 
- klass._check_resource_config(resource) - - def test__build_resource(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - resource = job._build_resource() - assert resource["jobReference"]["jobId"] == self.JOB_ID - - def test_to_api_repr(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - resource = job.to_api_repr() - assert resource["jobReference"]["jobId"] == self.JOB_ID - - def test__begin_already(self): - job = self._set_properties_job() - job._properties["status"] = {"state": "WHATEVER"} - - with self.assertRaises(ValueError): - job._begin() - - def test__begin_defaults(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - resource = { - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": None, - }, - "configuration": {"test": True}, - } - job = self._set_properties_job() - builder = job.to_api_repr = mock.Mock() - builder.return_value = resource - call_api = job._client._call_api = mock.Mock() - call_api.return_value = resource - path = "/projects/{}/jobs".format(self.PROJECT) - job._begin() - - call_api.assert_called_once_with( - DEFAULT_RETRY, - span_name="BigQuery.job.begin", - span_attributes={"path": path}, - job_ref=job, - method="POST", - path=path, - data=resource, - timeout=None, - ) - self.assertEqual(job._properties, resource) - - def test__begin_explicit(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - other_project = "other-project-234" - resource = { - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": None, - }, - "configuration": {"test": True}, - } - job = self._set_properties_job() - builder = job.to_api_repr = mock.Mock() - builder.return_value = resource - client = _make_client(project=other_project) - call_api = client._call_api = mock.Mock() - call_api.return_value = resource - retry = DEFAULT_RETRY.with_deadline(1) - path = 
"/projects/{}/jobs".format(self.PROJECT) - job._begin(client=client, retry=retry, timeout=7.5) - - call_api.assert_called_once_with( - retry, - span_name="BigQuery.job.begin", - span_attributes={"path": path}, - job_ref=job, - method="POST", - path=path, - data=resource, - timeout=7.5, - ) - self.assertEqual(job._properties, resource) - - def test_exists_defaults_miss(self): - from google.cloud.exceptions import NotFound - from google.cloud.bigquery.retry import DEFAULT_RETRY - - job = self._set_properties_job() - job._properties["jobReference"]["location"] = self.LOCATION - call_api = job._client._call_api = mock.Mock() - call_api.side_effect = NotFound("testing") - self.assertFalse(job.exists()) - - call_api.assert_called_once_with( - DEFAULT_RETRY, - span_name="BigQuery.job.exists", - span_attributes={ - "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) - }, - job_ref=job, - method="GET", - path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), - query_params={"fields": "id", "location": self.LOCATION}, - timeout=None, - ) - - def test_exists_explicit_hit(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - other_project = "other-project-234" - resource = { - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": None, - }, - "configuration": {"test": True}, - } - job = self._set_properties_job() - client = _make_client(project=other_project) - call_api = client._call_api = mock.Mock() - call_api.return_value = resource - retry = DEFAULT_RETRY.with_deadline(1) - self.assertTrue(job.exists(client=client, retry=retry)) - - call_api.assert_called_once_with( - retry, - span_name="BigQuery.job.exists", - span_attributes={ - "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) - }, - job_ref=job, - method="GET", - path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), - query_params={"fields": "id"}, - timeout=None, - ) - - def test_exists_w_timeout(self): - from 
google.cloud.bigquery.retry import DEFAULT_RETRY - - PATH = "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) - job = self._set_properties_job() - call_api = job._client._call_api = mock.Mock() - job.exists(timeout=7.5) - - call_api.assert_called_once_with( - DEFAULT_RETRY, - span_name="BigQuery.job.exists", - span_attributes={"path": PATH}, - job_ref=job, - method="GET", - path=PATH, - query_params={"fields": "id"}, - timeout=7.5, - ) - - def test_reload_defaults(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - resource = { - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": None, - }, - "configuration": {"test": True}, - } - job = self._set_properties_job() - job._properties["jobReference"]["location"] = self.LOCATION - call_api = job._client._call_api = mock.Mock() - call_api.return_value = resource - job.reload() - - call_api.assert_called_once_with( - DEFAULT_RETRY, - span_name="BigQuery.job.reload", - span_attributes={ - "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) - }, - job_ref=job, - method="GET", - path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), - query_params={"location": self.LOCATION}, - timeout=None, - ) - self.assertEqual(job._properties, resource) - - def test_reload_explicit(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - other_project = "other-project-234" - resource = { - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": None, - }, - "configuration": {"test": True}, - } - job = self._set_properties_job() - client = _make_client(project=other_project) - call_api = client._call_api = mock.Mock() - call_api.return_value = resource - retry = DEFAULT_RETRY.with_deadline(1) - job.reload(client=client, retry=retry, timeout=4.2) - - call_api.assert_called_once_with( - retry, - span_name="BigQuery.job.reload", - span_attributes={ - "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID) - }, - 
job_ref=job, - method="GET", - path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID), - query_params={}, - timeout=4.2, - ) - self.assertEqual(job._properties, resource) - - def test_cancel_defaults(self): - resource = { - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": None, - }, - "configuration": {"test": True}, - } - response = {"job": resource} - job = self._set_properties_job() - job._properties["jobReference"]["location"] = self.LOCATION - connection = job._client._connection = _make_connection(response) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertTrue(job.cancel()) - - final_attributes.assert_called() - - connection.api_request.assert_called_once_with( - method="POST", - path="/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID), - query_params={"location": self.LOCATION}, - timeout=None, - ) - self.assertEqual(job._properties, resource) - - def test_cancel_explicit(self): - other_project = "other-project-234" - resource = { - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": None, - }, - "configuration": {"test": True}, - } - response = {"job": resource} - job = self._set_properties_job() - client = _make_client(project=other_project) - connection = client._connection = _make_connection(response) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertTrue(job.cancel(client=client, timeout=7.5)) - - final_attributes.assert_called_with( - {"path": "/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID)}, - client, - job, - ) - - connection.api_request.assert_called_once_with( - method="POST", - path="/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID), - query_params={}, - timeout=7.5, - ) - self.assertEqual(job._properties, resource) - - def test_cancel_w_custom_retry(self): - from 
google.cloud.bigquery.retry import DEFAULT_RETRY - - api_path = "/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID) - resource = { - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": None, - }, - "configuration": {"test": True}, - } - response = {"job": resource} - job = self._set_properties_job() - - api_request_patcher = mock.patch.object( - job._client._connection, "api_request", side_effect=[ValueError, response] - ) - retry = DEFAULT_RETRY.with_deadline(1).with_predicate( - lambda exc: isinstance(exc, ValueError) - ) - - with api_request_patcher as fake_api_request: - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - result = job.cancel(retry=retry, timeout=7.5) - - final_attributes.assert_called() - - self.assertTrue(result) - self.assertEqual(job._properties, resource) - self.assertEqual( - fake_api_request.call_args_list, - [ - mock.call(method="POST", path=api_path, query_params={}, timeout=7.5), - mock.call( - method="POST", path=api_path, query_params={}, timeout=7.5 - ), # was retried once - ], - ) - - def test__set_future_result_wo_done(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - set_exception = job.set_exception = mock.Mock() - set_result = job.set_result = mock.Mock() - - job._set_future_result() - - set_exception.assert_not_called() - set_result.assert_not_called() - - def test__set_future_result_w_result_set(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - job._properties["status"] = {"state": "DONE"} - job._result_set = True - set_exception = job.set_exception = mock.Mock() - set_result = job.set_result = mock.Mock() - - job._set_future_result() - - set_exception.assert_not_called() - set_result.assert_not_called() - - def test__set_future_result_w_done_wo_result_set_w_error(self): - from google.cloud.exceptions import NotFound - 
- client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - job._properties["status"] = { - "state": "DONE", - "errorResult": {"reason": "notFound", "message": "testing"}, - } - set_exception = job.set_exception = mock.Mock() - set_result = job.set_result = mock.Mock() - - job._set_future_result() - - set_exception.assert_called_once() - args, kw = set_exception.call_args - (exception,) = args - self.assertIsInstance(exception, NotFound) - self.assertEqual(exception.message, "testing") - self.assertEqual(kw, {}) - set_result.assert_not_called() - - def test__set_future_result_w_done_wo_result_set_wo_error(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - job._properties["status"] = {"state": "DONE"} - set_exception = job.set_exception = mock.Mock() - set_result = job.set_result = mock.Mock() - - job._set_future_result() - - set_exception.assert_not_called() - set_result.assert_called_once_with(job) - - def test_done_defaults_wo_state(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - reload_ = job.reload = mock.Mock() - - self.assertFalse(job.done()) - - reload_.assert_called_once_with(retry=DEFAULT_RETRY, timeout=None) - - def test_done_explicit_wo_state(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - reload_ = job.reload = mock.Mock() - retry = DEFAULT_RETRY.with_deadline(1) - - self.assertFalse(job.done(retry=retry, timeout=7.5)) - - reload_.assert_called_once_with(retry=retry, timeout=7.5) - - def test_done_already(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - job._properties["status"] = {"state": "DONE"} - - self.assertTrue(job.done()) - - def test_result_default_wo_state(self): - begun_job_resource = 
_make_job_resource( - job_id=self.JOB_ID, project_id=self.PROJECT, started=True - ) - done_job_resource = _make_job_resource( - job_id=self.JOB_ID, project_id=self.PROJECT, started=True, ended=True - ) - conn = _make_connection( - _make_retriable_exception(), - begun_job_resource, - _make_retriable_exception(), - done_job_resource, - ) - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, client) - - self.assertIs(job.result(), job) - - begin_call = mock.call( - method="POST", - path=f"/projects/{self.PROJECT}/jobs", - data={"jobReference": {"jobId": self.JOB_ID, "projectId": self.PROJECT}}, - timeout=None, - ) - reload_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={}, - timeout=None, - ) - conn.api_request.assert_has_calls( - [begin_call, begin_call, reload_call, reload_call] - ) - - def test_result_w_retry_wo_state(self): - begun_job_resource = _make_job_resource( - job_id=self.JOB_ID, project_id=self.PROJECT, started=True - ) - done_job_resource = _make_job_resource( - job_id=self.JOB_ID, project_id=self.PROJECT, started=True, ended=True - ) - conn = _make_connection( - exceptions.NotFound("not normally retriable"), - begun_job_resource, - # The call to done() / reload() does not get the custom retry - # policy passed to it, so we don't throw a non-retriable - # exception here. 
See: - # https://github.com/googleapis/python-bigquery/issues/24 - _make_retriable_exception(), - done_job_resource, - ) - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, client) - custom_predicate = mock.Mock() - custom_predicate.return_value = True - custom_retry = google.api_core.retry.Retry(predicate=custom_predicate) - self.assertIs(job.result(retry=custom_retry), job) - - begin_call = mock.call( - method="POST", - path=f"/projects/{self.PROJECT}/jobs", - data={"jobReference": {"jobId": self.JOB_ID, "projectId": self.PROJECT}}, - timeout=None, - ) - reload_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={}, - timeout=None, - ) - conn.api_request.assert_has_calls( - [begin_call, begin_call, reload_call, reload_call] - ) - - def test_result_explicit_w_state(self): - conn = _make_connection() - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, client) - # Use _set_properties() instead of directly modifying _properties so - # that the result state is set properly. 
- job_resource = job._properties - job_resource["status"] = {"state": "DONE"} - job._set_properties(job_resource) - timeout = 1 - - self.assertIs(job.result(timeout=timeout), job) - - conn.api_request.assert_not_called() - - def test_cancelled_wo_error_result(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - - self.assertFalse(job.cancelled()) - - def test_cancelled_w_error_result_not_stopped(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - job._properties["status"] = {"errorResult": {"reason": "other"}} - - self.assertFalse(job.cancelled()) - - def test_cancelled_w_error_result_w_stopped(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, client) - job._properties["status"] = {"errorResult": {"reason": "stopped"}} - - self.assertTrue(job.cancelled()) - - -class Test_JobConfig(unittest.TestCase): - JOB_TYPE = "testing" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery import job - - return job._JobConfig - - def _make_one(self, job_type=JOB_TYPE): - return self._get_target_class()(job_type) - - def test_ctor(self): - job_config = self._make_one() - self.assertEqual(job_config._job_type, self.JOB_TYPE) - self.assertEqual(job_config._properties, {self.JOB_TYPE: {}}) - - def test_fill_from_default(self): - from google.cloud.bigquery import QueryJobConfig - - job_config = QueryJobConfig() - job_config.dry_run = True - job_config.maximum_bytes_billed = 1000 - - default_job_config = QueryJobConfig() - default_job_config.use_query_cache = True - default_job_config.maximum_bytes_billed = 2000 - - final_job_config = job_config._fill_from_default(default_job_config) - self.assertTrue(final_job_config.dry_run) - self.assertTrue(final_job_config.use_query_cache) - self.assertEqual(final_job_config.maximum_bytes_billed, 1000) - - def test_fill_from_default_conflict(self): - from google.cloud.bigquery import 
QueryJobConfig - - basic_job_config = QueryJobConfig() - conflicting_job_config = self._make_one("conflicting_job_type") - self.assertNotEqual( - basic_job_config._job_type, conflicting_job_config._job_type - ) - - with self.assertRaises(TypeError): - basic_job_config._fill_from_default(conflicting_job_config) - - @mock.patch("google.cloud.bigquery._helpers._get_sub_prop") - def test__get_sub_prop_wo_default(self, _get_sub_prop): - job_config = self._make_one() - key = "key" - self.assertIs(job_config._get_sub_prop(key), _get_sub_prop.return_value) - _get_sub_prop.assert_called_once_with( - job_config._properties, [self.JOB_TYPE, key], default=None - ) - - @mock.patch("google.cloud.bigquery._helpers._get_sub_prop") - def test__get_sub_prop_w_default(self, _get_sub_prop): - job_config = self._make_one() - key = "key" - default = "default" - self.assertIs( - job_config._get_sub_prop(key, default=default), _get_sub_prop.return_value - ) - _get_sub_prop.assert_called_once_with( - job_config._properties, [self.JOB_TYPE, key], default=default - ) - - @mock.patch("google.cloud.bigquery._helpers._set_sub_prop") - def test__set_sub_prop(self, _set_sub_prop): - job_config = self._make_one() - key = "key" - value = "value" - job_config._set_sub_prop(key, value) - _set_sub_prop.assert_called_once_with( - job_config._properties, [self.JOB_TYPE, key], value - ) - - def test_to_api_repr(self): - job_config = self._make_one() - expected = job_config._properties = {self.JOB_TYPE: {"foo": "bar"}} - found = job_config.to_api_repr() - self.assertEqual(found, expected) - self.assertIsNot(found, expected) # copied - - # 'from_api_repr' cannot be tested on '_JobConfig', because it presumes - # the ctor can be called w/o arguments - - def test_labels_miss(self): - job_config = self._make_one() - self.assertEqual(job_config.labels, {}) - - def test_labels_update_in_place(self): - job_config = self._make_one() - labels = job_config.labels - labels["foo"] = "bar" # update in place - 
self.assertEqual(job_config.labels, {"foo": "bar"}) - - def test_labels_hit(self): - labels = {"foo": "bar"} - job_config = self._make_one() - job_config._properties["labels"] = labels - self.assertEqual(job_config.labels, labels) - - def test_labels_setter_invalid(self): - labels = object() - job_config = self._make_one() - with self.assertRaises(ValueError): - job_config.labels = labels - - def test_labels_setter(self): - labels = {"foo": "bar"} - job_config = self._make_one() - job_config.labels = labels - self.assertEqual(job_config._properties["labels"], labels) - - -class _Base(object): - from google.cloud.bigquery.dataset import DatasetReference - from google.cloud.bigquery.table import TableReference - - ENDPOINT = "https://bigquery.googleapis.com" - PROJECT = "project" - SOURCE1 = "http://example.com/source1.csv" - DS_ID = "dataset_id" - DS_REF = DatasetReference(PROJECT, DS_ID) - TABLE_ID = "table_id" - TABLE_REF = TableReference(DS_REF, TABLE_ID) - JOB_ID = "JOB_ID" - KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1" - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def _setUpConstants(self): - import datetime - from google.cloud._helpers import UTC - - self.WHEN_TS = 1437767599.006 - self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC) - self.ETAG = "ETAG" - self.FULL_JOB_ID = "%s:%s" % (self.PROJECT, self.JOB_ID) - self.RESOURCE_URL = "{}/bigquery/v2/projects/{}/jobs/{}".format( - self.ENDPOINT, self.PROJECT, self.JOB_ID - ) - self.USER_EMAIL = "phred@example.com" - - def _table_ref(self, table_id): - from google.cloud.bigquery.table import TableReference - - return TableReference(self.DS_REF, table_id) - - def _make_resource(self, started=False, ended=False): - self._setUpConstants() - return _make_job_resource( - creation_time_ms=int(self.WHEN_TS * 1000), - started_time_ms=int(self.WHEN_TS * 1000), - ended_time_ms=int(self.WHEN_TS * 1000) + 1000000, - started=started, - 
ended=ended, - etag=self.ETAG, - endpoint=self.ENDPOINT, - job_type=self.JOB_TYPE, - job_id=self.JOB_ID, - project_id=self.PROJECT, - user_email=self.USER_EMAIL, - ) - - def _verifyInitialReadonlyProperties(self, job): - # root elements of resource - self.assertIsNone(job.etag) - self.assertIsNone(job.self_link) - self.assertIsNone(job.user_email) - - # derived from resource['statistics'] - self.assertIsNone(job.created) - self.assertIsNone(job.started) - self.assertIsNone(job.ended) - - # derived from resource['status'] - self.assertIsNone(job.error_result) - self.assertIsNone(job.errors) - self.assertIsNone(job.state) - - def _verifyReadonlyResourceProperties(self, job, resource): - from datetime import timedelta - - statistics = resource.get("statistics", {}) - - if "creationTime" in statistics: - self.assertEqual(job.created, self.WHEN) - else: - self.assertIsNone(job.created) - - if "startTime" in statistics: - self.assertEqual(job.started, self.WHEN) - else: - self.assertIsNone(job.started) - - if "endTime" in statistics: - self.assertEqual(job.ended, self.WHEN + timedelta(seconds=1000)) - else: - self.assertIsNone(job.ended) - - if "etag" in resource: - self.assertEqual(job.etag, self.ETAG) - else: - self.assertIsNone(job.etag) - - if "selfLink" in resource: - self.assertEqual(job.self_link, self.RESOURCE_URL) - else: - self.assertIsNone(job.self_link) - - if "user_email" in resource: - self.assertEqual(job.user_email, self.USER_EMAIL) - else: - self.assertIsNone(job.user_email) - - -class TestLoadJobConfig(unittest.TestCase, _Base): - JOB_TYPE = "load" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import LoadJobConfig - - return LoadJobConfig - - def test_ctor_w_properties(self): - config = self._get_target_class()( - allow_jagged_rows=True, allow_quoted_newlines=True - ) - - self.assertTrue(config.allow_jagged_rows) - self.assertTrue(config.allow_quoted_newlines) - - def test_allow_jagged_rows_missing(self): - config = 
self._get_target_class()() - self.assertIsNone(config.allow_jagged_rows) - - def test_allow_jagged_rows_hit(self): - config = self._get_target_class()() - config._properties["load"]["allowJaggedRows"] = True - self.assertTrue(config.allow_jagged_rows) - - def test_allow_jagged_rows_setter(self): - config = self._get_target_class()() - config.allow_jagged_rows = True - self.assertTrue(config._properties["load"]["allowJaggedRows"]) - - def test_allow_quoted_newlines_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.allow_quoted_newlines) - - def test_allow_quoted_newlines_hit(self): - config = self._get_target_class()() - config._properties["load"]["allowQuotedNewlines"] = True - self.assertTrue(config.allow_quoted_newlines) - - def test_allow_quoted_newlines_setter(self): - config = self._get_target_class()() - config.allow_quoted_newlines = True - self.assertTrue(config._properties["load"]["allowQuotedNewlines"]) - - def test_autodetect_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.autodetect) - - def test_autodetect_hit(self): - config = self._get_target_class()() - config._properties["load"]["autodetect"] = True - self.assertTrue(config.autodetect) - - def test_autodetect_setter(self): - config = self._get_target_class()() - config.autodetect = True - self.assertTrue(config._properties["load"]["autodetect"]) - - def test_clustering_fields_miss(self): - config = self._get_target_class()() - self.assertIsNone(config.clustering_fields) - - def test_clustering_fields_hit(self): - config = self._get_target_class()() - fields = ["email", "postal_code"] - config._properties["load"]["clustering"] = {"fields": fields} - self.assertEqual(config.clustering_fields, fields) - - def test_clustering_fields_setter(self): - fields = ["email", "postal_code"] - config = self._get_target_class()() - config.clustering_fields = fields - self.assertEqual(config._properties["load"]["clustering"], {"fields": fields}) - - def 
test_clustering_fields_setter_w_none(self): - config = self._get_target_class()() - fields = ["email", "postal_code"] - config._properties["load"]["clustering"] = {"fields": fields} - config.clustering_fields = None - self.assertIsNone(config.clustering_fields) - self.assertNotIn("clustering", config._properties["load"]) - - def test_create_disposition_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.create_disposition) - - def test_create_disposition_hit(self): - from google.cloud.bigquery.job import CreateDisposition - - disposition = CreateDisposition.CREATE_IF_NEEDED - config = self._get_target_class()() - config._properties["load"]["createDisposition"] = disposition - self.assertEqual(config.create_disposition, disposition) - - def test_create_disposition_setter(self): - from google.cloud.bigquery.job import CreateDisposition - - disposition = CreateDisposition.CREATE_IF_NEEDED - config = self._get_target_class()() - config.create_disposition = disposition - self.assertEqual(config._properties["load"]["createDisposition"], disposition) - - def test_destination_encryption_configuration_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.destination_encryption_configuration) - - def test_destination_encryption_configuration_hit(self): - from google.cloud.bigquery.encryption_configuration import ( - EncryptionConfiguration, - ) - - kms_key_name = "kms-key-name" - encryption_configuration = EncryptionConfiguration(kms_key_name) - config = self._get_target_class()() - config._properties["load"]["destinationEncryptionConfiguration"] = { - "kmsKeyName": kms_key_name - } - self.assertEqual( - config.destination_encryption_configuration, encryption_configuration - ) - - def test_destination_encryption_configuration_setter(self): - from google.cloud.bigquery.encryption_configuration import ( - EncryptionConfiguration, - ) - - kms_key_name = "kms-key-name" - encryption_configuration = 
EncryptionConfiguration(kms_key_name) - config = self._get_target_class()() - config.destination_encryption_configuration = encryption_configuration - expected = {"kmsKeyName": kms_key_name} - self.assertEqual( - config._properties["load"]["destinationEncryptionConfiguration"], expected - ) - - def test_destination_encryption_configuration_setter_w_none(self): - kms_key_name = "kms-key-name" - config = self._get_target_class()() - config._properties["load"]["destinationEncryptionConfiguration"] = { - "kmsKeyName": kms_key_name - } - config.destination_encryption_configuration = None - self.assertIsNone(config.destination_encryption_configuration) - self.assertNotIn( - "destinationEncryptionConfiguration", config._properties["load"] - ) - - def test_destination_table_description_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.destination_table_description) - - def test_destination_table_description_hit(self): - description = "Description" - config = self._get_target_class()() - config._properties["load"]["destinationTableProperties"] = { - "description": description - } - self.assertEqual(config.destination_table_description, description) - - def test_destination_table_description_setter(self): - description = "Description" - config = self._get_target_class()() - config.destination_table_description = description - expected = {"description": description} - self.assertEqual( - config._properties["load"]["destinationTableProperties"], expected - ) - - def test_destination_table_description_setter_w_fn_already(self): - description = "Description" - friendly_name = "Friendly Name" - config = self._get_target_class()() - config._properties["load"]["destinationTableProperties"] = { - "friendlyName": friendly_name - } - config.destination_table_description = description - expected = {"friendlyName": friendly_name, "description": description} - self.assertEqual( - config._properties["load"]["destinationTableProperties"], expected - ) - - def 
test_destination_table_description_w_none(self): - description = "Description" - friendly_name = "Friendly Name" - config = self._get_target_class()() - config._properties["load"]["destinationTableProperties"] = { - "description": description, - "friendlyName": friendly_name, - } - config.destination_table_description = None - expected = {"friendlyName": friendly_name} - self.assertEqual( - config._properties["load"]["destinationTableProperties"], expected - ) - - def test_destination_table_friendly_name_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.destination_table_friendly_name) - - def test_destination_table_friendly_name_hit(self): - friendly_name = "Friendly Name" - config = self._get_target_class()() - config._properties["load"]["destinationTableProperties"] = { - "friendlyName": friendly_name - } - self.assertEqual(config.destination_table_friendly_name, friendly_name) - - def test_destination_table_friendly_name_setter(self): - friendly_name = "Friendly Name" - config = self._get_target_class()() - config.destination_table_friendly_name = friendly_name - expected = {"friendlyName": friendly_name} - self.assertEqual( - config._properties["load"]["destinationTableProperties"], expected - ) - - def test_destination_table_friendly_name_setter_w_descr_already(self): - friendly_name = "Friendly Name" - description = "Description" - config = self._get_target_class()() - config._properties["load"]["destinationTableProperties"] = { - "description": description - } - config.destination_table_friendly_name = friendly_name - expected = {"friendlyName": friendly_name, "description": description} - self.assertEqual( - config._properties["load"]["destinationTableProperties"], expected - ) - - def test_destination_table_friendly_name_w_none(self): - friendly_name = "Friendly Name" - description = "Description" - config = self._get_target_class()() - config._properties["load"]["destinationTableProperties"] = { - "description": description, 
- "friendlyName": friendly_name, - } - config.destination_table_friendly_name = None - expected = {"description": description} - self.assertEqual( - config._properties["load"]["destinationTableProperties"], expected - ) - - def test_encoding_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.encoding) - - def test_encoding_hit(self): - from google.cloud.bigquery.job import Encoding - - encoding = Encoding.UTF_8 - config = self._get_target_class()() - config._properties["load"]["encoding"] = encoding - self.assertEqual(config.encoding, encoding) - - def test_encoding_setter(self): - from google.cloud.bigquery.job import Encoding - - encoding = Encoding.UTF_8 - config = self._get_target_class()() - config.encoding = encoding - self.assertEqual(config._properties["load"]["encoding"], encoding) - - def test_field_delimiter_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.field_delimiter) - - def test_field_delimiter_hit(self): - field_delimiter = "|" - config = self._get_target_class()() - config._properties["load"]["fieldDelimiter"] = field_delimiter - self.assertEqual(config.field_delimiter, field_delimiter) - - def test_field_delimiter_setter(self): - field_delimiter = "|" - config = self._get_target_class()() - config.field_delimiter = field_delimiter - self.assertEqual(config._properties["load"]["fieldDelimiter"], field_delimiter) - - def test_hive_partitioning_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.hive_partitioning) - - def test_hive_partitioning_hit(self): - from google.cloud.bigquery.external_config import HivePartitioningOptions - - config = self._get_target_class()() - config._properties["load"]["hivePartitioningOptions"] = { - "sourceUriPrefix": "http://foo/bar", - "mode": "STRINGS", - } - result = config.hive_partitioning - self.assertIsInstance(result, HivePartitioningOptions) - self.assertEqual(result.source_uri_prefix, "http://foo/bar") - 
self.assertEqual(result.mode, "STRINGS") - - def test_hive_partitioning_setter(self): - from google.cloud.bigquery.external_config import HivePartitioningOptions - - hive_partitioning = HivePartitioningOptions() - hive_partitioning.source_uri_prefix = "http://foo/bar" - hive_partitioning.mode = "AUTO" - - config = self._get_target_class()() - config.hive_partitioning = hive_partitioning - self.assertEqual( - config._properties["load"]["hivePartitioningOptions"], - {"sourceUriPrefix": "http://foo/bar", "mode": "AUTO"}, - ) - - config.hive_partitioning = None - self.assertIsNone(config._properties["load"]["hivePartitioningOptions"]) - - def test_hive_partitioning_invalid_type(self): - config = self._get_target_class()() - - with self.assertRaises(TypeError): - config.hive_partitioning = {"mode": "AUTO"} - - def test_ignore_unknown_values_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.ignore_unknown_values) - - def test_ignore_unknown_values_hit(self): - config = self._get_target_class()() - config._properties["load"]["ignoreUnknownValues"] = True - self.assertTrue(config.ignore_unknown_values) - - def test_ignore_unknown_values_setter(self): - config = self._get_target_class()() - config.ignore_unknown_values = True - self.assertTrue(config._properties["load"]["ignoreUnknownValues"]) - - def test_max_bad_records_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.max_bad_records) - - def test_max_bad_records_hit(self): - max_bad_records = 13 - config = self._get_target_class()() - config._properties["load"]["maxBadRecords"] = max_bad_records - self.assertEqual(config.max_bad_records, max_bad_records) - - def test_max_bad_records_setter(self): - max_bad_records = 13 - config = self._get_target_class()() - config.max_bad_records = max_bad_records - self.assertEqual(config._properties["load"]["maxBadRecords"], max_bad_records) - - def test_null_marker_missing(self): - config = self._get_target_class()() - 
self.assertIsNone(config.null_marker) - - def test_null_marker_hit(self): - null_marker = "XXX" - config = self._get_target_class()() - config._properties["load"]["nullMarker"] = null_marker - self.assertEqual(config.null_marker, null_marker) - - def test_null_marker_setter(self): - null_marker = "XXX" - config = self._get_target_class()() - config.null_marker = null_marker - self.assertEqual(config._properties["load"]["nullMarker"], null_marker) - - def test_quote_character_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.quote_character) - - def test_quote_character_hit(self): - quote_character = "'" - config = self._get_target_class()() - config._properties["load"]["quote"] = quote_character - self.assertEqual(config.quote_character, quote_character) - - def test_quote_character_setter(self): - quote_character = "'" - config = self._get_target_class()() - config.quote_character = quote_character - self.assertEqual(config._properties["load"]["quote"], quote_character) - - def test_schema_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.schema) - - def test_schema_hit(self): - from google.cloud.bigquery.schema import SchemaField - - config = self._get_target_class()() - all_props_repr = { - "mode": "REQUIRED", - "name": "foo", - "type": "INTEGER", - "description": "Foo", - } - minimal_repr = {"name": "bar", "type": "STRING"} - config._properties["load"]["schema"] = { - "fields": [all_props_repr, minimal_repr] - } - all_props, minimal = config.schema - self.assertEqual(all_props, SchemaField.from_api_repr(all_props_repr)) - self.assertEqual(minimal, SchemaField.from_api_repr(minimal_repr)) - - def test_schema_setter_fields(self): - from google.cloud.bigquery.schema import SchemaField - - config = self._get_target_class()() - full_name = SchemaField("full_name", "STRING", mode="REQUIRED") - age = SchemaField("age", "INTEGER", mode="REQUIRED") - config.schema = [full_name, age] - full_name_repr = { - 
"name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - } - age_repr = { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": None, - } - self.assertEqual( - config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} - ) - - def test_schema_setter_valid_mappings_list(self): - config = self._get_target_class()() - - schema = [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, - ] - config.schema = schema - - full_name_repr = { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - } - age_repr = { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": None, - } - self.assertEqual( - config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} - ) - - def test_schema_setter_invalid_mappings_list(self): - config = self._get_target_class()() - - schema = [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "typeoo": "INTEGER", "mode": "REQUIRED"}, - ] - - with self.assertRaises(Exception): - config.schema = schema - - def test_schema_setter_unsetting_schema(self): - from google.cloud.bigquery.schema import SchemaField - - config = self._get_target_class()() - config._properties["load"]["schema"] = [ - SchemaField("full_name", "STRING", mode="REQUIRED"), - SchemaField("age", "INTEGER", mode="REQUIRED"), - ] - - config.schema = None - self.assertNotIn("schema", config._properties["load"]) - config.schema = None # no error, idempotent operation - - def test_schema_update_options_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.schema_update_options) - - def test_schema_update_options_hit(self): - from google.cloud.bigquery.job import SchemaUpdateOption - - options = [ - SchemaUpdateOption.ALLOW_FIELD_ADDITION, - SchemaUpdateOption.ALLOW_FIELD_RELAXATION, - ] - config = self._get_target_class()() - 
config._properties["load"]["schemaUpdateOptions"] = options - self.assertEqual(config.schema_update_options, options) - - def test_schema_update_options_setter(self): - from google.cloud.bigquery.job import SchemaUpdateOption - - options = [ - SchemaUpdateOption.ALLOW_FIELD_ADDITION, - SchemaUpdateOption.ALLOW_FIELD_RELAXATION, - ] - config = self._get_target_class()() - config.schema_update_options = options - self.assertEqual(config._properties["load"]["schemaUpdateOptions"], options) - - def test_skip_leading_rows_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.skip_leading_rows) - - def test_skip_leading_rows_hit_w_str(self): - skip_leading_rows = 1 - config = self._get_target_class()() - config._properties["load"]["skipLeadingRows"] = str(skip_leading_rows) - self.assertEqual(config.skip_leading_rows, skip_leading_rows) - - def test_skip_leading_rows_hit_w_integer(self): - skip_leading_rows = 1 - config = self._get_target_class()() - config._properties["load"]["skipLeadingRows"] = skip_leading_rows - self.assertEqual(config.skip_leading_rows, skip_leading_rows) - - def test_skip_leading_rows_setter(self): - skip_leading_rows = 1 - config = self._get_target_class()() - config.skip_leading_rows = skip_leading_rows - self.assertEqual( - config._properties["load"]["skipLeadingRows"], str(skip_leading_rows) - ) - - def test_source_format_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.source_format) - - def test_source_format_hit(self): - from google.cloud.bigquery.job import SourceFormat - - source_format = SourceFormat.CSV - config = self._get_target_class()() - config._properties["load"]["sourceFormat"] = source_format - self.assertEqual(config.source_format, source_format) - - def test_source_format_setter(self): - from google.cloud.bigquery.job import SourceFormat - - source_format = SourceFormat.CSV - config = self._get_target_class()() - config.source_format = source_format - 
self.assertEqual(config._properties["load"]["sourceFormat"], source_format) - - def test_range_partitioning_w_none(self): - object_under_test = self._get_target_class()() - assert object_under_test.range_partitioning is None - - def test_range_partitioning_w_value(self): - object_under_test = self._get_target_class()() - object_under_test._properties["load"]["rangePartitioning"] = { - "field": "column_one", - "range": {"start": 1, "end": 1000, "interval": 10}, - } - object_under_test.range_partitioning.field == "column_one" - object_under_test.range_partitioning.range_.start == 1 - object_under_test.range_partitioning.range_.end == 1000 - object_under_test.range_partitioning.range_.interval == 10 - - def test_range_partitioning_setter(self): - from google.cloud.bigquery.table import PartitionRange - from google.cloud.bigquery.table import RangePartitioning - - object_under_test = self._get_target_class()() - object_under_test.range_partitioning = RangePartitioning( - field="column_one", range_=PartitionRange(start=1, end=1000, interval=10) - ) - object_under_test.range_partitioning.field == "column_one" - object_under_test.range_partitioning.range_.start == 1 - object_under_test.range_partitioning.range_.end == 1000 - object_under_test.range_partitioning.range_.interval == 10 - - def test_range_partitioning_setter_w_none(self): - object_under_test = self._get_target_class()() - object_under_test.range_partitioning = None - assert object_under_test.range_partitioning is None - - def test_range_partitioning_setter_w_wrong_type(self): - object_under_test = self._get_target_class()() - with pytest.raises(ValueError, match="RangePartitioning"): - object_under_test.range_partitioning = object() - - def test_time_partitioning_miss(self): - config = self._get_target_class()() - self.assertIsNone(config.time_partitioning) - - def test_time_partitioning_hit(self): - from google.cloud.bigquery.table import TimePartitioning - from google.cloud.bigquery.table import 
TimePartitioningType - - field = "creation_date" - year_ms = 86400 * 1000 * 365 - config = self._get_target_class()() - config._properties["load"]["timePartitioning"] = { - "type": TimePartitioningType.DAY, - "field": field, - "expirationMs": str(year_ms), - "requirePartitionFilter": False, - } - with warnings.catch_warnings(record=True) as warned: - expected = TimePartitioning( - type_=TimePartitioningType.DAY, - field=field, - expiration_ms=year_ms, - require_partition_filter=False, - ) - self.assertEqual(config.time_partitioning, expected) - - assert len(warned) == 1 - warning = warned[0] - assert "TimePartitioning.require_partition_filter" in str(warning) - - def test_time_partitioning_setter(self): - from google.cloud.bigquery.table import TimePartitioning - from google.cloud.bigquery.table import TimePartitioningType - - field = "creation_date" - year_ms = 86400 * 1000 * 365 - - with warnings.catch_warnings(record=True) as warned: - time_partitioning = TimePartitioning( - type_=TimePartitioningType.DAY, - field=field, - expiration_ms=year_ms, - require_partition_filter=False, - ) - - config = self._get_target_class()() - config.time_partitioning = time_partitioning - expected = { - "type": TimePartitioningType.DAY, - "field": field, - "expirationMs": str(year_ms), - "requirePartitionFilter": False, - } - self.assertEqual(config._properties["load"]["timePartitioning"], expected) - - assert len(warned) == 1 - warning = warned[0] - assert "TimePartitioning.require_partition_filter" in str(warning) - - def test_time_partitioning_setter_w_none(self): - from google.cloud.bigquery.table import TimePartitioningType - - field = "creation_date" - year_ms = 86400 * 1000 * 365 - config = self._get_target_class()() - config._properties["load"]["timePartitioning"] = { - "type": TimePartitioningType.DAY, - "field": field, - "expirationMs": str(year_ms), - "requirePartitionFilter": False, - } - config.time_partitioning = None - self.assertIsNone(config.time_partitioning) - 
self.assertNotIn("timePartitioning", config._properties["load"]) - - def test_use_avro_logical_types(self): - config = self._get_target_class()() - self.assertIsNone(config.use_avro_logical_types) - - def test_use_avro_logical_types_setter(self): - config = self._get_target_class()() - config.use_avro_logical_types = True - self.assertTrue(config._properties["load"]["useAvroLogicalTypes"]) - - def test_write_disposition_missing(self): - config = self._get_target_class()() - self.assertIsNone(config.write_disposition) - - def test_write_disposition_hit(self): - from google.cloud.bigquery.job import WriteDisposition - - write_disposition = WriteDisposition.WRITE_TRUNCATE - config = self._get_target_class()() - config._properties["load"]["writeDisposition"] = write_disposition - self.assertEqual(config.write_disposition, write_disposition) - - def test_write_disposition_setter(self): - from google.cloud.bigquery.job import WriteDisposition - - write_disposition = WriteDisposition.WRITE_TRUNCATE - config = self._get_target_class()() - config.write_disposition = write_disposition - self.assertEqual( - config._properties["load"]["writeDisposition"], write_disposition - ) - - -class TestLoadJob(unittest.TestCase, _Base): - JOB_TYPE = "load" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import LoadJob - - return LoadJob - - def _setUpConstants(self): - super(TestLoadJob, self)._setUpConstants() - self.INPUT_FILES = 2 - self.INPUT_BYTES = 12345 - self.OUTPUT_BYTES = 23456 - self.OUTPUT_ROWS = 345 - - def _make_resource(self, started=False, ended=False): - resource = super(TestLoadJob, self)._make_resource(started, ended) - config = resource["configuration"]["load"] - config["sourceUris"] = [self.SOURCE1] - config["destinationTable"] = { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - } - - if ended: - resource["status"] = {"state": "DONE"} - resource["statistics"]["load"]["inputFiles"] = self.INPUT_FILES 
- resource["statistics"]["load"]["inputFileBytes"] = self.INPUT_BYTES - resource["statistics"]["load"]["outputBytes"] = self.OUTPUT_BYTES - resource["statistics"]["load"]["outputRows"] = self.OUTPUT_ROWS - - return resource - - def _verifyBooleanConfigProperties(self, job, config): - if "allowJaggedRows" in config: - self.assertEqual(job.allow_jagged_rows, config["allowJaggedRows"]) - else: - self.assertIsNone(job.allow_jagged_rows) - if "allowQuotedNewlines" in config: - self.assertEqual(job.allow_quoted_newlines, config["allowQuotedNewlines"]) - else: - self.assertIsNone(job.allow_quoted_newlines) - if "autodetect" in config: - self.assertEqual(job.autodetect, config["autodetect"]) - else: - self.assertIsNone(job.autodetect) - if "ignoreUnknownValues" in config: - self.assertEqual(job.ignore_unknown_values, config["ignoreUnknownValues"]) - else: - self.assertIsNone(job.ignore_unknown_values) - if "useAvroLogicalTypes" in config: - self.assertEqual(job.use_avro_logical_types, config["useAvroLogicalTypes"]) - else: - self.assertIsNone(job.use_avro_logical_types) - - def _verifyEnumConfigProperties(self, job, config): - if "createDisposition" in config: - self.assertEqual(job.create_disposition, config["createDisposition"]) - else: - self.assertIsNone(job.create_disposition) - if "encoding" in config: - self.assertEqual(job.encoding, config["encoding"]) - else: - self.assertIsNone(job.encoding) - if "sourceFormat" in config: - self.assertEqual(job.source_format, config["sourceFormat"]) - else: - self.assertIsNone(job.source_format) - if "writeDisposition" in config: - self.assertEqual(job.write_disposition, config["writeDisposition"]) - else: - self.assertIsNone(job.write_disposition) - if "schemaUpdateOptions" in config: - self.assertEqual(job.schema_update_options, config["schemaUpdateOptions"]) - else: - self.assertIsNone(job.schema_update_options) - - def _verifyResourceProperties(self, job, resource): - self._verifyReadonlyResourceProperties(job, resource) - - 
config = resource.get("configuration", {}).get("load") - - self._verifyBooleanConfigProperties(job, config) - self._verifyEnumConfigProperties(job, config) - - self.assertEqual(job.source_uris, config["sourceUris"]) - - table_ref = config["destinationTable"] - self.assertEqual(job.destination.project, table_ref["projectId"]) - self.assertEqual(job.destination.dataset_id, table_ref["datasetId"]) - self.assertEqual(job.destination.table_id, table_ref["tableId"]) - - if "fieldDelimiter" in config: - self.assertEqual(job.field_delimiter, config["fieldDelimiter"]) - else: - self.assertIsNone(job.field_delimiter) - if "maxBadRecords" in config: - self.assertEqual(job.max_bad_records, config["maxBadRecords"]) - else: - self.assertIsNone(job.max_bad_records) - if "nullMarker" in config: - self.assertEqual(job.null_marker, config["nullMarker"]) - else: - self.assertIsNone(job.null_marker) - if "quote" in config: - self.assertEqual(job.quote_character, config["quote"]) - else: - self.assertIsNone(job.quote_character) - if "skipLeadingRows" in config: - self.assertEqual(str(job.skip_leading_rows), config["skipLeadingRows"]) - else: - self.assertIsNone(job.skip_leading_rows) - - if "destinationEncryptionConfiguration" in config: - self.assertIsNotNone(job.destination_encryption_configuration) - self.assertEqual( - job.destination_encryption_configuration.kms_key_name, - config["destinationEncryptionConfiguration"]["kmsKeyName"], - ) - else: - self.assertIsNone(job.destination_encryption_configuration) - - def test_ctor(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - self.assertEqual(job.destination, self.TABLE_REF) - self.assertEqual(list(job.source_uris), [self.SOURCE1]) - self.assertIs(job._client, client) - self.assertEqual(job.job_type, self.JOB_TYPE) - self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) - - self._verifyInitialReadonlyProperties(job) - - # 
derived from resource['statistics']['load'] - self.assertIsNone(job.input_file_bytes) - self.assertIsNone(job.input_files) - self.assertIsNone(job.output_bytes) - self.assertIsNone(job.output_rows) - - # set/read from resource['configuration']['load'] - self.assertIsNone(job.schema) - self.assertIsNone(job.allow_jagged_rows) - self.assertIsNone(job.allow_quoted_newlines) - self.assertIsNone(job.autodetect) - self.assertIsNone(job.create_disposition) - self.assertIsNone(job.encoding) - self.assertIsNone(job.field_delimiter) - self.assertIsNone(job.ignore_unknown_values) - self.assertIsNone(job.max_bad_records) - self.assertIsNone(job.null_marker) - self.assertIsNone(job.quote_character) - self.assertIsNone(job.skip_leading_rows) - self.assertIsNone(job.source_format) - self.assertIsNone(job.write_disposition) - self.assertIsNone(job.destination_encryption_configuration) - self.assertIsNone(job.destination_table_description) - self.assertIsNone(job.destination_table_friendly_name) - self.assertIsNone(job.range_partitioning) - self.assertIsNone(job.time_partitioning) - self.assertIsNone(job.use_avro_logical_types) - self.assertIsNone(job.clustering_fields) - self.assertIsNone(job.schema_update_options) - - def test_ctor_w_config(self): - from google.cloud.bigquery.schema import SchemaField - from google.cloud.bigquery.job import LoadJobConfig - - client = _make_client(project=self.PROJECT) - full_name = SchemaField("full_name", "STRING", mode="REQUIRED") - age = SchemaField("age", "INTEGER", mode="REQUIRED") - config = LoadJobConfig() - config.schema = [full_name, age] - job = self._make_one( - self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client, config - ) - self.assertEqual(job.schema, [full_name, age]) - config.destination_table_description = "Description" - expected = {"description": "Description"} - self.assertEqual( - config._properties["load"]["destinationTableProperties"], expected - ) - friendly_name = "Friendly Name" - 
config._properties["load"]["destinationTableProperties"] = { - "friendlyName": friendly_name - } - self.assertEqual(config.destination_table_friendly_name, friendly_name) - - def test_ctor_w_job_reference(self): - from google.cloud.bigquery import job - - client = _make_client(project=self.PROJECT) - job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") - load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) - self.assertEqual(load_job.project, "alternative-project") - self.assertEqual(load_job.location, "US") - - def test_done(self): - client = _make_client(project=self.PROJECT) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - self.assertTrue(job.done()) - - def test_result(self): - client = _make_client(project=self.PROJECT) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - - result = job.result() - - self.assertIs(result, job) - - def test_result_invokes_begin(self): - begun_resource = self._make_resource() - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection(begun_resource, done_resource) - client = _make_client(self.PROJECT) - client._connection = connection - - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - job.result() - - self.assertEqual(len(connection.api_request.call_args_list), 2) - begin_request, reload_request = connection.api_request.call_args_list - self.assertEqual(begin_request[1]["method"], "POST") - self.assertEqual(reload_request[1]["method"], "GET") - - def test_schema_setter_non_list(self): - from google.cloud.bigquery.job import LoadJobConfig - - config = LoadJobConfig() - with self.assertRaises(TypeError): - config.schema = object() - - def test_schema_setter_invalid_field(self): - from google.cloud.bigquery.job import LoadJobConfig - from google.cloud.bigquery.schema import 
SchemaField - - config = LoadJobConfig() - full_name = SchemaField("full_name", "STRING", mode="REQUIRED") - with self.assertRaises(ValueError): - config.schema = [full_name, object()] - - def test_schema_setter(self): - from google.cloud.bigquery.job import LoadJobConfig - from google.cloud.bigquery.schema import SchemaField - - config = LoadJobConfig() - full_name = SchemaField("full_name", "STRING", mode="REQUIRED") - age = SchemaField("age", "INTEGER", mode="REQUIRED") - config.schema = [full_name, age] - self.assertEqual(config.schema, [full_name, age]) - - def test_props_set_by_server(self): - import datetime - from google.cloud._helpers import UTC - from google.cloud._helpers import _millis - - CREATED = datetime.datetime(2015, 8, 11, 12, 13, 22, tzinfo=UTC) - STARTED = datetime.datetime(2015, 8, 11, 13, 47, 15, tzinfo=UTC) - ENDED = datetime.datetime(2015, 8, 11, 14, 47, 15, tzinfo=UTC) - FULL_JOB_ID = "%s:%s" % (self.PROJECT, self.JOB_ID) - URL = "http://example.com/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - EMAIL = "phred@example.com" - ERROR_RESULT = { - "debugInfo": "DEBUG", - "location": "LOCATION", - "message": "MESSAGE", - "reason": "REASON", - } - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - job._properties["etag"] = "ETAG" - job._properties["id"] = FULL_JOB_ID - job._properties["selfLink"] = URL - job._properties["user_email"] = EMAIL - - statistics = job._properties["statistics"] = {} - statistics["creationTime"] = _millis(CREATED) - statistics["startTime"] = _millis(STARTED) - statistics["endTime"] = _millis(ENDED) - - self.assertEqual(job.etag, "ETAG") - self.assertEqual(job.self_link, URL) - self.assertEqual(job.user_email, EMAIL) - - self.assertEqual(job.created, CREATED) - self.assertEqual(job.started, STARTED) - self.assertEqual(job.ended, ENDED) - - # running jobs have no load stats not yet set. 
- self.assertIsNone(job.output_bytes) - - load_stats = statistics["load"] = {} - load_stats["inputFileBytes"] = 12345 - load_stats["inputFiles"] = 1 - load_stats["outputBytes"] = 23456 - load_stats["outputRows"] = 345 - - self.assertEqual(job.input_file_bytes, 12345) - self.assertEqual(job.input_files, 1) - self.assertEqual(job.output_bytes, 23456) - self.assertEqual(job.output_rows, 345) - - status = job._properties["status"] = {} - - self.assertIsNone(job.error_result) - self.assertIsNone(job.errors) - self.assertIsNone(job.state) - - status["errorResult"] = ERROR_RESULT - status["errors"] = [ERROR_RESULT] - status["state"] = "STATE" - - self.assertEqual(job.error_result, ERROR_RESULT) - self.assertEqual(job.errors, [ERROR_RESULT]) - self.assertEqual(job.state, "STATE") - - def test_from_api_repr_missing_identity(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = {} - klass = self._get_target_class() - with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) - - def test_from_api_repr_missing_config(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": "%s:%s" % (self.PROJECT, self.JOB_ID), - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - } - klass = self._get_target_class() - with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) - - def test_from_api_repr_bare(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.FULL_JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "load": { - "sourceUris": [self.SOURCE1], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - } - }, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - 
def test_from_api_with_encryption(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.FULL_JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "load": { - "sourceUris": [self.SOURCE1], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "destinationEncryptionConfiguration": { - "kmsKeyName": self.KMS_KEY_NAME - }, - } - }, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_from_api_repr_w_properties(self): - from google.cloud.bigquery.job import CreateDisposition - - client = _make_client(project=self.PROJECT) - RESOURCE = self._make_resource() - load_config = RESOURCE["configuration"]["load"] - load_config["createDisposition"] = CreateDisposition.CREATE_IF_NEEDED - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_already_running(self): - conn = _make_connection() - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - job._properties["status"] = {"state": "RUNNING"} - - with self.assertRaises(ValueError): - job._begin() - - def test_begin_w_bound_client(self): - RESOURCE = self._make_resource() - # Ensure None for missing server-set props - del RESOURCE["statistics"]["creationTime"] - del RESOURCE["etag"] - del RESOURCE["selfLink"] - del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - path = "/projects/{}/jobs".format(self.PROJECT) - with mock.patch( - 
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": path}, client, job) - - conn.api_request.assert_called_once_with( - method="POST", - path=path, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "load": { - "sourceUris": [self.SOURCE1], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - } - }, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_autodetect(self): - from google.cloud.bigquery.job import LoadJobConfig - - path = "/projects/{}/jobs".format(self.PROJECT) - resource = self._make_resource() - resource["configuration"]["load"]["autodetect"] = True - # Ensure None for missing server-set props - del resource["statistics"]["creationTime"] - del resource["etag"] - del resource["selfLink"] - del resource["user_email"] - conn = _make_connection(resource) - client = _make_client(project=self.PROJECT, connection=conn) - config = LoadJobConfig() - config.autodetect = True - job = self._make_one( - self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client, config - ) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": path}, client, job) - - sent = { - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "load": { - "sourceUris": [self.SOURCE1], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "autodetect": True, - } - }, - } - conn.api_request.assert_called_once_with( - method="POST", path=path, data=sent, timeout=None - ) - self._verifyResourceProperties(job, resource) - - def test_begin_w_alternate_client(self): - from google.cloud.bigquery.job import CreateDisposition - 
from google.cloud.bigquery.job import LoadJobConfig - from google.cloud.bigquery.job import SchemaUpdateOption - from google.cloud.bigquery.job import WriteDisposition - from google.cloud.bigquery.schema import SchemaField - - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource(ended=True) - LOAD_CONFIGURATION = { - "sourceUris": [self.SOURCE1], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "allowJaggedRows": True, - "allowQuotedNewlines": True, - "createDisposition": CreateDisposition.CREATE_NEVER, - "encoding": "ISO-8559-1", - "fieldDelimiter": "|", - "ignoreUnknownValues": True, - "maxBadRecords": 100, - "nullMarker": r"\N", - "quote": "'", - "skipLeadingRows": "1", - "sourceFormat": "CSV", - "useAvroLogicalTypes": True, - "writeDisposition": WriteDisposition.WRITE_TRUNCATE, - "schema": { - "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": None, - }, - ] - }, - "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], - } - RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) - client2 = _make_client(project=self.PROJECT, connection=conn2) - full_name = SchemaField("full_name", "STRING", mode="REQUIRED") - age = SchemaField("age", "INTEGER", mode="REQUIRED") - config = LoadJobConfig() - config.schema = [full_name, age] - job = self._make_one( - self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1, config - ) - config.allow_jagged_rows = True - config.allow_quoted_newlines = True - config.create_disposition = CreateDisposition.CREATE_NEVER - config.encoding = "ISO-8559-1" - config.field_delimiter = "|" - config.ignore_unknown_values = True - config.max_bad_records = 100 - config.null_marker = 
r"\N" - config.quote_character = "'" - config.skip_leading_rows = 1 - config.source_format = "CSV" - config.use_avro_logical_types = True - config.write_disposition = WriteDisposition.WRITE_TRUNCATE - config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin(client=client2) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - self.assertEqual(len(conn2.api_request.call_args_list), 1) - req = conn2.api_request.call_args_list[0] - self.assertEqual(req[1]["method"], "POST") - self.assertEqual(req[1]["path"], PATH) - SENT = { - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": {"load": LOAD_CONFIGURATION}, - } - self.maxDiff = None - self.assertEqual(req[1]["data"], SENT) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_job_reference(self): - from google.cloud.bigquery import job - - resource = self._make_resource() - resource["jobReference"]["projectId"] = "alternative-project" - resource["jobReference"]["location"] = "US" - job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") - conn = _make_connection(resource) - client = _make_client(project=self.PROJECT, connection=conn) - load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - load_job._begin() - final_attributes.assert_called_with( - {"path": "/projects/alternative-project/jobs"}, client, load_job - ) - - conn.api_request.assert_called_once() - _, request = conn.api_request.call_args - self.assertEqual(request["method"], "POST") - self.assertEqual(request["path"], "/projects/alternative-project/jobs") - self.assertEqual( - request["data"]["jobReference"]["projectId"], "alternative-project" - ) - 
self.assertEqual(request["data"]["jobReference"]["location"], "US") - self.assertEqual(request["data"]["jobReference"]["jobId"], self.JOB_ID) - - def test_exists_miss_w_bound_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertFalse(job.exists()) - - final_attributes.assert_called_with( - {"path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID)}, - client, - job, - ) - - conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None - ) - - def test_exists_hit_w_alternate_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) - client2 = _make_client(project=self.PROJECT, connection=conn2) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertTrue(job.exists(client=client2)) - - final_attributes.assert_called_with( - {"path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID)}, - client2, - job, - ) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None - ) - - def test_exists_miss_w_job_reference(self): - from google.cloud.bigquery import job - - job_ref = job._JobReference("my-job-id", "other-project", "US") - conn = _make_connection() - client = _make_client(project=self.PROJECT, connection=conn) - load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) - with 
mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertFalse(load_job.exists()) - - final_attributes.assert_called_with( - {"path": "/projects/other-project/jobs/my-job-id"}, client, load_job - ) - - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/other-project/jobs/my-job-id", - query_params={"fields": "id", "location": "US"}, - timeout=None, - ) - - def test_reload_w_bound_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_reload_w_alternate_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - RESOURCE = self._make_resource() - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) - client2 = _make_client(project=self.PROJECT, connection=conn2) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload(client=client2) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_reload_w_job_reference(self): - from 
google.cloud.bigquery import job - - resource = self._make_resource(ended=True) - resource["jobReference"]["projectId"] = "alternative-project" - resource["jobReference"]["location"] = "US" - job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") - conn = _make_connection(resource) - client = _make_client(project=self.PROJECT, connection=conn) - load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - load_job.reload() - - final_attributes.assert_called_with( - {"path": "/projects/alternative-project/jobs/{}".format(self.JOB_ID)}, - client, - load_job, - ) - - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/alternative-project/jobs/{}".format(self.JOB_ID), - query_params={"location": "US"}, - timeout=None, - ) - - def test_cancel_w_bound_client(self): - PATH = "/projects/%s/jobs/%s/cancel" % (self.PROJECT, self.JOB_ID) - RESOURCE = self._make_resource(ended=True) - RESPONSE = {"job": RESOURCE} - conn = _make_connection(RESPONSE) - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.cancel() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="POST", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_cancel_w_alternate_client(self): - PATH = "/projects/%s/jobs/%s/cancel" % (self.PROJECT, self.JOB_ID) - RESOURCE = self._make_resource(ended=True) - RESPONSE = {"job": RESOURCE} - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESPONSE) - client2 = _make_client(project=self.PROJECT, 
connection=conn2) - job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client1) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.cancel(client=client2) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="POST", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_cancel_w_job_reference(self): - from google.cloud.bigquery import job - - resource = self._make_resource(ended=True) - resource["jobReference"]["projectId"] = "alternative-project" - resource["jobReference"]["location"] = "US" - job_ref = job._JobReference(self.JOB_ID, "alternative-project", "US") - conn = _make_connection({"job": resource}) - client = _make_client(project=self.PROJECT, connection=conn) - load_job = self._make_one(job_ref, [self.SOURCE1], self.TABLE_REF, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - load_job.cancel() - - final_attributes.assert_called_with( - { - "path": "/projects/alternative-project/jobs/{}/cancel".format( - self.JOB_ID - ) - }, - client, - load_job, - ) - conn.api_request.assert_called_once_with( - method="POST", - path="/projects/alternative-project/jobs/{}/cancel".format(self.JOB_ID), - query_params={"location": "US"}, - timeout=None, - ) - - -class TestCopyJobConfig(unittest.TestCase, _Base): - JOB_TYPE = "copy" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import CopyJobConfig - - return CopyJobConfig - - def test_ctor_w_properties(self): - from google.cloud.bigquery.job import CreateDisposition - from google.cloud.bigquery.job import WriteDisposition - - create_disposition = CreateDisposition.CREATE_NEVER - write_disposition = WriteDisposition.WRITE_TRUNCATE - config = self._get_target_class()( - 
create_disposition=create_disposition, write_disposition=write_disposition - ) - - self.assertEqual(config.create_disposition, create_disposition) - self.assertEqual(config.write_disposition, write_disposition) - - def test_to_api_repr_with_encryption(self): - from google.cloud.bigquery.encryption_configuration import ( - EncryptionConfiguration, - ) - - config = self._make_one() - config.destination_encryption_configuration = EncryptionConfiguration( - kms_key_name=self.KMS_KEY_NAME - ) - resource = config.to_api_repr() - self.assertEqual( - resource, - { - "copy": { - "destinationEncryptionConfiguration": { - "kmsKeyName": self.KMS_KEY_NAME - } - } - }, - ) - - def test_to_api_repr_with_encryption_none(self): - config = self._make_one() - config.destination_encryption_configuration = None - resource = config.to_api_repr() - self.assertEqual( - resource, {"copy": {"destinationEncryptionConfiguration": None}} - ) - - -class TestCopyJob(unittest.TestCase, _Base): - JOB_TYPE = "copy" - SOURCE_TABLE = "source_table" - DESTINATION_TABLE = "destination_table" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import CopyJob - - return CopyJob - - def _make_resource(self, started=False, ended=False): - resource = super(TestCopyJob, self)._make_resource(started, ended) - config = resource["configuration"]["copy"] - config["sourceTables"] = [ - { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - } - ] - config["destinationTable"] = { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.DESTINATION_TABLE, - } - - return resource - - def _verifyResourceProperties(self, job, resource): - self._verifyReadonlyResourceProperties(job, resource) - - config = resource.get("configuration", {}).get("copy") - - table_ref = config["destinationTable"] - self.assertEqual(job.destination.project, table_ref["projectId"]) - self.assertEqual(job.destination.dataset_id, table_ref["datasetId"]) - 
self.assertEqual(job.destination.table_id, table_ref["tableId"]) - - sources = config.get("sourceTables") - if sources is None: - sources = [config["sourceTable"]] - self.assertEqual(len(sources), len(job.sources)) - for table_ref, table in zip(sources, job.sources): - self.assertEqual(table.project, table_ref["projectId"]) - self.assertEqual(table.dataset_id, table_ref["datasetId"]) - self.assertEqual(table.table_id, table_ref["tableId"]) - - if "createDisposition" in config: - self.assertEqual(job.create_disposition, config["createDisposition"]) - else: - self.assertIsNone(job.create_disposition) - - if "writeDisposition" in config: - self.assertEqual(job.write_disposition, config["writeDisposition"]) - else: - self.assertIsNone(job.write_disposition) - - if "destinationEncryptionConfiguration" in config: - self.assertIsNotNone(job.destination_encryption_configuration) - self.assertEqual( - job.destination_encryption_configuration.kms_key_name, - config["destinationEncryptionConfiguration"]["kmsKeyName"], - ) - else: - self.assertIsNone(job.destination_encryption_configuration) - - def test_ctor(self): - client = _make_client(project=self.PROJECT) - source = self._table_ref(self.SOURCE_TABLE) - destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_ID, [source], destination, client) - self.assertEqual(job.destination, destination) - self.assertEqual(job.sources, [source]) - self.assertIs(job._client, client) - self.assertEqual(job.job_type, self.JOB_TYPE) - self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) - - self._verifyInitialReadonlyProperties(job) - - # set/read from resource['configuration']['copy'] - self.assertIsNone(job.create_disposition) - self.assertIsNone(job.write_disposition) - self.assertIsNone(job.destination_encryption_configuration) - - def test_from_api_repr_missing_identity(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = {} - klass = 
self._get_target_class() - with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) - - def test_from_api_repr_missing_config(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - } - klass = self._get_target_class() - with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) - - def test_from_api_repr_bare(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "copy": { - "sourceTables": [ - { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - } - ], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.DESTINATION_TABLE, - }, - } - }, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_from_api_with_encryption(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "copy": { - "sourceTables": [ - { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - } - ], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.DESTINATION_TABLE, - }, - "destinationEncryptionConfiguration": { - "kmsKeyName": self.KMS_KEY_NAME - }, - } - }, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_from_api_repr_w_sourcetable(self): - self._setUpConstants() - 
client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "copy": { - "sourceTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - }, - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.DESTINATION_TABLE, - }, - } - }, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_from_api_repr_wo_sources(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "copy": { - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.DESTINATION_TABLE, - } - } - }, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - with self.assertRaises(KeyError): - _ = job.sources - - def test_from_api_repr_w_properties(self): - from google.cloud.bigquery.job import CreateDisposition - - client = _make_client(project=self.PROJECT) - RESOURCE = self._make_resource() - copy_config = RESOURCE["configuration"]["copy"] - copy_config["createDisposition"] = CreateDisposition.CREATE_IF_NEEDED - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_bound_client(self): - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource() - # Ensure None for missing server-set props - del RESOURCE["statistics"]["creationTime"] - del RESOURCE["etag"] - del RESOURCE["selfLink"] - del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, 
connection=conn) - source = self._table_ref(self.SOURCE_TABLE) - destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_ID, [source], destination, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "copy": { - "sourceTables": [ - { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - } - ], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.DESTINATION_TABLE, - }, - } - }, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_alternate_client(self): - from google.cloud.bigquery.job import CopyJobConfig - - from google.cloud.bigquery.job import CreateDisposition - from google.cloud.bigquery.job import WriteDisposition - - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource(ended=True) - COPY_CONFIGURATION = { - "sourceTables": [ - { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - } - ], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.DESTINATION_TABLE, - }, - "createDisposition": CreateDisposition.CREATE_NEVER, - "writeDisposition": WriteDisposition.WRITE_TRUNCATE, - } - RESOURCE["configuration"]["copy"] = COPY_CONFIGURATION - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) - client2 = _make_client(project=self.PROJECT, connection=conn2) - source = self._table_ref(self.SOURCE_TABLE) - destination = self._table_ref(self.DESTINATION_TABLE) - config = CopyJobConfig() - 
config.create_disposition = CreateDisposition.CREATE_NEVER - config.write_disposition = WriteDisposition.WRITE_TRUNCATE - job = self._make_one(self.JOB_ID, [source], destination, client1, config) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin(client=client2) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": {"copy": COPY_CONFIGURATION}, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_exists_miss_w_bound_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() - client = _make_client(project=self.PROJECT, connection=conn) - - source = self._table_ref(self.SOURCE_TABLE) - destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_ID, [source], destination, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertFalse(job.exists()) - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None - ) - - def test_exists_hit_w_alternate_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) - client2 = _make_client(project=self.PROJECT, connection=conn2) - source = self._table_ref(self.SOURCE_TABLE) - destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_ID, [source], destination, client1) - with mock.patch( - 
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertTrue(job.exists(client=client2)) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None - ) - - def test_reload_w_bound_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - source = self._table_ref(self.SOURCE_TABLE) - destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_ID, [source], destination, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_reload_w_alternate_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - RESOURCE = self._make_resource() - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) - client2 = _make_client(project=self.PROJECT, connection=conn2) - source = self._table_ref(self.SOURCE_TABLE) - destination = self._table_ref(self.DESTINATION_TABLE) - job = self._make_one(self.JOB_ID, [source], destination, client1) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload(client=client2) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, 
timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - -class TestExtractJobConfig(unittest.TestCase, _Base): - JOB_TYPE = "extract" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import ExtractJobConfig - - return ExtractJobConfig - - def test_ctor_w_properties(self): - config = self._get_target_class()(field_delimiter="\t", print_header=True) - - self.assertEqual(config.field_delimiter, "\t") - self.assertTrue(config.print_header) - - def test_to_api_repr(self): - from google.cloud.bigquery import job - - config = self._make_one() - config.compression = job.Compression.SNAPPY - config.destination_format = job.DestinationFormat.AVRO - config.field_delimiter = "ignored for avro" - config.print_header = False - config._properties["extract"]["someNewField"] = "some-value" - config.use_avro_logical_types = True - resource = config.to_api_repr() - self.assertEqual( - resource, - { - "extract": { - "compression": "SNAPPY", - "destinationFormat": "AVRO", - "fieldDelimiter": "ignored for avro", - "printHeader": False, - "someNewField": "some-value", - "useAvroLogicalTypes": True, - } - }, - ) - - def test_from_api_repr(self): - cls = self._get_target_class() - config = cls.from_api_repr( - { - "extract": { - "compression": "NONE", - "destinationFormat": "CSV", - "fieldDelimiter": "\t", - "printHeader": True, - "someNewField": "some-value", - "useAvroLogicalTypes": False, - } - } - ) - self.assertEqual(config.compression, "NONE") - self.assertEqual(config.destination_format, "CSV") - self.assertEqual(config.field_delimiter, "\t") - self.assertEqual(config.print_header, True) - self.assertEqual(config._properties["extract"]["someNewField"], "some-value") - self.assertEqual(config.use_avro_logical_types, False) - - -class TestExtractJob(unittest.TestCase, _Base): - JOB_TYPE = "extract" - SOURCE_TABLE = "source_table" - DESTINATION_URI = "gs://bucket_name/object_name" - - @staticmethod - def _get_target_class(): - from 
google.cloud.bigquery.job import ExtractJob - - return ExtractJob - - def _make_resource(self, started=False, ended=False): - resource = super(TestExtractJob, self)._make_resource(started, ended) - config = resource["configuration"]["extract"] - config["sourceTable"] = { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - } - config["destinationUris"] = [self.DESTINATION_URI] - return resource - - def _verifyResourceProperties(self, job, resource): - self._verifyReadonlyResourceProperties(job, resource) - - config = resource.get("configuration", {}).get("extract") - - self.assertEqual(job.destination_uris, config["destinationUris"]) - - if "sourceTable" in config: - table_ref = config["sourceTable"] - self.assertEqual(job.source.project, table_ref["projectId"]) - self.assertEqual(job.source.dataset_id, table_ref["datasetId"]) - self.assertEqual(job.source.table_id, table_ref["tableId"]) - else: - model_ref = config["sourceModel"] - self.assertEqual(job.source.project, model_ref["projectId"]) - self.assertEqual(job.source.dataset_id, model_ref["datasetId"]) - self.assertEqual(job.source.model_id, model_ref["modelId"]) - - if "compression" in config: - self.assertEqual(job.compression, config["compression"]) - else: - self.assertIsNone(job.compression) - - if "destinationFormat" in config: - self.assertEqual(job.destination_format, config["destinationFormat"]) - else: - self.assertIsNone(job.destination_format) - - if "fieldDelimiter" in config: - self.assertEqual(job.field_delimiter, config["fieldDelimiter"]) - else: - self.assertIsNone(job.field_delimiter) - - if "printHeader" in config: - self.assertEqual(job.print_header, config["printHeader"]) - else: - self.assertIsNone(job.print_header) - - def test_ctor(self): - from google.cloud.bigquery.table import Table - - client = _make_client(project=self.PROJECT) - source = Table(self.TABLE_REF) - job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) - 
self.assertEqual(job.source.project, self.PROJECT) - self.assertEqual(job.source.dataset_id, self.DS_ID) - self.assertEqual(job.source.table_id, self.TABLE_ID) - self.assertEqual(job.destination_uris, [self.DESTINATION_URI]) - self.assertIs(job._client, client) - self.assertEqual(job.job_type, self.JOB_TYPE) - self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) - - self._verifyInitialReadonlyProperties(job) - - # set/read from resource['configuration']['extract'] - self.assertIsNone(job.compression) - self.assertIsNone(job.destination_format) - self.assertIsNone(job.field_delimiter) - self.assertIsNone(job.print_header) - - def test_destination_uri_file_counts(self): - file_counts = 23 - client = _make_client(project=self.PROJECT) - job = self._make_one( - self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client - ) - self.assertIsNone(job.destination_uri_file_counts) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.destination_uri_file_counts) - - extract_stats = statistics["extract"] = {} - self.assertIsNone(job.destination_uri_file_counts) - - extract_stats["destinationUriFileCounts"] = [str(file_counts)] - self.assertEqual(job.destination_uri_file_counts, [file_counts]) - - def test_from_api_repr_missing_identity(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = {} - klass = self._get_target_class() - with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) - - def test_from_api_repr_missing_config(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - } - klass = self._get_target_class() - with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) - - def test_from_api_repr_bare(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - 
RESOURCE = { - "id": self.JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "extract": { - "sourceTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - }, - "destinationUris": [self.DESTINATION_URI], - } - }, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_from_api_repr_for_model(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "extract": { - "sourceModel": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "modelId": "model_id", - }, - "destinationUris": [self.DESTINATION_URI], - } - }, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_from_api_repr_w_properties(self): - from google.cloud.bigquery.job import Compression - - client = _make_client(project=self.PROJECT) - RESOURCE = self._make_resource() - extract_config = RESOURCE["configuration"]["extract"] - extract_config["compression"] = Compression.GZIP - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_bound_client(self): - from google.cloud.bigquery.dataset import DatasetReference - - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource() - # Ensure None for missing server-set props - del RESOURCE["statistics"]["creationTime"] - del RESOURCE["etag"] - del RESOURCE["selfLink"] - del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - 
source_dataset = DatasetReference(self.PROJECT, self.DS_ID) - source = source_dataset.table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "extract": { - "sourceTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - }, - "destinationUris": [self.DESTINATION_URI], - } - }, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_alternate_client(self): - from google.cloud.bigquery.dataset import DatasetReference - from google.cloud.bigquery.job import Compression - from google.cloud.bigquery.job import DestinationFormat - from google.cloud.bigquery.job import ExtractJobConfig - - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource(ended=True) - EXTRACT_CONFIGURATION = { - "sourceTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.SOURCE_TABLE, - }, - "destinationUris": [self.DESTINATION_URI], - "compression": Compression.GZIP, - "destinationFormat": DestinationFormat.NEWLINE_DELIMITED_JSON, - "fieldDelimiter": "|", - "printHeader": False, - } - RESOURCE["configuration"]["extract"] = EXTRACT_CONFIGURATION - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) - client2 = _make_client(project=self.PROJECT, connection=conn2) - source_dataset = DatasetReference(self.PROJECT, self.DS_ID) - source = source_dataset.table(self.SOURCE_TABLE) - config = ExtractJobConfig() - config.compression = Compression.GZIP - config.destination_format = 
DestinationFormat.NEWLINE_DELIMITED_JSON - config.field_delimiter = "|" - config.print_header = False - job = self._make_one( - self.JOB_ID, source, [self.DESTINATION_URI], client1, config - ) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin(client=client2) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": {"extract": EXTRACT_CONFIGURATION}, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_exists_miss_w_bound_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one( - self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client - ) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertFalse(job.exists()) - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None - ) - - def test_exists_hit_w_alternate_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) - client2 = _make_client(project=self.PROJECT, connection=conn2) - job = self._make_one( - self.JOB_ID, self.TABLE_REF, [self.DESTINATION_URI], client1 - ) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertTrue(job.exists(client=client2)) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - 
conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None - ) - - def test_reload_w_bound_client(self): - from google.cloud.bigquery.dataset import DatasetReference - - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - source_dataset = DatasetReference(self.PROJECT, self.DS_ID) - source = source_dataset.table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload() - - final_attributes.assert_called_with({"path": PATH}, client, job) - conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_reload_w_alternate_client(self): - from google.cloud.bigquery.dataset import DatasetReference - - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - RESOURCE = self._make_resource() - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) - client2 = _make_client(project=self.PROJECT, connection=conn2) - source_dataset = DatasetReference(self.PROJECT, self.DS_ID) - source = source_dataset.table(self.SOURCE_TABLE) - job = self._make_one(self.JOB_ID, source, [self.DESTINATION_URI], client1) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload(client=client2) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, 
RESOURCE) - - -class TestQueryJobConfig(unittest.TestCase, _Base): - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import QueryJobConfig - - return QueryJobConfig - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def test_ctor(self): - config = self._make_one() - self.assertEqual(config._properties, {"query": {}}) - - def test_ctor_w_none(self): - config = self._make_one() - config.default_dataset = None - config.destination = None - self.assertIsNone(config.default_dataset) - self.assertIsNone(config.destination) - - def test_ctor_w_properties(self): - config = self._get_target_class()(use_query_cache=False, use_legacy_sql=True) - - self.assertFalse(config.use_query_cache) - self.assertTrue(config.use_legacy_sql) - - def test_ctor_w_string_default_dataset(self): - from google.cloud.bigquery import dataset - - default_dataset = "default-proj.default_dset" - config = self._get_target_class()(default_dataset=default_dataset) - expected = dataset.DatasetReference.from_string(default_dataset) - self.assertEqual(config.default_dataset, expected) - - def test_ctor_w_string_destinaton(self): - from google.cloud.bigquery import table - - destination = "dest-proj.dest_dset.dest_tbl" - config = self._get_target_class()(destination=destination) - expected = table.TableReference.from_string(destination) - self.assertEqual(config.destination, expected) - - def test_default_dataset_w_string(self): - from google.cloud.bigquery import dataset - - default_dataset = "default-proj.default_dset" - config = self._make_one() - config.default_dataset = default_dataset - expected = dataset.DatasetReference.from_string(default_dataset) - self.assertEqual(config.default_dataset, expected) - - def test_default_dataset_w_dataset(self): - from google.cloud.bigquery import dataset - - default_dataset = "default-proj.default_dset" - expected = dataset.DatasetReference.from_string(default_dataset) - config = self._make_one() - 
config.default_dataset = dataset.Dataset(expected) - self.assertEqual(config.default_dataset, expected) - - def test_destinaton_w_string(self): - from google.cloud.bigquery import table - - destination = "dest-proj.dest_dset.dest_tbl" - config = self._make_one() - config.destination = destination - expected = table.TableReference.from_string(destination) - self.assertEqual(config.destination, expected) - - def test_range_partitioning_w_none(self): - object_under_test = self._get_target_class()() - assert object_under_test.range_partitioning is None - - def test_range_partitioning_w_value(self): - object_under_test = self._get_target_class()() - object_under_test._properties["query"]["rangePartitioning"] = { - "field": "column_one", - "range": {"start": 1, "end": 1000, "interval": 10}, - } - object_under_test.range_partitioning.field == "column_one" - object_under_test.range_partitioning.range_.start == 1 - object_under_test.range_partitioning.range_.end == 1000 - object_under_test.range_partitioning.range_.interval == 10 - - def test_range_partitioning_setter(self): - from google.cloud.bigquery.table import PartitionRange - from google.cloud.bigquery.table import RangePartitioning - - object_under_test = self._get_target_class()() - object_under_test.range_partitioning = RangePartitioning( - field="column_one", range_=PartitionRange(start=1, end=1000, interval=10) - ) - object_under_test.range_partitioning.field == "column_one" - object_under_test.range_partitioning.range_.start == 1 - object_under_test.range_partitioning.range_.end == 1000 - object_under_test.range_partitioning.range_.interval == 10 - - def test_range_partitioning_setter_w_none(self): - object_under_test = self._get_target_class()() - object_under_test.range_partitioning = None - assert object_under_test.range_partitioning is None - - def test_range_partitioning_setter_w_wrong_type(self): - object_under_test = self._get_target_class()() - with pytest.raises(ValueError, match="RangePartitioning"): 
- object_under_test.range_partitioning = object() - - def test_time_partitioning(self): - from google.cloud.bigquery import table - - time_partitioning = table.TimePartitioning( - type_=table.TimePartitioningType.DAY, field="name" - ) - config = self._make_one() - config.time_partitioning = time_partitioning - # TimePartitioning should be configurable after assigning - time_partitioning.expiration_ms = 10000 - - self.assertEqual(config.time_partitioning.type_, table.TimePartitioningType.DAY) - self.assertEqual(config.time_partitioning.field, "name") - self.assertEqual(config.time_partitioning.expiration_ms, 10000) - - config.time_partitioning = None - self.assertIsNone(config.time_partitioning) - - def test_clustering_fields(self): - fields = ["email", "postal_code"] - config = self._get_target_class()() - config.clustering_fields = fields - self.assertEqual(config.clustering_fields, fields) - - config.clustering_fields = None - self.assertIsNone(config.clustering_fields) - - def test_from_api_repr_empty(self): - klass = self._get_target_class() - config = klass.from_api_repr({}) - self.assertIsNone(config.dry_run) - self.assertIsNone(config.use_legacy_sql) - self.assertIsNone(config.default_dataset) - self.assertIsNone(config.destination) - self.assertIsNone(config.destination_encryption_configuration) - - def test_from_api_repr_normal(self): - from google.cloud.bigquery.dataset import DatasetReference - - resource = { - "query": { - "useLegacySql": True, - "query": "no property for me", - "defaultDataset": { - "projectId": "someproject", - "datasetId": "somedataset", - }, - "someNewProperty": "I should be saved, too.", - }, - "dryRun": True, - } - klass = self._get_target_class() - - config = klass.from_api_repr(resource) - - self.assertTrue(config.use_legacy_sql) - self.assertEqual( - config.default_dataset, DatasetReference("someproject", "somedataset") - ) - self.assertTrue(config.dry_run) - # Make sure unknown properties propagate. 
- self.assertEqual(config._properties["query"]["query"], "no property for me") - self.assertEqual( - config._properties["query"]["someNewProperty"], "I should be saved, too." - ) - - def test_to_api_repr_normal(self): - from google.cloud.bigquery.dataset import DatasetReference - - config = self._make_one() - config.use_legacy_sql = True - config.default_dataset = DatasetReference("someproject", "somedataset") - config.dry_run = False - config._properties["someNewProperty"] = "Woohoo, alpha stuff." - - resource = config.to_api_repr() - - self.assertFalse(resource["dryRun"]) - self.assertTrue(resource["query"]["useLegacySql"]) - self.assertEqual( - resource["query"]["defaultDataset"]["projectId"], "someproject" - ) - self.assertEqual( - resource["query"]["defaultDataset"]["datasetId"], "somedataset" - ) - # Make sure unknown properties propagate. - self.assertEqual(resource["someNewProperty"], "Woohoo, alpha stuff.") - - def test_to_api_repr_with_encryption(self): - from google.cloud.bigquery.encryption_configuration import ( - EncryptionConfiguration, - ) - - config = self._make_one() - config.destination_encryption_configuration = EncryptionConfiguration( - kms_key_name=self.KMS_KEY_NAME - ) - resource = config.to_api_repr() - self.assertEqual( - resource, - { - "query": { - "destinationEncryptionConfiguration": { - "kmsKeyName": self.KMS_KEY_NAME - } - } - }, - ) - - def test_to_api_repr_with_encryption_none(self): - config = self._make_one() - config.destination_encryption_configuration = None - resource = config.to_api_repr() - self.assertEqual( - resource, {"query": {"destinationEncryptionConfiguration": None}} - ) - - def test_from_api_repr_with_encryption(self): - resource = { - "query": { - "destinationEncryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME} - } - } - klass = self._get_target_class() - config = klass.from_api_repr(resource) - self.assertEqual( - config.destination_encryption_configuration.kms_key_name, self.KMS_KEY_NAME - ) - - -class 
TestQueryJob(unittest.TestCase, _Base): - JOB_TYPE = "query" - QUERY = "select count(*) from persons" - DESTINATION_TABLE = "destination_table" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import QueryJob - - return QueryJob - - def _make_resource(self, started=False, ended=False): - resource = super(TestQueryJob, self)._make_resource(started, ended) - config = resource["configuration"]["query"] - config["query"] = self.QUERY - return resource - - def _verifyBooleanResourceProperties(self, job, config): - - if "allowLargeResults" in config: - self.assertEqual(job.allow_large_results, config["allowLargeResults"]) - else: - self.assertIsNone(job.allow_large_results) - if "flattenResults" in config: - self.assertEqual(job.flatten_results, config["flattenResults"]) - else: - self.assertIsNone(job.flatten_results) - if "useQueryCache" in config: - self.assertEqual(job.use_query_cache, config["useQueryCache"]) - else: - self.assertIsNone(job.use_query_cache) - if "useLegacySql" in config: - self.assertEqual(job.use_legacy_sql, config["useLegacySql"]) - else: - self.assertIsNone(job.use_legacy_sql) - - def _verifyIntegerResourceProperties(self, job, config): - if "maximumBillingTier" in config: - self.assertEqual(job.maximum_billing_tier, config["maximumBillingTier"]) - else: - self.assertIsNone(job.maximum_billing_tier) - if "maximumBytesBilled" in config: - self.assertEqual( - str(job.maximum_bytes_billed), config["maximumBytesBilled"] - ) - self.assertIsInstance(job.maximum_bytes_billed, int) - else: - self.assertIsNone(job.maximum_bytes_billed) - - def _verify_udf_resources(self, job, config): - udf_resources = config.get("userDefinedFunctionResources", ()) - self.assertEqual(len(job.udf_resources), len(udf_resources)) - for found, expected in zip(job.udf_resources, udf_resources): - if "resourceUri" in expected: - self.assertEqual(found.udf_type, "resourceUri") - self.assertEqual(found.value, expected["resourceUri"]) - else: - 
self.assertEqual(found.udf_type, "inlineCode") - self.assertEqual(found.value, expected["inlineCode"]) - - def _verifyQueryParameters(self, job, config): - query_parameters = config.get("queryParameters", ()) - self.assertEqual(len(job.query_parameters), len(query_parameters)) - for found, expected in zip(job.query_parameters, query_parameters): - self.assertEqual(found.to_api_repr(), expected) - - def _verify_table_definitions(self, job, config): - table_defs = config.get("tableDefinitions") - if job.table_definitions is None: - self.assertIsNone(table_defs) - else: - self.assertEqual(len(job.table_definitions), len(table_defs)) - for found_key, found_ec in job.table_definitions.items(): - expected_ec = table_defs.get(found_key) - self.assertIsNotNone(expected_ec) - self.assertEqual(found_ec.to_api_repr(), expected_ec) - - def _verify_configuration_properties(self, job, configuration): - if "dryRun" in configuration: - self.assertEqual(job.dry_run, configuration["dryRun"]) - else: - self.assertIsNone(job.dry_run) - - def _verifyResourceProperties(self, job, resource): - self._verifyReadonlyResourceProperties(job, resource) - - configuration = resource.get("configuration", {}) - self._verify_configuration_properties(job, configuration) - - query_config = resource.get("configuration", {}).get("query") - self._verifyBooleanResourceProperties(job, query_config) - self._verifyIntegerResourceProperties(job, query_config) - self._verify_udf_resources(job, query_config) - self._verifyQueryParameters(job, query_config) - self._verify_table_definitions(job, query_config) - - self.assertEqual(job.query, query_config["query"]) - if "createDisposition" in query_config: - self.assertEqual(job.create_disposition, query_config["createDisposition"]) - else: - self.assertIsNone(job.create_disposition) - if "defaultDataset" in query_config: - ds_ref = job.default_dataset - ds_ref = {"projectId": ds_ref.project, "datasetId": ds_ref.dataset_id} - self.assertEqual(ds_ref, 
query_config["defaultDataset"]) - else: - self.assertIsNone(job.default_dataset) - if "destinationTable" in query_config: - table = job.destination - tb_ref = { - "projectId": table.project, - "datasetId": table.dataset_id, - "tableId": table.table_id, - } - self.assertEqual(tb_ref, query_config["destinationTable"]) - else: - self.assertIsNone(job.destination) - if "priority" in query_config: - self.assertEqual(job.priority, query_config["priority"]) - else: - self.assertIsNone(job.priority) - if "writeDisposition" in query_config: - self.assertEqual(job.write_disposition, query_config["writeDisposition"]) - else: - self.assertIsNone(job.write_disposition) - if "destinationEncryptionConfiguration" in query_config: - self.assertIsNotNone(job.destination_encryption_configuration) - self.assertEqual( - job.destination_encryption_configuration.kms_key_name, - query_config["destinationEncryptionConfiguration"]["kmsKeyName"], - ) - else: - self.assertIsNone(job.destination_encryption_configuration) - if "schemaUpdateOptions" in query_config: - self.assertEqual( - job.schema_update_options, query_config["schemaUpdateOptions"] - ) - else: - self.assertIsNone(job.schema_update_options) - - def test_ctor_defaults(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertEqual(job.query, self.QUERY) - self.assertIs(job._client, client) - self.assertEqual(job.job_type, self.JOB_TYPE) - self.assertEqual(job.path, "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)) - - self._verifyInitialReadonlyProperties(job) - - self.assertFalse(job.use_legacy_sql) - - # set/read from resource['configuration']['query'] - self.assertIsNone(job.allow_large_results) - self.assertIsNone(job.create_disposition) - self.assertIsNone(job.default_dataset) - self.assertIsNone(job.destination) - self.assertIsNone(job.flatten_results) - self.assertIsNone(job.priority) - self.assertIsNone(job.use_query_cache) - 
self.assertIsNone(job.dry_run) - self.assertIsNone(job.write_disposition) - self.assertIsNone(job.maximum_billing_tier) - self.assertIsNone(job.maximum_bytes_billed) - self.assertIsNone(job.table_definitions) - self.assertIsNone(job.destination_encryption_configuration) - self.assertIsNone(job.range_partitioning) - self.assertIsNone(job.time_partitioning) - self.assertIsNone(job.clustering_fields) - self.assertIsNone(job.schema_update_options) - - def test_ctor_w_udf_resources(self): - from google.cloud.bigquery.job import QueryJobConfig - from google.cloud.bigquery.query import UDFResource - - RESOURCE_URI = "gs://some-bucket/js/lib.js" - udf_resources = [UDFResource("resourceUri", RESOURCE_URI)] - client = _make_client(project=self.PROJECT) - config = QueryJobConfig() - config.udf_resources = udf_resources - job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) - self.assertEqual(job.udf_resources, udf_resources) - - def test_ctor_w_query_parameters(self): - from google.cloud.bigquery.job import QueryJobConfig - from google.cloud.bigquery.query import ScalarQueryParameter - - query_parameters = [ScalarQueryParameter("foo", "INT64", 123)] - client = _make_client(project=self.PROJECT) - config = QueryJobConfig(query_parameters=query_parameters) - job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) - self.assertEqual(job.query_parameters, query_parameters) - - def test_from_api_repr_missing_identity(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = {} - klass = self._get_target_class() - with self.assertRaises(KeyError): - klass.from_api_repr(RESOURCE, client=client) - - def test_from_api_repr_missing_config(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - } - klass = self._get_target_class() - with self.assertRaises(KeyError): 
- klass.from_api_repr(RESOURCE, client=client) - - def test_from_api_repr_bare(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": {"query": {"query": self.QUERY}}, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_from_api_repr_with_encryption(self): - self._setUpConstants() - client = _make_client(project=self.PROJECT) - RESOURCE = { - "id": self.JOB_ID, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "query": { - "query": self.QUERY, - "destinationEncryptionConfiguration": { - "kmsKeyName": self.KMS_KEY_NAME - }, - } - }, - } - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_from_api_repr_w_properties(self): - from google.cloud.bigquery.job import CreateDisposition - from google.cloud.bigquery.job import SchemaUpdateOption - from google.cloud.bigquery.job import WriteDisposition - - client = _make_client(project=self.PROJECT) - RESOURCE = self._make_resource() - query_config = RESOURCE["configuration"]["query"] - query_config["createDisposition"] = CreateDisposition.CREATE_IF_NEEDED - query_config["writeDisposition"] = WriteDisposition.WRITE_TRUNCATE - query_config["destinationTable"] = { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.DESTINATION_TABLE, - } - query_config["schemaUpdateOptions"] = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] - klass = self._get_target_class() - job = klass.from_api_repr(RESOURCE, client=client) - self.assertIs(job._client, client) - self._verifyResourceProperties(job, RESOURCE) - - def test_cancelled(self): - client = 
_make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - job._properties["status"] = { - "state": "DONE", - "errorResult": {"reason": "stopped"}, - } - - self.assertTrue(job.cancelled()) - - def test_done(self): - client = _make_client(project=self.PROJECT) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - job._query_results = google.cloud.bigquery.query._QueryResults.from_api_repr( - {"jobComplete": True, "jobReference": resource["jobReference"]} - ) - self.assertTrue(job.done()) - - def test_done_w_timeout(self): - client = _make_client(project=self.PROJECT) - resource = self._make_resource(ended=False) - job = self._get_target_class().from_api_repr(resource, client) - - with mock.patch.object( - client, "_get_query_results" - ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload: - job.done(timeout=42) - - fake_get_results.assert_called_once() - call_args = fake_get_results.call_args - self.assertEqual(call_args.kwargs.get("timeout"), 42) - - call_args = fake_reload.call_args - self.assertEqual(call_args.kwargs.get("timeout"), 42) - - def test_done_w_timeout_and_longer_internal_api_timeout(self): - client = _make_client(project=self.PROJECT) - resource = self._make_resource(ended=False) - job = self._get_target_class().from_api_repr(resource, client) - job._done_timeout = 8.8 - - with mock.patch.object( - client, "_get_query_results" - ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload: - job.done(timeout=5.5) - - # The expected timeout used is simply the given timeout, as the latter - # is shorter than the job's internal done timeout. 
- expected_timeout = 5.5 - - fake_get_results.assert_called_once() - call_args = fake_get_results.call_args - self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) - - call_args = fake_reload.call_args - self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) - - def test_query_plan(self): - from google.cloud._helpers import _RFC3339_MICROS - from google.cloud.bigquery.job import QueryPlanEntry - from google.cloud.bigquery.job import QueryPlanEntryStep - - plan_entries = [ - { - "name": "NAME", - "id": "1234", - "inputStages": ["88", "101"], - "startMs": "1522540800000", - "endMs": "1522540804000", - "parallelInputs": "1000", - "completedParallelInputs": "5", - "waitMsAvg": "33", - "waitMsMax": "400", - "waitRatioAvg": 2.71828, - "waitRatioMax": 3.14159, - "readMsAvg": "45", - "readMsMax": "90", - "readRatioAvg": 1.41421, - "readRatioMax": 1.73205, - "computeMsAvg": "55", - "computeMsMax": "99", - "computeRatioAvg": 0.69315, - "computeRatioMax": 1.09861, - "writeMsAvg": "203", - "writeMsMax": "340", - "writeRatioAvg": 3.32193, - "writeRatioMax": 2.30258, - "recordsRead": "100", - "recordsWritten": "1", - "status": "STATUS", - "shuffleOutputBytes": "1024", - "shuffleOutputBytesSpilled": "1", - "steps": [{"kind": "KIND", "substeps": ["SUBSTEP1", "SUBSTEP2"]}], - } - ] - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertEqual(job.query_plan, []) - - statistics = job._properties["statistics"] = {} - self.assertEqual(job.query_plan, []) - - query_stats = statistics["query"] = {} - self.assertEqual(job.query_plan, []) - - query_stats["queryPlan"] = plan_entries - - self.assertEqual(len(job.query_plan), len(plan_entries)) - for found, expected in zip(job.query_plan, plan_entries): - self.assertIsInstance(found, QueryPlanEntry) - self.assertEqual(found.name, expected["name"]) - self.assertEqual(found.entry_id, expected["id"]) - self.assertEqual(len(found.input_stages), 
len(expected["inputStages"])) - for f_id in found.input_stages: - self.assertIn(f_id, [int(e) for e in expected["inputStages"]]) - self.assertEqual( - found.start.strftime(_RFC3339_MICROS), "2018-04-01T00:00:00.000000Z" - ) - self.assertEqual( - found.end.strftime(_RFC3339_MICROS), "2018-04-01T00:00:04.000000Z" - ) - self.assertEqual(found.parallel_inputs, int(expected["parallelInputs"])) - self.assertEqual( - found.completed_parallel_inputs, - int(expected["completedParallelInputs"]), - ) - self.assertEqual(found.wait_ms_avg, int(expected["waitMsAvg"])) - self.assertEqual(found.wait_ms_max, int(expected["waitMsMax"])) - self.assertEqual(found.wait_ratio_avg, expected["waitRatioAvg"]) - self.assertEqual(found.wait_ratio_max, expected["waitRatioMax"]) - self.assertEqual(found.read_ms_avg, int(expected["readMsAvg"])) - self.assertEqual(found.read_ms_max, int(expected["readMsMax"])) - self.assertEqual(found.read_ratio_avg, expected["readRatioAvg"]) - self.assertEqual(found.read_ratio_max, expected["readRatioMax"]) - self.assertEqual(found.compute_ms_avg, int(expected["computeMsAvg"])) - self.assertEqual(found.compute_ms_max, int(expected["computeMsMax"])) - self.assertEqual(found.compute_ratio_avg, expected["computeRatioAvg"]) - self.assertEqual(found.compute_ratio_max, expected["computeRatioMax"]) - self.assertEqual(found.write_ms_avg, int(expected["writeMsAvg"])) - self.assertEqual(found.write_ms_max, int(expected["writeMsMax"])) - self.assertEqual(found.write_ratio_avg, expected["writeRatioAvg"]) - self.assertEqual(found.write_ratio_max, expected["writeRatioMax"]) - self.assertEqual(found.records_read, int(expected["recordsRead"])) - self.assertEqual(found.records_written, int(expected["recordsWritten"])) - self.assertEqual(found.status, expected["status"]) - self.assertEqual( - found.shuffle_output_bytes, int(expected["shuffleOutputBytes"]) - ) - self.assertEqual( - found.shuffle_output_bytes_spilled, - int(expected["shuffleOutputBytesSpilled"]), - ) - - 
self.assertEqual(len(found.steps), len(expected["steps"])) - for f_step, e_step in zip(found.steps, expected["steps"]): - self.assertIsInstance(f_step, QueryPlanEntryStep) - self.assertEqual(f_step.kind, e_step["kind"]) - self.assertEqual(f_step.substeps, e_step["substeps"]) - - def test_total_bytes_processed(self): - total_bytes = 1234 - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.total_bytes_processed) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.total_bytes_processed) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.total_bytes_processed) - - query_stats["totalBytesProcessed"] = str(total_bytes) - self.assertEqual(job.total_bytes_processed, total_bytes) - - def test_total_bytes_billed(self): - total_bytes = 1234 - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.total_bytes_billed) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.total_bytes_billed) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.total_bytes_billed) - - query_stats["totalBytesBilled"] = str(total_bytes) - self.assertEqual(job.total_bytes_billed, total_bytes) - - def test_billing_tier(self): - billing_tier = 1 - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.billing_tier) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.billing_tier) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.billing_tier) - - query_stats["billingTier"] = billing_tier - self.assertEqual(job.billing_tier, billing_tier) - - def test_cache_hit(self): - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.cache_hit) - - statistics = job._properties["statistics"] = {} - 
self.assertIsNone(job.cache_hit) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.cache_hit) - - query_stats["cacheHit"] = True - self.assertTrue(job.cache_hit) - - def test_ddl_operation_performed(self): - op = "SKIP" - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.ddl_operation_performed) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.ddl_operation_performed) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.ddl_operation_performed) - - query_stats["ddlOperationPerformed"] = op - self.assertEqual(job.ddl_operation_performed, op) - - def test_ddl_target_routine(self): - from google.cloud.bigquery.routine import RoutineReference - - ref_routine = { - "projectId": self.PROJECT, - "datasetId": "ddl_ds", - "routineId": "targetroutine", - } - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.ddl_target_routine) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.ddl_target_routine) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.ddl_target_routine) - - query_stats["ddlTargetRoutine"] = ref_routine - self.assertIsInstance(job.ddl_target_routine, RoutineReference) - self.assertEqual(job.ddl_target_routine.routine_id, "targetroutine") - self.assertEqual(job.ddl_target_routine.dataset_id, "ddl_ds") - self.assertEqual(job.ddl_target_routine.project, self.PROJECT) - - def test_ddl_target_table(self): - from google.cloud.bigquery.table import TableReference - - ref_table = { - "projectId": self.PROJECT, - "datasetId": "ddl_ds", - "tableId": "targettable", - } - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.ddl_target_table) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.ddl_target_table) - - query_stats = 
statistics["query"] = {} - self.assertIsNone(job.ddl_target_table) - - query_stats["ddlTargetTable"] = ref_table - self.assertIsInstance(job.ddl_target_table, TableReference) - self.assertEqual(job.ddl_target_table.table_id, "targettable") - self.assertEqual(job.ddl_target_table.dataset_id, "ddl_ds") - self.assertEqual(job.ddl_target_table.project, self.PROJECT) - - def test_num_dml_affected_rows(self): - num_rows = 1234 - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.num_dml_affected_rows) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.num_dml_affected_rows) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.num_dml_affected_rows) - - query_stats["numDmlAffectedRows"] = str(num_rows) - self.assertEqual(job.num_dml_affected_rows, num_rows) - - def test_slot_millis(self): - millis = 1234 - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.slot_millis) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.slot_millis) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.slot_millis) - - query_stats["totalSlotMs"] = millis - self.assertEqual(job.slot_millis, millis) - - def test_statement_type(self): - statement_type = "SELECT" - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.statement_type) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.statement_type) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.statement_type) - - query_stats["statementType"] = statement_type - self.assertEqual(job.statement_type, statement_type) - - def test_referenced_tables(self): - from google.cloud.bigquery.table import TableReference - - ref_tables_resource = [ - {"projectId": self.PROJECT, "datasetId": "dataset", "tableId": "local1"}, - 
{"projectId": self.PROJECT, "datasetId": "dataset", "tableId": "local2"}, - { - "projectId": "other-project-123", - "datasetId": "other-dataset", - "tableId": "other-table", - }, - ] - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertEqual(job.referenced_tables, []) - - statistics = job._properties["statistics"] = {} - self.assertEqual(job.referenced_tables, []) - - query_stats = statistics["query"] = {} - self.assertEqual(job.referenced_tables, []) - - query_stats["referencedTables"] = ref_tables_resource - - local1, local2, remote = job.referenced_tables - - self.assertIsInstance(local1, TableReference) - self.assertEqual(local1.table_id, "local1") - self.assertEqual(local1.dataset_id, "dataset") - self.assertEqual(local1.project, self.PROJECT) - - self.assertIsInstance(local2, TableReference) - self.assertEqual(local2.table_id, "local2") - self.assertEqual(local2.dataset_id, "dataset") - self.assertEqual(local2.project, self.PROJECT) - - self.assertIsInstance(remote, TableReference) - self.assertEqual(remote.table_id, "other-table") - self.assertEqual(remote.dataset_id, "other-dataset") - self.assertEqual(remote.project, "other-project-123") - - def test_timeline(self): - timeline_resource = [ - { - "elapsedMs": 1, - "activeUnits": 22, - "pendingUnits": 33, - "completedUnits": 44, - "totalSlotMs": 101, - } - ] - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertEqual(job.timeline, []) - - statistics = job._properties["statistics"] = {} - self.assertEqual(job.timeline, []) - - query_stats = statistics["query"] = {} - self.assertEqual(job.timeline, []) - - query_stats["timeline"] = timeline_resource - - self.assertEqual(len(job.timeline), len(timeline_resource)) - self.assertEqual(job.timeline[0].elapsed_ms, 1) - self.assertEqual(job.timeline[0].active_units, 22) - self.assertEqual(job.timeline[0].pending_units, 33) - 
self.assertEqual(job.timeline[0].completed_units, 44) - self.assertEqual(job.timeline[0].slot_millis, 101) - - def test_undeclared_query_parameters(self): - from google.cloud.bigquery.query import ArrayQueryParameter - from google.cloud.bigquery.query import ScalarQueryParameter - from google.cloud.bigquery.query import StructQueryParameter - - undeclared = [ - { - "name": "my_scalar", - "parameterType": {"type": "STRING"}, - "parameterValue": {"value": "value"}, - }, - { - "name": "my_array", - "parameterType": {"type": "ARRAY", "arrayType": {"type": "INT64"}}, - "parameterValue": { - "arrayValues": [{"value": "1066"}, {"value": "1745"}] - }, - }, - { - "name": "my_struct", - "parameterType": { - "type": "STRUCT", - "structTypes": [{"name": "count", "type": {"type": "INT64"}}], - }, - "parameterValue": {"structValues": {"count": {"value": "123"}}}, - }, - ] - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertEqual(job.undeclared_query_parameters, []) - - statistics = job._properties["statistics"] = {} - self.assertEqual(job.undeclared_query_parameters, []) - - query_stats = statistics["query"] = {} - self.assertEqual(job.undeclared_query_parameters, []) - - query_stats["undeclaredQueryParameters"] = undeclared - - scalar, array, struct = job.undeclared_query_parameters - - self.assertIsInstance(scalar, ScalarQueryParameter) - self.assertEqual(scalar.name, "my_scalar") - self.assertEqual(scalar.type_, "STRING") - self.assertEqual(scalar.value, "value") - - self.assertIsInstance(array, ArrayQueryParameter) - self.assertEqual(array.name, "my_array") - self.assertEqual(array.array_type, "INT64") - self.assertEqual(array.values, [1066, 1745]) - - self.assertIsInstance(struct, StructQueryParameter) - self.assertEqual(struct.name, "my_struct") - self.assertEqual(struct.struct_types, {"count": "INT64"}) - self.assertEqual(struct.struct_values, {"count": 123}) - - def test_estimated_bytes_processed(self): - 
est_bytes = 123456 - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, self.QUERY, client) - self.assertIsNone(job.estimated_bytes_processed) - - statistics = job._properties["statistics"] = {} - self.assertIsNone(job.estimated_bytes_processed) - - query_stats = statistics["query"] = {} - self.assertIsNone(job.estimated_bytes_processed) - - query_stats["estimatedBytesProcessed"] = str(est_bytes) - self.assertEqual(job.estimated_bytes_processed, est_bytes) - - def test_result(self): - from google.cloud.bigquery.table import RowIterator - - query_resource = { - "jobComplete": False, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - } - query_resource_done = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "2", - } - job_resource = self._make_resource(started=True) - job_resource_done = self._make_resource(started=True, ended=True) - job_resource_done["configuration"]["query"]["destinationTable"] = { - "projectId": "dest-project", - "datasetId": "dest_dataset", - "tableId": "dest_table", - } - tabledata_resource = { - # Explicitly set totalRows to be different from the initial - # response to test update during iteration. - "totalRows": "1", - "pageToken": None, - "rows": [{"f": [{"v": "abc"}]}], - } - conn = _make_connection( - query_resource, query_resource_done, job_resource_done, tabledata_resource - ) - client = _make_client(self.PROJECT, connection=conn) - job = self._get_target_class().from_api_repr(job_resource, client) - - result = job.result() - - self.assertIsInstance(result, RowIterator) - self.assertEqual(result.total_rows, 2) - rows = list(result) - self.assertEqual(len(rows), 1) - self.assertEqual(rows[0].col1, "abc") - # Test that the total_rows property has changed during iteration, based - # on the response from tabledata.list. 
- self.assertEqual(result.total_rows, 1) - - query_results_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={"maxResults": 0}, - timeout=None, - ) - reload_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={}, - timeout=None, - ) - tabledata_call = mock.call( - method="GET", - path="/projects/dest-project/datasets/dest_dataset/tables/dest_table/data", - query_params={}, - timeout=None, - ) - conn.api_request.assert_has_calls( - [query_results_call, query_results_call, reload_call, tabledata_call] - ) - - def test_result_with_done_job_calls_get_query_results(self): - query_resource_done = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "1", - } - job_resource = self._make_resource(started=True, ended=True) - job_resource["configuration"]["query"]["destinationTable"] = { - "projectId": "dest-project", - "datasetId": "dest_dataset", - "tableId": "dest_table", - } - tabledata_resource = { - "totalRows": "1", - "pageToken": None, - "rows": [{"f": [{"v": "abc"}]}], - } - conn = _make_connection(query_resource_done, tabledata_resource) - client = _make_client(self.PROJECT, connection=conn) - job = self._get_target_class().from_api_repr(job_resource, client) - - result = job.result() - - rows = list(result) - self.assertEqual(len(rows), 1) - self.assertEqual(rows[0].col1, "abc") - - query_results_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={"maxResults": 0}, - timeout=None, - ) - tabledata_call = mock.call( - method="GET", - path="/projects/dest-project/datasets/dest_dataset/tables/dest_table/data", - query_params={}, - timeout=None, - ) - conn.api_request.assert_has_calls([query_results_call, tabledata_call]) - - def test_result_with_max_results(self): - from 
google.cloud.bigquery.table import RowIterator - - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "5", - } - tabledata_resource = { - "totalRows": "5", - "pageToken": None, - "rows": [ - {"f": [{"v": "abc"}]}, - {"f": [{"v": "def"}]}, - {"f": [{"v": "ghi"}]}, - ], - } - connection = _make_connection(query_resource, tabledata_resource) - client = _make_client(self.PROJECT, connection=connection) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - - max_results = 3 - - result = job.result(max_results=max_results) - - self.assertIsInstance(result, RowIterator) - self.assertEqual(result.total_rows, 5) - - rows = list(result) - - self.assertEqual(len(rows), 3) - self.assertEqual(len(connection.api_request.call_args_list), 2) - tabledata_list_request = connection.api_request.call_args_list[1] - self.assertEqual( - tabledata_list_request[1]["query_params"]["maxResults"], max_results - ) - - def test_result_w_retry(self): - from google.cloud.bigquery.table import RowIterator - - query_resource = { - "jobComplete": False, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - } - query_resource_done = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "2", - } - job_resource = self._make_resource(started=True) - job_resource_done = self._make_resource(started=True, ended=True) - job_resource_done["configuration"]["query"]["destinationTable"] = { - "projectId": "dest-project", - "datasetId": "dest_dataset", - "tableId": "dest_table", - } - - connection = _make_connection( - exceptions.NotFound("not normally retriable"), - query_resource, - exceptions.NotFound("not normally retriable"), - query_resource_done, - exceptions.NotFound("not normally 
retriable"), - job_resource_done, - ) - client = _make_client(self.PROJECT, connection=connection) - job = self._get_target_class().from_api_repr(job_resource, client) - - custom_predicate = mock.Mock() - custom_predicate.return_value = True - custom_retry = google.api_core.retry.Retry(predicate=custom_predicate) - - self.assertIsInstance(job.result(retry=custom_retry), RowIterator) - query_results_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={"maxResults": 0}, - timeout=None, - ) - reload_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={}, - timeout=None, - ) - - connection.api_request.assert_has_calls( - [query_results_call, query_results_call, reload_call] - ) - - def test_result_w_empty_schema(self): - from google.cloud.bigquery.table import _EmptyRowIterator - - # Destination table may have no schema for some DDL and DML queries. - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": []}, - } - connection = _make_connection(query_resource, query_resource) - client = _make_client(self.PROJECT, connection=connection) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - - result = job.result() - - self.assertIsInstance(result, _EmptyRowIterator) - self.assertEqual(list(result), []) - - def test_result_invokes_begins(self): - begun_resource = self._make_resource() - incomplete_resource = { - "jobComplete": False, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - } - query_resource = copy.deepcopy(incomplete_resource) - query_resource["jobComplete"] = True - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection( - begun_resource, - incomplete_resource, - 
query_resource, - done_resource, - query_resource, - ) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - - job.result() - - self.assertEqual(len(connection.api_request.call_args_list), 4) - begin_request = connection.api_request.call_args_list[0] - query_request = connection.api_request.call_args_list[2] - reload_request = connection.api_request.call_args_list[3] - self.assertEqual(begin_request[1]["method"], "POST") - self.assertEqual(query_request[1]["method"], "GET") - self.assertEqual(reload_request[1]["method"], "GET") - - def test_result_w_timeout(self): - begun_resource = self._make_resource() - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - } - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection(begun_resource, query_resource, done_resource) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - - with freezegun.freeze_time("1970-01-01 00:00:00", tick=False): - job.result(timeout=1.0) - - self.assertEqual(len(connection.api_request.call_args_list), 3) - begin_request = connection.api_request.call_args_list[0] - query_request = connection.api_request.call_args_list[1] - reload_request = connection.api_request.call_args_list[2] - self.assertEqual(begin_request[1]["method"], "POST") - self.assertEqual(query_request[1]["method"], "GET") - self.assertEqual( - query_request[1]["path"], - "/projects/{}/queries/{}".format(self.PROJECT, self.JOB_ID), - ) - self.assertEqual(query_request[1]["query_params"]["timeoutMs"], 900) - self.assertEqual(reload_request[1]["method"], "GET") - - def test_result_w_page_size(self): - # Arrange - query_results_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, 
"jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "4", - } - job_resource = self._make_resource(started=True, ended=True) - q_config = job_resource["configuration"]["query"] - q_config["destinationTable"] = { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - } - tabledata_resource = { - "totalRows": 4, - "pageToken": "some-page-token", - "rows": [ - {"f": [{"v": "row1"}]}, - {"f": [{"v": "row2"}]}, - {"f": [{"v": "row3"}]}, - ], - } - tabledata_resource_page_2 = {"totalRows": 4, "rows": [{"f": [{"v": "row4"}]}]} - conn = _make_connection( - query_results_resource, tabledata_resource, tabledata_resource_page_2 - ) - client = _make_client(self.PROJECT, connection=conn) - job = self._get_target_class().from_api_repr(job_resource, client) - - # Act - result = job.result(page_size=3) - - # Assert - actual_rows = list(result) - self.assertEqual(len(actual_rows), 4) - - tabledata_path = "/projects/%s/datasets/%s/tables/%s/data" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, - ) - conn.api_request.assert_has_calls( - [ - mock.call( - method="GET", - path=tabledata_path, - query_params={"maxResults": 3}, - timeout=None, - ), - mock.call( - method="GET", - path=tabledata_path, - query_params={"pageToken": "some-page-token", "maxResults": 3}, - timeout=None, - ), - ] - ) - - def test_result_with_start_index(self): - from google.cloud.bigquery.table import RowIterator - - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "5", - } - tabledata_resource = { - "totalRows": "5", - "pageToken": None, - "rows": [ - {"f": [{"v": "abc"}]}, - {"f": [{"v": "def"}]}, - {"f": [{"v": "ghi"}]}, - {"f": [{"v": "jkl"}]}, - ], - } - connection = _make_connection(query_resource, tabledata_resource) - client = _make_client(self.PROJECT, connection=connection) - resource = 
self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - - start_index = 1 - - result = job.result(start_index=start_index) - - self.assertIsInstance(result, RowIterator) - self.assertEqual(result.total_rows, 5) - - rows = list(result) - - self.assertEqual(len(rows), 4) - self.assertEqual(len(connection.api_request.call_args_list), 2) - tabledata_list_request = connection.api_request.call_args_list[1] - self.assertEqual( - tabledata_list_request[1]["query_params"]["startIndex"], start_index - ) - - def test_result_error(self): - from google.cloud import exceptions - - query = textwrap.dedent( - """ - SELECT foo, bar - FROM table_baz - WHERE foo == bar""" - ) - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, query, client) - error_result = { - "debugInfo": "DEBUG", - "location": "LOCATION", - "message": "MESSAGE", - "reason": "invalid", - } - job._properties["status"] = { - "errorResult": error_result, - "errors": [error_result], - "state": "DONE", - } - job._query_results = google.cloud.bigquery.query._QueryResults.from_api_repr( - {"jobComplete": True, "jobReference": job._properties["jobReference"]} - ) - job._set_future_result() - - with self.assertRaises(exceptions.GoogleCloudError) as exc_info: - job.result() - - self.assertIsInstance(exc_info.exception, exceptions.GoogleCloudError) - self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST) - - exc_job_instance = getattr(exc_info.exception, "query_job", None) - self.assertIs(exc_job_instance, job) - - full_text = str(exc_info.exception) - assert job.job_id in full_text - assert "Query Job SQL Follows" in full_text - - for i, line in enumerate(query.splitlines(), start=1): - expected_line = "{}:{}".format(i, line) - assert expected_line in full_text - - def test_result_transport_timeout_error(self): - query = textwrap.dedent( - """ - SELECT foo, bar - FROM table_baz - WHERE foo == bar""" - ) - - client = 
_make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, query, client) - call_api_patch = mock.patch( - "google.cloud.bigquery.client.Client._call_api", - autospec=True, - side_effect=requests.exceptions.Timeout("Server response took too long."), - ) - - # Make sure that timeout errors get rebranded to concurrent futures timeout. - with call_api_patch, self.assertRaises(concurrent.futures.TimeoutError): - job.result(timeout=1) - - def test__begin_error(self): - from google.cloud import exceptions - - query = textwrap.dedent( - """ - SELECT foo, bar - FROM table_baz - WHERE foo == bar""" - ) - - client = _make_client(project=self.PROJECT) - job = self._make_one(self.JOB_ID, query, client) - call_api_patch = mock.patch( - "google.cloud.bigquery.client.Client._call_api", - autospec=True, - side_effect=exceptions.BadRequest("Syntax error in SQL query"), - ) - - with call_api_patch, self.assertRaises(exceptions.GoogleCloudError) as exc_info: - job.result() - - self.assertIsInstance(exc_info.exception, exceptions.GoogleCloudError) - self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST) - - exc_job_instance = getattr(exc_info.exception, "query_job", None) - self.assertIs(exc_job_instance, job) - - full_text = str(exc_info.exception) - assert job.job_id in full_text - assert "Query Job SQL Follows" in full_text - - for i, line in enumerate(query.splitlines(), start=1): - expected_line = "{}:{}".format(i, line) - assert expected_line in full_text - - def test__begin_w_timeout(self): - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource() - - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, self.QUERY, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin(timeout=7.5) - - final_attributes.assert_called_with({"path": PATH}, client, job) - - 
conn.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "query": {"query": self.QUERY, "useLegacySql": False} - }, - }, - timeout=7.5, - ) - - def test_begin_w_bound_client(self): - from google.cloud.bigquery.dataset import DatasetReference - from google.cloud.bigquery.job import QueryJobConfig - - PATH = "/projects/%s/jobs" % (self.PROJECT,) - DS_ID = "DATASET" - RESOURCE = self._make_resource() - # Ensure None for missing server-set props - del RESOURCE["statistics"]["creationTime"] - del RESOURCE["etag"] - del RESOURCE["selfLink"] - del RESOURCE["user_email"] - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - - config = QueryJobConfig() - config.default_dataset = DatasetReference(self.PROJECT, DS_ID) - job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - self.assertIsNone(job.default_dataset) - self.assertEqual(job.udf_resources, []) - conn.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "query": { - "query": self.QUERY, - "useLegacySql": False, - "defaultDataset": { - "projectId": self.PROJECT, - "datasetId": DS_ID, - }, - } - }, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_alternate_client(self): - from google.cloud.bigquery.dataset import DatasetReference - from google.cloud.bigquery.job import CreateDisposition - from google.cloud.bigquery.job import QueryJobConfig - from google.cloud.bigquery.job import QueryPriority - from google.cloud.bigquery.job import SchemaUpdateOption - from 
google.cloud.bigquery.job import WriteDisposition - - PATH = "/projects/%s/jobs" % (self.PROJECT,) - TABLE = "TABLE" - DS_ID = "DATASET" - RESOURCE = self._make_resource(ended=True) - QUERY_CONFIGURATION = { - "query": self.QUERY, - "allowLargeResults": True, - "createDisposition": CreateDisposition.CREATE_NEVER, - "defaultDataset": {"projectId": self.PROJECT, "datasetId": DS_ID}, - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": DS_ID, - "tableId": TABLE, - }, - "flattenResults": True, - "priority": QueryPriority.INTERACTIVE, - "useQueryCache": True, - "useLegacySql": True, - "writeDisposition": WriteDisposition.WRITE_TRUNCATE, - "maximumBillingTier": 4, - "maximumBytesBilled": "123456", - "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_RELAXATION], - } - RESOURCE["configuration"]["query"] = QUERY_CONFIGURATION - RESOURCE["configuration"]["dryRun"] = True - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) - client2 = _make_client(project=self.PROJECT, connection=conn2) - dataset_ref = DatasetReference(self.PROJECT, DS_ID) - table_ref = dataset_ref.table(TABLE) - - config = QueryJobConfig() - config.allow_large_results = True - config.create_disposition = CreateDisposition.CREATE_NEVER - config.default_dataset = dataset_ref - config.destination = table_ref - config.dry_run = True - config.flatten_results = True - config.maximum_billing_tier = 4 - config.priority = QueryPriority.INTERACTIVE - config.use_legacy_sql = True - config.use_query_cache = True - config.write_disposition = WriteDisposition.WRITE_TRUNCATE - config.maximum_bytes_billed = 123456 - config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_RELAXATION] - job = self._make_one(self.JOB_ID, self.QUERY, client1, job_config=config) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin(client=client2) - - 
final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": {"dryRun": True, "query": QUERY_CONFIGURATION}, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_udf(self): - from google.cloud.bigquery.job import QueryJobConfig - from google.cloud.bigquery.query import UDFResource - - RESOURCE_URI = "gs://some-bucket/js/lib.js" - INLINE_UDF_CODE = 'var someCode = "here";' - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource() - # Ensure None for missing server-set props - del RESOURCE["statistics"]["creationTime"] - del RESOURCE["etag"] - del RESOURCE["selfLink"] - del RESOURCE["user_email"] - RESOURCE["configuration"]["query"]["userDefinedFunctionResources"] = [ - {"resourceUri": RESOURCE_URI}, - {"inlineCode": INLINE_UDF_CODE}, - ] - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - udf_resources = [ - UDFResource("resourceUri", RESOURCE_URI), - UDFResource("inlineCode", INLINE_UDF_CODE), - ] - config = QueryJobConfig() - config.udf_resources = udf_resources - config.use_legacy_sql = True - job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - self.assertEqual(job.udf_resources, udf_resources) - conn.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "query": { - "query": self.QUERY, - "useLegacySql": True, - "userDefinedFunctionResources": [ - {"resourceUri": RESOURCE_URI}, - {"inlineCode": 
INLINE_UDF_CODE}, - ], - } - }, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_named_query_parameter(self): - from google.cloud.bigquery.job import QueryJobConfig - from google.cloud.bigquery.query import ScalarQueryParameter - - query_parameters = [ScalarQueryParameter("foo", "INT64", 123)] - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource() - # Ensure None for missing server-set props - del RESOURCE["statistics"]["creationTime"] - del RESOURCE["etag"] - del RESOURCE["selfLink"] - del RESOURCE["user_email"] - config = RESOURCE["configuration"]["query"] - config["parameterMode"] = "NAMED" - config["queryParameters"] = [ - { - "name": "foo", - "parameterType": {"type": "INT64"}, - "parameterValue": {"value": "123"}, - } - ] - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - jconfig = QueryJobConfig() - jconfig.query_parameters = query_parameters - job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=jconfig) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - self.assertEqual(job.query_parameters, query_parameters) - conn.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "query": { - "query": self.QUERY, - "useLegacySql": False, - "parameterMode": "NAMED", - "queryParameters": config["queryParameters"], - } - }, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_positional_query_parameter(self): - from google.cloud.bigquery.job import QueryJobConfig - from google.cloud.bigquery.query import ScalarQueryParameter - - query_parameters = [ScalarQueryParameter.positional("INT64", 123)] - PATH = "/projects/%s/jobs" % 
(self.PROJECT,) - RESOURCE = self._make_resource() - # Ensure None for missing server-set props - del RESOURCE["statistics"]["creationTime"] - del RESOURCE["etag"] - del RESOURCE["selfLink"] - del RESOURCE["user_email"] - config = RESOURCE["configuration"]["query"] - config["parameterMode"] = "POSITIONAL" - config["queryParameters"] = [ - {"parameterType": {"type": "INT64"}, "parameterValue": {"value": "123"}} - ] - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - jconfig = QueryJobConfig() - jconfig.query_parameters = query_parameters - job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=jconfig) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - self.assertEqual(job.query_parameters, query_parameters) - conn.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "query": { - "query": self.QUERY, - "useLegacySql": False, - "parameterMode": "POSITIONAL", - "queryParameters": config["queryParameters"], - } - }, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_begin_w_table_defs(self): - from google.cloud.bigquery.job import QueryJobConfig - from google.cloud.bigquery.external_config import ExternalConfig - from google.cloud.bigquery.external_config import BigtableColumn - from google.cloud.bigquery.external_config import BigtableColumnFamily - - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = self._make_resource() - # Ensure None for missing server-set props - del RESOURCE["statistics"]["creationTime"] - del RESOURCE["etag"] - del RESOURCE["selfLink"] - del RESOURCE["user_email"] - - bt_config = ExternalConfig("BIGTABLE") - bt_config.ignore_unknown_values = True - 
bt_config.options.read_rowkey_as_string = True - cf = BigtableColumnFamily() - cf.family_id = "cf" - col = BigtableColumn() - col.field_name = "fn" - cf.columns = [col] - bt_config.options.column_families = [cf] - BT_CONFIG_RESOURCE = { - "sourceFormat": "BIGTABLE", - "ignoreUnknownValues": True, - "bigtableOptions": { - "readRowkeyAsString": True, - "columnFamilies": [ - {"familyId": "cf", "columns": [{"fieldName": "fn"}]} - ], - }, - } - CSV_CONFIG_RESOURCE = { - "sourceFormat": "CSV", - "maxBadRecords": 8, - "csvOptions": {"allowJaggedRows": True}, - } - csv_config = ExternalConfig("CSV") - csv_config.max_bad_records = 8 - csv_config.options.allow_jagged_rows = True - bt_table = "bigtable-table" - csv_table = "csv-table" - RESOURCE["configuration"]["query"]["tableDefinitions"] = { - bt_table: BT_CONFIG_RESOURCE, - csv_table: CSV_CONFIG_RESOURCE, - } - want_resource = copy.deepcopy(RESOURCE) - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - config = QueryJobConfig() - config.table_definitions = {bt_table: bt_config, csv_table: csv_config} - config.use_legacy_sql = True - job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "query": { - "query": self.QUERY, - "useLegacySql": True, - "tableDefinitions": { - bt_table: BT_CONFIG_RESOURCE, - csv_table: CSV_CONFIG_RESOURCE, - }, - } - }, - }, - timeout=None, - ) - self._verifyResourceProperties(job, want_resource) - - def test_dry_run_query(self): - from google.cloud.bigquery.job import QueryJobConfig - - PATH = "/projects/%s/jobs" % (self.PROJECT,) - RESOURCE = 
self._make_resource() - # Ensure None for missing server-set props - del RESOURCE["statistics"]["creationTime"] - del RESOURCE["etag"] - del RESOURCE["selfLink"] - del RESOURCE["user_email"] - RESOURCE["configuration"]["dryRun"] = True - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - config = QueryJobConfig() - config.dry_run = True - job = self._make_one(self.JOB_ID, self.QUERY, client, job_config=config) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job._begin() - - final_attributes.assert_called_with({"path": PATH}, client, job) - self.assertEqual(job.udf_resources, []) - conn.api_request.assert_called_once_with( - method="POST", - path=PATH, - data={ - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "configuration": { - "query": {"query": self.QUERY, "useLegacySql": False}, - "dryRun": True, - }, - }, - timeout=None, - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_exists_miss_w_bound_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn = _make_connection() - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, self.QUERY, client) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertFalse(job.exists()) - - final_attributes.assert_called_with({"path": PATH}, client, job) - - conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None - ) - - def test_exists_hit_w_alternate_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection({}) - client2 = _make_client(project=self.PROJECT, connection=conn2) - job = self._make_one(self.JOB_ID, self.QUERY, client1) - with 
mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - self.assertTrue(job.exists(client=client2)) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={"fields": "id"}, timeout=None - ) - - def test_reload_w_bound_client(self): - from google.cloud.bigquery.dataset import DatasetReference - from google.cloud.bigquery.job import QueryJobConfig - - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - DS_ID = "DATASET" - DEST_TABLE = "dest_table" - RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - dataset_ref = DatasetReference(self.PROJECT, DS_ID) - table_ref = dataset_ref.table(DEST_TABLE) - config = QueryJobConfig() - config.destination = table_ref - job = self._make_one(self.JOB_ID, None, client, job_config=config) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload() - - final_attributes.assert_called_with({"path": PATH}, client, job) - - self.assertNotEqual(job.destination, table_ref) - - conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_reload_w_alternate_client(self): - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - DS_ID = "DATASET" - DEST_TABLE = "dest_table" - RESOURCE = self._make_resource() - q_config = RESOURCE["configuration"]["query"] - q_config["destinationTable"] = { - "projectId": self.PROJECT, - "datasetId": DS_ID, - "tableId": DEST_TABLE, - } - conn1 = _make_connection() - client1 = _make_client(project=self.PROJECT, connection=conn1) - conn2 = _make_connection(RESOURCE) - client2 = _make_client(project=self.PROJECT, connection=conn2) - job = 
self._make_one(self.JOB_ID, self.QUERY, client1) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload(client=client2) - - final_attributes.assert_called_with({"path": PATH}, client2, job) - - conn1.api_request.assert_not_called() - conn2.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=None - ) - self._verifyResourceProperties(job, RESOURCE) - - def test_reload_w_timeout(self): - from google.cloud.bigquery.dataset import DatasetReference - from google.cloud.bigquery.job import QueryJobConfig - - PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID) - DS_ID = "DATASET" - DEST_TABLE = "dest_table" - RESOURCE = self._make_resource() - conn = _make_connection(RESOURCE) - client = _make_client(project=self.PROJECT, connection=conn) - dataset_ref = DatasetReference(self.PROJECT, DS_ID) - table_ref = dataset_ref.table(DEST_TABLE) - config = QueryJobConfig() - config.destination = table_ref - job = self._make_one(self.JOB_ID, None, client, job_config=config) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - job.reload(timeout=4.2) - - final_attributes.assert_called_with({"path": PATH}, client, job) - - self.assertNotEqual(job.destination, table_ref) - - conn.api_request.assert_called_once_with( - method="GET", path=PATH, query_params={}, timeout=4.2 - ) - - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_to_arrow(self): - begun_resource = self._make_resource() - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "totalRows": "4", - "schema": { - "fields": [ - { - "name": "spouse_1", - "type": "RECORD", - "fields": [ - {"name": "name", "type": "STRING", "mode": "NULLABLE"}, - {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, - ], - }, - { - "name": "spouse_2", - "type": "RECORD", - "fields": [ - 
{"name": "name", "type": "STRING", "mode": "NULLABLE"}, - {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, - ], - }, - ] - }, - } - tabledata_resource = { - "rows": [ - { - "f": [ - {"v": {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}}, - {"v": {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}}, - ] - }, - { - "f": [ - {"v": {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}}, - {"v": {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}}, - ] - }, - ] - } - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection( - begun_resource, query_resource, done_resource, tabledata_resource - ) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - - tbl = job.to_arrow(create_bqstorage_client=False) - - self.assertIsInstance(tbl, pyarrow.Table) - self.assertEqual(tbl.num_rows, 2) - - # Check the schema. - self.assertEqual(tbl.schema[0].name, "spouse_1") - self.assertEqual(tbl.schema[0].type[0].name, "name") - self.assertEqual(tbl.schema[0].type[1].name, "age") - self.assertTrue(pyarrow.types.is_struct(tbl.schema[0].type)) - self.assertTrue(pyarrow.types.is_string(tbl.schema[0].type[0].type)) - self.assertTrue(pyarrow.types.is_int64(tbl.schema[0].type[1].type)) - self.assertEqual(tbl.schema[1].name, "spouse_2") - self.assertEqual(tbl.schema[1].type[0].name, "name") - self.assertEqual(tbl.schema[1].type[1].name, "age") - self.assertTrue(pyarrow.types.is_struct(tbl.schema[1].type)) - self.assertTrue(pyarrow.types.is_string(tbl.schema[1].type[0].type)) - self.assertTrue(pyarrow.types.is_int64(tbl.schema[1].type[1].type)) - - # Check the data. 
- tbl_data = tbl.to_pydict() - spouse_1 = tbl_data["spouse_1"] - self.assertEqual( - spouse_1, - [ - {"name": "Phred Phlyntstone", "age": 32}, - {"name": "Bhettye Rhubble", "age": 27}, - ], - ) - spouse_2 = tbl_data["spouse_2"] - self.assertEqual( - spouse_2, - [ - {"name": "Wylma Phlyntstone", "age": 29}, - {"name": "Bharney Rhubble", "age": 33}, - ], - ) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe(self): - begun_resource = self._make_resource() - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "totalRows": "4", - "schema": { - "fields": [ - {"name": "name", "type": "STRING", "mode": "NULLABLE"}, - {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, - ] - }, - } - tabledata_resource = { - "rows": [ - {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, - {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, - {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, - {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, - ] - } - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection( - begun_resource, query_resource, done_resource, tabledata_resource - ) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - - df = job.to_dataframe(create_bqstorage_client=False) - - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 4) # verify the number of rows - self.assertEqual(list(df), ["name", "age"]) # verify the column names - - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe_ddl_query(self): - # Destination table may have no schema for some DDL and DML queries. 
- query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": []}, - } - connection = _make_connection(query_resource) - client = _make_client(self.PROJECT, connection=connection) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - - df = job.to_dataframe() - - self.assertEqual(len(df), 0) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - def test_to_dataframe_bqstorage(self): - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "totalRows": "4", - "schema": { - "fields": [ - {"name": "name", "type": "STRING", "mode": "NULLABLE"}, - {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, - ] - }, - } - connection = _make_connection(query_resource) - client = _make_client(self.PROJECT, connection=connection) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) - session = bigquery_storage.types.ReadSession() - session.avro_schema.schema = json.dumps( - { - "type": "record", - "name": "__root__", - "fields": [ - {"name": "name", "type": ["null", "string"]}, - {"name": "age", "type": ["null", "long"]}, - ], - } - ) - bqstorage_client.create_read_session.return_value = session - - job.to_dataframe(bqstorage_client=bqstorage_client) - - destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( - **resource["configuration"]["query"]["destinationTable"] - ) - expected_session = bigquery_storage.types.ReadSession( - table=destination_table, - data_format=bigquery_storage.types.DataFormat.ARROW, - ) - bqstorage_client.create_read_session.assert_called_once_with( - parent="projects/{}".format(self.PROJECT), - 
read_session=expected_session, - max_stream_count=0, # Use default number of streams for best performance. - ) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe_column_dtypes(self): - begun_resource = self._make_resource() - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "totalRows": "4", - "schema": { - "fields": [ - {"name": "start_timestamp", "type": "TIMESTAMP"}, - {"name": "seconds", "type": "INT64"}, - {"name": "miles", "type": "FLOAT64"}, - {"name": "km", "type": "FLOAT64"}, - {"name": "payment_type", "type": "STRING"}, - {"name": "complete", "type": "BOOL"}, - {"name": "date", "type": "DATE"}, - ] - }, - } - row_data = [ - [ - "1.4338368E9", - "420", - "1.1", - "1.77", - "Cto_dataframeash", - "true", - "1999-12-01", - ], - ["1.3878117E9", "2580", "17.7", "28.5", "Cash", "false", "1953-06-14"], - ["1.3855653E9", "2280", "4.4", "7.1", "Credit", "true", "1981-11-04"], - ] - rows = [{"f": [{"v": field} for field in row]} for row in row_data] - query_resource["rows"] = rows - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection( - begun_resource, query_resource, done_resource, query_resource - ) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - - df = job.to_dataframe(dtypes={"km": "float16"}, create_bqstorage_client=False) - - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 3) # verify the number of rows - exp_columns = [field["name"] for field in query_resource["schema"]["fields"]] - self.assertEqual(list(df), exp_columns) # verify the column names - - self.assertEqual(df.start_timestamp.dtype.name, "datetime64[ns, UTC]") - self.assertEqual(df.seconds.dtype.name, "int64") - self.assertEqual(df.miles.dtype.name, "float64") - self.assertEqual(df.km.dtype.name, "float16") - 
self.assertEqual(df.payment_type.dtype.name, "object") - self.assertEqual(df.complete.dtype.name, "bool") - self.assertEqual(df.date.dtype.name, "object") - - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe_column_date_dtypes(self): - begun_resource = self._make_resource() - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "totalRows": "1", - "schema": {"fields": [{"name": "date", "type": "DATE"}]}, - } - row_data = [ - ["1999-12-01"], - ] - rows = [{"f": [{"v": field} for field in row]} for row in row_data] - query_resource["rows"] = rows - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection( - begun_resource, query_resource, done_resource, query_resource - ) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - df = job.to_dataframe(date_as_object=False, create_bqstorage_client=False) - - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 1) # verify the number of rows - exp_columns = [field["name"] for field in query_resource["schema"]["fields"]] - self.assertEqual(list(df), exp_columns) # verify the column names - - self.assertEqual(df.date.dtype.name, "datetime64[ns]") - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(tqdm is None, "Requires `tqdm`") - @mock.patch("tqdm.tqdm") - def test_to_dataframe_with_progress_bar(self, tqdm_mock): - begun_resource = self._make_resource() - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "totalRows": "4", - "schema": { - "fields": [{"name": "name", "type": "STRING", "mode": "NULLABLE"}] - }, - } - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection( 
- begun_resource, - query_resource, - done_resource, - query_resource, - query_resource, - ) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - - job.to_dataframe(progress_bar_type=None, create_bqstorage_client=False) - tqdm_mock.assert_not_called() - - job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False) - tqdm_mock.assert_called() - - def test_iter(self): - import types - - begun_resource = self._make_resource() - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "totalRows": "0", - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - } - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = _make_connection(begun_resource, query_resource, done_resource) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - - self.assertIsInstance(iter(job), types.GeneratorType) - - -class TestQueryPlanEntryStep(unittest.TestCase, _Base): - KIND = "KIND" - SUBSTEPS = ("SUB1", "SUB2") - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import QueryPlanEntryStep - - return QueryPlanEntryStep - - def _make_one(self, *args, **kw): - return self._get_target_class()(*args, **kw) - - def test_ctor(self): - step = self._make_one(self.KIND, self.SUBSTEPS) - self.assertEqual(step.kind, self.KIND) - self.assertEqual(step.substeps, list(self.SUBSTEPS)) - - def test_from_api_repr_empty(self): - klass = self._get_target_class() - step = klass.from_api_repr({}) - self.assertIsNone(step.kind) - self.assertEqual(step.substeps, []) - - def test_from_api_repr_normal(self): - resource = {"kind": self.KIND, "substeps": self.SUBSTEPS} - klass = self._get_target_class() - step = klass.from_api_repr(resource) - self.assertEqual(step.kind, self.KIND) - self.assertEqual(step.substeps, 
list(self.SUBSTEPS)) - - def test___eq___mismatched_type(self): - step = self._make_one(self.KIND, self.SUBSTEPS) - self.assertNotEqual(step, object()) - - def test___eq___mismatch_kind(self): - step = self._make_one(self.KIND, self.SUBSTEPS) - other = self._make_one("OTHER", self.SUBSTEPS) - self.assertNotEqual(step, other) - - def test___eq___mismatch_substeps(self): - step = self._make_one(self.KIND, self.SUBSTEPS) - other = self._make_one(self.KIND, ()) - self.assertNotEqual(step, other) - - def test___eq___hit(self): - step = self._make_one(self.KIND, self.SUBSTEPS) - other = self._make_one(self.KIND, self.SUBSTEPS) - self.assertEqual(step, other) - - def test___eq___wrong_type(self): - step = self._make_one(self.KIND, self.SUBSTEPS) - self.assertFalse(step == "hello") - - -class TestQueryPlanEntry(unittest.TestCase, _Base): - NAME = "NAME" - ENTRY_ID = 1234 - START_MS = 1522540800000 - END_MS = 1522540804000 - INPUT_STAGES = (88, 101) - PARALLEL_INPUTS = 1000 - COMPLETED_PARALLEL_INPUTS = 5 - WAIT_MS_AVG = 33 - WAIT_MS_MAX = 400 - WAIT_RATIO_AVG = 2.71828 - WAIT_RATIO_MAX = 3.14159 - READ_MS_AVG = 45 - READ_MS_MAX = 90 - READ_RATIO_AVG = 1.41421 - READ_RATIO_MAX = 1.73205 - COMPUTE_MS_AVG = 55 - COMPUTE_MS_MAX = 99 - COMPUTE_RATIO_AVG = 0.69315 - COMPUTE_RATIO_MAX = 1.09861 - WRITE_MS_AVG = 203 - WRITE_MS_MAX = 340 - WRITE_RATIO_AVG = 3.32193 - WRITE_RATIO_MAX = 2.30258 - RECORDS_READ = 100 - RECORDS_WRITTEN = 1 - STATUS = "STATUS" - SHUFFLE_OUTPUT_BYTES = 1024 - SHUFFLE_OUTPUT_BYTES_SPILLED = 1 - - START_RFC3339_MICROS = "2018-04-01T00:00:00.000000Z" - END_RFC3339_MICROS = "2018-04-01T00:00:04.000000Z" - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import QueryPlanEntry - - return QueryPlanEntry - - def test_from_api_repr_empty(self): - klass = self._get_target_class() - - entry = klass.from_api_repr({}) - - self.assertIsNone(entry.name) - self.assertIsNone(entry.entry_id) - self.assertEqual(entry.input_stages, []) - 
self.assertIsNone(entry.start) - self.assertIsNone(entry.end) - self.assertIsNone(entry.parallel_inputs) - self.assertIsNone(entry.completed_parallel_inputs) - self.assertIsNone(entry.wait_ms_avg) - self.assertIsNone(entry.wait_ms_max) - self.assertIsNone(entry.wait_ratio_avg) - self.assertIsNone(entry.wait_ratio_max) - self.assertIsNone(entry.read_ms_avg) - self.assertIsNone(entry.read_ms_max) - self.assertIsNone(entry.read_ratio_avg) - self.assertIsNone(entry.read_ratio_max) - self.assertIsNone(entry.compute_ms_avg) - self.assertIsNone(entry.compute_ms_max) - self.assertIsNone(entry.compute_ratio_avg) - self.assertIsNone(entry.compute_ratio_max) - self.assertIsNone(entry.write_ms_avg) - self.assertIsNone(entry.write_ms_max) - self.assertIsNone(entry.write_ratio_avg) - self.assertIsNone(entry.write_ratio_max) - self.assertIsNone(entry.records_read) - self.assertIsNone(entry.records_written) - self.assertIsNone(entry.status) - self.assertIsNone(entry.shuffle_output_bytes) - self.assertIsNone(entry.shuffle_output_bytes_spilled) - self.assertEqual(entry.steps, []) - - def test_from_api_repr_normal(self): - from google.cloud.bigquery.job import QueryPlanEntryStep - - steps = [ - QueryPlanEntryStep( - kind=TestQueryPlanEntryStep.KIND, - substeps=TestQueryPlanEntryStep.SUBSTEPS, - ) - ] - resource = { - "name": self.NAME, - "id": self.ENTRY_ID, - "inputStages": self.INPUT_STAGES, - "startMs": self.START_MS, - "endMs": self.END_MS, - "waitMsAvg": self.WAIT_MS_AVG, - "waitMsMax": self.WAIT_MS_MAX, - "waitRatioAvg": self.WAIT_RATIO_AVG, - "waitRatioMax": self.WAIT_RATIO_MAX, - "readMsAvg": self.READ_MS_AVG, - "readMsMax": self.READ_MS_MAX, - "readRatioAvg": self.READ_RATIO_AVG, - "readRatioMax": self.READ_RATIO_MAX, - "computeMsAvg": self.COMPUTE_MS_AVG, - "computeMsMax": self.COMPUTE_MS_MAX, - "computeRatioAvg": self.COMPUTE_RATIO_AVG, - "computeRatioMax": self.COMPUTE_RATIO_MAX, - "writeMsAvg": self.WRITE_MS_AVG, - "writeMsMax": self.WRITE_MS_MAX, - "writeRatioAvg": 
self.WRITE_RATIO_AVG, - "writeRatioMax": self.WRITE_RATIO_MAX, - "recordsRead": self.RECORDS_READ, - "recordsWritten": self.RECORDS_WRITTEN, - "status": self.STATUS, - "shuffleOutputBytes": self.SHUFFLE_OUTPUT_BYTES, - "shuffleOutputBytesSpilled": self.SHUFFLE_OUTPUT_BYTES_SPILLED, - "steps": [ - { - "kind": TestQueryPlanEntryStep.KIND, - "substeps": TestQueryPlanEntryStep.SUBSTEPS, - } - ], - } - klass = self._get_target_class() - - entry = klass.from_api_repr(resource) - self.assertEqual(entry.name, self.NAME) - self.assertEqual(entry.entry_id, self.ENTRY_ID) - self.assertEqual(entry.wait_ratio_avg, self.WAIT_RATIO_AVG) - self.assertEqual(entry.wait_ratio_max, self.WAIT_RATIO_MAX) - self.assertEqual(entry.read_ratio_avg, self.READ_RATIO_AVG) - self.assertEqual(entry.read_ratio_max, self.READ_RATIO_MAX) - self.assertEqual(entry.compute_ratio_avg, self.COMPUTE_RATIO_AVG) - self.assertEqual(entry.compute_ratio_max, self.COMPUTE_RATIO_MAX) - self.assertEqual(entry.write_ratio_avg, self.WRITE_RATIO_AVG) - self.assertEqual(entry.write_ratio_max, self.WRITE_RATIO_MAX) - self.assertEqual(entry.records_read, self.RECORDS_READ) - self.assertEqual(entry.records_written, self.RECORDS_WRITTEN) - self.assertEqual(entry.status, self.STATUS) - self.assertEqual(entry.steps, steps) - - def test_start(self): - from google.cloud._helpers import _RFC3339_MICROS - - klass = self._get_target_class() - - entry = klass.from_api_repr({}) - self.assertEqual(entry.start, None) - - entry._properties["startMs"] = self.START_MS - self.assertEqual( - entry.start.strftime(_RFC3339_MICROS), self.START_RFC3339_MICROS - ) - - def test_end(self): - from google.cloud._helpers import _RFC3339_MICROS - - klass = self._get_target_class() - - entry = klass.from_api_repr({}) - self.assertEqual(entry.end, None) - - entry._properties["endMs"] = self.END_MS - self.assertEqual(entry.end.strftime(_RFC3339_MICROS), self.END_RFC3339_MICROS) - - -class TestScriptStackFrame(unittest.TestCase, _Base): - def 
_make_one(self, resource): - from google.cloud.bigquery.job import ScriptStackFrame - - return ScriptStackFrame(resource) - - def test_procedure_id(self): - frame = self._make_one({"procedureId": "some-procedure"}) - self.assertEqual(frame.procedure_id, "some-procedure") - del frame._properties["procedureId"] - self.assertIsNone(frame.procedure_id) - - def test_start_line(self): - frame = self._make_one({"startLine": 5}) - self.assertEqual(frame.start_line, 5) - frame._properties["startLine"] = "5" - self.assertEqual(frame.start_line, 5) - - def test_start_column(self): - frame = self._make_one({"startColumn": 29}) - self.assertEqual(frame.start_column, 29) - frame._properties["startColumn"] = "29" - self.assertEqual(frame.start_column, 29) - - def test_end_line(self): - frame = self._make_one({"endLine": 9}) - self.assertEqual(frame.end_line, 9) - frame._properties["endLine"] = "9" - self.assertEqual(frame.end_line, 9) - - def test_end_column(self): - frame = self._make_one({"endColumn": 14}) - self.assertEqual(frame.end_column, 14) - frame._properties["endColumn"] = "14" - self.assertEqual(frame.end_column, 14) - - def test_text(self): - frame = self._make_one({"text": "QUERY TEXT"}) - self.assertEqual(frame.text, "QUERY TEXT") - - -class TestScriptStatistics(unittest.TestCase, _Base): - def _make_one(self, resource): - from google.cloud.bigquery.job import ScriptStatistics - - return ScriptStatistics(resource) - - def test_evalutation_kind(self): - stats = self._make_one({"evaluationKind": "EXPRESSION"}) - self.assertEqual(stats.evaluation_kind, "EXPRESSION") - self.assertEqual(stats.stack_frames, []) - - def test_stack_frames(self): - stats = self._make_one( - { - "stackFrames": [ - { - "procedureId": "some-procedure", - "startLine": 5, - "startColumn": 29, - "endLine": 9, - "endColumn": 14, - "text": "QUERY TEXT", - }, - {}, - ] - } - ) - stack_frames = stats.stack_frames - self.assertEqual(len(stack_frames), 2) - stack_frame = stack_frames[0] - 
self.assertEqual(stack_frame.procedure_id, "some-procedure") - self.assertEqual(stack_frame.start_line, 5) - self.assertEqual(stack_frame.start_column, 29) - self.assertEqual(stack_frame.end_line, 9) - self.assertEqual(stack_frame.end_column, 14) - self.assertEqual(stack_frame.text, "QUERY TEXT") - stack_frame = stack_frames[1] - self.assertIsNone(stack_frame.procedure_id) - self.assertIsNone(stack_frame.start_line) - self.assertIsNone(stack_frame.start_column) - self.assertIsNone(stack_frame.end_line) - self.assertIsNone(stack_frame.end_column) - self.assertIsNone(stack_frame.text) - - -class TestTimelineEntry(unittest.TestCase, _Base): - ELAPSED_MS = 101 - ACTIVE_UNITS = 50 - PENDING_UNITS = 98 - COMPLETED_UNITS = 520 - SLOT_MILLIS = 12029 - - @staticmethod - def _get_target_class(): - from google.cloud.bigquery.job import TimelineEntry - - return TimelineEntry - - def test_from_api_repr_empty(self): - klass = self._get_target_class() - entry = klass.from_api_repr({}) - self.assertIsNone(entry.elapsed_ms) - self.assertIsNone(entry.active_units) - self.assertIsNone(entry.pending_units) - self.assertIsNone(entry.completed_units) - self.assertIsNone(entry.slot_millis) - - def test_from_api_repr_normal(self): - resource = { - "elapsedMs": self.ELAPSED_MS, - "activeUnits": self.ACTIVE_UNITS, - "pendingUnits": self.PENDING_UNITS, - "completedUnits": self.COMPLETED_UNITS, - "totalSlotMs": self.SLOT_MILLIS, - } - klass = self._get_target_class() - - entry = klass.from_api_repr(resource) - self.assertEqual(entry.elapsed_ms, self.ELAPSED_MS) - self.assertEqual(entry.active_units, self.ACTIVE_UNITS) - self.assertEqual(entry.pending_units, self.PENDING_UNITS) - self.assertEqual(entry.completed_units, self.COMPLETED_UNITS) - self.assertEqual(entry.slot_millis, self.SLOT_MILLIS) - - -@pytest.mark.parametrize( - "query,expected", - ( - (None, False), - ("", False), - ("select name, age from table", False), - ("select name, age from table LIMIT 10;", False), - ("select name, age 
from table order by other_column;", True), - ("Select name, age From table Order By other_column", True), - ("SELECT name, age FROM table ORDER BY other_column;", True), - ("select name, age from table order\nby other_column", True), - ("Select name, age From table Order\nBy other_column;", True), - ("SELECT name, age FROM table ORDER\nBY other_column", True), - ("SelecT name, age froM table OrdeR \n\t BY other_column;", True), - ), -) -def test__contains_order_by(query, expected): - from google.cloud.bigquery import job as mut - - if expected: - assert mut._contains_order_by(query) - else: - assert not mut._contains_order_by(query) - - -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif( - bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" -) -@pytest.mark.parametrize( - "query", - ( - "select name, age from table order by other_column;", - "Select name, age From table Order By other_column;", - "SELECT name, age FROM table ORDER BY other_column;", - "select name, age from table order\nby other_column;", - "Select name, age From table Order\nBy other_column;", - "SELECT name, age FROM table ORDER\nBY other_column;", - "SelecT name, age froM table OrdeR \n\t BY other_column;", - ), -) -def test_to_dataframe_bqstorage_preserve_order(query): - from google.cloud.bigquery.job import QueryJob as target_class - - job_resource = _make_job_resource( - project_id="test-project", job_type="query", ended=True - ) - job_resource["configuration"]["query"]["query"] = query - job_resource["status"] = {"state": "DONE"} - get_query_results_resource = { - "jobComplete": True, - "jobReference": {"projectId": "test-project", "jobId": "test-job"}, - "schema": { - "fields": [ - {"name": "name", "type": "STRING", "mode": "NULLABLE"}, - {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, - ] - }, - "totalRows": "4", - } - connection = _make_connection(get_query_results_resource, job_resource) - client = 
_make_client(connection=connection) - job = target_class.from_api_repr(job_resource, client) - bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) - session = bigquery_storage.types.ReadSession() - session.avro_schema.schema = json.dumps( - { - "type": "record", - "name": "__root__", - "fields": [ - {"name": "name", "type": ["null", "string"]}, - {"name": "age", "type": ["null", "long"]}, - ], - } - ) - bqstorage_client.create_read_session.return_value = session - - job.to_dataframe(bqstorage_client=bqstorage_client) - - destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( - **job_resource["configuration"]["query"]["destinationTable"] - ) - expected_session = bigquery_storage.types.ReadSession( - table=destination_table, data_format=bigquery_storage.types.DataFormat.ARROW, - ) - bqstorage_client.create_read_session.assert_called_once_with( - parent="projects/test-project", - read_session=expected_session, - max_stream_count=1, # Use a single stream to preserve row order. - ) From 0c3476d56380d70115f6fd765bf5c5261967052f Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 4 Nov 2020 13:42:01 -0600 Subject: [PATCH 045/341] perf: use `jobs.getQueryResults` to download result sets (#363) * refactor: break job into multiple modules Original paths are retained for backwards compatibility. * perf: use `jobs.getQueryResults` to download result sets Replaces `tabledata.list` when `RowIterator` is used for query results. This likely also fixes a few edge cases around BigQuery scripting jobs. * revert unnecessary changes to _get_query_results * simplify RowIterator. 
no need to hack Table object * fix tests for bqstorage warning * populate location --- google/cloud/bigquery/_pandas_helpers.py | 16 +-- google/cloud/bigquery/client.py | 104 +++++++++++++++--- google/cloud/bigquery/job/query.py | 14 +-- google/cloud/bigquery/table.py | 17 +-- tests/unit/job/helpers.py | 10 +- tests/unit/job/test_base.py | 42 ++++--- tests/unit/job/test_query.py | 133 +++++++++++++---------- tests/unit/test__pandas_helpers.py | 18 +-- tests/unit/test_client.py | 12 +- tests/unit/test_magics.py | 10 +- tests/unit/test_table.py | 11 +- 11 files changed, 256 insertions(+), 131 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 57c8f95f6..7774ce26b 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -474,7 +474,7 @@ def dataframe_to_parquet(dataframe, bq_schema, filepath, parquet_compression="SN pyarrow.parquet.write_table(arrow_table, filepath, compression=parquet_compression) -def _tabledata_list_page_to_arrow(page, column_names, arrow_types): +def _row_iterator_page_to_arrow(page, column_names, arrow_types): # Iterate over the page to force the API request to get the page data. try: next(iter(page)) @@ -490,8 +490,8 @@ def _tabledata_list_page_to_arrow(page, column_names, arrow_types): return pyarrow.RecordBatch.from_arrays(arrays, names=column_names) -def download_arrow_tabledata_list(pages, bq_schema): - """Use tabledata.list to construct an iterable of RecordBatches. +def download_arrow_row_iterator(pages, bq_schema): + """Use HTTP JSON RowIterator to construct an iterable of RecordBatches. 
Args: pages (Iterator[:class:`google.api_core.page_iterator.Page`]): @@ -510,10 +510,10 @@ def download_arrow_tabledata_list(pages, bq_schema): arrow_types = [bq_to_arrow_data_type(field) for field in bq_schema] for page in pages: - yield _tabledata_list_page_to_arrow(page, column_names, arrow_types) + yield _row_iterator_page_to_arrow(page, column_names, arrow_types) -def _tabledata_list_page_to_dataframe(page, column_names, dtypes): +def _row_iterator_page_to_dataframe(page, column_names, dtypes): # Iterate over the page to force the API request to get the page data. try: next(iter(page)) @@ -528,8 +528,8 @@ def _tabledata_list_page_to_dataframe(page, column_names, dtypes): return pandas.DataFrame(columns, columns=column_names) -def download_dataframe_tabledata_list(pages, bq_schema, dtypes): - """Use (slower, but free) tabledata.list to construct a DataFrame. +def download_dataframe_row_iterator(pages, bq_schema, dtypes): + """Use HTTP JSON RowIterator to construct a DataFrame. Args: pages (Iterator[:class:`google.api_core.page_iterator.Page`]): @@ -549,7 +549,7 @@ def download_dataframe_tabledata_list(pages, bq_schema, dtypes): bq_schema = schema._to_schema_fields(bq_schema) column_names = [field.name for field in bq_schema] for page in pages: - yield _tabledata_list_page_to_dataframe(page, column_names, dtypes) + yield _row_iterator_page_to_dataframe(page, column_names, dtypes) def _bqstorage_page_to_arrow(page): diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 57df9455e..cd1474336 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -80,18 +80,19 @@ _MAX_MULTIPART_SIZE = 5 * 1024 * 1024 _DEFAULT_NUM_RETRIES = 6 _BASE_UPLOAD_TEMPLATE = ( - u"https://bigquery.googleapis.com/upload/bigquery/v2/projects/" - u"{project}/jobs?uploadType=" + "https://bigquery.googleapis.com/upload/bigquery/v2/projects/" + "{project}/jobs?uploadType=" ) -_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u"multipart" 
-_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + u"resumable" -_GENERIC_CONTENT_TYPE = u"*/*" +_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "multipart" +_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "resumable" +_GENERIC_CONTENT_TYPE = "*/*" _READ_LESS_THAN_SIZE = ( "Size {:d} was specified but the file-like object only had " "{:d} bytes remaining." ) _NEED_TABLE_ARGUMENT = ( "The table argument should be a table ID string, Table, or TableReference" ) +_LIST_ROWS_FROM_QUERY_RESULTS_FIELDS = "jobReference,totalRows,pageToken,rows" class Project(object): @@ -293,7 +294,7 @@ def api_request(*args, **kwargs): span_attributes=span_attributes, *args, timeout=timeout, - **kwargs + **kwargs, ) return page_iterator.HTTPIterator( @@ -371,7 +372,7 @@ def api_request(*args, **kwargs): span_attributes=span_attributes, *args, timeout=timeout, - **kwargs + **kwargs, ) return page_iterator.HTTPIterator( @@ -1129,7 +1130,7 @@ def api_request(*args, **kwargs): span_attributes=span_attributes, *args, timeout=timeout, - **kwargs + **kwargs, ) result = page_iterator.HTTPIterator( @@ -1207,7 +1208,7 @@ def api_request(*args, **kwargs): span_attributes=span_attributes, *args, timeout=timeout, - **kwargs + **kwargs, ) result = page_iterator.HTTPIterator( @@ -1284,7 +1285,7 @@ def api_request(*args, **kwargs): span_attributes=span_attributes, *args, timeout=timeout, - **kwargs + **kwargs, ) result = page_iterator.HTTPIterator( @@ -1510,7 +1511,7 @@ def delete_table( raise def _get_query_results( - self, job_id, retry, project=None, timeout_ms=None, location=None, timeout=None + self, job_id, retry, project=None, timeout_ms=None, location=None, timeout=None, ): """Get the query results object for a query job. 
@@ -1890,7 +1891,7 @@ def api_request(*args, **kwargs): span_attributes=span_attributes, *args, timeout=timeout, - **kwargs + **kwargs, ) return page_iterator.HTTPIterator( @@ -2374,7 +2375,7 @@ def load_table_from_json( destination = _table_arg_to_table_ref(destination, default_project=self.project) - data_str = u"\n".join(json.dumps(item) for item in json_rows) + data_str = "\n".join(json.dumps(item) for item in json_rows) encoded_str = data_str.encode() data_file = io.BytesIO(encoded_str) return self.load_table_from_file( @@ -3169,6 +3170,83 @@ def list_rows( # Pass in selected_fields separately from schema so that full # tables can be fetched without a column filter. selected_fields=selected_fields, + total_rows=getattr(table, "num_rows", None), + ) + return row_iterator + + def _list_rows_from_query_results( + self, + job_id, + location, + project, + schema, + total_rows=None, + destination=None, + max_results=None, + start_index=None, + page_size=None, + retry=DEFAULT_RETRY, + timeout=None, + ): + """List the rows of a completed query. + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults + Args: + job_id (str): + ID of a query job. + location (str): Location of the query job. + project (str): + ID of the project where the query job was run. + schema (Sequence[google.cloud.bigquery.schema.SchemaField]): + The fields expected in these query results. Used to convert + from JSON to expected Python types. + total_rows (Optional[int]): + Total number of rows in the query results. + destination (Optional[Union[ \ + google.cloud.bigquery.table.Table, \ + google.cloud.bigquery.table.TableListItem, \ + google.cloud.bigquery.table.TableReference, \ + str, \ + ]]): + Destination table reference. Used to fetch the query results + with the BigQuery Storage API. + max_results (Optional[int]): + Maximum number of rows to return across the whole iterator. + start_index (Optional[int]): + The zero-based index of the starting row to read. 
+ page_size (Optional[int]): + The maximum number of rows in each page of results from this request. + Non-positive values are ignored. Defaults to a sensible value set by the API. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + If multiple requests are made under the hood, ``timeout`` + applies to each individual request. + Returns: + google.cloud.bigquery.table.RowIterator: + Iterator of row data + :class:`~google.cloud.bigquery.table.Row`-s. + """ + params = { + "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, + "location": location, + } + + if start_index is not None: + params["startIndex"] = start_index + + row_iterator = RowIterator( + client=self, + api_request=functools.partial(self._call_api, retry, timeout=timeout), + path=f"/projects/{project}/queries/{job_id}", + schema=schema, + max_results=max_results, + page_size=page_size, + table=destination, + extra_params=params, + total_rows=total_rows, ) return row_iterator diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index e25077360..1e2002eab 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -38,7 +38,6 @@ from google.cloud.bigquery.table import _EmptyRowIterator from google.cloud.bigquery.table import RangePartitioning from google.cloud.bigquery.table import _table_arg_to_table_ref -from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import TimePartitioning @@ -1159,12 +1158,13 @@ def result( if self._query_results.total_rows is None: return _EmptyRowIterator() - schema = self._query_results.schema - dest_table_ref = self.destination - dest_table = Table(dest_table_ref, schema=schema) - dest_table._properties["numRows"] = self._query_results.total_rows - rows = self._client.list_rows( - dest_table, + 
rows = self._client._list_rows_from_query_results( + self._query_results.job_id, + self.location, + self._query_results.project, + self._query_results.schema, + total_rows=self._query_results.total_rows, + destination=self.destination, page_size=page_size, max_results=max_results, start_index=start_index, diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index d6d966eee..e46b7e3cd 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1306,6 +1306,8 @@ class RowIterator(HTTPIterator): call the BigQuery Storage API to fetch rows. selected_fields (Optional[Sequence[google.cloud.bigquery.schema.SchemaField]]): A subset of columns to select from this table. + total_rows (Optional[int]): + Total number of rows in the table. """ @@ -1321,6 +1323,7 @@ def __init__( extra_params=None, table=None, selected_fields=None, + total_rows=None, ): super(RowIterator, self).__init__( client, @@ -1342,7 +1345,7 @@ def __init__( self._schema = schema self._selected_fields = selected_fields self._table = table - self._total_rows = getattr(table, "num_rows", None) + self._total_rows = total_rows def _get_next_page_response(self): """Requests the next page from the path provided. 
@@ -1419,7 +1422,7 @@ def _to_arrow_iterable(self, bqstorage_client=None): selected_fields=self._selected_fields, ) tabledata_list_download = functools.partial( - _pandas_helpers.download_arrow_tabledata_list, iter(self.pages), self.schema + _pandas_helpers.download_arrow_row_iterator, iter(self.pages), self.schema ) return self._to_page_iterable( bqstorage_download, @@ -1496,7 +1499,7 @@ def to_arrow( ) and self.max_results is not None: warnings.warn( "Cannot use bqstorage_client if max_results is set, " - "reverting to fetching data with the tabledata.list endpoint.", + "reverting to fetching data with the REST endpoint.", stacklevel=2, ) create_bqstorage_client = False @@ -1582,7 +1585,7 @@ def to_dataframe_iterable(self, bqstorage_client=None, dtypes=None): selected_fields=self._selected_fields, ) tabledata_list_download = functools.partial( - _pandas_helpers.download_dataframe_tabledata_list, + _pandas_helpers.download_dataframe_row_iterator, iter(self.pages), self.schema, dtypes, @@ -1680,7 +1683,7 @@ def to_dataframe( ) and self.max_results is not None: warnings.warn( "Cannot use bqstorage_client if max_results is set, " - "reverting to fetching data with the tabledata.list endpoint.", + "reverting to fetching data with the REST endpoint.", stacklevel=2, ) create_bqstorage_client = False @@ -2167,7 +2170,7 @@ def _item_to_row(iterator, resource): ) -def _tabledata_list_page_columns(schema, response): +def _row_iterator_page_columns(schema, response): """Make a generator of all the columns in a page from tabledata.list. This enables creating a :class:`pandas.DataFrame` and other @@ -2197,7 +2200,7 @@ def _rows_page_start(iterator, page, response): """ # Make a (lazy) copy of the page in column-oriented format for use in data # science packages. 
- page._columns = _tabledata_list_page_columns(iterator._schema, response) + page._columns = _row_iterator_page_columns(iterator._schema, response) total_rows = response.get("totalRows") if total_rows is not None: diff --git a/tests/unit/job/helpers.py b/tests/unit/job/helpers.py index f928054f6..ea071c5ac 100644 --- a/tests/unit/job/helpers.py +++ b/tests/unit/job/helpers.py @@ -60,6 +60,7 @@ def _make_job_resource( endpoint="https://bigquery.googleapis.com", job_type="load", job_id="a-random-id", + location="US", project_id="some-project", user_email="bq-user@example.com", ): @@ -69,7 +70,11 @@ def _make_job_resource( "statistics": {"creationTime": creation_time_ms, job_type: {}}, "etag": etag, "id": "{}:{}".format(project_id, job_id), - "jobReference": {"projectId": project_id, "jobId": job_id}, + "jobReference": { + "projectId": project_id, + "jobId": job_id, + "location": location, + }, "selfLink": "{}/bigquery/v2/projects/{}/jobs/{}".format( endpoint, project_id, job_id ), @@ -130,7 +135,7 @@ def _table_ref(self, table_id): return TableReference(self.DS_REF, table_id) - def _make_resource(self, started=False, ended=False): + def _make_resource(self, started=False, ended=False, location="US"): self._setUpConstants() return _make_job_resource( creation_time_ms=int(self.WHEN_TS * 1000), @@ -144,6 +149,7 @@ def _make_resource(self, started=False, ended=False): job_id=self.JOB_ID, project_id=self.PROJECT, user_email=self.USER_EMAIL, + location=location, ) def _verifyInitialReadonlyProperties(self, job): diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index 90d4388b8..12e2d4b8b 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -882,10 +882,14 @@ def test_done_already(self): def test_result_default_wo_state(self): begun_job_resource = _make_job_resource( - job_id=self.JOB_ID, project_id=self.PROJECT, started=True + job_id=self.JOB_ID, project_id=self.PROJECT, location="US", started=True ) done_job_resource = 
_make_job_resource( - job_id=self.JOB_ID, project_id=self.PROJECT, started=True, ended=True + job_id=self.JOB_ID, + project_id=self.PROJECT, + location="US", + started=True, + ended=True, ) conn = _make_connection( _make_retriable_exception(), @@ -907,7 +911,7 @@ def test_result_default_wo_state(self): reload_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={}, + query_params={"location": "US"}, timeout=None, ) conn.api_request.assert_has_calls( @@ -916,38 +920,48 @@ def test_result_default_wo_state(self): def test_result_w_retry_wo_state(self): begun_job_resource = _make_job_resource( - job_id=self.JOB_ID, project_id=self.PROJECT, started=True + job_id=self.JOB_ID, project_id=self.PROJECT, location="EU", started=True ) done_job_resource = _make_job_resource( - job_id=self.JOB_ID, project_id=self.PROJECT, started=True, ended=True + job_id=self.JOB_ID, + project_id=self.PROJECT, + location="EU", + started=True, + ended=True, ) conn = _make_connection( exceptions.NotFound("not normally retriable"), begun_job_resource, - # The call to done() / reload() does not get the custom retry - # policy passed to it, so we don't throw a non-retriable - # exception here. 
See: - # https://github.com/googleapis/python-bigquery/issues/24 - _make_retriable_exception(), + exceptions.NotFound("not normally retriable"), done_job_resource, ) client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one(self.JOB_ID, client) + job = self._make_one( + self._job_reference(self.JOB_ID, self.PROJECT, "EU"), client + ) custom_predicate = mock.Mock() custom_predicate.return_value = True - custom_retry = google.api_core.retry.Retry(predicate=custom_predicate) + custom_retry = google.api_core.retry.Retry( + predicate=custom_predicate, initial=0.001, maximum=0.001, deadline=0.001, + ) self.assertIs(job.result(retry=custom_retry), job) begin_call = mock.call( method="POST", path=f"/projects/{self.PROJECT}/jobs", - data={"jobReference": {"jobId": self.JOB_ID, "projectId": self.PROJECT}}, + data={ + "jobReference": { + "jobId": self.JOB_ID, + "projectId": self.PROJECT, + "location": "EU", + } + }, timeout=None, ) reload_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={}, + query_params={"location": "EU"}, timeout=None, ) conn.api_request.assert_has_calls( diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index c0b90d8ea..daaf2e557 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -23,6 +23,7 @@ import requests from six.moves import http_client +from google.cloud.bigquery.client import _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS import google.cloud.bigquery.query from .helpers import _Base from .helpers import _make_client @@ -40,8 +41,10 @@ def _get_target_class(): return QueryJob - def _make_resource(self, started=False, ended=False): - resource = super(TestQueryJob, self)._make_resource(started, ended) + def _make_resource(self, started=False, ended=False, location="US"): + resource = super(TestQueryJob, self)._make_resource( + started, ended, location=location + ) config = resource["configuration"]["query"] config["query"] = 
self.QUERY return resource @@ -770,22 +773,30 @@ def test_result(self): query_resource = { "jobComplete": False, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "jobReference": { + "projectId": self.PROJECT, + "jobId": self.JOB_ID, + "location": "EU", + }, } query_resource_done = { "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "jobReference": { + "projectId": self.PROJECT, + "jobId": self.JOB_ID, + "location": "EU", + }, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "2", } - job_resource = self._make_resource(started=True) - job_resource_done = self._make_resource(started=True, ended=True) + job_resource = self._make_resource(started=True, location="EU") + job_resource_done = self._make_resource(started=True, ended=True, location="EU") job_resource_done["configuration"]["query"]["destinationTable"] = { "projectId": "dest-project", "datasetId": "dest_dataset", "tableId": "dest_table", } - tabledata_resource = { + query_page_resource = { # Explicitly set totalRows to be different from the initial # response to test update during iteration. "totalRows": "1", @@ -793,7 +804,7 @@ def test_result(self): "rows": [{"f": [{"v": "abc"}]}], } conn = _make_connection( - query_resource, query_resource_done, job_resource_done, tabledata_resource + query_resource, query_resource_done, job_resource_done, query_page_resource ) client = _make_client(self.PROJECT, connection=conn) job = self._get_target_class().from_api_repr(job_resource, client) @@ -809,26 +820,30 @@ def test_result(self): # on the response from tabledata.list. 
self.assertEqual(result.total_rows, 1) + query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_results_call = mock.call( method="GET", - path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={"maxResults": 0}, + path=query_results_path, + query_params={"maxResults": 0, "location": "EU"}, timeout=None, ) reload_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={}, + query_params={"location": "EU"}, timeout=None, ) - tabledata_call = mock.call( + query_page_call = mock.call( method="GET", - path="/projects/dest-project/datasets/dest_dataset/tables/dest_table/data", - query_params={}, + path=query_results_path, + query_params={ + "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, + "location": "EU", + }, timeout=None, ) conn.api_request.assert_has_calls( - [query_results_call, query_results_call, reload_call, tabledata_call] + [query_results_call, query_results_call, reload_call, query_page_call] ) def test_result_with_done_job_calls_get_query_results(self): @@ -838,18 +853,18 @@ def test_result_with_done_job_calls_get_query_results(self): "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "1", } - job_resource = self._make_resource(started=True, ended=True) + job_resource = self._make_resource(started=True, ended=True, location="EU") job_resource["configuration"]["query"]["destinationTable"] = { "projectId": "dest-project", "datasetId": "dest_dataset", "tableId": "dest_table", } - tabledata_resource = { + results_page_resource = { "totalRows": "1", "pageToken": None, "rows": [{"f": [{"v": "abc"}]}], } - conn = _make_connection(query_resource_done, tabledata_resource) + conn = _make_connection(query_resource_done, results_page_resource) client = _make_client(self.PROJECT, connection=conn) job = self._get_target_class().from_api_repr(job_resource, client) @@ -859,19 +874,23 @@ def test_result_with_done_job_calls_get_query_results(self): 
self.assertEqual(len(rows), 1) self.assertEqual(rows[0].col1, "abc") + query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_results_call = mock.call( method="GET", - path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={"maxResults": 0}, + path=query_results_path, + query_params={"maxResults": 0, "location": "EU"}, timeout=None, ) - tabledata_call = mock.call( + query_results_page_call = mock.call( method="GET", - path="/projects/dest-project/datasets/dest_dataset/tables/dest_table/data", - query_params={}, + path=query_results_path, + query_params={ + "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, + "location": "EU", + }, timeout=None, ) - conn.api_request.assert_has_calls([query_results_call, tabledata_call]) + conn.api_request.assert_has_calls([query_results_call, query_results_page_call]) def test_result_with_max_results(self): from google.cloud.bigquery.table import RowIterator @@ -882,7 +901,7 @@ def test_result_with_max_results(self): "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "5", } - tabledata_resource = { + query_page_resource = { "totalRows": "5", "pageToken": None, "rows": [ @@ -891,7 +910,7 @@ def test_result_with_max_results(self): {"f": [{"v": "ghi"}]}, ], } - connection = _make_connection(query_resource, tabledata_resource) + connection = _make_connection(query_resource, query_page_resource) client = _make_client(self.PROJECT, connection=connection) resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) @@ -907,9 +926,9 @@ def test_result_with_max_results(self): self.assertEqual(len(rows), 3) self.assertEqual(len(connection.api_request.call_args_list), 2) - tabledata_list_request = connection.api_request.call_args_list[1] + query_page_request = connection.api_request.call_args_list[1] self.assertEqual( - tabledata_list_request[1]["query_params"]["maxResults"], max_results + query_page_request[1]["query_params"]["maxResults"], 
max_results ) def test_result_w_retry(self): @@ -925,8 +944,10 @@ def test_result_w_retry(self): "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "2", } - job_resource = self._make_resource(started=True) - job_resource_done = self._make_resource(started=True, ended=True) + job_resource = self._make_resource(started=True, location="asia-northeast1") + job_resource_done = self._make_resource( + started=True, ended=True, location="asia-northeast1" + ) job_resource_done["configuration"]["query"]["destinationTable"] = { "projectId": "dest-project", "datasetId": "dest_dataset", @@ -958,13 +979,13 @@ def test_result_w_retry(self): query_results_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={"maxResults": 0}, + query_params={"maxResults": 0, "location": "asia-northeast1"}, timeout=None, ) reload_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={}, + query_params={"location": "asia-northeast1"}, timeout=None, ) @@ -1059,14 +1080,14 @@ def test_result_w_page_size(self): "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "4", } - job_resource = self._make_resource(started=True, ended=True) + job_resource = self._make_resource(started=True, ended=True, location="US") q_config = job_resource["configuration"]["query"] q_config["destinationTable"] = { "projectId": self.PROJECT, "datasetId": self.DS_ID, "tableId": self.TABLE_ID, } - tabledata_resource = { + query_page_resource = { "totalRows": 4, "pageToken": "some-page-token", "rows": [ @@ -1075,9 +1096,9 @@ def test_result_w_page_size(self): {"f": [{"v": "row3"}]}, ], } - tabledata_resource_page_2 = {"totalRows": 4, "rows": [{"f": [{"v": "row4"}]}]} + query_page_resource_2 = {"totalRows": 4, "rows": [{"f": [{"v": "row4"}]}]} conn = _make_connection( - query_results_resource, tabledata_resource, tabledata_resource_page_2 + query_results_resource, query_page_resource, 
query_page_resource_2 ) client = _make_client(self.PROJECT, connection=conn) job = self._get_target_class().from_api_repr(job_resource, client) @@ -1089,27 +1110,29 @@ def test_result_w_page_size(self): actual_rows = list(result) self.assertEqual(len(actual_rows), 4) - tabledata_path = "/projects/%s/datasets/%s/tables/%s/data" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, + query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" + query_page_1_call = mock.call( + method="GET", + path=query_results_path, + query_params={ + "maxResults": 3, + "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, + "location": "US", + }, + timeout=None, ) - conn.api_request.assert_has_calls( - [ - mock.call( - method="GET", - path=tabledata_path, - query_params={"maxResults": 3}, - timeout=None, - ), - mock.call( - method="GET", - path=tabledata_path, - query_params={"pageToken": "some-page-token", "maxResults": 3}, - timeout=None, - ), - ] + query_page_2_call = mock.call( + method="GET", + path=query_results_path, + query_params={ + "pageToken": "some-page-token", + "maxResults": 3, + "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, + "location": "US", + }, + timeout=None, ) + conn.api_request.assert_has_calls([query_page_1_call, query_page_2_call]) def test_result_with_start_index(self): from google.cloud.bigquery.table import RowIterator diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index bdb1c56ea..ef0c40e1a 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -1202,7 +1202,7 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") -def test_download_arrow_tabledata_list_unknown_field_type(module_under_test): +def test_download_arrow_row_iterator_unknown_field_type(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), items=[{"page_data": "foo"}], @@ -1216,7 +1216,7 @@ 
def test_download_arrow_tabledata_list_unknown_field_type(module_under_test): schema.SchemaField("alien_field", "ALIEN_FLOAT_TYPE"), ] - results_gen = module_under_test.download_arrow_tabledata_list(pages, bq_schema) + results_gen = module_under_test.download_arrow_row_iterator(pages, bq_schema) with warnings.catch_warnings(record=True) as warned: result = next(results_gen) @@ -1238,7 +1238,7 @@ def test_download_arrow_tabledata_list_unknown_field_type(module_under_test): @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") -def test_download_arrow_tabledata_list_known_field_type(module_under_test): +def test_download_arrow_row_iterator_known_field_type(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), items=[{"page_data": "foo"}], @@ -1252,7 +1252,7 @@ def test_download_arrow_tabledata_list_known_field_type(module_under_test): schema.SchemaField("non_alien_field", "STRING"), ] - results_gen = module_under_test.download_arrow_tabledata_list(pages, bq_schema) + results_gen = module_under_test.download_arrow_row_iterator(pages, bq_schema) with warnings.catch_warnings(record=True) as warned: result = next(results_gen) @@ -1273,7 +1273,7 @@ def test_download_arrow_tabledata_list_known_field_type(module_under_test): @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") -def test_download_arrow_tabledata_list_dict_sequence_schema(module_under_test): +def test_download_arrow_row_iterator_dict_sequence_schema(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), items=[{"page_data": "foo"}], @@ -1287,7 +1287,7 @@ def test_download_arrow_tabledata_list_dict_sequence_schema(module_under_test): {"name": "non_alien_field", "type": "STRING", "mode": "NULLABLE"}, ] - results_gen = module_under_test.download_arrow_tabledata_list(pages, dict_schema) + results_gen = module_under_test.download_arrow_row_iterator(pages, dict_schema) result = next(results_gen) assert 
len(result.columns) == 2 @@ -1301,7 +1301,7 @@ def test_download_arrow_tabledata_list_dict_sequence_schema(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") -def test_download_dataframe_tabledata_list_dict_sequence_schema(module_under_test): +def test_download_dataframe_row_iterator_dict_sequence_schema(module_under_test): fake_page = api_core.page_iterator.Page( parent=mock.Mock(), items=[{"page_data": "foo"}], @@ -1315,7 +1315,7 @@ def test_download_dataframe_tabledata_list_dict_sequence_schema(module_under_tes {"name": "non_alien_field", "type": "STRING", "mode": "NULLABLE"}, ] - results_gen = module_under_test.download_dataframe_tabledata_list( + results_gen = module_under_test.download_dataframe_row_iterator( pages, dict_schema, dtypes={} ) result = next(results_gen) @@ -1335,5 +1335,5 @@ def test_download_dataframe_tabledata_list_dict_sequence_schema(module_under_tes def test_table_data_listpage_to_dataframe_skips_stop_iteration(module_under_test): - dataframe = module_under_test._tabledata_list_page_to_dataframe([], [], {}) + dataframe = module_under_test._row_iterator_page_to_dataframe([], [], {}) assert isinstance(dataframe, pandas.DataFrame) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index e507834f6..ca2f7ea66 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -6786,12 +6786,17 @@ def _bigquery_timestamp_float_repr(ts_float): age = SchemaField("age", "INTEGER", mode="NULLABLE") joined = SchemaField("joined", "TIMESTAMP", mode="NULLABLE") table = Table(self.TABLE_REF, schema=[full_name, age, joined]) + table._properties["numRows"] = 7 iterator = client.list_rows(table, timeout=7.5) + + # Check that initial total_rows is populated from the table. 
+ self.assertEqual(iterator.total_rows, 7) page = six.next(iterator.pages) rows = list(page) - total_rows = iterator.total_rows - page_token = iterator.next_page_token + + # Check that total_rows is updated based on API response. + self.assertEqual(iterator.total_rows, ROWS) f2i = {"full_name": 0, "age": 1, "joined": 2} self.assertEqual(len(rows), 4) @@ -6799,8 +6804,7 @@ def _bigquery_timestamp_float_repr(ts_float): self.assertEqual(rows[1], Row(("Bharney Rhubble", 33, WHEN_1), f2i)) self.assertEqual(rows[2], Row(("Wylma Phlyntstone", 29, WHEN_2), f2i)) self.assertEqual(rows[3], Row(("Bhettye Rhubble", None, None), f2i)) - self.assertEqual(total_rows, ROWS) - self.assertEqual(page_token, TOKEN) + self.assertEqual(iterator.next_page_token, TOKEN) conn.api_request.assert_called_once_with( method="GET", path="/%s" % PATH, query_params={}, timeout=7.5 diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index b2877845a..a7cf92919 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -170,7 +170,7 @@ def test_context_with_default_connection(): default_conn = make_connection(QUERY_RESOURCE, QUERY_RESULTS_RESOURCE) conn_patch = mock.patch("google.cloud.bigquery.client.Connection", autospec=True) list_rows_patch = mock.patch( - "google.cloud.bigquery.client.Client.list_rows", + "google.cloud.bigquery.client.Client._list_rows_from_query_results", return_value=google.cloud.bigquery.table._EmptyRowIterator(), ) @@ -235,7 +235,7 @@ def test_context_with_custom_connection(): default_conn = make_connection() conn_patch = mock.patch("google.cloud.bigquery.client.Connection", autospec=True) list_rows_patch = mock.patch( - "google.cloud.bigquery.client.Client.list_rows", + "google.cloud.bigquery.client.Client._list_rows_from_query_results", return_value=google.cloud.bigquery.table._EmptyRowIterator(), ) @@ -1078,7 +1078,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_overrides_context(param_value, ex ) conn = magics.context._connection = 
make_connection(resource, query_results, data) list_rows_patch = mock.patch( - "google.cloud.bigquery.client.Client.list_rows", + "google.cloud.bigquery.client.Client._list_rows_from_query_results", return_value=google.cloud.bigquery.table._EmptyRowIterator(), ) with list_rows_patch, default_patch: @@ -1117,7 +1117,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_inplace(): ) conn = magics.context._connection = make_connection(resource, query_results, data) list_rows_patch = mock.patch( - "google.cloud.bigquery.client.Client.list_rows", + "google.cloud.bigquery.client.Client._list_rows_from_query_results", return_value=google.cloud.bigquery.table._EmptyRowIterator(), ) with list_rows_patch, default_patch: @@ -1156,7 +1156,7 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_setter(): ) conn = magics.context._connection = make_connection(resource, query_results, data) list_rows_patch = mock.patch( - "google.cloud.bigquery.client.Client.list_rows", + "google.cloud.bigquery.client.Client._list_rows_from_query_results", return_value=google.cloud.bigquery.table._EmptyRowIterator(), ) with list_rows_patch, default_patch: diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index e21453b9f..e232f32e6 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1572,10 +1572,7 @@ def test_constructor_with_table(self): from google.cloud.bigquery.table import Table table = Table("proj.dset.tbl") - table._properties["numRows"] = 100 - - iterator = self._make_one(table=table) - + iterator = self._make_one(table=table, total_rows=100) self.assertIs(iterator._table, table) self.assertEqual(iterator.total_rows, 100) @@ -1883,7 +1880,7 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): for warning in warned if warning.category is UserWarning and "cannot use bqstorage_client" in str(warning).lower() - and "tabledata.list" in str(warning) + and "REST" in str(warning) ] self.assertEqual(len(matches), 1, msg="User warning 
was not emitted.") mock_client._create_bqstorage_client.assert_not_called() @@ -2667,7 +2664,7 @@ def test_to_dataframe_max_results_w_bqstorage_warning(self): for warning in warned if warning.category is UserWarning and "cannot use bqstorage_client" in str(warning).lower() - and "tabledata.list" in str(warning) + and "REST" in str(warning) ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") @@ -2703,7 +2700,7 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): for warning in warned if warning.category is UserWarning and "cannot use bqstorage_client" in str(warning).lower() - and "tabledata.list" in str(warning) + and "REST" in str(warning) ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") mock_client._create_bqstorage_client.assert_not_called() From d1bf94e0c2c559f82793117e9e90e10ddb2cbdc5 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 5 Nov 2020 11:13:26 -0600 Subject: [PATCH 046/341] chore: release 2.3.0 (#351) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 384704bbf..cdcfbe81f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,31 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.3.0](https://www.github.com/googleapis/python-bigquery/compare/v2.2.0...v2.3.0) (2020-11-04) + + +### Features + +* add `reload` argument to `*Job.done()` functions ([#341](https://www.github.com/googleapis/python-bigquery/issues/341)) ([e51fd45](https://www.github.com/googleapis/python-bigquery/commit/e51fd45fdb0481ac5d59cc0edbfa0750928b2596)) +* pass retry from Job.result() to Job.done() ([#41](https://www.github.com/googleapis/python-bigquery/issues/41)) 
([284e17a](https://www.github.com/googleapis/python-bigquery/commit/284e17a17adf6844a17db2c6fed54a649b1f997e)) + + +### Bug Fixes + +* add missing spaces in opentelemetry log message ([#360](https://www.github.com/googleapis/python-bigquery/issues/360)) ([4f326b1](https://www.github.com/googleapis/python-bigquery/commit/4f326b1ca4411cfbf5ded86955a963d3e05a409f)) +* **dbapi:** avoid running % format with no query parameters ([#348](https://www.github.com/googleapis/python-bigquery/issues/348)) ([5dd1a5e](https://www.github.com/googleapis/python-bigquery/commit/5dd1a5e77f13b8e576e917069e247c5390a81900)) +* create_job method accepts dictionary arguments ([#300](https://www.github.com/googleapis/python-bigquery/issues/300)) ([155bacc](https://www.github.com/googleapis/python-bigquery/commit/155bacc156f181384ca6dba699ab83d0398176d1)) + + +### Performance Improvements + +* use `jobs.getQueryResults` to download result sets ([#363](https://www.github.com/googleapis/python-bigquery/issues/363)) ([0c3476d](https://www.github.com/googleapis/python-bigquery/commit/0c3476d56380d70115f6fd765bf5c5261967052f)) + + +### Documentation + +* add documents for QueryPlanEntry and QueryPlanEntryStep ([#344](https://www.github.com/googleapis/python-bigquery/issues/344)) ([dca2e4c](https://www.github.com/googleapis/python-bigquery/commit/dca2e4ca7c2ae183ac4bb60f653d425a43a86bea)) + ## [2.2.0](https://www.github.com/googleapis/python-bigquery/compare/v2.1.0...v2.2.0) (2020-10-19) From 0c387dadd57fba9cdbfd39abe530de209943db9a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 5 Nov 2020 11:42:06 -0600 Subject: [PATCH 047/341] chore: release v2.3.1 (#370) Follow-up to failed #351 release --- CHANGELOG.md | 8 ++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cdcfbe81f..787ba7557 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,14 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history 
+## 2.3.1 + +11-05-2020 09:27 PST + +### Internal / Testing Changes + +- update `google.cloud.bigquery.__version__` + ## [2.3.0](https://www.github.com/googleapis/python-bigquery/compare/v2.2.0...v2.3.0) (2020-11-04) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index bd0f8e5c7..474ccbcf2 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.2.0" +__version__ = "2.3.1" From 30de15f7255de5ea221df4e8db7991d279e0ea28 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 10 Nov 2020 11:11:21 -0600 Subject: [PATCH 048/341] perf: use `getQueryResults` from DB-API (#375) I suspect `list_rows` (`tabledata.list`) was being called directly due to no `page_size` parameter on `QueryJob.result` at the time. --- google/cloud/bigquery/dbapi/cursor.py | 7 +------ tests/unit/test_dbapi_cursor.py | 8 ++++++-- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py index 597313fd6..74f8aec4e 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -229,7 +229,6 @@ def _try_fetch(self, size=None): return if self._query_data is None: - client = self.connection._client bqstorage_client = self.connection._bqstorage_client if bqstorage_client is not None: @@ -237,11 +236,7 @@ def _try_fetch(self, size=None): self._query_data = _helpers.to_bq_table_rows(rows_iterable) return - rows_iter = client.list_rows( - self._query_job.destination, - selected_fields=self._query_job._query_results.schema, - page_size=self.arraysize, - ) + rows_iter = self._query_job.result(page_size=self.arraysize) self._query_data = iter(rows_iter) def _bqstorage_fetch(self, bqstorage_client): diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index 
5c3bfcae9..f55b3fd3f 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -66,8 +66,8 @@ def _mock_client( num_dml_affected_rows=num_dml_affected_rows, dry_run=dry_run_job, total_bytes_processed=total_bytes_processed, + rows=rows, ) - mock_client.list_rows.return_value = rows mock_client._default_query_job_config = default_query_job_config # Assure that the REST client gets used, not the BQ Storage client. @@ -102,9 +102,13 @@ def _mock_job( num_dml_affected_rows=None, dry_run=False, total_bytes_processed=0, + rows=None, ): from google.cloud.bigquery import job + if rows is None: + rows = [] + mock_job = mock.create_autospec(job.QueryJob) mock_job.error_result = None mock_job.state = "DONE" @@ -114,7 +118,7 @@ def _mock_job( mock_job.result.side_effect = exceptions.NotFound mock_job.total_bytes_processed = total_bytes_processed else: - mock_job.result.return_value = mock_job + mock_job.result.return_value = rows mock_job._query_results = self._mock_results( total_rows=total_rows, schema=schema, From cd9febd20c34983781386c3bf603e5fca7135695 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 10 Nov 2020 11:48:10 -0600 Subject: [PATCH 049/341] deps: expand pyarrow dependencies to include version 2 (#368) Pyarrow 2.0 includes several bug fixes. The wire format remains the same, so it continues to be compatible with the BigQuery Storage API. --- setup.py | 4 ++-- tests/unit/test_table.py | 16 +++++++++++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 548ceac09..48c4a7518 100644 --- a/setup.py +++ b/setup.py @@ -46,12 +46,12 @@ # grpc.Channel.close() method isn't added until 1.32.0. # https://github.com/grpc/grpc/pull/15254 "grpcio >= 1.32.0, < 2.0dev", - "pyarrow >= 1.0.0, < 2.0dev", + "pyarrow >= 1.0.0, < 3.0dev", ], "pandas": [ "pandas>=0.23.0", # pyarrow 1.0.0 is required for the use of timestamp_as_object keyword. 
- "pyarrow >= 1.0.0, < 2.0dev", + "pyarrow >= 1.0.0, < 3.0dev", ], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index e232f32e6..eccc46a7a 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -19,6 +19,7 @@ import warnings import mock +import pkg_resources import pytest import six @@ -41,8 +42,11 @@ try: import pyarrow import pyarrow.types + + PYARROW_VERSION = pkg_resources.parse_version(pyarrow.__version__) except ImportError: # pragma: NO COVER pyarrow = None + PYARROW_VERSION = pkg_resources.parse_version("0.0.1") try: from tqdm import tqdm @@ -52,6 +56,9 @@ from google.cloud.bigquery.dataset import DatasetReference +PYARROW_TIMESTAMP_VERSION = pkg_resources.parse_version("2.0.0") + + def _mock_client(): from google.cloud.bigquery import client @@ -2339,12 +2346,19 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): df = row_iterator.to_dataframe(create_bqstorage_client=False) + tzinfo = None + if PYARROW_VERSION >= PYARROW_TIMESTAMP_VERSION: + tzinfo = dt.timezone.utc + self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 2) # verify the number of rows self.assertEqual(list(df.columns), ["some_timestamp"]) self.assertEqual( list(df["some_timestamp"]), - [dt.datetime(4567, 1, 1), dt.datetime(9999, 12, 31)], + [ + dt.datetime(4567, 1, 1, tzinfo=tzinfo), + dt.datetime(9999, 12, 31, tzinfo=tzinfo), + ], ) @pytest.mark.xfail( From 86f6a516d1c7c5dc204ab085ea2578793e6561ff Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 10 Nov 2020 13:33:59 -0600 Subject: [PATCH 050/341] perf: cache first page of `jobs.getQueryResults` rows (#374) Co-authored-by: Steffany Brown <30247553+steffnay@users.noreply.github.com> --- google/cloud/bigquery/client.py | 4 +- google/cloud/bigquery/job/query.py | 85 ++++++++++++++++++----------- google/cloud/bigquery/table.py | 11 +++- tests/unit/job/test_query.py | 55 ++++++++++++++----- 
tests/unit/job/test_query_pandas.py | 16 ++---- tests/unit/test_client.py | 4 +- 6 files changed, 115 insertions(+), 60 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index cd1474336..c67ef54e0 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -1534,7 +1534,7 @@ def _get_query_results( A new ``_QueryResults`` instance. """ - extra_params = {"maxResults": 0} + extra_params = {} if project is None: project = self.project @@ -3187,6 +3187,7 @@ def _list_rows_from_query_results( page_size=None, retry=DEFAULT_RETRY, timeout=None, + first_page_response=None, ): """List the rows of a completed query. See @@ -3247,6 +3248,7 @@ def _list_rows_from_query_results( table=destination, extra_params=params, total_rows=total_rows, + first_page_response=first_page_response, ) return row_iterator diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 1e2002eab..6c9221043 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -990,48 +990,22 @@ def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): Returns: bool: True if the job is complete, False otherwise. """ - is_done = ( - # Only consider a QueryJob complete when we know we have the final - # query results available. - self._query_results is not None - and self._query_results.complete - and self.state == _DONE_STATE - ) # Do not refresh if the state is already done, as the job will not # change once complete. + is_done = self.state == _DONE_STATE if not reload or is_done: return is_done - # Since the API to getQueryResults can hang up to the timeout value - # (default of 10 seconds), set the timeout parameter to ensure that - # the timeout from the futures API is respected. 
See: - # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4135 - timeout_ms = None - if self._done_timeout is not None: - # Subtract a buffer for context switching, network latency, etc. - api_timeout = self._done_timeout - _TIMEOUT_BUFFER_SECS - api_timeout = max(min(api_timeout, 10), 0) - self._done_timeout -= api_timeout - self._done_timeout = max(0, self._done_timeout) - timeout_ms = int(api_timeout * 1000) + self._reload_query_results(retry=retry, timeout=timeout) # If an explicit timeout is not given, fall back to the transport timeout # stored in _blocking_poll() in the process of polling for job completion. transport_timeout = timeout if timeout is not None else self._transport_timeout - self._query_results = self._client._get_query_results( - self.job_id, - retry, - project=self.project, - timeout_ms=timeout_ms, - location=self.location, - timeout=transport_timeout, - ) - # Only reload the job once we know the query is complete. # This will ensure that fields such as the destination table are # correctly populated. - if self._query_results.complete and self.state != _DONE_STATE: + if self._query_results.complete: self.reload(retry=retry, timeout=transport_timeout) return self.state == _DONE_STATE @@ -1098,6 +1072,45 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): exc.query_job = self raise + def _reload_query_results(self, retry=DEFAULT_RETRY, timeout=None): + """Refresh the cached query results. + + Args: + retry (Optional[google.api_core.retry.Retry]): + How to retry the call that retrieves query results. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + """ + if self._query_results and self._query_results.complete: + return + + # Since the API to getQueryResults can hang up to the timeout value + # (default of 10 seconds), set the timeout parameter to ensure that + # the timeout from the futures API is respected. 
See: + # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4135 + timeout_ms = None + if self._done_timeout is not None: + # Subtract a buffer for context switching, network latency, etc. + api_timeout = self._done_timeout - _TIMEOUT_BUFFER_SECS + api_timeout = max(min(api_timeout, 10), 0) + self._done_timeout -= api_timeout + self._done_timeout = max(0, self._done_timeout) + timeout_ms = int(api_timeout * 1000) + + # If an explicit timeout is not given, fall back to the transport timeout + # stored in _blocking_poll() in the process of polling for job completion. + transport_timeout = timeout if timeout is not None else self._transport_timeout + + self._query_results = self._client._get_query_results( + self.job_id, + retry, + project=self.project, + timeout_ms=timeout_ms, + location=self.location, + timeout=transport_timeout, + ) + def result( self, page_size=None, @@ -1144,6 +1157,11 @@ def result( """ try: super(QueryJob, self).result(retry=retry, timeout=timeout) + + # Since the job could already be "done" (e.g. got a finished job + # via client.get_job), the superclass call to done() might not + # set the self._query_results cache. 
+ self._reload_query_results(retry=retry, timeout=timeout) except exceptions.GoogleAPICallError as exc: exc.message += self._format_for_exception(self.query, self.job_id) exc.query_job = self @@ -1158,10 +1176,14 @@ def result( if self._query_results.total_rows is None: return _EmptyRowIterator() + first_page_response = None + if max_results is None and page_size is None and start_index is None: + first_page_response = self._query_results._properties + rows = self._client._list_rows_from_query_results( - self._query_results.job_id, + self.job_id, self.location, - self._query_results.project, + self.project, self._query_results.schema, total_rows=self._query_results.total_rows, destination=self.destination, @@ -1170,6 +1192,7 @@ def result( start_index=start_index, retry=retry, timeout=timeout, + first_page_response=first_page_response, ) rows._preserve_order = _contains_order_by(self.query) return rows diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index e46b7e3cd..c14a8adc4 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1308,7 +1308,9 @@ class RowIterator(HTTPIterator): A subset of columns to select from this table. total_rows (Optional[int]): Total number of rows in the table. - + first_page_response (Optional[dict]): + API response for the first page of results. These are returned when + the first page is requested. """ def __init__( @@ -1324,6 +1326,7 @@ def __init__( table=None, selected_fields=None, total_rows=None, + first_page_response=None, ): super(RowIterator, self).__init__( client, @@ -1346,6 +1349,7 @@ def __init__( self._selected_fields = selected_fields self._table = table self._total_rows = total_rows + self._first_page_response = first_page_response def _get_next_page_response(self): """Requests the next page from the path provided. @@ -1354,6 +1358,11 @@ def _get_next_page_response(self): Dict[str, object]: The parsed JSON response of the next page's contents. 
""" + if self._first_page_response: + response = self._first_page_response + self._first_page_response = None + return response + params = self._get_query_params() if self._page_size is not None: if self.page_number and "startIndex" in params: diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index daaf2e557..41e31f469 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -787,7 +787,9 @@ def test_result(self): "location": "EU", }, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "2", + "totalRows": "3", + "rows": [{"f": [{"v": "abc"}]}], + "pageToken": "next-page", } job_resource = self._make_resource(started=True, location="EU") job_resource_done = self._make_resource(started=True, ended=True, location="EU") @@ -799,9 +801,9 @@ def test_result(self): query_page_resource = { # Explicitly set totalRows to be different from the initial # response to test update during iteration. - "totalRows": "1", + "totalRows": "2", "pageToken": None, - "rows": [{"f": [{"v": "abc"}]}], + "rows": [{"f": [{"v": "def"}]}], } conn = _make_connection( query_resource, query_resource_done, job_resource_done, query_page_resource @@ -812,19 +814,20 @@ def test_result(self): result = job.result() self.assertIsInstance(result, RowIterator) - self.assertEqual(result.total_rows, 2) + self.assertEqual(result.total_rows, 3) rows = list(result) - self.assertEqual(len(rows), 1) + self.assertEqual(len(rows), 2) self.assertEqual(rows[0].col1, "abc") + self.assertEqual(rows[1].col1, "def") # Test that the total_rows property has changed during iteration, based # on the response from tabledata.list. 
- self.assertEqual(result.total_rows, 1) + self.assertEqual(result.total_rows, 2) query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_results_call = mock.call( method="GET", path=query_results_path, - query_params={"maxResults": 0, "location": "EU"}, + query_params={"location": "EU"}, timeout=None, ) reload_call = mock.call( @@ -839,6 +842,7 @@ def test_result(self): query_params={ "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, "location": "EU", + "pageToken": "next-page", }, timeout=None, ) @@ -851,7 +855,9 @@ def test_result_with_done_job_calls_get_query_results(self): "jobComplete": True, "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "1", + "totalRows": "2", + "rows": [{"f": [{"v": "abc"}]}], + "pageToken": "next-page", } job_resource = self._make_resource(started=True, ended=True, location="EU") job_resource["configuration"]["query"]["destinationTable"] = { @@ -860,9 +866,9 @@ def test_result_with_done_job_calls_get_query_results(self): "tableId": "dest_table", } results_page_resource = { - "totalRows": "1", + "totalRows": "2", "pageToken": None, - "rows": [{"f": [{"v": "abc"}]}], + "rows": [{"f": [{"v": "def"}]}], } conn = _make_connection(query_resource_done, results_page_resource) client = _make_client(self.PROJECT, connection=conn) @@ -871,14 +877,15 @@ def test_result_with_done_job_calls_get_query_results(self): result = job.result() rows = list(result) - self.assertEqual(len(rows), 1) + self.assertEqual(len(rows), 2) self.assertEqual(rows[0].col1, "abc") + self.assertEqual(rows[1].col1, "def") query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_results_call = mock.call( method="GET", path=query_results_path, - query_params={"maxResults": 0, "location": "EU"}, + query_params={"location": "EU"}, timeout=None, ) query_results_page_call = mock.call( @@ -887,6 +894,7 @@ def 
test_result_with_done_job_calls_get_query_results(self): query_params={ "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, "location": "EU", + "pageToken": "next-page", }, timeout=None, ) @@ -900,6 +908,12 @@ def test_result_with_max_results(self): "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "5", + # These rows are discarded because max_results is set. + "rows": [ + {"f": [{"v": "xyz"}]}, + {"f": [{"v": "uvw"}]}, + {"f": [{"v": "rst"}]}, + ], } query_page_resource = { "totalRows": "5", @@ -925,6 +939,7 @@ def test_result_with_max_results(self): rows = list(result) self.assertEqual(len(rows), 3) + self.assertEqual(rows[0].col1, "abc") self.assertEqual(len(connection.api_request.call_args_list), 2) query_page_request = connection.api_request.call_args_list[1] self.assertEqual( @@ -979,7 +994,7 @@ def test_result_w_retry(self): query_results_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={"maxResults": 0, "location": "asia-northeast1"}, + query_params={"location": "asia-northeast1"}, timeout=None, ) reload_call = mock.call( @@ -1079,6 +1094,12 @@ def test_result_w_page_size(self): "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "4", + # These rows are discarded because page_size is set. 
+ "rows": [ + {"f": [{"v": "xyz"}]}, + {"f": [{"v": "uvw"}]}, + {"f": [{"v": "rst"}]}, + ], } job_resource = self._make_resource(started=True, ended=True, location="US") q_config = job_resource["configuration"]["query"] @@ -1109,6 +1130,7 @@ def test_result_w_page_size(self): # Assert actual_rows = list(result) self.assertEqual(len(actual_rows), 4) + self.assertEqual(actual_rows[0].col1, "row1") query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_page_1_call = mock.call( @@ -1142,6 +1164,12 @@ def test_result_with_start_index(self): "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "5", + # These rows are discarded because start_index is set. + "rows": [ + {"f": [{"v": "xyz"}]}, + {"f": [{"v": "uvw"}]}, + {"f": [{"v": "rst"}]}, + ], } tabledata_resource = { "totalRows": "5", @@ -1168,6 +1196,7 @@ def test_result_with_start_index(self): rows = list(result) self.assertEqual(len(rows), 4) + self.assertEqual(rows[0].col1, "abc") self.assertEqual(len(connection.api_request.call_args_list), 2) tabledata_list_request = connection.api_request.call_args_list[1] self.assertEqual( diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index 37f4a6dec..b0a652b78 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -161,8 +161,6 @@ def test_to_arrow(): }, ] }, - } - tabledata_resource = { "rows": [ { "f": [ @@ -176,13 +174,11 @@ def test_to_arrow(): {"v": {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}}, ] }, - ] + ], } done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection( - begun_resource, query_resource, done_resource, tabledata_resource - ) + connection = _make_connection(begun_resource, query_resource, done_resource) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) @@ 
-234,20 +230,16 @@ def test_to_dataframe(): {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, ] }, - } - tabledata_resource = { "rows": [ {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, - ] + ], } done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection( - begun_resource, query_resource, done_resource, tabledata_resource - ) + connection = _make_connection(begun_resource, query_resource, done_resource) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index ca2f7ea66..dd57ee798 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -319,7 +319,7 @@ def test__get_query_results_miss_w_explicit_project_and_timeout(self): conn.api_request.assert_called_once_with( method="GET", path=path, - query_params={"maxResults": 0, "timeoutMs": 500, "location": self.LOCATION}, + query_params={"timeoutMs": 500, "location": self.LOCATION}, timeout=42, ) @@ -336,7 +336,7 @@ def test__get_query_results_miss_w_client_location(self): conn.api_request.assert_called_once_with( method="GET", path="/projects/PROJECT/queries/nothere", - query_params={"maxResults": 0, "location": self.LOCATION}, + query_params={"location": self.LOCATION}, timeout=None, ) From f9480dc2a1bc58367083176bd74725aa8b903301 Mon Sep 17 00:00:00 2001 From: Carlos de la Guardia Date: Tue, 10 Nov 2020 16:02:15 -0600 Subject: [PATCH 051/341] feat: allow routine references (#378) * feat: allow routine references in dataset access property * build: black formatting --- google/cloud/bigquery/dataset.py | 46 ++++++++++++++++++++++---------- tests/unit/test_dataset.py | 26 ++++++++++++++++++ 2 files changed, 58 insertions(+), 14 deletions(-) diff --git 
a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index 9a80f30b5..ce07c8048 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -79,8 +79,9 @@ class AccessEntry(object): """Represents grant of an access role to an entity. An entry must have exactly one of the allowed :attr:`ENTITY_TYPES`. If - anything but ``view`` is set, a ``role`` is also required. ``role`` is - omitted for a ``view``, because ``view`` s are always read-only. + anything but ``view`` or ``routine`` are set, a ``role`` is also required. + ``role`` is omitted for ``view`` and ``routine``, because they are always + read-only. See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets. @@ -88,17 +89,17 @@ class AccessEntry(object): role (str): Role granted to the entity. The following string values are supported: `'READER'`, `'WRITER'`, `'OWNER'`. It may also be - :data:`None` if the ``entity_type`` is ``view``. + :data:`None` if the ``entity_type`` is ``view`` or ``routine``. entity_type (str): Type of entity being granted the role. One of :attr:`ENTITY_TYPES`. entity_id (Union[str, Dict[str, str]]): - If the ``entity_type`` is not 'view', the ``entity_id`` is the - ``str`` ID of the entity being granted the role. If the - ``entity_type`` is 'view', the ``entity_id`` is a ``dict`` - representing the view from a different dataset to grant access to - in the following format:: + If the ``entity_type`` is not 'view' or 'routine', the ``entity_id`` + is the ``str`` ID of the entity being granted the role. 
If the + ``entity_type`` is 'view' or 'routine', the ``entity_id`` is a ``dict`` + representing the view or routine from a different dataset to grant + access to in the following format for views:: { 'projectId': string, @@ -106,11 +107,19 @@ class AccessEntry(object): 'tableId': string } + For routines:: + + { + 'projectId': string, + 'datasetId': string, + 'routineId': string + } + Raises: ValueError: If the ``entity_type`` is not among :attr:`ENTITY_TYPES`, or if a - ``view`` has ``role`` set, or a non ``view`` **does not** have a - ``role`` set. + ``view`` or a ``routine`` has ``role`` set, or a non ``view`` and + non ``routine`` **does not** have a ``role`` set. Examples: >>> entry = AccessEntry('OWNER', 'userByEmail', 'user@example.com') @@ -124,7 +133,15 @@ class AccessEntry(object): """ ENTITY_TYPES = frozenset( - ["userByEmail", "groupByEmail", "domain", "specialGroup", "view", "iamMember"] + [ + "userByEmail", + "groupByEmail", + "domain", + "specialGroup", + "view", + "iamMember", + "routine", + ] ) """Allowed entity types.""" @@ -135,10 +152,11 @@ def __init__(self, role, entity_type, entity_id): ", ".join(self.ENTITY_TYPES), ) raise ValueError(message) - if entity_type == "view": + if entity_type in ("view", "routine"): if role is not None: raise ValueError( - "Role must be None for a view. Received " "role: %r" % (role,) + "Role must be None for a %r. Received " + "role: %r" % (entity_type, role) ) else: if role is None: @@ -409,7 +427,7 @@ def access_entries(self): entries. ``role`` augments the entity type and must be present **unless** the - entity type is ``view``. + entity type is ``view`` or ``routine``. 
Raises: TypeError: If 'value' is not a sequence diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index e4977a270..b3a53a08d 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -53,6 +53,21 @@ def test_ctor_view_success(self): self.assertEqual(entry.entity_type, entity_type) self.assertEqual(entry.entity_id, entity_id) + def test_ctor_routine_with_role(self): + role = "READER" + entity_type = "routine" + with self.assertRaises(ValueError): + self._make_one(role, entity_type, None) + + def test_ctor_routine_success(self): + role = None + entity_type = "routine" + entity_id = object() + entry = self._make_one(role, entity_type, entity_id) + self.assertEqual(entry.role, role) + self.assertEqual(entry.entity_type, entity_type) + self.assertEqual(entry.entity_id, entity_id) + def test_ctor_nonview_without_role(self): role = None entity_type = "userByEmail" @@ -115,6 +130,17 @@ def test_to_api_repr_view(self): exp_resource = {"view": view} self.assertEqual(resource, exp_resource) + def test_to_api_repr_routine(self): + routine = { + "projectId": "my-project", + "datasetId": "my_dataset", + "routineId": "my_routine", + } + entry = self._make_one(None, "routine", routine) + resource = entry.to_api_repr() + exp_resource = {"routine": routine} + self.assertEqual(resource, exp_resource) + def test_from_api_repr(self): resource = {"role": "OWNER", "userByEmail": "salmon@example.com"} entry = self._get_target_class().from_api_repr(resource) From c52b31789998fc0dfde07c3296650c85104d719d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 11 Nov 2020 15:03:44 -0600 Subject: [PATCH 052/341] perf: avoid extra API calls from `to_dataframe` if all rows are cached (#384) Follow-up to previous PR, which cached the first page of `getQueryResults`. If the first page is the only page (no `pageToken`), then it is unnecessary to make extra API calls from `to_dataframe` or `to_arrow` to the BigQuery Storage API. 
--- google/cloud/bigquery/table.py | 56 ++++++++++++++++++++--------- tests/unit/job/test_query_pandas.py | 28 ++++++++++++--- 2 files changed, 64 insertions(+), 20 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index c14a8adc4..1ee36c7ea 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1351,6 +1351,41 @@ def __init__( self._total_rows = total_rows self._first_page_response = first_page_response + def _is_completely_cached(self): + """Check if all results are completely cached. + + This is useful to know, because we can avoid alternative download + mechanisms. + """ + if self._first_page_response is None or self.next_page_token: + return False + + return self._first_page_response.get(self._next_token) is None + + def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): + """Returns if the BigQuery Storage API can be used. + + Returns: + bool + True if the BigQuery Storage client can be used or created. + """ + using_bqstorage_api = bqstorage_client or create_bqstorage_client + if not using_bqstorage_api: + return False + + if self._is_completely_cached(): + return False + + if self.max_results is not None: + warnings.warn( + "Cannot use bqstorage_client if max_results is set, " + "reverting to fetching data with the REST endpoint.", + stacklevel=2, + ) + return False + + return True + def _get_next_page_response(self): """Requests the next page from the path provided. 
@@ -1412,6 +1447,9 @@ def _get_progress_bar(self, progress_bar_type): def _to_page_iterable( self, bqstorage_download, tabledata_list_download, bqstorage_client=None ): + if not self._validate_bqstorage(bqstorage_client, False): + bqstorage_client = None + if bqstorage_client is not None: for item in bqstorage_download(): yield item @@ -1503,14 +1541,7 @@ def to_arrow( if pyarrow is None: raise ValueError(_NO_PYARROW_ERROR) - if ( - bqstorage_client or create_bqstorage_client - ) and self.max_results is not None: - warnings.warn( - "Cannot use bqstorage_client if max_results is set, " - "reverting to fetching data with the REST endpoint.", - stacklevel=2, - ) + if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): create_bqstorage_client = False bqstorage_client = None @@ -1687,14 +1718,7 @@ def to_dataframe( if dtypes is None: dtypes = {} - if ( - bqstorage_client or create_bqstorage_client - ) and self.max_results is not None: - warnings.warn( - "Cannot use bqstorage_client if max_results is set, " - "reverting to fetching data with the REST endpoint.", - stacklevel=2, - ) + if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): create_bqstorage_client = False bqstorage_client = None diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index b0a652b78..a481bff69 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -99,6 +99,7 @@ def test_to_dataframe_bqstorage_preserve_order(query): ] }, "totalRows": "4", + "pageToken": "next-page", } connection = _make_connection(get_query_results_resource, job_resource) client = _make_client(connection=connection) @@ -133,7 +134,16 @@ def test_to_dataframe_bqstorage_preserve_order(query): @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") -def test_to_arrow(): +@pytest.mark.parametrize( + "method_kwargs", + [ + {"create_bqstorage_client": False}, + # Since all rows are contained in the first page 
of results, the BigQuery + # Storage API won't actually be used. + {"create_bqstorage_client": True}, + ], +) +def test_to_arrow(method_kwargs): from google.cloud.bigquery.job import QueryJob as target_class begun_resource = _make_job_resource(job_type="query") @@ -182,7 +192,7 @@ def test_to_arrow(): client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) - tbl = job.to_arrow(create_bqstorage_client=False) + tbl = job.to_arrow(**method_kwargs) assert isinstance(tbl, pyarrow.Table) assert tbl.num_rows == 2 @@ -216,7 +226,16 @@ def test_to_arrow(): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_to_dataframe(): +@pytest.mark.parametrize( + "method_kwargs", + [ + {"create_bqstorage_client": False}, + # Since all rows are contained in the first page of results, the BigQuery + # Storage API won't actually be used. + {"create_bqstorage_client": True}, + ], +) +def test_to_dataframe(method_kwargs): from google.cloud.bigquery.job import QueryJob as target_class begun_resource = _make_job_resource(job_type="query") @@ -243,7 +262,7 @@ def test_to_dataframe(): client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) - df = job.to_dataframe(create_bqstorage_client=False) + df = job.to_dataframe(**method_kwargs) assert isinstance(df, pandas.DataFrame) assert len(df) == 4 # verify the number of rows @@ -288,6 +307,7 @@ def test_to_dataframe_bqstorage(): {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, ] }, + "pageToken": "next-page", } connection = _make_connection(query_resource) client = _make_client(connection=connection) From b899ad12e17cb87c58d3ae46b4388d917c5743f2 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 12 Nov 2020 17:22:09 -0600 Subject: [PATCH 053/341] fix(dbapi): allow rows to be fetched from scripts (#387) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `is_dml` logic is not needed now 
that we moved to `getQueryResults` instead of `tabledata.list` (https://github.com/googleapis/python-bigquery/pull/375). Previously, the destination table of a DML query would return a non-null value that was unreadable or would return nonsense with DML (and some DDL) queries. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Towards #377 🦕 --- google/cloud/bigquery/dbapi/cursor.py | 8 ----- tests/system.py | 43 ++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py index 74f8aec4e..f48b47c12 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -220,14 +220,6 @@ def _try_fetch(self, size=None): self._query_data = iter([]) return - is_dml = ( - self._query_job.statement_type - and self._query_job.statement_type.upper() != "SELECT" - ) - if is_dml: - self._query_data = iter([]) - return - if self._query_data is None: bqstorage_client = self.connection._bqstorage_client diff --git a/tests/system.py b/tests/system.py index 68fcb918c..51a47c0b7 100644 --- a/tests/system.py +++ b/tests/system.py @@ -180,6 +180,7 @@ class Config(object): CLIENT = None CURSOR = None + DATASET = None def setUpModule(): @@ -189,7 +190,9 @@ def setUpModule(): class TestBigQuery(unittest.TestCase): def setUp(self): - self.to_delete = [] + Config.DATASET = _make_dataset_id("bq_system_tests") + dataset = Config.CLIENT.create_dataset(Config.DATASET) + self.to_delete = [dataset] def 
tearDown(self): def _still_in_use(bad_request): @@ -1790,6 +1793,44 @@ def test_dbapi_fetchall(self): row_tuples = [r.values() for r in rows] self.assertEqual(row_tuples, [(1, 2), (3, 4), (5, 6)]) + def test_dbapi_fetchall_from_script(self): + query = """ + CREATE TEMP TABLE Example + ( + x INT64, + y STRING + ); + + INSERT INTO Example + VALUES (5, 'foo'), + (6, 'bar'), + (7, 'baz'); + + SELECT * + FROM Example + ORDER BY x ASC; + """ + + Config.CURSOR.execute(query) + self.assertEqual(Config.CURSOR.rowcount, 3, "expected 3 rows") + rows = Config.CURSOR.fetchall() + row_tuples = [r.values() for r in rows] + self.assertEqual(row_tuples, [(5, "foo"), (6, "bar"), (7, "baz")]) + + def test_dbapi_create_view(self): + + query = """ + CREATE VIEW {}.dbapi_create_view + AS SELECT name, SUM(number) AS total + FROM `bigquery-public-data.usa_names.usa_1910_2013` + GROUP BY name; + """.format( + Config.DATASET + ) + + Config.CURSOR.execute(query) + self.assertEqual(Config.CURSOR.rowcount, 0, "expected 0 rows") + @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) From dc78eddde7a6a312c8fed7bace7d64036837ab1a Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Mon, 16 Nov 2020 11:46:28 -0500 Subject: [PATCH 054/341] feat: add progress bar to `QueryJob.to_dataframe` and `to_arrow` (#352) * feat: add progress bar for to_arrow method * feat: add progress bar for to_dataframe * feat: add default progress bar and unit test * feat: nit * feat: result timout for without queryplan --- google/cloud/bigquery/_tqdm_helpers.py | 94 +++++++++ google/cloud/bigquery/job/query.py | 7 +- google/cloud/bigquery/table.py | 41 +--- tests/unit/job/test_query_pandas.py | 261 +++++++++++++++++++++++++ tests/unit/test_table.py | 4 +- 5 files changed, 367 insertions(+), 40 deletions(-) create mode 100644 google/cloud/bigquery/_tqdm_helpers.py diff --git a/google/cloud/bigquery/_tqdm_helpers.py 
b/google/cloud/bigquery/_tqdm_helpers.py new file mode 100644 index 000000000..bdecefe4a --- /dev/null +++ b/google/cloud/bigquery/_tqdm_helpers.py @@ -0,0 +1,94 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Shared helper functions for tqdm progress bar.""" + +import concurrent.futures +import time +import warnings + +try: + import tqdm +except ImportError: # pragma: NO COVER + tqdm = None + +_NO_TQDM_ERROR = ( + "A progress bar was requested, but there was an error loading the tqdm " + "library. Please install tqdm to use the progress bar functionality." +) + +_PROGRESS_BAR_UPDATE_INTERVAL = 0.5 + + +def get_progress_bar(progress_bar_type, description, total, unit): + """Construct a tqdm progress bar object, if tqdm is installed.""" + if tqdm is None: + if progress_bar_type is not None: + warnings.warn(_NO_TQDM_ERROR, UserWarning, stacklevel=3) + return None + + try: + if progress_bar_type == "tqdm": + return tqdm.tqdm(desc=description, total=total, unit=unit) + elif progress_bar_type == "tqdm_notebook": + return tqdm.tqdm_notebook(desc=description, total=total, unit=unit) + elif progress_bar_type == "tqdm_gui": + return tqdm.tqdm_gui(desc=description, total=total, unit=unit) + except (KeyError, TypeError): + # Protect ourselves from any tqdm errors. In case of + # unexpected tqdm behavior, just fall back to showing + # no progress bar.
+ warnings.warn(_NO_TQDM_ERROR, UserWarning, stacklevel=3) + return None + + +def wait_for_query(query_job, progress_bar_type=None): + """Return query result and display a progress bar while the query is running, if tqdm is installed.""" + if progress_bar_type is None: + return query_job.result() + + default_total = 1 + current_stage = None + start_time = time.time() + progress_bar = get_progress_bar( + progress_bar_type, "Query is running", default_total, "query" + ) + i = 0 + while True: + if query_job.query_plan: + default_total = len(query_job.query_plan) + current_stage = query_job.query_plan[i] + progress_bar.total = len(query_job.query_plan) + progress_bar.set_description( + "Query executing stage {} and status {} : {:0.2f}s".format( + current_stage.name, current_stage.status, time.time() - start_time, + ), + ) + try: + query_result = query_job.result(timeout=_PROGRESS_BAR_UPDATE_INTERVAL) + progress_bar.update(default_total) + progress_bar.set_description( + "Query complete after {:0.2f}s".format(time.time() - start_time), + ) + break + except concurrent.futures.TimeoutError: + query_job.reload() # Refreshes the state via a GET request.
+ if current_stage: + if current_stage.status == "COMPLETE": + if i < default_total - 1: + progress_bar.update(i + 1) + i += 1 + continue + progress_bar.close() + return query_result diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 6c9221043..7a1a74954 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -40,6 +40,7 @@ from google.cloud.bigquery.table import _table_arg_to_table_ref from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import TimePartitioning +from google.cloud.bigquery._tqdm_helpers import wait_for_query from google.cloud.bigquery.job.base import _AsyncJob from google.cloud.bigquery.job.base import _DONE_STATE @@ -1259,7 +1260,8 @@ def to_arrow( ..versionadded:: 1.17.0 """ - return self.result().to_arrow( + query_result = wait_for_query(self, progress_bar_type) + return query_result.to_arrow( progress_bar_type=progress_bar_type, bqstorage_client=bqstorage_client, create_bqstorage_client=create_bqstorage_client, @@ -1328,7 +1330,8 @@ def to_dataframe( Raises: ValueError: If the `pandas` library cannot be imported. 
""" - return self.result().to_dataframe( + query_result = wait_for_query(self, progress_bar_type) + return query_result.to_dataframe( bqstorage_client=bqstorage_client, dtypes=dtypes, progress_bar_type=progress_bar_type, diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 1ee36c7ea..4bfedd758 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -36,11 +36,6 @@ except ImportError: # pragma: NO COVER pyarrow = None -try: - import tqdm -except ImportError: # pragma: NO COVER - tqdm = None - import google.api_core.exceptions from google.api_core.page_iterator import HTTPIterator @@ -50,6 +45,7 @@ from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields +from google.cloud.bigquery._tqdm_helpers import get_progress_bar from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration @@ -68,10 +64,7 @@ "The pyarrow library is not installed, please install " "pyarrow to use the to_arrow() function." ) -_NO_TQDM_ERROR = ( - "A progress bar was requested, but there was an error loading the tqdm " - "library. Please install tqdm to use the progress bar functionality." 
-) + _TABLE_HAS_NO_SCHEMA = 'Table has no schema: call "client.get_table()"' @@ -1418,32 +1411,6 @@ def total_rows(self): """int: The total number of rows in the table.""" return self._total_rows - def _get_progress_bar(self, progress_bar_type): - """Construct a tqdm progress bar object, if tqdm is installed.""" - if tqdm is None: - if progress_bar_type is not None: - warnings.warn(_NO_TQDM_ERROR, UserWarning, stacklevel=3) - return None - - description = "Downloading" - unit = "rows" - - try: - if progress_bar_type == "tqdm": - return tqdm.tqdm(desc=description, total=self.total_rows, unit=unit) - elif progress_bar_type == "tqdm_notebook": - return tqdm.tqdm_notebook( - desc=description, total=self.total_rows, unit=unit - ) - elif progress_bar_type == "tqdm_gui": - return tqdm.tqdm_gui(desc=description, total=self.total_rows, unit=unit) - except (KeyError, TypeError): - # Protect ourselves from any tqdm errors. In case of - # unexpected tqdm behavior, just fall back to showing - # no progress bar. - warnings.warn(_NO_TQDM_ERROR, UserWarning, stacklevel=3) - return None - def _to_page_iterable( self, bqstorage_download, tabledata_list_download, bqstorage_client=None ): @@ -1551,7 +1518,9 @@ def to_arrow( owns_bqstorage_client = bqstorage_client is not None try: - progress_bar = self._get_progress_bar(progress_bar_type) + progress_bar = get_progress_bar( + progress_bar_type, "Downloading", self.total_rows, "rows" + ) record_batches = [] for record_batch in self._to_arrow_iterable( diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index a481bff69..f9d823eb0 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import concurrent.futures import copy import json @@ -225,6 +226,154 @@ def test_to_arrow(method_kwargs): ] +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") +def test_to_arrow_w_tqdm_w_query_plan(): + from google.cloud.bigquery import table + from google.cloud.bigquery.job import QueryJob as target_class + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery._tqdm_helpers import _PROGRESS_BAR_UPDATE_INTERVAL + + begun_resource = _make_job_resource(job_type="query") + rows = [ + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + ] + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + connection = _make_connection({}) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, client) + + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = table.RowIterator(client, api_request, path, schema) + + job._properties["statistics"] = { + "query": { + "queryPlan": [ + {"name": "S00: Input", "id": "0", "status": "COMPLETE"}, + {"name": "S01: Output", "id": "1", "status": "COMPLETE"}, + ] + }, + } + reload_patch = mock.patch( + "google.cloud.bigquery.job._AsyncJob.reload", autospec=True + ) + result_patch = mock.patch( + "google.cloud.bigquery.job.QueryJob.result", + side_effect=[ + concurrent.futures.TimeoutError, + concurrent.futures.TimeoutError, + row_iterator, + ], + ) + + with result_patch as result_patch_tqdm, reload_patch: + tbl = job.to_arrow(progress_bar_type="tqdm", create_bqstorage_client=False) + + assert result_patch_tqdm.call_count == 3 + assert isinstance(tbl, pyarrow.Table) + assert tbl.num_rows == 2 + result_patch_tqdm.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL) + + +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") 
+@pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") +def test_to_arrow_w_tqdm_w_pending_status(): + from google.cloud.bigquery import table + from google.cloud.bigquery.job import QueryJob as target_class + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery._tqdm_helpers import _PROGRESS_BAR_UPDATE_INTERVAL + + begun_resource = _make_job_resource(job_type="query") + rows = [ + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + ] + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + connection = _make_connection({}) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, client) + + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = table.RowIterator(client, api_request, path, schema) + + job._properties["statistics"] = { + "query": { + "queryPlan": [ + {"name": "S00: Input", "id": "0", "status": "PENDING"}, + {"name": "S00: Input", "id": "1", "status": "COMPLETE"}, + ] + }, + } + reload_patch = mock.patch( + "google.cloud.bigquery.job._AsyncJob.reload", autospec=True + ) + result_patch = mock.patch( + "google.cloud.bigquery.job.QueryJob.result", + side_effect=[concurrent.futures.TimeoutError, row_iterator], + ) + + with result_patch as result_patch_tqdm, reload_patch: + tbl = job.to_arrow(progress_bar_type="tqdm", create_bqstorage_client=False) + + assert result_patch_tqdm.call_count == 2 + assert isinstance(tbl, pyarrow.Table) + assert tbl.num_rows == 2 + result_patch_tqdm.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL) + + +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") +def test_to_arrow_w_tqdm_wo_query_plan(): + from google.cloud.bigquery import table + from google.cloud.bigquery.job import QueryJob as target_class + from 
google.cloud.bigquery.schema import SchemaField + + begun_resource = _make_job_resource(job_type="query") + rows = [ + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + ] + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + connection = _make_connection({}) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, client) + + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = table.RowIterator(client, api_request, path, schema) + + reload_patch = mock.patch( + "google.cloud.bigquery.job._AsyncJob.reload", autospec=True + ) + result_patch = mock.patch( + "google.cloud.bigquery.job.QueryJob.result", + side_effect=[concurrent.futures.TimeoutError, row_iterator], + ) + + with result_patch as result_patch_tqdm, reload_patch: + tbl = job.to_arrow(progress_bar_type="tqdm", create_bqstorage_client=False) + + assert result_patch_tqdm.call_count == 2 + assert isinstance(tbl, pyarrow.Table) + assert tbl.num_rows == 2 + result_patch_tqdm.assert_called() + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.parametrize( "method_kwargs", @@ -460,3 +609,115 @@ def test_to_dataframe_with_progress_bar(tqdm_mock): job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False) tqdm_mock.assert_called() + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") +def test_to_dataframe_w_tqdm_pending(): + from google.cloud.bigquery import table + from google.cloud.bigquery.job import QueryJob as target_class + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery._tqdm_helpers import _PROGRESS_BAR_UPDATE_INTERVAL + + begun_resource = _make_job_resource(job_type="query") + schema = [ + SchemaField("name", "STRING", mode="NULLABLE"), + SchemaField("age", 
"INTEGER", mode="NULLABLE"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, + ] + + connection = _make_connection({}) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, client) + + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = table.RowIterator(client, api_request, path, schema) + + job._properties["statistics"] = { + "query": { + "queryPlan": [ + {"name": "S00: Input", "id": "0", "status": "PRNDING"}, + {"name": "S01: Output", "id": "1", "status": "COMPLETE"}, + ] + }, + } + reload_patch = mock.patch( + "google.cloud.bigquery.job._AsyncJob.reload", autospec=True + ) + result_patch = mock.patch( + "google.cloud.bigquery.job.QueryJob.result", + side_effect=[concurrent.futures.TimeoutError, row_iterator], + ) + + with result_patch as result_patch_tqdm, reload_patch: + df = job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False) + + assert result_patch_tqdm.call_count == 2 + assert isinstance(df, pandas.DataFrame) + assert len(df) == 4 # verify the number of rows + assert list(df) == ["name", "age"] # verify the column names + result_patch_tqdm.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") +def test_to_dataframe_w_tqdm(): + from google.cloud.bigquery import table + from google.cloud.bigquery.job import QueryJob as target_class + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery._tqdm_helpers import _PROGRESS_BAR_UPDATE_INTERVAL + + begun_resource = _make_job_resource(job_type="query") + schema = [ + SchemaField("name", "STRING", mode="NULLABLE"), + SchemaField("age", "INTEGER", mode="NULLABLE"), + ] + rows = [ + {"f": [{"v": 
"Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, + ] + + connection = _make_connection({}) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, client) + + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = table.RowIterator(client, api_request, path, schema) + + job._properties["statistics"] = { + "query": { + "queryPlan": [ + {"name": "S00: Input", "id": "0", "status": "COMPLETE"}, + {"name": "S01: Output", "id": "1", "status": "COMPLETE"}, + ] + }, + } + reload_patch = mock.patch( + "google.cloud.bigquery.job._AsyncJob.reload", autospec=True + ) + result_patch = mock.patch( + "google.cloud.bigquery.job.QueryJob.result", + side_effect=[ + concurrent.futures.TimeoutError, + concurrent.futures.TimeoutError, + row_iterator, + ], + ) + + with result_patch as result_patch_tqdm, reload_patch: + df = job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False) + + assert result_patch_tqdm.call_count == 3 + assert isinstance(df, pandas.DataFrame) + assert len(df) == 4 # verify the number of rows + assert list(df), ["name", "age"] # verify the column names + result_patch_tqdm.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index eccc46a7a..be67eafcd 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2433,7 +2433,7 @@ def test_to_dataframe_progress_bar( self.assertEqual(len(df), 4) @unittest.skipIf(pandas is None, "Requires `pandas`") - @mock.patch("google.cloud.bigquery.table.tqdm", new=None) + @mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm", new=None) def test_to_dataframe_no_tqdm_no_progress_bar(self): from google.cloud.bigquery.schema import SchemaField @@ -2461,7 +2461,7 @@ def test_to_dataframe_no_tqdm_no_progress_bar(self): 
self.assertEqual(len(df), 4) @unittest.skipIf(pandas is None, "Requires `pandas`") - @mock.patch("google.cloud.bigquery.table.tqdm", new=None) + @mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm", new=None) def test_to_dataframe_no_tqdm(self): from google.cloud.bigquery.schema import SchemaField From 9a9db54cd09f4f70aff02b18a7514faf2edf99b6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 16 Nov 2020 14:39:01 -0600 Subject: [PATCH 055/341] test: fix DML system tests (#388) --- tests/system.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/system.py b/tests/system.py index 51a47c0b7..e347c8a70 100644 --- a/tests/system.py +++ b/tests/system.py @@ -249,7 +249,7 @@ def test_close_releases_open_sockets(self): client.close() conn_count_end = len(current_process.connections()) - self.assertEqual(conn_count_end, conn_count_start) + self.assertLessEqual(conn_count_end, conn_count_start) def test_create_dataset(self): DATASET_ID = _make_dataset_id("create_dataset") @@ -1972,7 +1972,9 @@ def test_query_w_dml(self): def test_dbapi_w_dml(self): dataset_name = _make_dataset_id("dml_dbapi") table_name = "test_table" - self._load_table_for_dml([("Hello World",)], dataset_name, table_name) + self._load_table_for_dml( + [("こんにちは",), ("Hello World",), ("Howdy!",)], dataset_name, table_name + ) query_template = """UPDATE {}.{} SET greeting = 'Guten Tag' WHERE greeting = 'Hello World' @@ -1983,7 +1985,6 @@ def test_dbapi_w_dml(self): job_id="test_dbapi_w_dml_{}".format(str(uuid.uuid4())), ) self.assertEqual(Config.CURSOR.rowcount, 1) - self.assertIsNone(Config.CURSOR.fetchone()) def test_query_w_query_params(self): from google.cloud.bigquery.job import QueryJobConfig From 168f0ecd078f92f34dc731da984551c671c57d49 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 16 Nov 2020 16:05:35 -0600 Subject: [PATCH 056/341] chore: release 2.4.0 (#381) Co-authored-by: release-please[bot] 
<55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Tim Swast --- CHANGELOG.md | 25 +++++++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 787ba7557..03a465926 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,31 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.4.0](https://www.github.com/googleapis/python-bigquery/compare/v2.3.1...v2.4.0) (2020-11-16) + + +### Features + +* add progress bar to `QueryJob.to_dataframe` and `to_arrow` ([#352](https://www.github.com/googleapis/python-bigquery/issues/352)) ([dc78edd](https://www.github.com/googleapis/python-bigquery/commit/dc78eddde7a6a312c8fed7bace7d64036837ab1a)) +* allow routine references ([#378](https://www.github.com/googleapis/python-bigquery/issues/378)) ([f9480dc](https://www.github.com/googleapis/python-bigquery/commit/f9480dc2a1bc58367083176bd74725aa8b903301)) + + +### Bug Fixes + +* **dbapi:** allow rows to be fetched from scripts ([#387](https://www.github.com/googleapis/python-bigquery/issues/387)) ([b899ad1](https://www.github.com/googleapis/python-bigquery/commit/b899ad12e17cb87c58d3ae46b4388d917c5743f2)), closes [#377](https://www.github.com/googleapis/python-bigquery/issues/377) + + +### Performance Improvements + +* avoid extra API calls from `to_dataframe` if all rows are cached ([#384](https://www.github.com/googleapis/python-bigquery/issues/384)) ([c52b317](https://www.github.com/googleapis/python-bigquery/commit/c52b31789998fc0dfde07c3296650c85104d719d)) +* cache first page of `jobs.getQueryResults` rows ([#374](https://www.github.com/googleapis/python-bigquery/issues/374)) ([86f6a51](https://www.github.com/googleapis/python-bigquery/commit/86f6a516d1c7c5dc204ab085ea2578793e6561ff)) +* use `getQueryResults` from DB-API ([#375](https://www.github.com/googleapis/python-bigquery/issues/375)) 
([30de15f](https://www.github.com/googleapis/python-bigquery/commit/30de15f7255de5ea221df4e8db7991d279e0ea28)) + + +### Dependencies + +* expand pyarrow dependencies to include version 2 ([#368](https://www.github.com/googleapis/python-bigquery/issues/368)) ([cd9febd](https://www.github.com/googleapis/python-bigquery/commit/cd9febd20c34983781386c3bf603e5fca7135695)) + ## 2.3.1 11-05-2020 09:27 PST diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 474ccbcf2..fe11624d9 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.3.1" +__version__ = "2.4.0" From 809e4a27b94ba30c10e0c9a7e89576a9de9fda2b Mon Sep 17 00:00:00 2001 From: Ryan Yuan Date: Wed, 18 Nov 2020 03:04:07 +1100 Subject: [PATCH 057/341] docs(samples): add more clustering code snippets (#330) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add sample code for creating a clustered table from a query result. File: samples/client_query_destination_table_clustered.py Section: [https://cloud.google.com/bigquery/docs/creating-clustered-tables#creating_a_clustered_table_from_a_query_result](https://cloud.google.com/bigquery/docs/creating-clustered-tables#creating_a_clustered_table_from_a_query_result) Add sample code for creating a clustered table when you load data. 
File: samples/load_table_clustered.py Section: [https://cloud.google.com/bigquery/docs/creating-clustered-tables#creating_a_clustered_table_when_you_load_data](https://cloud.google.com/bigquery/docs/creating-clustered-tables#creating_a_clustered_table_when_you_load_data) Fixes #329 🦕 --- docs/usage/tables.rst | 17 ++++++ google/cloud/bigquery/__init__.py | 4 +- ...lient_query_destination_table_clustered.py | 43 +++++++++++++++ samples/load_table_clustered.py | 55 +++++++++++++++++++ ...lient_query_destination_table_clustered.py | 27 +++++++++ samples/tests/test_load_table_clustered.py | 27 +++++++++ 6 files changed, 172 insertions(+), 1 deletion(-) create mode 100644 samples/client_query_destination_table_clustered.py create mode 100644 samples/load_table_clustered.py create mode 100644 samples/tests/test_client_query_destination_table_clustered.py create mode 100644 samples/tests/test_load_table_clustered.py diff --git a/docs/usage/tables.rst b/docs/usage/tables.rst index 7afca05e2..d924fe214 100644 --- a/docs/usage/tables.rst +++ b/docs/usage/tables.rst @@ -85,6 +85,23 @@ Load table data from a file with the :start-after: [START bigquery_load_from_file] :end-before: [END bigquery_load_from_file] +Creating a clustered table from a query result: + +.. literalinclude:: ../samples/client_query_destination_table_clustered.py + :language: python + :dedent: 4 + :start-after: [START bigquery_query_clustered_table] + :end-before: [END bigquery_query_clustered_table] + +Creating a clustered table when you load data with the +:func:`~google.cloud.bigquery.client.Client.load_table_from_uri` method: + +.. 
literalinclude:: ../samples/load_table_clustered.py + :language: python + :dedent: 4 + :start-after: [START bigquery_load_table_clustered] + :end-before: [END bigquery_load_table_clustered] + Load a CSV file from Cloud Storage with the :func:`~google.cloud.bigquery.client.Client.load_table_from_uri` method: diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index b8d1cc4d7..41f987228 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -37,6 +37,7 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import enums +from google.cloud.bigquery.enums import SqlTypeNames from google.cloud.bigquery.enums import StandardSqlDataTypes from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions @@ -137,8 +138,9 @@ "Encoding", "QueryPriority", "SchemaUpdateOption", - "StandardSqlDataTypes", "SourceFormat", + "SqlTypeNames", + "StandardSqlDataTypes", "WriteDisposition", # EncryptionConfiguration "EncryptionConfiguration", diff --git a/samples/client_query_destination_table_clustered.py b/samples/client_query_destination_table_clustered.py new file mode 100644 index 000000000..5a109ed10 --- /dev/null +++ b/samples/client_query_destination_table_clustered.py @@ -0,0 +1,43 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +def client_query_destination_table_clustered(table_id): + + # [START bigquery_query_clustered_table] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the destination table. + # table_id = "your-project.your_dataset.your_table_name" + + sql = "SELECT * FROM `bigquery-public-data.samples.shakespeare`" + cluster_fields = ["corpus"] + + job_config = bigquery.QueryJobConfig( + clustering_fields=cluster_fields, destination=table_id + ) + + # Start the query, passing in the extra configuration. + query_job = client.query(sql, job_config=job_config) # Make an API request. + query_job.result() # Wait for the job to complete. + + table = client.get_table(table_id) # Make an API request. + if table.clustering_fields == cluster_fields: + print( + "The destination table is written using the cluster_fields configuration." + ) + # [END bigquery_query_clustered_table] diff --git a/samples/load_table_clustered.py b/samples/load_table_clustered.py new file mode 100644 index 000000000..20d412cb3 --- /dev/null +++ b/samples/load_table_clustered.py @@ -0,0 +1,55 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_clustered(table_id): + + # [START bigquery_load_table_clustered] + from google.cloud import bigquery + + # Construct a BigQuery client object. 
+ client = bigquery.Client() + + # TODO(developer): Set table_id to the ID of the table to create. + # table_id = "your-project.your_dataset.your_table_name" + + job_config = bigquery.LoadJobConfig( + skip_leading_rows=1, + source_format=bigquery.SourceFormat.CSV, + schema=[ + bigquery.SchemaField("timestamp", bigquery.SqlTypeNames.TIMESTAMP), + bigquery.SchemaField("origin", bigquery.SqlTypeNames.STRING), + bigquery.SchemaField("destination", bigquery.SqlTypeNames.STRING), + bigquery.SchemaField("amount", bigquery.SqlTypeNames.NUMERIC), + ], + time_partitioning=bigquery.TimePartitioning(field="timestamp"), + clustering_fields=["origin", "destination"], + ) + + job = client.load_table_from_uri( + ["gs://cloud-samples-data/bigquery/sample-transactions/transactions.csv"], + table_id, + job_config=job_config, + ) + + job.result() # Waits for the job to complete. + + table = client.get_table(table_id) # Make an API request. + print( + "Loaded {} rows and {} columns to {}".format( + table.num_rows, len(table.schema), table_id + ) + ) + # [END bigquery_load_table_clustered] + return table diff --git a/samples/tests/test_client_query_destination_table_clustered.py b/samples/tests/test_client_query_destination_table_clustered.py new file mode 100644 index 000000000..b4bdd588c --- /dev/null +++ b/samples/tests/test_client_query_destination_table_clustered.py @@ -0,0 +1,27 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. 
import client_query_destination_table_clustered + + +def test_client_query_destination_table_clustered(capsys, random_table_id): + + client_query_destination_table_clustered.client_query_destination_table_clustered( + random_table_id + ) + out, err = capsys.readouterr() + assert ( + "The destination table is written using the cluster_fields configuration." + in out + ) diff --git a/samples/tests/test_load_table_clustered.py b/samples/tests/test_load_table_clustered.py new file mode 100644 index 000000000..bafdc2051 --- /dev/null +++ b/samples/tests/test_load_table_clustered.py @@ -0,0 +1,27 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .. import load_table_clustered + + +def test_load_table_clustered(capsys, random_table_id, client): + + table = load_table_clustered.load_table_clustered(random_table_id) + + out, _ = capsys.readouterr() + assert "rows and 4 columns" in out + + rows = list(client.list_rows(table)) # Make an API request. 
+ assert len(rows) > 0 + assert table.clustering_fields == ["origin", "destination"] From c6359d9d5acda3017382961cffca346e7093e0ae Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 17 Nov 2020 23:38:45 +0100 Subject: [PATCH 058/341] chore(deps): update dependency matplotlib to v3.3.3 (#385) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index bf895a1ae..2c1fb38c9 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -4,7 +4,7 @@ google-auth-oauthlib==0.4.2 grpcio==1.33.2 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' -matplotlib==3.3.2 +matplotlib==3.3.3 pandas==1.1.4 pyarrow==1.0.1 pytz==2020.1 From 273d58ee19e1305d09f5aff31bbec358ed71ce59 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 17 Nov 2020 23:54:04 +0100 Subject: [PATCH 059/341] chore(deps): update dependency google-cloud-bigquery to v2.3.1 (#371) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | minor | `==2.2.0` -> `==2.3.1` | --- ### Release Notes
googleapis/python-bigquery ### [`v2.3.1`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​231) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.2.0...v2.3.1) 11-05-2020 09:27 PST ##### Internal / Testing Changes - update `google.cloud.bigquery.__version__`
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Renovate will not automatically rebase this PR, because other commits have been found. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 2c1fb38c9..509a61ade 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.2.0 +google-cloud-bigquery==2.3.1 google-cloud-bigquery-storage==2.0.1 google-auth-oauthlib==0.4.2 grpcio==1.33.2 From a1949ae20ec4f9c771b0cffbcd70792dd6a30dbf Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Mon, 23 Nov 2020 14:37:45 -0500 Subject: [PATCH 060/341] fix: avoid floating point for timestamp in `insert_rows` (#393) * fix: timestamp precision in insert_rows * fix: remove floating point conversion and add datetime format * fix: add formatted string in unit tests --- google/cloud/bigquery/_helpers.py | 9 +++------ tests/unit/test__helpers.py | 10 +++++++--- tests/unit/test_client.py | 16 ++++++++-------- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index b59b3d794..35129d844 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -23,7 +23,7 @@ from google.cloud._helpers import UTC from google.cloud._helpers
import _date_from_iso8601_date from google.cloud._helpers import _datetime_from_microseconds -from google.cloud._helpers import _microseconds_from_datetime +from google.cloud._helpers import _RFC3339_MICROS from google.cloud._helpers import _RFC3339_NO_FRACTION from google.cloud._helpers import _to_bytes @@ -313,12 +313,9 @@ def _timestamp_to_json_parameter(value): def _timestamp_to_json_row(value): - """Coerce 'value' to an JSON-compatible representation. - - This version returns floating-point seconds value used in row data. - """ + """Coerce 'value' to an JSON-compatible representation.""" if isinstance(value, datetime.datetime): - value = _microseconds_from_datetime(value) * 1e-6 + value = value.strftime(_RFC3339_MICROS) return value diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 16c4fb8a5..a52581501 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -728,10 +728,14 @@ def test_w_string(self): self.assertEqual(self._call_fut(ZULU), ZULU) def test_w_datetime(self): - from google.cloud._helpers import _microseconds_from_datetime - when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328) - self.assertEqual(self._call_fut(when), _microseconds_from_datetime(when) / 1e6) + self.assertEqual(self._call_fut(when), "2016-12-20T15:58:27.339328Z") + + def test_w_datetime_w_utc_zone(self): + from google.cloud._helpers import UTC + + when = datetime.datetime(2020, 11, 17, 1, 6, 52, 353795, tzinfo=UTC) + self.assertEqual(self._call_fut(when), "2020-11-17T01:06:52.353795Z") class Test_datetime_to_json(unittest.TestCase): diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index dd57ee798..4fba1150c 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -5804,7 +5804,7 @@ def test_insert_rows_w_schema(self): import datetime from google.cloud._helpers import UTC from google.cloud._helpers import _datetime_to_rfc3339 - from google.cloud._helpers import _microseconds_from_datetime + from 
google.cloud._helpers import _RFC3339_MICROS from google.cloud.bigquery.schema import SchemaField WHEN_TS = 1437767599.006 @@ -5834,7 +5834,7 @@ def _row_data(row): result = {"full_name": row[0], "age": str(row[1])} joined = row[2] if isinstance(joined, datetime.datetime): - joined = _microseconds_from_datetime(joined) * 1e-6 + joined = joined.strftime(_RFC3339_MICROS) if joined is not None: result["joined"] = joined return result @@ -5864,7 +5864,7 @@ def test_insert_rows_w_list_of_dictionaries(self): import datetime from google.cloud._helpers import UTC from google.cloud._helpers import _datetime_to_rfc3339 - from google.cloud._helpers import _microseconds_from_datetime + from google.cloud._helpers import _RFC3339_MICROS from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table @@ -5910,7 +5910,7 @@ def _row_data(row): row = copy.deepcopy(row) del row["joined"] elif isinstance(joined, datetime.datetime): - row["joined"] = _microseconds_from_datetime(joined) * 1e-6 + row["joined"] = joined.strftime(_RFC3339_MICROS) row["age"] = str(row["age"]) return row @@ -6109,16 +6109,16 @@ def test_insert_rows_w_repeated_fields(self): { "score": "12", "times": [ - 1543665600.0, # 2018-12-01 12:00 UTC - 1543669200.0, # 2018-12-01 13:00 UTC + "2018-12-01T12:00:00.000000Z", + "2018-12-01T13:00:00.000000Z", ], "distances": [1.25, 2.5], }, { "score": "13", "times": [ - 1543752000.0, # 2018-12-02 12:00 UTC - 1543755600.0, # 2018-12-02 13:00 UTC + "2018-12-02T12:00:00.000000Z", + "2018-12-02T13:00:00.000000Z", ], "distances": [-1.25, -2.5], }, From 673a9cb51c577c1dd016e76f3634b1e9e21482c5 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Tue, 24 Nov 2020 16:28:22 -0500 Subject: [PATCH 061/341] deps: update required version of opentelemetry for opentelemetry-exporter-google-cloud (#398) --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py
index 48c4a7518..5f4e506eb 100644 --- a/setup.py +++ b/setup.py @@ -55,9 +55,9 @@ ], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ - "opentelemetry-api==0.9b0", - "opentelemetry-sdk==0.9b0", - "opentelemetry-instrumentation==0.9b0 ", + "opentelemetry-api==0.11b0", + "opentelemetry-sdk==0.11b0", + "opentelemetry-instrumentation==0.11b0", ], } From 730df17ae1ab0b0bb2454f3c134c8f62665bc51b Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 24 Nov 2020 16:44:02 -0600 Subject: [PATCH 062/341] perf: don't fetch rows when waiting for query to finish (#400) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When there are large result sets, fetching rows while waiting for the query to finish can cause the API to hang indefinitely. (This may be due to an interaction between connection timeout and API timeout.) This reverts commit 86f6a516d1c7c5dc204ab085ea2578793e6561ff (#374). Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes https://github.com/pydata/pandas-gbq/issues/343 Fixes #394 🦕 --- google/cloud/bigquery/client.py | 4 +- google/cloud/bigquery/job/query.py | 5 --- tests/unit/job/test_query.py | 55 +++++++--------------------- tests/unit/job/test_query_pandas.py | 44 ++++++++-------------- tests/unit/test_client.py | 4 +- tests/unit/test_table.py | 57 +++++++++++++++++++++++++++++ 6 files changed, 89 insertions(+), 80 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index c67ef54e0..cd1474336 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -1534,7 +1534,7 @@ def _get_query_results( A new ``_QueryResults`` instance. """ - extra_params = {} + extra_params = {"maxResults": 0} if project is None: project = self.project @@ -3187,7 +3187,6 @@ def _list_rows_from_query_results( page_size=None, retry=DEFAULT_RETRY, timeout=None, - first_page_response=None, ): """List the rows of a completed query. 
See @@ -3248,7 +3247,6 @@ def _list_rows_from_query_results( table=destination, extra_params=params, total_rows=total_rows, - first_page_response=first_page_response, ) return row_iterator diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 7a1a74954..9e8908613 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1177,10 +1177,6 @@ def result( if self._query_results.total_rows is None: return _EmptyRowIterator() - first_page_response = None - if max_results is None and page_size is None and start_index is None: - first_page_response = self._query_results._properties - rows = self._client._list_rows_from_query_results( self.job_id, self.location, @@ -1193,7 +1189,6 @@ def result( start_index=start_index, retry=retry, timeout=timeout, - first_page_response=first_page_response, ) rows._preserve_order = _contains_order_by(self.query) return rows diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 41e31f469..daaf2e557 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -787,9 +787,7 @@ def test_result(self): "location": "EU", }, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "3", - "rows": [{"f": [{"v": "abc"}]}], - "pageToken": "next-page", + "totalRows": "2", } job_resource = self._make_resource(started=True, location="EU") job_resource_done = self._make_resource(started=True, ended=True, location="EU") @@ -801,9 +799,9 @@ def test_result(self): query_page_resource = { # Explicitly set totalRows to be different from the initial # response to test update during iteration. 
- "totalRows": "2", + "totalRows": "1", "pageToken": None, - "rows": [{"f": [{"v": "def"}]}], + "rows": [{"f": [{"v": "abc"}]}], } conn = _make_connection( query_resource, query_resource_done, job_resource_done, query_page_resource @@ -814,20 +812,19 @@ def test_result(self): result = job.result() self.assertIsInstance(result, RowIterator) - self.assertEqual(result.total_rows, 3) + self.assertEqual(result.total_rows, 2) rows = list(result) - self.assertEqual(len(rows), 2) + self.assertEqual(len(rows), 1) self.assertEqual(rows[0].col1, "abc") - self.assertEqual(rows[1].col1, "def") # Test that the total_rows property has changed during iteration, based # on the response from tabledata.list. - self.assertEqual(result.total_rows, 2) + self.assertEqual(result.total_rows, 1) query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_results_call = mock.call( method="GET", path=query_results_path, - query_params={"location": "EU"}, + query_params={"maxResults": 0, "location": "EU"}, timeout=None, ) reload_call = mock.call( @@ -842,7 +839,6 @@ def test_result(self): query_params={ "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, "location": "EU", - "pageToken": "next-page", }, timeout=None, ) @@ -855,9 +851,7 @@ def test_result_with_done_job_calls_get_query_results(self): "jobComplete": True, "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "2", - "rows": [{"f": [{"v": "abc"}]}], - "pageToken": "next-page", + "totalRows": "1", } job_resource = self._make_resource(started=True, ended=True, location="EU") job_resource["configuration"]["query"]["destinationTable"] = { @@ -866,9 +860,9 @@ def test_result_with_done_job_calls_get_query_results(self): "tableId": "dest_table", } results_page_resource = { - "totalRows": "2", + "totalRows": "1", "pageToken": None, - "rows": [{"f": [{"v": "def"}]}], + "rows": [{"f": [{"v": "abc"}]}], } conn = 
_make_connection(query_resource_done, results_page_resource) client = _make_client(self.PROJECT, connection=conn) @@ -877,15 +871,14 @@ def test_result_with_done_job_calls_get_query_results(self): result = job.result() rows = list(result) - self.assertEqual(len(rows), 2) + self.assertEqual(len(rows), 1) self.assertEqual(rows[0].col1, "abc") - self.assertEqual(rows[1].col1, "def") query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_results_call = mock.call( method="GET", path=query_results_path, - query_params={"location": "EU"}, + query_params={"maxResults": 0, "location": "EU"}, timeout=None, ) query_results_page_call = mock.call( @@ -894,7 +887,6 @@ def test_result_with_done_job_calls_get_query_results(self): query_params={ "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, "location": "EU", - "pageToken": "next-page", }, timeout=None, ) @@ -908,12 +900,6 @@ def test_result_with_max_results(self): "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "5", - # These rows are discarded because max_results is set. 
- "rows": [ - {"f": [{"v": "xyz"}]}, - {"f": [{"v": "uvw"}]}, - {"f": [{"v": "rst"}]}, - ], } query_page_resource = { "totalRows": "5", @@ -939,7 +925,6 @@ def test_result_with_max_results(self): rows = list(result) self.assertEqual(len(rows), 3) - self.assertEqual(rows[0].col1, "abc") self.assertEqual(len(connection.api_request.call_args_list), 2) query_page_request = connection.api_request.call_args_list[1] self.assertEqual( @@ -994,7 +979,7 @@ def test_result_w_retry(self): query_results_call = mock.call( method="GET", path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={"location": "asia-northeast1"}, + query_params={"maxResults": 0, "location": "asia-northeast1"}, timeout=None, ) reload_call = mock.call( @@ -1094,12 +1079,6 @@ def test_result_w_page_size(self): "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "4", - # These rows are discarded because page_size is set. - "rows": [ - {"f": [{"v": "xyz"}]}, - {"f": [{"v": "uvw"}]}, - {"f": [{"v": "rst"}]}, - ], } job_resource = self._make_resource(started=True, ended=True, location="US") q_config = job_resource["configuration"]["query"] @@ -1130,7 +1109,6 @@ def test_result_w_page_size(self): # Assert actual_rows = list(result) self.assertEqual(len(actual_rows), 4) - self.assertEqual(actual_rows[0].col1, "row1") query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_page_1_call = mock.call( @@ -1164,12 +1142,6 @@ def test_result_with_start_index(self): "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, "totalRows": "5", - # These rows are discarded because start_index is set. 
- "rows": [ - {"f": [{"v": "xyz"}]}, - {"f": [{"v": "uvw"}]}, - {"f": [{"v": "rst"}]}, - ], } tabledata_resource = { "totalRows": "5", @@ -1196,7 +1168,6 @@ def test_result_with_start_index(self): rows = list(result) self.assertEqual(len(rows), 4) - self.assertEqual(rows[0].col1, "abc") self.assertEqual(len(connection.api_request.call_args_list), 2) tabledata_list_request = connection.api_request.call_args_list[1] self.assertEqual( diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index f9d823eb0..cdd6f2b3c 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -100,7 +100,6 @@ def test_to_dataframe_bqstorage_preserve_order(query): ] }, "totalRows": "4", - "pageToken": "next-page", } connection = _make_connection(get_query_results_resource, job_resource) client = _make_client(connection=connection) @@ -135,16 +134,7 @@ def test_to_dataframe_bqstorage_preserve_order(query): @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") -@pytest.mark.parametrize( - "method_kwargs", - [ - {"create_bqstorage_client": False}, - # Since all rows are contained in the first page of results, the BigQuery - # Storage API won't actually be used. 
- {"create_bqstorage_client": True}, - ], -) -def test_to_arrow(method_kwargs): +def test_to_arrow(): from google.cloud.bigquery.job import QueryJob as target_class begun_resource = _make_job_resource(job_type="query") @@ -172,6 +162,8 @@ def test_to_arrow(method_kwargs): }, ] }, + } + tabledata_resource = { "rows": [ { "f": [ @@ -185,15 +177,17 @@ def test_to_arrow(method_kwargs): {"v": {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}}, ] }, - ], + ] } done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection(begun_resource, query_resource, done_resource) + connection = _make_connection( + begun_resource, query_resource, done_resource, tabledata_resource + ) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) - tbl = job.to_arrow(**method_kwargs) + tbl = job.to_arrow(create_bqstorage_client=False) assert isinstance(tbl, pyarrow.Table) assert tbl.num_rows == 2 @@ -375,16 +369,7 @@ def test_to_arrow_w_tqdm_wo_query_plan(): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.parametrize( - "method_kwargs", - [ - {"create_bqstorage_client": False}, - # Since all rows are contained in the first page of results, the BigQuery - # Storage API won't actually be used. 
- {"create_bqstorage_client": True}, - ], -) -def test_to_dataframe(method_kwargs): +def test_to_dataframe(): from google.cloud.bigquery.job import QueryJob as target_class begun_resource = _make_job_resource(job_type="query") @@ -398,20 +383,24 @@ def test_to_dataframe(method_kwargs): {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, ] }, + } + tabledata_resource = { "rows": [ {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, - ], + ] } done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} - connection = _make_connection(begun_resource, query_resource, done_resource) + connection = _make_connection( + begun_resource, query_resource, done_resource, tabledata_resource + ) client = _make_client(connection=connection) job = target_class.from_api_repr(begun_resource, client) - df = job.to_dataframe(**method_kwargs) + df = job.to_dataframe(create_bqstorage_client=False) assert isinstance(df, pandas.DataFrame) assert len(df) == 4 # verify the number of rows @@ -456,7 +445,6 @@ def test_to_dataframe_bqstorage(): {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, ] }, - "pageToken": "next-page", } connection = _make_connection(query_resource) client = _make_client(connection=connection) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 4fba1150c..c4bdea2f8 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -319,7 +319,7 @@ def test__get_query_results_miss_w_explicit_project_and_timeout(self): conn.api_request.assert_called_once_with( method="GET", path=path, - query_params={"timeoutMs": 500, "location": self.LOCATION}, + query_params={"maxResults": 0, "timeoutMs": 500, "location": self.LOCATION}, timeout=42, ) @@ -336,7 +336,7 @@ def test__get_query_results_miss_w_client_location(self): conn.api_request.assert_called_once_with( 
method="GET", path="/projects/PROJECT/queries/nothere", - query_params={"location": self.LOCATION}, + query_params={"maxResults": 0, "location": self.LOCATION}, timeout=None, ) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index be67eafcd..1dd5fab46 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1630,6 +1630,40 @@ def test_iterate(self): api_request.assert_called_once_with(method="GET", path=path, query_params={}) + def test_iterate_with_cached_first_page(self): + from google.cloud.bigquery.schema import SchemaField + + first_page = { + "rows": [ + {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, + {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]}, + ], + "pageToken": "next-page", + } + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = self._make_one( + _mock_client(), api_request, path, schema, first_page_response=first_page + ) + rows = list(row_iterator) + self.assertEqual(len(rows), 4) + self.assertEqual(rows[0].age, 27) + self.assertEqual(rows[1].age, 28) + self.assertEqual(rows[2].age, 32) + self.assertEqual(rows[3].age, 33) + + api_request.assert_called_once_with( + method="GET", path=path, query_params={"pageToken": "next-page"} + ) + def test_page_size(self): from google.cloud.bigquery.schema import SchemaField @@ -1655,6 +1689,29 @@ def test_page_size(self): query_params={"maxResults": row_iterator._page_size}, ) + def test__is_completely_cached_returns_false_without_first_page(self): + iterator = self._make_one(first_page_response=None) + self.assertFalse(iterator._is_completely_cached()) + + def test__is_completely_cached_returns_false_with_page_token(self): + first_page = {"pageToken": "next-page"} + iterator = 
self._make_one(first_page_response=first_page) + self.assertFalse(iterator._is_completely_cached()) + + def test__is_completely_cached_returns_true(self): + first_page = {"rows": []} + iterator = self._make_one(first_page_response=first_page) + self.assertTrue(iterator._is_completely_cached()) + + def test__validate_bqstorage_returns_false_when_completely_cached(self): + first_page = {"rows": []} + iterator = self._make_one(first_page_response=first_page) + self.assertFalse( + iterator._validate_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): from google.cloud.bigquery.schema import SchemaField From 9f3930d56684de129e3e623b3859f6f2bb3ea5a4 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 25 Nov 2020 20:42:04 +0100 Subject: [PATCH 063/341] chore(deps): update dependency google-cloud-bigquery to v2.4.0 (#391) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | minor | `==2.3.1` -> `==2.4.0` | --- ### Release Notes
googleapis/python-bigquery ### [`v2.4.0`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​240-httpswwwgithubcomgoogleapispython-bigquerycomparev231v240-2020-11-16) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.3.1...v2.4.0) ##### Features - add progress bar to `QueryJob.to_dataframe` and `to_arrow` ([#​352](https://www.github.com/googleapis/python-bigquery/issues/352)) ([dc78edd](https://www.github.com/googleapis/python-bigquery/commit/dc78eddde7a6a312c8fed7bace7d64036837ab1a)) - allow routine references ([#​378](https://www.github.com/googleapis/python-bigquery/issues/378)) ([f9480dc](https://www.github.com/googleapis/python-bigquery/commit/f9480dc2a1bc58367083176bd74725aa8b903301)) ##### Bug Fixes - **dbapi:** allow rows to be fetched from scripts ([#​387](https://www.github.com/googleapis/python-bigquery/issues/387)) ([b899ad1](https://www.github.com/googleapis/python-bigquery/commit/b899ad12e17cb87c58d3ae46b4388d917c5743f2)), closes [#​377](https://www.github.com/googleapis/python-bigquery/issues/377) ##### Performance Improvements - avoid extra API calls from `to_dataframe` if all rows are cached ([#​384](https://www.github.com/googleapis/python-bigquery/issues/384)) ([c52b317](https://www.github.com/googleapis/python-bigquery/commit/c52b31789998fc0dfde07c3296650c85104d719d)) - cache first page of `jobs.getQueryResults` rows ([#​374](https://www.github.com/googleapis/python-bigquery/issues/374)) ([86f6a51](https://www.github.com/googleapis/python-bigquery/commit/86f6a516d1c7c5dc204ab085ea2578793e6561ff)) - use `getQueryResults` from DB-API ([#​375](https://www.github.com/googleapis/python-bigquery/issues/375)) ([30de15f](https://www.github.com/googleapis/python-bigquery/commit/30de15f7255de5ea221df4e8db7991d279e0ea28)) ##### Dependencies - expand pyarrow dependencies to include version 2 ([#​368](https://www.github.com/googleapis/python-bigquery/issues/368)) 
([cd9febd](https://www.github.com/googleapis/python-bigquery/commit/cd9febd20c34983781386c3bf603e5fca7135695))
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 509a61ade..36363a377 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.3.1 +google-cloud-bigquery==2.4.0 google-cloud-bigquery-storage==2.0.1 google-auth-oauthlib==0.4.2 grpcio==1.33.2 From 4cad985ee9bf0afdd831480e8f0f9874560b6492 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 25 Nov 2020 20:56:03 +0100 Subject: [PATCH 064/341] chore(deps): update dependency pyarrow to v2 (#380) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [pyarrow](https://arrow.apache.org/) | major | `==1.0.1` -> `==2.0.0` | --- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. 
--- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 36363a377..f47f2228e 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -6,5 +6,5 @@ ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.3 pandas==1.1.4 -pyarrow==1.0.1 +pyarrow==2.0.0 pytz==2020.1 From 5a422eb20c57dae66c5716fd319b66432d3edce6 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 25 Nov 2020 21:16:14 +0100 Subject: [PATCH 065/341] chore(deps): update dependency pytz to v2020.4 (#356) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index f47f2228e..eeb94db5a 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -7,4 +7,4 @@ ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.3 pandas==1.1.4 pyarrow==2.0.0 -pytz==2020.1 +pytz==2020.4 From 168f0354c4815bd1aeadbd4e388dcc9b32f97d6b Mon Sep 17 00:00:00 2001 From: Steffany Brown <30247553+steffnay@users.noreply.github.com> Date: Mon, 30 Nov 2020 13:55:22 -0800 Subject: [PATCH 066/341] feat: add support for unrecognized model types (#401) * feat: add support for unrecognized model types * refactor Co-authored-by: Tim Swast --- google/cloud/bigquery/model.py | 12 +++++++++--- tests/unit/model/test_model.py | 17 +++++++++++++++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/model.py b/google/cloud/bigquery/model.py index 1143b71f9..0f5d8f83b 100644 --- 
a/google/cloud/bigquery/model.py +++ b/google/cloud/bigquery/model.py @@ -305,9 +305,15 @@ def from_api_repr(cls, resource): start_time = datetime_helpers.from_microseconds(1e3 * float(start_time)) training_run["startTime"] = datetime_helpers.to_rfc3339(start_time) - this._proto = json_format.ParseDict( - resource, types.Model()._pb, ignore_unknown_fields=True - ) + try: + this._proto = json_format.ParseDict( + resource, types.Model()._pb, ignore_unknown_fields=True + ) + except json_format.ParseError: + resource["modelType"] = "MODEL_TYPE_UNSPECIFIED" + this._proto = json_format.ParseDict( + resource, types.Model()._pb, ignore_unknown_fields=True + ) return this def _build_resource(self, filter_fields): diff --git a/tests/unit/model/test_model.py b/tests/unit/model/test_model.py index 9fa29a496..8f0bf58d5 100644 --- a/tests/unit/model/test_model.py +++ b/tests/unit/model/test_model.py @@ -186,6 +186,23 @@ def test_from_api_repr_w_unknown_fields(target_class): assert got._properties is resource +def test_from_api_repr_w_unknown_type(target_class): + from google.cloud.bigquery import ModelReference + + resource = { + "modelReference": { + "projectId": "my-project", + "datasetId": "my_dataset", + "modelId": "my_model", + }, + "modelType": "BE_A_GOOD_ROLE_MODEL", + } + got = target_class.from_api_repr(resource) + assert got.reference == ModelReference.from_string("my-project.my_dataset.my_model") + assert got.model_type == 0 + assert got._properties is resource + + @pytest.mark.parametrize( "resource,filter_fields,expected", [ From 53dff2ad3889af04369a22437e6ab9b92c5755b6 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 30 Nov 2020 16:10:04 -0600 Subject: [PATCH 067/341] feat: add `TableReference.__str__` to get table ID in standard SQL (#405) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the natural inverse of the `TableReference.from_string` method. Thank you for opening a Pull Request! 
Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #354 🦕 --- google/cloud/bigquery/table.py | 8 ++++++-- tests/unit/test_table.py | 8 ++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 4bfedd758..f30c05773 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -262,6 +262,9 @@ def __ne__(self, other): def __hash__(self): return hash(self._key()) + def __str__(self): + return f"{self.project}.{self.dataset_id}.{self.table_id}" + def __repr__(self): from google.cloud.bigquery.dataset import DatasetReference @@ -475,7 +478,7 @@ def full_table_id(self): """Union[str, None]: ID for the table (:data:`None` until set from the server). - In the format ``project_id:dataset_id.table_id``. + In the format ``project-id:dataset_id.table_id``. """ return self._properties.get("id") @@ -484,7 +487,8 @@ def table_type(self): """Union[str, None]: The type of the table (:data:`None` until set from the server). - Possible values are ``'TABLE'``, ``'VIEW'``, or ``'EXTERNAL'``. + Possible values are ``'TABLE'``, ``'VIEW'``, ``'MATERIALIZED_VIEW'`` or + ``'EXTERNAL'``. 
""" return self._properties.get("type") diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 1dd5fab46..67874ff91 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -272,6 +272,11 @@ def test___repr__(self): ) self.assertEqual(repr(table1), expected) + def test___str__(self): + dataset = DatasetReference("project1", "dataset1") + table1 = self._make_one(dataset, "table1") + self.assertEqual(str(table1), "project1.dataset1.table1") + class TestTable(unittest.TestCase, _SchemaBase): @@ -813,6 +818,9 @@ def test_from_string(self): self.assertEqual(got.project, "string-project") self.assertEqual(got.dataset_id, "string_dataset") self.assertEqual(got.table_id, "string_table") + self.assertEqual( + str(got.reference), "string-project.string_dataset.string_table" + ) def test_from_string_legacy_string(self): cls = self._get_target_class() From 04d027317a99e3f353e0b7a18076da9b6ba4d8d3 Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Wed, 2 Dec 2020 11:28:48 -0500 Subject: [PATCH 068/341] feat: add progress bar for magics (#396) * feat: add progress bar for magics * feat: remove default progress bar * feat: add default tqdm value in magic --- google/cloud/bigquery/_tqdm_helpers.py | 5 +- google/cloud/bigquery/magics/magics.py | 38 +++++++++++++- tests/unit/test_magics.py | 71 +++++++++++++++++++++++++- 3 files changed, 107 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigquery/_tqdm_helpers.py b/google/cloud/bigquery/_tqdm_helpers.py index bdecefe4a..2fcf2a981 100644 --- a/google/cloud/bigquery/_tqdm_helpers.py +++ b/google/cloud/bigquery/_tqdm_helpers.py @@ -55,15 +55,14 @@ def get_progress_bar(progress_bar_type, description, total, unit): def wait_for_query(query_job, progress_bar_type=None): """Return query result and display a progress bar while the query running, if tqdm is installed.""" - if progress_bar_type is None: - return query_job.result() - default_total = 1 
current_stage = None start_time = time.time() progress_bar = get_progress_bar( progress_bar_type, "Query is running", default_total, "query" ) + if progress_bar is None: + return query_job.result() i = 0 while True: if query_job.query_plan: diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 5645a84a5..f04a6364a 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -182,6 +182,7 @@ def __init__(self): self._default_query_job_config = bigquery.QueryJobConfig() self._bigquery_client_options = client_options.ClientOptions() self._bqstorage_client_options = client_options.ClientOptions() + self._progress_bar_type = "tqdm" @property def credentials(self): @@ -313,6 +314,26 @@ def default_query_job_config(self): def default_query_job_config(self, value): self._default_query_job_config = value + @property + def progress_bar_type(self): + """str: Default progress bar type to use to display progress bar while + executing queries through IPython magics. + + Note:: + Install the ``tqdm`` package to use this feature. + + Example: + Manually setting the progress_bar_type: + + >>> from google.cloud.bigquery import magics + >>> magics.context.progress_bar_type = "tqdm" + """ + return self._progress_bar_type + + @progress_bar_type.setter + def progress_bar_type(self, value): + self._progress_bar_type = value + context = Context() @@ -524,6 +545,15 @@ def _create_dataset_if_necessary(client, dataset_id): "name (ex. $my_dict_var)." ), ) +@magic_arguments.argument( + "--progress_bar_type", + type=str, + default=None, + help=( + "Sets progress bar type to display a progress bar while executing the query." + "Defaults to use tqdm. Install the ``tqdm`` package to use this feature." 
+ ), +) def _cell_magic(line, query): """Underlying function for bigquery cell magic @@ -687,12 +717,16 @@ def _cell_magic(line, query): ) return query_job + progress_bar = context.progress_bar_type or args.progress_bar_type + if max_results: result = query_job.result(max_results=max_results).to_dataframe( - bqstorage_client=bqstorage_client + bqstorage_client=bqstorage_client, progress_bar_type=progress_bar ) else: - result = query_job.to_dataframe(bqstorage_client=bqstorage_client) + result = query_job.to_dataframe( + bqstorage_client=bqstorage_client, progress_bar_type=progress_bar + ) if args.destination_var: IPython.get_ipython().push({args.destination_var: result}) diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index a7cf92919..ff41fe720 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -623,7 +623,7 @@ def warning_match(warning): assert client_info.user_agent == "ipython-" + IPython.__version__ query_job_mock.to_dataframe.assert_called_once_with( - bqstorage_client=bqstorage_instance_mock + bqstorage_client=bqstorage_instance_mock, progress_bar_type="tqdm" ) assert isinstance(return_value, pandas.DataFrame) @@ -665,7 +665,9 @@ def test_bigquery_magic_with_rest_client_requested(monkeypatch): return_value = ip.run_cell_magic("bigquery", "--use_rest_api", sql) bqstorage_mock.assert_not_called() - query_job_mock.to_dataframe.assert_called_once_with(bqstorage_client=None) + query_job_mock.to_dataframe.assert_called_once_with( + bqstorage_client=None, progress_bar_type="tqdm" + ) assert isinstance(return_value, pandas.DataFrame) @@ -1167,6 +1169,71 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_setter(): assert sent_config["maximumBytesBilled"] == "10203" +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_bigquery_magic_w_progress_bar_type_w_context_setter(monkeypatch): + ip = IPython.get_ipython() + 
ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context._project = None + + magics.context.progress_bar_type = "tqdm_gui" + + mock_credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + # Set up the context with monkeypatch so that it's reset for subsequent + # tests. + monkeypatch.setattr(magics.context, "_credentials", mock_credentials) + + # Mock out the BigQuery Storage API. + bqstorage_mock = mock.create_autospec(bigquery_storage.BigQueryReadClient) + bqstorage_client_patch = mock.patch( + "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock + ) + + sql = "SELECT 17 AS num" + result = pandas.DataFrame([17], columns=["num"]) + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True + ) + query_job_mock.to_dataframe.return_value = result + with run_query_patch as run_query_mock, bqstorage_client_patch: + run_query_mock.return_value = query_job_mock + + return_value = ip.run_cell_magic("bigquery", "--use_rest_api", sql) + + bqstorage_mock.assert_not_called() + query_job_mock.to_dataframe.assert_called_once_with( + bqstorage_client=None, progress_bar_type=magics.context.progress_bar_type + ) + + assert isinstance(return_value, pandas.DataFrame) + + +@pytest.mark.usefixtures("ipython_interactive") +def test_bigquery_magic_with_progress_bar_type(): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + magics.context.progress_bar_type = None + + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + with run_query_patch as run_query_mock: + ip.run_cell_magic( + "bigquery", "--progress_bar_type=tqdm_gui", "SELECT 17 as num" + ) + + progress_bar_used = run_query_mock.mock_calls[1][2]["progress_bar_type"] + assert progress_bar_used == "tqdm_gui" + # context progress bar 
type should not change + assert magics.context.progress_bar_type is None + + @pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_with_project(): ip = IPython.get_ipython() From c384b45e374ee0ee106a07922e04ce0438a2d59b Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 2 Dec 2020 13:36:57 -0600 Subject: [PATCH 069/341] chore: release 2.5.0 (#406) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 29 +++++++++++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 03a465926..c71f85d0c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,35 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.5.0](https://www.github.com/googleapis/python-bigquery/compare/v2.4.0...v2.5.0) (2020-12-02) + + +### Features + +* add `TableReference.__str__` to get table ID in standard SQL ([#405](https://www.github.com/googleapis/python-bigquery/issues/405)) ([53dff2a](https://www.github.com/googleapis/python-bigquery/commit/53dff2ad3889af04369a22437e6ab9b92c5755b6)), closes [#354](https://www.github.com/googleapis/python-bigquery/issues/354) +* add progress bar for magics ([#396](https://www.github.com/googleapis/python-bigquery/issues/396)) ([04d0273](https://www.github.com/googleapis/python-bigquery/commit/04d027317a99e3f353e0b7a18076da9b6ba4d8d3)) +* add support for unrecognized model types ([#401](https://www.github.com/googleapis/python-bigquery/issues/401)) ([168f035](https://www.github.com/googleapis/python-bigquery/commit/168f0354c4815bd1aeadbd4e388dcc9b32f97d6b)) + + +### Bug Fixes + +* avoid floating point for timestamp in `insert_rows` ([#393](https://www.github.com/googleapis/python-bigquery/issues/393)) 
([a1949ae](https://www.github.com/googleapis/python-bigquery/commit/a1949ae20ec4f9c771b0cffbcd70792dd6a30dbf)) + + +### Performance Improvements + +* don't fetch rows when waiting for query to finish ([#400](https://www.github.com/googleapis/python-bigquery/issues/400)) ([730df17](https://www.github.com/googleapis/python-bigquery/commit/730df17ae1ab0b0bb2454f3c134c8f62665bc51b)), closes [#374](https://www.github.com/googleapis/python-bigquery/issues/374) [#394](https://www.github.com/googleapis/python-bigquery/issues/394) + + +### Documentation + +* **samples:** add more clustering code snippets ([#330](https://www.github.com/googleapis/python-bigquery/issues/330)) ([809e4a2](https://www.github.com/googleapis/python-bigquery/commit/809e4a27b94ba30c10e0c9a7e89576a9de9fda2b)), closes [#329](https://www.github.com/googleapis/python-bigquery/issues/329) + + +### Dependencies + +* update required version of opentelemetry for opentelemetry-exporter-google-cloud ([#398](https://www.github.com/googleapis/python-bigquery/issues/398)) ([673a9cb](https://www.github.com/googleapis/python-bigquery/commit/673a9cb51c577c1dd016e76f3634b1e9e21482c5)) + ## [2.4.0](https://www.github.com/googleapis/python-bigquery/compare/v2.3.1...v2.4.0) (2020-11-16) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index fe11624d9..5836d8051 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.4.0" +__version__ = "2.5.0" From 57ffc665319331e0a00583d5d652fd14a510cf2a Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 3 Dec 2020 09:32:02 -0600 Subject: [PATCH 070/341] feat: add support for materialized views (#408) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request!
Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #407 🦕 --- google/cloud/bigquery/table.py | 95 +++++++++++++++++++--- samples/snippets/materialized_view.py | 86 ++++++++++++++++++++ samples/snippets/materialized_view_test.py | 93 +++++++++++++++++++++ tests/unit/test_table.py | 61 ++++++++++++-- 4 files changed, 317 insertions(+), 18 deletions(-) create mode 100644 samples/snippets/materialized_view.py create mode 100644 samples/snippets/materialized_view_test.py diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index f30c05773..6daccf518 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -293,15 +293,18 @@ class Table(object): """ _PROPERTY_TO_API_FIELD = { - "friendly_name": "friendlyName", + "encryption_configuration": "encryptionConfiguration", "expires": "expirationTime", - "time_partitioning": "timePartitioning", - "partitioning_type": "timePartitioning", + "external_data_configuration": "externalDataConfiguration", + "friendly_name": "friendlyName", + "mview_enable_refresh": "materializedView", + "mview_query": "materializedView", + "mview_refresh_interval": "materializedView", "partition_expiration": "timePartitioning", + "partitioning_type": "timePartitioning", + "time_partitioning": "timePartitioning", "view_use_legacy_sql": "view", "view_query": "view", - "external_data_configuration": "externalDataConfiguration", - "encryption_configuration": "encryptionConfiguration", "require_partition_filter": "requirePartitionFilter", } @@ -714,18 +717,14 @@ def view_query(self): 
Raises: ValueError: For invalid value types. """ - view = self._properties.get("view") - if view is not None: - return view.get("query") + return _helpers._get_sub_prop(self._properties, ["view", "query"]) @view_query.setter def view_query(self, value): if not isinstance(value, six.string_types): raise ValueError("Pass a string") - view = self._properties.get("view") - if view is None: - view = self._properties["view"] = {} - view["query"] = value + _helpers._set_sub_prop(self._properties, ["view", "query"], value) + view = self._properties["view"] # The service defaults useLegacySql to True, but this # client uses Standard SQL by default. if view.get("useLegacySql") is None: @@ -746,6 +745,78 @@ def view_use_legacy_sql(self, value): self._properties["view"] = {} self._properties["view"]["useLegacySql"] = value + @property + def mview_query(self): + """Optional[str]: SQL query defining the table as a materialized + view (defaults to :data:`None`). + """ + return _helpers._get_sub_prop(self._properties, ["materializedView", "query"]) + + @mview_query.setter + def mview_query(self, value): + _helpers._set_sub_prop( + self._properties, ["materializedView", "query"], str(value) + ) + + @mview_query.deleter + def mview_query(self): + """Delete SQL query defining the table as a materialized view.""" + self._properties.pop("materializedView", None) + + @property + def mview_last_refresh_time(self): + """Optional[datetime.datetime]: Datetime at which the materialized view was last + refreshed (:data:`None` until set from the server). + """ + refresh_time = _helpers._get_sub_prop( + self._properties, ["materializedView", "lastRefreshTime"] + ) + if refresh_time is not None: + # refresh_time will be in milliseconds. + return google.cloud._helpers._datetime_from_microseconds( + 1000 * int(refresh_time) + ) + + @property + def mview_enable_refresh(self): + """Optional[bool]: Enable automatic refresh of the materialized view + when the base table is updated. 
The default value is :data:`True`. + """ + return _helpers._get_sub_prop( + self._properties, ["materializedView", "enableRefresh"] + ) + + @mview_enable_refresh.setter + def mview_enable_refresh(self, value): + return _helpers._set_sub_prop( + self._properties, ["materializedView", "enableRefresh"], value + ) + + @property + def mview_refresh_interval(self): + """Optional[datetime.timedelta]: The maximum frequency at which this + materialized view will be refreshed. The default value is 1800000 + milliseconds (30 minutes). + """ + refresh_interval = _helpers._get_sub_prop( + self._properties, ["materializedView", "refreshIntervalMs"] + ) + if refresh_interval is not None: + return datetime.timedelta(milliseconds=int(refresh_interval)) + + @mview_refresh_interval.setter + def mview_refresh_interval(self, value): + if value is None: + refresh_interval_ms = None + else: + refresh_interval_ms = str(value // datetime.timedelta(milliseconds=1)) + + _helpers._set_sub_prop( + self._properties, + ["materializedView", "refreshIntervalMs"], + refresh_interval_ms, + ) + @property def streaming_buffer(self): """google.cloud.bigquery.StreamingBuffer: Information about a table's diff --git a/samples/snippets/materialized_view.py b/samples/snippets/materialized_view.py new file mode 100644 index 000000000..d925ec230 --- /dev/null +++ b/samples/snippets/materialized_view.py @@ -0,0 +1,86 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +def create_materialized_view(override_values={}): + # [START bigquery_create_materialized_view] + from google.cloud import bigquery + + bigquery_client = bigquery.Client() + + view_id = "my-project.my_dataset.my_materialized_view" + base_table_id = "my-project.my_dataset.my_base_table" + # [END bigquery_create_materialized_view] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + view_id = override_values.get("view_id", view_id) + base_table_id = override_values.get("base_table_id", view_id) + # [START bigquery_create_materialized_view] + view = bigquery.Table(view_id) + view.mview_query = f""" + SELECT product_id, SUM(clicks) AS sum_clicks + FROM `{base_table_id}` + GROUP BY 1 + """ + + # Make an API request to create the materialized view. + view = bigquery_client.create_table(view) + print(f"Created {view.table_type}: {str(view.reference)}") + # [END bigquery_create_materialized_view] + return view + + +def update_materialized_view(override_values={}): + # [START bigquery_update_materialized_view] + import datetime + from google.cloud import bigquery + + bigquery_client = bigquery.Client() + + view_id = "my-project.my_dataset.my_materialized_view" + # [END bigquery_update_materialized_view] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + view_id = override_values.get("view_id", view_id) + # [START bigquery_update_materialized_view] + view = bigquery.Table(view_id) + view.mview_enable_refresh = True + view.mview_refresh_interval = datetime.timedelta(hours=1) + + # Make an API request to update the materialized view. + view = bigquery_client.update_table( + view, + # Pass in a list of any fields you need to modify. 
+ ["mview_enable_refresh", "mview_refresh_interval"], + ) + print(f"Updated {view.table_type}: {str(view.reference)}") + # [END bigquery_update_materialized_view] + return view + + +def delete_materialized_view(override_values={}): + # [START bigquery_delete_materialized_view] + from google.cloud import bigquery + + bigquery_client = bigquery.Client() + + view_id = "my-project.my_dataset.my_materialized_view" + # [END bigquery_delete_materialized_view] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + view_id = override_values.get("view_id", view_id) + # [START bigquery_delete_materialized_view] + # Make an API request to delete the materialized view. + bigquery_client.delete_table(view_id) + # [END bigquery_delete_materialized_view] diff --git a/samples/snippets/materialized_view_test.py b/samples/snippets/materialized_view_test.py new file mode 100644 index 000000000..fc3db533c --- /dev/null +++ b/samples/snippets/materialized_view_test.py @@ -0,0 +1,93 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import datetime +import uuid + +from google.api_core import exceptions +from google.cloud import bigquery +import pytest + +import materialized_view + + +def temp_suffix(): + return str(uuid.uuid4()).replace("-", "_") + + +@pytest.fixture(scope="module") +def bigquery_client(): + bigquery_client = bigquery.Client() + return bigquery_client + + +@pytest.fixture(autouse=True) +def bigquery_client_patch(monkeypatch, bigquery_client): + monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client) + + +@pytest.fixture(scope="module") +def project_id(bigquery_client): + return bigquery_client.project + + +@pytest.fixture(scope="module") +def dataset_id(bigquery_client): + dataset_id = f"mvdataset_{temp_suffix()}" + bigquery_client.create_dataset(dataset_id) + yield dataset_id + bigquery_client.delete_dataset(dataset_id, delete_contents=True) + + +@pytest.fixture(scope="module") +def base_table_id(bigquery_client, project_id, dataset_id): + base_table_id = f"{project_id}.{dataset_id}.base_{temp_suffix()}" + # Schema from materialized views guide: + # https://cloud.google.com/bigquery/docs/materialized-views#create + base_table = bigquery.Table(base_table_id) + base_table.schema = [ + bigquery.SchemaField("product_id", bigquery.SqlTypeNames.INT64), + bigquery.SchemaField("clicks", bigquery.SqlTypeNames.INT64), + ] + bigquery_client.create_table(base_table) + yield base_table_id + bigquery_client.delete_table(base_table_id) + + +@pytest.fixture(scope="module") +def view_id(bigquery_client, project_id, dataset_id): + view_id = f"{project_id}.{dataset_id}.mview_{temp_suffix()}" + yield view_id + bigquery_client.delete_table(view_id, not_found_ok=True) + + +def test_materialized_view(capsys, bigquery_client, base_table_id, view_id): + override_values = { + "base_table_id": base_table_id, + "view_id": view_id, + } + view = materialized_view.create_materialized_view(override_values) + assert base_table_id in view.mview_query + out, _ = capsys.readouterr() + assert view_id 
in out + + view = materialized_view.update_materialized_view(override_values) + assert view.mview_enable_refresh + assert view.mview_refresh_interval == datetime.timedelta(hours=1) + out, _ = capsys.readouterr() + assert view_id in out + + materialized_view.delete_materialized_view(override_values) + with pytest.raises(exceptions.NotFound): + bigquery_client.get_table(view_id) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 67874ff91..c1876adaa 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import datetime as dt +import datetime import logging import time import unittest @@ -21,6 +21,7 @@ import mock import pkg_resources import pytest +import pytz import six import google.api_core.exceptions @@ -292,6 +293,13 @@ def _get_target_class(): return Table def _make_one(self, *args, **kw): + from google.cloud.bigquery.dataset import DatasetReference + + if len(args) == 0: + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + args = (table_ref,) + return self._get_target_class()(*args, **kw) def _setUpConstants(self): @@ -812,6 +820,48 @@ def test_labels_setter_bad_value(self): with self.assertRaises(ValueError): table.labels = 12345 + def test_mview_query(self): + table = self._make_one() + self.assertIsNone(table.mview_query) + table.mview_query = "SELECT name, SUM(number) FROM dset.tbl GROUP BY 1" + self.assertEqual( + table.mview_query, "SELECT name, SUM(number) FROM dset.tbl GROUP BY 1" + ) + del table.mview_query + self.assertIsNone(table.mview_query) + + def test_mview_last_refresh_time(self): + table = self._make_one() + self.assertIsNone(table.mview_last_refresh_time) + table._properties["materializedView"] = { + "lastRefreshTime": "1606751842496", + } + self.assertEqual( + table.mview_last_refresh_time, + datetime.datetime(2020, 11, 30, 15, 57, 22, 
496000, tzinfo=pytz.utc), + ) + + def test_mview_enable_refresh(self): + table = self._make_one() + self.assertIsNone(table.mview_enable_refresh) + table.mview_enable_refresh = True + self.assertTrue(table.mview_enable_refresh) + table.mview_enable_refresh = False + self.assertFalse(table.mview_enable_refresh) + table.mview_enable_refresh = None + self.assertIsNone(table.mview_enable_refresh) + + def test_mview_refresh_interval(self): + table = self._make_one() + self.assertIsNone(table.mview_refresh_interval) + table.mview_refresh_interval = datetime.timedelta(minutes=30) + self.assertEqual(table.mview_refresh_interval, datetime.timedelta(minutes=30)) + self.assertEqual( + table._properties["materializedView"]["refreshIntervalMs"], "1800000" + ) + table.mview_refresh_interval = None + self.assertIsNone(table.mview_refresh_interval) + def test_from_string(self): cls = self._get_target_class() got = cls.from_string("string-project.string_dataset.string_table") @@ -1286,7 +1336,6 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def _setUpConstants(self): - import datetime from google.cloud._helpers import UTC self.WHEN_TS = 1437767599.125 @@ -2413,7 +2462,7 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): tzinfo = None if PYARROW_VERSION >= PYARROW_TIMESTAMP_VERSION: - tzinfo = dt.timezone.utc + tzinfo = datetime.timezone.utc self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 2) # verify the number of rows @@ -2421,8 +2470,8 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): self.assertEqual( list(df["some_timestamp"]), [ - dt.datetime(4567, 1, 1, tzinfo=tzinfo), - dt.datetime(9999, 12, 31, tzinfo=tzinfo), + datetime.datetime(4567, 1, 1, tzinfo=tzinfo), + datetime.datetime(9999, 12, 31, tzinfo=tzinfo), ], ) @@ -2454,7 +2503,7 @@ def test_to_dataframe_datetime_out_of_pyarrow_bounds(self): self.assertEqual(list(df.columns), ["some_datetime"]) self.assertEqual( list(df["some_datetime"]), - 
[dt.datetime(4567, 1, 1), dt.datetime(9999, 12, 31)], + [datetime.datetime(4567, 1, 1), datetime.datetime(9999, 12, 31)], ) @unittest.skipIf(pandas is None, "Requires `pandas`") From f4210580442a0438ef07ab5b9a304dcea4831dbb Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 3 Dec 2020 19:21:10 +0100 Subject: [PATCH 071/341] chore(deps): update dependency grpcio to v1.34.0 (#411) Co-authored-by: Takashi Matsuo --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index eeb94db5a..d11397bc6 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.4.0 google-cloud-bigquery-storage==2.0.1 google-auth-oauthlib==0.4.2 -grpcio==1.33.2 +grpcio==1.34.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.3 From 04510a7dc7570466550bbdf500d7020bef2af44d Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Fri, 4 Dec 2020 15:42:46 -0500 Subject: [PATCH 072/341] fix: preserve timestamp microsecond precision with rows from REST API (#402) * feat: add formatOption default tru for tablelist and query result * feat: remove float point serialize * fix: lint * feat: remove comments --- google/cloud/bigquery/_helpers.py | 4 +- google/cloud/bigquery/client.py | 2 + tests/unit/job/test_query.py | 4 ++ tests/unit/job/test_query_pandas.py | 6 +-- tests/unit/test__helpers.py | 8 ++-- tests/unit/test_client.py | 74 +++++++++++++++-------------- tests/unit/test_table.py | 24 ++++++---- 7 files changed, 69 insertions(+), 53 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 35129d844..6f6a63ea5 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -81,8 +81,8 @@ def _bytes_from_json(value, field): def 
_timestamp_from_json(value, field): """Coerce 'value' to a datetime, if set or not nullable.""" if _not_null(value, field): - # value will be a float in seconds, to microsecond precision, in UTC. - return _datetime_from_microseconds(1e6 * float(value)) + # value will be an integer in microseconds since the epoch, in UTC. + return _datetime_from_microseconds(int(value)) def _timestamp_query_param_from_json(value, field): diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index cd1474336..168054623 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -3157,6 +3157,7 @@ def list_rows( if start_index is not None: params["startIndex"] = start_index + params["formatOptions.useInt64Timestamp"] = True row_iterator = RowIterator( client=self, api_request=functools.partial(self._call_api, retry, timeout=timeout), @@ -3237,6 +3238,7 @@ def _list_rows_from_query_results( if start_index is not None: params["startIndex"] = start_index + params["formatOptions.useInt64Timestamp"] = True row_iterator = RowIterator( client=self, api_request=functools.partial(self._call_api, retry, timeout=timeout), diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index daaf2e557..0567b59cd 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -839,6 +839,7 @@ def test_result(self): query_params={ "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, "location": "EU", + "formatOptions.useInt64Timestamp": True, }, timeout=None, ) @@ -887,6 +888,7 @@ def test_result_with_done_job_calls_get_query_results(self): query_params={ "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, "location": "EU", + "formatOptions.useInt64Timestamp": True, }, timeout=None, ) @@ -1118,6 +1120,7 @@ def test_result_w_page_size(self): "maxResults": 3, "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, "location": "US", + "formatOptions.useInt64Timestamp": True, }, timeout=None, ) @@ -1129,6 +1132,7 @@ def
test_result_w_page_size(self): "maxResults": 3, "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, "location": "US", + "formatOptions.useInt64Timestamp": True, }, timeout=None, ) diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index cdd6f2b3c..d1600ad43 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -501,7 +501,7 @@ def test_to_dataframe_column_dtypes(): } row_data = [ [ - "1.4338368E9", + "1433836800000000", "420", "1.1", "1.77", @@ -509,8 +509,8 @@ def test_to_dataframe_column_dtypes(): "true", "1999-12-01", ], - ["1.3878117E9", "2580", "17.7", "28.5", "Cash", "false", "1953-06-14"], - ["1.3855653E9", "2280", "4.4", "7.1", "Credit", "true", "1981-11-04"], + ["1387811700000000", "2580", "17.7", "28.5", "Cash", "false", "1953-06-14"], + ["1385565300000000", "2280", "4.4", "7.1", "Credit", "true", "1981-11-04"], ] rows = [{"f": [{"v": field} for field in row]} for row in row_data] query_resource["rows"] = rows diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index a52581501..5907a3678 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -190,18 +190,18 @@ def test_w_none_required(self): with self.assertRaises(TypeError): self._call_fut(None, _Field("REQUIRED")) - def test_w_string_value(self): + def test_w_string_int_value(self): from google.cloud._helpers import _EPOCH - coerced = self._call_fut("1.234567", object()) + coerced = self._call_fut("1234567", object()) self.assertEqual( coerced, _EPOCH + datetime.timedelta(seconds=1, microseconds=234567) ) - def test_w_float_value(self): + def test_w_int_value(self): from google.cloud._helpers import _EPOCH - coerced = self._call_fut(1.234567, object()) + coerced = self._call_fut(1234567, object()) self.assertEqual( coerced, _EPOCH + datetime.timedelta(seconds=1, microseconds=234567) ) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index c4bdea2f8..f28455cf8 100644 --- 
a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -6739,42 +6739,21 @@ def test_list_rows(self): self.DS_ID, self.TABLE_ID, ) - WHEN_TS = 1437767599.006 - WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace(tzinfo=UTC) - WHEN_1 = WHEN + datetime.timedelta(seconds=1) - WHEN_2 = WHEN + datetime.timedelta(seconds=2) + WHEN_TS = 1437767599006000 + + WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS / 1e6).replace(tzinfo=UTC) + WHEN_1 = WHEN + datetime.timedelta(microseconds=1) + WHEN_2 = WHEN + datetime.timedelta(microseconds=2) ROWS = 1234 TOKEN = "TOKEN" - def _bigquery_timestamp_float_repr(ts_float): - # Preserve microsecond precision for E+09 timestamps - return "%0.15E" % (ts_float,) - DATA = { "totalRows": str(ROWS), "pageToken": TOKEN, "rows": [ - { - "f": [ - {"v": "Phred Phlyntstone"}, - {"v": "32"}, - {"v": _bigquery_timestamp_float_repr(WHEN_TS)}, - ] - }, - { - "f": [ - {"v": "Bharney Rhubble"}, - {"v": "33"}, - {"v": _bigquery_timestamp_float_repr(WHEN_TS + 1)}, - ] - }, - { - "f": [ - {"v": "Wylma Phlyntstone"}, - {"v": "29"}, - {"v": _bigquery_timestamp_float_repr(WHEN_TS + 2)}, - ] - }, + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}, {"v": WHEN_TS}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}, {"v": WHEN_TS + 1}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}, {"v": WHEN_TS + 2}]}, {"f": [{"v": "Bhettye Rhubble"}, {"v": None}, {"v": None}]}, ], } @@ -6807,7 +6786,10 @@ def _bigquery_timestamp_float_repr(ts_float): self.assertEqual(iterator.next_page_token, TOKEN) conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={}, timeout=7.5 + method="GET", + path="/%s" % PATH, + query_params={"formatOptions.useInt64Timestamp": True}, + timeout=7.5, ) def test_list_rows_w_start_index_w_page_size(self): @@ -6856,20 +6838,30 @@ def test_list_rows_w_start_index_w_page_size(self): self.assertEqual(len(rows), 2) self.assertEqual(rows[0], Row(("Wylma Phlyntstone",), f2i)) self.assertEqual(rows[1], 
Row(("Bhettye Rhubble",), f2i)) - self.assertEqual(extra_params, {"startIndex": 1}) + self.assertEqual( + extra_params, {"startIndex": 1, "formatOptions.useInt64Timestamp": True} + ) conn.api_request.assert_has_calls( [ mock.call( method="GET", path="/%s" % PATH, - query_params={"startIndex": 1, "maxResults": 2}, + query_params={ + "startIndex": 1, + "maxResults": 2, + "formatOptions.useInt64Timestamp": True, + }, timeout=None, ), mock.call( method="GET", path="/%s" % PATH, - query_params={"pageToken": "some-page-token", "maxResults": 2}, + query_params={ + "pageToken": "some-page-token", + "maxResults": 2, + "formatOptions.useInt64Timestamp": True, + }, timeout=None, ), ] @@ -6920,6 +6912,7 @@ def test_list_rows_query_params(self): iterator = client.list_rows(table, **test[0]) six.next(iterator.pages) req = conn.api_request.call_args_list[i] + test[1]["formatOptions.useInt64Timestamp"] = True self.assertEqual(req[1]["query_params"], test[1], "for kwargs %s" % test[0]) def test_list_rows_repeated_fields(self): @@ -6979,7 +6972,10 @@ def test_list_rows_repeated_fields(self): conn.api_request.assert_called_once_with( method="GET", path="/%s" % PATH, - query_params={"selectedFields": "color,struct"}, + query_params={ + "selectedFields": "color,struct", + "formatOptions.useInt64Timestamp": True, + }, timeout=None, ) @@ -7047,7 +7043,10 @@ def test_list_rows_w_record_schema(self): self.assertEqual(page_token, TOKEN) conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={}, timeout=None + method="GET", + path="/%s" % PATH, + query_params={"formatOptions.useInt64Timestamp": True}, + timeout=None, ) def test_list_rows_with_missing_schema(self): @@ -7109,7 +7108,10 @@ def test_list_rows_with_missing_schema(self): rows = list(row_iter) conn.api_request.assert_called_once_with( - method="GET", path=tabledata_path, query_params={}, timeout=None + method="GET", + path=tabledata_path, + query_params={"formatOptions.useInt64Timestamp": True}, + 
timeout=None, ) self.assertEqual(row_iter.total_rows, 3, msg=repr(table)) self.assertEqual(rows[0].name, "Phred Phlyntstone", msg=repr(table)) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index c1876adaa..0e7b0bb4d 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2451,8 +2451,8 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): schema = [SchemaField("some_timestamp", "TIMESTAMP")] rows = [ - {"f": [{"v": "81953424000.0"}]}, # 4567-01-01 00:00:00 UTC - {"f": [{"v": "253402214400.0"}]}, # 9999-12-31 00:00:00 UTC + {"f": [{"v": "81953424000000000"}]}, # 4567-01-01 00:00:00 UTC + {"f": [{"v": "253402214400000000"}]}, # 9999-12-31 00:00:00 UTC ] path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) @@ -2675,9 +2675,9 @@ def test_to_dataframe_w_various_types_nullable(self): ] row_data = [ [None, None, None, None, None, None], - ["1.4338368E9", "420", "1.1", u"Cash", "true", "1999-12-01"], - ["1.3878117E9", "2580", "17.7", u"Cash", "false", "1953-06-14"], - ["1.3855653E9", "2280", "4.4", u"Credit", "true", "1981-11-04"], + ["1433836800000000", "420", "1.1", u"Cash", "true", "1999-12-01"], + ["1387811700000000", "2580", "17.7", u"Cash", "false", "1953-06-14"], + ["1385565300000000", "2280", "4.4", u"Credit", "true", "1981-11-04"], ] rows = [{"f": [{"v": field} for field in row]} for row in row_data] path = "/foo" @@ -2715,9 +2715,17 @@ def test_to_dataframe_column_dtypes(self): SchemaField("date", "DATE"), ] row_data = [ - ["1.4338368E9", "420", "1.1", "1.77", u"Cash", "true", "1999-12-01"], - ["1.3878117E9", "2580", "17.7", "28.5", u"Cash", "false", "1953-06-14"], - ["1.3855653E9", "2280", "4.4", "7.1", u"Credit", "true", "1981-11-04"], + ["1433836800000000", "420", "1.1", "1.77", u"Cash", "true", "1999-12-01"], + [ + "1387811700000000", + "2580", + "17.7", + "28.5", + u"Cash", + "false", + "1953-06-14", + ], + ["1385565300000000", "2280", "4.4", "7.1", u"Credit", "true", "1981-11-04"], ] rows = [{"f": 
[{"v": field} for field in row]} for row in row_data] path = "/foo" From 985a8cfecce6915d05d7d5d4852fcc7cdbb3770c Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Fri, 4 Dec 2020 13:58:04 -0700 Subject: [PATCH 073/341] chore: require samples checks (#409) Make samples kokoro sessions required --- .github/sync-repo-settings.yaml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 .github/sync-repo-settings.yaml diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml new file mode 100644 index 000000000..b18fb9c29 --- /dev/null +++ b/.github/sync-repo-settings.yaml @@ -0,0 +1,14 @@ +# https://github.com/googleapis/repo-automation-bots/tree/master/packages/sync-repo-settings +# Rules for master branch protection +branchProtectionRules: +# Identifies the protection rule pattern. Name of the branch to be protected. +# Defaults to `master` +- pattern: master + requiredStatusCheckContexts: + - 'Kokoro' + - 'Kokoro snippets-3.8' + - 'cla/google' + - 'Samples - Lint' + - 'Samples - Python 3.6' + - 'Samples - Python 3.7' + - 'Samples - Python 3.8' From d472d2d2b33e40b954652d31476dea8c90e6a2dc Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 4 Dec 2020 16:04:09 -0600 Subject: [PATCH 074/341] feat: convert `BIGNUMERIC` values to decimal objects (#414) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Towards #367 🦕 --- google/cloud/bigquery/_helpers.py | 2 + tests/unit/test_client.py | 88 +++++++++++++++++++++++-------- 2 files changed, 67 insertions(+), 23 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 6f6a63ea5..716c8a394 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -188,6 +188,7 @@ def _record_from_json(value, field): "FLOAT": _float_from_json, "FLOAT64": _float_from_json, "NUMERIC": _decimal_from_json, + "BIGNUMERIC": _decimal_from_json, "BOOLEAN": _bool_from_json, "BOOL": _bool_from_json, "STRING": _string_from_json, @@ -347,6 +348,7 @@ def _time_to_json(value): "FLOAT": _float_to_json, "FLOAT64": _float_to_json, "NUMERIC": _decimal_to_json, + "BIGNUMERIC": _decimal_to_json, "BOOLEAN": _bool_to_json, "BOOL": _bool_to_json, "BYTES": _bytes_to_json, diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index f28455cf8..0e68b2538 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -6290,38 +6290,43 @@ def test_insert_rows_w_numeric(self): creds = _make_credentials() http = object() client = self._make_one(project=project, credentials=creds, _http=http) - conn = client._connection = make_connection({}) table_ref = DatasetReference(project, ds_id).table(table_id) - schema = [SchemaField("account", "STRING"), SchemaField("balance", "NUMERIC")] - insert_table = table.Table(table_ref, schema=schema) rows = [ ("Savings", decimal.Decimal("23.47")), ("Checking", decimal.Decimal("1.98")), ("Mortgage", decimal.Decimal("-12345678909.87654321")), ] + schemas = [ + [SchemaField("account", "STRING"), SchemaField("balance", "NUMERIC")], + [SchemaField("account", "STRING"), SchemaField("balance", 
"BIGNUMERIC")], + ] - with mock.patch("uuid.uuid4", side_effect=map(str, range(len(rows)))): - errors = client.insert_rows(insert_table, rows) + for schema in schemas: + conn = client._connection = make_connection({}) - self.assertEqual(len(errors), 0) - rows_json = [ - {"account": "Savings", "balance": "23.47"}, - {"account": "Checking", "balance": "1.98"}, - {"account": "Mortgage", "balance": "-12345678909.87654321"}, - ] - sent = { - "rows": [ - {"json": row, "insertId": str(i)} for i, row in enumerate(rows_json) + insert_table = table.Table(table_ref, schema=schema) + with mock.patch("uuid.uuid4", side_effect=map(str, range(len(rows)))): + errors = client.insert_rows(insert_table, rows) + + self.assertEqual(len(errors), 0) + rows_json = [ + {"account": "Savings", "balance": "23.47"}, + {"account": "Checking", "balance": "1.98"}, + {"account": "Mortgage", "balance": "-12345678909.87654321"}, ] - } - conn.api_request.assert_called_once_with( - method="POST", - path="/projects/{}/datasets/{}/tables/{}/insertAll".format( - project, ds_id, table_id - ), - data=sent, - timeout=None, - ) + sent = { + "rows": [ + {"json": row, "insertId": str(i)} for i, row in enumerate(rows_json) + ] + } + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/{}/datasets/{}/tables/{}/insertAll".format( + project, ds_id, table_id + ), + data=sent, + timeout=None, + ) @unittest.skipIf(pandas is None, "Requires `pandas`") def test_insert_rows_from_dataframe(self): @@ -6915,6 +6920,43 @@ def test_list_rows_query_params(self): test[1]["formatOptions.useInt64Timestamp"] = True self.assertEqual(req[1]["query_params"], test[1], "for kwargs %s" % test[0]) + def test_list_rows_w_numeric(self): + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table + + resource = { + "totalRows": 3, + "rows": [ + {"f": [{"v": "-1.23456789"}, {"v": "-123456789.987654321"}]}, + {"f": [{"v": None}, {"v": 
"3.141592653589793238462643383279502884"}]}, + {"f": [{"v": "2718281828459045235360287471.352662497"}, {"v": None}]}, + ], + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + client._connection = make_connection(resource) + schema = [ + SchemaField("num", "NUMERIC"), + SchemaField("bignum", "BIGNUMERIC"), + ] + table = Table(self.TABLE_REF, schema=schema) + + iterator = client.list_rows(table) + rows = list(iterator) + + self.assertEqual(len(rows), 3) + self.assertEqual(rows[0]["num"], decimal.Decimal("-1.23456789")) + self.assertEqual(rows[0]["bignum"], decimal.Decimal("-123456789.987654321")) + self.assertIsNone(rows[1]["num"]) + self.assertEqual( + rows[1]["bignum"], decimal.Decimal("3.141592653589793238462643383279502884") + ) + self.assertEqual( + rows[2]["num"], decimal.Decimal("2718281828459045235360287471.352662497") + ) + self.assertIsNone(rows[2]["bignum"]) + def test_list_rows_repeated_fields(self): from google.cloud.bigquery.schema import SchemaField From 34b9948b48fda22341715d5e2261b7bd0b376179 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Sat, 5 Dec 2020 06:50:06 +0100 Subject: [PATCH 075/341] chore(deps): update dependency google-cloud-bigquery to v2.5.0 (#410) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | minor | `==2.4.0` -> `==2.5.0` | --- ### Release Notes
googleapis/python-bigquery ### [`v2.5.0`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​250-httpswwwgithubcomgoogleapispython-bigquerycomparev240v250-2020-12-02) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.4.0...v2.5.0) ##### Features - add `TableReference.__str__` to get table ID in standard SQL ([#​405](https://www.github.com/googleapis/python-bigquery/issues/405)) ([53dff2a](https://www.github.com/googleapis/python-bigquery/commit/53dff2ad3889af04369a22437e6ab9b92c5755b6)), closes [#​354](https://www.github.com/googleapis/python-bigquery/issues/354) - add progress bar for magics ([#​396](https://www.github.com/googleapis/python-bigquery/issues/396)) ([04d0273](https://www.github.com/googleapis/python-bigquery/commit/04d027317a99e3f353e0b7a18076da9b6ba4d8d3)) - add support for unrecognized model types ([#​401](https://www.github.com/googleapis/python-bigquery/issues/401)) ([168f035](https://www.github.com/googleapis/python-bigquery/commit/168f0354c4815bd1aeadbd4e388dcc9b32f97d6b)) ##### Bug Fixes - avoid floating point for timestamp in `insert_rows` ([#​393](https://www.github.com/googleapis/python-bigquery/issues/393)) ([a1949ae](https://www.github.com/googleapis/python-bigquery/commit/a1949ae20ec4f9c771b0cffbcd70792dd6a30dbf)) ##### Performance Improvements - don't fetch rows when waiting for query to finish ([#​400](https://www.github.com/googleapis/python-bigquery/issues/400)) ([730df17](https://www.github.com/googleapis/python-bigquery/commit/730df17ae1ab0b0bb2454f3c134c8f62665bc51b)), closes [#​374](https://www.github.com/googleapis/python-bigquery/issues/374) [#​394](https://www.github.com/googleapis/python-bigquery/issues/394) ##### Documentation - **samples:** add more clustering code snippets ([#​330](https://www.github.com/googleapis/python-bigquery/issues/330)) ([809e4a2](https://www.github.com/googleapis/python-bigquery/commit/809e4a27b94ba30c10e0c9a7e89576a9de9fda2b)), closes 
[#​329](https://www.github.com/googleapis/python-bigquery/issues/329) ##### Dependencies - update required version of opentelementry for opentelemetry-exporter-google-cloud ([#​398](https://www.github.com/googleapis/python-bigquery/issues/398)) ([673a9cb](https://www.github.com/googleapis/python-bigquery/commit/673a9cb51c577c1dd016e76f3634b1e9e21482c5))
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index d11397bc6..3eecbf546 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.4.0 +google-cloud-bigquery==2.5.0 google-cloud-bigquery-storage==2.0.1 google-auth-oauthlib==0.4.2 grpcio==1.34.0 From a9d8ae8a920dec655b77dca9d9128e569f1d07a7 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 7 Dec 2020 12:38:45 -0600 Subject: [PATCH 076/341] docs: update intersphinx links (#404) --- .kokoro/docs/common.cfg | 2 +- .kokoro/samples/python3.6/common.cfg | 6 + .kokoro/samples/python3.7/common.cfg | 6 + .kokoro/samples/python3.8/common.cfg | 6 + .kokoro/test-samples.sh | 8 +- CODE_OF_CONDUCT.md | 123 +++++++++++++----- docs/bigquery_v2/types.rst | 1 + docs/conf.py | 7 +- google/cloud/bigquery_v2/types/model.py | 4 +- .../cloud/bigquery_v2/types/standard_sql.py | 4 +- samples/snippets/noxfile.py | 26 +++- synth.metadata | 94 +------------ synth.py | 19 ++- 13 files changed, 170 insertions(+), 136 deletions(-) diff --git a/.kokoro/docs/common.cfg b/.kokoro/docs/common.cfg index 8f9807f72..0c99ae611 100644 --- a/.kokoro/docs/common.cfg +++ b/.kokoro/docs/common.cfg @@ -30,7 +30,7 @@ env_vars: { 
env_vars: { key: "V2_STAGING_BUCKET" - value: "docs-staging-v2-staging" + value: "docs-staging-v2" } # It will upload the docker image after successful builds. diff --git a/.kokoro/samples/python3.6/common.cfg b/.kokoro/samples/python3.6/common.cfg index a56768eae..f3b930960 100644 --- a/.kokoro/samples/python3.6/common.cfg +++ b/.kokoro/samples/python3.6/common.cfg @@ -13,6 +13,12 @@ env_vars: { value: "py-3.6" } +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-py36" +} + env_vars: { key: "TRAMPOLINE_BUILD_FILE" value: "github/python-bigquery/.kokoro/test-samples.sh" diff --git a/.kokoro/samples/python3.7/common.cfg b/.kokoro/samples/python3.7/common.cfg index c93747180..fc0654565 100644 --- a/.kokoro/samples/python3.7/common.cfg +++ b/.kokoro/samples/python3.7/common.cfg @@ -13,6 +13,12 @@ env_vars: { value: "py-3.7" } +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-py37" +} + env_vars: { key: "TRAMPOLINE_BUILD_FILE" value: "github/python-bigquery/.kokoro/test-samples.sh" diff --git a/.kokoro/samples/python3.8/common.cfg b/.kokoro/samples/python3.8/common.cfg index 9808f15e3..2b0bf59b3 100644 --- a/.kokoro/samples/python3.8/common.cfg +++ b/.kokoro/samples/python3.8/common.cfg @@ -13,6 +13,12 @@ env_vars: { value: "py-3.8" } +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-py38" +} + env_vars: { key: "TRAMPOLINE_BUILD_FILE" value: "github/python-bigquery/.kokoro/test-samples.sh" diff --git a/.kokoro/test-samples.sh b/.kokoro/test-samples.sh index 905732a40..c5653a81d 100755 --- a/.kokoro/test-samples.sh +++ b/.kokoro/test-samples.sh @@ -28,6 +28,12 @@ if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then git checkout $LATEST_RELEASE fi +# Exit early if samples directory doesn't exist +if [ ! 
-d "./samples" ]; then + echo "No tests run. `./samples` not found" + exit 0 +fi + # Disable buffering, so that the logs stream through. export PYTHONUNBUFFERED=1 @@ -101,4 +107,4 @@ cd "$ROOT" # Workaround for Kokoro permissions issue: delete secrets rm testing/{test-env.sh,client-secrets.json,service-account.json} -exit "$RTN" \ No newline at end of file +exit "$RTN" diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index b3d1f6029..039f43681 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,44 +1,95 @@ -# Contributor Code of Conduct +# Code of Conduct -As contributors and maintainers of this project, -and in the interest of fostering an open and welcoming community, -we pledge to respect all people who contribute through reporting issues, -posting feature requests, updating documentation, -submitting pull requests or patches, and other activities. +## Our Pledge -We are committed to making participation in this project -a harassment-free experience for everyone, -regardless of level of experience, gender, gender identity and expression, -sexual orientation, disability, personal appearance, -body size, race, ethnicity, age, religion, or nationality. +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, gender identity and expression, level of +experience, education, socio-economic status, nationality, personal appearance, +race, religion, or sexual identity and orientation. 
+ +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members Examples of unacceptable behavior by participants include: -* The use of sexualized language or imagery -* Personal attacks -* Trolling or insulting/derogatory comments -* Public or private harassment -* Publishing other's private information, -such as physical or electronic -addresses, without explicit permission -* Other unethical or unprofessional conduct. +* The use of sexualized language or imagery and unwelcome sexual attention or + advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. Project maintainers have the right and responsibility to remove, edit, or reject -comments, commits, code, wiki edits, issues, and other contributions -that are not aligned to this Code of Conduct. -By adopting this Code of Conduct, -project maintainers commit themselves to fairly and consistently -applying these principles to every aspect of managing this project. -Project maintainers who do not follow or enforce the Code of Conduct -may be permanently removed from the project team. 
- -This code of conduct applies both within project spaces and in public spaces -when an individual is representing the project or its community. - -Instances of abusive, harassing, or otherwise unacceptable behavior -may be reported by opening an issue -or contacting one or more of the project maintainers. - -This Code of Conduct is adapted from the [Contributor Covenant](http://contributor-covenant.org), version 1.2.0, -available at [http://contributor-covenant.org/version/1/2/0/](http://contributor-covenant.org/version/1/2/0/) +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, or to ban temporarily or permanently any +contributor for other behaviors that they deem inappropriate, threatening, +offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. Representation of a project may be +further defined and clarified by project maintainers. + +This Code of Conduct also applies outside the project spaces when the Project +Steward has a reasonable belief that an individual's behavior may have a +negative impact on the project or its community. + +## Conflict Resolution + +We do not believe that all conflict is bad; healthy debate and disagreement +often yield positive results. However, it is never okay to be disrespectful or +to engage in behavior that violates the project’s code of conduct. + +If you see someone violating the code of conduct, you are encouraged to address +the behavior directly with those involved. Many issues can be resolved quickly +and easily, and this gives people more control over the outcome of their +dispute. 
If you are unable to resolve the matter for any reason, or if the +behavior is threatening or harassing, report it. We are dedicated to providing +an environment where participants feel welcome and safe. + + +Reports should be directed to *googleapis-stewards@google.com*, the +Project Steward(s) for *Google Cloud Client Libraries*. It is the Project Steward’s duty to +receive and address reported violations of the code of conduct. They will then +work with a committee consisting of representatives from the Open Source +Programs Office and the Google Open Source Strategy team. If for any reason you +are uncomfortable reaching out to the Project Steward, please email +opensource@google.com. + +We will investigate every complaint, but you may not receive a direct response. +We will use our discretion in determining when and how to follow up on reported +incidents, which may range from not taking action to permanent expulsion from +the project and project-sponsored spaces. We will notify the accused of the +report and provide them an opportunity to discuss it before any action is taken. +The identity of the reporter will be omitted from the details of the report +supplied to the accused. In potentially harmful situations, such as ongoing +harassment or threats to anyone's safety, we may take action without notice. + +## Attribution + +This Code of Conduct is adapted from the Contributor Covenant, version 1.4, +available at +https://www.contributor-covenant.org/version/1/4/code-of-conduct.html \ No newline at end of file diff --git a/docs/bigquery_v2/types.rst b/docs/bigquery_v2/types.rst index f43809958..41b906514 100644 --- a/docs/bigquery_v2/types.rst +++ b/docs/bigquery_v2/types.rst @@ -3,3 +3,4 @@ Types for Google Cloud Bigquery v2 API .. 
automodule:: google.cloud.bigquery_v2.types :members: + :show-inheritance: diff --git a/docs/conf.py b/docs/conf.py index ee59f3492..37e0c46af 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -346,10 +346,11 @@ # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = { - "python": ("http://python.readthedocs.org/en/latest/", None), - "google-auth": ("https://google-auth.readthedocs.io/en/stable", None), + "python": ("https://python.readthedocs.org/en/latest/", None), + "google-auth": ("https://googleapis.dev/python/google-auth/latest/", None), "google.api_core": ("https://googleapis.dev/python/google-api-core/latest/", None,), - "grpc": ("https://grpc.io/grpc/python/", None), + "grpc": ("https://grpc.github.io/grpc/python/", None), + "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), } diff --git a/google/cloud/bigquery_v2/types/model.py b/google/cloud/bigquery_v2/types/model.py index 3a7bbf43b..c3530dec2 100644 --- a/google/cloud/bigquery_v2/types/model.py +++ b/google/cloud/bigquery_v2/types/model.py @@ -1495,7 +1495,7 @@ class PatchModelRequest(proto.Message): model_id = proto.Field(proto.STRING, number=3) - model = proto.Field(proto.MESSAGE, number=4, message=Model,) + model = proto.Field(proto.MESSAGE, number=4, message="Model",) class DeleteModelRequest(proto.Message): @@ -1559,7 +1559,7 @@ class ListModelsResponse(proto.Message): def raw_page(self): return self - models = proto.RepeatedField(proto.MESSAGE, number=1, message=Model,) + models = proto.RepeatedField(proto.MESSAGE, number=1, message="Model",) next_page_token = proto.Field(proto.STRING, number=2) diff --git a/google/cloud/bigquery_v2/types/standard_sql.py b/google/cloud/bigquery_v2/types/standard_sql.py index 1a32a3c75..80e4632f7 100644 --- a/google/cloud/bigquery_v2/types/standard_sql.py +++ b/google/cloud/bigquery_v2/types/standard_sql.py @@ -90,7 +90,7 @@ class StandardSqlField(proto.Message): name = proto.Field(proto.STRING, 
number=1) - type = proto.Field(proto.MESSAGE, number=2, message=StandardSqlDataType,) + type = proto.Field(proto.MESSAGE, number=2, message="StandardSqlDataType",) class StandardSqlStructType(proto.Message): @@ -101,7 +101,7 @@ class StandardSqlStructType(proto.Message): """ - fields = proto.RepeatedField(proto.MESSAGE, number=1, message=StandardSqlField,) + fields = proto.RepeatedField(proto.MESSAGE, number=1, message="StandardSqlField",) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index 5660f08be..ab2c49227 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -38,6 +38,9 @@ TEST_CONFIG = { # You can opt out from the test for specific Python versions. "ignored_versions": ["2.7"], + # Old samples are opted out of enforcing Python type hints + # All new samples should feature them + "enforce_type_hints": False, # An envvar key for determining the project id to use. Change it # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a # build specific Cloud project. 
You can also use your own string @@ -130,7 +133,10 @@ def _determine_local_import_names(start_dir): @nox.session def lint(session): - session.install("flake8", "flake8-import-order") + if not TEST_CONFIG["enforce_type_hints"]: + session.install("flake8", "flake8-import-order") + else: + session.install("flake8", "flake8-import-order", "flake8-annotations") local_names = _determine_local_import_names(".") args = FLAKE8_COMMON_ARGS + [ @@ -141,6 +147,19 @@ def lint(session): session.run("flake8", *args) +# +# Black +# + + +@nox.session +def blacken(session): + session.install("black") + python_files = [path for path in os.listdir(".") if path.endswith(".py")] + + session.run("black", *python_files) + + # # Sample Tests # @@ -199,6 +218,11 @@ def _get_repo_root(): break if Path(p / ".git").exists(): return str(p) + # .git is not available in repos cloned via Cloud Build + # setup.py is always in the library's root, so use that instead + # https://github.com/googleapis/synthtool/issues/792 + if Path(p / "setup.py").exists(): + return str(p) p = p.parent raise Exception("Unable to detect repository root.") diff --git a/synth.metadata b/synth.metadata index db77e463d..6b7854860 100644 --- a/synth.metadata +++ b/synth.metadata @@ -3,30 +3,30 @@ { "git": { "name": ".", - "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "5178b55682f5e264bfc082cde26acb1fdc953a18" + "remote": "git@github.com:tswast/python-bigquery.git", + "sha": "5a422eb20c57dae66c5716fd319b66432d3edce6" } }, { "git": { "name": "googleapis", "remote": "https://github.com/googleapis/googleapis.git", - "sha": "215c12ade72d9d9616457d9b8b2f8a37f38e79f3", - "internalRef": "337113354" + "sha": "df4fd38d040c5c8a0869936205bca13fb64b2cff", + "internalRef": "344443035" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "f3c04883d6c43261ff13db1f52d03a283be06871" + "sha": "c2de32114ec484aa708d32012d1fa8d75232daf5" } }, { "git": { "name": 
"synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "f3c04883d6c43261ff13db1f52d03a283be06871" + "sha": "c2de32114ec484aa708d32012d1fa8d75232daf5" } } ], @@ -40,87 +40,5 @@ "generator": "bazel" } } - ], - "generatedFiles": [ - ".flake8", - ".github/CONTRIBUTING.md", - ".github/ISSUE_TEMPLATE/bug_report.md", - ".github/ISSUE_TEMPLATE/feature_request.md", - ".github/ISSUE_TEMPLATE/support_request.md", - ".github/PULL_REQUEST_TEMPLATE.md", - ".github/release-please.yml", - ".github/snippet-bot.yml", - ".gitignore", - ".kokoro/build.sh", - ".kokoro/continuous/common.cfg", - ".kokoro/continuous/continuous.cfg", - ".kokoro/docker/docs/Dockerfile", - ".kokoro/docker/docs/fetch_gpg_keys.sh", - ".kokoro/docs/common.cfg", - ".kokoro/docs/docs-presubmit.cfg", - ".kokoro/docs/docs.cfg", - ".kokoro/populate-secrets.sh", - ".kokoro/presubmit/common.cfg", - ".kokoro/presubmit/presubmit.cfg", - ".kokoro/presubmit/system-3.8.cfg", - ".kokoro/publish-docs.sh", - ".kokoro/release.sh", - ".kokoro/release/common.cfg", - ".kokoro/release/release.cfg", - ".kokoro/samples/lint/common.cfg", - ".kokoro/samples/lint/continuous.cfg", - ".kokoro/samples/lint/periodic.cfg", - ".kokoro/samples/lint/presubmit.cfg", - ".kokoro/samples/python3.6/common.cfg", - ".kokoro/samples/python3.6/continuous.cfg", - ".kokoro/samples/python3.6/periodic.cfg", - ".kokoro/samples/python3.6/presubmit.cfg", - ".kokoro/samples/python3.7/common.cfg", - ".kokoro/samples/python3.7/continuous.cfg", - ".kokoro/samples/python3.7/periodic.cfg", - ".kokoro/samples/python3.7/presubmit.cfg", - ".kokoro/samples/python3.8/common.cfg", - ".kokoro/samples/python3.8/continuous.cfg", - ".kokoro/samples/python3.8/periodic.cfg", - ".kokoro/samples/python3.8/presubmit.cfg", - ".kokoro/test-samples.sh", - ".kokoro/trampoline.sh", - ".kokoro/trampoline_v2.sh", - ".trampolinerc", - "CODE_OF_CONDUCT.md", - "CONTRIBUTING.rst", - "LICENSE", - "MANIFEST.in", - "docs/_static/custom.css", - 
"docs/_templates/layout.html", - "docs/bigquery_v2/services.rst", - "docs/bigquery_v2/types.rst", - "docs/conf.py", - "google/cloud/bigquery_v2/__init__.py", - "google/cloud/bigquery_v2/proto/encryption_config.proto", - "google/cloud/bigquery_v2/proto/model.proto", - "google/cloud/bigquery_v2/proto/model_reference.proto", - "google/cloud/bigquery_v2/proto/standard_sql.proto", - "google/cloud/bigquery_v2/proto/table_reference.proto", - "google/cloud/bigquery_v2/py.typed", - "google/cloud/bigquery_v2/types/__init__.py", - "google/cloud/bigquery_v2/types/encryption_config.py", - "google/cloud/bigquery_v2/types/model.py", - "google/cloud/bigquery_v2/types/model_reference.py", - "google/cloud/bigquery_v2/types/standard_sql.py", - "google/cloud/bigquery_v2/types/table_reference.py", - "mypy.ini", - "renovate.json", - "samples/AUTHORING_GUIDE.md", - "samples/CONTRIBUTING.md", - "scripts/decrypt-secrets.sh", - "scripts/readme-gen/readme_gen.py", - "scripts/readme-gen/templates/README.tmpl.rst", - "scripts/readme-gen/templates/auth.tmpl.rst", - "scripts/readme-gen/templates/auth_api_key.tmpl.rst", - "scripts/readme-gen/templates/install_deps.tmpl.rst", - "scripts/readme-gen/templates/install_portaudio.tmpl.rst", - "setup.cfg", - "testing/.gitignore" ] } \ No newline at end of file diff --git a/synth.py b/synth.py index 97466d0f4..341c5832f 100644 --- a/synth.py +++ b/synth.py @@ -59,14 +59,21 @@ # BigQuery has a custom multiprocessing note s.move( templated_files, - excludes=["noxfile.py", "docs/multiprocessing.rst", ".coveragerc"] + excludes=[ + "noxfile.py", + "docs/multiprocessing.rst", + ".coveragerc", + # Include custom SNIPPETS_TESTS job for performance. 
+ # https://github.com/googleapis/python-bigquery/issues/191 + ".kokoro/presubmit/presubmit.cfg", + ] ) # ---------------------------------------------------------------------------- # Samples templates # ---------------------------------------------------------------------------- -# python.py_samples() # TODO: why doesn't this work here with Bazel? +python.py_samples() # Do not expose ModelServiceClient, as there is no public API endpoint for the # models service. @@ -95,6 +102,14 @@ '{"members": True, "inherited-members": True}' ) +# Avoid breaking change due to change in field renames. +# https://github.com/googleapis/python-bigquery/issues/319 +s.replace( + "google/cloud/bigquery_v2/types/standard_sql.py", + r"type_ ", + "type " +) + # Tell Sphinx to ingore autogenerated docs files. s.replace( "docs/conf.py", From 0046742abdd2b5eab3c3e935316f91e7eef44d44 Mon Sep 17 00:00:00 2001 From: Carlos de la Guardia Date: Mon, 7 Dec 2020 13:42:59 -0600 Subject: [PATCH 077/341] feat: support CSV format in `load_table_from_dataframe` pandas connector (#399) * WIP: support alternative serialization formats for load_table_from_dataframe * fix: address review comments * docs: make clear repeated fields are not supportedin csv --- google/cloud/bigquery/client.py | 82 ++++++++++++------- tests/system.py | 134 ++++++++++++++++++++++++++++++++ tests/unit/test_client.py | 50 ++++++++++++ 3 files changed, 239 insertions(+), 27 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 168054623..c7cd694c6 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -2111,9 +2111,12 @@ def load_table_from_dataframe( .. note:: - Due to the way REPEATED fields are encoded in the ``parquet`` file - format, a mismatch with the existing table schema can occur, and - 100% compatibility cannot be guaranteed for REPEATED fields. + REPEATED fields are NOT supported when using the CSV source format. 
+ They are supported when using the PARQUET source format, but + due to the way they are encoded in the ``parquet`` file, + a mismatch with the existing table schema can occur, so + 100% compatibility cannot be guaranteed for REPEATED fields when + using the parquet format. https://github.com/googleapis/python-bigquery/issues/17 @@ -2153,6 +2156,14 @@ def load_table_from_dataframe( column names matching those of the dataframe. The BigQuery schema is used to determine the correct data type conversion. Indexes are not loaded. Requires the :mod:`pyarrow` library. + + By default, this method uses the parquet source format. To + override this, supply a value for + :attr:`~google.cloud.bigquery.job.LoadJobConfig.source_format` + with the format name. Currently only + :attr:`~google.cloud.bigquery.job.SourceFormat.CSV` and + :attr:`~google.cloud.bigquery.job.SourceFormat.PARQUET` are + supported. parquet_compression (Optional[str]): [Beta] The compression method to use if intermittently serializing ``dataframe`` to a parquet file. @@ -2181,10 +2192,6 @@ def load_table_from_dataframe( If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig` class. """ - if pyarrow is None: - # pyarrow is now the only supported parquet engine. - raise ValueError("This method requires pyarrow to be installed") - job_id = _make_job_id(job_id, job_id_prefix) if job_config: @@ -2197,15 +2204,20 @@ def load_table_from_dataframe( else: job_config = job.LoadJobConfig() - if job_config.source_format: - if job_config.source_format != job.SourceFormat.PARQUET: - raise ValueError( - "Got unexpected source_format: '{}'. 
Currently, only PARQUET is supported".format( - job_config.source_format - ) - ) - else: + supported_formats = {job.SourceFormat.CSV, job.SourceFormat.PARQUET} + if job_config.source_format is None: + # default value job_config.source_format = job.SourceFormat.PARQUET + if job_config.source_format not in supported_formats: + raise ValueError( + "Got unexpected source_format: '{}'. Currently, only PARQUET and CSV are supported".format( + job_config.source_format + ) + ) + + if pyarrow is None and job_config.source_format == job.SourceFormat.PARQUET: + # pyarrow is now the only supported parquet engine. + raise ValueError("This method requires pyarrow to be installed") if location is None: location = self.location @@ -2245,27 +2257,43 @@ def load_table_from_dataframe( stacklevel=2, ) - tmpfd, tmppath = tempfile.mkstemp(suffix="_job_{}.parquet".format(job_id[:8])) + tmpfd, tmppath = tempfile.mkstemp( + suffix="_job_{}.{}".format(job_id[:8], job_config.source_format.lower()) + ) os.close(tmpfd) try: - if job_config.schema: - if parquet_compression == "snappy": # adjust the default value - parquet_compression = parquet_compression.upper() - _pandas_helpers.dataframe_to_parquet( - dataframe, - job_config.schema, + if job_config.source_format == job.SourceFormat.PARQUET: + + if job_config.schema: + if parquet_compression == "snappy": # adjust the default value + parquet_compression = parquet_compression.upper() + + _pandas_helpers.dataframe_to_parquet( + dataframe, + job_config.schema, + tmppath, + parquet_compression=parquet_compression, + ) + else: + dataframe.to_parquet(tmppath, compression=parquet_compression) + + else: + + dataframe.to_csv( tmppath, - parquet_compression=parquet_compression, + index=False, + header=False, + encoding="utf-8", + float_format="%.17g", + date_format="%Y-%m-%d %H:%M:%S.%f", ) - else: - dataframe.to_parquet(tmppath, compression=parquet_compression) - with open(tmppath, "rb") as parquet_file: + with open(tmppath, "rb") as tmpfile: file_size 
= os.path.getsize(tmppath) return self.load_table_from_file( - parquet_file, + tmpfile, destination, num_retries=num_retries, rewind=True, diff --git a/tests/system.py b/tests/system.py index e347c8a70..d481967d8 100644 --- a/tests/system.py +++ b/tests/system.py @@ -1165,6 +1165,140 @@ def test_load_table_from_json_basic_use(self): self.assertEqual(tuple(table.schema), table_schema) self.assertEqual(table.num_rows, 2) + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_load_table_from_dataframe_w_explicit_schema_source_format_csv(self): + from google.cloud.bigquery.job import SourceFormat + + table_schema = ( + bigquery.SchemaField("bool_col", "BOOLEAN"), + bigquery.SchemaField("bytes_col", "BYTES"), + bigquery.SchemaField("date_col", "DATE"), + bigquery.SchemaField("dt_col", "DATETIME"), + bigquery.SchemaField("float_col", "FLOAT"), + bigquery.SchemaField("geo_col", "GEOGRAPHY"), + bigquery.SchemaField("int_col", "INTEGER"), + bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("str_col", "STRING"), + bigquery.SchemaField("time_col", "TIME"), + bigquery.SchemaField("ts_col", "TIMESTAMP"), + ) + df_data = collections.OrderedDict( + [ + ("bool_col", [True, None, False]), + ("bytes_col", ["abc", None, "def"]), + ( + "date_col", + [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)], + ), + ( + "dt_col", + [ + datetime.datetime(1, 1, 1, 0, 0, 0), + None, + datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + ], + ), + ("float_col", [float("-inf"), float("nan"), float("inf")]), + ( + "geo_col", + [ + "POINT(30 10)", + None, + "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", + ], + ), + ("int_col", [-9223372036854775808, None, 9223372036854775807]), + ( + "num_col", + [ + decimal.Decimal("-99999999999999999999999999999.999999999"), + None, + decimal.Decimal("99999999999999999999999999999.999999999"), + ], + ), + ("str_col", [u"abc", None, u"def"]), + ( + "time_col", + [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 
999999)], + ), + ( + "ts_col", + [ + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + None, + datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc + ), + ], + ), + ] + ) + dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) + + dataset_id = _make_dataset_id("bq_load_test") + self.temp_dataset(dataset_id) + table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema_csv".format( + Config.CLIENT.project, dataset_id + ) + + job_config = bigquery.LoadJobConfig( + schema=table_schema, source_format=SourceFormat.CSV + ) + load_job = Config.CLIENT.load_table_from_dataframe( + dataframe, table_id, job_config=job_config + ) + load_job.result() + + table = Config.CLIENT.get_table(table_id) + self.assertEqual(tuple(table.schema), table_schema) + self.assertEqual(table.num_rows, 3) + + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_load_table_from_dataframe_w_explicit_schema_source_format_csv_floats(self): + from google.cloud.bigquery.job import SourceFormat + + table_schema = (bigquery.SchemaField("float_col", "FLOAT"),) + df_data = collections.OrderedDict( + [ + ( + "float_col", + [ + 0.14285714285714285, + 0.51428571485748, + 0.87128748, + 1.807960649, + 2.0679610649, + 2.4406779661016949, + 3.7148514257, + 3.8571428571428572, + 1.51251252e40, + ], + ), + ] + ) + dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) + + dataset_id = _make_dataset_id("bq_load_test") + self.temp_dataset(dataset_id) + table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema_csv".format( + Config.CLIENT.project, dataset_id + ) + + job_config = bigquery.LoadJobConfig( + schema=table_schema, source_format=SourceFormat.CSV + ) + load_job = Config.CLIENT.load_table_from_dataframe( + dataframe, table_id, job_config=job_config + ) + load_job.result() + + table = Config.CLIENT.get_table(table_id) + rows = self._fetch_single_page(table) + floats = [r.values()[0] for r in rows] + 
self.assertEqual(tuple(table.schema), table_schema) + self.assertEqual(table.num_rows, 9) + self.assertEqual(floats, df_data["float_col"]) + def test_load_table_from_json_schema_autodetect(self): json_rows = [ {"name": "John", "age": 18, "birthday": "2001-10-15", "is_awesome": False}, diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 0e68b2538..e5ead0ccc 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -8410,6 +8410,56 @@ def test_load_table_from_dataframe_w_invaild_job_config(self): err_msg = str(exc.value) assert "Expected an instance of LoadJobConfig" in err_msg + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_load_table_from_dataframe_with_csv_source_format(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + from google.cloud.bigquery import job + from google.cloud.bigquery.schema import SchemaField + + client = self._make_client() + records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] + dataframe = pandas.DataFrame(records) + job_config = job.LoadJobConfig( + write_disposition=job.WriteDisposition.WRITE_TRUNCATE, + source_format=job.SourceFormat.CSV, + ) + + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + return_value=mock.Mock( + schema=[SchemaField("id", "INTEGER"), SchemaField("age", "INTEGER")] + ), + ) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + with load_patch as load_table_from_file, get_table_patch: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, job_config=job_config + ) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + num_retries=_DEFAULT_NUM_RETRIES, + rewind=True, + size=mock.ANY, + job_id=mock.ANY, + job_id_prefix=None, + location=None, + project=None, + job_config=mock.ANY, + timeout=None, + ) + + sent_file = load_table_from_file.mock_calls[0][1][1] + assert sent_file.closed + + 
sent_config = load_table_from_file.mock_calls[0][2]["job_config"] + assert sent_config.source_format == job.SourceFormat.CSV + def test_load_table_from_json_basic_use(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job From 2f81e802f9c0347690069eefaf2ccad1f117cc21 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 7 Dec 2020 13:19:42 -0700 Subject: [PATCH 078/341] chore: release 2.6.0 (#412) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 19 +++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c71f85d0c..c0233bbd4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,25 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.6.0](https://www.github.com/googleapis/python-bigquery/compare/v2.5.0...v2.6.0) (2020-12-07) + + +### Features + +* add support for materialized views ([#408](https://www.github.com/googleapis/python-bigquery/issues/408)) ([57ffc66](https://www.github.com/googleapis/python-bigquery/commit/57ffc665319331e0a00583d5d652fd14a510cf2a)), closes [#407](https://www.github.com/googleapis/python-bigquery/issues/407) +* convert `BIGNUMERIC` values to decimal objects ([#414](https://www.github.com/googleapis/python-bigquery/issues/414)) ([d472d2d](https://www.github.com/googleapis/python-bigquery/commit/d472d2d2b33e40b954652d31476dea8c90e6a2dc)), closes [#367](https://www.github.com/googleapis/python-bigquery/issues/367) +* support CSV format in `load_table_from_dataframe` pandas connector ([#399](https://www.github.com/googleapis/python-bigquery/issues/399)) ([0046742](https://www.github.com/googleapis/python-bigquery/commit/0046742abdd2b5eab3c3e935316f91e7eef44d44)) + + +### Bug Fixes + +* preserve timestamp microsecond precision with rows from REST API 
([#402](https://www.github.com/googleapis/python-bigquery/issues/402)) ([04510a7](https://www.github.com/googleapis/python-bigquery/commit/04510a7dc7570466550bbdf500d7020bef2af44d)) + + +### Documentation + +* update intersphinx links ([#404](https://www.github.com/googleapis/python-bigquery/issues/404)) ([a9d8ae8](https://www.github.com/googleapis/python-bigquery/commit/a9d8ae8a920dec655b77dca9d9128e569f1d07a7)) + ## [2.5.0](https://www.github.com/googleapis/python-bigquery/compare/v2.4.0...v2.5.0) (2020-12-02) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 5836d8051..ae34a9fbe 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.5.0" +__version__ = "2.6.0" From e68281e5ab3294aaa147ee63e76fde577b8651b8 Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Tue, 8 Dec 2020 11:52:38 -0700 Subject: [PATCH 079/341] ci: skip docfx in main 'Kokoro' presubmit (#423) * ci: skip docfx in main 'Kokoro' presubmit * fix: specify default sessions in noxfile * add conditional for cd to project root --- .kokoro/build.sh | 6 +++++- .kokoro/docs/docs-presubmit.cfg | 11 +++++++++++ noxfile.py | 12 ++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/.kokoro/build.sh b/.kokoro/build.sh index 0e71e2aca..cb81a05f8 100755 --- a/.kokoro/build.sh +++ b/.kokoro/build.sh @@ -15,7 +15,11 @@ set -eo pipefail -cd github/python-bigquery +if [[ -z "${PROJECT_ROOT:-}" ]]; then + PROJECT_ROOT="github/python-bigquery" +fi + +cd "${PROJECT_ROOT}" # Disable buffering, so that the logs stream through. 
export PYTHONUNBUFFERED=1 diff --git a/.kokoro/docs/docs-presubmit.cfg b/.kokoro/docs/docs-presubmit.cfg index 111810782..5c216b4bc 100644 --- a/.kokoro/docs/docs-presubmit.cfg +++ b/.kokoro/docs/docs-presubmit.cfg @@ -15,3 +15,14 @@ env_vars: { key: "TRAMPOLINE_IMAGE_UPLOAD" value: "false" } + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/build.sh" +} + +# Only run this nox session. +env_vars: { + key: "NOX_SESSION" + value: "docs docfx" +} \ No newline at end of file diff --git a/noxfile.py b/noxfile.py index 441782583..95818d3c8 100644 --- a/noxfile.py +++ b/noxfile.py @@ -25,6 +25,18 @@ BLACK_PATHS = ("docs", "google", "samples", "tests", "noxfile.py", "setup.py") CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() +# 'docfx' is excluded since it only needs to run in 'docs-presubmit' +nox.options.sessions = [ + "unit", + "system", + "snippets", + "cover", + "lint", + "lint_setup_py", + "blacken", + "docs", +] + def default(session): """Default unit test session. From 8c7e02b0de2c92ee965414e7c430eb57d1877326 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 8 Dec 2020 13:16:28 -0600 Subject: [PATCH 080/341] docs: add examples of `fields` argument to update methods (#418) --- google/cloud/bigquery/client.py | 60 ++++++++++++++++++++++++++++----- 1 file changed, 52 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index c7cd694c6..28cac64ad 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -881,7 +881,22 @@ def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY, timeout=None): dataset (google.cloud.bigquery.dataset.Dataset): The dataset to update. fields (Sequence[str]): - The properties of ``dataset`` to change (e.g. "friendly_name"). + The properties of ``dataset`` to change. These are strings + corresponding to the properties of + :class:`~google.cloud.bigquery.dataset.Dataset`. 
+ + For example, to update the default expiration times, specify + both properties in the ``fields`` argument: + + .. code-block:: python + + bigquery_client.update_dataset( + dataset, + [ + "default_partition_expiration_ms", + "default_table_expiration_ms", + ] + ) retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. timeout (Optional[float]): @@ -928,8 +943,18 @@ def update_model(self, model, fields, retry=DEFAULT_RETRY, timeout=None): Args: model (google.cloud.bigquery.model.Model): The model to update. fields (Sequence[str]): - The fields of ``model`` to change, spelled as the Model - properties (e.g. "friendly_name"). + The properties of ``model`` to change. These are strings + corresponding to the properties of + :class:`~google.cloud.bigquery.model.Model`. + + For example, to update the descriptive properties of the model, + specify them in the ``fields`` argument: + + .. code-block:: python + + bigquery_client.update_model( + model, ["description", "friendly_name"] + ) retry (Optional[google.api_core.retry.Retry]): A description of how to retry the API call. timeout (Optional[float]): @@ -980,11 +1005,20 @@ def update_routine(self, routine, fields, retry=DEFAULT_RETRY, timeout=None): occurred since the read. Args: - routine (google.cloud.bigquery.routine.Routine): The routine to update. + routine (google.cloud.bigquery.routine.Routine): + The routine to update. fields (Sequence[str]): The fields of ``routine`` to change, spelled as the - :class:`~google.cloud.bigquery.routine.Routine` properties - (e.g. ``type_``). + :class:`~google.cloud.bigquery.routine.Routine` properties. + + For example, to update the description property of the routine, + specify it in the ``fields`` argument: + + .. code-block:: python + + bigquery_client.update_routine( + routine, ["description"] + ) retry (Optional[google.api_core.retry.Retry]): A description of how to retry the API call. 
timeout (Optional[float]): @@ -1035,8 +1069,18 @@ def update_table(self, table, fields, retry=DEFAULT_RETRY, timeout=None): Args: table (google.cloud.bigquery.table.Table): The table to update. fields (Sequence[str]): - The fields of ``table`` to change, spelled as the Table - properties (e.g. "friendly_name"). + The fields of ``table`` to change, spelled as the + :class:`~google.cloud.bigquery.table.Table` properties. + + For example, to update the descriptive properties of the table, + specify them in the ``fields`` argument: + + .. code-block:: python + + bigquery_client.update_table( + table, + ["description", "friendly_name"] + ) retry (Optional[google.api_core.retry.Retry]): A description of how to retry the API call. timeout (Optional[float]): From d141bd2810d03ea0132ba89e820ac29e32fd6ced Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 8 Dec 2020 21:54:28 +0100 Subject: [PATCH 081/341] chore(deps): update dependency google-cloud-bigquery to v2.6.0 (#419) Co-authored-by: Tim Swast --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 3eecbf546..f9211d66c 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.5.0 +google-cloud-bigquery==2.6.0 google-cloud-bigquery-storage==2.0.1 google-auth-oauthlib==0.4.2 grpcio==1.34.0 From 763fb2cf897bccebb3d8b98358fdca8c500d308a Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 8 Dec 2020 22:08:03 +0100 Subject: [PATCH 082/341] chore(deps): update dependency google-cloud-bigquery-storage to v2.1.0 (#369) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [google-cloud-bigquery-storage](https://togithub.com/googleapis/python-bigquery-storage) | minor | `==2.0.1` -> 
`==2.1.0` | --- ### Release Notes
googleapis/python-bigquery-storage ### [`v2.1.0`](https://togithub.com/googleapis/python-bigquery-storage/blob/master/CHANGELOG.md#​210-httpswwwgithubcomgoogleapispython-bigquery-storagecomparev201v210-2020-11-04) [Compare Source](https://togithub.com/googleapis/python-bigquery-storage/compare/v2.0.1...v2.1.0) ##### Features - add public transport property and path formatting methods to client ([#​80](https://www.github.com/googleapis/python-bigquery-storage/issues/80)) ([fbbb439](https://www.github.com/googleapis/python-bigquery-storage/commit/fbbb439b8c77fa9367a4b5bea725dd0b0f26b769)) ##### Documentation - add intersphinx to proto-plus library ([#​86](https://www.github.com/googleapis/python-bigquery-storage/issues/86)) ([4cd35d2](https://www.github.com/googleapis/python-bigquery-storage/commit/4cd35d21de4486f659b7efc4ff4dcb9b4eee6c9e)) - show inheritance in types reference ([#​91](https://www.github.com/googleapis/python-bigquery-storage/issues/91)) ([e5fd4e6](https://www.github.com/googleapis/python-bigquery-storage/commit/e5fd4e62de2768a49d633dc3a81e03d64df9fe1f)) ##### [2.0.1](https://www.github.com/googleapis/python-bigquery-storage/compare/v2.0.0...v2.0.1) (2020-10-21) ##### Bug Fixes - don't fail with 429 when downloading wide tables ([#​79](https://www.github.com/googleapis/python-bigquery-storage/issues/79)) ([45faf97](https://www.github.com/googleapis/python-bigquery-storage/commit/45faf9712b25bd63d962ca7e5afc8b8d3a0d8353)) ##### Documentation - update to_dataframe sample to latest dependencies ([#​72](https://www.github.com/googleapis/python-bigquery-storage/issues/72)) ([a7fe762](https://www.github.com/googleapis/python-bigquery-storage/commit/a7fe7626312a5b9fe1e7bd0e0fe5601ae97605c7))
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Renovate will not automatically rebase this PR, because other commits have been found. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index f9211d66c..6000a4d24 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.6.0 -google-cloud-bigquery-storage==2.0.1 +google-cloud-bigquery-storage==2.1.0 google-auth-oauthlib==0.4.2 grpcio==1.34.0 ipython==7.16.1; python_version < '3.7' From 78fde4a92e61a89d0b490b93acc90fff9635d1bf Mon Sep 17 00:00:00 2001 From: HemangChothani <50404902+HemangChothani@users.noreply.github.com> Date: Wed, 9 Dec 2020 15:27:19 -0500 Subject: [PATCH 083/341] fix: handle null values in array query parameters (#426) --- google/cloud/bigquery/_helpers.py | 2 +- tests/unit/test_query.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 716c8a394..100136108 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -40,7 +40,7 @@ def _not_null(value, field): """Check whether 'value' should be coerced to 'field' type.""" - return value is not None or field.mode != "NULLABLE" + return value is not None or (field is not None and field.mode != "NULLABLE") def 
_int_from_json(value, field): diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index a7c639ed1..cf268daf1 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -383,6 +383,16 @@ def test_from_api_repr_wo_values(self): self.assertEqual(param.array_type, "INT64") self.assertEqual(param.values, []) + def test_from_api_repr_w_none_values(self): + RESOURCE = { + "parameterType": {"type": "ARRAY", "arrayType": {"type": "INT64"}}, + "parameterValue": {"arrayValues": [{"value": "1"}, {"value": None}]}, + } + klass = self._get_target_class() + param = klass.from_api_repr(RESOURCE) + self.assertEqual(param.array_type, "INT64") + self.assertEqual(param.values, [1, None]) + def test_from_api_repr_w_struct_type(self): from google.cloud.bigquery.query import StructQueryParameter From 8219f7999b75fe6f267b943357b877825a1d26f1 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 9 Dec 2020 21:29:10 +0100 Subject: [PATCH 084/341] chore(deps): update dependency pandas to v1.1.5 (#417) Co-authored-by: Tim Swast --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 6000a4d24..1d3cace2b 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -5,6 +5,6 @@ grpcio==1.34.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.3 -pandas==1.1.4 +pandas==1.1.5 pyarrow==2.0.0 pytz==2020.4 From 5e266d8589f4212343ce49ac9b633743efa59346 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 9 Dec 2020 14:56:52 -0600 Subject: [PATCH 085/341] chore: release 2.6.1 (#424) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 12 ++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git 
a/CHANGELOG.md b/CHANGELOG.md index c0233bbd4..d01f62ff6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,18 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.6.1](https://www.github.com/googleapis/python-bigquery/compare/v2.6.0...v2.6.1) (2020-12-09) + + +### Bug Fixes + +* handle null values in array query parameters ([#426](https://www.github.com/googleapis/python-bigquery/issues/426)) ([78fde4a](https://www.github.com/googleapis/python-bigquery/commit/78fde4a92e61a89d0b490b93acc90fff9635d1bf)) + + +### Documentation + +* add examples of `fields` argument to update methods ([#418](https://www.github.com/googleapis/python-bigquery/issues/418)) ([8c7e02b](https://www.github.com/googleapis/python-bigquery/commit/8c7e02b0de2c92ee965414e7c430eb57d1877326)) + ## [2.6.0](https://www.github.com/googleapis/python-bigquery/compare/v2.5.0...v2.6.0) (2020-12-07) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index ae34a9fbe..410cd066e 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.6.0" +__version__ = "2.6.1" From dbc68b3d1f325f80d24a2da5f028b0f653fb0317 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 9 Dec 2020 15:59:52 -0600 Subject: [PATCH 086/341] docs: add GEOGRAPHY data type code samples (#428) * docs: add GEOGRAPHY data type code samples These are added to a separate directory in order to isolate the GeoJSON and WKT dependencies from the other code samples. 
* skip geography samples in snippets session --- noxfile.py | 8 +- samples/geography/__init__.py | 13 ++ samples/geography/conftest.py | 55 +++++ samples/geography/insert_geojson.py | 49 +++++ samples/geography/insert_geojson_test.py | 20 ++ samples/geography/insert_wkt.py | 49 +++++ samples/geography/insert_wkt_test.py | 20 ++ samples/geography/noxfile.py | 246 +++++++++++++++++++++++ samples/geography/noxfile_config.py | 35 ++++ samples/geography/requirements-test.txt | 2 + samples/geography/requirements.txt | 3 + tests/system.py | 9 +- 12 files changed, 502 insertions(+), 7 deletions(-) create mode 100644 samples/geography/__init__.py create mode 100644 samples/geography/conftest.py create mode 100644 samples/geography/insert_geojson.py create mode 100644 samples/geography/insert_geojson_test.py create mode 100644 samples/geography/insert_wkt.py create mode 100644 samples/geography/insert_wkt_test.py create mode 100644 samples/geography/noxfile.py create mode 100644 samples/geography/noxfile_config.py create mode 100644 samples/geography/requirements-test.txt create mode 100644 samples/geography/requirements.txt diff --git a/noxfile.py b/noxfile.py index 95818d3c8..8523eabb5 100644 --- a/noxfile.py +++ b/noxfile.py @@ -147,7 +147,13 @@ def snippets(session): # Skip tests in samples/snippets, as those are run in a different session # using the nox config from that directory. 
session.run("py.test", os.path.join("docs", "snippets.py"), *session.posargs) - session.run("py.test", "samples", "--ignore=samples/snippets", *session.posargs) + session.run( + "py.test", + "samples", + "--ignore=samples/snippets", + "--ignore=samples/geography", + *session.posargs, + ) @nox.session(python="3.8") diff --git a/samples/geography/__init__.py b/samples/geography/__init__.py new file mode 100644 index 000000000..c6334245a --- /dev/null +++ b/samples/geography/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/samples/geography/conftest.py b/samples/geography/conftest.py new file mode 100644 index 000000000..265900f5a --- /dev/null +++ b/samples/geography/conftest.py @@ -0,0 +1,55 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import datetime +import uuid + +from google.cloud import bigquery +import pytest + + +def temp_suffix(): + now = datetime.datetime.now() + return f"{now.strftime('%Y%m%d%H%M%S')}_{uuid.uuid4().hex[:8]}" + + +@pytest.fixture(scope="session") +def bigquery_client(): + bigquery_client = bigquery.Client() + return bigquery_client + + +@pytest.fixture(scope="session") +def project_id(bigquery_client): + return bigquery_client.project + + +@pytest.fixture +def dataset_id(bigquery_client): + dataset_id = f"geography_{temp_suffix()}" + bigquery_client.create_dataset(dataset_id) + yield dataset_id + bigquery_client.delete_dataset(dataset_id, delete_contents=True) + + +@pytest.fixture +def table_id(bigquery_client, project_id, dataset_id): + table_id = f"{project_id}.{dataset_id}.geography_{temp_suffix()}" + table = bigquery.Table(table_id) + table.schema = [ + bigquery.SchemaField("geo", bigquery.SqlTypeNames.GEOGRAPHY), + ] + bigquery_client.create_table(table) + yield table_id + bigquery_client.delete_table(table_id) diff --git a/samples/geography/insert_geojson.py b/samples/geography/insert_geojson.py new file mode 100644 index 000000000..23f249c15 --- /dev/null +++ b/samples/geography/insert_geojson.py @@ -0,0 +1,49 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +def insert_geojson(override_values={}): + # [START bigquery_insert_geojson] + import geojson + from google.cloud import bigquery + + bigquery_client = bigquery.Client() + + # This example uses a table containing a column named "geo" with the + # GEOGRAPHY data type. + table_id = "my-project.my_dataset.my_table" + # [END bigquery_insert_geojson] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + table_id = override_values.get("table_id", table_id) + # [START bigquery_insert_geojson] + + # Use the python-geojson library to generate GeoJSON of a line from LAX to + # JFK airports. Alternatively, you may define GeoJSON data directly, but it + # must be converted to a string before loading it into BigQuery. + my_geography = geojson.LineString([(-118.4085, 33.9416), (-73.7781, 40.6413)]) + rows = [ + # Convert GeoJSON data into a string. + {"geo": geojson.dumps(my_geography)} + ] + + # table already exists and has a column + # named "geo" with data type GEOGRAPHY. + errors = bigquery_client.insert_rows_json(table_id, rows) + if errors: + raise RuntimeError(f"row insert failed: {errors}") + else: + print(f"wrote 1 row to {table_id}") + # [END bigquery_insert_geojson] + return errors diff --git a/samples/geography/insert_geojson_test.py b/samples/geography/insert_geojson_test.py new file mode 100644 index 000000000..5ef15ee13 --- /dev/null +++ b/samples/geography/insert_geojson_test.py @@ -0,0 +1,20 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from . import insert_geojson + + +def test_insert_geojson(table_id): + errors = insert_geojson.insert_geojson(override_values={"table_id": table_id}) + assert not errors diff --git a/samples/geography/insert_wkt.py b/samples/geography/insert_wkt.py new file mode 100644 index 000000000..1f3d57546 --- /dev/null +++ b/samples/geography/insert_wkt.py @@ -0,0 +1,49 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def insert_wkt(override_values={}): + # [START bigquery_insert_geography_wkt] + from google.cloud import bigquery + import shapely + import shapely.wkt + + bigquery_client = bigquery.Client() + + # This example uses a table containing a column named "geo" with the + # GEOGRAPHY data type. + table_id = "my-project.my_dataset.my_table" + # [END bigquery_insert_geography_wkt] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + table_id = override_values.get("table_id", table_id) + # [START bigquery_insert_geography_wkt] + + # Use the Shapely library to generate WKT of a line from LAX to + # JFK airports. Alternatively, you may define WKT data directly. + my_geography = shapely.LineString([(-118.4085, 33.9416), (-73.7781, 40.6413)]) + rows = [ + # Convert data into a WKT string. 
+ {"geo": shapely.wkt.dumps(my_geography)}, + ] + + # table already exists and has a column + # named "geo" with data type GEOGRAPHY. + errors = bigquery_client.insert_rows_json(table_id, rows) + if errors: + raise RuntimeError(f"row insert failed: {errors}") + else: + print(f"wrote 1 row to {table_id}") + # [END bigquery_insert_geography_wkt] + return errors diff --git a/samples/geography/insert_wkt_test.py b/samples/geography/insert_wkt_test.py new file mode 100644 index 000000000..5ef15ee13 --- /dev/null +++ b/samples/geography/insert_wkt_test.py @@ -0,0 +1,20 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import insert_geojson + + +def test_insert_geojson(table_id): + errors = insert_geojson.insert_geojson(override_values={"table_id": table_id}) + assert not errors diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py new file mode 100644 index 000000000..ab2c49227 --- /dev/null +++ b/samples/geography/noxfile.py @@ -0,0 +1,246 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import os +from pathlib import Path +import sys + +import nox + + +# WARNING - WARNING - WARNING - WARNING - WARNING +# WARNING - WARNING - WARNING - WARNING - WARNING +# DO NOT EDIT THIS FILE EVER! +# WARNING - WARNING - WARNING - WARNING - WARNING +# WARNING - WARNING - WARNING - WARNING - WARNING + +# Copy `noxfile_config.py` to your directory and modify it instead. + + +# `TEST_CONFIG` dict is a configuration hook that allows users to +# modify the test configurations. The values here should be in sync +# with `noxfile_config.py`. Users will copy `noxfile_config.py` into +# their directory and modify it. + +TEST_CONFIG = { + # You can opt out from the test for specific Python versions. + "ignored_versions": ["2.7"], + # Old samples are opted out of enforcing Python type hints + # All new samples should feature them + "enforce_type_hints": False, + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. + "envs": {}, +} + + +try: + # Ensure we can import noxfile_config in the project's directory. + sys.path.append(".") + from noxfile_config import TEST_CONFIG_OVERRIDE +except ImportError as e: + print("No user noxfile_config found: detail: {}".format(e)) + TEST_CONFIG_OVERRIDE = {} + +# Update the TEST_CONFIG with the user supplied values. 
+TEST_CONFIG.update(TEST_CONFIG_OVERRIDE) + + +def get_pytest_env_vars(): + """Returns a dict for pytest invocation.""" + ret = {} + + # Override the GCLOUD_PROJECT and the alias. + env_key = TEST_CONFIG["gcloud_project_env"] + # This should error out if not set. + ret["GOOGLE_CLOUD_PROJECT"] = os.environ[env_key] + + # Apply user supplied envs. + ret.update(TEST_CONFIG["envs"]) + return ret + + +# DO NOT EDIT - automatically generated. +# All versions used to tested samples. +ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8"] + +# Any default versions that should be ignored. +IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] + +TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS]) + +INSTALL_LIBRARY_FROM_SOURCE = bool(os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False)) +# +# Style Checks +# + + +def _determine_local_import_names(start_dir): + """Determines all import names that should be considered "local". + + This is used when running the linter to insure that import order is + properly checked. + """ + file_ext_pairs = [os.path.splitext(path) for path in os.listdir(start_dir)] + return [ + basename + for basename, extension in file_ext_pairs + if extension == ".py" + or os.path.isdir(os.path.join(start_dir, basename)) + and basename not in ("__pycache__") + ] + + +# Linting with flake8. 
+# +# We ignore the following rules: +# E203: whitespace before ‘:’ +# E266: too many leading ‘#’ for block comment +# E501: line too long +# I202: Additional newline in a section of imports +# +# We also need to specify the rules which are ignored by default: +# ['E226', 'W504', 'E126', 'E123', 'W503', 'E24', 'E704', 'E121'] +FLAKE8_COMMON_ARGS = [ + "--show-source", + "--builtin=gettext", + "--max-complexity=20", + "--import-order-style=google", + "--exclude=.nox,.cache,env,lib,generated_pb2,*_pb2.py,*_pb2_grpc.py", + "--ignore=E121,E123,E126,E203,E226,E24,E266,E501,E704,W503,W504,I202", + "--max-line-length=88", +] + + +@nox.session +def lint(session): + if not TEST_CONFIG["enforce_type_hints"]: + session.install("flake8", "flake8-import-order") + else: + session.install("flake8", "flake8-import-order", "flake8-annotations") + + local_names = _determine_local_import_names(".") + args = FLAKE8_COMMON_ARGS + [ + "--application-import-names", + ",".join(local_names), + ".", + ] + session.run("flake8", *args) + + +# +# Black +# + + +@nox.session +def blacken(session): + session.install("black") + python_files = [path for path in os.listdir(".") if path.endswith(".py")] + + session.run("black", *python_files) + + +# +# Sample Tests +# + + +PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"] + + +def _session_tests(session, post_install=None): + """Runs py.test for a particular project.""" + if os.path.exists("requirements.txt"): + session.install("-r", "requirements.txt") + + if os.path.exists("requirements-test.txt"): + session.install("-r", "requirements-test.txt") + + if INSTALL_LIBRARY_FROM_SOURCE: + session.install("-e", _get_repo_root()) + + if post_install: + post_install(session) + + session.run( + "pytest", + *(PYTEST_COMMON_ARGS + session.posargs), + # Pytest will return 5 when no tests are collected. This can happen + # on travis where slow and flaky tests are excluded. 
+ # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html + success_codes=[0, 5], + env=get_pytest_env_vars() + ) + + +@nox.session(python=ALL_VERSIONS) +def py(session): + """Runs py.test for a sample using the specified version of Python.""" + if session.python in TESTED_VERSIONS: + _session_tests(session) + else: + session.skip( + "SKIPPED: {} tests are disabled for this sample.".format(session.python) + ) + + +# +# Readmegen +# + + +def _get_repo_root(): + """ Returns the root folder of the project. """ + # Get root of this repository. Assume we don't have directories nested deeper than 10 items. + p = Path(os.getcwd()) + for i in range(10): + if p is None: + break + if Path(p / ".git").exists(): + return str(p) + # .git is not available in repos cloned via Cloud Build + # setup.py is always in the library's root, so use that instead + # https://github.com/googleapis/synthtool/issues/792 + if Path(p / "setup.py").exists(): + return str(p) + p = p.parent + raise Exception("Unable to detect repository root.") + + +GENERATED_READMES = sorted([x for x in Path(".").rglob("*.rst.in")]) + + +@nox.session +@nox.parametrize("path", GENERATED_READMES) +def readmegen(session, path): + """(Re-)generates the readme for a sample.""" + session.install("jinja2", "pyyaml") + dir_ = os.path.dirname(path) + + if os.path.exists(os.path.join(dir_, "requirements.txt")): + session.install("-r", os.path.join(dir_, "requirements.txt")) + + in_file = os.path.join(dir_, "README.rst.in") + session.run( + "python", _get_repo_root() + "/scripts/readme-gen/readme_gen.py", in_file + ) diff --git a/samples/geography/noxfile_config.py b/samples/geography/noxfile_config.py new file mode 100644 index 000000000..7d2e02346 --- /dev/null +++ b/samples/geography/noxfile_config.py @@ -0,0 +1,35 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default TEST_CONFIG_OVERRIDE for python repos. + +# You can copy this file into your directory, then it will be inported from +# the noxfile.py. + +# The source of truth: +# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/noxfile_config.py + +TEST_CONFIG_OVERRIDE = { + # You can opt out from the test for specific Python versions. + "ignored_versions": ["2.7"], + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # "gcloud_project_env": "BUILD_SPECIFIC_GCLOUD_PROJECT", + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. 
+ "envs": {}, +} diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt new file mode 100644 index 000000000..676ff949e --- /dev/null +++ b/samples/geography/requirements-test.txt @@ -0,0 +1,2 @@ +pytest==5.4.3 +mock==4.0.2 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt new file mode 100644 index 000000000..9bd6638d7 --- /dev/null +++ b/samples/geography/requirements.txt @@ -0,0 +1,3 @@ +geojson==2.5.0 +google-cloud-bigquery==2.6.0 +Shapely==1.7.1 diff --git a/tests/system.py b/tests/system.py index d481967d8..185722e83 100644 --- a/tests/system.py +++ b/tests/system.py @@ -2414,9 +2414,8 @@ def test_querying_data_w_timeout(self): query_job = Config.CLIENT.query( """ - SELECT name, SUM(number) AS total_people - FROM `bigquery-public-data.usa_names.usa_1910_current` - GROUP BY name + SELECT COUNT(*) + FROM UNNEST(GENERATE_ARRAY(1,1000000)), UNNEST(GENERATE_ARRAY(1, 10000)) """, location="US", job_config=job_config, @@ -2427,9 +2426,7 @@ def test_querying_data_w_timeout(self): with self.assertRaises(requests.exceptions.Timeout): query_job.done(timeout=0.1) - # Now wait for the result using a more realistic deadline. 
- query_job.result(timeout=30) - self.assertTrue(query_job.done(timeout=30)) + Config.CLIENT.cancel_job(query_job.job_id, location=query_job.location) @unittest.skipIf(pandas is None, "Requires `pandas`") def test_query_results_to_dataframe(self): From 96a1c5b3c72855ba6ae8c88dfd0cdb02d2faf909 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 9 Dec 2020 16:57:16 -0600 Subject: [PATCH 087/341] docs: fix Shapely import in GEOGRAPHY sample (#431) --- samples/geography/insert_wkt.py | 6 ++++-- samples/geography/insert_wkt_test.py | 6 +++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/samples/geography/insert_wkt.py b/samples/geography/insert_wkt.py index 1f3d57546..d7d3accde 100644 --- a/samples/geography/insert_wkt.py +++ b/samples/geography/insert_wkt.py @@ -16,7 +16,7 @@ def insert_wkt(override_values={}): # [START bigquery_insert_geography_wkt] from google.cloud import bigquery - import shapely + import shapely.geometry import shapely.wkt bigquery_client = bigquery.Client() @@ -32,7 +32,9 @@ def insert_wkt(override_values={}): # Use the Shapely library to generate WKT of a line from LAX to # JFK airports. Alternatively, you may define WKT data directly. - my_geography = shapely.LineString([(-118.4085, 33.9416), (-73.7781, 40.6413)]) + my_geography = shapely.geometry.LineString( + [(-118.4085, 33.9416), (-73.7781, 40.6413)] + ) rows = [ # Convert data into a WKT string. {"geo": shapely.wkt.dumps(my_geography)}, diff --git a/samples/geography/insert_wkt_test.py b/samples/geography/insert_wkt_test.py index 5ef15ee13..8bcb62cec 100644 --- a/samples/geography/insert_wkt_test.py +++ b/samples/geography/insert_wkt_test.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from . import insert_geojson +from . 
import insert_wkt -def test_insert_geojson(table_id): - errors = insert_geojson.insert_geojson(override_values={"table_id": table_id}) +def test_insert_wkt(table_id): + errors = insert_wkt.insert_wkt(override_values={"table_id": table_id}) assert not errors From dab7af3463457052f75991fb7e532ea719da0129 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 11 Dec 2020 00:04:26 +0100 Subject: [PATCH 088/341] chore(deps): update dependency google-cloud-bigquery to v2.6.1 (#430) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 9bd6638d7..3ea0e6e06 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,3 +1,3 @@ geojson==2.5.0 -google-cloud-bigquery==2.6.0 +google-cloud-bigquery==2.6.1 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 1d3cace2b..0b9b69487 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.6.0 +google-cloud-bigquery==2.6.1 google-cloud-bigquery-storage==2.1.0 google-auth-oauthlib==0.4.2 grpcio==1.34.0 From 079b6a162f6929bf801366d92f8daeb3318426c4 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 10 Dec 2020 17:05:08 -0600 Subject: [PATCH 089/341] docs: move and refresh view samples (#420) docs: restore old view snippets remove relative imports docs: fix missing space in comment sort imports --- samples/snippets/conftest.py | 27 ++++ samples/snippets/materialized_view.py | 2 +- samples/snippets/materialized_view_test.py | 14 +- samples/snippets/view.py | 164 +++++++++++++++++++++ samples/snippets/view_test.py | 117 +++++++++++++++ 5 files changed, 311 insertions(+), 13 deletions(-) create mode 100644 samples/snippets/conftest.py create mode 100644 samples/snippets/view.py create mode 100644 
samples/snippets/view_test.py diff --git a/samples/snippets/conftest.py b/samples/snippets/conftest.py new file mode 100644 index 000000000..d22a33318 --- /dev/null +++ b/samples/snippets/conftest.py @@ -0,0 +1,27 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery +import pytest + + +@pytest.fixture(scope="session") +def bigquery_client(): + bigquery_client = bigquery.Client() + return bigquery_client + + +@pytest.fixture(scope="session") +def project_id(bigquery_client): + return bigquery_client.project diff --git a/samples/snippets/materialized_view.py b/samples/snippets/materialized_view.py index d925ec230..429bd98b4 100644 --- a/samples/snippets/materialized_view.py +++ b/samples/snippets/materialized_view.py @@ -25,7 +25,7 @@ def create_materialized_view(override_values={}): # To facilitate testing, we replace values with alternatives # provided by the testing harness. 
view_id = override_values.get("view_id", view_id) - base_table_id = override_values.get("base_table_id", view_id) + base_table_id = override_values.get("base_table_id", base_table_id) # [START bigquery_create_materialized_view] view = bigquery.Table(view_id) view.mview_query = f""" diff --git a/samples/snippets/materialized_view_test.py b/samples/snippets/materialized_view_test.py index fc3db533c..75c6b2106 100644 --- a/samples/snippets/materialized_view_test.py +++ b/samples/snippets/materialized_view_test.py @@ -23,13 +23,8 @@ def temp_suffix(): - return str(uuid.uuid4()).replace("-", "_") - - -@pytest.fixture(scope="module") -def bigquery_client(): - bigquery_client = bigquery.Client() - return bigquery_client + now = datetime.datetime.now() + return f"{now.strftime('%Y%m%d%H%M%S')}_{uuid.uuid4().hex[:8]}" @pytest.fixture(autouse=True) @@ -37,11 +32,6 @@ def bigquery_client_patch(monkeypatch, bigquery_client): monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client) -@pytest.fixture(scope="module") -def project_id(bigquery_client): - return bigquery_client.project - - @pytest.fixture(scope="module") def dataset_id(bigquery_client): dataset_id = f"mvdataset_{temp_suffix()}" diff --git a/samples/snippets/view.py b/samples/snippets/view.py new file mode 100644 index 000000000..ad3f11717 --- /dev/null +++ b/samples/snippets/view.py @@ -0,0 +1,164 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +def create_view(override_values={}): + # [START bigquery_create_view] + from google.cloud import bigquery + + client = bigquery.Client() + + view_id = "my-project.my_dataset.my_view" + source_id = "my-project.my_dataset.my_table" + # [END bigquery_create_view] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + view_id = override_values.get("view_id", view_id) + source_id = override_values.get("source_id", source_id) + # [START bigquery_create_view] + view = bigquery.Table(view_id) + + # The source table in this example is created from a CSV file in Google + # Cloud Storage located at + # `gs://cloud-samples-data/bigquery/us-states/us-states.csv`. It contains + # 50 US states, while the view returns only those states with names + # starting with the letter 'W'. + view.view_query = f"SELECT name, post_abbr FROM `{source_id}` WHERE name LIKE 'W%'" + + # Make an API request to create the view. + view = client.create_table(view) + print(f"Created {view.table_type}: {str(view.reference)}") + # [END bigquery_create_view] + return view + + +def get_view(override_values={}): + # [START bigquery_get_view] + from google.cloud import bigquery + + client = bigquery.Client() + + view_id = "my-project.my_dataset.my_view" + # [END bigquery_get_view] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + view_id = override_values.get("view_id", view_id) + # [START bigquery_get_view] + # Make an API request to get the table resource. 
+ view = client.get_table(view_id) + + # Display view properties + print(f"Retrieved {view.table_type}: {str(view.reference)}") + print(f"View Query:\n{view.view_query}") + # [END bigquery_get_view] + return view + + +def update_view(override_values={}): + # [START bigquery_update_view_query] + from google.cloud import bigquery + + client = bigquery.Client() + + view_id = "my-project.my_dataset.my_view" + source_id = "my-project.my_dataset.my_table" + # [END bigquery_update_view_query] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + view_id = override_values.get("view_id", view_id) + source_id = override_values.get("source_id", source_id) + # [START bigquery_update_view_query] + view = bigquery.Table(view_id) + + # The source table in this example is created from a CSV file in Google + # Cloud Storage located at + # `gs://cloud-samples-data/bigquery/us-states/us-states.csv`. It contains + # 50 US states, while the view returns only those states with names + # starting with the letter 'M'. + view.view_query = f"SELECT name, post_abbr FROM `{source_id}` WHERE name LIKE 'M%'" + + # Make an API request to update the query property of the view. + view = client.update_table(view, ["view_query"]) + print(f"Updated {view.table_type}: {str(view.reference)}") + # [END bigquery_update_view_query] + return view + + +def grant_access(override_values={}): + # [START bigquery_grant_view_access] + from google.cloud import bigquery + + client = bigquery.Client() + + # To use a view, the analyst requires ACLs to both the view and the source + # table. Create an authorized view to allow an analyst to use a view + # without direct access permissions to the source table. + view_dataset_id = "my-project.my_view_dataset" + # [END bigquery_grant_view_access] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. 
+ view_dataset_id = override_values.get("view_dataset_id", view_dataset_id) + # [START bigquery_grant_view_access] + # Make an API request to get the view dataset ACLs. + view_dataset = client.get_dataset(view_dataset_id) + + analyst_group_email = "data_analysts@example.com" + # [END bigquery_grant_view_access] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + analyst_group_email = override_values.get( + "analyst_group_email", analyst_group_email + ) + # [START bigquery_grant_view_access] + access_entries = view_dataset.access_entries + access_entries.append( + bigquery.AccessEntry("READER", "groupByEmail", analyst_group_email) + ) + view_dataset.access_entries = access_entries + + # Make an API request to update the ACLs property of the view dataset. + view_dataset = client.update_dataset(view_dataset, ["access_entries"]) + print(f"Access to view: {view_dataset.access_entries}") + + # Group members of "data_analysts@example.com" now have access to the view, + # but they require access to the source table to use it. To remove this + # restriction, authorize the view to access the source dataset. + source_dataset_id = "my-project.my_source_dataset" + # [END bigquery_grant_view_access] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + source_dataset_id = override_values.get("source_dataset_id", source_dataset_id) + # [START bigquery_grant_view_access] + # Make an API request to set the source dataset ACLs. + source_dataset = client.get_dataset(source_dataset_id) + + view_reference = { + "projectId": "my-project", + "datasetId": "my_view_dataset", + "tableId": "my_authorized_view", + } + # [END bigquery_grant_view_access] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. 
+ view_reference = override_values.get("view_reference", view_reference) + # [START bigquery_grant_view_access] + access_entries = source_dataset.access_entries + access_entries.append(bigquery.AccessEntry(None, "view", view_reference)) + source_dataset.access_entries = access_entries + + # Make an API request to update the ACLs property of the source dataset. + source_dataset = client.update_dataset(source_dataset, ["access_entries"]) + print(f"Access to source: {source_dataset.access_entries}") + # [END bigquery_grant_view_access] + return view_dataset, source_dataset diff --git a/samples/snippets/view_test.py b/samples/snippets/view_test.py new file mode 100644 index 000000000..77105b61a --- /dev/null +++ b/samples/snippets/view_test.py @@ -0,0 +1,117 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import datetime +import uuid + +from google.cloud import bigquery +import pytest + +import view + + +def temp_suffix(): + now = datetime.datetime.now() + return f"{now.strftime('%Y%m%d%H%M%S')}_{uuid.uuid4().hex[:8]}" + + +@pytest.fixture(autouse=True) +def bigquery_client_patch(monkeypatch, bigquery_client): + monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client) + + +@pytest.fixture(scope="module") +def view_dataset_id(bigquery_client, project_id): + dataset_id = f"{project_id}.view_{temp_suffix()}" + bigquery_client.create_dataset(dataset_id) + yield dataset_id + bigquery_client.delete_dataset(dataset_id, delete_contents=True) + + +@pytest.fixture(scope="module") +def view_id(bigquery_client, view_dataset_id): + view_id = f"{view_dataset_id}.my_view" + yield view_id + bigquery_client.delete_table(view_id, not_found_ok=True) + + +@pytest.fixture(scope="module") +def source_dataset_id(bigquery_client, project_id): + dataset_id = f"{project_id}.view_{temp_suffix()}" + bigquery_client.create_dataset(dataset_id) + yield dataset_id + bigquery_client.delete_dataset(dataset_id, delete_contents=True) + + +@pytest.fixture(scope="module") +def source_table_id(bigquery_client, source_dataset_id): + source_table_id = f"{source_dataset_id}.us_states" + job_config = bigquery.LoadJobConfig( + schema=[ + bigquery.SchemaField("name", "STRING"), + bigquery.SchemaField("post_abbr", "STRING"), + ], + skip_leading_rows=1, + ) + load_job = bigquery_client.load_table_from_uri( + "gs://cloud-samples-data/bigquery/us-states/us-states.csv", + source_table_id, + job_config=job_config, + ) + load_job.result() + yield source_table_id + bigquery_client.delete_table(source_table_id, not_found_ok=True) + + +def test_view(capsys, view_id, view_dataset_id, source_table_id, source_dataset_id): + override_values = { + "view_id": view_id, + "source_id": source_table_id, + } + got = view.create_view(override_values) + assert source_table_id in got.view_query + out, _ = 
capsys.readouterr() + assert view_id in out + + got = view.get_view(override_values) + assert source_table_id in got.view_query + assert "'W%'" in got.view_query + out, _ = capsys.readouterr() + assert view_id in out + assert source_table_id in out + assert "'W%'" in out + + got = view.update_view(override_values) + assert source_table_id in got.view_query + assert "'M%'" in got.view_query + out, _ = capsys.readouterr() + assert view_id in out + + project_id, dataset_id, table_id = view_id.split(".") + override_values = { + "analyst_group_email": "cloud-dpes-bigquery@google.com", + "view_dataset_id": view_dataset_id, + "source_dataset_id": source_dataset_id, + "view_reference": { + "projectId": project_id, + "datasetId": dataset_id, + "tableId": table_id, + }, + } + view_dataset, source_dataset = view.grant_access(override_values) + assert len(view_dataset.access_entries) != 0 + assert len(source_dataset.access_entries) != 0 + out, _ = capsys.readouterr() + assert "cloud-dpes-bigquery@google.com" in out + assert table_id in out From aac33df45f86ddb54f8d68ecafea8184bb009652 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 22 Dec 2020 16:20:48 -0600 Subject: [PATCH 090/341] test: add session to test with nightly dependencies (#449) This should catch errors introduced in the next versions of dependency packages. --- .kokoro/presubmit/prerelease-deps-3.8.cfg | 7 +++++ noxfile.py | 32 +++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 .kokoro/presubmit/prerelease-deps-3.8.cfg diff --git a/.kokoro/presubmit/prerelease-deps-3.8.cfg b/.kokoro/presubmit/prerelease-deps-3.8.cfg new file mode 100644 index 000000000..f06806baf --- /dev/null +++ b/.kokoro/presubmit/prerelease-deps-3.8.cfg @@ -0,0 +1,7 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Only run this nox session. 
+env_vars: { + key: "NOX_SESSION" + value: "prerelease_deps" +} \ No newline at end of file diff --git a/noxfile.py b/noxfile.py index 8523eabb5..f3326d01b 100644 --- a/noxfile.py +++ b/noxfile.py @@ -168,6 +168,38 @@ def cover(session): session.run("coverage", "erase") +@nox.session(python="3.8") +def prerelease_deps(session): + """Run all tests with prerelease versions of dependencies installed. + + https://github.com/googleapis/python-bigquery/issues/95 + """ + # PyArrow prerelease packages are published to an alternative PyPI host. + # https://arrow.apache.org/docs/python/install.html#installing-nightly-packages + session.install( + "--extra-index-url", "https://pypi.fury.io/arrow-nightlies/", "--pre", "pyarrow" + ) + session.install("--pre", "grpcio", "pandas") + session.install( + "mock", + "pytest", + "google-cloud-testutils", + "pytest-cov", + "freezegun", + "IPython", + ) + session.install("-e", ".[all]") + + # Print out prerelease package versions. + session.run("python", "-c", "import grpc; print(grpc.__version__)") + session.run("python", "-c", "import pandas; print(pandas.__version__)") + session.run("python", "-c", "import pyarrow; print(pyarrow.__version__)") + + # Run all tests, except a few samples tests which require extra dependencies. + session.run("py.test", "tests") + session.run("py.test", "samples/tests") + + @nox.session(python="3.8") def lint(session): """Run linters. 
From b1f2f48af8d460c80eda377518df968c300bd893 Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Tue, 5 Jan 2021 15:37:44 -0700 Subject: [PATCH 091/341] chore: add constraints file (#456) * chore: add constraints file * chore: add constraints file * chore: add constraints file * chore: add constraints file --- testing/constraints-3.10.txt | 0 testing/constraints-3.11.txt | 0 testing/constraints-3.6.txt | 33 ++++++++++++++++++++++++--------- testing/constraints-3.9.txt | 0 4 files changed, 24 insertions(+), 9 deletions(-) create mode 100644 testing/constraints-3.10.txt create mode 100644 testing/constraints-3.11.txt create mode 100644 testing/constraints-3.9.txt diff --git a/testing/constraints-3.10.txt b/testing/constraints-3.10.txt new file mode 100644 index 000000000..e69de29bb diff --git a/testing/constraints-3.11.txt b/testing/constraints-3.11.txt new file mode 100644 index 000000000..e69de29bb diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index 91a507a5c..fe2bcfda7 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -1,16 +1,31 @@ +# This constraints file is used to check that lower bounds +# are correct in setup.py +# List *all* library dependencies and extras in this file. +# Pin the version to the lower bound. +# +# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", +# Then this file should have foo==1.14.0 google-api-core==1.23.0 -google-cloud-bigquery-storage==2.0.0 +proto-plus==1.10.0 google-cloud-core==1.4.1 google-resumable-media==0.6.0 +six==1.13.0 +protobuf==3.12.0 +google-cloud-bigquery-storage==2.0.0 grpcio==1.32.0 -ipython==5.5 -libcst==0.2.5 -llvmlite==0.34.0 -# pandas 0.23.0 is the first version to work with pyarrow to_pandas. 
+pyarrow==1.0.0 pandas==0.23.0 -protobuf == 3.12.0 -proto-plus==1.10.0 pyarrow==1.0.0 -python-snappy==0.5.4 -six==1.13.0 tqdm==4.7.4 +opentelemetry-api==0.11b0 +opentelemetry-sdk==0.11b0 +opentelemetry-instrumentation==0.11b0 +google-cloud-bigquery-storage==2.0.0 +grpcio==1.32.0 +pyarrow==1.0.0 +opentelemetry-api==0.11b0 +opentelemetry-sdk==0.11b0 +opentelemetry-instrumentation==0.11b0 +pandas==0.23.0 +pyarrow==1.0.0 +tqdm==4.7.4 \ No newline at end of file diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt new file mode 100644 index 000000000..e69de29bb From c2e70603d2946eea244c371b5bf6758c2da8d6b3 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 7 Jan 2021 00:06:40 +0100 Subject: [PATCH 092/341] chore(deps): update dependency pytz to v2020.5 (#452) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 0b9b69487..5cda34214 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -7,4 +7,4 @@ ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.3 pandas==1.1.5 pyarrow==2.0.0 -pytz==2020.4 +pytz==2020.5 From 0337ea0bde966c3ccb94960493a6fa6f2bee49b4 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 7 Jan 2021 00:26:11 +0100 Subject: [PATCH 093/341] chore(deps): update dependency pandas to v1.2.0 (#454) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Update | Change | |---|---|---| | [pandas](https://pandas.pydata.org) ([source](https://togithub.com/pandas-dev/pandas)) | minor | `==1.1.5` -> `==1.2.0` | --- ### Release Notes
pandas-dev/pandas ### [`v1.2.0`](https://togithub.com/pandas-dev/pandas/releases/v1.2.0) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.1.5...v1.2.0) This release includes some new features, bug fixes, and performance improvements. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.0/whatsnew/v1.2.0.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install -c conda-forge pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues).
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/snippets/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 5cda34214..208eb4526 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -5,6 +5,7 @@ grpcio==1.34.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.3 -pandas==1.1.5 +pandas==1.1.5; python_version < '3.7' +pandas==1.2.0; python_version >= '3.7' pyarrow==2.0.0 pytz==2020.5 From d01d199839a13157e8bb290248c4a2e86916cc0c Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Fri, 8 Jan 2021 13:29:22 -0700 Subject: [PATCH 094/341] ci: use python3 instead of python3.6 in build.sh (#425) * ci: skip docfx in main 'Kokoro' presubmit * fix: specify default sessions in noxfile * fix: use python3 instead of 3.6 * fix: add NOX_SESSION to pass down envvars * fix: remove quotes arround sessions Co-authored-by: Tim Swast --- .kokoro/build.sh | 10 +++++----- .trampolinerc | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.kokoro/build.sh b/.kokoro/build.sh index cb81a05f8..058f363e1 100755 --- a/.kokoro/build.sh +++ b/.kokoro/build.sh @@ -34,16 +34,16 @@ export 
GOOGLE_APPLICATION_CREDENTIALS=${KOKORO_GFILE_DIR}/service-account.json export PROJECT_ID=$(cat "${KOKORO_GFILE_DIR}/project-id.json") # Remove old nox -python3.6 -m pip uninstall --yes --quiet nox-automation +python3 -m pip uninstall --yes --quiet nox-automation # Install nox -python3.6 -m pip install --upgrade --quiet nox -python3.6 -m nox --version +python3 -m pip install --upgrade --quiet nox +python3 -m nox --version # If NOX_SESSION is set, it only runs the specified session, # otherwise run all the sessions. if [[ -n "${NOX_SESSION:-}" ]]; then - python3.6 -m nox -s "${NOX_SESSION:-}" + python3 -m nox -s ${NOX_SESSION:-} else - python3.6 -m nox + python3 -m nox fi diff --git a/.trampolinerc b/.trampolinerc index 995ee2911..c7d663ae9 100644 --- a/.trampolinerc +++ b/.trampolinerc @@ -18,12 +18,14 @@ required_envvars+=( "STAGING_BUCKET" "V2_STAGING_BUCKET" + "NOX_SESSION" ) # Add env vars which are passed down into the container here. pass_down_envvars+=( "STAGING_BUCKET" "V2_STAGING_BUCKET" + "NOX_SESSION" ) # Prevent unintentional override on the default image. 
From 0023d193d36e473095ecaf8a9b9456fb731d583b Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 8 Jan 2021 22:20:21 +0100 Subject: [PATCH 095/341] chore: remove six dependency (#461) * chore: remove six dependency * Remove now-redundant self argument --- google/cloud/bigquery/_helpers.py | 3 +- google/cloud/bigquery/_pandas_helpers.py | 5 +- google/cloud/bigquery/client.py | 5 +- google/cloud/bigquery/dataset.py | 15 ++-- google/cloud/bigquery/dbapi/_helpers.py | 10 ++- google/cloud/bigquery/dbapi/cursor.py | 4 +- google/cloud/bigquery/enums.py | 4 +- google/cloud/bigquery/job/base.py | 40 +++++------ google/cloud/bigquery/job/query.py | 5 +- google/cloud/bigquery/magics/magics.py | 12 ++-- google/cloud/bigquery/model.py | 5 +- google/cloud/bigquery/routine.py | 3 +- google/cloud/bigquery/schema.py | 4 +- google/cloud/bigquery/table.py | 18 +++-- samples/load_table_uri_truncate_avro.py | 4 +- samples/load_table_uri_truncate_csv.py | 4 +- samples/load_table_uri_truncate_json.py | 4 +- samples/load_table_uri_truncate_orc.py | 4 +- samples/load_table_uri_truncate_parquet.py | 4 +- .../tests/test_copy_table_multiple_source.py | 4 +- setup.py | 1 - tests/system.py | 26 ++++--- tests/unit/job/test_base.py | 6 +- tests/unit/job/test_query.py | 6 +- tests/unit/test__helpers.py | 5 +- tests/unit/test__http.py | 12 ++-- tests/unit/test_client.py | 72 +++++++++---------- tests/unit/test_dbapi__helpers.py | 8 +-- tests/unit/test_dbapi_connection.py | 5 +- tests/unit/test_dbapi_cursor.py | 7 +- tests/unit/test_opentelemetry_tracing.py | 6 +- tests/unit/test_table.py | 27 ++----- 32 files changed, 150 insertions(+), 188 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 100136108..6b66a3020 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -18,7 +18,6 @@ import datetime import decimal import re -import six from google.cloud._helpers import UTC from google.cloud._helpers import 
_date_from_iso8601_date @@ -451,7 +450,7 @@ def _record_field_to_json(fields, row_value): for field_name in not_processed: value = row_value[field_name] if value is not None: - record[field_name] = six.text_type(value) + record[field_name] = str(value) return record diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 7774ce26b..162c58b4b 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -17,10 +17,9 @@ import concurrent.futures import functools import logging +import queue import warnings -import six -from six.moves import queue try: import pandas @@ -738,7 +737,7 @@ def download_dataframe_bqstorage( def dataframe_to_json_generator(dataframe): for row in dataframe.itertuples(index=False, name=None): output = {} - for column, value in six.moves.zip(dataframe.columns, row): + for column, value in zip(dataframe.columns, row): # Omit NaN values. if value != value: continue diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 28cac64ad..19693c9ff 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -34,7 +34,6 @@ import pyarrow except ImportError: # pragma: NO COVER pyarrow = None -import six from google import resumable_media from google.resumable_media.requests import MultipartUpload @@ -2017,7 +2016,7 @@ def load_table_from_uri( job_ref = job._JobReference(job_id, project=project, location=location) - if isinstance(source_uris, six.string_types): + if isinstance(source_uris, str): source_uris = [source_uris] destination = _table_arg_to_table_ref(destination, default_project=self.project) @@ -2779,7 +2778,7 @@ def extract_table( ) ) - if isinstance(destination_uris, six.string_types): + if isinstance(destination_uris, str): destination_uris = [destination_uris] if job_config: diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index ce07c8048..2d3a4755f 100644 --- 
a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -16,7 +16,6 @@ from __future__ import absolute_import -import six import copy import google.cloud._helpers @@ -260,9 +259,9 @@ class DatasetReference(object): """ def __init__(self, project, dataset_id): - if not isinstance(project, six.string_types): + if not isinstance(project, str): raise ValueError("Pass a string for project") - if not isinstance(dataset_id, six.string_types): + if not isinstance(dataset_id, str): raise ValueError("Pass a string for dataset_id") self._project = project self._dataset_id = dataset_id @@ -407,7 +406,7 @@ class Dataset(object): } def __init__(self, dataset_ref): - if isinstance(dataset_ref, six.string_types): + if isinstance(dataset_ref, str): dataset_ref = DatasetReference.from_string(dataset_ref) self._properties = {"datasetReference": dataset_ref.to_api_repr(), "labels": {}} @@ -544,7 +543,7 @@ def default_table_expiration_ms(self): @default_table_expiration_ms.setter def default_table_expiration_ms(self, value): - if not isinstance(value, six.integer_types) and value is not None: + if not isinstance(value, int) and value is not None: raise ValueError("Pass an integer, or None") self._properties["defaultTableExpirationMs"] = _helpers._str_or_none(value) @@ -560,7 +559,7 @@ def description(self): @description.setter def description(self, value): - if not isinstance(value, six.string_types) and value is not None: + if not isinstance(value, str) and value is not None: raise ValueError("Pass a string, or None") self._properties["description"] = value @@ -576,7 +575,7 @@ def friendly_name(self): @friendly_name.setter def friendly_name(self, value): - if not isinstance(value, six.string_types) and value is not None: + if not isinstance(value, str) and value is not None: raise ValueError("Pass a string, or None") self._properties["friendlyName"] = value @@ -592,7 +591,7 @@ def location(self): @location.setter def location(self, value): - if not 
isinstance(value, six.string_types) and value is not None: + if not isinstance(value, str) and value is not None: raise ValueError("Pass a string, or None") self._properties["location"] = value diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index fdf4e17c3..95b5869e5 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -19,8 +19,6 @@ import functools import numbers -import six - from google.cloud import bigquery from google.cloud.bigquery import table from google.cloud.bigquery.dbapi import exceptions @@ -132,7 +130,7 @@ def to_query_parameters_dict(parameters): """ result = [] - for name, value in six.iteritems(parameters): + for name, value in parameters.items(): if isinstance(value, collections_abc.Mapping): raise NotImplementedError( "STRUCT-like parameter values are not supported " @@ -187,9 +185,9 @@ def bigquery_scalar_type(value): return "FLOAT64" elif isinstance(value, decimal.Decimal): return "NUMERIC" - elif isinstance(value, six.text_type): + elif isinstance(value, str): return "STRING" - elif isinstance(value, six.binary_type): + elif isinstance(value, bytes): return "BYTES" elif isinstance(value, datetime.datetime): return "DATETIME" if value.tzinfo is None else "TIMESTAMP" @@ -215,7 +213,7 @@ def array_like(value): bool: ``True`` if the value is considered array-like, ``False`` otherwise. 
""" return isinstance(value, collections_abc.Sequence) and not isinstance( - value, (six.text_type, six.binary_type, bytearray) + value, (str, bytes, bytearray) ) diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py index f48b47c12..e90bcc2c0 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -19,8 +19,6 @@ import copy import logging -import six - from google.cloud.bigquery import job from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions @@ -289,7 +287,7 @@ def fetchone(self): """ self._try_fetch() try: - return six.next(self._query_data) + return next(self._query_data) except StopIteration: return None diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 3f72333af..2268808fd 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -15,7 +15,7 @@ import re import enum -import six +import itertools from google.cloud.bigquery_v2 import types as gapic_types @@ -178,7 +178,7 @@ def _make_sql_scalars_enum(): ) new_doc = "\n".join( - six.moves.filterfalse(skip_pattern.search, orig_doc.splitlines()) + itertools.filterfalse(skip_pattern.search, orig_doc.splitlines()) ) new_enum.__doc__ = "An Enum of scalar SQL types.\n" + new_doc diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index 2f4ae1460..3c601f072 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -15,11 +15,11 @@ """Base classes and helpers for job classes.""" import copy +import http import threading from google.api_core import exceptions import google.api_core.future.polling -from six.moves import http_client from google.cloud.bigquery import _helpers from google.cloud.bigquery.retry import DEFAULT_RETRY @@ -28,24 +28,24 @@ _DONE_STATE = "DONE" _STOPPED_REASON = "stopped" _ERROR_REASON_TO_EXCEPTION = { - "accessDenied": http_client.FORBIDDEN, - 
"backendError": http_client.INTERNAL_SERVER_ERROR, - "billingNotEnabled": http_client.FORBIDDEN, - "billingTierLimitExceeded": http_client.BAD_REQUEST, - "blocked": http_client.FORBIDDEN, - "duplicate": http_client.CONFLICT, - "internalError": http_client.INTERNAL_SERVER_ERROR, - "invalid": http_client.BAD_REQUEST, - "invalidQuery": http_client.BAD_REQUEST, - "notFound": http_client.NOT_FOUND, - "notImplemented": http_client.NOT_IMPLEMENTED, - "quotaExceeded": http_client.FORBIDDEN, - "rateLimitExceeded": http_client.FORBIDDEN, - "resourceInUse": http_client.BAD_REQUEST, - "resourcesExceeded": http_client.BAD_REQUEST, - "responseTooLarge": http_client.FORBIDDEN, - "stopped": http_client.OK, - "tableUnavailable": http_client.BAD_REQUEST, + "accessDenied": http.client.FORBIDDEN, + "backendError": http.client.INTERNAL_SERVER_ERROR, + "billingNotEnabled": http.client.FORBIDDEN, + "billingTierLimitExceeded": http.client.BAD_REQUEST, + "blocked": http.client.FORBIDDEN, + "duplicate": http.client.CONFLICT, + "internalError": http.client.INTERNAL_SERVER_ERROR, + "invalid": http.client.BAD_REQUEST, + "invalidQuery": http.client.BAD_REQUEST, + "notFound": http.client.NOT_FOUND, + "notImplemented": http.client.NOT_IMPLEMENTED, + "quotaExceeded": http.client.FORBIDDEN, + "rateLimitExceeded": http.client.FORBIDDEN, + "resourceInUse": http.client.BAD_REQUEST, + "resourcesExceeded": http.client.BAD_REQUEST, + "responseTooLarge": http.client.FORBIDDEN, + "stopped": http.client.OK, + "tableUnavailable": http.client.BAD_REQUEST, } @@ -66,7 +66,7 @@ def _error_result_to_exception(error_result): """ reason = error_result.get("reason") status_code = _ERROR_REASON_TO_EXCEPTION.get( - reason, http_client.INTERNAL_SERVER_ERROR + reason, http.client.INTERNAL_SERVER_ERROR ) return exceptions.from_http_status( status_code, error_result.get("message", ""), errors=[error_result] diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 9e8908613..d87f87f52 
100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -20,7 +20,6 @@ from google.api_core import exceptions import requests -import six from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem @@ -192,7 +191,7 @@ def default_dataset(self, value): self._set_sub_prop("defaultDataset", None) return - if isinstance(value, six.string_types): + if isinstance(value, str): value = DatasetReference.from_string(value) if isinstance(value, (Dataset, DatasetListItem)): @@ -1168,7 +1167,7 @@ def result( exc.query_job = self raise except requests.exceptions.Timeout as exc: - six.raise_from(concurrent.futures.TimeoutError, exc) + raise concurrent.futures.TimeoutError from exc # If the query job is complete but there are no query results, this was # special job, such as a DDL query. Return an empty result set to diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index f04a6364a..8f343ddcc 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -153,8 +153,6 @@ except ImportError: # pragma: NO COVER raise ImportError("This module can only be loaded in IPython.") -import six - from google.api_core import client_info from google.api_core import client_options from google.api_core.exceptions import NotFound @@ -577,16 +575,16 @@ def _cell_magic(line, query): "--params is not a correctly formatted JSON string or a JSON " "serializable dictionary" ) - six.raise_from(rebranded_error, exc) + raise rebranded_error from exc except lap.exceptions.DuplicateQueryParamsError as exc: rebranded_error = ValueError("Duplicate --params option.") - six.raise_from(rebranded_error, exc) + raise rebranded_error from exc except lap.exceptions.ParseError as exc: rebranded_error = ValueError( "Unrecognized input, are option values correct? 
" "Error details: {}".format(exc.args[0]) ) - six.raise_from(rebranded_error, exc) + raise rebranded_error from exc args = magic_arguments.parse_argstring(_cell_magic, rest_of_args) @@ -768,7 +766,7 @@ def _make_bqstorage_client(use_bqstorage_api, credentials, client_options): "to use it. Alternatively, use the classic REST API by specifying " "the --use_rest_api magic option." ) - six.raise_from(customized_error, err) + raise customized_error from err try: from google.api_core.gapic_v1 import client_info as gapic_client_info @@ -776,7 +774,7 @@ def _make_bqstorage_client(use_bqstorage_api, credentials, client_options): customized_error = ImportError( "Install the grpcio package to use the BigQuery Storage API." ) - six.raise_from(customized_error, err) + raise customized_error from err return bigquery_storage.BigQueryReadClient( credentials=credentials, diff --git a/google/cloud/bigquery/model.py b/google/cloud/bigquery/model.py index 0f5d8f83b..55846bd1a 100644 --- a/google/cloud/bigquery/model.py +++ b/google/cloud/bigquery/model.py @@ -19,7 +19,6 @@ import copy from google.protobuf import json_format -import six import google.cloud._helpers from google.api_core import datetime_helpers @@ -63,7 +62,7 @@ def __init__(self, model_ref): # buffer classes do not. self._properties = {} - if isinstance(model_ref, six.string_types): + if isinstance(model_ref, str): model_ref = ModelReference.from_string(model_ref) if model_ref: @@ -455,7 +454,7 @@ def _model_arg_to_model_ref(value, default_project=None): This function keeps ModelReference and other kinds of objects unchanged. 
""" - if isinstance(value, six.string_types): + if isinstance(value, str): return ModelReference.from_string(value, default_project=default_project) if isinstance(value, Model): return value.reference diff --git a/google/cloud/bigquery/routine.py b/google/cloud/bigquery/routine.py index 6a0ed9fb0..f26f20886 100644 --- a/google/cloud/bigquery/routine.py +++ b/google/cloud/bigquery/routine.py @@ -17,7 +17,6 @@ """Define resources for the BigQuery Routines API.""" from google.protobuf import json_format -import six import google.cloud._helpers from google.cloud.bigquery import _helpers @@ -54,7 +53,7 @@ class Routine(object): } def __init__(self, routine_ref, **kwargs): - if isinstance(routine_ref, six.string_types): + if isinstance(routine_ref, str): routine_ref = RoutineReference.from_string(routine_ref) self._properties = {"routineReference": routine_ref.to_api_repr()} diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 8ae0a3a85..c76aded02 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -14,7 +14,7 @@ """Schemas for BigQuery tables / queries.""" -from six.moves import collections_abc +import collections from google.cloud.bigquery_v2 import types @@ -318,7 +318,7 @@ def _to_schema_fields(schema): instance or a compatible mapping representation of the field. """ for field in schema: - if not isinstance(field, (SchemaField, collections_abc.Mapping)): + if not isinstance(field, (SchemaField, collections.abc.Mapping)): raise ValueError( "Schema items must either be fields or compatible " "mapping representations." 
diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 6daccf518..a2366b806 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -24,8 +24,6 @@ import pytz import warnings -import six - try: import pandas except ImportError: # pragma: NO COVER @@ -657,7 +655,7 @@ def description(self): @description.setter def description(self, value): - if not isinstance(value, six.string_types) and value is not None: + if not isinstance(value, str) and value is not None: raise ValueError("Pass a string, or None") self._properties["description"] = value @@ -694,7 +692,7 @@ def friendly_name(self): @friendly_name.setter def friendly_name(self, value): - if not isinstance(value, six.string_types) and value is not None: + if not isinstance(value, str) and value is not None: raise ValueError("Pass a string, or None") self._properties["friendlyName"] = value @@ -721,7 +719,7 @@ def view_query(self): @view_query.setter def view_query(self, value): - if not isinstance(value, six.string_types): + if not isinstance(value, str): raise ValueError("Pass a string") _helpers._set_sub_prop(self._properties, ["view", "query"], value) view = self._properties["view"] @@ -1244,7 +1242,7 @@ def keys(self): >>> list(Row(('a', 'b'), {'x': 0, 'y': 1}).keys()) ['x', 'y'] """ - return six.iterkeys(self._xxx_field_to_index) + return self._xxx_field_to_index.keys() def items(self): """Return items as ``(key, value)`` pairs. 
@@ -1258,7 +1256,7 @@ def items(self): >>> list(Row(('a', 'b'), {'x': 0, 'y': 1}).items()) [('x', 'a'), ('y', 'b')] """ - for key, index in six.iteritems(self._xxx_field_to_index): + for key, index in self._xxx_field_to_index.items(): yield (key, copy.deepcopy(self._xxx_values[index])) def get(self, key, default=None): @@ -1308,7 +1306,7 @@ def __len__(self): return len(self._xxx_values) def __getitem__(self, key): - if isinstance(key, six.string_types): + if isinstance(key, str): value = self._xxx_field_to_index.get(key) if value is None: raise KeyError("no row field {!r}".format(key)) @@ -2293,7 +2291,7 @@ def _table_arg_to_table_ref(value, default_project=None): This function keeps TableReference and other kinds of objects unchanged. """ - if isinstance(value, six.string_types): + if isinstance(value, str): value = TableReference.from_string(value, default_project=default_project) if isinstance(value, (Table, TableListItem)): value = value.reference @@ -2305,7 +2303,7 @@ def _table_arg_to_table(value, default_project=None): This function keeps Table and other kinds of objects unchanged. 
""" - if isinstance(value, six.string_types): + if isinstance(value, str): value = TableReference.from_string(value, default_project=default_project) if isinstance(value, TableReference): value = Table(value) diff --git a/samples/load_table_uri_truncate_avro.py b/samples/load_table_uri_truncate_avro.py index 98a791477..1aa0aa49c 100644 --- a/samples/load_table_uri_truncate_avro.py +++ b/samples/load_table_uri_truncate_avro.py @@ -16,7 +16,7 @@ def load_table_uri_truncate_avro(table_id): # [START bigquery_load_table_gcs_avro_truncate] - import six + import io from google.cloud import bigquery @@ -33,7 +33,7 @@ def load_table_uri_truncate_avro(table_id): ], ) - body = six.BytesIO(b"Washington,WA") + body = io.BytesIO(b"Washington,WA") client.load_table_from_file(body, table_id, job_config=job_config).result() previous_rows = client.get_table(table_id).num_rows assert previous_rows > 0 diff --git a/samples/load_table_uri_truncate_csv.py b/samples/load_table_uri_truncate_csv.py index 73de7a8c1..198cdc281 100644 --- a/samples/load_table_uri_truncate_csv.py +++ b/samples/load_table_uri_truncate_csv.py @@ -16,7 +16,7 @@ def load_table_uri_truncate_csv(table_id): # [START bigquery_load_table_gcs_csv_truncate] - import six + import io from google.cloud import bigquery @@ -33,7 +33,7 @@ def load_table_uri_truncate_csv(table_id): ], ) - body = six.BytesIO(b"Washington,WA") + body = io.BytesIO(b"Washington,WA") client.load_table_from_file(body, table_id, job_config=job_config).result() previous_rows = client.get_table(table_id).num_rows assert previous_rows > 0 diff --git a/samples/load_table_uri_truncate_json.py b/samples/load_table_uri_truncate_json.py index a30fae736..d67d93e7b 100644 --- a/samples/load_table_uri_truncate_json.py +++ b/samples/load_table_uri_truncate_json.py @@ -16,7 +16,7 @@ def load_table_uri_truncate_json(table_id): # [START bigquery_load_table_gcs_json_truncate] - import six + import io from google.cloud import bigquery @@ -33,7 +33,7 @@ def 
load_table_uri_truncate_json(table_id): ], ) - body = six.BytesIO(b"Washington,WA") + body = io.BytesIO(b"Washington,WA") client.load_table_from_file(body, table_id, job_config=job_config).result() previous_rows = client.get_table(table_id).num_rows assert previous_rows > 0 diff --git a/samples/load_table_uri_truncate_orc.py b/samples/load_table_uri_truncate_orc.py index 18f963be2..90543b791 100644 --- a/samples/load_table_uri_truncate_orc.py +++ b/samples/load_table_uri_truncate_orc.py @@ -16,7 +16,7 @@ def load_table_uri_truncate_orc(table_id): # [START bigquery_load_table_gcs_orc_truncate] - import six + import io from google.cloud import bigquery @@ -33,7 +33,7 @@ def load_table_uri_truncate_orc(table_id): ], ) - body = six.BytesIO(b"Washington,WA") + body = io.BytesIO(b"Washington,WA") client.load_table_from_file(body, table_id, job_config=job_config).result() previous_rows = client.get_table(table_id).num_rows assert previous_rows > 0 diff --git a/samples/load_table_uri_truncate_parquet.py b/samples/load_table_uri_truncate_parquet.py index 28692d840..e036fc180 100644 --- a/samples/load_table_uri_truncate_parquet.py +++ b/samples/load_table_uri_truncate_parquet.py @@ -16,7 +16,7 @@ def load_table_uri_truncate_parquet(table_id): # [START bigquery_load_table_gcs_parquet_truncate] - import six + import io from google.cloud import bigquery @@ -33,7 +33,7 @@ def load_table_uri_truncate_parquet(table_id): ], ) - body = six.BytesIO(b"Washington,WA") + body = io.BytesIO(b"Washington,WA") client.load_table_from_file(body, table_id, job_config=job_config).result() previous_rows = client.get_table(table_id).num_rows assert previous_rows > 0 diff --git a/samples/tests/test_copy_table_multiple_source.py b/samples/tests/test_copy_table_multiple_source.py index 45c6d34f5..5bc4668b0 100644 --- a/samples/tests/test_copy_table_multiple_source.py +++ b/samples/tests/test_copy_table_multiple_source.py @@ -12,7 +12,7 @@ # See the License for the specific language governing 
permissions and # limitations under the License. -import six +import io from google.cloud import bigquery from .. import copy_table_multiple_source @@ -32,7 +32,7 @@ def test_copy_table_multiple_source(capsys, random_table_id, random_dataset_id, bigquery.SchemaField("post_abbr", "STRING"), ] ) - body = six.BytesIO(data) + body = io.BytesIO(data) client.load_table_from_file( body, table_ref, location="US", job_config=job_config ).result() diff --git a/setup.py b/setup.py index 5f4e506eb..fcafddbd2 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,6 @@ "proto-plus >= 1.10.0", "google-cloud-core >= 1.4.1, < 2.0dev", "google-resumable-media >= 0.6.0, < 2.0dev", - "six >=1.13.0,< 2.0.0dev", "protobuf >= 3.12.0", ] extras = { diff --git a/tests/system.py b/tests/system.py index 185722e83..bfe54b7df 100644 --- a/tests/system.py +++ b/tests/system.py @@ -18,6 +18,7 @@ import csv import datetime import decimal +import io import json import operator import os @@ -27,7 +28,6 @@ import re import requests -import six import psutil import pytest import pytz @@ -54,7 +54,7 @@ pyarrow = None try: import IPython - from IPython.utils import io + from IPython.utils import io as ipython_io from IPython.testing import tools from IPython.terminal import interactiveshell except ImportError: # pragma: NO COVER @@ -219,7 +219,7 @@ def test_get_service_account_email(self): got = client.get_service_account_email() - self.assertIsInstance(got, six.text_type) + self.assertIsInstance(got, str) self.assertIn("@", got) def _create_bucket(self, bucket_name, location=None): @@ -598,7 +598,7 @@ def test_update_table_schema(self): @staticmethod def _fetch_single_page(table, selected_fields=None): iterator = Config.CLIENT.list_rows(table, selected_fields=selected_fields) - page = six.next(iterator.pages) + page = next(iterator.pages) return list(page) def _create_table_many_columns(self, rowcount): @@ -1415,7 +1415,7 @@ def test_load_table_from_file_w_explicit_location(self): 
self._create_bucket(bucket_name, location="eu") # Create a temporary dataset & table in the EU. - table_bytes = six.BytesIO(b"a,3\nb,2\nc,1\n") + table_bytes = io.BytesIO(b"a,3\nb,2\nc,1\n") client = Config.CLIENT dataset = self.temp_dataset(_make_dataset_id("eu_load_file"), location="EU") table_ref = dataset.table("letters") @@ -2444,7 +2444,7 @@ def test_query_results_to_dataframe(self): self.assertEqual(list(df), column_names) # verify the column names exp_datatypes = { "id": int, - "author": six.text_type, + "author": str, "time_ts": pandas.Timestamp, "dead": bool, } @@ -2477,7 +2477,7 @@ def test_query_results_to_dataframe_w_bqstorage(self): self.assertEqual(list(df), column_names) exp_datatypes = { "id": int, - "author": six.text_type, + "author": str, "time_ts": pandas.Timestamp, "dead": bool, } @@ -2572,9 +2572,7 @@ def test_insert_rows_from_dataframe(self): assert len(row_tuples) == len(expected) for row, expected_row in zip(row_tuples, expected): - six.assertCountEqual( - self, row, expected_row - ) # column order does not matter + self.assertCountEqual(row, expected_row) # column order does not matter def test_insert_rows_nested_nested(self): # See #2951 @@ -2780,7 +2778,7 @@ def test_nested_table_to_arrow(self): {"string_col": "Some value", "record_col": record, "float_col": 3.14} ] rows = [json.dumps(row) for row in to_insert] - body = six.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) + body = io.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) table_id = "test_table" dataset = self.temp_dataset(_make_dataset_id("nested_df")) table = dataset.table(table_id) @@ -2858,7 +2856,7 @@ def test_nested_table_to_dataframe(self): } ] rows = [json.dumps(row) for row in to_insert] - body = six.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) + body = io.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) table_id = "test_table" dataset = self.temp_dataset(_make_dataset_id("nested_df")) table = dataset.table(table_id) @@ -2923,7 +2921,7 
@@ def test_list_rows_page_size(self): schema = [SF("string_col", "STRING", mode="NULLABLE")] to_insert = [{"string_col": "item%d" % i} for i in range(num_items)] rows = [json.dumps(row) for row in to_insert] - body = six.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) + body = io.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) table_id = "test_table" dataset = self.temp_dataset(_make_dataset_id("nested_df")) @@ -2997,7 +2995,7 @@ def test_bigquery_magic(): ORDER BY view_count DESC LIMIT 10 """ - with io.capture_output() as captured: + with ipython_io.capture_output() as captured: result = ip.run_cell_magic("bigquery", "--use_rest_api", sql) conn_count_end = len(current_process.connections()) diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index 12e2d4b8b..478e30e6f 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -13,12 +13,12 @@ # limitations under the License. import copy +import http import unittest from google.api_core import exceptions import google.api_core.retry import mock -from six.moves import http_client from .helpers import _make_client from .helpers import _make_connection @@ -35,14 +35,14 @@ def _call_fut(self, *args, **kwargs): def test_simple(self): error_result = {"reason": "invalid", "message": "bad request"} exception = self._call_fut(error_result) - self.assertEqual(exception.code, http_client.BAD_REQUEST) + self.assertEqual(exception.code, http.client.BAD_REQUEST) self.assertTrue(exception.message.startswith("bad request")) self.assertIn(error_result, exception.errors) def test_missing_reason(self): error_result = {} exception = self._call_fut(error_result) - self.assertEqual(exception.code, http_client.INTERNAL_SERVER_ERROR) + self.assertEqual(exception.code, http.client.INTERNAL_SERVER_ERROR) class Test_JobReference(unittest.TestCase): diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 0567b59cd..579a841d1 100644 --- a/tests/unit/job/test_query.py 
+++ b/tests/unit/job/test_query.py @@ -14,6 +14,7 @@ import concurrent import copy +import http import textwrap import freezegun @@ -21,7 +22,6 @@ import google.api_core.retry import mock import requests -from six.moves import http_client from google.cloud.bigquery.client import _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS import google.cloud.bigquery.query @@ -1210,7 +1210,7 @@ def test_result_error(self): job.result() self.assertIsInstance(exc_info.exception, exceptions.GoogleCloudError) - self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST) + self.assertEqual(exc_info.exception.code, http.client.BAD_REQUEST) exc_job_instance = getattr(exc_info.exception, "query_job", None) self.assertIs(exc_job_instance, job) @@ -1265,7 +1265,7 @@ def test__begin_error(self): job.result() self.assertIsInstance(exc_info.exception, exceptions.GoogleCloudError) - self.assertEqual(exc_info.exception.code, http_client.BAD_REQUEST) + self.assertEqual(exc_info.exception.code, http.client.BAD_REQUEST) exc_job_instance = getattr(exc_info.exception, "query_job", None) self.assertIs(exc_job_instance, job) diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 5907a3678..8948d4152 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -18,7 +18,6 @@ import unittest import mock -import six class Test_not_null(unittest.TestCase): @@ -894,7 +893,7 @@ def test_w_list_missing_fields(self): ] original = [42] - with six.assertRaisesRegex(self, ValueError, r".*not match schema length.*"): + with self.assertRaisesRegex(ValueError, r".*not match schema length.*"): self._call_fut(fields, original) def test_w_list_too_many_fields(self): @@ -904,7 +903,7 @@ def test_w_list_too_many_fields(self): ] original = [42, "two", "three"] - with six.assertRaisesRegex(self, ValueError, r".*not match schema length.*"): + with self.assertRaisesRegex(ValueError, r".*not match schema length.*"): self._call_fut(fields, original) def test_w_non_empty_dict(self): diff 
--git a/tests/unit/test__http.py b/tests/unit/test__http.py index 691c4c802..78e59cb30 100644 --- a/tests/unit/test__http.py +++ b/tests/unit/test__http.py @@ -35,8 +35,8 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_build_api_url_no_extra_query_params(self): - from six.moves.urllib.parse import parse_qsl - from six.moves.urllib.parse import urlsplit + from urllib.parse import parse_qsl + from urllib.parse import urlsplit conn = self._make_one(object()) uri = conn.build_api_url("/foo") @@ -49,8 +49,8 @@ def test_build_api_url_no_extra_query_params(self): self.assertEqual(parms, {}) def test_build_api_url_w_custom_endpoint(self): - from six.moves.urllib.parse import parse_qsl - from six.moves.urllib.parse import urlsplit + from urllib.parse import parse_qsl + from urllib.parse import urlsplit custom_endpoint = "https://foo-bigquery.googleapis.com" conn = self._make_one(object(), api_endpoint=custom_endpoint) @@ -64,8 +64,8 @@ def test_build_api_url_w_custom_endpoint(self): self.assertEqual(parms, {}) def test_build_api_url_w_extra_query_params(self): - from six.moves.urllib.parse import parse_qsl - from six.moves.urllib.parse import urlsplit + from urllib.parse import parse_qsl + from urllib.parse import urlsplit conn = self._make_one(object()) uri = conn.build_api_url("/foo", {"bar": "baz"}) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index e5ead0ccc..98dec00f9 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -18,7 +18,9 @@ import decimal import email import gzip +import http.client import io +import itertools import json import operator import unittest @@ -26,8 +28,6 @@ import mock import requests -import six -from six.moves import http_client import pytest import pytz import pkg_resources @@ -474,7 +474,7 @@ def test_list_projects_defaults(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = 
six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with({"path": "/projects"}, client, None) projects = list(page) @@ -508,7 +508,7 @@ def test_list_projects_w_timeout(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - six.next(iterator.pages) + next(iterator.pages) final_attributes.assert_called_once_with({"path": "/projects"}, client, None) @@ -528,7 +528,7 @@ def test_list_projects_explicit_response_missing_projects_key(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with({"path": "/projects"}, client, None) projects = list(page) @@ -582,7 +582,7 @@ def test_list_datasets_defaults(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) datasets = list(page) @@ -635,7 +635,7 @@ def test_list_datasets_explicit_response_missing_datasets_key(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) datasets = list(page) @@ -2919,7 +2919,7 @@ def test_list_tables_empty_w_timeout(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with({"path": path}, client, None) tables = list(page) @@ -2942,7 +2942,7 @@ def test_list_models_empty_w_timeout(self): with mock.patch( 
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with({"path": path}, client, None) models = list(page) @@ -2991,7 +2991,7 @@ def test_list_models_defaults(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) models = list(page) @@ -3022,7 +3022,7 @@ def test_list_routines_empty_w_timeout(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with( {"path": "/projects/test-routines/datasets/test_routines/routines"}, @@ -3080,7 +3080,7 @@ def test_list_routines_defaults(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with({"path": path}, client, None) routines = list(page) @@ -3149,7 +3149,7 @@ def test_list_tables_defaults(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) tables = list(page) @@ -3213,7 +3213,7 @@ def test_list_tables_explicit(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) tables = list(page) @@ -4040,7 +4040,7 @@ def test_list_jobs_defaults(self): 
with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) jobs = list(page) @@ -4090,7 +4090,7 @@ def test_list_jobs_load_job_wo_sourceUris(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) jobs = list(page) @@ -4124,7 +4124,7 @@ def test_list_jobs_explicit_missing(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = six.next(iterator.pages) + page = next(iterator.pages) final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) jobs = list(page) @@ -4412,7 +4412,7 @@ def _initiate_resumable_upload_helper(self, num_retries=None): # Create mocks to be checked for doing transport. 
resumable_url = "http://test.invalid?upload_id=hey-you" response_headers = {"location": resumable_url} - fake_transport = self._mock_transport(http_client.OK, response_headers) + fake_transport = self._mock_transport(http.client.OK, response_headers) client = self._make_one(project=self.PROJECT, _http=fake_transport) conn = client._connection = make_connection() @@ -4479,7 +4479,7 @@ def _do_multipart_upload_success_helper(self, get_boundary, num_retries=None): from google.cloud.bigquery.job import LoadJobConfig from google.cloud.bigquery.job import SourceFormat - fake_transport = self._mock_transport(http_client.OK, {}) + fake_transport = self._mock_transport(http.client.OK, {}) client = self._make_one(project=self.PROJECT, _http=fake_transport) conn = client._connection = make_connection() @@ -5022,7 +5022,7 @@ def test_extract_table_generated_job_id(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") - self.assertIsInstance(req["data"]["jobReference"]["jobId"], six.string_types) + self.assertIsInstance(req["data"]["jobReference"]["jobId"], str) self.assertIsNone(req["timeout"]) # Check the job resource. 
@@ -5227,7 +5227,7 @@ def test_query_defaults(self): job = client.query(QUERY) self.assertIsInstance(job, QueryJob) - self.assertIsInstance(job.job_id, six.string_types) + self.assertIsInstance(job.job_id, str) self.assertIs(job._client, client) self.assertEqual(job.query, QUERY) self.assertEqual(job.udf_resources, []) @@ -5240,7 +5240,7 @@ def test_query_defaults(self): self.assertEqual(req["path"], "/projects/PROJECT/jobs") self.assertIsNone(req["timeout"]) sent = req["data"] - self.assertIsInstance(sent["jobReference"]["jobId"], six.string_types) + self.assertIsInstance(sent["jobReference"]["jobId"], str) sent_config = sent["configuration"]["query"] self.assertEqual(sent_config["query"], QUERY) self.assertFalse(sent_config["useLegacySql"]) @@ -5687,7 +5687,7 @@ def test_query_w_udf_resources(self): self.assertEqual(req["path"], "/projects/PROJECT/jobs") self.assertIsNone(req["timeout"]) sent = req["data"] - self.assertIsInstance(sent["jobReference"]["jobId"], six.string_types) + self.assertIsInstance(sent["jobReference"]["jobId"], str) sent_config = sent["configuration"]["query"] self.assertEqual(sent_config["query"], QUERY) self.assertTrue(sent_config["useLegacySql"]) @@ -6398,7 +6398,7 @@ def test_insert_rows_from_dataframe(self): actual_calls = conn.api_request.call_args_list - for call, expected_data in six.moves.zip_longest( + for call, expected_data in itertools.zip_longest( actual_calls, EXPECTED_SENT_DATA ): expected_call = mock.call( @@ -6466,7 +6466,7 @@ def test_insert_rows_from_dataframe_nan(self): actual_calls = conn.api_request.call_args_list - for call, expected_data in six.moves.zip_longest( + for call, expected_data in itertools.zip_longest( actual_calls, EXPECTED_SENT_DATA ): expected_call = mock.call( @@ -6776,7 +6776,7 @@ def test_list_rows(self): # Check that initial total_rows is populated from the table. 
self.assertEqual(iterator.total_rows, 7) - page = six.next(iterator.pages) + page = next(iterator.pages) rows = list(page) # Check that total_rows is updated based on API response. @@ -6831,14 +6831,14 @@ def test_list_rows_w_start_index_w_page_size(self): table = Table(self.TABLE_REF, schema=[full_name]) iterator = client.list_rows(table, max_results=4, page_size=2, start_index=1) pages = iterator.pages - rows = list(six.next(pages)) + rows = list(next(pages)) extra_params = iterator.extra_params f2i = {"full_name": 0} self.assertEqual(len(rows), 2) self.assertEqual(rows[0], Row(("Phred Phlyntstone",), f2i)) self.assertEqual(rows[1], Row(("Bharney Rhubble",), f2i)) - rows = list(six.next(pages)) + rows = list(next(pages)) self.assertEqual(len(rows), 2) self.assertEqual(rows[0], Row(("Wylma Phlyntstone",), f2i)) @@ -6915,7 +6915,7 @@ def test_list_rows_query_params(self): conn = client._connection = make_connection(*len(tests) * [{}]) for i, test in enumerate(tests): iterator = client.list_rows(table, **test[0]) - six.next(iterator.pages) + next(iterator.pages) req = conn.api_request.call_args_list[i] test[1]["formatOptions.useInt64Timestamp"] = True self.assertEqual(req[1]["query_params"], test[1], "for kwargs %s" % test[0]) @@ -7000,7 +7000,7 @@ def test_list_rows_repeated_fields(self): struct = SchemaField("struct", "RECORD", mode="REPEATED", fields=[index, score]) iterator = client.list_rows(self.TABLE_REF, selected_fields=[color, struct]) - page = six.next(iterator.pages) + page = next(iterator.pages) rows = list(page) total_rows = iterator.total_rows page_token = iterator.next_page_token @@ -7065,7 +7065,7 @@ def test_list_rows_w_record_schema(self): table = Table(self.TABLE_REF, schema=[full_name, phone]) iterator = client.list_rows(table) - page = six.next(iterator.pages) + page = next(iterator.pages) rows = list(page) total_rows = iterator.total_rows page_token = iterator.next_page_token @@ -7241,7 +7241,7 @@ def _make_do_upload_patch(cls, client, method, 
resource={}, side_effect=None): if side_effect is None: side_effect = [ cls._make_response( - http_client.OK, + http.client.OK, json.dumps(resource), {"Content-Type": "application/json"}, ) @@ -7522,7 +7522,7 @@ def test_load_table_from_file_failure(self): file_obj = self._make_file_obj() response = self._make_response( - content="Someone is already in this spot.", status_code=http_client.CONFLICT + content="Someone is already in this spot.", status_code=http.client.CONFLICT ) do_upload_patch = self._make_do_upload_patch( @@ -8584,7 +8584,7 @@ def _make_resumable_upload_responses(cls, size): resumable_url = "http://test.invalid?upload_id=and-then-there-was-1" initial_response = cls._make_response( - http_client.OK, "", {"location": resumable_url} + http.client.OK, "", {"location": resumable_url} ) data_response = cls._make_response( resumable_media.PERMANENT_REDIRECT, @@ -8592,7 +8592,7 @@ def _make_resumable_upload_responses(cls, size): {"range": "bytes=0-{:d}".format(size - 1)}, ) final_response = cls._make_response( - http_client.OK, + http.client.OK, json.dumps({"size": size}), {"Content-Type": "application/json"}, ) @@ -8634,7 +8634,7 @@ def test__do_resumable_upload(self): ) def test__do_multipart_upload(self): - transport = self._make_transport([self._make_response(http_client.OK)]) + transport = self._make_transport([self._make_response(http.client.OK)]) client = self._make_client(transport) file_obj = self._make_file_obj() file_obj_len = len(file_obj.getvalue()) diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index 08dd6dcfa..fffa46aa8 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -23,8 +23,6 @@ except ImportError: # pragma: NO COVER pyarrow = None -import six - import google.cloud._helpers from google.cloud.bigquery import table from google.cloud.bigquery.dbapi import _helpers @@ -293,7 +291,7 @@ def test_public_instance_methods_on_closed_instance(self): instance = 
decorated_class() instance._closed = True - with six.assertRaisesRegex(self, exceptions.ProgrammingError, "I'm closed!"): + with self.assertRaisesRegex(exceptions.ProgrammingError, "I'm closed!"): instance.instance_method() def test_methods_wo_public_instance_methods_on_closed_instance(self): @@ -316,7 +314,7 @@ def test_custom_class_closed_attribute(self): instance._closed = False instance._really_closed = True - with six.assertRaisesRegex(self, exceptions.ProgrammingError, "I'm closed!"): + with self.assertRaisesRegex(exceptions.ProgrammingError, "I'm closed!"): instance.instance_method() def test_custom_on_closed_error_type(self): @@ -327,5 +325,5 @@ def test_custom_on_closed_error_type(self): instance = decorated_class() instance._closed = True - with six.assertRaisesRegex(self, RuntimeError, "I'm closed!"): + with self.assertRaisesRegex(RuntimeError, "I'm closed!"): instance.instance_method() diff --git a/tests/unit/test_dbapi_connection.py b/tests/unit/test_dbapi_connection.py index 30fb1292e..edec559b2 100644 --- a/tests/unit/test_dbapi_connection.py +++ b/tests/unit/test_dbapi_connection.py @@ -16,7 +16,6 @@ import unittest import mock -import six try: from google.cloud import bigquery_storage @@ -124,8 +123,8 @@ def test_raises_error_if_closed(self): connection.close() for method in ("close", "commit", "cursor"): - with six.assertRaisesRegex( - self, ProgrammingError, r"Operating on a closed connection\." + with self.assertRaisesRegex( + ProgrammingError, r"Operating on a closed connection\." 
): getattr(connection, method)() diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index f55b3fd3f..cbd6f6909 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -16,7 +16,6 @@ import unittest import mock -import six try: import pyarrow @@ -181,8 +180,8 @@ def test_raises_error_if_closed(self): ) for method in method_names: - with six.assertRaisesRegex( - self, ProgrammingError, r"Operating on a closed cursor\." + with self.assertRaisesRegex( + ProgrammingError, r"Operating on a closed cursor\." ): getattr(cursor, method)() @@ -375,7 +374,7 @@ def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self): cursor = connection.cursor() cursor.execute("SELECT foo, bar FROM some_table") - with six.assertRaisesRegex(self, exceptions.Forbidden, "invalid credentials"): + with self.assertRaisesRegex(exceptions.Forbidden, "invalid credentials"): cursor.fetchall() # the default client was not used diff --git a/tests/unit/test_opentelemetry_tracing.py b/tests/unit/test_opentelemetry_tracing.py index 09afa7531..5d0cf2053 100644 --- a/tests/unit/test_opentelemetry_tracing.py +++ b/tests/unit/test_opentelemetry_tracing.py @@ -13,6 +13,7 @@ # limitations under the License. 
import datetime +import importlib import sys import mock @@ -28,7 +29,6 @@ except ImportError: # pragma: NO COVER opentelemetry = None import pytest -from six.moves import reload_module from google.cloud.bigquery import opentelemetry_tracing @@ -39,7 +39,7 @@ @pytest.mark.skipif(opentelemetry is None, reason="Require `opentelemetry`") @pytest.fixture def setup(): - reload_module(opentelemetry_tracing) + importlib.reload(opentelemetry_tracing) tracer_provider = TracerProvider() memory_exporter = InMemorySpanExporter() span_processor = SimpleExportSpanProcessor(memory_exporter) @@ -51,7 +51,7 @@ def setup(): @pytest.mark.skipif(opentelemetry is None, reason="Require `opentelemetry`") def test_opentelemetry_not_installed(setup, monkeypatch): monkeypatch.setitem(sys.modules, "opentelemetry", None) - reload_module(opentelemetry_tracing) + importlib.reload(opentelemetry_tracing) with opentelemetry_tracing.create_span("No-op for opentelemetry") as span: assert span is None diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 0e7b0bb4d..3373528e0 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -22,7 +22,6 @@ import pkg_resources import pytest import pytz -import six import google.api_core.exceptions @@ -1674,16 +1673,16 @@ def test_iterate(self): rows_iter = iter(row_iterator) - val1 = six.next(rows_iter) + val1 = next(rows_iter) self.assertEqual(val1.name, "Phred Phlyntstone") self.assertEqual(row_iterator.num_results, 1) - val2 = six.next(rows_iter) + val2 = next(rows_iter) self.assertEqual(val2.name, "Bharney Rhubble") self.assertEqual(row_iterator.num_results, 2) with self.assertRaises(StopIteration): - six.next(rows_iter) + next(rows_iter) api_request.assert_called_once_with(method="GET", path=path, query_params={}) @@ -2437,13 +2436,6 @@ def test_to_dataframe(self): self.assertEqual(df.name.dtype.name, "object") self.assertEqual(df.age.dtype.name, "int64") - @pytest.mark.xfail( - six.PY2, - reason=( - "Requires pyarrow>-1.0 
to work, but the latter is not compatible " - "with Python 2 anymore." - ), - ) @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): @@ -2475,13 +2467,6 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): ], ) - @pytest.mark.xfail( - six.PY2, - reason=( - "Requires pyarrow>-1.0 to work, but the latter is not compatible " - "with Python 2 anymore." - ), - ) @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_dataframe_datetime_out_of_pyarrow_bounds(self): @@ -2697,7 +2682,7 @@ def test_to_dataframe_w_various_types_nullable(self): else: self.assertIsInstance(row.start_timestamp, pandas.Timestamp) self.assertIsInstance(row.seconds, float) - self.assertIsInstance(row.payment_type, six.string_types) + self.assertIsInstance(row.payment_type, str) self.assertIsInstance(row.complete, bool) self.assertIsInstance(row.date, datetime.date) @@ -3542,7 +3527,7 @@ def test__eq___type_mismatch(self): def test_unhashable_object(self): object_under_test1 = self._make_one(start=1, end=10, interval=2) - with six.assertRaisesRegex(self, TypeError, r".*unhashable type.*"): + with self.assertRaisesRegex(TypeError, r".*unhashable type.*"): hash(object_under_test1) def test_repr(self): @@ -3642,7 +3627,7 @@ def test_unhashable_object(self): object_under_test1 = self._make_one( range_=PartitionRange(start=1, end=10, interval=2), field="integer_col" ) - with six.assertRaisesRegex(self, TypeError, r".*unhashable type.*"): + with self.assertRaisesRegex(TypeError, r".*unhashable type.*"): hash(object_under_test1) def test_repr(self): From 015a73e1839e3427408ef6e0f879717d9ddbdb61 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 8 Jan 2021 16:56:18 -0600 Subject: [PATCH 096/341] fix: add minimum timeout to getQueryResults API requests (#444) * fix: add minimum timeout to getQueryResults API 
requests Since successful responses can still take a long time to download, have a minimum timeout which should accommodate 99.9%+ of responses. I figure it's more important that *any* timeout is set if desired than it is that the specific timeout is used. This is especially true in cases where a short timeout is requested for the purposes of a progress bar. Making forward progress is more important than the progress bar update frequency. * docs: document minimum timeout value * test: remove redundant query timeout test * test: change assertion for done method * chore: remove unused import --- google/cloud/bigquery/client.py | 22 ++++++++++++++++++++-- tests/system.py | 33 +++++++++++---------------------- tests/unit/job/test_query.py | 6 ++++++ tests/unit/test_client.py | 29 +++++++++++++++++++++++++++-- 4 files changed, 64 insertions(+), 26 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 19693c9ff..3541726b8 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -93,6 +93,14 @@ ) _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS = "jobReference,totalRows,pageToken,rows" +# In microbenchmarks, it's been shown that even in ideal conditions (query +# finished, local data), requests to getQueryResults can take 10+ seconds. +# In less-than-ideal situations, the response can take even longer, as it must +# be able to download a full 100+ MB row in that time. Don't let the +# connection timeout before data can be downloaded. +# https://github.com/googleapis/python-bigquery/issues/438 +_MIN_GET_QUERY_RESULTS_TIMEOUT = 120 + class Project(object): """Wrapper for resource describing a BigQuery project. @@ -1570,7 +1578,9 @@ def _get_query_results( location (Optional[str]): Location of the query job. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport - before using ``retry``. + before using ``retry``.
If set, this connection timeout may be + increased to a minimum value. This prevents retries on what + would otherwise be a successful response. Returns: google.cloud.bigquery.query._QueryResults: @@ -1579,6 +1589,9 @@ def _get_query_results( extra_params = {"maxResults": 0} + if timeout is not None: + timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT) + if project is None: project = self.project @@ -3293,7 +3306,9 @@ def _list_rows_from_query_results( How to retry the RPC. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport - before using ``retry``. + before using ``retry``. If set, this connection timeout may be + increased to a minimum value. This prevents retries on what + would otherwise be a successful response. If multiple requests are made under the hood, ``timeout`` applies to each individual request. Returns: @@ -3306,6 +3321,9 @@ def _list_rows_from_query_results( "location": location, } + if timeout is not None: + timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT) + if start_index is not None: params["startIndex"] = start_index diff --git a/tests/system.py b/tests/system.py index bfe54b7df..102c8f78d 100644 --- a/tests/system.py +++ b/tests/system.py @@ -27,7 +27,6 @@ import uuid import re -import requests import psutil import pytest import pytz @@ -1798,15 +1797,25 @@ def test_query_w_wrong_config(self): Config.CLIENT.query(good_query, job_config=bad_config).result() def test_query_w_timeout(self): + job_config = bigquery.QueryJobConfig() + job_config.use_query_cache = False + query_job = Config.CLIENT.query( "SELECT * FROM `bigquery-public-data.github_repos.commits`;", job_id_prefix="test_query_w_timeout_", + location="US", + job_config=job_config, ) with self.assertRaises(concurrent.futures.TimeoutError): - # 1 second is much too short for this query. query_job.result(timeout=1) + # Even though the query takes >1 second, the call to getQueryResults + # should succeed. 
+ self.assertFalse(query_job.done(timeout=1)) + + Config.CLIENT.cancel_job(query_job.job_id, location=query_job.location) + def test_query_w_page_size(self): page_size = 45 query_job = Config.CLIENT.query( @@ -2408,26 +2417,6 @@ def test_query_iter(self): row_tuples = [r.values() for r in query_job] self.assertEqual(row_tuples, [(1,)]) - def test_querying_data_w_timeout(self): - job_config = bigquery.QueryJobConfig() - job_config.use_query_cache = False - - query_job = Config.CLIENT.query( - """ - SELECT COUNT(*) - FROM UNNEST(GENERATE_ARRAY(1,1000000)), UNNEST(GENERATE_ARRAY(1, 10000)) - """, - location="US", - job_config=job_config, - ) - - # Specify a very tight deadline to demonstrate that the timeout - # actually has effect. - with self.assertRaises(requests.exceptions.Timeout): - query_job.done(timeout=0.1) - - Config.CLIENT.cancel_job(query_job.job_id, location=query_job.location) - @unittest.skipIf(pandas is None, "Requires `pandas`") def test_query_results_to_dataframe(self): QUERY = """ diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 579a841d1..a4ab11ab6 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -1046,6 +1046,8 @@ def test_result_invokes_begins(self): self.assertEqual(reload_request[1]["method"], "GET") def test_result_w_timeout(self): + import google.cloud.bigquery.client + begun_resource = self._make_resource() query_resource = { "jobComplete": True, @@ -1072,6 +1074,10 @@ def test_result_w_timeout(self): "/projects/{}/queries/{}".format(self.PROJECT, self.JOB_ID), ) self.assertEqual(query_request[1]["query_params"]["timeoutMs"], 900) + self.assertEqual( + query_request[1]["timeout"], + google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, + ) self.assertEqual(reload_request[1]["method"], "GET") def test_result_w_page_size(self): diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 98dec00f9..bf183b5a4 100644 --- a/tests/unit/test_client.py +++ 
b/tests/unit/test_client.py @@ -311,7 +311,7 @@ def test__get_query_results_miss_w_explicit_project_and_timeout(self): project="other-project", location=self.LOCATION, timeout_ms=500, - timeout=42, + timeout=420, ) final_attributes.assert_called_once_with({"path": path}, client, None) @@ -320,7 +320,32 @@ def test__get_query_results_miss_w_explicit_project_and_timeout(self): method="GET", path=path, query_params={"maxResults": 0, "timeoutMs": 500, "location": self.LOCATION}, - timeout=42, + timeout=420, + ) + + def test__get_query_results_miss_w_short_timeout(self): + import google.cloud.bigquery.client + from google.cloud.exceptions import NotFound + + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds) + conn = client._connection = make_connection() + path = "/projects/other-project/queries/nothere" + with self.assertRaises(NotFound): + client._get_query_results( + "nothere", + None, + project="other-project", + location=self.LOCATION, + timeout_ms=500, + timeout=1, + ) + + conn.api_request.assert_called_once_with( + method="GET", + path=path, + query_params={"maxResults": 0, "timeoutMs": 500, "location": self.LOCATION}, + timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, ) def test__get_query_results_miss_w_client_location(self): From 99ef1d20faede0d3b949c6f0cdb3c38f738d630b Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 11 Jan 2021 10:18:08 +0100 Subject: [PATCH 097/341] chore: Bound maximum supported Python version (#465) * chore: bound maximum supported Python version * Bound supported Python versions claim in README --- README.rst | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index c7d50d729..61192b625 100644 --- a/README.rst +++ b/README.rst @@ -52,7 +52,7 @@ dependencies. 
Supported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^ -Python >= 3.6 +Python >= 3.6, < 3.9 Unsupported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/setup.py b/setup.py index fcafddbd2..0ea6ccca2 100644 --- a/setup.py +++ b/setup.py @@ -120,7 +120,7 @@ namespace_packages=namespaces, install_requires=dependencies, extras_require=extras, - python_requires=">=3.6", + python_requires=">=3.6, <3.9", include_package_data=True, zip_safe=False, ) From fb3ad7682dc19189c75f8ab4345794a613ac0ca8 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 11 Jan 2021 10:20:06 +0100 Subject: [PATCH 098/341] refactor: simplify AutoStrEnum definition (#458) With now only Python 3.6 supported, we can use the _generate_next_value() hook instead of metaclass magic. --- .../bigquery/magics/line_arg_parser/lexer.py | 41 ++++--------------- 1 file changed, 9 insertions(+), 32 deletions(-) diff --git a/google/cloud/bigquery/magics/line_arg_parser/lexer.py b/google/cloud/bigquery/magics/line_arg_parser/lexer.py index 17e1ffdae..0cb63292c 100644 --- a/google/cloud/bigquery/magics/line_arg_parser/lexer.py +++ b/google/cloud/bigquery/magics/line_arg_parser/lexer.py @@ -136,40 +136,17 @@ ) -# The _generate_next_value_() enum hook is only available in Python 3.6+, thus we -# need to do some acrobatics to implement an "auto str enum" base class. Implementation -# based on the recipe provided by the very author of the Enum library: -# https://stackoverflow.com/a/32313954/5040035 -class StrEnumMeta(enum.EnumMeta): - @classmethod - def __prepare__(metacls, name, bases, **kwargs): - # Having deterministic enum members definition order is nice. - return OrderedDict() +class AutoStrEnum(str, enum.Enum): + """Base enum class for for name=value str enums.""" - def __new__(metacls, name, bases, oldclassdict): - # Scan through the declared enum members and convert any value that is a plain - # empty tuple into a `str` of the name instead. 
- newclassdict = enum._EnumDict() - for key, val in oldclassdict.items(): - if val == (): - val = key - newclassdict[key] = val - return super(StrEnumMeta, metacls).__new__(metacls, name, bases, newclassdict) + def _generate_next_value_(name, start, count, last_values): + return name -# The @six.add_metaclass decorator does not work, Enum complains about _sunder_ names, -# and we cannot use class syntax directly, because the Python 3 version would cause -# a syntax error under Python 2. -AutoStrEnum = StrEnumMeta( - "AutoStrEnum", - (str, enum.Enum), - {"__doc__": "Base enum class for for name=value str enums."}, -) - TokenType = AutoStrEnum( "TokenType", [ - (name, name) + (name, enum.auto()) for name in itertools.chain.from_iterable(token_types.values()) if not name.startswith("GOTO_") ], @@ -177,10 +154,10 @@ def __new__(metacls, name, bases, oldclassdict): class LexerState(AutoStrEnum): - PARSE_POS_ARGS = () # parsing positional arguments - PARSE_NON_PARAMS_OPTIONS = () # parsing options other than "--params" - PARSE_PARAMS_OPTION = () # parsing the "--params" option - STATE_END = () + PARSE_POS_ARGS = enum.auto() # parsing positional arguments + PARSE_NON_PARAMS_OPTIONS = enum.auto() # parsing options other than "--params" + PARSE_PARAMS_OPTION = enum.auto() # parsing the "--params" option + STATE_END = enum.auto() class Lexer(object): From 7ea6b7c2469d2415192cfdacc379e38e49d24775 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 11 Jan 2021 11:07:35 -0600 Subject: [PATCH 099/341] fix: use debug logging level for OpenTelemetry message (#442) * fix: use debug logging level for OpenTelemetry message * only warn at span creation time * add unit test for skipping warning * refactor: rename _warned_telemetry to indicate private and mutable --- .../cloud/bigquery/opentelemetry_tracing.py | 19 ++++++++++++------- tests/unit/test_opentelemetry_tracing.py | 12 ++++++++++++ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git 
a/google/cloud/bigquery/opentelemetry_tracing.py b/google/cloud/bigquery/opentelemetry_tracing.py index b9d18efad..57f258ac4 100644 --- a/google/cloud/bigquery/opentelemetry_tracing.py +++ b/google/cloud/bigquery/opentelemetry_tracing.py @@ -23,16 +23,11 @@ from opentelemetry.trace.status import Status HAS_OPENTELEMETRY = True + _warned_telemetry = True except ImportError: - logger.info( - "This service is instrumented using OpenTelemetry. " - "OpenTelemetry could not be imported; please " - "add opentelemetry-api and opentelemetry-instrumentation " - "packages in order to get BigQuery Tracing data." - ) - HAS_OPENTELEMETRY = False + _warned_telemetry = False _default_attributes = { "db.system": "BigQuery" @@ -64,8 +59,18 @@ def create_span(name, attributes=None, client=None, job_ref=None): Raised if a span could not be yielded or issue with call to OpenTelemetry. """ + global _warned_telemetry final_attributes = _get_final_span_attributes(attributes, client, job_ref) if not HAS_OPENTELEMETRY: + if not _warned_telemetry: + logger.debug( + "This service is instrumented using OpenTelemetry. " + "OpenTelemetry could not be imported; please " + "add opentelemetry-api and opentelemetry-instrumentation " + "packages in order to get BigQuery Tracing data." 
+ ) + _warned_telemetry = True + yield None return tracer = trace.get_tracer(__name__) diff --git a/tests/unit/test_opentelemetry_tracing.py b/tests/unit/test_opentelemetry_tracing.py index 5d0cf2053..726e3cf6f 100644 --- a/tests/unit/test_opentelemetry_tracing.py +++ b/tests/unit/test_opentelemetry_tracing.py @@ -52,8 +52,20 @@ def setup(): def test_opentelemetry_not_installed(setup, monkeypatch): monkeypatch.setitem(sys.modules, "opentelemetry", None) importlib.reload(opentelemetry_tracing) + assert not opentelemetry_tracing._warned_telemetry with opentelemetry_tracing.create_span("No-op for opentelemetry") as span: assert span is None + assert opentelemetry_tracing._warned_telemetry + + +@pytest.mark.skipif(opentelemetry is None, reason="Require `opentelemetry`") +def test_opentelemetry_not_installed_doesnt_warn(setup, monkeypatch): + monkeypatch.setitem(sys.modules, "opentelemetry", None) + importlib.reload(opentelemetry_tracing) + opentelemetry_tracing._warned_telemetry = True + with opentelemetry_tracing.create_span("No-op for opentelemetry") as span: + assert span is None + assert opentelemetry_tracing._warned_telemetry @pytest.mark.skipif(opentelemetry is None, reason="Require `opentelemetry`") From b0e074f7522710886be1da2f117ea22de411b408 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 11 Jan 2021 11:40:09 -0600 Subject: [PATCH 100/341] chore: release 2.6.2 (#429) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 15 +++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d01f62ff6..4d58072e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,21 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.6.2](https://www.github.com/googleapis/python-bigquery/compare/v2.6.1...v2.6.2) (2021-01-11) + + +### Bug Fixes + 
+* add minimum timeout to getQueryResults API requests ([#444](https://www.github.com/googleapis/python-bigquery/issues/444)) ([015a73e](https://www.github.com/googleapis/python-bigquery/commit/015a73e1839e3427408ef6e0f879717d9ddbdb61)) +* use debug logging level for OpenTelemetry message ([#442](https://www.github.com/googleapis/python-bigquery/issues/442)) ([7ea6b7c](https://www.github.com/googleapis/python-bigquery/commit/7ea6b7c2469d2415192cfdacc379e38e49d24775)) + + +### Documentation + +* add GEOGRAPHY data type code samples ([#428](https://www.github.com/googleapis/python-bigquery/issues/428)) ([dbc68b3](https://www.github.com/googleapis/python-bigquery/commit/dbc68b3d1f325f80d24a2da5f028b0f653fb0317)) +* fix Shapely import in GEOGRAPHY sample ([#431](https://www.github.com/googleapis/python-bigquery/issues/431)) ([96a1c5b](https://www.github.com/googleapis/python-bigquery/commit/96a1c5b3c72855ba6ae8c88dfd0cdb02d2faf909)) +* move and refresh view samples ([#420](https://www.github.com/googleapis/python-bigquery/issues/420)) ([079b6a1](https://www.github.com/googleapis/python-bigquery/commit/079b6a162f6929bf801366d92f8daeb3318426c4)) + ### [2.6.1](https://www.github.com/googleapis/python-bigquery/compare/v2.6.0...v2.6.1) (2020-12-09) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 410cd066e..9aaeb8bc4 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.6.1" +__version__ = "2.6.2" From 2788736b80a4c4ac0ae3029aeb28bcefd34f2db7 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 11 Jan 2021 22:51:55 +0100 Subject: [PATCH 101/341] chore(deps): update dependency google-cloud-bigquery to v2.6.2 (#466) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 3ea0e6e06..338cf2e89 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,3 +1,3 @@ geojson==2.5.0 -google-cloud-bigquery==2.6.1 +google-cloud-bigquery==2.6.2 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 208eb4526..003dc6fb4 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.6.1 +google-cloud-bigquery==2.6.2 google-cloud-bigquery-storage==2.1.0 google-auth-oauthlib==0.4.2 grpcio==1.34.0 From 1526e3936703fc97b17ec30c8ab8fe90197ad303 Mon Sep 17 00:00:00 2001 From: Carlos de la Guardia Date: Wed, 13 Jan 2021 09:55:33 -0600 Subject: [PATCH 102/341] feature: raise error for unknown properties in job config (#446) * feature: warn about unknown properties in job config * fix: raise error instead of warning * fix: use hasattr instead of __dict__ * fix bad merge * fix system test that sets wrong property Co-authored-by: Tim Swast --- google/cloud/bigquery/job/base.py | 8 ++++++++ tests/system.py | 6 ++---- tests/unit/job/test_base.py | 7 +++++++ 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index 3c601f072..930b71e8a 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -659,6 +659,14 @@ def __init__(self, job_type, **kwargs): for prop, val in kwargs.items(): setattr(self, prop, val) 
+ def __setattr__(self, name, value): + """Override to be able to raise error if an unknown property is being set""" + if not name.startswith("_") and not hasattr(type(self), name): + raise AttributeError( + "Property {} is unknown for {}.".format(name, type(self)) + ) + super(_JobConfig, self).__setattr__(name, value) + @property def labels(self): """Dict[str, str]: Labels for the job. diff --git a/tests/system.py b/tests/system.py index 102c8f78d..447f66b1a 100644 --- a/tests/system.py +++ b/tests/system.py @@ -76,7 +76,7 @@ from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table from google.cloud._helpers import UTC -from google.cloud.bigquery import dbapi +from google.cloud.bigquery import dbapi, enums from google.cloud import storage from test_utils.retry import RetryErrors @@ -1789,10 +1789,8 @@ def test_query_w_wrong_config(self): rows = list(Config.CLIENT.query("SELECT 1;").result()) assert rows[0][0] == 1 - project = Config.CLIENT.project - dataset_ref = bigquery.DatasetReference(project, "dset") bad_config = LoadJobConfig() - bad_config.destination = dataset_ref.table("tbl") + bad_config.source_format = enums.SourceFormat.CSV with self.assertRaises(Exception): Config.CLIENT.query(good_query, job_config=bad_config).result() diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index 478e30e6f..610ad2875 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -19,6 +19,7 @@ from google.api_core import exceptions import google.api_core.retry import mock +import pytest from .helpers import _make_client from .helpers import _make_connection @@ -1021,6 +1022,12 @@ def test_ctor(self): self.assertEqual(job_config._job_type, self.JOB_TYPE) self.assertEqual(job_config._properties, {self.JOB_TYPE: {}}) + def test_ctor_with_unknown_property_raises_error(self): + error_text = "Property wrong_name is unknown for" + with pytest.raises(AttributeError, match=error_text): + config = 
self._make_one() + config.wrong_name = None + def test_fill_from_default(self): from google.cloud.bigquery import QueryJobConfig From d7fc252563ba0b01a7dcf4af933d72bdfc005806 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 13 Jan 2021 23:56:25 +0100 Subject: [PATCH 103/341] chore(deps): update dependency grpcio to v1.34.1 (#470) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 003dc6fb4..de882844b 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.6.2 google-cloud-bigquery-storage==2.1.0 google-auth-oauthlib==0.4.2 -grpcio==1.34.0 +grpcio==1.34.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.3 From 80944f080bcc4fda870a6daf1d884de616d39ae7 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 14 Jan 2021 16:59:26 +0100 Subject: [PATCH 104/341] fix: reading the labels attribute on Job instances (#471) --- google/cloud/bigquery/job/base.py | 7 +++---- tests/system.py | 17 +++++++++++++++++ tests/unit/job/test_base.py | 2 +- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index 930b71e8a..5ba01aa67 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -233,7 +233,7 @@ def path(self): @property def labels(self): """Dict[str, str]: Labels for the job.""" - return self._properties.setdefault("labels", {}) + return self._properties.setdefault("configuration", {}).setdefault("labels", {}) @property def etag(self): @@ -671,9 +671,8 @@ def __setattr__(self, name, value): def labels(self): """Dict[str, str]: Labels for the job. - This method always returns a dict. To change a job's labels, - modify the dict, then call ``Client.update_job``. 
To delete a - label, set its value to :data:`None` before updating. + This method always returns a dict. Once a job has been created on the + server, its labels cannot be modified anymore. Raises: ValueError: If ``value`` type is invalid. diff --git a/tests/system.py b/tests/system.py index 447f66b1a..0fa5bc41e 100644 --- a/tests/system.py +++ b/tests/system.py @@ -1667,6 +1667,23 @@ def test_job_cancel(self): # raise an error, and that the job completed (in the `retry()` # above). + def test_job_labels(self): + DATASET_ID = _make_dataset_id("job_cancel") + JOB_ID_PREFIX = "fetch_" + DATASET_ID + QUERY = "SELECT 1 as one" + + self.temp_dataset(DATASET_ID) + + job_config = bigquery.QueryJobConfig( + labels={"custom_label": "label_value", "another_label": "foo123"} + ) + job = Config.CLIENT.query( + QUERY, job_id_prefix=JOB_ID_PREFIX, job_config=job_config + ) + + expected_labels = {"custom_label": "label_value", "another_label": "foo123"} + self.assertEqual(job.labels, expected_labels) + def test_get_failed_job(self): # issue 4246 from google.api_core.exceptions import BadRequest diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index 610ad2875..44bbc2c77 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -251,7 +251,7 @@ def test_labels_hit(self): labels = {"foo": "bar"} client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) - job._properties["labels"] = labels + job._properties.setdefault("configuration", {})["labels"] = labels self.assertEqual(job.labels, labels) def test_etag(self): From 9f6556cd299e02016811d2e0f495928d82e03667 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 20 Jan 2021 17:04:02 -0600 Subject: [PATCH 105/341] refactor: move system tests into `tests/system` directory (#475) This aligns more closely with the templates from synthtool. Thank you for opening a Pull Request! 
Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Towards #366 --- noxfile.py | 15 ++-- tests/system/__init__.py | 13 ++++ tests/{system.py => system/test_client.py} | 81 ++------------------- tests/system/test_magics.py | 83 ++++++++++++++++++++++ 4 files changed, 111 insertions(+), 81 deletions(-) create mode 100644 tests/system/__init__.py rename tests/{system.py => system/test_client.py} (97%) create mode 100644 tests/system/test_magics.py diff --git a/noxfile.py b/noxfile.py index f3326d01b..e6a739d1e 100644 --- a/noxfile.py +++ b/noxfile.py @@ -115,9 +115,7 @@ def system(session): session.install("ipython", "-c", constraints_path) # Run py.test against the system tests. - session.run( - "py.test", "--quiet", os.path.join("tests", "system.py"), *session.posargs - ) + session.run("py.test", "--quiet", os.path.join("tests", "system"), *session.posargs) @nox.session(python=["3.8"]) @@ -181,12 +179,14 @@ def prerelease_deps(session): ) session.install("--pre", "grpcio", "pandas") session.install( + "freezegun", + "google-cloud-storage", + "google-cloud-testutils", + "IPython", "mock", + "psutil", "pytest", - "google-cloud-testutils", "pytest-cov", - "freezegun", - "IPython", ) session.install("-e", ".[all]") @@ -196,7 +196,8 @@ def prerelease_deps(session): session.run("python", "-c", "import pyarrow; print(pyarrow.__version__)") # Run all tests, except a few samples tests which require extra dependencies. 
- session.run("py.test", "tests") + session.run("py.test", "tests/unit") + session.run("py.test", "tests/system") session.run("py.test", "samples/tests") diff --git a/tests/system/__init__.py b/tests/system/__init__.py new file mode 100644 index 000000000..4fbd93bb2 --- /dev/null +++ b/tests/system/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/system.py b/tests/system/test_client.py similarity index 97% rename from tests/system.py rename to tests/system/test_client.py index 0fa5bc41e..aa1a03160 100644 --- a/tests/system.py +++ b/tests/system/test_client.py @@ -22,13 +22,12 @@ import json import operator import os +import pathlib import time import unittest import uuid -import re import psutil -import pytest import pytz import pkg_resources @@ -51,13 +50,6 @@ import pyarrow.types except ImportError: # pragma: NO COVER pyarrow = None -try: - import IPython - from IPython.utils import io as ipython_io - from IPython.testing import tools - from IPython.terminal import interactiveshell -except ImportError: # pragma: NO COVER - IPython = None from google.api_core.exceptions import PreconditionFailed from google.api_core.exceptions import BadRequest @@ -86,7 +78,7 @@ JOB_TIMEOUT = 120 # 2 minutes -WHERE = os.path.abspath(os.path.dirname(__file__)) +DATA_PATH = pathlib.Path(__file__).parent.parent / "data" # Common table data used for many tests. 
ROWS = [ @@ -149,10 +141,10 @@ def _make_dataset_id(prefix): return "%s%s" % (prefix, unique_resource_id()) -def _load_json_schema(filename="data/schema.json"): +def _load_json_schema(filename="schema.json"): from google.cloud.bigquery.table import _parse_schema_resource - json_filename = os.path.join(WHERE, filename) + json_filename = DATA_PATH / filename with open(json_filename, "r") as schema_file: return _parse_schema_resource(json.load(schema_file)) @@ -716,7 +708,7 @@ def test_load_table_from_local_avro_file_then_dump_table(self): table = Table(table_ref) self.to_delete.insert(0, table) - with open(os.path.join(WHERE, "data", "colors.avro"), "rb") as avrof: + with open(DATA_PATH / "colors.avro", "rb") as avrof: config = bigquery.LoadJobConfig() config.source_format = SourceFormat.AVRO config.write_disposition = WriteDisposition.WRITE_TRUNCATE @@ -1347,7 +1339,7 @@ def test_load_avro_from_uri_then_dump_table(self): ("orange", 590), ("red", 650), ] - with open(os.path.join(WHERE, "data", "colors.avro"), "rb") as f: + with open(DATA_PATH / "colors.avro", "rb") as f: GS_URL = self._write_avro_to_storage( "bq_load_test" + unique_resource_id(), "colors.avro", f ) @@ -2707,7 +2699,7 @@ def test_create_table_rows_fetch_nested_schema(self): to_insert = [] # Data is in "JSON Lines" format, see http://jsonlines.org/ - json_filename = os.path.join(WHERE, "data", "characters.jsonl") + json_filename = DATA_PATH / "characters.jsonl" with open(json_filename) as rows_file: for line in rows_file: to_insert.append(json.loads(line)) @@ -2979,47 +2971,6 @@ def temp_dataset(self, dataset_id, location=None): return dataset -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -@pytest.mark.skipif(IPython is None, reason="Requires `ipython`") -@pytest.mark.usefixtures("ipython_interactive") -def test_bigquery_magic(): - ip = IPython.get_ipython() - current_process = psutil.Process() - conn_count_start = len(current_process.connections()) - - 
ip.extension_manager.load_extension("google.cloud.bigquery") - sql = """ - SELECT - CONCAT( - 'https://stackoverflow.com/questions/', - CAST(id as STRING)) as url, - view_count - FROM `bigquery-public-data.stackoverflow.posts_questions` - WHERE tags like '%google-bigquery%' - ORDER BY view_count DESC - LIMIT 10 - """ - with ipython_io.capture_output() as captured: - result = ip.run_cell_magic("bigquery", "--use_rest_api", sql) - - conn_count_end = len(current_process.connections()) - - lines = re.split("\n|\r", captured.stdout) - # Removes blanks & terminal code (result of display clearing) - updates = list(filter(lambda x: bool(x) and x != "\x1b[2K", lines)) - assert re.match("Executing query with job ID: .*", updates[0]) - assert all(re.match("Query executing: .*s", line) for line in updates[1:-1]) - assert re.match("Query complete after .*s", updates[-1]) - assert isinstance(result, pandas.DataFrame) - assert len(result) == 10 # verify row count - assert list(result) == ["url", "view_count"] # verify column names - - # NOTE: For some reason, the number of open sockets is sometimes one *less* - # than expected when running system tests on Kokoro, thus using the <= assertion. - # That's still fine, however, since the sockets are apparently not leaked. - assert conn_count_end <= conn_count_start # system resources are released - - def _job_done(instance): return instance.state.lower() == "done" @@ -3039,21 +2990,3 @@ def _table_exists(t): return True except NotFound: return False - - -@pytest.fixture(scope="session") -def ipython(): - config = tools.default_config() - config.TerminalInteractiveShell.simple_prompt = True - shell = interactiveshell.TerminalInteractiveShell.instance(config=config) - return shell - - -@pytest.fixture() -def ipython_interactive(request, ipython): - """Activate IPython's builtin hooks - - for the duration of the test scope. 
- """ - with ipython.builtin_trap: - yield ipython diff --git a/tests/system/test_magics.py b/tests/system/test_magics.py new file mode 100644 index 000000000..78c15cb50 --- /dev/null +++ b/tests/system/test_magics.py @@ -0,0 +1,83 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""System tests for Jupyter/IPython connector.""" + +import re + +import pytest +import psutil + + +IPython = pytest.importorskip("IPython") +io = pytest.importorskip("IPython.utils.io") +pandas = pytest.importorskip("pandas") +tools = pytest.importorskip("IPython.testing.tools") +interactiveshell = pytest.importorskip("IPython.terminal.interactiveshell") + + +@pytest.fixture(scope="session") +def ipython(): + config = tools.default_config() + config.TerminalInteractiveShell.simple_prompt = True + shell = interactiveshell.TerminalInteractiveShell.instance(config=config) + return shell + + +@pytest.fixture() +def ipython_interactive(ipython): + """Activate IPython's builtin hooks + + for the duration of the test scope. 
+ """ + with ipython.builtin_trap: + yield ipython + + +def test_bigquery_magic(ipython_interactive): + ip = IPython.get_ipython() + current_process = psutil.Process() + conn_count_start = len(current_process.connections()) + + ip.extension_manager.load_extension("google.cloud.bigquery") + sql = """ + SELECT + CONCAT( + 'https://stackoverflow.com/questions/', + CAST(id as STRING)) as url, + view_count + FROM `bigquery-public-data.stackoverflow.posts_questions` + WHERE tags like '%google-bigquery%' + ORDER BY view_count DESC + LIMIT 10 + """ + with io.capture_output() as captured: + result = ip.run_cell_magic("bigquery", "--use_rest_api", sql) + + conn_count_end = len(current_process.connections()) + + lines = re.split("\n|\r", captured.stdout) + # Removes blanks & terminal code (result of display clearing) + updates = list(filter(lambda x: bool(x) and x != "\x1b[2K", lines)) + assert re.match("Executing query with job ID: .*", updates[0]) + assert all(re.match("Query executing: .*s", line) for line in updates[1:-1]) + assert re.match("Query complete after .*s", updates[-1]) + assert isinstance(result, pandas.DataFrame) + assert len(result) == 10 # verify row count + assert list(result) == ["url", "view_count"] # verify column names + + # NOTE: For some reason, the number of open sockets is sometimes one *less* + # than expected when running system tests on Kokoro, thus using the <= assertion. + # That's still fine, however, since the sockets are apparently not leaked. 
+ assert conn_count_end <= conn_count_start # system resources are released From cac90626c5e7f2b4db2b50926e4bf483d088826f Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 22 Jan 2021 18:46:27 +0100 Subject: [PATCH 106/341] chore(deps): update dependency grpcio to v1.35.0 (#477) @tswast confirmed this is ok to merge and that broken docs-presubmit is unrelated --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index de882844b..603d49a72 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.6.2 google-cloud-bigquery-storage==2.1.0 google-auth-oauthlib==0.4.2 -grpcio==1.34.1 +grpcio==1.35.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.3 From 530e1e8d8fe8939e914a78ff1b220907c1b87af7 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 22 Jan 2021 23:35:12 +0100 Subject: [PATCH 107/341] fix: use explicitly given project over the client's default project for load jobs (#482) * fix: use project parameter if given for load jobs * blacken client tests * Refactor string concatenations in client tests * Silence invalid coverage complaint --- google/cloud/bigquery/client.py | 41 +++++++-- tests/unit/test_client.py | 157 +++++++++++++++++++++++--------- 2 files changed, 148 insertions(+), 50 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 3541726b8..b270075a9 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -2136,11 +2136,11 @@ def load_table_from_file( try: if size is None or size >= _MAX_MULTIPART_SIZE: response = self._do_resumable_upload( - file_obj, job_resource, num_retries, timeout + file_obj, job_resource, num_retries, timeout, project=project ) else: response = self._do_multipart_upload( - file_obj, job_resource, size, 
num_retries, timeout + file_obj, job_resource, size, num_retries, timeout, project=project ) except resumable_media.InvalidResponse as exc: raise exceptions.from_http_response(exc.response) @@ -2475,7 +2475,9 @@ def load_table_from_json( timeout=timeout, ) - def _do_resumable_upload(self, stream, metadata, num_retries, timeout): + def _do_resumable_upload( + self, stream, metadata, num_retries, timeout, project=None + ): """Perform a resumable upload. Args: @@ -2491,13 +2493,17 @@ def _do_resumable_upload(self, stream, metadata, num_retries, timeout): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + project (Optional[str]): + Project ID of the project of where to run the upload. Defaults + to the client's project. + Returns: requests.Response: The "200 OK" response object returned after the final chunk is uploaded. """ upload, transport = self._initiate_resumable_upload( - stream, metadata, num_retries, timeout + stream, metadata, num_retries, timeout, project=project ) while not upload.finished: @@ -2505,7 +2511,9 @@ def _do_resumable_upload(self, stream, metadata, num_retries, timeout): return response - def _initiate_resumable_upload(self, stream, metadata, num_retries, timeout): + def _initiate_resumable_upload( + self, stream, metadata, num_retries, timeout, project=None + ): """Initiate a resumable upload. Args: @@ -2521,6 +2529,10 @@ def _initiate_resumable_upload(self, stream, metadata, num_retries, timeout): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + project (Optional[str]): + Project ID of the project of where to run the upload. Defaults + to the client's project. 
+ Returns: Tuple: Pair of @@ -2532,7 +2544,11 @@ def _initiate_resumable_upload(self, stream, metadata, num_retries, timeout): chunk_size = _DEFAULT_CHUNKSIZE transport = self._http headers = _get_upload_headers(self._connection.user_agent) - upload_url = _RESUMABLE_URL_TEMPLATE.format(project=self.project) + + if project is None: + project = self.project + upload_url = _RESUMABLE_URL_TEMPLATE.format(project=project) + # TODO: modify ResumableUpload to take a retry.Retry object # that it can use for the initial RPC. upload = ResumableUpload(upload_url, chunk_size, headers=headers) @@ -2553,7 +2569,9 @@ def _initiate_resumable_upload(self, stream, metadata, num_retries, timeout): return upload, transport - def _do_multipart_upload(self, stream, metadata, size, num_retries, timeout): + def _do_multipart_upload( + self, stream, metadata, size, num_retries, timeout, project=None + ): """Perform a multipart upload. Args: @@ -2574,6 +2592,10 @@ def _do_multipart_upload(self, stream, metadata, size, num_retries, timeout): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + project (Optional[str]): + Project ID of the project of where to run the upload. Defaults + to the client's project. 
+ Returns: requests.Response: The "200 OK" response object returned after the multipart @@ -2591,7 +2613,10 @@ def _do_multipart_upload(self, stream, metadata, size, num_retries, timeout): headers = _get_upload_headers(self._connection.user_agent) - upload_url = _MULTIPART_URL_TEMPLATE.format(project=self.project) + if project is None: + project = self.project + + upload_url = _MULTIPART_URL_TEMPLATE.format(project=project) upload = MultipartUpload(upload_url, headers=headers) if num_retries is not None: diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index bf183b5a4..625256e6e 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -4455,9 +4455,8 @@ def _initiate_resumable_upload_helper(self, num_retries=None): # Check the returned values. self.assertIsInstance(upload, ResumableUpload) upload_url = ( - "https://bigquery.googleapis.com/upload/bigquery/v2/projects/" - + self.PROJECT - + "/jobs?uploadType=resumable" + f"https://bigquery.googleapis.com/upload/bigquery/v2/projects/{self.PROJECT}" + "/jobs?uploadType=resumable" ) self.assertEqual(upload.upload_url, upload_url) expected_headers = _get_upload_headers(conn.user_agent) @@ -4498,7 +4497,9 @@ def test__initiate_resumable_upload(self): def test__initiate_resumable_upload_with_retry(self): self._initiate_resumable_upload_helper(num_retries=11) - def _do_multipart_upload_success_helper(self, get_boundary, num_retries=None): + def _do_multipart_upload_success_helper( + self, get_boundary, num_retries=None, project=None + ): from google.cloud.bigquery.client import _get_upload_headers from google.cloud.bigquery.job import LoadJob from google.cloud.bigquery.job import LoadJobConfig @@ -4508,6 +4509,9 @@ def _do_multipart_upload_success_helper(self, get_boundary, num_retries=None): client = self._make_one(project=self.PROJECT, _http=fake_transport) conn = client._connection = make_connection() + if project is None: + project = self.PROJECT + # Create some mock arguments. 
data = b"Bzzzz-zap \x00\x01\xf4" stream = io.BytesIO(data) @@ -4516,8 +4520,9 @@ def _do_multipart_upload_success_helper(self, get_boundary, num_retries=None): job = LoadJob(None, None, self.TABLE_REF, client, job_config=config) metadata = job.to_api_repr() size = len(data) + response = client._do_multipart_upload( - stream, metadata, size, num_retries, None + stream, metadata, size, num_retries, None, project=project ) # Check the mocks and the returned value. @@ -4526,35 +4531,39 @@ def _do_multipart_upload_success_helper(self, get_boundary, num_retries=None): get_boundary.assert_called_once_with() upload_url = ( - "https://bigquery.googleapis.com/upload/bigquery/v2/projects/" - + self.PROJECT - + "/jobs?uploadType=multipart" + f"https://bigquery.googleapis.com/upload/bigquery/v2/projects/{project}" + "/jobs?uploadType=multipart" ) payload = ( b"--==0==\r\n" - + b"content-type: application/json; charset=UTF-8\r\n\r\n" - + json.dumps(metadata).encode("utf-8") - + b"\r\n" - + b"--==0==\r\n" - + b"content-type: */*\r\n\r\n" - + data - + b"\r\n" - + b"--==0==--" - ) + b"content-type: application/json; charset=UTF-8\r\n\r\n" + b"%(json_metadata)s" + b"\r\n" + b"--==0==\r\n" + b"content-type: */*\r\n\r\n" + b"%(data)s" + b"\r\n" + b"--==0==--" + ) % {b"json_metadata": json.dumps(metadata).encode("utf-8"), b"data": data} + headers = _get_upload_headers(conn.user_agent) headers["content-type"] = b'multipart/related; boundary="==0=="' fake_transport.request.assert_called_once_with( "POST", upload_url, data=payload, headers=headers, timeout=mock.ANY ) - @mock.patch(u"google.resumable_media._upload.get_boundary", return_value=b"==0==") + @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") def test__do_multipart_upload(self, get_boundary): self._do_multipart_upload_success_helper(get_boundary) - @mock.patch(u"google.resumable_media._upload.get_boundary", return_value=b"==0==") + @mock.patch("google.resumable_media._upload.get_boundary", 
return_value=b"==0==") def test__do_multipart_upload_with_retry(self, get_boundary): self._do_multipart_upload_success_helper(get_boundary, num_retries=8) + @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") + def test__do_multipart_upload_with_custom_project(self, get_boundary): + self._do_multipart_upload_success_helper(get_boundary, project="custom-project") + def test_copy_table(self): from google.cloud.bigquery.job import CopyJob @@ -6364,10 +6373,10 @@ def test_insert_rows_from_dataframe(self): dataframe = pandas.DataFrame( [ - {"name": u"Little One", "age": 10, "adult": False}, - {"name": u"Young Gun", "age": 20, "adult": True}, - {"name": u"Dad", "age": 30, "adult": True}, - {"name": u"Stranger", "age": 40, "adult": True}, + {"name": "Little One", "age": 10, "adult": False}, + {"name": "Young Gun", "age": 20, "adult": True}, + {"name": "Dad", "age": 30, "adult": True}, + {"name": "Stranger", "age": 40, "adult": True}, ] ) @@ -6560,8 +6569,8 @@ def test_insert_rows_from_dataframe_w_explicit_none_insert_ids(self): dataframe = pandas.DataFrame( [ - {"name": u"Little One", "adult": False}, - {"name": u"Young Gun", "adult": True}, + {"name": "Little One", "adult": False}, + {"name": "Young Gun", "adult": True}, ] ) @@ -7230,17 +7239,18 @@ class TestClientUpload(object): # `pytest`-style tests rather than `unittest`-style. 
from google.cloud.bigquery.job import SourceFormat - TABLE_REF = DatasetReference("project_id", "test_dataset").table("test_table") + PROJECT = "project_id" + TABLE_REF = DatasetReference(PROJECT, "test_dataset").table("test_table") LOCATION = "us-central" - @staticmethod - def _make_client(transport=None, location=None): + @classmethod + def _make_client(cls, transport=None, location=None): from google.cloud.bigquery import _http from google.cloud.bigquery import client cl = client.Client( - project="project_id", + project=cls.PROJECT, credentials=_make_credentials(), _http=transport, location=location, @@ -7274,12 +7284,12 @@ def _make_do_upload_patch(cls, client, method, resource={}, side_effect=None): return mock.patch.object(client, method, side_effect=side_effect, autospec=True) EXPECTED_CONFIGURATION = { - "jobReference": {"projectId": "project_id", "jobId": "job_id"}, + "jobReference": {"projectId": PROJECT, "jobId": "job_id"}, "configuration": { "load": { "sourceFormat": SourceFormat.CSV, "destinationTable": { - "projectId": "project_id", + "projectId": PROJECT, "datasetId": "test_dataset", "tableId": "test_table", }, @@ -7325,7 +7335,11 @@ def test_load_table_from_file_resumable(self): ) do_upload.assert_called_once_with( - file_obj, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES, None + file_obj, + self.EXPECTED_CONFIGURATION, + _DEFAULT_NUM_RETRIES, + None, + project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) # the original config object should not have been modified @@ -7354,7 +7368,11 @@ def test_load_table_from_file_w_explicit_project(self): expected_resource["jobReference"]["location"] = self.LOCATION expected_resource["jobReference"]["projectId"] = "other-project" do_upload.assert_called_once_with( - file_obj, expected_resource, _DEFAULT_NUM_RETRIES, None + file_obj, + expected_resource, + _DEFAULT_NUM_RETRIES, + None, + project="other-project", ) def test_load_table_from_file_w_client_location(self): @@ -7384,7 +7402,11 @@ def 
test_load_table_from_file_w_client_location(self): expected_resource["jobReference"]["location"] = self.LOCATION expected_resource["jobReference"]["projectId"] = "other-project" do_upload.assert_called_once_with( - file_obj, expected_resource, _DEFAULT_NUM_RETRIES, None + file_obj, + expected_resource, + _DEFAULT_NUM_RETRIES, + None, + project="other-project", ) def test_load_table_from_file_resumable_metadata(self): @@ -7409,7 +7431,7 @@ def test_load_table_from_file_resumable_metadata(self): config.null_marker = r"\N" expected_config = { - "jobReference": {"projectId": "project_id", "jobId": "job_id"}, + "jobReference": {"projectId": self.PROJECT, "jobId": "job_id"}, "configuration": { "load": { "destinationTable": { @@ -7442,7 +7464,11 @@ def test_load_table_from_file_resumable_metadata(self): ) do_upload.assert_called_once_with( - file_obj, expected_config, _DEFAULT_NUM_RETRIES, None + file_obj, + expected_config, + _DEFAULT_NUM_RETRIES, + None, + project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) def test_load_table_from_file_multipart(self): @@ -7471,6 +7497,7 @@ def test_load_table_from_file_multipart(self): file_obj_size, _DEFAULT_NUM_RETRIES, None, + project=self.PROJECT, ) def test_load_table_from_file_with_retries(self): @@ -7491,7 +7518,11 @@ def test_load_table_from_file_with_retries(self): ) do_upload.assert_called_once_with( - file_obj, self.EXPECTED_CONFIGURATION, num_retries, None + file_obj, + self.EXPECTED_CONFIGURATION, + num_retries, + None, + project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) def test_load_table_from_file_with_rewind(self): @@ -7524,7 +7555,11 @@ def test_load_table_from_file_with_readable_gzip(self): ) do_upload.assert_called_once_with( - gzip_file, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES, None + gzip_file, + self.EXPECTED_CONFIGURATION, + _DEFAULT_NUM_RETRIES, + None, + project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) def 
test_load_table_from_file_with_writable_gzip(self): @@ -8169,7 +8204,7 @@ def test_load_table_from_dataframe_w_partial_schema(self): dtype="datetime64[ns]", ).dt.tz_localize(pytz.utc), ), - ("string_col", [u"abc", None, u"def"]), + ("string_col", ["abc", None, "def"]), ("bytes_col", [b"abc", b"def", None]), ] ) @@ -8228,7 +8263,7 @@ def test_load_table_from_dataframe_w_partial_schema_extra_types(self): [ ("int_col", [1, 2, 3]), ("int_as_float_col", [1.0, float("nan"), 3.0]), - ("string_col", [u"abc", None, u"def"]), + ("string_col", ["abc", None, "def"]), ] ) dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) @@ -8263,7 +8298,7 @@ def test_load_table_from_dataframe_w_partial_schema_missing_types(self): client = self._make_client() df_data = collections.OrderedDict( [ - ("string_col", [u"abc", u"def", u"ghi"]), + ("string_col", ["abc", "def", "ghi"]), ("unknown_col", [b"jkl", None, b"mno"]), ] ) @@ -8317,7 +8352,7 @@ def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): from google.cloud.bigquery.schema import SchemaField client = self._make_client() - records = [{"name": u"Monty", "age": 100}, {"name": u"Python", "age": 60}] + records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}] dataframe = pandas.DataFrame(records) schema = (SchemaField("name", "STRING"), SchemaField("age", "INTEGER")) job_config = job.LoadJobConfig(schema=schema) @@ -8658,6 +8693,44 @@ def test__do_resumable_upload(self): timeout=mock.ANY, ) + def test__do_resumable_upload_custom_project(self): + file_obj = self._make_file_obj() + file_obj_len = len(file_obj.getvalue()) + transport = self._make_transport( + self._make_resumable_upload_responses(file_obj_len) + ) + client = self._make_client(transport) + + result = client._do_resumable_upload( + file_obj, self.EXPECTED_CONFIGURATION, None, None, project="custom-project", + ) + + content = result.content.decode("utf-8") + assert json.loads(content) == {"size": file_obj_len} + + # Verify that 
configuration data was passed in with the initial + # request. + transport.request.assert_any_call( + "POST", + mock.ANY, + data=json.dumps(self.EXPECTED_CONFIGURATION).encode("utf-8"), + headers=mock.ANY, + timeout=mock.ANY, + ) + + # Check the project ID used in the call to initiate resumable upload. + initiation_url = next( + ( + call.args[1] + for call in transport.request.call_args_list + if call.args[0] == "POST" and "uploadType=resumable" in call.args[1] + ), + None, + ) # pragma: NO COVER + + assert initiation_url is not None + assert "projects/custom-project" in initiation_url + def test__do_multipart_upload(self): transport = self._make_transport([self._make_response(http.client.OK)]) client = self._make_client(transport) From 61b438523d305ce66a68fde7cb49e9abbf0a8d1d Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 25 Jan 2021 18:04:04 +0100 Subject: [PATCH 108/341] fix: invalid conversion of timezone-aware datetime values to JSON (#480) * fix: correctly convert timezone-aware datetimes * blacken * Remove python-dateutil test dependency * Remove unused dst() methods --- google/cloud/bigquery/_helpers.py | 8 ++++ tests/unit/test__helpers.py | 77 +++++++++++++++++-------------- 2 files changed, 51 insertions(+), 34 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 6b66a3020..daa14b92a 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -315,6 +315,10 @@ def _timestamp_to_json_parameter(value): def _timestamp_to_json_row(value): """Coerce 'value' to an JSON-compatible representation.""" if isinstance(value, datetime.datetime): + # For naive datetime objects UTC timezone is assumed, thus we format + # those to string directly without conversion. 
+ if value.tzinfo is not None: + value = value.astimezone(UTC) value = value.strftime(_RFC3339_MICROS) return value @@ -322,6 +326,10 @@ def _timestamp_to_json_row(value): def _datetime_to_json(value): """Coerce 'value' to an JSON-compatible representation.""" if isinstance(value, datetime.datetime): + # For naive datetime objects UTC timezone is assumed, thus we format + # those to string directly without conversion. + if value.tzinfo is not None: + value = value.astimezone(UTC) value = value.strftime(_RFC3339_MICROS_NO_ZULU) return value diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 8948d4152..0fdf1142f 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -420,13 +420,13 @@ def _call_fut(self, row, schema): def test_w_single_scalar_column(self): # SELECT 1 AS col col = _Field("REQUIRED", "col", "INTEGER") - row = {u"f": [{u"v": u"1"}]} + row = {"f": [{"v": "1"}]} self.assertEqual(self._call_fut(row, schema=[col]), (1,)) def test_w_single_scalar_geography_column(self): # SELECT 1 AS col col = _Field("REQUIRED", "geo", "GEOGRAPHY") - row = {u"f": [{u"v": u"POINT(1, 2)"}]} + row = {"f": [{"v": "POINT(1, 2)"}]} self.assertEqual(self._call_fut(row, schema=[col]), ("POINT(1, 2)",)) def test_w_single_struct_column(self): @@ -434,13 +434,13 @@ def test_w_single_struct_column(self): sub_1 = _Field("REQUIRED", "sub_1", "INTEGER") sub_2 = _Field("REQUIRED", "sub_2", "INTEGER") col = _Field("REQUIRED", "col", "RECORD", fields=[sub_1, sub_2]) - row = {u"f": [{u"v": {u"f": [{u"v": u"1"}, {u"v": u"2"}]}}]} + row = {"f": [{"v": {"f": [{"v": "1"}, {"v": "2"}]}}]} self.assertEqual(self._call_fut(row, schema=[col]), ({"sub_1": 1, "sub_2": 2},)) def test_w_single_array_column(self): # SELECT [1, 2, 3] as col col = _Field("REPEATED", "col", "INTEGER") - row = {u"f": [{u"v": [{u"v": u"1"}, {u"v": u"2"}, {u"v": u"3"}]}]} + row = {"f": [{"v": [{"v": "1"}, {"v": "2"}, {"v": "3"}]}]} self.assertEqual(self._call_fut(row, 
schema=[col]), ([1, 2, 3],)) def test_w_struct_w_nested_array_column(self): @@ -450,13 +450,13 @@ def test_w_struct_w_nested_array_column(self): third = _Field("REPEATED", "third", "INTEGER") col = _Field("REQUIRED", "col", "RECORD", fields=[first, second, third]) row = { - u"f": [ + "f": [ { - u"v": { - u"f": [ - {u"v": [{u"v": u"1"}, {u"v": u"2"}]}, - {u"v": u"3"}, - {u"v": [{u"v": u"4"}, {u"v": u"5"}]}, + "v": { + "f": [ + {"v": [{"v": "1"}, {"v": "2"}]}, + {"v": "3"}, + {"v": [{"v": "4"}, {"v": "5"}]}, ] } } @@ -464,7 +464,7 @@ def test_w_struct_w_nested_array_column(self): } self.assertEqual( self._call_fut(row, schema=[col]), - ({u"first": [1, 2], u"second": 3, u"third": [4, 5]},), + ({"first": [1, 2], "second": 3, "third": [4, 5]},), ) def test_w_array_of_struct(self): @@ -474,11 +474,11 @@ def test_w_array_of_struct(self): third = _Field("REQUIRED", "third", "INTEGER") col = _Field("REPEATED", "col", "RECORD", fields=[first, second, third]) row = { - u"f": [ + "f": [ { - u"v": [ - {u"v": {u"f": [{u"v": u"1"}, {u"v": u"2"}, {u"v": u"3"}]}}, - {u"v": {u"f": [{u"v": u"4"}, {u"v": u"5"}, {u"v": u"6"}]}}, + "v": [ + {"v": {"f": [{"v": "1"}, {"v": "2"}, {"v": "3"}]}}, + {"v": {"f": [{"v": "4"}, {"v": "5"}, {"v": "6"}]}}, ] } ] @@ -487,8 +487,8 @@ def test_w_array_of_struct(self): self._call_fut(row, schema=[col]), ( [ - {u"first": 1, u"second": 2, u"third": 3}, - {u"first": 4, u"second": 5, u"third": 6}, + {"first": 1, "second": 2, "third": 3}, + {"first": 4, "second": 5, "third": 6}, ], ), ) @@ -499,32 +499,25 @@ def test_w_array_of_struct_w_array(self): second = _Field("REQUIRED", "second", "INTEGER") col = _Field("REPEATED", "col", "RECORD", fields=[first, second]) row = { - u"f": [ + "f": [ { - u"v": [ - { - u"v": { - u"f": [ - {u"v": [{u"v": u"1"}, {u"v": u"2"}, {u"v": u"3"}]}, - {u"v": u"4"}, - ] - } - }, + "v": [ { - u"v": { - u"f": [ - {u"v": [{u"v": u"5"}, {u"v": u"6"}]}, - {u"v": u"7"}, + "v": { + "f": [ + {"v": [{"v": "1"}, {"v": "2"}, {"v": "3"}]}, + 
{"v": "4"}, ] } }, + {"v": {"f": [{"v": [{"v": "5"}, {"v": "6"}]}, {"v": "7"}]}}, ] } ] } self.assertEqual( self._call_fut(row, schema=[col]), - ([{u"first": [1, 2, 3], u"second": 4}, {u"first": [5, 6], u"second": 7}],), + ([{"first": [1, 2, 3], "second": 4}, {"first": [5, 6], "second": 7}],), ) @@ -673,7 +666,7 @@ def test_w_non_bytes(self): def test_w_bytes(self): source = b"source" - expected = u"c291cmNl" + expected = "c291cmNl" converted = self._call_fut(source) self.assertEqual(converted, expected) @@ -726,7 +719,7 @@ def test_w_string(self): ZULU = "2016-12-20 15:58:27.339328+00:00" self.assertEqual(self._call_fut(ZULU), ZULU) - def test_w_datetime(self): + def test_w_datetime_no_zone(self): when = datetime.datetime(2016, 12, 20, 15, 58, 27, 339328) self.assertEqual(self._call_fut(when), "2016-12-20T15:58:27.339328Z") @@ -736,6 +729,14 @@ def test_w_datetime_w_utc_zone(self): when = datetime.datetime(2020, 11, 17, 1, 6, 52, 353795, tzinfo=UTC) self.assertEqual(self._call_fut(when), "2020-11-17T01:06:52.353795Z") + def test_w_datetime_w_non_utc_zone(self): + class EstZone(datetime.tzinfo): + def utcoffset(self, _): + return datetime.timedelta(minutes=-300) + + when = datetime.datetime(2020, 11, 17, 1, 6, 52, 353795, tzinfo=EstZone()) + self.assertEqual(self._call_fut(when), "2020-11-17T06:06:52.353795Z") + class Test_datetime_to_json(unittest.TestCase): def _call_fut(self, value): @@ -753,6 +754,14 @@ def test_w_datetime(self): when = datetime.datetime(2016, 12, 3, 14, 11, 27, 123456, tzinfo=UTC) self.assertEqual(self._call_fut(when), "2016-12-03T14:11:27.123456") + def test_w_datetime_w_non_utc_zone(self): + class EstZone(datetime.tzinfo): + def utcoffset(self, _): + return datetime.timedelta(minutes=-300) + + when = datetime.datetime(2016, 12, 3, 14, 11, 27, 123456, tzinfo=EstZone()) + self.assertEqual(self._call_fut(when), "2016-12-03T19:11:27.123456") + class Test_date_to_json(unittest.TestCase): def _call_fut(self, value): From 
f187fb01440fc17f7f1694f9b559bb5b2fbae1d0 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 27 Jan 2021 21:44:48 +0100 Subject: [PATCH 109/341] chore(deps): update dependency google-cloud-bigquery-storage to v2.2.1 (#485) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 603d49a72..599b6d52f 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.6.2 -google-cloud-bigquery-storage==2.1.0 +google-cloud-bigquery-storage==2.2.1 google-auth-oauthlib==0.4.2 grpcio==1.35.0 ipython==7.16.1; python_version < '3.7' From 55daa7da9857a8a2fb14a80a4efa3f466386a85f Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 27 Jan 2021 21:54:02 +0100 Subject: [PATCH 110/341] deps: declare support for Python 3.9 (#488) Towards #462. With wheels for `pyarrow==3.0.0` released we can now officially support Python 3.9 - well, at least once we add Python 3.9 checks to Kokoro. PR checklist: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) --- README.rst | 2 +- noxfile.py | 22 +++++++++++++--------- setup.py | 11 ++++------- testing/constraints-3.6.txt | 20 +++++--------------- 4 files changed, 23 insertions(+), 32 deletions(-) diff --git a/README.rst b/README.rst index 61192b625..8454cf9c0 100644 --- a/README.rst +++ b/README.rst @@ -52,7 +52,7 @@ dependencies. 
Supported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^ -Python >= 3.6, < 3.9 +Python >= 3.6, < 3.10 Unsupported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/noxfile.py b/noxfile.py index e6a739d1e..942525ca9 100644 --- a/noxfile.py +++ b/noxfile.py @@ -23,6 +23,10 @@ BLACK_VERSION = "black==19.10b0" BLACK_PATHS = ("docs", "google", "samples", "tests", "noxfile.py", "setup.py") + +DEFAULT_PYTHON_VERSION = "3.8" +SYSTEM_TEST_PYTHON_VERSIONS = ["3.8"] +UNIT_TEST_PYTHON_VERSIONS = ["3.6", "3.7", "3.8", "3.9"] CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() # 'docfx' is excluded since it only needs to run in 'docs-presubmit' @@ -80,13 +84,13 @@ def default(session): ) -@nox.session(python=["3.6", "3.7", "3.8"]) +@nox.session(python=UNIT_TEST_PYTHON_VERSIONS) def unit(session): """Run the unit test suite.""" default(session) -@nox.session(python=["3.8"]) +@nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) def system(session): """Run the system test suite.""" @@ -118,7 +122,7 @@ def system(session): session.run("py.test", "--quiet", os.path.join("tests", "system"), *session.posargs) -@nox.session(python=["3.8"]) +@nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) def snippets(session): """Run the snippets test suite.""" @@ -154,7 +158,7 @@ def snippets(session): ) -@nox.session(python="3.8") +@nox.session(python=DEFAULT_PYTHON_VERSION) def cover(session): """Run the final coverage report. @@ -166,7 +170,7 @@ def cover(session): session.run("coverage", "erase") -@nox.session(python="3.8") +@nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) def prerelease_deps(session): """Run all tests with prerelease versions of dependencies installed. @@ -201,7 +205,7 @@ def prerelease_deps(session): session.run("py.test", "samples/tests") -@nox.session(python="3.8") +@nox.session(python=DEFAULT_PYTHON_VERSION) def lint(session): """Run linters. 
@@ -218,7 +222,7 @@ def lint(session): session.run("black", "--check", *BLACK_PATHS) -@nox.session(python="3.8") +@nox.session(python=DEFAULT_PYTHON_VERSION) def lint_setup_py(session): """Verify that setup.py is valid (including RST check).""" @@ -239,7 +243,7 @@ def blacken(session): session.run("black", *BLACK_PATHS) -@nox.session(python="3.8") +@nox.session(python=DEFAULT_PYTHON_VERSION) def docs(session): """Build the docs.""" @@ -262,7 +266,7 @@ def docs(session): ) -@nox.session(python="3.8") +@nox.session(python=DEFAULT_PYTHON_VERSION) def docfx(session): """Build the docfx yaml files for this library.""" diff --git a/setup.py b/setup.py index 0ea6ccca2..ea2df4843 100644 --- a/setup.py +++ b/setup.py @@ -45,13 +45,9 @@ # grpc.Channel.close() method isn't added until 1.32.0. # https://github.com/grpc/grpc/pull/15254 "grpcio >= 1.32.0, < 2.0dev", - "pyarrow >= 1.0.0, < 3.0dev", - ], - "pandas": [ - "pandas>=0.23.0", - # pyarrow 1.0.0 is required for the use of timestamp_as_object keyword. 
- "pyarrow >= 1.0.0, < 3.0dev", + "pyarrow >= 1.0.0, < 4.0dev", ], + "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 4.0dev",], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ "opentelemetry-api==0.11b0", @@ -112,6 +108,7 @@ "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", "Operating System :: OS Independent", "Topic :: Internet", ], @@ -120,7 +117,7 @@ namespace_packages=namespaces, install_requires=dependencies, extras_require=extras, - python_requires=">=3.6, <3.9", + python_requires=">=3.6, <3.10", include_package_data=True, zip_safe=False, ) diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index fe2bcfda7..c4a5c51be 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -6,26 +6,16 @@ # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", # Then this file should have foo==1.14.0 google-api-core==1.23.0 -proto-plus==1.10.0 +google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 google-resumable-media==0.6.0 -six==1.13.0 -protobuf==3.12.0 -google-cloud-bigquery-storage==2.0.0 grpcio==1.32.0 -pyarrow==1.0.0 -pandas==0.23.0 -pyarrow==1.0.0 -tqdm==4.7.4 opentelemetry-api==0.11b0 -opentelemetry-sdk==0.11b0 opentelemetry-instrumentation==0.11b0 -google-cloud-bigquery-storage==2.0.0 -grpcio==1.32.0 -pyarrow==1.0.0 -opentelemetry-api==0.11b0 opentelemetry-sdk==0.11b0 -opentelemetry-instrumentation==0.11b0 pandas==0.23.0 +proto-plus==1.10.0 +protobuf==3.12.0 pyarrow==1.0.0 -tqdm==4.7.4 \ No newline at end of file +six==1.13.0 +tqdm==4.7.4 From d5735ea378eaeea8c0ecbed89daf02f5102c6a63 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 27 Jan 2021 21:08:02 +0000 Subject: [PATCH 111/341] chore: release 2.7.0 (#472) :robot: I have created a release \*beep\* \*boop\* --- ## 
[2.7.0](https://www.github.com/googleapis/python-bigquery/compare/v2.6.2...v2.7.0) (2021-01-27) ### Bug Fixes * invalid conversion of timezone-aware datetime values to JSON ([#480](https://www.github.com/googleapis/python-bigquery/issues/480)) ([61b4385](https://www.github.com/googleapis/python-bigquery/commit/61b438523d305ce66a68fde7cb49e9abbf0a8d1d)) * reading the labels attribute on Job instances ([#471](https://www.github.com/googleapis/python-bigquery/issues/471)) ([80944f0](https://www.github.com/googleapis/python-bigquery/commit/80944f080bcc4fda870a6daf1d884de616d39ae7)) * use explicitly given project over the client's default project for load jobs ([#482](https://www.github.com/googleapis/python-bigquery/issues/482)) ([530e1e8](https://www.github.com/googleapis/python-bigquery/commit/530e1e8d8fe8939e914a78ff1b220907c1b87af7)) ### Dependencies * declare support for Python 3.9 ([#488](https://www.github.com/googleapis/python-bigquery/issues/488)) ([55daa7d](https://www.github.com/googleapis/python-bigquery/commit/55daa7da9857a8a2fb14a80a4efa3f466386a85f)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). 
--- CHANGELOG.md | 14 ++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d58072e8..a58510c66 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,20 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.7.0](https://www.github.com/googleapis/python-bigquery/compare/v2.6.2...v2.7.0) (2021-01-27) + + +### Bug Fixes + +* invalid conversion of timezone-aware datetime values to JSON ([#480](https://www.github.com/googleapis/python-bigquery/issues/480)) ([61b4385](https://www.github.com/googleapis/python-bigquery/commit/61b438523d305ce66a68fde7cb49e9abbf0a8d1d)) +* reading the labels attribute on Job instances ([#471](https://www.github.com/googleapis/python-bigquery/issues/471)) ([80944f0](https://www.github.com/googleapis/python-bigquery/commit/80944f080bcc4fda870a6daf1d884de616d39ae7)) +* use explicitly given project over the client's default project for load jobs ([#482](https://www.github.com/googleapis/python-bigquery/issues/482)) ([530e1e8](https://www.github.com/googleapis/python-bigquery/commit/530e1e8d8fe8939e914a78ff1b220907c1b87af7)) + + +### Dependencies + +* declare support for Python 3.9 ([#488](https://www.github.com/googleapis/python-bigquery/issues/488)) ([55daa7d](https://www.github.com/googleapis/python-bigquery/commit/55daa7da9857a8a2fb14a80a4efa3f466386a85f)) + ### [2.6.2](https://www.github.com/googleapis/python-bigquery/compare/v2.6.1...v2.6.2) (2021-01-11) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 9aaeb8bc4..d962613e0 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.6.2" +__version__ = "2.7.0" From bf44e7b67d2de41c13053a4550484b9ea049db3e Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Fri, 29 Jan 2021 00:34:08 -0800 Subject: [PATCH 112/341] fix: don't try to close closed cursors (#498) --- google/cloud/bigquery/dbapi/connection.py | 3 ++- tests/unit/test_dbapi_connection.py | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/dbapi/connection.py b/google/cloud/bigquery/dbapi/connection.py index 300c77dc9..459fc82aa 100644 --- a/google/cloud/bigquery/dbapi/connection.py +++ b/google/cloud/bigquery/dbapi/connection.py @@ -76,7 +76,8 @@ def close(self): self._bqstorage_client._transport.grpc_channel.close() for cursor_ in self._cursors_created: - cursor_.close() + if not cursor_._closed: + cursor_.close() def commit(self): """No-op, but for consistency raise an error if connection is closed.""" diff --git a/tests/unit/test_dbapi_connection.py b/tests/unit/test_dbapi_connection.py index edec559b2..74da318bf 100644 --- a/tests/unit/test_dbapi_connection.py +++ b/tests/unit/test_dbapi_connection.py @@ -176,6 +176,22 @@ def test_close_closes_all_created_cursors(self): self.assertTrue(cursor_1._closed) self.assertTrue(cursor_2._closed) + def test_close_closes_only_open_created_cursors(self): + connection = self._make_one(client=self._mock_client()) + cursor_1 = connection.cursor() + cursor_2 = connection.cursor() + self.assertFalse(cursor_1._closed) + self.assertFalse(cursor_2._closed) + + cursor_1.close() + self.assertTrue(cursor_1._closed) + cursor_1.close = mock.MagicMock() + + connection.close() + + self.assertFalse(cursor_1.close.called) + self.assertTrue(cursor_2._closed) + def test_does_not_keep_cursor_instances_alive(self): from google.cloud.bigquery.dbapi import Cursor From 2299cc648d5a8fa55fb08ddb58bae4675f0a13aa Mon Sep 17 00:00:00 2001 From: Justin Beckwith Date: Fri, 29 Jan 2021 08:09:02 -0800 Subject: [PATCH 113/341] build: migrate to flakybot 
(#500) --- .kokoro/test-samples.sh | 8 ++++---- .kokoro/trampoline_v2.sh | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.kokoro/test-samples.sh b/.kokoro/test-samples.sh index c5653a81d..3ce8994cb 100755 --- a/.kokoro/test-samples.sh +++ b/.kokoro/test-samples.sh @@ -87,11 +87,11 @@ for file in samples/**/requirements.txt; do python3.6 -m nox -s "$RUN_TESTS_SESSION" EXIT=$? - # If this is a periodic build, send the test log to the Build Cop Bot. - # See https://github.com/googleapis/repo-automation-bots/tree/master/packages/buildcop. + # If this is a periodic build, send the test log to the FlakyBot. + # See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot. if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then - chmod +x $KOKORO_GFILE_DIR/linux_amd64/buildcop - $KOKORO_GFILE_DIR/linux_amd64/buildcop + chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot + $KOKORO_GFILE_DIR/linux_amd64/flakybot fi if [[ $EXIT -ne 0 ]]; then diff --git a/.kokoro/trampoline_v2.sh b/.kokoro/trampoline_v2.sh index 719bcd5ba..4af6cdc26 100755 --- a/.kokoro/trampoline_v2.sh +++ b/.kokoro/trampoline_v2.sh @@ -159,7 +159,7 @@ if [[ -n "${KOKORO_BUILD_ID:-}" ]]; then "KOKORO_GITHUB_COMMIT" "KOKORO_GITHUB_PULL_REQUEST_NUMBER" "KOKORO_GITHUB_PULL_REQUEST_COMMIT" - # For Build Cop Bot + # For FlakyBot "KOKORO_GITHUB_COMMIT_URL" "KOKORO_GITHUB_PULL_REQUEST_URL" ) From 475a5c9e96886bfbd2047c1a59ab5f9ab7b4998c Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 1 Feb 2021 19:37:14 +0100 Subject: [PATCH 114/341] chore(deps): update dependency pyarrow to v3 (#490) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 599b6d52f..979506199 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -7,5 +7,5 @@ ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.3 
pandas==1.1.5; python_version < '3.7' pandas==1.2.0; python_version >= '3.7' -pyarrow==2.0.0 +pyarrow==3.0.0 pytz==2020.5 From 5caa14fd8562fde40ba79eab48db869db7dfdcf5 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 1 Feb 2021 19:37:49 +0100 Subject: [PATCH 115/341] chore(deps): update dependency google-cloud-bigquery to v2.7.0 (#491) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 338cf2e89..6f9306af2 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,3 +1,3 @@ geojson==2.5.0 -google-cloud-bigquery==2.6.2 +google-cloud-bigquery==2.7.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 979506199..bc3985ebd 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.6.2 +google-cloud-bigquery==2.7.0 google-cloud-bigquery-storage==2.2.1 google-auth-oauthlib==0.4.2 grpcio==1.35.0 From 1cf9f4f360a80837889e3f12138677e72eb78881 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 1 Feb 2021 19:38:33 +0100 Subject: [PATCH 116/341] chore(deps): update dependency matplotlib to v3.3.4 (#495) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index bc3985ebd..00d28fa0b 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -4,7 +4,7 @@ google-auth-oauthlib==0.4.2 grpcio==1.35.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' -matplotlib==3.3.3 +matplotlib==3.3.4 pandas==1.1.5; python_version < '3.7' pandas==1.2.0; python_version >= '3.7' pyarrow==3.0.0 From c090323c3cbb973f0ba3c0c332a57d0612825b38 Mon Sep 17 00:00:00 2001 
From: Yoshi Automation Bot Date: Mon, 1 Feb 2021 10:41:52 -0800 Subject: [PATCH 117/341] chore: update shared templates (#468) * changes without context autosynth cannot find the source of changes triggered by earlier changes in this repository, or by version upgrades to tools such as linters. * chore: add config / docs for 'pre-commit' support Source-Author: Tres Seaver Source-Date: Tue Dec 1 16:01:20 2020 -0500 Source-Repo: googleapis/synthtool Source-Sha: 32af6da519a6b042e3da62008e2a75e991efb6b4 Source-Link: https://github.com/googleapis/synthtool/commit/32af6da519a6b042e3da62008e2a75e991efb6b4 * chore(deps): update precommit hook pre-commit/pre-commit-hooks to v3.3.0 Source-Author: WhiteSource Renovate Source-Date: Wed Dec 2 17:18:24 2020 +0100 Source-Repo: googleapis/synthtool Source-Sha: 69629b64b83c6421d616be2b8e11795738ec8a6c Source-Link: https://github.com/googleapis/synthtool/commit/69629b64b83c6421d616be2b8e11795738ec8a6c * chore: update noxfile.py.j2 * Update noxfile.py.j2 add changes from @glasnt to the template template to ensure that enforcing type hinting doesn't fail for repos with the sample noxfile (aka all samples repos) See https://github.com/GoogleCloudPlatform/python-docs-samples/pull/4869/files for context * fix typo Source-Author: Leah E. 
Cole <6719667+leahecole@users.noreply.github.com> Source-Date: Thu Dec 3 13:44:30 2020 -0800 Source-Repo: googleapis/synthtool Source-Sha: 18c5dbdb4ac8cf75d4d8174e7b4558f48e76f8a1 Source-Link: https://github.com/googleapis/synthtool/commit/18c5dbdb4ac8cf75d4d8174e7b4558f48e76f8a1 * chore(deps): update precommit hook pre-commit/pre-commit-hooks to v3.4.0 Co-authored-by: Tres Seaver Source-Author: WhiteSource Renovate Source-Date: Wed Dec 16 18:13:24 2020 +0100 Source-Repo: googleapis/synthtool Source-Sha: aa255b15d52b6d8950cca48cfdf58f7d27a60c8a Source-Link: https://github.com/googleapis/synthtool/commit/aa255b15d52b6d8950cca48cfdf58f7d27a60c8a * docs(python): document adding Python 3.9 support, dropping 3.5 support Closes #787 Source-Author: Tres Seaver Source-Date: Thu Dec 17 16:08:02 2020 -0500 Source-Repo: googleapis/synthtool Source-Sha: b670a77a454f415d247907908e8ee7943e06d718 Source-Link: https://github.com/googleapis/synthtool/commit/b670a77a454f415d247907908e8ee7943e06d718 * chore: exclude `.nox` directories from linting The samples tests create `.nox` directories with all dependencies installed. These directories should be excluded from linting. I've tested this change locally, and it significantly speeds up linting on my machine. 
Source-Author: Tim Swast Source-Date: Tue Dec 22 13:04:04 2020 -0600 Source-Repo: googleapis/synthtool Source-Sha: 373861061648b5fe5e0ac4f8a38b32d639ee93e4 Source-Link: https://github.com/googleapis/synthtool/commit/373861061648b5fe5e0ac4f8a38b32d639ee93e4 * chore(python): fix column sizing issue in docs Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Thu Jan 7 11:58:32 2021 -0700 Source-Repo: googleapis/synthtool Source-Sha: f15b57ccfd71106c2299e9b89835fe6e55015662 Source-Link: https://github.com/googleapis/synthtool/commit/f15b57ccfd71106c2299e9b89835fe6e55015662 * chore(python): use 'http' in LICENSE Co-authored-by: Tim Swast Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Thu Jan 7 13:05:12 2021 -0700 Source-Repo: googleapis/synthtool Source-Sha: 41a4e56982620d3edcf110d76f4fcdfdec471ac8 Source-Link: https://github.com/googleapis/synthtool/commit/41a4e56982620d3edcf110d76f4fcdfdec471ac8 * chore(python): skip docfx in main presubmit * chore(python): skip docfx in main presubmit * fix: properly template the repo name Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Fri Jan 8 10:32:13 2021 -0700 Source-Repo: googleapis/synthtool Source-Sha: fb53b6fb373b7c3edf4e55f3e8036bc6d73fa483 Source-Link: https://github.com/googleapis/synthtool/commit/fb53b6fb373b7c3edf4e55f3e8036bc6d73fa483 * chore: add missing quotation mark Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Mon Jan 11 09:43:06 2021 -0700 Source-Repo: googleapis/synthtool Source-Sha: 16ec872dd898d7de6e1822badfac32484b5d9031 Source-Link: https://github.com/googleapis/synthtool/commit/16ec872dd898d7de6e1822badfac32484b5d9031 --- .flake8 | 1 + .kokoro/docs/docs-presubmit.cfg | 2 +- .pre-commit-config.yaml | 17 ++++++ CONTRIBUTING.rst | 21 +++++--- LICENSE | 7 +-- docs/_static/custom.css | 7 ++- samples/geography/noxfile.py | 19 ++++--- samples/snippets/noxfile.py | 19 
++++--- synth.metadata | 92 +++++++++++++++++++++++++++++++-- 9 files changed, 154 insertions(+), 31 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.flake8 b/.flake8 index ed9316381..29227d4cf 100644 --- a/.flake8 +++ b/.flake8 @@ -26,6 +26,7 @@ exclude = *_pb2.py # Standard linting exemptions. + **/.nox/** __pycache__, .git, *.pyc, diff --git a/.kokoro/docs/docs-presubmit.cfg b/.kokoro/docs/docs-presubmit.cfg index 5c216b4bc..08adb2e28 100644 --- a/.kokoro/docs/docs-presubmit.cfg +++ b/.kokoro/docs/docs-presubmit.cfg @@ -25,4 +25,4 @@ env_vars: { env_vars: { key: "NOX_SESSION" value: "docs docfx" -} \ No newline at end of file +} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..a9024b15d --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,17 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.4.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml +- repo: https://github.com/psf/black + rev: 19.10b0 + hooks: + - id: black +- repo: https://gitlab.com/pycqa/flake8 + rev: 3.8.4 + hooks: + - id: flake8 diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index b3b802b49..15bcd2e28 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -21,8 +21,8 @@ In order to add a feature: - The feature must be documented in both the API and narrative documentation. -- The feature must work fully on the following CPython versions: 2.7, - 3.5, 3.6, 3.7 and 3.8 on both UNIX and Windows. +- The feature must work fully on the following CPython versions: + 3.6, 3.7, 3.8 and 3.9 on both UNIX and Windows. 
- The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should @@ -111,6 +111,16 @@ Coding Style should point to the official ``googleapis`` checkout and the the branch should be the main branch on that remote (``master``). +- This repository contains configuration for the + `pre-commit `__ tool, which automates checking + our linters during a commit. If you have it installed on your ``$PATH``, + you can enable enforcing those checks via: + +.. code-block:: bash + + $ pre-commit install + pre-commit installed at .git/hooks/pre-commit + Exceptions to PEP8: - Many unit tests use a helper method, ``_call_fut`` ("FUT" is short for @@ -192,25 +202,24 @@ Supported Python Versions We support: -- `Python 3.5`_ - `Python 3.6`_ - `Python 3.7`_ - `Python 3.8`_ +- `Python 3.9`_ -.. _Python 3.5: https://docs.python.org/3.5/ .. _Python 3.6: https://docs.python.org/3.6/ .. _Python 3.7: https://docs.python.org/3.7/ .. _Python 3.8: https://docs.python.org/3.8/ +.. _Python 3.9: https://docs.python.org/3.9/ Supported versions can be found in our ``noxfile.py`` `config`_. .. _config: https://github.com/googleapis/python-bigquery/blob/master/noxfile.py -Python 2.7 support is deprecated. All code changes should maintain Python 2.7 compatibility until January 1, 2020. We also explicitly decided to support Python 3 beginning with version -3.5. Reasons for this include: +3.6. Reasons for this include: - Encouraging use of newest versions of Python 3 - Taking the lead of `prominent`_ open-source `projects`_ diff --git a/LICENSE b/LICENSE index a8ee855de..d64569567 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,7 @@ - Apache License + + Apache License Version 2.0, January 2004 - https://www.apache.org/licenses/ + http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION @@ -192,7 +193,7 @@ you may not use this file except in compliance with the License. 
You may obtain a copy of the License at - https://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, diff --git a/docs/_static/custom.css b/docs/_static/custom.css index 0abaf229f..bcd37bbd3 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -1,4 +1,9 @@ div#python2-eol { border-color: red; border-width: medium; -} \ No newline at end of file +} + +/* Ensure minimum width for 'Parameters' / 'Returns' column */ +dl.field-list > dt { + min-width: 100px +} diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index ab2c49227..bbd25fcdb 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -17,6 +17,7 @@ import os from pathlib import Path import sys +from typing import Callable, Dict, List, Optional import nox @@ -65,7 +66,7 @@ TEST_CONFIG.update(TEST_CONFIG_OVERRIDE) -def get_pytest_env_vars(): +def get_pytest_env_vars() -> Dict[str, str]: """Returns a dict for pytest invocation.""" ret = {} @@ -94,7 +95,7 @@ def get_pytest_env_vars(): # -def _determine_local_import_names(start_dir): +def _determine_local_import_names(start_dir: str) -> List[str]: """Determines all import names that should be considered "local". 
This is used when running the linter to insure that import order is @@ -132,7 +133,7 @@ def _determine_local_import_names(start_dir): @nox.session -def lint(session): +def lint(session: nox.sessions.Session) -> None: if not TEST_CONFIG["enforce_type_hints"]: session.install("flake8", "flake8-import-order") else: @@ -153,7 +154,7 @@ def lint(session): @nox.session -def blacken(session): +def blacken(session: nox.sessions.Session) -> None: session.install("black") python_files = [path for path in os.listdir(".") if path.endswith(".py")] @@ -168,7 +169,9 @@ def blacken(session): PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"] -def _session_tests(session, post_install=None): +def _session_tests( + session: nox.sessions.Session, post_install: Callable = None +) -> None: """Runs py.test for a particular project.""" if os.path.exists("requirements.txt"): session.install("-r", "requirements.txt") @@ -194,7 +197,7 @@ def _session_tests(session, post_install=None): @nox.session(python=ALL_VERSIONS) -def py(session): +def py(session: nox.sessions.Session) -> None: """Runs py.test for a sample using the specified version of Python.""" if session.python in TESTED_VERSIONS: _session_tests(session) @@ -209,7 +212,7 @@ def py(session): # -def _get_repo_root(): +def _get_repo_root() -> Optional[str]: """ Returns the root folder of the project. """ # Get root of this repository. Assume we don't have directories nested deeper than 10 items. 
p = Path(os.getcwd()) @@ -232,7 +235,7 @@ def _get_repo_root(): @nox.session @nox.parametrize("path", GENERATED_READMES) -def readmegen(session, path): +def readmegen(session: nox.sessions.Session, path: str) -> None: """(Re-)generates the readme for a sample.""" session.install("jinja2", "pyyaml") dir_ = os.path.dirname(path) diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index ab2c49227..bbd25fcdb 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -17,6 +17,7 @@ import os from pathlib import Path import sys +from typing import Callable, Dict, List, Optional import nox @@ -65,7 +66,7 @@ TEST_CONFIG.update(TEST_CONFIG_OVERRIDE) -def get_pytest_env_vars(): +def get_pytest_env_vars() -> Dict[str, str]: """Returns a dict for pytest invocation.""" ret = {} @@ -94,7 +95,7 @@ def get_pytest_env_vars(): # -def _determine_local_import_names(start_dir): +def _determine_local_import_names(start_dir: str) -> List[str]: """Determines all import names that should be considered "local". 
This is used when running the linter to insure that import order is @@ -132,7 +133,7 @@ def _determine_local_import_names(start_dir): @nox.session -def lint(session): +def lint(session: nox.sessions.Session) -> None: if not TEST_CONFIG["enforce_type_hints"]: session.install("flake8", "flake8-import-order") else: @@ -153,7 +154,7 @@ def lint(session): @nox.session -def blacken(session): +def blacken(session: nox.sessions.Session) -> None: session.install("black") python_files = [path for path in os.listdir(".") if path.endswith(".py")] @@ -168,7 +169,9 @@ def blacken(session): PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"] -def _session_tests(session, post_install=None): +def _session_tests( + session: nox.sessions.Session, post_install: Callable = None +) -> None: """Runs py.test for a particular project.""" if os.path.exists("requirements.txt"): session.install("-r", "requirements.txt") @@ -194,7 +197,7 @@ def _session_tests(session, post_install=None): @nox.session(python=ALL_VERSIONS) -def py(session): +def py(session: nox.sessions.Session) -> None: """Runs py.test for a sample using the specified version of Python.""" if session.python in TESTED_VERSIONS: _session_tests(session) @@ -209,7 +212,7 @@ def py(session): # -def _get_repo_root(): +def _get_repo_root() -> Optional[str]: """ Returns the root folder of the project. """ # Get root of this repository. Assume we don't have directories nested deeper than 10 items. 
p = Path(os.getcwd()) @@ -232,7 +235,7 @@ def _get_repo_root(): @nox.session @nox.parametrize("path", GENERATED_READMES) -def readmegen(session, path): +def readmegen(session: nox.sessions.Session, path: str) -> None: """(Re-)generates the readme for a sample.""" session.install("jinja2", "pyyaml") dir_ = os.path.dirname(path) diff --git a/synth.metadata b/synth.metadata index 6b7854860..eb9009391 100644 --- a/synth.metadata +++ b/synth.metadata @@ -3,8 +3,8 @@ { "git": { "name": ".", - "remote": "git@github.com:tswast/python-bigquery.git", - "sha": "5a422eb20c57dae66c5716fd319b66432d3edce6" + "remote": "https://github.com/googleapis/python-bigquery.git", + "sha": "2788736b80a4c4ac0ae3029aeb28bcefd34f2db7" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "c2de32114ec484aa708d32012d1fa8d75232daf5" + "sha": "16ec872dd898d7de6e1822badfac32484b5d9031" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "c2de32114ec484aa708d32012d1fa8d75232daf5" + "sha": "16ec872dd898d7de6e1822badfac32484b5d9031" } } ], @@ -40,5 +40,89 @@ "generator": "bazel" } } + ], + "generatedFiles": [ + ".flake8", + ".github/CONTRIBUTING.md", + ".github/ISSUE_TEMPLATE/bug_report.md", + ".github/ISSUE_TEMPLATE/feature_request.md", + ".github/ISSUE_TEMPLATE/support_request.md", + ".github/PULL_REQUEST_TEMPLATE.md", + ".github/release-please.yml", + ".github/snippet-bot.yml", + ".gitignore", + ".kokoro/build.sh", + ".kokoro/continuous/common.cfg", + ".kokoro/continuous/continuous.cfg", + ".kokoro/docker/docs/Dockerfile", + ".kokoro/docker/docs/fetch_gpg_keys.sh", + ".kokoro/docs/common.cfg", + ".kokoro/docs/docs-presubmit.cfg", + ".kokoro/docs/docs.cfg", + ".kokoro/populate-secrets.sh", + ".kokoro/presubmit/common.cfg", + ".kokoro/presubmit/system-3.8.cfg", + ".kokoro/publish-docs.sh", + ".kokoro/release.sh", + ".kokoro/release/common.cfg", + ".kokoro/release/release.cfg", + 
".kokoro/samples/lint/common.cfg", + ".kokoro/samples/lint/continuous.cfg", + ".kokoro/samples/lint/periodic.cfg", + ".kokoro/samples/lint/presubmit.cfg", + ".kokoro/samples/python3.6/common.cfg", + ".kokoro/samples/python3.6/continuous.cfg", + ".kokoro/samples/python3.6/periodic.cfg", + ".kokoro/samples/python3.6/presubmit.cfg", + ".kokoro/samples/python3.7/common.cfg", + ".kokoro/samples/python3.7/continuous.cfg", + ".kokoro/samples/python3.7/periodic.cfg", + ".kokoro/samples/python3.7/presubmit.cfg", + ".kokoro/samples/python3.8/common.cfg", + ".kokoro/samples/python3.8/continuous.cfg", + ".kokoro/samples/python3.8/periodic.cfg", + ".kokoro/samples/python3.8/presubmit.cfg", + ".kokoro/test-samples.sh", + ".kokoro/trampoline.sh", + ".kokoro/trampoline_v2.sh", + ".pre-commit-config.yaml", + ".trampolinerc", + "CODE_OF_CONDUCT.md", + "CONTRIBUTING.rst", + "LICENSE", + "MANIFEST.in", + "docs/_static/custom.css", + "docs/_templates/layout.html", + "docs/bigquery_v2/services.rst", + "docs/bigquery_v2/types.rst", + "docs/conf.py", + "google/cloud/bigquery_v2/__init__.py", + "google/cloud/bigquery_v2/proto/encryption_config.proto", + "google/cloud/bigquery_v2/proto/model.proto", + "google/cloud/bigquery_v2/proto/model_reference.proto", + "google/cloud/bigquery_v2/proto/standard_sql.proto", + "google/cloud/bigquery_v2/proto/table_reference.proto", + "google/cloud/bigquery_v2/py.typed", + "google/cloud/bigquery_v2/types/__init__.py", + "google/cloud/bigquery_v2/types/encryption_config.py", + "google/cloud/bigquery_v2/types/model.py", + "google/cloud/bigquery_v2/types/model_reference.py", + "google/cloud/bigquery_v2/types/standard_sql.py", + "google/cloud/bigquery_v2/types/table_reference.py", + "mypy.ini", + "renovate.json", + "samples/AUTHORING_GUIDE.md", + "samples/CONTRIBUTING.md", + "samples/geography/noxfile.py", + "samples/snippets/noxfile.py", + "scripts/decrypt-secrets.sh", + "scripts/readme-gen/readme_gen.py", + "scripts/readme-gen/templates/README.tmpl.rst", + 
"scripts/readme-gen/templates/auth.tmpl.rst", + "scripts/readme-gen/templates/auth_api_key.tmpl.rst", + "scripts/readme-gen/templates/install_deps.tmpl.rst", + "scripts/readme-gen/templates/install_portaudio.tmpl.rst", + "setup.cfg", + "testing/.gitignore" ] } \ No newline at end of file From 64e1c0581b915e36756ea465936939390da7d818 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 1 Feb 2021 19:52:02 +0100 Subject: [PATCH 118/341] chore(deps): update dependency pytz to v2021 (#502) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [pytz](http://pythonhosted.org/pytz) | `==2020.5` -> `==2021.1` | [![age](https://badges.renovateapi.com/packages/pypi/pytz/2021.1/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/pytz/2021.1/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/pytz/2021.1/compatibility-slim/2020.5)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/pytz/2021.1/confidence-slim/2020.5)](https://docs.renovatebot.com/merge-confidence/) | --- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). 
--- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 00d28fa0b..7087121b5 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -8,4 +8,4 @@ matplotlib==3.3.4 pandas==1.1.5; python_version < '3.7' pandas==1.2.0; python_version >= '3.7' pyarrow==3.0.0 -pytz==2020.5 +pytz==2021.1 From 3138d41b60be1b0419cc6bc456b381801b418089 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Wed, 3 Feb 2021 09:41:41 -0800 Subject: [PATCH 119/341] test: add samples Python 3.9 test session (#506) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * changes without context autosynth cannot find the source of changes triggered by earlier changes in this repository, or by version upgrades to tools such as linters. * feat: sync v1beta1 GKE API fix: deprecate SetLocations; use UpdateCluster feat: support for sysctls config in Linux nodes feat: support for node kubelet config controlling CPU manager policy, CFS quota feat: support for Customer Managed Encryption in nodes feat: support for SSDs as ephemeral storage feat: support for node reservation affinity feat: support for Gvisor in nodes fix: deprecate basic auth fields (removed in 1.19 clusters) feat: support for NodeLocalDNS feat: support for ConfigConnector feat: support for the Compute Engine Persistent Disk CSI driver feat: support for KALM feat: support for private cluster VPC peering and master global access feat: support for CloudRun load balancers feat: support using routes for pod IPs feat: support for Shielded Nodes feat: support for release channels feat: support for Workload Identity feat: support for Cluster Telemetry feat: support for Cloud TPU feat: support for receiving upgrade notifications feat: support for Confidential Nodes feat: support for disabling default sNAT feat: support for selecting Kubernetes datapath 
model feat: support for encrypting etcd databases feat: support for configuration of master components fix: deprecate Operation.cluster_conditions and operation_conditions; use error feat: support updating NodePool locations feat: support for node Surge Upgrades feat: support for specifying Cluster Autoscaling profile. feat: support for Node Auto Provisioning feat: support for specifying node disk size and type fix: deprecated StatusCondition.code; use canonical_code docs: many minor documentation clarifications docs: some output only fields now annotated as such PiperOrigin-RevId: 344443035 Source-Author: Google APIs Source-Date: Thu Nov 26 11:27:06 2020 -0800 Source-Repo: googleapis/googleapis Source-Sha: df4fd38d040c5c8a0869936205bca13fb64b2cff Source-Link: https://github.com/googleapis/googleapis/commit/df4fd38d040c5c8a0869936205bca13fb64b2cff * chore: add 3.9 to noxfile template Since the python-docs-samples noxfile-template doesn't sync with this, I wanted to make sure the noxfile template matched the most recent change [here](https://github.com/GoogleCloudPlatform/python-docs-samples/pull/4968/files) cc @tmatsuo Source-Author: Leah E. Cole <6719667+leahecole@users.noreply.github.com> Source-Date: Fri Jan 15 17:24:05 2021 -0800 Source-Repo: googleapis/synthtool Source-Sha: 56ddc68f36b32341e9f22c2c59b4ce6aa3ba635f Source-Link: https://github.com/googleapis/synthtool/commit/56ddc68f36b32341e9f22c2c59b4ce6aa3ba635f * build(python): make `NOX_SESSION` optional I added this accidentally in #889. `NOX_SESSION` should be passed down if it is set but not marked required. 
Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Tue Jan 19 09:38:04 2021 -0700 Source-Repo: googleapis/synthtool Source-Sha: ba960d730416fe05c50547e975ce79fcee52c671 Source-Link: https://github.com/googleapis/synthtool/commit/ba960d730416fe05c50547e975ce79fcee52c671 * chore: Add header checker config to python library synth Now that we have it working in [python-docs-samples](https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/.github/header-checker-lint.yml) we should consider adding it to the 🐍 libraries :) Source-Author: Leah E. Cole <6719667+leahecole@users.noreply.github.com> Source-Date: Mon Jan 25 13:24:08 2021 -0800 Source-Repo: googleapis/synthtool Source-Sha: 573f7655311b553a937f9123bee17bf78497db95 Source-Link: https://github.com/googleapis/synthtool/commit/573f7655311b553a937f9123bee17bf78497db95 * build: migrate to flakybot Source-Author: Justin Beckwith Source-Date: Thu Jan 28 22:22:38 2021 -0800 Source-Repo: googleapis/synthtool Source-Sha: d1bb9173100f62c0cfc8f3138b62241e7f47ca6a Source-Link: https://github.com/googleapis/synthtool/commit/d1bb9173100f62c0cfc8f3138b62241e7f47ca6a * remove tarball Co-authored-by: Tim Swast --- .github/header-checker-lint.yml | 15 +++++++++++++++ .trampolinerc | 1 - samples/geography/noxfile.py | 2 +- samples/snippets/noxfile.py | 2 +- synth.metadata | 7 ++++--- 5 files changed, 21 insertions(+), 6 deletions(-) create mode 100644 .github/header-checker-lint.yml diff --git a/.github/header-checker-lint.yml b/.github/header-checker-lint.yml new file mode 100644 index 000000000..fc281c05b --- /dev/null +++ b/.github/header-checker-lint.yml @@ -0,0 +1,15 @@ +{"allowedCopyrightHolders": ["Google LLC"], + "allowedLicenses": ["Apache-2.0", "MIT", "BSD-3"], + "ignoreFiles": ["**/requirements.txt", "**/requirements-test.txt"], + "sourceFileExtensions": [ + "ts", + "js", + "java", + "sh", + "Dockerfile", + "yaml", + "py", + "html", + "txt" + ] +} \ No newline at end of file 
diff --git a/.trampolinerc b/.trampolinerc index c7d663ae9..383b6ec89 100644 --- a/.trampolinerc +++ b/.trampolinerc @@ -18,7 +18,6 @@ required_envvars+=( "STAGING_BUCKET" "V2_STAGING_BUCKET" - "NOX_SESSION" ) # Add env vars which are passed down into the container here. diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index bbd25fcdb..f2320ea00 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -82,7 +82,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to tested samples. -ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8"] +ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index bbd25fcdb..f2320ea00 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -82,7 +82,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to tested samples. -ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8"] +ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9"] # Any default versions that should be ignored. 
IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/synth.metadata b/synth.metadata index eb9009391..f6dcca132 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "2788736b80a4c4ac0ae3029aeb28bcefd34f2db7" + "sha": "64e1c0581b915e36756ea465936939390da7d818" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "16ec872dd898d7de6e1822badfac32484b5d9031" + "sha": "d1bb9173100f62c0cfc8f3138b62241e7f47ca6a" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "16ec872dd898d7de6e1822badfac32484b5d9031" + "sha": "d1bb9173100f62c0cfc8f3138b62241e7f47ca6a" } } ], @@ -48,6 +48,7 @@ ".github/ISSUE_TEMPLATE/feature_request.md", ".github/ISSUE_TEMPLATE/support_request.md", ".github/PULL_REQUEST_TEMPLATE.md", + ".github/header-checker-lint.yml", ".github/release-please.yml", ".github/snippet-bot.yml", ".gitignore", From 1823cadee3acf95c516d0479400e4175349ea199 Mon Sep 17 00:00:00 2001 From: arithmetic1728 <58957152+arithmetic1728@users.noreply.github.com> Date: Fri, 5 Feb 2021 07:53:15 -0800 Subject: [PATCH 120/341] feat: add mtls support to client (#492) * feat: add mtls feature --- google/cloud/bigquery/_http.py | 21 +++++++++++++++++++-- google/cloud/bigquery/client.py | 25 +++++++++++++++++++------ tests/system/test_client.py | 6 ++++++ tests/unit/helpers.py | 2 ++ tests/unit/test__http.py | 14 ++++++++++++++ tests/unit/test_client.py | 23 +++++++++++++++++++---- 6 files changed, 79 insertions(+), 12 deletions(-) diff --git a/google/cloud/bigquery/_http.py b/google/cloud/bigquery/_http.py index 8ee633e64..ede26cc70 100644 --- a/google/cloud/bigquery/_http.py +++ b/google/cloud/bigquery/_http.py @@ -14,11 +14,23 @@ """Create / interact with Google BigQuery connections.""" +import os +import pkg_resources + from google.cloud import _http from 
google.cloud.bigquery import __version__ +# TODO: Increase the minimum version of google-cloud-core to 1.6.0 +# and remove this logic. See: +# https://github.com/googleapis/python-bigquery/issues/509 +if os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE") == "true": # pragma: NO COVER + release = pkg_resources.get_distribution("google-cloud-core").parsed_version + if release < pkg_resources.parse_version("1.6.0"): + raise ImportError("google-cloud-core >= 1.6.0 is required to use mTLS feature") + + class Connection(_http.JSONConnection): """A connection to Google BigQuery via the JSON REST API. @@ -26,13 +38,18 @@ class Connection(_http.JSONConnection): client (google.cloud.bigquery.client.Client): The client that owns the current connection. client_info (Optional[google.api_core.client_info.ClientInfo]): Instance used to generate user agent. + + api_endpoint (str): The api_endpoint to use. If None, the library will decide what endpoint to use. """ DEFAULT_API_ENDPOINT = "https://bigquery.googleapis.com" + DEFAULT_API_MTLS_ENDPOINT = "https://bigquery.mtls.googleapis.com" - def __init__(self, client, client_info=None, api_endpoint=DEFAULT_API_ENDPOINT): + def __init__(self, client, client_info=None, api_endpoint=None): super(Connection, self).__init__(client, client_info) - self.API_BASE_URL = api_endpoint + self.API_BASE_URL = api_endpoint or self.DEFAULT_API_ENDPOINT + self.API_BASE_MTLS_URL = self.DEFAULT_API_MTLS_ENDPOINT + self.ALLOW_AUTO_SWITCH_TO_MTLS_URL = api_endpoint is None self._client_info.gapic_version = __version__ self._client_info.client_library_version = __version__ diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index b270075a9..f8c0d7c93 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -78,10 +78,7 @@ _DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB _MAX_MULTIPART_SIZE = 5 * 1024 * 1024 _DEFAULT_NUM_RETRIES = 6 -_BASE_UPLOAD_TEMPLATE = ( - 
"https://bigquery.googleapis.com/upload/bigquery/v2/projects/" - "{project}/jobs?uploadType=" -) +_BASE_UPLOAD_TEMPLATE = "{host}/upload/bigquery/v2/projects/{project}/jobs?uploadType=" _MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "multipart" _RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "resumable" _GENERIC_CONTENT_TYPE = "*/*" @@ -2547,7 +2544,15 @@ def _initiate_resumable_upload( if project is None: project = self.project - upload_url = _RESUMABLE_URL_TEMPLATE.format(project=project) + # TODO: Increase the minimum version of google-cloud-core to 1.6.0 + # and remove this logic. See: + # https://github.com/googleapis/python-bigquery/issues/509 + hostname = ( + self._connection.API_BASE_URL + if not hasattr(self._connection, "get_api_base_url_for_mtls") + else self._connection.get_api_base_url_for_mtls() + ) + upload_url = _RESUMABLE_URL_TEMPLATE.format(host=hostname, project=project) # TODO: modify ResumableUpload to take a retry.Retry object # that it can use for the initial RPC. @@ -2616,7 +2621,15 @@ def _do_multipart_upload( if project is None: project = self.project - upload_url = _MULTIPART_URL_TEMPLATE.format(project=project) + # TODO: Increase the minimum version of google-cloud-core to 1.6.0 + # and remove this logic. 
See: + # https://github.com/googleapis/python-bigquery/issues/509 + hostname = ( + self._connection.API_BASE_URL + if not hasattr(self._connection, "get_api_base_url_for_mtls") + else self._connection.get_api_base_url_for_mtls() + ) + upload_url = _MULTIPART_URL_TEMPLATE.format(host=hostname, project=project) upload = MultipartUpload(upload_url, headers=headers) if num_retries is not None: diff --git a/tests/system/test_client.py b/tests/system/test_client.py index aa1a03160..85c044bad 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -28,6 +28,7 @@ import uuid import psutil +import pytest import pytz import pkg_resources @@ -132,6 +133,8 @@ else: PYARROW_INSTALLED_VERSION = None +MTLS_TESTING = os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE") == "true" + def _has_rows(result): return len(result) > 0 @@ -2651,6 +2654,9 @@ def test_insert_rows_nested_nested_dictionary(self): expected_rows = [("Some value", record)] self.assertEqual(row_tuples, expected_rows) + @pytest.mark.skipif( + MTLS_TESTING, reason="mTLS testing has no permission to the max-value.js file" + ) def test_create_routine(self): routine_name = "test_routine" dataset = self.temp_dataset(_make_dataset_id("create_routine")) diff --git a/tests/unit/helpers.py b/tests/unit/helpers.py index eea345e89..b51b0bbb7 100644 --- a/tests/unit/helpers.py +++ b/tests/unit/helpers.py @@ -21,6 +21,8 @@ def make_connection(*responses): mock_conn = mock.create_autospec(google.cloud.bigquery._http.Connection) mock_conn.user_agent = "testing 1.2.3" mock_conn.api_request.side_effect = list(responses) + [NotFound("miss")] + mock_conn.API_BASE_URL = "https://bigquery.googleapis.com" + mock_conn.get_api_base_url_for_mtls = mock.Mock(return_value=mock_conn.API_BASE_URL) return mock_conn diff --git a/tests/unit/test__http.py b/tests/unit/test__http.py index 78e59cb30..09f6d29d7 100644 --- a/tests/unit/test__http.py +++ b/tests/unit/test__http.py @@ -32,6 +32,9 @@ def _get_target_class(): return 
Connection def _make_one(self, *args, **kw): + if "api_endpoint" not in kw: + kw["api_endpoint"] = "https://bigquery.googleapis.com" + return self._get_target_class()(*args, **kw) def test_build_api_url_no_extra_query_params(self): @@ -138,3 +141,14 @@ def test_extra_headers_replace(self): url=expected_uri, timeout=self._get_default_timeout(), ) + + def test_ctor_mtls(self): + conn = self._make_one(object(), api_endpoint=None) + self.assertEqual(conn.ALLOW_AUTO_SWITCH_TO_MTLS_URL, True) + self.assertEqual(conn.API_BASE_URL, "https://bigquery.googleapis.com") + self.assertEqual(conn.API_BASE_MTLS_URL, "https://bigquery.mtls.googleapis.com") + + conn = self._make_one(object(), api_endpoint="http://foo") + self.assertEqual(conn.ALLOW_AUTO_SWITCH_TO_MTLS_URL, False) + self.assertEqual(conn.API_BASE_URL, "http://foo") + self.assertEqual(conn.API_BASE_MTLS_URL, "https://bigquery.mtls.googleapis.com") diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 625256e6e..66add9c0a 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -2057,6 +2057,7 @@ def test_get_table_sets_user_agent(self): url=mock.ANY, method=mock.ANY, headers=mock.ANY, data=mock.ANY ) http.reset_mock() + http.is_mtls = False mock_response.status_code = 200 mock_response.json.return_value = self._make_table_resource() user_agent_override = client_info.ClientInfo(user_agent="my-application/1.2.3") @@ -4425,7 +4426,7 @@ def _mock_transport(self, status_code, headers, content=b""): fake_transport.request.return_value = fake_response return fake_transport - def _initiate_resumable_upload_helper(self, num_retries=None): + def _initiate_resumable_upload_helper(self, num_retries=None, mtls=False): from google.resumable_media.requests import ResumableUpload from google.cloud.bigquery.client import _DEFAULT_CHUNKSIZE from google.cloud.bigquery.client import _GENERIC_CONTENT_TYPE @@ -4440,6 +4441,8 @@ def _initiate_resumable_upload_helper(self, num_retries=None): fake_transport 
= self._mock_transport(http.client.OK, response_headers) client = self._make_one(project=self.PROJECT, _http=fake_transport) conn = client._connection = make_connection() + if mtls: + conn.get_api_base_url_for_mtls = mock.Mock(return_value="https://foo.mtls") # Create some mock arguments and call the method under test. data = b"goodbye gudbi gootbee" @@ -4454,8 +4457,10 @@ def _initiate_resumable_upload_helper(self, num_retries=None): # Check the returned values. self.assertIsInstance(upload, ResumableUpload) + + host_name = "https://foo.mtls" if mtls else "https://bigquery.googleapis.com" upload_url = ( - f"https://bigquery.googleapis.com/upload/bigquery/v2/projects/{self.PROJECT}" + f"{host_name}/upload/bigquery/v2/projects/{self.PROJECT}" "/jobs?uploadType=resumable" ) self.assertEqual(upload.upload_url, upload_url) @@ -4494,11 +4499,14 @@ def _initiate_resumable_upload_helper(self, num_retries=None): def test__initiate_resumable_upload(self): self._initiate_resumable_upload_helper() + def test__initiate_resumable_upload_mtls(self): + self._initiate_resumable_upload_helper(mtls=True) + def test__initiate_resumable_upload_with_retry(self): self._initiate_resumable_upload_helper(num_retries=11) def _do_multipart_upload_success_helper( - self, get_boundary, num_retries=None, project=None + self, get_boundary, num_retries=None, project=None, mtls=False ): from google.cloud.bigquery.client import _get_upload_headers from google.cloud.bigquery.job import LoadJob @@ -4508,6 +4516,8 @@ def _do_multipart_upload_success_helper( fake_transport = self._mock_transport(http.client.OK, {}) client = self._make_one(project=self.PROJECT, _http=fake_transport) conn = client._connection = make_connection() + if mtls: + conn.get_api_base_url_for_mtls = mock.Mock(return_value="https://foo.mtls") if project is None: project = self.PROJECT @@ -4530,8 +4540,9 @@ def _do_multipart_upload_success_helper( self.assertEqual(stream.tell(), size) get_boundary.assert_called_once_with() + 
host_name = "https://foo.mtls" if mtls else "https://bigquery.googleapis.com" upload_url = ( - f"https://bigquery.googleapis.com/upload/bigquery/v2/projects/{project}" + f"{host_name}/upload/bigquery/v2/projects/{project}" "/jobs?uploadType=multipart" ) payload = ( @@ -4556,6 +4567,10 @@ def _do_multipart_upload_success_helper( def test__do_multipart_upload(self, get_boundary): self._do_multipart_upload_success_helper(get_boundary) + @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") + def test__do_multipart_upload_mtls(self, get_boundary): + self._do_multipart_upload_success_helper(get_boundary, mtls=True) + @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") def test__do_multipart_upload_with_retry(self, get_boundary): self._do_multipart_upload_success_helper(get_boundary, num_retries=8) From 1c6681aba872c00afb16a904a2ba9bae8e9618d3 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Mon, 8 Feb 2021 09:16:42 -0800 Subject: [PATCH 121/341] chore(python): include py.typed files in release (#511) A py.typed file must be included in the released package for it to be considered typed by type checkers. https://www.python.org/dev/peps/pep-0561/#packaging-type-information. See https://github.com/googleapis/python-secret-manager/issues/79 Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Fri Feb 5 17:32:06 2021 -0700 Source-Repo: googleapis/synthtool Source-Sha: 33366574ffb9e11737b3547eb6f020ecae0536e8 Source-Link: https://github.com/googleapis/synthtool/commit/33366574ffb9e11737b3547eb6f020ecae0536e8 --- MANIFEST.in | 4 ++-- synth.metadata | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index e9e29d120..e783f4c62 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -16,10 +16,10 @@ # Generated by synthtool. DO NOT EDIT! 
include README.rst LICENSE -recursive-include google *.json *.proto +recursive-include google *.json *.proto py.typed recursive-include tests * global-exclude *.py[co] global-exclude __pycache__ # Exclude scripts for samples readmegen -prune scripts/readme-gen \ No newline at end of file +prune scripts/readme-gen diff --git a/synth.metadata b/synth.metadata index f6dcca132..1c5fecaf8 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "64e1c0581b915e36756ea465936939390da7d818" + "sha": "1823cadee3acf95c516d0479400e4175349ea199" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "d1bb9173100f62c0cfc8f3138b62241e7f47ca6a" + "sha": "33366574ffb9e11737b3547eb6f020ecae0536e8" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "d1bb9173100f62c0cfc8f3138b62241e7f47ca6a" + "sha": "33366574ffb9e11737b3547eb6f020ecae0536e8" } } ], From 1773dae8dc30b37a74d7ee727e475b45b3b3d2e7 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 10 Feb 2021 08:28:01 +0000 Subject: [PATCH 122/341] chore: release 2.8.0 (#510) :robot: I have created a release \*beep\* \*boop\* --- ## [2.8.0](https://www.github.com/googleapis/python-bigquery/compare/v2.7.0...v2.8.0) (2021-02-08) ### Features * add mtls support to client ([#492](https://www.github.com/googleapis/python-bigquery/issues/492)) ([1823cad](https://www.github.com/googleapis/python-bigquery/commit/1823cadee3acf95c516d0479400e4175349ea199)) ### Bug Fixes * don't try to close closed cursors ([#498](https://www.github.com/googleapis/python-bigquery/issues/498)) ([bf44e7b](https://www.github.com/googleapis/python-bigquery/commit/bf44e7b67d2de41c13053a4550484b9ea049db3e)) --- This PR was generated with [Release 
Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- CHANGELOG.md | 12 ++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a58510c66..768b7b036 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,18 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.8.0](https://www.github.com/googleapis/python-bigquery/compare/v2.7.0...v2.8.0) (2021-02-08) + + +### Features + +* Add mTLS support to client. ([#492](https://www.github.com/googleapis/python-bigquery/issues/492)) ([1823cad](https://www.github.com/googleapis/python-bigquery/commit/1823cadee3acf95c516d0479400e4175349ea199)) + + +### Bug Fixes + +* Don't try to close closed cursors. ([#498](https://www.github.com/googleapis/python-bigquery/issues/498)) ([bf44e7b](https://www.github.com/googleapis/python-bigquery/commit/bf44e7b67d2de41c13053a4550484b9ea049db3e)) + ## [2.7.0](https://www.github.com/googleapis/python-bigquery/compare/v2.6.2...v2.7.0) (2021-01-27) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index d962613e0..0a9aecb37 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.7.0" +__version__ = "2.8.0" From 1de05f6cb1cea27cbfa5bc39dc428755464de130 Mon Sep 17 00:00:00 2001 From: arithmetic1728 <58957152+arithmetic1728@users.noreply.github.com> Date: Thu, 11 Feb 2021 13:08:39 -0800 Subject: [PATCH 123/341] test: update system test for mtls (#518) --- noxfile.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 942525ca9..df36d237e 100644 --- a/noxfile.py +++ b/noxfile.py @@ -113,7 +113,11 @@ def system(session): session.install( "mock", "pytest", "psutil", "google-cloud-testutils", "-c", constraints_path ) - session.install("google-cloud-storage", "-c", constraints_path) + if os.environ.get("GOOGLE_API_USE_CLIENT_CERTIFICATE", "") == "true": + # mTLS test requires pyopenssl and latest google-cloud-storage + session.install("google-cloud-storage", "pyopenssl") + else: + session.install("google-cloud-storage", "-c", constraints_path) session.install("-e", ".[all]", "-c", constraints_path) session.install("ipython", "-c", constraints_path) From 9902789791d0237dbda4695c781b8a056dbeddc0 Mon Sep 17 00:00:00 2001 From: shollyman Date: Tue, 16 Feb 2021 06:49:16 -0800 Subject: [PATCH 124/341] chore: add PARQUET to DestinationFormat enum (#521) --- google/cloud/bigquery/enums.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 2268808fd..db463afdc 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -72,6 +72,9 @@ class DestinationFormat(object): AVRO = "AVRO" """Specifies Avro format.""" + PARQUET = "PARQUET" + """Specifies Parquet format.""" + class Encoding(object): """The character encoding of the data. The default is :attr:`UTF_8`. 
From eedf93b6636c5ff1bd810c6038cfeaea8ccb64d8 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 16 Feb 2021 08:58:02 -0600 Subject: [PATCH 125/341] docs: clarify `%%bigquery`` magics and fix broken link (#508) --- docs/usage/index.rst | 4 ++-- google/cloud/bigquery/magics/magics.py | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/docs/usage/index.rst b/docs/usage/index.rst index ff4c9d7f1..1d3cc9f64 100644 --- a/docs/usage/index.rst +++ b/docs/usage/index.rst @@ -29,7 +29,7 @@ Integrations with Other Libraries pandas -See also, the :mod:`google.cloud.bigquery.magics` module for integrations -with Jupyter. +See also, the :mod:`google.cloud.bigquery.magics.magics` module for +integrations with Jupyter. diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 8f343ddcc..6ae7cae12 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -14,6 +14,15 @@ """IPython Magics +To use these magics, you must first register them. Run the ``%load_ext`` magic +in a Jupyter notebook cell. + +.. code:: + + %load_ext google.cloud.bigquery + +This makes the ``%%bigquery`` magic available. + .. 
function:: %%bigquery IPython cell magic to run a query and display the result as a DataFrame From 3e9430faff7f071600acef295cb5feefe767b954 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 16 Feb 2021 09:25:22 -0600 Subject: [PATCH 126/341] chore: remove redundant view code samples (#437) --- docs/snippets.py | 126 ----------------------------------------------- 1 file changed, 126 deletions(-) diff --git a/docs/snippets.py b/docs/snippets.py index 8c106e63d..3f9b9a88c 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -478,132 +478,6 @@ def test_update_table_cmek(client, to_delete): # [END bigquery_update_table_cmek] -@pytest.mark.skip( - reason=( - "update_table() is flaky " - "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" - ) -) -def test_manage_views(client, to_delete): - project = client.project - source_dataset_id = "source_dataset_{}".format(_millis()) - source_dataset_ref = bigquery.DatasetReference(project, source_dataset_id) - source_dataset = bigquery.Dataset(source_dataset_ref) - source_dataset = client.create_dataset(source_dataset) - to_delete.append(source_dataset) - - job_config = bigquery.LoadJobConfig() - job_config.schema = [ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("post_abbr", "STRING"), - ] - job_config.skip_leading_rows = 1 - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" - source_table_id = "us_states" - load_job = client.load_table_from_uri( - uri, source_dataset.table(source_table_id), job_config=job_config - ) - load_job.result() - - shared_dataset_id = "shared_dataset_{}".format(_millis()) - shared_dataset_ref = bigquery.DatasetReference(project, shared_dataset_id) - shared_dataset = bigquery.Dataset(shared_dataset_ref) - shared_dataset = client.create_dataset(shared_dataset) - to_delete.append(shared_dataset) - - # [START bigquery_create_view] - # from google.cloud import bigquery - # client = bigquery.Client() - # project = 'my-project' - # source_dataset_id = 
'my_source_dataset' - # source_table_id = 'us_states' - # shared_dataset_ref = bigquery.DatasetReference(project, 'my_shared_dataset') - - # This example shows how to create a shared view of a source table of - # US States. The source table contains all 50 states, while the view will - # contain only states with names starting with 'W'. - view_ref = shared_dataset_ref.table("my_shared_view") - view = bigquery.Table(view_ref) - sql_template = 'SELECT name, post_abbr FROM `{}.{}.{}` WHERE name LIKE "W%"' - view.view_query = sql_template.format(project, source_dataset_id, source_table_id) - view = client.create_table(view) # API request - - print("Successfully created view at {}".format(view.full_table_id)) - # [END bigquery_create_view] - - # [START bigquery_update_view_query] - # from google.cloud import bigquery - # client = bigquery.Client() - # project = 'my-project' - # source_dataset_id = 'my_source_dataset' - # source_table_id = 'us_states' - # shared_dataset_ref = bigquery.DatasetReference(project, 'my_shared_dataset') - - # This example shows how to update a shared view of a source table of - # US States. The view's query will be updated to contain only states with - # names starting with 'M'. 
- view_ref = shared_dataset_ref.table("my_shared_view") - view = bigquery.Table(view_ref) - sql_template = 'SELECT name, post_abbr FROM `{}.{}.{}` WHERE name LIKE "M%"' - view.view_query = sql_template.format(project, source_dataset_id, source_table_id) - view = client.update_table(view, ["view_query"]) # API request - # [END bigquery_update_view_query] - - # [START bigquery_get_view] - # from google.cloud import bigquery - # client = bigquery.Client() - # shared_dataset_id = 'my_shared_dataset' - project = client.project - shared_dataset_ref = bigquery.DatasetReference(project, shared_dataset_id) - view_ref = shared_dataset_ref.table("my_shared_view") - view = client.get_table(view_ref) # API Request - - # Display view properties - print("View at {}".format(view.full_table_id)) - print("View Query:\n{}".format(view.view_query)) - # [END bigquery_get_view] - assert view.view_query is not None - - analyst_group_email = "example-analyst-group@google.com" - # [START bigquery_grant_view_access] - # from google.cloud import bigquery - # client = bigquery.Client() - - # Assign access controls to the dataset containing the view - # shared_dataset_id = 'my_shared_dataset' - # analyst_group_email = 'data_analysts@example.com' - project = client.project - shared_dataset_ref = bigquery.DatasetReference(project, shared_dataset_id) - shared_dataset = client.get_dataset(shared_dataset_ref) # API request - access_entries = shared_dataset.access_entries - access_entries.append( - bigquery.AccessEntry("READER", "groupByEmail", analyst_group_email) - ) - shared_dataset.access_entries = access_entries - shared_dataset = client.update_dataset( - shared_dataset, ["access_entries"] - ) # API request - - # Authorize the view to access the source dataset - # project = 'my-project' - # source_dataset_id = 'my_source_dataset' - project = client.project - source_dataset_ref = bigquery.DatasetReference(project, source_dataset_id) - source_dataset = client.get_dataset(source_dataset_ref) # API 
request - view_reference = { - "projectId": project, - "datasetId": shared_dataset_id, - "tableId": "my_shared_view", - } - access_entries = source_dataset.access_entries - access_entries.append(bigquery.AccessEntry(None, "view", view_reference)) - source_dataset.access_entries = access_entries - source_dataset = client.update_dataset( - source_dataset, ["access_entries"] - ) # API request - # [END bigquery_grant_view_access] - - def test_load_table_add_column(client, to_delete): dataset_id = "load_table_add_column_{}".format(_millis()) project = client.project From edd3328fffa3040b2cd3a3c668c90a0e43e4c94c Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 17 Feb 2021 17:52:56 +0100 Subject: [PATCH 127/341] feat: add determinism level for javascript UDFs (#522) * feat: add determinism level for javascript UDFs * Add enum-like class for routine determinism level --- docs/reference.rst | 1 + google/cloud/bigquery/__init__.py | 2 + google/cloud/bigquery/enums.py | 17 ++++++++ google/cloud/bigquery/routine/__init__.py | 29 +++++++++++++ .../cloud/bigquery/{ => routine}/routine.py | 12 ++++++ tests/system/test_client.py | 1 + tests/unit/routine/test_routine.py | 41 ++++++++++++++++++- 7 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 google/cloud/bigquery/routine/__init__.py rename google/cloud/bigquery/{ => routine}/routine.py (97%) diff --git a/docs/reference.rst b/docs/reference.rst index 3643831cb..6b802e2a5 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -110,6 +110,7 @@ Routine .. 
autosummary:: :toctree: generated + routine.DeterminismLevel routine.Routine routine.RoutineArgument routine.RoutineReference diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 41f987228..29d375b03 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -70,6 +70,7 @@ from google.cloud.bigquery.query import StructQueryParameter from google.cloud.bigquery.query import UDFResource from google.cloud.bigquery.retry import DEFAULT_RETRY +from google.cloud.bigquery.routine import DeterminismLevel from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineArgument from google.cloud.bigquery.routine import RoutineReference @@ -134,6 +135,7 @@ "Compression", "CreateDisposition", "DestinationFormat", + "DeterminismLevel", "ExternalSourceFormat", "Encoding", "QueryPriority", diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index db463afdc..e353b3132 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -234,3 +234,20 @@ class WriteDisposition(object): WRITE_EMPTY = "WRITE_EMPTY" """If the table already exists and contains data, a 'duplicate' error is returned in the job result.""" + + +class DeterminismLevel: + """Specifies determinism level for JavaScript user-defined functions (UDFs). 
+ + https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#DeterminismLevel + """ + + DETERMINISM_LEVEL_UNSPECIFIED = "DETERMINISM_LEVEL_UNSPECIFIED" + """The determinism of the UDF is unspecified.""" + + DETERMINISTIC = "DETERMINISTIC" + """The UDF is deterministic, meaning that 2 function calls with the same inputs + always produce the same result, even across 2 query runs.""" + + NOT_DETERMINISTIC = "NOT_DETERMINISTIC" + """The UDF is not deterministic.""" diff --git a/google/cloud/bigquery/routine/__init__.py b/google/cloud/bigquery/routine/__init__.py new file mode 100644 index 000000000..d1c79b05e --- /dev/null +++ b/google/cloud/bigquery/routine/__init__.py @@ -0,0 +1,29 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""User-Defined Routines.""" + + +from google.cloud.bigquery.enums import DeterminismLevel +from google.cloud.bigquery.routine.routine import Routine +from google.cloud.bigquery.routine.routine import RoutineArgument +from google.cloud.bigquery.routine.routine import RoutineReference + + +__all__ = ( + "DeterminismLevel", + "Routine", + "RoutineArgument", + "RoutineReference", +) diff --git a/google/cloud/bigquery/routine.py b/google/cloud/bigquery/routine/routine.py similarity index 97% rename from google/cloud/bigquery/routine.py rename to google/cloud/bigquery/routine/routine.py index f26f20886..103799e8f 100644 --- a/google/cloud/bigquery/routine.py +++ b/google/cloud/bigquery/routine/routine.py @@ -50,6 +50,7 @@ class Routine(object): "return_type": "returnType", "type_": "routineType", "description": "description", + "determinism_level": "determinismLevel", } def __init__(self, routine_ref, **kwargs): @@ -253,6 +254,17 @@ def description(self): def description(self, value): self._properties[self._PROPERTY_TO_API_FIELD["description"]] = value + @property + def determinism_level(self): + """Optional[str]: (experimental) The determinism level of the JavaScript UDF + if defined. + """ + return self._properties.get(self._PROPERTY_TO_API_FIELD["determinism_level"]) + + @determinism_level.setter + def determinism_level(self, value): + self._properties[self._PROPERTY_TO_API_FIELD["determinism_level"]] = value + @classmethod def from_api_repr(cls, resource): """Factory: construct a routine given its API representation. 
diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 85c044bad..60c3b3fa8 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2682,6 +2682,7 @@ def test_create_routine(self): ) ] routine.body = "return maxValue(arr)" + routine.determinism_level = bigquery.DeterminismLevel.DETERMINISTIC query_string = "SELECT `{}`([-100.0, 3.14, 100.0, 42.0]) as max_value;".format( str(routine.reference) ) diff --git a/tests/unit/routine/test_routine.py b/tests/unit/routine/test_routine.py index b02ace1db..0a59e7c5f 100644 --- a/tests/unit/routine/test_routine.py +++ b/tests/unit/routine/test_routine.py @@ -18,6 +18,7 @@ import pytest import google.cloud._helpers +from google.cloud import bigquery from google.cloud import bigquery_v2 @@ -73,6 +74,7 @@ def test_ctor_w_properties(target_class): ) type_ = "SCALAR_FUNCTION" description = "A routine description." + determinism_level = bigquery.DeterminismLevel.NOT_DETERMINISTIC actual_routine = target_class( routine_id, @@ -82,6 +84,7 @@ def test_ctor_w_properties(target_class): return_type=return_type, type_=type_, description=description, + determinism_level=determinism_level, ) ref = RoutineReference.from_string(routine_id) @@ -92,6 +95,9 @@ def test_ctor_w_properties(target_class): assert actual_routine.return_type == return_type assert actual_routine.type_ == type_ assert actual_routine.description == description + assert ( + actual_routine.determinism_level == bigquery.DeterminismLevel.NOT_DETERMINISTIC + ) def test_from_api_repr(target_class): @@ -120,6 +126,7 @@ def test_from_api_repr(target_class): "routineType": "SCALAR_FUNCTION", "someNewField": "someValue", "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISTIC, } actual_routine = target_class.from_api_repr(resource) @@ -152,6 +159,7 @@ def test_from_api_repr(target_class): assert actual_routine.type_ == "SCALAR_FUNCTION" assert actual_routine._properties["someNewField"] == 
"someValue" assert actual_routine.description == "A routine description." + assert actual_routine.determinism_level == "DETERMINISTIC" def test_from_api_repr_w_minimal_resource(target_class): @@ -177,6 +185,7 @@ def test_from_api_repr_w_minimal_resource(target_class): assert actual_routine.return_type is None assert actual_routine.type_ is None assert actual_routine.description is None + assert actual_routine.determinism_level is None def test_from_api_repr_w_unknown_fields(target_class): @@ -208,6 +217,7 @@ def test_from_api_repr_w_unknown_fields(target_class): "returnType": {"typeKind": "INT64"}, "routineType": "SCALAR_FUNCTION", "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED, }, ["arguments"], {"arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}]}, @@ -220,6 +230,7 @@ def test_from_api_repr_w_unknown_fields(target_class): "returnType": {"typeKind": "INT64"}, "routineType": "SCALAR_FUNCTION", "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED, }, ["body"], {"definitionBody": "x * 3"}, @@ -232,6 +243,7 @@ def test_from_api_repr_w_unknown_fields(target_class): "returnType": {"typeKind": "INT64"}, "routineType": "SCALAR_FUNCTION", "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED, }, ["language"], {"language": "SQL"}, @@ -244,6 +256,7 @@ def test_from_api_repr_w_unknown_fields(target_class): "returnType": {"typeKind": "INT64"}, "routineType": "SCALAR_FUNCTION", "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED, }, ["return_type"], {"returnType": {"typeKind": "INT64"}}, @@ -256,6 +269,7 @@ def test_from_api_repr_w_unknown_fields(target_class): "returnType": {"typeKind": "INT64"}, "routineType": "SCALAR_FUNCTION", "description": "A routine description.", + "determinismLevel": 
bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED, }, ["type_"], {"routineType": "SCALAR_FUNCTION"}, @@ -268,13 +282,37 @@ def test_from_api_repr_w_unknown_fields(target_class): "returnType": {"typeKind": "INT64"}, "routineType": "SCALAR_FUNCTION", "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED, }, ["description"], {"description": "A routine description."}, ), + ( + { + "arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}], + "definitionBody": "x * 3", + "language": "SQL", + "returnType": {"typeKind": "INT64"}, + "routineType": "SCALAR_FUNCTION", + "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED, + }, + ["determinism_level"], + { + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED + }, + ), ( {}, - ["arguments", "language", "body", "type_", "return_type", "description"], + [ + "arguments", + "language", + "body", + "type_", + "return_type", + "description", + "determinism_level", + ], { "arguments": None, "definitionBody": None, @@ -282,6 +320,7 @@ def test_from_api_repr_w_unknown_fields(target_class): "returnType": None, "routineType": None, "description": None, + "determinismLevel": None, }, ), ( From 4ffb4e067abdaa54dad6eff49a7fbdb0fa358637 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 17 Feb 2021 18:30:58 +0100 Subject: [PATCH 128/341] feat: expose reservation usage stats on jobs (#524) * feat: expose reservation usage stats on jobs * Add ReservationUsage to job types in docs * Remove redundant space in docstring. 
--- docs/reference.rst | 1 + google/cloud/bigquery/job/__init__.py | 2 ++ google/cloud/bigquery/job/base.py | 27 +++++++++++++++++++++++++++ tests/unit/job/test_base.py | 24 ++++++++++++++++++++++++ 4 files changed, 54 insertions(+) diff --git a/docs/reference.rst b/docs/reference.rst index 6b802e2a5..52d916f96 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -62,6 +62,7 @@ Job-Related Types job.QueryPlanEntry job.QueryPlanEntryStep job.QueryPriority + job.ReservationUsage job.SourceFormat job.WriteDisposition job.SchemaUpdateOption diff --git a/google/cloud/bigquery/job/__init__.py b/google/cloud/bigquery/job/__init__.py index 26ecf8d3c..4945841d9 100644 --- a/google/cloud/bigquery/job/__init__.py +++ b/google/cloud/bigquery/job/__init__.py @@ -19,6 +19,7 @@ from google.cloud.bigquery.job.base import _DONE_STATE from google.cloud.bigquery.job.base import _JobConfig from google.cloud.bigquery.job.base import _JobReference +from google.cloud.bigquery.job.base import ReservationUsage from google.cloud.bigquery.job.base import ScriptStatistics from google.cloud.bigquery.job.base import ScriptStackFrame from google.cloud.bigquery.job.base import UnknownJob @@ -51,6 +52,7 @@ "_DONE_STATE", "_JobConfig", "_JobReference", + "ReservationUsage", "ScriptStatistics", "ScriptStackFrame", "UnknownJob", diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index 5ba01aa67..d8f5d6528 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -14,6 +14,7 @@ """Base classes and helpers for job classes.""" +from collections import namedtuple import copy import http import threading @@ -73,6 +74,16 @@ def _error_result_to_exception(error_result): ) +ReservationUsage = namedtuple("ReservationUsage", "name slot_ms") +ReservationUsage.__doc__ = "Job resource usage for a reservation." +ReservationUsage.name.__doc__ = ( + 'Reservation name or "unreserved" for on-demand resources usage.' 
+) +ReservationUsage.slot_ms.__doc__ = ( + "Total slot milliseconds used by the reservation for a particular job." +) + + class _JobReference(object): """A reference to a job. @@ -305,6 +316,22 @@ def _job_statistics(self): statistics = self._properties.get("statistics", {}) return statistics.get(self._JOB_TYPE, {}) + @property + def reservation_usage(self): + """Job resource usage breakdown by reservation. + + Returns: + List[google.cloud.bigquery.job.ReservationUsage]: + Reservation usage stats. Can be empty if not set from the server. + """ + usage_stats_raw = _helpers._get_sub_prop( + self._properties, ["statistics", "reservationUsage"], default=() + ) + return [ + ReservationUsage(name=usage["name"], slot_ms=int(usage["slotMs"])) + for usage in usage_stats_raw + ] + @property def error_result(self): """Error information about the job as a whole. diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index 44bbc2c77..bbeffba50 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -319,6 +319,30 @@ def test_ended(self): stats["endTime"] = millis self.assertEqual(job.ended, now) + def test_reservation_usage_no_stats(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties["statistics"] = {} + self.assertEqual(job.reservation_usage, []) + + def test_reservation_usage_stats_exist(self): + from google.cloud.bigquery.job import ReservationUsage + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties["statistics"] = { + "reservationUsage": [ + {"name": "slot_foo", "slotMs": "42"}, + {"name": "slot_bar", "slotMs": "123"}, + ], + } + + expected = [ + ReservationUsage(name="slot_foo", slot_ms=42), + ReservationUsage(name="slot_bar", slot_ms=123), + ] + self.assertEqual(job.reservation_usage, expected) + def test__job_statistics(self): statistics = {"foo": "bar"} client = _make_client(project=self.PROJECT) From 
01e851d00fc17a780375580776753d78f6d74174 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Wed, 17 Feb 2021 12:06:05 -0800 Subject: [PATCH 129/341] docs: update python contributing guide (#514) This PR was generated using Autosynth. :rainbow: Synth log will be available here: https://source.cloud.google.com/results/invocations/9d4e6069-5c18-4f79-97fb-99ebae377691/targets - [ ] To automatically regenerate this PR, check this box. Source-Link: https://github.com/googleapis/synthtool/commit/4679e7e415221f03ff2a71e3ffad75b9ec41d87e PiperOrigin-RevId: 344443035 Source-Link: https://github.com/googleapis/googleapis/commit/df4fd38d040c5c8a0869936205bca13fb64b2cff --- CONTRIBUTING.rst | 22 ++++++++++++++++++---- synth.metadata | 7 ++++--- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 15bcd2e28..a0e330e44 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -70,9 +70,14 @@ We use `nox `__ to instrument our tests. - To test your changes, run unit tests with ``nox``:: $ nox -s unit-2.7 - $ nox -s unit-3.7 + $ nox -s unit-3.8 $ ... +- Args to pytest can be passed through the nox command separated by a `--`. For + example, to run a single test:: + + $ nox -s unit-3.8 -- -k + .. note:: The unit tests and system tests are described in the @@ -93,8 +98,12 @@ On Debian/Ubuntu:: ************ Coding Style ************ +- We use the automatic code formatter ``black``. You can run it using + the nox session ``blacken``. This will eliminate many lint errors. Run via:: + + $ nox -s blacken -- PEP8 compliance, with exceptions defined in the linter configuration. +- PEP8 compliance is required, with exceptions defined in the linter configuration. 
If you have ``nox`` installed, you can test that you have not introduced any non-compliant code via:: @@ -133,13 +142,18 @@ Running System Tests - To run system tests, you can execute:: - $ nox -s system-3.7 + # Run all system tests + $ nox -s system-3.8 $ nox -s system-2.7 + # Run a single system test + $ nox -s system-3.8 -- -k + + .. note:: System tests are only configured to run under Python 2.7 and - Python 3.7. For expediency, we do not run them in older versions + Python 3.8. For expediency, we do not run them in older versions of Python 3. This alone will not run the tests. You'll need to change some local diff --git a/synth.metadata b/synth.metadata index 1c5fecaf8..f91ffab69 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "1823cadee3acf95c516d0479400e4175349ea199" + "sha": "1c6681aba872c00afb16a904a2ba9bae8e9618d3" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "33366574ffb9e11737b3547eb6f020ecae0536e8" + "sha": "4679e7e415221f03ff2a71e3ffad75b9ec41d87e" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "33366574ffb9e11737b3547eb6f020ecae0536e8" + "sha": "4679e7e415221f03ff2a71e3ffad75b9ec41d87e" } } ], @@ -92,6 +92,7 @@ "CONTRIBUTING.rst", "LICENSE", "MANIFEST.in", + "bigquery-v2-py.tar.gz", "docs/_static/custom.css", "docs/_templates/layout.html", "docs/bigquery_v2/services.rst", From 4c9947d7578ee9d03d36b464dd4b627545bfb09e Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Thu, 18 Feb 2021 08:18:03 -0800 Subject: [PATCH 130/341] chore: update automation naming, smaller generated code fixes (#505) This PR was generated using Autosynth. 
:rainbow: Synth log will be available here: https://source.cloud.google.com/results/invocations/9d4e6069-5c18-4f79-97fb-99ebae377691/targets - [ ] To automatically regenerate this PR, check this box. PiperOrigin-RevId: 350246057 Source-Link: https://github.com/googleapis/googleapis/commit/520682435235d9c503983a360a2090025aa47cd1 PiperOrigin-RevId: 347055288 Source-Link: https://github.com/googleapis/googleapis/commit/dd372aa22ded7a8ba6f0e03a80e06358a3fa0907 --- .coveragerc | 34 +-- .gitignore | 4 +- .kokoro/build.sh | 10 + docs/bigquery_v2/services.rst | 6 - docs/bigquery_v2/types.rst | 1 + google/cloud/bigquery_v2/types/__init__.py | 1 - .../bigquery_v2/types/encryption_config.py | 2 +- google/cloud/bigquery_v2/types/model.py | 216 +++++++++--------- .../cloud/bigquery_v2/types/standard_sql.py | 10 +- synth.metadata | 12 +- synth.py | 2 + 11 files changed, 144 insertions(+), 154 deletions(-) delete mode 100644 docs/bigquery_v2/services.rst diff --git a/.coveragerc b/.coveragerc index 0d8e6297d..23861a8eb 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,38 +1,18 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Generated by synthtool. DO NOT EDIT! 
[run] branch = True -omit = - google/cloud/__init__.py [report] fail_under = 100 show_missing = True +omit = + google/cloud/bigquery/__init__.py exclude_lines = # Re-enable the standard pragma pragma: NO COVER # Ignore debug-only repr def __repr__ - # Ignore abstract methods - raise NotImplementedError -omit = - */gapic/*.py - */proto/*.py - */core/*.py - */site-packages/*.py - google/cloud/__init__.py + # Ignore pkg_resources exceptions. + # This is added at the module level as a safeguard for if someone + # generates the code and tries to run it without pip installing. This + # makes it virtually impossible to test properly. + except pkg_resources.DistributionNotFound diff --git a/.gitignore b/.gitignore index b9daa52f1..b4243ced7 100644 --- a/.gitignore +++ b/.gitignore @@ -50,8 +50,10 @@ docs.metadata # Virtual environment env/ + +# Test logs coverage.xml -sponge_log.xml +*sponge_log.xml # System test environment variables. system_tests/local_test_setup diff --git a/.kokoro/build.sh b/.kokoro/build.sh index 058f363e1..302cc1e1a 100755 --- a/.kokoro/build.sh +++ b/.kokoro/build.sh @@ -40,6 +40,16 @@ python3 -m pip uninstall --yes --quiet nox-automation python3 -m pip install --upgrade --quiet nox python3 -m nox --version +# If this is a continuous build, send the test log to the FlakyBot. +# See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot. +if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"continuous"* ]]; then + cleanup() { + chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot + $KOKORO_GFILE_DIR/linux_amd64/flakybot + } + trap cleanup EXIT HUP +fi + # If NOX_SESSION is set, it only runs the specified session, # otherwise run all the sessions. 
if [[ -n "${NOX_SESSION:-}" ]]; then diff --git a/docs/bigquery_v2/services.rst b/docs/bigquery_v2/services.rst deleted file mode 100644 index 65fbb438c..000000000 --- a/docs/bigquery_v2/services.rst +++ /dev/null @@ -1,6 +0,0 @@ -Services for Google Cloud Bigquery v2 API -========================================= - -.. automodule:: google.cloud.bigquery_v2.services.model_service - :members: - :inherited-members: diff --git a/docs/bigquery_v2/types.rst b/docs/bigquery_v2/types.rst index 41b906514..c36a83e0b 100644 --- a/docs/bigquery_v2/types.rst +++ b/docs/bigquery_v2/types.rst @@ -3,4 +3,5 @@ Types for Google Cloud Bigquery v2 API .. automodule:: google.cloud.bigquery_v2.types :members: + :undoc-members: :show-inheritance: diff --git a/google/cloud/bigquery_v2/types/__init__.py b/google/cloud/bigquery_v2/types/__init__.py index 1e354641a..00dc837c9 100644 --- a/google/cloud/bigquery_v2/types/__init__.py +++ b/google/cloud/bigquery_v2/types/__init__.py @@ -32,7 +32,6 @@ ListModelsResponse, ) - __all__ = ( "EncryptionConfiguration", "ModelReference", diff --git a/google/cloud/bigquery_v2/types/encryption_config.py b/google/cloud/bigquery_v2/types/encryption_config.py index 6fb90f340..2d801bde3 100644 --- a/google/cloud/bigquery_v2/types/encryption_config.py +++ b/google/cloud/bigquery_v2/types/encryption_config.py @@ -30,7 +30,7 @@ class EncryptionConfiguration(proto.Message): r""" Attributes: - kms_key_name (~.wrappers.StringValue): + kms_key_name (google.protobuf.wrappers_pb2.StringValue): Optional. Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account diff --git a/google/cloud/bigquery_v2/types/model.py b/google/cloud/bigquery_v2/types/model.py index c3530dec2..8ae158b64 100644 --- a/google/cloud/bigquery_v2/types/model.py +++ b/google/cloud/bigquery_v2/types/model.py @@ -45,7 +45,7 @@ class Model(proto.Message): Attributes: etag (str): Output only. A hash of this resource. 
- model_reference (~.gcb_model_reference.ModelReference): + model_reference (google.cloud.bigquery_v2.types.ModelReference): Required. Unique identifier for this model. creation_time (int): Output only. The time when this model was @@ -58,7 +58,7 @@ class Model(proto.Message): model. friendly_name (str): Optional. A descriptive name for this model. - labels (Sequence[~.gcb_model.Model.LabelsEntry]): + labels (Sequence[google.cloud.bigquery_v2.types.Model.LabelsEntry]): The labels associated with this model. You can use these to organize and group your models. Label keys and values can be no longer than 63 @@ -81,22 +81,22 @@ class Model(proto.Message): Output only. The geographic location where the model resides. This value is inherited from the dataset. - encryption_configuration (~.encryption_config.EncryptionConfiguration): + encryption_configuration (google.cloud.bigquery_v2.types.EncryptionConfiguration): Custom encryption configuration (e.g., Cloud KMS keys). This shows the encryption configuration of the model data while stored in BigQuery storage. This field can be used with PatchModel to update encryption key for an already encrypted model. - model_type (~.gcb_model.Model.ModelType): + model_type (google.cloud.bigquery_v2.types.Model.ModelType): Output only. Type of the model resource. - training_runs (Sequence[~.gcb_model.Model.TrainingRun]): + training_runs (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun]): Output only. Information for all training runs in increasing order of start_time. - feature_columns (Sequence[~.standard_sql.StandardSqlField]): + feature_columns (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]): Output only. Input feature columns that were used to train this model. - label_columns (Sequence[~.standard_sql.StandardSqlField]): + label_columns (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]): Output only. Label columns that were used to train this model. 
The output of the model will have a `predicted_` prefix to these columns. @@ -280,15 +280,15 @@ class RegressionMetrics(proto.Message): matrix factorization models. Attributes: - mean_absolute_error (~.wrappers.DoubleValue): + mean_absolute_error (google.protobuf.wrappers_pb2.DoubleValue): Mean absolute error. - mean_squared_error (~.wrappers.DoubleValue): + mean_squared_error (google.protobuf.wrappers_pb2.DoubleValue): Mean squared error. - mean_squared_log_error (~.wrappers.DoubleValue): + mean_squared_log_error (google.protobuf.wrappers_pb2.DoubleValue): Mean squared log error. - median_absolute_error (~.wrappers.DoubleValue): + median_absolute_error (google.protobuf.wrappers_pb2.DoubleValue): Median absolute error. - r_squared (~.wrappers.DoubleValue): + r_squared (google.protobuf.wrappers_pb2.DoubleValue): R^2 score. """ @@ -319,33 +319,33 @@ class AggregateClassificationMetrics(proto.Message): by counting the total number of correctly predicted rows. Attributes: - precision (~.wrappers.DoubleValue): + precision (google.protobuf.wrappers_pb2.DoubleValue): Precision is the fraction of actual positive predictions that had positive actual labels. For multiclass this is a macro-averaged metric treating each class as a binary classifier. - recall (~.wrappers.DoubleValue): + recall (google.protobuf.wrappers_pb2.DoubleValue): Recall is the fraction of actual positive labels that were given a positive prediction. For multiclass this is a macro-averaged metric. - accuracy (~.wrappers.DoubleValue): + accuracy (google.protobuf.wrappers_pb2.DoubleValue): Accuracy is the fraction of predictions given the correct label. For multiclass this is a micro-averaged metric. - threshold (~.wrappers.DoubleValue): + threshold (google.protobuf.wrappers_pb2.DoubleValue): Threshold at which the metrics are computed. For binary classification models this is the positive class threshold. For multi-class classfication models this is the confidence threshold. 
- f1_score (~.wrappers.DoubleValue): + f1_score (google.protobuf.wrappers_pb2.DoubleValue): The F1 score is an average of recall and precision. For multiclass this is a macro- averaged metric. - log_loss (~.wrappers.DoubleValue): + log_loss (google.protobuf.wrappers_pb2.DoubleValue): Logarithmic Loss. For multiclass this is a macro-averaged metric. - roc_auc (~.wrappers.DoubleValue): + roc_auc (google.protobuf.wrappers_pb2.DoubleValue): Area Under a ROC Curve. For multiclass this is a macro-averaged metric. """ @@ -369,9 +369,9 @@ class BinaryClassificationMetrics(proto.Message): models. Attributes: - aggregate_classification_metrics (~.gcb_model.Model.AggregateClassificationMetrics): + aggregate_classification_metrics (google.cloud.bigquery_v2.types.Model.AggregateClassificationMetrics): Aggregate classification metrics. - binary_confusion_matrix_list (Sequence[~.gcb_model.Model.BinaryClassificationMetrics.BinaryConfusionMatrix]): + binary_confusion_matrix_list (Sequence[google.cloud.bigquery_v2.types.Model.BinaryClassificationMetrics.BinaryConfusionMatrix]): Binary confusion matrix at multiple thresholds. positive_label (str): @@ -384,27 +384,27 @@ class BinaryConfusionMatrix(proto.Message): r"""Confusion matrix for binary classification models. Attributes: - positive_class_threshold (~.wrappers.DoubleValue): + positive_class_threshold (google.protobuf.wrappers_pb2.DoubleValue): Threshold value used when computing each of the following metric. - true_positives (~.wrappers.Int64Value): + true_positives (google.protobuf.wrappers_pb2.Int64Value): Number of true samples predicted as true. - false_positives (~.wrappers.Int64Value): + false_positives (google.protobuf.wrappers_pb2.Int64Value): Number of false samples predicted as true. - true_negatives (~.wrappers.Int64Value): + true_negatives (google.protobuf.wrappers_pb2.Int64Value): Number of true samples predicted as false. 
- false_negatives (~.wrappers.Int64Value): + false_negatives (google.protobuf.wrappers_pb2.Int64Value): Number of false samples predicted as false. - precision (~.wrappers.DoubleValue): + precision (google.protobuf.wrappers_pb2.DoubleValue): The fraction of actual positive predictions that had positive actual labels. - recall (~.wrappers.DoubleValue): + recall (google.protobuf.wrappers_pb2.DoubleValue): The fraction of actual positive labels that were given a positive prediction. - f1_score (~.wrappers.DoubleValue): + f1_score (google.protobuf.wrappers_pb2.DoubleValue): The equally weighted average of recall and precision. - accuracy (~.wrappers.DoubleValue): + accuracy (google.protobuf.wrappers_pb2.DoubleValue): The fraction of predictions given the correct label. """ @@ -462,9 +462,9 @@ class MultiClassClassificationMetrics(proto.Message): models. Attributes: - aggregate_classification_metrics (~.gcb_model.Model.AggregateClassificationMetrics): + aggregate_classification_metrics (google.cloud.bigquery_v2.types.Model.AggregateClassificationMetrics): Aggregate classification metrics. - confusion_matrix_list (Sequence[~.gcb_model.Model.MultiClassClassificationMetrics.ConfusionMatrix]): + confusion_matrix_list (Sequence[google.cloud.bigquery_v2.types.Model.MultiClassClassificationMetrics.ConfusionMatrix]): Confusion matrix at different thresholds. """ @@ -472,10 +472,10 @@ class ConfusionMatrix(proto.Message): r"""Confusion matrix for multi-class classification models. Attributes: - confidence_threshold (~.wrappers.DoubleValue): + confidence_threshold (google.protobuf.wrappers_pb2.DoubleValue): Confidence threshold used when computing the entries of the confusion matrix. - rows (Sequence[~.gcb_model.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row]): + rows (Sequence[google.cloud.bigquery_v2.types.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row]): One row per actual label. """ @@ -487,7 +487,7 @@ class Entry(proto.Message): The predicted label. 
For confidence_threshold > 0, we will also add an entry indicating the number of items under the confidence threshold. - item_count (~.wrappers.Int64Value): + item_count (google.protobuf.wrappers_pb2.Int64Value): Number of items being predicted as this label. """ @@ -504,7 +504,7 @@ class Row(proto.Message): Attributes: actual_label (str): The original label of this row. - entries (Sequence[~.gcb_model.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry]): + entries (Sequence[google.cloud.bigquery_v2.types.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry]): Info describing predicted label distribution. """ @@ -540,12 +540,12 @@ class ClusteringMetrics(proto.Message): r"""Evaluation metrics for clustering models. Attributes: - davies_bouldin_index (~.wrappers.DoubleValue): + davies_bouldin_index (google.protobuf.wrappers_pb2.DoubleValue): Davies-Bouldin index. - mean_squared_distance (~.wrappers.DoubleValue): + mean_squared_distance (google.protobuf.wrappers_pb2.DoubleValue): Mean of squared distances between each sample to its cluster centroid. - clusters (Sequence[~.gcb_model.Model.ClusteringMetrics.Cluster]): + clusters (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster]): [Beta] Information for all clusters. """ @@ -555,10 +555,10 @@ class Cluster(proto.Message): Attributes: centroid_id (int): Centroid id. - feature_values (Sequence[~.gcb_model.Model.ClusteringMetrics.Cluster.FeatureValue]): + feature_values (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster.FeatureValue]): Values of highly variant features for this cluster. - count (~.wrappers.Int64Value): + count (google.protobuf.wrappers_pb2.Int64Value): Count of training data rows that were assigned to this cluster. """ @@ -569,10 +569,10 @@ class FeatureValue(proto.Message): Attributes: feature_column (str): The feature column name. 
- numerical_value (~.wrappers.DoubleValue): + numerical_value (google.protobuf.wrappers_pb2.DoubleValue): The numerical feature value. This is the centroid value for this feature. - categorical_value (~.gcb_model.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue): + categorical_value (google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue): The categorical feature value. """ @@ -580,7 +580,7 @@ class CategoricalValue(proto.Message): r"""Representative value of a categorical feature. Attributes: - category_counts (Sequence[~.gcb_model.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount]): + category_counts (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount]): Counts of all categories for the categorical feature. If there are more than ten categories, we return top ten (by count) and return one more CategoryCount with category @@ -594,7 +594,7 @@ class CategoryCount(proto.Message): Attributes: category (str): The name of category. - count (~.wrappers.Int64Value): + count (google.protobuf.wrappers_pb2.Int64Value): The count of training samples matching the category within the cluster. """ @@ -654,23 +654,23 @@ class RankingMetrics(proto.Message): feedback_type=implicit. Attributes: - mean_average_precision (~.wrappers.DoubleValue): + mean_average_precision (google.protobuf.wrappers_pb2.DoubleValue): Calculates a precision per user for all the items by ranking them and then averages all the precisions across all the users. - mean_squared_error (~.wrappers.DoubleValue): + mean_squared_error (google.protobuf.wrappers_pb2.DoubleValue): Similar to the mean squared error computed in regression and explicit recommendation models except instead of computing the rating directly, the output from evaluate is computed against a preference which is 1 or 0 depending on if the rating exists or not. 
- normalized_discounted_cumulative_gain (~.wrappers.DoubleValue): + normalized_discounted_cumulative_gain (google.protobuf.wrappers_pb2.DoubleValue): A metric to determine the goodness of a ranking calculated from the predicted confidence by comparing it to an ideal rank measured by the original ratings. - average_rank (~.wrappers.DoubleValue): + average_rank (google.protobuf.wrappers_pb2.DoubleValue): Determines the goodness of a ranking by computing the percentile rank from the predicted confidence and dividing it by the original rank. @@ -696,11 +696,11 @@ class ArimaForecastingMetrics(proto.Message): r"""Model evaluation metrics for ARIMA forecasting models. Attributes: - non_seasonal_order (Sequence[~.gcb_model.Model.ArimaOrder]): + non_seasonal_order (Sequence[google.cloud.bigquery_v2.types.Model.ArimaOrder]): Non-seasonal order. - arima_fitting_metrics (Sequence[~.gcb_model.Model.ArimaFittingMetrics]): + arima_fitting_metrics (Sequence[google.cloud.bigquery_v2.types.Model.ArimaFittingMetrics]): Arima model fitting metrics. - seasonal_periods (Sequence[~.gcb_model.Model.SeasonalPeriod.SeasonalPeriodType]): + seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): Seasonal periods. Repeated because multiple periods are supported for one time series. has_drift (Sequence[bool]): @@ -709,7 +709,7 @@ class ArimaForecastingMetrics(proto.Message): time_series_id (Sequence[str]): Id to differentiate different time series for the large-scale case. - arima_single_model_forecasting_metrics (Sequence[~.gcb_model.Model.ArimaForecastingMetrics.ArimaSingleModelForecastingMetrics]): + arima_single_model_forecasting_metrics (Sequence[google.cloud.bigquery_v2.types.Model.ArimaForecastingMetrics.ArimaSingleModelForecastingMetrics]): Repeated as there can be many metric sets (one for each model) in auto-arima and the large-scale case. @@ -720,16 +720,16 @@ class ArimaSingleModelForecastingMetrics(proto.Message): model. 
Attributes: - non_seasonal_order (~.gcb_model.Model.ArimaOrder): + non_seasonal_order (google.cloud.bigquery_v2.types.Model.ArimaOrder): Non-seasonal order. - arima_fitting_metrics (~.gcb_model.Model.ArimaFittingMetrics): + arima_fitting_metrics (google.cloud.bigquery_v2.types.Model.ArimaFittingMetrics): Arima fitting metrics. has_drift (bool): Is arima model fitted with drift or not. It is always false when d is not 1. time_series_id (str): The id to indicate different time series. - seasonal_periods (Sequence[~.gcb_model.Model.SeasonalPeriod.SeasonalPeriodType]): + seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): Seasonal periods. Repeated because multiple periods are supported for one time series. """ @@ -779,21 +779,21 @@ class EvaluationMetrics(proto.Message): imported models. Attributes: - regression_metrics (~.gcb_model.Model.RegressionMetrics): + regression_metrics (google.cloud.bigquery_v2.types.Model.RegressionMetrics): Populated for regression models and explicit feedback type matrix factorization models. - binary_classification_metrics (~.gcb_model.Model.BinaryClassificationMetrics): + binary_classification_metrics (google.cloud.bigquery_v2.types.Model.BinaryClassificationMetrics): Populated for binary classification/classifier models. - multi_class_classification_metrics (~.gcb_model.Model.MultiClassClassificationMetrics): + multi_class_classification_metrics (google.cloud.bigquery_v2.types.Model.MultiClassClassificationMetrics): Populated for multi-class classification/classifier models. - clustering_metrics (~.gcb_model.Model.ClusteringMetrics): + clustering_metrics (google.cloud.bigquery_v2.types.Model.ClusteringMetrics): Populated for clustering models. - ranking_metrics (~.gcb_model.Model.RankingMetrics): + ranking_metrics (google.cloud.bigquery_v2.types.Model.RankingMetrics): Populated for implicit feedback type matrix factorization models. 
- arima_forecasting_metrics (~.gcb_model.Model.ArimaForecastingMetrics): + arima_forecasting_metrics (google.cloud.bigquery_v2.types.Model.ArimaForecastingMetrics): Populated for ARIMA models. """ @@ -835,10 +835,10 @@ class DataSplitResult(proto.Message): and evaluation data tables that were used to train the model. Attributes: - training_table (~.table_reference.TableReference): + training_table (google.cloud.bigquery_v2.types.TableReference): Table reference of the training data after split. - evaluation_table (~.table_reference.TableReference): + evaluation_table (google.cloud.bigquery_v2.types.TableReference): Table reference of the evaluation data after split. """ @@ -893,7 +893,7 @@ class GlobalExplanation(proto.Message): features after training. Attributes: - explanations (Sequence[~.gcb_model.Model.GlobalExplanation.Explanation]): + explanations (Sequence[google.cloud.bigquery_v2.types.Model.GlobalExplanation.Explanation]): A list of the top global explanations. Sorted by absolute value of attribution in descending order. @@ -913,7 +913,7 @@ class Explanation(proto.Message): be formatted like .. Overall size of feature name will always be truncated to first 120 characters. - attribution (~.wrappers.DoubleValue): + attribution (google.protobuf.wrappers_pb2.DoubleValue): Attribution of feature. """ @@ -933,22 +933,22 @@ class TrainingRun(proto.Message): r"""Information about a single training query run for the model. Attributes: - training_options (~.gcb_model.Model.TrainingRun.TrainingOptions): + training_options (google.cloud.bigquery_v2.types.Model.TrainingRun.TrainingOptions): Options that were used for this training run, includes user specified and default options that were used. - start_time (~.timestamp.Timestamp): + start_time (google.protobuf.timestamp_pb2.Timestamp): The start time of this training run. 
- results (Sequence[~.gcb_model.Model.TrainingRun.IterationResult]): + results (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult]): Output of each iteration run, results.size() <= max_iterations. - evaluation_metrics (~.gcb_model.Model.EvaluationMetrics): + evaluation_metrics (google.cloud.bigquery_v2.types.Model.EvaluationMetrics): The evaluation metrics over training/eval data that were computed at the end of training. - data_split_result (~.gcb_model.Model.DataSplitResult): + data_split_result (google.cloud.bigquery_v2.types.Model.DataSplitResult): Data split result of the training run. Only set when the input data is actually split. - global_explanations (Sequence[~.gcb_model.Model.GlobalExplanation]): + global_explanations (Sequence[google.cloud.bigquery_v2.types.Model.GlobalExplanation]): Global explanations for important features of the model. For multi-class models, there is one entry for each label class. For other models, @@ -962,30 +962,30 @@ class TrainingOptions(proto.Message): max_iterations (int): The maximum number of iterations in training. Used only for iterative training algorithms. - loss_type (~.gcb_model.Model.LossType): + loss_type (google.cloud.bigquery_v2.types.Model.LossType): Type of loss function used during training run. learn_rate (float): Learning rate in training. Used only for iterative training algorithms. - l1_regularization (~.wrappers.DoubleValue): + l1_regularization (google.protobuf.wrappers_pb2.DoubleValue): L1 regularization coefficient. - l2_regularization (~.wrappers.DoubleValue): + l2_regularization (google.protobuf.wrappers_pb2.DoubleValue): L2 regularization coefficient. - min_relative_progress (~.wrappers.DoubleValue): + min_relative_progress (google.protobuf.wrappers_pb2.DoubleValue): When early_stop is true, stops training when accuracy improvement is less than 'min_relative_progress'. Used only for iterative training algorithms. 
- warm_start (~.wrappers.BoolValue): + warm_start (google.protobuf.wrappers_pb2.BoolValue): Whether to train a model from the last checkpoint. - early_stop (~.wrappers.BoolValue): + early_stop (google.protobuf.wrappers_pb2.BoolValue): Whether to stop early when the loss doesn't improve significantly any more (compared to min_relative_progress). Used only for iterative training algorithms. input_label_columns (Sequence[str]): Name of input label columns in training data. - data_split_method (~.gcb_model.Model.DataSplitMethod): + data_split_method (google.cloud.bigquery_v2.types.Model.DataSplitMethod): The data split type for training and evaluation, e.g. RANDOM. data_split_eval_fraction (float): @@ -1007,13 +1007,13 @@ class TrainingOptions(proto.Message): and the rest are eval data. It respects the order in Orderable data types: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties - learn_rate_strategy (~.gcb_model.Model.LearnRateStrategy): + learn_rate_strategy (google.cloud.bigquery_v2.types.Model.LearnRateStrategy): The strategy to determine learn rate for the current iteration. initial_learn_rate (float): Specifies the initial learning rate for the line search learn rate strategy. - label_class_weights (Sequence[~.gcb_model.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry]): + label_class_weights (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry]): Weights associated with each label class, for rebalancing the training data. Only applicable for classification models. @@ -1023,21 +1023,21 @@ class TrainingOptions(proto.Message): item_column (str): Item column specified for matrix factorization models. - distance_type (~.gcb_model.Model.DistanceType): + distance_type (google.cloud.bigquery_v2.types.Model.DistanceType): Distance type for clustering models. num_clusters (int): Number of clusters for clustering models. 
model_uri (str): [Beta] Google Cloud Storage URI from which the model was imported. Only applicable for imported models. - optimization_strategy (~.gcb_model.Model.OptimizationStrategy): + optimization_strategy (google.cloud.bigquery_v2.types.Model.OptimizationStrategy): Optimization strategy for training linear regression models. hidden_units (Sequence[int]): Hidden units for dnn models. batch_size (int): Batch size for dnn models. - dropout (~.wrappers.DoubleValue): + dropout (google.protobuf.wrappers_pb2.DoubleValue): Dropout probability for dnn models. max_tree_depth (int): Maximum depth of a tree for boosted tree @@ -1046,18 +1046,18 @@ class TrainingOptions(proto.Message): Subsample fraction of the training data to grow tree to prevent overfitting for boosted tree models. - min_split_loss (~.wrappers.DoubleValue): + min_split_loss (google.protobuf.wrappers_pb2.DoubleValue): Minimum split loss for boosted tree models. num_factors (int): Num factors specified for matrix factorization models. - feedback_type (~.gcb_model.Model.FeedbackType): + feedback_type (google.cloud.bigquery_v2.types.Model.FeedbackType): Feedback type that specifies which algorithm to run for matrix factorization. - wals_alpha (~.wrappers.DoubleValue): + wals_alpha (google.protobuf.wrappers_pb2.DoubleValue): Hyperparameter for matrix factoration when implicit feedback type is specified. - kmeans_initialization_method (~.gcb_model.Model.KmeansEnums.KmeansInitializationMethod): + kmeans_initialization_method (google.cloud.bigquery_v2.types.Model.KmeansEnums.KmeansInitializationMethod): The method used to initialize the centroids for kmeans algorithm. kmeans_initialization_column (str): @@ -1071,16 +1071,16 @@ class TrainingOptions(proto.Message): for ARIMA model. auto_arima (bool): Whether to enable auto ARIMA or not. 
- non_seasonal_order (~.gcb_model.Model.ArimaOrder): + non_seasonal_order (google.cloud.bigquery_v2.types.Model.ArimaOrder): A specification of the non-seasonal part of the ARIMA model: the three components (p, d, q) are the AR order, the degree of differencing, and the MA order. - data_frequency (~.gcb_model.Model.DataFrequency): + data_frequency (google.cloud.bigquery_v2.types.Model.DataFrequency): The data frequency of a time series. include_drift (bool): Include drift when fitting an ARIMA model. - holiday_region (~.gcb_model.Model.HolidayRegion): + holiday_region (google.cloud.bigquery_v2.types.Model.HolidayRegion): The geographical region based on which the holidays are considered in time series modeling. If a valid value is specified, then holiday @@ -1226,23 +1226,23 @@ class IterationResult(proto.Message): r"""Information about a single iteration of the training run. Attributes: - index (~.wrappers.Int32Value): + index (google.protobuf.wrappers_pb2.Int32Value): Index of the iteration, 0 based. - duration_ms (~.wrappers.Int64Value): + duration_ms (google.protobuf.wrappers_pb2.Int64Value): Time taken to run the iteration in milliseconds. - training_loss (~.wrappers.DoubleValue): + training_loss (google.protobuf.wrappers_pb2.DoubleValue): Loss computed on the training data at the end of iteration. - eval_loss (~.wrappers.DoubleValue): + eval_loss (google.protobuf.wrappers_pb2.DoubleValue): Loss computed on the eval data at the end of iteration. learn_rate (float): Learn rate used for this iteration. - cluster_infos (Sequence[~.gcb_model.Model.TrainingRun.IterationResult.ClusterInfo]): + cluster_infos (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult.ClusterInfo]): Information about top clusters for clustering models. 
- arima_result (~.gcb_model.Model.TrainingRun.IterationResult.ArimaResult): + arima_result (google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult.ArimaResult): """ @@ -1252,10 +1252,10 @@ class ClusterInfo(proto.Message): Attributes: centroid_id (int): Centroid id. - cluster_radius (~.wrappers.DoubleValue): + cluster_radius (google.protobuf.wrappers_pb2.DoubleValue): Cluster radius, the average distance from centroid to each point assigned to the cluster. - cluster_size (~.wrappers.Int64Value): + cluster_size (google.protobuf.wrappers_pb2.Int64Value): Cluster size, the total number of points assigned to the cluster. """ @@ -1276,11 +1276,11 @@ class ArimaResult(proto.Message): iteration results. Attributes: - arima_model_info (Sequence[~.gcb_model.Model.TrainingRun.IterationResult.ArimaResult.ArimaModelInfo]): + arima_model_info (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult.ArimaResult.ArimaModelInfo]): This message is repeated because there are multiple arima models fitted in auto-arima. For non-auto-arima model, its size is one. - seasonal_periods (Sequence[~.gcb_model.Model.SeasonalPeriod.SeasonalPeriodType]): + seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): Seasonal periods. Repeated because multiple periods are supported for one time series. """ @@ -1314,18 +1314,18 @@ class ArimaModelInfo(proto.Message): r"""Arima model information. Attributes: - non_seasonal_order (~.gcb_model.Model.ArimaOrder): + non_seasonal_order (google.cloud.bigquery_v2.types.Model.ArimaOrder): Non-seasonal order. - arima_coefficients (~.gcb_model.Model.TrainingRun.IterationResult.ArimaResult.ArimaCoefficients): + arima_coefficients (google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult.ArimaResult.ArimaCoefficients): Arima coefficients. 
- arima_fitting_metrics (~.gcb_model.Model.ArimaFittingMetrics): + arima_fitting_metrics (google.cloud.bigquery_v2.types.Model.ArimaFittingMetrics): Arima fitting metrics. has_drift (bool): Whether Arima model fitted with drift or not. It is always false when d is not 1. time_series_id (str): The id to indicate different time series. - seasonal_periods (Sequence[~.gcb_model.Model.SeasonalPeriod.SeasonalPeriodType]): + seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): Seasonal periods. Repeated because multiple periods are supported for one time series. """ @@ -1482,7 +1482,7 @@ class PatchModelRequest(proto.Message): Required. Dataset ID of the model to patch. model_id (str): Required. Model ID of the model to patch. - model (~.gcb_model.Model): + model (google.cloud.bigquery_v2.types.Model): Required. Patched model. Follows RFC5789 patch semantics. Missing fields are not updated. To clear a field, explicitly @@ -1525,7 +1525,7 @@ class ListModelsRequest(proto.Message): Required. Project ID of the models to list. dataset_id (str): Required. Dataset ID of the models to list. - max_results (~.wrappers.UInt32Value): + max_results (google.protobuf.wrappers_pb2.UInt32Value): The maximum number of results to return in a single response page. Leverage the page tokens to iterate through the entire collection. @@ -1547,7 +1547,7 @@ class ListModelsResponse(proto.Message): r""" Attributes: - models (Sequence[~.gcb_model.Model]): + models (Sequence[google.cloud.bigquery_v2.types.Model]): Models in the requested dataset. Only the following fields are populated: model_reference, model_type, creation_time, last_modified_time and labels. 
diff --git a/google/cloud/bigquery_v2/types/standard_sql.py b/google/cloud/bigquery_v2/types/standard_sql.py index 80e4632f7..3bc6afedc 100644 --- a/google/cloud/bigquery_v2/types/standard_sql.py +++ b/google/cloud/bigquery_v2/types/standard_sql.py @@ -33,13 +33,13 @@ class StandardSqlDataType(proto.Message): array_element_type="DATE"}} ]}} Attributes: - type_kind (~.standard_sql.StandardSqlDataType.TypeKind): + type_kind (google.cloud.bigquery_v2.types.StandardSqlDataType.TypeKind): Required. The top level type of this field. Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY"). - array_element_type (~.standard_sql.StandardSqlDataType): + array_element_type (google.cloud.bigquery_v2.types.StandardSqlDataType): The type of the array's elements, if type_kind = "ARRAY". - struct_type (~.standard_sql.StandardSqlStructType): + struct_type (google.cloud.bigquery_v2.types.StandardSqlStructType): The fields of this struct, in order, if type_kind = "STRUCT". """ @@ -80,7 +80,7 @@ class StandardSqlField(proto.Message): name (str): Optional. The name of this field. Can be absent for struct fields. - type (~.standard_sql.StandardSqlDataType): + type (google.cloud.bigquery_v2.types.StandardSqlDataType): Optional. The type of this parameter. 
Absent if not explicitly specified (e.g., CREATE FUNCTION statement can omit the return type; in @@ -97,7 +97,7 @@ class StandardSqlStructType(proto.Message): r""" Attributes: - fields (Sequence[~.standard_sql.StandardSqlField]): + fields (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]): """ diff --git a/synth.metadata b/synth.metadata index f91ffab69..dc183a72e 100644 --- a/synth.metadata +++ b/synth.metadata @@ -11,22 +11,22 @@ "git": { "name": "googleapis", "remote": "https://github.com/googleapis/googleapis.git", - "sha": "df4fd38d040c5c8a0869936205bca13fb64b2cff", - "internalRef": "344443035" + "sha": "e13001be33d69042a9505e698f792587a804a5cf", + "internalRef": "358152223" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "4679e7e415221f03ff2a71e3ffad75b9ec41d87e" + "sha": "4dca4132c6d63788c6675e1b1e11e7b9225f8694" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "4679e7e415221f03ff2a71e3ffad75b9ec41d87e" + "sha": "4dca4132c6d63788c6675e1b1e11e7b9225f8694" } } ], @@ -42,6 +42,7 @@ } ], "generatedFiles": [ + ".coveragerc", ".flake8", ".github/CONTRIBUTING.md", ".github/ISSUE_TEMPLATE/bug_report.md", @@ -95,6 +96,7 @@ "bigquery-v2-py.tar.gz", "docs/_static/custom.css", "docs/_templates/layout.html", + "docs/bigquery_v2/model_service.rst", "docs/bigquery_v2/services.rst", "docs/bigquery_v2/types.rst", "docs/conf.py", @@ -127,4 +129,4 @@ "setup.cfg", "testing/.gitignore" ] -} \ No newline at end of file +} diff --git a/synth.py b/synth.py index 341c5832f..3ab271c96 100644 --- a/synth.py +++ b/synth.py @@ -33,6 +33,8 @@ library, excludes=[ "docs/index.rst", + "docs/bigquery_v2/*_service.rst", + "docs/bigquery_v2/services.rst", "README.rst", "noxfile.py", "setup.py", From 1862de798e09b81c9bbbf06b00a438b5f57daf79 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 18 Feb 2021 10:52:56 
-0600 Subject: [PATCH 131/341] chore: release 2.9.0 (#526) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 14 ++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 768b7b036..51fad831e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,20 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.9.0](https://www.github.com/googleapis/python-bigquery/compare/v2.8.0...v2.9.0) (2021-02-18) + + +### Features + +* add determinism level for javascript UDFs ([#522](https://www.github.com/googleapis/python-bigquery/issues/522)) ([edd3328](https://www.github.com/googleapis/python-bigquery/commit/edd3328fffa3040b2cd3a3c668c90a0e43e4c94c)) +* expose reservation usage stats on jobs ([#524](https://www.github.com/googleapis/python-bigquery/issues/524)) ([4ffb4e0](https://www.github.com/googleapis/python-bigquery/commit/4ffb4e067abdaa54dad6eff49a7fbdb0fa358637)) + + +### Documentation + +* clarify `%%bigquery`` magics and fix broken link ([#508](https://www.github.com/googleapis/python-bigquery/issues/508)) ([eedf93b](https://www.github.com/googleapis/python-bigquery/commit/eedf93b6636c5ff1bd810c6038cfeaea8ccb64d8)) +* update python contributing guide ([#514](https://www.github.com/googleapis/python-bigquery/issues/514)) ([01e851d](https://www.github.com/googleapis/python-bigquery/commit/01e851d00fc17a780375580776753d78f6d74174)) + ## [2.8.0](https://www.github.com/googleapis/python-bigquery/compare/v2.7.0...v2.8.0) (2021-02-08) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 0a9aecb37..b2a8c5535 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.8.0" +__version__ = "2.9.0" From 60fbf287b0d34d5db2e61cce7a5b42735ed43d0e Mon Sep 17 00:00:00 2001 From: Alan Yee Date: Fri, 19 Feb 2021 03:20:15 -0800 Subject: [PATCH 132/341] docs(bigquery): Add alternative approach to setting credentials (#517) * docs(bigquery): Add alternative approach to setting credentials * docs(bigquery): Add alternative approach to setting credentials Correction: json object rather than string * Remove trailing space Co-authored-by: Peter Lamut --- samples/snippets/authenticate_service_account.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/samples/snippets/authenticate_service_account.py b/samples/snippets/authenticate_service_account.py index 58cd2b542..c07848bee 100644 --- a/samples/snippets/authenticate_service_account.py +++ b/samples/snippets/authenticate_service_account.py @@ -30,6 +30,11 @@ def main(): key_path, scopes=["https://www.googleapis.com/auth/cloud-platform"], ) + # Alternatively, use service_account.Credentials.from_service_account_info() + # to set credentials directly via a json object rather than set a filepath + # TODO(developer): Set key_json to the content of the service account key file. 
+ # credentials = service_account.Credentials.from_service_account_info(key_json) + client = bigquery.Client(credentials=credentials, project=credentials.project_id,) # [END bigquery_client_json_credentials] return client From 02147545c23cc135c14747971239e480bead4f9b Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 19 Feb 2021 19:53:11 +0100 Subject: [PATCH 133/341] chore(deps): update dependency google-cloud-bigquery to v2.9.0 (#515) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 6f9306af2..e9fcfca03 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,3 +1,3 @@ geojson==2.5.0 -google-cloud-bigquery==2.7.0 +google-cloud-bigquery==2.9.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 7087121b5..9def04cb8 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.7.0 +google-cloud-bigquery==2.9.0 google-cloud-bigquery-storage==2.2.1 google-auth-oauthlib==0.4.2 grpcio==1.35.0 From bb9a94c3c8414d49f1e3bed31a810b371e3011be Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 19 Feb 2021 20:06:05 +0100 Subject: [PATCH 134/341] chore(deps): update dependency google-cloud-bigquery-storage to v2.3.0 (#529) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery-storage](https://togithub.com/googleapis/python-bigquery-storage) | `==2.2.1` -> `==2.3.0` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.3.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | 
[![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.3.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.3.0/compatibility-slim/2.2.1)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.3.0/confidence-slim/2.2.1)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery-storage ### [`v2.3.0`](https://togithub.com/googleapis/python-bigquery-storage/blob/master/CHANGELOG.md#​230-httpswwwgithubcomgoogleapispython-bigquery-storagecomparev221v230-2021-02-18) [Compare Source](https://togithub.com/googleapis/python-bigquery-storage/compare/v2.2.1...v2.3.0) ##### Features - add `client_cert_source_for_mtls` argument to transports ([#​135](https://www.github.com/googleapis/python-bigquery-storage/issues/135)) ([072850d](https://www.github.com/googleapis/python-bigquery-storage/commit/072850dd341909fdc22f330117a17e48da12fdd1)) ##### Documentation - update python contributing guide ([#​140](https://www.github.com/googleapis/python-bigquery-storage/issues/140)) ([1671056](https://www.github.com/googleapis/python-bigquery-storage/commit/1671056bfe181660440b1bf4415005e3eed01eb2)) ##### [2.2.1](https://www.github.com/googleapis/python-bigquery-storage/compare/v2.2.0...v2.2.1) (2021-01-25) ##### Documentation - remove required session variable to fix publish ([#​124](https://www.github.com/googleapis/python-bigquery-storage/issues/124)) ([19a105c](https://www.github.com/googleapis/python-bigquery-storage/commit/19a105cb9c868bb1a9e63966609a2488876f511b))
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 9def04cb8..c638178fc 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.9.0 -google-cloud-bigquery-storage==2.2.1 +google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.2 grpcio==1.35.0 ipython==7.16.1; python_version < '3.7' From 696c443f0a6740be0767e12b706a7771bc1460c3 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 23 Feb 2021 10:14:26 +0100 Subject: [PATCH 135/341] docs: explain retry behavior for DONE jobs (#532) * docs: explain retry behavior for DONE jobs * Update google/cloud/bigquery/job/query.py Co-authored-by: Tim Swast --- google/cloud/bigquery/job/base.py | 8 ++++++-- google/cloud/bigquery/job/query.py | 7 +++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index d8f5d6528..f24e972c8 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -614,7 +614,9 @@ def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): """Checks if the job is complete. Args: - retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. 
+ retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. If the job state is ``DONE``, retrying is aborted + early, as the job will not change anymore. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -635,7 +637,9 @@ def result(self, retry=DEFAULT_RETRY, timeout=None): """Start the job and wait for it to complete and get the result. Args: - retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. If the job state is ``DONE``, retrying is aborted + early, as the job will not change anymore. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index d87f87f52..b3ca8d940 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -979,7 +979,8 @@ def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): Args: retry (Optional[google.api_core.retry.Retry]): - How to retry the call that retrieves query results. + How to retry the call that retrieves query results. If the job state is + ``DONE``, retrying is aborted early, as the job will not change anymore. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -1128,7 +1129,9 @@ def result( max_results (Optional[int]): The maximum total number of rows from this request. retry (Optional[google.api_core.retry.Retry]): - How to retry the call that retrieves rows. + How to retry the call that retrieves rows. If the job state is + ``DONE``, retrying is aborted early even if the results are not + available, as this will not change anymore. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. 
From cc3394f80934419eb00c2029bb81c92a696e7d88 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 23 Feb 2021 18:33:11 +0100 Subject: [PATCH 136/341] feat: add BIGNUMERIC support (#527) * feat: add support of BIGNUMERIC * feat: add BIGNUMERIC support * Add bignumeric_type extra * Add additional BIGNUMERIC tests * Prevent import time error if no BIGNUMERIC support * Add/improve a few comments * Add feature flag for BIGNUMERIC support Co-authored-by: HemangChothani --- google/cloud/bigquery/_pandas_helpers.py | 18 +- google/cloud/bigquery/dbapi/_helpers.py | 12 +- google/cloud/bigquery/dbapi/types.py | 2 +- google/cloud/bigquery/query.py | 8 +- google/cloud/bigquery/schema.py | 1 + setup.py | 2 + tests/system/test_client.py | 162 ++++++++++------ tests/unit/test__pandas_helpers.py | 228 +++++++++++++++-------- tests/unit/test_dbapi__helpers.py | 14 ++ tests/unit/test_query.py | 10 + 10 files changed, 305 insertions(+), 152 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 162c58b4b..7ad416e08 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -20,6 +20,7 @@ import queue import warnings +from packaging import version try: import pandas @@ -80,6 +81,10 @@ def pyarrow_numeric(): return pyarrow.decimal128(38, 9) +def pyarrow_bignumeric(): + return pyarrow.decimal256(76, 38) + + def pyarrow_time(): return pyarrow.time64("us") @@ -128,14 +133,23 @@ def pyarrow_timestamp(): pyarrow.date64().id: "DATETIME", # because millisecond resolution pyarrow.binary().id: "BYTES", pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() + # The exact scale and precision don't matter, see below. pyarrow.decimal128(38, scale=9).id: "NUMERIC", - # The exact decimal's scale and precision are not important, as only - # the type ID matters, and it's the same for all decimal128 instances.
} + if version.parse(pyarrow.__version__) >= version.parse("3.0.0"): + BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric + # The exact decimal's scale and precision are not important, as only + # the type ID matters, and it's the same for all decimal256 instances. + ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" + _BIGNUMERIC_SUPPORT = True + else: + _BIGNUMERIC_SUPPORT = False + else: # pragma: NO COVER BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER ARROW_SCALAR_IDS_TO_BQ = {} # pragma: NO_COVER + _BIGNUMERIC_SUPPORT = False # pragma: NO COVER def bq_to_arrow_struct_data_type(field): diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 95b5869e5..6b36d6e43 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -19,6 +19,11 @@ import functools import numbers +try: + import pyarrow +except ImportError: # pragma: NO COVER + pyarrow = None + from google.cloud import bigquery from google.cloud.bigquery import table from google.cloud.bigquery.dbapi import exceptions @@ -184,7 +189,12 @@ def bigquery_scalar_type(value): elif isinstance(value, numbers.Real): return "FLOAT64" elif isinstance(value, decimal.Decimal): - return "NUMERIC" + # We check for NUMERIC before BIGNUMERIC in order to support pyarrow < 3.0. 
+ scalar_object = pyarrow.scalar(value) + if isinstance(scalar_object, pyarrow.Decimal128Scalar): + return "NUMERIC" + else: + return "BIGNUMERIC" elif isinstance(value, str): return "STRING" elif isinstance(value, bytes): diff --git a/google/cloud/bigquery/dbapi/types.py b/google/cloud/bigquery/dbapi/types.py index 14917820c..20eca9b00 100644 --- a/google/cloud/bigquery/dbapi/types.py +++ b/google/cloud/bigquery/dbapi/types.py @@ -78,7 +78,7 @@ def __eq__(self, other): STRING = "STRING" BINARY = _DBAPITypeObject("BYTES", "RECORD", "STRUCT") NUMBER = _DBAPITypeObject( - "INTEGER", "INT64", "FLOAT", "FLOAT64", "NUMERIC", "BOOLEAN", "BOOL" + "INTEGER", "INT64", "FLOAT", "FLOAT64", "NUMERIC", "BIGNUMERIC", "BOOLEAN", "BOOL" ) DATETIME = _DBAPITypeObject("TIMESTAMP", "DATE", "TIME", "DATETIME") ROWID = "ROWID" diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index f2ed6337e..ecec73e99 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -83,7 +83,7 @@ class ScalarQueryParameter(_AbstractQueryParameter): type_ (str): Name of parameter type. One of 'STRING', 'INT64', - 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or + 'FLOAT64', 'NUMERIC', 'BIGNUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]): @@ -102,7 +102,7 @@ def positional(cls, type_, value): Args: type_ (str): Name of parameter type. One of 'STRING', 'INT64', - 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or + 'FLOAT64', 'NUMERIC', 'BIGNUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]): @@ -186,7 +186,7 @@ class ArrayQueryParameter(_AbstractQueryParameter): array_type (str): Name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. 
+ `'FLOAT64'`, `'NUMERIC'`, `'BIGNUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. values (List[appropriate scalar type]): The parameter array values. """ @@ -203,7 +203,7 @@ def positional(cls, array_type, values): Args: array_type (str): Name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + `'FLOAT64'`, `'NUMERIC'`, `'BIGNUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. values (List[appropriate scalar type]): The parameter array values. diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index c76aded02..9be27f3e8 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -32,6 +32,7 @@ "FLOAT": types.StandardSqlDataType.TypeKind.FLOAT64, "FLOAT64": types.StandardSqlDataType.TypeKind.FLOAT64, "NUMERIC": types.StandardSqlDataType.TypeKind.NUMERIC, + "BIGNUMERIC": types.StandardSqlDataType.TypeKind.BIGNUMERIC, "BOOLEAN": types.StandardSqlDataType.TypeKind.BOOL, "BOOL": types.StandardSqlDataType.TypeKind.BOOL, "GEOGRAPHY": types.StandardSqlDataType.TypeKind.GEOGRAPHY, diff --git a/setup.py b/setup.py index ea2df4843..31b6a3ff7 100644 --- a/setup.py +++ b/setup.py @@ -33,6 +33,7 @@ "proto-plus >= 1.10.0", "google-cloud-core >= 1.4.1, < 2.0dev", "google-resumable-media >= 0.6.0, < 2.0dev", + "packaging >= 14.3", "protobuf >= 3.12.0", ] extras = { @@ -48,6 +49,7 @@ "pyarrow >= 1.0.0, < 4.0dev", ], "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 4.0dev",], + "bignumeric_type": ["pyarrow >= 3.0.0, < 4.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ "opentelemetry-api==0.11b0", diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 60c3b3fa8..684a42c30 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -65,6 +65,7 @@ from google.api_core.iam import Policy from google.cloud import bigquery from google.cloud import bigquery_v2 +from google.cloud.bigquery._pandas_helpers import 
_BIGNUMERIC_SUPPORT from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table @@ -891,6 +892,9 @@ def test_load_table_from_dataframe_w_nulls(self): bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), ) + if _BIGNUMERIC_SUPPORT: + scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) + table_schema = scalars_schema + ( # TODO: Array columns can't be read due to NULLABLE versus REPEATED # mode mismatch. See: @@ -902,21 +906,22 @@ def test_load_table_from_dataframe_w_nulls(self): ) num_rows = 100 nulls = [None] * num_rows - df_data = collections.OrderedDict( - [ - ("bool_col", nulls), - ("bytes_col", nulls), - ("date_col", nulls), - ("dt_col", nulls), - ("float_col", nulls), - ("geo_col", nulls), - ("int_col", nulls), - ("num_col", nulls), - ("str_col", nulls), - ("time_col", nulls), - ("ts_col", nulls), - ] - ) + df_data = [ + ("bool_col", nulls), + ("bytes_col", nulls), + ("date_col", nulls), + ("dt_col", nulls), + ("float_col", nulls), + ("geo_col", nulls), + ("int_col", nulls), + ("num_col", nulls), + ("str_col", nulls), + ("time_col", nulls), + ("ts_col", nulls), + ] + if _BIGNUMERIC_SUPPORT: + df_data.append(("bignum_col", nulls)) + df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) dataset_id = _make_dataset_id("bq_load_test") @@ -1003,6 +1008,9 @@ def test_load_table_from_dataframe_w_explicit_schema(self): bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), ) + if _BIGNUMERIC_SUPPORT: + scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) + table_schema = scalars_schema + ( # TODO: Array columns can't be read due to NULLABLE versus REPEATED # mode mismatch. 
See: @@ -1012,57 +1020,65 @@ def test_load_table_from_dataframe_w_explicit_schema(self): # https://jira.apache.org/jira/browse/ARROW-2587 # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema), ) - df_data = collections.OrderedDict( - [ - ("bool_col", [True, None, False]), - ("bytes_col", [b"abc", None, b"def"]), - ( - "date_col", - [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)], - ), - # ( - # "dt_col", - # [ - # datetime.datetime(1, 1, 1, 0, 0, 0), - # None, - # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), - # ], - # ), - ("float_col", [float("-inf"), float("nan"), float("inf")]), - ( - "geo_col", - [ - "POINT(30 10)", - None, - "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", - ], - ), - ("int_col", [-9223372036854775808, None, 9223372036854775807]), - ( - "num_col", - [ - decimal.Decimal("-99999999999999999999999999999.999999999"), - None, - decimal.Decimal("99999999999999999999999999999.999999999"), - ], - ), - ("str_col", [u"abc", None, u"def"]), - ( - "time_col", - [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)], - ), + + df_data = [ + ("bool_col", [True, None, False]), + ("bytes_col", [b"abc", None, b"def"]), + ("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)]), + # ( + # "dt_col", + # [ + # datetime.datetime(1, 1, 1, 0, 0, 0), + # None, + # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + # ], + # ), + ("float_col", [float("-inf"), float("nan"), float("inf")]), + ( + "geo_col", + [ + "POINT(30 10)", + None, + "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", + ], + ), + ("int_col", [-9223372036854775808, None, 9223372036854775807]), + ( + "num_col", + [ + decimal.Decimal("-99999999999999999999999999999.999999999"), + None, + decimal.Decimal("99999999999999999999999999999.999999999"), + ], + ), + ("str_col", [u"abc", None, u"def"]), + ( + "time_col", + [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)], + ), + ( + "ts_col", + [ + datetime.datetime(1, 1, 1, 0, 0, 
0, tzinfo=pytz.utc), + None, + datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc + ), + ], + ), + ] + if _BIGNUMERIC_SUPPORT: + df_data.append( ( - "ts_col", + "bignum_col", [ - datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), None, - datetime.datetime( - 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc - ), + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), ], - ), - ] - ) + ) + ) + df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) dataset_id = _make_dataset_id("bq_load_test") @@ -1172,6 +1188,7 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv(self): bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), @@ -1210,6 +1227,14 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv(self): decimal.Decimal("99999999999999999999999999999.999999999"), ], ), + ( + "bignum_col", + [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], + ), ("str_col", [u"abc", None, u"def"]), ( "time_col", @@ -2157,6 +2182,10 @@ def test_query_w_query_params(self): pi_numeric_param = ScalarQueryParameter( name="pi_numeric_param", type_="NUMERIC", value=pi_numeric ) + bignum = decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)) + bignum_param = ScalarQueryParameter( + name="bignum_param", type_="BIGNUMERIC", value=bignum + ) truthy = True truthy_param = ScalarQueryParameter(name="truthy", type_="BOOL", value=truthy) beef = b"DEADBEEF" @@ -2302,6 +2331,15 @@ def test_query_w_query_params(self): "query_parameters": [with_friends_param], }, ] + if _BIGNUMERIC_SUPPORT: + 
examples.append( + { + "sql": "SELECT @bignum_param", + "expected": bignum, + "query_parameters": [bignum_param], + } + ) + for example in examples: jconfig = QueryJobConfig() jconfig.query_parameters = example["query_parameters"] diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index ef0c40e1a..abd725820 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -39,6 +39,12 @@ from google import api_core from google.cloud.bigquery import schema +from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT + + +skip_if_no_bignumeric = pytest.mark.skipif( + not _BIGNUMERIC_SUPPORT, reason="BIGNUMERIC support requires pyarrow>=3.0.0", +) @pytest.fixture @@ -70,6 +76,15 @@ def is_numeric(type_): )(type_) +def is_bignumeric(type_): + # See: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#numeric-type + return all_( + pyarrow.types.is_decimal, + lambda type_: type_.precision == 76, + lambda type_: type_.scale == 38, + )(type_) + + def is_timestamp(type_): # See: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp-type return all_( @@ -120,6 +135,9 @@ def test_all_(): ("FLOAT", "NULLABLE", pyarrow.types.is_float64), ("FLOAT64", "NULLABLE", pyarrow.types.is_float64), ("NUMERIC", "NULLABLE", is_numeric), + pytest.param( + "BIGNUMERIC", "NULLABLE", is_bignumeric, marks=skip_if_no_bignumeric, + ), ("BOOLEAN", "NULLABLE", pyarrow.types.is_boolean), ("BOOL", "NULLABLE", pyarrow.types.is_boolean), ("TIMESTAMP", "NULLABLE", is_timestamp), @@ -198,6 +216,12 @@ def test_all_(): "REPEATED", all_(pyarrow.types.is_list, lambda type_: is_numeric(type_.value_type)), ), + pytest.param( + "BIGNUMERIC", + "REPEATED", + all_(pyarrow.types.is_list, lambda type_: is_bignumeric(type_.value_type)), + marks=skip_if_no_bignumeric, + ), ( "BOOLEAN", "REPEATED", @@ -270,34 +294,41 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): 
schema.SchemaField("field05", "FLOAT"), schema.SchemaField("field06", "FLOAT64"), schema.SchemaField("field07", "NUMERIC"), - schema.SchemaField("field08", "BOOLEAN"), - schema.SchemaField("field09", "BOOL"), - schema.SchemaField("field10", "TIMESTAMP"), - schema.SchemaField("field11", "DATE"), - schema.SchemaField("field12", "TIME"), - schema.SchemaField("field13", "DATETIME"), - schema.SchemaField("field14", "GEOGRAPHY"), + schema.SchemaField("field09", "BOOLEAN"), + schema.SchemaField("field10", "BOOL"), + schema.SchemaField("field11", "TIMESTAMP"), + schema.SchemaField("field12", "DATE"), + schema.SchemaField("field13", "TIME"), + schema.SchemaField("field14", "DATETIME"), + schema.SchemaField("field15", "GEOGRAPHY"), ) + + if _BIGNUMERIC_SUPPORT: + fields += (schema.SchemaField("field08", "BIGNUMERIC"),) + field = schema.SchemaField("ignored_name", bq_type, mode="NULLABLE", fields=fields) actual = module_under_test.bq_to_arrow_data_type(field) - expected = pyarrow.struct( - ( - pyarrow.field("field01", pyarrow.string()), - pyarrow.field("field02", pyarrow.binary()), - pyarrow.field("field03", pyarrow.int64()), - pyarrow.field("field04", pyarrow.int64()), - pyarrow.field("field05", pyarrow.float64()), - pyarrow.field("field06", pyarrow.float64()), - pyarrow.field("field07", module_under_test.pyarrow_numeric()), - pyarrow.field("field08", pyarrow.bool_()), - pyarrow.field("field09", pyarrow.bool_()), - pyarrow.field("field10", module_under_test.pyarrow_timestamp()), - pyarrow.field("field11", pyarrow.date32()), - pyarrow.field("field12", module_under_test.pyarrow_time()), - pyarrow.field("field13", module_under_test.pyarrow_datetime()), - pyarrow.field("field14", pyarrow.string()), - ) + + expected = ( + pyarrow.field("field01", pyarrow.string()), + pyarrow.field("field02", pyarrow.binary()), + pyarrow.field("field03", pyarrow.int64()), + pyarrow.field("field04", pyarrow.int64()), + pyarrow.field("field05", pyarrow.float64()), + pyarrow.field("field06", 
pyarrow.float64()), + pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field09", pyarrow.bool_()), + pyarrow.field("field10", pyarrow.bool_()), + pyarrow.field("field11", module_under_test.pyarrow_timestamp()), + pyarrow.field("field12", pyarrow.date32()), + pyarrow.field("field13", module_under_test.pyarrow_time()), + pyarrow.field("field14", module_under_test.pyarrow_datetime()), + pyarrow.field("field15", pyarrow.string()), ) + if _BIGNUMERIC_SUPPORT: + expected += (pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),) + expected = pyarrow.struct(expected) + assert pyarrow.types.is_struct(actual) assert actual.num_fields == len(fields) assert actual.equals(expected) @@ -314,34 +345,41 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): schema.SchemaField("field05", "FLOAT"), schema.SchemaField("field06", "FLOAT64"), schema.SchemaField("field07", "NUMERIC"), - schema.SchemaField("field08", "BOOLEAN"), - schema.SchemaField("field09", "BOOL"), - schema.SchemaField("field10", "TIMESTAMP"), - schema.SchemaField("field11", "DATE"), - schema.SchemaField("field12", "TIME"), - schema.SchemaField("field13", "DATETIME"), - schema.SchemaField("field14", "GEOGRAPHY"), + schema.SchemaField("field09", "BOOLEAN"), + schema.SchemaField("field10", "BOOL"), + schema.SchemaField("field11", "TIMESTAMP"), + schema.SchemaField("field12", "DATE"), + schema.SchemaField("field13", "TIME"), + schema.SchemaField("field14", "DATETIME"), + schema.SchemaField("field15", "GEOGRAPHY"), ) + + if _BIGNUMERIC_SUPPORT: + fields += (schema.SchemaField("field08", "BIGNUMERIC"),) + field = schema.SchemaField("ignored_name", bq_type, mode="REPEATED", fields=fields) actual = module_under_test.bq_to_arrow_data_type(field) - expected_value_type = pyarrow.struct( - ( - pyarrow.field("field01", pyarrow.string()), - pyarrow.field("field02", pyarrow.binary()), - pyarrow.field("field03", pyarrow.int64()), - pyarrow.field("field04", 
pyarrow.int64()), - pyarrow.field("field05", pyarrow.float64()), - pyarrow.field("field06", pyarrow.float64()), - pyarrow.field("field07", module_under_test.pyarrow_numeric()), - pyarrow.field("field08", pyarrow.bool_()), - pyarrow.field("field09", pyarrow.bool_()), - pyarrow.field("field10", module_under_test.pyarrow_timestamp()), - pyarrow.field("field11", pyarrow.date32()), - pyarrow.field("field12", module_under_test.pyarrow_time()), - pyarrow.field("field13", module_under_test.pyarrow_datetime()), - pyarrow.field("field14", pyarrow.string()), - ) + + expected = ( + pyarrow.field("field01", pyarrow.string()), + pyarrow.field("field02", pyarrow.binary()), + pyarrow.field("field03", pyarrow.int64()), + pyarrow.field("field04", pyarrow.int64()), + pyarrow.field("field05", pyarrow.float64()), + pyarrow.field("field06", pyarrow.float64()), + pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field09", pyarrow.bool_()), + pyarrow.field("field10", pyarrow.bool_()), + pyarrow.field("field11", module_under_test.pyarrow_timestamp()), + pyarrow.field("field12", pyarrow.date32()), + pyarrow.field("field13", module_under_test.pyarrow_time()), + pyarrow.field("field14", module_under_test.pyarrow_datetime()), + pyarrow.field("field15", pyarrow.string()), ) + if _BIGNUMERIC_SUPPORT: + expected += (pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),) + expected_value_type = pyarrow.struct(expected) + assert pyarrow.types.is_list(actual) assert pyarrow.types.is_struct(actual.value_type) assert actual.value_type.num_fields == len(fields) @@ -385,6 +423,16 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): decimal.Decimal("999.123456789"), ], ), + pytest.param( + "BIGNUMERIC", + [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + decimal.Decimal("3.141592653589793238462643383279"), + ], + marks=skip_if_no_bignumeric, + ), ("BOOLEAN", [True, 
None, False, None]), ("BOOL", [False, None, True, None]), ( @@ -841,41 +889,45 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): schema.SchemaField("field05", "FLOAT", mode="REQUIRED"), schema.SchemaField("field06", "FLOAT64", mode="REQUIRED"), schema.SchemaField("field07", "NUMERIC", mode="REQUIRED"), - schema.SchemaField("field08", "BOOLEAN", mode="REQUIRED"), - schema.SchemaField("field09", "BOOL", mode="REQUIRED"), - schema.SchemaField("field10", "TIMESTAMP", mode="REQUIRED"), - schema.SchemaField("field11", "DATE", mode="REQUIRED"), - schema.SchemaField("field12", "TIME", mode="REQUIRED"), - schema.SchemaField("field13", "DATETIME", mode="REQUIRED"), - schema.SchemaField("field14", "GEOGRAPHY", mode="REQUIRED"), - ) - dataframe = pandas.DataFrame( - { - "field01": ["hello", "world"], - "field02": [b"abd", b"efg"], - "field03": [1, 2], - "field04": [3, 4], - "field05": [1.25, 9.75], - "field06": [-1.75, -3.5], - "field07": [decimal.Decimal("1.2345"), decimal.Decimal("6.7891")], - "field08": [True, False], - "field09": [False, True], - "field10": [ - datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), - datetime.datetime(2012, 12, 21, 9, 7, 42, tzinfo=pytz.utc), - ], - "field11": [datetime.date(9999, 12, 31), datetime.date(1970, 1, 1)], - "field12": [datetime.time(23, 59, 59, 999999), datetime.time(12, 0, 0)], - "field13": [ - datetime.datetime(1970, 1, 1, 0, 0, 0), - datetime.datetime(2012, 12, 21, 9, 7, 42), - ], - "field14": [ - "POINT(30 10)", - "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", - ], - } + schema.SchemaField("field09", "BOOLEAN", mode="REQUIRED"), + schema.SchemaField("field10", "BOOL", mode="REQUIRED"), + schema.SchemaField("field11", "TIMESTAMP", mode="REQUIRED"), + schema.SchemaField("field12", "DATE", mode="REQUIRED"), + schema.SchemaField("field13", "TIME", mode="REQUIRED"), + schema.SchemaField("field14", "DATETIME", mode="REQUIRED"), + schema.SchemaField("field15", "GEOGRAPHY", mode="REQUIRED"), ) + if 
_BIGNUMERIC_SUPPORT: + bq_schema += (schema.SchemaField("field08", "BIGNUMERIC", mode="REQUIRED"),) + + data = { + "field01": ["hello", "world"], + "field02": [b"abd", b"efg"], + "field03": [1, 2], + "field04": [3, 4], + "field05": [1.25, 9.75], + "field06": [-1.75, -3.5], + "field07": [decimal.Decimal("1.2345"), decimal.Decimal("6.7891")], + "field09": [True, False], + "field10": [False, True], + "field11": [ + datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + datetime.datetime(2012, 12, 21, 9, 7, 42, tzinfo=pytz.utc), + ], + "field12": [datetime.date(9999, 12, 31), datetime.date(1970, 1, 1)], + "field13": [datetime.time(23, 59, 59, 999999), datetime.time(12, 0, 0)], + "field14": [ + datetime.datetime(1970, 1, 1, 0, 0, 0), + datetime.datetime(2012, 12, 21, 9, 7, 42), + ], + "field15": ["POINT(30 10)", "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))"], + } + if _BIGNUMERIC_SUPPORT: + data["field08"] = [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ] + dataframe = pandas.DataFrame(data) arrow_table = module_under_test.dataframe_to_arrow(dataframe, bq_schema) arrow_schema = arrow_table.schema @@ -1089,6 +1141,7 @@ def test_augment_schema_type_detection_succeeds(module_under_test): "bytes_field": b"some bytes", "string_field": u"some characters", "numeric_field": decimal.Decimal("123.456"), + "bignumeric_field": decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), } ] ) @@ -1109,6 +1162,10 @@ def test_augment_schema_type_detection_succeeds(module_under_test): schema.SchemaField("string_field", field_type=None, mode="NULLABLE"), schema.SchemaField("numeric_field", field_type=None, mode="NULLABLE"), ) + if _BIGNUMERIC_SUPPORT: + current_schema += ( + schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"), + ) with warnings.catch_warnings(record=True) as warned: augmented_schema = module_under_test.augment_schema(dataframe, current_schema) @@ -1131,6 +1188,13 @@ def 
test_augment_schema_type_detection_succeeds(module_under_test): schema.SchemaField("string_field", field_type="STRING", mode="NULLABLE"), schema.SchemaField("numeric_field", field_type="NUMERIC", mode="NULLABLE"), ) + if _BIGNUMERIC_SUPPORT: + expected_schema += ( + schema.SchemaField( + "bignumeric_field", field_type="BIGNUMERIC", mode="NULLABLE" + ), + ) + by_name = operator.attrgetter("name") assert sorted(augmented_schema, key=by_name) == sorted(expected_schema, key=by_name) diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index fffa46aa8..c28c014d4 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -25,6 +25,7 @@ import google.cloud._helpers from google.cloud.bigquery import table +from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions from tests.unit.helpers import _to_pyarrow @@ -51,6 +52,14 @@ def test_scalar_to_query_parameter(self): "TIMESTAMP", ), ] + if _BIGNUMERIC_SUPPORT: + expected_types.append( + ( + decimal.Decimal("1.1234567890123456789012345678901234567890"), + "BIGNUMERIC", + ) + ) + for value, expected_type in expected_types: msg = "value: {} expected_type: {}".format(value, expected_type) parameter = _helpers.scalar_to_query_parameter(value) @@ -104,6 +113,11 @@ def test_array_to_query_parameter_valid_argument(self): ), ] + if _BIGNUMERIC_SUPPORT: + expected_types.append( + ([decimal.Decimal("{d38}.{d38}".format(d38="9" * 38))], "BIGNUMERIC") + ) + for values, expected_type in expected_types: msg = "value: {} expected_type: {}".format(values, expected_type) parameter = _helpers.array_to_query_parameter(values) diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index cf268daf1..ae2c29d09 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -166,6 +166,16 @@ def test_to_api_repr_w_numeric(self): param = 
klass.positional(type_="NUMERIC", value="123456789.123456789") self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_bignumeric(self): + big_num_string = "{d38}.{d38}".format(d38="9" * 38) + EXPECTED = { + "parameterType": {"type": "BIGNUMERIC"}, + "parameterValue": {"value": big_num_string}, + } + klass = self._get_target_class() + param = klass.positional(type_="BIGNUMERIC", value=big_num_string) + self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_bool(self): EXPECTED = { "parameterType": {"type": "BOOL"}, From c1d15f4e5da4b7e10c00afffd59a5c7f3ded027a Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 24 Feb 2021 16:26:00 +0100 Subject: [PATCH 137/341] fix: error using empty array of structs parameter (#474) * fix: error using empty array of structs parameter * Add QueryParameterType classes * Use query parameter types with ArrayQueryParameter * Adjust system test to changed ArrayQueryParameter * Clarify a comment about an assertion Co-authored-by: Tim Swast * Clarify when name/descr. 
is omitted from API repr * Rename subtypes to fields * Add fields property to StructQueryParameterType * Add a check for empty struct fields * Define scalar SQL parameter types as type objects Co-authored-by: Tim Swast --- google/cloud/bigquery/__init__.py | 6 + google/cloud/bigquery/enums.py | 21 ++ google/cloud/bigquery/query.py | 301 +++++++++++++++++++++-- tests/system/test_client.py | 15 ++ tests/unit/test_query.py | 383 +++++++++++++++++++++++++++++- 5 files changed, 711 insertions(+), 15 deletions(-) diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 29d375b03..f609468f5 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -66,8 +66,11 @@ from google.cloud.bigquery.model import Model from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.query import ArrayQueryParameter +from google.cloud.bigquery.query import ArrayQueryParameterType from google.cloud.bigquery.query import ScalarQueryParameter +from google.cloud.bigquery.query import ScalarQueryParameterType from google.cloud.bigquery.query import StructQueryParameter +from google.cloud.bigquery.query import StructQueryParameterType from google.cloud.bigquery.query import UDFResource from google.cloud.bigquery.retry import DEFAULT_RETRY from google.cloud.bigquery.routine import DeterminismLevel @@ -93,6 +96,9 @@ "ArrayQueryParameter", "ScalarQueryParameter", "StructQueryParameter", + "ArrayQueryParameterType", + "ScalarQueryParameterType", + "StructQueryParameterType", # Datasets "Dataset", "DatasetReference", diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index e353b3132..b378f091b 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -18,6 +18,7 @@ import itertools from google.cloud.bigquery_v2 import types as gapic_types +from google.cloud.bigquery.query import ScalarQueryParameterType class Compression(object): @@ -215,6 +216,26 @@ class 
SqlTypeNames(str, enum.Enum): DATETIME = "DATETIME" +class SqlParameterScalarTypes: + """Supported scalar SQL query parameter types as type objects.""" + + STRING = ScalarQueryParameterType("STRING") + BYTES = ScalarQueryParameterType("BYTES") + INTEGER = ScalarQueryParameterType("INT64") + INT64 = ScalarQueryParameterType("INT64") + FLOAT = ScalarQueryParameterType("FLOAT64") + FLOAT64 = ScalarQueryParameterType("FLOAT64") + NUMERIC = ScalarQueryParameterType("NUMERIC") + BIGNUMERIC = ScalarQueryParameterType("BIGNUMERIC") + BOOLEAN = ScalarQueryParameterType("BOOL") + BOOL = ScalarQueryParameterType("BOOL") + GEOGRAPHY = ScalarQueryParameterType("GEOGRAPHY") + TIMESTAMP = ScalarQueryParameterType("TIMESTAMP") + DATE = ScalarQueryParameterType("DATE") + TIME = ScalarQueryParameterType("TIME") + DATETIME = ScalarQueryParameterType("DATETIME") + + class WriteDisposition(object): """Specifies the action that occurs if destination table already exists. diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index ecec73e99..42547cd73 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -48,6 +48,239 @@ def __ne__(self, other): return not self == other +class _AbstractQueryParameterType: + """Base class for representing query parameter types. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/QueryParameter#queryparametertype + """ + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter type from JSON resource. + + Args: + resource (Dict): JSON mapping of parameter + + Returns: + google.cloud.bigquery.query.QueryParameterType: Instance + """ + raise NotImplementedError + + def to_api_repr(self): + """Construct JSON API representation for the parameter type. + + Returns: + Dict: JSON mapping + """ + raise NotImplementedError + + +class ScalarQueryParameterType(_AbstractQueryParameterType): + """Type representation for scalar query parameters. 
+ + Args: + type_ (str): + One of 'STRING', 'INT64', 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', + 'DATETIME', or 'DATE'. + name (Optional[str]): + The name of the query parameter. Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. + description (Optional[str]): + The query parameter description. Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. + """ + + def __init__(self, type_, *, name=None, description=None): + self._type = type_ + self.name = name + self.description = description + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter type from JSON resource. + + Args: + resource (Dict): JSON mapping of parameter + + Returns: + google.cloud.bigquery.query.ScalarQueryParameterType: Instance + """ + type_ = resource["type"] + return cls(type_) + + def to_api_repr(self): + """Construct JSON API representation for the parameter type. + + Returns: + Dict: JSON mapping + """ + # Name and description are only used if the type is a field inside a struct + # type, but it's StructQueryParameterType's responsibility to use these two + # attributes in the API representation when needed. Here we omit them. + return {"type": self._type} + + def __repr__(self): + name = f", name={self.name!r}" if self.name is not None else "" + description = ( + f", description={self.description!r}" + if self.description is not None + else "" + ) + return f"{self.__class__.__name__}({self._type!r}{name}{description})" + + +class ArrayQueryParameterType(_AbstractQueryParameterType): + """Type representation for array query parameters. + + Args: + array_type (Union[ScalarQueryParameterType, StructQueryParameterType]): + The type of array elements. + name (Optional[str]): + The name of the query parameter. Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. + description (Optional[str]): + The query parameter description. 
Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. + """ + + def __init__(self, array_type, *, name=None, description=None): + self._array_type = array_type + self.name = name + self.description = description + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter type from JSON resource. + + Args: + resource (Dict): JSON mapping of parameter + + Returns: + google.cloud.bigquery.query.ArrayQueryParameterType: Instance + """ + array_item_type = resource["arrayType"]["type"] + + if array_item_type in {"STRUCT", "RECORD"}: + klass = StructQueryParameterType + else: + klass = ScalarQueryParameterType + + item_type_instance = klass.from_api_repr(resource["arrayType"]) + return cls(item_type_instance) + + def to_api_repr(self): + """Construct JSON API representation for the parameter type. + + Returns: + Dict: JSON mapping + """ + # Name and description are only used if the type is a field inside a struct + # type, but it's StructQueryParameterType's responsibilty to use these two + # attributes in the API representation when needed. Here we omit them. + return { + "type": "ARRAY", + "arrayType": self._array_type.to_api_repr(), + } + + def __repr__(self): + name = f", name={self.name!r}" if self.name is not None else "" + description = ( + f", description={self.description!r}" + if self.description is not None + else "" + ) + return f"{self.__class__.__name__}({self._array_type!r}{name}{description})" + + +class StructQueryParameterType(_AbstractQueryParameterType): + """Type representation for struct query parameters. + + Args: + fields (Iterable[Union[ \ + ArrayQueryParameterType, ScalarQueryParameterType, StructQueryParameterType \ + ]]): + An non-empty iterable describing the struct's field types. + name (Optional[str]): + The name of the query parameter. Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. 
+ description (Optional[str]): + The query parameter description. Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. + """ + + def __init__(self, *fields, name=None, description=None): + if not fields: + raise ValueError("Struct type must have at least one field defined.") + + self._fields = fields # fields is a tuple (immutable), no shallow copy needed + self.name = name + self.description = description + + @property + def fields(self): + return self._fields # no copy needed, self._fields is an immutable sequence + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter type from JSON resource. + + Args: + resource (Dict): JSON mapping of parameter + + Returns: + google.cloud.bigquery.query.StructQueryParameterType: Instance + """ + fields = [] + + for struct_field in resource["structTypes"]: + type_repr = struct_field["type"] + if type_repr["type"] in {"STRUCT", "RECORD"}: + klass = StructQueryParameterType + elif type_repr["type"] == "ARRAY": + klass = ArrayQueryParameterType + else: + klass = ScalarQueryParameterType + + type_instance = klass.from_api_repr(type_repr) + type_instance.name = struct_field.get("name") + type_instance.description = struct_field.get("description") + fields.append(type_instance) + + return cls(*fields) + + def to_api_repr(self): + """Construct JSON API representation for the parameter type. 
+ + Returns: + Dict: JSON mapping + """ + fields = [] + + for field in self._fields: + item = {"type": field.to_api_repr()} + if field.name is not None: + item["name"] = field.name + if field.description is not None: + item["description"] = field.description + + fields.append(item) + + return { + "type": "STRUCT", + "structTypes": fields, + } + + def __repr__(self): + name = f", name={self.name!r}" if self.name is not None else "" + description = ( + f", description={self.description!r}" + if self.description is not None + else "" + ) + items = ", ".join(repr(field) for field in self._fields) + return f"{self.__class__.__name__}({items}{name}{description})" + + class _AbstractQueryParameter(object): """Base class for named / positional query parameters. """ @@ -184,28 +417,43 @@ class ArrayQueryParameter(_AbstractQueryParameter): Parameter name, used via ``@foo`` syntax. If None, the parameter can only be addressed via position (``?``). - array_type (str): - Name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'NUMERIC'`, `'BIGNUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + array_type (Union[str, ScalarQueryParameterType, StructQueryParameterType]): + The type of array elements. If given as a string, it must be one of + `'STRING'`, `'INT64'`, `'FLOAT64'`, `'NUMERIC'`, `'BIGNUMERIC'`, `'BOOL'`, + `'TIMESTAMP'`, `'DATE'`, or `'STRUCT'`/`'RECORD'`. + If the type is ``'STRUCT'``/``'RECORD'`` and ``values`` is empty, + the exact item type cannot be deduced, thus a ``StructQueryParameterType`` + instance needs to be passed in. - values (List[appropriate scalar type]): The parameter array values. + values (List[appropriate type]): The parameter array values. 
""" def __init__(self, name, array_type, values): self.name = name - self.array_type = array_type self.values = values + if isinstance(array_type, str): + if not values and array_type in {"RECORD", "STRUCT"}: + raise ValueError( + "Missing detailed struct item type info for an empty array, " + "please provide a StructQueryParameterType instance." + ) + self.array_type = array_type + @classmethod def positional(cls, array_type, values): """Factory for positional parameters. Args: - array_type (str): - Name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'NUMERIC'`, `'BIGNUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + array_type (Union[str, ScalarQueryParameterType, StructQueryParameterType]): + The type of array elements. If given as a string, it must be one of + `'STRING'`, `'INT64'`, `'FLOAT64'`, `'NUMERIC'`, `'BIGNUMERIC'`, + `'BOOL'`, `'TIMESTAMP'`, `'DATE'`, or `'STRUCT'`/`'RECORD'`. + If the type is ``'STRUCT'``/``'RECORD'`` and ``values`` is empty, + the exact item type cannot be deduced, thus a ``StructQueryParameterType`` + instance needs to be passed in. - values (List[appropriate scalar type]): The parameter array values. + values (List[appropriate type]): The parameter array values. Returns: google.cloud.bigquery.query.ArrayQueryParameter: Instance without name @@ -263,22 +511,40 @@ def to_api_repr(self): Dict: JSON mapping """ values = self.values - if self.array_type == "RECORD" or self.array_type == "STRUCT": + + if self.array_type in {"RECORD", "STRUCT"} or isinstance( + self.array_type, StructQueryParameterType + ): reprs = [value.to_api_repr() for value in values] - a_type = reprs[0]["parameterType"] a_values = [repr_["parameterValue"] for repr_ in reprs] + + if reprs: + a_type = reprs[0]["parameterType"] + else: + # This assertion always evaluates to True because the + # constructor disallows STRUCT/RECORD type defined as a + # string with empty values. 
+ assert isinstance(self.array_type, StructQueryParameterType) + a_type = self.array_type.to_api_repr() else: - a_type = {"type": self.array_type} - converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.array_type) + # Scalar array item type. + if isinstance(self.array_type, str): + a_type = {"type": self.array_type} + else: + a_type = self.array_type.to_api_repr() + + converter = _SCALAR_VALUE_TO_JSON_PARAM.get(a_type["type"]) if converter is not None: values = [converter(value) for value in values] a_values = [{"value": value} for value in values] + resource = { "parameterType": {"type": "ARRAY", "arrayType": a_type}, "parameterValue": {"arrayValues": a_values}, } if self.name is not None: resource["name"] = self.name + return resource def _key(self): @@ -289,7 +555,14 @@ def _key(self): Returns: Tuple: The contents of this :class:`~google.cloud.bigquery.query.ArrayQueryParameter`. """ - return (self.name, self.array_type.upper(), self.values) + if isinstance(self.array_type, str): + item_type = self.array_type + elif isinstance(self.array_type, ScalarQueryParameterType): + item_type = self.array_type._type + else: + item_type = "STRUCT" + + return (self.name, item_type.upper(), self.values) def __eq__(self, other): if not isinstance(other, ArrayQueryParameter): diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 684a42c30..ed48b0bfe 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2168,7 +2168,9 @@ def test_query_w_query_params(self): from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.query import ArrayQueryParameter from google.cloud.bigquery.query import ScalarQueryParameter + from google.cloud.bigquery.query import ScalarQueryParameterType from google.cloud.bigquery.query import StructQueryParameter + from google.cloud.bigquery.query import StructQueryParameterType question = "What is the answer to life, the universe, and everything?" 
question_param = ScalarQueryParameter( @@ -2227,6 +2229,14 @@ def test_query_w_query_params(self): characters_param = ArrayQueryParameter( name=None, array_type="RECORD", values=[phred_param, bharney_param] ) + empty_struct_array_param = ArrayQueryParameter( + name="empty_array_param", + values=[], + array_type=StructQueryParameterType( + ScalarQueryParameterType(name="foo", type_="INT64"), + ScalarQueryParameterType(name="bar", type_="STRING"), + ), + ) hero_param = StructQueryParameter("hero", phred_name_param, phred_age_param) sidekick_param = StructQueryParameter( "sidekick", bharney_name_param, bharney_age_param @@ -2317,6 +2327,11 @@ def test_query_w_query_params(self): ], "query_parameters": [characters_param], }, + { + "sql": "SELECT @empty_array_param", + "expected": [], + "query_parameters": [empty_struct_array_param], + }, { "sql": "SELECT @roles", "expected": { diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index ae2c29d09..c8be2911f 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -43,6 +43,318 @@ def test___eq__(self): self.assertNotEqual(udf, wrong_type) +class Test__AbstractQueryParameterType(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import _AbstractQueryParameterType + + return _AbstractQueryParameterType + + @classmethod + def _make_one(cls, *args, **kw): + return cls._get_target_class()(*args, **kw) + + def test_from_api_virtual(self): + klass = self._get_target_class() + with self.assertRaises(NotImplementedError): + klass.from_api_repr({}) + + def test_to_api_virtual(self): + param_type = self._make_one() + with self.assertRaises(NotImplementedError): + param_type.to_api_repr() + + +class Test_ScalarQueryParameterType(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import ScalarQueryParameterType + + return ScalarQueryParameterType + + def _make_one(self, *args, **kw): + return 
self._get_target_class()(*args, **kw) + + def test_from_api_repr(self): + klass = self._get_target_class() + result = klass.from_api_repr({"type": "BOOLEAN"}) + self.assertEqual(result._type, "BOOLEAN") + self.assertIsNone(result.name) + self.assertIsNone(result.description) + + def test_to_api_repr(self): + param_type = self._make_one("BYTES", name="foo", description="bar") + result = param_type.to_api_repr() + self.assertEqual(result, {"type": "BYTES"}) + + def test_repr_no_optional_attrs(self): + param_type = self._make_one("BYTES") + self.assertEqual(repr(param_type), "ScalarQueryParameterType('BYTES')") + + def test_repr_all_optional_attrs(self): + param_type = self._make_one("BYTES", name="foo", description="this is foo") + self.assertEqual( + repr(param_type), + "ScalarQueryParameterType('BYTES', name='foo', description='this is foo')", + ) + + +class Test_ArrayQueryParameterType(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import ArrayQueryParameterType + + return ArrayQueryParameterType + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_from_api_repr(self): + from google.cloud.bigquery.query import StructQueryParameterType + + api_resource = { + "type": "ARRAY", + "arrayType": { + "type": "STRUCT", + "structTypes": [ + { + "name": "weight", + "type": {"type": "INTEGER"}, + "description": "in kg", + }, + {"name": "last_name", "type": {"type": "STRING"}}, + ], + }, + } + + klass = self._get_target_class() + result = klass.from_api_repr(api_resource) + + self.assertIsNone(result.name) + self.assertIsNone(result.description) + item_type = result._array_type + self.assertIsInstance(item_type, StructQueryParameterType) + + self.assertIsNone(item_type.name) + self.assertIsNone(item_type.description) + + field = item_type.fields[0] + self.assertEqual(field.name, "weight") + self.assertEqual(field.description, "in kg") + self.assertEqual(field._type, "INTEGER") + + 
field = item_type.fields[1] + self.assertEqual(field.name, "last_name") + self.assertIsNone(field.description) + self.assertEqual(field._type, "STRING") + + def test_to_api_repr(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + from google.cloud.bigquery.query import StructQueryParameterType + + array_item_type = StructQueryParameterType( + ScalarQueryParameterType("INTEGER", name="weight", description="in kg"), + ScalarQueryParameterType("STRING", name="last_name"), + ) + param_type = self._make_one(array_item_type, name="foo", description="bar") + + result = param_type.to_api_repr() + + expected_result = { + "type": "ARRAY", + "arrayType": { + "type": "STRUCT", + "structTypes": [ + { + "name": "weight", + "type": {"type": "INTEGER"}, + "description": "in kg", + }, + {"name": "last_name", "type": {"type": "STRING"}}, + ], + }, + } + self.assertEqual(result, expected_result) + + def test_repr_no_optional_attrs(self): + param_type = self._make_one("BOOLEAN") + self.assertEqual(repr(param_type), "ArrayQueryParameterType('BOOLEAN')") + + def test_repr_all_optional_attrs(self): + param_type = self._make_one("INT64", name="bar", description="this is bar") + self.assertEqual( + repr(param_type), + "ArrayQueryParameterType('INT64', name='bar', description='this is bar')", + ) + + +class Test_StructQueryParameterType(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import StructQueryParameterType + + return StructQueryParameterType + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_raises_error_without_any_fields(self): + with self.assertRaisesRegex(ValueError, ".*at least one field.*"): + self._make_one() + + def test_from_api_repr(self): + from google.cloud.bigquery.query import ArrayQueryParameterType + from google.cloud.bigquery.query import ScalarQueryParameterType + + api_resource = { + "type": "STRUCT", + "structTypes": [ + { + "name": "age", + 
"type": {"type": "INTEGER"}, + "description": "in years", + }, + { + "name": "aliases", + "type": {"type": "ARRAY", "arrayType": {"type": "STRING"}}, + }, + { + "description": "a nested struct", + "type": { + "type": "STRUCT", + "structTypes": [ + {"type": {"type": "DATE"}, "name": "nested_date"}, + { + "type": {"type": "BOOLEAN"}, + "description": "nested bool field", + }, + ], + }, + }, + ], + } + + klass = self._get_target_class() + result = klass.from_api_repr(api_resource) + + self.assertIsNone(result.name) + self.assertIsNone(result.description) + self.assertEqual(len(result.fields), 3) + + field = result.fields[0] + self.assertIsInstance(field, ScalarQueryParameterType) + self.assertEqual(field.name, "age") + self.assertEqual(field.description, "in years") + + field = result.fields[1] + self.assertIsInstance(field, ArrayQueryParameterType) + self.assertEqual(field.name, "aliases") + self.assertIsNone(field.description) + self.assertIsInstance(field._array_type, ScalarQueryParameterType) + self.assertEqual(field._array_type._type, "STRING") + + field = result.fields[2] + self.assertIsInstance(field, self._get_target_class()) + self.assertIsNone(field.name) + self.assertEqual(field.description, "a nested struct") + + date_field = field.fields[0] + self.assertEqual(date_field._type, "DATE") + self.assertEqual(date_field.name, "nested_date") + self.assertIsNone(date_field.description) + + bool_field = field.fields[1] + self.assertEqual(bool_field._type, "BOOLEAN") + self.assertIsNone(bool_field.name) + self.assertEqual(bool_field.description, "nested bool field") + + def test_to_api_repr(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + + int_type = ScalarQueryParameterType("INTEGER", description="in years") + date_type = ScalarQueryParameterType("DATE", name="day_of_birth") + param_type = self._make_one(int_type, date_type, name="foo", description="bar") + + result = param_type.to_api_repr() + + expected_result = { + "type": "STRUCT", 
+ "structTypes": [ + {"type": {"type": "INTEGER"}, "description": "in years"}, + {"name": "day_of_birth", "type": {"type": "DATE"}}, + ], + } + self.assertEqual(result, expected_result) + + def test_to_api_repr_nested(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + + struct_class = self._get_target_class() + + int_type = ScalarQueryParameterType("INTEGER", description="in years") + nested_struct_type = struct_class( + ScalarQueryParameterType("DATE", name="nested_date"), + ScalarQueryParameterType("BOOLEAN", description="nested bool field"), + name="nested", + ) + param_type = self._make_one( + int_type, nested_struct_type, name="foo", description="bar" + ) + + result = param_type.to_api_repr() + + expected_result = { + "type": "STRUCT", + "structTypes": [ + {"type": {"type": "INTEGER"}, "description": "in years"}, + { + "name": "nested", + "type": { + "type": "STRUCT", + "structTypes": [ + {"type": {"type": "DATE"}, "name": "nested_date"}, + { + "type": {"type": "BOOLEAN"}, + "description": "nested bool field", + }, + ], + }, + }, + ], + } + self.assertEqual(result, expected_result) + + def test_repr_no_optional_attrs(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + + param_type = self._make_one( + ScalarQueryParameterType("BOOLEAN"), ScalarQueryParameterType("STRING") + ) + expected = ( + "StructQueryParameterType(" + "ScalarQueryParameterType('BOOLEAN'), ScalarQueryParameterType('STRING')" + ")" + ) + self.assertEqual(repr(param_type), expected) + + def test_repr_all_optional_attrs(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + + param_type = self._make_one( + ScalarQueryParameterType("BOOLEAN"), + ScalarQueryParameterType("STRING"), + name="data_record", + description="this is it", + ) + expected = ( + "StructQueryParameterType(" + "ScalarQueryParameterType('BOOLEAN'), ScalarQueryParameterType('STRING'), " + "name='data_record', description='this is it'" + ")" + ) + 
self.assertEqual(repr(param_type), expected) + + class Test__AbstractQueryParameter(unittest.TestCase): @staticmethod def _get_target_class(): @@ -340,6 +652,10 @@ def test_ctor(self): self.assertEqual(param.array_type, "INT64") self.assertEqual(param.values, [1, 2]) + def test_ctor_empty_struct_array_wo_type_info(self): + with self.assertRaisesRegex(ValueError, r"(?i)missing.*struct.*type info.*"): + self._make_one(name="foo", array_type="STRUCT", values=[]) + def test___eq__(self): param = self._make_one(name="foo", array_type="INT64", values=[123]) self.assertEqual(param, param) @@ -467,6 +783,19 @@ def test_to_api_repr_wo_name(self): param = klass.positional(array_type="INT64", values=[1, 2]) self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_array_type_as_type_instance(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + + EXPECTED = { + "parameterType": {"type": "ARRAY", "arrayType": {"type": "BOOLEAN"}}, + "parameterValue": {"arrayValues": [{"value": "true"}, {"value": "false"}]}, + } + klass = self._get_target_class() + param = klass.positional( + array_type=ScalarQueryParameterType("BOOLEAN"), values=[True, False], + ) + self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_unknown_type(self): EXPECTED = { "parameterType": {"type": "ARRAY", "arrayType": {"type": "UNKNOWN"}}, @@ -503,6 +832,31 @@ def test_to_api_repr_w_record_type(self): param = klass.positional(array_type="RECORD", values=[struct]) self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_empty_array_of_records_type(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + from google.cloud.bigquery.query import StructQueryParameterType + + EXPECTED = { + "parameterType": { + "type": "ARRAY", + "arrayType": { + "type": "STRUCT", + "structTypes": [ + {"name": "foo", "type": {"type": "STRING"}}, + {"name": "bar", "type": {"type": "INT64"}}, + ], + }, + }, + "parameterValue": {"arrayValues": []}, + 
} + item_type = StructQueryParameterType( + ScalarQueryParameterType("STRING", name="foo"), + ScalarQueryParameterType("INT64", name="bar"), + ) + klass = self._get_target_class() + param = klass.positional(array_type=item_type, values=[]) + self.assertEqual(param.to_api_repr(), EXPECTED) + def test___eq___wrong_type(self): field = self._make_one("test", "STRING", ["value"]) other = object() @@ -547,11 +901,38 @@ def test___ne___different_values(self): field2 = self._make_one("test", "INT64", [12]) self.assertNotEqual(field1, field2) - def test___repr__(self): + def test___repr__array_type_str(self): field1 = self._make_one("field1", "STRING", ["value"]) expected = "ArrayQueryParameter('field1', 'STRING', ['value'])" self.assertEqual(repr(field1), expected) + def test___repr__array_type_scalar_type_instance(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + + int_items = self._make_one( + "int_items", ScalarQueryParameterType("INTEGER"), [64] + ) + expected = "ArrayQueryParameter('int_items', 'INTEGER', [64])" + self.assertEqual(repr(int_items), expected) + + def test___repr__array_type_struct_type_instance(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + from google.cloud.bigquery.query import StructQueryParameterType + + struct_items = self._make_one( + "struct_items", + StructQueryParameterType( + ScalarQueryParameterType("INTEGER", name="age"), + ScalarQueryParameterType("STRING", name="last_name"), + ), + [{"age": 18, "last_name": "Doe"}], + ) + expected = ( + "ArrayQueryParameter('struct_items', 'STRUCT', " + "[{'age': 18, 'last_name': 'Doe'}])" + ) + self.assertEqual(repr(struct_items), expected) + class Test_StructQueryParameter(unittest.TestCase): @staticmethod From 699498c8d1ea76dcc7e6347781fc699159dc9214 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Thu, 25 Feb 2021 08:42:04 -0800 Subject: [PATCH 138/341] chore: exclude tarball from code generation (#512) This PR was generated using 
Autosynth. :rainbow: Synth log will be available here: https://source.cloud.google.com/results/invocations/ca115e36-5d95-4acd-a2d8-7ac2f22a7261/targets - [x] To automatically regenerate this PR, check this box. --- synth.metadata | 3 +-- synth.py | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/synth.metadata b/synth.metadata index dc183a72e..9412653c6 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "1c6681aba872c00afb16a904a2ba9bae8e9618d3" + "sha": "1823cadee3acf95c516d0479400e4175349ea199" } }, { @@ -93,7 +93,6 @@ "CONTRIBUTING.rst", "LICENSE", "MANIFEST.in", - "bigquery-v2-py.tar.gz", "docs/_static/custom.css", "docs/_templates/layout.html", "docs/bigquery_v2/model_service.rst", diff --git a/synth.py b/synth.py index 3ab271c96..3c6440600 100644 --- a/synth.py +++ b/synth.py @@ -32,6 +32,7 @@ s.move( library, excludes=[ + "*.tar.gz", "docs/index.rst", "docs/bigquery_v2/*_service.rst", "docs/bigquery_v2/services.rst", From d7632799769248b09a8558ba18f5025ebdd9675a Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 25 Feb 2021 19:32:01 +0100 Subject: [PATCH 139/341] fix: QueryJob.exception() *returns* the errors, not raises them (#467) * fix: QueryJob.exception() should *return* errors * Reload query job on error, raise any reload errors * Catch errors on reloading failed query jobs * Add additional unit test * Increase retry deadline to mitigate test flakiness * Store the more informative exception in done() --- google/cloud/bigquery/job/query.py | 26 ++++++++-- tests/unit/job/test_base.py | 2 +- tests/unit/job/test_query.py | 83 +++++++++++++++++++++++++++++- 3 files changed, 104 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index b3ca8d940..5c1118500 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -989,7 +989,8 @@ def 
done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): unfinished jobs before checking. Default ``True``. Returns: - bool: True if the job is complete, False otherwise. + bool: ``True`` if the job is complete or if fetching its status resulted in + an error, ``False`` otherwise. """ # Do not refresh if the state is already done, as the job will not # change once complete. @@ -997,17 +998,34 @@ def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): if not reload or is_done: return is_done - self._reload_query_results(retry=retry, timeout=timeout) - # If an explicit timeout is not given, fall back to the transport timeout # stored in _blocking_poll() in the process of polling for job completion. transport_timeout = timeout if timeout is not None else self._transport_timeout + try: + self._reload_query_results(retry=retry, timeout=transport_timeout) + except exceptions.GoogleAPIError as exc: + # Reloading also updates error details on self, thus no need for an + # explicit self.set_exception() call if reloading succeeds. + try: + self.reload(retry=retry, timeout=transport_timeout) + except exceptions.GoogleAPIError: + # Use the query results reload exception, as it generally contains + # much more useful error information. + self.set_exception(exc) + return True + else: + return self.state == _DONE_STATE + # Only reload the job once we know the query is complete. # This will ensure that fields such as the destination table are # correctly populated. 
if self._query_results.complete: - self.reload(retry=retry, timeout=transport_timeout) + try: + self.reload(retry=retry, timeout=transport_timeout) + except exceptions.GoogleAPIError as exc: + self.set_exception(exc) + return True return self.state == _DONE_STATE diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index bbeffba50..405ad6ee5 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -967,7 +967,7 @@ def test_result_w_retry_wo_state(self): custom_predicate = mock.Mock() custom_predicate.return_value = True custom_retry = google.api_core.retry.Retry( - predicate=custom_predicate, initial=0.001, maximum=0.001, deadline=0.001, + predicate=custom_predicate, initial=0.001, maximum=0.001, deadline=0.1, ) self.assertIs(job.result(retry=custom_retry), job) diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index a4ab11ab6..655a121e6 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -16,6 +16,7 @@ import copy import http import textwrap +import types import freezegun from google.api_core import exceptions @@ -308,7 +309,7 @@ def test_cancelled(self): self.assertTrue(job.cancelled()) - def test_done(self): + def test_done_job_complete(self): client = _make_client(project=self.PROJECT) resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) @@ -356,6 +357,84 @@ def test_done_w_timeout_and_longer_internal_api_timeout(self): call_args = fake_reload.call_args self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) + def test_done_w_query_results_error_reload_ok_job_finished(self): + client = _make_client(project=self.PROJECT) + bad_request_error = exceptions.BadRequest("Error in query") + client._get_query_results = mock.Mock(side_effect=bad_request_error) + + resource = self._make_resource(ended=False) + job = self._get_target_class().from_api_repr(resource, client) + job._exception = None + + def 
fake_reload(self, *args, **kwargs): + self._properties["status"]["state"] = "DONE" + self.set_exception(copy.copy(bad_request_error)) + + fake_reload_method = types.MethodType(fake_reload, job) + + with mock.patch.object(job, "reload", new=fake_reload_method): + is_done = job.done() + + assert is_done + assert isinstance(job._exception, exceptions.BadRequest) + + def test_done_w_query_results_error_reload_ok_job_still_running(self): + client = _make_client(project=self.PROJECT) + retry_error = exceptions.RetryError("Too many retries", cause=TimeoutError) + client._get_query_results = mock.Mock(side_effect=retry_error) + + resource = self._make_resource(ended=False) + job = self._get_target_class().from_api_repr(resource, client) + job._exception = None + + def fake_reload(self, *args, **kwargs): + self._properties["status"]["state"] = "RUNNING" + + fake_reload_method = types.MethodType(fake_reload, job) + + with mock.patch.object(job, "reload", new=fake_reload_method): + is_done = job.done() + + assert not is_done + assert job._exception is None + + def test_done_w_query_results_error_reload_error(self): + client = _make_client(project=self.PROJECT) + bad_request_error = exceptions.BadRequest("Error in query") + client._get_query_results = mock.Mock(side_effect=bad_request_error) + + resource = self._make_resource(ended=False) + job = self._get_target_class().from_api_repr(resource, client) + reload_error = exceptions.DataLoss("Oops, sorry!") + job.reload = mock.Mock(side_effect=reload_error) + job._exception = None + + is_done = job.done() + + assert is_done + assert job._exception is bad_request_error + + def test_done_w_job_query_results_ok_reload_error(self): + client = _make_client(project=self.PROJECT) + query_results = google.cloud.bigquery.query._QueryResults( + properties={ + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": "12345"}, + } + ) + client._get_query_results = mock.Mock(return_value=query_results) + + resource = 
self._make_resource(ended=False) + job = self._get_target_class().from_api_repr(resource, client) + retry_error = exceptions.RetryError("Too many retries", cause=TimeoutError) + job.reload = mock.Mock(side_effect=retry_error) + job._exception = None + + is_done = job.done() + + assert is_done + assert job._exception is retry_error + def test_query_plan(self): from google.cloud._helpers import _RFC3339_MICROS from google.cloud.bigquery.job import QueryPlanEntry @@ -973,7 +1052,7 @@ def test_result_w_retry(self): initial=0.001, maximum=0.001, multiplier=1.0, - deadline=0.001, + deadline=0.1, predicate=custom_predicate, ) From 81df4ba518dd82d6ef2519adb7803d4c90119a8b Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 25 Feb 2021 18:46:03 +0000 Subject: [PATCH 140/341] chore: release 2.10.0 (#533) :robot: I have created a release \*beep\* \*boop\* --- ## [2.10.0](https://www.github.com/googleapis/python-bigquery/compare/v2.9.0...v2.10.0) (2021-02-25) ### Features * add BIGNUMERIC support ([#527](https://www.github.com/googleapis/python-bigquery/issues/527)) ([cc3394f](https://www.github.com/googleapis/python-bigquery/commit/cc3394f80934419eb00c2029bb81c92a696e7d88)) ### Bug Fixes * error using empty array of structs parameter ([#474](https://www.github.com/googleapis/python-bigquery/issues/474)) ([c1d15f4](https://www.github.com/googleapis/python-bigquery/commit/c1d15f4e5da4b7e10c00afffd59a5c7f3ded027a)) * QueryJob.exception() *returns* the errors, not raises them ([#467](https://www.github.com/googleapis/python-bigquery/issues/467)) ([d763279](https://www.github.com/googleapis/python-bigquery/commit/d7632799769248b09a8558ba18f5025ebdd9675a)) ### Documentation * **bigquery:** Add alternative approach to setting credentials ([#517](https://www.github.com/googleapis/python-bigquery/issues/517)) ([60fbf28](https://www.github.com/googleapis/python-bigquery/commit/60fbf287b0d34d5db2e61cce7a5b42735ed43d0e)) 
* explain retry behavior for DONE jobs ([#532](https://www.github.com/googleapis/python-bigquery/issues/532)) ([696c443](https://www.github.com/googleapis/python-bigquery/commit/696c443f0a6740be0767e12b706a7771bc1460c3)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- CHANGELOG.md | 19 +++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 51fad831e..9afd523a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,25 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.10.0](https://www.github.com/googleapis/python-bigquery/compare/v2.9.0...v2.10.0) (2021-02-25) + + +### Features + +* add BIGNUMERIC support ([#527](https://www.github.com/googleapis/python-bigquery/issues/527)) ([cc3394f](https://www.github.com/googleapis/python-bigquery/commit/cc3394f80934419eb00c2029bb81c92a696e7d88)) + + +### Bug Fixes + +* error using empty array of structs parameter ([#474](https://www.github.com/googleapis/python-bigquery/issues/474)) ([c1d15f4](https://www.github.com/googleapis/python-bigquery/commit/c1d15f4e5da4b7e10c00afffd59a5c7f3ded027a)) +* QueryJob.exception() *returns* the errors, not raises them ([#467](https://www.github.com/googleapis/python-bigquery/issues/467)) ([d763279](https://www.github.com/googleapis/python-bigquery/commit/d7632799769248b09a8558ba18f5025ebdd9675a)) + + +### Documentation + +* **bigquery:** Add alternative approach to setting credentials ([#517](https://www.github.com/googleapis/python-bigquery/issues/517)) ([60fbf28](https://www.github.com/googleapis/python-bigquery/commit/60fbf287b0d34d5db2e61cce7a5b42735ed43d0e)) +* explain retry behavior for DONE jobs ([#532](https://www.github.com/googleapis/python-bigquery/issues/532)) 
([696c443](https://www.github.com/googleapis/python-bigquery/commit/696c443f0a6740be0767e12b706a7771bc1460c3)) + ## [2.9.0](https://www.github.com/googleapis/python-bigquery/compare/v2.8.0...v2.9.0) (2021-02-18) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index b2a8c5535..13e710fcc 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.9.0" +__version__ = "2.10.0" From 1ba69273b25341783c46c4564e7ee632e421569b Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 1 Mar 2021 17:36:08 +0100 Subject: [PATCH 141/341] chore(deps): update dependency google-cloud-bigquery to v2.10.0 (#535) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index e9fcfca03..8ff7fa850 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,3 +1,3 @@ geojson==2.5.0 -google-cloud-bigquery==2.9.0 +google-cloud-bigquery==2.10.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index c638178fc..150fe2993 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.9.0 +google-cloud-bigquery==2.10.0 google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.2 grpcio==1.35.0 From 6f44c9e7e33bde83e75650055221167f4a845ccc Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 1 Mar 2021 17:36:31 +0100 Subject: [PATCH 142/341] chore(deps): update dependency grpcio to v1.36.0 (#536) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 
150fe2993..d645d8a1d 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.10.0 google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.2 -grpcio==1.35.0 +grpcio==1.36.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4 From 3917260bee6a9cf87bd5e2cdf23bf4c4e310ff32 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 4 Mar 2021 00:24:05 +0100 Subject: [PATCH 143/341] chore(deps): update dependency grpcio to v1.36.1 (#541) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [grpcio](https://grpc.io) | `==1.36.0` -> `==1.36.1` | [![age](https://badges.renovateapi.com/packages/pypi/grpcio/1.36.1/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/grpcio/1.36.1/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/grpcio/1.36.1/compatibility-slim/1.36.0)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/grpcio/1.36.1/confidence-slim/1.36.0)](https://docs.renovatebot.com/merge-confidence/) | --- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). 
View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index d645d8a1d..a80b7fa05 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.10.0 google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.2 -grpcio==1.36.0 +grpcio==1.36.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4 From f0259eb7ed4ff254ee238e87651992ff93481dae Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 5 Mar 2021 05:06:51 +0100 Subject: [PATCH 144/341] chore(deps): update dependency google-auth-oauthlib to v0.4.3 (#542) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index a80b7fa05..8ccbec38e 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ google-cloud-bigquery==2.10.0 google-cloud-bigquery-storage==2.3.0 -google-auth-oauthlib==0.4.2 +google-auth-oauthlib==0.4.3 grpcio==1.36.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' From 816934b29a7917e4278063e19c56cd0d38b5569f Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Tue, 9 Mar 2021 11:57:40 -0800 Subject: [PATCH 145/341] chore: upgrade gapic-generator-python to 0.42.2 (#543) PiperOrigin-RevId: 361662015 Source-Author: Google APIs Source-Date: Mon Mar 8 14:47:18 2021 -0800 Source-Repo: googleapis/googleapis Source-Sha: 28a591963253d52ce3a25a918cafbdd9928de8cf Source-Link: https://github.com/googleapis/googleapis/commit/28a591963253d52ce3a25a918cafbdd9928de8cf --- google/cloud/bigquery_v2/types/__init__.py | 28 +++++++++++----------- synth.metadata | 10 ++++---- 2 
files changed, 18 insertions(+), 20 deletions(-) diff --git a/google/cloud/bigquery_v2/types/__init__.py b/google/cloud/bigquery_v2/types/__init__.py index 00dc837c9..b76e65c65 100644 --- a/google/cloud/bigquery_v2/types/__init__.py +++ b/google/cloud/bigquery_v2/types/__init__.py @@ -16,6 +16,14 @@ # from .encryption_config import EncryptionConfiguration +from .model import ( + DeleteModelRequest, + GetModelRequest, + ListModelsRequest, + ListModelsResponse, + Model, + PatchModelRequest, +) from .model_reference import ModelReference from .standard_sql import ( StandardSqlDataType, @@ -23,26 +31,18 @@ StandardSqlStructType, ) from .table_reference import TableReference -from .model import ( - Model, - GetModelRequest, - PatchModelRequest, - DeleteModelRequest, - ListModelsRequest, - ListModelsResponse, -) __all__ = ( "EncryptionConfiguration", + "DeleteModelRequest", + "GetModelRequest", + "ListModelsRequest", + "ListModelsResponse", + "Model", + "PatchModelRequest", "ModelReference", "StandardSqlDataType", "StandardSqlField", "StandardSqlStructType", "TableReference", - "Model", - "GetModelRequest", - "PatchModelRequest", - "DeleteModelRequest", - "ListModelsRequest", - "ListModelsResponse", ) diff --git a/synth.metadata b/synth.metadata index 9412653c6..cab985521 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,15 +4,15 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "1823cadee3acf95c516d0479400e4175349ea199" + "sha": "f0259eb7ed4ff254ee238e87651992ff93481dae" } }, { "git": { "name": "googleapis", "remote": "https://github.com/googleapis/googleapis.git", - "sha": "e13001be33d69042a9505e698f792587a804a5cf", - "internalRef": "358152223" + "sha": "28a591963253d52ce3a25a918cafbdd9928de8cf", + "internalRef": "361662015" } }, { @@ -95,8 +95,6 @@ "MANIFEST.in", "docs/_static/custom.css", "docs/_templates/layout.html", - "docs/bigquery_v2/model_service.rst", - "docs/bigquery_v2/services.rst", 
"docs/bigquery_v2/types.rst", "docs/conf.py", "google/cloud/bigquery_v2/__init__.py", @@ -128,4 +126,4 @@ "setup.cfg", "testing/.gitignore" ] -} +} \ No newline at end of file From d5c7e11a1dc2a149d74294bfadbae62d70573e69 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 9 Mar 2021 21:02:56 +0100 Subject: [PATCH 146/341] feat: add context manager support to client (#540) --- google/cloud/bigquery/client.py | 6 ++++++ tests/unit/test_client.py | 22 ++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index f8c0d7c93..bdbcb767c 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -3423,6 +3423,12 @@ def schema_to_json(self, schema_list, destination): with open(destination, mode="w") as file_obj: return self._schema_to_json_file_object(json_schema_list, file_obj) + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.close() + # pylint: disable=unused-argument def _item_to_project(iterator, resource): diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 66add9c0a..6c3263ea5 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -7218,6 +7218,28 @@ def test_list_rows_error(self): with self.assertRaises(TypeError): client.list_rows(1) + def test_context_manager_enter_returns_itself(self): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + with mock.patch.object(client, "close"), client as context_var: + pass + + self.assertIs(client, context_var) + + def test_context_manager_exit_closes_client(self): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + fake_close = mock.Mock() + with mock.patch.object(client, "close", fake_close): + with client: + pass + + fake_close.assert_called_once() + class 
Test_make_job_id(unittest.TestCase): def _call_fut(self, job_id, prefix=None): From 3ce826e8805e7df4933ada29677c2c88709cd539 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 9 Mar 2021 20:16:09 +0000 Subject: [PATCH 147/341] chore: release 2.11.0 (#545) :robot: I have created a release \*beep\* \*boop\* --- ## [2.11.0](https://www.github.com/googleapis/python-bigquery/compare/v2.10.0...v2.11.0) (2021-03-09) ### Features * add context manager support to client ([#540](https://www.github.com/googleapis/python-bigquery/issues/540)) ([d5c7e11](https://www.github.com/googleapis/python-bigquery/commit/d5c7e11a1dc2a149d74294bfadbae62d70573e69)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9afd523a4..512d38108 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.11.0](https://www.github.com/googleapis/python-bigquery/compare/v2.10.0...v2.11.0) (2021-03-09) + + +### Features + +* add context manager support to client ([#540](https://www.github.com/googleapis/python-bigquery/issues/540)) ([d5c7e11](https://www.github.com/googleapis/python-bigquery/commit/d5c7e11a1dc2a149d74294bfadbae62d70573e69)) + ## [2.10.0](https://www.github.com/googleapis/python-bigquery/compare/v2.9.0...v2.10.0) (2021-02-25) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 13e710fcc..e6e357434 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.10.0" +__version__ = "2.11.0" From a3ab9efdd0758829845cfcb6ca0ac1f03ab44f64 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 10 Mar 2021 21:31:51 +0100 Subject: [PATCH 148/341] feat: make QueryJob.done() method more performant (#544) --- google/cloud/bigquery/job/query.py | 91 ++++++++++++------------------ tests/unit/job/test_query.py | 54 ++++-------------- 2 files changed, 45 insertions(+), 100 deletions(-) diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 5c1118500..491983f8e 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -19,6 +19,7 @@ import re from google.api_core import exceptions +from google.api_core.future import polling as polling_future import requests from google.cloud.bigquery.dataset import Dataset @@ -42,7 +43,6 @@ from google.cloud.bigquery._tqdm_helpers import wait_for_query from google.cloud.bigquery.job.base import _AsyncJob -from google.cloud.bigquery.job.base import _DONE_STATE from google.cloud.bigquery.job.base import _JobConfig from google.cloud.bigquery.job.base import _JobReference @@ -974,61 +974,6 @@ def estimated_bytes_processed(self): result = int(result) return result - def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): - """Refresh the job and checks if it is complete. - - Args: - retry (Optional[google.api_core.retry.Retry]): - How to retry the call that retrieves query results. If the job state is - ``DONE``, retrying is aborted early, as the job will not change anymore. - timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - reload (Optional[bool]): - If ``True``, make an API call to refresh the job state of - unfinished jobs before checking. Default ``True``. - - Returns: - bool: ``True`` if the job is complete or if fetching its status resulted in - an error, ``False`` otherwise. 
- """ - # Do not refresh if the state is already done, as the job will not - # change once complete. - is_done = self.state == _DONE_STATE - if not reload or is_done: - return is_done - - # If an explicit timeout is not given, fall back to the transport timeout - # stored in _blocking_poll() in the process of polling for job completion. - transport_timeout = timeout if timeout is not None else self._transport_timeout - - try: - self._reload_query_results(retry=retry, timeout=transport_timeout) - except exceptions.GoogleAPIError as exc: - # Reloading also updates error details on self, thus no need for an - # explicit self.set_exception() call if reloading succeeds. - try: - self.reload(retry=retry, timeout=transport_timeout) - except exceptions.GoogleAPIError: - # Use the query results reload exception, as it generally contains - # much more useful error information. - self.set_exception(exc) - return True - else: - return self.state == _DONE_STATE - - # Only reload the job once we know the query is complete. - # This will ensure that fields such as the destination table are - # correctly populated. - if self._query_results.complete: - try: - self.reload(retry=retry, timeout=transport_timeout) - except exceptions.GoogleAPIError as exc: - self.set_exception(exc) - return True - - return self.state == _DONE_STATE - def _blocking_poll(self, timeout=None, **kwargs): self._done_timeout = timeout self._transport_timeout = timeout @@ -1130,6 +1075,40 @@ def _reload_query_results(self, retry=DEFAULT_RETRY, timeout=None): timeout=transport_timeout, ) + def _done_or_raise(self, retry=DEFAULT_RETRY, timeout=None): + """Check if the query has finished running and raise if it's not. + + If the query has finished, also reload the job itself. + """ + # If an explicit timeout is not given, fall back to the transport timeout + # stored in _blocking_poll() in the process of polling for job completion. 
+ transport_timeout = timeout if timeout is not None else self._transport_timeout + + try: + self._reload_query_results(retry=retry, timeout=transport_timeout) + except exceptions.GoogleAPIError as exc: + # Reloading also updates error details on self, thus no need for an + # explicit self.set_exception() call if reloading succeeds. + try: + self.reload(retry=retry, timeout=transport_timeout) + except exceptions.GoogleAPIError: + # Use the query results reload exception, as it generally contains + # much more useful error information. + self.set_exception(exc) + finally: + return + + # Only reload the job once we know the query is complete. + # This will ensure that fields such as the destination table are + # correctly populated. + if not self._query_results.complete: + raise polling_future._OperationNotComplete() + else: + try: + self.reload(retry=retry, timeout=transport_timeout) + except exceptions.GoogleAPIError as exc: + self.set_exception(exc) + def result( self, page_size=None, diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 655a121e6..4665933ea 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -309,16 +309,7 @@ def test_cancelled(self): self.assertTrue(job.cancelled()) - def test_done_job_complete(self): - client = _make_client(project=self.PROJECT) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - job._query_results = google.cloud.bigquery.query._QueryResults.from_api_repr( - {"jobComplete": True, "jobReference": resource["jobReference"]} - ) - self.assertTrue(job.done()) - - def test_done_w_timeout(self): + def test__done_or_raise_w_timeout(self): client = _make_client(project=self.PROJECT) resource = self._make_resource(ended=False) job = self._get_target_class().from_api_repr(resource, client) @@ -326,7 +317,7 @@ def test_done_w_timeout(self): with mock.patch.object( client, "_get_query_results" ) as fake_get_results, 
mock.patch.object(job, "reload") as fake_reload: - job.done(timeout=42) + job._done_or_raise(timeout=42) fake_get_results.assert_called_once() call_args = fake_get_results.call_args @@ -335,7 +326,7 @@ def test_done_w_timeout(self): call_args = fake_reload.call_args self.assertEqual(call_args.kwargs.get("timeout"), 42) - def test_done_w_timeout_and_longer_internal_api_timeout(self): + def test__done_or_raise_w_timeout_and_longer_internal_api_timeout(self): client = _make_client(project=self.PROJECT) resource = self._make_resource(ended=False) job = self._get_target_class().from_api_repr(resource, client) @@ -344,7 +335,7 @@ def test_done_w_timeout_and_longer_internal_api_timeout(self): with mock.patch.object( client, "_get_query_results" ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload: - job.done(timeout=5.5) + job._done_or_raise(timeout=5.5) # The expected timeout used is simply the given timeout, as the latter # is shorter than the job's internal done timeout. @@ -357,7 +348,7 @@ def test_done_w_timeout_and_longer_internal_api_timeout(self): call_args = fake_reload.call_args self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) - def test_done_w_query_results_error_reload_ok_job_finished(self): + def test__done_or_raise_w_query_results_error_reload_ok(self): client = _make_client(project=self.PROJECT) bad_request_error = exceptions.BadRequest("Error in query") client._get_query_results = mock.Mock(side_effect=bad_request_error) @@ -373,32 +364,11 @@ def fake_reload(self, *args, **kwargs): fake_reload_method = types.MethodType(fake_reload, job) with mock.patch.object(job, "reload", new=fake_reload_method): - is_done = job.done() + job._done_or_raise() - assert is_done assert isinstance(job._exception, exceptions.BadRequest) - def test_done_w_query_results_error_reload_ok_job_still_running(self): - client = _make_client(project=self.PROJECT) - retry_error = exceptions.RetryError("Too many retries", cause=TimeoutError) - 
client._get_query_results = mock.Mock(side_effect=retry_error) - - resource = self._make_resource(ended=False) - job = self._get_target_class().from_api_repr(resource, client) - job._exception = None - - def fake_reload(self, *args, **kwargs): - self._properties["status"]["state"] = "RUNNING" - - fake_reload_method = types.MethodType(fake_reload, job) - - with mock.patch.object(job, "reload", new=fake_reload_method): - is_done = job.done() - - assert not is_done - assert job._exception is None - - def test_done_w_query_results_error_reload_error(self): + def test__done_or_raise_w_query_results_error_reload_error(self): client = _make_client(project=self.PROJECT) bad_request_error = exceptions.BadRequest("Error in query") client._get_query_results = mock.Mock(side_effect=bad_request_error) @@ -409,12 +379,11 @@ def test_done_w_query_results_error_reload_error(self): job.reload = mock.Mock(side_effect=reload_error) job._exception = None - is_done = job.done() + job._done_or_raise() - assert is_done assert job._exception is bad_request_error - def test_done_w_job_query_results_ok_reload_error(self): + def test__done_or_raise_w_job_query_results_ok_reload_error(self): client = _make_client(project=self.PROJECT) query_results = google.cloud.bigquery.query._QueryResults( properties={ @@ -430,9 +399,8 @@ def test_done_w_job_query_results_ok_reload_error(self): job.reload = mock.Mock(side_effect=retry_error) job._exception = None - is_done = job.done() + job._done_or_raise() - assert is_done assert job._exception is retry_error def test_query_plan(self): @@ -1905,8 +1873,6 @@ def test_reload_w_timeout(self): ) def test_iter(self): - import types - begun_resource = self._make_resource() query_resource = { "jobComplete": True, From 0d7212cec1e786b88b5825318406ac64e30e2a9d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 10 Mar 2021 21:39:07 +0100 Subject: [PATCH 149/341] chore(deps): update dependency google-cloud-bigquery to v2.11.0 (#546) --- 
samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 8ff7fa850..34896627e 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,3 +1,3 @@ geojson==2.5.0 -google-cloud-bigquery==2.10.0 +google-cloud-bigquery==2.11.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 8ccbec38e..b55d2b3a4 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.10.0 +google-cloud-bigquery==2.11.0 google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.3 grpcio==1.36.1 From efdf1c653770f7c03c17e31e3c2f279bb685637b Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 15 Mar 2021 09:52:04 -0500 Subject: [PATCH 150/341] refactor: split pandas system tests to new module (#548) Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Follow-up to https://github.com/googleapis/python-bigquery/pull/448 Towards #366 --- tests/system/conftest.py | 39 ++ tests/system/helpers.py | 94 ++++ tests/system/test_client.py | 953 ++---------------------------------- tests/system/test_pandas.py | 801 ++++++++++++++++++++++++++++++ 4 files changed, 969 insertions(+), 918 deletions(-) create mode 100644 tests/system/conftest.py create mode 100644 tests/system/helpers.py create mode 100644 tests/system/test_pandas.py diff --git a/tests/system/conftest.py b/tests/system/conftest.py new file mode 100644 index 000000000..4b5fcb543 --- /dev/null +++ b/tests/system/conftest.py @@ -0,0 +1,39 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from . 
import helpers + + +@pytest.fixture(scope="session") +def bigquery_client(): + from google.cloud import bigquery + + return bigquery.Client() + + +@pytest.fixture(scope="session") +def bqstorage_client(bigquery_client): + from google.cloud import bigquery_storage + + return bigquery_storage.BigQueryReadClient(credentials=bigquery_client._credentials) + + +@pytest.fixture +def dataset_id(bigquery_client): + dataset_id = f"bqsystem_{helpers.temp_suffix()}" + bigquery_client.create_dataset(dataset_id) + yield dataset_id + bigquery_client.delete_dataset(dataset_id, delete_contents=True) diff --git a/tests/system/helpers.py b/tests/system/helpers.py new file mode 100644 index 000000000..76e609345 --- /dev/null +++ b/tests/system/helpers.py @@ -0,0 +1,94 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import decimal +import uuid + +import google.api_core.exceptions +import test_utils.retry + +from google.cloud._helpers import UTC + + +_naive = datetime.datetime(2016, 12, 5, 12, 41, 9) +_naive_microseconds = datetime.datetime(2016, 12, 5, 12, 41, 9, 250000) +_stamp = "%s %s" % (_naive.date().isoformat(), _naive.time().isoformat()) +_stamp_microseconds = _stamp + ".250000" +_zoned = _naive.replace(tzinfo=UTC) +_zoned_microseconds = _naive_microseconds.replace(tzinfo=UTC) +_numeric = decimal.Decimal("123456789.123456789") + + +# Examples of most data types to test with query() and DB-API. 
+STANDARD_SQL_EXAMPLES = [ + ("SELECT 1", 1), + ("SELECT 1.3", 1.3), + ("SELECT TRUE", True), + ('SELECT "ABC"', "ABC"), + ('SELECT CAST("foo" AS BYTES)', b"foo"), + ('SELECT TIMESTAMP "%s"' % (_stamp,), _zoned), + ('SELECT TIMESTAMP "%s"' % (_stamp_microseconds,), _zoned_microseconds,), + ('SELECT DATETIME(TIMESTAMP "%s")' % (_stamp,), _naive), + ('SELECT DATETIME(TIMESTAMP "%s")' % (_stamp_microseconds,), _naive_microseconds,), + ('SELECT DATE(TIMESTAMP "%s")' % (_stamp,), _naive.date()), + ('SELECT TIME(TIMESTAMP "%s")' % (_stamp,), _naive.time()), + ('SELECT NUMERIC "%s"' % (_numeric,), _numeric), + ("SELECT (1, 2)", {"_field_1": 1, "_field_2": 2}), + ( + "SELECT ((1, 2), (3, 4), 5)", + { + "_field_1": {"_field_1": 1, "_field_2": 2}, + "_field_2": {"_field_1": 3, "_field_2": 4}, + "_field_3": 5, + }, + ), + ("SELECT [1, 2, 3]", [1, 2, 3]), + ( + "SELECT ([1, 2], 3, [4, 5])", + {"_field_1": [1, 2], "_field_2": 3, "_field_3": [4, 5]}, + ), + ( + "SELECT [(1, 2, 3), (4, 5, 6)]", + [ + {"_field_1": 1, "_field_2": 2, "_field_3": 3}, + {"_field_1": 4, "_field_2": 5, "_field_3": 6}, + ], + ), + ( + "SELECT [([1, 2, 3], 4), ([5, 6], 7)]", + [{"_field_1": [1, 2, 3], "_field_2": 4}, {"_field_1": [5, 6], "_field_2": 7}], + ), + ("SELECT ARRAY(SELECT STRUCT([1, 2]))", [{"_field_1": [1, 2]}]), + ("SELECT ST_GeogPoint(1, 2)", "POINT(1 2)"), +] + + +def temp_suffix(): + now = datetime.datetime.now() + return f"{now.strftime('%Y%m%d%H%M%S')}_{uuid.uuid4().hex[:8]}" + + +def _rate_limit_exceeded(forbidden): + """Predicate: pass only exceptions with 'rateLimitExceeded' as reason.""" + return any(error["reason"] == "rateLimitExceeded" for error in forbidden._errors) + + +# We need to wait to stay within the rate limits. +# The alternative outcome is a 403 Forbidden response from upstream, which +# they return instead of the more appropriate 429. 
+# See https://cloud.google.com/bigquery/quota-policy +retry_403 = test_utils.retry.RetryErrors( + google.api_core.exceptions.Forbidden, error_predicate=_rate_limit_exceeded, +) diff --git a/tests/system/test_client.py b/tests/system/test_client.py index ed48b0bfe..133f609a6 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -13,7 +13,6 @@ # limitations under the License. import base64 -import collections import concurrent.futures import csv import datetime @@ -29,9 +28,11 @@ import psutil import pytest -import pytz import pkg_resources +from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT +from . import helpers + try: from google.cloud import bigquery_storage except ImportError: # pragma: NO COVER @@ -42,10 +43,6 @@ except ImportError: # pragma: NO COVER fastavro = None -try: - import pandas -except ImportError: # pragma: NO COVER - pandas = None try: import pyarrow import pyarrow.types @@ -56,7 +53,6 @@ from google.api_core.exceptions import BadRequest from google.api_core.exceptions import ClientError from google.api_core.exceptions import Conflict -from google.api_core.exceptions import Forbidden from google.api_core.exceptions import GoogleAPICallError from google.api_core.exceptions import NotFound from google.api_core.exceptions import InternalServerError @@ -65,7 +61,6 @@ from google.api_core.iam import Policy from google.cloud import bigquery from google.cloud import bigquery_v2 -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table @@ -121,14 +116,8 @@ (TooManyRequests, InternalServerError, ServiceUnavailable) ) -PANDAS_MINIMUM_VERSION = pkg_resources.parse_version("1.0.0") PYARROW_MINIMUM_VERSION = pkg_resources.parse_version("0.17.0") -if pandas: - PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version -else: - 
PANDAS_INSTALLED_VERSION = None - if pyarrow: PYARROW_INSTALLED_VERSION = pkg_resources.get_distribution("pyarrow").parsed_version else: @@ -154,18 +143,6 @@ def _load_json_schema(filename="schema.json"): return _parse_schema_resource(json.load(schema_file)) -def _rate_limit_exceeded(forbidden): - """Predicate: pass only exceptions with 'rateLimitExceeded' as reason.""" - return any(error["reason"] == "rateLimitExceeded" for error in forbidden._errors) - - -# We need to wait to stay within the rate limits. -# The alternative outcome is a 403 Forbidden response from upstream, which -# they return instead of the more appropriate 429. -# See https://cloud.google.com/bigquery/quota-policy -retry_403 = RetryErrors(Forbidden, error_predicate=_rate_limit_exceeded) - - class Config(object): """Run-time configuration to be modified at set-up. @@ -262,7 +239,7 @@ def test_get_dataset(self): dataset_arg = Dataset(dataset_ref) dataset_arg.friendly_name = "Friendly" dataset_arg.description = "Description" - dataset = retry_403(client.create_dataset)(dataset_arg) + dataset = helpers.retry_403(client.create_dataset)(dataset_arg) self.to_delete.append(dataset) dataset_ref = bigquery.DatasetReference(project, dataset_id) @@ -345,7 +322,7 @@ def test_create_table(self): table_arg = Table(dataset.table(table_id), schema=SCHEMA) self.assertFalse(_table_exists(table_arg)) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) @@ -380,7 +357,7 @@ def test_create_table_with_policy(self): table_arg = Table(dataset.table(table_id), schema=schema) self.assertFalse(_table_exists(table_arg)) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) @@ -416,7 +393,7 @@ def 
test_create_table_w_time_partitioning_w_clustering_fields(self): table_arg.time_partitioning = TimePartitioning(field="transaction_time") table_arg.clustering_fields = ["user_email", "store_code"] - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) @@ -430,7 +407,7 @@ def test_delete_dataset_with_string(self): dataset_id = _make_dataset_id("delete_table_true_with_string") project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) - retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_ref)) + helpers.retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_ref)) self.assertTrue(_dataset_exists(dataset_ref)) Config.CLIENT.delete_dataset(dataset_id) self.assertFalse(_dataset_exists(dataset_ref)) @@ -439,11 +416,11 @@ def test_delete_dataset_delete_contents_true(self): dataset_id = _make_dataset_id("delete_table_true_with_content") project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) - dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_ref)) + dataset = helpers.retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_ref)) table_id = "test_table" table_arg = Table(dataset.table(table_id), schema=SCHEMA) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) Config.CLIENT.delete_dataset(dataset, delete_contents=True) self.assertFalse(_table_exists(table)) @@ -455,7 +432,7 @@ def test_delete_dataset_delete_contents_false(self): table_id = "test_table" table_arg = Table(dataset.table(table_id), schema=SCHEMA) - retry_403(Config.CLIENT.create_table)(table_arg) + helpers.retry_403(Config.CLIENT.create_table)(table_arg) with self.assertRaises(exceptions.BadRequest): Config.CLIENT.delete_dataset(dataset) @@ -504,7 +481,7 @@ def test_list_tables(self): ] for table_name in 
tables_to_create: table = Table(dataset.table(table_name), schema=SCHEMA) - created_table = retry_403(Config.CLIENT.create_table)(table) + created_table = helpers.retry_403(Config.CLIENT.create_table)(table) self.to_delete.insert(0, created_table) # Retrieve the tables. @@ -534,7 +511,7 @@ def test_update_table(self): TABLE_NAME = "test_table" table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA) self.assertFalse(_table_exists(table_arg)) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) self.assertIsNone(table.friendly_name) @@ -574,7 +551,7 @@ def test_update_table_schema(self): TABLE_NAME = "test_table" table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA) self.assertFalse(_table_exists(table_arg)) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) voter = bigquery.SchemaField("voter", "BOOLEAN", mode="NULLABLE") @@ -674,7 +651,7 @@ def test_insert_rows_then_dump_table(self): ] table_arg = Table(dataset.table(TABLE_ID), schema=schema) self.assertFalse(_table_exists(table_arg)) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) @@ -732,413 +709,6 @@ def test_load_table_from_local_avro_file_then_dump_table(self): sorted(row_tuples, key=by_wavelength), sorted(ROWS, key=by_wavelength) ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_w_automatic_schema(self): - """Test that a DataFrame with dtypes that map well to BigQuery types - can be uploaded without specifying a schema. 
- - https://github.com/googleapis/google-cloud-python/issues/9044 - """ - df_data = collections.OrderedDict( - [ - ("bool_col", pandas.Series([True, False, True], dtype="bool")), - ( - "ts_col", - pandas.Series( - [ - datetime.datetime(2010, 1, 2, 3, 44, 50), - datetime.datetime(2011, 2, 3, 14, 50, 59), - datetime.datetime(2012, 3, 14, 15, 16), - ], - dtype="datetime64[ns]", - ).dt.tz_localize(pytz.utc), - ), - ( - "dt_col", - pandas.Series( - [ - datetime.datetime(2010, 1, 2, 3, 44, 50), - datetime.datetime(2011, 2, 3, 14, 50, 59), - datetime.datetime(2012, 3, 14, 15, 16), - ], - dtype="datetime64[ns]", - ), - ), - ("float32_col", pandas.Series([1.0, 2.0, 3.0], dtype="float32")), - ("float64_col", pandas.Series([4.0, 5.0, 6.0], dtype="float64")), - ("int8_col", pandas.Series([-12, -11, -10], dtype="int8")), - ("int16_col", pandas.Series([-9, -8, -7], dtype="int16")), - ("int32_col", pandas.Series([-6, -5, -4], dtype="int32")), - ("int64_col", pandas.Series([-3, -2, -1], dtype="int64")), - ("uint8_col", pandas.Series([0, 1, 2], dtype="uint8")), - ("uint16_col", pandas.Series([3, 4, 5], dtype="uint16")), - ("uint32_col", pandas.Series([6, 7, 8], dtype="uint32")), - ] - ) - dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) - - dataset_id = _make_dataset_id("bq_load_test") - self.temp_dataset(dataset_id) - table_id = "{}.{}.load_table_from_dataframe_w_automatic_schema".format( - Config.CLIENT.project, dataset_id - ) - - load_job = Config.CLIENT.load_table_from_dataframe(dataframe, table_id) - load_job.result() - - table = Config.CLIENT.get_table(table_id) - self.assertEqual( - tuple(table.schema), - ( - bigquery.SchemaField("bool_col", "BOOLEAN"), - bigquery.SchemaField("ts_col", "TIMESTAMP"), - # BigQuery does not support uploading DATETIME values from - # Parquet files. 
See: - # https://github.com/googleapis/google-cloud-python/issues/9996 - bigquery.SchemaField("dt_col", "TIMESTAMP"), - bigquery.SchemaField("float32_col", "FLOAT"), - bigquery.SchemaField("float64_col", "FLOAT"), - bigquery.SchemaField("int8_col", "INTEGER"), - bigquery.SchemaField("int16_col", "INTEGER"), - bigquery.SchemaField("int32_col", "INTEGER"), - bigquery.SchemaField("int64_col", "INTEGER"), - bigquery.SchemaField("uint8_col", "INTEGER"), - bigquery.SchemaField("uint16_col", "INTEGER"), - bigquery.SchemaField("uint32_col", "INTEGER"), - ), - ) - self.assertEqual(table.num_rows, 3) - - @unittest.skipIf( - pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIMUM_VERSION, - "Only `pandas version >=1.0.0` is supported", - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_w_nullable_int64_datatype(self): - """Test that a DataFrame containing column with None-type values and int64 datatype - can be uploaded if a BigQuery schema is specified. 
- - https://github.com/googleapis/python-bigquery/issues/22 - """ - - dataset_id = _make_dataset_id("bq_load_test") - self.temp_dataset(dataset_id) - table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format( - Config.CLIENT.project, dataset_id - ) - table_schema = (bigquery.SchemaField("x", "INTEGER", mode="NULLABLE"),) - table = retry_403(Config.CLIENT.create_table)( - Table(table_id, schema=table_schema) - ) - self.to_delete.insert(0, table) - - df_data = collections.OrderedDict( - [("x", pandas.Series([1, 2, None, 4], dtype="Int64"))] - ) - dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) - load_job = Config.CLIENT.load_table_from_dataframe(dataframe, table_id) - load_job.result() - table = Config.CLIENT.get_table(table_id) - self.assertEqual(tuple(table.schema), (bigquery.SchemaField("x", "INTEGER"),)) - self.assertEqual(table.num_rows, 4) - - @unittest.skipIf( - pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIMUM_VERSION, - "Only `pandas version >=1.0.0` is supported", - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(self): - """Test that a DataFrame containing column with None-type values and int64 datatype - can be uploaded without specifying a schema. 
- - https://github.com/googleapis/python-bigquery/issues/22 - """ - - dataset_id = _make_dataset_id("bq_load_test") - self.temp_dataset(dataset_id) - table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format( - Config.CLIENT.project, dataset_id - ) - df_data = collections.OrderedDict( - [("x", pandas.Series([1, 2, None, 4], dtype="Int64"))] - ) - dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) - load_job = Config.CLIENT.load_table_from_dataframe(dataframe, table_id) - load_job.result() - table = Config.CLIENT.get_table(table_id) - self.assertEqual(tuple(table.schema), (bigquery.SchemaField("x", "INTEGER"),)) - self.assertEqual(table.num_rows, 4) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_w_nulls(self): - """Test that a DataFrame with null columns can be uploaded if a - BigQuery schema is specified. - - See: https://github.com/googleapis/google-cloud-python/issues/7370 - """ - # Schema with all scalar types. - scalars_schema = ( - bigquery.SchemaField("bool_col", "BOOLEAN"), - bigquery.SchemaField("bytes_col", "BYTES"), - bigquery.SchemaField("date_col", "DATE"), - bigquery.SchemaField("dt_col", "DATETIME"), - bigquery.SchemaField("float_col", "FLOAT"), - bigquery.SchemaField("geo_col", "GEOGRAPHY"), - bigquery.SchemaField("int_col", "INTEGER"), - bigquery.SchemaField("num_col", "NUMERIC"), - bigquery.SchemaField("str_col", "STRING"), - bigquery.SchemaField("time_col", "TIME"), - bigquery.SchemaField("ts_col", "TIMESTAMP"), - ) - if _BIGNUMERIC_SUPPORT: - scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) - - table_schema = scalars_schema + ( - # TODO: Array columns can't be read due to NULLABLE versus REPEATED - # mode mismatch. See: - # https://issuetracker.google.com/133415569#comment3 - # bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"), - # TODO: Support writing StructArrays to Parquet. 
See: - # https://jira.apache.org/jira/browse/ARROW-2587 - # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema), - ) - num_rows = 100 - nulls = [None] * num_rows - df_data = [ - ("bool_col", nulls), - ("bytes_col", nulls), - ("date_col", nulls), - ("dt_col", nulls), - ("float_col", nulls), - ("geo_col", nulls), - ("int_col", nulls), - ("num_col", nulls), - ("str_col", nulls), - ("time_col", nulls), - ("ts_col", nulls), - ] - if _BIGNUMERIC_SUPPORT: - df_data.append(("bignum_col", nulls)) - df_data = collections.OrderedDict(df_data) - dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) - - dataset_id = _make_dataset_id("bq_load_test") - self.temp_dataset(dataset_id) - table_id = "{}.{}.load_table_from_dataframe_w_nulls".format( - Config.CLIENT.project, dataset_id - ) - - # Create the table before loading so that schema mismatch errors are - # identified. - table = retry_403(Config.CLIENT.create_table)( - Table(table_id, schema=table_schema) - ) - self.to_delete.insert(0, table) - - job_config = bigquery.LoadJobConfig(schema=table_schema) - load_job = Config.CLIENT.load_table_from_dataframe( - dataframe, table_id, job_config=job_config - ) - load_job.result() - - table = Config.CLIENT.get_table(table) - self.assertEqual(tuple(table.schema), table_schema) - self.assertEqual(table.num_rows, num_rows) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_w_required(self): - """Test that a DataFrame with required columns can be uploaded if a - BigQuery schema is specified. 
- - See: https://github.com/googleapis/google-cloud-python/issues/8093 - """ - table_schema = ( - bigquery.SchemaField("name", "STRING", mode="REQUIRED"), - bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), - ) - - records = [{"name": "Chip", "age": 2}, {"name": "Dale", "age": 3}] - dataframe = pandas.DataFrame(records, columns=["name", "age"]) - job_config = bigquery.LoadJobConfig(schema=table_schema) - dataset_id = _make_dataset_id("bq_load_test") - self.temp_dataset(dataset_id) - table_id = "{}.{}.load_table_from_dataframe_w_required".format( - Config.CLIENT.project, dataset_id - ) - - # Create the table before loading so that schema mismatch errors are - # identified. - table = retry_403(Config.CLIENT.create_table)( - Table(table_id, schema=table_schema) - ) - self.to_delete.insert(0, table) - - job_config = bigquery.LoadJobConfig(schema=table_schema) - load_job = Config.CLIENT.load_table_from_dataframe( - dataframe, table_id, job_config=job_config - ) - load_job.result() - - table = Config.CLIENT.get_table(table) - self.assertEqual(tuple(table.schema), table_schema) - self.assertEqual(table.num_rows, 2) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_w_explicit_schema(self): - # Schema with all scalar types. - # TODO: Uploading DATETIME columns currently fails, thus that field type - # is temporarily removed from the test. 
- # See: - # https://github.com/googleapis/python-bigquery/issues/61 - # https://issuetracker.google.com/issues/151765076 - scalars_schema = ( - bigquery.SchemaField("bool_col", "BOOLEAN"), - bigquery.SchemaField("bytes_col", "BYTES"), - bigquery.SchemaField("date_col", "DATE"), - # bigquery.SchemaField("dt_col", "DATETIME"), - bigquery.SchemaField("float_col", "FLOAT"), - bigquery.SchemaField("geo_col", "GEOGRAPHY"), - bigquery.SchemaField("int_col", "INTEGER"), - bigquery.SchemaField("num_col", "NUMERIC"), - bigquery.SchemaField("str_col", "STRING"), - bigquery.SchemaField("time_col", "TIME"), - bigquery.SchemaField("ts_col", "TIMESTAMP"), - ) - if _BIGNUMERIC_SUPPORT: - scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) - - table_schema = scalars_schema + ( - # TODO: Array columns can't be read due to NULLABLE versus REPEATED - # mode mismatch. See: - # https://issuetracker.google.com/133415569#comment3 - # bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"), - # TODO: Support writing StructArrays to Parquet. 
See: - # https://jira.apache.org/jira/browse/ARROW-2587 - # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema), - ) - - df_data = [ - ("bool_col", [True, None, False]), - ("bytes_col", [b"abc", None, b"def"]), - ("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)]), - # ( - # "dt_col", - # [ - # datetime.datetime(1, 1, 1, 0, 0, 0), - # None, - # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), - # ], - # ), - ("float_col", [float("-inf"), float("nan"), float("inf")]), - ( - "geo_col", - [ - "POINT(30 10)", - None, - "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", - ], - ), - ("int_col", [-9223372036854775808, None, 9223372036854775807]), - ( - "num_col", - [ - decimal.Decimal("-99999999999999999999999999999.999999999"), - None, - decimal.Decimal("99999999999999999999999999999.999999999"), - ], - ), - ("str_col", [u"abc", None, u"def"]), - ( - "time_col", - [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)], - ), - ( - "ts_col", - [ - datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), - None, - datetime.datetime( - 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc - ), - ], - ), - ] - if _BIGNUMERIC_SUPPORT: - df_data.append( - ( - "bignum_col", - [ - decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), - None, - decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), - ], - ) - ) - df_data = collections.OrderedDict(df_data) - dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) - - dataset_id = _make_dataset_id("bq_load_test") - self.temp_dataset(dataset_id) - table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema".format( - Config.CLIENT.project, dataset_id - ) - - job_config = bigquery.LoadJobConfig(schema=table_schema) - load_job = Config.CLIENT.load_table_from_dataframe( - dataframe, table_id, job_config=job_config - ) - load_job.result() - - table = Config.CLIENT.get_table(table_id) - self.assertEqual(tuple(table.schema), table_schema) - self.assertEqual(table.num_rows, 
3) - - @unittest.skipIf( - pyarrow is None or PYARROW_INSTALLED_VERSION < PYARROW_MINIMUM_VERSION, - "Only `pyarrow version >=0.17.0` is supported", - ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_load_table_from_dataframe_w_struct_datatype(self): - """Test that a DataFrame with struct datatype can be uploaded if a - BigQuery schema is specified. - - https://github.com/googleapis/python-bigquery/issues/21 - """ - dataset_id = _make_dataset_id("bq_load_test") - self.temp_dataset(dataset_id) - table_id = "{}.{}.load_table_from_dataframe_w_struct_datatype".format( - Config.CLIENT.project, dataset_id - ) - table_schema = [ - bigquery.SchemaField( - "bar", - "RECORD", - fields=[ - bigquery.SchemaField("id", "INTEGER", mode="REQUIRED"), - bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), - ], - mode="REQUIRED", - ), - ] - table = retry_403(Config.CLIENT.create_table)( - Table(table_id, schema=table_schema) - ) - self.to_delete.insert(0, table) - - df_data = [{"id": 1, "age": 21}, {"id": 2, "age": 22}, {"id": 2, "age": 23}] - dataframe = pandas.DataFrame(data={"bar": df_data}, columns=["bar"]) - - load_job = Config.CLIENT.load_table_from_dataframe(dataframe, table_id) - load_job.result() - - table = Config.CLIENT.get_table(table_id) - self.assertEqual(table.schema, table_schema) - self.assertEqual(table.num_rows, 3) - def test_load_table_from_json_basic_use(self): table_schema = ( bigquery.SchemaField("name", "STRING", mode="REQUIRED"), @@ -1160,7 +730,7 @@ def test_load_table_from_json_basic_use(self): # Create the table before loading so that schema mismatch errors are # identified. 
- table = retry_403(Config.CLIENT.create_table)( + table = helpers.retry_403(Config.CLIENT.create_table)( Table(table_id, schema=table_schema) ) self.to_delete.insert(0, table) @@ -1175,149 +745,6 @@ def test_load_table_from_json_basic_use(self): self.assertEqual(tuple(table.schema), table_schema) self.assertEqual(table.num_rows, 2) - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_load_table_from_dataframe_w_explicit_schema_source_format_csv(self): - from google.cloud.bigquery.job import SourceFormat - - table_schema = ( - bigquery.SchemaField("bool_col", "BOOLEAN"), - bigquery.SchemaField("bytes_col", "BYTES"), - bigquery.SchemaField("date_col", "DATE"), - bigquery.SchemaField("dt_col", "DATETIME"), - bigquery.SchemaField("float_col", "FLOAT"), - bigquery.SchemaField("geo_col", "GEOGRAPHY"), - bigquery.SchemaField("int_col", "INTEGER"), - bigquery.SchemaField("num_col", "NUMERIC"), - bigquery.SchemaField("bignum_col", "BIGNUMERIC"), - bigquery.SchemaField("str_col", "STRING"), - bigquery.SchemaField("time_col", "TIME"), - bigquery.SchemaField("ts_col", "TIMESTAMP"), - ) - df_data = collections.OrderedDict( - [ - ("bool_col", [True, None, False]), - ("bytes_col", ["abc", None, "def"]), - ( - "date_col", - [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)], - ), - ( - "dt_col", - [ - datetime.datetime(1, 1, 1, 0, 0, 0), - None, - datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), - ], - ), - ("float_col", [float("-inf"), float("nan"), float("inf")]), - ( - "geo_col", - [ - "POINT(30 10)", - None, - "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", - ], - ), - ("int_col", [-9223372036854775808, None, 9223372036854775807]), - ( - "num_col", - [ - decimal.Decimal("-99999999999999999999999999999.999999999"), - None, - decimal.Decimal("99999999999999999999999999999.999999999"), - ], - ), - ( - "bignum_col", - [ - decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), - None, - decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), - ], - ), - 
("str_col", [u"abc", None, u"def"]), - ( - "time_col", - [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)], - ), - ( - "ts_col", - [ - datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), - None, - datetime.datetime( - 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc - ), - ], - ), - ] - ) - dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) - - dataset_id = _make_dataset_id("bq_load_test") - self.temp_dataset(dataset_id) - table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema_csv".format( - Config.CLIENT.project, dataset_id - ) - - job_config = bigquery.LoadJobConfig( - schema=table_schema, source_format=SourceFormat.CSV - ) - load_job = Config.CLIENT.load_table_from_dataframe( - dataframe, table_id, job_config=job_config - ) - load_job.result() - - table = Config.CLIENT.get_table(table_id) - self.assertEqual(tuple(table.schema), table_schema) - self.assertEqual(table.num_rows, 3) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_load_table_from_dataframe_w_explicit_schema_source_format_csv_floats(self): - from google.cloud.bigquery.job import SourceFormat - - table_schema = (bigquery.SchemaField("float_col", "FLOAT"),) - df_data = collections.OrderedDict( - [ - ( - "float_col", - [ - 0.14285714285714285, - 0.51428571485748, - 0.87128748, - 1.807960649, - 2.0679610649, - 2.4406779661016949, - 3.7148514257, - 3.8571428571428572, - 1.51251252e40, - ], - ), - ] - ) - dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) - - dataset_id = _make_dataset_id("bq_load_test") - self.temp_dataset(dataset_id) - table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema_csv".format( - Config.CLIENT.project, dataset_id - ) - - job_config = bigquery.LoadJobConfig( - schema=table_schema, source_format=SourceFormat.CSV - ) - load_job = Config.CLIENT.load_table_from_dataframe( - dataframe, table_id, job_config=job_config - ) - load_job.result() - - table = 
Config.CLIENT.get_table(table_id) - rows = self._fetch_single_page(table) - floats = [r.values()[0] for r in rows] - self.assertEqual(tuple(table.schema), table_schema) - self.assertEqual(table.num_rows, 9) - self.assertEqual(floats, df_data["float_col"]) - def test_load_table_from_json_schema_autodetect(self): json_rows = [ {"name": "John", "age": 18, "birthday": "2001-10-15", "is_awesome": False}, @@ -1339,7 +766,7 @@ def test_load_table_from_json_schema_autodetect(self): bigquery.SchemaField("is_awesome", "BOOLEAN", mode="NULLABLE"), ) # create the table before loading so that the column order is predictable - table = retry_403(Config.CLIENT.create_table)( + table = helpers.retry_403(Config.CLIENT.create_table)( Table(table_id, schema=table_schema) ) self.to_delete.insert(0, table) @@ -1374,7 +801,7 @@ def test_load_avro_from_uri_then_dump_table(self): dataset = self.temp_dataset(_make_dataset_id("bq_load_test")) table_arg = dataset.table(table_name) - table = retry_403(Config.CLIENT.create_table)(Table(table_arg)) + table = helpers.retry_403(Config.CLIENT.create_table)(Table(table_arg)) self.to_delete.insert(0, table) config = bigquery.LoadJobConfig() @@ -1405,7 +832,7 @@ def test_load_table_from_uri_then_dump_table(self): dataset = self.temp_dataset(_make_dataset_id("load_gcs_then_dump")) table_arg = Table(dataset.table(TABLE_ID), schema=SCHEMA) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) config = bigquery.LoadJobConfig() @@ -1623,7 +1050,7 @@ def test_get_set_iam_policy(self): table_ref = Table(dataset.table(table_id)) self.assertFalse(_table_exists(table_ref)) - table = retry_403(Config.CLIENT.create_table)(table_ref) + table = helpers.retry_403(Config.CLIENT.create_table)(table_ref) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) @@ -1648,7 +1075,7 @@ def test_test_iam_permissions(self): table_ref = 
Table(dataset.table(table_id)) self.assertFalse(_table_exists(table_ref)) - table = retry_403(Config.CLIENT.create_table)(table_ref) + table = helpers.retry_403(Config.CLIENT.create_table)(table_ref) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) @@ -1672,7 +1099,7 @@ def test_job_cancel(self): dataset = self.temp_dataset(DATASET_ID) table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) job = Config.CLIENT.query(QUERY, job_id_prefix=JOB_ID_PREFIX) @@ -1743,75 +1170,12 @@ def test_query_w_legacy_sql_types(self): self.assertEqual(len(rows[0]), 1) self.assertEqual(rows[0][0], example["expected"]) - def _generate_standard_sql_types_examples(self): - naive = datetime.datetime(2016, 12, 5, 12, 41, 9) - naive_microseconds = datetime.datetime(2016, 12, 5, 12, 41, 9, 250000) - stamp = "%s %s" % (naive.date().isoformat(), naive.time().isoformat()) - stamp_microseconds = stamp + ".250000" - zoned = naive.replace(tzinfo=UTC) - zoned_microseconds = naive_microseconds.replace(tzinfo=UTC) - numeric = decimal.Decimal("123456789.123456789") - return [ - {"sql": "SELECT 1", "expected": 1}, - {"sql": "SELECT 1.3", "expected": 1.3}, - {"sql": "SELECT TRUE", "expected": True}, - {"sql": 'SELECT "ABC"', "expected": "ABC"}, - {"sql": 'SELECT CAST("foo" AS BYTES)', "expected": b"foo"}, - {"sql": 'SELECT TIMESTAMP "%s"' % (stamp,), "expected": zoned}, - { - "sql": 'SELECT TIMESTAMP "%s"' % (stamp_microseconds,), - "expected": zoned_microseconds, - }, - {"sql": 'SELECT DATETIME(TIMESTAMP "%s")' % (stamp,), "expected": naive}, - { - "sql": 'SELECT DATETIME(TIMESTAMP "%s")' % (stamp_microseconds,), - "expected": naive_microseconds, - }, - {"sql": 'SELECT DATE(TIMESTAMP "%s")' % (stamp,), "expected": naive.date()}, - {"sql": 'SELECT TIME(TIMESTAMP "%s")' % (stamp,), "expected": naive.time()}, - {"sql": 'SELECT 
NUMERIC "%s"' % (numeric,), "expected": numeric}, - {"sql": "SELECT (1, 2)", "expected": {"_field_1": 1, "_field_2": 2}}, - { - "sql": "SELECT ((1, 2), (3, 4), 5)", - "expected": { - "_field_1": {"_field_1": 1, "_field_2": 2}, - "_field_2": {"_field_1": 3, "_field_2": 4}, - "_field_3": 5, - }, - }, - {"sql": "SELECT [1, 2, 3]", "expected": [1, 2, 3]}, - { - "sql": "SELECT ([1, 2], 3, [4, 5])", - "expected": {"_field_1": [1, 2], "_field_2": 3, "_field_3": [4, 5]}, - }, - { - "sql": "SELECT [(1, 2, 3), (4, 5, 6)]", - "expected": [ - {"_field_1": 1, "_field_2": 2, "_field_3": 3}, - {"_field_1": 4, "_field_2": 5, "_field_3": 6}, - ], - }, - { - "sql": "SELECT [([1, 2, 3], 4), ([5, 6], 7)]", - "expected": [ - {u"_field_1": [1, 2, 3], u"_field_2": 4}, - {u"_field_1": [5, 6], u"_field_2": 7}, - ], - }, - { - "sql": "SELECT ARRAY(SELECT STRUCT([1, 2]))", - "expected": [{u"_field_1": [1, 2]}], - }, - {"sql": "SELECT ST_GeogPoint(1, 2)", "expected": "POINT(1 2)"}, - ] - def test_query_w_standard_sql_types(self): - examples = self._generate_standard_sql_types_examples() - for example in examples: - rows = list(Config.CLIENT.query(example["sql"])) + for sql, expected in helpers.STANDARD_SQL_EXAMPLES: + rows = list(Config.CLIENT.query(sql)) self.assertEqual(len(rows), 1) self.assertEqual(len(rows[0]), 1) - self.assertEqual(rows[0][0], example["expected"]) + self.assertEqual(rows[0][0], expected) def test_query_w_failed_query(self): from google.api_core.exceptions import BadRequest @@ -1950,13 +1314,12 @@ def test_query_statistics(self): self.assertGreater(len(plan), stages_with_inputs) def test_dbapi_w_standard_sql_types(self): - examples = self._generate_standard_sql_types_examples() - for example in examples: - Config.CURSOR.execute(example["sql"]) + for sql, expected in helpers.STANDARD_SQL_EXAMPLES: + Config.CURSOR.execute(sql) self.assertEqual(Config.CURSOR.rowcount, 1) row = Config.CURSOR.fetchone() self.assertEqual(len(row), 1) - self.assertEqual(row[0], 
example["expected"]) + self.assertEqual(row[0], expected) row = Config.CURSOR.fetchone() self.assertIsNone(row) @@ -2107,7 +1470,7 @@ def _load_table_for_dml(self, rows, dataset_id, table_id): greeting = bigquery.SchemaField("greeting", "STRING", mode="NULLABLE") table_ref = dataset.table(table_id) table_arg = Table(table_ref, schema=[greeting]) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) with _NamedTemporaryFile() as temp: @@ -2480,152 +1843,6 @@ def test_query_iter(self): row_tuples = [r.values() for r in query_job] self.assertEqual(row_tuples, [(1,)]) - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_query_results_to_dataframe(self): - QUERY = """ - SELECT id, author, time_ts, dead - FROM `bigquery-public-data.hacker_news.comments` - LIMIT 10 - """ - - df = Config.CLIENT.query(QUERY).result().to_dataframe() - - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 10) # verify the number of rows - column_names = ["id", "author", "time_ts", "dead"] - self.assertEqual(list(df), column_names) # verify the column names - exp_datatypes = { - "id": int, - "author": str, - "time_ts": pandas.Timestamp, - "dead": bool, - } - for index, row in df.iterrows(): - for col in column_names: - # all the schema fields are nullable, so None is acceptable - if not row[col] is None: - self.assertIsInstance(row[col], exp_datatypes[col]) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - def test_query_results_to_dataframe_w_bqstorage(self): - query = """ - SELECT id, author, time_ts, dead - FROM `bigquery-public-data.hacker_news.comments` - LIMIT 10 - """ - - bqstorage_client = bigquery_storage.BigQueryReadClient( - credentials=Config.CLIENT._credentials - ) - - df = Config.CLIENT.query(query).result().to_dataframe(bqstorage_client) - 
- self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 10) # verify the number of rows - column_names = ["id", "author", "time_ts", "dead"] - self.assertEqual(list(df), column_names) - exp_datatypes = { - "id": int, - "author": str, - "time_ts": pandas.Timestamp, - "dead": bool, - } - for index, row in df.iterrows(): - for col in column_names: - # all the schema fields are nullable, so None is acceptable - if not row[col] is None: - self.assertIsInstance(row[col], exp_datatypes[col]) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_insert_rows_from_dataframe(self): - SF = bigquery.SchemaField - schema = [ - SF("float_col", "FLOAT", mode="REQUIRED"), - SF("int_col", "INTEGER", mode="REQUIRED"), - SF("bool_col", "BOOLEAN", mode="REQUIRED"), - SF("string_col", "STRING", mode="NULLABLE"), - ] - - dataframe = pandas.DataFrame( - [ - { - "float_col": 1.11, - "bool_col": True, - "string_col": "my string", - "int_col": 10, - }, - { - "float_col": 2.22, - "bool_col": False, - "string_col": "another string", - "int_col": 20, - }, - { - "float_col": 3.33, - "bool_col": False, - "string_col": "another string", - "int_col": 30, - }, - { - "float_col": 4.44, - "bool_col": True, - "string_col": "another string", - "int_col": 40, - }, - { - "float_col": 5.55, - "bool_col": False, - "string_col": "another string", - "int_col": 50, - }, - { - "float_col": 6.66, - "bool_col": True, - # Include a NaN value, because pandas often uses NaN as a - # NULL value indicator. 
- "string_col": float("NaN"), - "int_col": 60, - }, - ] - ) - - table_id = "test_table" - dataset = self.temp_dataset(_make_dataset_id("issue_7553")) - table_arg = Table(dataset.table(table_id), schema=schema) - table = retry_403(Config.CLIENT.create_table)(table_arg) - self.to_delete.insert(0, table) - - chunk_errors = Config.CLIENT.insert_rows_from_dataframe( - table, dataframe, chunk_size=3 - ) - for errors in chunk_errors: - assert not errors - - # Use query to fetch rows instead of listing directly from the table so - # that we get values from the streaming buffer. - rows = list( - Config.CLIENT.query( - "SELECT * FROM `{}.{}.{}`".format( - table.project, table.dataset_id, table.table_id - ) - ) - ) - - sorted_rows = sorted(rows, key=operator.attrgetter("int_col")) - row_tuples = [r.values() for r in sorted_rows] - expected = [ - tuple(None if col != col else col for col in data_row) - for data_row in dataframe.itertuples(index=False) - ] - - assert len(row_tuples) == len(expected) - - for row, expected_row in zip(row_tuples, expected): - self.assertCountEqual(row, expected_row) # column order does not matter - def test_insert_rows_nested_nested(self): # See #2951 SF = bigquery.SchemaField @@ -2656,7 +1873,7 @@ def test_insert_rows_nested_nested(self): table_id = "test_table" dataset = self.temp_dataset(_make_dataset_id("issue_2951")) table_arg = Table(dataset.table(table_id), schema=schema) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) Config.CLIENT.insert_rows(table, to_insert) @@ -2696,7 +1913,7 @@ def test_insert_rows_nested_nested_dictionary(self): table_id = "test_table" dataset = self.temp_dataset(_make_dataset_id("issue_2951")) table_arg = Table(dataset.table(table_id), schema=schema) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) 
Config.CLIENT.insert_rows(table, to_insert) @@ -2740,8 +1957,8 @@ def test_create_routine(self): str(routine.reference) ) - routine = retry_403(Config.CLIENT.create_routine)(routine) - query_job = retry_403(Config.CLIENT.query)(query_string) + routine = helpers.retry_403(Config.CLIENT.create_routine)(routine) + query_job = helpers.retry_403(Config.CLIENT.query)(query_string) rows = list(query_job.result()) assert len(rows) == 1 @@ -2752,7 +1969,7 @@ def test_create_table_rows_fetch_nested_schema(self): dataset = self.temp_dataset(_make_dataset_id("create_table_nested_schema")) schema = _load_json_schema() table_arg = Table(dataset.table(table_name), schema=schema) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) self.assertEqual(table.table_id, table_name) @@ -2872,85 +2089,6 @@ def test_nested_table_to_arrow(self): self.assertTrue(pyarrow.types.is_list(record_col[1].type)) self.assertTrue(pyarrow.types.is_int64(record_col[1].type.value_type)) - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_nested_table_to_dataframe(self): - from google.cloud.bigquery.job import SourceFormat - from google.cloud.bigquery.job import WriteDisposition - - SF = bigquery.SchemaField - schema = [ - SF("string_col", "STRING", mode="NULLABLE"), - SF( - "record_col", - "RECORD", - mode="NULLABLE", - fields=[ - SF("nested_string", "STRING", mode="NULLABLE"), - SF("nested_repeated", "INTEGER", mode="REPEATED"), - SF( - "nested_record", - "RECORD", - mode="NULLABLE", - fields=[SF("nested_nested_string", "STRING", mode="NULLABLE")], - ), - ], - ), - SF("bigfloat_col", "FLOAT", mode="NULLABLE"), - SF("smallfloat_col", "FLOAT", mode="NULLABLE"), - ] - record = { - "nested_string": "another string value", - "nested_repeated": [0, 1, 2], - "nested_record": {"nested_nested_string": "some deep insight"}, - } - to_insert = [ - { - 
"string_col": "Some value", - "record_col": record, - "bigfloat_col": 3.14, - "smallfloat_col": 2.72, - } - ] - rows = [json.dumps(row) for row in to_insert] - body = io.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) - table_id = "test_table" - dataset = self.temp_dataset(_make_dataset_id("nested_df")) - table = dataset.table(table_id) - self.to_delete.insert(0, table) - job_config = bigquery.LoadJobConfig() - job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE - job_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON - job_config.schema = schema - # Load a table using a local JSON file from memory. - Config.CLIENT.load_table_from_file(body, table, job_config=job_config).result() - - df = Config.CLIENT.list_rows(table, selected_fields=schema).to_dataframe( - dtypes={"smallfloat_col": "float16"} - ) - - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 1) # verify the number of rows - exp_columns = ["string_col", "record_col", "bigfloat_col", "smallfloat_col"] - self.assertEqual(list(df), exp_columns) # verify the column names - row = df.iloc[0] - # verify the row content - self.assertEqual(row["string_col"], "Some value") - expected_keys = tuple(sorted(record.keys())) - row_keys = tuple(sorted(row["record_col"].keys())) - self.assertEqual(row_keys, expected_keys) - # Can't compare numpy arrays, which pyarrow encodes the embedded - # repeated column to, so convert to list. 
- self.assertEqual(list(row["record_col"]["nested_repeated"]), [0, 1, 2]) - # verify that nested data can be accessed with indices/keys - self.assertEqual(row["record_col"]["nested_repeated"][0], 0) - self.assertEqual( - row["record_col"]["nested_record"]["nested_nested_string"], - "some deep insight", - ) - # verify dtypes - self.assertEqual(df.dtypes["bigfloat_col"].name, "float64") - self.assertEqual(df.dtypes["smallfloat_col"].name, "float16") - def test_list_rows_empty_table(self): from google.cloud.bigquery.table import RowIterator @@ -2999,34 +2137,13 @@ def test_list_rows_page_size(self): page = next(pages) self.assertEqual(page.num_items, num_last_page) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - def test_list_rows_max_results_w_bqstorage(self): - table_ref = DatasetReference("bigquery-public-data", "utility_us").table( - "country_code_iso" - ) - bqstorage_client = bigquery_storage.BigQueryReadClient( - credentials=Config.CLIENT._credentials - ) - - row_iterator = Config.CLIENT.list_rows( - table_ref, - selected_fields=[bigquery.SchemaField("country_name", "STRING")], - max_results=100, - ) - dataframe = row_iterator.to_dataframe(bqstorage_client=bqstorage_client) - - self.assertEqual(len(dataframe.index), 100) - def temp_dataset(self, dataset_id, location=None): project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) dataset = Dataset(dataset_ref) if location: dataset.location = location - dataset = retry_403(Config.CLIENT.create_dataset)(dataset) + dataset = helpers.retry_403(Config.CLIENT.create_dataset)(dataset) self.to_delete.append(dataset) return dataset diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py new file mode 100644 index 000000000..1164e36da --- /dev/null +++ b/tests/system/test_pandas.py @@ -0,0 +1,801 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, 
Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""System tests for pandas connector.""" + +import collections +import datetime +import decimal +import json +import io +import operator + +import pkg_resources +import pytest +import pytz + +from google.cloud import bigquery +from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT +from . import helpers + + +bigquery_storage = pytest.importorskip( + "google.cloud.bigquery_storage", minversion="2.0.0" +) +pandas = pytest.importorskip("pandas", minversion="0.23.0") +pyarrow = pytest.importorskip("pyarrow", minversion="1.0.0") + + +PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version +PANDAS_INT64_VERSION = pkg_resources.parse_version("1.0.0") + + +def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_id): + """Test that a DataFrame with dtypes that map well to BigQuery types + can be uploaded without specifying a schema. 
+ + https://github.com/googleapis/google-cloud-python/issues/9044 + """ + df_data = collections.OrderedDict( + [ + ("bool_col", pandas.Series([True, False, True], dtype="bool")), + ( + "ts_col", + pandas.Series( + [ + datetime.datetime(2010, 1, 2, 3, 44, 50), + datetime.datetime(2011, 2, 3, 14, 50, 59), + datetime.datetime(2012, 3, 14, 15, 16), + ], + dtype="datetime64[ns]", + ).dt.tz_localize(pytz.utc), + ), + ( + "dt_col", + pandas.Series( + [ + datetime.datetime(2010, 1, 2, 3, 44, 50), + datetime.datetime(2011, 2, 3, 14, 50, 59), + datetime.datetime(2012, 3, 14, 15, 16), + ], + dtype="datetime64[ns]", + ), + ), + ("float32_col", pandas.Series([1.0, 2.0, 3.0], dtype="float32")), + ("float64_col", pandas.Series([4.0, 5.0, 6.0], dtype="float64")), + ("int8_col", pandas.Series([-12, -11, -10], dtype="int8")), + ("int16_col", pandas.Series([-9, -8, -7], dtype="int16")), + ("int32_col", pandas.Series([-6, -5, -4], dtype="int32")), + ("int64_col", pandas.Series([-3, -2, -1], dtype="int64")), + ("uint8_col", pandas.Series([0, 1, 2], dtype="uint8")), + ("uint16_col", pandas.Series([3, 4, 5], dtype="uint16")), + ("uint32_col", pandas.Series([6, 7, 8], dtype="uint32")), + ] + ) + dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) + + table_id = "{}.{}.load_table_from_dataframe_w_automatic_schema".format( + bigquery_client.project, dataset_id + ) + + load_job = bigquery_client.load_table_from_dataframe(dataframe, table_id) + load_job.result() + + table = bigquery_client.get_table(table_id) + assert tuple(table.schema) == ( + bigquery.SchemaField("bool_col", "BOOLEAN"), + bigquery.SchemaField("ts_col", "TIMESTAMP"), + # BigQuery does not support uploading DATETIME values from + # Parquet files. 
See: + # https://github.com/googleapis/google-cloud-python/issues/9996 + bigquery.SchemaField("dt_col", "TIMESTAMP"), + bigquery.SchemaField("float32_col", "FLOAT"), + bigquery.SchemaField("float64_col", "FLOAT"), + bigquery.SchemaField("int8_col", "INTEGER"), + bigquery.SchemaField("int16_col", "INTEGER"), + bigquery.SchemaField("int32_col", "INTEGER"), + bigquery.SchemaField("int64_col", "INTEGER"), + bigquery.SchemaField("uint8_col", "INTEGER"), + bigquery.SchemaField("uint16_col", "INTEGER"), + bigquery.SchemaField("uint32_col", "INTEGER"), + ) + assert table.num_rows == 3 + + +@pytest.mark.skipif( + PANDAS_INSTALLED_VERSION < PANDAS_INT64_VERSION, + reason="Only `pandas version >=1.0.0` is supported", +) +def test_load_table_from_dataframe_w_nullable_int64_datatype( + bigquery_client, dataset_id +): + """Test that a DataFrame containing column with None-type values and int64 datatype + can be uploaded if a BigQuery schema is specified. + + https://github.com/googleapis/python-bigquery/issues/22 + """ + table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format( + bigquery_client.project, dataset_id + ) + table_schema = (bigquery.SchemaField("x", "INTEGER", mode="NULLABLE"),) + table = helpers.retry_403(bigquery_client.create_table)( + bigquery.Table(table_id, schema=table_schema) + ) + + df_data = collections.OrderedDict( + [("x", pandas.Series([1, 2, None, 4], dtype="Int64"))] + ) + dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) + load_job = bigquery_client.load_table_from_dataframe(dataframe, table_id) + load_job.result() + table = bigquery_client.get_table(table_id) + assert tuple(table.schema) == (bigquery.SchemaField("x", "INTEGER"),) + assert table.num_rows == 4 + + +@pytest.mark.skipif( + PANDAS_INSTALLED_VERSION < PANDAS_INT64_VERSION, + reason="Only `pandas version >=1.0.0` is supported", +) +def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema( + bigquery_client, dataset_id +): + """Test that 
a DataFrame containing column with None-type values and int64 datatype + can be uploaded without specifying a schema. + + https://github.com/googleapis/python-bigquery/issues/22 + """ + + table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format( + bigquery_client.project, dataset_id + ) + df_data = collections.OrderedDict( + [("x", pandas.Series([1, 2, None, 4], dtype="Int64"))] + ) + dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) + load_job = bigquery_client.load_table_from_dataframe(dataframe, table_id) + load_job.result() + table = bigquery_client.get_table(table_id) + assert tuple(table.schema) == (bigquery.SchemaField("x", "INTEGER"),) + assert table.num_rows == 4 + + +def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): + """Test that a DataFrame with null columns can be uploaded if a + BigQuery schema is specified. + + See: https://github.com/googleapis/google-cloud-python/issues/7370 + """ + # Schema with all scalar types. + scalars_schema = ( + bigquery.SchemaField("bool_col", "BOOLEAN"), + bigquery.SchemaField("bytes_col", "BYTES"), + bigquery.SchemaField("date_col", "DATE"), + bigquery.SchemaField("dt_col", "DATETIME"), + bigquery.SchemaField("float_col", "FLOAT"), + bigquery.SchemaField("geo_col", "GEOGRAPHY"), + bigquery.SchemaField("int_col", "INTEGER"), + bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("str_col", "STRING"), + bigquery.SchemaField("time_col", "TIME"), + bigquery.SchemaField("ts_col", "TIMESTAMP"), + ) + if _BIGNUMERIC_SUPPORT: + scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) + + table_schema = scalars_schema + ( + # TODO: Array columns can't be read due to NULLABLE versus REPEATED + # mode mismatch. See: + # https://issuetracker.google.com/133415569#comment3 + # bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"), + # TODO: Support writing StructArrays to Parquet. 
See: + # https://jira.apache.org/jira/browse/ARROW-2587 + # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema), + ) + num_rows = 100 + nulls = [None] * num_rows + df_data = [ + ("bool_col", nulls), + ("bytes_col", nulls), + ("date_col", nulls), + ("dt_col", nulls), + ("float_col", nulls), + ("geo_col", nulls), + ("int_col", nulls), + ("num_col", nulls), + ("str_col", nulls), + ("time_col", nulls), + ("ts_col", nulls), + ] + if _BIGNUMERIC_SUPPORT: + df_data.append(("bignum_col", nulls)) + df_data = collections.OrderedDict(df_data) + dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) + + table_id = "{}.{}.load_table_from_dataframe_w_nulls".format( + bigquery_client.project, dataset_id + ) + + # Create the table before loading so that schema mismatch errors are + # identified. + table = helpers.retry_403(bigquery_client.create_table)( + bigquery.Table(table_id, schema=table_schema) + ) + + job_config = bigquery.LoadJobConfig(schema=table_schema) + load_job = bigquery_client.load_table_from_dataframe( + dataframe, table_id, job_config=job_config + ) + load_job.result() + + table = bigquery_client.get_table(table) + assert tuple(table.schema) == table_schema + assert table.num_rows == num_rows + + +def test_load_table_from_dataframe_w_required(bigquery_client, dataset_id): + """Test that a DataFrame with required columns can be uploaded if a + BigQuery schema is specified. 
+ + See: https://github.com/googleapis/google-cloud-python/issues/8093 + """ + table_schema = ( + bigquery.SchemaField("name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + ) + + records = [{"name": "Chip", "age": 2}, {"name": "Dale", "age": 3}] + dataframe = pandas.DataFrame(records, columns=["name", "age"]) + job_config = bigquery.LoadJobConfig(schema=table_schema) + table_id = "{}.{}.load_table_from_dataframe_w_required".format( + bigquery_client.project, dataset_id + ) + + # Create the table before loading so that schema mismatch errors are + # identified. + table = helpers.retry_403(bigquery_client.create_table)( + bigquery.Table(table_id, schema=table_schema) + ) + + job_config = bigquery.LoadJobConfig(schema=table_schema) + load_job = bigquery_client.load_table_from_dataframe( + dataframe, table_id, job_config=job_config + ) + load_job.result() + + table = bigquery_client.get_table(table) + assert tuple(table.schema) == table_schema + assert table.num_rows == 2 + + +def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id): + # Schema with all scalar types. + # TODO: Uploading DATETIME columns currently fails, thus that field type + # is temporarily removed from the test. 
+ # See: + # https://github.com/googleapis/python-bigquery/issues/61 + # https://issuetracker.google.com/issues/151765076 + scalars_schema = ( + bigquery.SchemaField("bool_col", "BOOLEAN"), + bigquery.SchemaField("bytes_col", "BYTES"), + bigquery.SchemaField("date_col", "DATE"), + # bigquery.SchemaField("dt_col", "DATETIME"), + bigquery.SchemaField("float_col", "FLOAT"), + bigquery.SchemaField("geo_col", "GEOGRAPHY"), + bigquery.SchemaField("int_col", "INTEGER"), + bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("str_col", "STRING"), + bigquery.SchemaField("time_col", "TIME"), + bigquery.SchemaField("ts_col", "TIMESTAMP"), + ) + if _BIGNUMERIC_SUPPORT: + scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) + + table_schema = scalars_schema + ( + # TODO: Array columns can't be read due to NULLABLE versus REPEATED + # mode mismatch. See: + # https://issuetracker.google.com/133415569#comment3 + # bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"), + # TODO: Support writing StructArrays to Parquet. 
See: + # https://jira.apache.org/jira/browse/ARROW-2587 + # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema), + ) + + df_data = [ + ("bool_col", [True, None, False]), + ("bytes_col", [b"abc", None, b"def"]), + ("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)]), + # ( + # "dt_col", + # [ + # datetime.datetime(1, 1, 1, 0, 0, 0), + # None, + # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + # ], + # ), + ("float_col", [float("-inf"), float("nan"), float("inf")]), + ( + "geo_col", + ["POINT(30 10)", None, "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))"], + ), + ("int_col", [-9223372036854775808, None, 9223372036854775807]), + ( + "num_col", + [ + decimal.Decimal("-99999999999999999999999999999.999999999"), + None, + decimal.Decimal("99999999999999999999999999999.999999999"), + ], + ), + ("str_col", ["abc", None, "def"]), + ( + "time_col", + [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)], + ), + ( + "ts_col", + [ + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + None, + datetime.datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc), + ], + ), + ] + if _BIGNUMERIC_SUPPORT: + df_data.append( + ( + "bignum_col", + [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], + ) + ) + df_data = collections.OrderedDict(df_data) + dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) + + table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema".format( + bigquery_client.project, dataset_id + ) + + job_config = bigquery.LoadJobConfig(schema=table_schema) + load_job = bigquery_client.load_table_from_dataframe( + dataframe, table_id, job_config=job_config + ) + load_job.result() + + table = bigquery_client.get_table(table_id) + assert tuple(table.schema) == table_schema + assert table.num_rows == 3 + + +def test_load_table_from_dataframe_w_struct_datatype(bigquery_client, dataset_id): + """Test that a 
DataFrame with struct datatype can be uploaded if a + BigQuery schema is specified. + + https://github.com/googleapis/python-bigquery/issues/21 + """ + table_id = "{}.{}.load_table_from_dataframe_w_struct_datatype".format( + bigquery_client.project, dataset_id + ) + table_schema = [ + bigquery.SchemaField( + "bar", + "RECORD", + fields=[ + bigquery.SchemaField("id", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + ], + mode="REQUIRED", + ), + ] + table = helpers.retry_403(bigquery_client.create_table)( + bigquery.Table(table_id, schema=table_schema) + ) + + df_data = [{"id": 1, "age": 21}, {"id": 2, "age": 22}, {"id": 2, "age": 23}] + dataframe = pandas.DataFrame(data={"bar": df_data}, columns=["bar"]) + + load_job = bigquery_client.load_table_from_dataframe(dataframe, table_id) + load_job.result() + + table = bigquery_client.get_table(table_id) + assert table.schema == table_schema + assert table.num_rows == 3 + + +def test_load_table_from_dataframe_w_explicit_schema_source_format_csv( + bigquery_client, dataset_id +): + from google.cloud.bigquery.job import SourceFormat + + table_schema = ( + bigquery.SchemaField("bool_col", "BOOLEAN"), + bigquery.SchemaField("bytes_col", "BYTES"), + bigquery.SchemaField("date_col", "DATE"), + bigquery.SchemaField("dt_col", "DATETIME"), + bigquery.SchemaField("float_col", "FLOAT"), + bigquery.SchemaField("geo_col", "GEOGRAPHY"), + bigquery.SchemaField("int_col", "INTEGER"), + bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), + bigquery.SchemaField("str_col", "STRING"), + bigquery.SchemaField("time_col", "TIME"), + bigquery.SchemaField("ts_col", "TIMESTAMP"), + ) + df_data = collections.OrderedDict( + [ + ("bool_col", [True, None, False]), + ("bytes_col", ["abc", None, "def"]), + ("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)],), + ( + "dt_col", + [ + datetime.datetime(1, 1, 1, 0, 0, 0), + None, + datetime.datetime(9999, 
12, 31, 23, 59, 59, 999999), + ], + ), + ("float_col", [float("-inf"), float("nan"), float("inf")]), + ( + "geo_col", + [ + "POINT(30 10)", + None, + "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", + ], + ), + ("int_col", [-9223372036854775808, None, 9223372036854775807]), + ( + "num_col", + [ + decimal.Decimal("-99999999999999999999999999999.999999999"), + None, + decimal.Decimal("99999999999999999999999999999.999999999"), + ], + ), + ( + "bignum_col", + [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], + ), + ("str_col", ["abc", None, "def"]), + ( + "time_col", + [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)], + ), + ( + "ts_col", + [ + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + None, + datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc + ), + ], + ), + ] + ) + dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) + + table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema_csv".format( + bigquery_client.project, dataset_id + ) + + job_config = bigquery.LoadJobConfig( + schema=table_schema, source_format=SourceFormat.CSV + ) + load_job = bigquery_client.load_table_from_dataframe( + dataframe, table_id, job_config=job_config + ) + load_job.result() + + table = bigquery_client.get_table(table_id) + assert tuple(table.schema) == table_schema + assert table.num_rows == 3 + + +def test_load_table_from_dataframe_w_explicit_schema_source_format_csv_floats( + bigquery_client, dataset_id +): + from google.cloud.bigquery.job import SourceFormat + + table_schema = (bigquery.SchemaField("float_col", "FLOAT"),) + df_data = collections.OrderedDict( + [ + ( + "float_col", + [ + 0.14285714285714285, + 0.51428571485748, + 0.87128748, + 1.807960649, + 2.0679610649, + 2.4406779661016949, + 3.7148514257, + 3.8571428571428572, + 1.51251252e40, + ], + ), + ] + ) + dataframe = pandas.DataFrame(df_data, dtype="object", 
columns=df_data.keys()) + + table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema_csv".format( + bigquery_client.project, dataset_id + ) + + job_config = bigquery.LoadJobConfig( + schema=table_schema, source_format=SourceFormat.CSV + ) + load_job = bigquery_client.load_table_from_dataframe( + dataframe, table_id, job_config=job_config + ) + load_job.result() + + table = bigquery_client.get_table(table_id) + rows = bigquery_client.list_rows(table_id) + floats = [r.values()[0] for r in rows] + assert tuple(table.schema) == table_schema + assert table.num_rows == 9 + assert floats == df_data["float_col"] + + +def test_query_results_to_dataframe(bigquery_client): + QUERY = """ + SELECT id, author, time_ts, dead + FROM `bigquery-public-data.hacker_news.comments` + LIMIT 10 + """ + + df = bigquery_client.query(QUERY).result().to_dataframe() + + assert isinstance(df, pandas.DataFrame) + assert len(df) == 10 # verify the number of rows + column_names = ["id", "author", "time_ts", "dead"] + assert list(df) == column_names # verify the column names + exp_datatypes = { + "id": int, + "author": str, + "time_ts": pandas.Timestamp, + "dead": bool, + } + for _, row in df.iterrows(): + for col in column_names: + # all the schema fields are nullable, so None is acceptable + if not row[col] is None: + assert isinstance(row[col], exp_datatypes[col]) + + +def test_query_results_to_dataframe_w_bqstorage(bigquery_client): + query = """ + SELECT id, author, time_ts, dead + FROM `bigquery-public-data.hacker_news.comments` + LIMIT 10 + """ + + bqstorage_client = bigquery_storage.BigQueryReadClient( + credentials=bigquery_client._credentials + ) + + df = bigquery_client.query(query).result().to_dataframe(bqstorage_client) + + assert isinstance(df, pandas.DataFrame) + assert len(df) == 10 # verify the number of rows + column_names = ["id", "author", "time_ts", "dead"] + assert list(df) == column_names + exp_datatypes = { + "id": int, + "author": str, + "time_ts": pandas.Timestamp, + 
"dead": bool, + } + for index, row in df.iterrows(): + for col in column_names: + # all the schema fields are nullable, so None is acceptable + if not row[col] is None: + assert isinstance(row[col], exp_datatypes[col]) + + +def test_insert_rows_from_dataframe(bigquery_client, dataset_id): + SF = bigquery.SchemaField + schema = [ + SF("float_col", "FLOAT", mode="REQUIRED"), + SF("int_col", "INTEGER", mode="REQUIRED"), + SF("bool_col", "BOOLEAN", mode="REQUIRED"), + SF("string_col", "STRING", mode="NULLABLE"), + ] + + dataframe = pandas.DataFrame( + [ + { + "float_col": 1.11, + "bool_col": True, + "string_col": "my string", + "int_col": 10, + }, + { + "float_col": 2.22, + "bool_col": False, + "string_col": "another string", + "int_col": 20, + }, + { + "float_col": 3.33, + "bool_col": False, + "string_col": "another string", + "int_col": 30, + }, + { + "float_col": 4.44, + "bool_col": True, + "string_col": "another string", + "int_col": 40, + }, + { + "float_col": 5.55, + "bool_col": False, + "string_col": "another string", + "int_col": 50, + }, + { + "float_col": 6.66, + "bool_col": True, + # Include a NaN value, because pandas often uses NaN as a + # NULL value indicator. + "string_col": float("NaN"), + "int_col": 60, + }, + ] + ) + + table_id = f"{bigquery_client.project}.{dataset_id}.test_insert_rows_from_dataframe" + table_arg = bigquery.Table(table_id, schema=schema) + table = helpers.retry_403(bigquery_client.create_table)(table_arg) + + chunk_errors = bigquery_client.insert_rows_from_dataframe( + table, dataframe, chunk_size=3 + ) + for errors in chunk_errors: + assert not errors + + # Use query to fetch rows instead of listing directly from the table so + # that we get values from the streaming buffer. 
+ rows = list( + bigquery_client.query( + "SELECT * FROM `{}.{}.{}`".format( + table.project, table.dataset_id, table.table_id + ) + ) + ) + + sorted_rows = sorted(rows, key=operator.attrgetter("int_col")) + row_tuples = [r.values() for r in sorted_rows] + expected = [ + # Pandas often represents NULL values as NaN. Convert to None for + # easier comparison. + tuple(None if col != col else col for col in data_row) + for data_row in dataframe.itertuples(index=False) + ] + + assert len(row_tuples) == len(expected) + + for row, expected_row in zip(row_tuples, expected): + assert ( + # Use Counter to verify the same number of values in each, because + # column order does not matter. + collections.Counter(row) + == collections.Counter(expected_row) + ) + + +def test_nested_table_to_dataframe(bigquery_client, dataset_id): + from google.cloud.bigquery.job import SourceFormat + from google.cloud.bigquery.job import WriteDisposition + + SF = bigquery.SchemaField + schema = [ + SF("string_col", "STRING", mode="NULLABLE"), + SF( + "record_col", + "RECORD", + mode="NULLABLE", + fields=[ + SF("nested_string", "STRING", mode="NULLABLE"), + SF("nested_repeated", "INTEGER", mode="REPEATED"), + SF( + "nested_record", + "RECORD", + mode="NULLABLE", + fields=[SF("nested_nested_string", "STRING", mode="NULLABLE")], + ), + ], + ), + SF("bigfloat_col", "FLOAT", mode="NULLABLE"), + SF("smallfloat_col", "FLOAT", mode="NULLABLE"), + ] + record = { + "nested_string": "another string value", + "nested_repeated": [0, 1, 2], + "nested_record": {"nested_nested_string": "some deep insight"}, + } + to_insert = [ + { + "string_col": "Some value", + "record_col": record, + "bigfloat_col": 3.14, + "smallfloat_col": 2.72, + } + ] + rows = [json.dumps(row) for row in to_insert] + body = io.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) + table_id = f"{bigquery_client.project}.{dataset_id}.test_nested_table_to_dataframe" + job_config = bigquery.LoadJobConfig() + job_config.write_disposition = 
WriteDisposition.WRITE_TRUNCATE + job_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON + job_config.schema = schema + # Load a table using a local JSON file from memory. + bigquery_client.load_table_from_file(body, table_id, job_config=job_config).result() + + df = bigquery_client.list_rows(table_id, selected_fields=schema).to_dataframe( + dtypes={"smallfloat_col": "float16"} + ) + + assert isinstance(df, pandas.DataFrame) + assert len(df) == 1 # verify the number of rows + exp_columns = ["string_col", "record_col", "bigfloat_col", "smallfloat_col"] + assert list(df) == exp_columns # verify the column names + row = df.iloc[0] + # verify the row content + assert row["string_col"] == "Some value" + expected_keys = tuple(sorted(record.keys())) + row_keys = tuple(sorted(row["record_col"].keys())) + assert row_keys == expected_keys + # Can't compare numpy arrays, which pyarrow encodes the embedded + # repeated column to, so convert to list. + assert list(row["record_col"]["nested_repeated"]) == [0, 1, 2] + # verify that nested data can be accessed with indices/keys + assert row["record_col"]["nested_repeated"][0] == 0 + assert ( + row["record_col"]["nested_record"]["nested_nested_string"] + == "some deep insight" + ) + # verify dtypes + assert df.dtypes["bigfloat_col"].name == "float64" + assert df.dtypes["smallfloat_col"].name == "float16" + + +def test_list_rows_max_results_w_bqstorage(bigquery_client): + table_ref = bigquery.DatasetReference("bigquery-public-data", "utility_us").table( + "country_code_iso" + ) + bqstorage_client = bigquery_storage.BigQueryReadClient( + credentials=bigquery_client._credentials + ) + + row_iterator = bigquery_client.list_rows( + table_ref, + selected_fields=[bigquery.SchemaField("country_name", "STRING")], + max_results=100, + ) + with pytest.warns( + UserWarning, match="Cannot use bqstorage_client if max_results is set" + ): + dataframe = row_iterator.to_dataframe(bqstorage_client=bqstorage_client) + + assert 
len(dataframe.index) == 100 From a460f938f1d31c23067a1e09bf6227dd18e92364 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Tue, 16 Mar 2021 08:13:39 -0700 Subject: [PATCH 151/341] chore: add pre-commit-config to renovate ignore paths (#552) Disable renovate PRs on the .pre-commit-config.yaml which is templated from synthtool. https://docs.renovatebot.com/configuration-options/#ignorepaths Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Mon Mar 15 09:05:39 2021 -0600 Source-Repo: googleapis/synthtool Source-Sha: 2c54c473779ea731128cea61a3a6c975a08a5378 Source-Link: https://github.com/googleapis/synthtool/commit/2c54c473779ea731128cea61a3a6c975a08a5378 --- renovate.json | 3 ++- synth.metadata | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/renovate.json b/renovate.json index 4fa949311..f08bc22c9 100644 --- a/renovate.json +++ b/renovate.json @@ -1,5 +1,6 @@ { "extends": [ "config:base", ":preserveSemverRanges" - ] + ], + "ignorePaths": [".pre-commit-config.yaml"] } diff --git a/synth.metadata b/synth.metadata index cab985521..9f81d3045 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "f0259eb7ed4ff254ee238e87651992ff93481dae" + "sha": "efdf1c653770f7c03c17e31e3c2f279bb685637b" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "4dca4132c6d63788c6675e1b1e11e7b9225f8694" + "sha": "2c54c473779ea731128cea61a3a6c975a08a5378" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "4dca4132c6d63788c6675e1b1e11e7b9225f8694" + "sha": "2c54c473779ea731128cea61a3a6c975a08a5378" } } ], From 1b946ba23ee7df86114c6acb338ec34e6c92af6d Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 16 Mar 2021 16:24:21 +0100 Subject: [PATCH 152/341] fix: remove DB-API dependency on pyarrow with 
decimal query parameters (#551) * fix: DB API pyarrow dependency with decimal values DB API should gracefully handle the case when the optional pyarrow dependency is not installed. * Blacken DB API helpers tests * Refine the logic for recognizing NUMERIC Decimals --- google/cloud/bigquery/dbapi/_helpers.py | 23 ++++++--- tests/unit/test_dbapi__helpers.py | 66 +++++++++++++++++-------- 2 files changed, 60 insertions(+), 29 deletions(-) diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 6b36d6e43..69694c98c 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -19,16 +19,15 @@ import functools import numbers -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None - from google.cloud import bigquery from google.cloud.bigquery import table from google.cloud.bigquery.dbapi import exceptions +_NUMERIC_SERVER_MIN = decimal.Decimal("-9.9999999999999999999999999999999999999E+28") +_NUMERIC_SERVER_MAX = decimal.Decimal("9.9999999999999999999999999999999999999E+28") + + def scalar_to_query_parameter(value, name=None): """Convert a scalar value into a query parameter. @@ -189,12 +188,20 @@ def bigquery_scalar_type(value): elif isinstance(value, numbers.Real): return "FLOAT64" elif isinstance(value, decimal.Decimal): - # We check for NUMERIC before BIGNUMERIC in order to support pyarrow < 3.0. - scalar_object = pyarrow.scalar(value) - if isinstance(scalar_object, pyarrow.Decimal128Scalar): + vtuple = value.as_tuple() + # NUMERIC values have precision of 38 (number of digits) and scale of 9 (number + # of fractional digits), and their max absolute value must be strictly smaller + # than 1.0E+29. 
+ # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types + if ( + len(vtuple.digits) <= 38 # max precision: 38 + and vtuple.exponent >= -9 # max scale: 9 + and _NUMERIC_SERVER_MIN <= value <= _NUMERIC_SERVER_MAX + ): return "NUMERIC" else: return "BIGNUMERIC" + elif isinstance(value, str): return "STRING" elif isinstance(value, bytes): diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index c28c014d4..9a505c1ec 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -25,7 +25,6 @@ import google.cloud._helpers from google.cloud.bigquery import table -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions from tests.unit.helpers import _to_pyarrow @@ -39,9 +38,8 @@ def test_scalar_to_query_parameter(self): (123, "INT64"), (-123456789, "INT64"), (1.25, "FLOAT64"), - (decimal.Decimal("1.25"), "NUMERIC"), (b"I am some bytes", "BYTES"), - (u"I am a string", "STRING"), + ("I am a string", "STRING"), (datetime.date(2017, 4, 1), "DATE"), (datetime.time(12, 34, 56), "TIME"), (datetime.datetime(2012, 3, 4, 5, 6, 7), "DATETIME"), @@ -51,14 +49,17 @@ def test_scalar_to_query_parameter(self): ), "TIMESTAMP", ), + (decimal.Decimal("1.25"), "NUMERIC"), + (decimal.Decimal("9.9999999999999999999999999999999999999E+28"), "NUMERIC"), + (decimal.Decimal("1.0E+29"), "BIGNUMERIC"), # more than max NUMERIC value + (decimal.Decimal("1.123456789"), "NUMERIC"), + (decimal.Decimal("1.1234567891"), "BIGNUMERIC"), # scale > 9 + (decimal.Decimal("12345678901234567890123456789.012345678"), "NUMERIC"), + ( + decimal.Decimal("12345678901234567890123456789012345678"), + "BIGNUMERIC", # larger than max NUMERIC value, despite precision <=38 + ), ] - if _BIGNUMERIC_SUPPORT: - expected_types.append( - ( - decimal.Decimal("1.1234567890123456789012345678901234567890"), - "BIGNUMERIC", - ) - ) 
for value, expected_type in expected_types: msg = "value: {} expected_type: {}".format(value, expected_type) @@ -71,6 +72,33 @@ def test_scalar_to_query_parameter(self): self.assertEqual(named_parameter.type_, expected_type, msg=msg) self.assertEqual(named_parameter.value, value, msg=msg) + def test_decimal_to_query_parameter(self): # TODO: merge with previous test + + expected_types = [ + (decimal.Decimal("9.9999999999999999999999999999999999999E+28"), "NUMERIC"), + (decimal.Decimal("1.0E+29"), "BIGNUMERIC"), # more than max value + (decimal.Decimal("1.123456789"), "NUMERIC"), + (decimal.Decimal("1.1234567891"), "BIGNUMERIC"), # scale > 9 + (decimal.Decimal("12345678901234567890123456789.012345678"), "NUMERIC"), + ( + decimal.Decimal("12345678901234567890123456789012345678"), + "BIGNUMERIC", # larger than max size, even if precision <=38 + ), + ] + + for value, expected_type in expected_types: + msg = f"value: {value} expected_type: {expected_type}" + + parameter = _helpers.scalar_to_query_parameter(value) + self.assertIsNone(parameter.name, msg=msg) + self.assertEqual(parameter.type_, expected_type, msg=msg) + self.assertEqual(parameter.value, value, msg=msg) + + named_parameter = _helpers.scalar_to_query_parameter(value, name="myvar") + self.assertEqual(named_parameter.name, "myvar", msg=msg) + self.assertEqual(named_parameter.type_, expected_type, msg=msg) + self.assertEqual(named_parameter.value, value, msg=msg) + def test_scalar_to_query_parameter_w_unexpected_type(self): with self.assertRaises(exceptions.ProgrammingError): _helpers.scalar_to_query_parameter(value={"a": "dictionary"}) @@ -89,8 +117,9 @@ def test_array_to_query_parameter_valid_argument(self): ([123, -456, 0], "INT64"), ([1.25, 2.50], "FLOAT64"), ([decimal.Decimal("1.25")], "NUMERIC"), + ([decimal.Decimal("{d38}.{d38}".format(d38="9" * 38))], "BIGNUMERIC"), ([b"foo", b"bar"], "BYTES"), - ([u"foo", u"bar"], "STRING"), + (["foo", "bar"], "STRING"), ([datetime.date(2017, 4, 1), datetime.date(2018, 
4, 1)], "DATE"), ([datetime.time(12, 34, 56), datetime.time(10, 20, 30)], "TIME"), ( @@ -113,11 +142,6 @@ def test_array_to_query_parameter_valid_argument(self): ), ] - if _BIGNUMERIC_SUPPORT: - expected_types.append( - ([decimal.Decimal("{d38}.{d38}".format(d38="9" * 38))], "BIGNUMERIC") - ) - for values, expected_type in expected_types: msg = "value: {} expected_type: {}".format(values, expected_type) parameter = _helpers.array_to_query_parameter(values) @@ -134,7 +158,7 @@ def test_array_to_query_parameter_empty_argument(self): _helpers.array_to_query_parameter([]) def test_array_to_query_parameter_unsupported_sequence(self): - unsupported_iterables = [{10, 20, 30}, u"foo", b"bar", bytearray([65, 75, 85])] + unsupported_iterables = [{10, 20, 30}, "foo", b"bar", bytearray([65, 75, 85])] for iterable in unsupported_iterables: with self.assertRaises(exceptions.ProgrammingError): _helpers.array_to_query_parameter(iterable) @@ -144,7 +168,7 @@ def test_array_to_query_parameter_sequence_w_invalid_elements(self): _helpers.array_to_query_parameter([object(), 2, 7]) def test_to_query_parameters_w_dict(self): - parameters = {"somebool": True, "somestring": u"a-string-value"} + parameters = {"somebool": True, "somestring": "a-string-value"} query_parameters = _helpers.to_query_parameters(parameters) query_parameter_tuples = [] for param in query_parameters: @@ -154,7 +178,7 @@ def test_to_query_parameters_w_dict(self): sorted( [ ("somebool", "BOOL", True), - ("somestring", "STRING", u"a-string-value"), + ("somestring", "STRING", "a-string-value"), ] ), ) @@ -177,14 +201,14 @@ def test_to_query_parameters_w_dict_dict_param(self): _helpers.to_query_parameters(parameters) def test_to_query_parameters_w_list(self): - parameters = [True, u"a-string-value"] + parameters = [True, "a-string-value"] query_parameters = _helpers.to_query_parameters(parameters) query_parameter_tuples = [] for param in query_parameters: query_parameter_tuples.append((param.name, param.type_, 
param.value)) self.assertSequenceEqual( sorted(query_parameter_tuples), - sorted([(None, "BOOL", True), (None, "STRING", u"a-string-value")]), + sorted([(None, "BOOL", True), (None, "STRING", "a-string-value")]), ) def test_to_query_parameters_w_list_array_param(self): From af542f275aa5c09d34fd75cf172c3dd1db1520a4 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 16 Mar 2021 15:36:02 +0000 Subject: [PATCH 153/341] chore: release 2.12.0 (#547) :robot: I have created a release \*beep\* \*boop\* --- ## [2.12.0](https://www.github.com/googleapis/python-bigquery/compare/v2.11.0...v2.12.0) (2021-03-16) ### Features * make QueryJob.done() method more performant ([#544](https://www.github.com/googleapis/python-bigquery/issues/544)) ([a3ab9ef](https://www.github.com/googleapis/python-bigquery/commit/a3ab9efdd0758829845cfcb6ca0ac1f03ab44f64)) ### Bug Fixes * remove DB-API dependency on pyarrow with decimal query parameters ([#551](https://www.github.com/googleapis/python-bigquery/issues/551)) ([1b946ba](https://www.github.com/googleapis/python-bigquery/commit/1b946ba23ee7df86114c6acb338ec34e6c92af6d)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). 
--- CHANGELOG.md | 12 ++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 512d38108..e5ed7bc9d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,18 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.12.0](https://www.github.com/googleapis/python-bigquery/compare/v2.11.0...v2.12.0) (2021-03-16) + + +### Features + +* make QueryJob.done() method more performant ([#544](https://www.github.com/googleapis/python-bigquery/issues/544)) ([a3ab9ef](https://www.github.com/googleapis/python-bigquery/commit/a3ab9efdd0758829845cfcb6ca0ac1f03ab44f64)) + + +### Bug Fixes + +* remove DB-API dependency on pyarrow with decimal query parameters ([#551](https://www.github.com/googleapis/python-bigquery/issues/551)) ([1b946ba](https://www.github.com/googleapis/python-bigquery/commit/1b946ba23ee7df86114c6acb338ec34e6c92af6d)) + ## [2.11.0](https://www.github.com/googleapis/python-bigquery/compare/v2.10.0...v2.11.0) (2021-03-09) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index e6e357434..67e043bde 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.11.0" +__version__ = "2.12.0" From 4b14a29503987fa36d8c796508f080c64bca3eab Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 16 Mar 2021 18:26:04 +0100 Subject: [PATCH 154/341] test: remove duplicate test for Decimal scalars (#553) --- tests/unit/test_dbapi__helpers.py | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index 9a505c1ec..4b2724de0 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -72,33 +72,6 @@ def test_scalar_to_query_parameter(self): self.assertEqual(named_parameter.type_, expected_type, msg=msg) self.assertEqual(named_parameter.value, value, msg=msg) - def test_decimal_to_query_parameter(self): # TODO: merge with previous test - - expected_types = [ - (decimal.Decimal("9.9999999999999999999999999999999999999E+28"), "NUMERIC"), - (decimal.Decimal("1.0E+29"), "BIGNUMERIC"), # more than max value - (decimal.Decimal("1.123456789"), "NUMERIC"), - (decimal.Decimal("1.1234567891"), "BIGNUMERIC"), # scale > 9 - (decimal.Decimal("12345678901234567890123456789.012345678"), "NUMERIC"), - ( - decimal.Decimal("12345678901234567890123456789012345678"), - "BIGNUMERIC", # larger than max size, even if precision <=38 - ), - ] - - for value, expected_type in expected_types: - msg = f"value: {value} expected_type: {expected_type}" - - parameter = _helpers.scalar_to_query_parameter(value) - self.assertIsNone(parameter.name, msg=msg) - self.assertEqual(parameter.type_, expected_type, msg=msg) - self.assertEqual(parameter.value, value, msg=msg) - - named_parameter = _helpers.scalar_to_query_parameter(value, name="myvar") - self.assertEqual(named_parameter.name, "myvar", msg=msg) - self.assertEqual(named_parameter.type_, expected_type, msg=msg) - self.assertEqual(named_parameter.value, value, msg=msg) - def test_scalar_to_query_parameter_w_unexpected_type(self): with 
self.assertRaises(exceptions.ProgrammingError): _helpers.scalar_to_query_parameter(value={"a": "dictionary"}) From 7447f05f6a92bc4c047dff9d0377598b7af15f18 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 16 Mar 2021 18:49:35 +0100 Subject: [PATCH 155/341] chore(deps): update dependency google-cloud-bigquery to v2.12.0 (#554) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 34896627e..ef9264454 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,3 +1,3 @@ geojson==2.5.0 -google-cloud-bigquery==2.11.0 +google-cloud-bigquery==2.12.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index b55d2b3a4..db1c4b66a 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.11.0 +google-cloud-bigquery==2.12.0 google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.3 grpcio==1.36.1 From 84e646e6b7087a1626e56ad51eeb130f4ddfa2fb Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 19 Mar 2021 12:54:53 -0500 Subject: [PATCH 156/341] fix: avoid policy tags 403 error in `load_table_from_dataframe` (#557) * WIP: fix: don't set policy tags in load job from dataframe * copy fields parameter for struct support * update tests to allow missing description property * fix load from dataframe test on python 3.6 Also, check that sent schema matches DataFrame order, not table order --- google/cloud/bigquery/client.py | 13 +++- google/cloud/bigquery/schema.py | 43 ++++++----- tests/unit/job/test_load_config.py | 12 +-- tests/unit/test_client.py | 113 +++++++++++++++++++---------- tests/unit/test_external_config.py | 9 +-- tests/unit/test_schema.py | 109 +++++++++++----------------- 6 files changed, 150 insertions(+), 149 deletions(-) diff 
--git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index bdbcb767c..305d60d3b 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -2291,9 +2291,18 @@ def load_table_from_dataframe( name for name, _ in _pandas_helpers.list_columns_and_indexes(dataframe) ) - # schema fields not present in the dataframe are not needed job_config.schema = [ - field for field in table.schema if field.name in columns_and_indexes + # Field description and policy tags are not needed to + # serialize a data frame. + SchemaField( + field.name, + field.field_type, + mode=field.mode, + fields=field.fields, + ) + # schema fields not present in the dataframe are not needed + for field in table.schema + if field.name in columns_and_indexes ] job_config.schema = _pandas_helpers.dataframe_to_bq_schema( diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 9be27f3e8..680dcc138 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -19,6 +19,7 @@ from google.cloud.bigquery_v2 import types +_DEFAULT_VALUE = object() _STRUCT_TYPES = ("RECORD", "STRUCT") # SQL types reference: @@ -73,14 +74,18 @@ def __init__( name, field_type, mode="NULLABLE", - description=None, + description=_DEFAULT_VALUE, fields=(), policy_tags=None, ): - self._name = name - self._field_type = field_type - self._mode = mode - self._description = description + self._properties = { + "name": name, + "type": field_type, + } + if mode is not None: + self._properties["mode"] = mode.upper() + if description is not _DEFAULT_VALUE: + self._properties["description"] = description self._fields = tuple(fields) self._policy_tags = policy_tags @@ -98,7 +103,7 @@ def from_api_repr(cls, api_repr): """ # Handle optional properties with default values mode = api_repr.get("mode", "NULLABLE") - description = api_repr.get("description") + description = api_repr.get("description", _DEFAULT_VALUE) fields = api_repr.get("fields", ()) 
return cls( @@ -113,7 +118,7 @@ def from_api_repr(cls, api_repr): @property def name(self): """str: The name of the field.""" - return self._name + return self._properties["name"] @property def field_type(self): @@ -122,7 +127,7 @@ def field_type(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type """ - return self._field_type + return self._properties["type"] @property def mode(self): @@ -131,17 +136,17 @@ def mode(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode """ - return self._mode + return self._properties.get("mode") @property def is_nullable(self): """bool: whether 'mode' is 'nullable'.""" - return self._mode == "NULLABLE" + return self.mode == "NULLABLE" @property def description(self): """Optional[str]: description for the field.""" - return self._description + return self._properties.get("description") @property def fields(self): @@ -164,13 +169,7 @@ def to_api_repr(self): Returns: Dict: A dictionary representing the SchemaField in a serialized form. """ - # Put together the basic representation. See http://bit.ly/2hOAT5u. - answer = { - "mode": self.mode.upper(), - "name": self.name, - "type": self.field_type.upper(), - "description": self.description, - } + answer = self._properties.copy() # If this is a RECORD type, then sub-fields are also included, # add this to the serialized representation. @@ -193,10 +192,10 @@ def _key(self): Tuple: The contents of this :class:`~google.cloud.bigquery.schema.SchemaField`. 
""" return ( - self._name, - self._field_type.upper(), - self._mode.upper(), - self._description, + self.name, + self.field_type.upper(), + self.mode.upper(), + self.description, self._fields, self._policy_tags, ) diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index c18f51bff..63f15ec5a 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -434,13 +434,11 @@ def test_schema_setter_fields(self): "name": "full_name", "type": "STRING", "mode": "REQUIRED", - "description": None, } age_repr = { "name": "age", "type": "INTEGER", "mode": "REQUIRED", - "description": None, } self.assertEqual( config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} @@ -449,24 +447,18 @@ def test_schema_setter_fields(self): def test_schema_setter_valid_mappings_list(self): config = self._get_target_class()() - schema = [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, - ] - config.schema = schema - full_name_repr = { "name": "full_name", "type": "STRING", "mode": "REQUIRED", - "description": None, } age_repr = { "name": "age", "type": "INTEGER", "mode": "REQUIRED", - "description": None, } + schema = [full_name_repr, age_repr] + config.schema = schema self.assertEqual( config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} ) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 6c3263ea5..26ef340de 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1596,18 +1596,8 @@ def test_create_table_w_schema_and_query(self): { "schema": { "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": None, - }, + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, ] }, "view": 
{"query": query}, @@ -1641,18 +1631,8 @@ def test_create_table_w_schema_and_query(self): }, "schema": { "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": None, - }, + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, ] }, "view": {"query": query, "useLegacySql": False}, @@ -2602,7 +2582,7 @@ def test_update_table(self): "name": "age", "type": "INTEGER", "mode": "REQUIRED", - "description": None, + "description": "New field description", }, ] }, @@ -2613,8 +2593,10 @@ def test_update_table(self): } ) schema = [ - SchemaField("full_name", "STRING", mode="REQUIRED"), - SchemaField("age", "INTEGER", mode="REQUIRED"), + SchemaField("full_name", "STRING", mode="REQUIRED", description=None), + SchemaField( + "age", "INTEGER", mode="REQUIRED", description="New field description" + ), ] creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -2647,7 +2629,7 @@ def test_update_table(self): "name": "age", "type": "INTEGER", "mode": "REQUIRED", - "description": None, + "description": "New field description", }, ] }, @@ -2773,13 +2755,24 @@ def test_update_table_w_query(self): "name": "age", "type": "INTEGER", "mode": "REQUIRED", - "description": None, + "description": "this is a column", }, + {"name": "country", "type": "STRING", "mode": "NULLABLE"}, ] } schema = [ - SchemaField("full_name", "STRING", mode="REQUIRED"), - SchemaField("age", "INTEGER", mode="REQUIRED"), + SchemaField( + "full_name", + "STRING", + mode="REQUIRED", + # Explicitly unset the description. + description=None, + ), + SchemaField( + "age", "INTEGER", mode="REQUIRED", description="this is a column" + ), + # Omit the description to not make updates to it. 
+ SchemaField("country", "STRING"), ] resource = self._make_table_resource() resource.update( @@ -7658,18 +7651,47 @@ def test_load_table_from_file_w_invalid_job_config(self): def test_load_table_from_dataframe(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job - from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.schema import PolicyTagList, SchemaField client = self._make_client() - records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] - dataframe = pandas.DataFrame(records) + records = [ + {"id": 1, "age": 100, "accounts": [2, 3]}, + {"id": 2, "age": 60, "accounts": [5]}, + {"id": 3, "age": 40, "accounts": []}, + ] + # Mixup column order so that we can verify sent schema matches the + # serialized order, not the table column order. + column_order = ["age", "accounts", "id"] + dataframe = pandas.DataFrame(records, columns=column_order) + table_fields = { + "id": SchemaField( + "id", + "INTEGER", + mode="REQUIRED", + description="integer column", + policy_tags=PolicyTagList(names=("foo", "bar")), + ), + "age": SchemaField( + "age", + "INTEGER", + mode="NULLABLE", + description="age column", + policy_tags=PolicyTagList(names=("baz",)), + ), + "accounts": SchemaField( + "accounts", "INTEGER", mode="REPEATED", description="array column", + ), + } + get_table_schema = [ + table_fields["id"], + table_fields["age"], + table_fields["accounts"], + ] get_table_patch = mock.patch( "google.cloud.bigquery.client.Client.get_table", autospec=True, - return_value=mock.Mock( - schema=[SchemaField("id", "INTEGER"), SchemaField("age", "INTEGER")] - ), + return_value=mock.Mock(schema=get_table_schema), ) load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True @@ -7695,8 +7717,21 @@ def test_load_table_from_dataframe(self): sent_file = load_table_from_file.mock_calls[0][1][1] assert sent_file.closed - sent_config = 
load_table_from_file.mock_calls[0][2]["job_config"] - assert sent_config.source_format == job.SourceFormat.PARQUET + sent_config = load_table_from_file.mock_calls[0][2]["job_config"].to_api_repr()[ + "load" + ] + assert sent_config["sourceFormat"] == job.SourceFormat.PARQUET + for field_index, field in enumerate(sent_config["schema"]["fields"]): + assert field["name"] == column_order[field_index] + table_field = table_fields[field["name"]] + assert field["name"] == table_field.name + assert field["type"] == table_field.field_type + assert field["mode"] == table_field.mode + assert len(field.get("fields", [])) == len(table_field.fields) + # Omit unnecessary fields when they come from getting the table + # (not passed in via job_config) + assert "description" not in field + assert "policyTags" not in field @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 4b6ef5118..4ca2e9012 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -77,14 +77,7 @@ def test_to_api_repr_base(self): ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] exp_schema = { - "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - } - ] + "fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}] } got_resource = ec.to_api_repr() exp_resource = { diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 71bf6b5ae..87baaf379 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -35,19 +35,19 @@ def _make_one(self, *args, **kw): def test_constructor_defaults(self): field = self._make_one("test", "STRING") - self.assertEqual(field._name, "test") - self.assertEqual(field._field_type, "STRING") - self.assertEqual(field._mode, "NULLABLE") - self.assertIsNone(field._description) - 
self.assertEqual(field._fields, ()) + self.assertEqual(field.name, "test") + self.assertEqual(field.field_type, "STRING") + self.assertEqual(field.mode, "NULLABLE") + self.assertIsNone(field.description) + self.assertEqual(field.fields, ()) def test_constructor_explicit(self): field = self._make_one("test", "STRING", mode="REQUIRED", description="Testing") - self.assertEqual(field._name, "test") - self.assertEqual(field._field_type, "STRING") - self.assertEqual(field._mode, "REQUIRED") - self.assertEqual(field._description, "Testing") - self.assertEqual(field._fields, ()) + self.assertEqual(field.name, "test") + self.assertEqual(field.field_type, "STRING") + self.assertEqual(field.mode, "REQUIRED") + self.assertEqual(field.description, "Testing") + self.assertEqual(field.fields, ()) def test_constructor_subfields(self): sub_field1 = self._make_one("area_code", "STRING") @@ -55,13 +55,13 @@ def test_constructor_subfields(self): field = self._make_one( "phone_number", "RECORD", fields=[sub_field1, sub_field2] ) - self.assertEqual(field._name, "phone_number") - self.assertEqual(field._field_type, "RECORD") - self.assertEqual(field._mode, "NULLABLE") - self.assertIsNone(field._description) - self.assertEqual(len(field._fields), 2) - self.assertIs(field._fields[0], sub_field1) - self.assertIs(field._fields[1], sub_field2) + self.assertEqual(field.name, "phone_number") + self.assertEqual(field.field_type, "RECORD") + self.assertEqual(field.mode, "NULLABLE") + self.assertIsNone(field.description) + self.assertEqual(len(field.fields), 2) + self.assertEqual(field.fields[0], sub_field1) + self.assertEqual(field.fields[1], sub_field2) def test_constructor_with_policy_tags(self): from google.cloud.bigquery.schema import PolicyTagList @@ -70,12 +70,12 @@ def test_constructor_with_policy_tags(self): field = self._make_one( "test", "STRING", mode="REQUIRED", description="Testing", policy_tags=policy ) - self.assertEqual(field._name, "test") - self.assertEqual(field._field_type, 
"STRING") - self.assertEqual(field._mode, "REQUIRED") - self.assertEqual(field._description, "Testing") - self.assertEqual(field._fields, ()) - self.assertEqual(field._policy_tags, policy) + self.assertEqual(field.name, "test") + self.assertEqual(field.field_type, "STRING") + self.assertEqual(field.mode, "REQUIRED") + self.assertEqual(field.description, "Testing") + self.assertEqual(field.fields, ()) + self.assertEqual(field.policy_tags, policy) def test_to_api_repr(self): from google.cloud.bigquery.schema import PolicyTagList @@ -92,7 +92,6 @@ def test_to_api_repr(self): "mode": "NULLABLE", "name": "foo", "type": "INTEGER", - "description": None, "policyTags": {"names": ["foo", "bar"]}, }, ) @@ -104,18 +103,10 @@ def test_to_api_repr_with_subfield(self): self.assertEqual( field.to_api_repr(), { - "fields": [ - { - "mode": "NULLABLE", - "name": "bar", - "type": "INTEGER", - "description": None, - } - ], + "fields": [{"mode": "NULLABLE", "name": "bar", "type": "INTEGER"}], "mode": "REQUIRED", "name": "foo", "type": record_type, - "description": None, }, ) @@ -168,17 +159,17 @@ def test_from_api_repr_defaults(self): def test_name_property(self): name = "lemon-ness" schema_field = self._make_one(name, "INTEGER") - self.assertIs(schema_field.name, name) + self.assertEqual(schema_field.name, name) def test_field_type_property(self): field_type = "BOOLEAN" schema_field = self._make_one("whether", field_type) - self.assertIs(schema_field.field_type, field_type) + self.assertEqual(schema_field.field_type, field_type) def test_mode_property(self): mode = "REPEATED" schema_field = self._make_one("again", "FLOAT", mode=mode) - self.assertIs(schema_field.mode, mode) + self.assertEqual(schema_field.mode, mode) def test_is_nullable(self): mode = "NULLABLE" @@ -193,14 +184,14 @@ def test_is_not_nullable(self): def test_description_property(self): description = "It holds some data." 
schema_field = self._make_one("do", "TIMESTAMP", description=description) - self.assertIs(schema_field.description, description) + self.assertEqual(schema_field.description, description) def test_fields_property(self): sub_field1 = self._make_one("one", "STRING") sub_field2 = self._make_one("fish", "INTEGER") fields = (sub_field1, sub_field2) schema_field = self._make_one("boat", "RECORD", fields=fields) - self.assertIs(schema_field.fields, fields) + self.assertEqual(schema_field.fields, fields) def test_to_standard_sql_simple_type(self): sql_type = self._get_standard_sql_data_type_class() @@ -532,17 +523,10 @@ def test_defaults(self): resource = self._call_fut([full_name, age]) self.assertEqual(len(resource), 2) self.assertEqual( - resource[0], - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, + resource[0], {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, ) self.assertEqual( - resource[1], - {"name": "age", "type": "INTEGER", "mode": "REQUIRED", "description": None}, + resource[1], {"name": "age", "type": "INTEGER", "mode": "REQUIRED"} ) def test_w_description(self): @@ -552,7 +536,13 @@ def test_w_description(self): full_name = SchemaField( "full_name", "STRING", mode="REQUIRED", description=DESCRIPTION ) - age = SchemaField("age", "INTEGER", mode="REQUIRED") + age = SchemaField( + "age", + "INTEGER", + mode="REQUIRED", + # Explicitly unset description. 
+ description=None, + ) resource = self._call_fut([full_name, age]) self.assertEqual(len(resource), 2) self.assertEqual( @@ -581,13 +571,7 @@ def test_w_subfields(self): resource = self._call_fut([full_name, phone]) self.assertEqual(len(resource), 2) self.assertEqual( - resource[0], - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, + resource[0], {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, ) self.assertEqual( resource[1], @@ -595,20 +579,9 @@ def test_w_subfields(self): "name": "phone", "type": "RECORD", "mode": "REPEATED", - "description": None, "fields": [ - { - "name": "type", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, - { - "name": "number", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, + {"name": "type", "type": "STRING", "mode": "REQUIRED"}, + {"name": "number", "type": "STRING", "mode": "REQUIRED"}, ], }, ) From 97ee6ec6cd4bc9f833cd506dc6d244d103654cfd Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 22 Mar 2021 03:53:09 -0500 Subject: [PATCH 157/341] fix: avoid overly strict dependency on pyarrow 3.x (#564) Exclude "bignumeric_type" from the "all" extra --- setup.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 31b6a3ff7..99d3804ed 100644 --- a/setup.py +++ b/setup.py @@ -48,7 +48,7 @@ "grpcio >= 1.32.0, < 2.0dev", "pyarrow >= 1.0.0, < 4.0dev", ], - "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 4.0dev",], + "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 4.0dev"], "bignumeric_type": ["pyarrow >= 3.0.0, < 4.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ @@ -61,6 +61,11 @@ all_extras = [] for extra in extras: + # Exclude this extra from all to avoid overly strict dependencies on core + # libraries such as pyarrow. 
+ # https://github.com/googleapis/python-bigquery/issues/563 + if extra in {"bignumeric_type"}: + continue all_extras.extend(extras[extra]) extras["all"] = all_extras From d93986e0259952257f2571f60719b52099c29c0c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 22 Mar 2021 03:54:39 -0500 Subject: [PATCH 158/341] feat: add `ExternalConfig.connection_id` property to connect to external sources (#560) * feat: add `ExternalConfig.connection_id` property to connect to external sources * add tests * fix unit tests --- google/cloud/bigquery/external_config.py | 17 +++++++++++++++++ tests/unit/test_external_config.py | 8 ++++++++ 2 files changed, 25 insertions(+) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 112dfdba4..59e4960f9 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -760,6 +760,23 @@ def schema(self): prop = self._properties.get("schema", {}) return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])] + @property + def connection_id(self): + """Optional[str]: [Experimental] ID of a BigQuery Connection API + resource. + + .. WARNING:: + + This feature is experimental. Pre-GA features may have limited + support, and changes to pre-GA features may not be compatible with + other pre-GA versions. 
+ """ + return self._properties.get("connectionId") + + @connection_id.setter + def connection_id(self, value): + self._properties["connectionId"] = value + @schema.setter def schema(self, value): prop = value diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 4ca2e9012..648a8717e 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -74,6 +74,7 @@ def test_to_api_repr_base(self): ec.autodetect = True ec.ignore_unknown_values = False ec.compression = "compression" + ec.connection_id = "path/to/connection" ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] exp_schema = { @@ -87,10 +88,17 @@ def test_to_api_repr_base(self): "autodetect": True, "ignoreUnknownValues": False, "compression": "compression", + "connectionId": "path/to/connection", "schema": exp_schema, } self.assertEqual(got_resource, exp_resource) + def test_connection_id(self): + ec = external_config.ExternalConfig("") + self.assertIsNone(ec.connection_id) + ec.connection_id = "path/to/connection" + self.assertEqual(ec.connection_id, "path/to/connection") + def test_schema_None(self): ec = external_config.ExternalConfig("") ec.schema = None From d2d532c4949b8e3ca674d90c24daafeaa7bb2bce Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Mon, 22 Mar 2021 07:41:26 -0700 Subject: [PATCH 159/341] chore(python): add kokoro configs for periodic builds against head (#565) This change should be non-destructive. Note for library repo maintainers: After applying this change, you can easily add (or change) periodic builds against head by adding config files in google3. See python-pubsub repo for example. 
Source-Author: Takashi Matsuo Source-Date: Fri Mar 19 11:17:59 2021 -0700 Source-Repo: googleapis/synthtool Source-Sha: 79c8dd7ee768292f933012d3a69a5b4676404cda Source-Link: https://github.com/googleapis/synthtool/commit/79c8dd7ee768292f933012d3a69a5b4676404cda --- .kokoro/samples/python3.6/periodic-head.cfg | 11 +++ .kokoro/samples/python3.7/periodic-head.cfg | 11 +++ .kokoro/samples/python3.8/periodic-head.cfg | 11 +++ .kokoro/test-samples-against-head.sh | 28 ++++++ .kokoro/test-samples-impl.sh | 102 ++++++++++++++++++++ .kokoro/test-samples.sh | 96 +++--------------- synth.metadata | 11 ++- 7 files changed, 187 insertions(+), 83 deletions(-) create mode 100644 .kokoro/samples/python3.6/periodic-head.cfg create mode 100644 .kokoro/samples/python3.7/periodic-head.cfg create mode 100644 .kokoro/samples/python3.8/periodic-head.cfg create mode 100755 .kokoro/test-samples-against-head.sh create mode 100755 .kokoro/test-samples-impl.sh diff --git a/.kokoro/samples/python3.6/periodic-head.cfg b/.kokoro/samples/python3.6/periodic-head.cfg new file mode 100644 index 000000000..f9cfcd33e --- /dev/null +++ b/.kokoro/samples/python3.6/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-pubsub/.kokoro/test-samples-against-head.sh" +} diff --git a/.kokoro/samples/python3.7/periodic-head.cfg b/.kokoro/samples/python3.7/periodic-head.cfg new file mode 100644 index 000000000..f9cfcd33e --- /dev/null +++ b/.kokoro/samples/python3.7/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-pubsub/.kokoro/test-samples-against-head.sh" +} diff --git a/.kokoro/samples/python3.8/periodic-head.cfg b/.kokoro/samples/python3.8/periodic-head.cfg 
new file mode 100644 index 000000000..f9cfcd33e --- /dev/null +++ b/.kokoro/samples/python3.8/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-pubsub/.kokoro/test-samples-against-head.sh" +} diff --git a/.kokoro/test-samples-against-head.sh b/.kokoro/test-samples-against-head.sh new file mode 100755 index 000000000..689948a23 --- /dev/null +++ b/.kokoro/test-samples-against-head.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# A customized test runner for samples. +# +# For periodic builds, you can specify this file for testing against head. + +# `-e` enables the script to automatically fail when a command fails +# `-o pipefail` sets the exit code to the rightmost comment to exit with a non-zero +set -eo pipefail +# Enables `**` to include files nested inside sub-folders +shopt -s globstar + +cd github/python-bigquery + +exec .kokoro/test-samples-impl.sh diff --git a/.kokoro/test-samples-impl.sh b/.kokoro/test-samples-impl.sh new file mode 100755 index 000000000..cf5de74c1 --- /dev/null +++ b/.kokoro/test-samples-impl.sh @@ -0,0 +1,102 @@ +#!/bin/bash +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# `-e` enables the script to automatically fail when a command fails +# `-o pipefail` sets the exit code to the rightmost comment to exit with a non-zero +set -eo pipefail +# Enables `**` to include files nested inside sub-folders +shopt -s globstar + +# Exit early if samples directory doesn't exist +if [ ! -d "./samples" ]; then + echo "No tests run. `./samples` not found" + exit 0 +fi + +# Disable buffering, so that the logs stream through. +export PYTHONUNBUFFERED=1 + +# Debug: show build environment +env | grep KOKORO + +# Install nox +python3.6 -m pip install --upgrade --quiet nox + +# Use secrets acessor service account to get secrets +if [[ -f "${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" ]]; then + gcloud auth activate-service-account \ + --key-file="${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" \ + --project="cloud-devrel-kokoro-resources" +fi + +# This script will create 3 files: +# - testing/test-env.sh +# - testing/service-account.json +# - testing/client-secrets.json +./scripts/decrypt-secrets.sh + +source ./testing/test-env.sh +export GOOGLE_APPLICATION_CREDENTIALS=$(pwd)/testing/service-account.json + +# For cloud-run session, we activate the service account for gcloud sdk. +gcloud auth activate-service-account \ + --key-file "${GOOGLE_APPLICATION_CREDENTIALS}" + +export GOOGLE_CLIENT_SECRETS=$(pwd)/testing/client-secrets.json + +echo -e "\n******************** TESTING PROJECTS ********************" + +# Switch to 'fail at end' to allow all tests to complete before exiting. 
+set +e +# Use RTN to return a non-zero value if the test fails. +RTN=0 +ROOT=$(pwd) +# Find all requirements.txt in the samples directory (may break on whitespace). +for file in samples/**/requirements.txt; do + cd "$ROOT" + # Navigate to the project folder. + file=$(dirname "$file") + cd "$file" + + echo "------------------------------------------------------------" + echo "- testing $file" + echo "------------------------------------------------------------" + + # Use nox to execute the tests for the project. + python3.6 -m nox -s "$RUN_TESTS_SESSION" + EXIT=$? + + # If this is a periodic build, send the test log to the FlakyBot. + # See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot. + if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then + chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot + $KOKORO_GFILE_DIR/linux_amd64/flakybot + fi + + if [[ $EXIT -ne 0 ]]; then + RTN=1 + echo -e "\n Testing failed: Nox returned a non-zero exit code. \n" + else + echo -e "\n Testing completed.\n" + fi + +done +cd "$ROOT" + +# Workaround for Kokoro permissions issue: delete secrets +rm testing/{test-env.sh,client-secrets.json,service-account.json} + +exit "$RTN" diff --git a/.kokoro/test-samples.sh b/.kokoro/test-samples.sh index 3ce8994cb..62ef534cd 100755 --- a/.kokoro/test-samples.sh +++ b/.kokoro/test-samples.sh @@ -13,6 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +# The default test runner for samples. +# +# For periodic builds, we rewinds the repo to the latest release, and +# run test-samples-impl.sh. # `-e` enables the script to automatically fail when a command fails # `-o pipefail` sets the exit code to the rightmost comment to exit with a non-zero @@ -24,87 +28,19 @@ cd github/python-bigquery # Run periodic samples tests at latest release if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then + # preserving the test runner implementation. 
+ cp .kokoro/test-samples-impl.sh "${TMPDIR}/test-samples-impl.sh" + echo "--- IMPORTANT IMPORTANT IMPORTANT ---" + echo "Now we rewind the repo back to the latest release..." LATEST_RELEASE=$(git describe --abbrev=0 --tags) git checkout $LATEST_RELEASE -fi - -# Exit early if samples directory doesn't exist -if [ ! -d "./samples" ]; then - echo "No tests run. `./samples` not found" - exit 0 -fi - -# Disable buffering, so that the logs stream through. -export PYTHONUNBUFFERED=1 - -# Debug: show build environment -env | grep KOKORO - -# Install nox -python3.6 -m pip install --upgrade --quiet nox - -# Use secrets acessor service account to get secrets -if [[ -f "${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" ]]; then - gcloud auth activate-service-account \ - --key-file="${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" \ - --project="cloud-devrel-kokoro-resources" -fi - -# This script will create 3 files: -# - testing/test-env.sh -# - testing/service-account.json -# - testing/client-secrets.json -./scripts/decrypt-secrets.sh - -source ./testing/test-env.sh -export GOOGLE_APPLICATION_CREDENTIALS=$(pwd)/testing/service-account.json - -# For cloud-run session, we activate the service account for gcloud sdk. -gcloud auth activate-service-account \ - --key-file "${GOOGLE_APPLICATION_CREDENTIALS}" - -export GOOGLE_CLIENT_SECRETS=$(pwd)/testing/client-secrets.json - -echo -e "\n******************** TESTING PROJECTS ********************" - -# Switch to 'fail at end' to allow all tests to complete before exiting. -set +e -# Use RTN to return a non-zero value if the test fails. -RTN=0 -ROOT=$(pwd) -# Find all requirements.txt in the samples directory (may break on whitespace). -for file in samples/**/requirements.txt; do - cd "$ROOT" - # Navigate to the project folder. 
- file=$(dirname "$file") - cd "$file" - - echo "------------------------------------------------------------" - echo "- testing $file" - echo "------------------------------------------------------------" - - # Use nox to execute the tests for the project. - python3.6 -m nox -s "$RUN_TESTS_SESSION" - EXIT=$? - - # If this is a periodic build, send the test log to the FlakyBot. - # See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot. - if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then - chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot - $KOKORO_GFILE_DIR/linux_amd64/flakybot + echo "The current head is: " + echo $(git rev-parse --verify HEAD) + echo "--- IMPORTANT IMPORTANT IMPORTANT ---" + # move back the test runner implementation if there's no file. + if [ ! -f .kokoro/test-samples-impl.sh ]; then + cp "${TMPDIR}/test-samples-impl.sh" .kokoro/test-samples-impl.sh fi +fi - if [[ $EXIT -ne 0 ]]; then - RTN=1 - echo -e "\n Testing failed: Nox returned a non-zero exit code. 
\n" - else - echo -e "\n Testing completed.\n" - fi - -done -cd "$ROOT" - -# Workaround for Kokoro permissions issue: delete secrets -rm testing/{test-env.sh,client-secrets.json,service-account.json} - -exit "$RTN" +exec .kokoro/test-samples-impl.sh diff --git a/synth.metadata b/synth.metadata index 9f81d3045..2425b03fb 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "efdf1c653770f7c03c17e31e3c2f279bb685637b" + "sha": "84e646e6b7087a1626e56ad51eeb130f4ddfa2fb" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "2c54c473779ea731128cea61a3a6c975a08a5378" + "sha": "79c8dd7ee768292f933012d3a69a5b4676404cda" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "2c54c473779ea731128cea61a3a6c975a08a5378" + "sha": "79c8dd7ee768292f933012d3a69a5b4676404cda" } } ], @@ -74,16 +74,21 @@ ".kokoro/samples/lint/presubmit.cfg", ".kokoro/samples/python3.6/common.cfg", ".kokoro/samples/python3.6/continuous.cfg", + ".kokoro/samples/python3.6/periodic-head.cfg", ".kokoro/samples/python3.6/periodic.cfg", ".kokoro/samples/python3.6/presubmit.cfg", ".kokoro/samples/python3.7/common.cfg", ".kokoro/samples/python3.7/continuous.cfg", + ".kokoro/samples/python3.7/periodic-head.cfg", ".kokoro/samples/python3.7/periodic.cfg", ".kokoro/samples/python3.7/presubmit.cfg", ".kokoro/samples/python3.8/common.cfg", ".kokoro/samples/python3.8/continuous.cfg", + ".kokoro/samples/python3.8/periodic-head.cfg", ".kokoro/samples/python3.8/periodic.cfg", ".kokoro/samples/python3.8/presubmit.cfg", + ".kokoro/test-samples-against-head.sh", + ".kokoro/test-samples-impl.sh", ".kokoro/test-samples.sh", ".kokoro/trampoline.sh", ".kokoro/trampoline_v2.sh", From 8f274e8fad7308eca09c055d17d31f58fdc86909 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" 
<55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 22 Mar 2021 09:49:45 -0500 Subject: [PATCH 160/341] chore: release 2.13.0 (#568) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 13 +++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e5ed7bc9d..9cdcdf5fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,19 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.13.0](https://www.github.com/googleapis/python-bigquery/compare/v2.12.0...v2.13.0) (2021-03-22) + + +### Features + +* add `ExternalConfig.connection_id` property to connect to external sources ([#560](https://www.github.com/googleapis/python-bigquery/issues/560)) ([d93986e](https://www.github.com/googleapis/python-bigquery/commit/d93986e0259952257f2571f60719b52099c29c0c)) + + +### Bug Fixes + +* avoid overly strict dependency on pyarrow 3.x ([#564](https://www.github.com/googleapis/python-bigquery/issues/564)) ([97ee6ec](https://www.github.com/googleapis/python-bigquery/commit/97ee6ec6cd4bc9f833cd506dc6d244d103654cfd)) +* avoid policy tags 403 error in `load_table_from_dataframe` ([#557](https://www.github.com/googleapis/python-bigquery/issues/557)) ([84e646e](https://www.github.com/googleapis/python-bigquery/commit/84e646e6b7087a1626e56ad51eeb130f4ddfa2fb)) + ## [2.12.0](https://www.github.com/googleapis/python-bigquery/compare/v2.11.0...v2.12.0) (2021-03-16) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 67e043bde..b6000e20f 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.12.0" +__version__ = "2.13.0" From a3edb8b921e029e2c03d33302d408ad5d4e9d4ad Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 23 Mar 2021 15:06:24 -0500 Subject: [PATCH 161/341] fix: add ConnectionError to default retry (#571) --- google/cloud/bigquery/retry.py | 8 ++++---- setup.py | 1 + testing/constraints-3.6.txt | 1 + tests/unit/test_retry.py | 9 +++++++++ 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index 4bc4b757f..20a8e7b13 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -14,6 +14,7 @@ from google.api_core import exceptions from google.api_core import retry +import requests.exceptions _RETRYABLE_REASONS = frozenset( @@ -21,9 +22,11 @@ ) _UNSTRUCTURED_RETRYABLE_TYPES = ( + ConnectionError, exceptions.TooManyRequests, exceptions.InternalServerError, exceptions.BadGateway, + requests.exceptions.ConnectionError, ) @@ -33,10 +36,7 @@ def _should_retry(exc): We retry if and only if the 'reason' is 'backendError' or 'rateLimitExceeded'. """ - if not hasattr(exc, "errors"): - return False - - if len(exc.errors) == 0: + if not hasattr(exc, "errors") or len(exc.errors) == 0: # Check for unstructured error returns, e.g. 
from GFE return isinstance(exc, _UNSTRUCTURED_RETRYABLE_TYPES) diff --git a/setup.py b/setup.py index 99d3804ed..12a9bde31 100644 --- a/setup.py +++ b/setup.py @@ -35,6 +35,7 @@ "google-resumable-media >= 0.6.0, < 2.0dev", "packaging >= 14.3", "protobuf >= 3.12.0", + "requests >= 2.18.0, < 3.0.0dev", ] extras = { "bqstorage": [ diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index c4a5c51be..322373eba 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -17,5 +17,6 @@ pandas==0.23.0 proto-plus==1.10.0 protobuf==3.12.0 pyarrow==1.0.0 +requests==2.18.0 six==1.13.0 tqdm==4.7.4 diff --git a/tests/unit/test_retry.py b/tests/unit/test_retry.py index d9f867cb3..318a54d34 100644 --- a/tests/unit/test_retry.py +++ b/tests/unit/test_retry.py @@ -15,6 +15,7 @@ import unittest import mock +import requests.exceptions class Test_should_retry(unittest.TestCase): @@ -42,6 +43,14 @@ def test_w_rateLimitExceeded(self): exc = mock.Mock(errors=[{"reason": "rateLimitExceeded"}], spec=["errors"]) self.assertTrue(self._call_fut(exc)) + def test_w_unstructured_connectionerror(self): + exc = ConnectionError() + self.assertTrue(self._call_fut(exc)) + + def test_w_unstructured_requests_connectionerror(self): + exc = requests.exceptions.ConnectionError() + self.assertTrue(self._call_fut(exc)) + def test_w_unstructured_too_many_requests(self): from google.api_core.exceptions import TooManyRequests From 0fc3a55eb3051ed114f8b3d3d8cdec054994cd84 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 23 Mar 2021 21:06:52 +0100 Subject: [PATCH 162/341] chore(deps): update dependency google-cloud-bigquery to v2.13.0 (#570) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index ef9264454..c5f60911e 100644 --- a/samples/geography/requirements.txt +++ 
b/samples/geography/requirements.txt @@ -1,3 +1,3 @@ geojson==2.5.0 -google-cloud-bigquery==2.12.0 +google-cloud-bigquery==2.13.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index db1c4b66a..abbe6fde4 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.12.0 +google-cloud-bigquery==2.13.0 google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.3 grpcio==1.36.1 From e175d3a26f68e1bc5148bf055089dbfc1b83c76a Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 23 Mar 2021 20:20:07 +0000 Subject: [PATCH 163/341] chore: release 2.13.1 (#572) :robot: I have created a release \*beep\* \*boop\* --- ### [2.13.1](https://www.github.com/googleapis/python-bigquery/compare/v2.13.0...v2.13.1) (2021-03-23) ### Bug Fixes * add ConnectionError to default retry ([#571](https://www.github.com/googleapis/python-bigquery/issues/571)) ([a3edb8b](https://www.github.com/googleapis/python-bigquery/commit/a3edb8b921e029e2c03d33302d408ad5d4e9d4ad)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). 
--- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9cdcdf5fb..5dc2c8838 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.13.1](https://www.github.com/googleapis/python-bigquery/compare/v2.13.0...v2.13.1) (2021-03-23) + + +### Bug Fixes + +* add ConnectionError to default retry ([#571](https://www.github.com/googleapis/python-bigquery/issues/571)) ([a3edb8b](https://www.github.com/googleapis/python-bigquery/commit/a3edb8b921e029e2c03d33302d408ad5d4e9d4ad)) + ## [2.13.0](https://www.github.com/googleapis/python-bigquery/compare/v2.12.0...v2.13.0) (2021-03-22) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index b6000e20f..2330d0c2c 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.13.0" +__version__ = "2.13.1" From 907a1e08007d6f71ddec3a2259631cf476f7d311 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Wed, 31 Mar 2021 08:00:39 -0700 Subject: [PATCH 164/341] chore(deps): update precommit hook pycqa/flake8 to v3.9.0 (#574) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [pycqa/flake8](https://gitlab.com/pycqa/flake8) | repository | minor | `3.8.4` -> `3.9.0` | --- ### Release Notes
pycqa/flake8 ### [`v3.9.0`](https://gitlab.com/pycqa/flake8/compare/3.8.4...3.9.0) [Compare Source](https://gitlab.com/pycqa/flake8/compare/3.8.4...3.9.0)
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/synthtool). Source-Author: WhiteSource Renovate Source-Date: Tue Mar 23 17:38:03 2021 +0100 Source-Repo: googleapis/synthtool Source-Sha: f5c5904fb0c6aa3b3730eadf4e5a4485afc65726 Source-Link: https://github.com/googleapis/synthtool/commit/f5c5904fb0c6aa3b3730eadf4e5a4485afc65726 --- .pre-commit-config.yaml | 2 +- synth.metadata | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a9024b15d..32302e488 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,6 +12,6 @@ repos: hooks: - id: black - repo: https://gitlab.com/pycqa/flake8 - rev: 3.8.4 + rev: 3.9.0 hooks: - id: flake8 diff --git a/synth.metadata b/synth.metadata index 2425b03fb..3b34bf519 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "84e646e6b7087a1626e56ad51eeb130f4ddfa2fb" + "sha": "e175d3a26f68e1bc5148bf055089dbfc1b83c76a" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "79c8dd7ee768292f933012d3a69a5b4676404cda" + "sha": "f5c5904fb0c6aa3b3730eadf4e5a4485afc65726" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "79c8dd7ee768292f933012d3a69a5b4676404cda" + 
"sha": "f5c5904fb0c6aa3b3730eadf4e5a4485afc65726" } } ], From 1756f404c5201645bedda43d5cf06d469acd30c0 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 31 Mar 2021 17:01:09 +0200 Subject: [PATCH 165/341] chore(deps): update dependency google-auth-oauthlib to v0.4.4 (#578) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index abbe6fde4..9f6073c8f 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ google-cloud-bigquery==2.13.0 google-cloud-bigquery-storage==2.3.0 -google-auth-oauthlib==0.4.3 +google-auth-oauthlib==0.4.4 grpcio==1.36.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' From b425f7ccb9f67224a309924896d2faf611c633c9 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 31 Mar 2021 17:38:03 +0200 Subject: [PATCH 166/341] chore(deps): update dependency matplotlib to v3.4.1 (#576) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [matplotlib](https://matplotlib.org) ([source](https://togithub.com/matplotlib/matplotlib)) | `==3.3.4` -> `==3.4.1` | [![age](https://badges.renovateapi.com/packages/pypi/matplotlib/3.4.1/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/matplotlib/3.4.1/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/matplotlib/3.4.1/compatibility-slim/3.3.4)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/matplotlib/3.4.1/confidence-slim/3.3.4)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
matplotlib/matplotlib ### [`v3.4.1`](https://togithub.com/matplotlib/matplotlib/releases/v3.4.1) [Compare Source](https://togithub.com/matplotlib/matplotlib/compare/v3.4.0...v3.4.1) This is the first bugfix release of the 3.4.x series. This release contains several critical bug-fixes: - fix errorbar when specifying fillstyle - fix Inkscape cleanup at exit on Windows for tests - fix legends of colour-mapped scatter plots - fix positioning of annotation fancy arrows - fix size and color rendering for 3D scatter plots - fix suptitle manual positioning when using constrained layout - respect antialiasing settings in cairo backends as well ### [`v3.4.0`](https://togithub.com/matplotlib/matplotlib/releases/v3.4.0) [Compare Source](https://togithub.com/matplotlib/matplotlib/compare/v3.3.4...v3.4.0) Highlights of this release include: - Figure and Axes creation / management - New subfigure functionality - Single-line string notation for `subplot_mosaic` - Changes to behavior of Axes creation methods (`gca`, `add_axes`, `add_subplot`) - `add_subplot`/`add_axes` gained an _axes_class_ parameter - Subplot and subplot2grid can now work with constrained layout - Plotting methods - `axline` supports transform parameter - New automatic labeling for bar charts - A list of hatches can be specified to `bar` and `barh` - Setting `BarContainer` orientation - Contour plots now default to using `ScalarFormatter` - `Axes.errorbar` cycles non-color properties correctly - `errorbar` _errorevery_ parameter matches _markevery_ - `hexbin` supports data reference for _C_ parameter - Support callable for formatting of Sankey labels - `Axes.spines` access shortcuts - New `stairs` method and `StepPatch` artist - Added _orientation_ parameter for stem plots - Angles on Bracket arrow styles - `TickedStroke` patheffect - Colors and colormaps - Collection color specification and mapping - Transparency (alpha) can be set as an array in collections - pcolormesh has improved transparency handling by 
enabling snapping - IPython representations for Colormap objects - `Colormap.set_extremes` and `Colormap.with_extremes` - Get under/over/bad colors of Colormap objects - New `cm.unregister_cmap` function - New `CenteredNorm` for symmetrical data around a center - New `FuncNorm` for arbitrary normalizations - GridSpec-based colorbars can now be positioned above or to the left of the main axes - Titles, ticks, and labels - supxlabel and supylabel - Shared-axes `subplots` tick label visibility is now correct for top or left labels - An iterable object with labels can be passed to `Axes.plot` - Fonts and Text - Text transform can rotate text direction - `matplotlib.mathtext` now supports _overset_ and _underset_ LaTeX symbols - _math_fontfamily_ parameter to change `Text` font family - `TextArea`/`AnchoredText` support _horizontalalignment_ - PDF supports URLs on Text artists - rcParams improvements - New rcParams for dates: set converter and whether to use interval_multiples - Date formatters now respect _usetex_ rcParam - Setting _image.cmap_ to a Colormap - Tick and tick label colors can be set independently using rcParams - 3D Axes improvements - Errorbar method in 3D Axes - Stem plots in 3D Axes - 3D Collection properties are now modifiable - Panning in 3D Axes - Interactive tool improvements - New `RangeSlider` widget - Sliders can now snap to arbitrary values - Pausing and Resuming Animations - Sphinx extensions - `plot_directive` _caption_ option - Backend-specific improvements - Consecutive rasterized draws now merged - Support raw/rgba frame format in `FFMpegFileWriter` - nbAgg/WebAgg support middle-click and double-click - nbAgg support binary communication - Indexed color for PNG images in PDF files when possible - Improved font subsettings in PDF/PS - Kerning added to strings in PDFs - Fully-fractional HiDPI in QtAgg - wxAgg supports fullscreen toggle
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Renovate will not automatically rebase this PR, because other commits have been found. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/snippets/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 9f6073c8f..6024d7655 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -4,7 +4,8 @@ google-auth-oauthlib==0.4.4 grpcio==1.36.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' -matplotlib==3.3.4 +matplotlib==3.3.4; python_version < '3.7' +matplotlib==3.4.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' pandas==1.2.0; python_version >= '3.7' pyarrow==3.0.0 From 14eb2da54ae46c5a0947f04540f8fcb86a2c2cdc Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 5 Apr 2021 15:47:58 +0200 Subject: [PATCH 167/341] chore: loosen opentelemetry dependencies (#587) --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 12a9bde31..607ffb63f 100644 --- a/setup.py +++ b/setup.py @@ -53,9 +53,9 @@ "bignumeric_type": ["pyarrow >= 3.0.0, < 4.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ - "opentelemetry-api==0.11b0", - "opentelemetry-sdk==0.11b0", - "opentelemetry-instrumentation==0.11b0", + "opentelemetry-api >= 0.11b0", + "opentelemetry-sdk >= 0.11b0", + "opentelemetry-instrumentation >= 
0.11b0", ], } From c1195147a6e9220f26558a301427dd447646da3a Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Tue, 6 Apr 2021 04:26:17 -0600 Subject: [PATCH 168/341] chore: use gcp-sphinx-docfx-yaml (#584) Porting change in https://github.com/googleapis/synthtool/pull/1011 --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index df36d237e..a738d8c00 100644 --- a/noxfile.py +++ b/noxfile.py @@ -275,7 +275,7 @@ def docfx(session): """Build the docfx yaml files for this library.""" session.install("-e", ".") - session.install("sphinx", "alabaster", "recommonmark", "sphinx-docfx-yaml") + session.install("sphinx", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml") shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) session.run( From 973e23649b59973494e5c706504bc833453155a8 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Wed, 7 Apr 2021 09:01:43 -0700 Subject: [PATCH 169/341] chore: Add license headers for python config files (#592) Source-Author: Anthonios Partheniou Source-Date: Tue Apr 6 11:32:03 2021 -0400 Source-Repo: googleapis/synthtool Source-Sha: 5b5bf6d519b2d658d9f2e483d9f6f3d0ba8ee6bc Source-Link: https://github.com/googleapis/synthtool/commit/5b5bf6d519b2d658d9f2e483d9f6f3d0ba8ee6bc --- .pre-commit-config.yaml | 14 ++++++++++++++ docs/conf.py | 13 +++++++++++++ synth.metadata | 6 +++--- 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 32302e488..8912e9b5d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,3 +1,17 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# # See https://pre-commit.com for more information # See https://pre-commit.com/hooks.html for more hooks repos: diff --git a/docs/conf.py b/docs/conf.py index 37e0c46af..fdea01aad 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,4 +1,17 @@ # -*- coding: utf-8 -*- +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# # google-cloud-bigquery documentation build configuration file # diff --git a/synth.metadata b/synth.metadata index 3b34bf519..114359b88 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "e175d3a26f68e1bc5148bf055089dbfc1b83c76a" + "sha": "c1195147a6e9220f26558a301427dd447646da3a" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "f5c5904fb0c6aa3b3730eadf4e5a4485afc65726" + "sha": "5b5bf6d519b2d658d9f2e483d9f6f3d0ba8ee6bc" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "f5c5904fb0c6aa3b3730eadf4e5a4485afc65726" + "sha": "5b5bf6d519b2d658d9f2e483d9f6f3d0ba8ee6bc" } } ], From be4961257f077b96b595cfcd6553650bd4c618ad Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 7 Apr 2021 20:14:18 +0200 Subject: [PATCH 170/341] chore(deps): update dependency grpcio to v1.37.0 (#596) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 6024d7655..734cdf445 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.13.0 google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.4 -grpcio==1.36.1 +grpcio==1.37.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' From 8f4c0b84dac3840532d7865247b8ad94b625b897 Mon Sep 17 00:00:00 2001 From: Kevin Deggelman Date: Thu, 8 Apr 2021 07:16:06 -0700 Subject: [PATCH 171/341] docs: update the description of the return value of `_QueryResults.rows()` (#594) Updated the description of the return value of `rows` to be more accurate. 
--- google/cloud/bigquery/query.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 42547cd73..495c4effb 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -815,7 +815,7 @@ def total_rows(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.total_rows Returns: - Optional[int}: Count generated on the server (None until set by the server). + Optional[int]: Count generated on the server (None until set by the server). """ total_rows = self._properties.get("totalRows") if total_rows is not None: @@ -858,7 +858,7 @@ def rows(self): Returns: Optional[List[google.cloud.bigquery.table.Row]]: - Fields describing the schema (None until set by the server). + Rows containing the results of the query. """ return _rows_from_json(self._properties.get("rows", ()), self.schema) From c8b5581ea3c94005d69755c4a3b5a0d8900f3fe2 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Mon, 12 Apr 2021 07:20:24 -0600 Subject: [PATCH 172/341] feat: accept DatasetListItem where DatasetReference is accepted (#597) * split out and pytestify list_tables tests. Also, exercise dataset polymorphism in some of the tests. * list_tables now accepts DatasetListItem objects * Get coverage to 100% But why do we run coverage on test code? * lint * Update exception text for DatasetListItem * Bypass opentelemetry tracing in unit tests. * Got rid of opentelemetry tracing checks. They aren't needed. * abstracted dataset-argument handling And applied it to `list_tables` and `list_models`. * Converted list_model tests to pytest and included check for dataset polymorphism * removed unneeded blank lines. * Made list_routines accept DatasetListItem and converted list_routines tests to pytest. * create_dataset accepts DatasetListItem Also converted create_dataset tests to pytest. (And fixed some long lines.) 
* Converted list_routine tests to pytest * include string dataset representation in dataset polymorphism. * removed some unused imports * Updated delete_dataset tests - Polymorphic on dataset - pytest * black * lint * We don't actually need to avoid opentelemetry And a 3.6 test depended on it. * fixed docstrings to include DatasetListItem in dataset polymorphic APIs. --- google/cloud/bigquery/client.py | 61 +- tests/unit/conftest.py | 23 + tests/unit/helpers.py | 49 + tests/unit/test_client.py | 3266 +++++++++++------------------ tests/unit/test_create_dataset.py | 349 +++ tests/unit/test_delete_dataset.py | 64 + tests/unit/test_list_models.py | 72 + tests/unit/test_list_routines.py | 75 + tests/unit/test_list_tables.py | 145 ++ 9 files changed, 2005 insertions(+), 2099 deletions(-) create mode 100644 tests/unit/conftest.py create mode 100644 tests/unit/test_create_dataset.py create mode 100644 tests/unit/test_delete_dataset.py create mode 100644 tests/unit/test_list_models.py create mode 100644 tests/unit/test_list_routines.py create mode 100644 tests/unit/test_list_tables.py diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 305d60d3b..10127e10d 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -449,6 +449,22 @@ def _create_bqstorage_client(self): return bigquery_storage.BigQueryReadClient(credentials=self._credentials) + def _dataset_from_arg(self, dataset): + if isinstance(dataset, str): + dataset = DatasetReference.from_string( + dataset, default_project=self.project + ) + + if not isinstance(dataset, (Dataset, DatasetReference)): + if isinstance(dataset, DatasetListItem): + dataset = dataset.reference + else: + raise TypeError( + "dataset must be a Dataset, DatasetReference, DatasetListItem," + " or string" + ) + return dataset + def create_dataset( self, dataset, exists_ok=False, retry=DEFAULT_RETRY, timeout=None ): @@ -461,6 +477,7 @@ def create_dataset( dataset (Union[ \ 
google.cloud.bigquery.dataset.Dataset, \ google.cloud.bigquery.dataset.DatasetReference, \ + google.cloud.bigquery.dataset.DatasetListItem, \ str, \ ]): A :class:`~google.cloud.bigquery.dataset.Dataset` to create. @@ -491,10 +508,7 @@ def create_dataset( >>> dataset = client.create_dataset(dataset) """ - if isinstance(dataset, str): - dataset = DatasetReference.from_string( - dataset, default_project=self.project - ) + dataset = self._dataset_from_arg(dataset) if isinstance(dataset, DatasetReference): dataset = Dataset(dataset) @@ -1133,6 +1147,7 @@ def list_models( dataset (Union[ \ google.cloud.bigquery.dataset.Dataset, \ google.cloud.bigquery.dataset.DatasetReference, \ + google.cloud.bigquery.dataset.DatasetListItem, \ str, \ ]): A reference to the dataset whose models to list from the @@ -1160,13 +1175,7 @@ def list_models( :class:`~google.cloud.bigquery.model.Model` contained within the requested dataset. """ - if isinstance(dataset, str): - dataset = DatasetReference.from_string( - dataset, default_project=self.project - ) - - if not isinstance(dataset, (Dataset, DatasetReference)): - raise TypeError("dataset must be a Dataset, DatasetReference, or string") + dataset = self._dataset_from_arg(dataset) path = "%s/models" % dataset.path span_attributes = {"path": path} @@ -1210,6 +1219,7 @@ def list_routines( dataset (Union[ \ google.cloud.bigquery.dataset.Dataset, \ google.cloud.bigquery.dataset.DatasetReference, \ + google.cloud.bigquery.dataset.DatasetListItem, \ str, \ ]): A reference to the dataset whose routines to list from the @@ -1237,14 +1247,7 @@ def list_routines( :class:`~google.cloud.bigquery.routine.Routine`s contained within the requested dataset, limited by ``max_results``. 
""" - if isinstance(dataset, str): - dataset = DatasetReference.from_string( - dataset, default_project=self.project - ) - - if not isinstance(dataset, (Dataset, DatasetReference)): - raise TypeError("dataset must be a Dataset, DatasetReference, or string") - + dataset = self._dataset_from_arg(dataset) path = "{}/routines".format(dataset.path) span_attributes = {"path": path} @@ -1288,6 +1291,7 @@ def list_tables( dataset (Union[ \ google.cloud.bigquery.dataset.Dataset, \ google.cloud.bigquery.dataset.DatasetReference, \ + google.cloud.bigquery.dataset.DatasetListItem, \ str, \ ]): A reference to the dataset whose tables to list from the @@ -1315,14 +1319,7 @@ def list_tables( :class:`~google.cloud.bigquery.table.TableListItem` contained within the requested dataset. """ - if isinstance(dataset, str): - dataset = DatasetReference.from_string( - dataset, default_project=self.project - ) - - if not isinstance(dataset, (Dataset, DatasetReference)): - raise TypeError("dataset must be a Dataset, DatasetReference, or string") - + dataset = self._dataset_from_arg(dataset) path = "%s/tables" % dataset.path span_attributes = {"path": path} @@ -1365,6 +1362,7 @@ def delete_dataset( dataset (Union[ \ google.cloud.bigquery.dataset.Dataset, \ google.cloud.bigquery.dataset.DatasetReference, \ + google.cloud.bigquery.dataset.DatasetListItem, \ str, \ ]): A reference to the dataset to delete. If a string is passed @@ -1384,14 +1382,7 @@ def delete_dataset( Defaults to ``False``. If ``True``, ignore "not found" errors when deleting the dataset. 
""" - if isinstance(dataset, str): - dataset = DatasetReference.from_string( - dataset, default_project=self.project - ) - - if not isinstance(dataset, (Dataset, DatasetReference)): - raise TypeError("dataset must be a Dataset or a DatasetReference") - + dataset = self._dataset_from_arg(dataset) params = {} path = dataset.path if delete_contents: diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py new file mode 100644 index 000000000..07fc9b4ad --- /dev/null +++ b/tests/unit/conftest.py @@ -0,0 +1,23 @@ +import pytest + +from .helpers import make_client + + +@pytest.fixture +def client(): + yield make_client() + + +@pytest.fixture +def PROJECT(): + yield "PROJECT" + + +@pytest.fixture +def DS_ID(): + yield "DATASET_ID" + + +@pytest.fixture +def LOCATION(): + yield "us-central" diff --git a/tests/unit/helpers.py b/tests/unit/helpers.py index b51b0bbb7..67aeaca35 100644 --- a/tests/unit/helpers.py +++ b/tests/unit/helpers.py @@ -12,6 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import google.cloud.bigquery.client +import google.cloud.bigquery.dataset +import mock +import pytest + def make_connection(*responses): import google.cloud.bigquery._http @@ -31,3 +36,47 @@ def _to_pyarrow(value): import pyarrow return pyarrow.array([value])[0] + + +def make_client(project="PROJECT", **kw): + credentials = mock.Mock(spec=google.auth.credentials.Credentials) + return google.cloud.bigquery.client.Client(project, credentials, **kw) + + +def make_dataset_reference_string(project, ds_id): + return f"{project}.{ds_id}" + + +def make_dataset(project, ds_id): + return google.cloud.bigquery.dataset.Dataset( + google.cloud.bigquery.dataset.DatasetReference(project, ds_id) + ) + + +def make_dataset_list_item(project, ds_id): + return google.cloud.bigquery.dataset.DatasetListItem( + dict(datasetReference=dict(projectId=project, datasetId=ds_id)) + ) + + +def identity(x): + return x + + +def get_reference(x): + return x.reference + + +dataset_like = [ + (google.cloud.bigquery.dataset.DatasetReference, identity), + (make_dataset, identity), + (make_dataset_list_item, get_reference), + ( + make_dataset_reference_string, + google.cloud.bigquery.dataset.DatasetReference.from_string, + ), +] + +dataset_polymorphic = pytest.mark.parametrize( + "make_dataset,get_reference", dataset_like +) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 26ef340de..96e51678f 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -856,2505 +856,1643 @@ def fail_bqstorage_import(name, globals, locals, fromlist, level): ] assert matching_warnings, "Missing dependency warning not raised." 
- def test_create_dataset_minimal(self): - from google.cloud.bigquery.dataset import Dataset + def test_create_routine_w_minimal_resource(self): + from google.cloud.bigquery.routine import Routine + from google.cloud.bigquery.routine import RoutineReference - PATH = "projects/%s/datasets" % self.PROJECT - RESOURCE = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - } creds = _make_credentials() + path = "/projects/test-routine-project/datasets/test_routines/routines" + resource = { + "routineReference": { + "projectId": "test-routine-project", + "datasetId": "test_routines", + "routineId": "minimal_routine", + } + } client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(RESOURCE) - - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - before = Dataset(ds_ref) + conn = client._connection = make_connection(resource) + full_routine_id = "test-routine-project.test_routines.minimal_routine" + routine = Routine(full_routine_id) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - after = client.create_dataset(before, timeout=7.5) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + actual_routine = client.create_routine(routine, timeout=7.5) - self.assertEqual(after.dataset_id, self.DS_ID) - self.assertEqual(after.project, self.PROJECT) - self.assertEqual(after.etag, RESOURCE["etag"]) - self.assertEqual(after.full_dataset_id, RESOURCE["id"]) + final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % PATH, - data={ - "datasetReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - }, - "labels": {}, - }, - timeout=7.5, + method="POST", path=path, data=resource, timeout=7.5, + ) + self.assertEqual( + actual_routine.reference, 
RoutineReference.from_string(full_routine_id) ) - def test_create_dataset_w_attrs(self): - from google.cloud.bigquery.dataset import Dataset, AccessEntry + def test_create_routine_w_conflict(self): + from google.cloud.bigquery.routine import Routine - PATH = "projects/%s/datasets" % self.PROJECT - DESCRIPTION = "DESC" - FRIENDLY_NAME = "FN" - LOCATION = "US" - USER_EMAIL = "phred@example.com" - LABELS = {"color": "red"} - VIEW = { - "projectId": "my-proj", - "datasetId": "starry-skies", - "tableId": "northern-hemisphere", + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = make_connection( + google.api_core.exceptions.AlreadyExists("routine already exists") + ) + path = "/projects/test-routine-project/datasets/test_routines/routines" + full_routine_id = "test-routine-project.test_routines.minimal_routine" + routine = Routine(full_routine_id) + + with pytest.raises(google.api_core.exceptions.AlreadyExists): + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.create_routine(routine) + + final_attributes.assert_called_once_with({"path": path}, client, None) + + resource = { + "routineReference": { + "projectId": "test-routine-project", + "datasetId": "test_routines", + "routineId": "minimal_routine", + } } - RESOURCE = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "description": DESCRIPTION, - "friendlyName": FRIENDLY_NAME, - "location": LOCATION, - "defaultTableExpirationMs": "3600", - "labels": LABELS, - "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], + conn.api_request.assert_called_once_with( + method="POST", path=path, data=resource, timeout=None, + ) + + @unittest.skipIf(opentelemetry is None, "Requires `opentelemetry`") + def test_span_status_is_set(self): + from 
google.cloud.bigquery.routine import Routine + + tracer_provider = TracerProvider() + memory_exporter = InMemorySpanExporter() + span_processor = SimpleExportSpanProcessor(memory_exporter) + tracer_provider.add_span_processor(span_processor) + trace.set_tracer_provider(tracer_provider) + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = make_connection( + google.api_core.exceptions.AlreadyExists("routine already exists") + ) + path = "/projects/test-routine-project/datasets/test_routines/routines" + full_routine_id = "test-routine-project.test_routines.minimal_routine" + routine = Routine(full_routine_id) + + with pytest.raises(google.api_core.exceptions.AlreadyExists): + client.create_routine(routine) + + span_list = memory_exporter.get_finished_spans() + self.assertTrue(span_list[0].status is not None) + + resource = { + "routineReference": { + "projectId": "test-routine-project", + "datasetId": "test_routines", + "routineId": "minimal_routine", + } } + conn.api_request.assert_called_once_with( + method="POST", path=path, data=resource, timeout=None, + ) + + def test_create_routine_w_conflict_exists_ok(self): + from google.cloud.bigquery.routine import Routine + creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(RESOURCE) - entries = [ - AccessEntry("OWNER", "userByEmail", USER_EMAIL), - AccessEntry(None, "view", VIEW), - ] + resource = { + "routineReference": { + "projectId": "test-routine-project", + "datasetId": "test_routines", + "routineId": "minimal_routine", + } + } + path = "/projects/test-routine-project/datasets/test_routines/routines" - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - before = Dataset(ds_ref) - before.access_entries = entries - before.description = DESCRIPTION - before.friendly_name = FRIENDLY_NAME - before.default_table_expiration_ms = 3600 - before.location = 
LOCATION - before.labels = LABELS + conn = client._connection = make_connection( + google.api_core.exceptions.AlreadyExists("routine already exists"), resource + ) + full_routine_id = "test-routine-project.test_routines.minimal_routine" + routine = Routine(full_routine_id) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - after = client.create_dataset(before) + actual_routine = client.create_routine(routine, exists_ok=True) - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + final_attributes.assert_called_with( + {"path": "%s/minimal_routine" % path}, client, None + ) + + self.assertEqual(actual_routine.project, "test-routine-project") + self.assertEqual(actual_routine.dataset_id, "test_routines") + self.assertEqual(actual_routine.routine_id, "minimal_routine") + conn.api_request.assert_has_calls( + [ + mock.call(method="POST", path=path, data=resource, timeout=None,), + mock.call( + method="GET", + path="/projects/test-routine-project/datasets/test_routines/routines/minimal_routine", + timeout=None, + ), + ] + ) + + def test_create_table_w_day_partition(self): + from google.cloud.bigquery.table import Table + from google.cloud.bigquery.table import TimePartitioning + + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() + conn = client._connection = make_connection(resource) + table = Table(self.TABLE_REF) + table.time_partitioning = TimePartitioning() + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + got = client.create_table(table, timeout=7.5) - self.assertEqual(after.dataset_id, self.DS_ID) - self.assertEqual(after.project, self.PROJECT) - self.assertEqual(after.etag, RESOURCE["etag"]) - self.assertEqual(after.full_dataset_id, RESOURCE["id"]) - 
self.assertEqual(after.description, DESCRIPTION) - self.assertEqual(after.friendly_name, FRIENDLY_NAME) - self.assertEqual(after.location, LOCATION) - self.assertEqual(after.default_table_expiration_ms, 3600) - self.assertEqual(after.labels, LABELS) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + ) conn.api_request.assert_called_once_with( method="POST", - path="/%s" % PATH, + path="/%s" % path, data={ - "datasetReference": { + "tableReference": { "projectId": self.PROJECT, "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, - "description": DESCRIPTION, - "friendlyName": FRIENDLY_NAME, - "location": LOCATION, - "defaultTableExpirationMs": "3600", - "access": [ - {"role": "OWNER", "userByEmail": USER_EMAIL}, - {"view": VIEW}, - ], - "labels": LABELS, + "timePartitioning": {"type": "DAY"}, + "labels": {}, }, - timeout=None, + timeout=7.5, ) + self.assertEqual(table.time_partitioning.type_, "DAY") + self.assertEqual(got.table_id, self.TABLE_ID) - def test_create_dataset_w_custom_property(self): + def test_create_table_w_custom_property(self): # The library should handle sending properties to the API that are not # yet part of the library - from google.cloud.bigquery.dataset import Dataset + from google.cloud.bigquery.table import Table - path = "/projects/%s/datasets" % self.PROJECT - resource = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "newAlphaProperty": "unreleased property", - } + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() + resource["newAlphaProperty"] = "unreleased property" conn = client._connection = make_connection(resource) - - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - before = Dataset(ds_ref) - before._properties["newAlphaProperty"] = "unreleased property" + table = 
Table(self.TABLE_REF) + table._properties["newAlphaProperty"] = "unreleased property" with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - after = client.create_dataset(before) - - final_attributes.assert_called_once_with({"path": path}, client, None) + got = client.create_table(table) - self.assertEqual(after.dataset_id, self.DS_ID) - self.assertEqual(after.project, self.PROJECT) - self.assertEqual(after._properties["newAlphaProperty"], "unreleased property") + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + ) conn.api_request.assert_called_once_with( method="POST", - path=path, + path="/%s" % path, data={ - "datasetReference": { + "tableReference": { "projectId": self.PROJECT, "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, "newAlphaProperty": "unreleased property", "labels": {}, }, timeout=None, ) + self.assertEqual(got._properties["newAlphaProperty"], "unreleased property") + self.assertEqual(got.table_id, self.TABLE_ID) - def test_create_dataset_w_client_location_wo_dataset_location(self): - from google.cloud.bigquery.dataset import Dataset + def test_create_table_w_encryption_configuration(self): + from google.cloud.bigquery.encryption_configuration import ( + EncryptionConfiguration, + ) + from google.cloud.bigquery.table import Table - PATH = "projects/%s/datasets" % self.PROJECT - RESOURCE = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "location": self.LOCATION, - } + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() + conn = client._connection = make_connection(resource) + table = 
Table(self.TABLE_REF) + table.encryption_configuration = EncryptionConfiguration( + kms_key_name=self.KMS_KEY_NAME ) - conn = client._connection = make_connection(RESOURCE) - - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - before = Dataset(ds_ref) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - after = client.create_dataset(before) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + got = client.create_table(table) - self.assertEqual(after.dataset_id, self.DS_ID) - self.assertEqual(after.project, self.PROJECT) - self.assertEqual(after.etag, RESOURCE["etag"]) - self.assertEqual(after.full_dataset_id, RESOURCE["id"]) - self.assertEqual(after.location, self.LOCATION) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + ) conn.api_request.assert_called_once_with( method="POST", - path="/%s" % PATH, + path="/%s" % path, data={ - "datasetReference": { + "tableReference": { "projectId": self.PROJECT, "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, "labels": {}, - "location": self.LOCATION, + "encryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME}, }, timeout=None, ) + self.assertEqual(got.table_id, self.TABLE_ID) - def test_create_dataset_w_client_location_w_dataset_location(self): - from google.cloud.bigquery.dataset import Dataset + def test_create_table_w_day_partition_and_expire(self): + from google.cloud.bigquery.table import Table + from google.cloud.bigquery.table import TimePartitioning - PATH = "projects/%s/datasets" % self.PROJECT - OTHER_LOCATION = "EU" - RESOURCE = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "location": OTHER_LOCATION, - } + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() - client = self._make_one( - 
project=self.PROJECT, credentials=creds, location=self.LOCATION - ) - conn = client._connection = make_connection(RESOURCE) - - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - before = Dataset(ds_ref) - before.location = OTHER_LOCATION + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() + conn = client._connection = make_connection(resource) + table = Table(self.TABLE_REF) + table.time_partitioning = TimePartitioning(expiration_ms=100) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - after = client.create_dataset(before) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + got = client.create_table(table) - self.assertEqual(after.dataset_id, self.DS_ID) - self.assertEqual(after.project, self.PROJECT) - self.assertEqual(after.etag, RESOURCE["etag"]) - self.assertEqual(after.full_dataset_id, RESOURCE["id"]) - self.assertEqual(after.location, OTHER_LOCATION) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + ) conn.api_request.assert_called_once_with( method="POST", - path="/%s" % PATH, + path="/%s" % path, data={ - "datasetReference": { + "tableReference": { "projectId": self.PROJECT, "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, + "timePartitioning": {"type": "DAY", "expirationMs": "100"}, "labels": {}, - "location": OTHER_LOCATION, }, timeout=None, ) + self.assertEqual(table.time_partitioning.type_, "DAY") + self.assertEqual(table.time_partitioning.expiration_ms, 100) + self.assertEqual(got.table_id, self.TABLE_ID) - def test_create_dataset_w_reference(self): - path = "/projects/%s/datasets" % self.PROJECT - resource = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "location": self.LOCATION, - } + def 
test_create_table_w_schema_and_query(self): + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table + + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) + query = "SELECT * from %s:%s" % (self.DS_ID, self.TABLE_ID) creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() + resource.update( + { + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ] + }, + "view": {"query": query}, + } ) + schema = [ + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] conn = client._connection = make_connection(resource) + table = Table(self.TABLE_REF, schema=schema) + table.view_query = query + with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - dataset = client.create_dataset(DatasetReference(self.PROJECT, self.DS_ID)) - - final_attributes.assert_called_once_with({"path": path}, client, None) + got = client.create_table(table) - self.assertEqual(dataset.dataset_id, self.DS_ID) - self.assertEqual(dataset.project, self.PROJECT) - self.assertEqual(dataset.etag, resource["etag"]) - self.assertEqual(dataset.full_dataset_id, resource["id"]) - self.assertEqual(dataset.location, self.LOCATION) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + ) conn.api_request.assert_called_once_with( method="POST", - path=path, + path="/%s" % path, data={ - "datasetReference": { + "tableReference": { "projectId": self.PROJECT, "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", 
"type": "INTEGER", "mode": "REQUIRED"}, + ] }, + "view": {"query": query, "useLegacySql": False}, "labels": {}, - "location": self.LOCATION, }, timeout=None, ) + self.assertEqual(got.table_id, self.TABLE_ID) + self.assertEqual(got.project, self.PROJECT) + self.assertEqual(got.dataset_id, self.DS_ID) + self.assertEqual(got.schema, schema) + self.assertEqual(got.view_query, query) - def test_create_dataset_w_fully_qualified_string(self): - path = "/projects/%s/datasets" % self.PROJECT - resource = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "location": self.LOCATION, - } + def test_create_table_w_external(self): + from google.cloud.bigquery.external_config import ExternalConfig + from google.cloud.bigquery.job import SourceFormat + from google.cloud.bigquery.table import Table + + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() + resource.update( + { + "externalDataConfiguration": { + "sourceFormat": SourceFormat.CSV, + "autodetect": True, + } + } ) conn = client._connection = make_connection(resource) + table = Table(self.TABLE_REF) + ec = ExternalConfig("CSV") + ec.autodetect = True + table.external_data_configuration = ec + with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - dataset = client.create_dataset("{}.{}".format(self.PROJECT, self.DS_ID)) - - final_attributes.assert_called_once_with({"path": path}, client, None) + got = client.create_table(table) - self.assertEqual(dataset.dataset_id, self.DS_ID) - self.assertEqual(dataset.project, self.PROJECT) - self.assertEqual(dataset.etag, resource["etag"]) - self.assertEqual(dataset.full_dataset_id, 
resource["id"]) - self.assertEqual(dataset.location, self.LOCATION) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + ) conn.api_request.assert_called_once_with( method="POST", - path=path, + path="/%s" % path, data={ - "datasetReference": { + "tableReference": { "projectId": self.PROJECT, "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "externalDataConfiguration": { + "sourceFormat": SourceFormat.CSV, + "autodetect": True, }, "labels": {}, - "location": self.LOCATION, }, timeout=None, ) + self.assertEqual(got.table_id, self.TABLE_ID) + self.assertEqual(got.project, self.PROJECT) + self.assertEqual(got.dataset_id, self.DS_ID) + self.assertEqual( + got.external_data_configuration.source_format, SourceFormat.CSV + ) + self.assertEqual(got.external_data_configuration.autodetect, True) - def test_create_dataset_w_string(self): - path = "/projects/%s/datasets" % self.PROJECT - resource = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "location": self.LOCATION, - } + def test_create_table_w_reference(self): + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION - ) + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() conn = client._connection = make_connection(resource) + with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - dataset = client.create_dataset(self.DS_ID) - - final_attributes.assert_called_once_with({"path": path}, client, None) + got = client.create_table(self.TABLE_REF) - self.assertEqual(dataset.dataset_id, self.DS_ID) - self.assertEqual(dataset.project, self.PROJECT) - self.assertEqual(dataset.etag, resource["etag"]) - 
self.assertEqual(dataset.full_dataset_id, resource["id"]) - self.assertEqual(dataset.location, self.LOCATION) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": self.TABLE_REF.dataset_id}, + client, + None, + ) conn.api_request.assert_called_once_with( method="POST", - path=path, + path="/%s" % path, data={ - "datasetReference": { + "tableReference": { "projectId": self.PROJECT, "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, "labels": {}, - "location": self.LOCATION, }, timeout=None, ) + self.assertEqual(got.table_id, self.TABLE_ID) - def test_create_dataset_alreadyexists_w_exists_ok_false(self): - creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION - ) - client._connection = make_connection( - google.api_core.exceptions.AlreadyExists("dataset already exists") - ) - - with pytest.raises(google.api_core.exceptions.AlreadyExists): - client.create_dataset(self.DS_ID) - - def test_create_dataset_alreadyexists_w_exists_ok_true(self): - post_path = "/projects/{}/datasets".format(self.PROJECT) - get_path = "/projects/{}/datasets/{}".format(self.PROJECT, self.DS_ID) - resource = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "{}:{}".format(self.PROJECT, self.DS_ID), - "location": self.LOCATION, - } + def test_create_table_w_fully_qualified_string(self): + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION - ) - conn = client._connection = make_connection( - google.api_core.exceptions.AlreadyExists("dataset already exists"), resource - ) + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() + conn = client._connection = make_connection(resource) with mock.patch( 
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - dataset = client.create_dataset(self.DS_ID, exists_ok=True) - - final_attributes.assert_called_with({"path": get_path}, client, None) - - self.assertEqual(dataset.dataset_id, self.DS_ID) - self.assertEqual(dataset.project, self.PROJECT) - self.assertEqual(dataset.etag, resource["etag"]) - self.assertEqual(dataset.full_dataset_id, resource["id"]) - self.assertEqual(dataset.location, self.LOCATION) + got = client.create_table( + "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.TABLE_ID) + ) - conn.api_request.assert_has_calls( - [ - mock.call( - method="POST", - path=post_path, - data={ - "datasetReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - }, - "labels": {}, - "location": self.LOCATION, - }, - timeout=None, - ), - mock.call(method="GET", path=get_path, timeout=None), - ] + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": self.TABLE_REF.dataset_id}, + client, + None, ) - def test_create_routine_w_minimal_resource(self): - from google.cloud.bigquery.routine import Routine - from google.cloud.bigquery.routine import RoutineReference + conn.api_request.assert_called_once_with( + method="POST", + path="/%s" % path, + data={ + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "labels": {}, + }, + timeout=None, + ) + self.assertEqual(got.table_id, self.TABLE_ID) + def test_create_table_w_string(self): + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() - path = "/projects/test-routine-project/datasets/test_routines/routines" - resource = { - "routineReference": { - "projectId": "test-routine-project", - "datasetId": "test_routines", - "routineId": "minimal_routine", - } - } client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() conn = client._connection = 
make_connection(resource) - full_routine_id = "test-routine-project.test_routines.minimal_routine" - routine = Routine(full_routine_id) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - actual_routine = client.create_routine(routine, timeout=7.5) - - final_attributes.assert_called_once_with({"path": path}, client, None) + got = client.create_table("{}.{}".format(self.DS_ID, self.TABLE_ID)) - conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=7.5, - ) - self.assertEqual( - actual_routine.reference, RoutineReference.from_string(full_routine_id) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": self.TABLE_REF.dataset_id}, + client, + None, ) - def test_create_routine_w_conflict(self): - from google.cloud.bigquery.routine import Routine + conn.api_request.assert_called_once_with( + method="POST", + path="/%s" % path, + data={ + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "labels": {}, + }, + timeout=None, + ) + self.assertEqual(got.table_id, self.TABLE_ID) + def test_create_table_alreadyexists_w_exists_ok_false(self): + post_path = "/projects/{}/datasets/{}/tables".format(self.PROJECT, self.DS_ID) creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) + client = self._make_one( + project=self.PROJECT, credentials=creds, location=self.LOCATION + ) conn = client._connection = make_connection( - google.api_core.exceptions.AlreadyExists("routine already exists") + google.api_core.exceptions.AlreadyExists("table already exists") ) - path = "/projects/test-routine-project/datasets/test_routines/routines" - full_routine_id = "test-routine-project.test_routines.minimal_routine" - routine = Routine(full_routine_id) with pytest.raises(google.api_core.exceptions.AlreadyExists): with mock.patch( 
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - client.create_routine(routine) - - final_attributes.assert_called_once_with({"path": path}, client, None) - - resource = { - "routineReference": { - "projectId": "test-routine-project", - "datasetId": "test_routines", - "routineId": "minimal_routine", - } - } - conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=None, - ) - - @unittest.skipIf(opentelemetry is None, "Requires `opentelemetry`") - def test_span_status_is_set(self): - from google.cloud.bigquery.routine import Routine - - tracer_provider = TracerProvider() - memory_exporter = InMemorySpanExporter() - span_processor = SimpleExportSpanProcessor(memory_exporter) - tracer_provider.add_span_processor(span_processor) - trace.set_tracer_provider(tracer_provider) + client.create_table("{}.{}".format(self.DS_ID, self.TABLE_ID)) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection( - google.api_core.exceptions.AlreadyExists("routine already exists") + final_attributes.assert_called_with( + {"path": post_path, "dataset_id": self.TABLE_REF.dataset_id}, client, None, ) - path = "/projects/test-routine-project/datasets/test_routines/routines" - full_routine_id = "test-routine-project.test_routines.minimal_routine" - routine = Routine(full_routine_id) - - with pytest.raises(google.api_core.exceptions.AlreadyExists): - client.create_routine(routine) - - span_list = memory_exporter.get_finished_spans() - self.assertTrue(span_list[0].status is not None) - resource = { - "routineReference": { - "projectId": "test-routine-project", - "datasetId": "test_routines", - "routineId": "minimal_routine", - } - } conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=None, + method="POST", + path=post_path, + data={ + "tableReference": { + "projectId": self.PROJECT, + 
"datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "labels": {}, + }, + timeout=None, ) - def test_create_routine_w_conflict_exists_ok(self): - from google.cloud.bigquery.routine import Routine - + def test_create_table_alreadyexists_w_exists_ok_true(self): + post_path = "/projects/{}/datasets/{}/tables".format(self.PROJECT, self.DS_ID) + get_path = "/projects/{}/datasets/{}/tables/{}".format( + self.PROJECT, self.DS_ID, self.TABLE_ID + ) + resource = self._make_table_resource() creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = { - "routineReference": { - "projectId": "test-routine-project", - "datasetId": "test_routines", - "routineId": "minimal_routine", - } - } - path = "/projects/test-routine-project/datasets/test_routines/routines" - + client = self._make_one( + project=self.PROJECT, credentials=creds, location=self.LOCATION + ) conn = client._connection = make_connection( - google.api_core.exceptions.AlreadyExists("routine already exists"), resource + google.api_core.exceptions.AlreadyExists("table already exists"), resource ) - full_routine_id = "test-routine-project.test_routines.minimal_routine" - routine = Routine(full_routine_id) + with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - actual_routine = client.create_routine(routine, exists_ok=True) + got = client.create_table( + "{}.{}".format(self.DS_ID, self.TABLE_ID), exists_ok=True + ) - final_attributes.assert_called_with( - {"path": "%s/minimal_routine" % path}, client, None - ) + final_attributes.assert_called_with({"path": get_path}, client, None) + + self.assertEqual(got.project, self.PROJECT) + self.assertEqual(got.dataset_id, self.DS_ID) + self.assertEqual(got.table_id, self.TABLE_ID) - self.assertEqual(actual_routine.project, "test-routine-project") - self.assertEqual(actual_routine.dataset_id, "test_routines") - self.assertEqual(actual_routine.routine_id, 
"minimal_routine") conn.api_request.assert_has_calls( [ - mock.call(method="POST", path=path, data=resource, timeout=None,), mock.call( - method="GET", - path="/projects/test-routine-project/datasets/test_routines/routines/minimal_routine", + method="POST", + path=post_path, + data={ + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "labels": {}, + }, timeout=None, ), + mock.call(method="GET", path=get_path, timeout=None), ] ) - def test_create_table_w_day_partition(self): - from google.cloud.bigquery.table import Table - from google.cloud.bigquery.table import TimePartitioning + def test_close(self): + creds = _make_credentials() + http = mock.Mock() + http._auth_request.session = mock.Mock() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) + client.close() + + http.close.assert_called_once() + http._auth_request.session.close.assert_called_once() + + def test_get_model(self): + path = "projects/%s/datasets/%s/models/%s" % ( + self.PROJECT, + self.DS_ID, + self.MODEL_ID, + ) creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = self._make_table_resource() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + resource = { + "modelReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "modelId": self.MODEL_ID, + } + } conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF) - table.time_partitioning = TimePartitioning() + + model_ref = DatasetReference(self.PROJECT, self.DS_ID).model(self.MODEL_ID) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - got = client.create_table(table, timeout=7.5) + got = client.get_model(model_ref, timeout=7.5) - final_attributes.assert_called_once_with( - {"path": 
"/%s" % path, "dataset_id": table.dataset_id}, client, None - ) + final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "timePartitioning": {"type": "DAY"}, - "labels": {}, - }, - timeout=7.5, + method="GET", path="/%s" % path, timeout=7.5 ) - self.assertEqual(table.time_partitioning.type_, "DAY") - self.assertEqual(got.table_id, self.TABLE_ID) - - def test_create_table_w_custom_property(self): - # The library should handle sending properties to the API that are not - # yet part of the library - from google.cloud.bigquery.table import Table + self.assertEqual(got.model_id, self.MODEL_ID) - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) + def test_get_model_w_string(self): + path = "projects/%s/datasets/%s/models/%s" % ( + self.PROJECT, + self.DS_ID, + self.MODEL_ID, + ) creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = self._make_table_resource() - resource["newAlphaProperty"] = "unreleased property" + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + resource = { + "modelReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "modelId": self.MODEL_ID, + } + } conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF) - table._properties["newAlphaProperty"] = "unreleased property" + + model_id = "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.MODEL_ID) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - got = client.create_table(table) + got = client.get_model(model_id) - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None - ) + 
final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "newAlphaProperty": "unreleased property", - "labels": {}, - }, - timeout=None, + method="GET", path="/%s" % path, timeout=None ) - self.assertEqual(got._properties["newAlphaProperty"], "unreleased property") - self.assertEqual(got.table_id, self.TABLE_ID) + self.assertEqual(got.model_id, self.MODEL_ID) - def test_create_table_w_encryption_configuration(self): - from google.cloud.bigquery.encryption_configuration import ( - EncryptionConfiguration, - ) - from google.cloud.bigquery.table import Table + def test_get_routine(self): + from google.cloud.bigquery.routine import Routine + from google.cloud.bigquery.routine import RoutineReference - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = self._make_table_resource() - conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF) - table.encryption_configuration = EncryptionConfiguration( - kms_key_name=self.KMS_KEY_NAME - ) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.create_table(table) - - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None - ) - - conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, + full_routine_id = "test-routine-project.test_routines.minimal_routine" + routines = [ + full_routine_id, + Routine(full_routine_id), + RoutineReference.from_string(full_routine_id), + ] + for routine 
in routines: + creds = _make_credentials() + resource = { + "etag": "im-an-etag", + "routineReference": { + "projectId": "test-routine-project", + "datasetId": "test_routines", + "routineId": "minimal_routine", }, - "labels": {}, - "encryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME}, - }, - timeout=None, - ) - self.assertEqual(got.table_id, self.TABLE_ID) - - def test_create_table_w_day_partition_and_expire(self): - from google.cloud.bigquery.table import Table - from google.cloud.bigquery.table import TimePartitioning + "routineType": "SCALAR_FUNCTION", + } + path = "/projects/test-routine-project/datasets/test_routines/routines/minimal_routine" - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = self._make_table_resource() - conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF) - table.time_partitioning = TimePartitioning(expiration_ms=100) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.create_table(table) + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = make_connection(resource) - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None - ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + actual_routine = client.get_routine(routine, timeout=7.5) - conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "timePartitioning": {"type": "DAY", "expirationMs": "100"}, - "labels": {}, - }, - timeout=None, - ) - self.assertEqual(table.time_partitioning.type_, "DAY") - 
self.assertEqual(table.time_partitioning.expiration_ms, 100) - self.assertEqual(got.table_id, self.TABLE_ID) + final_attributes.assert_called_once_with({"path": path}, client, None) - def test_create_table_w_schema_and_query(self): - from google.cloud.bigquery.schema import SchemaField - from google.cloud.bigquery.table import Table + conn.api_request.assert_called_once_with( + method="GET", path=path, timeout=7.5, + ) + self.assertEqual( + actual_routine.reference, + RoutineReference.from_string(full_routine_id), + msg="routine={}".format(repr(routine)), + ) + self.assertEqual( + actual_routine.etag, + "im-an-etag", + msg="routine={}".format(repr(routine)), + ) + self.assertEqual( + actual_routine.type_, + "SCALAR_FUNCTION", + msg="routine={}".format(repr(routine)), + ) - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - query = "SELECT * from %s:%s" % (self.DS_ID, self.TABLE_ID) + def test_get_table(self): + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) resource = self._make_table_resource() - resource.update( - { - "schema": { - "fields": [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, - ] - }, - "view": {"query": query}, - } - ) - schema = [ - SchemaField("full_name", "STRING", mode="REQUIRED"), - SchemaField("age", "INTEGER", mode="REQUIRED"), - ] conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF, schema=schema) - table.view_query = query - with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - got = client.create_table(table) + table = client.get_table(self.TABLE_REF, timeout=7.5) - final_attributes.assert_called_once_with( - 
{"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None - ) + final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "schema": { - "fields": [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, - ] - }, - "view": {"query": query, "useLegacySql": False}, - "labels": {}, - }, - timeout=None, + method="GET", path="/%s" % path, timeout=7.5 ) - self.assertEqual(got.table_id, self.TABLE_ID) - self.assertEqual(got.project, self.PROJECT) - self.assertEqual(got.dataset_id, self.DS_ID) - self.assertEqual(got.schema, schema) - self.assertEqual(got.view_query, query) - - def test_create_table_w_external(self): - from google.cloud.bigquery.external_config import ExternalConfig - from google.cloud.bigquery.job import SourceFormat - from google.cloud.bigquery.table import Table + self.assertEqual(table.table_id, self.TABLE_ID) - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) + def test_get_table_sets_user_agent(self): creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = self._make_table_resource() - resource.update( - { - "externalDataConfiguration": { - "sourceFormat": SourceFormat.CSV, - "autodetect": True, - } - } + http = mock.create_autospec(requests.Session) + mock_response = http.request( + url=mock.ANY, method=mock.ANY, headers=mock.ANY, data=mock.ANY ) - conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF) - ec = ExternalConfig("CSV") - ec.autodetect = True - table.external_data_configuration = ec - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.create_table(table) - - 
final_attributes.assert_called_once_with( - {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + http.reset_mock() + http.is_mtls = False + mock_response.status_code = 200 + mock_response.json.return_value = self._make_table_resource() + user_agent_override = client_info.ClientInfo(user_agent="my-application/1.2.3") + client = self._make_one( + project=self.PROJECT, + credentials=creds, + client_info=user_agent_override, + _http=http, ) - conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "externalDataConfiguration": { - "sourceFormat": SourceFormat.CSV, - "autodetect": True, - }, - "labels": {}, + client.get_table(self.TABLE_REF) + + expected_user_agent = user_agent_override.to_user_agent() + http.request.assert_called_once_with( + url=mock.ANY, + method="GET", + headers={ + "X-Goog-API-Client": expected_user_agent, + "Accept-Encoding": "gzip", + "User-Agent": expected_user_agent, }, + data=mock.ANY, timeout=None, ) - self.assertEqual(got.table_id, self.TABLE_ID) - self.assertEqual(got.project, self.PROJECT) - self.assertEqual(got.dataset_id, self.DS_ID) - self.assertEqual( - got.external_data_configuration.source_format, SourceFormat.CSV - ) - self.assertEqual(got.external_data_configuration.autodetect, True) - - def test_create_table_w_reference(self): - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = self._make_table_resource() - conn = client._connection = make_connection(resource) + self.assertIn("my-application/1.2.3", expected_user_agent) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.create_table(self.TABLE_REF) - - final_attributes.assert_called_once_with( - {"path": "/%s" % 
path, "dataset_id": self.TABLE_REF.dataset_id}, - client, - None, - ) - - conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "labels": {}, - }, - timeout=None, - ) - self.assertEqual(got.table_id, self.TABLE_ID) - - def test_create_table_w_fully_qualified_string(self): - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = self._make_table_resource() - conn = client._connection = make_connection(resource) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.create_table( - "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.TABLE_ID) - ) - - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "dataset_id": self.TABLE_REF.dataset_id}, - client, - None, - ) - - conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "labels": {}, - }, - timeout=None, - ) - self.assertEqual(got.table_id, self.TABLE_ID) - - def test_create_table_w_string(self): - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = self._make_table_resource() - conn = client._connection = make_connection(resource) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.create_table("{}.{}".format(self.DS_ID, self.TABLE_ID)) - - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "dataset_id": self.TABLE_REF.dataset_id}, - client, - None, - ) - - 
conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "labels": {}, - }, - timeout=None, - ) - self.assertEqual(got.table_id, self.TABLE_ID) - - def test_create_table_alreadyexists_w_exists_ok_false(self): - post_path = "/projects/{}/datasets/{}/tables".format(self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION - ) - conn = client._connection = make_connection( - google.api_core.exceptions.AlreadyExists("table already exists") - ) - - with pytest.raises(google.api_core.exceptions.AlreadyExists): - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - client.create_table("{}.{}".format(self.DS_ID, self.TABLE_ID)) - - final_attributes.assert_called_with( - {"path": post_path, "dataset_id": self.TABLE_REF.dataset_id}, client, None, - ) - - conn.api_request.assert_called_once_with( - method="POST", - path=post_path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "labels": {}, - }, - timeout=None, - ) - - def test_create_table_alreadyexists_w_exists_ok_true(self): - post_path = "/projects/{}/datasets/{}/tables".format(self.PROJECT, self.DS_ID) - get_path = "/projects/{}/datasets/{}/tables/{}".format( - self.PROJECT, self.DS_ID, self.TABLE_ID - ) - resource = self._make_table_resource() - creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION - ) - conn = client._connection = make_connection( - google.api_core.exceptions.AlreadyExists("table already exists"), resource - ) - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.create_table( - 
"{}.{}".format(self.DS_ID, self.TABLE_ID), exists_ok=True - ) - - final_attributes.assert_called_with({"path": get_path}, client, None) - - self.assertEqual(got.project, self.PROJECT) - self.assertEqual(got.dataset_id, self.DS_ID) - self.assertEqual(got.table_id, self.TABLE_ID) - - conn.api_request.assert_has_calls( - [ - mock.call( - method="POST", - path=post_path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "labels": {}, - }, - timeout=None, - ), - mock.call(method="GET", path=get_path, timeout=None), - ] - ) - - def test_close(self): - creds = _make_credentials() - http = mock.Mock() - http._auth_request.session = mock.Mock() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - - client.close() - - http.close.assert_called_once() - http._auth_request.session.close.assert_called_once() - - def test_get_model(self): - path = "projects/%s/datasets/%s/models/%s" % ( - self.PROJECT, - self.DS_ID, - self.MODEL_ID, - ) - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - resource = { - "modelReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "modelId": self.MODEL_ID, - } - } - conn = client._connection = make_connection(resource) - - model_ref = DatasetReference(self.PROJECT, self.DS_ID).model(self.MODEL_ID) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.get_model(model_ref, timeout=7.5) - - final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) - - conn.api_request.assert_called_once_with( - method="GET", path="/%s" % path, timeout=7.5 - ) - self.assertEqual(got.model_id, self.MODEL_ID) - - def test_get_model_w_string(self): - path = "projects/%s/datasets/%s/models/%s" % ( - self.PROJECT, - self.DS_ID, - self.MODEL_ID, - ) - creds = _make_credentials() - http 
= object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - resource = { - "modelReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "modelId": self.MODEL_ID, - } - } - conn = client._connection = make_connection(resource) - - model_id = "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.MODEL_ID) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.get_model(model_id) - - final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) - - conn.api_request.assert_called_once_with( - method="GET", path="/%s" % path, timeout=None - ) - self.assertEqual(got.model_id, self.MODEL_ID) - - def test_get_routine(self): - from google.cloud.bigquery.routine import Routine - from google.cloud.bigquery.routine import RoutineReference - - full_routine_id = "test-routine-project.test_routines.minimal_routine" - routines = [ - full_routine_id, - Routine(full_routine_id), - RoutineReference.from_string(full_routine_id), - ] - for routine in routines: - creds = _make_credentials() - resource = { - "etag": "im-an-etag", - "routineReference": { - "projectId": "test-routine-project", - "datasetId": "test_routines", - "routineId": "minimal_routine", - }, - "routineType": "SCALAR_FUNCTION", - } - path = "/projects/test-routine-project/datasets/test_routines/routines/minimal_routine" - - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource) - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - actual_routine = client.get_routine(routine, timeout=7.5) - - final_attributes.assert_called_once_with({"path": path}, client, None) - - conn.api_request.assert_called_once_with( - method="GET", path=path, timeout=7.5, - ) - self.assertEqual( - actual_routine.reference, - RoutineReference.from_string(full_routine_id), - 
msg="routine={}".format(repr(routine)), - ) - self.assertEqual( - actual_routine.etag, - "im-an-etag", - msg="routine={}".format(repr(routine)), - ) - self.assertEqual( - actual_routine.type_, - "SCALAR_FUNCTION", - msg="routine={}".format(repr(routine)), - ) - - def test_get_table(self): - path = "projects/%s/datasets/%s/tables/%s" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, - ) - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - resource = self._make_table_resource() - conn = client._connection = make_connection(resource) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - table = client.get_table(self.TABLE_REF, timeout=7.5) - - final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) - - conn.api_request.assert_called_once_with( - method="GET", path="/%s" % path, timeout=7.5 - ) - self.assertEqual(table.table_id, self.TABLE_ID) - - def test_get_table_sets_user_agent(self): - creds = _make_credentials() - http = mock.create_autospec(requests.Session) - mock_response = http.request( - url=mock.ANY, method=mock.ANY, headers=mock.ANY, data=mock.ANY - ) - http.reset_mock() - http.is_mtls = False - mock_response.status_code = 200 - mock_response.json.return_value = self._make_table_resource() - user_agent_override = client_info.ClientInfo(user_agent="my-application/1.2.3") - client = self._make_one( - project=self.PROJECT, - credentials=creds, - client_info=user_agent_override, - _http=http, - ) - - client.get_table(self.TABLE_REF) - - expected_user_agent = user_agent_override.to_user_agent() - http.request.assert_called_once_with( - url=mock.ANY, - method="GET", - headers={ - "X-Goog-API-Client": expected_user_agent, - "Accept-Encoding": "gzip", - "User-Agent": expected_user_agent, - }, - data=mock.ANY, - timeout=None, - ) - self.assertIn("my-application/1.2.3", expected_user_agent) - - def 
test_get_iam_policy(self): - from google.cloud.bigquery.iam import BIGQUERY_DATA_OWNER_ROLE - from google.cloud.bigquery.iam import BIGQUERY_DATA_EDITOR_ROLE - from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE - from google.api_core.iam import Policy - - PATH = "/projects/{}/datasets/{}/tables/{}:getIamPolicy".format( - self.PROJECT, self.DS_ID, self.TABLE_ID, - ) - BODY = {"options": {"requestedPolicyVersion": 1}} - ETAG = "CARDI" - VERSION = 1 - OWNER1 = "user:phred@example.com" - OWNER2 = "group:cloud-logs@google.com" - EDITOR1 = "domain:google.com" - EDITOR2 = "user:phred@example.com" - VIEWER1 = "serviceAccount:1234-abcdef@service.example.com" - VIEWER2 = "user:phred@example.com" - RETURNED = { - "resourceId": PATH, - "etag": ETAG, - "version": VERSION, - "bindings": [ - {"role": BIGQUERY_DATA_OWNER_ROLE, "members": [OWNER1, OWNER2]}, - {"role": BIGQUERY_DATA_EDITOR_ROLE, "members": [EDITOR1, EDITOR2]}, - {"role": BIGQUERY_DATA_VIEWER_ROLE, "members": [VIEWER1, VIEWER2]}, - ], - } - EXPECTED = { - binding["role"]: set(binding["members"]) for binding in RETURNED["bindings"] - } - - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = make_connection(RETURNED) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - policy = client.get_iam_policy(self.TABLE_REF, timeout=7.5) - - final_attributes.assert_called_once_with({"path": PATH}, client, None) - - conn.api_request.assert_called_once_with( - method="POST", path=PATH, data=BODY, timeout=7.5 - ) - - self.assertIsInstance(policy, Policy) - self.assertEqual(policy.etag, RETURNED["etag"]) - self.assertEqual(policy.version, RETURNED["version"]) - self.assertEqual(dict(policy), EXPECTED) - - def test_get_iam_policy_w_invalid_table(self): - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, 
credentials=creds, _http=http) - - table_resource_string = "projects/{}/datasets/{}/tables/{}".format( - self.PROJECT, self.DS_ID, self.TABLE_ID, - ) - - with self.assertRaises(TypeError): - client.get_iam_policy(table_resource_string) - - def test_get_iam_policy_w_invalid_version(self): - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - - with self.assertRaises(ValueError): - client.get_iam_policy(self.TABLE_REF, requested_policy_version=2) - - def test_set_iam_policy(self): - from google.cloud.bigquery.iam import BIGQUERY_DATA_OWNER_ROLE - from google.cloud.bigquery.iam import BIGQUERY_DATA_EDITOR_ROLE - from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE - from google.api_core.iam import Policy - - PATH = "/projects/%s/datasets/%s/tables/%s:setIamPolicy" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, - ) - ETAG = "foo" - VERSION = 1 - OWNER1 = "user:phred@example.com" - OWNER2 = "group:cloud-logs@google.com" - EDITOR1 = "domain:google.com" - EDITOR2 = "user:phred@example.com" - VIEWER1 = "serviceAccount:1234-abcdef@service.example.com" - VIEWER2 = "user:phred@example.com" - BINDINGS = [ - {"role": BIGQUERY_DATA_OWNER_ROLE, "members": [OWNER1, OWNER2]}, - {"role": BIGQUERY_DATA_EDITOR_ROLE, "members": [EDITOR1, EDITOR2]}, - {"role": BIGQUERY_DATA_VIEWER_ROLE, "members": [VIEWER1, VIEWER2]}, - ] - MASK = "bindings,etag" - RETURNED = {"etag": ETAG, "version": VERSION, "bindings": BINDINGS} - - policy = Policy() - for binding in BINDINGS: - policy[binding["role"]] = binding["members"] - - BODY = {"policy": policy.to_api_repr(), "updateMask": MASK} - - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = make_connection(RETURNED) - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - returned_policy = 
client.set_iam_policy( - self.TABLE_REF, policy, updateMask=MASK, timeout=7.5 - ) - - final_attributes.assert_called_once_with({"path": PATH}, client, None) - - conn.api_request.assert_called_once_with( - method="POST", path=PATH, data=BODY, timeout=7.5 - ) - self.assertEqual(returned_policy.etag, ETAG) - self.assertEqual(returned_policy.version, VERSION) - self.assertEqual(dict(returned_policy), dict(policy)) - - def test_set_iam_policy_no_mask(self): - from google.api_core.iam import Policy - - PATH = "/projects/%s/datasets/%s/tables/%s:setIamPolicy" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, - ) - RETURNED = {"etag": "foo", "version": 1, "bindings": []} - - policy = Policy() - BODY = {"policy": policy.to_api_repr()} - - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = make_connection(RETURNED) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - client.set_iam_policy(self.TABLE_REF, policy, timeout=7.5) - - final_attributes.assert_called_once_with({"path": PATH}, client, None) - - conn.api_request.assert_called_once_with( - method="POST", path=PATH, data=BODY, timeout=7.5 - ) - - def test_set_iam_policy_invalid_policy(self): - from google.api_core.iam import Policy - - policy = Policy() - invalid_policy_repr = policy.to_api_repr() - - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - - with self.assertRaises(TypeError): - client.set_iam_policy(self.TABLE_REF, invalid_policy_repr) - - def test_set_iam_policy_w_invalid_table(self): - from google.api_core.iam import Policy - - policy = Policy() - - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - - table_resource_string = "projects/%s/datasets/%s/tables/%s" % ( - self.PROJECT, - 
self.DS_ID, - self.TABLE_ID, - ) - - with self.assertRaises(TypeError): - client.set_iam_policy(table_resource_string, policy) - - def test_test_iam_permissions(self): - PATH = "/projects/%s/datasets/%s/tables/%s:testIamPermissions" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, - ) - - PERMISSIONS = ["bigquery.tables.get", "bigquery.tables.update"] - BODY = {"permissions": PERMISSIONS} - RETURNED = {"permissions": PERMISSIONS} - - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = make_connection(RETURNED) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - client.test_iam_permissions(self.TABLE_REF, PERMISSIONS, timeout=7.5) - - final_attributes.assert_called_once_with({"path": PATH}, client, None) - - conn.api_request.assert_called_once_with( - method="POST", path=PATH, data=BODY, timeout=7.5 - ) - - def test_test_iam_permissions_w_invalid_table(self): - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - - table_resource_string = "projects/%s/datasets/%s/tables/%s" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, - ) - - PERMISSIONS = ["bigquery.tables.get", "bigquery.tables.update"] - - with self.assertRaises(TypeError): - client.test_iam_permissions(table_resource_string, PERMISSIONS) - - def test_update_dataset_w_invalid_field(self): - from google.cloud.bigquery.dataset import Dataset - - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - with self.assertRaises(ValueError): - client.update_dataset( - Dataset("{}.{}".format(self.PROJECT, self.DS_ID)), ["foo"] - ) - - def test_update_dataset(self): - from google.cloud.bigquery.dataset import Dataset, AccessEntry - - PATH = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) - DESCRIPTION = "DESCRIPTION" - 
FRIENDLY_NAME = "TITLE" - LOCATION = "loc" - LABELS = {"priority": "high"} - ACCESS = [{"role": "OWNER", "userByEmail": "phred@example.com"}] - EXP = 17 - RESOURCE = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "description": DESCRIPTION, - "friendlyName": FRIENDLY_NAME, - "location": LOCATION, - "defaultTableExpirationMs": EXP, - "labels": LABELS, - "access": ACCESS, - } - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(RESOURCE, RESOURCE) - ds = Dataset(DatasetReference(self.PROJECT, self.DS_ID)) - ds.description = DESCRIPTION - ds.friendly_name = FRIENDLY_NAME - ds.location = LOCATION - ds.default_table_expiration_ms = EXP - ds.labels = LABELS - ds.access_entries = [AccessEntry("OWNER", "userByEmail", "phred@example.com")] - fields = [ - "description", - "friendly_name", - "location", - "labels", - "access_entries", - ] - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - ds2 = client.update_dataset(ds, fields=fields, timeout=7.5,) - - final_attributes.assert_called_once_with( - {"path": "/%s" % PATH, "fields": fields}, client, None - ) - - conn.api_request.assert_called_once_with( - method="PATCH", - data={ - "description": DESCRIPTION, - "friendlyName": FRIENDLY_NAME, - "location": LOCATION, - "labels": LABELS, - "access": ACCESS, - }, - path="/" + PATH, - headers=None, - timeout=7.5, - ) - self.assertEqual(ds2.description, ds.description) - self.assertEqual(ds2.friendly_name, ds.friendly_name) - self.assertEqual(ds2.location, ds.location) - self.assertEqual(ds2.labels, ds.labels) - self.assertEqual(ds2.access_entries, ds.access_entries) - - # ETag becomes If-Match header. 
- ds._properties["etag"] = "etag" - client.update_dataset(ds, []) - req = conn.api_request.call_args - self.assertEqual(req[1]["headers"]["If-Match"], "etag") - - def test_update_dataset_w_custom_property(self): - # The library should handle sending properties to the API that are not - # yet part of the library - from google.cloud.bigquery.dataset import Dataset - - path = "/projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) - resource = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "newAlphaProperty": "unreleased property", - } - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource) - dataset = Dataset(DatasetReference(self.PROJECT, self.DS_ID)) - dataset._properties["newAlphaProperty"] = "unreleased property" - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - dataset = client.update_dataset(dataset, ["newAlphaProperty"]) - - final_attributes.assert_called_once_with( - {"path": path, "fields": ["newAlphaProperty"]}, client, None - ) - - conn.api_request.assert_called_once_with( - method="PATCH", - data={"newAlphaProperty": "unreleased property"}, - path=path, - headers=None, - timeout=None, - ) - - self.assertEqual(dataset.dataset_id, self.DS_ID) - self.assertEqual(dataset.project, self.PROJECT) - self.assertEqual(dataset._properties["newAlphaProperty"], "unreleased property") - - def test_update_model(self): - from google.cloud.bigquery.model import Model - - path = "projects/%s/datasets/%s/models/%s" % ( - self.PROJECT, - self.DS_ID, - self.MODEL_ID, - ) - description = "description" - title = "title" - expires = datetime.datetime( - 2012, 12, 21, 16, 0, 0, tzinfo=google.cloud._helpers.UTC - ) - resource = { - "modelReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "modelId": self.MODEL_ID, - }, - "description": description, - "etag": 
"etag", - "expirationTime": str(google.cloud._helpers._millis(expires)), - "friendlyName": title, - "labels": {"x": "y"}, - } - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource, resource) - model_id = "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.MODEL_ID) - model = Model(model_id) - model.description = description - model.friendly_name = title - model.expires = expires - model.labels = {"x": "y"} - fields = ["description", "friendly_name", "labels", "expires"] - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - updated_model = client.update_model(model, fields, timeout=7.5) - - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": fields}, client, None - ) - - sent = { - "description": description, - "expirationTime": str(google.cloud._helpers._millis(expires)), - "friendlyName": title, - "labels": {"x": "y"}, - } - conn.api_request.assert_called_once_with( - method="PATCH", data=sent, path="/" + path, headers=None, timeout=7.5 - ) - self.assertEqual(updated_model.model_id, model.model_id) - self.assertEqual(updated_model.description, model.description) - self.assertEqual(updated_model.friendly_name, model.friendly_name) - self.assertEqual(updated_model.labels, model.labels) - self.assertEqual(updated_model.expires, model.expires) - - # ETag becomes If-Match header. 
- model._proto.etag = "etag" - client.update_model(model, []) - req = conn.api_request.call_args - self.assertEqual(req[1]["headers"]["If-Match"], "etag") - - def test_update_routine(self): - from google.cloud.bigquery.routine import Routine - from google.cloud.bigquery.routine import RoutineArgument - - full_routine_id = "routines-project.test_routines.updated_routine" - resource = { - "routineReference": { - "projectId": "routines-project", - "datasetId": "test_routines", - "routineId": "updated_routine", - }, - "routineType": "SCALAR_FUNCTION", - "language": "SQL", - "definitionBody": "x * 3", - "arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}], - "returnType": None, - "someNewField": "someValue", - } - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource, resource) - routine = Routine(full_routine_id) - routine.arguments = [ - RoutineArgument( - name="x", - data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 - ), - ) - ] - routine.body = "x * 3" - routine.language = "SQL" - routine.type_ = "SCALAR_FUNCTION" - routine._properties["someNewField"] = "someValue" - fields = [ - "arguments", - "language", - "body", - "type_", - "return_type", - "someNewField", - ] - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - actual_routine = client.update_routine(routine, fields, timeout=7.5,) - - final_attributes.assert_called_once_with( - {"path": routine.path, "fields": fields}, client, None - ) - - # TODO: routineReference isn't needed when the Routines API supports - # partial updates. 
- sent = resource - conn.api_request.assert_called_once_with( - method="PUT", - data=sent, - path="/projects/routines-project/datasets/test_routines/routines/updated_routine", - headers=None, - timeout=7.5, - ) - self.assertEqual(actual_routine.arguments, routine.arguments) - self.assertEqual(actual_routine.body, routine.body) - self.assertEqual(actual_routine.language, routine.language) - self.assertEqual(actual_routine.type_, routine.type_) - - # ETag becomes If-Match header. - routine._properties["etag"] = "im-an-etag" - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - client.update_routine(routine, []) - - final_attributes.assert_called_once_with( - {"path": routine.path, "fields": []}, client, None - ) - - req = conn.api_request.call_args - self.assertEqual(req[1]["headers"]["If-Match"], "im-an-etag") - - def test_update_table(self): - from google.cloud.bigquery.schema import SchemaField - from google.cloud.bigquery.table import Table - - path = "projects/%s/datasets/%s/tables/%s" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, - ) - description = "description" - title = "title" - resource = self._make_table_resource() - resource.update( - { - "schema": { - "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": "New field description", - }, - ] - }, - "etag": "etag", - "description": description, - "friendlyName": title, - "labels": {"x": "y"}, - } - ) - schema = [ - SchemaField("full_name", "STRING", mode="REQUIRED", description=None), - SchemaField( - "age", "INTEGER", mode="REQUIRED", description="New field description" - ), - ] - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource, resource) - table = Table(self.TABLE_REF, schema=schema) - 
table.description = description - table.friendly_name = title - table.labels = {"x": "y"} - fields = ["schema", "description", "friendly_name", "labels"] - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - updated_table = client.update_table(table, fields, timeout=7.5) - span_path = "/%s" % path + def test_get_iam_policy(self): + from google.cloud.bigquery.iam import BIGQUERY_DATA_OWNER_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_EDITOR_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE + from google.api_core.iam import Policy - final_attributes.assert_called_once_with( - {"path": span_path, "fields": fields}, client, None + PATH = "/projects/{}/datasets/{}/tables/{}:getIamPolicy".format( + self.PROJECT, self.DS_ID, self.TABLE_ID, ) - - sent = { - "schema": { - "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": "New field description", - }, - ] - }, - "description": description, - "friendlyName": title, - "labels": {"x": "y"}, + BODY = {"options": {"requestedPolicyVersion": 1}} + ETAG = "CARDI" + VERSION = 1 + OWNER1 = "user:phred@example.com" + OWNER2 = "group:cloud-logs@google.com" + EDITOR1 = "domain:google.com" + EDITOR2 = "user:phred@example.com" + VIEWER1 = "serviceAccount:1234-abcdef@service.example.com" + VIEWER2 = "user:phred@example.com" + RETURNED = { + "resourceId": PATH, + "etag": ETAG, + "version": VERSION, + "bindings": [ + {"role": BIGQUERY_DATA_OWNER_ROLE, "members": [OWNER1, OWNER2]}, + {"role": BIGQUERY_DATA_EDITOR_ROLE, "members": [EDITOR1, EDITOR2]}, + {"role": BIGQUERY_DATA_VIEWER_ROLE, "members": [VIEWER1, VIEWER2]}, + ], + } + EXPECTED = { + binding["role"]: set(binding["members"]) for binding in RETURNED["bindings"] } - conn.api_request.assert_called_once_with( - method="PATCH", data=sent, path="/" + 
path, headers=None, timeout=7.5 - ) - self.assertEqual(updated_table.description, table.description) - self.assertEqual(updated_table.friendly_name, table.friendly_name) - self.assertEqual(updated_table.schema, table.schema) - self.assertEqual(updated_table.labels, table.labels) - # ETag becomes If-Match header. - table._properties["etag"] = "etag" + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RETURNED) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - client.update_table(table, []) + policy = client.get_iam_policy(self.TABLE_REF, timeout=7.5) - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": []}, client, None + final_attributes.assert_called_once_with({"path": PATH}, client, None) + + conn.api_request.assert_called_once_with( + method="POST", path=PATH, data=BODY, timeout=7.5 ) - req = conn.api_request.call_args - self.assertEqual(req[1]["headers"]["If-Match"], "etag") + self.assertIsInstance(policy, Policy) + self.assertEqual(policy.etag, RETURNED["etag"]) + self.assertEqual(policy.version, RETURNED["version"]) + self.assertEqual(dict(policy), EXPECTED) - def test_update_table_w_custom_property(self): - from google.cloud.bigquery.table import Table + def test_get_iam_policy_w_invalid_table(self): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - path = "projects/%s/datasets/%s/tables/%s" % ( + table_resource_string = "projects/{}/datasets/{}/tables/{}".format( + self.PROJECT, self.DS_ID, self.TABLE_ID, + ) + + with self.assertRaises(TypeError): + client.get_iam_policy(table_resource_string) + + def test_get_iam_policy_w_invalid_version(self): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + 
+ with self.assertRaises(ValueError): + client.get_iam_policy(self.TABLE_REF, requested_policy_version=2) + + def test_set_iam_policy(self): + from google.cloud.bigquery.iam import BIGQUERY_DATA_OWNER_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_EDITOR_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE + from google.api_core.iam import Policy + + PATH = "/projects/%s/datasets/%s/tables/%s:setIamPolicy" % ( self.PROJECT, self.DS_ID, self.TABLE_ID, ) - resource = self._make_table_resource() - resource["newAlphaProperty"] = "unreleased property" + ETAG = "foo" + VERSION = 1 + OWNER1 = "user:phred@example.com" + OWNER2 = "group:cloud-logs@google.com" + EDITOR1 = "domain:google.com" + EDITOR2 = "user:phred@example.com" + VIEWER1 = "serviceAccount:1234-abcdef@service.example.com" + VIEWER2 = "user:phred@example.com" + BINDINGS = [ + {"role": BIGQUERY_DATA_OWNER_ROLE, "members": [OWNER1, OWNER2]}, + {"role": BIGQUERY_DATA_EDITOR_ROLE, "members": [EDITOR1, EDITOR2]}, + {"role": BIGQUERY_DATA_VIEWER_ROLE, "members": [VIEWER1, VIEWER2]}, + ] + MASK = "bindings,etag" + RETURNED = {"etag": ETAG, "version": VERSION, "bindings": BINDINGS} + + policy = Policy() + for binding in BINDINGS: + policy[binding["role"]] = binding["members"] + + BODY = {"policy": policy.to_api_repr(), "updateMask": MASK} + creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF) - table._properties["newAlphaProperty"] = "unreleased property" + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RETURNED) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - updated_table = client.update_table(table, ["newAlphaProperty"]) + returned_policy = client.set_iam_policy( + self.TABLE_REF, policy, 
updateMask=MASK, timeout=7.5 + ) - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": ["newAlphaProperty"]}, client, None, + final_attributes.assert_called_once_with({"path": PATH}, client, None) + + conn.api_request.assert_called_once_with( + method="POST", path=PATH, data=BODY, timeout=7.5 + ) + self.assertEqual(returned_policy.etag, ETAG) + self.assertEqual(returned_policy.version, VERSION) + self.assertEqual(dict(returned_policy), dict(policy)) + + def test_set_iam_policy_no_mask(self): + from google.api_core.iam import Policy + + PATH = "/projects/%s/datasets/%s/tables/%s:setIamPolicy" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, ) + RETURNED = {"etag": "foo", "version": 1, "bindings": []} + + policy = Policy() + BODY = {"policy": policy.to_api_repr()} + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RETURNED) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.set_iam_policy(self.TABLE_REF, policy, timeout=7.5) + + final_attributes.assert_called_once_with({"path": PATH}, client, None) conn.api_request.assert_called_once_with( - method="PATCH", - path="/%s" % path, - data={"newAlphaProperty": "unreleased property"}, - headers=None, - timeout=None, + method="POST", path=PATH, data=BODY, timeout=7.5 ) - self.assertEqual( - updated_table._properties["newAlphaProperty"], "unreleased property" + + def test_set_iam_policy_invalid_policy(self): + from google.api_core.iam import Policy + + policy = Policy() + invalid_policy_repr = policy.to_api_repr() + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + with self.assertRaises(TypeError): + client.set_iam_policy(self.TABLE_REF, invalid_policy_repr) + + def test_set_iam_policy_w_invalid_table(self): + from 
google.api_core.iam import Policy + + policy = Policy() + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + table_resource_string = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, ) - def test_update_table_only_use_legacy_sql(self): - from google.cloud.bigquery.table import Table + with self.assertRaises(TypeError): + client.set_iam_policy(table_resource_string, policy) - path = "projects/%s/datasets/%s/tables/%s" % ( + def test_test_iam_permissions(self): + PATH = "/projects/%s/datasets/%s/tables/%s:testIamPermissions" % ( self.PROJECT, self.DS_ID, self.TABLE_ID, ) - resource = self._make_table_resource() - resource["view"] = {"useLegacySql": True} + + PERMISSIONS = ["bigquery.tables.get", "bigquery.tables.update"] + BODY = {"permissions": PERMISSIONS} + RETURNED = {"permissions": PERMISSIONS} + creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF) - table.view_use_legacy_sql = True + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RETURNED) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - updated_table = client.update_table(table, ["view_use_legacy_sql"]) + client.test_iam_permissions(self.TABLE_REF, PERMISSIONS, timeout=7.5) - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": ["view_use_legacy_sql"]}, client, None, - ) + final_attributes.assert_called_once_with({"path": PATH}, client, None) conn.api_request.assert_called_once_with( - method="PATCH", - path="/%s" % path, - data={"view": {"useLegacySql": True}}, - headers=None, - timeout=None, + method="POST", path=PATH, data=BODY, timeout=7.5 ) - 
self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql) - def test_update_table_w_query(self): - import datetime - from google.cloud._helpers import UTC - from google.cloud._helpers import _millis - from google.cloud.bigquery.schema import SchemaField - from google.cloud.bigquery.table import Table + def test_test_iam_permissions_w_invalid_table(self): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - path = "projects/%s/datasets/%s/tables/%s" % ( + table_resource_string = "projects/%s/datasets/%s/tables/%s" % ( self.PROJECT, self.DS_ID, self.TABLE_ID, ) - query = "select fullname, age from person_ages" - location = "EU" - exp_time = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC) - schema_resource = { - "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": "this is a column", - }, - {"name": "country", "type": "STRING", "mode": "NULLABLE"}, - ] + + PERMISSIONS = ["bigquery.tables.get", "bigquery.tables.update"] + + with self.assertRaises(TypeError): + client.test_iam_permissions(table_resource_string, PERMISSIONS) + + def test_update_dataset_w_invalid_field(self): + from google.cloud.bigquery.dataset import Dataset + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + with self.assertRaises(ValueError): + client.update_dataset( + Dataset("{}.{}".format(self.PROJECT, self.DS_ID)), ["foo"] + ) + + def test_update_dataset(self): + from google.cloud.bigquery.dataset import Dataset, AccessEntry + + PATH = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) + DESCRIPTION = "DESCRIPTION" + FRIENDLY_NAME = "TITLE" + LOCATION = "loc" + LABELS = {"priority": "high"} + ACCESS = [{"role": "OWNER", "userByEmail": "phred@example.com"}] + EXP = 17 + RESOURCE = { + "datasetReference": 
{"projectId": self.PROJECT, "datasetId": self.DS_ID}, + "etag": "etag", + "description": DESCRIPTION, + "friendlyName": FRIENDLY_NAME, + "location": LOCATION, + "defaultTableExpirationMs": EXP, + "labels": LABELS, + "access": ACCESS, } - schema = [ - SchemaField( - "full_name", - "STRING", - mode="REQUIRED", - # Explicitly unset the description. - description=None, - ), - SchemaField( - "age", "INTEGER", mode="REQUIRED", description="this is a column" - ), - # Omit the description to not make updates to it. - SchemaField("country", "STRING"), - ] - resource = self._make_table_resource() - resource.update( - { - "schema": schema_resource, - "view": {"query": query, "useLegacySql": True}, - "location": location, - "expirationTime": _millis(exp_time), - } - ) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF, schema=schema) - table.expires = exp_time - table.view_query = query - table.view_use_legacy_sql = True - updated_properties = ["schema", "view_query", "expires", "view_use_legacy_sql"] + conn = client._connection = make_connection(RESOURCE, RESOURCE) + ds = Dataset(DatasetReference(self.PROJECT, self.DS_ID)) + ds.description = DESCRIPTION + ds.friendly_name = FRIENDLY_NAME + ds.location = LOCATION + ds.default_table_expiration_ms = EXP + ds.labels = LABELS + ds.access_entries = [AccessEntry("OWNER", "userByEmail", "phred@example.com")] + fields = [ + "description", + "friendly_name", + "location", + "labels", + "access_entries", + ] + with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - updated_table = client.update_table(table, updated_properties) + ds2 = client.update_dataset(ds, fields=fields, timeout=7.5,) final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": updated_properties}, client, None, + {"path": "/%s" % PATH, "fields": fields}, client, 
None ) - self.assertEqual(updated_table.schema, table.schema) - self.assertEqual(updated_table.view_query, table.view_query) - self.assertEqual(updated_table.expires, table.expires) - self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql) - self.assertEqual(updated_table.location, location) - conn.api_request.assert_called_once_with( method="PATCH", - path="/%s" % path, data={ - "view": {"query": query, "useLegacySql": True}, - "expirationTime": str(_millis(exp_time)), - "schema": schema_resource, + "description": DESCRIPTION, + "friendlyName": FRIENDLY_NAME, + "location": LOCATION, + "labels": LABELS, + "access": ACCESS, }, + path="/" + PATH, headers=None, - timeout=None, + timeout=7.5, ) + self.assertEqual(ds2.description, ds.description) + self.assertEqual(ds2.friendly_name, ds.friendly_name) + self.assertEqual(ds2.location, ds.location) + self.assertEqual(ds2.labels, ds.labels) + self.assertEqual(ds2.access_entries, ds.access_entries) - def test_update_table_w_schema_None(self): - # Simulate deleting schema: not sure if back-end will actually - # allow this operation, but the spec says it is optional. 
- path = "projects/%s/datasets/%s/tables/%s" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, - ) - resource1 = self._make_table_resource() - resource1.update( - { - "schema": { - "fields": [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, - ] - } - } - ) - resource2 = self._make_table_resource() - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource1, resource2) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - table = client.get_table( - # Test with string for table ID - "{}.{}.{}".format( - self.TABLE_REF.project, - self.TABLE_REF.dataset_id, - self.TABLE_REF.table_id, - ) - ) + # ETag becomes If-Match header. + ds._properties["etag"] = "etag" + client.update_dataset(ds, []) + req = conn.api_request.call_args + self.assertEqual(req[1]["headers"]["If-Match"], "etag") - final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) + def test_update_dataset_w_custom_property(self): + # The library should handle sending properties to the API that are not + # yet part of the library + from google.cloud.bigquery.dataset import Dataset - table.schema = None + path = "/projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) + resource = { + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + "newAlphaProperty": "unreleased property", + } + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = make_connection(resource) + dataset = Dataset(DatasetReference(self.PROJECT, self.DS_ID)) + dataset._properties["newAlphaProperty"] = "unreleased property" with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - updated_table = client.update_table(table, ["schema"]) + dataset = 
client.update_dataset(dataset, ["newAlphaProperty"]) final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": ["schema"]}, client, None + {"path": path, "fields": ["newAlphaProperty"]}, client, None ) - self.assertEqual(len(conn.api_request.call_args_list), 2) - req = conn.api_request.call_args_list[1] - self.assertEqual(req[1]["method"], "PATCH") - sent = {"schema": None} - self.assertEqual(req[1]["data"], sent) - self.assertEqual(req[1]["path"], "/%s" % path) - self.assertEqual(len(updated_table.schema), 0) + conn.api_request.assert_called_once_with( + method="PATCH", + data={"newAlphaProperty": "unreleased property"}, + path=path, + headers=None, + timeout=None, + ) - def test_update_table_delete_property(self): - from google.cloud.bigquery.table import Table + self.assertEqual(dataset.dataset_id, self.DS_ID) + self.assertEqual(dataset.project, self.PROJECT) + self.assertEqual(dataset._properties["newAlphaProperty"], "unreleased property") - description = "description" - title = "title" - path = "projects/%s/datasets/%s/tables/%s" % ( + def test_update_model(self): + from google.cloud.bigquery.model import Model + + path = "projects/%s/datasets/%s/models/%s" % ( self.PROJECT, self.DS_ID, - self.TABLE_ID, + self.MODEL_ID, ) - resource1 = self._make_table_resource() - resource1.update({"description": description, "friendlyName": title}) - resource2 = self._make_table_resource() - resource2["description"] = None + description = "description" + title = "title" + expires = datetime.datetime( + 2012, 12, 21, 16, 0, 0, tzinfo=google.cloud._helpers.UTC + ) + resource = { + "modelReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "modelId": self.MODEL_ID, + }, + "description": description, + "etag": "etag", + "expirationTime": str(google.cloud._helpers._millis(expires)), + "friendlyName": title, + "labels": {"x": "y"}, + } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = 
client._connection = make_connection(resource1, resource2) - table = Table(self.TABLE_REF) - table.description = description - table.friendly_name = title - + conn = client._connection = make_connection(resource, resource) + model_id = "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.MODEL_ID) + model = Model(model_id) + model.description = description + model.friendly_name = title + model.expires = expires + model.labels = {"x": "y"} + fields = ["description", "friendly_name", "labels", "expires"] with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - table2 = client.update_table(table, ["description", "friendly_name"]) + updated_model = client.update_model(model, fields, timeout=7.5) final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": ["description", "friendly_name"]}, - client, - None, + {"path": "/%s" % path, "fields": fields}, client, None ) - self.assertEqual(table2.description, table.description) - table2.description = None - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - table3 = client.update_table(table2, ["description"]) - - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": ["description"]}, client, None + sent = { + "description": description, + "expirationTime": str(google.cloud._helpers._millis(expires)), + "friendlyName": title, + "labels": {"x": "y"}, + } + conn.api_request.assert_called_once_with( + method="PATCH", data=sent, path="/" + path, headers=None, timeout=7.5 ) + self.assertEqual(updated_model.model_id, model.model_id) + self.assertEqual(updated_model.description, model.description) + self.assertEqual(updated_model.friendly_name, model.friendly_name) + self.assertEqual(updated_model.labels, model.labels) + self.assertEqual(updated_model.expires, model.expires) - self.assertEqual(len(conn.api_request.call_args_list), 2) - req = 
conn.api_request.call_args_list[1] - self.assertEqual(req[1]["method"], "PATCH") - self.assertEqual(req[1]["path"], "/%s" % path) - sent = {"description": None} - self.assertEqual(req[1]["data"], sent) - self.assertIsNone(table3.description) + # ETag becomes If-Match header. + model._proto.etag = "etag" + client.update_model(model, []) + req = conn.api_request.call_args + self.assertEqual(req[1]["headers"]["If-Match"], "etag") - def test_list_tables_empty_w_timeout(self): - path = "/projects/{}/datasets/{}/tables".format(self.PROJECT, self.DS_ID) + def test_update_routine(self): + from google.cloud.bigquery.routine import Routine + from google.cloud.bigquery.routine import RoutineArgument + + full_routine_id = "routines-project.test_routines.updated_routine" + resource = { + "routineReference": { + "projectId": "routines-project", + "datasetId": "test_routines", + "routineId": "updated_routine", + }, + "routineType": "SCALAR_FUNCTION", + "language": "SQL", + "definitionBody": "x * 3", + "arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}], + "returnType": None, + "someNewField": "someValue", + } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection({}) + conn = client._connection = make_connection(resource, resource) + routine = Routine(full_routine_id) + routine.arguments = [ + RoutineArgument( + name="x", + data_type=bigquery_v2.types.StandardSqlDataType( + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 + ), + ) + ] + routine.body = "x * 3" + routine.language = "SQL" + routine.type_ = "SCALAR_FUNCTION" + routine._properties["someNewField"] = "someValue" + fields = [ + "arguments", + "language", + "body", + "type_", + "return_type", + "someNewField", + ] - dataset = DatasetReference(self.PROJECT, self.DS_ID) - iterator = client.list_tables(dataset, timeout=7.5) - self.assertIs(iterator.dataset, dataset) with mock.patch( 
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = next(iterator.pages) + actual_routine = client.update_routine(routine, fields, timeout=7.5,) - final_attributes.assert_called_once_with({"path": path}, client, None) - tables = list(page) - token = iterator.next_page_token + final_attributes.assert_called_once_with( + {"path": routine.path, "fields": fields}, client, None + ) - self.assertEqual(tables, []) - self.assertIsNone(token) + # TODO: routineReference isn't needed when the Routines API supports + # partial updates. + sent = resource conn.api_request.assert_called_once_with( - method="GET", path=path, query_params={}, timeout=7.5 + method="PUT", + data=sent, + path="/projects/routines-project/datasets/test_routines/routines/updated_routine", + headers=None, + timeout=7.5, ) + self.assertEqual(actual_routine.arguments, routine.arguments) + self.assertEqual(actual_routine.body, routine.body) + self.assertEqual(actual_routine.language, routine.language) + self.assertEqual(actual_routine.type_, routine.type_) - def test_list_models_empty_w_timeout(self): - path = "/projects/{}/datasets/{}/models".format(self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection({}) - - dataset_id = "{}.{}".format(self.PROJECT, self.DS_ID) - iterator = client.list_models(dataset_id, timeout=7.5) + # ETag becomes If-Match header. 
+ routine._properties["etag"] = "im-an-etag" with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": path}, client, None) - models = list(page) - token = iterator.next_page_token + client.update_routine(routine, []) - self.assertEqual(models, []) - self.assertIsNone(token) - conn.api_request.assert_called_once_with( - method="GET", path=path, query_params={}, timeout=7.5 + final_attributes.assert_called_once_with( + {"path": routine.path, "fields": []}, client, None ) - def test_list_models_defaults(self): - from google.cloud.bigquery.model import Model + req = conn.api_request.call_args + self.assertEqual(req[1]["headers"]["If-Match"], "im-an-etag") - MODEL_1 = "model_one" - MODEL_2 = "model_two" - PATH = "projects/%s/datasets/%s/models" % (self.PROJECT, self.DS_ID) - TOKEN = "TOKEN" - DATA = { - "nextPageToken": TOKEN, - "models": [ - { - "modelReference": { - "modelId": MODEL_1, - "datasetId": self.DS_ID, - "projectId": self.PROJECT, - } - }, - { - "modelReference": { - "modelId": MODEL_2, - "datasetId": self.DS_ID, - "projectId": self.PROJECT, - } - }, - ], - } + def test_update_table(self): + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + description = "description" + title = "title" + resource = self._make_table_resource() + resource.update( + { + "schema": { + "fields": [ + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, + }, + { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": "New field description", + }, + ] + }, + "etag": "etag", + "description": description, + "friendlyName": title, + "labels": {"x": "y"}, + } + ) + schema = [ + SchemaField("full_name", "STRING", mode="REQUIRED", 
description=None), + SchemaField( + "age", "INTEGER", mode="REQUIRED", description="New field description" + ), + ] creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(DATA) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - - iterator = client.list_models(dataset) - self.assertIs(iterator.dataset, dataset) + conn = client._connection = make_connection(resource, resource) + table = Table(self.TABLE_REF, schema=schema) + table.description = description + table.friendly_name = title + table.labels = {"x": "y"} + fields = ["schema", "description", "friendly_name", "labels"] with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - models = list(page) - token = iterator.next_page_token + updated_table = client.update_table(table, fields, timeout=7.5) + span_path = "/%s" % path - self.assertEqual(len(models), len(DATA["models"])) - for found, expected in zip(models, DATA["models"]): - self.assertIsInstance(found, Model) - self.assertEqual(found.model_id, expected["modelReference"]["modelId"]) - self.assertEqual(token, TOKEN) + final_attributes.assert_called_once_with( + {"path": span_path, "fields": fields}, client, None + ) + sent = { + "schema": { + "fields": [ + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, + }, + { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": "New field description", + }, + ] + }, + "description": description, + "friendlyName": title, + "labels": {"x": "y"}, + } conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={}, timeout=None + method="PATCH", data=sent, path="/" + path, headers=None, timeout=7.5 ) + self.assertEqual(updated_table.description, table.description) + 
self.assertEqual(updated_table.friendly_name, table.friendly_name) + self.assertEqual(updated_table.schema, table.schema) + self.assertEqual(updated_table.labels, table.labels) - def test_list_models_wrong_type(self): - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - with self.assertRaises(TypeError): - client.list_models(DatasetReference(self.PROJECT, self.DS_ID).model("foo")) - - def test_list_routines_empty_w_timeout(self): - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection({}) - - iterator = client.list_routines("test-routines.test_routines", timeout=7.5) + # ETag becomes If-Match header. + table._properties["etag"] = "etag" with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = next(iterator.pages) + client.update_table(table, []) final_attributes.assert_called_once_with( - {"path": "/projects/test-routines/datasets/test_routines/routines"}, - client, - None, - ) - routines = list(page) - token = iterator.next_page_token - - self.assertEqual(routines, []) - self.assertIsNone(token) - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/test-routines/datasets/test_routines/routines", - query_params={}, - timeout=7.5, + {"path": "/%s" % path, "fields": []}, client, None ) - def test_list_routines_defaults(self): - from google.cloud.bigquery.routine import Routine + req = conn.api_request.call_args + self.assertEqual(req[1]["headers"]["If-Match"], "etag") - project_id = "test-routines" - dataset_id = "test_routines" - path = "/projects/test-routines/datasets/test_routines/routines" - routine_1 = "routine_one" - routine_2 = "routine_two" - token = "TOKEN" - resource = { - "nextPageToken": token, - "routines": [ - { - "routineReference": { - "routineId": routine_1, - "datasetId": dataset_id, - "projectId": project_id, - } - }, - { - 
"routineReference": { - "routineId": routine_2, - "datasetId": dataset_id, - "projectId": project_id, - } - }, - ], - } + def test_update_table_w_custom_property(self): + from google.cloud.bigquery.table import Table + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + resource = self._make_table_resource() + resource["newAlphaProperty"] = "unreleased property" creds = _make_credentials() - client = self._make_one(project=project_id, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection(resource) - dataset = DatasetReference(client.project, dataset_id) + table = Table(self.TABLE_REF) + table._properties["newAlphaProperty"] = "unreleased property" - iterator = client.list_routines(dataset) - self.assertIs(iterator.dataset, dataset) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": path}, client, None) - routines = list(page) - actual_token = iterator.next_page_token + updated_table = client.update_table(table, ["newAlphaProperty"]) - self.assertEqual(len(routines), len(resource["routines"])) - for found, expected in zip(routines, resource["routines"]): - self.assertIsInstance(found, Routine) - self.assertEqual( - found.routine_id, expected["routineReference"]["routineId"] - ) - self.assertEqual(actual_token, token) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": ["newAlphaProperty"]}, client, None, + ) conn.api_request.assert_called_once_with( - method="GET", path=path, query_params={}, timeout=None + method="PATCH", + path="/%s" % path, + data={"newAlphaProperty": "unreleased property"}, + headers=None, + timeout=None, + ) + self.assertEqual( + updated_table._properties["newAlphaProperty"], "unreleased property" ) - def test_list_routines_wrong_type(self): - creds = 
_make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - with self.assertRaises(TypeError): - client.list_routines( - DatasetReference(self.PROJECT, self.DS_ID).table("foo") - ) - - def test_list_tables_defaults(self): - from google.cloud.bigquery.table import TableListItem - - TABLE_1 = "table_one" - TABLE_2 = "table_two" - PATH = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - TOKEN = "TOKEN" - DATA = { - "nextPageToken": TOKEN, - "tables": [ - { - "kind": "bigquery#table", - "id": "%s:%s.%s" % (self.PROJECT, self.DS_ID, TABLE_1), - "tableReference": { - "tableId": TABLE_1, - "datasetId": self.DS_ID, - "projectId": self.PROJECT, - }, - "type": "TABLE", - }, - { - "kind": "bigquery#table", - "id": "%s:%s.%s" % (self.PROJECT, self.DS_ID, TABLE_2), - "tableReference": { - "tableId": TABLE_2, - "datasetId": self.DS_ID, - "projectId": self.PROJECT, - }, - "type": "TABLE", - }, - ], - } + def test_update_table_only_use_legacy_sql(self): + from google.cloud.bigquery.table import Table + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + resource = self._make_table_resource() + resource["view"] = {"useLegacySql": True} creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(DATA) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - - iterator = client.list_tables(dataset) - self.assertIs(iterator.dataset, dataset) + conn = client._connection = make_connection(resource) + table = Table(self.TABLE_REF) + table.view_use_legacy_sql = True with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - tables = list(page) - token = iterator.next_page_token + updated_table = client.update_table(table, ["view_use_legacy_sql"]) - 
self.assertEqual(len(tables), len(DATA["tables"])) - for found, expected in zip(tables, DATA["tables"]): - self.assertIsInstance(found, TableListItem) - self.assertEqual(found.full_table_id, expected["id"]) - self.assertEqual(found.table_type, expected["type"]) - self.assertEqual(token, TOKEN) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": ["view_use_legacy_sql"]}, client, None, + ) conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={}, timeout=None + method="PATCH", + path="/%s" % path, + data={"view": {"useLegacySql": True}}, + headers=None, + timeout=None, ) + self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql) - def test_list_tables_explicit(self): - from google.cloud.bigquery.table import TableListItem + def test_update_table_w_query(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud._helpers import _millis + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table - TABLE_1 = "table_one" - TABLE_2 = "table_two" - PATH = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - TOKEN = "TOKEN" - DATA = { - "tables": [ + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + query = "select fullname, age from person_ages" + location = "EU" + exp_time = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC) + schema_resource = { + "fields": [ { - "kind": "bigquery#dataset", - "id": "%s:%s.%s" % (self.PROJECT, self.DS_ID, TABLE_1), - "tableReference": { - "tableId": TABLE_1, - "datasetId": self.DS_ID, - "projectId": self.PROJECT, - }, - "type": "TABLE", + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, }, { - "kind": "bigquery#dataset", - "id": "%s:%s.%s" % (self.PROJECT, self.DS_ID, TABLE_2), - "tableReference": { - "tableId": TABLE_2, - "datasetId": self.DS_ID, - "projectId": self.PROJECT, - }, - 
"type": "TABLE", + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": "this is a column", }, + {"name": "country", "type": "STRING", "mode": "NULLABLE"}, ] } - + schema = [ + SchemaField( + "full_name", + "STRING", + mode="REQUIRED", + # Explicitly unset the description. + description=None, + ), + SchemaField( + "age", "INTEGER", mode="REQUIRED", description="this is a column" + ), + # Omit the description to not make updates to it. + SchemaField("country", "STRING"), + ] + resource = self._make_table_resource() + resource.update( + { + "schema": schema_resource, + "view": {"query": query, "useLegacySql": True}, + "location": location, + "expirationTime": _millis(exp_time), + } + ) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(DATA) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - - iterator = client.list_tables( - # Test with string for dataset ID. - self.DS_ID, - max_results=3, - page_token=TOKEN, - ) - self.assertEqual(iterator.dataset, dataset) + conn = client._connection = make_connection(resource) + table = Table(self.TABLE_REF, schema=schema) + table.expires = exp_time + table.view_query = query + table.view_use_legacy_sql = True + updated_properties = ["schema", "view_query", "expires", "view_use_legacy_sql"] with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = next(iterator.pages) + updated_table = client.update_table(table, updated_properties) - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - tables = list(page) - token = iterator.next_page_token + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": updated_properties}, client, None, + ) - self.assertEqual(len(tables), len(DATA["tables"])) - for found, expected in zip(tables, DATA["tables"]): - self.assertIsInstance(found, TableListItem) - 
self.assertEqual(found.full_table_id, expected["id"]) - self.assertEqual(found.table_type, expected["type"]) - self.assertIsNone(token) + self.assertEqual(updated_table.schema, table.schema) + self.assertEqual(updated_table.view_query, table.view_query) + self.assertEqual(updated_table.expires, table.expires) + self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql) + self.assertEqual(updated_table.location, location) conn.api_request.assert_called_once_with( - method="GET", - path="/%s" % PATH, - query_params={"maxResults": 3, "pageToken": TOKEN}, + method="PATCH", + path="/%s" % path, + data={ + "view": {"query": query, "useLegacySql": True}, + "expirationTime": str(_millis(exp_time)), + "schema": schema_resource, + }, + headers=None, timeout=None, ) - def test_list_tables_wrong_type(self): + def test_update_table_w_schema_None(self): + # Simulate deleting schema: not sure if back-end will actually + # allow this operation, but the spec says it is optional. + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + resource1 = self._make_table_resource() + resource1.update( + { + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ] + } + } + ) + resource2 = self._make_table_resource() creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - with self.assertRaises(TypeError): - client.list_tables(DatasetReference(self.PROJECT, self.DS_ID).table("foo")) - - def test_delete_dataset(self): - from google.cloud.bigquery.dataset import Dataset - from google.cloud.bigquery.dataset import DatasetReference + conn = client._connection = make_connection(resource1, resource2) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + table = client.get_table( + # Test with string for table ID + "{}.{}.{}".format( + 
self.TABLE_REF.project, + self.TABLE_REF.dataset_id, + self.TABLE_REF.table_id, + ) + ) - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - datasets = (ds_ref, Dataset(ds_ref), "{}.{}".format(self.PROJECT, self.DS_ID)) - PATH = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(*([{}] * len(datasets))) - for arg in datasets: - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - client.delete_dataset(arg, timeout=7.5) + final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) - final_attributes.assert_called_once_with( - {"path": "/%s" % PATH}, client, None - ) + table.schema = None - conn.api_request.assert_called_with( - method="DELETE", path="/%s" % PATH, query_params={}, timeout=7.5 - ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + updated_table = client.update_table(table, ["schema"]) - def test_delete_dataset_delete_contents(self): - from google.cloud.bigquery.dataset import Dataset + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": ["schema"]}, client, None + ) - PATH = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection({}, {}) - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - for arg in (ds_ref, Dataset(ds_ref)): - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - client.delete_dataset(arg, delete_contents=True) + self.assertEqual(len(conn.api_request.call_args_list), 2) + req = conn.api_request.call_args_list[1] + self.assertEqual(req[1]["method"], "PATCH") + sent = {"schema": None} + 
self.assertEqual(req[1]["data"], sent) + self.assertEqual(req[1]["path"], "/%s" % path) + self.assertEqual(len(updated_table.schema), 0) - final_attributes.assert_called_once_with( - {"path": "/%s" % PATH, "deleteContents": True}, client, None - ) - conn.api_request.assert_called_with( - method="DELETE", - path="/%s" % PATH, - query_params={"deleteContents": "true"}, - timeout=None, - ) + def test_update_table_delete_property(self): + from google.cloud.bigquery.table import Table - def test_delete_dataset_wrong_type(self): + description = "description" + title = "title" + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + resource1 = self._make_table_resource() + resource1.update({"description": description, "friendlyName": title}) + resource2 = self._make_table_resource() + resource2["description"] = None creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - with self.assertRaises(TypeError): - client.delete_dataset( - DatasetReference(self.PROJECT, self.DS_ID).table("foo") - ) - - def test_delete_dataset_w_not_found_ok_false(self): - path = "/projects/{}/datasets/{}".format(self.PROJECT, self.DS_ID) - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = make_connection( - google.api_core.exceptions.NotFound("dataset not found") - ) - - with self.assertRaises(google.api_core.exceptions.NotFound): - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - client.delete_dataset(self.DS_ID) + conn = client._connection = make_connection(resource1, resource2) + table = Table(self.TABLE_REF) + table.description = description + table.friendly_name = title - final_attributes.assert_called_once_with({"path": path}, client, None) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + 
) as final_attributes: + table2 = client.update_table(table, ["description", "friendly_name"]) - conn.api_request.assert_called_with( - method="DELETE", path=path, query_params={}, timeout=None + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": ["description", "friendly_name"]}, + client, + None, ) - def test_delete_dataset_w_not_found_ok_true(self): - path = "/projects/{}/datasets/{}".format(self.PROJECT, self.DS_ID) - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = make_connection( - google.api_core.exceptions.NotFound("dataset not found") - ) + self.assertEqual(table2.description, table.description) + table2.description = None with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - client.delete_dataset(self.DS_ID, not_found_ok=True) - - final_attributes.assert_called_once_with({"path": path}, client, None) + table3 = client.update_table(table2, ["description"]) - conn.api_request.assert_called_with( - method="DELETE", path=path, query_params={}, timeout=None + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": ["description"]}, client, None ) + self.assertEqual(len(conn.api_request.call_args_list), 2) + req = conn.api_request.call_args_list[1] + self.assertEqual(req[1]["method"], "PATCH") + self.assertEqual(req[1]["path"], "/%s" % path) + sent = {"description": None} + self.assertEqual(req[1]["data"], sent) + self.assertIsNone(table3.description) + def test_delete_model(self): from google.cloud.bigquery.model import Model diff --git a/tests/unit/test_create_dataset.py b/tests/unit/test_create_dataset.py new file mode 100644 index 000000000..3eb8f1072 --- /dev/null +++ b/tests/unit/test_create_dataset.py @@ -0,0 +1,349 @@ +from google.cloud.bigquery.dataset import Dataset, DatasetReference +from .helpers import make_connection, 
dataset_polymorphic, make_client +import google.cloud.bigquery.dataset +import mock +import pytest + + +@dataset_polymorphic +def test_create_dataset_minimal(make_dataset, get_reference, client, PROJECT, DS_ID): + PATH = "projects/%s/datasets" % PROJECT + RESOURCE = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + } + conn = client._connection = make_connection(RESOURCE) + + dataset = make_dataset(PROJECT, DS_ID) + after = client.create_dataset(dataset, timeout=7.5) + + assert after.dataset_id == DS_ID + assert after.project == PROJECT + assert after.etag == RESOURCE["etag"] + assert after.full_dataset_id == RESOURCE["id"] + + conn.api_request.assert_called_once_with( + method="POST", + path="/%s" % PATH, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + }, + timeout=7.5, + ) + + +def test_create_dataset_w_attrs(client, PROJECT, DS_ID): + from google.cloud.bigquery.dataset import AccessEntry + + PATH = "projects/%s/datasets" % PROJECT + DESCRIPTION = "DESC" + FRIENDLY_NAME = "FN" + LOCATION = "US" + USER_EMAIL = "phred@example.com" + LABELS = {"color": "red"} + VIEW = { + "projectId": "my-proj", + "datasetId": "starry-skies", + "tableId": "northern-hemisphere", + } + RESOURCE = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + "description": DESCRIPTION, + "friendlyName": FRIENDLY_NAME, + "location": LOCATION, + "defaultTableExpirationMs": "3600", + "labels": LABELS, + "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], + } + conn = client._connection = make_connection(RESOURCE) + entries = [ + AccessEntry("OWNER", "userByEmail", USER_EMAIL), + AccessEntry(None, "view", VIEW), + ] + + ds_ref = DatasetReference(PROJECT, DS_ID) + before = Dataset(ds_ref) + before.access_entries = entries + before.description = DESCRIPTION + before.friendly_name = 
FRIENDLY_NAME + before.default_table_expiration_ms = 3600 + before.location = LOCATION + before.labels = LABELS + after = client.create_dataset(before) + + assert after.dataset_id == DS_ID + assert after.project == PROJECT + assert after.etag == RESOURCE["etag"] + assert after.full_dataset_id == RESOURCE["id"] + assert after.description == DESCRIPTION + assert after.friendly_name == FRIENDLY_NAME + assert after.location == LOCATION + assert after.default_table_expiration_ms == 3600 + assert after.labels == LABELS + + conn.api_request.assert_called_once_with( + method="POST", + path="/%s" % PATH, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "description": DESCRIPTION, + "friendlyName": FRIENDLY_NAME, + "location": LOCATION, + "defaultTableExpirationMs": "3600", + "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], + "labels": LABELS, + }, + timeout=None, + ) + + +def test_create_dataset_w_custom_property(client, PROJECT, DS_ID): + # The library should handle sending properties to the API that are not + # yet part of the library + + path = "/projects/%s/datasets" % PROJECT + resource = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "newAlphaProperty": "unreleased property", + } + conn = client._connection = make_connection(resource) + + ds_ref = DatasetReference(PROJECT, DS_ID) + before = Dataset(ds_ref) + before._properties["newAlphaProperty"] = "unreleased property" + after = client.create_dataset(before) + + assert after.dataset_id == DS_ID + assert after.project == PROJECT + assert after._properties["newAlphaProperty"] == "unreleased property" + + conn.api_request.assert_called_once_with( + method="POST", + path=path, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "newAlphaProperty": "unreleased property", + "labels": {}, + }, + timeout=None, + ) + + +def test_create_dataset_w_client_location_wo_dataset_location(PROJECT, DS_ID, LOCATION): + PATH = 
"projects/%s/datasets" % PROJECT + RESOURCE = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + "location": LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection(RESOURCE) + + ds_ref = DatasetReference(PROJECT, DS_ID) + before = Dataset(ds_ref) + after = client.create_dataset(before) + + assert after.dataset_id == DS_ID + assert after.project == PROJECT + assert after.etag == RESOURCE["etag"] + assert after.full_dataset_id == RESOURCE["id"] + assert after.location == LOCATION + + conn.api_request.assert_called_once_with( + method="POST", + path="/%s" % PATH, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": LOCATION, + }, + timeout=None, + ) + + +def test_create_dataset_w_client_location_w_dataset_location(PROJECT, DS_ID, LOCATION): + PATH = "projects/%s/datasets" % PROJECT + OTHER_LOCATION = "EU" + RESOURCE = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + "location": OTHER_LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection(RESOURCE) + + ds_ref = DatasetReference(PROJECT, DS_ID) + before = Dataset(ds_ref) + before.location = OTHER_LOCATION + after = client.create_dataset(before) + + assert after.dataset_id == DS_ID + assert after.project == PROJECT + assert after.etag == RESOURCE["etag"] + assert after.full_dataset_id == RESOURCE["id"] + assert after.location == OTHER_LOCATION + + conn.api_request.assert_called_once_with( + method="POST", + path="/%s" % PATH, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": OTHER_LOCATION, + }, + timeout=None, + ) + + +def test_create_dataset_w_reference(PROJECT, DS_ID, LOCATION): + path = "/projects/%s/datasets" % PROJECT + resource = { + "datasetReference": {"projectId": 
PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + "location": LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection(resource) + dataset = client.create_dataset(DatasetReference(PROJECT, DS_ID)) + + assert dataset.dataset_id == DS_ID + assert dataset.project == PROJECT + assert dataset.etag == resource["etag"] + assert dataset.full_dataset_id == resource["id"] + assert dataset.location == LOCATION + + conn.api_request.assert_called_once_with( + method="POST", + path=path, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": LOCATION, + }, + timeout=None, + ) + + +def test_create_dataset_w_fully_qualified_string(PROJECT, DS_ID, LOCATION): + path = "/projects/%s/datasets" % PROJECT + resource = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + "location": LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection(resource) + dataset = client.create_dataset("{}.{}".format(PROJECT, DS_ID)) + + assert dataset.dataset_id == DS_ID + assert dataset.project == PROJECT + assert dataset.etag == resource["etag"] + assert dataset.full_dataset_id == resource["id"] + assert dataset.location == LOCATION + + conn.api_request.assert_called_once_with( + method="POST", + path=path, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": LOCATION, + }, + timeout=None, + ) + + +def test_create_dataset_w_string(PROJECT, DS_ID, LOCATION): + path = "/projects/%s/datasets" % PROJECT + resource = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + "location": LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection(resource) + with mock.patch( + 
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + dataset = client.create_dataset(DS_ID) + + final_attributes.assert_called_once_with({"path": path}, client, None) + + assert dataset.dataset_id == DS_ID + assert dataset.project == PROJECT + assert dataset.etag == resource["etag"] + assert dataset.full_dataset_id == resource["id"] + assert dataset.location == LOCATION + + conn.api_request.assert_called_once_with( + method="POST", + path=path, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": LOCATION, + }, + timeout=None, + ) + + +def test_create_dataset_alreadyexists_w_exists_ok_false(PROJECT, DS_ID, LOCATION): + client = make_client(location=LOCATION) + client._connection = make_connection( + google.api_core.exceptions.AlreadyExists("dataset already exists") + ) + + with pytest.raises(google.api_core.exceptions.AlreadyExists): + client.create_dataset(DS_ID) + + +def test_create_dataset_alreadyexists_w_exists_ok_true(PROJECT, DS_ID, LOCATION): + post_path = "/projects/{}/datasets".format(PROJECT) + get_path = "/projects/{}/datasets/{}".format(PROJECT, DS_ID) + resource = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "{}:{}".format(PROJECT, DS_ID), + "location": LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection( + google.api_core.exceptions.AlreadyExists("dataset already exists"), resource + ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + dataset = client.create_dataset(DS_ID, exists_ok=True) + + final_attributes.assert_called_with({"path": get_path}, client, None) + + assert dataset.dataset_id == DS_ID + assert dataset.project == PROJECT + assert dataset.etag == resource["etag"] + assert dataset.full_dataset_id == resource["id"] + assert dataset.location == LOCATION + + 
conn.api_request.assert_has_calls( + [ + mock.call( + method="POST", + path=post_path, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": LOCATION, + }, + timeout=None, + ), + mock.call(method="GET", path=get_path, timeout=None), + ] + ) diff --git a/tests/unit/test_delete_dataset.py b/tests/unit/test_delete_dataset.py new file mode 100644 index 000000000..c57b517e0 --- /dev/null +++ b/tests/unit/test_delete_dataset.py @@ -0,0 +1,64 @@ +from .helpers import make_connection, make_client, dataset_polymorphic +import google.api_core.exceptions +import pytest + + +@dataset_polymorphic +def test_delete_dataset(make_dataset, get_reference, client, PROJECT, DS_ID): + dataset = make_dataset(PROJECT, DS_ID) + PATH = "projects/%s/datasets/%s" % (PROJECT, DS_ID) + conn = client._connection = make_connection({}) + client.delete_dataset(dataset, timeout=7.5) + conn.api_request.assert_called_with( + method="DELETE", path="/%s" % PATH, query_params={}, timeout=7.5 + ) + + +@dataset_polymorphic +def test_delete_dataset_delete_contents( + make_dataset, get_reference, client, PROJECT, DS_ID +): + PATH = "projects/%s/datasets/%s" % (PROJECT, DS_ID) + conn = client._connection = make_connection({}) + dataset = make_dataset(PROJECT, DS_ID) + client.delete_dataset(dataset, delete_contents=True) + conn.api_request.assert_called_with( + method="DELETE", + path="/%s" % PATH, + query_params={"deleteContents": "true"}, + timeout=None, + ) + + +def test_delete_dataset_wrong_type(client): + with pytest.raises(TypeError): + client.delete_dataset(42) + + +def test_delete_dataset_w_not_found_ok_false(PROJECT, DS_ID): + path = "/projects/{}/datasets/{}".format(PROJECT, DS_ID) + http = object() + client = make_client(_http=http) + conn = client._connection = make_connection( + google.api_core.exceptions.NotFound("dataset not found") + ) + + with pytest.raises(google.api_core.exceptions.NotFound): + client.delete_dataset(DS_ID) + + 
conn.api_request.assert_called_with( + method="DELETE", path=path, query_params={}, timeout=None + ) + + +def test_delete_dataset_w_not_found_ok_true(PROJECT, DS_ID): + path = "/projects/{}/datasets/{}".format(PROJECT, DS_ID) + http = object() + client = make_client(_http=http) + conn = client._connection = make_connection( + google.api_core.exceptions.NotFound("dataset not found") + ) + client.delete_dataset(DS_ID, not_found_ok=True) + conn.api_request.assert_called_with( + method="DELETE", path=path, query_params={}, timeout=None + ) diff --git a/tests/unit/test_list_models.py b/tests/unit/test_list_models.py new file mode 100644 index 000000000..534a4b54c --- /dev/null +++ b/tests/unit/test_list_models.py @@ -0,0 +1,72 @@ +from .helpers import make_connection, dataset_polymorphic +import pytest + + +def test_list_models_empty_w_timeout(client, PROJECT, DS_ID): + path = "/projects/{}/datasets/{}/models".format(PROJECT, DS_ID) + conn = client._connection = make_connection({}) + + dataset_id = "{}.{}".format(PROJECT, DS_ID) + iterator = client.list_models(dataset_id, timeout=7.5) + page = next(iterator.pages) + models = list(page) + token = iterator.next_page_token + + assert models == [] + assert token is None + conn.api_request.assert_called_once_with( + method="GET", path=path, query_params={}, timeout=7.5 + ) + + +@dataset_polymorphic +def test_list_models_defaults(make_dataset, get_reference, client, PROJECT, DS_ID): + from google.cloud.bigquery.model import Model + + MODEL_1 = "model_one" + MODEL_2 = "model_two" + PATH = "projects/%s/datasets/%s/models" % (PROJECT, DS_ID) + TOKEN = "TOKEN" + DATA = { + "nextPageToken": TOKEN, + "models": [ + { + "modelReference": { + "modelId": MODEL_1, + "datasetId": DS_ID, + "projectId": PROJECT, + } + }, + { + "modelReference": { + "modelId": MODEL_2, + "datasetId": DS_ID, + "projectId": PROJECT, + } + }, + ], + } + + conn = client._connection = make_connection(DATA) + dataset = make_dataset(PROJECT, DS_ID) + + iterator = 
client.list_models(dataset) + assert iterator.dataset == get_reference(dataset) + page = next(iterator.pages) + models = list(page) + token = iterator.next_page_token + + assert len(models) == len(DATA["models"]) + for found, expected in zip(models, DATA["models"]): + assert isinstance(found, Model) + assert found.model_id == expected["modelReference"]["modelId"] + assert token == TOKEN + + conn.api_request.assert_called_once_with( + method="GET", path="/%s" % PATH, query_params={}, timeout=None + ) + + +def test_list_models_wrong_type(client): + with pytest.raises(TypeError): + client.list_models(42) diff --git a/tests/unit/test_list_routines.py b/tests/unit/test_list_routines.py new file mode 100644 index 000000000..82719fce6 --- /dev/null +++ b/tests/unit/test_list_routines.py @@ -0,0 +1,75 @@ +from .helpers import make_connection, dataset_polymorphic +import pytest + + +def test_list_routines_empty_w_timeout(client): + conn = client._connection = make_connection({}) + + iterator = client.list_routines("test-routines.test_routines", timeout=7.5) + page = next(iterator.pages) + routines = list(page) + token = iterator.next_page_token + + assert routines == [] + assert token is None + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/test-routines/datasets/test_routines/routines", + query_params={}, + timeout=7.5, + ) + + +@dataset_polymorphic +def test_list_routines_defaults(make_dataset, get_reference, client, PROJECT): + from google.cloud.bigquery.routine import Routine + + project_id = PROJECT + dataset_id = "test_routines" + path = f"/projects/{PROJECT}/datasets/test_routines/routines" + routine_1 = "routine_one" + routine_2 = "routine_two" + token = "TOKEN" + resource = { + "nextPageToken": token, + "routines": [ + { + "routineReference": { + "routineId": routine_1, + "datasetId": dataset_id, + "projectId": project_id, + } + }, + { + "routineReference": { + "routineId": routine_2, + "datasetId": dataset_id, + "projectId": 
project_id, + } + }, + ], + } + + conn = client._connection = make_connection(resource) + dataset = make_dataset(client.project, dataset_id) + + iterator = client.list_routines(dataset) + assert iterator.dataset == get_reference(dataset) + page = next(iterator.pages) + routines = list(page) + actual_token = iterator.next_page_token + + assert len(routines) == len(resource["routines"]) + for found, expected in zip(routines, resource["routines"]): + assert isinstance(found, Routine) + assert found.routine_id == expected["routineReference"]["routineId"] + assert actual_token == token + + conn.api_request.assert_called_once_with( + method="GET", path=path, query_params={}, timeout=None + ) + + +def test_list_routines_wrong_type(client): + with pytest.raises(TypeError): + client.list_routines(42) diff --git a/tests/unit/test_list_tables.py b/tests/unit/test_list_tables.py new file mode 100644 index 000000000..fdd3aa857 --- /dev/null +++ b/tests/unit/test_list_tables.py @@ -0,0 +1,145 @@ +from .helpers import make_connection, dataset_polymorphic +import google.cloud.bigquery.dataset +import pytest + + +@dataset_polymorphic +def test_list_tables_empty_w_timeout( + make_dataset, get_reference, client, PROJECT, DS_ID +): + path = "/projects/{}/datasets/{}/tables".format(PROJECT, DS_ID) + conn = client._connection = make_connection({}) + + dataset = make_dataset(PROJECT, DS_ID) + iterator = client.list_tables(dataset, timeout=7.5) + assert iterator.dataset == get_reference(dataset) + page = next(iterator.pages) + tables = list(page) + token = iterator.next_page_token + + assert tables == [] + assert token is None + conn.api_request.assert_called_once_with( + method="GET", path=path, query_params={}, timeout=7.5 + ) + + +@dataset_polymorphic +def test_list_tables_defaults(make_dataset, get_reference, client, PROJECT, DS_ID): + from google.cloud.bigquery.table import TableListItem + + TABLE_1 = "table_one" + TABLE_2 = "table_two" + PATH = "projects/%s/datasets/%s/tables" % 
(PROJECT, DS_ID) + TOKEN = "TOKEN" + DATA = { + "nextPageToken": TOKEN, + "tables": [ + { + "kind": "bigquery#table", + "id": "%s:%s.%s" % (PROJECT, DS_ID, TABLE_1), + "tableReference": { + "tableId": TABLE_1, + "datasetId": DS_ID, + "projectId": PROJECT, + }, + "type": "TABLE", + }, + { + "kind": "bigquery#table", + "id": "%s:%s.%s" % (PROJECT, DS_ID, TABLE_2), + "tableReference": { + "tableId": TABLE_2, + "datasetId": DS_ID, + "projectId": PROJECT, + }, + "type": "TABLE", + }, + ], + } + + conn = client._connection = make_connection(DATA) + dataset = make_dataset(PROJECT, DS_ID) + + iterator = client.list_tables(dataset) + assert iterator.dataset == get_reference(dataset) + page = next(iterator.pages) + tables = list(page) + token = iterator.next_page_token + + assert len(tables) == len(DATA["tables"]) + for found, expected in zip(tables, DATA["tables"]): + assert isinstance(found, TableListItem) + assert found.full_table_id == expected["id"] + assert found.table_type == expected["type"] + assert token == TOKEN + + conn.api_request.assert_called_once_with( + method="GET", path="/%s" % PATH, query_params={}, timeout=None + ) + + +def test_list_tables_explicit(client, PROJECT, DS_ID): + from google.cloud.bigquery.table import TableListItem + + TABLE_1 = "table_one" + TABLE_2 = "table_two" + PATH = "projects/%s/datasets/%s/tables" % (PROJECT, DS_ID) + TOKEN = "TOKEN" + DATA = { + "tables": [ + { + "kind": "bigquery#dataset", + "id": "%s:%s.%s" % (PROJECT, DS_ID, TABLE_1), + "tableReference": { + "tableId": TABLE_1, + "datasetId": DS_ID, + "projectId": PROJECT, + }, + "type": "TABLE", + }, + { + "kind": "bigquery#dataset", + "id": "%s:%s.%s" % (PROJECT, DS_ID, TABLE_2), + "tableReference": { + "tableId": TABLE_2, + "datasetId": DS_ID, + "projectId": PROJECT, + }, + "type": "TABLE", + }, + ] + } + + conn = client._connection = make_connection(DATA) + dataset = google.cloud.bigquery.dataset.DatasetReference(PROJECT, DS_ID) + + iterator = client.list_tables( + # Test 
with string for dataset ID. + DS_ID, + max_results=3, + page_token=TOKEN, + ) + assert iterator.dataset == dataset + page = next(iterator.pages) + tables = list(page) + token = iterator.next_page_token + + assert len(tables) == len(DATA["tables"]) + for found, expected in zip(tables, DATA["tables"]): + assert isinstance(found, TableListItem) + assert found.full_table_id == expected["id"] + assert found.table_type == expected["type"] + assert token is None + + conn.api_request.assert_called_once_with( + method="GET", + path="/%s" % PATH, + query_params={"maxResults": 3, "pageToken": TOKEN}, + timeout=None, + ) + + +def test_list_tables_wrong_type(client): + with pytest.raises(TypeError): + client.list_tables(42) From dde9dc5114c2311fb76fafc5b222fff561e8abf1 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 12 Apr 2021 19:00:25 +0200 Subject: [PATCH 173/341] feat: use pyarrow stream compression, if available (#593) * feat: use pyarrow stream compression, if available * Remove unnecessary pyarrow version check Arrow stream compression requires pyarrow>=1.0.0, but that's already guaranteed by a version pin in setup.py if bqstorage extra is installed. 
* Remove unused pyarrow version parsing in tests * Only use arrow compression in tests if available --- google/cloud/bigquery/_pandas_helpers.py | 13 ++++ google/cloud/bigquery/dbapi/cursor.py | 14 +++++ tests/system/test_client.py | 8 --- tests/unit/job/test_query_pandas.py | 78 ++++++++++++++++++++++-- tests/unit/test_dbapi_cursor.py | 47 ++++++++++++++ 5 files changed, 146 insertions(+), 14 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 7ad416e08..412f32754 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -33,6 +33,14 @@ except ImportError: # pragma: NO COVER pyarrow = None +try: + from google.cloud.bigquery_storage import ArrowSerializationOptions +except ImportError: + _ARROW_COMPRESSION_SUPPORT = False +else: + # Having BQ Storage available implies that pyarrow >=1.0.0 is available, too. 
+ _ARROW_COMPRESSION_SUPPORT = True + from google.cloud.bigquery import job from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions @@ -255,6 +263,12 @@ def _bqstorage_fetch(self, bqstorage_client): table=table_reference.to_bqstorage(), data_format=bigquery_storage.types.DataFormat.ARROW, ) + + if _ARROW_COMPRESSION_SUPPORT: + requested_session.read_options.arrow_serialization_options.buffer_compression = ( + ArrowSerializationOptions.CompressionCodec.LZ4_FRAME + ) + read_session = bqstorage_client.create_read_session( parent="projects/{}".format(table_reference.project), read_session=requested_session, diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 133f609a6..024441012 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -28,7 +28,6 @@ import psutil import pytest -import pkg_resources from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from . import helpers @@ -116,13 +115,6 @@ (TooManyRequests, InternalServerError, ServiceUnavailable) ) -PYARROW_MINIMUM_VERSION = pkg_resources.parse_version("0.17.0") - -if pyarrow: - PYARROW_INSTALLED_VERSION = pkg_resources.get_distribution("pyarrow").parsed_version -else: - PYARROW_INSTALLED_VERSION = None - MTLS_TESTING = os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE") == "true" diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index d1600ad43..0f9623203 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -41,6 +41,22 @@ from .helpers import _make_job_resource +@pytest.fixture +def table_read_options_kwarg(): + # Create a BigQuery Storage table read options object with pyarrow compression + # enabled if a recent-enough version of google-cloud-bigquery-storage dependency is + # installed to support the compression. 
+ if not hasattr(bigquery_storage, "ArrowSerializationOptions"): + return {} + + read_options = bigquery_storage.ReadSession.TableReadOptions( + arrow_serialization_options=bigquery_storage.ArrowSerializationOptions( + buffer_compression=bigquery_storage.ArrowSerializationOptions.CompressionCodec.LZ4_FRAME + ) + ) + return {"read_options": read_options} + + @pytest.mark.parametrize( "query,expected", ( @@ -82,7 +98,7 @@ def test__contains_order_by(query, expected): "SelecT name, age froM table OrdeR \n\t BY other_column;", ), ) -def test_to_dataframe_bqstorage_preserve_order(query): +def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg): from google.cloud.bigquery.job import QueryJob as target_class job_resource = _make_job_resource( @@ -123,8 +139,10 @@ def test_to_dataframe_bqstorage_preserve_order(query): destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( **job_resource["configuration"]["query"]["destinationTable"] ) - expected_session = bigquery_storage.types.ReadSession( - table=destination_table, data_format=bigquery_storage.types.DataFormat.ARROW, + expected_session = bigquery_storage.ReadSession( + table=destination_table, + data_format=bigquery_storage.DataFormat.ARROW, + **table_read_options_kwarg, ) bqstorage_client.create_read_session.assert_called_once_with( parent="projects/test-project", @@ -431,7 +449,7 @@ def test_to_dataframe_ddl_query(): @pytest.mark.skipif( bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) -def test_to_dataframe_bqstorage(): +def test_to_dataframe_bqstorage(table_read_options_kwarg): from google.cloud.bigquery.job import QueryJob as target_class resource = _make_job_resource(job_type="query", ended=True) @@ -468,8 +486,10 @@ def test_to_dataframe_bqstorage(): destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( **resource["configuration"]["query"]["destinationTable"] ) - expected_session = 
bigquery_storage.types.ReadSession( - table=destination_table, data_format=bigquery_storage.types.DataFormat.ARROW, + expected_session = bigquery_storage.ReadSession( + table=destination_table, + data_format=bigquery_storage.DataFormat.ARROW, + **table_read_options_kwarg, ) bqstorage_client.create_read_session.assert_called_once_with( parent=f"projects/{client.project}", @@ -478,6 +498,52 @@ def test_to_dataframe_bqstorage(): ) +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_to_dataframe_bqstorage_no_pyarrow_compression(): + from google.cloud.bigquery.job import QueryJob as target_class + + resource = _make_job_resource(job_type="query", ended=True) + query_resource = { + "jobComplete": True, + "jobReference": resource["jobReference"], + "totalRows": "4", + "schema": {"fields": [{"name": "name", "type": "STRING", "mode": "NULLABLE"}]}, + } + connection = _make_connection(query_resource) + client = _make_client(connection=connection) + job = target_class.from_api_repr(resource, client) + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() + session.avro_schema.schema = json.dumps( + { + "type": "record", + "name": "__root__", + "fields": [{"name": "name", "type": ["null", "string"]}], + } + ) + bqstorage_client.create_read_session.return_value = session + + with mock.patch( + "google.cloud.bigquery._pandas_helpers._ARROW_COMPRESSION_SUPPORT", new=False + ): + job.to_dataframe(bqstorage_client=bqstorage_client) + + destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( + **resource["configuration"]["query"]["destinationTable"] + ) + expected_session = bigquery_storage.ReadSession( + table=destination_table, data_format=bigquery_storage.DataFormat.ARROW, + ) + bqstorage_client.create_read_session.assert_called_once_with( + 
parent=f"projects/{client.project}", + read_session=expected_session, + max_stream_count=0, + ) + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_to_dataframe_column_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index cbd6f6909..0f44e3895 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -123,6 +123,7 @@ def _mock_job( schema=schema, num_dml_affected_rows=num_dml_affected_rows, ) + mock_job.destination.project = "P" mock_job.destination.to_bqstorage.return_value = ( "projects/P/datasets/DS/tables/T" ) @@ -380,6 +381,52 @@ def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self): # the default client was not used mock_client.list_rows.assert_not_called() + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_fetchall_w_bqstorage_client_no_arrow_compression(self): + from google.cloud.bigquery import dbapi + from google.cloud.bigquery import table + + # Use unordered data to also test any non-determenistic key order in dicts. + row_data = [table.Row([1.2, 1.1], {"bar": 1, "foo": 0})] + bqstorage_streamed_rows = [{"bar": _to_pyarrow(1.2), "foo": _to_pyarrow(1.1)}] + + mock_client = self._mock_client(rows=row_data) + mock_bqstorage_client = self._mock_bqstorage_client( + stream_count=1, rows=bqstorage_streamed_rows, + ) + + connection = dbapi.connect( + client=mock_client, bqstorage_client=mock_bqstorage_client, + ) + cursor = connection.cursor() + cursor.execute("SELECT foo, bar FROM some_table") + + with mock.patch( + "google.cloud.bigquery.dbapi.cursor._ARROW_COMPRESSION_SUPPORT", new=False + ): + rows = cursor.fetchall() + + mock_client.list_rows.assert_not_called() # The default client was not used. + + # Check the BQ Storage session config. 
+ expected_session = bigquery_storage.ReadSession( + table="projects/P/datasets/DS/tables/T", + data_format=bigquery_storage.DataFormat.ARROW, + ) + mock_bqstorage_client.create_read_session.assert_called_once_with( + parent="projects/P", read_session=expected_session, max_stream_count=1 + ) + + # Check the data returned. + field_value = op.itemgetter(1) + sorted_row_data = [sorted(row.items(), key=field_value) for row in rows] + expected_row_data = [[("foo", 1.1), ("bar", 1.2)]] + + self.assertEqual(sorted_row_data, expected_row_data) + def test_execute_custom_job_id(self): from google.cloud.bigquery.dbapi import connect From 9fb6f2f22cf2d69c31e10bbde460f319fa56698f Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Tue, 13 Apr 2021 08:06:04 -0700 Subject: [PATCH 174/341] chore: add constraints file check for python samples (#601) This PR was generated using Autosynth. :rainbow: Synth log will be available here: https://source.cloud.google.com/results/invocations/b7a528df-1b0b-42e0-a583-e53b45ee05fc/targets - [ ] To automatically regenerate this PR, check this box. (May take up to 24 hours.) 
Source-Link: https://github.com/googleapis/synthtool/commit/0a071b3460344886297a304253bf924aa68ddb7e --- .github/header-checker-lint.yml | 2 +- renovate.json | 5 ++++- samples/geography/noxfile.py | 10 ++++++++-- samples/snippets/noxfile.py | 10 ++++++++-- synth.metadata | 6 +++--- 5 files changed, 24 insertions(+), 9 deletions(-) diff --git a/.github/header-checker-lint.yml b/.github/header-checker-lint.yml index fc281c05b..6fe78aa79 100644 --- a/.github/header-checker-lint.yml +++ b/.github/header-checker-lint.yml @@ -1,6 +1,6 @@ {"allowedCopyrightHolders": ["Google LLC"], "allowedLicenses": ["Apache-2.0", "MIT", "BSD-3"], - "ignoreFiles": ["**/requirements.txt", "**/requirements-test.txt"], + "ignoreFiles": ["**/requirements.txt", "**/requirements-test.txt", "**/__init__.py", "samples/**/constraints.txt", "samples/**/constraints-test.txt"], "sourceFileExtensions": [ "ts", "js", diff --git a/renovate.json b/renovate.json index f08bc22c9..c04895563 100644 --- a/renovate.json +++ b/renovate.json @@ -2,5 +2,8 @@ "extends": [ "config:base", ":preserveSemverRanges" ], - "ignorePaths": [".pre-commit-config.yaml"] + "ignorePaths": [".pre-commit-config.yaml"], + "pip_requirements": { + "fileMatch": ["requirements-test.txt", "samples/[\\S/]*constraints.txt", "samples/[\\S/]*constraints-test.txt"] + } } diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index f2320ea00..be1a3f251 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -174,10 +174,16 @@ def _session_tests( ) -> None: """Runs py.test for a particular project.""" if os.path.exists("requirements.txt"): - session.install("-r", "requirements.txt") + if os.path.exists("constraints.txt"): + session.install("-r", "requirements.txt", "-c", "constraints.txt") + else: + session.install("-r", "requirements.txt") if os.path.exists("requirements-test.txt"): - session.install("-r", "requirements-test.txt") + if os.path.exists("constraints-test.txt"): + session.install("-r", 
"requirements-test.txt", "-c", "constraints-test.txt") + else: + session.install("-r", "requirements-test.txt") if INSTALL_LIBRARY_FROM_SOURCE: session.install("-e", _get_repo_root()) diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index f2320ea00..be1a3f251 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -174,10 +174,16 @@ def _session_tests( ) -> None: """Runs py.test for a particular project.""" if os.path.exists("requirements.txt"): - session.install("-r", "requirements.txt") + if os.path.exists("constraints.txt"): + session.install("-r", "requirements.txt", "-c", "constraints.txt") + else: + session.install("-r", "requirements.txt") if os.path.exists("requirements-test.txt"): - session.install("-r", "requirements-test.txt") + if os.path.exists("constraints-test.txt"): + session.install("-r", "requirements-test.txt", "-c", "constraints-test.txt") + else: + session.install("-r", "requirements-test.txt") if INSTALL_LIBRARY_FROM_SOURCE: session.install("-e", _get_repo_root()) diff --git a/synth.metadata b/synth.metadata index 114359b88..7221c0f0f 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "c1195147a6e9220f26558a301427dd447646da3a" + "sha": "8f4c0b84dac3840532d7865247b8ad94b625b897" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "5b5bf6d519b2d658d9f2e483d9f6f3d0ba8ee6bc" + "sha": "0a071b3460344886297a304253bf924aa68ddb7e" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "5b5bf6d519b2d658d9f2e483d9f6f3d0ba8ee6bc" + "sha": "0a071b3460344886297a304253bf924aa68ddb7e" } } ], From df48cc5a0be99ad39d5835652d1b7422209afc5d Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 13 Apr 2021 09:20:17 -0600 Subject: [PATCH 175/341] fix: missing license headers in new test files (#604) 
--- tests/unit/conftest.py | 14 ++++++++++++++ tests/unit/test_create_dataset.py | 14 ++++++++++++++ tests/unit/test_delete_dataset.py | 14 ++++++++++++++ tests/unit/test_list_models.py | 14 ++++++++++++++ tests/unit/test_list_routines.py | 14 ++++++++++++++ tests/unit/test_list_tables.py | 14 ++++++++++++++ 6 files changed, 84 insertions(+) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 07fc9b4ad..7a67ea6b5 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -1,3 +1,17 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import pytest from .helpers import make_client diff --git a/tests/unit/test_create_dataset.py b/tests/unit/test_create_dataset.py index 3eb8f1072..d07aaed4f 100644 --- a/tests/unit/test_create_dataset.py +++ b/tests/unit/test_create_dataset.py @@ -1,3 +1,17 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from google.cloud.bigquery.dataset import Dataset, DatasetReference from .helpers import make_connection, dataset_polymorphic, make_client import google.cloud.bigquery.dataset diff --git a/tests/unit/test_delete_dataset.py b/tests/unit/test_delete_dataset.py index c57b517e0..3a65e031c 100644 --- a/tests/unit/test_delete_dataset.py +++ b/tests/unit/test_delete_dataset.py @@ -1,3 +1,17 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .helpers import make_connection, make_client, dataset_polymorphic import google.api_core.exceptions import pytest diff --git a/tests/unit/test_list_models.py b/tests/unit/test_list_models.py index 534a4b54c..56aa66126 100644 --- a/tests/unit/test_list_models.py +++ b/tests/unit/test_list_models.py @@ -1,3 +1,17 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from .helpers import make_connection, dataset_polymorphic import pytest diff --git a/tests/unit/test_list_routines.py b/tests/unit/test_list_routines.py index 82719fce6..714ede0d4 100644 --- a/tests/unit/test_list_routines.py +++ b/tests/unit/test_list_routines.py @@ -1,3 +1,17 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .helpers import make_connection, dataset_polymorphic import pytest diff --git a/tests/unit/test_list_tables.py b/tests/unit/test_list_tables.py index fdd3aa857..9acee9580 100644 --- a/tests/unit/test_list_tables.py +++ b/tests/unit/test_list_tables.py @@ -1,3 +1,17 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from .helpers import make_connection, dataset_polymorphic import google.cloud.bigquery.dataset import pytest From c741c381c2248eb69cebb20e675bb088d27bb636 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 13 Apr 2021 18:50:05 +0200 Subject: [PATCH 176/341] refactor: simplify OrderedDict arguments in lexer (#598) Python 3.6+ guarantees that kwargs order is preserved, thus we don't need to assure the order by passing them as a list of tuples. --- .../bigquery/magics/line_arg_parser/lexer.py | 119 ++++++------------ 1 file changed, 37 insertions(+), 82 deletions(-) diff --git a/google/cloud/bigquery/magics/line_arg_parser/lexer.py b/google/cloud/bigquery/magics/line_arg_parser/lexer.py index 0cb63292c..5a6ee1a83 100644 --- a/google/cloud/bigquery/magics/line_arg_parser/lexer.py +++ b/google/cloud/bigquery/magics/line_arg_parser/lexer.py @@ -49,90 +49,45 @@ # the value of an option other than "--params", we do not really care about its # structure, and thus do not want to use any of the "Python tokens" for pattern matching. # -# Since token definition order is important, an OrderedDict is needed with tightly -# controlled member definitions (i.e. passed as a sequence, and *not* via kwargs). +# Token definition order is important, thus an OrderedDict is used. In addition, PEP 468 +# guarantees us that the order of kwargs is preserved in Python 3.6+. 
token_types = OrderedDict( - [ - ( - "state_parse_pos_args", - OrderedDict( - [ - ( - "GOTO_PARSE_NON_PARAMS_OPTIONS", - r"(?P(?=--))", # double dash - starting the options list - ), - ( - "DEST_VAR", - r"(?P[^\d\W]\w*)", # essentially a Python ID - ), - ] - ), - ), - ( - "state_parse_non_params_options", - OrderedDict( - [ - ( - "GOTO_PARSE_PARAMS_OPTION", - r"(?P(?=--params(?:\s|=|--|$)))", # the --params option - ), - ("OPTION_SPEC", r"(?P--\w+)"), - ("OPTION_EQ", r"(?P=)"), - ("OPT_VAL", r"(?P\S+?(?=\s|--|$))"), - ] - ), - ), - ( - "state_parse_params_option", - OrderedDict( - [ - ( - "PY_STRING", - r"(?P(?:{})|(?:{}))".format( - r"'(?:[^'\\]|\.)*'", - r'"(?:[^"\\]|\.)*"', # single and double quoted strings - ), - ), - ("PARAMS_OPT_SPEC", r"(?P--params(?=\s|=|--|$))"), - ("PARAMS_OPT_EQ", r"(?P=)"), - ( - "GOTO_PARSE_NON_PARAMS_OPTIONS", - r"(?P(?=--\w+))", # found another option spec - ), - ("PY_BOOL", r"(?PTrue|False)"), - ("DOLLAR_PY_ID", r"(?P\$[^\d\W]\w*)"), - ( - "PY_NUMBER", - r"(?P-?[1-9]\d*(?:\.\d+)?(:?[e|E][+-]?\d+)?)", - ), - ("SQUOTE", r"(?P')"), - ("DQUOTE", r'(?P")'), - ("COLON", r"(?P:)"), - ("COMMA", r"(?P,)"), - ("LCURL", r"(?P\{)"), - ("RCURL", r"(?P})"), - ("LSQUARE", r"(?P\[)"), - ("RSQUARE", r"(?P])"), - ("LPAREN", r"(?P\()"), - ("RPAREN", r"(?P\))"), - ] - ), - ), - ( - "common", - OrderedDict( - [ - ("WS", r"(?P\s+)"), - ("EOL", r"(?P$)"), - ( - # anything not a whitespace or matched by something else - "UNKNOWN", - r"(?P\S+)", - ), - ] - ), + state_parse_pos_args=OrderedDict( + GOTO_PARSE_NON_PARAMS_OPTIONS=r"(?P(?=--))", # double dash - starting the options list + DEST_VAR=r"(?P[^\d\W]\w*)", # essentially a Python ID + ), + state_parse_non_params_options=OrderedDict( + GOTO_PARSE_PARAMS_OPTION=r"(?P(?=--params(?:\s|=|--|$)))", # the --params option + OPTION_SPEC=r"(?P--\w+)", + OPTION_EQ=r"(?P=)", + OPT_VAL=r"(?P\S+?(?=\s|--|$))", + ), + state_parse_params_option=OrderedDict( + PY_STRING=r"(?P(?:{})|(?:{}))".format( # single and double 
quoted strings + r"'(?:[^'\\]|\.)*'", r'"(?:[^"\\]|\.)*"' ), - ] + PARAMS_OPT_SPEC=r"(?P--params(?=\s|=|--|$))", + PARAMS_OPT_EQ=r"(?P=)", + GOTO_PARSE_NON_PARAMS_OPTIONS=r"(?P(?=--\w+))", # found another option spec + PY_BOOL=r"(?PTrue|False)", + DOLLAR_PY_ID=r"(?P\$[^\d\W]\w*)", + PY_NUMBER=r"(?P-?[1-9]\d*(?:\.\d+)?(:?[e|E][+-]?\d+)?)", + SQUOTE=r"(?P')", + DQUOTE=r'(?P")', + COLON=r"(?P:)", + COMMA=r"(?P,)", + LCURL=r"(?P\{)", + RCURL=r"(?P})", + LSQUARE=r"(?P\[)", + RSQUARE=r"(?P])", + LPAREN=r"(?P\()", + RPAREN=r"(?P\))", + ), + common=OrderedDict( + WS=r"(?P\s+)", + EOL=r"(?P$)", + UNKNOWN=r"(?P\S+)", # anything not a whitespace or matched by something else + ), ) From 8089bdbd146e856e9e4d47bc1329f633cb4b9671 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 13 Apr 2021 19:17:39 +0200 Subject: [PATCH 177/341] chore(deps): update dependency mock to v4.0.3 (#605) --- samples/geography/requirements-test.txt | 2 +- samples/snippets/requirements-test.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 676ff949e..a5da1a77d 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ pytest==5.4.3 -mock==4.0.2 +mock==4.0.3 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 676ff949e..a5da1a77d 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,2 +1,2 @@ pytest==5.4.3 -mock==4.0.2 +mock==4.0.3 From ff2ec3abe418a443cd07751c08e654f94e8b3155 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 13 Apr 2021 12:49:39 -0500 Subject: [PATCH 178/341] docs: add sample to run DML query (#591) * docs: add sample to run DML query * cleanup leftover datasets before test run * fix import order --- samples/snippets/conftest.py | 40 ++++++++++++ samples/snippets/test_update_with_dml.py | 36 +++++++++++ 
samples/snippets/update_with_dml.py | 82 ++++++++++++++++++++++++ samples/snippets/user_sessions_data.json | 10 +++ 4 files changed, 168 insertions(+) create mode 100644 samples/snippets/test_update_with_dml.py create mode 100644 samples/snippets/update_with_dml.py create mode 100644 samples/snippets/user_sessions_data.json diff --git a/samples/snippets/conftest.py b/samples/snippets/conftest.py index d22a33318..31c6ba104 100644 --- a/samples/snippets/conftest.py +++ b/samples/snippets/conftest.py @@ -12,10 +12,35 @@ # See the License for the specific language governing permissions and # limitations under the License. +import datetime +import random + from google.cloud import bigquery import pytest +RESOURCE_PREFIX = "python_bigquery_samples_snippets" + + +def resource_prefix() -> str: + timestamp = datetime.datetime.utcnow().strftime("%Y%m%d_%H%M%S") + random_string = hex(random.randrange(1000000))[2:] + return f"{RESOURCE_PREFIX}_{timestamp}_{random_string}" + + +@pytest.fixture(scope="session", autouse=True) +def cleanup_datasets(bigquery_client: bigquery.Client): + yesterday = datetime.datetime.utcnow() - datetime.timedelta(days=1) + for dataset in bigquery_client.list_datasets(): + if ( + dataset.dataset_id.startswith(RESOURCE_PREFIX) + and dataset.created < yesterday + ): + bigquery_client.delete_dataset( + dataset, delete_contents=True, not_found_ok=True + ) + + @pytest.fixture(scope="session") def bigquery_client(): bigquery_client = bigquery.Client() @@ -25,3 +50,18 @@ def bigquery_client(): @pytest.fixture(scope="session") def project_id(bigquery_client): return bigquery_client.project + + +@pytest.fixture(scope="session") +def dataset_id(bigquery_client: bigquery.Client, project_id: str): + dataset_id = resource_prefix() + full_dataset_id = f"{project_id}.{dataset_id}" + dataset = bigquery.Dataset(full_dataset_id) + bigquery_client.create_dataset(dataset) + yield dataset_id + bigquery_client.delete_dataset(dataset, delete_contents=True, 
not_found_ok=True) + + +@pytest.fixture +def bigquery_client_patch(monkeypatch, bigquery_client): + monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client) diff --git a/samples/snippets/test_update_with_dml.py b/samples/snippets/test_update_with_dml.py new file mode 100644 index 000000000..3cca7a649 --- /dev/null +++ b/samples/snippets/test_update_with_dml.py @@ -0,0 +1,36 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery +import pytest + +from conftest import resource_prefix +import update_with_dml + + +@pytest.fixture +def table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): + table_id = f"{resource_prefix()}_update_with_dml" + yield table_id + full_table_id = f"{project_id}.{dataset_id}.{table_id}" + bigquery_client.delete_table(full_table_id, not_found_ok=True) + + +def test_update_with_dml(bigquery_client_patch, dataset_id, table_id): + override_values = { + "dataset_id": dataset_id, + "table_id": table_id, + } + num_rows = update_with_dml.run_sample(override_values=override_values) + assert num_rows > 0 diff --git a/samples/snippets/update_with_dml.py b/samples/snippets/update_with_dml.py new file mode 100644 index 000000000..7fd09dd80 --- /dev/null +++ b/samples/snippets/update_with_dml.py @@ -0,0 +1,82 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the 
License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START bigquery_update_with_dml] +import pathlib + +from google.cloud import bigquery +from google.cloud.bigquery import enums + + +def load_from_newline_delimited_json( + client: bigquery.Client, + filepath: pathlib.Path, + project_id: str, + dataset_id: str, + table_id: str, +): + full_table_id = f"{project_id}.{dataset_id}.{table_id}" + job_config = bigquery.LoadJobConfig() + job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON + job_config.schema = [ + bigquery.SchemaField("id", enums.SqlTypeNames.STRING), + bigquery.SchemaField("user_id", enums.SqlTypeNames.INTEGER), + bigquery.SchemaField("login_time", enums.SqlTypeNames.TIMESTAMP), + bigquery.SchemaField("logout_time", enums.SqlTypeNames.TIMESTAMP), + bigquery.SchemaField("ip_address", enums.SqlTypeNames.STRING), + ] + + with open(filepath, "rb") as json_file: + load_job = client.load_table_from_file( + json_file, full_table_id, job_config=job_config + ) + + # Wait for load job to finish. + load_job.result() + + +def update_with_dml( + client: bigquery.Client, project_id: str, dataset_id: str, table_id: str +): + query_text = f""" + UPDATE `{project_id}.{dataset_id}.{table_id}` + SET ip_address = REGEXP_REPLACE(ip_address, r"(\\.[0-9]+)$", ".0") + WHERE TRUE + """ + query_job = client.query(query_text) + + # Wait for query job to finish. 
+ query_job.result() + + print(f"DML query modified {query_job.num_dml_affected_rows} rows.") + return query_job.num_dml_affected_rows + + +def run_sample(override_values={}): + client = bigquery.Client() + filepath = pathlib.Path(__file__).parent / "user_sessions_data.json" + project_id = client.project + dataset_id = "sample_db" + table_id = "UserSessions" + # [END bigquery_update_with_dml] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + dataset_id = override_values.get("dataset_id", dataset_id) + table_id = override_values.get("table_id", table_id) + # [START bigquery_update_with_dml] + load_from_newline_delimited_json(client, filepath, project_id, dataset_id, table_id) + return update_with_dml(client, project_id, dataset_id, table_id) + + +# [END bigquery_update_with_dml] diff --git a/samples/snippets/user_sessions_data.json b/samples/snippets/user_sessions_data.json new file mode 100644 index 000000000..7ea3715ad --- /dev/null +++ b/samples/snippets/user_sessions_data.json @@ -0,0 +1,10 @@ +{"id":"2ad525d6-c832-4c3d-b7fe-59d104885519","user_id":"38","login_time":"1.47766087E9","logout_time":"1.477661109E9","ip_address":"192.0.2.12"} +{"id":"53d65e20-6ea9-4650-98d9-a2111fbd1122","user_id":"88","login_time":"1.47707544E9","logout_time":"1.477075519E9","ip_address":"192.0.2.88"} +{"id":"5e6c3021-d5e7-4ccd-84b2-adfa9176d13d","user_id":"39","login_time":"1.474022869E9","logout_time":"1.474022961E9","ip_address":"203.0.113.52"} +{"id":"6196eefa-1498-4567-8ef0-498845b888d9","user_id":"52","login_time":"1.478604612E9","logout_time":"1.478604691E9","ip_address":"203.0.113.169"} +{"id":"70656dc5-7e0f-49cf-9e00-f06ed93c1f5b","user_id":"46","login_time":"1.474089924E9","logout_time":"1.474090227E9","ip_address":"192.0.2.10"} +{"id":"aafa5eef-ad49-49a7-9a0f-fbc7fd639bd3","user_id":"40","login_time":"1.478031161E9","logout_time":"1.478031388E9","ip_address":"203.0.113.18"} 
+{"id":"d2792fc2-24dd-4260-9456-3fbe6cdfdd90","user_id":"5","login_time":"1.481259081E9","logout_time":"1.481259247E9","ip_address":"192.0.2.140"} +{"id":"d835dc49-32f9-4790-b4eb-dddee62e0dcc","user_id":"62","login_time":"1.478892977E9","logout_time":"1.478893219E9","ip_address":"203.0.113.83"} +{"id":"f4a0d3c7-351f-471c-8e11-e093e7a6ce75","user_id":"89","login_time":"1.459031555E9","logout_time":"1.459031831E9","ip_address":"203.0.113.233"} +{"id":"f6e9f526-5b22-4679-9c3e-56a636e815bb","user_id":"97","login_time":"1.482426034E9","logout_time":"1.482426415E9","ip_address":"203.0.113.167"} From e7a54374e65869dc3ee117e6fb4629bec3fce3aa Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 14 Apr 2021 16:01:42 +0200 Subject: [PATCH 179/341] chore(deps): update dependency pytest to v6 (#606) --- samples/geography/requirements-test.txt | 2 +- samples/snippets/requirements-test.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index a5da1a77d..299d90b65 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==5.4.3 +pytest==6.2.3 mock==4.0.3 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index a5da1a77d..299d90b65 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==5.4.3 +pytest==6.2.3 mock==4.0.3 From 9239d1a0bf3a9fccb607122ae17e695a980dc965 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 14 Apr 2021 16:03:06 +0200 Subject: [PATCH 180/341] chore(deps): update dependency google-cloud-bigquery to v2.13.1 (#573) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index c5f60911e..6939c07e0 100644 --- 
a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,3 +1,3 @@ geojson==2.5.0 -google-cloud-bigquery==2.13.0 +google-cloud-bigquery==2.13.1 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 734cdf445..74a18981e 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.13.0 +google-cloud-bigquery==2.13.1 google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.4 grpcio==1.37.0 From f95f415d3441b3928f6cc705cb8a75603d790fd6 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 14 Apr 2021 23:08:17 +0200 Subject: [PATCH 181/341] feat: add `max_queue_size` argument to `RowIterator.to_dataframe_iterable` (#575) * feat: add max_queue_size option for BQ Storage API The new parameter allows configuring the maximum size of the internal queue used to hold result pages when query data is streamed over the BigQuery Storage API. * Slightly simplify bits of page streaming logic * Only retain max_queue_size where most relevant * Adjust tests, add support for infinite queue size * Remove deleted param's description --- google/cloud/bigquery/_pandas_helpers.py | 28 +++++++--- google/cloud/bigquery/table.py | 32 +++++++++--- tests/unit/test__pandas_helpers.py | 66 ++++++++++++++++++++++++ 3 files changed, 110 insertions(+), 16 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 412f32754..7553726fa 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -53,6 +53,8 @@ _PROGRESS_INTERVAL = 0.2 # Maximum time between download status checks, in seconds. 
+_MAX_QUEUE_SIZE_DEFAULT = object() # max queue size sentinel for BQ Storage downloads + _PANDAS_DTYPE_TO_BQ = { "bool": "BOOLEAN", "datetime64[ns, UTC]": "TIMESTAMP", @@ -616,6 +618,7 @@ def _download_table_bqstorage( preserve_order=False, selected_fields=None, page_to_item=None, + max_queue_size=_MAX_QUEUE_SIZE_DEFAULT, ): """Use (faster, but billable) BQ Storage API to construct DataFrame.""" @@ -667,7 +670,17 @@ def _download_table_bqstorage( download_state = _DownloadState() # Create a queue to collect frames as they are created in each thread. - worker_queue = queue.Queue() + # + # The queue needs to be bounded by default, because if the user code processes the + # fetched result pages too slowly, while at the same time new pages are rapidly being + # fetched from the server, the queue can grow to the point where the process runs + # out of memory. + if max_queue_size is _MAX_QUEUE_SIZE_DEFAULT: + max_queue_size = total_streams + elif max_queue_size is None: + max_queue_size = 0 # unbounded + + worker_queue = queue.Queue(maxsize=max_queue_size) with concurrent.futures.ThreadPoolExecutor(max_workers=total_streams) as pool: try: @@ -708,15 +721,12 @@ def _download_table_bqstorage( continue # Return any remaining values after the workers finished. - while not worker_queue.empty(): # pragma: NO COVER + while True: # pragma: NO COVER try: - # Include a timeout because even though the queue is - # non-empty, it doesn't guarantee that a subsequent call to - # get() will not block. 
- frame = worker_queue.get(timeout=_PROGRESS_INTERVAL) + frame = worker_queue.get_nowait() yield frame except queue.Empty: # pragma: NO COVER - continue + break finally: # No need for a lock because reading/replacing a variable is # defined to be an atomic operation in the Python language @@ -729,7 +739,7 @@ def _download_table_bqstorage( def download_arrow_bqstorage( - project_id, table, bqstorage_client, preserve_order=False, selected_fields=None + project_id, table, bqstorage_client, preserve_order=False, selected_fields=None, ): return _download_table_bqstorage( project_id, @@ -749,6 +759,7 @@ def download_dataframe_bqstorage( dtypes, preserve_order=False, selected_fields=None, + max_queue_size=_MAX_QUEUE_SIZE_DEFAULT, ): page_to_item = functools.partial(_bqstorage_page_to_dataframe, column_names, dtypes) return _download_table_bqstorage( @@ -758,6 +769,7 @@ def download_dataframe_bqstorage( preserve_order=preserve_order, selected_fields=selected_fields, page_to_item=page_to_item, + max_queue_size=max_queue_size, ) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index a2366b806..bd5bca30f 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1490,13 +1490,12 @@ def _to_page_iterable( if not self._validate_bqstorage(bqstorage_client, False): bqstorage_client = None - if bqstorage_client is not None: - for item in bqstorage_download(): - yield item - return - - for item in tabledata_list_download(): - yield item + result_pages = ( + bqstorage_download() + if bqstorage_client is not None + else tabledata_list_download() + ) + yield from result_pages def _to_arrow_iterable(self, bqstorage_client=None): """Create an iterable of arrow RecordBatches, to process the table as a stream.""" @@ -1622,7 +1621,12 @@ def to_arrow( arrow_schema = _pandas_helpers.bq_to_arrow_schema(self._schema) return pyarrow.Table.from_batches(record_batches, schema=arrow_schema) - def to_dataframe_iterable(self, 
bqstorage_client=None, dtypes=None): + def to_dataframe_iterable( + self, + bqstorage_client=None, + dtypes=None, + max_queue_size=_pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, + ): """Create an iterable of pandas DataFrames, to process the table as a stream. Args: @@ -1642,6 +1646,17 @@ def to_dataframe_iterable(self, bqstorage_client=None, dtypes=None): ``dtype`` is used when constructing the series for the column specified. Otherwise, the default pandas behavior is used. + max_queue_size (Optional[int]): + The maximum number of result pages to hold in the internal queue when + streaming query results over the BigQuery Storage API. Ignored if + Storage API is not used. + + By default, the max queue size is set to the number of BQ Storage streams + created by the server. If ``max_queue_size`` is :data:`None`, the queue + size is infinite. + + ..versionadded:: 2.14.0 + Returns: pandas.DataFrame: A generator of :class:`~pandas.DataFrame`. @@ -1665,6 +1680,7 @@ def to_dataframe_iterable(self, bqstorage_client=None, dtypes=None): dtypes, preserve_order=self._preserve_order, selected_fields=self._selected_fields, + max_queue_size=max_queue_size, ) tabledata_list_download = functools.partial( _pandas_helpers.download_dataframe_row_iterator, diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index abd725820..43692f4af 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -17,6 +17,7 @@ import decimal import functools import operator +import queue import warnings import mock @@ -41,6 +42,11 @@ from google.cloud.bigquery import schema from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT +try: + from google.cloud import bigquery_storage +except ImportError: # pragma: NO COVER + bigquery_storage = None + skip_if_no_bignumeric = pytest.mark.skipif( not _BIGNUMERIC_SUPPORT, reason="BIGNUMERIC support requires pyarrow>=3.0.0", @@ -1265,6 +1271,66 @@ def 
test_dataframe_to_parquet_dict_sequence_schema(module_under_test): assert schema_arg == expected_schema_arg +@pytest.mark.parametrize( + "stream_count,maxsize_kwarg,expected_call_count,expected_maxsize", + [ + (3, {"max_queue_size": 2}, 3, 2), # custom queue size + (4, {}, 4, 4), # default queue size + (7, {"max_queue_size": None}, 7, 0), # infinite queue size + ], +) +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test__download_table_bqstorage( + module_under_test, + stream_count, + maxsize_kwarg, + expected_call_count, + expected_maxsize, +): + from google.cloud.bigquery import dataset + from google.cloud.bigquery import table + + queue_used = None # A reference to the queue used by code under test. + + bqstorage_client = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + fake_session = mock.Mock(streams=["stream/s{i}" for i in range(stream_count)]) + bqstorage_client.create_read_session.return_value = fake_session + + table_ref = table.TableReference( + dataset.DatasetReference("project-x", "dataset-y"), "table-z", + ) + + def fake_download_stream( + download_state, bqstorage_client, session, stream, worker_queue, page_to_item + ): + nonlocal queue_used + queue_used = worker_queue + try: + worker_queue.put_nowait("result_page") + except queue.Full: # pragma: NO COVER + pass + + download_stream = mock.Mock(side_effect=fake_download_stream) + + with mock.patch.object( + module_under_test, "_download_table_bqstorage_stream", new=download_stream + ): + result_gen = module_under_test._download_table_bqstorage( + "some-project", table_ref, bqstorage_client, **maxsize_kwarg + ) + list(result_gen) + + # Timing-safe, as the method under test should block until the pool shutdown is + # complete, at which point all download stream workers have already been submitted + # to the thread pool. 
+ assert download_stream.call_count == stream_count # once for each stream + assert queue_used.maxsize == expected_maxsize + + @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_unknown_field_type(module_under_test): fake_page = api_core.page_iterator.Page( From b5a928e5fc6405e08a986e39e3308f86f3f4817f Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Thu, 15 Apr 2021 07:55:15 -0700 Subject: [PATCH 182/341] chore: generate PyPI token in secrets manager, fix spacing in docs (via synth) (#612) * docs(python): add empty lines between methods Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Wed Apr 14 14:41:09 2021 -0600 Source-Repo: googleapis/synthtool Source-Sha: 721339ab60a6eb63b889978b3d9b295dcb3be370 Source-Link: https://github.com/googleapis/synthtool/commit/721339ab60a6eb63b889978b3d9b295dcb3be370 * build: use PyPI API token in secret manager Migrate python libraries onto the PyPI API token stored in secret manager. A PyPI API token is limited in scope to uploading new releases. 
https://pypi.org/help/#apitoken Verified that this works with [build](https://fusion2.corp.google.com/invocations/14bae126-83fa-4328-8da9-d390ed99315c/targets/cloud-devrel%2Fclient-libraries%2Fpython%2Fgoogleapis%2Fpython-vision%2Frelease%2Frelease;config=default/log) on https://github.com/googleapis/python-vision/pull/136 Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Wed Apr 14 17:46:06 2021 -0600 Source-Repo: googleapis/synthtool Source-Sha: 043cc620d6a6111816d9e09f2a97208565fde958 Source-Link: https://github.com/googleapis/synthtool/commit/043cc620d6a6111816d9e09f2a97208565fde958 --- .kokoro/release.sh | 4 ++-- .kokoro/release/common.cfg | 14 ++------------ docs/_static/custom.css | 13 ++++++++++++- synth.metadata | 6 +++--- 4 files changed, 19 insertions(+), 18 deletions(-) diff --git a/.kokoro/release.sh b/.kokoro/release.sh index 0e58f0640..3abba6e06 100755 --- a/.kokoro/release.sh +++ b/.kokoro/release.sh @@ -26,7 +26,7 @@ python3 -m pip install --upgrade twine wheel setuptools export PYTHONUNBUFFERED=1 # Move into the package, build the distribution and upload. 
-TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google_cloud_pypi_password") +TWINE_PASSWORD=$(cat "${KOKORO_GFILE_DIR}/secret_manager/google-cloud-pypi-token") cd github/python-bigquery python3 setup.py sdist bdist_wheel -twine upload --username gcloudpypi --password "${TWINE_PASSWORD}" dist/* +twine upload --username __token__ --password "${TWINE_PASSWORD}" dist/* diff --git a/.kokoro/release/common.cfg b/.kokoro/release/common.cfg index 18b417709..922d7fe50 100644 --- a/.kokoro/release/common.cfg +++ b/.kokoro/release/common.cfg @@ -23,18 +23,8 @@ env_vars: { value: "github/python-bigquery/.kokoro/release.sh" } -# Fetch PyPI password -before_action { - fetch_keystore { - keystore_resource { - keystore_config_id: 73713 - keyname: "google_cloud_pypi_password" - } - } -} - # Tokens needed to report release status back to GitHub env_vars: { key: "SECRET_MANAGER_KEYS" - value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem" -} \ No newline at end of file + value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem,google-cloud-pypi-token" +} diff --git a/docs/_static/custom.css b/docs/_static/custom.css index bcd37bbd3..b0a295464 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -1,9 +1,20 @@ div#python2-eol { border-color: red; border-width: medium; -} +} /* Ensure minimum width for 'Parameters' / 'Returns' column */ dl.field-list > dt { min-width: 100px } + +/* Insert space between methods for readability */ +dl.method { + padding-top: 10px; + padding-bottom: 10px +} + +/* Insert empty space between classes */ +dl.class { + padding-bottom: 50px +} diff --git a/synth.metadata b/synth.metadata index 7221c0f0f..b031618b0 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": 
"8f4c0b84dac3840532d7865247b8ad94b625b897" + "sha": "f95f415d3441b3928f6cc705cb8a75603d790fd6" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "0a071b3460344886297a304253bf924aa68ddb7e" + "sha": "043cc620d6a6111816d9e09f2a97208565fde958" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "0a071b3460344886297a304253bf924aa68ddb7e" + "sha": "043cc620d6a6111816d9e09f2a97208565fde958" } } ], From 72d4c4a462f111cfc56e5b878fa641819638d8f5 Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Thu, 15 Apr 2021 14:56:38 -0400 Subject: [PATCH 183/341] chore: prevent normalization of semver versioning (#611) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 607ffb63f..46a128426 100644 --- a/setup.py +++ b/setup.py @@ -100,7 +100,7 @@ setuptools.setup( name=name, - version=version, + version=setuptools.sic(version), description=description, long_description=readme, author="Google LLC", From f75dcdf3943b87daba60011c9a3b42e34ff81910 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 15 Apr 2021 18:40:04 -0500 Subject: [PATCH 184/341] feat: accept job object as argument to `get_job` and `cancel_job` (#617) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows one to more easily cancel or get updated metadata for an existing job from the client class. Ensures that project ID and location are correctly populated. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #616 🦕 --- google/cloud/bigquery/client.py | 63 ++++++++++++++++++++++++++++++--- tests/system/test_client.py | 11 +++--- tests/unit/test_client.py | 43 ++++++++++++++-------- 3 files changed, 93 insertions(+), 24 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 10127e10d..8211e23a3 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -1734,12 +1734,20 @@ def get_job( https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get Args: - job_id (str): Unique job identifier. + job_id (Union[ \ + str, \ + google.cloud.bigquery.job.LoadJob, \ + google.cloud.bigquery.job.CopyJob, \ + google.cloud.bigquery.job.ExtractJob, \ + google.cloud.bigquery.job.QueryJob \ + ]): Job identifier. Keyword Arguments: project (Optional[str]): ID of the project which owns the job (defaults to the client's project). - location (Optional[str]): Location where the job was run. + location (Optional[str]): + Location where the job was run. Ignored if ``job_id`` is a job + object. retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. timeout (Optional[float]): @@ -1757,6 +1765,10 @@ def get_job( """ extra_params = {"projection": "full"} + project, location, job_id = _extract_job_reference( + job_id, project=project, location=location + ) + if project is None: project = self.project @@ -1791,12 +1803,20 @@ def cancel_job( https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/cancel Args: - job_id (str): Unique job identifier. + job_id (Union[ \ + str, \ + google.cloud.bigquery.job.LoadJob, \ + google.cloud.bigquery.job.CopyJob, \ + google.cloud.bigquery.job.ExtractJob, \ + google.cloud.bigquery.job.QueryJob \ + ]): Job identifier. 
Keyword Arguments: project (Optional[str]): ID of the project which owns the job (defaults to the client's project). - location (Optional[str]): Location where the job was run. + location (Optional[str]): + Location where the job was run. Ignored if ``job_id`` is a job + object. retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. timeout (Optional[float]): @@ -1814,6 +1834,10 @@ def cancel_job( """ extra_params = {"projection": "full"} + project, location, job_id = _extract_job_reference( + job_id, project=project, location=location + ) + if project is None: project = self.project @@ -3518,6 +3542,37 @@ def _item_to_table(iterator, resource): return TableListItem(resource) +def _extract_job_reference(job, project=None, location=None): + """Extract fully-qualified job reference from a job-like object. + + Args: + job_id (Union[ \ + str, \ + google.cloud.bigquery.job.LoadJob, \ + google.cloud.bigquery.job.CopyJob, \ + google.cloud.bigquery.job.ExtractJob, \ + google.cloud.bigquery.job.QueryJob \ + ]): Job identifier. + project (Optional[str]): + Project where the job was run. Ignored if ``job_id`` is a job + object. + location (Optional[str]): + Location where the job was run. Ignored if ``job_id`` is a job + object. + + Returns: + Tuple[str, str, str]: ``(project, location, job_id)`` + """ + if hasattr(job, "job_id"): + project = job.project + job_id = job.job_id + location = job.location + else: + job_id = job + + return (project, location, job_id) + + def _make_job_id(job_id, prefix=None): """Construct an ID for a new job. 
diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 024441012..f31d994ca 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -189,7 +189,9 @@ def test_get_service_account_email(self): def _create_bucket(self, bucket_name, location=None): storage_client = storage.Client() bucket = storage_client.bucket(bucket_name) - retry_storage_errors(bucket.create)(location=location) + retry_storage_errors(storage_client.create_bucket)( + bucket_name, location=location + ) self.to_delete.append(bucket) return bucket @@ -872,7 +874,7 @@ def test_load_table_from_file_w_explicit_location(self): job_id = load_job.job_id # Can get the job from the EU. - load_job = client.get_job(job_id, location="EU") + load_job = client.get_job(load_job) self.assertEqual(job_id, load_job.job_id) self.assertEqual("EU", load_job.location) self.assertTrue(load_job.exists()) @@ -889,7 +891,7 @@ def test_load_table_from_file_w_explicit_location(self): # Can cancel the job from the EU. self.assertTrue(load_job.cancel()) - load_job = client.cancel_job(job_id, location="EU") + load_job = client.cancel_job(load_job) self.assertEqual(job_id, load_job.job_id) self.assertEqual("EU", load_job.location) @@ -1204,8 +1206,7 @@ def test_query_w_timeout(self): # Even though the query takes >1 second, the call to getQueryResults # should succeed. 
self.assertFalse(query_job.done(timeout=1)) - - Config.CLIENT.cancel_job(query_job.job_id, location=query_job.location) + self.assertIsNotNone(Config.CLIENT.cancel_job(query_job)) def test_query_w_page_size(self): page_size = 45 diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 96e51678f..c5e742c9e 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -2933,31 +2933,30 @@ def test_get_job_miss_w_explict_project(self): conn = client._connection = make_connection() with self.assertRaises(NotFound): - client.get_job(JOB_ID, project=OTHER_PROJECT, location=self.LOCATION) + client.get_job(JOB_ID, project=OTHER_PROJECT) conn.api_request.assert_called_once_with( method="GET", path="/projects/OTHER_PROJECT/jobs/NONESUCH", - query_params={"projection": "full", "location": self.LOCATION}, + query_params={"projection": "full"}, timeout=None, ) def test_get_job_miss_w_client_location(self): from google.cloud.exceptions import NotFound - OTHER_PROJECT = "OTHER_PROJECT" JOB_ID = "NONESUCH" creds = _make_credentials() - client = self._make_one(self.PROJECT, creds, location=self.LOCATION) + client = self._make_one("client-proj", creds, location="client-loc") conn = client._connection = make_connection() with self.assertRaises(NotFound): - client.get_job(JOB_ID, project=OTHER_PROJECT) + client.get_job(JOB_ID) conn.api_request.assert_called_once_with( method="GET", - path="/projects/OTHER_PROJECT/jobs/NONESUCH", - query_params={"projection": "full", "location": self.LOCATION}, + path="/projects/client-proj/jobs/NONESUCH", + query_params={"projection": "full", "location": "client-loc"}, timeout=None, ) @@ -2971,7 +2970,11 @@ def test_get_job_hit_w_timeout(self): QUERY = "SELECT * from test_dataset:test_table" ASYNC_QUERY_DATA = { "id": "{}:{}".format(self.PROJECT, JOB_ID), - "jobReference": {"projectId": self.PROJECT, "jobId": "query_job"}, + "jobReference": { + "projectId": "resource-proj", + "jobId": "query_job", + "location": "us-east1", 
+ }, "state": "DONE", "configuration": { "query": { @@ -2989,18 +2992,21 @@ def test_get_job_hit_w_timeout(self): creds = _make_credentials() client = self._make_one(self.PROJECT, creds) conn = client._connection = make_connection(ASYNC_QUERY_DATA) + job_from_resource = QueryJob.from_api_repr(ASYNC_QUERY_DATA, client) - job = client.get_job(JOB_ID, timeout=7.5) + job = client.get_job(job_from_resource, timeout=7.5) self.assertIsInstance(job, QueryJob) self.assertEqual(job.job_id, JOB_ID) + self.assertEqual(job.project, "resource-proj") + self.assertEqual(job.location, "us-east1") self.assertEqual(job.create_disposition, CreateDisposition.CREATE_IF_NEEDED) self.assertEqual(job.write_disposition, WriteDisposition.WRITE_TRUNCATE) conn.api_request.assert_called_once_with( method="GET", - path="/projects/PROJECT/jobs/query_job", - query_params={"projection": "full"}, + path="/projects/resource-proj/jobs/query_job", + query_params={"projection": "full", "location": "us-east1"}, timeout=7.5, ) @@ -3049,7 +3055,11 @@ def test_cancel_job_hit(self): QUERY = "SELECT * from test_dataset:test_table" QUERY_JOB_RESOURCE = { "id": "{}:{}".format(self.PROJECT, JOB_ID), - "jobReference": {"projectId": self.PROJECT, "jobId": "query_job"}, + "jobReference": { + "projectId": "job-based-proj", + "jobId": "query_job", + "location": "asia-northeast1", + }, "state": "RUNNING", "configuration": {"query": {"query": QUERY}}, } @@ -3057,17 +3067,20 @@ def test_cancel_job_hit(self): creds = _make_credentials() client = self._make_one(self.PROJECT, creds) conn = client._connection = make_connection(RESOURCE) + job_from_resource = QueryJob.from_api_repr(QUERY_JOB_RESOURCE, client) - job = client.cancel_job(JOB_ID) + job = client.cancel_job(job_from_resource) self.assertIsInstance(job, QueryJob) self.assertEqual(job.job_id, JOB_ID) + self.assertEqual(job.project, "job-based-proj") + self.assertEqual(job.location, "asia-northeast1") self.assertEqual(job.query, QUERY) 
conn.api_request.assert_called_once_with( method="POST", - path="/projects/PROJECT/jobs/query_job/cancel", - query_params={"projection": "full"}, + path="/projects/job-based-proj/jobs/query_job/cancel", + query_params={"projection": "full", "location": "asia-northeast1"}, timeout=None, ) From e0b373d0e721a70656ed8faceb7f5c70f642d144 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 16 Apr 2021 17:19:36 +0200 Subject: [PATCH 185/341] feat: DB API cursors are now iterable (#618) * feat: make DB API Cursors iterable * Raise error if obtaining iterator of closed Cursor --- google/cloud/bigquery/dbapi/_helpers.py | 2 +- google/cloud/bigquery/dbapi/cursor.py | 4 ++++ tests/unit/test_dbapi_cursor.py | 24 ++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 69694c98c..beb3c5e71 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -276,7 +276,7 @@ def decorate_public_methods(klass): """Apply ``_raise_on_closed()`` decorator to public instance methods. """ for name in dir(klass): - if name.startswith("_"): + if name.startswith("_") and name != "__iter__": continue member = getattr(klass, name) diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py index ee09158d8..7e5449718 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -365,6 +365,10 @@ def setinputsizes(self, sizes): def setoutputsize(self, size, column=None): """No-op, but for consistency raise an error if cursor is closed.""" + def __iter__(self): + self._try_fetch() + return iter(self._query_data) + def _format_operation_list(operation, parameters): """Formats parameters in operation in the way BigQuery expects. 
diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index 0f44e3895..8ca4e9b6c 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -178,6 +178,7 @@ def test_raises_error_if_closed(self): "fetchone", "setinputsizes", "setoutputsize", + "__iter__", ) for method in method_names: @@ -611,6 +612,29 @@ def test_executemany_w_dml(self): self.assertIsNone(cursor.description) self.assertEqual(cursor.rowcount, 12) + def test_is_iterable(self): + from google.cloud.bigquery import dbapi + + connection = dbapi.connect( + self._mock_client(rows=[("hello", "there", 7), ("good", "bye", -3)]) + ) + cursor = connection.cursor() + cursor.execute("SELECT foo, bar, baz FROM hello_world WHERE baz < 42;") + + rows_iter = iter(cursor) + + row = next(rows_iter) + self.assertEqual(row, ("hello", "there", 7)) + row = next(rows_iter) + self.assertEqual(row, ("good", "bye", -3)) + self.assertRaises(StopIteration, next, rows_iter) + + self.assertEqual( + list(cursor), + [], + "Iterating again over the same results should produce no rows.", + ) + def test__format_operation_w_dict(self): from google.cloud.bigquery.dbapi import cursor From 6502a602337ae562652a20b20270949f2c9d5073 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 16 Apr 2021 17:52:07 +0200 Subject: [PATCH 186/341] fix: consistent percents handling in DB API query (#619) Fixes #608. Percents in the query string are now always de-escaped, regardless of whether any query parameters are passed or not. In addition, misformatting placeholders that don't match parameter values now consistently raise `ProgrammingError`. **PR checklist:** - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) --- google/cloud/bigquery/dbapi/cursor.py | 6 +-- tests/unit/test_dbapi_cursor.py | 53 +++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py index 7e5449718..ca78d3907 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -393,7 +393,7 @@ def _format_operation_list(operation, parameters): try: return operation % tuple(formatted_params) - except TypeError as exc: + except (TypeError, ValueError) as exc: raise exceptions.ProgrammingError(exc) @@ -423,7 +423,7 @@ def _format_operation_dict(operation, parameters): try: return operation % formatted_params - except KeyError as exc: + except (KeyError, ValueError, TypeError) as exc: raise exceptions.ProgrammingError(exc) @@ -445,7 +445,7 @@ def _format_operation(operation, parameters=None): ``parameters`` argument. """ if parameters is None or len(parameters) == 0: - return operation + return operation.replace("%%", "%") # Still do percent de-escaping. 
if isinstance(parameters, collections_abc.Mapping): return _format_operation_dict(operation, parameters) diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index 8ca4e9b6c..039ef3b4c 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -657,6 +657,14 @@ def test__format_operation_w_wrong_dict(self): {"somevalue-not-here": "hi", "othervalue": "world"}, ) + def test__format_operation_w_redundant_dict_key(self): + from google.cloud.bigquery.dbapi import cursor + + formatted_operation = cursor._format_operation( + "SELECT %(somevalue)s;", {"somevalue": "foo", "value-not-used": "bar"} + ) + self.assertEqual(formatted_operation, "SELECT @`somevalue`;") + def test__format_operation_w_sequence(self): from google.cloud.bigquery.dbapi import cursor @@ -676,8 +684,53 @@ def test__format_operation_w_too_short_sequence(self): ("hello",), ) + def test__format_operation_w_too_long_sequence(self): + from google.cloud.bigquery import dbapi + from google.cloud.bigquery.dbapi import cursor + + self.assertRaises( + dbapi.ProgrammingError, + cursor._format_operation, + "SELECT %s, %s;", + ("hello", "world", "everyone"), + ) + def test__format_operation_w_empty_dict(self): from google.cloud.bigquery.dbapi import cursor formatted_operation = cursor._format_operation("SELECT '%f'", {}) self.assertEqual(formatted_operation, "SELECT '%f'") + + def test__format_operation_wo_params_single_percent(self): + from google.cloud.bigquery.dbapi import cursor + + formatted_operation = cursor._format_operation("SELECT '%'", {}) + self.assertEqual(formatted_operation, "SELECT '%'") + + def test__format_operation_wo_params_double_percents(self): + from google.cloud.bigquery.dbapi import cursor + + formatted_operation = cursor._format_operation("SELECT '%%'", {}) + self.assertEqual(formatted_operation, "SELECT '%'") + + def test__format_operation_unescaped_percent_w_dict_param(self): + from google.cloud.bigquery import dbapi + from 
google.cloud.bigquery.dbapi import cursor + + self.assertRaises( + dbapi.ProgrammingError, + cursor._format_operation, + "SELECT %(foo)s, '100 %';", + {"foo": "bar"}, + ) + + def test__format_operation_unescaped_percent_w_list_param(self): + from google.cloud.bigquery import dbapi + from google.cloud.bigquery.dbapi import cursor + + self.assertRaises( + dbapi.ProgrammingError, + cursor._format_operation, + "SELECT %s, %s, '100 %';", + ["foo", "bar"], + ) From 6ee582413c9b83fe8c853393d20090ed9d2e8b77 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 23 Apr 2021 22:49:42 +0200 Subject: [PATCH 187/341] chore: add unit test nox session w/o extras (#623) --- noxfile.py | 12 ++++++++++-- tests/unit/test__pandas_helpers.py | 1 + tests/unit/test_client.py | 7 ++++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/noxfile.py b/noxfile.py index a738d8c00..bde3b990e 100644 --- a/noxfile.py +++ b/noxfile.py @@ -31,6 +31,7 @@ # 'docfx' is excluded since it only needs to run in 'docs-presubmit' nox.options.sessions = [ + "unit_noextras", "unit", "system", "snippets", @@ -42,7 +43,7 @@ ] -def default(session): +def default(session, install_extras=True): """Default unit test session. This is intended to be run **without** an interpreter set, so @@ -65,7 +66,8 @@ def default(session): constraints_path, ) - session.install("-e", ".[all]", "-c", constraints_path) + install_target = ".[all]" if install_extras else "." 
+ session.install("-e", install_target, "-c", constraints_path) session.install("ipython", "-c", constraints_path) @@ -90,6 +92,12 @@ def unit(session): default(session) +@nox.session(python=UNIT_TEST_PYTHON_VERSIONS[-1]) +def unit_noextras(session): + """Run the unit test suite.""" + default(session, install_extras=False) + + @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) def system(session): """Run the system test suite.""" diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 43692f4af..39a3d845b 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -1464,6 +1464,7 @@ def test_download_dataframe_row_iterator_dict_sequence_schema(module_under_test) result = next(results_gen) +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_table_data_listpage_to_dataframe_skips_stop_iteration(module_under_test): dataframe = module_under_test._row_iterator_page_to_dataframe([], [], {}) assert isinstance(dataframe, pandas.DataFrame) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index c5e742c9e..860f25f35 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -65,7 +65,12 @@ from tests.unit.helpers import make_connection PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") -PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version + +if pandas is not None: + PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version +else: + # Set to less than MIN version. 
+ PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") def _make_credentials(): From 34ecc3f1ca0ff073330c0c605673d89b43af7ed9 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 23 Apr 2021 22:54:32 +0200 Subject: [PATCH 188/341] feat: retry google.auth TransportError by default (#624) --- google/cloud/bigquery/retry.py | 2 ++ tests/unit/test_retry.py | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index 20a8e7b13..5e9075fe1 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -14,6 +14,7 @@ from google.api_core import exceptions from google.api_core import retry +from google.auth import exceptions as auth_exceptions import requests.exceptions @@ -27,6 +28,7 @@ exceptions.InternalServerError, exceptions.BadGateway, requests.exceptions.ConnectionError, + auth_exceptions.TransportError, ) diff --git a/tests/unit/test_retry.py b/tests/unit/test_retry.py index 318a54d34..0bef1e5e1 100644 --- a/tests/unit/test_retry.py +++ b/tests/unit/test_retry.py @@ -51,6 +51,12 @@ def test_w_unstructured_requests_connectionerror(self): exc = requests.exceptions.ConnectionError() self.assertTrue(self._call_fut(exc)) + def test_w_auth_transporterror(self): + from google.auth.exceptions import TransportError + + exc = TransportError("testing") + self.assertTrue(self._call_fut(exc)) + def test_w_unstructured_too_many_requests(self): from google.api_core.exceptions import TooManyRequests From f8d4aaa335a0eef915e73596fc9b43b11d11be9f Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 23 Apr 2021 23:49:26 +0200 Subject: [PATCH 189/341] feat: add type hints for public methods (#613) * feat: add type hint for public methods * feat: add bigquery-storage in requirement file * feat: add pandas in requirement file * feat: add return type hint * feat: remove pandas import as a string * Use the latest pytype version (2021.4.9) * Silence false import and module attribute errors * 
Fix misc. pytype warnings and false positives * Make changes to generated files persistent * Make final cleanup of client.py * Change import ignores to more specific errors * Silence false positive type warning in job config * Silence noisy _helper type warnings * Silence false positives for resumable media code * Add pytype to nox.options.sessions * Hide for-type-check-only imports behind a flag * Remove obsolete skipIf decorator from two tests inspect.signature() was added in Python 3.3, and the library only needs to support Python3.6+. * Install dependencies in pytype session This avoids numerous unnecessary import and module attribute errors, rendering lots of pytype directive comments obsolete. * Be more specific about to_dataframe()'s return type * Add missing return type for _get_query_results() * Be more specific about pandas/pyarrow return types * Exclude typing-only imports from coverage checks Co-authored-by: HemangChothani Co-authored-by: Tim Swast --- .gitignore | 1 + google/cloud/bigquery/_http.py | 3 +- google/cloud/bigquery/_pandas_helpers.py | 2 + google/cloud/bigquery/client.py | 549 +++++++++++------- google/cloud/bigquery/dataset.py | 16 +- google/cloud/bigquery/external_config.py | 28 +- google/cloud/bigquery/job/base.py | 43 +- google/cloud/bigquery/job/extract.py | 2 +- google/cloud/bigquery/job/load.py | 2 +- google/cloud/bigquery/job/query.py | 55 +- .../bigquery/magics/line_arg_parser/lexer.py | 2 +- google/cloud/bigquery/model.py | 10 +- google/cloud/bigquery/query.py | 20 +- google/cloud/bigquery/routine/routine.py | 22 +- google/cloud/bigquery/schema.py | 13 +- google/cloud/bigquery/table.py | 75 ++- noxfile.py | 11 + samples/geography/requirements.txt | 1 + setup.cfg | 14 + synth.py | 29 + tests/unit/test_signature_compatibility.py | 8 - 21 files changed, 575 insertions(+), 331 deletions(-) diff --git a/.gitignore b/.gitignore index b4243ced7..99c3a1444 100644 --- a/.gitignore +++ b/.gitignore @@ -29,6 +29,7 @@ pip-log.txt .nox .cache
.pytest_cache +.pytype # Mac diff --git a/google/cloud/bigquery/_http.py b/google/cloud/bigquery/_http.py index ede26cc70..81e7922e6 100644 --- a/google/cloud/bigquery/_http.py +++ b/google/cloud/bigquery/_http.py @@ -17,8 +17,7 @@ import os import pkg_resources -from google.cloud import _http - +from google.cloud import _http # pytype: disable=import-error from google.cloud.bigquery import __version__ diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 7553726fa..e93a99eba 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -373,6 +373,7 @@ def augment_schema(dataframe, current_bq_schema): Returns: Optional[Sequence[google.cloud.bigquery.schema.SchemaField]] """ + # pytype: disable=attribute-error augmented_schema = [] unknown_type_fields = [] @@ -406,6 +407,7 @@ def augment_schema(dataframe, current_bq_schema): return None return augmented_schema + # pytype: enable=attribute-error def dataframe_to_arrow(dataframe, bq_schema): diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 8211e23a3..5aa8608a5 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -19,6 +19,7 @@ from collections import abc as collections_abc import copy +import datetime import functools import gzip import io @@ -27,6 +28,7 @@ import math import os import tempfile +from typing import Any, BinaryIO, Dict, Iterable, Optional, Sequence, Tuple, Union import uuid import warnings @@ -35,17 +37,18 @@ except ImportError: # pragma: NO COVER pyarrow = None -from google import resumable_media +from google import resumable_media # type: ignore from google.resumable_media.requests import MultipartUpload from google.resumable_media.requests import ResumableUpload import google.api_core.client_options -import google.api_core.exceptions +import google.api_core.exceptions as core_exceptions from google.api_core.iam import Policy from 
google.api_core import page_iterator +from google.api_core import retry as retries import google.cloud._helpers -from google.cloud import exceptions -from google.cloud.client import ClientWithProject +from google.cloud import exceptions # pytype: disable=import-error +from google.cloud.client import ClientWithProject # pytype: disable=import-error from google.cloud.bigquery._helpers import _del_sub_prop from google.cloud.bigquery._helpers import _get_sub_prop @@ -59,6 +62,13 @@ from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job +from google.cloud.bigquery.job import ( + LoadJobConfig, + QueryJob, + QueryJobConfig, + CopyJobConfig, + ExtractJobConfig, +) from google.cloud.bigquery.model import Model from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.model import _model_arg_to_model_ref @@ -216,8 +226,11 @@ def close(self): self._http.close() def get_service_account_email( - self, project=None, retry=DEFAULT_RETRY, timeout=None - ): + self, + project: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> str: """Get the email address of the project's BigQuery service account Note: @@ -259,8 +272,12 @@ def get_service_account_email( return api_response["email"] def list_projects( - self, max_results=None, page_token=None, retry=DEFAULT_RETRY, timeout=None - ): + self, + max_results: int = None, + page_token: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> page_iterator.Iterator: """List projects for the project associated with this client. 
See @@ -313,14 +330,14 @@ def api_request(*args, **kwargs): def list_datasets( self, - project=None, - include_all=False, - filter=None, - max_results=None, - page_token=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + project: str = None, + include_all: bool = False, + filter: str = None, + max_results: int = None, + page_token: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> page_iterator.Iterator: """List datasets for the project associated with this client. See @@ -390,7 +407,7 @@ def api_request(*args, **kwargs): extra_params=extra_params, ) - def dataset(self, dataset_id, project=None): + def dataset(self, dataset_id: str, project: str = None) -> DatasetReference: """Deprecated: Construct a reference to a dataset. .. deprecated:: 1.24.0 @@ -466,8 +483,12 @@ def _dataset_from_arg(self, dataset): return dataset def create_dataset( - self, dataset, exists_ok=False, retry=DEFAULT_RETRY, timeout=None - ): + self, + dataset: Union[str, Dataset, DatasetReference], + exists_ok: bool = False, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Dataset: """API call: create the dataset via a POST request. See @@ -531,14 +552,18 @@ def create_dataset( timeout=timeout, ) return Dataset.from_api_repr(api_response) - except google.api_core.exceptions.Conflict: + except core_exceptions.Conflict: if not exists_ok: raise return self.get_dataset(dataset.reference, retry=retry) def create_routine( - self, routine, exists_ok=False, retry=DEFAULT_RETRY, timeout=None - ): + self, + routine: Routine, + exists_ok: bool = False, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Routine: """[Beta] Create a routine via a POST request. 
See @@ -582,12 +607,18 @@ def create_routine( timeout=timeout, ) return Routine.from_api_repr(api_response) - except google.api_core.exceptions.Conflict: + except core_exceptions.Conflict: if not exists_ok: raise return self.get_routine(routine.reference, retry=retry) - def create_table(self, table, exists_ok=False, retry=DEFAULT_RETRY, timeout=None): + def create_table( + self, + table: Union[str, Table, TableReference], + exists_ok: bool = False, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Table: """API call: create a table via a PUT request See @@ -636,7 +667,7 @@ def create_table(self, table, exists_ok=False, retry=DEFAULT_RETRY, timeout=None timeout=timeout, ) return Table.from_api_repr(api_response) - except google.api_core.exceptions.Conflict: + except core_exceptions.Conflict: if not exists_ok: raise return self.get_table(table.reference, retry=retry) @@ -654,7 +685,12 @@ def _call_api( return call() return call() - def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY, timeout=None): + def get_dataset( + self, + dataset_ref: Union[DatasetReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Dataset: """Fetch the dataset referenced by ``dataset_ref`` Args: @@ -693,8 +729,12 @@ def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY, timeout=None): return Dataset.from_api_repr(api_response) def get_iam_policy( - self, table, requested_policy_version=1, retry=DEFAULT_RETRY, timeout=None, - ): + self, + table: Union[Table, TableReference], + requested_policy_version: int = 1, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Policy: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -718,8 +758,13 @@ def get_iam_policy( return Policy.from_api_repr(response) def set_iam_policy( - self, table, policy, updateMask=None, retry=DEFAULT_RETRY, timeout=None, - ): + self, + table: Union[Table, TableReference], + policy: 
Policy, + updateMask: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Policy: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -747,8 +792,12 @@ def set_iam_policy( return Policy.from_api_repr(response) def test_iam_permissions( - self, table, permissions, retry=DEFAULT_RETRY, timeout=None, - ): + self, + table: Union[Table, TableReference], + permissions: Sequence[str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Dict[str, Any]: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -768,7 +817,12 @@ def test_iam_permissions( return response - def get_model(self, model_ref, retry=DEFAULT_RETRY, timeout=None): + def get_model( + self, + model_ref: Union[ModelReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Model: """[Beta] Fetch the model referenced by ``model_ref``. Args: @@ -806,7 +860,12 @@ def get_model(self, model_ref, retry=DEFAULT_RETRY, timeout=None): ) return Model.from_api_repr(api_response) - def get_routine(self, routine_ref, retry=DEFAULT_RETRY, timeout=None): + def get_routine( + self, + routine_ref: Union[Routine, RoutineReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Routine: """[Beta] Get the routine referenced by ``routine_ref``. Args: @@ -845,7 +904,12 @@ def get_routine(self, routine_ref, retry=DEFAULT_RETRY, timeout=None): ) return Routine.from_api_repr(api_response) - def get_table(self, table, retry=DEFAULT_RETRY, timeout=None): + def get_table( + self, + table: Union[Table, TableReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Table: """Fetch the table referenced by ``table``. 
Args: @@ -881,7 +945,13 @@ def get_table(self, table, retry=DEFAULT_RETRY, timeout=None): ) return Table.from_api_repr(api_response) - def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY, timeout=None): + def update_dataset( + self, + dataset: Dataset, + fields: Sequence[str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Dataset: """Change some fields of a dataset. Use ``fields`` to specify which fields to update. At least one field @@ -945,7 +1015,13 @@ def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY, timeout=None): ) return Dataset.from_api_repr(api_response) - def update_model(self, model, fields, retry=DEFAULT_RETRY, timeout=None): + def update_model( + self, + model: Model, + fields: Sequence[str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Model: """[Beta] Change some fields of a model. Use ``fields`` to specify which fields to update. At least one field @@ -1003,7 +1079,13 @@ def update_model(self, model, fields, retry=DEFAULT_RETRY, timeout=None): ) return Model.from_api_repr(api_response) - def update_routine(self, routine, fields, retry=DEFAULT_RETRY, timeout=None): + def update_routine( + self, + routine: Routine, + fields: Sequence[str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Routine: """[Beta] Change some fields of a routine. Use ``fields`` to specify which fields to update. At least one field @@ -1071,7 +1153,13 @@ def update_routine(self, routine, fields, retry=DEFAULT_RETRY, timeout=None): ) return Routine.from_api_repr(api_response) - def update_table(self, table, fields, retry=DEFAULT_RETRY, timeout=None): + def update_table( + self, + table: Table, + fields: Sequence[str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Table: """Change some fields of a table. Use ``fields`` to specify which fields to update. 
At least one field @@ -1132,12 +1220,12 @@ def update_table(self, table, fields, retry=DEFAULT_RETRY, timeout=None): def list_models( self, - dataset, - max_results=None, - page_token=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + dataset: Union[Dataset, DatasetReference, str], + max_results: int = None, + page_token: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> page_iterator.Iterator: """[Beta] List models in the dataset. See @@ -1204,12 +1292,12 @@ def api_request(*args, **kwargs): def list_routines( self, - dataset, - max_results=None, - page_token=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + dataset: Union[Dataset, DatasetReference, str], + max_results: int = None, + page_token: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> page_iterator.Iterator: """[Beta] List routines in the dataset. See @@ -1276,12 +1364,12 @@ def api_request(*args, **kwargs): def list_tables( self, - dataset, - max_results=None, - page_token=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + dataset: Union[Dataset, DatasetReference, str], + max_results: int = None, + page_token: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> page_iterator.Iterator: """List tables in the dataset. See @@ -1347,12 +1435,12 @@ def api_request(*args, **kwargs): def delete_dataset( self, - dataset, - delete_contents=False, - retry=DEFAULT_RETRY, - timeout=None, - not_found_ok=False, - ): + dataset: Union[Dataset, DatasetReference, str], + delete_contents: bool = False, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + not_found_ok: bool = False, + ) -> None: """Delete a dataset. 
See @@ -1401,13 +1489,17 @@ def delete_dataset( query_params=params, timeout=timeout, ) - except google.api_core.exceptions.NotFound: + except core_exceptions.NotFound: if not not_found_ok: raise def delete_model( - self, model, retry=DEFAULT_RETRY, timeout=None, not_found_ok=False - ): + self, + model: Union[Model, ModelReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + not_found_ok: bool = False, + ) -> None: """[Beta] Delete a model See @@ -1449,13 +1541,17 @@ def delete_model( path=path, timeout=timeout, ) - except google.api_core.exceptions.NotFound: + except core_exceptions.NotFound: if not not_found_ok: raise def delete_routine( - self, routine, retry=DEFAULT_RETRY, timeout=None, not_found_ok=False - ): + self, + routine: Union[Routine, RoutineReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + not_found_ok: bool = False, + ) -> None: """[Beta] Delete a routine. See @@ -1499,13 +1595,17 @@ def delete_routine( path=path, timeout=timeout, ) - except google.api_core.exceptions.NotFound: + except core_exceptions.NotFound: if not not_found_ok: raise def delete_table( - self, table, retry=DEFAULT_RETRY, timeout=None, not_found_ok=False - ): + self, + table: Union[Table, TableReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + not_found_ok: bool = False, + ) -> None: """Delete a table See @@ -1545,13 +1645,19 @@ def delete_table( path=path, timeout=timeout, ) - except google.api_core.exceptions.NotFound: + except core_exceptions.NotFound: if not not_found_ok: raise def _get_query_results( - self, job_id, retry, project=None, timeout_ms=None, location=None, timeout=None, - ): + self, + job_id: str, + retry: retries.Retry, + project: str = None, + timeout_ms: int = None, + location: str = None, + timeout: float = None, + ) -> _QueryResults: """Get the query results object for a query job. 
Args: @@ -1609,7 +1715,7 @@ def _get_query_results( ) return _QueryResults.from_api_repr(resource) - def job_from_resource(self, resource): + def job_from_resource(self, resource: dict) -> job.UnknownJob: """Detect correct job type from resource and instantiate. Args: @@ -1635,7 +1741,12 @@ def job_from_resource(self, resource): return job.QueryJob.from_api_repr(resource, self) return job.UnknownJob.from_api_repr(resource, self) - def create_job(self, job_config, retry=DEFAULT_RETRY, timeout=None): + def create_job( + self, + job_config: dict, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Create a new job. Args: job_config (dict): configuration job representation returned from the API. @@ -1726,8 +1837,13 @@ def create_job(self, job_config, retry=DEFAULT_RETRY, timeout=None): raise TypeError("Invalid job configuration received.") def get_job( - self, job_id, project=None, location=None, retry=DEFAULT_RETRY, timeout=None - ): + self, + job_id: str, + project: str = None, + location: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Fetch a job for the project associated with this client. See @@ -1795,8 +1911,13 @@ def get_job( return self.job_from_resource(resource) def cancel_job( - self, job_id, project=None, location=None, retry=DEFAULT_RETRY, timeout=None - ): + self, + job_id: str, + project: str = None, + location: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Attempt to cancel a job from a job ID. 
See @@ -1865,17 +1986,17 @@ def cancel_job( def list_jobs( self, - project=None, - parent_job=None, - max_results=None, - page_token=None, - all_users=None, - state_filter=None, - retry=DEFAULT_RETRY, - timeout=None, - min_creation_time=None, - max_creation_time=None, - ): + project: str = None, + parent_job: Optional[Union[QueryJob, str]] = None, + max_results: int = None, + page_token: str = None, + all_users: bool = None, + state_filter: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + min_creation_time: datetime.datetime = None, + max_creation_time: datetime.datetime = None, + ) -> page_iterator.Iterator: """List jobs for the project associated with this client. See @@ -1926,7 +2047,7 @@ def list_jobs( Iterable of job instances. """ if isinstance(parent_job, job._AsyncJob): - parent_job = parent_job.job_id + parent_job = parent_job.job_id # pytype: disable=attribute-error extra_params = { "allUsers": all_users, @@ -1975,16 +2096,16 @@ def api_request(*args, **kwargs): def load_table_from_uri( self, - source_uris, - destination, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - job_config=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + source_uris: Union[str, Sequence[str]], + destination: Union[Table, TableReference, str], + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + job_config: LoadJobConfig = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> job.LoadJob: """Starts a job for loading data into a table from CloudStorage. 
See @@ -2057,18 +2178,18 @@ def load_table_from_uri( def load_table_from_file( self, - file_obj, - destination, - rewind=False, - size=None, - num_retries=_DEFAULT_NUM_RETRIES, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - job_config=None, - timeout=None, - ): + file_obj: BinaryIO, + destination: Union[Table, TableReference, str], + rewind: bool = False, + size: int = None, + num_retries: int = _DEFAULT_NUM_RETRIES, + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + job_config: LoadJobConfig = None, + timeout: float = None, + ) -> job.LoadJob: """Upload the contents of this table from a file-like object. Similar to :meth:`load_table_from_uri`, this method creates, starts and @@ -2162,16 +2283,16 @@ def load_table_from_file( def load_table_from_dataframe( self, dataframe, - destination, - num_retries=_DEFAULT_NUM_RETRIES, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - job_config=None, - parquet_compression="snappy", - timeout=None, - ): + destination: Union[Table, TableReference, str], + num_retries: int = _DEFAULT_NUM_RETRIES, + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + job_config: LoadJobConfig = None, + parquet_compression: str = "snappy", + timeout: float = None, + ) -> job.LoadJob: """Upload the contents of a table from a pandas DataFrame. 
Similar to :meth:`load_table_from_uri`, this method creates, starts and @@ -2299,7 +2420,7 @@ def load_table_from_dataframe( ): try: table = self.get_table(destination) - except google.api_core.exceptions.NotFound: + except core_exceptions.NotFound: table = None else: columns_and_indexes = frozenset( @@ -2388,16 +2509,16 @@ def load_table_from_dataframe( def load_table_from_json( self, - json_rows, - destination, - num_retries=_DEFAULT_NUM_RETRIES, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - job_config=None, - timeout=None, - ): + json_rows: Iterable[Dict[str, Any]], + destination: Union[Table, TableReference, str], + num_retries: int = _DEFAULT_NUM_RETRIES, + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + job_config: LoadJobConfig = None, + timeout: float = None, + ) -> job.LoadJob: """Upload the contents of a table from a JSON string or dict. Args: @@ -2669,16 +2790,18 @@ def _do_multipart_upload( def copy_table( self, - sources, - destination, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - job_config=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + sources: Union[ + Table, TableReference, str, Sequence[Union[Table, TableReference, str]] + ], + destination: Union[Table, TableReference, str], + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + job_config: CopyJobConfig = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> job.CopyJob: """Copy one or more tables to another table. 
See @@ -2772,17 +2895,17 @@ def copy_table( def extract_table( self, - source, - destination_uris, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - job_config=None, - retry=DEFAULT_RETRY, - timeout=None, - source_type="Table", - ): + source: Union[Table, TableReference, Model, ModelReference, str], + destination_uris: Union[str, Sequence[str]], + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + job_config: ExtractJobConfig = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + source_type: str = "Table", + ) -> job.ExtractJob: """Start a job to extract a table into Cloud Storage files. See @@ -2871,15 +2994,15 @@ def extract_table( def query( self, - query, - job_config=None, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + query: str, + job_config: QueryJobConfig = None, + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> job.QueryJob: """Run a SQL query. See @@ -2956,7 +3079,13 @@ def query( return query_job - def insert_rows(self, table, rows, selected_fields=None, **kwargs): + def insert_rows( + self, + table: Union[Table, TableReference, str], + rows: Union[Iterable[Tuple], Iterable[Dict]], + selected_fields: Sequence[SchemaField] = None, + **kwargs: dict, + ) -> Sequence[dict]: """Insert rows into a table via the streaming API. See @@ -2979,7 +3108,7 @@ def insert_rows(self, table, rows, selected_fields=None, **kwargs): selected_fields (Sequence[google.cloud.bigquery.schema.SchemaField]): The fields to return. Required if ``table`` is a :class:`~google.cloud.bigquery.table.TableReference`. - kwargs (Dict): + kwargs (dict): Keyword arguments to :meth:`~google.cloud.bigquery.client.Client.insert_rows_json`. 
@@ -3019,8 +3148,13 @@ def insert_rows(self, table, rows, selected_fields=None, **kwargs): return self.insert_rows_json(table, json_rows, **kwargs) def insert_rows_from_dataframe( - self, table, dataframe, selected_fields=None, chunk_size=500, **kwargs - ): + self, + table: Union[Table, TableReference, str], + dataframe, + selected_fields: Sequence[SchemaField] = None, + chunk_size: int = 500, + **kwargs: Dict, + ) -> Sequence[Sequence[dict]]: """Insert rows into a table from a dataframe via the streaming API. Args: @@ -3068,15 +3202,15 @@ def insert_rows_from_dataframe( def insert_rows_json( self, - table, - json_rows, - row_ids=None, - skip_invalid_rows=None, - ignore_unknown_values=None, - template_suffix=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + table: Union[Table, TableReference, str], + json_rows: Sequence[Dict], + row_ids: Sequence[str] = None, + skip_invalid_rows: bool = None, + ignore_unknown_values: bool = None, + template_suffix: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Sequence[dict]: """Insert rows into a table without applying local type conversions. See @@ -3172,7 +3306,12 @@ def insert_rows_json( return errors - def list_partitions(self, table, retry=DEFAULT_RETRY, timeout=None): + def list_partitions( + self, + table: Union[Table, TableReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Sequence[str]: """List the partitions in a table. 
Args: @@ -3214,15 +3353,15 @@ def list_partitions(self, table, retry=DEFAULT_RETRY, timeout=None): def list_rows( self, - table, - selected_fields=None, - max_results=None, - page_token=None, - start_index=None, - page_size=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + table: Union[Table, TableListItem, TableReference, str], + selected_fields: Sequence[SchemaField] = None, + max_results: int = None, + page_token: str = None, + start_index: int = None, + page_size: int = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> RowIterator: """List the rows of the table. See @@ -3323,18 +3462,18 @@ def list_rows( def _list_rows_from_query_results( self, - job_id, - location, - project, - schema, - total_rows=None, - destination=None, - max_results=None, - start_index=None, - page_size=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + job_id: str, + location: str, + project: str, + schema: SchemaField, + total_rows: int = None, + destination: Union[Table, TableReference, TableListItem, str] = None, + max_results: int = None, + start_index: int = None, + page_size: int = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> RowIterator: """List the rows of a completed query. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults @@ -3419,7 +3558,7 @@ def _schema_to_json_file_object(self, schema_list, file_obj): """ json.dump(schema_list, file_obj, indent=2, sort_keys=True) - def schema_from_json(self, file_or_path): + def schema_from_json(self, file_or_path: Union[str, BinaryIO]): """Takes a file object or file path that contains json that describes a table schema. 
@@ -3432,7 +3571,9 @@ def schema_from_json(self, file_or_path): with open(file_or_path) as file_obj: return self._schema_from_json_file_object(file_obj) - def schema_to_json(self, schema_list, destination): + def schema_to_json( + self, schema_list: Sequence[SchemaField], destination: Union[str, BinaryIO] + ): """Takes a list of schema field objects. Serializes the list of schema field objects as json to a file. @@ -3606,7 +3747,7 @@ def _check_mode(stream): mode = getattr(stream, "mode", None) if isinstance(stream, gzip.GzipFile): - if mode != gzip.READ: + if mode != gzip.READ: # pytype: disable=module-attr raise ValueError( "Cannot upload gzip files opened in write mode: use " "gzip.GzipFile(filename, mode='rb')" diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index 2d3a4755f..21e56f305 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -220,7 +220,7 @@ def to_api_repr(self): return resource @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "AccessEntry": """Factory: construct an access entry given its API representation Args: @@ -288,7 +288,7 @@ def path(self): routine = _get_routine_reference @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "DatasetReference": """Factory: construct a dataset reference given its API representation Args: @@ -304,7 +304,9 @@ def from_api_repr(cls, resource): return cls(project, dataset_id) @classmethod - def from_string(cls, dataset_id, default_project=None): + def from_string( + cls, dataset_id: str, default_project: str = None + ) -> "DatasetReference": """Construct a dataset reference from dataset ID string. 
Args: @@ -350,7 +352,7 @@ def from_string(cls, dataset_id, default_project=None): return cls(output_project_id, output_dataset_id) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this dataset reference Returns: @@ -640,7 +642,7 @@ def default_encryption_configuration(self, value): self._properties["defaultEncryptionConfiguration"] = api_repr @classmethod - def from_string(cls, full_dataset_id): + def from_string(cls, full_dataset_id: str) -> "Dataset": """Construct a dataset from fully-qualified dataset ID. Args: @@ -664,7 +666,7 @@ def from_string(cls, full_dataset_id): return cls(DatasetReference.from_string(full_dataset_id)) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "Dataset": """Factory: construct a dataset given its API representation Args: @@ -689,7 +691,7 @@ def from_api_repr(cls, resource): dataset._properties = copy.deepcopy(resource) return dataset - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this dataset Returns: diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 59e4960f9..ef4d569fa 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -149,7 +149,7 @@ def type_(self): def type_(self, value): self._properties["type"] = value - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. Returns: @@ -159,7 +159,7 @@ def to_api_repr(self): return copy.deepcopy(self._properties) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "BigtableColumn": """Factory: construct a :class:`~.external_config.BigtableColumn` instance given its API representation. 
@@ -251,7 +251,7 @@ def columns(self): def columns(self, value): self._properties["columns"] = [col.to_api_repr() for col in value] - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. Returns: @@ -261,7 +261,7 @@ def to_api_repr(self): return copy.deepcopy(self._properties) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "BigtableColumnFamily": """Factory: construct a :class:`~.external_config.BigtableColumnFamily` instance given its API representation. @@ -333,7 +333,7 @@ def column_families(self): def column_families(self, value): self._properties["columnFamilies"] = [cf.to_api_repr() for cf in value] - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. Returns: @@ -343,7 +343,7 @@ def to_api_repr(self): return copy.deepcopy(self._properties) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "BigtableOptions": """Factory: construct a :class:`~.external_config.BigtableOptions` instance given its API representation. @@ -450,7 +450,7 @@ def skip_leading_rows(self): def skip_leading_rows(self, value): self._properties["skipLeadingRows"] = str(value) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. Returns: @@ -459,7 +459,7 @@ def to_api_repr(self): return copy.deepcopy(self._properties) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "CSVOptions": """Factory: construct a :class:`~.external_config.CSVOptions` instance given its API representation. @@ -513,7 +513,7 @@ def range(self): def range(self, value): self._properties["range"] = value - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. 
Returns: @@ -522,7 +522,7 @@ def to_api_repr(self): return copy.deepcopy(self._properties) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "GoogleSheetsOptions": """Factory: construct a :class:`~.external_config.GoogleSheetsOptions` instance given its API representation. @@ -601,7 +601,7 @@ def require_partition_filter(self): def require_partition_filter(self, value): self._properties["requirePartitionFilter"] = value - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. Returns: @@ -610,7 +610,7 @@ def to_api_repr(self): return copy.deepcopy(self._properties) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "HivePartitioningOptions": """Factory: construct a :class:`~.external_config.HivePartitioningOptions` instance given its API representation. @@ -784,7 +784,7 @@ def schema(self, value): prop = {"fields": [field.to_api_repr() for field in value]} self._properties["schema"] = prop - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. Returns: @@ -799,7 +799,7 @@ def to_api_repr(self): return config @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "ExternalConfig": """Factory: construct an :class:`~.external_config.ExternalConfig` instance given its API representation. 
diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index f24e972c8..20ad81c0b 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -18,6 +18,7 @@ import copy import http import threading +import typing from google.api_core import exceptions import google.api_core.future.polling @@ -25,6 +26,9 @@ from google.cloud.bigquery import _helpers from google.cloud.bigquery.retry import DEFAULT_RETRY +if typing.TYPE_CHECKING: # pragma: NO COVER + from google.api_core import retry as retries + _DONE_STATE = "DONE" _STOPPED_REASON = "stopped" @@ -466,7 +470,9 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): ) self._set_properties(api_response) - def exists(self, client=None, retry=DEFAULT_RETRY, timeout=None): + def exists( + self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + ) -> bool: """API call: test for the existence of the job via a GET request See @@ -509,7 +515,9 @@ def exists(self, client=None, retry=DEFAULT_RETRY, timeout=None): else: return True - def reload(self, client=None, retry=DEFAULT_RETRY, timeout=None): + def reload( + self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + ): """API call: refresh job properties via a GET request. See @@ -544,7 +552,9 @@ def reload(self, client=None, retry=DEFAULT_RETRY, timeout=None): ) self._set_properties(api_response) - def cancel(self, client=None, retry=DEFAULT_RETRY, timeout=None): + def cancel( + self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + ) -> bool: """API call: cancel job via a POST request See @@ -610,7 +620,12 @@ def _set_future_result(self): else: self.set_result(self) - def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): + def done( + self, + retry: "retries.Retry" = DEFAULT_RETRY, + timeout: float = None, + reload: bool = True, + ) -> bool: """Checks if the job is complete. 
Args: @@ -633,7 +648,9 @@ def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): self.reload(retry=retry, timeout=timeout) return self.state == _DONE_STATE - def result(self, retry=DEFAULT_RETRY, timeout=None): + def result( + self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + ) -> "_AsyncJob": """Start the job and wait for it to complete and get the result. Args: @@ -788,7 +805,7 @@ def _del_sub_prop(self, key): """ _helpers._del_sub_prop(self._properties, [self._job_type, key]) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of the job config. Returns: @@ -818,7 +835,10 @@ def _fill_from_default(self, default_job_config): + repr(default_job_config._job_type) ) - new_job_config = self.__class__() + # cls is one of the job config subclasses that provides the job_type argument to + # this base class on instantiation, thus missing-parameter warning is a false + # positive here. + new_job_config = self.__class__() # pytype: disable=missing-parameter default_job_properties = copy.deepcopy(default_job_config._properties) for key in self._properties: @@ -831,7 +851,7 @@ def _fill_from_default(self, default_job_config): return new_job_config @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "_JobConfig": """Factory: construct a job configuration given its API representation Args: @@ -842,7 +862,10 @@ def from_api_repr(cls, resource): Returns: google.cloud.bigquery.job._JobConfig: Configuration parsed from ``resource``. """ - job_config = cls() + # cls is one of the job config subclasses that provides the job_type argument to + # this base class on instantiation, thus missing-parameter warning is a false + # positive here. 
+ job_config = cls() # pytype: disable=missing-parameter job_config._properties = resource return job_config @@ -929,7 +952,7 @@ class UnknownJob(_AsyncJob): """A job whose type cannot be determined.""" @classmethod - def from_api_repr(cls, resource, client): + def from_api_repr(cls, resource: dict, client) -> "UnknownJob": """Construct an UnknownJob from the JSON representation. Args: diff --git a/google/cloud/bigquery/job/extract.py b/google/cloud/bigquery/job/extract.py index a6e262a32..3373bcdef 100644 --- a/google/cloud/bigquery/job/extract.py +++ b/google/cloud/bigquery/job/extract.py @@ -241,7 +241,7 @@ def to_api_repr(self): } @classmethod - def from_api_repr(cls, resource, client): + def from_api_repr(cls, resource: dict, client) -> "ExtractJob": """Factory: construct a job given its API representation .. note: diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index e784af0a6..b8174af3e 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -733,7 +733,7 @@ def to_api_repr(self): } @classmethod - def from_api_repr(cls, resource, client): + def from_api_repr(cls, resource: dict, client) -> "LoadJob": """Factory: construct a job given its API representation .. 
note: diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 491983f8e..f52f9c621 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -17,6 +17,8 @@ import concurrent.futures import copy import re +import typing +from typing import Any, Dict, Union from google.api_core import exceptions from google.api_core.future import polling as polling_future @@ -46,6 +48,15 @@ from google.cloud.bigquery.job.base import _JobConfig from google.cloud.bigquery.job.base import _JobReference +if typing.TYPE_CHECKING: # pragma: NO COVER + # Assumption: type checks are only used by library developers and CI environments + # that have all optional dependencies installed, thus no conditional imports. + import pandas + import pyarrow + from google.api_core import retry as retries + from google.cloud import bigquery_storage + from google.cloud.bigquery.table import RowIterator + _CONTAINS_ORDER_BY = re.compile(r"ORDER\s+BY", re.IGNORECASE) _TIMEOUT_BUFFER_SECS = 0.1 @@ -491,7 +502,7 @@ def schema_update_options(self): def schema_update_options(self, values): self._set_sub_prop("schemaUpdateOptions", values) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of the query job config. Returns: @@ -718,7 +729,7 @@ def to_api_repr(self): } @classmethod - def from_api_repr(cls, resource, client): + def from_api_repr(cls, resource: dict, client) -> "QueryJob": """Factory: construct a job given its API representation Args: @@ -1036,7 +1047,9 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): exc.query_job = self raise - def _reload_query_results(self, retry=DEFAULT_RETRY, timeout=None): + def _reload_query_results( + self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + ): """Refresh the cached query results. 
Args: @@ -1111,12 +1124,12 @@ def _done_or_raise(self, retry=DEFAULT_RETRY, timeout=None): def result( self, - page_size=None, - max_results=None, - retry=DEFAULT_RETRY, - timeout=None, - start_index=None, - ): + page_size: int = None, + max_results: int = None, + retry: "retries.Retry" = DEFAULT_RETRY, + timeout: float = None, + start_index: int = None, + ) -> Union["RowIterator", _EmptyRowIterator]: """Start the job and wait for it to complete and get the result. Args: @@ -1196,10 +1209,10 @@ def result( # changes to table.RowIterator.to_arrow() def to_arrow( self, - progress_bar_type=None, - bqstorage_client=None, - create_bqstorage_client=True, - ): + progress_bar_type: str = None, + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + create_bqstorage_client: bool = True, + ) -> "pyarrow.Table": """[Beta] Create a class:`pyarrow.Table` by loading all pages of a table or query. @@ -1265,12 +1278,12 @@ def to_arrow( # changes to table.RowIterator.to_dataframe() def to_dataframe( self, - bqstorage_client=None, - dtypes=None, - progress_bar_type=None, - create_bqstorage_client=True, - date_as_object=True, - ): + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + dtypes: Dict[str, Any] = None, + progress_bar_type: str = None, + create_bqstorage_client: bool = True, + date_as_object: bool = True, + ) -> "pandas.DataFrame": """Return a pandas DataFrame from a QueryJob Args: @@ -1350,7 +1363,7 @@ def __init__(self, kind, substeps): self.substeps = list(substeps) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "QueryPlanEntryStep": """Factory: construct instance from the JSON repr. Args: @@ -1380,7 +1393,7 @@ def __init__(self): self._properties = {} @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "QueryPlanEntry": """Factory: construct instance from the JSON repr. 
Args: diff --git a/google/cloud/bigquery/magics/line_arg_parser/lexer.py b/google/cloud/bigquery/magics/line_arg_parser/lexer.py index 5a6ee1a83..cd809c389 100644 --- a/google/cloud/bigquery/magics/line_arg_parser/lexer.py +++ b/google/cloud/bigquery/magics/line_arg_parser/lexer.py @@ -98,7 +98,7 @@ def _generate_next_value_(name, start, count, last_values): return name -TokenType = AutoStrEnum( +TokenType = AutoStrEnum( # pytype: disable=wrong-arg-types "TokenType", [ (name, enum.auto()) diff --git a/google/cloud/bigquery/model.py b/google/cloud/bigquery/model.py index 55846bd1a..2d3f6660f 100644 --- a/google/cloud/bigquery/model.py +++ b/google/cloud/bigquery/model.py @@ -279,7 +279,7 @@ def encryption_configuration(self, value): self._properties["encryptionConfiguration"] = api_repr @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "Model": """Factory: construct a model resource given its API representation Args: @@ -322,7 +322,7 @@ def _build_resource(self, filter_fields): def __repr__(self): return "Model(reference={})".format(repr(self.reference)) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this model. Returns: @@ -389,7 +389,9 @@ def from_api_repr(cls, resource): return ref @classmethod - def from_string(cls, model_id, default_project=None): + def from_string( + cls, model_id: str, default_project: str = None + ) -> "ModelReference": """Construct a model reference from model ID string. Args: @@ -417,7 +419,7 @@ def from_string(cls, model_id, default_project=None): {"projectId": proj, "datasetId": dset, "modelId": model} ) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this model reference. 
Returns: diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 495c4effb..3751eb124 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -286,7 +286,7 @@ class _AbstractQueryParameter(object): """ @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "ScalarQueryParameter": """Factory: construct parameter from JSON resource. Args: @@ -297,7 +297,7 @@ def from_api_repr(cls, resource): """ raise NotImplementedError - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct JSON API representation for the parameter. Returns: @@ -329,7 +329,7 @@ def __init__(self, name, type_, value): self.value = value @classmethod - def positional(cls, type_, value): + def positional(cls, type_: str, value) -> "ScalarQueryParameter": """Factory for positional paramater. Args: @@ -347,7 +347,7 @@ def positional(cls, type_, value): return cls(None, type_, value) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "ScalarQueryParameter": """Factory: construct parameter from JSON resource. Args: @@ -369,7 +369,7 @@ def from_api_repr(cls, resource): return cls(name, type_, converted) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct JSON API representation for the parameter. Returns: @@ -441,7 +441,7 @@ def __init__(self, name, array_type, values): self.array_type = array_type @classmethod - def positional(cls, array_type, values): + def positional(cls, array_type: str, values: list) -> "ArrayQueryParameter": """Factory for positional parameters. Args: @@ -490,7 +490,7 @@ def _from_api_repr_scalar(cls, resource): return cls(name, array_type, converted) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "ArrayQueryParameter": """Factory: construct parameter from JSON resource. 
Args: @@ -504,7 +504,7 @@ def from_api_repr(cls, resource): return cls._from_api_repr_struct(resource) return cls._from_api_repr_scalar(resource) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct JSON API representation for the parameter. Returns: @@ -623,7 +623,7 @@ def positional(cls, *sub_params): return cls(None, *sub_params) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "StructQueryParameter": """Factory: construct parameter from JSON resource. Args: @@ -663,7 +663,7 @@ def from_api_repr(cls, resource): instance.struct_values[key] = converted return instance - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct JSON API representation for the parameter. Returns: diff --git a/google/cloud/bigquery/routine/routine.py b/google/cloud/bigquery/routine/routine.py index 103799e8f..bbc0a7693 100644 --- a/google/cloud/bigquery/routine/routine.py +++ b/google/cloud/bigquery/routine/routine.py @@ -266,7 +266,7 @@ def determinism_level(self, value): self._properties[self._PROPERTY_TO_API_FIELD["determinism_level"]] = value @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "Routine": """Factory: construct a routine given its API representation. Args: @@ -281,7 +281,7 @@ def from_api_repr(cls, resource): ref._properties = resource return ref - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this routine. Returns: @@ -387,7 +387,7 @@ def data_type(self, value): self._properties[self._PROPERTY_TO_API_FIELD["data_type"]] = resource @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "RoutineArgument": """Factory: construct a routine argument given its API representation. 
Args: @@ -401,7 +401,7 @@ def from_api_repr(cls, resource): ref._properties = resource return ref - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this routine argument. Returns: @@ -438,17 +438,17 @@ def __init__(self): @property def project(self): """str: ID of the project containing the routine.""" - return self._properties["projectId"] + return self._properties["projectId"] # pytype: disable=key-error @property def dataset_id(self): """str: ID of dataset containing the routine.""" - return self._properties["datasetId"] + return self._properties["datasetId"] # pytype: disable=key-error @property def routine_id(self): """str: The routine ID.""" - return self._properties["routineId"] + return self._properties["routineId"] # pytype: disable=key-error @property def path(self): @@ -460,7 +460,7 @@ def path(self): ) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "RoutineReference": """Factory: construct a routine reference given its API representation. Args: @@ -476,7 +476,9 @@ def from_api_repr(cls, resource): return ref @classmethod - def from_string(cls, routine_id, default_project=None): + def from_string( + cls, routine_id: str, default_project: str = None + ) -> "RoutineReference": """Factory: construct a routine reference from routine ID string. Args: @@ -504,7 +506,7 @@ def from_string(cls, routine_id, default_project=None): {"projectId": proj, "datasetId": dset, "routineId": routine} ) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this routine reference. 
Returns: diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 680dcc138..cb221d6de 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -90,7 +90,7 @@ def __init__( self._policy_tags = policy_tags @classmethod - def from_api_repr(cls, api_repr): + def from_api_repr(cls, api_repr: dict) -> "SchemaField": """Return a ``SchemaField`` object deserialized from a dictionary. Args: @@ -163,7 +163,7 @@ def policy_tags(self): """ return self._policy_tags - def to_api_repr(self): + def to_api_repr(self) -> dict: """Return a dictionary representing this schema field. Returns: @@ -194,13 +194,14 @@ def _key(self): return ( self.name, self.field_type.upper(), - self.mode.upper(), + # Mode is always str, if not given it defaults to a str value + self.mode.upper(), # pytype: disable=attribute-error self.description, self._fields, self._policy_tags, ) - def to_standard_sql(self): + def to_standard_sql(self) -> types.StandardSqlField: """Return the field as the standard SQL field representation object. Returns: @@ -375,7 +376,7 @@ def __repr__(self): return "PolicyTagList{}".format(self._key()) @classmethod - def from_api_repr(cls, api_repr): + def from_api_repr(cls, api_repr: dict) -> "PolicyTagList": """Return a :class:`PolicyTagList` object deserialized from a dict. This method creates a new ``PolicyTagList`` instance that points to @@ -398,7 +399,7 @@ def from_api_repr(cls, api_repr): names = api_repr.get("names", ()) return cls(names=names) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Return a dictionary representing this object. 
This method returns the properties dict of the ``PolicyTagList`` diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index bd5bca30f..5ab649a25 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -22,6 +22,8 @@ import logging import operator import pytz +import typing +from typing import Any, Dict, Iterable, Tuple import warnings try: @@ -47,6 +49,13 @@ from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +if typing.TYPE_CHECKING: # pragma: NO COVER + # Unconditionally import optional dependencies again to tell pytype that + # they are not None, avoiding false "no attribute" errors. + import pandas + import pyarrow + from google.cloud import bigquery_storage + _LOGGER = logging.getLogger(__name__) @@ -143,7 +152,9 @@ def path(self): ) @classmethod - def from_string(cls, table_id, default_project=None): + def from_string( + cls, table_id: str, default_project: str = None + ) -> "TableReference": """Construct a table reference from table ID string. Args: @@ -182,7 +193,7 @@ def from_string(cls, table_id, default_project=None): ) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "TableReference": """Factory: construct a table reference given its API representation Args: @@ -200,7 +211,7 @@ def from_api_repr(cls, resource): table_id = resource["tableId"] return cls(DatasetReference(project, dataset_id), table_id) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this table reference. Returns: @@ -212,7 +223,7 @@ def to_api_repr(self): "tableId": self._table_id, } - def to_bqstorage(self): + def to_bqstorage(self) -> str: """Construct a BigQuery Storage API representation of this table. 
Install the ``google-cloud-bigquery-storage`` package to use this @@ -847,7 +858,7 @@ def external_data_configuration(self, value): self._properties["externalDataConfiguration"] = api_repr @classmethod - def from_string(cls, full_table_id): + def from_string(cls, full_table_id: str) -> "Table": """Construct a table from fully-qualified table ID. Args: @@ -871,7 +882,7 @@ def from_string(cls, full_table_id): return cls(TableReference.from_string(full_table_id)) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "Table": """Factory: construct a table given its API representation Args: @@ -907,7 +918,7 @@ def from_api_repr(cls, resource): return table - def to_api_repr(self): + def to_api_repr(self) -> dict: """Constructs the API resource of this table Returns: @@ -915,7 +926,7 @@ def to_api_repr(self): """ return copy.deepcopy(self._properties) - def to_bqstorage(self): + def to_bqstorage(self) -> str: """Construct a BigQuery Storage API representation of this table. Returns: @@ -1104,7 +1115,7 @@ def clustering_fields(self): return list(prop.get("fields", ())) @classmethod - def from_string(cls, full_table_id): + def from_string(cls, full_table_id: str) -> "TableListItem": """Construct a table from fully-qualified table ID. Args: @@ -1129,7 +1140,7 @@ def from_string(cls, full_table_id): {"tableReference": TableReference.from_string(full_table_id).to_api_repr()} ) - def to_bqstorage(self): + def to_bqstorage(self) -> str: """Construct a BigQuery Storage API representation of this table. Returns: @@ -1137,7 +1148,7 @@ def to_bqstorage(self): """ return self.reference.to_bqstorage() - def to_api_repr(self): + def to_api_repr(self) -> dict: """Constructs the API resource of this table Returns: @@ -1231,7 +1242,7 @@ def values(self): """ return copy.deepcopy(self._xxx_values) - def keys(self): + def keys(self) -> Iterable[str]: """Return the keys for using a row as a dict. 
Returns: @@ -1244,7 +1255,7 @@ def keys(self): """ return self._xxx_field_to_index.keys() - def items(self): + def items(self) -> Iterable[Tuple[str, Any]]: """Return items as ``(key, value)`` pairs. Returns: @@ -1259,7 +1270,7 @@ def items(self): for key, index in self._xxx_field_to_index.items(): yield (key, copy.deepcopy(self._xxx_values[index])) - def get(self, key, default=None): + def get(self, key: str, default: Any = None) -> Any: """Return a value for key, with a default value if it does not exist. Args: @@ -1520,10 +1531,10 @@ def _to_arrow_iterable(self, bqstorage_client=None): # changes to job.QueryJob.to_arrow() def to_arrow( self, - progress_bar_type=None, - bqstorage_client=None, - create_bqstorage_client=True, - ): + progress_bar_type: str = None, + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + create_bqstorage_client: bool = True, + ) -> "pyarrow.Table": """[Beta] Create a class:`pyarrow.Table` by loading all pages of a table or query. @@ -1623,10 +1634,10 @@ def to_arrow( def to_dataframe_iterable( self, - bqstorage_client=None, - dtypes=None, - max_queue_size=_pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, - ): + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + dtypes: Dict[str, Any] = None, + max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, + ) -> "pandas.DataFrame": """Create an iterable of pandas DataFrames, to process the table as a stream. Args: @@ -1698,12 +1709,12 @@ def to_dataframe_iterable( # changes to job.QueryJob.to_dataframe() def to_dataframe( self, - bqstorage_client=None, - dtypes=None, - progress_bar_type=None, - create_bqstorage_client=True, - date_as_object=True, - ): + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + dtypes: Dict[str, Any] = None, + progress_bar_type: str = None, + create_bqstorage_client: bool = True, + date_as_object: bool = True, + ) -> "pandas.DataFrame": """Create a pandas DataFrame by loading all pages of a query. 
Args: @@ -1831,7 +1842,7 @@ def to_arrow( progress_bar_type=None, bqstorage_client=None, create_bqstorage_client=True, - ): + ) -> "pyarrow.Table": """[Beta] Create an empty class:`pyarrow.Table`. Args: @@ -1853,7 +1864,7 @@ def to_dataframe( progress_bar_type=None, create_bqstorage_client=True, date_as_object=True, - ): + ) -> "pandas.DataFrame": """Create an empty dataframe. Args: @@ -2164,7 +2175,7 @@ def require_partition_filter(self, value): self._properties["requirePartitionFilter"] = value @classmethod - def from_api_repr(cls, api_repr): + def from_api_repr(cls, api_repr: dict) -> "TimePartitioning": """Return a :class:`TimePartitioning` object deserialized from a dict. This method creates a new ``TimePartitioning`` instance that points to @@ -2192,7 +2203,7 @@ def from_api_repr(cls, api_repr): instance._properties = api_repr return instance - def to_api_repr(self): + def to_api_repr(self) -> dict: """Return a dictionary representing this object. This method returns the properties dict of the ``TimePartitioning`` diff --git a/noxfile.py b/noxfile.py index bde3b990e..7ba081660 100644 --- a/noxfile.py +++ b/noxfile.py @@ -21,6 +21,7 @@ import nox +PYTYPE_VERSION = "pytype==2021.4.9" BLACK_VERSION = "black==19.10b0" BLACK_PATHS = ("docs", "google", "samples", "tests", "noxfile.py", "setup.py") @@ -39,6 +40,7 @@ "lint", "lint_setup_py", "blacken", + "pytype", "docs", ] @@ -98,6 +100,15 @@ def unit_noextras(session): default(session, install_extras=False) +@nox.session(python=DEFAULT_PYTHON_VERSION) +def pytype(session): + """Run type checks.""" + session.install("-e", ".[all]") + session.install("ipython") + session.install(PYTYPE_VERSION) + session.run("pytype") + + @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) def system(session): """Run the system test suite.""" diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 6939c07e0..96819343c 100644 --- a/samples/geography/requirements.txt +++ 
b/samples/geography/requirements.txt @@ -1,3 +1,4 @@ geojson==2.5.0 google-cloud-bigquery==2.13.1 +google-cloud-bigquery-storage==2.1.0 Shapely==1.7.1 diff --git a/setup.cfg b/setup.cfg index c3a2b39f6..8eefc4435 100644 --- a/setup.cfg +++ b/setup.cfg @@ -17,3 +17,17 @@ # Generated by synthtool. DO NOT EDIT! [bdist_wheel] universal = 1 + +[pytype] +python_version = 3.8 +inputs = + google/cloud/ +exclude = + tests/ + google/cloud/bigquery_v2/ +output = .pytype/ +disable = + # There's some issue with finding some pyi files, thus disabling. + # The issue https://github.com/google/pytype/issues/150 is closed, but the + # error still occurs for some reason. + pyi-error diff --git a/synth.py b/synth.py index 3c6440600..d99f368cc 100644 --- a/synth.py +++ b/synth.py @@ -13,6 +13,7 @@ # limitations under the License. """This script is used to synthesize generated parts of this library.""" +import textwrap import synthtool as s from synthtool import gcp @@ -120,4 +121,32 @@ '\g<0>\n "bigquery_v2/services.rst", # generated by the code generator', ) +# ---------------------------------------------------------------------------- +# pytype-related changes +# ---------------------------------------------------------------------------- + +# Add .pytype to .gitignore +s.replace(".gitignore", r"\.pytest_cache", "\g<0>\n.pytype") + +# Add pytype config to setup.cfg +s.replace( + "setup.cfg", + r"universal = 1", + textwrap.dedent(""" \g<0> + + [pytype] + python_version = 3.8 + inputs = + google/cloud/ + exclude = + tests/ + google/cloud/bigquery_v2/ + output = .pytype/ + disable = + # There's some issue with finding some pyi files, thus disabling. + # The issue https://github.com/google/pytype/issues/150 is closed, but the + # error still occurs for some reason. 
+ pyi-error""") +) + s.shell.run(["nox", "-s", "blacken"], hide_output=False) diff --git a/tests/unit/test_signature_compatibility.py b/tests/unit/test_signature_compatibility.py index 6002ae3e8..e5016b0e5 100644 --- a/tests/unit/test_signature_compatibility.py +++ b/tests/unit/test_signature_compatibility.py @@ -31,20 +31,12 @@ def row_iterator_class(): return RowIterator -@pytest.mark.skipif( - not hasattr(inspect, "signature"), - reason="inspect.signature() is not availalbe in older Python versions", -) def test_to_arrow_method_signatures_match(query_job_class, row_iterator_class): sig = inspect.signature(query_job_class.to_arrow) sig2 = inspect.signature(row_iterator_class.to_arrow) assert sig == sig2 -@pytest.mark.skipif( - not hasattr(inspect, "signature"), - reason="inspect.signature() is not availalbe in older Python versions", -) def test_to_dataframe_method_signatures_match(query_job_class, row_iterator_class): sig = inspect.signature(query_job_class.to_dataframe) sig2 = inspect.signature(row_iterator_class.to_dataframe) From 0abb56669c097c59fbffce007c702e7a55f2d9c1 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 26 Apr 2021 02:34:02 -0500 Subject: [PATCH 190/341] feat: add `Client.delete_job_metadata` method to remove job metadata (#610) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Note: this only removes job metadata. Use `Client.cancel_job` to stop a running job. Also, this feature is in preview and has not rolled out to all regions yet Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Towards internal issue 176186229 🦕 --- google/cloud/bigquery/client.py | 71 +++++++++++++++++++++++++++++++++ tests/system/test_client.py | 22 +++++++++- tests/unit/test_client.py | 60 ++++++++++++++++++++++++++++ 3 files changed, 151 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 5aa8608a5..8d0acb867 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -1545,6 +1545,77 @@ def delete_model( if not not_found_ok: raise + def delete_job_metadata( + self, + job_id, + project=None, + location=None, + retry=DEFAULT_RETRY, + timeout=None, + not_found_ok=False, + ): + """[Beta] Delete job metadata from job history. + + Note: This does not stop a running job. Use + :func:`~google.cloud.bigquery.client.Client.cancel_job` instead. + + Args: + job_id (Union[ \ + str, \ + google.cloud.bigquery.job.LoadJob, \ + google.cloud.bigquery.job.CopyJob, \ + google.cloud.bigquery.job.ExtractJob, \ + google.cloud.bigquery.job.QueryJob \ + ]): Job identifier. + + Keyword Arguments: + project (Optional[str]): + ID of the project which owns the job (defaults to the client's project). + location (Optional[str]): + Location where the job was run. Ignored if ``job_id`` is a job + object. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + not_found_ok (Optional[bool]): + Defaults to ``False``. If ``True``, ignore "not found" errors + when deleting the job. 
+ """ + extra_params = {} + + project, location, job_id = _extract_job_reference( + job_id, project=project, location=location + ) + + if project is None: + project = self.project + + if location is None: + location = self.location + + # Location is always required for jobs.delete() + extra_params["location"] = location + + path = f"/projects/{project}/jobs/{job_id}/delete" + + span_attributes = {"path": path, "job_id": job_id, "location": location} + + try: + self._call_api( + retry, + span_name="BigQuery.deleteJob", + span_attributes=span_attributes, + method="DELETE", + path=path, + query_params=extra_params, + timeout=timeout, + ) + except google.api_core.exceptions.NotFound: + if not not_found_ok: + raise + def delete_routine( self, routine: Union[Routine, RoutineReference, str], diff --git a/tests/system/test_client.py b/tests/system/test_client.py index f31d994ca..e71788a43 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -25,6 +25,7 @@ import time import unittest import uuid +from typing import Optional import psutil import pytest @@ -62,6 +63,7 @@ from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi, enums @@ -123,7 +125,7 @@ def _has_rows(result): def _make_dataset_id(prefix): - return "%s%s" % (prefix, unique_resource_id()) + return f"python_bigquery_tests_system_{prefix}{unique_resource_id()}" def _load_json_schema(filename="schema.json"): @@ -142,7 +144,7 @@ class Config(object): global state. 
""" - CLIENT = None + CLIENT: Optional[bigquery.Client] = None CURSOR = None DATASET = None @@ -430,6 +432,22 @@ def test_delete_dataset_delete_contents_false(self): with self.assertRaises(exceptions.BadRequest): Config.CLIENT.delete_dataset(dataset) + def test_delete_job_metadata(self): + dataset_id = _make_dataset_id("us_east1") + self.temp_dataset(dataset_id, location="us-east1") + full_table_id = f"{Config.CLIENT.project}.{dataset_id}.test_delete_job_metadata" + table = Table(full_table_id, schema=[SchemaField("col", "STRING")]) + Config.CLIENT.create_table(table) + query_job: bigquery.QueryJob = Config.CLIENT.query( + f"SELECT COUNT(*) FROM `{full_table_id}`", location="us-east1", + ) + query_job.result() + self.assertIsNotNone(Config.CLIENT.get_job(query_job)) + + Config.CLIENT.delete_job_metadata(query_job) + with self.assertRaises(NotFound): + Config.CLIENT.get_job(query_job) + def test_get_table_w_public_dataset(self): public = "bigquery-public-data" dataset_id = "samples" diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 860f25f35..8f535145b 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -2498,6 +2498,66 @@ def test_update_table_delete_property(self): self.assertEqual(req[1]["data"], sent) self.assertIsNone(table3.description) + def test_delete_job_metadata_not_found(self): + creds = _make_credentials() + client = self._make_one("client-proj", creds, location="client-loc") + conn = client._connection = make_connection( + google.api_core.exceptions.NotFound("job not found"), + google.api_core.exceptions.NotFound("job not found"), + ) + + with self.assertRaises(google.api_core.exceptions.NotFound): + client.delete_job_metadata("my-job") + + conn.api_request.reset_mock() + client.delete_job_metadata("my-job", not_found_ok=True) + + conn.api_request.assert_called_once_with( + method="DELETE", + path="/projects/client-proj/jobs/my-job/delete", + query_params={"location": "client-loc"}, + timeout=None, + ) + + 
def test_delete_job_metadata_with_id(self): + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds) + conn = client._connection = make_connection({}) + + client.delete_job_metadata("my-job", project="param-proj", location="param-loc") + + conn.api_request.assert_called_once_with( + method="DELETE", + path="/projects/param-proj/jobs/my-job/delete", + query_params={"location": "param-loc"}, + timeout=None, + ) + + def test_delete_job_metadata_with_resource(self): + from google.cloud.bigquery.job import QueryJob + + query_resource = { + "jobReference": { + "projectId": "job-based-proj", + "jobId": "query_job", + "location": "us-east1", + }, + "configuration": {"query": {}}, + } + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds) + conn = client._connection = make_connection(query_resource) + job_from_resource = QueryJob.from_api_repr(query_resource, client) + + client.delete_job_metadata(job_from_resource) + + conn.api_request.assert_called_once_with( + method="DELETE", + path="/projects/job-based-proj/jobs/query_job/delete", + query_params={"location": "us-east1"}, + timeout=None, + ) + def test_delete_model(self): from google.cloud.bigquery.model import Model From 1cff487e912d3cc3414968c28cf1e6554361a9e3 Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Mon, 26 Apr 2021 10:31:29 -0400 Subject: [PATCH 191/341] chore(revert): revert preventing normalization (#625) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 46a128426..607ffb63f 100644 --- a/setup.py +++ b/setup.py @@ -100,7 +100,7 @@ setuptools.setup( name=name, - version=setuptools.sic(version), + version=version, description=description, long_description=readme, author="Google LLC", From 33a871f06329f9bf5a6a92fab9ead65bf2bee75d Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 26 Apr 2021 16:35:16 +0200 Subject: [PATCH 192/341] fix: unsetting clustering fileds on 
Table is now possible (#622) * fix: unsetting clustering fields from Table * Remove unused stuff from table.py * Use _PROPERTY_TO_API_FIELD in Table properties * Clarify why a property is set to explicit None --- google/cloud/bigquery/table.py | 204 +++++++++++++++++++++------------ tests/system/test_client.py | 25 ++++ tests/unit/test_table.py | 8 +- 3 files changed, 160 insertions(+), 77 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 5ab649a25..b91c91a39 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -19,7 +19,6 @@ import copy import datetime import functools -import logging import operator import pytz import typing @@ -57,12 +56,6 @@ from google.cloud import bigquery_storage -_LOGGER = logging.getLogger(__name__) - -_NO_BQSTORAGE_ERROR = ( - "The google-cloud-bigquery-storage library is not installed, " - "please install google-cloud-bigquery-storage to use bqstorage features." -) _NO_PANDAS_ERROR = ( "The pandas library is not installed, please install " "pandas to use the to_dataframe() function." 
@@ -302,16 +295,36 @@ class Table(object): """ _PROPERTY_TO_API_FIELD = { + "clustering_fields": "clustering", + "created": "creationTime", + "dataset_id": ["tableReference", "datasetId"], + "description": "description", "encryption_configuration": "encryptionConfiguration", + "etag": "etag", "expires": "expirationTime", "external_data_configuration": "externalDataConfiguration", "friendly_name": "friendlyName", + "full_table_id": "id", + "labels": "labels", + "location": "location", + "modified": "lastModifiedTime", "mview_enable_refresh": "materializedView", + "mview_last_refresh_time": ["materializedView", "lastRefreshTime"], "mview_query": "materializedView", "mview_refresh_interval": "materializedView", + "num_bytes": "numBytes", + "num_rows": "numRows", "partition_expiration": "timePartitioning", "partitioning_type": "timePartitioning", + "project": ["tableReference", "projectId"], + "range_partitioning": "rangePartitioning", + "time_partitioning": "timePartitioning", + "schema": "schema", + "streaming_buffer": "streamingBuffer", + "self_link": "selfLink", + "table_id": ["tableReference", "tableId"], "time_partitioning": "timePartitioning", + "type": "type", "view_use_legacy_sql": "view", "view_query": "view", "require_partition_filter": "requirePartitionFilter", @@ -327,17 +340,23 @@ def __init__(self, table_ref, schema=None): @property def project(self): """str: Project bound to the table.""" - return self._properties["tableReference"]["projectId"] + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["project"] + ) @property def dataset_id(self): """str: ID of dataset containing the table.""" - return self._properties["tableReference"]["datasetId"] + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["dataset_id"] + ) @property def table_id(self): """str: ID of the table.""" - return self._properties["tableReference"]["tableId"] + return _helpers._get_sub_prop( + self._properties, 
self._PROPERTY_TO_API_FIELD["table_id"] + ) reference = property(_reference_getter) @@ -356,11 +375,15 @@ def require_partition_filter(self): partition filter that can be used for partition elimination to be specified. """ - return self._properties.get("requirePartitionFilter") + return self._properties.get( + self._PROPERTY_TO_API_FIELD["require_partition_filter"] + ) @require_partition_filter.setter def require_partition_filter(self, value): - self._properties["requirePartitionFilter"] = value + self._properties[ + self._PROPERTY_TO_API_FIELD["require_partition_filter"] + ] = value @property def schema(self): @@ -376,7 +399,7 @@ def schema(self): is not a :class:`~google.cloud.bigquery.schema.SchemaField` instance or a compatible mapping representation of the field. """ - prop = self._properties.get("schema") + prop = self._properties.get(self._PROPERTY_TO_API_FIELD["schema"]) if not prop: return [] else: @@ -384,11 +407,13 @@ def schema(self): @schema.setter def schema(self, value): + api_field = self._PROPERTY_TO_API_FIELD["schema"] + if value is None: - self._properties["schema"] = None + self._properties[api_field] = None else: value = _to_schema_fields(value) - self._properties["schema"] = {"fields": _build_schema_resource(value)} + self._properties[api_field] = {"fields": _build_schema_resource(value)} @property def labels(self): @@ -401,13 +426,13 @@ def labels(self): Raises: ValueError: If ``value`` type is invalid. """ - return self._properties.setdefault("labels", {}) + return self._properties.setdefault(self._PROPERTY_TO_API_FIELD["labels"], {}) @labels.setter def labels(self, value): if not isinstance(value, dict): raise ValueError("Pass a dict") - self._properties["labels"] = value + self._properties[self._PROPERTY_TO_API_FIELD["labels"]] = value @property def encryption_configuration(self): @@ -421,7 +446,9 @@ def encryption_configuration(self): `_ in the BigQuery documentation. 
""" - prop = self._properties.get("encryptionConfiguration") + prop = self._properties.get( + self._PROPERTY_TO_API_FIELD["encryption_configuration"] + ) if prop is not None: prop = EncryptionConfiguration.from_api_repr(prop) return prop @@ -431,14 +458,16 @@ def encryption_configuration(self, value): api_repr = value if value is not None: api_repr = value.to_api_repr() - self._properties["encryptionConfiguration"] = api_repr + self._properties[ + self._PROPERTY_TO_API_FIELD["encryption_configuration"] + ] = api_repr @property def created(self): """Union[datetime.datetime, None]: Datetime at which the table was created (:data:`None` until set from the server). """ - creation_time = self._properties.get("creationTime") + creation_time = self._properties.get(self._PROPERTY_TO_API_FIELD["created"]) if creation_time is not None: # creation_time will be in milliseconds. return google.cloud._helpers._datetime_from_microseconds( @@ -450,14 +479,14 @@ def etag(self): """Union[str, None]: ETag for the table resource (:data:`None` until set from the server). """ - return self._properties.get("etag") + return self._properties.get(self._PROPERTY_TO_API_FIELD["etag"]) @property def modified(self): """Union[datetime.datetime, None]: Datetime at which the table was last modified (:data:`None` until set from the server). """ - modified_time = self._properties.get("lastModifiedTime") + modified_time = self._properties.get(self._PROPERTY_TO_API_FIELD["modified"]) if modified_time is not None: # modified_time will be in milliseconds. return google.cloud._helpers._datetime_from_microseconds( @@ -469,21 +498,25 @@ def num_bytes(self): """Union[int, None]: The size of the table in bytes (:data:`None` until set from the server). 
""" - return _helpers._int_or_none(self._properties.get("numBytes")) + return _helpers._int_or_none( + self._properties.get(self._PROPERTY_TO_API_FIELD["num_bytes"]) + ) @property def num_rows(self): """Union[int, None]: The number of rows in the table (:data:`None` until set from the server). """ - return _helpers._int_or_none(self._properties.get("numRows")) + return _helpers._int_or_none( + self._properties.get(self._PROPERTY_TO_API_FIELD["num_rows"]) + ) @property def self_link(self): """Union[str, None]: URL for the table resource (:data:`None` until set from the server). """ - return self._properties.get("selfLink") + return self._properties.get(self._PROPERTY_TO_API_FIELD["self_link"]) @property def full_table_id(self): @@ -492,7 +525,7 @@ def full_table_id(self): In the format ``project-id:dataset_id.table_id``. """ - return self._properties.get("id") + return self._properties.get(self._PROPERTY_TO_API_FIELD["full_table_id"]) @property def table_type(self): @@ -502,7 +535,7 @@ def table_type(self): Possible values are ``'TABLE'``, ``'VIEW'``, ``'MATERIALIZED_VIEW'`` or ``'EXTERNAL'``. """ - return self._properties.get("type") + return self._properties.get(self._PROPERTY_TO_API_FIELD["type"]) @property def range_partitioning(self): @@ -523,7 +556,9 @@ def range_partitioning(self): :class:`~google.cloud.bigquery.table.RangePartitioning` or :data:`None`. 
""" - resource = self._properties.get("rangePartitioning") + resource = self._properties.get( + self._PROPERTY_TO_API_FIELD["range_partitioning"] + ) if resource is not None: return RangePartitioning(_properties=resource) @@ -536,7 +571,7 @@ def range_partitioning(self, value): raise ValueError( "Expected value to be RangePartitioning or None, got {}.".format(value) ) - self._properties["rangePartitioning"] = resource + self._properties[self._PROPERTY_TO_API_FIELD["range_partitioning"]] = resource @property def time_partitioning(self): @@ -553,7 +588,7 @@ def time_partitioning(self): :class:`~google.cloud.bigquery.table.TimePartitioning` or :data:`None`. """ - prop = self._properties.get("timePartitioning") + prop = self._properties.get(self._PROPERTY_TO_API_FIELD["time_partitioning"]) if prop is not None: return TimePartitioning.from_api_repr(prop) @@ -566,7 +601,7 @@ def time_partitioning(self, value): raise ValueError( "value must be google.cloud.bigquery.table.TimePartitioning " "or None" ) - self._properties["timePartitioning"] = api_repr + self._properties[self._PROPERTY_TO_API_FIELD["time_partitioning"]] = api_repr @property def partitioning_type(self): @@ -591,9 +626,10 @@ def partitioning_type(self, value): PendingDeprecationWarning, stacklevel=2, ) + api_field = self._PROPERTY_TO_API_FIELD["partitioning_type"] if self.time_partitioning is None: - self._properties["timePartitioning"] = {} - self._properties["timePartitioning"]["type"] = value + self._properties[api_field] = {} + self._properties[api_field]["type"] = value @property def partition_expiration(self): @@ -620,9 +656,11 @@ def partition_expiration(self, value): PendingDeprecationWarning, stacklevel=2, ) + api_field = self._PROPERTY_TO_API_FIELD["partition_expiration"] + if self.time_partitioning is None: - self._properties["timePartitioning"] = {"type": TimePartitioningType.DAY} - self._properties["timePartitioning"]["expirationMs"] = str(value) + self._properties[api_field] = {"type": 
TimePartitioningType.DAY} + self._properties[api_field]["expirationMs"] = str(value) @property def clustering_fields(self): @@ -637,7 +675,7 @@ def clustering_fields(self): BigQuery supports clustering for both partitioned and non-partitioned tables. """ - prop = self._properties.get("clustering") + prop = self._properties.get(self._PROPERTY_TO_API_FIELD["clustering_fields"]) if prop is not None: return list(prop.get("fields", ())) @@ -647,12 +685,15 @@ def clustering_fields(self, value): (Defaults to :data:`None`). """ + api_field = self._PROPERTY_TO_API_FIELD["clustering_fields"] + if value is not None: - prop = self._properties.setdefault("clustering", {}) + prop = self._properties.setdefault(api_field, {}) prop["fields"] = value else: - if "clustering" in self._properties: - del self._properties["clustering"] + # In order to allow unsetting clustering fields completely, we explicitly + # set this property to None (as oposed to merely removing the key). + self._properties[api_field] = None @property def description(self): @@ -662,13 +703,13 @@ def description(self): Raises: ValueError: For invalid value types. """ - return self._properties.get("description") + return self._properties.get(self._PROPERTY_TO_API_FIELD["description"]) @description.setter def description(self, value): if not isinstance(value, str) and value is not None: raise ValueError("Pass a string, or None") - self._properties["description"] = value + self._properties[self._PROPERTY_TO_API_FIELD["description"]] = value @property def expires(self): @@ -678,7 +719,7 @@ def expires(self): Raises: ValueError: For invalid value types. """ - expiration_time = self._properties.get("expirationTime") + expiration_time = self._properties.get(self._PROPERTY_TO_API_FIELD["expires"]) if expiration_time is not None: # expiration_time will be in milliseconds. 
return google.cloud._helpers._datetime_from_microseconds( @@ -690,7 +731,9 @@ def expires(self, value): if not isinstance(value, datetime.datetime) and value is not None: raise ValueError("Pass a datetime, or None") value_ms = google.cloud._helpers._millis_from_datetime(value) - self._properties["expirationTime"] = _helpers._str_or_none(value_ms) + self._properties[ + self._PROPERTY_TO_API_FIELD["expires"] + ] = _helpers._str_or_none(value_ms) @property def friendly_name(self): @@ -699,13 +742,13 @@ def friendly_name(self): Raises: ValueError: For invalid value types. """ - return self._properties.get("friendlyName") + return self._properties.get(self._PROPERTY_TO_API_FIELD["friendly_name"]) @friendly_name.setter def friendly_name(self, value): if not isinstance(value, str) and value is not None: raise ValueError("Pass a string, or None") - self._properties["friendlyName"] = value + self._properties[self._PROPERTY_TO_API_FIELD["friendly_name"]] = value @property def location(self): @@ -713,7 +756,7 @@ def location(self): Defaults to :data:`None`. """ - return self._properties.get("location") + return self._properties.get(self._PROPERTY_TO_API_FIELD["location"]) @property def view_query(self): @@ -726,14 +769,17 @@ def view_query(self): Raises: ValueError: For invalid value types. """ - return _helpers._get_sub_prop(self._properties, ["view", "query"]) + api_field = self._PROPERTY_TO_API_FIELD["view_query"] + return _helpers._get_sub_prop(self._properties, [api_field, "query"]) @view_query.setter def view_query(self, value): if not isinstance(value, str): raise ValueError("Pass a string") - _helpers._set_sub_prop(self._properties, ["view", "query"], value) - view = self._properties["view"] + + api_field = self._PROPERTY_TO_API_FIELD["view_query"] + _helpers._set_sub_prop(self._properties, [api_field, "query"], value) + view = self._properties[api_field] # The service defaults useLegacySql to True, but this # client uses Standard SQL by default. 
if view.get("useLegacySql") is None: @@ -742,7 +788,7 @@ def view_query(self, value): @view_query.deleter def view_query(self): """Delete SQL query defining the table as a view.""" - self._properties.pop("view", None) + self._properties.pop(self._PROPERTY_TO_API_FIELD["view_query"], None) view_use_legacy_sql = property(_view_use_legacy_sql_getter) @@ -750,27 +796,29 @@ def view_query(self): def view_use_legacy_sql(self, value): if not isinstance(value, bool): raise ValueError("Pass a boolean") - if self._properties.get("view") is None: - self._properties["view"] = {} - self._properties["view"]["useLegacySql"] = value + + api_field = self._PROPERTY_TO_API_FIELD["view_query"] + if self._properties.get(api_field) is None: + self._properties[api_field] = {} + self._properties[api_field]["useLegacySql"] = value @property def mview_query(self): """Optional[str]: SQL query defining the table as a materialized view (defaults to :data:`None`). """ - return _helpers._get_sub_prop(self._properties, ["materializedView", "query"]) + api_field = self._PROPERTY_TO_API_FIELD["mview_query"] + return _helpers._get_sub_prop(self._properties, [api_field, "query"]) @mview_query.setter def mview_query(self, value): - _helpers._set_sub_prop( - self._properties, ["materializedView", "query"], str(value) - ) + api_field = self._PROPERTY_TO_API_FIELD["mview_query"] + _helpers._set_sub_prop(self._properties, [api_field, "query"], str(value)) @mview_query.deleter def mview_query(self): """Delete SQL query defining the table as a materialized view.""" - self._properties.pop("materializedView", None) + self._properties.pop(self._PROPERTY_TO_API_FIELD["mview_query"], None) @property def mview_last_refresh_time(self): @@ -778,7 +826,7 @@ def mview_last_refresh_time(self): refreshed (:data:`None` until set from the server). 
""" refresh_time = _helpers._get_sub_prop( - self._properties, ["materializedView", "lastRefreshTime"] + self._properties, self._PROPERTY_TO_API_FIELD["mview_last_refresh_time"] ) if refresh_time is not None: # refresh_time will be in milliseconds. @@ -791,14 +839,14 @@ def mview_enable_refresh(self): """Optional[bool]: Enable automatic refresh of the materialized view when the base table is updated. The default value is :data:`True`. """ - return _helpers._get_sub_prop( - self._properties, ["materializedView", "enableRefresh"] - ) + api_field = self._PROPERTY_TO_API_FIELD["mview_enable_refresh"] + return _helpers._get_sub_prop(self._properties, [api_field, "enableRefresh"]) @mview_enable_refresh.setter def mview_enable_refresh(self, value): + api_field = self._PROPERTY_TO_API_FIELD["mview_enable_refresh"] return _helpers._set_sub_prop( - self._properties, ["materializedView", "enableRefresh"], value + self._properties, [api_field, "enableRefresh"], value ) @property @@ -807,8 +855,9 @@ def mview_refresh_interval(self): materialized view will be refreshed. The default value is 1800000 milliseconds (30 minutes). """ + api_field = self._PROPERTY_TO_API_FIELD["mview_refresh_interval"] refresh_interval = _helpers._get_sub_prop( - self._properties, ["materializedView", "refreshIntervalMs"] + self._properties, [api_field, "refreshIntervalMs"] ) if refresh_interval is not None: return datetime.timedelta(milliseconds=int(refresh_interval)) @@ -820,10 +869,9 @@ def mview_refresh_interval(self, value): else: refresh_interval_ms = str(value // datetime.timedelta(milliseconds=1)) + api_field = self._PROPERTY_TO_API_FIELD["mview_refresh_interval"] _helpers._set_sub_prop( - self._properties, - ["materializedView", "refreshIntervalMs"], - refresh_interval_ms, + self._properties, [api_field, "refreshIntervalMs"], refresh_interval_ms, ) @property @@ -831,7 +879,7 @@ def streaming_buffer(self): """google.cloud.bigquery.StreamingBuffer: Information about a table's streaming buffer. 
""" - sb = self._properties.get("streamingBuffer") + sb = self._properties.get(self._PROPERTY_TO_API_FIELD["streaming_buffer"]) if sb is not None: return StreamingBuffer(sb) @@ -843,7 +891,9 @@ def external_data_configuration(self): Raises: ValueError: For invalid value types. """ - prop = self._properties.get("externalDataConfiguration") + prop = self._properties.get( + self._PROPERTY_TO_API_FIELD["external_data_configuration"] + ) if prop is not None: prop = ExternalConfig.from_api_repr(prop) return prop @@ -855,7 +905,9 @@ def external_data_configuration(self, value): api_repr = value if value is not None: api_repr = value.to_api_repr() - self._properties["externalDataConfiguration"] = api_repr + self._properties[ + self._PROPERTY_TO_API_FIELD["external_data_configuration"] + ] = api_repr @classmethod def from_string(cls, full_table_id: str) -> "Table": @@ -908,9 +960,15 @@ def from_api_repr(cls, resource: dict) -> "Table": "Resource lacks required identity information:" '["tableReference"]["tableId"]' ) - project_id = resource["tableReference"]["projectId"] - table_id = resource["tableReference"]["tableId"] - dataset_id = resource["tableReference"]["datasetId"] + project_id = _helpers._get_sub_prop( + resource, cls._PROPERTY_TO_API_FIELD["project"] + ) + table_id = _helpers._get_sub_prop( + resource, cls._PROPERTY_TO_API_FIELD["table_id"] + ) + dataset_id = _helpers._get_sub_prop( + resource, cls._PROPERTY_TO_API_FIELD["dataset_id"] + ) dataset_ref = dataset.DatasetReference(project_id, dataset_id) table = cls(dataset_ref.table(table_id)) diff --git a/tests/system/test_client.py b/tests/system/test_client.py index e71788a43..7c8ef50fa 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -90,6 +90,12 @@ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), ] +CLUSTERING_SCHEMA = [ + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", 
"INTEGER", mode="REQUIRED"), + bigquery.SchemaField("body_height_cm", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("date_of_birth", "DATE", mode="REQUIRED"), +] TIME_PARTITIONING_CLUSTERING_FIELDS_SCHEMA = [ bigquery.SchemaField("transaction_time", "TIMESTAMP", mode="REQUIRED"), bigquery.SchemaField("transaction_id", "INTEGER", mode="REQUIRED"), @@ -579,6 +585,25 @@ def test_update_table_schema(self): self.assertEqual(found.field_type, expected.field_type) self.assertEqual(found.mode, expected.mode) + def test_update_table_clustering_configuration(self): + dataset = self.temp_dataset(_make_dataset_id("update_table")) + + TABLE_NAME = "test_table" + table_arg = Table(dataset.table(TABLE_NAME), schema=CLUSTERING_SCHEMA) + self.assertFalse(_table_exists(table_arg)) + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + self.assertTrue(_table_exists(table)) + + table.clustering_fields = ["full_name", "date_of_birth"] + table2 = Config.CLIENT.update_table(table, ["clustering_fields"]) + self.assertEqual(table2.clustering_fields, ["full_name", "date_of_birth"]) + + table2.clustering_fields = None + table3 = Config.CLIENT.update_table(table2, ["clustering_fields"]) + self.assertIsNone(table3.clustering_fields, None) + @staticmethod def _fetch_single_page(table, selected_fields=None): iterator = Config.CLIENT.list_rows(table, selected_fields=selected_fields) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 3373528e0..ce4a15761 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1210,8 +1210,8 @@ def test_clustering_fields_setter_w_none(self): table._properties["clustering"] = {"fields": fields} table.clustering_fields = None - self.assertEqual(table.clustering_fields, None) - self.assertFalse("clustering" in table._properties) + self.assertIsNone(table.clustering_fields) + self.assertTrue("clustering" in table._properties) # None stored explicitly def 
test_clustering_fields_setter_w_none_noop(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -1219,8 +1219,8 @@ def test_clustering_fields_setter_w_none_noop(self): table = self._make_one(table_ref) table.clustering_fields = None - self.assertEqual(table.clustering_fields, None) - self.assertFalse("clustering" in table._properties) + self.assertIsNone(table.clustering_fields) + self.assertTrue("clustering" in table._properties) # None stored explicitly def test_encryption_configuration_setter(self): # Previously, the EncryptionConfiguration class was in the table module, not the From a3224337dac217ec07df83bf0ad570b7aa6d2ec9 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 26 Apr 2021 15:56:03 +0000 Subject: [PATCH 193/341] chore: release 2.14.0 (#602) :robot: I have created a release \*beep\* \*boop\* --- ## [2.14.0](https://www.github.com/googleapis/python-bigquery/compare/v2.13.1...v2.14.0) (2021-04-26) ### Features * accept DatasetListItem where DatasetReference is accepted ([#597](https://www.github.com/googleapis/python-bigquery/issues/597)) ([c8b5581](https://www.github.com/googleapis/python-bigquery/commit/c8b5581ea3c94005d69755c4a3b5a0d8900f3fe2)) * accept job object as argument to `get_job` and `cancel_job` ([#617](https://www.github.com/googleapis/python-bigquery/issues/617)) ([f75dcdf](https://www.github.com/googleapis/python-bigquery/commit/f75dcdf3943b87daba60011c9a3b42e34ff81910)) * add `Client.delete_job_metadata` method to remove job metadata ([#610](https://www.github.com/googleapis/python-bigquery/issues/610)) ([0abb566](https://www.github.com/googleapis/python-bigquery/commit/0abb56669c097c59fbffce007c702e7a55f2d9c1)) * add `max_queue_size` argument to `RowIterator.to_dataframe_iterable` ([#575](https://www.github.com/googleapis/python-bigquery/issues/575)) 
([f95f415](https://www.github.com/googleapis/python-bigquery/commit/f95f415d3441b3928f6cc705cb8a75603d790fd6)) * add type hints for public methods ([#613](https://www.github.com/googleapis/python-bigquery/issues/613)) ([f8d4aaa](https://www.github.com/googleapis/python-bigquery/commit/f8d4aaa335a0eef915e73596fc9b43b11d11be9f)) * DB API cursors are now iterable ([#618](https://www.github.com/googleapis/python-bigquery/issues/618)) ([e0b373d](https://www.github.com/googleapis/python-bigquery/commit/e0b373d0e721a70656ed8faceb7f5c70f642d144)) * retry google.auth TransportError by default ([#624](https://www.github.com/googleapis/python-bigquery/issues/624)) ([34ecc3f](https://www.github.com/googleapis/python-bigquery/commit/34ecc3f1ca0ff073330c0c605673d89b43af7ed9)) * use pyarrow stream compression, if available ([#593](https://www.github.com/googleapis/python-bigquery/issues/593)) ([dde9dc5](https://www.github.com/googleapis/python-bigquery/commit/dde9dc5114c2311fb76fafc5b222fff561e8abf1)) ### Bug Fixes * consistent percents handling in DB API query ([#619](https://www.github.com/googleapis/python-bigquery/issues/619)) ([6502a60](https://www.github.com/googleapis/python-bigquery/commit/6502a602337ae562652a20b20270949f2c9d5073)) * missing license headers in new test files ([#604](https://www.github.com/googleapis/python-bigquery/issues/604)) ([df48cc5](https://www.github.com/googleapis/python-bigquery/commit/df48cc5a0be99ad39d5835652d1b7422209afc5d)) * unsetting clustering fields on Table is now possible ([#622](https://www.github.com/googleapis/python-bigquery/issues/622)) ([33a871f](https://www.github.com/googleapis/python-bigquery/commit/33a871f06329f9bf5a6a92fab9ead65bf2bee75d)) ### Documentation * add sample to run DML query ([#591](https://www.github.com/googleapis/python-bigquery/issues/591)) ([ff2ec3a](https://www.github.com/googleapis/python-bigquery/commit/ff2ec3abe418a443cd07751c08e654f94e8b3155)) * update the description of the return value of
`_QueryResults.rows()` ([#594](https://www.github.com/googleapis/python-bigquery/issues/594)) ([8f4c0b8](https://www.github.com/googleapis/python-bigquery/commit/8f4c0b84dac3840532d7865247b8ad94b625b897)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- CHANGELOG.md | 27 +++++++++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5dc2c8838..9aee40510 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,33 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.14.0](https://www.github.com/googleapis/python-bigquery/compare/v2.13.1...v2.14.0) (2021-04-26) + + +### Features + +* accept DatasetListItem where DatasetReference is accepted ([#597](https://www.github.com/googleapis/python-bigquery/issues/597)) ([c8b5581](https://www.github.com/googleapis/python-bigquery/commit/c8b5581ea3c94005d69755c4a3b5a0d8900f3fe2)) +* accept job object as argument to `get_job` and `cancel_job` ([#617](https://www.github.com/googleapis/python-bigquery/issues/617)) ([f75dcdf](https://www.github.com/googleapis/python-bigquery/commit/f75dcdf3943b87daba60011c9a3b42e34ff81910)) +* add `Client.delete_job_metadata` method to remove job metadata ([#610](https://www.github.com/googleapis/python-bigquery/issues/610)) ([0abb566](https://www.github.com/googleapis/python-bigquery/commit/0abb56669c097c59fbffce007c702e7a55f2d9c1)) +* add `max_queue_size` argument to `RowIterator.to_dataframe_iterable` ([#575](https://www.github.com/googleapis/python-bigquery/issues/575)) ([f95f415](https://www.github.com/googleapis/python-bigquery/commit/f95f415d3441b3928f6cc705cb8a75603d790fd6)) +* add type hints for public methods ([#613](https://www.github.com/googleapis/python-bigquery/issues/613)) 
([f8d4aaa](https://www.github.com/googleapis/python-bigquery/commit/f8d4aaa335a0eef915e73596fc9b43b11d11be9f)) +* DB API cursors are now iterable ([#618](https://www.github.com/googleapis/python-bigquery/issues/618)) ([e0b373d](https://www.github.com/googleapis/python-bigquery/commit/e0b373d0e721a70656ed8faceb7f5c70f642d144)) +* retry google.auth TransportError by default ([#624](https://www.github.com/googleapis/python-bigquery/issues/624)) ([34ecc3f](https://www.github.com/googleapis/python-bigquery/commit/34ecc3f1ca0ff073330c0c605673d89b43af7ed9)) +* use pyarrow stream compression, if available ([#593](https://www.github.com/googleapis/python-bigquery/issues/593)) ([dde9dc5](https://www.github.com/googleapis/python-bigquery/commit/dde9dc5114c2311fb76fafc5b222fff561e8abf1)) + + +### Bug Fixes + +* consistent percents handling in DB API query ([#619](https://www.github.com/googleapis/python-bigquery/issues/619)) ([6502a60](https://www.github.com/googleapis/python-bigquery/commit/6502a602337ae562652a20b20270949f2c9d5073)) +* missing license headers in new test files ([#604](https://www.github.com/googleapis/python-bigquery/issues/604)) ([df48cc5](https://www.github.com/googleapis/python-bigquery/commit/df48cc5a0be99ad39d5835652d1b7422209afc5d)) +* unsetting clustering fields on Table is now possible ([#622](https://www.github.com/googleapis/python-bigquery/issues/622)) ([33a871f](https://www.github.com/googleapis/python-bigquery/commit/33a871f06329f9bf5a6a92fab9ead65bf2bee75d)) + + +### Documentation + +* add sample to run DML query ([#591](https://www.github.com/googleapis/python-bigquery/issues/591)) ([ff2ec3a](https://www.github.com/googleapis/python-bigquery/commit/ff2ec3abe418a443cd07751c08e654f94e8b3155)) +* update the description of the return value of `_QueryResults.rows()` ([#594](https://www.github.com/googleapis/python-bigquery/issues/594)) ([8f4c0b8](https://www.github.com/googleapis/python-bigquery/commit/8f4c0b84dac3840532d7865247b8ad94b625b897)) + 
### [2.13.1](https://www.github.com/googleapis/python-bigquery/compare/v2.13.0...v2.13.1) (2021-03-23) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 2330d0c2c..ba8b4e8af 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.13.1" +__version__ = "2.14.0" From f4e34c09be696f41a097823db309a67d34db6efa Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 27 Apr 2021 17:08:05 +0200 Subject: [PATCH 194/341] chore(deps): update dependency google-cloud-bigquery to v2.14.0 (#627) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | `==2.13.1` -> `==2.14.0` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.14.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.14.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.14.0/compatibility-slim/2.13.1)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.14.0/confidence-slim/2.13.1)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery ### [`v2.14.0`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​2140-httpswwwgithubcomgoogleapispython-bigquerycomparev2131v2140-2021-04-26) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.13.1...v2.14.0) ##### Features - accept DatasetListItem where DatasetReference is accepted ([#​597](https://www.github.com/googleapis/python-bigquery/issues/597)) ([c8b5581](https://www.github.com/googleapis/python-bigquery/commit/c8b5581ea3c94005d69755c4a3b5a0d8900f3fe2)) - accept job object as argument to `get_job` and `cancel_job` ([#​617](https://www.github.com/googleapis/python-bigquery/issues/617)) ([f75dcdf](https://www.github.com/googleapis/python-bigquery/commit/f75dcdf3943b87daba60011c9a3b42e34ff81910)) - add `Client.delete_job_metadata` method to remove job metadata ([#​610](https://www.github.com/googleapis/python-bigquery/issues/610)) ([0abb566](https://www.github.com/googleapis/python-bigquery/commit/0abb56669c097c59fbffce007c702e7a55f2d9c1)) - add `max_queue_size` argument to `RowIterator.to_dataframe_iterable` ([#​575](https://www.github.com/googleapis/python-bigquery/issues/575)) ([f95f415](https://www.github.com/googleapis/python-bigquery/commit/f95f415d3441b3928f6cc705cb8a75603d790fd6)) - add type hints for public methods ([#​613](https://www.github.com/googleapis/python-bigquery/issues/613)) ([f8d4aaa](https://www.github.com/googleapis/python-bigquery/commit/f8d4aaa335a0eef915e73596fc9b43b11d11be9f)) - DB API cursors are now iterable ([#​618](https://www.github.com/googleapis/python-bigquery/issues/618)) ([e0b373d](https://www.github.com/googleapis/python-bigquery/commit/e0b373d0e721a70656ed8faceb7f5c70f642d144)) - retry google.auth TransportError by default ([#​624](https://www.github.com/googleapis/python-bigquery/issues/624)) ([34ecc3f](https://www.github.com/googleapis/python-bigquery/commit/34ecc3f1ca0ff073330c0c605673d89b43af7ed9)) - use pyarrow stream compression, if 
available ([#​593](https://www.github.com/googleapis/python-bigquery/issues/593)) ([dde9dc5](https://www.github.com/googleapis/python-bigquery/commit/dde9dc5114c2311fb76fafc5b222fff561e8abf1)) ##### Bug Fixes - consistent percents handling in DB API query ([#​619](https://www.github.com/googleapis/python-bigquery/issues/619)) ([6502a60](https://www.github.com/googleapis/python-bigquery/commit/6502a602337ae562652a20b20270949f2c9d5073)) - missing license headers in new test files ([#​604](https://www.github.com/googleapis/python-bigquery/issues/604)) ([df48cc5](https://www.github.com/googleapis/python-bigquery/commit/df48cc5a0be99ad39d5835652d1b7422209afc5d)) - unsetting clustering fields on Table is now possible ([#​622](https://www.github.com/googleapis/python-bigquery/issues/622)) ([33a871f](https://www.github.com/googleapis/python-bigquery/commit/33a871f06329f9bf5a6a92fab9ead65bf2bee75d)) ##### Documentation - add sample to run DML query ([#​591](https://www.github.com/googleapis/python-bigquery/issues/591)) ([ff2ec3a](https://www.github.com/googleapis/python-bigquery/commit/ff2ec3abe418a443cd07751c08e654f94e8b3155)) - update the description of the return value of `_QueryResults.rows()` ([#​594](https://www.github.com/googleapis/python-bigquery/issues/594)) ([8f4c0b8](https://www.github.com/googleapis/python-bigquery/commit/8f4c0b84dac3840532d7865247b8ad94b625b897)) ##### [2.13.1](https://www.github.com/googleapis/python-bigquery/compare/v2.13.0...v2.13.1) (2021-03-23) ##### Bug Fixes - add ConnectionError to default retry ([#​571](https://www.github.com/googleapis/python-bigquery/issues/571)) ([a3edb8b](https://www.github.com/googleapis/python-bigquery/commit/a3edb8b921e029e2c03d33302d408ad5d4e9d4ad))
--- ### Configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Renovate will not automatically rebase this PR, because other commits have been found. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/geography/requirements.txt | 2 +- samples/snippets/conftest.py | 12 ++++++++++-- samples/snippets/requirements.txt | 2 +- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 96819343c..7e017e283 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.13.1 +google-cloud-bigquery==2.14.0 google-cloud-bigquery-storage==2.1.0 Shapely==1.7.1 diff --git a/samples/snippets/conftest.py b/samples/snippets/conftest.py index 31c6ba104..0d0299ee5 100644 --- a/samples/snippets/conftest.py +++ b/samples/snippets/conftest.py @@ -20,21 +20,29 @@ RESOURCE_PREFIX = "python_bigquery_samples_snippets" +RESOURCE_DATE_FORMAT = "%Y%m%d_%H%M%S" +RESOURCE_DATE_LENGTH = 4 + 2 + 2 + 1 + 2 + 2 + 2 def resource_prefix() -> str: - timestamp = datetime.datetime.utcnow().strftime("%Y%m%d_%H%M%S") + timestamp = datetime.datetime.utcnow().strftime(RESOURCE_DATE_FORMAT) random_string = hex(random.randrange(1000000))[2:] return f"{RESOURCE_PREFIX}_{timestamp}_{random_string}" +def resource_name_to_date(resource_name: str): + start_date = len(RESOURCE_PREFIX) + 1 + date_string = resource_name[start_date : start_date + RESOURCE_DATE_LENGTH] + return 
datetime.strptime(date_string, RESOURCE_DATE_FORMAT) + + @pytest.fixture(scope="session", autouse=True) def cleanup_datasets(bigquery_client: bigquery.Client): yesterday = datetime.datetime.utcnow() - datetime.timedelta(days=1) for dataset in bigquery_client.list_datasets(): if ( dataset.dataset_id.startswith(RESOURCE_PREFIX) - and dataset.created < yesterday + and resource_name_to_date(dataset.dataset_id) < yesterday ): bigquery_client.delete_dataset( dataset, delete_contents=True, not_found_ok=True diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 74a18981e..d7e60f77d 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.13.1 +google-cloud-bigquery==2.14.0 google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.4 grpcio==1.37.0 From 4396e70771af6889d3242c37c5ff2e80241023a2 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 27 Apr 2021 14:15:52 -0600 Subject: [PATCH 195/341] fix: The DB API Binary function accepts bytes data (#630) * fix: The DB API Binary function accepts bytes data * Binary should accept bytes-like objects. * check for an integer before converting to bytes. Because we don't want to accidentally create a giant bytes. * blackened. * Fixed exception string. * parameterized binary tests and rearranged imports. * typo * Blackened --- google/cloud/bigquery/dbapi/types.py | 20 +++++++++++++---- tests/unit/test_dbapi_types.py | 32 ++++++++++++++++++++++++---- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigquery/dbapi/types.py b/google/cloud/bigquery/dbapi/types.py index 20eca9b00..717593ae1 100644 --- a/google/cloud/bigquery/dbapi/types.py +++ b/google/cloud/bigquery/dbapi/types.py @@ -30,16 +30,28 @@ TimestampFromTicks = datetime.datetime.fromtimestamp -def Binary(string): +def Binary(data): """Contruct a DB-API binary value. Args: - string (str): A string to encode as a binary value. 
+ data (bytes-like): An object containing binary data and that + can be converted to bytes with the `bytes` builtin. Returns: - bytes: The UTF-8 encoded bytes representing the string. + bytes: The binary data as a bytes object. """ - return string.encode("utf-8") + if isinstance(data, int): + # This is not the conversion we're looking for, because it + # will simply create a bytes object of the given size. + raise TypeError("cannot convert `int` object to binary") + + try: + return bytes(data) + except TypeError: + if isinstance(data, str): + return data.encode("utf-8") + else: + raise def TimeFromTicks(ticks, tz=None): diff --git a/tests/unit/test_dbapi_types.py b/tests/unit/test_dbapi_types.py index e05660ffe..cf282c68b 100644 --- a/tests/unit/test_dbapi_types.py +++ b/tests/unit/test_dbapi_types.py @@ -15,6 +15,8 @@ import datetime import unittest +import pytest + import google.cloud._helpers from google.cloud.bigquery.dbapi import types @@ -26,10 +28,6 @@ def test_binary_type(self): self.assertEqual("STRUCT", types.BINARY) self.assertNotEqual("STRING", types.BINARY) - def test_binary_constructor(self): - self.assertEqual(types.Binary(u"hello"), b"hello") - self.assertEqual(types.Binary(u"\u1f60"), u"\u1f60".encode("utf-8")) - def test_timefromticks(self): somedatetime = datetime.datetime( 2017, 2, 18, 12, 47, 26, tzinfo=google.cloud._helpers.UTC @@ -40,3 +38,29 @@ def test_timefromticks(self): types.TimeFromTicks(ticks, google.cloud._helpers.UTC), datetime.time(12, 47, 26, tzinfo=google.cloud._helpers.UTC), ) + + +class CustomBinary: + def __bytes__(self): + return b"Google" + + +@pytest.mark.parametrize( + "raw,expected", + [ + (u"hello", b"hello"), + (u"\u1f60", u"\u1f60".encode("utf-8")), + (b"hello", b"hello"), + (bytearray(b"hello"), b"hello"), + (memoryview(b"hello"), b"hello"), + (CustomBinary(), b"Google"), + ], +) +def test_binary_constructor(raw, expected): + assert types.Binary(raw) == expected + + +@pytest.mark.parametrize("bad", (42, 42.0, None)) 
+def test_invalid_binary_constructor(bad): + with pytest.raises(TypeError): + types.Binary(bad) From 7196817e1a4ee6dfde4875a06f1ffb9bbdb8e2ed Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 28 Apr 2021 16:28:10 +0200 Subject: [PATCH 196/341] chore(deps): update dependency google-cloud-bigquery-storage to v2.4.0 (#595) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 7e017e283..f46b141fd 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 google-cloud-bigquery==2.14.0 -google-cloud-bigquery-storage==2.1.0 +google-cloud-bigquery-storage==2.4.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index d7e60f77d..f7b5cebe9 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.14.0 -google-cloud-bigquery-storage==2.3.0 +google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 grpcio==1.37.0 ipython==7.16.1; python_version < '3.7' From c0851861ab1936e7444b5ae8970ded773482db43 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 28 Apr 2021 15:44:25 -0500 Subject: [PATCH 197/341] chore: add yoshi to CODEOWNERS (#634) --- .github/CODEOWNERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 10f4ee7c0..ae570eb01 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -5,8 +5,7 @@ # https://help.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners#codeowners-syntax # The @googleapis/api-bigquery is the default owner for changes in this repo -* @googleapis/api-bigquery +* @googleapis/api-bigquery @googleapis/yoshi-python # The python-samples-reviewers team is the default owner for samples changes 
/samples/ @googleapis/python-samples-owners - From 8bcf397fbe2527e06317741875a059b109cfcd9c Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 29 Apr 2021 07:19:56 -0600 Subject: [PATCH 198/341] feat: Extended DB API parameter syntax to optionally provide parameter types (#626) * Added explicit type documentation. * Extended query-parameter system for specifying parameter types. * Serialize non-floats (e.g. Decimals) used in FLOAT64 parameters. Co-authored-by: Tim Swast * De-reference aliases in SqlParameterScalarTypes when checking types Co-authored-by: Tim Swast --- docs/dbapi.rst | 37 ++++++++ google/cloud/bigquery/_helpers.py | 2 +- google/cloud/bigquery/dbapi/_helpers.py | 107 ++++++++++++++++-------- google/cloud/bigquery/dbapi/cursor.py | 93 ++++++++++++++++++-- google/cloud/bigquery/magics/magics.py | 2 +- tests/unit/test__helpers.py | 15 ++++ tests/unit/test_dbapi__helpers.py | 94 +++++++++++++++++++-- tests/unit/test_dbapi_cursor.py | 106 +++++++++++++++++++++-- 8 files changed, 396 insertions(+), 60 deletions(-) diff --git a/docs/dbapi.rst b/docs/dbapi.rst index ca0256d3c..41ec85833 100644 --- a/docs/dbapi.rst +++ b/docs/dbapi.rst @@ -4,3 +4,40 @@ DB-API Reference .. automodule:: google.cloud.bigquery.dbapi :members: :show-inheritance: + + +DB-API Query-Parameter Syntax +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The BigQuery DB-API uses the `qmark` `parameter style +<https://www.python.org/dev/peps/pep-0249/#paramstyle>`_ for +unnamed/positional parameters and the `pyformat` parameter style for +named parameters. + +An example of a query using unnamed parameters:: + + insert into people (name, income) values (?, ?) + +and using named parameters:: + + insert into people (name, income) values (%(name)s, %(income)s) + +Providing explicit type information +----------------------------------- + +BigQuery requires type information for parameters. The BigQuery +DB-API can usually determine parameter types for parameters based on +provided values.
Sometimes, however, types can't be determined (for +example when `None` is passed) or are determined incorrectly (for +example when passing a floating-point value to a numeric column). + +The BigQuery DB-API provides an extended parameter syntax. For named +parameters, a BigQuery type is provided after the name separated by a +colon, as in:: + + insert into people (name, income) values (%(name:string)s, %(income:numeric)s) + +For unnamed parameters, use the named syntax with a type, but no +name, as in:: + + insert into people (name, income) values (%(:string)s, %(:numeric)s) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index daa14b92a..ad8e3f003 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -275,7 +275,7 @@ def _int_to_json(value): def _float_to_json(value): """Coerce 'value' to an JSON-compatible representation.""" - return value + return value if value is None else float(value) def _decimal_to_json(value): diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index beb3c5e71..3b0d8134c 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -20,7 +20,7 @@ import numbers from google.cloud import bigquery -from google.cloud.bigquery import table +from google.cloud.bigquery import table, enums from google.cloud.bigquery.dbapi import exceptions @@ -28,7 +28,28 @@ _NUMERIC_SERVER_MAX = decimal.Decimal("9.9999999999999999999999999999999999999E+28") -def scalar_to_query_parameter(value, name=None): +def _parameter_type(name, value, query_parameter_type=None, value_doc=""): + if query_parameter_type: + try: + parameter_type = getattr( + enums.SqlParameterScalarTypes, query_parameter_type.upper() + )._type + except AttributeError: + raise exceptions.ProgrammingError( + f"The given parameter type, {query_parameter_type}," + f" for {name} is not a valid BigQuery scalar type."
+ ) + else: + parameter_type = bigquery_scalar_type(value) + if parameter_type is None: + raise exceptions.ProgrammingError( + f"Encountered parameter {name} with " + f"{value_doc} value {value} of unexpected type." + ) + return parameter_type + + +def scalar_to_query_parameter(value, name=None, query_parameter_type=None): """Convert a scalar value into a query parameter. Args: @@ -37,6 +58,7 @@ def scalar_to_query_parameter(value, name=None): name (str): (Optional) Name of the query parameter. + query_parameter_type (Optional[str]): Given type for the parameter. Returns: google.cloud.bigquery.ScalarQueryParameter: @@ -47,24 +69,19 @@ def scalar_to_query_parameter(value, name=None): google.cloud.bigquery.dbapi.exceptions.ProgrammingError: if the type cannot be determined. """ - parameter_type = bigquery_scalar_type(value) - - if parameter_type is None: - raise exceptions.ProgrammingError( - "encountered parameter {} with value {} of unexpected type".format( - name, value - ) - ) - return bigquery.ScalarQueryParameter(name, parameter_type, value) + return bigquery.ScalarQueryParameter( + name, _parameter_type(name, value, query_parameter_type), value + ) -def array_to_query_parameter(value, name=None): +def array_to_query_parameter(value, name=None, query_parameter_type=None): """Convert an array-like value into a query parameter. Args: value (Sequence[Any]): The elements of the array (should not be a string-like Sequence). name (Optional[str]): Name of the query parameter. + query_parameter_type (Optional[str]): Given type for the parameter. 
Returns: A query parameter corresponding with the type and value of the plain @@ -80,29 +97,30 @@ def array_to_query_parameter(value, name=None): "not string-like.".format(name) ) - if not value: + if query_parameter_type or value: + array_type = _parameter_type( + name, + value[0] if value else None, + query_parameter_type, + value_doc="array element ", + ) + else: raise exceptions.ProgrammingError( "Encountered an empty array-like value of parameter {}, cannot " "determine array elements type.".format(name) ) - # Assume that all elements are of the same type, and let the backend handle - # any type incompatibilities among the array elements - array_type = bigquery_scalar_type(value[0]) - if array_type is None: - raise exceptions.ProgrammingError( - "Encountered unexpected first array element of parameter {}, " - "cannot determine array elements type.".format(name) - ) - return bigquery.ArrayQueryParameter(name, array_type, value) -def to_query_parameters_list(parameters): +def to_query_parameters_list(parameters, parameter_types): """Converts a sequence of parameter values into query parameters. Args: parameters (Sequence[Any]): Sequence of query parameter values. + parameter_types: + A list of parameter types, one for each parameter. + Unknown types are provided as None. 
Returns: List[google.cloud.bigquery.query._AbstractQueryParameter]: @@ -110,23 +128,27 @@ def to_query_parameters_list(parameters): """ result = [] - for value in parameters: + for value, type_ in zip(parameters, parameter_types): if isinstance(value, collections_abc.Mapping): raise NotImplementedError("STRUCT-like parameter values are not supported.") elif array_like(value): - param = array_to_query_parameter(value) + param = array_to_query_parameter(value, None, type_) else: - param = scalar_to_query_parameter(value) + param = scalar_to_query_parameter(value, None, type_) + result.append(param) return result -def to_query_parameters_dict(parameters): +def to_query_parameters_dict(parameters, query_parameter_types): """Converts a dictionary of parameter values into query parameters. Args: parameters (Mapping[str, Any]): Dictionary of query parameter values. + parameter_types: + A dictionary of parameter types. It needn't have a key for each + parameter. Returns: List[google.cloud.bigquery.query._AbstractQueryParameter]: @@ -140,21 +162,38 @@ def to_query_parameters_dict(parameters): "STRUCT-like parameter values are not supported " "(parameter {}).".format(name) ) - elif array_like(value): - param = array_to_query_parameter(value, name=name) else: - param = scalar_to_query_parameter(value, name=name) + query_parameter_type = query_parameter_types.get(name) + if array_like(value): + param = array_to_query_parameter( + value, name=name, query_parameter_type=query_parameter_type + ) + else: + param = scalar_to_query_parameter( + value, name=name, query_parameter_type=query_parameter_type, + ) + result.append(param) return result -def to_query_parameters(parameters): +def to_query_parameters(parameters, parameter_types): """Converts DB-API parameter values into query parameters. Args: parameters (Union[Mapping[str, Any], Sequence[Any]]): A dictionary or sequence of query parameter values. 
+ parameter_types (Union[Mapping[str, str], Sequence[str]]): + A dictionary or list of parameter types. + + If parameters is a mapping, then this must be a dictionary + of parameter types. It needn't have a key for each + parameter. + + If parameters is a sequence, then this must be a list of + parameter types, one for each paramater. Unknown types + are provided as None. Returns: List[google.cloud.bigquery.query._AbstractQueryParameter]: @@ -164,9 +203,9 @@ def to_query_parameters(parameters): return [] if isinstance(parameters, collections_abc.Mapping): - return to_query_parameters_dict(parameters) - - return to_query_parameters_list(parameters) + return to_query_parameters_dict(parameters, parameter_types) + else: + return to_query_parameters_list(parameters, parameter_types) def bigquery_scalar_type(value): diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py index ca78d3907..f74781df9 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -18,6 +18,7 @@ from collections import abc as collections_abc import copy import logging +import re try: from google.cloud.bigquery_storage import ArrowSerializationOptions @@ -161,6 +162,14 @@ def execute(self, operation, parameters=None, job_id=None, job_config=None): job_config (google.cloud.bigquery.job.QueryJobConfig): (Optional) Extra configuration options for the query job. """ + formatted_operation, parameter_types = _format_operation(operation, parameters) + self._execute( + formatted_operation, parameters, job_id, job_config, parameter_types + ) + + def _execute( + self, formatted_operation, parameters, job_id, job_config, parameter_types + ): self._query_data = None self._query_job = None client = self.connection._client @@ -169,8 +178,7 @@ def execute(self, operation, parameters=None, job_id=None, job_config=None): # query parameters was not one of the standard options. 
Convert both # the query and the parameters to the format expected by the client # libraries. - formatted_operation = _format_operation(operation, parameters=parameters) - query_parameters = _helpers.to_query_parameters(parameters) + query_parameters = _helpers.to_query_parameters(parameters, parameter_types) if client._default_query_job_config: if job_config: @@ -209,8 +217,19 @@ def executemany(self, operation, seq_of_parameters): seq_of_parameters (Union[Sequence[Mapping[str, Any], Sequence[Any]]]): Sequence of many sets of parameter values. """ - for parameters in seq_of_parameters: - self.execute(operation, parameters) + if seq_of_parameters: + # There's no reason to format the line more than once, as + # the operation only barely depends on the parameters. So + # we just use the first set of parameters. If there are + # different numbers or types of parameters, we'll error + # anyway. + formatted_operation, parameter_types = _format_operation( + operation, seq_of_parameters[0] + ) + for parameters in seq_of_parameters: + self._execute( + formatted_operation, parameters, None, None, parameter_types + ) def _try_fetch(self, size=None): """Try to start fetching data, if not yet started. @@ -427,7 +446,7 @@ def _format_operation_dict(operation, parameters): raise exceptions.ProgrammingError(exc) -def _format_operation(operation, parameters=None): +def _format_operation(operation, parameters): """Formats parameters in operation in way BigQuery expects. Args: @@ -445,9 +464,67 @@ def _format_operation(operation, parameters=None): ``parameters`` argument. """ if parameters is None or len(parameters) == 0: - return operation.replace("%%", "%") # Still do percent de-escaping. + return operation.replace("%%", "%"), None # Still do percent de-escaping. + + operation, parameter_types = _extract_types(operation) + if parameter_types is None: + raise exceptions.ProgrammingError( + f"Parameters were provided, but {repr(operation)} has no placeholders." 
+ ) if isinstance(parameters, collections_abc.Mapping): - return _format_operation_dict(operation, parameters) + return _format_operation_dict(operation, parameters), parameter_types + + return _format_operation_list(operation, parameters), parameter_types + + +def _extract_types( + operation, extra_type_sub=re.compile(r"(%*)%(?:\(([^:)]*)(?::(\w+))?\))?s").sub +): + """Remove type information from parameter placeholders. + + For every parameter of the form %(name:type)s, replace with %(name)s and add the + item name->type to dict that's returned. + + Returns operation without type information and a dictionary of names and types. + """ + parameter_types = None + + def repl(m): + nonlocal parameter_types + prefix, name, type_ = m.groups() + if len(prefix) % 2: + # The prefix has an odd number of %s, the last of which + # escapes the % we're looking for, so we don't want to + # change anything. + return m.group(0) + + try: + if name: + if not parameter_types: + parameter_types = {} + if type_: + if name in parameter_types: + if type_ != parameter_types[name]: + raise exceptions.ProgrammingError( + f"Conflicting types for {name}: " + f"{parameter_types[name]} and {type_}." + ) + else: + parameter_types[name] = type_ + else: + if not isinstance(parameter_types, dict): + raise TypeError() + + return f"{prefix}%({name})s" + else: + if parameter_types is None: + parameter_types = [] + parameter_types.append(type_) + return f"{prefix}%s" + except (AttributeError, TypeError): + raise exceptions.ProgrammingError( + f"{repr(operation)} mixes named and unamed parameters." 
+ ) - return _format_operation_list(operation, parameters) + return extra_type_sub(repl, operation), parameter_types diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 6ae7cae12..474d9a74a 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -615,7 +615,7 @@ def _cell_magic(line, query): ) raise NameError(msg) - params = _helpers.to_query_parameters(ast.literal_eval(params_option_value)) + params = _helpers.to_query_parameters(ast.literal_eval(params_option_value), {}) project = args.project or context.project diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 0fdf1142f..2437f3568 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -1159,3 +1159,18 @@ def fake_isinstance(instance, target_class): "google.cloud.bigquery.schema.isinstance", side_effect=fake_isinstance ) return patcher + + +def test_decimal_as_float_api_repr(): + """Make sure decimals get converted to float.""" + import google.cloud.bigquery.query + from decimal import Decimal + + param = google.cloud.bigquery.query.ScalarQueryParameter( + "x", "FLOAT64", Decimal(42) + ) + assert param.to_api_repr() == { + "parameterType": {"type": "FLOAT64"}, + "parameterValue": {"value": 42.0}, + "name": "x", + } diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index 4b2724de0..250ba46d9 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -18,13 +18,15 @@ import operator as op import unittest +import pytest + try: import pyarrow except ImportError: # pragma: NO COVER pyarrow = None import google.cloud._helpers -from google.cloud.bigquery import table +from google.cloud.bigquery import table, enums from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions from tests.unit.helpers import _to_pyarrow @@ -142,7 +144,7 @@ def 
test_array_to_query_parameter_sequence_w_invalid_elements(self): def test_to_query_parameters_w_dict(self): parameters = {"somebool": True, "somestring": "a-string-value"} - query_parameters = _helpers.to_query_parameters(parameters) + query_parameters = _helpers.to_query_parameters(parameters, {}) query_parameter_tuples = [] for param in query_parameters: query_parameter_tuples.append((param.name, param.type_, param.value)) @@ -158,7 +160,7 @@ def test_to_query_parameters_w_dict(self): def test_to_query_parameters_w_dict_array_param(self): parameters = {"somelist": [10, 20]} - query_parameters = _helpers.to_query_parameters(parameters) + query_parameters = _helpers.to_query_parameters(parameters, {}) self.assertEqual(len(query_parameters), 1) param = query_parameters[0] @@ -171,11 +173,11 @@ def test_to_query_parameters_w_dict_dict_param(self): parameters = {"my_param": {"foo": "bar"}} with self.assertRaises(NotImplementedError): - _helpers.to_query_parameters(parameters) + _helpers.to_query_parameters(parameters, {}) def test_to_query_parameters_w_list(self): parameters = [True, "a-string-value"] - query_parameters = _helpers.to_query_parameters(parameters) + query_parameters = _helpers.to_query_parameters(parameters, [None, None]) query_parameter_tuples = [] for param in query_parameters: query_parameter_tuples.append((param.name, param.type_, param.value)) @@ -186,7 +188,7 @@ def test_to_query_parameters_w_list(self): def test_to_query_parameters_w_list_array_param(self): parameters = [[10, 20]] - query_parameters = _helpers.to_query_parameters(parameters) + query_parameters = _helpers.to_query_parameters(parameters, [None]) self.assertEqual(len(query_parameters), 1) param = query_parameters[0] @@ -199,10 +201,10 @@ def test_to_query_parameters_w_list_dict_param(self): parameters = [{"foo": "bar"}] with self.assertRaises(NotImplementedError): - _helpers.to_query_parameters(parameters) + _helpers.to_query_parameters(parameters, [None]) def 
test_to_query_parameters_none_argument(self): - query_parameters = _helpers.to_query_parameters(None) + query_parameters = _helpers.to_query_parameters(None, None) self.assertEqual(query_parameters, []) @@ -338,3 +340,79 @@ def test_custom_on_closed_error_type(self): with self.assertRaisesRegex(RuntimeError, "I'm closed!"): instance.instance_method() + + +VALID_BQ_TYPES = [ + (name, getattr(enums.SqlParameterScalarTypes, name)._type) + for name in dir(enums.SqlParameterScalarTypes) + if not name.startswith("_") +] + + +@pytest.mark.parametrize("alias, type_", VALID_BQ_TYPES) +def test_scalar_to_query_parameter_honors_given_type(alias, type_): + from google.cloud import bigquery + + assert _helpers.scalar_to_query_parameter(1.23, None, alias) == ( + bigquery.ScalarQueryParameter(None, type_, 1.23) + ) + assert _helpers.scalar_to_query_parameter(None, "foo", alias) == ( + bigquery.ScalarQueryParameter("foo", type_, None) + ) + + +def test_scalar_to_query_parameter_honors_given_type_errors_on_invalid(): + with pytest.raises( + google.cloud.bigquery.dbapi.exceptions.ProgrammingError, + match="The given parameter type, INT, for foo is not a valid BigQuery scalar type.", + ): + _helpers.scalar_to_query_parameter(None, "foo", "INT") + + +@pytest.mark.parametrize("alias, type_", VALID_BQ_TYPES) +def test_array_to_query_parameter_honors_given_type(alias, type_): + from google.cloud import bigquery + + assert _helpers.array_to_query_parameter([1.23], None, alias) == ( + bigquery.ArrayQueryParameter(None, type_, [1.23]) + ) + assert _helpers.array_to_query_parameter((), "foo", alias) == ( + bigquery.ArrayQueryParameter("foo", type_, ()) + ) + + +def test_array_to_query_parameter_honors_given_type_errors_on_invalid(): + with pytest.raises( + google.cloud.bigquery.dbapi.exceptions.ProgrammingError, + match="The given parameter type, INT, for foo is not a valid BigQuery scalar type.", + ): + _helpers.array_to_query_parameter((), "foo", "INT") + + +def 
test_to_query_parameters_dict_w_types(): + from google.cloud import bigquery + + assert sorted( + _helpers.to_query_parameters( + dict(i=1, x=1.2, y=None, z=[]), dict(x="numeric", y="string", z="float64") + ), + key=lambda p: p.name, + ) == [ + bigquery.ScalarQueryParameter("i", "INT64", 1), + bigquery.ScalarQueryParameter("x", "NUMERIC", 1.2), + bigquery.ScalarQueryParameter("y", "STRING", None), + bigquery.ArrayQueryParameter("z", "FLOAT64", []), + ] + + +def test_to_query_parameters_list_w_types(): + from google.cloud import bigquery + + assert _helpers.to_query_parameters( + [1, 1.2, None, []], [None, "numeric", "string", "float64"] + ) == [ + bigquery.ScalarQueryParameter(None, "INT64", 1), + bigquery.ScalarQueryParameter(None, "NUMERIC", 1.2), + bigquery.ScalarQueryParameter(None, "STRING", None), + bigquery.ArrayQueryParameter(None, "FLOAT64", []), + ] diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index 039ef3b4c..5afe269ef 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import mock import operator as op import unittest -import mock +import pytest + try: import pyarrow @@ -612,6 +614,15 @@ def test_executemany_w_dml(self): self.assertIsNone(cursor.description) self.assertEqual(cursor.rowcount, 12) + def test_executemany_empty(self): + from google.cloud.bigquery.dbapi import connect + + connection = connect(self._mock_client(rows=[], num_dml_affected_rows=12)) + cursor = connection.cursor() + cursor.executemany((), ()) + self.assertIsNone(cursor.description) + self.assertEqual(cursor.rowcount, -1) + def test_is_iterable(self): from google.cloud.bigquery import dbapi @@ -638,13 +649,15 @@ def test_is_iterable(self): def test__format_operation_w_dict(self): from google.cloud.bigquery.dbapi import cursor - formatted_operation = cursor._format_operation( - "SELECT %(somevalue)s, %(a `weird` one)s;", + parameter_types = {} + formatted_operation, parameter_types = cursor._format_operation( + "SELECT %(somevalue)s, %(a `weird` one:STRING)s;", {"somevalue": "hi", "a `weird` one": "world"}, ) self.assertEqual( formatted_operation, "SELECT @`somevalue`, @`a \\`weird\\` one`;" ) + self.assertEqual(parameter_types, {"a `weird` one": "STRING"}) def test__format_operation_w_wrong_dict(self): from google.cloud.bigquery import dbapi @@ -660,7 +673,7 @@ def test__format_operation_w_wrong_dict(self): def test__format_operation_w_redundant_dict_key(self): from google.cloud.bigquery.dbapi import cursor - formatted_operation = cursor._format_operation( + formatted_operation, _ = cursor._format_operation( "SELECT %(somevalue)s;", {"somevalue": "foo", "value-not-used": "bar"} ) self.assertEqual(formatted_operation, "SELECT @`somevalue`;") @@ -668,7 +681,7 @@ def test__format_operation_w_redundant_dict_key(self): def test__format_operation_w_sequence(self): from google.cloud.bigquery.dbapi import cursor - formatted_operation = cursor._format_operation( + formatted_operation, _ = cursor._format_operation( "SELECT %s, %s;", ("hello", "world") ) 
self.assertEqual(formatted_operation, "SELECT ?, ?;") @@ -698,19 +711,19 @@ def test__format_operation_w_too_long_sequence(self): def test__format_operation_w_empty_dict(self): from google.cloud.bigquery.dbapi import cursor - formatted_operation = cursor._format_operation("SELECT '%f'", {}) + formatted_operation, _ = cursor._format_operation("SELECT '%f'", {}) self.assertEqual(formatted_operation, "SELECT '%f'") def test__format_operation_wo_params_single_percent(self): from google.cloud.bigquery.dbapi import cursor - formatted_operation = cursor._format_operation("SELECT '%'", {}) + formatted_operation, _ = cursor._format_operation("SELECT '%'", {}) self.assertEqual(formatted_operation, "SELECT '%'") def test__format_operation_wo_params_double_percents(self): from google.cloud.bigquery.dbapi import cursor - formatted_operation = cursor._format_operation("SELECT '%%'", {}) + formatted_operation, _ = cursor._format_operation("SELECT '%%'", {}) self.assertEqual(formatted_operation, "SELECT '%'") def test__format_operation_unescaped_percent_w_dict_param(self): @@ -734,3 +747,80 @@ def test__format_operation_unescaped_percent_w_list_param(self): "SELECT %s, %s, '100 %';", ["foo", "bar"], ) + + def test__format_operation_no_placeholders(self): + from google.cloud.bigquery import dbapi + from google.cloud.bigquery.dbapi import cursor + + self.assertRaises( + dbapi.ProgrammingError, + cursor._format_operation, + "SELECT 42", + ["foo", "bar"], + ) + + +@pytest.mark.parametrize( + "inp,expect", + [ + ("", ("", None)), + ("values(%(foo)s, %(bar)s)", ("values(%(foo)s, %(bar)s)", {})), + ( + "values('%%(oof:INT64)s', %(foo)s, %(bar)s)", + ("values('%%(oof:INT64)s', %(foo)s, %(bar)s)", {}), + ), + ( + "values(%(foo:INT64)s, %(bar)s)", + ("values(%(foo)s, %(bar)s)", dict(foo="INT64")), + ), + ( + "values('%%(oof:INT64)s, %(foo:INT64)s, %(foo)s)", + ("values('%%(oof:INT64)s, %(foo)s, %(foo)s)", dict(foo="INT64")), + ), + ( + "values(%(foo:INT64)s, %(foo:INT64)s)", + 
("values(%(foo)s, %(foo)s)", dict(foo="INT64")), + ), + ( + "values(%(foo:INT64)s, %(bar:NUMERIC)s) 100 %", + ("values(%(foo)s, %(bar)s) 100 %", dict(foo="INT64", bar="NUMERIC")), + ), + (" %s %()s %(:int64)s ", (" %s %s %s ", [None, None, "int64"])), + (" %%s %s %()s %(:int64)s ", (" %%s %s %s %s ", [None, None, "int64"])), + ( + "values(%%%(foo:INT64)s, %(bar)s)", + ("values(%%%(foo)s, %(bar)s)", dict(foo="INT64")), + ), + ( + "values(%%%%(foo:INT64)s, %(bar)s)", + ("values(%%%%(foo:INT64)s, %(bar)s)", dict()), + ), + ( + "values(%%%%%(foo:INT64)s, %(bar)s)", + ("values(%%%%%(foo)s, %(bar)s)", dict(foo="INT64")), + ), + ], +) +def test__extract_types(inp, expect): + from google.cloud.bigquery.dbapi.cursor import _extract_types as et + + assert et(inp) == expect + + +@pytest.mark.parametrize( + "match,inp", + [ + ( + "Conflicting types for foo: numeric and int64.", + " %(foo:numeric)s %(foo:int64)s ", + ), + (r"' %s %\(foo\)s ' mixes named and unamed parameters.", " %s %(foo)s "), + (r"' %\(foo\)s %s ' mixes named and unamed parameters.", " %(foo)s %s "), + ], +) +def test__extract_types_fail(match, inp): + from google.cloud.bigquery.dbapi.cursor import _extract_types as et + from google.cloud.bigquery.dbapi import exceptions + + with pytest.raises(exceptions.ProgrammingError, match=match): + et(inp) From 5df63fd9253cd0475cfb7cefd89a7729c6c5abf2 Mon Sep 17 00:00:00 2001 From: "google-cloud-policy-bot[bot]" <80869356+google-cloud-policy-bot[bot]@users.noreply.github.com> Date: Thu, 29 Apr 2021 10:35:16 -0600 Subject: [PATCH 199/341] chore: add SECURITY.md (#636) Co-authored-by: google-cloud-policy-bot[bot] <80869356+google-cloud-policy-bot[bot]@users.noreply.github.com> --- SECURITY.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 SECURITY.md diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 000000000..8b58ae9c0 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,7 @@ +# Security Policy + +To report a security issue, please use 
[g.co/vulnz](https://g.co/vulnz). + +The Google Security Team will respond within 5 working days of your report on g.co/vulnz. + +We use g.co/vulnz for our intake, and do coordination and disclosure here using GitHub Security Advisory to privately discuss and fix the issue. From aa59023317b1c63720fb717b3544f755652da58d Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 29 Apr 2021 10:35:37 -0600 Subject: [PATCH 200/341] fix: add DECIMAL and BIGDECIMAL as aliases for NUMERIC and BIGNUMERIC (#638) * Added decimal types to SqlTypeNames and SqlParameterScalarTypes * Go ahead and alias on the client To convey to the observant that these are aliases, even though they could be used (more or less) directly. * Make sure that DECIMAL data are converted when making API calls. This is mainly as a backstop -- DECIMAL requests should be converted to NUMERIC. * blacken --- google/cloud/bigquery/_helpers.py | 5 +++++ google/cloud/bigquery/enums.py | 6 ++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index ad8e3f003..4fe29291d 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -363,6 +363,11 @@ def _time_to_json(value): "DATETIME": _datetime_to_json, "DATE": _date_to_json, "TIME": _time_to_json, + # Make sure DECIMAL and BIGDECIMAL are handled, even though + # requests for them should be converted to NUMERIC. Better safe + # than sorry. 
+ "DECIMAL": _decimal_to_json, + "BIGDECIMAL": _decimal_to_json, } diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index b378f091b..787c2449d 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -203,8 +203,8 @@ class SqlTypeNames(str, enum.Enum): INT64 = "INTEGER" FLOAT = "FLOAT" FLOAT64 = "FLOAT" - NUMERIC = "NUMERIC" - BIGNUMERIC = "BIGNUMERIC" + DECIMAL = NUMERIC = "NUMERIC" + BIGDECIMAL = BIGNUMERIC = "BIGNUMERIC" BOOLEAN = "BOOLEAN" BOOL = "BOOLEAN" GEOGRAPHY = "GEOGRAPHY" # NOTE: not available in legacy types @@ -227,6 +227,8 @@ class SqlParameterScalarTypes: FLOAT64 = ScalarQueryParameterType("FLOAT64") NUMERIC = ScalarQueryParameterType("NUMERIC") BIGNUMERIC = ScalarQueryParameterType("BIGNUMERIC") + DECIMAL = ScalarQueryParameterType("NUMERIC") + BIGDECIMAL = ScalarQueryParameterType("BIGNUMERIC") BOOLEAN = ScalarQueryParameterType("BOOL") BOOL = ScalarQueryParameterType("BOOL") GEOGRAPHY = ScalarQueryParameterType("GEOGRAPHY") From 6e6cfdf213a8e762fc0718ec7806f511b00f36bd Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 29 Apr 2021 17:20:06 +0000 Subject: [PATCH 201/341] chore: release 2.15.0 (#637) :robot: I have created a release \*beep\* \*boop\* --- ## [2.15.0](https://www.github.com/googleapis/python-bigquery/compare/v2.14.0...v2.15.0) (2021-04-29) ### Features * Extended DB API parameter syntax to optionally provide parameter types ([#626](https://www.github.com/googleapis/python-bigquery/issues/626)) ([8bcf397](https://www.github.com/googleapis/python-bigquery/commit/8bcf397fbe2527e06317741875a059b109cfcd9c)) ### Bug Fixes * add DECIMAL and BIGDECIMAL as aliases for NUMERIC and BIGNUMERIC ([#638](https://www.github.com/googleapis/python-bigquery/issues/638)) ([aa59023](https://www.github.com/googleapis/python-bigquery/commit/aa59023317b1c63720fb717b3544f755652da58d)) * The DB API Binary function accepts bytes data 
([#630](https://www.github.com/googleapis/python-bigquery/issues/630)) ([4396e70](https://www.github.com/googleapis/python-bigquery/commit/4396e70771af6889d3242c37c5ff2e80241023a2)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- CHANGELOG.md | 13 +++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9aee40510..6a222a710 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,19 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.15.0](https://www.github.com/googleapis/python-bigquery/compare/v2.14.0...v2.15.0) (2021-04-29) + + +### Features + +* Extended DB API parameter syntax to optionally provide parameter types ([#626](https://www.github.com/googleapis/python-bigquery/issues/626)) ([8bcf397](https://www.github.com/googleapis/python-bigquery/commit/8bcf397fbe2527e06317741875a059b109cfcd9c)) + + +### Bug Fixes + +* add DECIMAL and BIGDECIMAL as aliases for NUMERIC and BIGNUMERIC ([#638](https://www.github.com/googleapis/python-bigquery/issues/638)) ([aa59023](https://www.github.com/googleapis/python-bigquery/commit/aa59023317b1c63720fb717b3544f755652da58d)) +* The DB API Binary function accepts bytes data ([#630](https://www.github.com/googleapis/python-bigquery/issues/630)) ([4396e70](https://www.github.com/googleapis/python-bigquery/commit/4396e70771af6889d3242c37c5ff2e80241023a2)) + ## [2.14.0](https://www.github.com/googleapis/python-bigquery/compare/v2.13.1...v2.14.0) (2021-04-26) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index ba8b4e8af..a8381fff6 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.14.0" +__version__ = "2.15.0" From 28485871dfff01ed18cd6ee56f36a7e373c6733d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 30 Apr 2021 16:20:25 +0200 Subject: [PATCH 202/341] chore(deps): update dependency google-cloud-bigquery to v2.15.0 (#639) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index f46b141fd..324ece4ef 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.14.0 +google-cloud-bigquery==2.15.0 google-cloud-bigquery-storage==2.4.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index f7b5cebe9..077896cb3 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.14.0 +google-cloud-bigquery==2.15.0 google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 grpcio==1.37.0 From 471a76117b9f6353e343a2f493aee181e19c2f79 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 30 Apr 2021 16:45:10 +0200 Subject: [PATCH 203/341] chore(deps): update dependency pyarrow to v4 (#641) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 077896cb3..7e04b06b5 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -8,5 +8,5 @@ matplotlib==3.3.4; python_version < '3.7' matplotlib==3.4.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' pandas==1.2.0; python_version >= '3.7' -pyarrow==3.0.0 +pyarrow==4.0.0 pytz==2021.1 From 6a48e80bc7d347f381b181f4cf81fef105d0ad0d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 30 Apr 2021 16:45:32 +0200 Subject: [PATCH 
204/341] chore(deps): update dependency grpcio to v1.37.1 (#640) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 7e04b06b5..04883477a 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.15.0 google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 -grpcio==1.37.0 +grpcio==1.37.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' From be3c49a72f0e04de4055f5ca7a99f821c2c8f240 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 4 May 2021 19:32:02 +0200 Subject: [PATCH 205/341] chore(deps): update dependency pytest to v6.2.4 (#647) --- samples/geography/requirements-test.txt | 2 +- samples/snippets/requirements-test.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 299d90b65..b0cf76724 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==6.2.3 +pytest==6.2.4 mock==4.0.3 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 299d90b65..b0cf76724 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==6.2.3 +pytest==6.2.4 mock==4.0.3 From 6cc6876eb0e5bf49fdc047256a945dcf1b289576 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 5 May 2021 14:41:46 +0200 Subject: [PATCH 206/341] feat: add with_name() to ScalarQueryParameterType (#644) * feat: add with_name() to ScalarQueryParameterType * Clarify unsetting a name, add extra test --- google/cloud/bigquery/query.py | 15 +++++++++++++++ tests/unit/test_query.py | 20 ++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git 
a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 3751eb124..d1e9a45a5 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -16,6 +16,7 @@ from collections import OrderedDict import copy +from typing import Union from google.cloud.bigquery.table import _parse_schema_resource from google.cloud.bigquery._helpers import _rows_from_json @@ -119,6 +120,20 @@ def to_api_repr(self): # attributes in the API representation when needed. Here we omit them. return {"type": self._type} + def with_name(self, new_name: Union[str, None]): + """Return a copy of the instance with ``name`` set to ``new_name``. + + Args: + name (Union[str, None]): + The new name of the query parameter type. If ``None``, the existing + name is cleared. + + Returns: + google.cloud.bigquery.query.ScalarQueryParameterType: + A new instance with updated name. + """ + return type(self)(self._type, name=new_name, description=self.description) + def __repr__(self): name = f", name={self.name!r}" if self.name is not None else "" description = ( diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index c8be2911f..90fc30b20 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -98,6 +98,26 @@ def test_repr_all_optional_attrs(self): "ScalarQueryParameterType('BYTES', name='foo', description='this is foo')", ) + def test_with_name_returns_copy_w_changed_name(self): + param_type = self._make_one("BOOLEAN", name=None, description="Some checkbox.") + modified_type = param_type.with_name("allow_emails") + + self.assertIsNot(modified_type, param_type) # Result is a copy. + self.assertEqual(modified_type.name, "allow_emails") + + # The rest of the The rest of the fields should have been preserved. 
+ self.assertEqual(modified_type._type, param_type._type) + self.assertEqual(modified_type.description, param_type.description) + + def test_with_name_clearing_the_value(self): + param_type = self._make_one( + "BOOLEAN", name="allow_emails", description="Some checkbox." + ) + modified_type = param_type.with_name(None) + + self.assertIsNone(modified_type.name) + self.assertEqual(param_type.name, "allow_emails") # original unchanged + class Test_ArrayQueryParameterType(unittest.TestCase): @staticmethod From 9e1d3869c2024fe7a8af57ff59838d904ca5db03 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 5 May 2021 15:49:10 +0200 Subject: [PATCH 207/341] deps: expand supported pyarrow versions to v4 (#643) * deps: expand supported pyarrow versions to v4 * Expand *all* pyarrow pins. * Constrain pyarrow to v4.0.0+ in Python 3.9 tests --- setup.py | 6 +++--- testing/constraints-3.9.txt | 7 +++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 607ffb63f..6a6202ef9 100644 --- a/setup.py +++ b/setup.py @@ -47,10 +47,10 @@ # grpc.Channel.close() method isn't added until 1.32.0. 
# https://github.com/grpc/grpc/pull/15254 "grpcio >= 1.32.0, < 2.0dev", - "pyarrow >= 1.0.0, < 4.0dev", + "pyarrow >= 1.0.0, < 5.0dev", ], - "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 4.0dev"], - "bignumeric_type": ["pyarrow >= 3.0.0, < 4.0dev"], + "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 5.0dev"], + "bignumeric_type": ["pyarrow >= 3.0.0, < 5.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ "opentelemetry-api >= 0.11b0", diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index e69de29bb..39dc6250e 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -0,0 +1,7 @@ +# This constraints file is used to make sure that the latest dependency versions +# we claim to support in setup.py are indeed installed in test sessions in the most +# recent Python version supported (3.9 at the time of writing - 2021-05-05). +# +# NOTE: Not comprehensive yet, will eventually be maintained semi-automatically by +# the renovate bot. +pyarrow>=4.0.0 From a6a4eeac8f832cf9e24b0a4391b9848587fb6d29 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 5 May 2021 17:17:19 +0200 Subject: [PATCH 208/341] chore: use file paths for --cov args in noxfile (#648) --- noxfile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/noxfile.py b/noxfile.py index 7ba081660..654bbd093 100644 --- a/noxfile.py +++ b/noxfile.py @@ -77,8 +77,8 @@ def default(session, install_extras=True): session.run( "py.test", "--quiet", - "--cov=google.cloud.bigquery", - "--cov=tests.unit", + "--cov=google/cloud/bigquery", + "--cov=tests/unit", "--cov-append", "--cov-config=.coveragerc", "--cov-report=", From 144ceeaac0167f774b86c39a042a2de2b8b4d356 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 6 May 2021 11:36:10 +0200 Subject: [PATCH 209/341] chore: release 2.16.0 (#649) Co-authored-by: release-please[bot] 
<55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 12 ++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a222a710..15d594c1b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,18 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.16.0](https://www.github.com/googleapis/python-bigquery/compare/v2.15.0...v2.16.0) (2021-05-05) + + +### Features + +* add with_name() to ScalarQueryParameterType ([#644](https://www.github.com/googleapis/python-bigquery/issues/644)) ([6cc6876](https://www.github.com/googleapis/python-bigquery/commit/6cc6876eb0e5bf49fdc047256a945dcf1b289576)) + + +### Dependencies + +* expand supported pyarrow versions to v4 ([#643](https://www.github.com/googleapis/python-bigquery/issues/643)) ([9e1d386](https://www.github.com/googleapis/python-bigquery/commit/9e1d3869c2024fe7a8af57ff59838d904ca5db03)) + ## [2.15.0](https://www.github.com/googleapis/python-bigquery/compare/v2.14.0...v2.15.0) (2021-04-29) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index a8381fff6..a93d72c2b 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.15.0" +__version__ = "2.16.0" From e24d47e72a3fdb8b6fe3d387abc31f79c5a32bc0 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 7 May 2021 16:55:55 +0200 Subject: [PATCH 210/341] chore: avoid pytype error caused by attrs==21.1.0 (#656) --- noxfile.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/noxfile.py b/noxfile.py index 654bbd093..dc77be3b7 100644 --- a/noxfile.py +++ b/noxfile.py @@ -103,6 +103,10 @@ def unit_noextras(session): @nox.session(python=DEFAULT_PYTHON_VERSION) def pytype(session): """Run type checks.""" + # An indirect dependecy attrs==21.1.0 breaks the check, and installing a less + # recent version avoids the error until a possibly better fix is found. + # https://github.com/googleapis/python-bigquery/issues/655 + session.install("attrs==20.3.0") session.install("-e", ".[all]") session.install("ipython") session.install(PYTYPE_VERSION) From 240d1d2ba125b83c4277b2f2cd6724d66cd95bb9 Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Fri, 7 May 2021 16:14:02 -0600 Subject: [PATCH 211/341] chore: use 3.8 for blacken session (#653) The Autosynth build now has 3.8: https://github.com/googleapis/synthtool/commit/fd33d7df9ecfc79cc6dbe552b497a4fb36f2e635#diff-f80f936e0eac73417c05535c764a44906afd70a37096ea3c58934a9f6f1e7fcd Should fix unexpected style in #651 --- noxfile.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/noxfile.py b/noxfile.py index dc77be3b7..a52025635 100644 --- a/noxfile.py +++ b/noxfile.py @@ -257,15 +257,12 @@ def lint_setup_py(session): session.run("python", "setup.py", "check", "--restructuredtext", "--strict") -@nox.session(python="3.6") +@nox.session(python=DEFAULT_PYTHON_VERSION) def blacken(session): """Run black. Format code to uniform standard. - - This currently uses Python 3.6 due to the automated Kokoro run of synthtool. - That run uses an image that doesn't have 3.6 installed. 
Before updating this - check the state of the `gcp_ubuntu_config` we use for that Kokoro run. """ + session.install(BLACK_VERSION) session.run("black", *BLACK_PATHS) From 615d139be15bbbaea1517eb4a5d75b93055c6663 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Sun, 9 May 2021 04:05:30 -0400 Subject: [PATCH 212/341] chore: add library type to .repo-metadata.json (#658) --- .repo-metadata.json | 1 + 1 file changed, 1 insertion(+) diff --git a/.repo-metadata.json b/.repo-metadata.json index f50dbbeb2..f132056d5 100644 --- a/.repo-metadata.json +++ b/.repo-metadata.json @@ -6,6 +6,7 @@ "issue_tracker": "https://issuetracker.google.com/savedsearches/559654", "release_level": "ga", "language": "python", + "library_type": "GAPIC_COMBO", "repo": "googleapis/python-bigquery", "distribution_name": "google-cloud-bigquery", "api_id": "bigquery.googleapis.com", From aeadc8c2d614bb9f0883ec901fca48930f3aaf19 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 12 May 2021 04:22:20 -0600 Subject: [PATCH 213/341] fix: executemany rowcount only reflected the last execution (#660) --- google/cloud/bigquery/dbapi/cursor.py | 4 ++++ tests/unit/test_dbapi_cursor.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py index f74781df9..c8fc49378 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -218,6 +218,7 @@ def executemany(self, operation, seq_of_parameters): Sequence of many sets of parameter values. """ if seq_of_parameters: + rowcount = 0 # There's no reason to format the line more than once, as # the operation only barely depends on the parameters. So # we just use the first set of parameters. 
If there are @@ -230,6 +231,9 @@ def executemany(self, operation, seq_of_parameters): self._execute( formatted_operation, parameters, None, None, parameter_types ) + rowcount += self.rowcount + + self.rowcount = rowcount def _try_fetch(self, size=None): """Try to start fetching data, if not yet started. diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index 5afe269ef..55e453254 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -612,7 +612,7 @@ def test_executemany_w_dml(self): (("test",), ("anothertest",)), ) self.assertIsNone(cursor.description) - self.assertEqual(cursor.rowcount, 12) + self.assertEqual(cursor.rowcount, 24) # 24 because 2 * 12 because cumulatve. def test_executemany_empty(self): from google.cloud.bigquery.dbapi import connect From c6ba15593f4d7541793e45295d9e531fd214094a Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 13 May 2021 09:17:21 +0200 Subject: [PATCH 214/341] chore: release 2.16.1 (#662) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 15d594c1b..ef184dffb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.16.1](https://www.github.com/googleapis/python-bigquery/compare/v2.16.0...v2.16.1) (2021-05-12) + + +### Bug Fixes + +* executemany rowcount only reflected the last execution ([#660](https://www.github.com/googleapis/python-bigquery/issues/660)) ([aeadc8c](https://www.github.com/googleapis/python-bigquery/commit/aeadc8c2d614bb9f0883ec901fca48930f3aaf19)) + ## [2.16.0](https://www.github.com/googleapis/python-bigquery/compare/v2.15.0...v2.16.0) (2021-05-05) diff --git a/google/cloud/bigquery/version.py 
b/google/cloud/bigquery/version.py index a93d72c2b..61e0c0a83 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.16.0" +__version__ = "2.16.1" From 506b268cab218d6a123c82d431f99dc36a2dc35a Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 13 May 2021 12:30:51 +0200 Subject: [PATCH 215/341] chore(deps): update dependency google-cloud-bigquery to v2.16.1 (#652) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 324ece4ef..e494fbaae 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.15.0 +google-cloud-bigquery==2.16.1 google-cloud-bigquery-storage==2.4.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 04883477a..2dfee39b5 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.15.0 +google-cloud-bigquery==2.16.1 google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 grpcio==1.37.1 From e8838a75f50a62f8a1189d2fcde3ee78f13eb1d1 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Thu, 13 May 2021 10:59:52 -0400 Subject: [PATCH 216/341] chore: migrate to owl bot (#663) * chore: migrate to owl bot * chore: copy files from googleapis-gen f2de93abafa306b2ebadf1d10d947db8bcf2bf15 * chore: run the post processor --- .github/.OwlBot.lock.yaml | 4 ++ .github/.OwlBot.yaml | 26 ++++++++ .pre-commit-config.yaml | 2 +- CONTRIBUTING.rst | 16 +---- synth.py => owlbot.py | 110 +++++++++++++++---------------- synth.metadata | 134 -------------------------------------- 6 files changed, 85 
insertions(+), 207 deletions(-) create mode 100644 .github/.OwlBot.lock.yaml create mode 100644 .github/.OwlBot.yaml rename synth.py => owlbot.py (60%) delete mode 100644 synth.metadata diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml new file mode 100644 index 000000000..d49860b32 --- /dev/null +++ b/.github/.OwlBot.lock.yaml @@ -0,0 +1,4 @@ +docker: + digest: sha256:457583330eec64daa02aeb7a72a04d33e7be2428f646671ce4045dcbc0191b1e + image: gcr.io/repo-automation-bots/owlbot-python:latest + diff --git a/.github/.OwlBot.yaml b/.github/.OwlBot.yaml new file mode 100644 index 000000000..2b6451c19 --- /dev/null +++ b/.github/.OwlBot.yaml @@ -0,0 +1,26 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +docker: + image: gcr.io/repo-automation-bots/owlbot-python:latest + +deep-remove-regex: + - /owl-bot-staging + +deep-copy-regex: + - source: /google/cloud/bigquery/(v.*)/.*-py/(.*) + dest: /owl-bot-staging/$1/$2 + +begin-after-commit-hash: f2de93abafa306b2ebadf1d10d947db8bcf2bf15 + diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8912e9b5d..1bbd78783 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -26,6 +26,6 @@ repos: hooks: - id: black - repo: https://gitlab.com/pycqa/flake8 - rev: 3.9.0 + rev: 3.9.1 hooks: - id: flake8 diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index a0e330e44..20ba9e62e 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -160,21 +160,7 @@ Running System Tests auth settings and change some configuration in your project to run all the tests. -- System tests will be run against an actual project and - so you'll need to provide some environment variables to facilitate - authentication to your project: - - - ``GOOGLE_APPLICATION_CREDENTIALS``: The path to a JSON key file; - Such a file can be downloaded directly from the developer's console by clicking - "Generate new JSON key". See private key - `docs `__ - for more details. - -- Once you have downloaded your json keys, set the environment variable - ``GOOGLE_APPLICATION_CREDENTIALS`` to the absolute path of the json file:: - - $ export GOOGLE_APPLICATION_CREDENTIALS="/Users//path/to/app_credentials.json" - +- System tests will be run against an actual project. You should use local credentials from gcloud when possible. See `Best practices for application authentication `__. Some tests require a service account. For those tests see `Authenticating as a service account `__. 
************* Test Coverage diff --git a/synth.py b/owlbot.py similarity index 60% rename from synth.py rename to owlbot.py index d99f368cc..f45c24fbb 100644 --- a/synth.py +++ b/owlbot.py @@ -19,36 +19,61 @@ from synthtool import gcp from synthtool.languages import python -gapic = gcp.GAPICBazel() common = gcp.CommonTemplates() -version = "v2" -library = gapic.py_library( - service="bigquery", - version=version, - bazel_target=f"//google/cloud/bigquery/{version}:bigquery-{version}-py", - include_protos=True, -) - -s.move( - library, - excludes=[ - "*.tar.gz", - "docs/index.rst", - "docs/bigquery_v2/*_service.rst", - "docs/bigquery_v2/services.rst", - "README.rst", - "noxfile.py", - "setup.py", - "scripts/fixup_bigquery_v2_keywords.py", - library / f"google/cloud/bigquery/__init__.py", - library / f"google/cloud/bigquery/py.typed", - # There are no public API endpoints for the generated ModelServiceClient, - # thus there's no point in generating it and its tests. - library / f"google/cloud/bigquery_{version}/services/**", - library / f"tests/unit/gapic/bigquery_{version}/**", - ], -) +default_version = "v2" + +for library in s.get_staging_dirs(default_version): + # Do not expose ModelServiceClient, as there is no public API endpoint for the + # models service. + s.replace( + library / f"google/cloud/bigquery_{library.name}/__init__.py", + r"from \.services\.model_service import ModelServiceClient", + "", + ) + s.replace( + library / f"google/cloud/bigquery_{library.name}/__init__.py", + r"""["']ModelServiceClient["'],""", + "", + ) + + # Adjust Model docstring so that Sphinx does not think that "predicted_" is + # a reference to something, issuing a false warning. + s.replace( + library / f"google/cloud/bigquery_{library.name}/types/model.py", + r'will have a "predicted_"', + "will have a `predicted_`", + ) + + # Avoid breaking change due to change in field renames. 
+ # https://github.com/googleapis/python-bigquery/issues/319 + s.replace( + library / f"google/cloud/bigquery_{library.name}/types/standard_sql.py", + r"type_ ", + "type " + ) + + s.move( + library, + excludes=[ + "*.tar.gz", + "docs/index.rst", + f"docs/bigquery_{library.name}/*_service.rst", + f"docs/bigquery_{library.name}/services.rst", + "README.rst", + "noxfile.py", + "setup.py", + f"scripts/fixup_bigquery_{library.name}_keywords.py", + f"google/cloud/bigquery/__init__.py", + f"google/cloud/bigquery/py.typed", + # There are no public API endpoints for the generated ModelServiceClient, + # thus there's no point in generating it and its tests. + f"google/cloud/bigquery_{library.name}/services/**", + f"tests/unit/gapic/bigquery_{library.name}/**", + ], + ) + +s.remove_staging_dirs() # ---------------------------------------------------------------------------- # Add templated files @@ -79,41 +104,12 @@ python.py_samples() -# Do not expose ModelServiceClient, as there is no public API endpoint for the -# models service. -s.replace( - "google/cloud/bigquery_v2/__init__.py", - r"from \.services\.model_service import ModelServiceClient", - "", -) -s.replace( - "google/cloud/bigquery_v2/__init__.py", - r"""["']ModelServiceClient["'],""", - "", -) - -# Adjust Model docstring so that Sphinx does not think that "predicted_" is -# a reference to something, issuing a false warning. -s.replace( - "google/cloud/bigquery_v2/types/model.py", - r'will have a "predicted_"', - "will have a `predicted_`", -) - s.replace( "docs/conf.py", r'\{"members": True\}', '{"members": True, "inherited-members": True}' ) -# Avoid breaking change due to change in field renames. -# https://github.com/googleapis/python-bigquery/issues/319 -s.replace( - "google/cloud/bigquery_v2/types/standard_sql.py", - r"type_ ", - "type " -) - # Tell Sphinx to ingore autogenerated docs files. 
s.replace( "docs/conf.py", diff --git a/synth.metadata b/synth.metadata deleted file mode 100644 index b031618b0..000000000 --- a/synth.metadata +++ /dev/null @@ -1,134 +0,0 @@ -{ - "sources": [ - { - "git": { - "name": ".", - "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "f95f415d3441b3928f6cc705cb8a75603d790fd6" - } - }, - { - "git": { - "name": "googleapis", - "remote": "https://github.com/googleapis/googleapis.git", - "sha": "28a591963253d52ce3a25a918cafbdd9928de8cf", - "internalRef": "361662015" - } - }, - { - "git": { - "name": "synthtool", - "remote": "https://github.com/googleapis/synthtool.git", - "sha": "043cc620d6a6111816d9e09f2a97208565fde958" - } - }, - { - "git": { - "name": "synthtool", - "remote": "https://github.com/googleapis/synthtool.git", - "sha": "043cc620d6a6111816d9e09f2a97208565fde958" - } - } - ], - "destinations": [ - { - "client": { - "source": "googleapis", - "apiName": "bigquery", - "apiVersion": "v2", - "language": "python", - "generator": "bazel" - } - } - ], - "generatedFiles": [ - ".coveragerc", - ".flake8", - ".github/CONTRIBUTING.md", - ".github/ISSUE_TEMPLATE/bug_report.md", - ".github/ISSUE_TEMPLATE/feature_request.md", - ".github/ISSUE_TEMPLATE/support_request.md", - ".github/PULL_REQUEST_TEMPLATE.md", - ".github/header-checker-lint.yml", - ".github/release-please.yml", - ".github/snippet-bot.yml", - ".gitignore", - ".kokoro/build.sh", - ".kokoro/continuous/common.cfg", - ".kokoro/continuous/continuous.cfg", - ".kokoro/docker/docs/Dockerfile", - ".kokoro/docker/docs/fetch_gpg_keys.sh", - ".kokoro/docs/common.cfg", - ".kokoro/docs/docs-presubmit.cfg", - ".kokoro/docs/docs.cfg", - ".kokoro/populate-secrets.sh", - ".kokoro/presubmit/common.cfg", - ".kokoro/presubmit/system-3.8.cfg", - ".kokoro/publish-docs.sh", - ".kokoro/release.sh", - ".kokoro/release/common.cfg", - ".kokoro/release/release.cfg", - ".kokoro/samples/lint/common.cfg", - ".kokoro/samples/lint/continuous.cfg", - 
".kokoro/samples/lint/periodic.cfg", - ".kokoro/samples/lint/presubmit.cfg", - ".kokoro/samples/python3.6/common.cfg", - ".kokoro/samples/python3.6/continuous.cfg", - ".kokoro/samples/python3.6/periodic-head.cfg", - ".kokoro/samples/python3.6/periodic.cfg", - ".kokoro/samples/python3.6/presubmit.cfg", - ".kokoro/samples/python3.7/common.cfg", - ".kokoro/samples/python3.7/continuous.cfg", - ".kokoro/samples/python3.7/periodic-head.cfg", - ".kokoro/samples/python3.7/periodic.cfg", - ".kokoro/samples/python3.7/presubmit.cfg", - ".kokoro/samples/python3.8/common.cfg", - ".kokoro/samples/python3.8/continuous.cfg", - ".kokoro/samples/python3.8/periodic-head.cfg", - ".kokoro/samples/python3.8/periodic.cfg", - ".kokoro/samples/python3.8/presubmit.cfg", - ".kokoro/test-samples-against-head.sh", - ".kokoro/test-samples-impl.sh", - ".kokoro/test-samples.sh", - ".kokoro/trampoline.sh", - ".kokoro/trampoline_v2.sh", - ".pre-commit-config.yaml", - ".trampolinerc", - "CODE_OF_CONDUCT.md", - "CONTRIBUTING.rst", - "LICENSE", - "MANIFEST.in", - "docs/_static/custom.css", - "docs/_templates/layout.html", - "docs/bigquery_v2/types.rst", - "docs/conf.py", - "google/cloud/bigquery_v2/__init__.py", - "google/cloud/bigquery_v2/proto/encryption_config.proto", - "google/cloud/bigquery_v2/proto/model.proto", - "google/cloud/bigquery_v2/proto/model_reference.proto", - "google/cloud/bigquery_v2/proto/standard_sql.proto", - "google/cloud/bigquery_v2/proto/table_reference.proto", - "google/cloud/bigquery_v2/py.typed", - "google/cloud/bigquery_v2/types/__init__.py", - "google/cloud/bigquery_v2/types/encryption_config.py", - "google/cloud/bigquery_v2/types/model.py", - "google/cloud/bigquery_v2/types/model_reference.py", - "google/cloud/bigquery_v2/types/standard_sql.py", - "google/cloud/bigquery_v2/types/table_reference.py", - "mypy.ini", - "renovate.json", - "samples/AUTHORING_GUIDE.md", - "samples/CONTRIBUTING.md", - "samples/geography/noxfile.py", - "samples/snippets/noxfile.py", - 
"scripts/decrypt-secrets.sh", - "scripts/readme-gen/readme_gen.py", - "scripts/readme-gen/templates/README.tmpl.rst", - "scripts/readme-gen/templates/auth.tmpl.rst", - "scripts/readme-gen/templates/auth_api_key.tmpl.rst", - "scripts/readme-gen/templates/install_deps.tmpl.rst", - "scripts/readme-gen/templates/install_portaudio.tmpl.rst", - "setup.cfg", - "testing/.gitignore" - ] -} \ No newline at end of file From 82f6c32ab9e75c86d7e27439016e634a484e4e9e Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Sun, 16 May 2021 11:14:01 +0000 Subject: [PATCH 217/341] chore: new owl bot post processor docker image (#665) gcr.io/repo-automation-bots/owlbot-python:latest@sha256:4c981a6b6f2b8914a448d7b3a01688365be03e3ed26dfee399a6aa77fb112eaa --- .github/.OwlBot.lock.yaml | 5 ++--- .pre-commit-config.yaml | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index d49860b32..864c17653 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,4 +1,3 @@ docker: - digest: sha256:457583330eec64daa02aeb7a72a04d33e7be2428f646671ce4045dcbc0191b1e - image: gcr.io/repo-automation-bots/owlbot-python:latest - + image: gcr.io/repo-automation-bots/owlbot-python:latest + digest: sha256:4c981a6b6f2b8914a448d7b3a01688365be03e3ed26dfee399a6aa77fb112eaa diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1bbd78783..4f00c7cff 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -26,6 +26,6 @@ repos: hooks: - id: black - repo: https://gitlab.com/pycqa/flake8 - rev: 3.9.1 + rev: 3.9.2 hooks: - id: flake8 From bd7dbdae5c972b16bafc53c67911eeaa3255a880 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 20 May 2021 09:56:53 +0200 Subject: [PATCH 218/341] feat: detect obsolete BQ Storage extra at runtime (#666) * feat: detect obsolete BQ Storage extra at runtime * Cover the changes with unit tests * Skip BQ Storage version 
tests if extra missing * Rename and improve _create_bqstorage_client() The method is renamed to _ensure_bqstorage_client() and now performs a check if BQ Storage dependency is recent enough. * Remove BQ Storage check from dbapi.Cursor The check is now performed in dbapi.Connection, which is sufficient. * Remove BQ Storage check in _pandas_helpers The methods in higher layers already do the same check before a BQ Storage client instance is passed to _pandas_helpers._download_table_bqstorage() helper. * Simplify BQ Storage client factory in magics Lean more heavily on client._ensure_bqstorage_client() to de-duplicate logic. * Cover missing code lines with tests --- google/cloud/bigquery/__init__.py | 3 + google/cloud/bigquery/_helpers.py | 30 +++++++++ google/cloud/bigquery/client.py | 57 +++++++++++++++-- google/cloud/bigquery/dbapi/connection.py | 6 +- google/cloud/bigquery/exceptions.py | 21 +++++++ google/cloud/bigquery/magics/magics.py | 11 ++-- google/cloud/bigquery/table.py | 14 ++++- tests/unit/test__helpers.py | 38 ++++++++++++ tests/unit/test_client.py | 76 +++++++++++++++++++++-- tests/unit/test_dbapi_connection.py | 20 +++++- tests/unit/test_dbapi_cursor.py | 12 +++- tests/unit/test_magics.py | 44 ++++++++++++- tests/unit/test_table.py | 61 +++++++++++++++--- 13 files changed, 357 insertions(+), 36 deletions(-) create mode 100644 google/cloud/bigquery/exceptions.py diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index f609468f5..ec08b2c84 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -39,6 +39,7 @@ from google.cloud.bigquery import enums from google.cloud.bigquery.enums import SqlTypeNames from google.cloud.bigquery.enums import StandardSqlDataTypes +from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions from 
google.cloud.bigquery.external_config import BigtableColumnFamily @@ -152,6 +153,8 @@ "WriteDisposition", # EncryptionConfiguration "EncryptionConfiguration", + # Custom exceptions + "LegacyBigQueryStorageError", ] diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 4fe29291d..7602483c2 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -25,6 +25,10 @@ from google.cloud._helpers import _RFC3339_MICROS from google.cloud._helpers import _RFC3339_NO_FRACTION from google.cloud._helpers import _to_bytes +import pkg_resources + +from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + _RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f" _TIMEONLY_WO_MICROS = "%H:%M:%S" @@ -36,6 +40,32 @@ re.VERBOSE, ) +_MIN_BQ_STORAGE_VERSION = pkg_resources.parse_version("2.0.0") + + +def _verify_bq_storage_version(): + """Verify that a recent enough version of BigQuery Storage extra is installed. + + The function assumes that google-cloud-bigquery-storage extra is installed, and + should thus be used in places where this assumption holds. + + Because `pip` can install an outdated version of this extra despite the constraints + in setup.py, the calling code can use this helper to verify the version + compatibility at runtime. + """ + from google.cloud import bigquery_storage + + installed_version = pkg_resources.parse_version( + getattr(bigquery_storage, "__version__", "legacy") + ) + + if installed_version < _MIN_BQ_STORAGE_VERSION: + msg = ( + "Dependency google-cloud-bigquery-storage is outdated, please upgrade " + f"it to version >= 2.0.0 (version found: {installed_version})."
+ ) + raise LegacyBigQueryStorageError(msg) + def _not_null(value, field): """Check whether 'value' should be coerced to 'field' type.""" diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 8d0acb867..7ef3795a8 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -50,16 +50,25 @@ from google.cloud import exceptions # pytype: disable=import-error from google.cloud.client import ClientWithProject # pytype: disable=import-error +try: + from google.cloud.bigquery_storage_v1.services.big_query_read.client import ( + DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO, + ) +except ImportError: + DEFAULT_BQSTORAGE_CLIENT_INFO = None + from google.cloud.bigquery._helpers import _del_sub_prop from google.cloud.bigquery._helpers import _get_sub_prop from google.cloud.bigquery._helpers import _record_field_to_json from google.cloud.bigquery._helpers import _str_or_none +from google.cloud.bigquery._helpers import _verify_bq_storage_version from google.cloud.bigquery._helpers import _verify_job_config_type from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _pandas_helpers from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job from google.cloud.bigquery.job import ( @@ -445,15 +454,38 @@ def dataset(self, dataset_id: str, project: str = None) -> DatasetReference: ) return DatasetReference(project, dataset_id) - def _create_bqstorage_client(self): + def _ensure_bqstorage_client( + self, + bqstorage_client: Optional[ + "google.cloud.bigquery_storage.BigQueryReadClient" + ] = None, + client_options: Optional[google.api_core.client_options.ClientOptions] = None, + client_info: Optional[ + 
"google.api_core.gapic_v1.client_info.ClientInfo" + ] = DEFAULT_BQSTORAGE_CLIENT_INFO, + ) -> Optional["google.cloud.bigquery_storage.BigQueryReadClient"]: """Create a BigQuery Storage API client using this client's credentials. - If a client cannot be created due to missing dependencies, raise a - warning and return ``None``. + If a client cannot be created due to a missing or outdated dependency + `google-cloud-bigquery-storage`, raise a warning and return ``None``. + + If the `bqstorage_client` argument is not ``None``, still perform the version + check and return the argument back to the caller if the check passes. If it + fails, raise a warning and return ``None``. + + Args: + bqstorage_client: + An existing BigQuery Storage client instance to check for version + compatibility. If ``None``, a new instance is created and returned. + client_options: + Custom options used with a new BigQuery Storage client instance if one + is created. + client_info: + The client info used with a new BigQuery Storage client instance if one + is created. Returns: - Optional[google.cloud.bigquery_storage.BigQueryReadClient]: - A BigQuery Storage API client. + A BigQuery Storage API client. 
""" try: from google.cloud import bigquery_storage @@ -464,7 +496,20 @@ def _create_bqstorage_client(self): ) return None - return bigquery_storage.BigQueryReadClient(credentials=self._credentials) + try: + _verify_bq_storage_version() + except LegacyBigQueryStorageError as exc: + warnings.warn(str(exc)) + return None + + if bqstorage_client is None: + bqstorage_client = bigquery_storage.BigQueryReadClient( + credentials=self._credentials, + client_options=client_options, + client_info=client_info, + ) + + return bqstorage_client def _dataset_from_arg(self, dataset): if isinstance(dataset, str): diff --git a/google/cloud/bigquery/dbapi/connection.py b/google/cloud/bigquery/dbapi/connection.py index 459fc82aa..66dee7dfb 100644 --- a/google/cloud/bigquery/dbapi/connection.py +++ b/google/cloud/bigquery/dbapi/connection.py @@ -47,12 +47,14 @@ def __init__(self, client=None, bqstorage_client=None): else: self._owns_client = False + # A warning is already raised by the BQ Storage client factory if + # instantiation fails, or if the given BQ Storage client instance is outdated. if bqstorage_client is None: - # A warning is already raised by the factory if instantiation fails. - bqstorage_client = client._create_bqstorage_client() + bqstorage_client = client._ensure_bqstorage_client() self._owns_bqstorage_client = bqstorage_client is not None else: self._owns_bqstorage_client = False + bqstorage_client = client._ensure_bqstorage_client(bqstorage_client) self._client = client self._bqstorage_client = bqstorage_client diff --git a/google/cloud/bigquery/exceptions.py b/google/cloud/bigquery/exceptions.py new file mode 100644 index 000000000..6e5c27eb1 --- /dev/null +++ b/google/cloud/bigquery/exceptions.py @@ -0,0 +1,21 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class BigQueryError(Exception): + """Base class for all custom exceptions defined by the BigQuery client.""" + + +class LegacyBigQueryStorageError(BigQueryError): + """Raised when too old a version of BigQuery Storage extra is detected at runtime.""" diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 474d9a74a..2b8c2928e 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -644,7 +644,7 @@ def _cell_magic(line, query): bqstorage_client_options.api_endpoint = args.bqstorage_api_endpoint bqstorage_client = _make_bqstorage_client( - use_bqstorage_api, context.credentials, bqstorage_client_options, + client, use_bqstorage_api, bqstorage_client_options, ) close_transports = functools.partial(_close_transports, client, bqstorage_client) @@ -762,12 +762,12 @@ def _split_args_line(line): return params_option_value, rest_of_args -def _make_bqstorage_client(use_bqstorage_api, credentials, client_options): +def _make_bqstorage_client(client, use_bqstorage_api, client_options): if not use_bqstorage_api: return None try: - from google.cloud import bigquery_storage + from google.cloud import bigquery_storage # noqa: F401 except ImportError as err: customized_error = ImportError( "The default BigQuery Storage API client cannot be used, install " @@ -785,10 +785,9 @@ def _make_bqstorage_client(use_bqstorage_api, credentials, client_options): ) raise customized_error from err - return bigquery_storage.BigQueryReadClient( - credentials=credentials, - 
client_info=gapic_client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT), + return client._ensure_bqstorage_client( client_options=client_options, + client_info=gapic_client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT), ) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index b91c91a39..b12209252 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -41,6 +41,7 @@ import google.cloud._helpers from google.cloud.bigquery import _helpers from google.cloud.bigquery import _pandas_helpers +from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields @@ -1519,6 +1520,17 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): ) return False + try: + from google.cloud import bigquery_storage # noqa: F401 + except ImportError: + return False + + try: + _helpers._verify_bq_storage_version() + except LegacyBigQueryStorageError as exc: + warnings.warn(str(exc)) + return False + return True def _get_next_page_response(self): @@ -1655,7 +1667,7 @@ def to_arrow( owns_bqstorage_client = False if not bqstorage_client and create_bqstorage_client: - bqstorage_client = self.client._create_bqstorage_client() + bqstorage_client = self.client._ensure_bqstorage_client() owns_bqstorage_client = bqstorage_client is not None try: diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 2437f3568..0ac76d424 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -19,6 +19,44 @@ import mock +try: + from google.cloud import bigquery_storage +except ImportError: # pragma: NO COVER + bigquery_storage = None + + +@unittest.skipIf(bigquery_storage is None, "Requires `google-cloud-bigquery-storage`") +class Test_verify_bq_storage_version(unittest.TestCase): + def _call_fut(self): + 
from google.cloud.bigquery._helpers import _verify_bq_storage_version + + return _verify_bq_storage_version() + + def test_raises_no_error_w_recent_bqstorage(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + with mock.patch("google.cloud.bigquery_storage.__version__", new="2.0.0"): + try: + self._call_fut() + except LegacyBigQueryStorageError: # pragma: NO COVER + self.fail("Legacy error raised with a non-legacy dependency version.") + + def test_raises_error_w_legacy_bqstorage(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + with mock.patch("google.cloud.bigquery_storage.__version__", new="1.9.9"): + with self.assertRaises(LegacyBigQueryStorageError): + self._call_fut() + + def test_raises_error_w_unknown_bqstorage_version(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + with mock.patch("google.cloud.bigquery_storage", autospec=True) as fake_module: + del fake_module.__version__ + error_pattern = r"version found: legacy" + with self.assertRaisesRegex(LegacyBigQueryStorageError, error_pattern): + self._call_fut() + class Test_not_null(unittest.TestCase): def _call_fut(self, value, field): diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 8f535145b..1346a1ef6 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -822,7 +822,7 @@ def test_get_dataset(self): @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) - def test_create_bqstorage_client(self): + def test_ensure_bqstorage_client_creating_new_instance(self): mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) mock_client_instance = object() mock_client.return_value = mock_client_instance @@ -832,12 +832,19 @@ def test_create_bqstorage_client(self): with mock.patch( "google.cloud.bigquery_storage.BigQueryReadClient", mock_client ): - bqstorage_client = client._create_bqstorage_client() + bqstorage_client = 
client._ensure_bqstorage_client( + client_options=mock.sentinel.client_options, + client_info=mock.sentinel.client_info, + ) self.assertIs(bqstorage_client, mock_client_instance) - mock_client.assert_called_once_with(credentials=creds) + mock_client.assert_called_once_with( + credentials=creds, + client_options=mock.sentinel.client_options, + client_info=mock.sentinel.client_info, + ) - def test_create_bqstorage_client_missing_dependency(self): + def test_ensure_bqstorage_client_missing_dependency(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -850,7 +857,7 @@ def fail_bqstorage_import(name, globals, locals, fromlist, level): no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) with no_bqstorage, warnings.catch_warnings(record=True) as warned: - bqstorage_client = client._create_bqstorage_client() + bqstorage_client = client._ensure_bqstorage_client() self.assertIsNone(bqstorage_client) matching_warnings = [ @@ -861,6 +868,65 @@ def fail_bqstorage_import(name, globals, locals, fromlist, level): ] assert matching_warnings, "Missing dependency warning not raised." + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_ensure_bqstorage_client_obsolete_dependency(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + + patcher = mock.patch( + "google.cloud.bigquery.client._verify_bq_storage_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + bqstorage_client = client._ensure_bqstorage_client() + + self.assertIsNone(bqstorage_client) + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." 
+ + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_ensure_bqstorage_client_existing_client_check_passes(self): + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + mock_storage_client = mock.sentinel.mock_storage_client + + bqstorage_client = client._ensure_bqstorage_client( + bqstorage_client=mock_storage_client + ) + + self.assertIs(bqstorage_client, mock_storage_client) + + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_ensure_bqstorage_client_existing_client_check_fails(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + mock_storage_client = mock.sentinel.mock_storage_client + + patcher = mock.patch( + "google.cloud.bigquery.client._verify_bq_storage_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + bqstorage_client = client._ensure_bqstorage_client(mock_storage_client) + + self.assertIsNone(bqstorage_client) + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." 
+ def test_create_routine_w_minimal_resource(self): from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference diff --git a/tests/unit/test_dbapi_connection.py b/tests/unit/test_dbapi_connection.py index 74da318bf..0576cad38 100644 --- a/tests/unit/test_dbapi_connection.py +++ b/tests/unit/test_dbapi_connection.py @@ -51,7 +51,7 @@ def test_ctor_wo_bqstorage_client(self): from google.cloud.bigquery.dbapi import Connection mock_client = self._mock_client() - mock_client._create_bqstorage_client.return_value = None + mock_client._ensure_bqstorage_client.return_value = None connection = self._make_one(client=mock_client) self.assertIsInstance(connection, Connection) @@ -66,9 +66,15 @@ def test_ctor_w_bqstorage_client(self): mock_client = self._mock_client() mock_bqstorage_client = self._mock_bqstorage_client() + mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client + connection = self._make_one( client=mock_client, bqstorage_client=mock_bqstorage_client, ) + + mock_client._ensure_bqstorage_client.assert_called_once_with( + mock_bqstorage_client + ) self.assertIsInstance(connection, Connection) self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, mock_bqstorage_client) @@ -92,9 +98,11 @@ def test_connect_w_client(self): mock_client = self._mock_client() mock_bqstorage_client = self._mock_bqstorage_client() - mock_client._create_bqstorage_client.return_value = mock_bqstorage_client + mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client connection = connect(client=mock_client) + + mock_client._ensure_bqstorage_client.assert_called_once_with() self.assertIsInstance(connection, Connection) self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, mock_bqstorage_client) @@ -108,9 +116,15 @@ def test_connect_w_both_clients(self): mock_client = self._mock_client() mock_bqstorage_client = self._mock_bqstorage_client() 
+ mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client + connection = connect( client=mock_client, bqstorage_client=mock_bqstorage_client, ) + + mock_client._ensure_bqstorage_client.assert_called_once_with( + mock_bqstorage_client + ) self.assertIsInstance(connection, Connection) self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, mock_bqstorage_client) @@ -140,7 +154,7 @@ def test_close_closes_all_created_bigquery_clients(self): return_value=client, ) bqstorage_client_patcher = mock.patch.object( - client, "_create_bqstorage_client", return_value=bqstorage_client, + client, "_ensure_bqstorage_client", return_value=bqstorage_client, ) with client_patcher, bqstorage_client_patcher: diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index 55e453254..a2d6693d0 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -72,7 +72,7 @@ def _mock_client( mock_client._default_query_job_config = default_query_job_config # Assure that the REST client gets used, not the BQ Storage client. 
- mock_client._create_bqstorage_client.return_value = None + mock_client._ensure_bqstorage_client.return_value = None return mock_client @@ -311,6 +311,7 @@ def test_fetchall_w_bqstorage_client_fetch_success(self): mock_bqstorage_client = self._mock_bqstorage_client( stream_count=1, rows=bqstorage_streamed_rows, ) + mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client connection = dbapi.connect( client=mock_client, bqstorage_client=mock_bqstorage_client, @@ -341,6 +342,7 @@ def test_fetchall_w_bqstorage_client_fetch_no_rows(self): mock_client = self._mock_client(rows=[]) mock_bqstorage_client = self._mock_bqstorage_client(stream_count=0) + mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client connection = dbapi.connect( client=mock_client, bqstorage_client=mock_bqstorage_client, @@ -365,7 +367,11 @@ def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self): row_data = [table.Row([1.1, 1.2], {"foo": 0, "bar": 1})] + def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): + return bqstorage_client + mock_client = self._mock_client(rows=row_data) + mock_client._ensure_bqstorage_client.side_effect = fake_ensure_bqstorage_client mock_bqstorage_client = self._mock_bqstorage_client( stream_count=1, rows=row_data, ) @@ -396,7 +402,11 @@ def test_fetchall_w_bqstorage_client_no_arrow_compression(self): row_data = [table.Row([1.2, 1.1], {"bar": 1, "foo": 0})] bqstorage_streamed_rows = [{"bar": _to_pyarrow(1.2), "foo": _to_pyarrow(1.1)}] + def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): + return bqstorage_client + mock_client = self._mock_client(rows=row_data) + mock_client._ensure_bqstorage_client.side_effect = fake_ensure_bqstorage_client mock_bqstorage_client = self._mock_bqstorage_client( stream_count=1, rows=bqstorage_streamed_rows, ) diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index ff41fe720..5e9bf28a9 100644 --- a/tests/unit/test_magics.py +++ 
b/tests/unit/test_magics.py @@ -317,7 +317,10 @@ def test__make_bqstorage_client_false(): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) - got = magics._make_bqstorage_client(False, credentials_mock, {}) + test_client = bigquery.Client( + project="test_project", credentials=credentials_mock, location="test_location" + ) + got = magics._make_bqstorage_client(test_client, False, {}) assert got is None @@ -328,7 +331,10 @@ def test__make_bqstorage_client_true(): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) - got = magics._make_bqstorage_client(True, credentials_mock, {}) + test_client = bigquery.Client( + project="test_project", credentials=credentials_mock, location="test_location" + ) + got = magics._make_bqstorage_client(test_client, True, {}) assert isinstance(got, bigquery_storage.BigQueryReadClient) @@ -336,15 +342,46 @@ def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + test_client = bigquery.Client( + project="test_project", credentials=credentials_mock, location="test_location" + ) with pytest.raises(ImportError) as exc_context, missing_bq_storage: - magics._make_bqstorage_client(True, credentials_mock, {}) + magics._make_bqstorage_client(test_client, True, {}) error_msg = str(exc_context.value) assert "google-cloud-bigquery-storage" in error_msg assert "pyarrow" in error_msg +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test__make_bqstorage_client_true_obsolete_dependency(): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + test_client = bigquery.Client( + project="test_project", credentials=credentials_mock, location="test_location" + ) + + patcher = 
mock.patch( + "google.cloud.bigquery.client._verify_bq_storage_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + got = magics._make_bqstorage_client(test_client, True, {}) + + assert got is None + + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." + + @pytest.mark.skipif( bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) @@ -887,6 +924,7 @@ def test_bigquery_magic_w_table_id_and_bqstorage_client(): table_id = "bigquery-public-data.samples.shakespeare" with default_patch, client_patch as client_mock, bqstorage_client_patch: + client_mock()._ensure_bqstorage_client.return_value = bqstorage_instance_mock client_mock().list_rows.return_value = row_iterator_mock ip.run_cell_magic("bigquery", "--max_results=5", table_id) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index ce4a15761..0f2ab00c1 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -24,6 +24,7 @@ import pytz import google.api_core.exceptions +from test_utils.imports import maybe_fail_import try: from google.cloud import bigquery_storage @@ -1768,6 +1769,48 @@ def test__validate_bqstorage_returns_false_when_completely_cached(self): ) ) + def test__validate_bqstorage_returns_false_if_missing_dependency(self): + iterator = self._make_one(first_page_response=None) # not cached + + def fail_bqstorage_import(name, globals, locals, fromlist, level): + # NOTE: *very* simplified, assuming a straightforward absolute import + return "bigquery_storage" in name or ( + fromlist is not None and "bigquery_storage" in fromlist + ) + + no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) + + with no_bqstorage: + result = iterator._validate_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + + self.assertFalse(result) + + 
@unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + iterator = self._make_one(first_page_response=None) # not cached + + patcher = mock.patch( + "google.cloud.bigquery.table._helpers._verify_bq_storage_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + result = iterator._validate_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + + self.assertFalse(result) + + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): from google.cloud.bigquery.schema import SchemaField @@ -2003,7 +2046,7 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): and "REST" in str(warning) ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") - mock_client._create_bqstorage_client.assert_not_called() + mock_client._ensure_bqstorage_client.assert_not_called() @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( @@ -2099,7 +2142,7 @@ def test_to_arrow_w_bqstorage_creates_client(self): bqstorage_client._transport = mock.create_autospec( big_query_read_grpc_transport.BigQueryReadGrpcTransport ) - mock_client._create_bqstorage_client.return_value = bqstorage_client + mock_client._ensure_bqstorage_client.return_value = bqstorage_client session = bigquery_storage.types.ReadSession() bqstorage_client.create_read_session.return_value = session row_iterator = mut.RowIterator( @@ -2114,11 +2157,11 @@ def test_to_arrow_w_bqstorage_creates_client(self): table=mut.TableReference.from_string("proj.dset.tbl"), ) 
row_iterator.to_arrow(create_bqstorage_client=True) - mock_client._create_bqstorage_client.assert_called_once() + mock_client._ensure_bqstorage_client.assert_called_once() bqstorage_client._transport.grpc_channel.close.assert_called_once() @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_to_arrow_create_bqstorage_client_wo_bqstorage(self): + def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): from google.cloud.bigquery.schema import SchemaField schema = [ @@ -2133,14 +2176,14 @@ def test_to_arrow_create_bqstorage_client_wo_bqstorage(self): api_request = mock.Mock(return_value={"rows": rows}) mock_client = _mock_client() - mock_client._create_bqstorage_client.return_value = None + mock_client._ensure_bqstorage_client.return_value = None row_iterator = self._make_one(mock_client, api_request, path, schema) tbl = row_iterator.to_arrow(create_bqstorage_client=True) # The client attempted to create a BQ Storage client, and even though # that was not possible, results were still returned without errors. 
- mock_client._create_bqstorage_client.assert_called_once() + mock_client._ensure_bqstorage_client.assert_called_once() self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 2) @@ -2824,7 +2867,7 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): and "REST" in str(warning) ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") - mock_client._create_bqstorage_client.assert_not_called() + mock_client._ensure_bqstorage_client.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( @@ -2839,7 +2882,7 @@ def test_to_dataframe_w_bqstorage_creates_client(self): bqstorage_client._transport = mock.create_autospec( big_query_read_grpc_transport.BigQueryReadGrpcTransport ) - mock_client._create_bqstorage_client.return_value = bqstorage_client + mock_client._ensure_bqstorage_client.return_value = bqstorage_client session = bigquery_storage.types.ReadSession() bqstorage_client.create_read_session.return_value = session row_iterator = mut.RowIterator( @@ -2854,7 +2897,7 @@ def test_to_dataframe_w_bqstorage_creates_client(self): table=mut.TableReference.from_string("proj.dset.tbl"), ) row_iterator.to_dataframe(create_bqstorage_client=True) - mock_client._create_bqstorage_client.assert_called_once() + mock_client._ensure_bqstorage_client.assert_called_once() bqstorage_client._transport.grpc_channel.close.assert_called_once() @unittest.skipIf(pandas is None, "Requires `pandas`") From e983bda009e006e7544089ea2ceb15f01831dffb Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 20 May 2021 22:24:04 +0000 Subject: [PATCH 219/341] chore: upgrade gapic-generator-python to 0.46.3 (#664) PiperOrigin-RevId: 373649163 Source-Link: https://github.com/googleapis/googleapis/commit/7e1b14e6c7a9ab96d2db7e4a131981f162446d34 Source-Link: https://github.com/googleapis/googleapis-gen/commit/0a3c7d272d697796db75857bac73905c68e498c3 --- 
google/cloud/bigquery_v2/__init__.py | 2 - google/cloud/bigquery_v2/gapic_metadata.json | 63 +++ google/cloud/bigquery_v2/types/__init__.py | 2 - .../bigquery_v2/types/encryption_config.py | 10 +- google/cloud/bigquery_v2/types/model.py | 444 ++++++------------ .../bigquery_v2/types/model_reference.py | 11 +- .../cloud/bigquery_v2/types/standard_sql.py | 9 +- .../bigquery_v2/types/table_reference.py | 11 +- owlbot.py | 17 +- tests/__init__.py | 15 + tests/unit/__init__.py | 4 +- tests/unit/gapic/__init__.py | 15 + 12 files changed, 271 insertions(+), 332 deletions(-) create mode 100644 google/cloud/bigquery_v2/gapic_metadata.json create mode 100644 tests/unit/gapic/__init__.py diff --git a/google/cloud/bigquery_v2/__init__.py b/google/cloud/bigquery_v2/__init__.py index ebcc26bef..476bd5747 100644 --- a/google/cloud/bigquery_v2/__init__.py +++ b/google/cloud/bigquery_v2/__init__.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - # Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -29,7 +28,6 @@ from .types.standard_sql import StandardSqlStructType from .types.table_reference import TableReference - __all__ = ( "DeleteModelRequest", "EncryptionConfiguration", diff --git a/google/cloud/bigquery_v2/gapic_metadata.json b/google/cloud/bigquery_v2/gapic_metadata.json new file mode 100644 index 000000000..3251a2630 --- /dev/null +++ b/google/cloud/bigquery_v2/gapic_metadata.json @@ -0,0 +1,63 @@ + { + "comment": "This file maps proto services/RPCs to the corresponding library clients/methods", + "language": "python", + "libraryPackage": "google.cloud.bigquery_v2", + "protoPackage": "google.cloud.bigquery.v2", + "schema": "1.0", + "services": { + "ModelService": { + "clients": { + "grpc": { + "libraryClient": "ModelServiceClient", + "rpcs": { + "DeleteModel": { + "methods": [ + "delete_model" + ] + }, + "GetModel": { + "methods": [ + "get_model" + ] + }, + "ListModels": { + "methods": [ + "list_models" + ] + }, + "PatchModel": { + 
"methods": [ + "patch_model" + ] + } + } + }, + "grpc-async": { + "libraryClient": "ModelServiceAsyncClient", + "rpcs": { + "DeleteModel": { + "methods": [ + "delete_model" + ] + }, + "GetModel": { + "methods": [ + "get_model" + ] + }, + "ListModels": { + "methods": [ + "list_models" + ] + }, + "PatchModel": { + "methods": [ + "patch_model" + ] + } + } + } + } + } + } +} diff --git a/google/cloud/bigquery_v2/types/__init__.py b/google/cloud/bigquery_v2/types/__init__.py index b76e65c65..9c850dca1 100644 --- a/google/cloud/bigquery_v2/types/__init__.py +++ b/google/cloud/bigquery_v2/types/__init__.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - # Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # - from .encryption_config import EncryptionConfiguration from .model import ( DeleteModelRequest, diff --git a/google/cloud/bigquery_v2/types/encryption_config.py b/google/cloud/bigquery_v2/types/encryption_config.py index 2d801bde3..4b9139733 100644 --- a/google/cloud/bigquery_v2/types/encryption_config.py +++ b/google/cloud/bigquery_v2/types/encryption_config.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - # Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,11 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. # - import proto # type: ignore - -from google.protobuf import wrappers_pb2 as wrappers # type: ignore +from google.protobuf import wrappers_pb2 # type: ignore __protobuf__ = proto.module( @@ -28,7 +25,6 @@ class EncryptionConfiguration(proto.Message): r""" - Attributes: kms_key_name (google.protobuf.wrappers_pb2.StringValue): Optional. Describes the Cloud KMS encryption @@ -38,7 +34,9 @@ class EncryptionConfiguration(proto.Message): this encryption key. 
""" - kms_key_name = proto.Field(proto.MESSAGE, number=1, message=wrappers.StringValue,) + kms_key_name = proto.Field( + proto.MESSAGE, number=1, message=wrappers_pb2.StringValue, + ) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery_v2/types/model.py b/google/cloud/bigquery_v2/types/model.py index 8ae158b64..17e101d25 100644 --- a/google/cloud/bigquery_v2/types/model.py +++ b/google/cloud/bigquery_v2/types/model.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - # Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,16 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. # - import proto # type: ignore - from google.cloud.bigquery_v2.types import encryption_config from google.cloud.bigquery_v2.types import model_reference as gcb_model_reference from google.cloud.bigquery_v2.types import standard_sql from google.cloud.bigquery_v2.types import table_reference -from google.protobuf import timestamp_pb2 as timestamp # type: ignore -from google.protobuf import wrappers_pb2 as wrappers # type: ignore +from google.protobuf import timestamp_pb2 # type: ignore +from google.protobuf import wrappers_pb2 # type: ignore __protobuf__ = proto.module( @@ -41,7 +38,6 @@ class Model(proto.Message): r""" - Attributes: etag (str): Output only. A hash of this resource. 
@@ -251,7 +247,7 @@ class FeedbackType(proto.Enum): EXPLICIT = 2 class SeasonalPeriod(proto.Message): - r"""""" + r""" """ class SeasonalPeriodType(proto.Enum): r"""""" @@ -264,7 +260,7 @@ class SeasonalPeriodType(proto.Enum): YEARLY = 6 class KmeansEnums(proto.Message): - r"""""" + r""" """ class KmeansInitializationMethod(proto.Enum): r"""Indicates the method used to initialize the centroids for @@ -293,22 +289,20 @@ class RegressionMetrics(proto.Message): """ mean_absolute_error = proto.Field( - proto.MESSAGE, number=1, message=wrappers.DoubleValue, + proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, ) - mean_squared_error = proto.Field( - proto.MESSAGE, number=2, message=wrappers.DoubleValue, + proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, ) - mean_squared_log_error = proto.Field( - proto.MESSAGE, number=3, message=wrappers.DoubleValue, + proto.MESSAGE, number=3, message=wrappers_pb2.DoubleValue, ) - median_absolute_error = proto.Field( - proto.MESSAGE, number=4, message=wrappers.DoubleValue, + proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue, + ) + r_squared = proto.Field( + proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, ) - - r_squared = proto.Field(proto.MESSAGE, number=5, message=wrappers.DoubleValue,) class AggregateClassificationMetrics(proto.Message): r"""Aggregate metrics for classification/classifier models. For @@ -350,19 +344,25 @@ class AggregateClassificationMetrics(proto.Message): is a macro-averaged metric. 
""" - precision = proto.Field(proto.MESSAGE, number=1, message=wrappers.DoubleValue,) - - recall = proto.Field(proto.MESSAGE, number=2, message=wrappers.DoubleValue,) - - accuracy = proto.Field(proto.MESSAGE, number=3, message=wrappers.DoubleValue,) - - threshold = proto.Field(proto.MESSAGE, number=4, message=wrappers.DoubleValue,) - - f1_score = proto.Field(proto.MESSAGE, number=5, message=wrappers.DoubleValue,) - - log_loss = proto.Field(proto.MESSAGE, number=6, message=wrappers.DoubleValue,) - - roc_auc = proto.Field(proto.MESSAGE, number=7, message=wrappers.DoubleValue,) + precision = proto.Field( + proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, + ) + recall = proto.Field(proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue,) + accuracy = proto.Field( + proto.MESSAGE, number=3, message=wrappers_pb2.DoubleValue, + ) + threshold = proto.Field( + proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue, + ) + f1_score = proto.Field( + proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, + ) + log_loss = proto.Field( + proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, + ) + roc_auc = proto.Field( + proto.MESSAGE, number=7, message=wrappers_pb2.DoubleValue, + ) class BinaryClassificationMetrics(proto.Message): r"""Evaluation metrics for binary classification/classifier @@ -382,7 +382,6 @@ class BinaryClassificationMetrics(proto.Message): class BinaryConfusionMatrix(proto.Message): r"""Confusion matrix for binary classification models. 
- Attributes: positive_class_threshold (google.protobuf.wrappers_pb2.DoubleValue): Threshold value used when computing each of @@ -410,52 +409,43 @@ class BinaryConfusionMatrix(proto.Message): """ positive_class_threshold = proto.Field( - proto.MESSAGE, number=1, message=wrappers.DoubleValue, + proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, ) - true_positives = proto.Field( - proto.MESSAGE, number=2, message=wrappers.Int64Value, + proto.MESSAGE, number=2, message=wrappers_pb2.Int64Value, ) - false_positives = proto.Field( - proto.MESSAGE, number=3, message=wrappers.Int64Value, + proto.MESSAGE, number=3, message=wrappers_pb2.Int64Value, ) - true_negatives = proto.Field( - proto.MESSAGE, number=4, message=wrappers.Int64Value, + proto.MESSAGE, number=4, message=wrappers_pb2.Int64Value, ) - false_negatives = proto.Field( - proto.MESSAGE, number=5, message=wrappers.Int64Value, + proto.MESSAGE, number=5, message=wrappers_pb2.Int64Value, ) - precision = proto.Field( - proto.MESSAGE, number=6, message=wrappers.DoubleValue, + proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, + ) + recall = proto.Field( + proto.MESSAGE, number=7, message=wrappers_pb2.DoubleValue, ) - - recall = proto.Field(proto.MESSAGE, number=7, message=wrappers.DoubleValue,) - f1_score = proto.Field( - proto.MESSAGE, number=8, message=wrappers.DoubleValue, + proto.MESSAGE, number=8, message=wrappers_pb2.DoubleValue, ) - accuracy = proto.Field( - proto.MESSAGE, number=9, message=wrappers.DoubleValue, + proto.MESSAGE, number=9, message=wrappers_pb2.DoubleValue, ) aggregate_classification_metrics = proto.Field( proto.MESSAGE, number=1, message="Model.AggregateClassificationMetrics", ) - binary_confusion_matrix_list = proto.RepeatedField( proto.MESSAGE, number=2, message="Model.BinaryClassificationMetrics.BinaryConfusionMatrix", ) - - positive_label = proto.Field(proto.STRING, number=3) - - negative_label = proto.Field(proto.STRING, number=4) + positive_label = proto.Field(proto.STRING, 
number=3,) + negative_label = proto.Field(proto.STRING, number=4,) class MultiClassClassificationMetrics(proto.Message): r"""Evaluation metrics for multi-class classification/classifier @@ -470,7 +460,6 @@ class MultiClassClassificationMetrics(proto.Message): class ConfusionMatrix(proto.Message): r"""Confusion matrix for multi-class classification models. - Attributes: confidence_threshold (google.protobuf.wrappers_pb2.DoubleValue): Confidence threshold used when computing the @@ -481,7 +470,6 @@ class ConfusionMatrix(proto.Message): class Entry(proto.Message): r"""A single entry in the confusion matrix. - Attributes: predicted_label (str): The predicted label. For confidence_threshold > 0, we will @@ -492,15 +480,13 @@ class Entry(proto.Message): label. """ - predicted_label = proto.Field(proto.STRING, number=1) - + predicted_label = proto.Field(proto.STRING, number=1,) item_count = proto.Field( - proto.MESSAGE, number=2, message=wrappers.Int64Value, + proto.MESSAGE, number=2, message=wrappers_pb2.Int64Value, ) class Row(proto.Message): r"""A single row in the confusion matrix. - Attributes: actual_label (str): The original label of this row. @@ -508,8 +494,7 @@ class Row(proto.Message): Info describing predicted label distribution. 
""" - actual_label = proto.Field(proto.STRING, number=1) - + actual_label = proto.Field(proto.STRING, number=1,) entries = proto.RepeatedField( proto.MESSAGE, number=2, @@ -517,9 +502,8 @@ class Row(proto.Message): ) confidence_threshold = proto.Field( - proto.MESSAGE, number=1, message=wrappers.DoubleValue, + proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, ) - rows = proto.RepeatedField( proto.MESSAGE, number=2, @@ -529,7 +513,6 @@ class Row(proto.Message): aggregate_classification_metrics = proto.Field( proto.MESSAGE, number=1, message="Model.AggregateClassificationMetrics", ) - confusion_matrix_list = proto.RepeatedField( proto.MESSAGE, number=2, @@ -538,7 +521,6 @@ class Row(proto.Message): class ClusteringMetrics(proto.Message): r"""Evaluation metrics for clustering models. - Attributes: davies_bouldin_index (google.protobuf.wrappers_pb2.DoubleValue): Davies-Bouldin index. @@ -551,7 +533,6 @@ class ClusteringMetrics(proto.Message): class Cluster(proto.Message): r"""Message containing the information about one cluster. - Attributes: centroid_id (int): Centroid id. @@ -565,7 +546,6 @@ class Cluster(proto.Message): class FeatureValue(proto.Message): r"""Representative value of a single feature within the cluster. - Attributes: feature_column (str): The feature column name. @@ -578,7 +558,6 @@ class FeatureValue(proto.Message): class CategoricalValue(proto.Message): r"""Representative value of a categorical feature. - Attributes: category_counts (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount]): Counts of all categories for the categorical feature. If @@ -590,7 +569,6 @@ class CategoricalValue(proto.Message): class CategoryCount(proto.Message): r"""Represents the count of a single category within the cluster. - Attributes: category (str): The name of category. @@ -599,10 +577,9 @@ class CategoryCount(proto.Message): category within the cluster. 
""" - category = proto.Field(proto.STRING, number=1) - + category = proto.Field(proto.STRING, number=1,) count = proto.Field( - proto.MESSAGE, number=2, message=wrappers.Int64Value, + proto.MESSAGE, number=2, message=wrappers_pb2.Int64Value, ) category_counts = proto.RepeatedField( @@ -611,15 +588,13 @@ class CategoryCount(proto.Message): message="Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount", ) - feature_column = proto.Field(proto.STRING, number=1) - + feature_column = proto.Field(proto.STRING, number=1,) numerical_value = proto.Field( proto.MESSAGE, number=2, oneof="value", - message=wrappers.DoubleValue, + message=wrappers_pb2.DoubleValue, ) - categorical_value = proto.Field( proto.MESSAGE, number=3, @@ -627,24 +602,22 @@ class CategoryCount(proto.Message): message="Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue", ) - centroid_id = proto.Field(proto.INT64, number=1) - + centroid_id = proto.Field(proto.INT64, number=1,) feature_values = proto.RepeatedField( proto.MESSAGE, number=2, message="Model.ClusteringMetrics.Cluster.FeatureValue", ) - - count = proto.Field(proto.MESSAGE, number=3, message=wrappers.Int64Value,) + count = proto.Field( + proto.MESSAGE, number=3, message=wrappers_pb2.Int64Value, + ) davies_bouldin_index = proto.Field( - proto.MESSAGE, number=1, message=wrappers.DoubleValue, + proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, ) - mean_squared_distance = proto.Field( - proto.MESSAGE, number=2, message=wrappers.DoubleValue, + proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, ) - clusters = proto.RepeatedField( proto.MESSAGE, number=3, message="Model.ClusteringMetrics.Cluster", ) @@ -677,24 +650,20 @@ class RankingMetrics(proto.Message): """ mean_average_precision = proto.Field( - proto.MESSAGE, number=1, message=wrappers.DoubleValue, + proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, ) - mean_squared_error = proto.Field( - proto.MESSAGE, number=2, 
message=wrappers.DoubleValue, + proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, ) - normalized_discounted_cumulative_gain = proto.Field( - proto.MESSAGE, number=3, message=wrappers.DoubleValue, + proto.MESSAGE, number=3, message=wrappers_pb2.DoubleValue, ) - average_rank = proto.Field( - proto.MESSAGE, number=4, message=wrappers.DoubleValue, + proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue, ) class ArimaForecastingMetrics(proto.Message): r"""Model evaluation metrics for ARIMA forecasting models. - Attributes: non_seasonal_order (Sequence[google.cloud.bigquery_v2.types.Model.ArimaOrder]): Non-seasonal order. @@ -737,15 +706,11 @@ class ArimaSingleModelForecastingMetrics(proto.Message): non_seasonal_order = proto.Field( proto.MESSAGE, number=1, message="Model.ArimaOrder", ) - arima_fitting_metrics = proto.Field( proto.MESSAGE, number=2, message="Model.ArimaFittingMetrics", ) - - has_drift = proto.Field(proto.BOOL, number=3) - - time_series_id = proto.Field(proto.STRING, number=4) - + has_drift = proto.Field(proto.BOOL, number=3,) + time_series_id = proto.Field(proto.STRING, number=4,) seasonal_periods = proto.RepeatedField( proto.ENUM, number=5, enum="Model.SeasonalPeriod.SeasonalPeriodType", ) @@ -753,19 +718,14 @@ class ArimaSingleModelForecastingMetrics(proto.Message): non_seasonal_order = proto.RepeatedField( proto.MESSAGE, number=1, message="Model.ArimaOrder", ) - arima_fitting_metrics = proto.RepeatedField( proto.MESSAGE, number=2, message="Model.ArimaFittingMetrics", ) - seasonal_periods = proto.RepeatedField( proto.ENUM, number=3, enum="Model.SeasonalPeriod.SeasonalPeriodType", ) - - has_drift = proto.RepeatedField(proto.BOOL, number=4) - - time_series_id = proto.RepeatedField(proto.STRING, number=5) - + has_drift = proto.RepeatedField(proto.BOOL, number=4,) + time_series_id = proto.RepeatedField(proto.STRING, number=5,) arima_single_model_forecasting_metrics = proto.RepeatedField( proto.MESSAGE, number=6, @@ -800,29 +760,24 @@ class 
EvaluationMetrics(proto.Message): regression_metrics = proto.Field( proto.MESSAGE, number=1, oneof="metrics", message="Model.RegressionMetrics", ) - binary_classification_metrics = proto.Field( proto.MESSAGE, number=2, oneof="metrics", message="Model.BinaryClassificationMetrics", ) - multi_class_classification_metrics = proto.Field( proto.MESSAGE, number=3, oneof="metrics", message="Model.MultiClassClassificationMetrics", ) - clustering_metrics = proto.Field( proto.MESSAGE, number=4, oneof="metrics", message="Model.ClusteringMetrics", ) - ranking_metrics = proto.Field( proto.MESSAGE, number=5, oneof="metrics", message="Model.RankingMetrics", ) - arima_forecasting_metrics = proto.Field( proto.MESSAGE, number=6, @@ -846,7 +801,6 @@ class DataSplitResult(proto.Message): training_table = proto.Field( proto.MESSAGE, number=1, message=table_reference.TableReference, ) - evaluation_table = proto.Field( proto.MESSAGE, number=2, message=table_reference.TableReference, ) @@ -864,15 +818,12 @@ class ArimaOrder(proto.Message): Order of the moving-average part. """ - p = proto.Field(proto.INT64, number=1) - - d = proto.Field(proto.INT64, number=2) - - q = proto.Field(proto.INT64, number=3) + p = proto.Field(proto.INT64, number=1,) + d = proto.Field(proto.INT64, number=2,) + q = proto.Field(proto.INT64, number=3,) class ArimaFittingMetrics(proto.Message): r"""ARIMA model fitting metrics. - Attributes: log_likelihood (float): Log-likelihood. @@ -882,11 +833,9 @@ class ArimaFittingMetrics(proto.Message): Variance. 
""" - log_likelihood = proto.Field(proto.DOUBLE, number=1) - - aic = proto.Field(proto.DOUBLE, number=2) - - variance = proto.Field(proto.DOUBLE, number=3) + log_likelihood = proto.Field(proto.DOUBLE, number=1,) + aic = proto.Field(proto.DOUBLE, number=2,) + variance = proto.Field(proto.DOUBLE, number=3,) class GlobalExplanation(proto.Message): r"""Global explanations containing the top most important @@ -906,7 +855,6 @@ class GlobalExplanation(proto.Message): class Explanation(proto.Message): r"""Explanation for a single feature. - Attributes: feature_name (str): Full name of the feature. For non-numerical features, will @@ -917,21 +865,18 @@ class Explanation(proto.Message): Attribution of feature. """ - feature_name = proto.Field(proto.STRING, number=1) - + feature_name = proto.Field(proto.STRING, number=1,) attribution = proto.Field( - proto.MESSAGE, number=2, message=wrappers.DoubleValue, + proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, ) explanations = proto.RepeatedField( proto.MESSAGE, number=1, message="Model.GlobalExplanation.Explanation", ) - - class_label = proto.Field(proto.STRING, number=2) + class_label = proto.Field(proto.STRING, number=2,) class TrainingRun(proto.Message): r"""Information about a single training query run for the model. - Attributes: training_options (google.cloud.bigquery_v2.types.Model.TrainingRun.TrainingOptions): Options that were used for this training run, @@ -957,7 +902,6 @@ class TrainingRun(proto.Message): class TrainingOptions(proto.Message): r""" - Attributes: max_iterations (int): The maximum number of iterations in training. @@ -1100,131 +1044,88 @@ class TrainingOptions(proto.Message): The max value of non-seasonal p and q. 
""" - max_iterations = proto.Field(proto.INT64, number=1) - + max_iterations = proto.Field(proto.INT64, number=1,) loss_type = proto.Field(proto.ENUM, number=2, enum="Model.LossType",) - - learn_rate = proto.Field(proto.DOUBLE, number=3) - + learn_rate = proto.Field(proto.DOUBLE, number=3,) l1_regularization = proto.Field( - proto.MESSAGE, number=4, message=wrappers.DoubleValue, + proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue, ) - l2_regularization = proto.Field( - proto.MESSAGE, number=5, message=wrappers.DoubleValue, + proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, ) - min_relative_progress = proto.Field( - proto.MESSAGE, number=6, message=wrappers.DoubleValue, + proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, ) - warm_start = proto.Field( - proto.MESSAGE, number=7, message=wrappers.BoolValue, + proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue, ) - early_stop = proto.Field( - proto.MESSAGE, number=8, message=wrappers.BoolValue, + proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue, ) - - input_label_columns = proto.RepeatedField(proto.STRING, number=9) - + input_label_columns = proto.RepeatedField(proto.STRING, number=9,) data_split_method = proto.Field( proto.ENUM, number=10, enum="Model.DataSplitMethod", ) - - data_split_eval_fraction = proto.Field(proto.DOUBLE, number=11) - - data_split_column = proto.Field(proto.STRING, number=12) - + data_split_eval_fraction = proto.Field(proto.DOUBLE, number=11,) + data_split_column = proto.Field(proto.STRING, number=12,) learn_rate_strategy = proto.Field( proto.ENUM, number=13, enum="Model.LearnRateStrategy", ) - - initial_learn_rate = proto.Field(proto.DOUBLE, number=16) - - label_class_weights = proto.MapField(proto.STRING, proto.DOUBLE, number=17) - - user_column = proto.Field(proto.STRING, number=18) - - item_column = proto.Field(proto.STRING, number=19) - + initial_learn_rate = proto.Field(proto.DOUBLE, number=16,) + label_class_weights = proto.MapField(proto.STRING, 
proto.DOUBLE, number=17,) + user_column = proto.Field(proto.STRING, number=18,) + item_column = proto.Field(proto.STRING, number=19,) distance_type = proto.Field( proto.ENUM, number=20, enum="Model.DistanceType", ) - - num_clusters = proto.Field(proto.INT64, number=21) - - model_uri = proto.Field(proto.STRING, number=22) - + num_clusters = proto.Field(proto.INT64, number=21,) + model_uri = proto.Field(proto.STRING, number=22,) optimization_strategy = proto.Field( proto.ENUM, number=23, enum="Model.OptimizationStrategy", ) - - hidden_units = proto.RepeatedField(proto.INT64, number=24) - - batch_size = proto.Field(proto.INT64, number=25) - + hidden_units = proto.RepeatedField(proto.INT64, number=24,) + batch_size = proto.Field(proto.INT64, number=25,) dropout = proto.Field( - proto.MESSAGE, number=26, message=wrappers.DoubleValue, + proto.MESSAGE, number=26, message=wrappers_pb2.DoubleValue, ) - - max_tree_depth = proto.Field(proto.INT64, number=27) - - subsample = proto.Field(proto.DOUBLE, number=28) - + max_tree_depth = proto.Field(proto.INT64, number=27,) + subsample = proto.Field(proto.DOUBLE, number=28,) min_split_loss = proto.Field( - proto.MESSAGE, number=29, message=wrappers.DoubleValue, + proto.MESSAGE, number=29, message=wrappers_pb2.DoubleValue, ) - - num_factors = proto.Field(proto.INT64, number=30) - + num_factors = proto.Field(proto.INT64, number=30,) feedback_type = proto.Field( proto.ENUM, number=31, enum="Model.FeedbackType", ) - wals_alpha = proto.Field( - proto.MESSAGE, number=32, message=wrappers.DoubleValue, + proto.MESSAGE, number=32, message=wrappers_pb2.DoubleValue, ) - kmeans_initialization_method = proto.Field( proto.ENUM, number=33, enum="Model.KmeansEnums.KmeansInitializationMethod", ) - - kmeans_initialization_column = proto.Field(proto.STRING, number=34) - - time_series_timestamp_column = proto.Field(proto.STRING, number=35) - - time_series_data_column = proto.Field(proto.STRING, number=36) - - auto_arima = proto.Field(proto.BOOL, 
number=37) - + kmeans_initialization_column = proto.Field(proto.STRING, number=34,) + time_series_timestamp_column = proto.Field(proto.STRING, number=35,) + time_series_data_column = proto.Field(proto.STRING, number=36,) + auto_arima = proto.Field(proto.BOOL, number=37,) non_seasonal_order = proto.Field( proto.MESSAGE, number=38, message="Model.ArimaOrder", ) - data_frequency = proto.Field( proto.ENUM, number=39, enum="Model.DataFrequency", ) - - include_drift = proto.Field(proto.BOOL, number=41) - + include_drift = proto.Field(proto.BOOL, number=41,) holiday_region = proto.Field( proto.ENUM, number=42, enum="Model.HolidayRegion", ) - - time_series_id_column = proto.Field(proto.STRING, number=43) - - horizon = proto.Field(proto.INT64, number=44) - - preserve_input_structs = proto.Field(proto.BOOL, number=45) - - auto_arima_max_order = proto.Field(proto.INT64, number=46) + time_series_id_column = proto.Field(proto.STRING, number=43,) + horizon = proto.Field(proto.INT64, number=44,) + preserve_input_structs = proto.Field(proto.BOOL, number=45,) + auto_arima_max_order = proto.Field(proto.INT64, number=46,) class IterationResult(proto.Message): r"""Information about a single iteration of the training run. - Attributes: index (google.protobuf.wrappers_pb2.Int32Value): Index of the iteration, 0 based. @@ -1248,7 +1149,6 @@ class IterationResult(proto.Message): class ClusterInfo(proto.Message): r"""Information about a single cluster for clustering model. - Attributes: centroid_id (int): Centroid id. @@ -1260,14 +1160,12 @@ class ClusterInfo(proto.Message): assigned to the cluster. 
""" - centroid_id = proto.Field(proto.INT64, number=1) - + centroid_id = proto.Field(proto.INT64, number=1,) cluster_radius = proto.Field( - proto.MESSAGE, number=2, message=wrappers.DoubleValue, + proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, ) - cluster_size = proto.Field( - proto.MESSAGE, number=3, message=wrappers.Int64Value, + proto.MESSAGE, number=3, message=wrappers_pb2.Int64Value, ) class ArimaResult(proto.Message): @@ -1287,7 +1185,6 @@ class ArimaResult(proto.Message): class ArimaCoefficients(proto.Message): r"""Arima coefficients. - Attributes: auto_regressive_coefficients (Sequence[float]): Auto-regressive coefficients, an array of @@ -1301,18 +1198,15 @@ class ArimaCoefficients(proto.Message): """ auto_regressive_coefficients = proto.RepeatedField( - proto.DOUBLE, number=1 + proto.DOUBLE, number=1, ) - moving_average_coefficients = proto.RepeatedField( - proto.DOUBLE, number=2 + proto.DOUBLE, number=2, ) - - intercept_coefficient = proto.Field(proto.DOUBLE, number=3) + intercept_coefficient = proto.Field(proto.DOUBLE, number=3,) class ArimaModelInfo(proto.Message): r"""Arima model information. - Attributes: non_seasonal_order (google.cloud.bigquery_v2.types.Model.ArimaOrder): Non-seasonal order. 
@@ -1333,21 +1227,16 @@ class ArimaModelInfo(proto.Message): non_seasonal_order = proto.Field( proto.MESSAGE, number=1, message="Model.ArimaOrder", ) - arima_coefficients = proto.Field( proto.MESSAGE, number=2, message="Model.TrainingRun.IterationResult.ArimaResult.ArimaCoefficients", ) - arima_fitting_metrics = proto.Field( proto.MESSAGE, number=3, message="Model.ArimaFittingMetrics", ) - - has_drift = proto.Field(proto.BOOL, number=4) - - time_series_id = proto.Field(proto.STRING, number=5) - + has_drift = proto.Field(proto.BOOL, number=4,) + time_series_id = proto.Field(proto.STRING, number=5,) seasonal_periods = proto.RepeatedField( proto.ENUM, number=6, @@ -1359,35 +1248,30 @@ class ArimaModelInfo(proto.Message): number=1, message="Model.TrainingRun.IterationResult.ArimaResult.ArimaModelInfo", ) - seasonal_periods = proto.RepeatedField( proto.ENUM, number=2, enum="Model.SeasonalPeriod.SeasonalPeriodType", ) - index = proto.Field(proto.MESSAGE, number=1, message=wrappers.Int32Value,) - + index = proto.Field( + proto.MESSAGE, number=1, message=wrappers_pb2.Int32Value, + ) duration_ms = proto.Field( - proto.MESSAGE, number=4, message=wrappers.Int64Value, + proto.MESSAGE, number=4, message=wrappers_pb2.Int64Value, ) - training_loss = proto.Field( - proto.MESSAGE, number=5, message=wrappers.DoubleValue, + proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, ) - eval_loss = proto.Field( - proto.MESSAGE, number=6, message=wrappers.DoubleValue, + proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, ) - - learn_rate = proto.Field(proto.DOUBLE, number=7) - + learn_rate = proto.Field(proto.DOUBLE, number=7,) cluster_infos = proto.RepeatedField( proto.MESSAGE, number=8, message="Model.TrainingRun.IterationResult.ClusterInfo", ) - arima_result = proto.Field( proto.MESSAGE, number=9, @@ -1397,57 +1281,41 @@ class ArimaModelInfo(proto.Message): training_options = proto.Field( proto.MESSAGE, number=1, message="Model.TrainingRun.TrainingOptions", ) - - start_time = 
proto.Field(proto.MESSAGE, number=8, message=timestamp.Timestamp,) - + start_time = proto.Field( + proto.MESSAGE, number=8, message=timestamp_pb2.Timestamp, + ) results = proto.RepeatedField( proto.MESSAGE, number=6, message="Model.TrainingRun.IterationResult", ) - evaluation_metrics = proto.Field( proto.MESSAGE, number=7, message="Model.EvaluationMetrics", ) - data_split_result = proto.Field( proto.MESSAGE, number=9, message="Model.DataSplitResult", ) - global_explanations = proto.RepeatedField( proto.MESSAGE, number=10, message="Model.GlobalExplanation", ) - etag = proto.Field(proto.STRING, number=1) - + etag = proto.Field(proto.STRING, number=1,) model_reference = proto.Field( proto.MESSAGE, number=2, message=gcb_model_reference.ModelReference, ) - - creation_time = proto.Field(proto.INT64, number=5) - - last_modified_time = proto.Field(proto.INT64, number=6) - - description = proto.Field(proto.STRING, number=12) - - friendly_name = proto.Field(proto.STRING, number=14) - - labels = proto.MapField(proto.STRING, proto.STRING, number=15) - - expiration_time = proto.Field(proto.INT64, number=16) - - location = proto.Field(proto.STRING, number=13) - + creation_time = proto.Field(proto.INT64, number=5,) + last_modified_time = proto.Field(proto.INT64, number=6,) + description = proto.Field(proto.STRING, number=12,) + friendly_name = proto.Field(proto.STRING, number=14,) + labels = proto.MapField(proto.STRING, proto.STRING, number=15,) + expiration_time = proto.Field(proto.INT64, number=16,) + location = proto.Field(proto.STRING, number=13,) encryption_configuration = proto.Field( proto.MESSAGE, number=17, message=encryption_config.EncryptionConfiguration, ) - model_type = proto.Field(proto.ENUM, number=7, enum=ModelType,) - training_runs = proto.RepeatedField(proto.MESSAGE, number=9, message=TrainingRun,) - feature_columns = proto.RepeatedField( proto.MESSAGE, number=10, message=standard_sql.StandardSqlField, ) - label_columns = proto.RepeatedField( proto.MESSAGE, 
number=11, message=standard_sql.StandardSqlField, ) @@ -1455,7 +1323,6 @@ class ArimaModelInfo(proto.Message): class GetModelRequest(proto.Message): r""" - Attributes: project_id (str): Required. Project ID of the requested model. @@ -1465,16 +1332,13 @@ class GetModelRequest(proto.Message): Required. Model ID of the requested model. """ - project_id = proto.Field(proto.STRING, number=1) - - dataset_id = proto.Field(proto.STRING, number=2) - - model_id = proto.Field(proto.STRING, number=3) + project_id = proto.Field(proto.STRING, number=1,) + dataset_id = proto.Field(proto.STRING, number=2,) + model_id = proto.Field(proto.STRING, number=3,) class PatchModelRequest(proto.Message): r""" - Attributes: project_id (str): Required. Project ID of the model to patch. @@ -1489,18 +1353,14 @@ class PatchModelRequest(proto.Message): set to default value. """ - project_id = proto.Field(proto.STRING, number=1) - - dataset_id = proto.Field(proto.STRING, number=2) - - model_id = proto.Field(proto.STRING, number=3) - + project_id = proto.Field(proto.STRING, number=1,) + dataset_id = proto.Field(proto.STRING, number=2,) + model_id = proto.Field(proto.STRING, number=3,) model = proto.Field(proto.MESSAGE, number=4, message="Model",) class DeleteModelRequest(proto.Message): r""" - Attributes: project_id (str): Required. Project ID of the model to delete. @@ -1510,16 +1370,13 @@ class DeleteModelRequest(proto.Message): Required. Model ID of the model to delete. """ - project_id = proto.Field(proto.STRING, number=1) - - dataset_id = proto.Field(proto.STRING, number=2) - - model_id = proto.Field(proto.STRING, number=3) + project_id = proto.Field(proto.STRING, number=1,) + dataset_id = proto.Field(proto.STRING, number=2,) + model_id = proto.Field(proto.STRING, number=3,) class ListModelsRequest(proto.Message): r""" - Attributes: project_id (str): Required. Project ID of the models to list. 
@@ -1534,18 +1391,16 @@ class ListModelsRequest(proto.Message): request the next page of results """ - project_id = proto.Field(proto.STRING, number=1) - - dataset_id = proto.Field(proto.STRING, number=2) - - max_results = proto.Field(proto.MESSAGE, number=3, message=wrappers.UInt32Value,) - - page_token = proto.Field(proto.STRING, number=4) + project_id = proto.Field(proto.STRING, number=1,) + dataset_id = proto.Field(proto.STRING, number=2,) + max_results = proto.Field( + proto.MESSAGE, number=3, message=wrappers_pb2.UInt32Value, + ) + page_token = proto.Field(proto.STRING, number=4,) class ListModelsResponse(proto.Message): r""" - Attributes: models (Sequence[google.cloud.bigquery_v2.types.Model]): Models in the requested dataset. Only the following fields @@ -1560,8 +1415,7 @@ def raw_page(self): return self models = proto.RepeatedField(proto.MESSAGE, number=1, message="Model",) - - next_page_token = proto.Field(proto.STRING, number=2) + next_page_token = proto.Field(proto.STRING, number=2,) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery_v2/types/model_reference.py b/google/cloud/bigquery_v2/types/model_reference.py index e3891d6c1..a9ebad613 100644 --- a/google/cloud/bigquery_v2/types/model_reference.py +++ b/google/cloud/bigquery_v2/types/model_reference.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - # Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # - import proto # type: ignore @@ -25,7 +23,6 @@ class ModelReference(proto.Message): r"""Id path of a model. - Attributes: project_id (str): Required. The ID of the project containing @@ -39,11 +36,9 @@ class ModelReference(proto.Message): maximum length is 1,024 characters. 
""" - project_id = proto.Field(proto.STRING, number=1) - - dataset_id = proto.Field(proto.STRING, number=2) - - model_id = proto.Field(proto.STRING, number=3) + project_id = proto.Field(proto.STRING, number=1,) + dataset_id = proto.Field(proto.STRING, number=2,) + model_id = proto.Field(proto.STRING, number=3,) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery_v2/types/standard_sql.py b/google/cloud/bigquery_v2/types/standard_sql.py index 3bc6afedc..b2191a417 100644 --- a/google/cloud/bigquery_v2/types/standard_sql.py +++ b/google/cloud/bigquery_v2/types/standard_sql.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - # Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # - import proto # type: ignore @@ -63,11 +61,9 @@ class TypeKind(proto.Enum): STRUCT = 17 type_kind = proto.Field(proto.ENUM, number=1, enum=TypeKind,) - array_element_type = proto.Field( proto.MESSAGE, number=2, oneof="sub_type", message="StandardSqlDataType", ) - struct_type = proto.Field( proto.MESSAGE, number=3, oneof="sub_type", message="StandardSqlStructType", ) @@ -75,7 +71,6 @@ class TypeKind(proto.Enum): class StandardSqlField(proto.Message): r"""A field or a column. - Attributes: name (str): Optional. The name of this field. Can be @@ -88,14 +83,12 @@ class StandardSqlField(proto.Message): this "type" field). 
""" - name = proto.Field(proto.STRING, number=1) - + name = proto.Field(proto.STRING, number=1,) type = proto.Field(proto.MESSAGE, number=2, message="StandardSqlDataType",) class StandardSqlStructType(proto.Message): r""" - Attributes: fields (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]): diff --git a/google/cloud/bigquery_v2/types/table_reference.py b/google/cloud/bigquery_v2/types/table_reference.py index d213e8bb6..a0a8ee4c9 100644 --- a/google/cloud/bigquery_v2/types/table_reference.py +++ b/google/cloud/bigquery_v2/types/table_reference.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - # Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # - import proto # type: ignore @@ -25,7 +23,6 @@ class TableReference(proto.Message): r""" - Attributes: project_id (str): Required. The ID of the project containing @@ -41,11 +38,9 @@ class TableReference(proto.Message): as ``sample_table$20190123``. """ - project_id = proto.Field(proto.STRING, number=1) - - dataset_id = proto.Field(proto.STRING, number=2) - - table_id = proto.Field(proto.STRING, number=3) + project_id = proto.Field(proto.STRING, number=1,) + dataset_id = proto.Field(proto.STRING, number=2,) + table_id = proto.Field(proto.STRING, number=3,) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/owlbot.py b/owlbot.py index f45c24fbb..476c5ee5d 100644 --- a/owlbot.py +++ b/owlbot.py @@ -24,19 +24,32 @@ default_version = "v2" for library in s.get_staging_dirs(default_version): - # Do not expose ModelServiceClient, as there is no public API endpoint for the - # models service. + # Do not expose ModelServiceClient and ModelServiceAsyncClient, as there + # is no public API endpoint for the models service. 
s.replace( library / f"google/cloud/bigquery_{library.name}/__init__.py", r"from \.services\.model_service import ModelServiceClient", "", ) + + s.replace( + library / f"google/cloud/bigquery_{library.name}/__init__.py", + r"from \.services\.model_service import ModelServiceAsyncClient", + "", + ) + s.replace( library / f"google/cloud/bigquery_{library.name}/__init__.py", r"""["']ModelServiceClient["'],""", "", ) + s.replace( + library / f"google/cloud/bigquery_{library.name}/__init__.py", + r"""["']ModelServiceAsyncClient["'],""", + "", + ) + # Adjust Model docstring so that Sphinx does not think that "predicted_" is # a reference to something, issuing a false warning. s.replace( diff --git a/tests/__init__.py b/tests/__init__.py index e69de29bb..4de65971c 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py index df379f1e9..4de65971c 100644 --- a/tests/unit/__init__.py +++ b/tests/unit/__init__.py @@ -1,4 +1,5 @@ -# Copyright 2016 Google LLC +# -*- coding: utf-8 -*- +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,3 +12,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. +# diff --git a/tests/unit/gapic/__init__.py b/tests/unit/gapic/__init__.py new file mode 100644 index 000000000..4de65971c --- /dev/null +++ b/tests/unit/gapic/__init__.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# From 49557da28b3c1fe2210f7d9e44235ef2d2eeb564 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 21 May 2021 00:26:03 +0200 Subject: [PATCH 220/341] chore(deps): update dependency grpcio to v1.38.0 (#674) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [grpcio](https://grpc.io) | `==1.37.1` -> `==1.38.0` | [![age](https://badges.renovateapi.com/packages/pypi/grpcio/1.38.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/grpcio/1.38.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/grpcio/1.38.0/compatibility-slim/1.37.1)](https://docs.renovatebot.com/merge-confidence/) | 
[![confidence](https://badges.renovateapi.com/packages/pypi/grpcio/1.38.0/confidence-slim/1.37.1)](https://docs.renovatebot.com/merge-confidence/) | --- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻️ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 2dfee39b5..fcb481f48 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.16.1 google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 -grpcio==1.37.1 +grpcio==1.38.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' From 591cdd851bb1321b048a05a378a0ef48d3ade462 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 21 May 2021 11:03:35 +0200 Subject: [PATCH 221/341] fix(tests): invalid path to strptime() (#672) --- samples/snippets/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/conftest.py b/samples/snippets/conftest.py index 0d0299ee5..cb11eb68f 100644 --- a/samples/snippets/conftest.py +++ b/samples/snippets/conftest.py @@ -33,7 +33,7 @@ def resource_prefix() -> str: def resource_name_to_date(resource_name: str): start_date = len(RESOURCE_PREFIX) + 1 date_string = resource_name[start_date : start_date + RESOURCE_DATE_LENGTH] - return 
datetime.strptime(date_string, RESOURCE_DATE_FORMAT) + return datetime.datetime.strptime(date_string, RESOURCE_DATE_FORMAT) @pytest.fixture(scope="session", autouse=True) From 45421e73bfcddb244822e6a5cd43be6bd1ca2256 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Fri, 21 May 2021 10:50:55 -0600 Subject: [PATCH 222/341] feat: Support parameterized NUMERIC, BIGNUMERIC, STRING, and BYTES types (#673) * parse parameterized schema info * Fixed SchemaField repr/key * Fix code duplication between _parse_schema_resource and from_api_repr Move new parameterized-type code from _parse_schema_resource to from_api_repr and implement _parse_schema_resource in terms of from_api_repr. * empty schemas are lists now, just like non-empty schemas. * changed new parameterized-type tests to use from_api_repr Because that's more direct and it uncovered duplicate code. * parameterized the from_api_repr tests and added to_api_repr tests * Test BYTES and _key (repr) too. * Added a round-trip parameterized types schema tests * handle BYTES in _key/repr * blacken * Move _get_int close to use * Updated documentation. * Oops, forgot BIGNUMERIC * Improve argument doc and better argument name to __get_int * doom tables before creating them. * Use max_length in the Python for the REST api maxLength --- google/cloud/bigquery/schema.py | 71 +++++++++++++----- tests/system/test_client.py | 29 ++++++++ tests/unit/test_query.py | 4 +- tests/unit/test_schema.py | 123 ++++++++++++++++++++++++++++++++ 4 files changed, 209 insertions(+), 18 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index cb221d6de..919d78b23 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -67,6 +67,15 @@ class SchemaField(object): policy_tags (Optional[PolicyTagList]): The policy tag list for the field. + precision (Optional[int]): + Precision (number of digits) of fields with NUMERIC or BIGNUMERIC type. 
+ + scale (Optional[int]): + Scale (digits after decimal) of fields with NUMERIC or BIGNUMERIC type. + + max_length (Optional[int]): + Maximum length of fields with STRING or BYTES type. + """ def __init__( @@ -77,6 +86,9 @@ def __init__( description=_DEFAULT_VALUE, fields=(), policy_tags=None, + precision=_DEFAULT_VALUE, + scale=_DEFAULT_VALUE, + max_length=_DEFAULT_VALUE, ): self._properties = { "name": name, @@ -86,9 +98,22 @@ def __init__( self._properties["mode"] = mode.upper() if description is not _DEFAULT_VALUE: self._properties["description"] = description + if precision is not _DEFAULT_VALUE: + self._properties["precision"] = precision + if scale is not _DEFAULT_VALUE: + self._properties["scale"] = scale + if max_length is not _DEFAULT_VALUE: + self._properties["maxLength"] = max_length self._fields = tuple(fields) self._policy_tags = policy_tags + @staticmethod + def __get_int(api_repr, name): + v = api_repr.get(name, _DEFAULT_VALUE) + if v is not _DEFAULT_VALUE: + v = int(v) + return v + @classmethod def from_api_repr(cls, api_repr: dict) -> "SchemaField": """Return a ``SchemaField`` object deserialized from a dictionary. 
@@ -113,6 +138,9 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": description=description, name=api_repr["name"], policy_tags=PolicyTagList.from_api_repr(api_repr.get("policyTags")), + precision=cls.__get_int(api_repr, "precision"), + scale=cls.__get_int(api_repr, "scale"), + max_length=cls.__get_int(api_repr, "maxLength"), ) @property @@ -148,6 +176,21 @@ def description(self): """Optional[str]: description for the field.""" return self._properties.get("description") + @property + def precision(self): + """Optional[int]: Precision (number of digits) for the NUMERIC field.""" + return self._properties.get("precision") + + @property + def scale(self): + """Optional[int]: Scale (digits after decimal) for the NUMERIC field.""" + return self._properties.get("scale") + + @property + def max_length(self): + """Optional[int]: Maximum length for the STRING or BYTES field.""" + return self._properties.get("maxLength") + @property def fields(self): """Optional[tuple]: Subfields contained in this field. @@ -191,9 +234,19 @@ def _key(self): Returns: Tuple: The contents of this :class:`~google.cloud.bigquery.schema.SchemaField`. """ + field_type = self.field_type.upper() + if field_type == "STRING" or field_type == "BYTES": + if self.max_length is not None: + field_type = f"{field_type}({self.max_length})" + elif field_type.endswith("NUMERIC"): + if self.precision is not None: + if self.scale is not None: + field_type = f"{field_type}({self.precision}, {self.scale})" + else: + field_type = f"{field_type}({self.precision})" return ( self.name, - self.field_type.upper(), + field_type, # Mode is always str, if not given it defaults to a str value self.mode.upper(), # pytype: disable=attribute-error self.description, @@ -269,21 +322,7 @@ def _parse_schema_resource(info): Optional[Sequence[google.cloud.bigquery.schema.SchemaField`]: A list of parsed fields, or ``None`` if no "fields" key found. 
""" - if "fields" not in info: - return () - - schema = [] - for r_field in info["fields"]: - name = r_field["name"] - field_type = r_field["type"] - mode = r_field.get("mode", "NULLABLE") - description = r_field.get("description") - sub_fields = _parse_schema_resource(r_field) - policy_tags = PolicyTagList.from_api_repr(r_field.get("policyTags")) - schema.append( - SchemaField(name, field_type, mode, description, sub_fields, policy_tags) - ) - return schema + return [SchemaField.from_api_repr(f) for f in info.get("fields", ())] def _build_schema_resource(fields): diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 7c8ef50fa..b4b0c053d 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2173,6 +2173,35 @@ def test_list_rows_page_size(self): page = next(pages) self.assertEqual(page.num_items, num_last_page) + def test_parameterized_types_round_trip(self): + client = Config.CLIENT + table_id = f"{Config.DATASET}.test_parameterized_types_round_trip" + fields = ( + ("n", "NUMERIC"), + ("n9", "NUMERIC(9)"), + ("n92", "NUMERIC(9, 2)"), + ("bn", "BIGNUMERIC"), + ("bn9", "BIGNUMERIC(38)"), + ("bn92", "BIGNUMERIC(38, 22)"), + ("s", "STRING"), + ("s9", "STRING(9)"), + ("b", "BYTES"), + ("b9", "BYTES(9)"), + ) + self.to_delete.insert(0, Table(f"{client.project}.{table_id}")) + client.query( + "create table {} ({})".format( + table_id, ", ".join(" ".join(f) for f in fields) + ) + ).result() + table = client.get_table(table_id) + table_id2 = table_id + "2" + self.to_delete.insert(0, Table(f"{client.project}.{table_id2}")) + client.create_table(Table(f"{client.project}.{table_id2}", table.schema)) + table2 = client.get_table(table_id2) + + self.assertEqual(tuple(s._key()[:2] for s in table2.schema), fields) + def temp_dataset(self, dataset_id, location=None): project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 
90fc30b20..9483fe8dd 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -1302,7 +1302,7 @@ def _verifySchema(self, query, resource): self.assertEqual(found.description, expected.get("description")) self.assertEqual(found.fields, expected.get("fields", ())) else: - self.assertEqual(query.schema, ()) + self.assertEqual(query.schema, []) def test_ctor_defaults(self): query = self._make_one(self._make_resource()) @@ -1312,7 +1312,7 @@ def test_ctor_defaults(self): self.assertIsNone(query.page_token) self.assertEqual(query.project, self.PROJECT) self.assertEqual(query.rows, []) - self.assertEqual(query.schema, ()) + self.assertEqual(query.schema, []) self.assertIsNone(query.total_rows) self.assertIsNone(query.total_bytes_processed) diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 87baaf379..29c3bace5 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -15,6 +15,7 @@ import unittest import mock +import pytest class TestSchemaField(unittest.TestCase): @@ -715,3 +716,125 @@ def test___hash__not_equals(self): set_one = {policy1} set_two = {policy2} self.assertNotEqual(set_one, set_two) + + +@pytest.mark.parametrize( + "api,expect,key2", + [ + ( + dict(name="n", type="NUMERIC"), + ("n", "NUMERIC", None, None, None), + ("n", "NUMERIC"), + ), + ( + dict(name="n", type="NUMERIC", precision=9), + ("n", "NUMERIC", 9, None, None), + ("n", "NUMERIC(9)"), + ), + ( + dict(name="n", type="NUMERIC", precision=9, scale=2), + ("n", "NUMERIC", 9, 2, None), + ("n", "NUMERIC(9, 2)"), + ), + ( + dict(name="n", type="BIGNUMERIC"), + ("n", "BIGNUMERIC", None, None, None), + ("n", "BIGNUMERIC"), + ), + ( + dict(name="n", type="BIGNUMERIC", precision=40), + ("n", "BIGNUMERIC", 40, None, None), + ("n", "BIGNUMERIC(40)"), + ), + ( + dict(name="n", type="BIGNUMERIC", precision=40, scale=2), + ("n", "BIGNUMERIC", 40, 2, None), + ("n", "BIGNUMERIC(40, 2)"), + ), + ( + dict(name="n", type="STRING"), + ("n", "STRING", None, None, None), 
+ ("n", "STRING"), + ), + ( + dict(name="n", type="STRING", maxLength=9), + ("n", "STRING", None, None, 9), + ("n", "STRING(9)"), + ), + ( + dict(name="n", type="BYTES"), + ("n", "BYTES", None, None, None), + ("n", "BYTES"), + ), + ( + dict(name="n", type="BYTES", maxLength=9), + ("n", "BYTES", None, None, 9), + ("n", "BYTES(9)"), + ), + ], +) +def test_from_api_repr_parameterized(api, expect, key2): + from google.cloud.bigquery.schema import SchemaField + + field = SchemaField.from_api_repr(api) + + assert ( + field.name, + field.field_type, + field.precision, + field.scale, + field.max_length, + ) == expect + + assert field._key()[:2] == key2 + + +@pytest.mark.parametrize( + "field,api", + [ + ( + dict(name="n", field_type="NUMERIC"), + dict(name="n", type="NUMERIC", mode="NULLABLE"), + ), + ( + dict(name="n", field_type="NUMERIC", precision=9), + dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9), + ), + ( + dict(name="n", field_type="NUMERIC", precision=9, scale=2), + dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9, scale=2), + ), + ( + dict(name="n", field_type="BIGNUMERIC"), + dict(name="n", type="BIGNUMERIC", mode="NULLABLE"), + ), + ( + dict(name="n", field_type="BIGNUMERIC", precision=40), + dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40), + ), + ( + dict(name="n", field_type="BIGNUMERIC", precision=40, scale=2), + dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40, scale=2), + ), + ( + dict(name="n", field_type="STRING"), + dict(name="n", type="STRING", mode="NULLABLE"), + ), + ( + dict(name="n", field_type="STRING", max_length=9), + dict(name="n", type="STRING", mode="NULLABLE", maxLength=9), + ), + ( + dict(name="n", field_type="BYTES"), + dict(name="n", type="BYTES", mode="NULLABLE"), + ), + ( + dict(name="n", field_type="BYTES", max_length=9), + dict(name="n", type="BYTES", mode="NULLABLE", maxLength=9), + ), + ], +) +def test_to_api_repr_parameterized(field, api): + from google.cloud.bigquery.schema 
import SchemaField + + assert SchemaField(**field).to_api_repr() == api From 168e699dc091521edbd769825ede3b8c3ec9d2d9 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Fri, 21 May 2021 11:37:20 -0600 Subject: [PATCH 223/341] chore: release 2.17.0 (#670) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 13 +++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ef184dffb..2a0227118 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,19 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.17.0](https://www.github.com/googleapis/python-bigquery/compare/v2.16.1...v2.17.0) (2021-05-21) + + +### Features + +* detect obsolete BQ Storage extra at runtime ([#666](https://www.github.com/googleapis/python-bigquery/issues/666)) ([bd7dbda](https://www.github.com/googleapis/python-bigquery/commit/bd7dbdae5c972b16bafc53c67911eeaa3255a880)) +* Support parameterized NUMERIC, BIGNUMERIC, STRING, and BYTES types ([#673](https://www.github.com/googleapis/python-bigquery/issues/673)) ([45421e7](https://www.github.com/googleapis/python-bigquery/commit/45421e73bfcddb244822e6a5cd43be6bd1ca2256)) + + +### Bug Fixes + +* **tests:** invalid path to strptime() ([#672](https://www.github.com/googleapis/python-bigquery/issues/672)) ([591cdd8](https://www.github.com/googleapis/python-bigquery/commit/591cdd851bb1321b048a05a378a0ef48d3ade462)) + ### [2.16.1](https://www.github.com/googleapis/python-bigquery/compare/v2.16.0...v2.16.1) (2021-05-12) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 61e0c0a83..422b383cc 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.16.1" +__version__ = "2.17.0" From eed50b0facee6b71d8334fc70279286d5a9400da Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Sat, 22 May 2021 08:42:02 +0200 Subject: [PATCH 224/341] chore(deps): update dependency google-cloud-bigquery to v2.17.0 (#675) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | `==2.16.1` -> `==2.17.0` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.17.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.17.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.17.0/compatibility-slim/2.16.1)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.17.0/confidence-slim/2.16.1)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery ### [`v2.17.0`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​2170-httpswwwgithubcomgoogleapispython-bigquerycomparev2161v2170-2021-05-21) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.16.1...v2.17.0) ##### Features - detect obsolete BQ Storage extra at runtime ([#​666](https://www.github.com/googleapis/python-bigquery/issues/666)) ([bd7dbda](https://www.github.com/googleapis/python-bigquery/commit/bd7dbdae5c972b16bafc53c67911eeaa3255a880)) - Support parameterized NUMERIC, BIGNUMERIC, STRING, and BYTES types ([#​673](https://www.github.com/googleapis/python-bigquery/issues/673)) ([45421e7](https://www.github.com/googleapis/python-bigquery/commit/45421e73bfcddb244822e6a5cd43be6bd1ca2256)) ##### Bug Fixes - **tests:** invalid path to strptime() ([#​672](https://www.github.com/googleapis/python-bigquery/issues/672)) ([591cdd8](https://www.github.com/googleapis/python-bigquery/commit/591cdd851bb1321b048a05a378a0ef48d3ade462)) ##### [2.16.1](https://www.github.com/googleapis/python-bigquery/compare/v2.16.0...v2.16.1) (2021-05-12) ##### Bug Fixes - executemany rowcount only reflected the last execution ([#​660](https://www.github.com/googleapis/python-bigquery/issues/660)) ([aeadc8c](https://www.github.com/googleapis/python-bigquery/commit/aeadc8c2d614bb9f0883ec901fca48930f3aaf19))
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻️ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index e494fbaae..9fbe85970 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.16.1 +google-cloud-bigquery==2.17.0 google-cloud-bigquery-storage==2.4.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index fcb481f48..2ed5b0df8 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.16.1 +google-cloud-bigquery==2.17.0 google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 grpcio==1.38.0 From fcda42a72f999d9aa70a5a9f0296027e2868d17a Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Sat, 22 May 2021 09:18:05 +0000 Subject: [PATCH 225/341] chore: new owl bot post processor docker image (#677) gcr.io/repo-automation-bots/owlbot-python:latest@sha256:3c3a445b3ddc99ccd5d31edc4b4519729635d20693900db32c4f587ed51f7479 --- .github/.OwlBot.lock.yaml | 2 +- samples/geography/noxfile.py | 9 ++++++++- samples/snippets/noxfile.py | 9 ++++++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff 
--git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 864c17653..46e3f021c 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:4c981a6b6f2b8914a448d7b3a01688365be03e3ed26dfee399a6aa77fb112eaa + digest: sha256:3c3a445b3ddc99ccd5d31edc4b4519729635d20693900db32c4f587ed51f7479 diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index be1a3f251..160fe7286 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -48,6 +48,10 @@ # to use your own Cloud project. "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # If you need to use a specific version of pip, + # change pip_version_override to the string representation + # of the version number, for example, "20.2.4" + "pip_version_override": None, # A dictionary you want to inject into your test. Don't put any # secrets here. These values will override predefined values. "envs": {}, @@ -172,6 +176,9 @@ def blacken(session: nox.sessions.Session) -> None: def _session_tests( session: nox.sessions.Session, post_install: Callable = None ) -> None: + if TEST_CONFIG["pip_version_override"]: + pip_version = TEST_CONFIG["pip_version_override"] + session.install(f"pip=={pip_version}") """Runs py.test for a particular project.""" if os.path.exists("requirements.txt"): if os.path.exists("constraints.txt"): @@ -198,7 +205,7 @@ def _session_tests( # on travis where slow and flaky tests are excluded. # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html success_codes=[0, 5], - env=get_pytest_env_vars() + env=get_pytest_env_vars(), ) diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index be1a3f251..160fe7286 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -48,6 +48,10 @@ # to use your own Cloud project. 
"gcloud_project_env": "GOOGLE_CLOUD_PROJECT", # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # If you need to use a specific version of pip, + # change pip_version_override to the string representation + # of the version number, for example, "20.2.4" + "pip_version_override": None, # A dictionary you want to inject into your test. Don't put any # secrets here. These values will override predefined values. "envs": {}, @@ -172,6 +176,9 @@ def blacken(session: nox.sessions.Session) -> None: def _session_tests( session: nox.sessions.Session, post_install: Callable = None ) -> None: + if TEST_CONFIG["pip_version_override"]: + pip_version = TEST_CONFIG["pip_version_override"] + session.install(f"pip=={pip_version}") """Runs py.test for a particular project.""" if os.path.exists("requirements.txt"): if os.path.exists("constraints.txt"): @@ -198,7 +205,7 @@ def _session_tests( # on travis where slow and flaky tests are excluded. # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html success_codes=[0, 5], - env=get_pytest_env_vars() + env=get_pytest_env_vars(), ) From 27e4d34a257e2110a3928931c7ea58b2607e85d0 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Mon, 24 May 2021 02:16:36 -0400 Subject: [PATCH 226/341] chore: delete unused protos (#676) --- .../bigquery_v2/proto/encryption_config.proto | 32 - .../proto/encryption_config_pb2.py | 104 - .../bigquery_v2/proto/location_metadata.proto | 34 - google/cloud/bigquery_v2/proto/model.proto | 1208 ----- google/cloud/bigquery_v2/proto/model_pb2.py | 4298 ----------------- .../bigquery_v2/proto/model_reference.proto | 38 - .../bigquery_v2/proto/model_reference_pb2.py | 142 - .../bigquery_v2/proto/standard_sql.proto | 112 - .../bigquery_v2/proto/standard_sql_pb2.py | 442 -- .../bigquery_v2/proto/table_reference.proto | 39 - 10 files changed, 6449 deletions(-) delete mode 100644 google/cloud/bigquery_v2/proto/encryption_config.proto delete mode 100644 
google/cloud/bigquery_v2/proto/encryption_config_pb2.py delete mode 100644 google/cloud/bigquery_v2/proto/location_metadata.proto delete mode 100644 google/cloud/bigquery_v2/proto/model.proto delete mode 100644 google/cloud/bigquery_v2/proto/model_pb2.py delete mode 100644 google/cloud/bigquery_v2/proto/model_reference.proto delete mode 100644 google/cloud/bigquery_v2/proto/model_reference_pb2.py delete mode 100644 google/cloud/bigquery_v2/proto/standard_sql.proto delete mode 100644 google/cloud/bigquery_v2/proto/standard_sql_pb2.py delete mode 100644 google/cloud/bigquery_v2/proto/table_reference.proto diff --git a/google/cloud/bigquery_v2/proto/encryption_config.proto b/google/cloud/bigquery_v2/proto/encryption_config.proto deleted file mode 100644 index 1c0512a17..000000000 --- a/google/cloud/bigquery_v2/proto/encryption_config.proto +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright 2020 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package google.cloud.bigquery.v2; - -import "google/api/field_behavior.proto"; -import "google/protobuf/wrappers.proto"; -import "google/api/annotations.proto"; - -option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; -option java_outer_classname = "EncryptionConfigProto"; -option java_package = "com.google.cloud.bigquery.v2"; - -message EncryptionConfiguration { - // Optional. 
Describes the Cloud KMS encryption key that will be used to - // protect destination BigQuery table. The BigQuery Service Account associated - // with your project requires access to this encryption key. - google.protobuf.StringValue kms_key_name = 1 [(google.api.field_behavior) = OPTIONAL]; -} diff --git a/google/cloud/bigquery_v2/proto/encryption_config_pb2.py b/google/cloud/bigquery_v2/proto/encryption_config_pb2.py deleted file mode 100644 index 5ae21ea6f..000000000 --- a/google/cloud/bigquery_v2/proto/encryption_config_pb2.py +++ /dev/null @@ -1,104 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: google/cloud/bigquery_v2/proto/encryption_config.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2 -from google.protobuf import wrappers_pb2 as google_dot_protobuf_dot_wrappers__pb2 -from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/bigquery_v2/proto/encryption_config.proto", - package="google.cloud.bigquery.v2", - syntax="proto3", - serialized_options=b"\n\034com.google.cloud.bigquery.v2B\025EncryptionConfigProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery", - create_key=_descriptor._internal_create_key, - serialized_pb=b'\n6google/cloud/bigquery_v2/proto/encryption_config.proto\x12\x18google.cloud.bigquery.v2\x1a\x1fgoogle/api/field_behavior.proto\x1a\x1egoogle/protobuf/wrappers.proto\x1a\x1cgoogle/api/annotations.proto"R\n\x17\x45ncryptionConfiguration\x12\x37\n\x0ckms_key_name\x18\x01 
\x01(\x0b\x32\x1c.google.protobuf.StringValueB\x03\xe0\x41\x01\x42w\n\x1c\x63om.google.cloud.bigquery.v2B\x15\x45ncryptionConfigProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3', - dependencies=[ - google_dot_api_dot_field__behavior__pb2.DESCRIPTOR, - google_dot_protobuf_dot_wrappers__pb2.DESCRIPTOR, - google_dot_api_dot_annotations__pb2.DESCRIPTOR, - ], -) - - -_ENCRYPTIONCONFIGURATION = _descriptor.Descriptor( - name="EncryptionConfiguration", - full_name="google.cloud.bigquery.v2.EncryptionConfiguration", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="kms_key_name", - full_name="google.cloud.bigquery.v2.EncryptionConfiguration.kms_key_name", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\001", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=179, - serialized_end=261, -) - -_ENCRYPTIONCONFIGURATION.fields_by_name[ - "kms_key_name" -].message_type = google_dot_protobuf_dot_wrappers__pb2._STRINGVALUE -DESCRIPTOR.message_types_by_name["EncryptionConfiguration"] = _ENCRYPTIONCONFIGURATION -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -EncryptionConfiguration = _reflection.GeneratedProtocolMessageType( - "EncryptionConfiguration", - (_message.Message,), - { - "DESCRIPTOR": _ENCRYPTIONCONFIGURATION, - "__module__": "google.cloud.bigquery_v2.proto.encryption_config_pb2", - "__doc__": """Encryption configuration. - - Attributes: - kms_key_name: - Optional. 
Describes the Cloud KMS encryption key that will be - used to protect destination BigQuery table. The BigQuery - Service Account associated with your project requires access - to this encryption key. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.EncryptionConfiguration) - }, -) -_sym_db.RegisterMessage(EncryptionConfiguration) - - -DESCRIPTOR._options = None -_ENCRYPTIONCONFIGURATION.fields_by_name["kms_key_name"]._options = None -# @@protoc_insertion_point(module_scope) diff --git a/google/cloud/bigquery_v2/proto/location_metadata.proto b/google/cloud/bigquery_v2/proto/location_metadata.proto deleted file mode 100644 index 95a3133c5..000000000 --- a/google/cloud/bigquery_v2/proto/location_metadata.proto +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2019 Google LLC. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -syntax = "proto3"; - -package google.cloud.bigquery.v2; - -import "google/api/annotations.proto"; - -option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; -option java_outer_classname = "LocationMetadataProto"; -option java_package = "com.google.cloud.bigquery.v2"; - - -// BigQuery-specific metadata about a location. This will be set on -// google.cloud.location.Location.metadata in Cloud Location API -// responses. -message LocationMetadata { - // The legacy BigQuery location ID, e.g. “EU” for the “europe” location. 
- // This is for any API consumers that need the legacy “US” and “EU” locations. - string legacy_location_id = 1; -} diff --git a/google/cloud/bigquery_v2/proto/model.proto b/google/cloud/bigquery_v2/proto/model.proto deleted file mode 100644 index 2d400dddd..000000000 --- a/google/cloud/bigquery_v2/proto/model.proto +++ /dev/null @@ -1,1208 +0,0 @@ -// Copyright 2020 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package google.cloud.bigquery.v2; - -import "google/api/client.proto"; -import "google/api/field_behavior.proto"; -import "google/cloud/bigquery/v2/encryption_config.proto"; -import "google/cloud/bigquery/v2/model_reference.proto"; -import "google/cloud/bigquery/v2/standard_sql.proto"; -import "google/cloud/bigquery/v2/table_reference.proto"; -import "google/protobuf/empty.proto"; -import "google/protobuf/timestamp.proto"; -import "google/protobuf/wrappers.proto"; -import "google/api/annotations.proto"; - -option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; -option java_outer_classname = "ModelProto"; -option java_package = "com.google.cloud.bigquery.v2"; - -service ModelService { - option (google.api.default_host) = "bigquery.googleapis.com"; - option (google.api.oauth_scopes) = - "https://www.googleapis.com/auth/bigquery," - "https://www.googleapis.com/auth/bigquery.readonly," - "https://www.googleapis.com/auth/cloud-platform," - 
"https://www.googleapis.com/auth/cloud-platform.read-only"; - - // Gets the specified model resource by model ID. - rpc GetModel(GetModelRequest) returns (Model) { - option (google.api.method_signature) = "project_id,dataset_id,model_id"; - } - - // Lists all models in the specified dataset. Requires the READER dataset - // role. - rpc ListModels(ListModelsRequest) returns (ListModelsResponse) { - option (google.api.method_signature) = "project_id,dataset_id,max_results"; - } - - // Patch specific fields in the specified model. - rpc PatchModel(PatchModelRequest) returns (Model) { - option (google.api.method_signature) = "project_id,dataset_id,model_id,model"; - } - - // Deletes the model specified by modelId from the dataset. - rpc DeleteModel(DeleteModelRequest) returns (google.protobuf.Empty) { - option (google.api.method_signature) = "project_id,dataset_id,model_id"; - } -} - -message Model { - message SeasonalPeriod { - enum SeasonalPeriodType { - SEASONAL_PERIOD_TYPE_UNSPECIFIED = 0; - - // No seasonality - NO_SEASONALITY = 1; - - // Daily period, 24 hours. - DAILY = 2; - - // Weekly period, 7 days. - WEEKLY = 3; - - // Monthly period, 30 days or irregular. - MONTHLY = 4; - - // Quarterly period, 90 days or irregular. - QUARTERLY = 5; - - // Yearly period, 365 days or irregular. - YEARLY = 6; - } - - - } - - message KmeansEnums { - // Indicates the method used to initialize the centroids for KMeans - // clustering algorithm. - enum KmeansInitializationMethod { - KMEANS_INITIALIZATION_METHOD_UNSPECIFIED = 0; - - // Initializes the centroids randomly. - RANDOM = 1; - - // Initializes the centroids using data specified in - // kmeans_initialization_column. - CUSTOM = 2; - - // Initializes with kmeans++. - KMEANS_PLUS_PLUS = 3; - } - - - } - - // Evaluation metrics for regression and explicit feedback type matrix - // factorization models. - message RegressionMetrics { - // Mean absolute error. 
- google.protobuf.DoubleValue mean_absolute_error = 1; - - // Mean squared error. - google.protobuf.DoubleValue mean_squared_error = 2; - - // Mean squared log error. - google.protobuf.DoubleValue mean_squared_log_error = 3; - - // Median absolute error. - google.protobuf.DoubleValue median_absolute_error = 4; - - // R^2 score. - google.protobuf.DoubleValue r_squared = 5; - } - - // Aggregate metrics for classification/classifier models. For multi-class - // models, the metrics are either macro-averaged or micro-averaged. When - // macro-averaged, the metrics are calculated for each label and then an - // unweighted average is taken of those values. When micro-averaged, the - // metric is calculated globally by counting the total number of correctly - // predicted rows. - message AggregateClassificationMetrics { - // Precision is the fraction of actual positive predictions that had - // positive actual labels. For multiclass this is a macro-averaged - // metric treating each class as a binary classifier. - google.protobuf.DoubleValue precision = 1; - - // Recall is the fraction of actual positive labels that were given a - // positive prediction. For multiclass this is a macro-averaged metric. - google.protobuf.DoubleValue recall = 2; - - // Accuracy is the fraction of predictions given the correct label. For - // multiclass this is a micro-averaged metric. - google.protobuf.DoubleValue accuracy = 3; - - // Threshold at which the metrics are computed. For binary - // classification models this is the positive class threshold. - // For multi-class classfication models this is the confidence - // threshold. - google.protobuf.DoubleValue threshold = 4; - - // The F1 score is an average of recall and precision. For multiclass - // this is a macro-averaged metric. - google.protobuf.DoubleValue f1_score = 5; - - // Logarithmic Loss. For multiclass this is a macro-averaged metric. - google.protobuf.DoubleValue log_loss = 6; - - // Area Under a ROC Curve. 
For multiclass this is a macro-averaged - // metric. - google.protobuf.DoubleValue roc_auc = 7; - } - - // Evaluation metrics for binary classification/classifier models. - message BinaryClassificationMetrics { - // Confusion matrix for binary classification models. - message BinaryConfusionMatrix { - // Threshold value used when computing each of the following metric. - google.protobuf.DoubleValue positive_class_threshold = 1; - - // Number of true samples predicted as true. - google.protobuf.Int64Value true_positives = 2; - - // Number of false samples predicted as true. - google.protobuf.Int64Value false_positives = 3; - - // Number of true samples predicted as false. - google.protobuf.Int64Value true_negatives = 4; - - // Number of false samples predicted as false. - google.protobuf.Int64Value false_negatives = 5; - - // The fraction of actual positive predictions that had positive actual - // labels. - google.protobuf.DoubleValue precision = 6; - - // The fraction of actual positive labels that were given a positive - // prediction. - google.protobuf.DoubleValue recall = 7; - - // The equally weighted average of recall and precision. - google.protobuf.DoubleValue f1_score = 8; - - // The fraction of predictions given the correct label. - google.protobuf.DoubleValue accuracy = 9; - } - - // Aggregate classification metrics. - AggregateClassificationMetrics aggregate_classification_metrics = 1; - - // Binary confusion matrix at multiple thresholds. - repeated BinaryConfusionMatrix binary_confusion_matrix_list = 2; - - // Label representing the positive class. - string positive_label = 3; - - // Label representing the negative class. - string negative_label = 4; - } - - // Evaluation metrics for multi-class classification/classifier models. - message MultiClassClassificationMetrics { - // Confusion matrix for multi-class classification models. - message ConfusionMatrix { - // A single entry in the confusion matrix. - message Entry { - // The predicted label. 
For confidence_threshold > 0, we will - // also add an entry indicating the number of items under the - // confidence threshold. - string predicted_label = 1; - - // Number of items being predicted as this label. - google.protobuf.Int64Value item_count = 2; - } - - // A single row in the confusion matrix. - message Row { - // The original label of this row. - string actual_label = 1; - - // Info describing predicted label distribution. - repeated Entry entries = 2; - } - - // Confidence threshold used when computing the entries of the - // confusion matrix. - google.protobuf.DoubleValue confidence_threshold = 1; - - // One row per actual label. - repeated Row rows = 2; - } - - // Aggregate classification metrics. - AggregateClassificationMetrics aggregate_classification_metrics = 1; - - // Confusion matrix at different thresholds. - repeated ConfusionMatrix confusion_matrix_list = 2; - } - - // Evaluation metrics for clustering models. - message ClusteringMetrics { - // Message containing the information about one cluster. - message Cluster { - // Representative value of a single feature within the cluster. - message FeatureValue { - // Representative value of a categorical feature. - message CategoricalValue { - // Represents the count of a single category within the cluster. - message CategoryCount { - // The name of category. - string category = 1; - - // The count of training samples matching the category within the - // cluster. - google.protobuf.Int64Value count = 2; - } - - // Counts of all categories for the categorical feature. If there are - // more than ten categories, we return top ten (by count) and return - // one more CategoryCount with category "_OTHER_" and count as - // aggregate counts of remaining categories. - repeated CategoryCount category_counts = 1; - } - - // The feature column name. - string feature_column = 1; - - oneof value { - // The numerical feature value. This is the centroid value for this - // feature. 
- google.protobuf.DoubleValue numerical_value = 2; - - // The categorical feature value. - CategoricalValue categorical_value = 3; - } - } - - // Centroid id. - int64 centroid_id = 1; - - // Values of highly variant features for this cluster. - repeated FeatureValue feature_values = 2; - - // Count of training data rows that were assigned to this cluster. - google.protobuf.Int64Value count = 3; - } - - // Davies-Bouldin index. - google.protobuf.DoubleValue davies_bouldin_index = 1; - - // Mean of squared distances between each sample to its cluster centroid. - google.protobuf.DoubleValue mean_squared_distance = 2; - - // [Beta] Information for all clusters. - repeated Cluster clusters = 3; - } - - // Evaluation metrics used by weighted-ALS models specified by - // feedback_type=implicit. - message RankingMetrics { - // Calculates a precision per user for all the items by ranking them and - // then averages all the precisions across all the users. - google.protobuf.DoubleValue mean_average_precision = 1; - - // Similar to the mean squared error computed in regression and explicit - // recommendation models except instead of computing the rating directly, - // the output from evaluate is computed against a preference which is 1 or 0 - // depending on if the rating exists or not. - google.protobuf.DoubleValue mean_squared_error = 2; - - // A metric to determine the goodness of a ranking calculated from the - // predicted confidence by comparing it to an ideal rank measured by the - // original ratings. - google.protobuf.DoubleValue normalized_discounted_cumulative_gain = 3; - - // Determines the goodness of a ranking by computing the percentile rank - // from the predicted confidence and dividing it by the original rank. - google.protobuf.DoubleValue average_rank = 4; - } - - // Model evaluation metrics for ARIMA forecasting models. - message ArimaForecastingMetrics { - // Model evaluation metrics for a single ARIMA forecasting model. 
- message ArimaSingleModelForecastingMetrics { - // Non-seasonal order. - ArimaOrder non_seasonal_order = 1; - - // Arima fitting metrics. - ArimaFittingMetrics arima_fitting_metrics = 2; - - // Is arima model fitted with drift or not. It is always false when d - // is not 1. - bool has_drift = 3; - - // The id to indicate different time series. - string time_series_id = 4; - - // Seasonal periods. Repeated because multiple periods are supported - // for one time series. - repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 5; - } - - // Non-seasonal order. - repeated ArimaOrder non_seasonal_order = 1; - - // Arima model fitting metrics. - repeated ArimaFittingMetrics arima_fitting_metrics = 2; - - // Seasonal periods. Repeated because multiple periods are supported for one - // time series. - repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 3; - - // Whether Arima model fitted with drift or not. It is always false when d - // is not 1. - repeated bool has_drift = 4; - - // Id to differentiate different time series for the large-scale case. - repeated string time_series_id = 5; - - // Repeated as there can be many metric sets (one for each model) in - // auto-arima and the large-scale case. - repeated ArimaSingleModelForecastingMetrics arima_single_model_forecasting_metrics = 6; - } - - // Evaluation metrics of a model. These are either computed on all training - // data or just the eval data based on whether eval data was used during - // training. These are not present for imported models. - message EvaluationMetrics { - oneof metrics { - // Populated for regression models and explicit feedback type matrix - // factorization models. - RegressionMetrics regression_metrics = 1; - - // Populated for binary classification/classifier models. - BinaryClassificationMetrics binary_classification_metrics = 2; - - // Populated for multi-class classification/classifier models. 
- MultiClassClassificationMetrics multi_class_classification_metrics = 3; - - // Populated for clustering models. - ClusteringMetrics clustering_metrics = 4; - - // Populated for implicit feedback type matrix factorization models. - RankingMetrics ranking_metrics = 5; - - // Populated for ARIMA models. - ArimaForecastingMetrics arima_forecasting_metrics = 6; - } - } - - // Data split result. This contains references to the training and evaluation - // data tables that were used to train the model. - message DataSplitResult { - // Table reference of the training data after split. - TableReference training_table = 1; - - // Table reference of the evaluation data after split. - TableReference evaluation_table = 2; - } - - // Arima order, can be used for both non-seasonal and seasonal parts. - message ArimaOrder { - // Order of the autoregressive part. - int64 p = 1; - - // Order of the differencing part. - int64 d = 2; - - // Order of the moving-average part. - int64 q = 3; - } - - // ARIMA model fitting metrics. - message ArimaFittingMetrics { - // Log-likelihood. - double log_likelihood = 1; - - // AIC. - double aic = 2; - - // Variance. - double variance = 3; - } - - // Global explanations containing the top most important features - // after training. - message GlobalExplanation { - // Explanation for a single feature. - message Explanation { - // Full name of the feature. For non-numerical features, will be - // formatted like .. Overall size of - // feature name will always be truncated to first 120 characters. - string feature_name = 1; - - // Attribution of feature. - google.protobuf.DoubleValue attribution = 2; - } - - // A list of the top global explanations. Sorted by absolute value of - // attribution in descending order. - repeated Explanation explanations = 1; - - // Class label for this set of global explanations. Will be empty/null for - // binary logistic and linear regression models. Sorted alphabetically in - // descending order. 
- string class_label = 2; - } - - // Information about a single training query run for the model. - message TrainingRun { - message TrainingOptions { - // The maximum number of iterations in training. Used only for iterative - // training algorithms. - int64 max_iterations = 1; - - // Type of loss function used during training run. - LossType loss_type = 2; - - // Learning rate in training. Used only for iterative training algorithms. - double learn_rate = 3; - - // L1 regularization coefficient. - google.protobuf.DoubleValue l1_regularization = 4; - - // L2 regularization coefficient. - google.protobuf.DoubleValue l2_regularization = 5; - - // When early_stop is true, stops training when accuracy improvement is - // less than 'min_relative_progress'. Used only for iterative training - // algorithms. - google.protobuf.DoubleValue min_relative_progress = 6; - - // Whether to train a model from the last checkpoint. - google.protobuf.BoolValue warm_start = 7; - - // Whether to stop early when the loss doesn't improve significantly - // any more (compared to min_relative_progress). Used only for iterative - // training algorithms. - google.protobuf.BoolValue early_stop = 8; - - // Name of input label columns in training data. - repeated string input_label_columns = 9; - - // The data split type for training and evaluation, e.g. RANDOM. - DataSplitMethod data_split_method = 10; - - // The fraction of evaluation data over the whole input data. The rest - // of data will be used as training data. The format should be double. - // Accurate to two decimal places. - // Default value is 0.2. - double data_split_eval_fraction = 11; - - // The column to split data with. This column won't be used as a - // feature. - // 1. When data_split_method is CUSTOM, the corresponding column should - // be boolean. The rows with true value tag are eval data, and the false - // are training data. - // 2. 
When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION - // rows (from smallest to largest) in the corresponding column are used - // as training data, and the rest are eval data. It respects the order - // in Orderable data types: - // https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties - string data_split_column = 12; - - // The strategy to determine learn rate for the current iteration. - LearnRateStrategy learn_rate_strategy = 13; - - // Specifies the initial learning rate for the line search learn rate - // strategy. - double initial_learn_rate = 16; - - // Weights associated with each label class, for rebalancing the - // training data. Only applicable for classification models. - map label_class_weights = 17; - - // User column specified for matrix factorization models. - string user_column = 18; - - // Item column specified for matrix factorization models. - string item_column = 19; - - // Distance type for clustering models. - DistanceType distance_type = 20; - - // Number of clusters for clustering models. - int64 num_clusters = 21; - - // [Beta] Google Cloud Storage URI from which the model was imported. Only - // applicable for imported models. - string model_uri = 22; - - // Optimization strategy for training linear regression models. - OptimizationStrategy optimization_strategy = 23; - - // Hidden units for dnn models. - repeated int64 hidden_units = 24; - - // Batch size for dnn models. - int64 batch_size = 25; - - // Dropout probability for dnn models. - google.protobuf.DoubleValue dropout = 26; - - // Maximum depth of a tree for boosted tree models. - int64 max_tree_depth = 27; - - // Subsample fraction of the training data to grow tree to prevent - // overfitting for boosted tree models. - double subsample = 28; - - // Minimum split loss for boosted tree models. - google.protobuf.DoubleValue min_split_loss = 29; - - // Num factors specified for matrix factorization models. 
- int64 num_factors = 30; - - // Feedback type that specifies which algorithm to run for matrix - // factorization. - FeedbackType feedback_type = 31; - - // Hyperparameter for matrix factoration when implicit feedback type is - // specified. - google.protobuf.DoubleValue wals_alpha = 32; - - // The method used to initialize the centroids for kmeans algorithm. - KmeansEnums.KmeansInitializationMethod kmeans_initialization_method = 33; - - // The column used to provide the initial centroids for kmeans algorithm - // when kmeans_initialization_method is CUSTOM. - string kmeans_initialization_column = 34; - - // Column to be designated as time series timestamp for ARIMA model. - string time_series_timestamp_column = 35; - - // Column to be designated as time series data for ARIMA model. - string time_series_data_column = 36; - - // Whether to enable auto ARIMA or not. - bool auto_arima = 37; - - // A specification of the non-seasonal part of the ARIMA model: the three - // components (p, d, q) are the AR order, the degree of differencing, and - // the MA order. - ArimaOrder non_seasonal_order = 38; - - // The data frequency of a time series. - DataFrequency data_frequency = 39; - - // Include drift when fitting an ARIMA model. - bool include_drift = 41; - - // The geographical region based on which the holidays are considered in - // time series modeling. If a valid value is specified, then holiday - // effects modeling is enabled. - HolidayRegion holiday_region = 42; - - // The id column that will be used to indicate different time series to - // forecast in parallel. - string time_series_id_column = 43; - - // The number of periods ahead that need to be forecasted. - int64 horizon = 44; - - // Whether to preserve the input structs in output feature names. - // Suppose there is a struct A with field b. - // When false (default), the output feature name is A_b. - // When true, the output feature name is A.b. 
- bool preserve_input_structs = 45; - - // The max value of non-seasonal p and q. - int64 auto_arima_max_order = 46; - } - - // Information about a single iteration of the training run. - message IterationResult { - // Information about a single cluster for clustering model. - message ClusterInfo { - // Centroid id. - int64 centroid_id = 1; - - // Cluster radius, the average distance from centroid - // to each point assigned to the cluster. - google.protobuf.DoubleValue cluster_radius = 2; - - // Cluster size, the total number of points assigned to the cluster. - google.protobuf.Int64Value cluster_size = 3; - } - - // (Auto-)arima fitting result. Wrap everything in ArimaResult for easier - // refactoring if we want to use model-specific iteration results. - message ArimaResult { - // Arima coefficients. - message ArimaCoefficients { - // Auto-regressive coefficients, an array of double. - repeated double auto_regressive_coefficients = 1; - - // Moving-average coefficients, an array of double. - repeated double moving_average_coefficients = 2; - - // Intercept coefficient, just a double not an array. - double intercept_coefficient = 3; - } - - // Arima model information. - message ArimaModelInfo { - // Non-seasonal order. - ArimaOrder non_seasonal_order = 1; - - // Arima coefficients. - ArimaCoefficients arima_coefficients = 2; - - // Arima fitting metrics. - ArimaFittingMetrics arima_fitting_metrics = 3; - - // Whether Arima model fitted with drift or not. It is always false - // when d is not 1. - bool has_drift = 4; - - // The id to indicate different time series. - string time_series_id = 5; - - // Seasonal periods. Repeated because multiple periods are supported - // for one time series. - repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 6; - } - - // This message is repeated because there are multiple arima models - // fitted in auto-arima. For non-auto-arima model, its size is one. 
- repeated ArimaModelInfo arima_model_info = 1; - - // Seasonal periods. Repeated because multiple periods are supported for - // one time series. - repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 2; - } - - // Index of the iteration, 0 based. - google.protobuf.Int32Value index = 1; - - // Time taken to run the iteration in milliseconds. - google.protobuf.Int64Value duration_ms = 4; - - // Loss computed on the training data at the end of iteration. - google.protobuf.DoubleValue training_loss = 5; - - // Loss computed on the eval data at the end of iteration. - google.protobuf.DoubleValue eval_loss = 6; - - // Learn rate used for this iteration. - double learn_rate = 7; - - // Information about top clusters for clustering models. - repeated ClusterInfo cluster_infos = 8; - - ArimaResult arima_result = 9; - } - - // Options that were used for this training run, includes - // user specified and default options that were used. - TrainingOptions training_options = 1; - - // The start time of this training run. - google.protobuf.Timestamp start_time = 8; - - // Output of each iteration run, results.size() <= max_iterations. - repeated IterationResult results = 6; - - // The evaluation metrics over training/eval data that were computed at the - // end of training. - EvaluationMetrics evaluation_metrics = 7; - - // Data split result of the training run. Only set when the input data is - // actually split. - DataSplitResult data_split_result = 9; - - // Global explanations for important features of the model. For multi-class - // models, there is one entry for each label class. For other models, there - // is only one entry in the list. - repeated GlobalExplanation global_explanations = 10; - } - - // Indicates the type of the Model. - enum ModelType { - MODEL_TYPE_UNSPECIFIED = 0; - - // Linear regression model. - LINEAR_REGRESSION = 1; - - // Logistic regression based classification model. - LOGISTIC_REGRESSION = 2; - - // K-means clustering model. 
- KMEANS = 3; - - // Matrix factorization model. - MATRIX_FACTORIZATION = 4; - - // [Beta] DNN classifier model. - DNN_CLASSIFIER = 5; - - // [Beta] An imported TensorFlow model. - TENSORFLOW = 6; - - // [Beta] DNN regressor model. - DNN_REGRESSOR = 7; - - // [Beta] Boosted tree regressor model. - BOOSTED_TREE_REGRESSOR = 9; - - // [Beta] Boosted tree classifier model. - BOOSTED_TREE_CLASSIFIER = 10; - - // [Beta] ARIMA model. - ARIMA = 11; - - // [Beta] AutoML Tables regression model. - AUTOML_REGRESSOR = 12; - - // [Beta] AutoML Tables classification model. - AUTOML_CLASSIFIER = 13; - } - - // Loss metric to evaluate model training performance. - enum LossType { - LOSS_TYPE_UNSPECIFIED = 0; - - // Mean squared loss, used for linear regression. - MEAN_SQUARED_LOSS = 1; - - // Mean log loss, used for logistic regression. - MEAN_LOG_LOSS = 2; - } - - // Distance metric used to compute the distance between two points. - enum DistanceType { - DISTANCE_TYPE_UNSPECIFIED = 0; - - // Eculidean distance. - EUCLIDEAN = 1; - - // Cosine distance. - COSINE = 2; - } - - // Indicates the method to split input data into multiple tables. - enum DataSplitMethod { - DATA_SPLIT_METHOD_UNSPECIFIED = 0; - - // Splits data randomly. - RANDOM = 1; - - // Splits data with the user provided tags. - CUSTOM = 2; - - // Splits data sequentially. - SEQUENTIAL = 3; - - // Data split will be skipped. - NO_SPLIT = 4; - - // Splits data automatically: Uses NO_SPLIT if the data size is small. - // Otherwise uses RANDOM. - AUTO_SPLIT = 5; - } - - // Type of supported data frequency for time series forecasting models. - enum DataFrequency { - DATA_FREQUENCY_UNSPECIFIED = 0; - - // Automatically inferred from timestamps. - AUTO_FREQUENCY = 1; - - // Yearly data. - YEARLY = 2; - - // Quarterly data. - QUARTERLY = 3; - - // Monthly data. - MONTHLY = 4; - - // Weekly data. - WEEKLY = 5; - - // Daily data. - DAILY = 6; - - // Hourly data. 
- HOURLY = 7; - } - - // Type of supported holiday regions for time series forecasting models. - enum HolidayRegion { - // Holiday region unspecified. - HOLIDAY_REGION_UNSPECIFIED = 0; - - // Global. - GLOBAL = 1; - - // North America. - NA = 2; - - // Japan and Asia Pacific: Korea, Greater China, India, Australia, and New - // Zealand. - JAPAC = 3; - - // Europe, the Middle East and Africa. - EMEA = 4; - - // Latin America and the Caribbean. - LAC = 5; - - // United Arab Emirates - AE = 6; - - // Argentina - AR = 7; - - // Austria - AT = 8; - - // Australia - AU = 9; - - // Belgium - BE = 10; - - // Brazil - BR = 11; - - // Canada - CA = 12; - - // Switzerland - CH = 13; - - // Chile - CL = 14; - - // China - CN = 15; - - // Colombia - CO = 16; - - // Czechoslovakia - CS = 17; - - // Czech Republic - CZ = 18; - - // Germany - DE = 19; - - // Denmark - DK = 20; - - // Algeria - DZ = 21; - - // Ecuador - EC = 22; - - // Estonia - EE = 23; - - // Egypt - EG = 24; - - // Spain - ES = 25; - - // Finland - FI = 26; - - // France - FR = 27; - - // Great Britain (United Kingdom) - GB = 28; - - // Greece - GR = 29; - - // Hong Kong - HK = 30; - - // Hungary - HU = 31; - - // Indonesia - ID = 32; - - // Ireland - IE = 33; - - // Israel - IL = 34; - - // India - IN = 35; - - // Iran - IR = 36; - - // Italy - IT = 37; - - // Japan - JP = 38; - - // Korea (South) - KR = 39; - - // Latvia - LV = 40; - - // Morocco - MA = 41; - - // Mexico - MX = 42; - - // Malaysia - MY = 43; - - // Nigeria - NG = 44; - - // Netherlands - NL = 45; - - // Norway - NO = 46; - - // New Zealand - NZ = 47; - - // Peru - PE = 48; - - // Philippines - PH = 49; - - // Pakistan - PK = 50; - - // Poland - PL = 51; - - // Portugal - PT = 52; - - // Romania - RO = 53; - - // Serbia - RS = 54; - - // Russian Federation - RU = 55; - - // Saudi Arabia - SA = 56; - - // Sweden - SE = 57; - - // Singapore - SG = 58; - - // Slovenia - SI = 59; - - // Slovakia - SK = 60; - - // Thailand - TH = 61; - - // Turkey - 
TR = 62; - - // Taiwan - TW = 63; - - // Ukraine - UA = 64; - - // United States - US = 65; - - // Venezuela - VE = 66; - - // Viet Nam - VN = 67; - - // South Africa - ZA = 68; - } - - // Indicates the learning rate optimization strategy to use. - enum LearnRateStrategy { - LEARN_RATE_STRATEGY_UNSPECIFIED = 0; - - // Use line search to determine learning rate. - LINE_SEARCH = 1; - - // Use a constant learning rate. - CONSTANT = 2; - } - - // Indicates the optimization strategy used for training. - enum OptimizationStrategy { - OPTIMIZATION_STRATEGY_UNSPECIFIED = 0; - - // Uses an iterative batch gradient descent algorithm. - BATCH_GRADIENT_DESCENT = 1; - - // Uses a normal equation to solve linear regression problem. - NORMAL_EQUATION = 2; - } - - // Indicates the training algorithm to use for matrix factorization models. - enum FeedbackType { - FEEDBACK_TYPE_UNSPECIFIED = 0; - - // Use weighted-als for implicit feedback problems. - IMPLICIT = 1; - - // Use nonweighted-als for explicit feedback problems. - EXPLICIT = 2; - } - - // Output only. A hash of this resource. - string etag = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; - - // Required. Unique identifier for this model. - ModelReference model_reference = 2 [(google.api.field_behavior) = REQUIRED]; - - // Output only. The time when this model was created, in millisecs since the epoch. - int64 creation_time = 5 [(google.api.field_behavior) = OUTPUT_ONLY]; - - // Output only. The time when this model was last modified, in millisecs since the epoch. - int64 last_modified_time = 6 [(google.api.field_behavior) = OUTPUT_ONLY]; - - // Optional. A user-friendly description of this model. - string description = 12 [(google.api.field_behavior) = OPTIONAL]; - - // Optional. A descriptive name for this model. - string friendly_name = 14 [(google.api.field_behavior) = OPTIONAL]; - - // The labels associated with this model. You can use these to organize - // and group your models. 
Label keys and values can be no longer - // than 63 characters, can only contain lowercase letters, numeric - // characters, underscores and dashes. International characters are allowed. - // Label values are optional. Label keys must start with a letter and each - // label in the list must have a different key. - map labels = 15; - - // Optional. The time when this model expires, in milliseconds since the epoch. - // If not present, the model will persist indefinitely. Expired models - // will be deleted and their storage reclaimed. The defaultTableExpirationMs - // property of the encapsulating dataset can be used to set a default - // expirationTime on newly created models. - int64 expiration_time = 16 [(google.api.field_behavior) = OPTIONAL]; - - // Output only. The geographic location where the model resides. This value - // is inherited from the dataset. - string location = 13 [(google.api.field_behavior) = OUTPUT_ONLY]; - - // Custom encryption configuration (e.g., Cloud KMS keys). This shows the - // encryption configuration of the model data while stored in BigQuery - // storage. This field can be used with PatchModel to update encryption key - // for an already encrypted model. - EncryptionConfiguration encryption_configuration = 17; - - // Output only. Type of the model resource. - ModelType model_type = 7 [(google.api.field_behavior) = OUTPUT_ONLY]; - - // Output only. Information for all training runs in increasing order of start_time. - repeated TrainingRun training_runs = 9 [(google.api.field_behavior) = OUTPUT_ONLY]; - - // Output only. Input feature columns that were used to train this model. - repeated StandardSqlField feature_columns = 10 [(google.api.field_behavior) = OUTPUT_ONLY]; - - // Output only. Label columns that were used to train this model. - // The output of the model will have a "predicted_" prefix to these columns. 
- repeated StandardSqlField label_columns = 11 [(google.api.field_behavior) = OUTPUT_ONLY]; -} - -message GetModelRequest { - // Required. Project ID of the requested model. - string project_id = 1 [(google.api.field_behavior) = REQUIRED]; - - // Required. Dataset ID of the requested model. - string dataset_id = 2 [(google.api.field_behavior) = REQUIRED]; - - // Required. Model ID of the requested model. - string model_id = 3 [(google.api.field_behavior) = REQUIRED]; -} - -message PatchModelRequest { - // Required. Project ID of the model to patch. - string project_id = 1 [(google.api.field_behavior) = REQUIRED]; - - // Required. Dataset ID of the model to patch. - string dataset_id = 2 [(google.api.field_behavior) = REQUIRED]; - - // Required. Model ID of the model to patch. - string model_id = 3 [(google.api.field_behavior) = REQUIRED]; - - // Required. Patched model. - // Follows RFC5789 patch semantics. Missing fields are not updated. - // To clear a field, explicitly set to default value. - Model model = 4 [(google.api.field_behavior) = REQUIRED]; -} - -message DeleteModelRequest { - // Required. Project ID of the model to delete. - string project_id = 1 [(google.api.field_behavior) = REQUIRED]; - - // Required. Dataset ID of the model to delete. - string dataset_id = 2 [(google.api.field_behavior) = REQUIRED]; - - // Required. Model ID of the model to delete. - string model_id = 3 [(google.api.field_behavior) = REQUIRED]; -} - -message ListModelsRequest { - // Required. Project ID of the models to list. - string project_id = 1 [(google.api.field_behavior) = REQUIRED]; - - // Required. Dataset ID of the models to list. - string dataset_id = 2 [(google.api.field_behavior) = REQUIRED]; - - // The maximum number of results to return in a single response page. - // Leverage the page tokens to iterate through the entire collection. 
- google.protobuf.UInt32Value max_results = 3; - - // Page token, returned by a previous call to request the next page of - // results - string page_token = 4; -} - -message ListModelsResponse { - // Models in the requested dataset. Only the following fields are populated: - // model_reference, model_type, creation_time, last_modified_time and - // labels. - repeated Model models = 1; - - // A token to request the next page of results. - string next_page_token = 2; -} diff --git a/google/cloud/bigquery_v2/proto/model_pb2.py b/google/cloud/bigquery_v2/proto/model_pb2.py deleted file mode 100644 index 7b66be8f7..000000000 --- a/google/cloud/bigquery_v2/proto/model_pb2.py +++ /dev/null @@ -1,4298 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: google/cloud/bigquery_v2/proto/model.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from google.api import client_pb2 as google_dot_api_dot_client__pb2 -from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2 -from google.cloud.bigquery_v2.proto import ( - encryption_config_pb2 as google_dot_cloud_dot_bigquery__v2_dot_proto_dot_encryption__config__pb2, -) -from google.cloud.bigquery_v2.proto import ( - model_reference_pb2 as google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__reference__pb2, -) -from google.cloud.bigquery_v2.proto import ( - standard_sql_pb2 as google_dot_cloud_dot_bigquery__v2_dot_proto_dot_standard__sql__pb2, -) -from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 -from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 -from google.protobuf import wrappers_pb2 as 
google_dot_protobuf_dot_wrappers__pb2 -from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/bigquery_v2/proto/model.proto", - package="google.cloud.bigquery.v2", - syntax="proto3", - serialized_options=b"\n\034com.google.cloud.bigquery.v2B\nModelProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery", - create_key=_descriptor._internal_create_key, - serialized_pb=b'\n*google/cloud/bigquery_v2/proto/model.proto\x12\x18google.cloud.bigquery.v2\x1a\x17google/api/client.proto\x1a\x1fgoogle/api/field_behavior.proto\x1a\x36google/cloud/bigquery_v2/proto/encryption_config.proto\x1a\x34google/cloud/bigquery_v2/proto/model_reference.proto\x1a\x31google/cloud/bigquery_v2/proto/standard_sql.proto\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1egoogle/protobuf/wrappers.proto\x1a\x1cgoogle/api/annotations.proto"\x9b\x35\n\x05Model\x12\x11\n\x04\x65tag\x18\x01 \x01(\tB\x03\xe0\x41\x03\x12\x46\n\x0fmodel_reference\x18\x02 \x01(\x0b\x32(.google.cloud.bigquery.v2.ModelReferenceB\x03\xe0\x41\x02\x12\x1a\n\rcreation_time\x18\x05 \x01(\x03\x42\x03\xe0\x41\x03\x12\x1f\n\x12last_modified_time\x18\x06 \x01(\x03\x42\x03\xe0\x41\x03\x12\x18\n\x0b\x64\x65scription\x18\x0c \x01(\tB\x03\xe0\x41\x01\x12\x1a\n\rfriendly_name\x18\x0e \x01(\tB\x03\xe0\x41\x01\x12;\n\x06labels\x18\x0f \x03(\x0b\x32+.google.cloud.bigquery.v2.Model.LabelsEntry\x12\x1c\n\x0f\x65xpiration_time\x18\x10 \x01(\x03\x42\x03\xe0\x41\x01\x12\x15\n\x08location\x18\r \x01(\tB\x03\xe0\x41\x03\x12S\n\x18\x65ncryption_configuration\x18\x11 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.EncryptionConfiguration\x12\x42\n\nmodel_type\x18\x07 \x01(\x0e\x32).google.cloud.bigquery.v2.Model.ModelTypeB\x03\xe0\x41\x03\x12G\n\rtraining_runs\x18\t \x03(\x0b\x32+.google.cloud.bigquery.v2.Model.TrainingRunB\x03\xe0\x41\x03\x12H\n\x0f\x66\x65\x61ture_columns\x18\n 
\x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlFieldB\x03\xe0\x41\x03\x12\x46\n\rlabel_columns\x18\x0b \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlFieldB\x03\xe0\x41\x03\x1aq\n\x0bKmeansEnums"b\n\x1aKmeansInitializationMethod\x12,\n(KMEANS_INITIALIZATION_METHOD_UNSPECIFIED\x10\x00\x12\n\n\x06RANDOM\x10\x01\x12\n\n\x06\x43USTOM\x10\x02\x1a\xb4\x02\n\x11RegressionMetrics\x12\x39\n\x13mean_absolute_error\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x38\n\x12mean_squared_error\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12<\n\x16mean_squared_log_error\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15median_absolute_error\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\tr_squared\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\xef\x02\n\x1e\x41ggregateClassificationMetrics\x12/\n\tprecision\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12,\n\x06recall\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x61\x63\x63uracy\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\tthreshold\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x66\x31_score\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08log_loss\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12-\n\x07roc_auc\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x9f\x06\n\x1b\x42inaryClassificationMetrics\x12h\n aggregate_classification_metrics\x18\x01 \x01(\x0b\x32>.google.cloud.bigquery.v2.Model.AggregateClassificationMetrics\x12w\n\x1c\x62inary_confusion_matrix_list\x18\x02 \x03(\x0b\x32Q.google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix\x12\x16\n\x0epositive_label\x18\x03 \x01(\t\x12\x16\n\x0enegative_label\x18\x04 \x01(\t\x1a\xec\x03\n\x15\x42inaryConfusionMatrix\x12>\n\x18positive_class_threshold\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x33\n\x0etrue_positives\x18\x02 
\x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x34\n\x0f\x66\x61lse_positives\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x33\n\x0etrue_negatives\x18\x04 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x34\n\x0f\x66\x61lse_negatives\x18\x05 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12/\n\tprecision\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12,\n\x06recall\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x66\x31_score\x18\x08 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x61\x63\x63uracy\x18\t \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x87\x05\n\x1fMultiClassClassificationMetrics\x12h\n aggregate_classification_metrics\x18\x01 \x01(\x0b\x32>.google.cloud.bigquery.v2.Model.AggregateClassificationMetrics\x12n\n\x15\x63onfusion_matrix_list\x18\x02 \x03(\x0b\x32O.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix\x1a\x89\x03\n\x0f\x43onfusionMatrix\x12:\n\x14\x63onfidence_threshold\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x61\n\x04rows\x18\x02 \x03(\x0b\x32S.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row\x1aQ\n\x05\x45ntry\x12\x17\n\x0fpredicted_label\x18\x01 \x01(\t\x12/\n\nitem_count\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a\x83\x01\n\x03Row\x12\x14\n\x0c\x61\x63tual_label\x18\x01 \x01(\t\x12\x66\n\x07\x65ntries\x18\x02 \x03(\x0b\x32U.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry\x1a\xcb\x06\n\x11\x43lusteringMetrics\x12:\n\x14\x64\x61vies_bouldin_index\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15mean_squared_distance\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12K\n\x08\x63lusters\x18\x03 \x03(\x0b\x32\x39.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster\x1a\xef\x04\n\x07\x43luster\x12\x13\n\x0b\x63\x65ntroid_id\x18\x01 \x01(\x03\x12^\n\x0e\x66\x65\x61ture_values\x18\x02 
\x03(\x0b\x32\x46.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue\x12*\n\x05\x63ount\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a\xc2\x03\n\x0c\x46\x65\x61tureValue\x12\x16\n\x0e\x66\x65\x61ture_column\x18\x01 \x01(\t\x12\x37\n\x0fnumerical_value\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValueH\x00\x12t\n\x11\x63\x61tegorical_value\x18\x03 \x01(\x0b\x32W.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValueH\x00\x1a\xe1\x01\n\x10\x43\x61tegoricalValue\x12~\n\x0f\x63\x61tegory_counts\x18\x01 \x03(\x0b\x32\x65.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount\x1aM\n\rCategoryCount\x12\x10\n\x08\x63\x61tegory\x18\x01 \x01(\t\x12*\n\x05\x63ount\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64ValueB\x07\n\x05value\x1a\x95\x03\n\x11\x45valuationMetrics\x12O\n\x12regression_metrics\x18\x01 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.RegressionMetricsH\x00\x12\x64\n\x1d\x62inary_classification_metrics\x18\x02 \x01(\x0b\x32;.google.cloud.bigquery.v2.Model.BinaryClassificationMetricsH\x00\x12m\n"multi_class_classification_metrics\x18\x03 \x01(\x0b\x32?.google.cloud.bigquery.v2.Model.MultiClassClassificationMetricsH\x00\x12O\n\x12\x63lustering_metrics\x18\x04 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.ClusteringMetricsH\x00\x42\t\n\x07metrics\x1a\xab\x0f\n\x0bTrainingRun\x12U\n\x10training_options\x18\x01 \x01(\x0b\x32;.google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions\x12.\n\nstart_time\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12L\n\x07results\x18\x06 \x03(\x0b\x32;.google.cloud.bigquery.v2.Model.TrainingRun.IterationResult\x12M\n\x12\x65valuation_metrics\x18\x07 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.EvaluationMetrics\x1a\x9d\t\n\x0fTrainingOptions\x12\x16\n\x0emax_iterations\x18\x01 \x01(\x03\x12;\n\tloss_type\x18\x02 \x01(\x0e\x32(.google.cloud.bigquery.v2.Model.LossType\x12\x12\n\nlearn_rate\x18\x03 
\x01(\x01\x12\x37\n\x11l1_regularization\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x37\n\x11l2_regularization\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15min_relative_progress\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\nwarm_start\x18\x07 \x01(\x0b\x32\x1a.google.protobuf.BoolValue\x12.\n\nearly_stop\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.BoolValue\x12\x1b\n\x13input_label_columns\x18\t \x03(\t\x12J\n\x11\x64\x61ta_split_method\x18\n \x01(\x0e\x32/.google.cloud.bigquery.v2.Model.DataSplitMethod\x12 \n\x18\x64\x61ta_split_eval_fraction\x18\x0b \x01(\x01\x12\x19\n\x11\x64\x61ta_split_column\x18\x0c \x01(\t\x12N\n\x13learn_rate_strategy\x18\r \x01(\x0e\x32\x31.google.cloud.bigquery.v2.Model.LearnRateStrategy\x12\x1a\n\x12initial_learn_rate\x18\x10 \x01(\x01\x12o\n\x13label_class_weights\x18\x11 \x03(\x0b\x32R.google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry\x12\x43\n\rdistance_type\x18\x14 \x01(\x0e\x32,.google.cloud.bigquery.v2.Model.DistanceType\x12\x14\n\x0cnum_clusters\x18\x15 \x01(\x03\x12\x11\n\tmodel_uri\x18\x16 \x01(\t\x12S\n\x15optimization_strategy\x18\x17 \x01(\x0e\x32\x34.google.cloud.bigquery.v2.Model.OptimizationStrategy\x12l\n\x1ckmeans_initialization_method\x18! 
\x01(\x0e\x32\x46.google.cloud.bigquery.v2.Model.KmeansEnums.KmeansInitializationMethod\x12$\n\x1ckmeans_initialization_column\x18" \x01(\t\x1a\x38\n\x16LabelClassWeightsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x01:\x02\x38\x01\x1a\xd7\x03\n\x0fIterationResult\x12*\n\x05index\x18\x01 \x01(\x0b\x32\x1b.google.protobuf.Int32Value\x12\x30\n\x0b\x64uration_ms\x18\x04 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x33\n\rtraining_loss\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\teval_loss\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x12\n\nlearn_rate\x18\x07 \x01(\x01\x12^\n\rcluster_infos\x18\x08 \x03(\x0b\x32G.google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo\x1a\x8b\x01\n\x0b\x43lusterInfo\x12\x13\n\x0b\x63\x65ntroid_id\x18\x01 \x01(\x03\x12\x34\n\x0e\x63luster_radius\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x31\n\x0c\x63luster_size\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 
\x01(\t:\x02\x38\x01"s\n\tModelType\x12\x1a\n\x16MODEL_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11LINEAR_REGRESSION\x10\x01\x12\x17\n\x13LOGISTIC_REGRESSION\x10\x02\x12\n\n\x06KMEANS\x10\x03\x12\x0e\n\nTENSORFLOW\x10\x06"O\n\x08LossType\x12\x19\n\x15LOSS_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11MEAN_SQUARED_LOSS\x10\x01\x12\x11\n\rMEAN_LOG_LOSS\x10\x02"H\n\x0c\x44istanceType\x12\x1d\n\x19\x44ISTANCE_TYPE_UNSPECIFIED\x10\x00\x12\r\n\tEUCLIDEAN\x10\x01\x12\n\n\x06\x43OSINE\x10\x02"z\n\x0f\x44\x61taSplitMethod\x12!\n\x1d\x44\x41TA_SPLIT_METHOD_UNSPECIFIED\x10\x00\x12\n\n\x06RANDOM\x10\x01\x12\n\n\x06\x43USTOM\x10\x02\x12\x0e\n\nSEQUENTIAL\x10\x03\x12\x0c\n\x08NO_SPLIT\x10\x04\x12\x0e\n\nAUTO_SPLIT\x10\x05"W\n\x11LearnRateStrategy\x12#\n\x1fLEARN_RATE_STRATEGY_UNSPECIFIED\x10\x00\x12\x0f\n\x0bLINE_SEARCH\x10\x01\x12\x0c\n\x08\x43ONSTANT\x10\x02"n\n\x14OptimizationStrategy\x12%\n!OPTIMIZATION_STRATEGY_UNSPECIFIED\x10\x00\x12\x1a\n\x16\x42\x41TCH_GRADIENT_DESCENT\x10\x01\x12\x13\n\x0fNORMAL_EQUATION\x10\x02"Z\n\x0fGetModelRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02"\x91\x01\n\x11PatchModelRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02\x12\x33\n\x05model\x18\x04 \x01(\x0b\x32\x1f.google.cloud.bigquery.v2.ModelB\x03\xe0\x41\x02"]\n\x12\x44\x65leteModelRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02"\x8c\x01\n\x11ListModelsRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x31\n\x0bmax_results\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.UInt32Value\x12\x12\n\npage_token\x18\x04 \x01(\t"^\n\x12ListModelsResponse\x12/\n\x06models\x18\x01 
\x03(\x0b\x32\x1f.google.cloud.bigquery.v2.Model\x12\x17\n\x0fnext_page_token\x18\x02 \x01(\t2\xfa\x05\n\x0cModelService\x12y\n\x08GetModel\x12).google.cloud.bigquery.v2.GetModelRequest\x1a\x1f.google.cloud.bigquery.v2.Model"!\xda\x41\x1eproject_id,dataset_id,model_id\x12\x8d\x01\n\nListModels\x12+.google.cloud.bigquery.v2.ListModelsRequest\x1a,.google.cloud.bigquery.v2.ListModelsResponse"$\xda\x41!project_id,dataset_id,max_results\x12\x83\x01\n\nPatchModel\x12+.google.cloud.bigquery.v2.PatchModelRequest\x1a\x1f.google.cloud.bigquery.v2.Model"\'\xda\x41$project_id,dataset_id,model_id,model\x12v\n\x0b\x44\x65leteModel\x12,.google.cloud.bigquery.v2.DeleteModelRequest\x1a\x16.google.protobuf.Empty"!\xda\x41\x1eproject_id,dataset_id,model_id\x1a\xe0\x01\xca\x41\x17\x62igquery.googleapis.com\xd2\x41\xc2\x01https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/bigquery.readonly,https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/cloud-platform.read-onlyBl\n\x1c\x63om.google.cloud.bigquery.v2B\nModelProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3', - dependencies=[ - google_dot_api_dot_client__pb2.DESCRIPTOR, - google_dot_api_dot_field__behavior__pb2.DESCRIPTOR, - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_encryption__config__pb2.DESCRIPTOR, - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__reference__pb2.DESCRIPTOR, - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_standard__sql__pb2.DESCRIPTOR, - google_dot_protobuf_dot_empty__pb2.DESCRIPTOR, - google_dot_protobuf_dot_timestamp__pb2.DESCRIPTOR, - google_dot_protobuf_dot_wrappers__pb2.DESCRIPTOR, - google_dot_api_dot_annotations__pb2.DESCRIPTOR, - ], -) - - -_MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD = _descriptor.EnumDescriptor( - name="KmeansInitializationMethod", - full_name="google.cloud.bigquery.v2.Model.KmeansEnums.KmeansInitializationMethod", - filename=None, - file=DESCRIPTOR, - 
create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="KMEANS_INITIALIZATION_METHOD_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="RANDOM", - index=1, - number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="CUSTOM", - index=2, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=1132, - serialized_end=1230, -) -_sym_db.RegisterEnumDescriptor(_MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD) - -_MODEL_MODELTYPE = _descriptor.EnumDescriptor( - name="ModelType", - full_name="google.cloud.bigquery.v2.Model.ModelType", - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="MODEL_TYPE_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="LINEAR_REGRESSION", - index=1, - number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="LOGISTIC_REGRESSION", - index=2, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="KMEANS", - index=3, - number=3, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="TENSORFLOW", - index=4, - number=6, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=6632, - serialized_end=6747, -) 
-_sym_db.RegisterEnumDescriptor(_MODEL_MODELTYPE) - -_MODEL_LOSSTYPE = _descriptor.EnumDescriptor( - name="LossType", - full_name="google.cloud.bigquery.v2.Model.LossType", - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="LOSS_TYPE_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="MEAN_SQUARED_LOSS", - index=1, - number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="MEAN_LOG_LOSS", - index=2, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=6749, - serialized_end=6828, -) -_sym_db.RegisterEnumDescriptor(_MODEL_LOSSTYPE) - -_MODEL_DISTANCETYPE = _descriptor.EnumDescriptor( - name="DistanceType", - full_name="google.cloud.bigquery.v2.Model.DistanceType", - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="DISTANCE_TYPE_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="EUCLIDEAN", - index=1, - number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="COSINE", - index=2, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=6830, - serialized_end=6902, -) -_sym_db.RegisterEnumDescriptor(_MODEL_DISTANCETYPE) - -_MODEL_DATASPLITMETHOD = _descriptor.EnumDescriptor( - name="DataSplitMethod", - 
full_name="google.cloud.bigquery.v2.Model.DataSplitMethod", - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="DATA_SPLIT_METHOD_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="RANDOM", - index=1, - number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="CUSTOM", - index=2, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="SEQUENTIAL", - index=3, - number=3, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="NO_SPLIT", - index=4, - number=4, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="AUTO_SPLIT", - index=5, - number=5, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=6904, - serialized_end=7026, -) -_sym_db.RegisterEnumDescriptor(_MODEL_DATASPLITMETHOD) - -_MODEL_LEARNRATESTRATEGY = _descriptor.EnumDescriptor( - name="LearnRateStrategy", - full_name="google.cloud.bigquery.v2.Model.LearnRateStrategy", - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="LEARN_RATE_STRATEGY_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="LINE_SEARCH", - index=1, - number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - 
name="CONSTANT", - index=2, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=7028, - serialized_end=7115, -) -_sym_db.RegisterEnumDescriptor(_MODEL_LEARNRATESTRATEGY) - -_MODEL_OPTIMIZATIONSTRATEGY = _descriptor.EnumDescriptor( - name="OptimizationStrategy", - full_name="google.cloud.bigquery.v2.Model.OptimizationStrategy", - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="OPTIMIZATION_STRATEGY_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="BATCH_GRADIENT_DESCENT", - index=1, - number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="NORMAL_EQUATION", - index=2, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=7117, - serialized_end=7227, -) -_sym_db.RegisterEnumDescriptor(_MODEL_OPTIMIZATIONSTRATEGY) - - -_MODEL_KMEANSENUMS = _descriptor.Descriptor( - name="KmeansEnums", - full_name="google.cloud.bigquery.v2.Model.KmeansEnums", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[], - extensions=[], - nested_types=[], - enum_types=[_MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD,], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1117, - serialized_end=1230, -) - -_MODEL_REGRESSIONMETRICS = _descriptor.Descriptor( - name="RegressionMetrics", - full_name="google.cloud.bigquery.v2.Model.RegressionMetrics", - filename=None, - file=DESCRIPTOR, - containing_type=None, - 
create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="mean_absolute_error", - full_name="google.cloud.bigquery.v2.Model.RegressionMetrics.mean_absolute_error", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="mean_squared_error", - full_name="google.cloud.bigquery.v2.Model.RegressionMetrics.mean_squared_error", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="mean_squared_log_error", - full_name="google.cloud.bigquery.v2.Model.RegressionMetrics.mean_squared_log_error", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="median_absolute_error", - full_name="google.cloud.bigquery.v2.Model.RegressionMetrics.median_absolute_error", - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="r_squared", - 
full_name="google.cloud.bigquery.v2.Model.RegressionMetrics.r_squared", - index=4, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1233, - serialized_end=1541, -) - -_MODEL_AGGREGATECLASSIFICATIONMETRICS = _descriptor.Descriptor( - name="AggregateClassificationMetrics", - full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="precision", - full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.precision", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="recall", - full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.recall", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="accuracy", - full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.accuracy", - index=2, - number=3, - type=11, - cpp_type=10, - 
label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="threshold", - full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.threshold", - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="f1_score", - full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.f1_score", - index=4, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="log_loss", - full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.log_loss", - index=5, - number=6, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="roc_auc", - full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.roc_auc", - index=6, - number=7, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - 
serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1544, - serialized_end=1911, -) - -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX = _descriptor.Descriptor( - name="BinaryConfusionMatrix", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="positive_class_threshold", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.positive_class_threshold", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="true_positives", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.true_positives", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="false_positives", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.false_positives", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - 
extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="true_negatives", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.true_negatives", - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="false_negatives", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.false_negatives", - index=4, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="precision", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.precision", - index=5, - number=6, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="recall", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.recall", - index=6, - number=7, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - 
create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="f1_score", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.f1_score", - index=7, - number=8, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="accuracy", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.accuracy", - index=8, - number=9, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2221, - serialized_end=2713, -) - -_MODEL_BINARYCLASSIFICATIONMETRICS = _descriptor.Descriptor( - name="BinaryClassificationMetrics", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="aggregate_classification_metrics", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.aggregate_classification_metrics", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - 
_descriptor.FieldDescriptor( - name="binary_confusion_matrix_list", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.binary_confusion_matrix_list", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="positive_label", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.positive_label", - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="negative_label", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.negative_label", - index=3, - number=4, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX,], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1914, - serialized_end=2713, -) - -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY = _descriptor.Descriptor( - name="Entry", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - 
create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="predicted_label", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry.predicted_label", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="item_count", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry.item_count", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3148, - serialized_end=3229, -) - -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW = _descriptor.Descriptor( - name="Row", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="actual_label", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row.actual_label", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - 
file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="entries", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row.entries", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3232, - serialized_end=3363, -) - -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX = _descriptor.Descriptor( - name="ConfusionMatrix", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="confidence_threshold", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.confidence_threshold", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="rows", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.rows", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - 
create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[ - _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY, - _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2970, - serialized_end=3363, -) - -_MODEL_MULTICLASSCLASSIFICATIONMETRICS = _descriptor.Descriptor( - name="MultiClassClassificationMetrics", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="aggregate_classification_metrics", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.aggregate_classification_metrics", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="confusion_matrix_list", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.confusion_matrix_list", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX,], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2716, - serialized_end=3363, -) - 
-_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT = _descriptor.Descriptor( - name="CategoryCount", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="category", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount.category", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="count", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount.count", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=4123, - serialized_end=4200, -) - -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE = _descriptor.Descriptor( - name="CategoricalValue", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="category_counts", - 
full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.category_counts", - index=0, - number=1, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[ - _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3975, - serialized_end=4200, -) - -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE = _descriptor.Descriptor( - name="FeatureValue", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="feature_column", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.feature_column", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="numerical_value", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.numerical_value", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - 
_descriptor.FieldDescriptor( - name="categorical_value", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.categorical_value", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE,], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name="value", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.value", - index=0, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[], - ), - ], - serialized_start=3759, - serialized_end=4209, -) - -_MODEL_CLUSTERINGMETRICS_CLUSTER = _descriptor.Descriptor( - name="Cluster", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="centroid_id", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.centroid_id", - index=0, - number=1, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="feature_values", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.feature_values", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - 
enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="count", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.count", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE,], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3586, - serialized_end=4209, -) - -_MODEL_CLUSTERINGMETRICS = _descriptor.Descriptor( - name="ClusteringMetrics", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="davies_bouldin_index", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.davies_bouldin_index", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="mean_squared_distance", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.mean_squared_distance", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - 
serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="clusters", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.clusters", - index=2, - number=3, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[_MODEL_CLUSTERINGMETRICS_CLUSTER,], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3366, - serialized_end=4209, -) - -_MODEL_EVALUATIONMETRICS = _descriptor.Descriptor( - name="EvaluationMetrics", - full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="regression_metrics", - full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics.regression_metrics", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="binary_classification_metrics", - full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics.binary_classification_metrics", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - 
_descriptor.FieldDescriptor( - name="multi_class_classification_metrics", - full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics.multi_class_classification_metrics", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="clustering_metrics", - full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics.clustering_metrics", - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name="metrics", - full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics.metrics", - index=0, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[], - ), - ], - serialized_start=4212, - serialized_end=4617, -) - -_MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY = _descriptor.Descriptor( - name="LabelClassWeightsEntry", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry.key", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - 
message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="value", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry.value", - index=1, - number=2, - type=1, - cpp_type=5, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=b"8\001", - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=6053, - serialized_end=6109, -) - -_MODEL_TRAININGRUN_TRAININGOPTIONS = _descriptor.Descriptor( - name="TrainingOptions", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="max_iterations", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.max_iterations", - index=0, - number=1, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="loss_type", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.loss_type", - index=1, - number=2, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - 
serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="learn_rate", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.learn_rate", - index=2, - number=3, - type=1, - cpp_type=5, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="l1_regularization", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.l1_regularization", - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="l2_regularization", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.l2_regularization", - index=4, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="min_relative_progress", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.min_relative_progress", - index=5, - number=6, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - 
name="warm_start", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.warm_start", - index=6, - number=7, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="early_stop", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.early_stop", - index=7, - number=8, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="input_label_columns", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.input_label_columns", - index=8, - number=9, - type=9, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="data_split_method", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.data_split_method", - index=9, - number=10, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="data_split_eval_fraction", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.data_split_eval_fraction", - index=10, - number=11, - 
type=1, - cpp_type=5, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="data_split_column", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.data_split_column", - index=11, - number=12, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="learn_rate_strategy", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.learn_rate_strategy", - index=12, - number=13, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="initial_learn_rate", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.initial_learn_rate", - index=13, - number=16, - type=1, - cpp_type=5, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="label_class_weights", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.label_class_weights", - index=14, - number=17, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - 
message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="distance_type", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.distance_type", - index=15, - number=20, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="num_clusters", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.num_clusters", - index=16, - number=21, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="model_uri", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.model_uri", - index=17, - number=22, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="optimization_strategy", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.optimization_strategy", - index=18, - number=23, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - 
file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="kmeans_initialization_method", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.kmeans_initialization_method", - index=19, - number=33, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="kmeans_initialization_column", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.kmeans_initialization_column", - index=20, - number=34, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[_MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY,], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=4928, - serialized_end=6109, -) - -_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO = _descriptor.Descriptor( - name="ClusterInfo", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="centroid_id", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo.centroid_id", - index=0, - number=1, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - 
serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="cluster_radius", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo.cluster_radius", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="cluster_size", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo.cluster_size", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=6444, - serialized_end=6583, -) - -_MODEL_TRAININGRUN_ITERATIONRESULT = _descriptor.Descriptor( - name="IterationResult", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="index", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.index", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - 
_descriptor.FieldDescriptor( - name="duration_ms", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.duration_ms", - index=1, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="training_loss", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.training_loss", - index=2, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="eval_loss", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.eval_loss", - index=3, - number=6, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="learn_rate", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.learn_rate", - index=4, - number=7, - type=1, - cpp_type=5, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="cluster_infos", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.cluster_infos", - index=5, - number=8, - type=11, - 
cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO,], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=6112, - serialized_end=6583, -) - -_MODEL_TRAININGRUN = _descriptor.Descriptor( - name="TrainingRun", - full_name="google.cloud.bigquery.v2.Model.TrainingRun", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="training_options", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.training_options", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="start_time", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.start_time", - index=1, - number=8, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="results", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.results", - index=2, - number=6, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - 
extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="evaluation_metrics", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.evaluation_metrics", - index=3, - number=7, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[ - _MODEL_TRAININGRUN_TRAININGOPTIONS, - _MODEL_TRAININGRUN_ITERATIONRESULT, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=4620, - serialized_end=6583, -) - -_MODEL_LABELSENTRY = _descriptor.Descriptor( - name="LabelsEntry", - full_name="google.cloud.bigquery.v2.Model.LabelsEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="google.cloud.bigquery.v2.Model.LabelsEntry.key", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="value", - full_name="google.cloud.bigquery.v2.Model.LabelsEntry.value", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - 
extensions=[], - nested_types=[], - enum_types=[], - serialized_options=b"8\001", - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=6585, - serialized_end=6630, -) - -_MODEL = _descriptor.Descriptor( - name="Model", - full_name="google.cloud.bigquery.v2.Model", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="etag", - full_name="google.cloud.bigquery.v2.Model.etag", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="model_reference", - full_name="google.cloud.bigquery.v2.Model.model_reference", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="creation_time", - full_name="google.cloud.bigquery.v2.Model.creation_time", - index=2, - number=5, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="last_modified_time", - full_name="google.cloud.bigquery.v2.Model.last_modified_time", - index=3, - number=6, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - 
enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="description", - full_name="google.cloud.bigquery.v2.Model.description", - index=4, - number=12, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\001", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="friendly_name", - full_name="google.cloud.bigquery.v2.Model.friendly_name", - index=5, - number=14, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\001", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="labels", - full_name="google.cloud.bigquery.v2.Model.labels", - index=6, - number=15, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="expiration_time", - full_name="google.cloud.bigquery.v2.Model.expiration_time", - index=7, - number=16, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\001", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="location", 
- full_name="google.cloud.bigquery.v2.Model.location", - index=8, - number=13, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="encryption_configuration", - full_name="google.cloud.bigquery.v2.Model.encryption_configuration", - index=9, - number=17, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="model_type", - full_name="google.cloud.bigquery.v2.Model.model_type", - index=10, - number=7, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="training_runs", - full_name="google.cloud.bigquery.v2.Model.training_runs", - index=11, - number=9, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="feature_columns", - full_name="google.cloud.bigquery.v2.Model.feature_columns", - index=12, - number=10, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - 
containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="label_columns", - full_name="google.cloud.bigquery.v2.Model.label_columns", - index=13, - number=11, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[ - _MODEL_KMEANSENUMS, - _MODEL_REGRESSIONMETRICS, - _MODEL_AGGREGATECLASSIFICATIONMETRICS, - _MODEL_BINARYCLASSIFICATIONMETRICS, - _MODEL_MULTICLASSCLASSIFICATIONMETRICS, - _MODEL_CLUSTERINGMETRICS, - _MODEL_EVALUATIONMETRICS, - _MODEL_TRAININGRUN, - _MODEL_LABELSENTRY, - ], - enum_types=[ - _MODEL_MODELTYPE, - _MODEL_LOSSTYPE, - _MODEL_DISTANCETYPE, - _MODEL_DATASPLITMETHOD, - _MODEL_LEARNRATESTRATEGY, - _MODEL_OPTIMIZATIONSTRATEGY, - ], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=416, - serialized_end=7227, -) - - -_GETMODELREQUEST = _descriptor.Descriptor( - name="GetModelRequest", - full_name="google.cloud.bigquery.v2.GetModelRequest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="project_id", - full_name="google.cloud.bigquery.v2.GetModelRequest.project_id", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - 
name="dataset_id", - full_name="google.cloud.bigquery.v2.GetModelRequest.dataset_id", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="model_id", - full_name="google.cloud.bigquery.v2.GetModelRequest.model_id", - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=7229, - serialized_end=7319, -) - - -_PATCHMODELREQUEST = _descriptor.Descriptor( - name="PatchModelRequest", - full_name="google.cloud.bigquery.v2.PatchModelRequest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="project_id", - full_name="google.cloud.bigquery.v2.PatchModelRequest.project_id", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="dataset_id", - full_name="google.cloud.bigquery.v2.PatchModelRequest.dataset_id", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - 
has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="model_id", - full_name="google.cloud.bigquery.v2.PatchModelRequest.model_id", - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="model", - full_name="google.cloud.bigquery.v2.PatchModelRequest.model", - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=7322, - serialized_end=7467, -) - - -_DELETEMODELREQUEST = _descriptor.Descriptor( - name="DeleteModelRequest", - full_name="google.cloud.bigquery.v2.DeleteModelRequest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="project_id", - full_name="google.cloud.bigquery.v2.DeleteModelRequest.project_id", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - 
serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="dataset_id", - full_name="google.cloud.bigquery.v2.DeleteModelRequest.dataset_id", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="model_id", - full_name="google.cloud.bigquery.v2.DeleteModelRequest.model_id", - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=7469, - serialized_end=7562, -) - - -_LISTMODELSREQUEST = _descriptor.Descriptor( - name="ListModelsRequest", - full_name="google.cloud.bigquery.v2.ListModelsRequest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="project_id", - full_name="google.cloud.bigquery.v2.ListModelsRequest.project_id", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="dataset_id", - 
full_name="google.cloud.bigquery.v2.ListModelsRequest.dataset_id", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="max_results", - full_name="google.cloud.bigquery.v2.ListModelsRequest.max_results", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="page_token", - full_name="google.cloud.bigquery.v2.ListModelsRequest.page_token", - index=3, - number=4, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=7565, - serialized_end=7705, -) - - -_LISTMODELSRESPONSE = _descriptor.Descriptor( - name="ListModelsResponse", - full_name="google.cloud.bigquery.v2.ListModelsResponse", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="models", - full_name="google.cloud.bigquery.v2.ListModelsResponse.models", - index=0, - number=1, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - 
message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="next_page_token", - full_name="google.cloud.bigquery.v2.ListModelsResponse.next_page_token", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=7707, - serialized_end=7801, -) - -_MODEL_KMEANSENUMS.containing_type = _MODEL -_MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD.containing_type = _MODEL_KMEANSENUMS -_MODEL_REGRESSIONMETRICS.fields_by_name[ - "mean_absolute_error" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_REGRESSIONMETRICS.fields_by_name[ - "mean_squared_error" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_REGRESSIONMETRICS.fields_by_name[ - "mean_squared_log_error" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_REGRESSIONMETRICS.fields_by_name[ - "median_absolute_error" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_REGRESSIONMETRICS.fields_by_name[ - "r_squared" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_REGRESSIONMETRICS.containing_type = _MODEL -_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ - "precision" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ - "recall" -].message_type = 
google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ - "accuracy" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ - "threshold" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ - "f1_score" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ - "log_loss" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ - "roc_auc" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_AGGREGATECLASSIFICATIONMETRICS.containing_type = _MODEL -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ - "positive_class_threshold" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ - "true_positives" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ - "false_positives" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ - "true_negatives" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ - "false_negatives" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ - "precision" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ - "recall" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE 
-_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ - "f1_score" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ - "accuracy" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.containing_type = ( - _MODEL_BINARYCLASSIFICATIONMETRICS -) -_MODEL_BINARYCLASSIFICATIONMETRICS.fields_by_name[ - "aggregate_classification_metrics" -].message_type = _MODEL_AGGREGATECLASSIFICATIONMETRICS -_MODEL_BINARYCLASSIFICATIONMETRICS.fields_by_name[ - "binary_confusion_matrix_list" -].message_type = _MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX -_MODEL_BINARYCLASSIFICATIONMETRICS.containing_type = _MODEL -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY.fields_by_name[ - "item_count" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY.containing_type = ( - _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX -) -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW.fields_by_name[ - "entries" -].message_type = _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW.containing_type = ( - _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX -) -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX.fields_by_name[ - "confidence_threshold" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX.fields_by_name[ - "rows" -].message_type = _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX.containing_type = ( - _MODEL_MULTICLASSCLASSIFICATIONMETRICS -) -_MODEL_MULTICLASSCLASSIFICATIONMETRICS.fields_by_name[ - "aggregate_classification_metrics" -].message_type = _MODEL_AGGREGATECLASSIFICATIONMETRICS 
-_MODEL_MULTICLASSCLASSIFICATIONMETRICS.fields_by_name[ - "confusion_matrix_list" -].message_type = _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX -_MODEL_MULTICLASSCLASSIFICATIONMETRICS.containing_type = _MODEL -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT.fields_by_name[ - "count" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT.containing_type = ( - _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE -) -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE.fields_by_name[ - "category_counts" -].message_type = ( - _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT -) -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE.containing_type = ( - _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE -) -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name[ - "numerical_value" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name[ - "categorical_value" -].message_type = _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.containing_type = ( - _MODEL_CLUSTERINGMETRICS_CLUSTER -) -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.oneofs_by_name["value"].fields.append( - _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name["numerical_value"] -) -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name[ - "numerical_value" -].containing_oneof = _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.oneofs_by_name[ - "value" -] -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.oneofs_by_name["value"].fields.append( - _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name["categorical_value"] -) -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name[ - "categorical_value" -].containing_oneof = 
_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.oneofs_by_name[ - "value" -] -_MODEL_CLUSTERINGMETRICS_CLUSTER.fields_by_name[ - "feature_values" -].message_type = _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE -_MODEL_CLUSTERINGMETRICS_CLUSTER.fields_by_name[ - "count" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE -_MODEL_CLUSTERINGMETRICS_CLUSTER.containing_type = _MODEL_CLUSTERINGMETRICS -_MODEL_CLUSTERINGMETRICS.fields_by_name[ - "davies_bouldin_index" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_CLUSTERINGMETRICS.fields_by_name[ - "mean_squared_distance" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_CLUSTERINGMETRICS.fields_by_name[ - "clusters" -].message_type = _MODEL_CLUSTERINGMETRICS_CLUSTER -_MODEL_CLUSTERINGMETRICS.containing_type = _MODEL -_MODEL_EVALUATIONMETRICS.fields_by_name[ - "regression_metrics" -].message_type = _MODEL_REGRESSIONMETRICS -_MODEL_EVALUATIONMETRICS.fields_by_name[ - "binary_classification_metrics" -].message_type = _MODEL_BINARYCLASSIFICATIONMETRICS -_MODEL_EVALUATIONMETRICS.fields_by_name[ - "multi_class_classification_metrics" -].message_type = _MODEL_MULTICLASSCLASSIFICATIONMETRICS -_MODEL_EVALUATIONMETRICS.fields_by_name[ - "clustering_metrics" -].message_type = _MODEL_CLUSTERINGMETRICS -_MODEL_EVALUATIONMETRICS.containing_type = _MODEL -_MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"].fields.append( - _MODEL_EVALUATIONMETRICS.fields_by_name["regression_metrics"] -) -_MODEL_EVALUATIONMETRICS.fields_by_name[ - "regression_metrics" -].containing_oneof = _MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"] -_MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"].fields.append( - _MODEL_EVALUATIONMETRICS.fields_by_name["binary_classification_metrics"] -) -_MODEL_EVALUATIONMETRICS.fields_by_name[ - "binary_classification_metrics" -].containing_oneof = _MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"] 
-_MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"].fields.append( - _MODEL_EVALUATIONMETRICS.fields_by_name["multi_class_classification_metrics"] -) -_MODEL_EVALUATIONMETRICS.fields_by_name[ - "multi_class_classification_metrics" -].containing_oneof = _MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"] -_MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"].fields.append( - _MODEL_EVALUATIONMETRICS.fields_by_name["clustering_metrics"] -) -_MODEL_EVALUATIONMETRICS.fields_by_name[ - "clustering_metrics" -].containing_oneof = _MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"] -_MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY.containing_type = ( - _MODEL_TRAININGRUN_TRAININGOPTIONS -) -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "loss_type" -].enum_type = _MODEL_LOSSTYPE -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "l1_regularization" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "l2_regularization" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "min_relative_progress" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "warm_start" -].message_type = google_dot_protobuf_dot_wrappers__pb2._BOOLVALUE -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "early_stop" -].message_type = google_dot_protobuf_dot_wrappers__pb2._BOOLVALUE -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "data_split_method" -].enum_type = _MODEL_DATASPLITMETHOD -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "learn_rate_strategy" -].enum_type = _MODEL_LEARNRATESTRATEGY -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "label_class_weights" -].message_type = _MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "distance_type" -].enum_type = _MODEL_DISTANCETYPE 
-_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "optimization_strategy" -].enum_type = _MODEL_OPTIMIZATIONSTRATEGY -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "kmeans_initialization_method" -].enum_type = _MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD -_MODEL_TRAININGRUN_TRAININGOPTIONS.containing_type = _MODEL_TRAININGRUN -_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO.fields_by_name[ - "cluster_radius" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO.fields_by_name[ - "cluster_size" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE -_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO.containing_type = ( - _MODEL_TRAININGRUN_ITERATIONRESULT -) -_MODEL_TRAININGRUN_ITERATIONRESULT.fields_by_name[ - "index" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT32VALUE -_MODEL_TRAININGRUN_ITERATIONRESULT.fields_by_name[ - "duration_ms" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE -_MODEL_TRAININGRUN_ITERATIONRESULT.fields_by_name[ - "training_loss" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_TRAININGRUN_ITERATIONRESULT.fields_by_name[ - "eval_loss" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_TRAININGRUN_ITERATIONRESULT.fields_by_name[ - "cluster_infos" -].message_type = _MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO -_MODEL_TRAININGRUN_ITERATIONRESULT.containing_type = _MODEL_TRAININGRUN -_MODEL_TRAININGRUN.fields_by_name[ - "training_options" -].message_type = _MODEL_TRAININGRUN_TRAININGOPTIONS -_MODEL_TRAININGRUN.fields_by_name[ - "start_time" -].message_type = google_dot_protobuf_dot_timestamp__pb2._TIMESTAMP -_MODEL_TRAININGRUN.fields_by_name[ - "results" -].message_type = _MODEL_TRAININGRUN_ITERATIONRESULT -_MODEL_TRAININGRUN.fields_by_name[ - "evaluation_metrics" -].message_type = _MODEL_EVALUATIONMETRICS -_MODEL_TRAININGRUN.containing_type = _MODEL 
-_MODEL_LABELSENTRY.containing_type = _MODEL -_MODEL.fields_by_name[ - "model_reference" -].message_type = ( - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__reference__pb2._MODELREFERENCE -) -_MODEL.fields_by_name["labels"].message_type = _MODEL_LABELSENTRY -_MODEL.fields_by_name[ - "encryption_configuration" -].message_type = ( - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_encryption__config__pb2._ENCRYPTIONCONFIGURATION -) -_MODEL.fields_by_name["model_type"].enum_type = _MODEL_MODELTYPE -_MODEL.fields_by_name["training_runs"].message_type = _MODEL_TRAININGRUN -_MODEL.fields_by_name[ - "feature_columns" -].message_type = ( - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_standard__sql__pb2._STANDARDSQLFIELD -) -_MODEL.fields_by_name[ - "label_columns" -].message_type = ( - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_standard__sql__pb2._STANDARDSQLFIELD -) -_MODEL_MODELTYPE.containing_type = _MODEL -_MODEL_LOSSTYPE.containing_type = _MODEL -_MODEL_DISTANCETYPE.containing_type = _MODEL -_MODEL_DATASPLITMETHOD.containing_type = _MODEL -_MODEL_LEARNRATESTRATEGY.containing_type = _MODEL -_MODEL_OPTIMIZATIONSTRATEGY.containing_type = _MODEL -_PATCHMODELREQUEST.fields_by_name["model"].message_type = _MODEL -_LISTMODELSREQUEST.fields_by_name[ - "max_results" -].message_type = google_dot_protobuf_dot_wrappers__pb2._UINT32VALUE -_LISTMODELSRESPONSE.fields_by_name["models"].message_type = _MODEL -DESCRIPTOR.message_types_by_name["Model"] = _MODEL -DESCRIPTOR.message_types_by_name["GetModelRequest"] = _GETMODELREQUEST -DESCRIPTOR.message_types_by_name["PatchModelRequest"] = _PATCHMODELREQUEST -DESCRIPTOR.message_types_by_name["DeleteModelRequest"] = _DELETEMODELREQUEST -DESCRIPTOR.message_types_by_name["ListModelsRequest"] = _LISTMODELSREQUEST -DESCRIPTOR.message_types_by_name["ListModelsResponse"] = _LISTMODELSRESPONSE -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -Model = _reflection.GeneratedProtocolMessageType( - "Model", - (_message.Message,), - { - 
"KmeansEnums": _reflection.GeneratedProtocolMessageType( - "KmeansEnums", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_KMEANSENUMS, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2" - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.KmeansEnums) - }, - ), - "RegressionMetrics": _reflection.GeneratedProtocolMessageType( - "RegressionMetrics", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_REGRESSIONMETRICS, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Evaluation metrics for regression and explicit feedback type matrix - factorization models. - - Attributes: - mean_absolute_error: - Mean absolute error. - mean_squared_error: - Mean squared error. - mean_squared_log_error: - Mean squared log error. - median_absolute_error: - Median absolute error. - r_squared: - R^2 score. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.RegressionMetrics) - }, - ), - "AggregateClassificationMetrics": _reflection.GeneratedProtocolMessageType( - "AggregateClassificationMetrics", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_AGGREGATECLASSIFICATIONMETRICS, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Aggregate metrics for classification/classifier models. For multi- - class models, the metrics are either macro-averaged or micro-averaged. - When macro-averaged, the metrics are calculated for each label and - then an unweighted average is taken of those values. When micro- - averaged, the metric is calculated globally by counting the total - number of correctly predicted rows. - - Attributes: - precision: - Precision is the fraction of actual positive predictions that - had positive actual labels. For multiclass this is a macro- - averaged metric treating each class as a binary classifier. - recall: - Recall is the fraction of actual positive labels that were - given a positive prediction. For multiclass this is a macro- - averaged metric. 
- accuracy: - Accuracy is the fraction of predictions given the correct - label. For multiclass this is a micro-averaged metric. - threshold: - Threshold at which the metrics are computed. For binary - classification models this is the positive class threshold. - For multi-class classfication models this is the confidence - threshold. - f1_score: - The F1 score is an average of recall and precision. For - multiclass this is a macro-averaged metric. - log_loss: - Logarithmic Loss. For multiclass this is a macro-averaged - metric. - roc_auc: - Area Under a ROC Curve. For multiclass this is a macro- - averaged metric. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.AggregateClassificationMetrics) - }, - ), - "BinaryClassificationMetrics": _reflection.GeneratedProtocolMessageType( - "BinaryClassificationMetrics", - (_message.Message,), - { - "BinaryConfusionMatrix": _reflection.GeneratedProtocolMessageType( - "BinaryConfusionMatrix", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Confusion matrix for binary classification models. - - Attributes: - positive_class_threshold: - Threshold value used when computing each of the following - metric. - true_positives: - Number of true samples predicted as true. - false_positives: - Number of false samples predicted as true. - true_negatives: - Number of true samples predicted as false. - false_negatives: - Number of false samples predicted as false. - precision: - The fraction of actual positive predictions that had positive - actual labels. - recall: - The fraction of actual positive labels that were given a - positive prediction. - f1_score: - The equally weighted average of recall and precision. - accuracy: - The fraction of predictions given the correct label. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix) - }, - ), - "DESCRIPTOR": _MODEL_BINARYCLASSIFICATIONMETRICS, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Evaluation metrics for binary classification/classifier models. - - Attributes: - aggregate_classification_metrics: - Aggregate classification metrics. - binary_confusion_matrix_list: - Binary confusion matrix at multiple thresholds. - positive_label: - Label representing the positive class. - negative_label: - Label representing the negative class. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.BinaryClassificationMetrics) - }, - ), - "MultiClassClassificationMetrics": _reflection.GeneratedProtocolMessageType( - "MultiClassClassificationMetrics", - (_message.Message,), - { - "ConfusionMatrix": _reflection.GeneratedProtocolMessageType( - "ConfusionMatrix", - (_message.Message,), - { - "Entry": _reflection.GeneratedProtocolMessageType( - "Entry", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """A single entry in the confusion matrix. - - Attributes: - predicted_label: - The predicted label. For confidence_threshold > 0, we will - also add an entry indicating the number of items under the - confidence threshold. - item_count: - Number of items being predicted as this label. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry) - }, - ), - "Row": _reflection.GeneratedProtocolMessageType( - "Row", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """A single row in the confusion matrix. 
- - Attributes: - actual_label: - The original label of this row. - entries: - Info describing predicted label distribution. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row) - }, - ), - "DESCRIPTOR": _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Confusion matrix for multi-class classification models. - - Attributes: - confidence_threshold: - Confidence threshold used when computing the entries of the - confusion matrix. - rows: - One row per actual label. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix) - }, - ), - "DESCRIPTOR": _MODEL_MULTICLASSCLASSIFICATIONMETRICS, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Evaluation metrics for multi-class classification/classifier models. - - Attributes: - aggregate_classification_metrics: - Aggregate classification metrics. - confusion_matrix_list: - Confusion matrix at different thresholds. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics) - }, - ), - "ClusteringMetrics": _reflection.GeneratedProtocolMessageType( - "ClusteringMetrics", - (_message.Message,), - { - "Cluster": _reflection.GeneratedProtocolMessageType( - "Cluster", - (_message.Message,), - { - "FeatureValue": _reflection.GeneratedProtocolMessageType( - "FeatureValue", - (_message.Message,), - { - "CategoricalValue": _reflection.GeneratedProtocolMessageType( - "CategoricalValue", - (_message.Message,), - { - "CategoryCount": _reflection.GeneratedProtocolMessageType( - "CategoryCount", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Represents the count of a single category within the cluster. - - Attributes: - category: - The name of category. - count: - The count of training samples matching the category within the - cluster. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount) - }, - ), - "DESCRIPTOR": _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Representative value of a categorical feature. - - Attributes: - category_counts: - Counts of all categories for the categorical feature. If there - are more than ten categories, we return top ten (by count) and - return one more CategoryCount with category ``*OTHER*`` and - count as aggregate counts of remaining categories. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue) - }, - ), - "DESCRIPTOR": _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Representative value of a single feature within the cluster. - - Attributes: - feature_column: - The feature column name. - numerical_value: - The numerical feature value. This is the centroid value for - this feature. - categorical_value: - The categorical feature value. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue) - }, - ), - "DESCRIPTOR": _MODEL_CLUSTERINGMETRICS_CLUSTER, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Message containing the information about one cluster. - - Attributes: - centroid_id: - Centroid id. - feature_values: - Values of highly variant features for this cluster. - count: - Count of training data rows that were assigned to this - cluster. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster) - }, - ), - "DESCRIPTOR": _MODEL_CLUSTERINGMETRICS, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Evaluation metrics for clustering models. - - Attributes: - davies_bouldin_index: - Davies-Bouldin index. - mean_squared_distance: - Mean of squared distances between each sample to its cluster - centroid. - clusters: - [Beta] Information for all clusters. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics) - }, - ), - "EvaluationMetrics": _reflection.GeneratedProtocolMessageType( - "EvaluationMetrics", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_EVALUATIONMETRICS, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Evaluation metrics of a model. 
These are either computed on all - training data or just the eval data based on whether eval data was - used during training. These are not present for imported models. - - Attributes: - regression_metrics: - Populated for regression models and explicit feedback type - matrix factorization models. - binary_classification_metrics: - Populated for binary classification/classifier models. - multi_class_classification_metrics: - Populated for multi-class classification/classifier models. - clustering_metrics: - Populated for clustering models. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.EvaluationMetrics) - }, - ), - "TrainingRun": _reflection.GeneratedProtocolMessageType( - "TrainingRun", - (_message.Message,), - { - "TrainingOptions": _reflection.GeneratedProtocolMessageType( - "TrainingOptions", - (_message.Message,), - { - "LabelClassWeightsEntry": _reflection.GeneratedProtocolMessageType( - "LabelClassWeightsEntry", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2" - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry) - }, - ), - "DESCRIPTOR": _MODEL_TRAININGRUN_TRAININGOPTIONS, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Protocol buffer. - - Attributes: - max_iterations: - The maximum number of iterations in training. Used only for - iterative training algorithms. - loss_type: - Type of loss function used during training run. - learn_rate: - Learning rate in training. Used only for iterative training - algorithms. - l1_regularization: - L1 regularization coefficient. - l2_regularization: - L2 regularization coefficient. - min_relative_progress: - When early_stop is true, stops training when accuracy - improvement is less than ‘min_relative_progress’. Used only - for iterative training algorithms. 
- warm_start: - Whether to train a model from the last checkpoint. - early_stop: - Whether to stop early when the loss doesn’t improve - significantly any more (compared to min_relative_progress). - Used only for iterative training algorithms. - input_label_columns: - Name of input label columns in training data. - data_split_method: - The data split type for training and evaluation, e.g. RANDOM. - data_split_eval_fraction: - The fraction of evaluation data over the whole input data. The - rest of data will be used as training data. The format should - be double. Accurate to two decimal places. Default value is - 0.2. - data_split_column: - The column to split data with. This column won’t be used as a - feature. 1. When data_split_method is CUSTOM, the - corresponding column should be boolean. The rows with true - value tag are eval data, and the false are training data. 2. - When data_split_method is SEQ, the first - DATA_SPLIT_EVAL_FRACTION rows (from smallest to largest) in - the corresponding column are used as training data, and the - rest are eval data. It respects the order in Orderable data - types: - https://cloud.google.com/bigquery/docs/reference/standard- - sql/data-types#data-type-properties - learn_rate_strategy: - The strategy to determine learn rate for the current - iteration. - initial_learn_rate: - Specifies the initial learning rate for the line search learn - rate strategy. - label_class_weights: - Weights associated with each label class, for rebalancing the - training data. Only applicable for classification models. - distance_type: - Distance type for clustering models. - num_clusters: - Number of clusters for clustering models. - model_uri: - [Beta] Google Cloud Storage URI from which the model was - imported. Only applicable for imported models. - optimization_strategy: - Optimization strategy for training linear regression models. - kmeans_initialization_method: - The method used to initialize the centroids for kmeans - algorithm. 
- kmeans_initialization_column: - The column used to provide the initial centroids for kmeans - algorithm when kmeans_initialization_method is CUSTOM. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions) - }, - ), - "IterationResult": _reflection.GeneratedProtocolMessageType( - "IterationResult", - (_message.Message,), - { - "ClusterInfo": _reflection.GeneratedProtocolMessageType( - "ClusterInfo", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Information about a single cluster for clustering model. - - Attributes: - centroid_id: - Centroid id. - cluster_radius: - Cluster radius, the average distance from centroid to each - point assigned to the cluster. - cluster_size: - Cluster size, the total number of points assigned to the - cluster. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo) - }, - ), - "DESCRIPTOR": _MODEL_TRAININGRUN_ITERATIONRESULT, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Information about a single iteration of the training run. - - Attributes: - index: - Index of the iteration, 0 based. - duration_ms: - Time taken to run the iteration in milliseconds. - training_loss: - Loss computed on the training data at the end of iteration. - eval_loss: - Loss computed on the eval data at the end of iteration. - learn_rate: - Learn rate used for this iteration. - cluster_infos: - Information about top clusters for clustering models. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun.IterationResult) - }, - ), - "DESCRIPTOR": _MODEL_TRAININGRUN, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Information about a single training query run for the model. 
- - Attributes: - training_options: - Options that were used for this training run, includes user - specified and default options that were used. - start_time: - The start time of this training run. - results: - Output of each iteration run, results.size() <= - max_iterations. - evaluation_metrics: - The evaluation metrics over training/eval data that were - computed at the end of training. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun) - }, - ), - "LabelsEntry": _reflection.GeneratedProtocolMessageType( - "LabelsEntry", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_LABELSENTRY, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2" - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.LabelsEntry) - }, - ), - "DESCRIPTOR": _MODEL, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Protocol buffer. - - Attributes: - etag: - Output only. A hash of this resource. - model_reference: - Required. Unique identifier for this model. - creation_time: - Output only. The time when this model was created, in - millisecs since the epoch. - last_modified_time: - Output only. The time when this model was last modified, in - millisecs since the epoch. - description: - Optional. A user-friendly description of this model. - friendly_name: - Optional. A descriptive name for this model. - labels: - The labels associated with this model. You can use these to - organize and group your models. Label keys and values can be - no longer than 63 characters, can only contain lowercase - letters, numeric characters, underscores and dashes. - International characters are allowed. Label values are - optional. Label keys must start with a letter and each label - in the list must have a different key. - expiration_time: - Optional. The time when this model expires, in milliseconds - since the epoch. If not present, the model will persist - indefinitely. 
Expired models will be deleted and their storage - reclaimed. The defaultTableExpirationMs property of the - encapsulating dataset can be used to set a default - expirationTime on newly created models. - location: - Output only. The geographic location where the model resides. - This value is inherited from the dataset. - encryption_configuration: - Custom encryption configuration (e.g., Cloud KMS keys). This - shows the encryption configuration of the model data while - stored in BigQuery storage. - model_type: - Output only. Type of the model resource. - training_runs: - Output only. Information for all training runs in increasing - order of start_time. - feature_columns: - Output only. Input feature columns that were used to train - this model. - label_columns: - Output only. Label columns that were used to train this model. - The output of the model will have a ``predicted\_`` prefix to - these columns. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model) - }, -) -_sym_db.RegisterMessage(Model) -_sym_db.RegisterMessage(Model.KmeansEnums) -_sym_db.RegisterMessage(Model.RegressionMetrics) -_sym_db.RegisterMessage(Model.AggregateClassificationMetrics) -_sym_db.RegisterMessage(Model.BinaryClassificationMetrics) -_sym_db.RegisterMessage(Model.BinaryClassificationMetrics.BinaryConfusionMatrix) -_sym_db.RegisterMessage(Model.MultiClassClassificationMetrics) -_sym_db.RegisterMessage(Model.MultiClassClassificationMetrics.ConfusionMatrix) -_sym_db.RegisterMessage(Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry) -_sym_db.RegisterMessage(Model.MultiClassClassificationMetrics.ConfusionMatrix.Row) -_sym_db.RegisterMessage(Model.ClusteringMetrics) -_sym_db.RegisterMessage(Model.ClusteringMetrics.Cluster) -_sym_db.RegisterMessage(Model.ClusteringMetrics.Cluster.FeatureValue) -_sym_db.RegisterMessage(Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue) -_sym_db.RegisterMessage( - 
Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount -) -_sym_db.RegisterMessage(Model.EvaluationMetrics) -_sym_db.RegisterMessage(Model.TrainingRun) -_sym_db.RegisterMessage(Model.TrainingRun.TrainingOptions) -_sym_db.RegisterMessage(Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry) -_sym_db.RegisterMessage(Model.TrainingRun.IterationResult) -_sym_db.RegisterMessage(Model.TrainingRun.IterationResult.ClusterInfo) -_sym_db.RegisterMessage(Model.LabelsEntry) - -GetModelRequest = _reflection.GeneratedProtocolMessageType( - "GetModelRequest", - (_message.Message,), - { - "DESCRIPTOR": _GETMODELREQUEST, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Protocol buffer. - - Attributes: - project_id: - Required. Project ID of the requested model. - dataset_id: - Required. Dataset ID of the requested model. - model_id: - Required. Model ID of the requested model. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.GetModelRequest) - }, -) -_sym_db.RegisterMessage(GetModelRequest) - -PatchModelRequest = _reflection.GeneratedProtocolMessageType( - "PatchModelRequest", - (_message.Message,), - { - "DESCRIPTOR": _PATCHMODELREQUEST, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Protocol buffer. - - Attributes: - project_id: - Required. Project ID of the model to patch. - dataset_id: - Required. Dataset ID of the model to patch. - model_id: - Required. Model ID of the model to patch. - model: - Required. Patched model. Follows RFC5789 patch semantics. - Missing fields are not updated. To clear a field, explicitly - set to default value. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.PatchModelRequest) - }, -) -_sym_db.RegisterMessage(PatchModelRequest) - -DeleteModelRequest = _reflection.GeneratedProtocolMessageType( - "DeleteModelRequest", - (_message.Message,), - { - "DESCRIPTOR": _DELETEMODELREQUEST, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Protocol buffer. - - Attributes: - project_id: - Required. Project ID of the model to delete. - dataset_id: - Required. Dataset ID of the model to delete. - model_id: - Required. Model ID of the model to delete. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.DeleteModelRequest) - }, -) -_sym_db.RegisterMessage(DeleteModelRequest) - -ListModelsRequest = _reflection.GeneratedProtocolMessageType( - "ListModelsRequest", - (_message.Message,), - { - "DESCRIPTOR": _LISTMODELSREQUEST, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Protocol buffer. - - Attributes: - project_id: - Required. Project ID of the models to list. - dataset_id: - Required. Dataset ID of the models to list. - max_results: - The maximum number of results to return in a single response - page. Leverage the page tokens to iterate through the entire - collection. - page_token: - Page token, returned by a previous call to request the next - page of results - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.ListModelsRequest) - }, -) -_sym_db.RegisterMessage(ListModelsRequest) - -ListModelsResponse = _reflection.GeneratedProtocolMessageType( - "ListModelsResponse", - (_message.Message,), - { - "DESCRIPTOR": _LISTMODELSRESPONSE, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Protocol buffer. - - Attributes: - models: - Models in the requested dataset. Only the following fields are - populated: model_reference, model_type, creation_time, - last_modified_time and labels. - next_page_token: - A token to request the next page of results. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.ListModelsResponse) - }, -) -_sym_db.RegisterMessage(ListModelsResponse) - - -DESCRIPTOR._options = None -_MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY._options = None -_MODEL_LABELSENTRY._options = None -_MODEL.fields_by_name["etag"]._options = None -_MODEL.fields_by_name["model_reference"]._options = None -_MODEL.fields_by_name["creation_time"]._options = None -_MODEL.fields_by_name["last_modified_time"]._options = None -_MODEL.fields_by_name["description"]._options = None -_MODEL.fields_by_name["friendly_name"]._options = None -_MODEL.fields_by_name["expiration_time"]._options = None -_MODEL.fields_by_name["location"]._options = None -_MODEL.fields_by_name["model_type"]._options = None -_MODEL.fields_by_name["training_runs"]._options = None -_MODEL.fields_by_name["feature_columns"]._options = None -_MODEL.fields_by_name["label_columns"]._options = None -_GETMODELREQUEST.fields_by_name["project_id"]._options = None -_GETMODELREQUEST.fields_by_name["dataset_id"]._options = None -_GETMODELREQUEST.fields_by_name["model_id"]._options = None -_PATCHMODELREQUEST.fields_by_name["project_id"]._options = None -_PATCHMODELREQUEST.fields_by_name["dataset_id"]._options = None -_PATCHMODELREQUEST.fields_by_name["model_id"]._options = None -_PATCHMODELREQUEST.fields_by_name["model"]._options = None -_DELETEMODELREQUEST.fields_by_name["project_id"]._options = None -_DELETEMODELREQUEST.fields_by_name["dataset_id"]._options = None -_DELETEMODELREQUEST.fields_by_name["model_id"]._options = None -_LISTMODELSREQUEST.fields_by_name["project_id"]._options = None -_LISTMODELSREQUEST.fields_by_name["dataset_id"]._options = None - -_MODELSERVICE = _descriptor.ServiceDescriptor( - name="ModelService", - full_name="google.cloud.bigquery.v2.ModelService", - file=DESCRIPTOR, - index=0, - 
serialized_options=b"\312A\027bigquery.googleapis.com\322A\302\001https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/bigquery.readonly,https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/cloud-platform.read-only", - create_key=_descriptor._internal_create_key, - serialized_start=7804, - serialized_end=8566, - methods=[ - _descriptor.MethodDescriptor( - name="GetModel", - full_name="google.cloud.bigquery.v2.ModelService.GetModel", - index=0, - containing_service=None, - input_type=_GETMODELREQUEST, - output_type=_MODEL, - serialized_options=b"\332A\036project_id,dataset_id,model_id", - create_key=_descriptor._internal_create_key, - ), - _descriptor.MethodDescriptor( - name="ListModels", - full_name="google.cloud.bigquery.v2.ModelService.ListModels", - index=1, - containing_service=None, - input_type=_LISTMODELSREQUEST, - output_type=_LISTMODELSRESPONSE, - serialized_options=b"\332A!project_id,dataset_id,max_results", - create_key=_descriptor._internal_create_key, - ), - _descriptor.MethodDescriptor( - name="PatchModel", - full_name="google.cloud.bigquery.v2.ModelService.PatchModel", - index=2, - containing_service=None, - input_type=_PATCHMODELREQUEST, - output_type=_MODEL, - serialized_options=b"\332A$project_id,dataset_id,model_id,model", - create_key=_descriptor._internal_create_key, - ), - _descriptor.MethodDescriptor( - name="DeleteModel", - full_name="google.cloud.bigquery.v2.ModelService.DeleteModel", - index=3, - containing_service=None, - input_type=_DELETEMODELREQUEST, - output_type=google_dot_protobuf_dot_empty__pb2._EMPTY, - serialized_options=b"\332A\036project_id,dataset_id,model_id", - create_key=_descriptor._internal_create_key, - ), - ], -) -_sym_db.RegisterServiceDescriptor(_MODELSERVICE) - -DESCRIPTOR.services_by_name["ModelService"] = _MODELSERVICE - -# @@protoc_insertion_point(module_scope) diff --git a/google/cloud/bigquery_v2/proto/model_reference.proto 
b/google/cloud/bigquery_v2/proto/model_reference.proto deleted file mode 100644 index c3d1a49a8..000000000 --- a/google/cloud/bigquery_v2/proto/model_reference.proto +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2020 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package google.cloud.bigquery.v2; - -import "google/api/field_behavior.proto"; -import "google/api/annotations.proto"; - -option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; -option java_outer_classname = "ModelReferenceProto"; -option java_package = "com.google.cloud.bigquery.v2"; - -// Id path of a model. -message ModelReference { - // Required. The ID of the project containing this model. - string project_id = 1 [(google.api.field_behavior) = REQUIRED]; - - // Required. The ID of the dataset containing this model. - string dataset_id = 2 [(google.api.field_behavior) = REQUIRED]; - - // Required. The ID of the model. The ID must contain only - // letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum - // length is 1,024 characters. 
- string model_id = 3 [(google.api.field_behavior) = REQUIRED]; -} diff --git a/google/cloud/bigquery_v2/proto/model_reference_pb2.py b/google/cloud/bigquery_v2/proto/model_reference_pb2.py deleted file mode 100644 index 2411c4863..000000000 --- a/google/cloud/bigquery_v2/proto/model_reference_pb2.py +++ /dev/null @@ -1,142 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: google/cloud/bigquery_v2/proto/model_reference.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2 -from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/bigquery_v2/proto/model_reference.proto", - package="google.cloud.bigquery.v2", - syntax="proto3", - serialized_options=b"\n\034com.google.cloud.bigquery.v2B\023ModelReferenceProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery", - create_key=_descriptor._internal_create_key, - serialized_pb=b'\n4google/cloud/bigquery_v2/proto/model_reference.proto\x12\x18google.cloud.bigquery.v2\x1a\x1fgoogle/api/field_behavior.proto\x1a\x1cgoogle/api/annotations.proto"Y\n\x0eModelReference\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02\x42u\n\x1c\x63om.google.cloud.bigquery.v2B\x13ModelReferenceProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3', - dependencies=[ - google_dot_api_dot_field__behavior__pb2.DESCRIPTOR, - google_dot_api_dot_annotations__pb2.DESCRIPTOR, - ], -) - 
- -_MODELREFERENCE = _descriptor.Descriptor( - name="ModelReference", - full_name="google.cloud.bigquery.v2.ModelReference", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="project_id", - full_name="google.cloud.bigquery.v2.ModelReference.project_id", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="dataset_id", - full_name="google.cloud.bigquery.v2.ModelReference.dataset_id", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="model_id", - full_name="google.cloud.bigquery.v2.ModelReference.model_id", - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=145, - serialized_end=234, -) - -DESCRIPTOR.message_types_by_name["ModelReference"] = _MODELREFERENCE -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -ModelReference = _reflection.GeneratedProtocolMessageType( - 
"ModelReference", - (_message.Message,), - { - "DESCRIPTOR": _MODELREFERENCE, - "__module__": "google.cloud.bigquery_v2.proto.model_reference_pb2", - "__doc__": """Id path of a model. - - Attributes: - project_id: - Required. The ID of the project containing this model. - dataset_id: - Required. The ID of the dataset containing this model. - model_id: - Required. The ID of the model. The ID must contain only - letters (a-z, A-Z), numbers (0-9), or underscores (_). The - maximum length is 1,024 characters. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.ModelReference) - }, -) -_sym_db.RegisterMessage(ModelReference) - - -DESCRIPTOR._options = None -_MODELREFERENCE.fields_by_name["project_id"]._options = None -_MODELREFERENCE.fields_by_name["dataset_id"]._options = None -_MODELREFERENCE.fields_by_name["model_id"]._options = None -# @@protoc_insertion_point(module_scope) diff --git a/google/cloud/bigquery_v2/proto/standard_sql.proto b/google/cloud/bigquery_v2/proto/standard_sql.proto deleted file mode 100644 index 1514eccbb..000000000 --- a/google/cloud/bigquery_v2/proto/standard_sql.proto +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright 2020 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -syntax = "proto3"; - -package google.cloud.bigquery.v2; - -import "google/api/field_behavior.proto"; -import "google/api/annotations.proto"; - -option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; -option java_outer_classname = "StandardSqlProto"; -option java_package = "com.google.cloud.bigquery.v2"; - -// The type of a variable, e.g., a function argument. -// Examples: -// INT64: {type_kind="INT64"} -// ARRAY: {type_kind="ARRAY", array_element_type="STRING"} -// STRUCT>: -// {type_kind="STRUCT", -// struct_type={fields=[ -// {name="x", type={type_kind="STRING"}}, -// {name="y", type={type_kind="ARRAY", array_element_type="DATE"}} -// ]}} -message StandardSqlDataType { - enum TypeKind { - // Invalid type. - TYPE_KIND_UNSPECIFIED = 0; - - // Encoded as a string in decimal format. - INT64 = 2; - - // Encoded as a boolean "false" or "true". - BOOL = 5; - - // Encoded as a number, or string "NaN", "Infinity" or "-Infinity". - FLOAT64 = 7; - - // Encoded as a string value. - STRING = 8; - - // Encoded as a base64 string per RFC 4648, section 4. - BYTES = 9; - - // Encoded as an RFC 3339 timestamp with mandatory "Z" time zone string: - // 1985-04-12T23:20:50.52Z - TIMESTAMP = 19; - - // Encoded as RFC 3339 full-date format string: 1985-04-12 - DATE = 10; - - // Encoded as RFC 3339 partial-time format string: 23:20:50.52 - TIME = 20; - - // Encoded as RFC 3339 full-date "T" partial-time: 1985-04-12T23:20:50.52 - DATETIME = 21; - - // Encoded as WKT - GEOGRAPHY = 22; - - // Encoded as a decimal string. - NUMERIC = 23; - - // Encoded as a decimal string. - BIGNUMERIC = 24; - - // Encoded as a list with types matching Type.array_type. - ARRAY = 16; - - // Encoded as a list with fields of type Type.struct_type[i]. List is used - // because a JSON object cannot have duplicate field names. - STRUCT = 17; - } - - // Required. The top level type of this field. - // Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY"). 
- TypeKind type_kind = 1 [(google.api.field_behavior) = REQUIRED]; - - oneof sub_type { - // The type of the array's elements, if type_kind = "ARRAY". - StandardSqlDataType array_element_type = 2; - - // The fields of this struct, in order, if type_kind = "STRUCT". - StandardSqlStructType struct_type = 3; - } -} - -// A field or a column. -message StandardSqlField { - // Optional. The name of this field. Can be absent for struct fields. - string name = 1 [(google.api.field_behavior) = OPTIONAL]; - - // Optional. The type of this parameter. Absent if not explicitly - // specified (e.g., CREATE FUNCTION statement can omit the return type; - // in this case the output parameter does not have this "type" field). - StandardSqlDataType type = 2 [(google.api.field_behavior) = OPTIONAL]; -} - -message StandardSqlStructType { - repeated StandardSqlField fields = 1; -} diff --git a/google/cloud/bigquery_v2/proto/standard_sql_pb2.py b/google/cloud/bigquery_v2/proto/standard_sql_pb2.py deleted file mode 100644 index bfe77f934..000000000 --- a/google/cloud/bigquery_v2/proto/standard_sql_pb2.py +++ /dev/null @@ -1,442 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: google/cloud/bigquery_v2/proto/standard_sql.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2 -from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/bigquery_v2/proto/standard_sql.proto", - package="google.cloud.bigquery.v2", - syntax="proto3", - serialized_options=b"\n\034com.google.cloud.bigquery.v2B\020StandardSqlProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery", - create_key=_descriptor._internal_create_key, - serialized_pb=b'\n1google/cloud/bigquery_v2/proto/standard_sql.proto\x12\x18google.cloud.bigquery.v2\x1a\x1fgoogle/api/field_behavior.proto\x1a\x1cgoogle/api/annotations.proto"\xcb\x03\n\x13StandardSqlDataType\x12N\n\ttype_kind\x18\x01 \x01(\x0e\x32\x36.google.cloud.bigquery.v2.StandardSqlDataType.TypeKindB\x03\xe0\x41\x02\x12K\n\x12\x61rray_element_type\x18\x02 \x01(\x0b\x32-.google.cloud.bigquery.v2.StandardSqlDataTypeH\x00\x12\x46\n\x0bstruct_type\x18\x03 \x01(\x0b\x32/.google.cloud.bigquery.v2.StandardSqlStructTypeH\x00"\xc2\x01\n\x08TypeKind\x12\x19\n\x15TYPE_KIND_UNSPECIFIED\x10\x00\x12\t\n\x05INT64\x10\x02\x12\x08\n\x04\x42OOL\x10\x05\x12\x0b\n\x07\x46LOAT64\x10\x07\x12\n\n\x06STRING\x10\x08\x12\t\n\x05\x42YTES\x10\t\x12\r\n\tTIMESTAMP\x10\x13\x12\x08\n\x04\x44\x41TE\x10\n\x12\x08\n\x04TIME\x10\x14\x12\x0c\n\x08\x44\x41TETIME\x10\x15\x12\r\n\tGEOGRAPHY\x10\x16\x12\x0b\n\x07NUMERIC\x10\x17\x12\t\n\x05\x41RRAY\x10\x10\x12\n\n\x06STRUCT\x10\x11\x42\n\n\x08sub_type"g\n\x10StandardSqlField\x12\x11\n\x04name\x18\x01 
\x01(\tB\x03\xe0\x41\x01\x12@\n\x04type\x18\x02 \x01(\x0b\x32-.google.cloud.bigquery.v2.StandardSqlDataTypeB\x03\xe0\x41\x01"S\n\x15StandardSqlStructType\x12:\n\x06\x66ields\x18\x01 \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlFieldBr\n\x1c\x63om.google.cloud.bigquery.v2B\x10StandardSqlProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3', - dependencies=[ - google_dot_api_dot_field__behavior__pb2.DESCRIPTOR, - google_dot_api_dot_annotations__pb2.DESCRIPTOR, - ], -) - - -_STANDARDSQLDATATYPE_TYPEKIND = _descriptor.EnumDescriptor( - name="TypeKind", - full_name="google.cloud.bigquery.v2.StandardSqlDataType.TypeKind", - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="TYPE_KIND_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="INT64", - index=1, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="BOOL", - index=2, - number=5, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="FLOAT64", - index=3, - number=7, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="STRING", - index=4, - number=8, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="BYTES", - index=5, - number=9, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="TIMESTAMP", - index=6, - number=19, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="DATE", - index=7, 
- number=10, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="TIME", - index=8, - number=20, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="DATETIME", - index=9, - number=21, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="GEOGRAPHY", - index=10, - number=22, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="NUMERIC", - index=11, - number=23, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="ARRAY", - index=12, - number=16, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="STRUCT", - index=13, - number=17, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=396, - serialized_end=590, -) -_sym_db.RegisterEnumDescriptor(_STANDARDSQLDATATYPE_TYPEKIND) - - -_STANDARDSQLDATATYPE = _descriptor.Descriptor( - name="StandardSqlDataType", - full_name="google.cloud.bigquery.v2.StandardSqlDataType", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="type_kind", - full_name="google.cloud.bigquery.v2.StandardSqlDataType.type_kind", - index=0, - number=1, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - 
_descriptor.FieldDescriptor( - name="array_element_type", - full_name="google.cloud.bigquery.v2.StandardSqlDataType.array_element_type", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="struct_type", - full_name="google.cloud.bigquery.v2.StandardSqlDataType.struct_type", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[_STANDARDSQLDATATYPE_TYPEKIND,], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name="sub_type", - full_name="google.cloud.bigquery.v2.StandardSqlDataType.sub_type", - index=0, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[], - ), - ], - serialized_start=143, - serialized_end=602, -) - - -_STANDARDSQLFIELD = _descriptor.Descriptor( - name="StandardSqlField", - full_name="google.cloud.bigquery.v2.StandardSqlField", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="name", - full_name="google.cloud.bigquery.v2.StandardSqlField.name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\001", - 
file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="type", - full_name="google.cloud.bigquery.v2.StandardSqlField.type", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\001", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=604, - serialized_end=707, -) - - -_STANDARDSQLSTRUCTTYPE = _descriptor.Descriptor( - name="StandardSqlStructType", - full_name="google.cloud.bigquery.v2.StandardSqlStructType", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="fields", - full_name="google.cloud.bigquery.v2.StandardSqlStructType.fields", - index=0, - number=1, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=709, - serialized_end=792, -) - -_STANDARDSQLDATATYPE.fields_by_name[ - "type_kind" -].enum_type = _STANDARDSQLDATATYPE_TYPEKIND -_STANDARDSQLDATATYPE.fields_by_name[ - "array_element_type" -].message_type = _STANDARDSQLDATATYPE -_STANDARDSQLDATATYPE.fields_by_name["struct_type"].message_type = _STANDARDSQLSTRUCTTYPE -_STANDARDSQLDATATYPE_TYPEKIND.containing_type = _STANDARDSQLDATATYPE 
-_STANDARDSQLDATATYPE.oneofs_by_name["sub_type"].fields.append( - _STANDARDSQLDATATYPE.fields_by_name["array_element_type"] -) -_STANDARDSQLDATATYPE.fields_by_name[ - "array_element_type" -].containing_oneof = _STANDARDSQLDATATYPE.oneofs_by_name["sub_type"] -_STANDARDSQLDATATYPE.oneofs_by_name["sub_type"].fields.append( - _STANDARDSQLDATATYPE.fields_by_name["struct_type"] -) -_STANDARDSQLDATATYPE.fields_by_name[ - "struct_type" -].containing_oneof = _STANDARDSQLDATATYPE.oneofs_by_name["sub_type"] -_STANDARDSQLFIELD.fields_by_name["type"].message_type = _STANDARDSQLDATATYPE -_STANDARDSQLSTRUCTTYPE.fields_by_name["fields"].message_type = _STANDARDSQLFIELD -DESCRIPTOR.message_types_by_name["StandardSqlDataType"] = _STANDARDSQLDATATYPE -DESCRIPTOR.message_types_by_name["StandardSqlField"] = _STANDARDSQLFIELD -DESCRIPTOR.message_types_by_name["StandardSqlStructType"] = _STANDARDSQLSTRUCTTYPE -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -StandardSqlDataType = _reflection.GeneratedProtocolMessageType( - "StandardSqlDataType", - (_message.Message,), - { - "DESCRIPTOR": _STANDARDSQLDATATYPE, - "__module__": "google.cloud.bigquery_v2.proto.standard_sql_pb2", - "__doc__": """The type of a variable, e.g., a function argument. Examples: INT64: - {type_kind=``INT64``} ARRAY: {type_kind=``ARRAY``, - array_element_type=``STRING``} STRUCT: - {type_kind=``STRUCT``, struct_type={fields=[ {name=``x``, - type={type_kind=``STRING``}}, {name=``y``, type={type_kind=``ARRAY``, - array_element_type=``DATE``}} ]}} - - Attributes: - type_kind: - Required. The top level type of this field. Can be any - standard SQL data type (e.g., ``INT64``, ``DATE``, ``ARRAY``). - array_element_type: - The type of the array’s elements, if type_kind = ``ARRAY``. - struct_type: - The fields of this struct, in order, if type_kind = ``STRUCT``. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.StandardSqlDataType) - }, -) -_sym_db.RegisterMessage(StandardSqlDataType) - -StandardSqlField = _reflection.GeneratedProtocolMessageType( - "StandardSqlField", - (_message.Message,), - { - "DESCRIPTOR": _STANDARDSQLFIELD, - "__module__": "google.cloud.bigquery_v2.proto.standard_sql_pb2", - "__doc__": """A field or a column. - - Attributes: - name: - Optional. The name of this field. Can be absent for struct - fields. - type: - Optional. The type of this parameter. Absent if not explicitly - specified (e.g., CREATE FUNCTION statement can omit the return - type; in this case the output parameter does not have this - ``type`` field). - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.StandardSqlField) - }, -) -_sym_db.RegisterMessage(StandardSqlField) - -StandardSqlStructType = _reflection.GeneratedProtocolMessageType( - "StandardSqlStructType", - (_message.Message,), - { - "DESCRIPTOR": _STANDARDSQLSTRUCTTYPE, - "__module__": "google.cloud.bigquery_v2.proto.standard_sql_pb2" - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.StandardSqlStructType) - }, -) -_sym_db.RegisterMessage(StandardSqlStructType) - - -DESCRIPTOR._options = None -_STANDARDSQLDATATYPE.fields_by_name["type_kind"]._options = None -_STANDARDSQLFIELD.fields_by_name["name"]._options = None -_STANDARDSQLFIELD.fields_by_name["type"]._options = None -# @@protoc_insertion_point(module_scope) diff --git a/google/cloud/bigquery_v2/proto/table_reference.proto b/google/cloud/bigquery_v2/proto/table_reference.proto deleted file mode 100644 index ba02f80c4..000000000 --- a/google/cloud/bigquery_v2/proto/table_reference.proto +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2020 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package google.cloud.bigquery.v2; - -import "google/api/field_behavior.proto"; -import "google/api/annotations.proto"; - -option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; -option java_outer_classname = "TableReferenceProto"; -option java_package = "com.google.cloud.bigquery.v2"; - -message TableReference { - // Required. The ID of the project containing this table. - string project_id = 1 [(google.api.field_behavior) = REQUIRED]; - - // Required. The ID of the dataset containing this table. - string dataset_id = 2 [(google.api.field_behavior) = REQUIRED]; - - // Required. The ID of the table. The ID must contain only - // letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum - // length is 1,024 characters. Certain operations allow - // suffixing of the table ID with a partition decorator, such as - // `sample_table$20190123`. 
- string table_id = 3 [(google.api.field_behavior) = REQUIRED]; -} From a0a9fa23037d20699a82716ce791cbe10a54ee30 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 28 May 2021 16:56:04 +0000 Subject: [PATCH 227/341] chore: new owl bot post processor docker image (#680) Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:c66ba3c8d7bc8566f47df841f98cd0097b28fff0b1864c86f5817f4c8c3e8600 --- .github/.OwlBot.lock.yaml | 2 +- docs/conf.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 46e3f021c..da616c91a 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:3c3a445b3ddc99ccd5d31edc4b4519729635d20693900db32c4f587ed51f7479 + digest: sha256:c66ba3c8d7bc8566f47df841f98cd0097b28fff0b1864c86f5817f4c8c3e8600 diff --git a/docs/conf.py b/docs/conf.py index fdea01aad..1275fe3f1 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -364,6 +364,7 @@ "google.api_core": ("https://googleapis.dev/python/google-api-core/latest/", None,), "grpc": ("https://grpc.github.io/grpc/python/", None), "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), + "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), } From d792ce09388a6ee3706777915dd2818d4c854f79 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 2 Jun 2021 09:17:20 +0200 Subject: [PATCH 228/341] feat: add support for Parquet options (#679) * feat: add support for Parquet options For load jobs and external tables config. * Simplify ParquetOptions.to_api_repr() Co-authored by Tres Seaver. 
* Expose ParquetOptions in top level namespace * Parquet options should be reflected in options --- google/cloud/bigquery/__init__.py | 2 + google/cloud/bigquery/external_config.py | 28 ++++++- google/cloud/bigquery/format_options.py | 80 ++++++++++++++++++ google/cloud/bigquery/job/load.py | 21 +++++ tests/unit/job/test_load_config.py | 35 ++++++++ tests/unit/test_external_config.py | 100 +++++++++++++++++++++++ tests/unit/test_format_options.py | 41 ++++++++++ 7 files changed, 306 insertions(+), 1 deletion(-) create mode 100644 google/cloud/bigquery/format_options.py create mode 100644 tests/unit/test_format_options.py diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index ec08b2c84..f031cd81d 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -47,6 +47,7 @@ from google.cloud.bigquery.external_config import CSVOptions from google.cloud.bigquery.external_config import GoogleSheetsOptions from google.cloud.bigquery.external_config import ExternalSourceFormat +from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery.job import Compression from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import CopyJobConfig @@ -136,6 +137,7 @@ "BigtableColumn", "CSVOptions", "GoogleSheetsOptions", + "ParquetOptions", "DEFAULT_RETRY", # Enum Constants "enums", diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index ef4d569fa..0c49d2d76 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -27,6 +27,7 @@ from google.cloud.bigquery._helpers import _bytes_to_json from google.cloud.bigquery._helpers import _int_or_none from google.cloud.bigquery._helpers import _str_or_none +from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery.schema import SchemaField @@ -53,6 +54,12 @@ class ExternalSourceFormat(object): DATASTORE_BACKUP 
= "DATASTORE_BACKUP" """Specifies datastore backup format""" + ORC = "ORC" + """Specifies ORC format.""" + + PARQUET = "PARQUET" + """Specifies Parquet format.""" + BIGTABLE = "BIGTABLE" """Specifies Bigtable format.""" @@ -540,7 +547,7 @@ def from_api_repr(cls, resource: dict) -> "GoogleSheetsOptions": return config -_OPTION_CLASSES = (BigtableOptions, CSVOptions, GoogleSheetsOptions) +_OPTION_CLASSES = (BigtableOptions, CSVOptions, GoogleSheetsOptions, ParquetOptions) class HivePartitioningOptions(object): @@ -784,6 +791,25 @@ def schema(self, value): prop = {"fields": [field.to_api_repr() for field in value]} self._properties["schema"] = prop + @property + def parquet_options(self): + """Optional[google.cloud.bigquery.format_options.ParquetOptions]: Additional + properties to set if ``sourceFormat`` is set to PARQUET. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.parquet_options + """ + if self.source_format != ExternalSourceFormat.PARQUET: + return None + return self._options + + @parquet_options.setter + def parquet_options(self, value): + if self.source_format != ExternalSourceFormat.PARQUET: + msg = f"Cannot set Parquet options, source format is {self.source_format}" + raise TypeError(msg) + self._options = value + def to_api_repr(self) -> dict: """Build an API representation of this object. diff --git a/google/cloud/bigquery/format_options.py b/google/cloud/bigquery/format_options.py new file mode 100644 index 000000000..2c9a2ce20 --- /dev/null +++ b/google/cloud/bigquery/format_options.py @@ -0,0 +1,80 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +from typing import Dict + + +class ParquetOptions: + """Additional options if the PARQUET source format is used.""" + + _SOURCE_FORMAT = "PARQUET" + _RESOURCE_NAME = "parquetOptions" + + def __init__(self): + self._properties = {} + + @property + def enum_as_string(self) -> bool: + """Indicates whether to infer Parquet ENUM logical type as STRING instead of + BYTES by default. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ParquetOptions.FIELDS.enum_as_string + """ + return self._properties.get("enumAsString") + + @enum_as_string.setter + def enum_as_string(self, value: bool) -> None: + self._properties["enumAsString"] = value + + @property + def enable_list_inference(self) -> bool: + """Indicates whether to use schema inference specifically for Parquet LIST + logical type. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ParquetOptions.FIELDS.enable_list_inference + """ + return self._properties.get("enableListInference") + + @enable_list_inference.setter + def enable_list_inference(self, value: bool) -> None: + self._properties["enableListInference"] = value + + @classmethod + def from_api_repr(cls, resource: Dict[str, bool]) -> "ParquetOptions": + """Factory: construct an instance from a resource dict. + + Args: + resource (Dict[str, bool]): + Definition of a :class:`~.format_options.ParquetOptions` instance in + the same representation as is returned from the API. + + Returns: + :class:`~.format_options.ParquetOptions`: + Configuration parsed from ``resource``. 
+ """ + config = cls() + config._properties = copy.deepcopy(resource) + return config + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, bool]: + A dictionary in the format used by the BigQuery API. + """ + return copy.deepcopy(self._properties) diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index b8174af3e..41d38dd74 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -16,6 +16,7 @@ from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery.external_config import HivePartitioningOptions +from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery import _helpers from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.schema import _to_schema_fields @@ -439,6 +440,26 @@ def write_disposition(self): def write_disposition(self, value): self._set_sub_prop("writeDisposition", value) + @property + def parquet_options(self): + """Optional[google.cloud.bigquery.format_options.ParquetOptions]: Additional + properties to set if ``sourceFormat`` is set to PARQUET. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.parquet_options + """ + prop = self._get_sub_prop("parquetOptions") + if prop is not None: + prop = ParquetOptions.from_api_repr(prop) + return prop + + @parquet_options.setter + def parquet_options(self, value): + if value is not None: + self._set_sub_prop("parquetOptions", value.to_api_repr()) + else: + self._del_sub_prop("parquetOptions") + class LoadJob(_AsyncJob): """Asynchronous job for loading data into a table. 
diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index 63f15ec5a..b0729e428 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -700,3 +700,38 @@ def test_write_disposition_setter(self): self.assertEqual( config._properties["load"]["writeDisposition"], write_disposition ) + + def test_parquet_options_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.parquet_options) + + def test_parquet_options_hit(self): + config = self._get_target_class()() + config._properties["load"]["parquetOptions"] = dict( + enumAsString=True, enableListInference=False + ) + self.assertTrue(config.parquet_options.enum_as_string) + self.assertFalse(config.parquet_options.enable_list_inference) + + def test_parquet_options_setter(self): + from google.cloud.bigquery.format_options import ParquetOptions + + parquet_options = ParquetOptions.from_api_repr( + dict(enumAsString=False, enableListInference=True) + ) + config = self._get_target_class()() + + config.parquet_options = parquet_options + self.assertEqual( + config._properties["load"]["parquetOptions"], + {"enumAsString": False, "enableListInference": True}, + ) + + def test_parquet_options_setter_clearing(self): + config = self._get_target_class()() + config._properties["load"]["parquetOptions"] = dict( + enumAsString=False, enableListInference=True + ) + + config.parquet_options = None + self.assertNotIn("parquetOptions", config._properties["load"]) diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 648a8717e..7178367ea 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -425,6 +425,106 @@ def test_to_api_repr_bigtable(self): self.assertEqual(got_resource, exp_resource) + def test_parquet_options_getter(self): + from google.cloud.bigquery.format_options import ParquetOptions + + parquet_options = ParquetOptions.from_api_repr( + {"enumAsString": True, 
"enableListInference": False} + ) + ec = external_config.ExternalConfig( + external_config.ExternalSourceFormat.PARQUET + ) + + self.assertIsNone(ec.parquet_options.enum_as_string) + self.assertIsNone(ec.parquet_options.enable_list_inference) + + ec._options = parquet_options + + self.assertTrue(ec.parquet_options.enum_as_string) + self.assertFalse(ec.parquet_options.enable_list_inference) + + self.assertIs(ec.parquet_options, ec.options) + + def test_parquet_options_getter_non_parquet_format(self): + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) + self.assertIsNone(ec.parquet_options) + + def test_parquet_options_setter(self): + from google.cloud.bigquery.format_options import ParquetOptions + + parquet_options = ParquetOptions.from_api_repr( + {"enumAsString": False, "enableListInference": True} + ) + ec = external_config.ExternalConfig( + external_config.ExternalSourceFormat.PARQUET + ) + + ec.parquet_options = parquet_options + + # Setting Parquet options should be reflected in the generic options attribute. 
+ self.assertFalse(ec.options.enum_as_string) + self.assertTrue(ec.options.enable_list_inference) + + def test_parquet_options_setter_non_parquet_format(self): + from google.cloud.bigquery.format_options import ParquetOptions + + parquet_options = ParquetOptions.from_api_repr( + {"enumAsString": False, "enableListInference": True} + ) + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) + + with self.assertRaisesRegex(TypeError, "Cannot set.*source format is CSV"): + ec.parquet_options = parquet_options + + def test_from_api_repr_parquet(self): + from google.cloud.bigquery.format_options import ParquetOptions + + resource = _copy_and_update( + self.BASE_RESOURCE, + { + "sourceFormat": "PARQUET", + "parquetOptions": {"enumAsString": True, "enableListInference": False}, + }, + ) + + ec = external_config.ExternalConfig.from_api_repr(resource) + + self._verify_base(ec) + self.assertEqual(ec.source_format, external_config.ExternalSourceFormat.PARQUET) + self.assertIsInstance(ec.options, ParquetOptions) + self.assertTrue(ec.parquet_options.enum_as_string) + self.assertFalse(ec.parquet_options.enable_list_inference) + + got_resource = ec.to_api_repr() + + self.assertEqual(got_resource, resource) + + del resource["parquetOptions"]["enableListInference"] + ec = external_config.ExternalConfig.from_api_repr(resource) + self.assertIsNone(ec.options.enable_list_inference) + got_resource = ec.to_api_repr() + self.assertEqual(got_resource, resource) + + def test_to_api_repr_parquet(self): + from google.cloud.bigquery.format_options import ParquetOptions + + ec = external_config.ExternalConfig( + external_config.ExternalSourceFormat.PARQUET + ) + options = ParquetOptions.from_api_repr( + dict(enumAsString=False, enableListInference=True) + ) + ec._options = options + + exp_resource = { + "sourceFormat": external_config.ExternalSourceFormat.PARQUET, + "parquetOptions": {"enumAsString": False, "enableListInference": True}, + } + + got_resource = 
ec.to_api_repr() + + self.assertEqual(got_resource, exp_resource) + def _copy_and_update(d, u): d = copy.deepcopy(d) diff --git a/tests/unit/test_format_options.py b/tests/unit/test_format_options.py new file mode 100644 index 000000000..ab5f9e05c --- /dev/null +++ b/tests/unit/test_format_options.py @@ -0,0 +1,41 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class TestParquetOptions: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.format_options import ParquetOptions + + return ParquetOptions + + def test_ctor(self): + config = self._get_target_class()() + assert config.enum_as_string is None + assert config.enable_list_inference is None + + def test_from_api_repr(self): + config = self._get_target_class().from_api_repr( + {"enumAsString": False, "enableListInference": True} + ) + assert not config.enum_as_string + assert config.enable_list_inference + + def test_to_api_repr(self): + config = self._get_target_class()() + config.enum_as_string = True + config.enable_list_inference = False + + result = config.to_api_repr() + assert result == {"enumAsString": True, "enableListInference": False} From 102d867dab4d0ee64a4e69a8f071c0d040edfe71 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 2 Jun 2021 09:48:10 +0200 Subject: [PATCH 229/341] chore: release 2.18.0 (#682) Co-authored-by: release-please[bot] 
<55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a0227118..b8abc5abb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.18.0](https://www.github.com/googleapis/python-bigquery/compare/v2.17.0...v2.18.0) (2021-06-02) + + +### Features + +* add support for Parquet options ([#679](https://www.github.com/googleapis/python-bigquery/issues/679)) ([d792ce0](https://www.github.com/googleapis/python-bigquery/commit/d792ce09388a6ee3706777915dd2818d4c854f79)) + ## [2.17.0](https://www.github.com/googleapis/python-bigquery/compare/v2.16.1...v2.17.0) (2021-05-21) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 422b383cc..a613e5ea2 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.17.0" +__version__ = "2.18.0" From 0cf4e31288e3adea7f64b01d4ddbd3a1026bb056 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 3 Jun 2021 00:02:00 +0200 Subject: [PATCH 230/341] chore(deps): update dependency pyarrow to v4.0.1 (#681) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 2ed5b0df8..31a4ca5b8 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -8,5 +8,5 @@ matplotlib==3.3.4; python_version < '3.7' matplotlib==3.4.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' pandas==1.2.0; python_version >= '3.7' -pyarrow==4.0.0 +pyarrow==4.0.1 pytz==2021.1 From dea2402ef62bcc00f2a392b16330a595db38ffb7 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 3 Jun 2021 10:29:17 +0200 Subject: [PATCH 231/341] chore(deps): update dependency google-cloud-bigquery to v2.18.0 (#683) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 9fbe85970..4577dff02 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.17.0 +google-cloud-bigquery==2.18.0 google-cloud-bigquery-storage==2.4.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 31a4ca5b8..94fe39299 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.17.0 +google-cloud-bigquery==2.18.0 google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 grpcio==1.38.0 From 1f1c4b7ba4390fc4c5c8186bc22b83b45304ca06 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Sun, 6 Jun 2021 10:18:13 -0400 Subject: [PATCH 232/341] feat: 
list_tables, list_projects, list_datasets, list_models, list_routines, and list_jobs now accept a page_size parameter to control page size (#686) --- google/cloud/bigquery/client.py | 39 ++- setup.py | 2 +- testing/constraints-3.6.txt | 2 +- tests/unit/test_client.py | 494 ------------------------------- tests/unit/test_list_datasets.py | 124 ++++++++ tests/unit/test_list_jobs.py | 291 ++++++++++++++++++ tests/unit/test_list_models.py | 11 +- tests/unit/test_list_projects.py | 119 ++++++++ tests/unit/test_list_routines.py | 11 +- tests/unit/test_list_tables.py | 19 ++ 10 files changed, 605 insertions(+), 507 deletions(-) create mode 100644 tests/unit/test_list_datasets.py create mode 100644 tests/unit/test_list_jobs.py create mode 100644 tests/unit/test_list_projects.py diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 7ef3795a8..2b7a5273e 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -286,6 +286,7 @@ def list_projects( page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: float = None, + page_size: int = None, ) -> page_iterator.Iterator: """List projects for the project associated with this client. @@ -294,8 +295,8 @@ def list_projects( Args: max_results (Optional[int]): - Maximum number of projects to return, If not passed, - defaults to a value set by the API. + Maximum number of projects to return. + Defaults to a value set by the API. page_token (Optional[str]): Token representing a cursor into the projects. If not passed, @@ -310,6 +311,10 @@ def list_projects( The number of seconds to wait for the underlying HTTP transport before using ``retry``. + page_size (Optional[int]): + Maximum number of projects to return in each page. + Defaults to a value set by the API. 
+ Returns: google.api_core.page_iterator.Iterator: Iterator of :class:`~google.cloud.bigquery.client.Project` @@ -335,6 +340,7 @@ def api_request(*args, **kwargs): items_key="projects", page_token=page_token, max_results=max_results, + page_size=page_size, ) def list_datasets( @@ -346,6 +352,7 @@ def list_datasets( page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: float = None, + page_size: int = None, ) -> page_iterator.Iterator: """List datasets for the project associated with this client. @@ -375,6 +382,8 @@ def list_datasets( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + page_size (Optional[int]): + Maximum number of datasets to return per page. Returns: google.api_core.page_iterator.Iterator: @@ -414,6 +423,7 @@ def api_request(*args, **kwargs): page_token=page_token, max_results=max_results, extra_params=extra_params, + page_size=page_size, ) def dataset(self, dataset_id: str, project: str = None) -> DatasetReference: @@ -1270,6 +1280,7 @@ def list_models( page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: float = None, + page_size: int = None, ) -> page_iterator.Iterator: """[Beta] List models in the dataset. @@ -1288,7 +1299,7 @@ def list_models( to create a dataset reference from a string using :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`. max_results (Optional[int]): - Maximum number of models to return. If not passed, defaults to a + Maximum number of models to return. Defaults to a value set by the API. page_token (Optional[str]): Token representing a cursor into the models. If not passed, @@ -1301,6 +1312,9 @@ def list_models( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + page_size (Optional[int]): + Maximum number of models to return per page. + Defaults to a value set by the API. 
Returns: google.api_core.page_iterator.Iterator: @@ -1331,6 +1345,7 @@ def api_request(*args, **kwargs): items_key="models", page_token=page_token, max_results=max_results, + page_size=page_size, ) result.dataset = dataset return result @@ -1342,6 +1357,7 @@ def list_routines( page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: float = None, + page_size: int = None, ) -> page_iterator.Iterator: """[Beta] List routines in the dataset. @@ -1360,7 +1376,7 @@ def list_routines( to create a dataset reference from a string using :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`. max_results (Optional[int]): - Maximum number of routines to return. If not passed, defaults + Maximum number of routines to return. Defaults to a value set by the API. page_token (Optional[str]): Token representing a cursor into the routines. If not passed, @@ -1373,6 +1389,9 @@ def list_routines( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + page_size (Optional[int]): + Maximum number of routines to return per page. + Defaults to a value set by the API. Returns: google.api_core.page_iterator.Iterator: @@ -1403,6 +1422,7 @@ def api_request(*args, **kwargs): items_key="routines", page_token=page_token, max_results=max_results, + page_size=page_size, ) result.dataset = dataset return result @@ -1414,6 +1434,7 @@ def list_tables( page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: float = None, + page_size: int = None, ) -> page_iterator.Iterator: """List tables in the dataset. @@ -1432,7 +1453,7 @@ def list_tables( to create a dataset reference from a string using :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`. max_results (Optional[int]): - Maximum number of tables to return. If not passed, defaults + Maximum number of tables to return. Defaults to a value set by the API. page_token (Optional[str]): Token representing a cursor into the tables. 
If not passed, @@ -1445,6 +1466,9 @@ def list_tables( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + page_size (Optional[int]): + Maximum number of tables to return per page. + Defaults to a value set by the API. Returns: google.api_core.page_iterator.Iterator: @@ -1474,6 +1498,7 @@ def api_request(*args, **kwargs): items_key="tables", page_token=page_token, max_results=max_results, + page_size=page_size, ) result.dataset = dataset return result @@ -2112,6 +2137,7 @@ def list_jobs( timeout: float = None, min_creation_time: datetime.datetime = None, max_creation_time: datetime.datetime = None, + page_size: int = None, ) -> page_iterator.Iterator: """List jobs for the project associated with this client. @@ -2157,6 +2183,8 @@ def list_jobs( Max value for job creation time. If set, only jobs created before or at this timestamp are returned. If the datetime has no time zone assumes UTC time. + page_size (Optional[int]): + Maximum number of jobs to return per page. 
Returns: google.api_core.page_iterator.Iterator: @@ -2208,6 +2236,7 @@ def api_request(*args, **kwargs): page_token=page_token, max_results=max_results, extra_params=extra_params, + page_size=page_size, ) def load_table_from_uri( diff --git a/setup.py b/setup.py index 6a6202ef9..963eb73ec 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ # 'Development Status :: 5 - Production/Stable' release_status = "Development Status :: 5 - Production/Stable" dependencies = [ - "google-api-core[grpc] >= 1.23.0, < 2.0.0dev", + "google-api-core[grpc] >= 1.29.0, < 2.0.0dev", "proto-plus >= 1.10.0", "google-cloud-core >= 1.4.1, < 2.0dev", "google-resumable-media >= 0.6.0, < 2.0dev", diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index 322373eba..71c9ff49a 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -5,7 +5,7 @@ # # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", # Then this file should have foo==1.14.0 -google-api-core==1.23.0 +google-api-core==1.29.0 google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 google-resumable-media==0.6.0 diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 1346a1ef6..7a28ef248 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -471,221 +471,6 @@ def test_get_service_account_email_w_custom_retry(self): ], ) - def test_list_projects_defaults(self): - from google.cloud.bigquery.client import Project - - PROJECT_1 = "PROJECT_ONE" - PROJECT_2 = "PROJECT_TWO" - TOKEN = "TOKEN" - DATA = { - "nextPageToken": TOKEN, - "projects": [ - { - "kind": "bigquery#project", - "id": PROJECT_1, - "numericId": 1, - "projectReference": {"projectId": PROJECT_1}, - "friendlyName": "One", - }, - { - "kind": "bigquery#project", - "id": PROJECT_2, - "numericId": 2, - "projectReference": {"projectId": PROJECT_2}, - "friendlyName": "Two", - }, - ], - } - creds = _make_credentials() - client = self._make_one(PROJECT_1, creds) - conn = client._connection = 
make_connection(DATA) - iterator = client.list_projects() - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/projects"}, client, None) - projects = list(page) - token = iterator.next_page_token - - self.assertEqual(len(projects), len(DATA["projects"])) - for found, expected in zip(projects, DATA["projects"]): - self.assertIsInstance(found, Project) - self.assertEqual(found.project_id, expected["id"]) - self.assertEqual(found.numeric_id, expected["numericId"]) - self.assertEqual(found.friendly_name, expected["friendlyName"]) - self.assertEqual(token, TOKEN) - - conn.api_request.assert_called_once_with( - method="GET", path="/projects", query_params={}, timeout=None - ) - - def test_list_projects_w_timeout(self): - PROJECT_1 = "PROJECT_ONE" - TOKEN = "TOKEN" - DATA = { - "nextPageToken": TOKEN, - "projects": [], - } - creds = _make_credentials() - client = self._make_one(PROJECT_1, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_projects(timeout=7.5) - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/projects"}, client, None) - - conn.api_request.assert_called_once_with( - method="GET", path="/projects", query_params={}, timeout=7.5 - ) - - def test_list_projects_explicit_response_missing_projects_key(self): - TOKEN = "TOKEN" - DATA = {} - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_projects(max_results=3, page_token=TOKEN) - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - 
final_attributes.assert_called_once_with({"path": "/projects"}, client, None) - projects = list(page) - token = iterator.next_page_token - - self.assertEqual(len(projects), 0) - self.assertIsNone(token) - - conn.api_request.assert_called_once_with( - method="GET", - path="/projects", - query_params={"maxResults": 3, "pageToken": TOKEN}, - timeout=None, - ) - - def test_list_datasets_defaults(self): - from google.cloud.bigquery.dataset import DatasetListItem - - DATASET_1 = "dataset_one" - DATASET_2 = "dataset_two" - PATH = "projects/%s/datasets" % self.PROJECT - TOKEN = "TOKEN" - DATA = { - "nextPageToken": TOKEN, - "datasets": [ - { - "kind": "bigquery#dataset", - "id": "%s:%s" % (self.PROJECT, DATASET_1), - "datasetReference": { - "datasetId": DATASET_1, - "projectId": self.PROJECT, - }, - "friendlyName": None, - }, - { - "kind": "bigquery#dataset", - "id": "%s:%s" % (self.PROJECT, DATASET_2), - "datasetReference": { - "datasetId": DATASET_2, - "projectId": self.PROJECT, - }, - "friendlyName": "Two", - }, - ], - } - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_datasets() - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - datasets = list(page) - token = iterator.next_page_token - - self.assertEqual(len(datasets), len(DATA["datasets"])) - for found, expected in zip(datasets, DATA["datasets"]): - self.assertIsInstance(found, DatasetListItem) - self.assertEqual(found.full_dataset_id, expected["id"]) - self.assertEqual(found.friendly_name, expected["friendlyName"]) - self.assertEqual(token, TOKEN) - - conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={}, timeout=None - ) - - def test_list_datasets_w_project_and_timeout(self): - creds = 
_make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection({}) - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - list(client.list_datasets(project="other-project", timeout=7.5)) - - final_attributes.assert_called_once_with( - {"path": "/projects/other-project/datasets"}, client, None - ) - - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/other-project/datasets", - query_params={}, - timeout=7.5, - ) - - def test_list_datasets_explicit_response_missing_datasets_key(self): - PATH = "projects/%s/datasets" % self.PROJECT - TOKEN = "TOKEN" - FILTER = "FILTER" - DATA = {} - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_datasets( - include_all=True, filter=FILTER, max_results=3, page_token=TOKEN - ) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - datasets = list(page) - token = iterator.next_page_token - - self.assertEqual(len(datasets), 0) - self.assertIsNone(token) - - conn.api_request.assert_called_once_with( - method="GET", - path="/%s" % PATH, - query_params={ - "all": True, - "filter": FILTER, - "maxResults": 3, - "pageToken": TOKEN, - }, - timeout=None, - ) - def test_dataset_with_specified_project(self): from google.cloud.bigquery.dataset import DatasetReference @@ -3239,285 +3024,6 @@ def test_cancel_job_w_timeout(self): timeout=7.5, ) - def test_list_jobs_defaults(self): - from google.cloud.bigquery.job import CopyJob - from google.cloud.bigquery.job import CreateDisposition - from google.cloud.bigquery.job import ExtractJob - from google.cloud.bigquery.job import LoadJob - from google.cloud.bigquery.job import QueryJob 
- from google.cloud.bigquery.job import WriteDisposition - - SOURCE_TABLE = "source_table" - DESTINATION_TABLE = "destination_table" - QUERY_DESTINATION_TABLE = "query_destination_table" - SOURCE_URI = "gs://test_bucket/src_object*" - DESTINATION_URI = "gs://test_bucket/dst_object*" - JOB_TYPES = { - "load_job": LoadJob, - "copy_job": CopyJob, - "extract_job": ExtractJob, - "query_job": QueryJob, - } - PATH = "projects/%s/jobs" % self.PROJECT - TOKEN = "TOKEN" - QUERY = "SELECT * from test_dataset:test_table" - ASYNC_QUERY_DATA = { - "id": "%s:%s" % (self.PROJECT, "query_job"), - "jobReference": {"projectId": self.PROJECT, "jobId": "query_job"}, - "state": "DONE", - "configuration": { - "query": { - "query": QUERY, - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": QUERY_DESTINATION_TABLE, - }, - "createDisposition": CreateDisposition.CREATE_IF_NEEDED, - "writeDisposition": WriteDisposition.WRITE_TRUNCATE, - } - }, - } - EXTRACT_DATA = { - "id": "%s:%s" % (self.PROJECT, "extract_job"), - "jobReference": {"projectId": self.PROJECT, "jobId": "extract_job"}, - "state": "DONE", - "configuration": { - "extract": { - "sourceTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": SOURCE_TABLE, - }, - "destinationUris": [DESTINATION_URI], - } - }, - } - COPY_DATA = { - "id": "%s:%s" % (self.PROJECT, "copy_job"), - "jobReference": {"projectId": self.PROJECT, "jobId": "copy_job"}, - "state": "DONE", - "configuration": { - "copy": { - "sourceTables": [ - { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": SOURCE_TABLE, - } - ], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": DESTINATION_TABLE, - }, - } - }, - } - LOAD_DATA = { - "id": "%s:%s" % (self.PROJECT, "load_job"), - "jobReference": {"projectId": self.PROJECT, "jobId": "load_job"}, - "state": "DONE", - "configuration": { - "load": { - "destinationTable": { - "projectId": self.PROJECT, - 
"datasetId": self.DS_ID, - "tableId": SOURCE_TABLE, - }, - "sourceUris": [SOURCE_URI], - } - }, - } - DATA = { - "nextPageToken": TOKEN, - "jobs": [ASYNC_QUERY_DATA, EXTRACT_DATA, COPY_DATA, LOAD_DATA], - } - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_jobs() - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - jobs = list(page) - token = iterator.next_page_token - - self.assertEqual(len(jobs), len(DATA["jobs"])) - for found, expected in zip(jobs, DATA["jobs"]): - name = expected["jobReference"]["jobId"] - self.assertIsInstance(found, JOB_TYPES[name]) - self.assertEqual(found.job_id, name) - self.assertEqual(token, TOKEN) - - conn.api_request.assert_called_once_with( - method="GET", - path="/%s" % PATH, - query_params={"projection": "full"}, - timeout=None, - ) - - def test_list_jobs_load_job_wo_sourceUris(self): - from google.cloud.bigquery.job import LoadJob - - SOURCE_TABLE = "source_table" - JOB_TYPES = {"load_job": LoadJob} - PATH = "projects/%s/jobs" % self.PROJECT - TOKEN = "TOKEN" - LOAD_DATA = { - "id": "%s:%s" % (self.PROJECT, "load_job"), - "jobReference": {"projectId": self.PROJECT, "jobId": "load_job"}, - "state": "DONE", - "configuration": { - "load": { - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": SOURCE_TABLE, - } - } - }, - } - DATA = {"nextPageToken": TOKEN, "jobs": [LOAD_DATA]} - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_jobs() - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - 
final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - jobs = list(page) - token = iterator.next_page_token - - self.assertEqual(len(jobs), len(DATA["jobs"])) - for found, expected in zip(jobs, DATA["jobs"]): - name = expected["jobReference"]["jobId"] - self.assertIsInstance(found, JOB_TYPES[name]) - self.assertEqual(found.job_id, name) - self.assertEqual(token, TOKEN) - - conn.api_request.assert_called_once_with( - method="GET", - path="/%s" % PATH, - query_params={"projection": "full"}, - timeout=None, - ) - - def test_list_jobs_explicit_missing(self): - PATH = "projects/%s/jobs" % self.PROJECT - DATA = {} - TOKEN = "TOKEN" - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_jobs( - max_results=1000, page_token=TOKEN, all_users=True, state_filter="done" - ) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - jobs = list(page) - token = iterator.next_page_token - - self.assertEqual(len(jobs), 0) - self.assertIsNone(token) - - conn.api_request.assert_called_once_with( - method="GET", - path="/%s" % PATH, - query_params={ - "projection": "full", - "maxResults": 1000, - "pageToken": TOKEN, - "allUsers": True, - "stateFilter": "done", - }, - timeout=None, - ) - - def test_list_jobs_w_project(self): - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection({}) - - list(client.list_jobs(project="other-project")) - - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/other-project/jobs", - query_params={"projection": "full"}, - timeout=None, - ) - - def test_list_jobs_w_timeout(self): - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = 
client._connection = make_connection({}) - - list(client.list_jobs(timeout=7.5)) - - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/{}/jobs".format(self.PROJECT), - query_params={"projection": "full"}, - timeout=7.5, - ) - - def test_list_jobs_w_time_filter(self): - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection({}) - - # One millisecond after the unix epoch. - start_time = datetime.datetime(1970, 1, 1, 0, 0, 0, 1000) - # One millisecond after the the 2038 31-bit signed int rollover - end_time = datetime.datetime(2038, 1, 19, 3, 14, 7, 1000) - end_time_millis = (((2 ** 31) - 1) * 1000) + 1 - - list(client.list_jobs(min_creation_time=start_time, max_creation_time=end_time)) - - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/%s/jobs" % self.PROJECT, - query_params={ - "projection": "full", - "minCreationTime": "1", - "maxCreationTime": str(end_time_millis), - }, - timeout=None, - ) - - def test_list_jobs_w_parent_job_filter(self): - from google.cloud.bigquery import job - - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection({}, {}) - - parent_job_args = ["parent-job-123", job._AsyncJob("parent-job-123", client)] - - for parent_job in parent_job_args: - list(client.list_jobs(parent_job=parent_job)) - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/%s/jobs" % self.PROJECT, - query_params={"projection": "full", "parentJobId": "parent-job-123"}, - timeout=None, - ) - conn.api_request.reset_mock() - def test_load_table_from_uri(self): from google.cloud.bigquery.job import LoadJob, LoadJobConfig diff --git a/tests/unit/test_list_datasets.py b/tests/unit/test_list_datasets.py new file mode 100644 index 000000000..7793a7ba6 --- /dev/null +++ b/tests/unit/test_list_datasets.py @@ -0,0 +1,124 @@ +# Copyright 2021 Google LLC + +# Licensed under the 
Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import mock +import pytest + +from .helpers import make_connection + + +@pytest.mark.parametrize( + "extra,query", [({}, {}), (dict(page_size=42), dict(maxResults=42))] +) +def test_list_datasets_defaults(client, PROJECT, extra, query): + from google.cloud.bigquery.dataset import DatasetListItem + + DATASET_1 = "dataset_one" + DATASET_2 = "dataset_two" + PATH = "projects/%s/datasets" % PROJECT + TOKEN = "TOKEN" + DATA = { + "nextPageToken": TOKEN, + "datasets": [ + { + "kind": "bigquery#dataset", + "id": "%s:%s" % (PROJECT, DATASET_1), + "datasetReference": {"datasetId": DATASET_1, "projectId": PROJECT}, + "friendlyName": None, + }, + { + "kind": "bigquery#dataset", + "id": "%s:%s" % (PROJECT, DATASET_2), + "datasetReference": {"datasetId": DATASET_2, "projectId": PROJECT}, + "friendlyName": "Two", + }, + ], + } + conn = client._connection = make_connection(DATA) + + iterator = client.list_datasets(**extra) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + datasets = list(page) + token = iterator.next_page_token + + assert len(datasets) == len(DATA["datasets"]) + for found, expected in zip(datasets, DATA["datasets"]): + assert isinstance(found, DatasetListItem) + assert found.full_dataset_id == expected["id"] + assert found.friendly_name == 
expected["friendlyName"] + assert token == TOKEN + + conn.api_request.assert_called_once_with( + method="GET", path="/%s" % PATH, query_params=query, timeout=None + ) + + +def test_list_datasets_w_project_and_timeout(client, PROJECT): + conn = client._connection = make_connection({}) + + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + list(client.list_datasets(project="other-project", timeout=7.5)) + + final_attributes.assert_called_once_with( + {"path": "/projects/other-project/datasets"}, client, None + ) + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/other-project/datasets", + query_params={}, + timeout=7.5, + ) + + +def test_list_datasets_explicit_response_missing_datasets_key(client, PROJECT): + PATH = "projects/%s/datasets" % PROJECT + TOKEN = "TOKEN" + FILTER = "FILTER" + DATA = {} + conn = client._connection = make_connection(DATA) + + iterator = client.list_datasets( + include_all=True, filter=FILTER, max_results=3, page_token=TOKEN + ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + datasets = list(page) + token = iterator.next_page_token + + assert len(datasets) == 0 + assert token is None + + conn.api_request.assert_called_once_with( + method="GET", + path="/%s" % PATH, + query_params={ + "all": True, + "filter": FILTER, + "maxResults": 3, + "pageToken": TOKEN, + }, + timeout=None, + ) diff --git a/tests/unit/test_list_jobs.py b/tests/unit/test_list_jobs.py new file mode 100644 index 000000000..f348be724 --- /dev/null +++ b/tests/unit/test_list_jobs.py @@ -0,0 +1,291 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime + +import mock +import pytest + +from .helpers import make_connection + + +@pytest.mark.parametrize( + "extra,query", [({}, {}), (dict(page_size=42), dict(maxResults=42))] +) +def test_list_jobs_defaults(client, PROJECT, DS_ID, extra, query): + from google.cloud.bigquery.job import CopyJob + from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import ExtractJob + from google.cloud.bigquery.job import LoadJob + from google.cloud.bigquery.job import QueryJob + from google.cloud.bigquery.job import WriteDisposition + + SOURCE_TABLE = "source_table" + DESTINATION_TABLE = "destination_table" + QUERY_DESTINATION_TABLE = "query_destination_table" + SOURCE_URI = "gs://test_bucket/src_object*" + DESTINATION_URI = "gs://test_bucket/dst_object*" + JOB_TYPES = { + "load_job": LoadJob, + "copy_job": CopyJob, + "extract_job": ExtractJob, + "query_job": QueryJob, + } + PATH = "projects/%s/jobs" % PROJECT + TOKEN = "TOKEN" + QUERY = "SELECT * from test_dataset:test_table" + ASYNC_QUERY_DATA = { + "id": "%s:%s" % (PROJECT, "query_job"), + "jobReference": {"projectId": PROJECT, "jobId": "query_job"}, + "state": "DONE", + "configuration": { + "query": { + "query": QUERY, + "destinationTable": { + "projectId": PROJECT, + "datasetId": DS_ID, + "tableId": QUERY_DESTINATION_TABLE, + }, + "createDisposition": CreateDisposition.CREATE_IF_NEEDED, + "writeDisposition": WriteDisposition.WRITE_TRUNCATE, + } + }, + } + EXTRACT_DATA = { + "id": "%s:%s" % (PROJECT, "extract_job"), + "jobReference": {"projectId": PROJECT, "jobId": 
"extract_job"}, + "state": "DONE", + "configuration": { + "extract": { + "sourceTable": { + "projectId": PROJECT, + "datasetId": DS_ID, + "tableId": SOURCE_TABLE, + }, + "destinationUris": [DESTINATION_URI], + } + }, + } + COPY_DATA = { + "id": "%s:%s" % (PROJECT, "copy_job"), + "jobReference": {"projectId": PROJECT, "jobId": "copy_job"}, + "state": "DONE", + "configuration": { + "copy": { + "sourceTables": [ + {"projectId": PROJECT, "datasetId": DS_ID, "tableId": SOURCE_TABLE} + ], + "destinationTable": { + "projectId": PROJECT, + "datasetId": DS_ID, + "tableId": DESTINATION_TABLE, + }, + } + }, + } + LOAD_DATA = { + "id": "%s:%s" % (PROJECT, "load_job"), + "jobReference": {"projectId": PROJECT, "jobId": "load_job"}, + "state": "DONE", + "configuration": { + "load": { + "destinationTable": { + "projectId": PROJECT, + "datasetId": DS_ID, + "tableId": SOURCE_TABLE, + }, + "sourceUris": [SOURCE_URI], + } + }, + } + DATA = { + "nextPageToken": TOKEN, + "jobs": [ASYNC_QUERY_DATA, EXTRACT_DATA, COPY_DATA, LOAD_DATA], + } + conn = client._connection = make_connection(DATA) + + iterator = client.list_jobs(**extra) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + jobs = list(page) + token = iterator.next_page_token + + assert len(jobs) == len(DATA["jobs"]) + for found, expected in zip(jobs, DATA["jobs"]): + name = expected["jobReference"]["jobId"] + assert isinstance(found, JOB_TYPES[name]) + assert found.job_id == name + assert token == TOKEN + + conn.api_request.assert_called_once_with( + method="GET", + path="/%s" % PATH, + query_params=dict({"projection": "full"}, **query), + timeout=None, + ) + + +def test_list_jobs_load_job_wo_sourceUris(client, PROJECT, DS_ID): + from google.cloud.bigquery.job import LoadJob + + SOURCE_TABLE = "source_table" + JOB_TYPES = {"load_job": LoadJob} + PATH 
= "projects/%s/jobs" % PROJECT + TOKEN = "TOKEN" + LOAD_DATA = { + "id": "%s:%s" % (PROJECT, "load_job"), + "jobReference": {"projectId": PROJECT, "jobId": "load_job"}, + "state": "DONE", + "configuration": { + "load": { + "destinationTable": { + "projectId": PROJECT, + "datasetId": DS_ID, + "tableId": SOURCE_TABLE, + } + } + }, + } + DATA = {"nextPageToken": TOKEN, "jobs": [LOAD_DATA]} + conn = client._connection = make_connection(DATA) + + iterator = client.list_jobs() + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + jobs = list(page) + token = iterator.next_page_token + + assert len(jobs) == len(DATA["jobs"]) + for found, expected in zip(jobs, DATA["jobs"]): + name = expected["jobReference"]["jobId"] + assert isinstance(found, JOB_TYPES[name]) + assert found.job_id == name + assert token == TOKEN + + conn.api_request.assert_called_once_with( + method="GET", + path="/%s" % PATH, + query_params={"projection": "full"}, + timeout=None, + ) + + +def test_list_jobs_explicit_missing(client, PROJECT): + PATH = "projects/%s/jobs" % PROJECT + DATA = {} + TOKEN = "TOKEN" + conn = client._connection = make_connection(DATA) + + iterator = client.list_jobs( + max_results=1000, page_token=TOKEN, all_users=True, state_filter="done" + ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + jobs = list(page) + token = iterator.next_page_token + + assert len(jobs) == 0 + assert token is None + + conn.api_request.assert_called_once_with( + method="GET", + path="/%s" % PATH, + query_params={ + "projection": "full", + "maxResults": 1000, + "pageToken": TOKEN, + "allUsers": True, + "stateFilter": "done", + }, + timeout=None, + 
) + + +def test_list_jobs_w_project(client, PROJECT): + conn = client._connection = make_connection({}) + + list(client.list_jobs(project="other-project")) + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/other-project/jobs", + query_params={"projection": "full"}, + timeout=None, + ) + + +def test_list_jobs_w_timeout(client, PROJECT): + conn = client._connection = make_connection({}) + + list(client.list_jobs(timeout=7.5)) + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/{}/jobs".format(PROJECT), + query_params={"projection": "full"}, + timeout=7.5, + ) + + +def test_list_jobs_w_time_filter(client, PROJECT): + conn = client._connection = make_connection({}) + + # One millisecond after the unix epoch. + start_time = datetime.datetime(1970, 1, 1, 0, 0, 0, 1000) + # One millisecond after the the 2038 31-bit signed int rollover + end_time = datetime.datetime(2038, 1, 19, 3, 14, 7, 1000) + end_time_millis = (((2 ** 31) - 1) * 1000) + 1 + + list(client.list_jobs(min_creation_time=start_time, max_creation_time=end_time)) + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/%s/jobs" % PROJECT, + query_params={ + "projection": "full", + "minCreationTime": "1", + "maxCreationTime": str(end_time_millis), + }, + timeout=None, + ) + + +def test_list_jobs_w_parent_job_filter(client, PROJECT): + from google.cloud.bigquery import job + + conn = client._connection = make_connection({}, {}) + + parent_job_args = ["parent-job-123", job._AsyncJob("parent-job-123", client)] + + for parent_job in parent_job_args: + list(client.list_jobs(parent_job=parent_job)) + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/%s/jobs" % PROJECT, + query_params={"projection": "full", "parentJobId": "parent-job-123"}, + timeout=None, + ) + conn.api_request.reset_mock() diff --git a/tests/unit/test_list_models.py b/tests/unit/test_list_models.py index 56aa66126..4ede9a7dd 100644 --- 
a/tests/unit/test_list_models.py +++ b/tests/unit/test_list_models.py @@ -33,8 +33,13 @@ def test_list_models_empty_w_timeout(client, PROJECT, DS_ID): ) +@pytest.mark.parametrize( + "extra,query", [({}, {}), (dict(page_size=42), dict(maxResults=42))] +) @dataset_polymorphic -def test_list_models_defaults(make_dataset, get_reference, client, PROJECT, DS_ID): +def test_list_models_defaults( + make_dataset, get_reference, client, PROJECT, DS_ID, extra, query, +): from google.cloud.bigquery.model import Model MODEL_1 = "model_one" @@ -64,7 +69,7 @@ def test_list_models_defaults(make_dataset, get_reference, client, PROJECT, DS_I conn = client._connection = make_connection(DATA) dataset = make_dataset(PROJECT, DS_ID) - iterator = client.list_models(dataset) + iterator = client.list_models(dataset, **extra) assert iterator.dataset == get_reference(dataset) page = next(iterator.pages) models = list(page) @@ -77,7 +82,7 @@ def test_list_models_defaults(make_dataset, get_reference, client, PROJECT, DS_I assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={}, timeout=None + method="GET", path="/%s" % PATH, query_params=query, timeout=None ) diff --git a/tests/unit/test_list_projects.py b/tests/unit/test_list_projects.py new file mode 100644 index 000000000..a88540dd5 --- /dev/null +++ b/tests/unit/test_list_projects.py @@ -0,0 +1,119 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import mock +import pytest + +from .helpers import make_connection + + +@pytest.mark.parametrize( + "extra,query", [({}, {}), (dict(page_size=42), dict(maxResults=42))] +) +def test_list_projects_defaults(client, PROJECT, extra, query): + from google.cloud.bigquery.client import Project + + PROJECT_2 = "PROJECT_TWO" + TOKEN = "TOKEN" + DATA = { + "nextPageToken": TOKEN, + "projects": [ + { + "kind": "bigquery#project", + "id": PROJECT, + "numericId": 1, + "projectReference": {"projectId": PROJECT}, + "friendlyName": "One", + }, + { + "kind": "bigquery#project", + "id": PROJECT_2, + "numericId": 2, + "projectReference": {"projectId": PROJECT_2}, + "friendlyName": "Two", + }, + ], + } + conn = client._connection = make_connection(DATA) + iterator = client.list_projects(**extra) + + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/projects"}, client, None) + projects = list(page) + token = iterator.next_page_token + + assert len(projects) == len(DATA["projects"]) + for found, expected in zip(projects, DATA["projects"]): + assert isinstance(found, Project) + assert found.project_id == expected["id"] + assert found.numeric_id == expected["numericId"] + assert found.friendly_name == expected["friendlyName"] + assert token == TOKEN + + conn.api_request.assert_called_once_with( + method="GET", path="/projects", query_params=query, timeout=None + ) + + +def test_list_projects_w_timeout(client): + TOKEN = "TOKEN" + DATA = { + "nextPageToken": TOKEN, + "projects": [], + } + conn = client._connection = make_connection(DATA) + + iterator = client.list_projects(timeout=7.5) + + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/projects"}, client, None) + + 
conn.api_request.assert_called_once_with( + method="GET", path="/projects", query_params={}, timeout=7.5 + ) + + +def test_list_projects_explicit_response_missing_projects_key(client): + TOKEN = "TOKEN" + DATA = {} + conn = client._connection = make_connection(DATA) + + iterator = client.list_projects(max_results=3, page_token=TOKEN) + + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/projects"}, client, None) + projects = list(page) + token = iterator.next_page_token + + assert len(projects) == 0 + assert token is None + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects", + query_params={"maxResults": 3, "pageToken": TOKEN}, + timeout=None, + ) diff --git a/tests/unit/test_list_routines.py b/tests/unit/test_list_routines.py index 714ede0d4..069966542 100644 --- a/tests/unit/test_list_routines.py +++ b/tests/unit/test_list_routines.py @@ -34,8 +34,13 @@ def test_list_routines_empty_w_timeout(client): ) +@pytest.mark.parametrize( + "extra,query", [({}, {}), (dict(page_size=42), dict(maxResults=42))] +) @dataset_polymorphic -def test_list_routines_defaults(make_dataset, get_reference, client, PROJECT): +def test_list_routines_defaults( + make_dataset, get_reference, client, PROJECT, extra, query +): from google.cloud.bigquery.routine import Routine project_id = PROJECT @@ -67,7 +72,7 @@ def test_list_routines_defaults(make_dataset, get_reference, client, PROJECT): conn = client._connection = make_connection(resource) dataset = make_dataset(client.project, dataset_id) - iterator = client.list_routines(dataset) + iterator = client.list_routines(dataset, **extra) assert iterator.dataset == get_reference(dataset) page = next(iterator.pages) routines = list(page) @@ -80,7 +85,7 @@ def test_list_routines_defaults(make_dataset, get_reference, client, PROJECT): assert actual_token == token 
conn.api_request.assert_called_once_with( - method="GET", path=path, query_params={}, timeout=None + method="GET", path=path, query_params=query, timeout=None ) diff --git a/tests/unit/test_list_tables.py b/tests/unit/test_list_tables.py index 9acee9580..45d15bed3 100644 --- a/tests/unit/test_list_tables.py +++ b/tests/unit/test_list_tables.py @@ -157,3 +157,22 @@ def test_list_tables_explicit(client, PROJECT, DS_ID): def test_list_tables_wrong_type(client): with pytest.raises(TypeError): client.list_tables(42) + + +@dataset_polymorphic +def test_list_tables_page_size(make_dataset, get_reference, client, PROJECT, DS_ID): + path = "/projects/{}/datasets/{}/tables".format(PROJECT, DS_ID) + conn = client._connection = make_connection({}) + + dataset = make_dataset(PROJECT, DS_ID) + iterator = client.list_tables(dataset, timeout=7.5, page_size=42) + assert iterator.dataset == get_reference(dataset) + page = next(iterator.pages) + tables = list(page) + token = iterator.next_page_token + + assert tables == [] + assert token is None + conn.api_request.assert_called_once_with( + method="GET", path=path, query_params=dict(maxResults=42), timeout=7.5 + ) From d034a4d34be500f665bfa75c53d1badcb6750445 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Sun, 6 Jun 2021 08:57:21 -0600 Subject: [PATCH 233/341] chore: release 2.19.0 (#688) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b8abc5abb..a7d62cd36 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.19.0](https://www.github.com/googleapis/python-bigquery/compare/v2.18.0...v2.19.0) (2021-06-06) + + +### Features + +* list_tables, list_projects, list_datasets, list_models, 
list_routines, and list_jobs now accept a page_size parameter to control page size ([#686](https://www.github.com/googleapis/python-bigquery/issues/686)) ([1f1c4b7](https://www.github.com/googleapis/python-bigquery/commit/1f1c4b7ba4390fc4c5c8186bc22b83b45304ca06)) + ## [2.18.0](https://www.github.com/googleapis/python-bigquery/compare/v2.17.0...v2.18.0) (2021-06-02) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index a613e5ea2..2605c08a3 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.18.0" +__version__ = "2.19.0" From 1259e16394784315368e8be959c1ac097782b62e Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 7 Jun 2021 14:55:58 +0200 Subject: [PATCH 234/341] feat: support script options in query job config (#690) --- google/cloud/bigquery/__init__.py | 4 ++ google/cloud/bigquery/enums.py | 13 ++++ google/cloud/bigquery/job/__init__.py | 2 + google/cloud/bigquery/job/query.py | 96 ++++++++++++++++++++++++++- tests/unit/job/test_query_config.py | 56 ++++++++++++++++ 5 files changed, 170 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index f031cd81d..94f87304a 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -37,6 +37,7 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import enums +from google.cloud.bigquery.enums import KeyResultStatementKind from google.cloud.bigquery.enums import SqlTypeNames from google.cloud.bigquery.enums import StandardSqlDataTypes from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError @@ -62,6 +63,7 @@ from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.job import QueryPriority from 
google.cloud.bigquery.job import SchemaUpdateOption +from google.cloud.bigquery.job import ScriptOptions from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.job import UnknownJob from google.cloud.bigquery.job import WriteDisposition @@ -138,6 +140,7 @@ "CSVOptions", "GoogleSheetsOptions", "ParquetOptions", + "ScriptOptions", "DEFAULT_RETRY", # Enum Constants "enums", @@ -147,6 +150,7 @@ "DeterminismLevel", "ExternalSourceFormat", "Encoding", + "KeyResultStatementKind", "QueryPriority", "SchemaUpdateOption", "SourceFormat", diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 787c2449d..edf991b6f 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -142,6 +142,19 @@ class SourceFormat(object): """Specifies Orc format.""" +class KeyResultStatementKind: + """Determines which statement in the script represents the "key result". + + The "key result" is used to populate the schema and query results of the script job. 
+ + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#keyresultstatementkind + """ + + KEY_RESULT_STATEMENT_KIND_UNSPECIFIED = "KEY_RESULT_STATEMENT_KIND_UNSPECIFIED" + LAST = "LAST" + FIRST_SELECT = "FIRST_SELECT" + + _SQL_SCALAR_TYPES = frozenset( ( "INT64", diff --git a/google/cloud/bigquery/job/__init__.py b/google/cloud/bigquery/job/__init__.py index 4945841d9..cdab92e05 100644 --- a/google/cloud/bigquery/job/__init__.py +++ b/google/cloud/bigquery/job/__init__.py @@ -34,6 +34,7 @@ from google.cloud.bigquery.job.query import QueryJobConfig from google.cloud.bigquery.job.query import QueryPlanEntry from google.cloud.bigquery.job.query import QueryPlanEntryStep +from google.cloud.bigquery.job.query import ScriptOptions from google.cloud.bigquery.job.query import TimelineEntry from google.cloud.bigquery.enums import Compression from google.cloud.bigquery.enums import CreateDisposition @@ -67,6 +68,7 @@ "QueryJobConfig", "QueryPlanEntry", "QueryPlanEntryStep", + "ScriptOptions", "TimelineEntry", "Compression", "CreateDisposition", diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index f52f9c621..455ef4632 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -18,7 +18,7 @@ import copy import re import typing -from typing import Any, Dict, Union +from typing import Any, Dict, Optional, Union from google.api_core import exceptions from google.api_core.future import polling as polling_future @@ -28,6 +28,7 @@ from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +from google.cloud.bigquery.enums import KeyResultStatementKind from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery import _helpers from google.cloud.bigquery.query import _query_param_from_api_repr @@ -113,6 +114,82 @@ def 
_to_api_repr_table_defs(value): return {k: ExternalConfig.to_api_repr(v) for k, v in value.items()} +class ScriptOptions: + """Options controlling the execution of scripts. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#ScriptOptions + """ + + def __init__( + self, + statement_timeout_ms: Optional[int] = None, + statement_byte_budget: Optional[int] = None, + key_result_statement: Optional[KeyResultStatementKind] = None, + ): + self._properties = {} + self.statement_timeout_ms = statement_timeout_ms + self.statement_byte_budget = statement_byte_budget + self.key_result_statement = key_result_statement + + @classmethod + def from_api_repr(cls, resource: Dict[str, Any]) -> "ScriptOptions": + """Factory: construct instance from the JSON repr. + + Args: + resource(Dict[str: Any]): + ScriptOptions representation returned from API. + + Returns: + google.cloud.bigquery.ScriptOptions: + ScriptOptions sample parsed from ``resource``. + """ + entry = cls() + entry._properties = copy.deepcopy(resource) + return entry + + def to_api_repr(self) -> Dict[str, Any]: + """Construct the API resource representation.""" + return copy.deepcopy(self._properties) + + @property + def statement_timeout_ms(self) -> Union[int, None]: + """Timeout period for each statement in a script.""" + return _helpers._int_or_none(self._properties.get("statementTimeoutMs")) + + @statement_timeout_ms.setter + def statement_timeout_ms(self, value: Union[int, None]): + if value is not None: + value = str(value) + self._properties["statementTimeoutMs"] = value + + @property + def statement_byte_budget(self) -> Union[int, None]: + """Limit on the number of bytes billed per statement. + + Exceeding this budget results in an error. 
+ """ + return _helpers._int_or_none(self._properties.get("statementByteBudget")) + + @statement_byte_budget.setter + def statement_byte_budget(self, value: Union[int, None]): + if value is not None: + value = str(value) + self._properties["statementByteBudget"] = value + + @property + def key_result_statement(self) -> Union[KeyResultStatementKind, None]: + """Determines which statement in the script represents the "key result". + + This is used to populate the schema and query results of the script job. + Default is ``KeyResultStatementKind.LAST``. + """ + return self._properties.get("keyResultStatement") + + @key_result_statement.setter + def key_result_statement(self, value: Union[KeyResultStatementKind, None]): + self._properties["keyResultStatement"] = value + + class QueryJobConfig(_JobConfig): """Configuration options for query jobs. @@ -502,6 +579,23 @@ def schema_update_options(self): def schema_update_options(self, values): self._set_sub_prop("schemaUpdateOptions", values) + @property + def script_options(self) -> ScriptOptions: + """Connection properties which can modify the query behavior. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#scriptoptions + """ + prop = self._get_sub_prop("scriptOptions") + if prop is not None: + prop = ScriptOptions.from_api_repr(prop) + return prop + + @script_options.setter + def script_options(self, value: Union[ScriptOptions, None]): + if value is not None: + value = value.to_api_repr() + self._set_sub_prop("scriptOptions", value) + def to_api_repr(self) -> dict: """Build an API representation of the query job config. 
diff --git a/tests/unit/job/test_query_config.py b/tests/unit/job/test_query_config.py index db03d6a3b..109cf7e44 100644 --- a/tests/unit/job/test_query_config.py +++ b/tests/unit/job/test_query_config.py @@ -253,3 +253,59 @@ def test_from_api_repr_with_encryption(self): self.assertEqual( config.destination_encryption_configuration.kms_key_name, self.KMS_KEY_NAME ) + + def test_to_api_repr_with_script_options_none(self): + config = self._make_one() + config.script_options = None + + resource = config.to_api_repr() + + self.assertEqual(resource, {"query": {"scriptOptions": None}}) + self.assertIsNone(config.script_options) + + def test_to_api_repr_with_script_options(self): + from google.cloud.bigquery import KeyResultStatementKind + from google.cloud.bigquery import ScriptOptions + + config = self._make_one() + config.script_options = ScriptOptions( + statement_timeout_ms=60, + statement_byte_budget=999, + key_result_statement=KeyResultStatementKind.FIRST_SELECT, + ) + + resource = config.to_api_repr() + + expected_script_options_repr = { + "statementTimeoutMs": "60", + "statementByteBudget": "999", + "keyResultStatement": KeyResultStatementKind.FIRST_SELECT, + } + self.assertEqual( + resource, {"query": {"scriptOptions": expected_script_options_repr}} + ) + + def test_from_api_repr_with_script_options(self): + from google.cloud.bigquery import KeyResultStatementKind + from google.cloud.bigquery import ScriptOptions + + resource = { + "query": { + "scriptOptions": { + "statementTimeoutMs": "42", + "statementByteBudget": "123", + "keyResultStatement": KeyResultStatementKind.LAST, + }, + }, + } + klass = self._get_target_class() + + config = klass.from_api_repr(resource) + + script_options = config.script_options + self.assertIsInstance(script_options, ScriptOptions) + self.assertEqual(script_options.statement_timeout_ms, 42) + self.assertEqual(script_options.statement_byte_budget, 123) + self.assertEqual( + script_options.key_result_statement, 
KeyResultStatementKind.LAST + ) From ed4286dcd39f8d60c2d6552b89bc12352b9bea91 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 7 Jun 2021 14:56:55 +0200 Subject: [PATCH 235/341] chore(deps): update dependency google-cloud-bigquery to v2.19.0 (#691) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 4577dff02..b4dae32e9 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.18.0 +google-cloud-bigquery==2.19.0 google-cloud-bigquery-storage==2.4.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 94fe39299..0188bde52 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.18.0 +google-cloud-bigquery==2.19.0 google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 grpcio==1.38.0 From 23140d943fc06652a19fa8866ccf6d8ba126318d Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 7 Jun 2021 15:30:36 +0200 Subject: [PATCH 236/341] chore: release 2.20.0 (#693) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a7d62cd36..b08cd98c7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.20.0](https://www.github.com/googleapis/python-bigquery/compare/v2.19.0...v2.20.0) (2021-06-07) + + +### Features + +* support script options in query job config ([#690](https://www.github.com/googleapis/python-bigquery/issues/690)) 
([1259e16](https://www.github.com/googleapis/python-bigquery/commit/1259e16394784315368e8be959c1ac097782b62e)) + ## [2.19.0](https://www.github.com/googleapis/python-bigquery/compare/v2.18.0...v2.19.0) (2021-06-06) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 2605c08a3..9fea4fece 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.19.0" +__version__ = "2.20.0" From 790d11bdbbf50d2f30c09ba4660d420ba2212866 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 8 Jun 2021 13:38:51 +0200 Subject: [PATCH 237/341] chore(deps): update dependency google-cloud-bigquery to v2.20.0 (#694) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index b4dae32e9..80fa8e454 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.19.0 +google-cloud-bigquery==2.20.0 google-cloud-bigquery-storage==2.4.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 0188bde52..391c85ae3 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.19.0 +google-cloud-bigquery==2.20.0 google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 grpcio==1.38.0 From 8cfdda8e30a50780ea27bc515a43502fd402280c Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Wed, 16 Jun 2021 07:17:37 -0400 Subject: [PATCH 238/341] chore: pin sphinx to version 4.0.1 (#701) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: pin sphinx to version 4.0.1 Fixes #700 * 🦉 Updates 
from OwlBot Co-authored-by: Owl Bot --- noxfile.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/noxfile.py b/noxfile.py index a52025635..662abbd78 100644 --- a/noxfile.py +++ b/noxfile.py @@ -271,7 +271,7 @@ def blacken(session): def docs(session): """Build the docs.""" - session.install("ipython", "recommonmark", "sphinx", "sphinx_rtd_theme") + session.install("ipython", "recommonmark", "sphinx==4.0.1", "sphinx_rtd_theme") session.install("google-cloud-storage") session.install("-e", ".[all]") @@ -295,7 +295,9 @@ def docfx(session): """Build the docfx yaml files for this library.""" session.install("-e", ".") - session.install("sphinx", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml") + session.install( + "sphinx==4.0.1", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml" + ) shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) session.run( From b85c8d36e94e0aaf80ba3830ec767eea7153f14c Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 17 Jun 2021 06:02:02 +0000 Subject: [PATCH 239/341] chore: remove u'' prefixes from strings in docs/conf.py (#702) Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:58c7342b0bccf85028100adaa3d856cb4a871c22ca9c01960d996e66c40548ce --- .github/.OwlBot.lock.yaml | 2 +- docs/conf.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index da616c91a..ea06d395e 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:c66ba3c8d7bc8566f47df841f98cd0097b28fff0b1864c86f5817f4c8c3e8600 + digest: sha256:58c7342b0bccf85028100adaa3d856cb4a871c22ca9c01960d996e66c40548ce diff --git a/docs/conf.py b/docs/conf.py index 1275fe3f1..cb347160d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -80,9 +80,9 @@ master_doc = "index" # 
General information about the project. -project = u"google-cloud-bigquery" -copyright = u"2019, Google" -author = u"Google APIs" +project = "google-cloud-bigquery" +copyright = "2019, Google" +author = "Google APIs" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -282,7 +282,7 @@ ( master_doc, "google-cloud-bigquery.tex", - u"google-cloud-bigquery Documentation", + "google-cloud-bigquery Documentation", author, "manual", ) @@ -317,7 +317,7 @@ ( master_doc, "google-cloud-bigquery", - u"google-cloud-bigquery Documentation", + "google-cloud-bigquery Documentation", [author], 1, ) @@ -336,7 +336,7 @@ ( master_doc, "google-cloud-bigquery", - u"google-cloud-bigquery Documentation", + "google-cloud-bigquery Documentation", author, "google-cloud-bigquery", "google-cloud-bigquery Library", From b2a689b05b4fdc3fc10767f44534d183137b9d21 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 17 Jun 2021 22:59:26 +0200 Subject: [PATCH 240/341] test: add column ACLs test with real policy tag (#678) * test: add column ACLs test with real policy tag * Use v1 version of the datacatalog client * Install datacatalog in pre-releease tests * Adjust test to actually make it work * Make sure taxonomy is properly cleaned up --- noxfile.py | 4 +++ tests/system/test_client.py | 68 +++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/noxfile.py b/noxfile.py index 662abbd78..0dfe7bf93 100644 --- a/noxfile.py +++ b/noxfile.py @@ -142,6 +142,9 @@ def system(session): else: session.install("google-cloud-storage", "-c", constraints_path) + # Data Catalog needed for the column ACL test with a real Policy Tag. 
+ session.install("google-cloud-datacatalog", "-c", constraints_path) + session.install("-e", ".[all]", "-c", constraints_path) session.install("ipython", "-c", constraints_path) @@ -211,6 +214,7 @@ def prerelease_deps(session): session.install("--pre", "grpcio", "pandas") session.install( "freezegun", + "google-cloud-datacatalog", "google-cloud-storage", "google-cloud-testutils", "IPython", diff --git a/tests/system/test_client.py b/tests/system/test_client.py index b4b0c053d..f91004eac 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -68,6 +68,8 @@ from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi, enums from google.cloud import storage +from google.cloud.datacatalog_v1 import types as datacatalog_types +from google.cloud.datacatalog_v1 import PolicyTagManagerClient from test_utils.retry import RetryErrors from test_utils.retry import RetryInstanceState @@ -167,6 +169,8 @@ def setUp(self): self.to_delete = [dataset] def tearDown(self): + policy_tag_client = PolicyTagManagerClient() + def _still_in_use(bad_request): return any( error["reason"] == "resourceInUse" for error in bad_request._errors @@ -183,6 +187,8 @@ def _still_in_use(bad_request): retry_in_use(Config.CLIENT.delete_dataset)(doomed, delete_contents=True) elif isinstance(doomed, (Table, bigquery.TableReference)): retry_in_use(Config.CLIENT.delete_table)(doomed) + elif isinstance(doomed, datacatalog_types.Taxonomy): + policy_tag_client.delete_taxonomy(name=doomed.name) else: doomed.delete() @@ -381,6 +387,68 @@ def test_create_table_with_policy(self): table2 = Config.CLIENT.update_table(table, ["schema"]) self.assertEqual(policy_2, table2.schema[1].policy_tags) + def test_create_table_with_real_custom_policy(self): + from google.cloud.bigquery.schema import PolicyTagList + + policy_tag_client = PolicyTagManagerClient() + taxonomy_parent = f"projects/{Config.CLIENT.project}/locations/us" + + new_taxonomy = datacatalog_types.Taxonomy( + 
display_name="Custom test taxonomy", + description="This taxonomy is ony used for a test.", + activated_policy_types=[ + datacatalog_types.Taxonomy.PolicyType.FINE_GRAINED_ACCESS_CONTROL + ], + ) + + taxonomy = policy_tag_client.create_taxonomy( + parent=taxonomy_parent, taxonomy=new_taxonomy + ) + self.to_delete.insert(0, taxonomy) + + parent_policy_tag = policy_tag_client.create_policy_tag( + parent=taxonomy.name, + policy_tag=datacatalog_types.PolicyTag( + display_name="Parent policy tag", parent_policy_tag=None + ), + ) + child_policy_tag = policy_tag_client.create_policy_tag( + parent=taxonomy.name, + policy_tag=datacatalog_types.PolicyTag( + display_name="Child policy tag", + parent_policy_tag=parent_policy_tag.name, + ), + ) + + dataset = self.temp_dataset( + _make_dataset_id("create_table_with_real_custom_policy") + ) + table_id = "test_table" + policy_1 = PolicyTagList(names=[parent_policy_tag.name]) + policy_2 = PolicyTagList(names=[child_policy_tag.name]) + + schema = [ + bigquery.SchemaField( + "first_name", "STRING", mode="REQUIRED", policy_tags=policy_1 + ), + bigquery.SchemaField( + "age", "INTEGER", mode="REQUIRED", policy_tags=policy_2 + ), + ] + table_arg = Table(dataset.table(table_id), schema=schema) + self.assertFalse(_table_exists(table_arg)) + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + + self.assertTrue(_table_exists(table)) + self.assertCountEqual( + list(table.schema[0].policy_tags.names), [parent_policy_tag.name] + ) + self.assertCountEqual( + list(table.schema[1].policy_tags.names), [child_policy_tag.name] + ) + def test_create_table_w_time_partitioning_w_clustering_fields(self): from google.cloud.bigquery.table import TimePartitioning from google.cloud.bigquery.table import TimePartitioningType From 9cdeee0255f62cab0d0394430ff211403773d0a0 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 18 Jun 2021 22:35:35 +0200 Subject: [PATCH 241/341] chore: resolve deprecation warnings 
from sys tests (#705) --- tests/system/test_client.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/tests/system/test_client.py b/tests/system/test_client.py index f91004eac..c4caadbe9 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -1123,7 +1123,7 @@ def test_extract_table(self): job.result(timeout=100) self.to_delete.insert(0, destination) - got_bytes = retry_storage_errors(destination.download_as_string)() + got_bytes = retry_storage_errors(destination.download_as_bytes)() got = got_bytes.decode("utf-8") self.assertIn("Bharney Rhubble", got) @@ -2178,15 +2178,11 @@ def test_nested_table_to_arrow(self): self.assertEqual(tbl.num_rows, 1) self.assertEqual(tbl.num_columns, 3) # Columns may not appear in the requested order. - self.assertTrue( - pyarrow.types.is_float64(tbl.schema.field_by_name("float_col").type) - ) - self.assertTrue( - pyarrow.types.is_string(tbl.schema.field_by_name("string_col").type) - ) - record_col = tbl.schema.field_by_name("record_col").type + self.assertTrue(pyarrow.types.is_float64(tbl.schema.field("float_col").type)) + self.assertTrue(pyarrow.types.is_string(tbl.schema.field("string_col").type)) + record_col = tbl.schema.field("record_col").type self.assertTrue(pyarrow.types.is_struct(record_col)) - self.assertEqual(record_col.num_children, 2) + self.assertEqual(record_col.num_fields, 2) self.assertEqual(record_col[0].name, "nested_string") self.assertTrue(pyarrow.types.is_string(record_col[0].type)) self.assertEqual(record_col[1].name, "nested_repeated") From 27d6839ee8a40909e4199cfa0da8b6b64705b2e9 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Sat, 19 Jun 2021 02:26:03 +0000 Subject: [PATCH 242/341] docs: omit mention of Python 2.7 in `CONTRIBUTING.rst` (#706) Source-Link: https://github.com/googleapis/synthtool/commit/b91f129527853d5b756146a0b5044481fb4e09a8 Post-Processor: 
gcr.io/repo-automation-bots/owlbot-python:latest@sha256:b6169fc6a5207b11800a7c002d0c5c2bc6d82697185ca12e666f44031468cfcd --- .github/.OwlBot.lock.yaml | 2 +- CONTRIBUTING.rst | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index ea06d395e..cc49c6a3d 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:58c7342b0bccf85028100adaa3d856cb4a871c22ca9c01960d996e66c40548ce + digest: sha256:b6169fc6a5207b11800a7c002d0c5c2bc6d82697185ca12e666f44031468cfcd diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 20ba9e62e..a9b389e83 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -69,7 +69,6 @@ We use `nox `__ to instrument our tests. - To test your changes, run unit tests with ``nox``:: - $ nox -s unit-2.7 $ nox -s unit-3.8 $ ... @@ -144,7 +143,6 @@ Running System Tests # Run all system tests $ nox -s system-3.8 - $ nox -s system-2.7 # Run a single system test $ nox -s system-3.8 -- -k @@ -152,9 +150,8 @@ Running System Tests .. note:: - System tests are only configured to run under Python 2.7 and - Python 3.8. For expediency, we do not run them in older versions - of Python 3. + System tests are only configured to run under Python 3.8. + For expediency, we do not run them in older versions of Python 3. This alone will not run the tests. 
You'll need to change some local auth settings and change some configuration in your project to From 0b20015c1727a2d7cd4234b18210db8d04d7ca77 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Sun, 20 Jun 2021 01:04:03 +0000 Subject: [PATCH 243/341] chore: update precommit hook pre-commit/pre-commit-hooks to v4 (#1083) (#709) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [pre-commit/pre-commit-hooks](https://togithub.com/pre-commit/pre-commit-hooks) | repository | major | `v3.4.0` -> `v4.0.1` | --- ### Release Notes
pre-commit/pre-commit-hooks ### [`v4.0.1`](https://togithub.com/pre-commit/pre-commit-hooks/releases/v4.0.1) [Compare Source](https://togithub.com/pre-commit/pre-commit-hooks/compare/v4.0.0...v4.0.1) ##### Fixes - `check-shebang-scripts-are-executable` fix entry point. - [#​602](https://togithub.com/pre-commit/pre-commit-hooks/issues/602) issue by [@​Person-93](https://togithub.com/Person-93). - [#​603](https://togithub.com/pre-commit/pre-commit-hooks/issues/603) PR by [@​scop](https://togithub.com/scop). ### [`v4.0.0`](https://togithub.com/pre-commit/pre-commit-hooks/releases/v4.0.0) [Compare Source](https://togithub.com/pre-commit/pre-commit-hooks/compare/v3.4.0...v4.0.0) ##### Features - `check-json`: report duplicate keys. - [#​558](https://togithub.com/pre-commit/pre-commit-hooks/issues/558) PR by [@​AdityaKhursale](https://togithub.com/AdityaKhursale). - [#​554](https://togithub.com/pre-commit/pre-commit-hooks/issues/554) issue by [@​adamchainz](https://togithub.com/adamchainz). - `no-commit-to-branch`: add `main` to default blocked branches. - [#​565](https://togithub.com/pre-commit/pre-commit-hooks/issues/565) PR by [@​ndevenish](https://togithub.com/ndevenish). - `check-case-conflict`: check conflicts in directory names as well. - [#​575](https://togithub.com/pre-commit/pre-commit-hooks/issues/575) PR by [@​slsyy](https://togithub.com/slsyy). - [#​70](https://togithub.com/pre-commit/pre-commit-hooks/issues/70) issue by [@​andyjack](https://togithub.com/andyjack). - `check-vcs-permalinks`: forbid other branch names. - [#​582](https://togithub.com/pre-commit/pre-commit-hooks/issues/582) PR by [@​jack1142](https://togithub.com/jack1142). - [#​581](https://togithub.com/pre-commit/pre-commit-hooks/issues/581) issue by [@​jack1142](https://togithub.com/jack1142). - `check-shebang-scripts-are-executable`: new hook which ensures shebang'd scripts are executable. 
- [#​545](https://togithub.com/pre-commit/pre-commit-hooks/issues/545) PR by [@​scop](https://togithub.com/scop). ##### Fixes - `check-executables-have-shebangs`: Short circuit shebang lookup on windows. - [#​544](https://togithub.com/pre-commit/pre-commit-hooks/issues/544) PR by [@​scop](https://togithub.com/scop). - `requirements-txt-fixer`: Fix comments which have indentation - [#​549](https://togithub.com/pre-commit/pre-commit-hooks/issues/549) PR by [@​greshilov](https://togithub.com/greshilov). - [#​548](https://togithub.com/pre-commit/pre-commit-hooks/issues/548) issue by [@​greshilov](https://togithub.com/greshilov). - `pretty-format-json`: write to stdout using UTF-8 encoding. - [#​571](https://togithub.com/pre-commit/pre-commit-hooks/issues/571) PR by [@​jack1142](https://togithub.com/jack1142). - [#​570](https://togithub.com/pre-commit/pre-commit-hooks/issues/570) issue by [@​jack1142](https://togithub.com/jack1142). - Use more inclusive language. - [#​599](https://togithub.com/pre-commit/pre-commit-hooks/issues/599) PR by [@​asottile](https://togithub.com/asottile). ##### Breaking changes - Remove deprecated hooks: `flake8`, `pyflakes`, `autopep8-wrapper`. - [#​597](https://togithub.com/pre-commit/pre-commit-hooks/issues/597) PR by [@​asottile](https://togithub.com/asottile).
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻️ **Rebasing**: Renovate will not automatically rebase this PR, because other commits have been found. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/synthtool). Source-Link: https://github.com/googleapis/synthtool/commit/333fd90856f1454380514bc59fc0936cdaf1c202 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:b8c131c558606d3cea6e18f8e87befbd448c1482319b0db3c5d5388fa6ea72e3 --- .github/.OwlBot.lock.yaml | 2 +- .pre-commit-config.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index cc49c6a3d..9602d5405 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:b6169fc6a5207b11800a7c002d0c5c2bc6d82697185ca12e666f44031468cfcd + digest: sha256:b8c131c558606d3cea6e18f8e87befbd448c1482319b0db3c5d5388fa6ea72e3 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4f00c7cff..62eb5a77d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,7 +16,7 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.4.0 + rev: v4.0.1 hooks: - id: trailing-whitespace - id: end-of-file-fixer From 18bb443c7acd0a75dcb57d9aebe38b2d734ff8c7 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 21 Jun 2021 16:18:27 +0200 Subject: [PATCH 244/341] feat: enable unsetting policy tags on schema fields (#703) * feat: enable unsetting policy tags on 
schema fields * Adjust API representation for STRUCT schema fields * De-dup logic for converting None policy tags --- google/cloud/bigquery/schema.py | 45 +++++++-- tests/system/test_client.py | 50 ++++++++++ tests/unit/job/test_load_config.py | 4 + tests/unit/test_client.py | 49 +++++++-- tests/unit/test_external_config.py | 9 +- tests/unit/test_schema.py | 154 +++++++++++++++++++++++++---- 6 files changed, 279 insertions(+), 32 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 919d78b23..157db7ce6 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -15,6 +15,7 @@ """Schemas for BigQuery tables / queries.""" import collections +from typing import Optional from google.cloud.bigquery_v2 import types @@ -105,7 +106,26 @@ def __init__( if max_length is not _DEFAULT_VALUE: self._properties["maxLength"] = max_length self._fields = tuple(fields) - self._policy_tags = policy_tags + + self._policy_tags = self._determine_policy_tags(field_type, policy_tags) + + @staticmethod + def _determine_policy_tags( + field_type: str, given_policy_tags: Optional["PolicyTagList"] + ) -> Optional["PolicyTagList"]: + """Return the given policy tags, or their suitable representation if `None`. + + Args: + field_type: The type of the schema field. + given_policy_tags: The policy tags to maybe ajdust. + """ + if given_policy_tags is not None: + return given_policy_tags + + if field_type is not None and field_type.upper() in _STRUCT_TYPES: + return None + + return PolicyTagList() @staticmethod def __get_int(api_repr, name): @@ -126,18 +146,24 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": Returns: google.cloud.biquery.schema.SchemaField: The ``SchemaField`` object. 
""" + field_type = api_repr["type"].upper() + # Handle optional properties with default values mode = api_repr.get("mode", "NULLABLE") description = api_repr.get("description", _DEFAULT_VALUE) fields = api_repr.get("fields", ()) + policy_tags = cls._determine_policy_tags( + field_type, PolicyTagList.from_api_repr(api_repr.get("policyTags")) + ) + return cls( - field_type=api_repr["type"].upper(), + field_type=field_type, fields=[cls.from_api_repr(f) for f in fields], mode=mode.upper(), description=description, name=api_repr["name"], - policy_tags=PolicyTagList.from_api_repr(api_repr.get("policyTags")), + policy_tags=policy_tags, precision=cls.__get_int(api_repr, "precision"), scale=cls.__get_int(api_repr, "scale"), max_length=cls.__get_int(api_repr, "maxLength"), @@ -218,9 +244,9 @@ def to_api_repr(self) -> dict: # add this to the serialized representation. if self.field_type.upper() in _STRUCT_TYPES: answer["fields"] = [f.to_api_repr() for f in self.fields] - - # If this contains a policy tag definition, include that as well: - if self.policy_tags is not None: + else: + # Explicitly include policy tag definition (we must not do it for RECORD + # fields, because those are not leaf fields). answer["policyTags"] = self.policy_tags.to_api_repr() # Done; return the serialized dictionary. 
@@ -244,6 +270,11 @@ def _key(self): field_type = f"{field_type}({self.precision}, {self.scale})" else: field_type = f"{field_type}({self.precision})" + + policy_tags = ( + () if self._policy_tags is None else tuple(sorted(self._policy_tags.names)) + ) + return ( self.name, field_type, @@ -251,7 +282,7 @@ def _key(self): self.mode.upper(), # pytype: disable=attribute-error self.description, self._fields, - self._policy_tags, + policy_tags, ) def to_standard_sql(self) -> types.StandardSqlField: diff --git a/tests/system/test_client.py b/tests/system/test_client.py index c4caadbe9..ce3021399 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -653,6 +653,56 @@ def test_update_table_schema(self): self.assertEqual(found.field_type, expected.field_type) self.assertEqual(found.mode, expected.mode) + def test_unset_table_schema_attributes(self): + from google.cloud.bigquery.schema import PolicyTagList + + dataset = self.temp_dataset(_make_dataset_id("unset_policy_tags")) + table_id = "test_table" + policy_tags = PolicyTagList( + names=[ + "projects/{}/locations/us/taxonomies/1/policyTags/2".format( + Config.CLIENT.project + ), + ] + ) + + schema = [ + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField( + "secret_int", + "INTEGER", + mode="REQUIRED", + description="This field is numeric", + policy_tags=policy_tags, + ), + ] + table_arg = Table(dataset.table(table_id), schema=schema) + self.assertFalse(_table_exists(table_arg)) + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + + self.assertTrue(_table_exists(table)) + self.assertEqual(policy_tags, table.schema[1].policy_tags) + + # Amend the schema to replace the policy tags + new_schema = table.schema[:] + old_field = table.schema[1] + new_schema[1] = bigquery.SchemaField( + name=old_field.name, + field_type=old_field.field_type, + mode=old_field.mode, + description=None, + fields=old_field.fields, + 
policy_tags=None, + ) + + table.schema = new_schema + updated_table = Config.CLIENT.update_table(table, ["schema"]) + + self.assertFalse(updated_table.schema[1].description) # Empty string or None. + self.assertEqual(updated_table.schema[1].policy_tags.names, ()) + def test_update_table_clustering_configuration(self): dataset = self.temp_dataset(_make_dataset_id("update_table")) diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index b0729e428..eafe7e046 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -434,11 +434,13 @@ def test_schema_setter_fields(self): "name": "full_name", "type": "STRING", "mode": "REQUIRED", + "policyTags": {"names": []}, } age_repr = { "name": "age", "type": "INTEGER", "mode": "REQUIRED", + "policyTags": {"names": []}, } self.assertEqual( config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} @@ -451,11 +453,13 @@ def test_schema_setter_valid_mappings_list(self): "name": "full_name", "type": "STRING", "mode": "REQUIRED", + "policyTags": {"names": []}, } age_repr = { "name": "age", "type": "INTEGER", "mode": "REQUIRED", + "policyTags": {"names": []}, } schema = [full_name_repr, age_repr] config.schema = schema diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 7a28ef248..f6811e207 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1019,8 +1019,18 @@ def test_create_table_w_schema_and_query(self): { "schema": { "fields": [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "policyTags": {"names": []}, + }, + { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "policyTags": {"names": []}, + }, ] }, "view": {"query": query}, @@ -1054,8 +1064,18 @@ def test_create_table_w_schema_and_query(self): }, "schema": { "fields": [ - {"name": "full_name", 
"type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "policyTags": {"names": []}, + }, + { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "policyTags": {"names": []}, + }, ] }, "view": {"query": query, "useLegacySql": False}, @@ -2000,12 +2020,14 @@ def test_update_table(self): "type": "STRING", "mode": "REQUIRED", "description": None, + "policyTags": {"names": []}, }, { "name": "age", "type": "INTEGER", "mode": "REQUIRED", "description": "New field description", + "policyTags": {"names": []}, }, ] }, @@ -2047,12 +2069,14 @@ def test_update_table(self): "type": "STRING", "mode": "REQUIRED", "description": None, + "policyTags": {"names": []}, }, { "name": "age", "type": "INTEGER", "mode": "REQUIRED", "description": "New field description", + "policyTags": {"names": []}, }, ] }, @@ -2173,14 +2197,21 @@ def test_update_table_w_query(self): "type": "STRING", "mode": "REQUIRED", "description": None, + "policyTags": {"names": []}, }, { "name": "age", "type": "INTEGER", "mode": "REQUIRED", "description": "this is a column", + "policyTags": {"names": []}, + }, + { + "name": "country", + "type": "STRING", + "mode": "NULLABLE", + "policyTags": {"names": []}, }, - {"name": "country", "type": "STRING", "mode": "NULLABLE"}, ] } schema = [ @@ -6516,10 +6547,10 @@ def test_load_table_from_dataframe(self): assert field["type"] == table_field.field_type assert field["mode"] == table_field.mode assert len(field.get("fields", [])) == len(table_field.fields) + assert field["policyTags"]["names"] == [] # Omit unnecessary fields when they come from getting the table # (not passed in via job_config) assert "description" not in field - assert "policyTags" not in field @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @@ -7718,18 +7749,21 @@ def test_schema_to_json_with_file_path(self): "description": 
"quarter", "mode": "REQUIRED", "name": "qtr", + "policyTags": {"names": []}, "type": "STRING", }, { "description": "sales representative", "mode": "NULLABLE", "name": "rep", + "policyTags": {"names": []}, "type": "STRING", }, { "description": "total sales", "mode": "NULLABLE", "name": "sales", + "policyTags": {"names": []}, "type": "FLOAT", }, ] @@ -7762,18 +7796,21 @@ def test_schema_to_json_with_file_object(self): "description": "quarter", "mode": "REQUIRED", "name": "qtr", + "policyTags": {"names": []}, "type": "STRING", }, { "description": "sales representative", "mode": "NULLABLE", "name": "rep", + "policyTags": {"names": []}, "type": "STRING", }, { "description": "total sales", "mode": "NULLABLE", "name": "sales", + "policyTags": {"names": []}, "type": "FLOAT", }, ] diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 7178367ea..393df931e 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -78,7 +78,14 @@ def test_to_api_repr_base(self): ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] exp_schema = { - "fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}] + "fields": [ + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "policyTags": {"names": []}, + } + ] } got_resource = ec.to_api_repr() exp_resource = { diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 29c3bace5..d0b5ca54c 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from google.cloud.bigquery.schema import PolicyTagList import unittest import mock @@ -41,6 +42,7 @@ def test_constructor_defaults(self): self.assertEqual(field.mode, "NULLABLE") self.assertIsNone(field.description) self.assertEqual(field.fields, ()) + self.assertEqual(field.policy_tags, PolicyTagList()) def test_constructor_explicit(self): field = self._make_one("test", "STRING", mode="REQUIRED", description="Testing") @@ -104,7 +106,14 @@ def test_to_api_repr_with_subfield(self): self.assertEqual( field.to_api_repr(), { - "fields": [{"mode": "NULLABLE", "name": "bar", "type": "INTEGER"}], + "fields": [ + { + "mode": "NULLABLE", + "name": "bar", + "type": "INTEGER", + "policyTags": {"names": []}, + } + ], "mode": "REQUIRED", "name": "foo", "type": record_type, @@ -404,6 +413,23 @@ def test___eq___hit_w_fields(self): other = self._make_one("test", "RECORD", fields=[sub1, sub2]) self.assertEqual(field, other) + def test___eq___hit_w_policy_tags(self): + field = self._make_one( + "test", + "STRING", + mode="REQUIRED", + description="Testing", + policy_tags=PolicyTagList(names=["foo", "bar"]), + ) + other = self._make_one( + "test", + "STRING", + mode="REQUIRED", + description="Testing", + policy_tags=PolicyTagList(names=["bar", "foo"]), + ) + self.assertEqual(field, other) # Policy tags order does not matter. 
+ def test___ne___wrong_type(self): field = self._make_one("toast", "INTEGER") other = object() @@ -426,6 +452,23 @@ def test___ne___different_values(self): ) self.assertNotEqual(field1, field2) + def test___ne___different_policy_tags(self): + field = self._make_one( + "test", + "STRING", + mode="REQUIRED", + description="Testing", + policy_tags=PolicyTagList(names=["foo", "bar"]), + ) + other = self._make_one( + "test", + "STRING", + mode="REQUIRED", + description="Testing", + policy_tags=PolicyTagList(names=["foo", "baz"]), + ) + self.assertNotEqual(field, other) + def test___hash__set_equality(self): sub1 = self._make_one("sub1", "STRING") sub2 = self._make_one("sub2", "STRING") @@ -446,7 +489,7 @@ def test___hash__not_equals(self): def test___repr__(self): field1 = self._make_one("field1", "STRING") - expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, (), None)" + expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, (), ())" self.assertEqual(repr(field1), expected) @@ -524,10 +567,22 @@ def test_defaults(self): resource = self._call_fut([full_name, age]) self.assertEqual(len(resource), 2) self.assertEqual( - resource[0], {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + resource[0], + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "policyTags": {"names": []}, + }, ) self.assertEqual( - resource[1], {"name": "age", "type": "INTEGER", "mode": "REQUIRED"} + resource[1], + { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "policyTags": {"names": []}, + }, ) def test_w_description(self): @@ -553,11 +608,18 @@ def test_w_description(self): "type": "STRING", "mode": "REQUIRED", "description": DESCRIPTION, + "policyTags": {"names": []}, }, ) self.assertEqual( resource[1], - {"name": "age", "type": "INTEGER", "mode": "REQUIRED", "description": None}, + { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, + "policyTags": {"names": []}, + }, ) def test_w_subfields(self): 
@@ -572,7 +634,13 @@ def test_w_subfields(self): resource = self._call_fut([full_name, phone]) self.assertEqual(len(resource), 2) self.assertEqual( - resource[0], {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + resource[0], + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "policyTags": {"names": []}, + }, ) self.assertEqual( resource[1], @@ -581,8 +649,18 @@ def test_w_subfields(self): "type": "RECORD", "mode": "REPEATED", "fields": [ - {"name": "type", "type": "STRING", "mode": "REQUIRED"}, - {"name": "number", "type": "STRING", "mode": "REQUIRED"}, + { + "name": "type", + "type": "STRING", + "mode": "REQUIRED", + "policyTags": {"names": []}, + }, + { + "name": "number", + "type": "STRING", + "mode": "REQUIRED", + "policyTags": {"names": []}, + }, ], }, ) @@ -794,43 +872,83 @@ def test_from_api_repr_parameterized(api, expect, key2): [ ( dict(name="n", field_type="NUMERIC"), - dict(name="n", type="NUMERIC", mode="NULLABLE"), + dict(name="n", type="NUMERIC", mode="NULLABLE", policyTags={"names": []}), ), ( dict(name="n", field_type="NUMERIC", precision=9), - dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9), + dict( + name="n", + type="NUMERIC", + mode="NULLABLE", + precision=9, + policyTags={"names": []}, + ), ), ( dict(name="n", field_type="NUMERIC", precision=9, scale=2), - dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9, scale=2), + dict( + name="n", + type="NUMERIC", + mode="NULLABLE", + precision=9, + scale=2, + policyTags={"names": []}, + ), ), ( dict(name="n", field_type="BIGNUMERIC"), - dict(name="n", type="BIGNUMERIC", mode="NULLABLE"), + dict( + name="n", type="BIGNUMERIC", mode="NULLABLE", policyTags={"names": []} + ), ), ( dict(name="n", field_type="BIGNUMERIC", precision=40), - dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40), + dict( + name="n", + type="BIGNUMERIC", + mode="NULLABLE", + precision=40, + policyTags={"names": []}, + ), ), ( dict(name="n", field_type="BIGNUMERIC", 
precision=40, scale=2), - dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40, scale=2), + dict( + name="n", + type="BIGNUMERIC", + mode="NULLABLE", + precision=40, + scale=2, + policyTags={"names": []}, + ), ), ( dict(name="n", field_type="STRING"), - dict(name="n", type="STRING", mode="NULLABLE"), + dict(name="n", type="STRING", mode="NULLABLE", policyTags={"names": []}), ), ( dict(name="n", field_type="STRING", max_length=9), - dict(name="n", type="STRING", mode="NULLABLE", maxLength=9), + dict( + name="n", + type="STRING", + mode="NULLABLE", + maxLength=9, + policyTags={"names": []}, + ), ), ( dict(name="n", field_type="BYTES"), - dict(name="n", type="BYTES", mode="NULLABLE"), + dict(name="n", type="BYTES", mode="NULLABLE", policyTags={"names": []}), ), ( dict(name="n", field_type="BYTES", max_length=9), - dict(name="n", type="BYTES", mode="NULLABLE", maxLength=9), + dict( + name="n", + type="BYTES", + mode="NULLABLE", + maxLength=9, + policyTags={"names": []}, + ), ), ], ) From ebb066f90ed3b35da0bc9f5f77e65cde35987d67 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 22 Jun 2021 18:30:12 +0000 Subject: [PATCH 245/341] chore: add kokoro 3.9 config templates (#712) Source-Link: https://github.com/googleapis/synthtool/commit/b0eb8a8b30b46a3c98d23c23107acb748c6601a1 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:df50e8d462f86d6bcb42f27ecad55bb12c404f1c65de9c6fe4c4d25120080bd6 --- .github/.OwlBot.lock.yaml | 2 +- .kokoro/samples/python3.9/common.cfg | 40 +++++++++++++++++++++ .kokoro/samples/python3.9/continuous.cfg | 6 ++++ .kokoro/samples/python3.9/periodic-head.cfg | 11 ++++++ .kokoro/samples/python3.9/periodic.cfg | 6 ++++ .kokoro/samples/python3.9/presubmit.cfg | 6 ++++ 6 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 .kokoro/samples/python3.9/common.cfg create mode 100644 .kokoro/samples/python3.9/continuous.cfg create mode 100644 
.kokoro/samples/python3.9/periodic-head.cfg create mode 100644 .kokoro/samples/python3.9/periodic.cfg create mode 100644 .kokoro/samples/python3.9/presubmit.cfg diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 9602d5405..0954585f2 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:b8c131c558606d3cea6e18f8e87befbd448c1482319b0db3c5d5388fa6ea72e3 + digest: sha256:df50e8d462f86d6bcb42f27ecad55bb12c404f1c65de9c6fe4c4d25120080bd6 diff --git a/.kokoro/samples/python3.9/common.cfg b/.kokoro/samples/python3.9/common.cfg new file mode 100644 index 000000000..f179577a5 --- /dev/null +++ b/.kokoro/samples/python3.9/common.cfg @@ -0,0 +1,40 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Build logs will be here +action { + define_artifacts { + regex: "**/*sponge_log.xml" + } +} + +# Specify which tests to run +env_vars: { + key: "RUN_TESTS_SESSION" + value: "py-3.9" +} + +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-py39" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples.sh" +} + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" +} + +# Download secrets for samples +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" + +# Download trampoline resources. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" + +# Use the trampoline script to run in docker. 
+build_file: "python-bigquery/.kokoro/trampoline.sh" \ No newline at end of file diff --git a/.kokoro/samples/python3.9/continuous.cfg b/.kokoro/samples/python3.9/continuous.cfg new file mode 100644 index 000000000..a1c8d9759 --- /dev/null +++ b/.kokoro/samples/python3.9/continuous.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/.kokoro/samples/python3.9/periodic-head.cfg b/.kokoro/samples/python3.9/periodic-head.cfg new file mode 100644 index 000000000..f9cfcd33e --- /dev/null +++ b/.kokoro/samples/python3.9/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-pubsub/.kokoro/test-samples-against-head.sh" +} diff --git a/.kokoro/samples/python3.9/periodic.cfg b/.kokoro/samples/python3.9/periodic.cfg new file mode 100644 index 000000000..50fec9649 --- /dev/null +++ b/.kokoro/samples/python3.9/periodic.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "False" +} \ No newline at end of file diff --git a/.kokoro/samples/python3.9/presubmit.cfg b/.kokoro/samples/python3.9/presubmit.cfg new file mode 100644 index 000000000..a1c8d9759 --- /dev/null +++ b/.kokoro/samples/python3.9/presubmit.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file From 92fbd4ade37e0be49dc278080ef73c83eafeea18 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 23 Jun 2021 20:24:30 +0000 Subject: [PATCH 246/341] feat: add always_use_jwt_access (#714) ... 
chore: update gapic-generator-ruby to the latest commit chore: release gapic-generator-typescript 1.5.0 Committer: @miraleung PiperOrigin-RevId: 380641501 Source-Link: https://github.com/googleapis/googleapis/commit/076f7e9f0b258bdb54338895d7251b202e8f0de3 Source-Link: https://github.com/googleapis/googleapis-gen/commit/27e4c88b4048e5f56508d4e1aa417d60a3380892 --- .coveragerc | 1 - 1 file changed, 1 deletion(-) diff --git a/.coveragerc b/.coveragerc index 23861a8eb..33ea00ba9 100644 --- a/.coveragerc +++ b/.coveragerc @@ -2,7 +2,6 @@ branch = True [report] -fail_under = 100 show_missing = True omit = google/cloud/bigquery/__init__.py From 5e9494eb51ca5d31b7277f9f5d6d2d58ea2dd018 Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Thu, 24 Jun 2021 08:08:14 -0400 Subject: [PATCH 247/341] chore: pin sphinx plugin version to working one (#715) --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 0dfe7bf93..2bc2afde1 100644 --- a/noxfile.py +++ b/noxfile.py @@ -300,7 +300,7 @@ def docfx(session): session.install("-e", ".") session.install( - "sphinx==4.0.1", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml" + "sphinx==4.0.1", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml==0.2.0" ) shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) From ab4921347972256dbf5a9737dd42f32e90a38cad Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 24 Jun 2021 20:07:55 +0200 Subject: [PATCH 248/341] chore(deps): update dependency grpcio to v1.38.1 (#713) Co-authored-by: Anthonios Partheniou --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 391c85ae3..669b3ac85 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.20.0 google-cloud-bigquery-storage==2.4.0 
google-auth-oauthlib==0.4.4 -grpcio==1.38.0 +grpcio==1.38.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' From b35e1ad47a93ff0997a78496cc59fcdd91345643 Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Thu, 24 Jun 2021 16:10:31 -0400 Subject: [PATCH 249/341] Revert "chore: pin sphinx plugin version to working one (#715)" (#719) This reverts commit 5e9494eb51ca5d31b7277f9f5d6d2d58ea2dd018. --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 2bc2afde1..0dfe7bf93 100644 --- a/noxfile.py +++ b/noxfile.py @@ -300,7 +300,7 @@ def docfx(session): session.install("-e", ".") session.install( - "sphinx==4.0.1", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml==0.2.0" + "sphinx==4.0.1", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml" ) shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) From 2a9618f4daaa4a014161e1a2f7376844eec9e8da Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 25 Jun 2021 08:56:40 +0200 Subject: [PATCH 250/341] feat: add max_results parameter to some of the QueryJob methods (#698) * feat: add max_results to a few QueryJob methods It is now possible to cap the number of result rows returned when invoking `to_dataframe()` or `to_arrow()` method on a `QueryJob` instance. 
* Work around a pytype complaint * Make _EmptyRowIterator a subclass of RowIterator Co-authored-by: Dan Lee <71398022+dandhlee@users.noreply.github.com> --- google/cloud/bigquery/_tqdm_helpers.py | 37 +++++++- google/cloud/bigquery/job/query.py | 22 ++++- google/cloud/bigquery/table.py | 53 ++++++++++- tests/unit/job/test_query_pandas.py | 101 ++++++++++++++++++++- tests/unit/test_signature_compatibility.py | 31 +++++-- tests/unit/test_table.py | 19 ++++ 6 files changed, 240 insertions(+), 23 deletions(-) diff --git a/google/cloud/bigquery/_tqdm_helpers.py b/google/cloud/bigquery/_tqdm_helpers.py index 2fcf2a981..99e720e2b 100644 --- a/google/cloud/bigquery/_tqdm_helpers.py +++ b/google/cloud/bigquery/_tqdm_helpers.py @@ -16,6 +16,8 @@ import concurrent.futures import time +import typing +from typing import Optional import warnings try: @@ -23,6 +25,10 @@ except ImportError: # pragma: NO COVER tqdm = None +if typing.TYPE_CHECKING: # pragma: NO COVER + from google.cloud.bigquery import QueryJob + from google.cloud.bigquery.table import RowIterator + _NO_TQDM_ERROR = ( "A progress bar was requested, but there was an error loading the tqdm " "library. Please install tqdm to use the progress bar functionality." 
@@ -32,7 +38,7 @@ def get_progress_bar(progress_bar_type, description, total, unit): - """Construct a tqdm progress bar object, if tqdm is .""" + """Construct a tqdm progress bar object, if tqdm is installed.""" if tqdm is None: if progress_bar_type is not None: warnings.warn(_NO_TQDM_ERROR, UserWarning, stacklevel=3) @@ -53,16 +59,34 @@ def get_progress_bar(progress_bar_type, description, total, unit): return None -def wait_for_query(query_job, progress_bar_type=None): - """Return query result and display a progress bar while the query running, if tqdm is installed.""" +def wait_for_query( + query_job: "QueryJob", + progress_bar_type: Optional[str] = None, + max_results: Optional[int] = None, +) -> "RowIterator": + """Return query result and display a progress bar while the query running, if tqdm is installed. + + Args: + query_job: + The job representing the execution of the query on the server. + progress_bar_type: + The type of progress bar to use to show query progress. + max_results: + The maximum number of rows the row iterator should return. + + Returns: + A row iterator over the query results. 
+ """ default_total = 1 current_stage = None start_time = time.time() + progress_bar = get_progress_bar( progress_bar_type, "Query is running", default_total, "query" ) if progress_bar is None: - return query_job.result() + return query_job.result(max_results=max_results) + i = 0 while True: if query_job.query_plan: @@ -75,7 +99,9 @@ def wait_for_query(query_job, progress_bar_type=None): ), ) try: - query_result = query_job.result(timeout=_PROGRESS_BAR_UPDATE_INTERVAL) + query_result = query_job.result( + timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=max_results + ) progress_bar.update(default_total) progress_bar.set_description( "Query complete after {:0.2f}s".format(time.time() - start_time), @@ -89,5 +115,6 @@ def wait_for_query(query_job, progress_bar_type=None): progress_bar.update(i + 1) i += 1 continue + progress_bar.close() return query_result diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 455ef4632..6ff9f2647 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1300,12 +1300,14 @@ def result( return rows # If changing the signature of this method, make sure to apply the same - # changes to table.RowIterator.to_arrow() + # changes to table.RowIterator.to_arrow(), except for the max_results parameter + # that should only exist here in the QueryJob method. def to_arrow( self, progress_bar_type: str = None, bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, create_bqstorage_client: bool = True, + max_results: Optional[int] = None, ) -> "pyarrow.Table": """[Beta] Create a class:`pyarrow.Table` by loading all pages of a table or query. @@ -1349,6 +1351,11 @@ def to_arrow( ..versionadded:: 1.24.0 + max_results (Optional[int]): + Maximum number of rows to include in the result. No limit by default. 
+ + ..versionadded:: 2.21.0 + Returns: pyarrow.Table A :class:`pyarrow.Table` populated with row data and column @@ -1361,7 +1368,7 @@ def to_arrow( ..versionadded:: 1.17.0 """ - query_result = wait_for_query(self, progress_bar_type) + query_result = wait_for_query(self, progress_bar_type, max_results=max_results) return query_result.to_arrow( progress_bar_type=progress_bar_type, bqstorage_client=bqstorage_client, @@ -1369,7 +1376,8 @@ def to_arrow( ) # If changing the signature of this method, make sure to apply the same - # changes to table.RowIterator.to_dataframe() + # changes to table.RowIterator.to_dataframe(), except for the max_results parameter + # that should only exist here in the QueryJob method. def to_dataframe( self, bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, @@ -1377,6 +1385,7 @@ def to_dataframe( progress_bar_type: str = None, create_bqstorage_client: bool = True, date_as_object: bool = True, + max_results: Optional[int] = None, ) -> "pandas.DataFrame": """Return a pandas DataFrame from a QueryJob @@ -1423,6 +1432,11 @@ def to_dataframe( ..versionadded:: 1.26.0 + max_results (Optional[int]): + Maximum number of rows to include in the result. No limit by default. + + ..versionadded:: 2.21.0 + Returns: A :class:`~pandas.DataFrame` populated with row data and column headers from the query results. The column headers are derived @@ -1431,7 +1445,7 @@ def to_dataframe( Raises: ValueError: If the `pandas` library cannot be imported. 
""" - query_result = wait_for_query(self, progress_bar_type) + query_result = wait_for_query(self, progress_bar_type, max_results=max_results) return query_result.to_dataframe( bqstorage_client=bqstorage_client, dtypes=dtypes, diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index b12209252..a1c13c85d 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -22,7 +22,7 @@ import operator import pytz import typing -from typing import Any, Dict, Iterable, Tuple +from typing import Any, Dict, Iterable, Iterator, Optional, Tuple import warnings try: @@ -1415,7 +1415,9 @@ class RowIterator(HTTPIterator): """A class for iterating through HTTP/JSON API row list responses. Args: - client (google.cloud.bigquery.Client): The API client. + client (Optional[google.cloud.bigquery.Client]): + The API client instance. This should always be non-`None`, except for + subclasses that do not use it, namely the ``_EmptyRowIterator``. api_request (Callable[google.cloud._http.JSONConnection.api_request]): The function to use to make API requests. path (str): The method path to query for the list of items. @@ -1480,7 +1482,7 @@ def __init__( self._field_to_index = _helpers._field_to_index_mapping(schema) self._page_size = page_size self._preserve_order = False - self._project = client.project + self._project = client.project if client is not None else None self._schema = schema self._selected_fields = selected_fields self._table = table @@ -1895,7 +1897,7 @@ def to_dataframe( return df -class _EmptyRowIterator(object): +class _EmptyRowIterator(RowIterator): """An empty row iterator. 
This class prevents API requests when there are no rows to fetch or rows @@ -1907,6 +1909,18 @@ class _EmptyRowIterator(object): pages = () total_rows = 0 + def __init__( + self, client=None, api_request=None, path=None, schema=(), *args, **kwargs + ): + super().__init__( + client=client, + api_request=api_request, + path=path, + schema=schema, + *args, + **kwargs, + ) + def to_arrow( self, progress_bar_type=None, @@ -1951,6 +1965,37 @@ def to_dataframe( raise ValueError(_NO_PANDAS_ERROR) return pandas.DataFrame() + def to_dataframe_iterable( + self, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, + dtypes: Optional[Dict[str, Any]] = None, + max_queue_size: Optional[int] = None, + ) -> Iterator["pandas.DataFrame"]: + """Create an iterable of pandas DataFrames, to process the table as a stream. + + ..versionadded:: 2.21.0 + + Args: + bqstorage_client: + Ignored. Added for compatibility with RowIterator. + + dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): + Ignored. Added for compatibility with RowIterator. + + max_queue_size: + Ignored. Added for compatibility with RowIterator. + + Returns: + An iterator yielding a single empty :class:`~pandas.DataFrame`. + + Raises: + ValueError: + If the :mod:`pandas` library cannot be imported. 
+ """ + if pandas is None: + raise ValueError(_NO_PANDAS_ERROR) + return iter((pandas.DataFrame(),)) + def __iter__(self): return iter(()) diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index 0f9623203..c537802f4 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -238,6 +238,41 @@ def test_to_arrow(): ] +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +def test_to_arrow_max_results_no_progress_bar(): + from google.cloud.bigquery import table + from google.cloud.bigquery.job import QueryJob as target_class + from google.cloud.bigquery.schema import SchemaField + + connection = _make_connection({}) + client = _make_client(connection=connection) + begun_resource = _make_job_resource(job_type="query") + job = target_class.from_api_repr(begun_resource, client) + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = table.RowIterator(client, api_request, path, schema) + + result_patch = mock.patch( + "google.cloud.bigquery.job.QueryJob.result", return_value=row_iterator, + ) + with result_patch as result_patch_tqdm: + tbl = job.to_arrow(create_bqstorage_client=False, max_results=123) + + result_patch_tqdm.assert_called_once_with(max_results=123) + + assert isinstance(tbl, pyarrow.Table) + assert tbl.num_rows == 2 + + @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") def test_to_arrow_w_tqdm_w_query_plan(): @@ -290,7 +325,9 @@ def test_to_arrow_w_tqdm_w_query_plan(): assert result_patch_tqdm.call_count == 3 assert isinstance(tbl, pyarrow.Table) assert tbl.num_rows == 2 - 
result_patch_tqdm.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL) + result_patch_tqdm.assert_called_with( + timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None + ) @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @@ -341,7 +378,9 @@ def test_to_arrow_w_tqdm_w_pending_status(): assert result_patch_tqdm.call_count == 2 assert isinstance(tbl, pyarrow.Table) assert tbl.num_rows == 2 - result_patch_tqdm.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL) + result_patch_tqdm.assert_called_with( + timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None + ) @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @@ -716,7 +755,9 @@ def test_to_dataframe_w_tqdm_pending(): assert isinstance(df, pandas.DataFrame) assert len(df) == 4 # verify the number of rows assert list(df) == ["name", "age"] # verify the column names - result_patch_tqdm.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL) + result_patch_tqdm.assert_called_with( + timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None + ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @@ -774,4 +815,56 @@ def test_to_dataframe_w_tqdm(): assert isinstance(df, pandas.DataFrame) assert len(df) == 4 # verify the number of rows assert list(df), ["name", "age"] # verify the column names - result_patch_tqdm.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL) + result_patch_tqdm.assert_called_with( + timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None + ) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") +def test_to_dataframe_w_tqdm_max_results(): + from google.cloud.bigquery import table + from google.cloud.bigquery.job import QueryJob as target_class + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery._tqdm_helpers import _PROGRESS_BAR_UPDATE_INTERVAL + + begun_resource = _make_job_resource(job_type="query") + schema = [ + 
SchemaField("name", "STRING", mode="NULLABLE"), + SchemaField("age", "INTEGER", mode="NULLABLE"), + ] + rows = [{"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}] + + connection = _make_connection({}) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, client) + + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = table.RowIterator(client, api_request, path, schema) + + job._properties["statistics"] = { + "query": { + "queryPlan": [ + {"name": "S00: Input", "id": "0", "status": "COMPLETE"}, + {"name": "S01: Output", "id": "1", "status": "COMPLETE"}, + ] + }, + } + reload_patch = mock.patch( + "google.cloud.bigquery.job._AsyncJob.reload", autospec=True + ) + result_patch = mock.patch( + "google.cloud.bigquery.job.QueryJob.result", + side_effect=[concurrent.futures.TimeoutError, row_iterator], + ) + + with result_patch as result_patch_tqdm, reload_patch: + job.to_dataframe( + progress_bar_type="tqdm", create_bqstorage_client=False, max_results=3 + ) + + assert result_patch_tqdm.call_count == 2 + result_patch_tqdm.assert_called_with( + timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=3 + ) diff --git a/tests/unit/test_signature_compatibility.py b/tests/unit/test_signature_compatibility.py index e5016b0e5..07b823e2c 100644 --- a/tests/unit/test_signature_compatibility.py +++ b/tests/unit/test_signature_compatibility.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from collections import OrderedDict import inspect import pytest @@ -32,12 +33,30 @@ def row_iterator_class(): def test_to_arrow_method_signatures_match(query_job_class, row_iterator_class): - sig = inspect.signature(query_job_class.to_arrow) - sig2 = inspect.signature(row_iterator_class.to_arrow) - assert sig == sig2 + query_job_sig = inspect.signature(query_job_class.to_arrow) + iterator_sig = inspect.signature(row_iterator_class.to_arrow) + + assert "max_results" in query_job_sig.parameters + + # Compare the signatures while ignoring the max_results parameter, which is + # specific to the method on QueryJob. + params = OrderedDict(query_job_sig.parameters) + del params["max_results"] + query_job_sig = query_job_sig.replace(parameters=params.values()) + + assert query_job_sig == iterator_sig def test_to_dataframe_method_signatures_match(query_job_class, row_iterator_class): - sig = inspect.signature(query_job_class.to_dataframe) - sig2 = inspect.signature(row_iterator_class.to_dataframe) - assert sig == sig2 + query_job_sig = inspect.signature(query_job_class.to_dataframe) + iterator_sig = inspect.signature(row_iterator_class.to_dataframe) + + assert "max_results" in query_job_sig.parameters + + # Compare the signatures while ignoring the max_results parameter, which is + # specific to the method on QueryJob. 
+ params = OrderedDict(query_job_sig.parameters) + del params["max_results"] + query_job_sig = query_job_sig.replace(parameters=params.values()) + + assert query_job_sig == iterator_sig diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 0f2ab00c1..f4038835c 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1571,6 +1571,25 @@ def test_to_dataframe(self): self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 0) # verify the number of rows + @mock.patch("google.cloud.bigquery.table.pandas", new=None) + def test_to_dataframe_iterable_error_if_pandas_is_none(self): + row_iterator = self._make_one() + with self.assertRaises(ValueError): + row_iterator.to_dataframe_iterable() + + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_iterable(self): + row_iterator = self._make_one() + df_iter = row_iterator.to_dataframe_iterable() + + result = list(df_iter) + + self.assertEqual(len(result), 1) + df = result[0] + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 0) # Verify the number of rows. + self.assertEqual(len(df.columns), 0) + class TestRowIterator(unittest.TestCase): def _class_under_test(self): From 36b6a852a59d7022d85446b013b535e6d705f4c3 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Fri, 25 Jun 2021 16:59:16 -0400 Subject: [PATCH 251/341] Revert "feat: add always_use_jwt_access (#714)" (#722) This reverts commit 92fbd4ade37e0be49dc278080ef73c83eafeea18. 
--- .coveragerc | 1 + 1 file changed, 1 insertion(+) diff --git a/.coveragerc b/.coveragerc index 33ea00ba9..23861a8eb 100644 --- a/.coveragerc +++ b/.coveragerc @@ -2,6 +2,7 @@ branch = True [report] +fail_under = 100 show_missing = True omit = google/cloud/bigquery/__init__.py From 3c355909b4c9e7666e047eff983e682b3f8fd348 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 25 Jun 2021 22:52:15 +0000 Subject: [PATCH 252/341] chore(python): simplify nox steps in CONTRIBUTING.rst (#721) Source-Link: https://github.com/googleapis/synthtool/commit/26558bae8976a985d73c2d98c31d8612273f907d Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:99d90d097e4a4710cc8658ee0b5b963f4426d0e424819787c3ac1405c9a26719 --- .github/.OwlBot.lock.yaml | 2 +- CONTRIBUTING.rst | 14 ++++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 0954585f2..e2b39f946 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:df50e8d462f86d6bcb42f27ecad55bb12c404f1c65de9c6fe4c4d25120080bd6 + digest: sha256:99d90d097e4a4710cc8658ee0b5b963f4426d0e424819787c3ac1405c9a26719 diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index a9b389e83..102355b3a 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -68,14 +68,12 @@ Using ``nox`` We use `nox `__ to instrument our tests. - To test your changes, run unit tests with ``nox``:: + $ nox -s unit - $ nox -s unit-3.8 - $ ... +- To run a single unit test:: -- Args to pytest can be passed through the nox command separated by a `--`. For - example, to run a single test:: + $ nox -s unit-3.9 -- -k - $ nox -s unit-3.8 -- -k .. 
note:: @@ -142,7 +140,7 @@ Running System Tests - To run system tests, you can execute:: # Run all system tests - $ nox -s system-3.8 + $ nox -s system # Run a single system test $ nox -s system-3.8 -- -k @@ -215,8 +213,8 @@ Supported versions can be found in our ``noxfile.py`` `config`_. .. _config: https://github.com/googleapis/python-bigquery/blob/master/noxfile.py -We also explicitly decided to support Python 3 beginning with version -3.6. Reasons for this include: +We also explicitly decided to support Python 3 beginning with version 3.6. +Reasons for this include: - Encouraging use of newest versions of Python 3 - Taking the lead of `prominent`_ open-source `projects`_ From ddd9ce714ebc496e22bc122e89f3bbee36ea15be Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 28 Jun 2021 18:26:24 +0200 Subject: [PATCH 253/341] chore: require grpcio >= 1.38.1 (#725) --- setup.py | 3 ++- testing/constraints-3.6.txt | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 963eb73ec..fcb1dd966 100644 --- a/setup.py +++ b/setup.py @@ -29,6 +29,7 @@ # 'Development Status :: 5 - Production/Stable' release_status = "Development Status :: 5 - Production/Stable" dependencies = [ + "grpcio >= 1.38.1, < 2.0dev", # https://github.com/googleapis/python-bigquery/issues/695 "google-api-core[grpc] >= 1.29.0, < 2.0.0dev", "proto-plus >= 1.10.0", "google-cloud-core >= 1.4.1, < 2.0dev", @@ -46,7 +47,7 @@ # See: https://github.com/googleapis/python-bigquery/issues/83 The # grpc.Channel.close() method isn't added until 1.32.0. 
# https://github.com/grpc/grpc/pull/15254 - "grpcio >= 1.32.0, < 2.0dev", + "grpcio >= 1.38.1, < 2.0dev", "pyarrow >= 1.0.0, < 5.0dev", ], "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 5.0dev"], diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index 71c9ff49a..af6e82efd 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -9,7 +9,7 @@ google-api-core==1.29.0 google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 google-resumable-media==0.6.0 -grpcio==1.32.0 +grpcio==1.38.1 opentelemetry-api==0.11b0 opentelemetry-instrumentation==0.11b0 opentelemetry-sdk==0.11b0 From d047419879e807e123296da2eee89a5253050166 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 1 Jul 2021 09:24:21 +0200 Subject: [PATCH 254/341] fix: inserting non-finite floats with insert_rows() (#728) --- google/cloud/bigquery/_helpers.py | 8 +++++++- tests/unit/test__helpers.py | 15 +++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 7602483c2..77054542a 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -17,6 +17,7 @@ import base64 import datetime import decimal +import math import re from google.cloud._helpers import UTC @@ -305,7 +306,12 @@ def _int_to_json(value): def _float_to_json(value): """Coerce 'value' to an JSON-compatible representation.""" - return value if value is None else float(value) + if value is None: + return None + elif math.isnan(value) or math.isinf(value): + return str(value) + else: + return float(value) def _decimal_to_json(value): diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 0ac76d424..c62947d37 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -656,9 +656,24 @@ def _call_fut(self, value): return _float_to_json(value) + def test_w_none(self): + self.assertEqual(self._call_fut(None), None) + def test_w_float(self): 
self.assertEqual(self._call_fut(1.23), 1.23) + def test_w_nan(self): + result = self._call_fut(float("nan")) + self.assertEqual(result.lower(), "nan") + + def test_w_infinity(self): + result = self._call_fut(float("inf")) + self.assertEqual(result.lower(), "inf") + + def test_w_negative_infinity(self): + result = self._call_fut(float("-inf")) + self.assertEqual(result.lower(), "-inf") + class Test_decimal_to_json(unittest.TestCase): def _call_fut(self, value): From e99abbbca8d3f234325e86857b4ba71403f4be6b Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 1 Jul 2021 12:22:26 +0200 Subject: [PATCH 255/341] chore(deps): update dependency google-cloud-bigquery-storage to v2.5.0 (#731) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery-storage](https://togithub.com/googleapis/python-bigquery-storage) | `==2.4.0` -> `==2.5.0` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.5.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.5.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.5.0/compatibility-slim/2.4.0)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.5.0/confidence-slim/2.4.0)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery-storage ### [`v2.5.0`](https://togithub.com/googleapis/python-bigquery-storage/blob/master/CHANGELOG.md#​250-httpswwwgithubcomgoogleapispython-bigquery-storagecomparev240v250-2021-06-29) [Compare Source](https://togithub.com/googleapis/python-bigquery-storage/compare/v2.4.0...v2.5.0) ##### ⚠ BREAKING CHANGES - remove default deadline for AppendRows API ([#​205](https://togithub.com/googleapis/python-bigquery-storage/issues/205)) ##### Features - Add ZSTD compression as an option for Arrow ([#​197](https://www.github.com/googleapis/python-bigquery-storage/issues/197)) ([f941446](https://www.github.com/googleapis/python-bigquery-storage/commit/f9414469fac37bf05db28230a1a6c1e3f7342e8d)) - new JSON type through BigQuery Write ([#​178](https://www.github.com/googleapis/python-bigquery-storage/issues/178)) ([a6d6afa](https://www.github.com/googleapis/python-bigquery-storage/commit/a6d6afa8654907701aab2724f940be8f63edd0ea)) ##### Bug Fixes - **deps:** add packaging requirement ([#​200](https://www.github.com/googleapis/python-bigquery-storage/issues/200)) ([f2203fe](https://www.github.com/googleapis/python-bigquery-storage/commit/f2203fefe36dd043a258adb85e970fef14cf6ebc)) - remove default deadline for AppendRows API ([#​205](https://www.github.com/googleapis/python-bigquery-storage/issues/205)) ([cd4e637](https://www.github.com/googleapis/python-bigquery-storage/commit/cd4e637c4c74f21be50c3b0ebdfeebb1dfb88cbb)) ##### Documentation - omit mention of Python 2.7 in 'CONTRIBUTING.rst' ([#​1127](https://www.github.com/googleapis/python-bigquery-storage/issues/1127)) ([#​212](https://www.github.com/googleapis/python-bigquery-storage/issues/212)) ([8bcc4cd](https://www.github.com/googleapis/python-bigquery-storage/commit/8bcc4cd298eb0f5da03ecf66670982ab41e35c88)) ##### Miscellaneous Chores - release 2.5.0 ([#​220](https://www.github.com/googleapis/python-bigquery-storage/issues/220)) 
([946c8a9](https://www.github.com/googleapis/python-bigquery-storage/commit/946c8a91c2d74c6bf37b333a4d0483f4483dcbce))
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 80fa8e454..83ab92ee5 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 google-cloud-bigquery==2.20.0 -google-cloud-bigquery-storage==2.4.0 +google-cloud-bigquery-storage==2.5.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 669b3ac85..6b966fb07 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.20.0 -google-cloud-bigquery-storage==2.4.0 +google-cloud-bigquery-storage==2.5.0 google-auth-oauthlib==0.4.4 grpcio==1.38.1 ipython==7.16.1; python_version < '3.7' From 38b3ef96c3dedc139b84f0ff06885141ae7ce78c Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 1 Jul 2021 10:49:50 -0400 Subject: [PATCH 256/341] feat: Support passing struct data to the DB API (#718) --- docs/dbapi.rst | 11 +- google/cloud/bigquery/dbapi/_helpers.py | 252 ++++++++++++++++++--- google/cloud/bigquery/dbapi/cursor.py | 28 ++- tests/system/conftest.py | 7 +- tests/system/test_pandas.py | 11 +- tests/system/test_structs.py | 31 +++ tests/unit/test_dbapi__helpers.py | 282 
+++++++++++++++++++++++- tests/unit/test_dbapi_cursor.py | 26 +++ 8 files changed, 597 insertions(+), 51 deletions(-) create mode 100644 tests/system/test_structs.py diff --git a/docs/dbapi.rst b/docs/dbapi.rst index 41ec85833..81f000bc7 100644 --- a/docs/dbapi.rst +++ b/docs/dbapi.rst @@ -25,7 +25,7 @@ and using named parameters:: Providing explicit type information ----------------------------------- -BigQuery requires type information for parameters. The The BigQuery +BigQuery requires type information for parameters. The BigQuery DB-API can usually determine parameter types for parameters based on provided values. Sometimes, however, types can't be determined (for example when `None` is passed) or are determined incorrectly (for @@ -37,7 +37,14 @@ colon, as in:: insert into people (name, income) values (%(name:string)s, %(income:numeric)s) -For unnamed parameters, use the named syntax with a type, but now +For unnamed parameters, use the named syntax with a type, but no name, as in:: insert into people (name, income) values (%(:string)s, %(:numeric)s) + +Providing type information is the *only* way to pass `struct` data:: + + cursor.execute( + "insert into points (point) values (%(:struct)s)", + [{"x": 10, "y": 20}], + ) diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 3b0d8134c..9c134b47c 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -18,18 +18,34 @@ import decimal import functools import numbers +import re +import typing from google.cloud import bigquery -from google.cloud.bigquery import table, enums +from google.cloud.bigquery import table, enums, query from google.cloud.bigquery.dbapi import exceptions _NUMERIC_SERVER_MIN = decimal.Decimal("-9.9999999999999999999999999999999999999E+28") _NUMERIC_SERVER_MAX = decimal.Decimal("9.9999999999999999999999999999999999999E+28") +type_parameters_re = re.compile( + r""" + \( + \s*[0-9]+\s* + (, + \s*[0-9]+\s* 
+ )* + \) + """, + re.VERBOSE, +) + def _parameter_type(name, value, query_parameter_type=None, value_doc=""): if query_parameter_type: + # Strip type parameters + query_parameter_type = type_parameters_re.sub("", query_parameter_type) try: parameter_type = getattr( enums.SqlParameterScalarTypes, query_parameter_type.upper() @@ -113,6 +129,197 @@ def array_to_query_parameter(value, name=None, query_parameter_type=None): return bigquery.ArrayQueryParameter(name, array_type, value) +def _parse_struct_fields( + fields, + base, + parse_struct_field=re.compile( + r""" + (?:(\w+)\s+) # field name + ([A-Z0-9<> ,()]+) # Field type + $""", + re.VERBOSE | re.IGNORECASE, + ).match, +): + # Split a string of struct fields. They're defined by commas, but + # we have to avoid splitting on commas internal to fields. For + # example: + # name string, children array> + # + # only has 2 top-level fields. + fields = fields.split(",") + fields = list(reversed(fields)) # in the off chance that there are very many + while fields: + field = fields.pop() + while fields and field.count("<") != field.count(">"): + field += "," + fields.pop() + + m = parse_struct_field(field.strip()) + if not m: + raise exceptions.ProgrammingError( + f"Invalid struct field, {field}, in {base}" + ) + yield m.group(1, 2) + + +SCALAR, ARRAY, STRUCT = "sar" + + +def _parse_type( + type_, + name, + base, + complex_query_parameter_parse=re.compile( + r""" + \s* + (ARRAY|STRUCT|RECORD) # Type + \s* + <([A-Z0-9<> ,()]+)> # Subtype(s) + \s*$ + """, + re.IGNORECASE | re.VERBOSE, + ).match, +): + if "<" not in type_: + # Scalar + + # Strip type parameters + type_ = type_parameters_re.sub("", type_).strip() + try: + type_ = getattr(enums.SqlParameterScalarTypes, type_.upper()) + except AttributeError: + raise exceptions.ProgrammingError( + f"The given parameter type, {type_}," + f"{' for ' + name if name else ''}" + f" is not a valid BigQuery scalar type, in {base}." 
+ ) + if name: + type_ = type_.with_name(name) + return SCALAR, type_ + + m = complex_query_parameter_parse(type_) + if not m: + raise exceptions.ProgrammingError(f"Invalid parameter type, {type_}") + tname, sub = m.group(1, 2) + if tname.upper() == "ARRAY": + sub_type = complex_query_parameter_type(None, sub, base) + if isinstance(sub_type, query.ArrayQueryParameterType): + raise exceptions.ProgrammingError(f"Array can't contain an array in {base}") + sub_type._complex__src = sub + return ARRAY, sub_type + else: + return STRUCT, _parse_struct_fields(sub, base) + + +def complex_query_parameter_type(name: typing.Optional[str], type_: str, base: str): + """Construct a parameter type (`StructQueryParameterType`) for a complex type + + or a non-complex type that's part of a complex type. + + Examples: + + array> + + struct>> + + This is used for computing array types. + """ + + type_type, sub_type = _parse_type(type_, name, base) + if type_type == SCALAR: + type_ = sub_type + elif type_type == ARRAY: + type_ = query.ArrayQueryParameterType(sub_type, name=name) + elif type_type == STRUCT: + fields = [ + complex_query_parameter_type(field_name, field_type, base) + for field_name, field_type in sub_type + ] + type_ = query.StructQueryParameterType(*fields, name=name) + else: # pragma: NO COVER + raise AssertionError("Bad type_type", type_type) # Can't happen :) + + return type_ + + +def complex_query_parameter( + name: typing.Optional[str], value, type_: str, base: typing.Optional[str] = None +): + """ + Construct a query parameter for a complex type (array or struct record) + + or for a subtype, which may not be complex + + Examples: + + array> + + struct>> + + """ + base = base or type_ + + type_type, sub_type = _parse_type(type_, name, base) + + if type_type == SCALAR: + param = query.ScalarQueryParameter(name, sub_type._type, value) + elif type_type == ARRAY: + if not array_like(value): + raise exceptions.ProgrammingError( + f"Array type with non-array-like value" + 
f" with type {type(value).__name__}" + ) + param = query.ArrayQueryParameter( + name, + sub_type, + value + if isinstance(sub_type, query.ScalarQueryParameterType) + else [ + complex_query_parameter(None, v, sub_type._complex__src, base) + for v in value + ], + ) + elif type_type == STRUCT: + if not isinstance(value, collections_abc.Mapping): + raise exceptions.ProgrammingError(f"Non-mapping value for type {type_}") + value_keys = set(value) + fields = [] + for field_name, field_type in sub_type: + if field_name not in value: + raise exceptions.ProgrammingError( + f"No field value for {field_name} in {type_}" + ) + value_keys.remove(field_name) + fields.append( + complex_query_parameter(field_name, value[field_name], field_type, base) + ) + if value_keys: + raise exceptions.ProgrammingError(f"Extra data keys for {type_}") + + param = query.StructQueryParameter(name, *fields) + else: # pragma: NO COVER + raise AssertionError("Bad type_type", type_type) # Can't happen :) + + return param + + +def _dispatch_parameter(type_, value, name=None): + if type_ is not None and "<" in type_: + param = complex_query_parameter(name, value, type_) + elif isinstance(value, collections_abc.Mapping): + raise NotImplementedError( + f"STRUCT-like parameter values are not supported" + f"{' (parameter ' + name + ')' if name else ''}," + f" unless an explicit type is give in the parameter placeholder" + f" (e.g. '%({name if name else ''}:struct<...>)s')." + ) + elif array_like(value): + param = array_to_query_parameter(value, name, type_) + else: + param = scalar_to_query_parameter(value, name, type_) + + return param + + def to_query_parameters_list(parameters, parameter_types): """Converts a sequence of parameter values into query parameters. @@ -126,19 +333,10 @@ def to_query_parameters_list(parameters, parameter_types): List[google.cloud.bigquery.query._AbstractQueryParameter]: A list of query parameters. 
""" - result = [] - - for value, type_ in zip(parameters, parameter_types): - if isinstance(value, collections_abc.Mapping): - raise NotImplementedError("STRUCT-like parameter values are not supported.") - elif array_like(value): - param = array_to_query_parameter(value, None, type_) - else: - param = scalar_to_query_parameter(value, None, type_) - - result.append(param) - - return result + return [ + _dispatch_parameter(type_, value) + for value, type_ in zip(parameters, parameter_types) + ] def to_query_parameters_dict(parameters, query_parameter_types): @@ -154,28 +352,10 @@ def to_query_parameters_dict(parameters, query_parameter_types): List[google.cloud.bigquery.query._AbstractQueryParameter]: A list of named query parameters. """ - result = [] - - for name, value in parameters.items(): - if isinstance(value, collections_abc.Mapping): - raise NotImplementedError( - "STRUCT-like parameter values are not supported " - "(parameter {}).".format(name) - ) - else: - query_parameter_type = query_parameter_types.get(name) - if array_like(value): - param = array_to_query_parameter( - value, name=name, query_parameter_type=query_parameter_type - ) - else: - param = scalar_to_query_parameter( - value, name=name, query_parameter_type=query_parameter_type, - ) - - result.append(param) - - return result + return [ + _dispatch_parameter(query_parameter_types.get(name), value, name) + for name, value in parameters.items() + ] def to_query_parameters(parameters, parameter_types): diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py index c8fc49378..587598d5f 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -483,7 +483,33 @@ def _format_operation(operation, parameters): def _extract_types( - operation, extra_type_sub=re.compile(r"(%*)%(?:\(([^:)]*)(?::(\w+))?\))?s").sub + operation, + extra_type_sub=re.compile( + r""" + (%*) # Extra %s. 
We'll deal with these in the replacement code + + % # Beginning of replacement, %s, %(...)s + + (?:\( # Begin of optional name and/or type + ([^:)]*) # name + (?:: # ':' introduces type + ( # start of type group + [a-zA-Z0-9<>, ]+ # First part, no parens + + (?: # start sets of parens + non-paren text + \([0-9 ,]+\) # comma-separated groups of digits in parens + # (e.g. string(10)) + (?=[, >)]) # Must be followed by ,>) or space + [a-zA-Z0-9<>, ]* # Optional non-paren chars + )* # Can be zero or more of parens and following text + ) # end of type group + )? # close type clause ":type" + \))? # End of optional name and/or type + + s # End of replacement + """, + re.VERBOSE, + ).sub, ): """Remove type information from parameter placeholders. diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 4b5fcb543..4eef60e92 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -31,9 +31,14 @@ def bqstorage_client(bigquery_client): return bigquery_storage.BigQueryReadClient(credentials=bigquery_client._credentials) -@pytest.fixture +@pytest.fixture(scope="session") def dataset_id(bigquery_client): dataset_id = f"bqsystem_{helpers.temp_suffix()}" bigquery_client.create_dataset(dataset_id) yield dataset_id bigquery_client.delete_dataset(dataset_id, delete_contents=True) + + +@pytest.fixture +def table_id(dataset_id): + return f"{dataset_id}.table_{helpers.temp_suffix()}" diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 1164e36da..ddf5eaf43 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -149,7 +149,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype( reason="Only `pandas version >=1.0.0` is supported", ) def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema( - bigquery_client, dataset_id + bigquery_client, dataset_id, table_id ): """Test that a DataFrame containing column with None-type values and int64 datatype can be uploaded without specifying a 
schema. @@ -157,9 +157,6 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema( https://github.com/googleapis/python-bigquery/issues/22 """ - table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format( - bigquery_client.project, dataset_id - ) df_data = collections.OrderedDict( [("x", pandas.Series([1, 2, None, 4], dtype="Int64"))] ) @@ -511,7 +508,7 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv( def test_load_table_from_dataframe_w_explicit_schema_source_format_csv_floats( - bigquery_client, dataset_id + bigquery_client, dataset_id, table_id ): from google.cloud.bigquery.job import SourceFormat @@ -536,10 +533,6 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv_floats( ) dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) - table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema_csv".format( - bigquery_client.project, dataset_id - ) - job_config = bigquery.LoadJobConfig( schema=table_schema, source_format=SourceFormat.CSV ) diff --git a/tests/system/test_structs.py b/tests/system/test_structs.py new file mode 100644 index 000000000..20740f614 --- /dev/null +++ b/tests/system/test_structs.py @@ -0,0 +1,31 @@ +import datetime + +import pytest + +from google.cloud.bigquery.dbapi import connect + +person_type = "struct>>" +person_type_sized = ( + "struct>>" +) + + +@pytest.mark.parametrize("person_type_decl", [person_type, person_type_sized]) +def test_structs(bigquery_client, dataset_id, person_type_decl, table_id): + conn = connect(bigquery_client) + cursor = conn.cursor() + cursor.execute(f"create table {table_id} (person {person_type_decl})") + data = dict( + name="par", + children=[ + dict(name="ch1", bdate=datetime.date(2021, 1, 1)), + dict(name="ch2", bdate=datetime.date(2021, 1, 2)), + ], + ) + cursor.execute( + f"insert into {table_id} (person) values (%(v:{person_type})s)", dict(v=data), + ) + + cursor.execute(f"select * from 
{table_id}") + [[result]] = list(cursor) + assert result == data diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index 250ba46d9..b33203354 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -16,6 +16,7 @@ import decimal import math import operator as op +import re import unittest import pytest @@ -394,11 +395,13 @@ def test_to_query_parameters_dict_w_types(): assert sorted( _helpers.to_query_parameters( - dict(i=1, x=1.2, y=None, z=[]), dict(x="numeric", y="string", z="float64") + dict(i=1, x=1.2, y=None, q="hi", z=[]), + dict(x="numeric", y="string", q="string(9)", z="float64"), ), key=lambda p: p.name, ) == [ bigquery.ScalarQueryParameter("i", "INT64", 1), + bigquery.ScalarQueryParameter("q", "STRING", "hi"), bigquery.ScalarQueryParameter("x", "NUMERIC", 1.2), bigquery.ScalarQueryParameter("y", "STRING", None), bigquery.ArrayQueryParameter("z", "FLOAT64", []), @@ -409,10 +412,285 @@ def test_to_query_parameters_list_w_types(): from google.cloud import bigquery assert _helpers.to_query_parameters( - [1, 1.2, None, []], [None, "numeric", "string", "float64"] + [1, 1.2, None, "hi", []], [None, "numeric", "string", "string(9)", "float64"] ) == [ bigquery.ScalarQueryParameter(None, "INT64", 1), bigquery.ScalarQueryParameter(None, "NUMERIC", 1.2), bigquery.ScalarQueryParameter(None, "STRING", None), + bigquery.ScalarQueryParameter(None, "STRING", "hi"), bigquery.ArrayQueryParameter(None, "FLOAT64", []), ] + + +@pytest.mark.parametrize( + "value,type_,expect", + [ + ( + [], + "ARRAY", + { + "parameterType": {"type": "ARRAY", "arrayType": {"type": "INT64"}}, + "parameterValue": {"arrayValues": []}, + }, + ), + ( + [1, 2], + "ARRAY", + { + "parameterType": {"type": "ARRAY", "arrayType": {"type": "INT64"}}, + "parameterValue": {"arrayValues": [{"value": "1"}, {"value": "2"}]}, + }, + ), + ( + dict( + name="par", + children=[ + dict(name="ch1", bdate=datetime.date(2021, 1, 1)), + dict(name="ch2", 
bdate=datetime.date(2021, 1, 2)), + ], + ), + "struct>>", + { + "parameterType": { + "structTypes": [ + {"name": "name", "type": {"type": "STRING"}}, + { + "name": "children", + "type": { + "arrayType": { + "structTypes": [ + {"name": "name", "type": {"type": "STRING"}}, + {"name": "bdate", "type": {"type": "DATE"}}, + ], + "type": "STRUCT", + }, + "type": "ARRAY", + }, + }, + ], + "type": "STRUCT", + }, + "parameterValue": { + "structValues": { + "children": { + "arrayValues": [ + { + "structValues": { + "bdate": {"value": "2021-01-01"}, + "name": {"value": "ch1"}, + } + }, + { + "structValues": { + "bdate": {"value": "2021-01-02"}, + "name": {"value": "ch2"}, + } + }, + ] + }, + "name": {"value": "par"}, + } + }, + }, + ), + ( + dict( + name="par", + children=[ + dict(name="ch1", bdate=datetime.date(2021, 1, 1)), + dict(name="ch2", bdate=datetime.date(2021, 1, 2)), + ], + ), + "struct>>", + { + "parameterType": { + "structTypes": [ + {"name": "name", "type": {"type": "STRING"}}, + { + "name": "children", + "type": { + "arrayType": { + "structTypes": [ + {"name": "name", "type": {"type": "STRING"}}, + {"name": "bdate", "type": {"type": "DATE"}}, + ], + "type": "STRUCT", + }, + "type": "ARRAY", + }, + }, + ], + "type": "STRUCT", + }, + "parameterValue": { + "structValues": { + "children": { + "arrayValues": [ + { + "structValues": { + "bdate": {"value": "2021-01-01"}, + "name": {"value": "ch1"}, + } + }, + { + "structValues": { + "bdate": {"value": "2021-01-02"}, + "name": {"value": "ch2"}, + } + }, + ] + }, + "name": {"value": "par"}, + } + }, + }, + ), + ( + ["1", "hi"], + "ARRAY", + { + "parameterType": {"type": "ARRAY", "arrayType": {"type": "STRING"}}, + "parameterValue": {"arrayValues": [{"value": "1"}, {"value": "hi"}]}, + }, + ), + ], +) +def test_complex_query_parameter_type(type_, value, expect): + from google.cloud.bigquery.dbapi._helpers import complex_query_parameter + + param = complex_query_parameter("test", value, type_).to_api_repr() + assert 
param.pop("name") == "test" + assert param == expect + + +def _expected_error_match(expect): + return "^" + re.escape(expect) + "$" + + +@pytest.mark.parametrize( + "value,type_,expect", + [ + ( + [], + "ARRAY", + "The given parameter type, INT," + " is not a valid BigQuery scalar type, in ARRAY.", + ), + ([], "x", "Invalid parameter type, x"), + ({}, "struct", "Invalid struct field, int, in struct"), + ( + {"x": 1}, + "struct", + "The given parameter type, int," + " for x is not a valid BigQuery scalar type, in struct.", + ), + ([], "x<", "Invalid parameter type, x<"), + (0, "ARRAY", "Array type with non-array-like value with type int"), + ( + [], + "ARRAY>", + "Array can't contain an array in ARRAY>", + ), + ([], "struct", "Non-mapping value for type struct"), + ({}, "struct", "No field value for x in struct"), + ({"x": 1, "y": 1}, "struct", "Extra data keys for struct"), + ([], "array>", "Invalid struct field, xxx, in array>"), + ([], "array<<>>", "Invalid parameter type, <>"), + ], +) +def test_complex_query_parameter_type_errors(type_, value, expect): + from google.cloud.bigquery.dbapi._helpers import complex_query_parameter + from google.cloud.bigquery.dbapi import exceptions + + with pytest.raises( + exceptions.ProgrammingError, match=_expected_error_match(expect), + ): + complex_query_parameter("test", value, type_) + + +@pytest.mark.parametrize( + "parameters,parameter_types,expect", + [ + ( + [[], dict(name="ch1", bdate=datetime.date(2021, 1, 1))], + ["ARRAY", "struct"], + [ + { + "parameterType": {"arrayType": {"type": "INT64"}, "type": "ARRAY"}, + "parameterValue": {"arrayValues": []}, + }, + { + "parameterType": { + "structTypes": [ + {"name": "name", "type": {"type": "STRING"}}, + {"name": "bdate", "type": {"type": "DATE"}}, + ], + "type": "STRUCT", + }, + "parameterValue": { + "structValues": { + "bdate": {"value": "2021-01-01"}, + "name": {"value": "ch1"}, + } + }, + }, + ], + ), + ( + dict(ids=[], child=dict(name="ch1", bdate=datetime.date(2021, 1, 
1))), + dict(ids="ARRAY", child="struct"), + [ + { + "name": "ids", + "parameterType": {"arrayType": {"type": "INT64"}, "type": "ARRAY"}, + "parameterValue": {"arrayValues": []}, + }, + { + "name": "child", + "parameterType": { + "structTypes": [ + {"name": "name", "type": {"type": "STRING"}}, + {"name": "bdate", "type": {"type": "DATE"}}, + ], + "type": "STRUCT", + }, + "parameterValue": { + "structValues": { + "bdate": {"value": "2021-01-01"}, + "name": {"value": "ch1"}, + } + }, + }, + ], + ), + ], +) +def test_to_query_parameters_complex_types(parameters, parameter_types, expect): + from google.cloud.bigquery.dbapi._helpers import to_query_parameters + + result = [p.to_api_repr() for p in to_query_parameters(parameters, parameter_types)] + assert result == expect + + +def test_to_query_parameters_struct_error(): + from google.cloud.bigquery.dbapi._helpers import to_query_parameters + + with pytest.raises( + NotImplementedError, + match=_expected_error_match( + "STRUCT-like parameter values are not supported, " + "unless an explicit type is give in the parameter placeholder " + "(e.g. '%(:struct<...>)s')." + ), + ): + to_query_parameters([dict(x=1)], [None]) + + with pytest.raises( + NotImplementedError, + match=_expected_error_match( + "STRUCT-like parameter values are not supported (parameter foo), " + "unless an explicit type is give in the parameter placeholder " + "(e.g. '%(foo:struct<...>)s')." 
+ ), + ): + to_query_parameters(dict(foo=dict(x=1)), {}) diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index a2d6693d0..026810aaf 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -809,6 +809,32 @@ def test__format_operation_no_placeholders(self): "values(%%%%%(foo:INT64)s, %(bar)s)", ("values(%%%%%(foo)s, %(bar)s)", dict(foo="INT64")), ), + ( + "values(%%%%%(foo:struct)s, %(bar)s)", + ("values(%%%%%(foo)s, %(bar)s)", dict(foo="struct")), + ), + ( + "values(%%%%%(foo:struct)s, %(bar)s)", + ("values(%%%%%(foo)s, %(bar)s)", dict(foo="struct")), + ), + ( + "values(%(foo:struct)s, %(bar)s)", + ( + "values(%(foo)s, %(bar)s)", + dict(foo="struct"), + ), + ), + ( + "values(%(foo:struct)s, %(bar)s)", + ( + "values(%(foo)s, %(bar)s)", + dict(foo="struct"), + ), + ), + ( + "values(%(foo:string(10))s, %(bar)s)", + ("values(%(foo)s, %(bar)s)", dict(foo="string(10)")), + ), ], ) def test__extract_types(inp, expect): From 1246da86b78b03ca1aa2c45ec71649e294cfb2f1 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 1 Jul 2021 19:17:01 +0200 Subject: [PATCH 257/341] feat: make it easier to disable best-effort deduplication with streaming inserts (#734) * feat: make it easier to disable row insert IDs * Also accept any iterables for row_ids --- google/cloud/bigquery/__init__.py | 2 + google/cloud/bigquery/client.py | 47 +++++++-- google/cloud/bigquery/enums.py | 7 ++ tests/unit/test_client.py | 153 ++++++++++++++++++++++++++++-- 4 files changed, 195 insertions(+), 14 deletions(-) diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 94f87304a..dfe3a6320 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -37,6 +37,7 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import enums +from google.cloud.bigquery.enums import AutoRowIDs from 
google.cloud.bigquery.enums import KeyResultStatementKind from google.cloud.bigquery.enums import SqlTypeNames from google.cloud.bigquery.enums import StandardSqlDataTypes @@ -144,6 +145,7 @@ "DEFAULT_RETRY", # Enum Constants "enums", + "AutoRowIDs", "Compression", "CreateDisposition", "DestinationFormat", diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 2b7a5273e..2a02c7629 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -68,6 +68,7 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.enums import AutoRowIDs from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job @@ -3349,7 +3350,7 @@ def insert_rows_json( self, table: Union[Table, TableReference, str], json_rows: Sequence[Dict], - row_ids: Sequence[str] = None, + row_ids: Union[Iterable[str], AutoRowIDs, None] = AutoRowIDs.GENERATE_UUID, skip_invalid_rows: bool = None, ignore_unknown_values: bool = None, template_suffix: str = None, @@ -3371,11 +3372,20 @@ def insert_rows_json( json_rows (Sequence[Dict]): Row data to be inserted. Keys must match the table schema fields and values must be JSON-compatible representations. - row_ids (Optional[Sequence[Optional[str]]]): + row_ids (Union[Iterable[str], AutoRowIDs, None]): Unique IDs, one per row being inserted. An ID can also be ``None``, indicating that an explicit insert ID should **not** be used for that row. If the argument is omitted altogether, unique IDs are created automatically. + + .. versionchanged:: 2.21.0 + Can also be an iterable, not just a sequence, or an + :class:`AutoRowIDs` enum member. + + .. 
deprecated:: 2.21.0 + Passing ``None`` to explicitly request autogenerating insert IDs is + deprecated, use :attr:`AutoRowIDs.GENERATE_UUID` instead. + skip_invalid_rows (Optional[bool]): Insert all valid rows of a request, even if invalid rows exist. The default value is ``False``, which causes the entire request @@ -3415,12 +3425,37 @@ def insert_rows_json( rows_info = [] data = {"rows": rows_info} - for index, row in enumerate(json_rows): + if row_ids is None: + warnings.warn( + "Passing None for row_ids is deprecated. To explicitly request " + "autogenerated insert IDs, use AutoRowIDs.GENERATE_UUID instead", + category=DeprecationWarning, + ) + row_ids = AutoRowIDs.GENERATE_UUID + + if not isinstance(row_ids, AutoRowIDs): + try: + row_ids_iter = iter(row_ids) + except TypeError: + msg = "row_ids is neither an iterable nor an AutoRowIDs enum member" + raise TypeError(msg) + + for i, row in enumerate(json_rows): info = {"json": row} - if row_ids is not None: - info["insertId"] = row_ids[index] - else: + + if row_ids is AutoRowIDs.GENERATE_UUID: info["insertId"] = str(uuid.uuid4()) + elif row_ids is AutoRowIDs.DISABLED: + info["insertId"] = None + else: + try: + insert_id = next(row_ids_iter) + except StopIteration: + msg = f"row_ids did not generate enough IDs, error at index {i}" + raise ValueError(msg) + else: + info["insertId"] = insert_id + rows_info.append(info) if skip_invalid_rows is not None: diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index edf991b6f..dbbd02635 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -21,6 +21,13 @@ from google.cloud.bigquery.query import ScalarQueryParameterType +class AutoRowIDs(enum.Enum): + """How to handle automatic insert IDs when inserting rows as a stream.""" + + DISABLED = enum.auto() + GENERATE_UUID = enum.auto() + + class Compression(object): """The compression type to use for exported files. The default value is :attr:`NONE`. 
diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index f6811e207..dffe7bdba 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -5434,7 +5434,7 @@ def test_insert_rows_from_dataframe_w_explicit_none_insert_ids(self): method="POST", path=API_PATH, data=EXPECTED_SENT_DATA, timeout=None ) - def test_insert_rows_json(self): + def test_insert_rows_json_default_behavior(self): from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table @@ -5481,8 +5481,10 @@ def test_insert_rows_json(self): method="POST", path="/%s" % PATH, data=SENT, timeout=7.5, ) - def test_insert_rows_json_with_string_id(self): - rows = [{"col1": "val1"}] + def test_insert_rows_json_w_explicitly_requested_autogenerated_insert_ids(self): + from google.cloud.bigquery import AutoRowIDs + + rows = [{"col1": "val1"}, {"col2": "val2"}] creds = _make_credentials() http = object() client = self._make_one( @@ -5490,20 +5492,116 @@ def test_insert_rows_json_with_string_id(self): ) conn = client._connection = make_connection({}) - with mock.patch("uuid.uuid4", side_effect=map(str, range(len(rows)))): - errors = client.insert_rows_json("proj.dset.tbl", rows) + uuid_patcher = mock.patch("uuid.uuid4", side_effect=map(str, range(len(rows)))) + with uuid_patcher: + errors = client.insert_rows_json( + "proj.dset.tbl", rows, row_ids=AutoRowIDs.GENERATE_UUID + ) self.assertEqual(len(errors), 0) - expected = { - "rows": [{"json": row, "insertId": str(i)} for i, row in enumerate(rows)] + + # Check row data sent to the backend. 
+ expected_row_data = { + "rows": [ + {"json": {"col1": "val1"}, "insertId": "0"}, + {"json": {"col2": "val2"}, "insertId": "1"}, + ] } conn.api_request.assert_called_once_with( method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", - data=expected, + data=expected_row_data, + timeout=None, + ) + + def test_insert_rows_json_w_explicitly_disabled_insert_ids(self): + from google.cloud.bigquery import AutoRowIDs + + rows = [{"col1": "val1"}, {"col2": "val2"}] + creds = _make_credentials() + http = object() + client = self._make_one( + project="default-project", credentials=creds, _http=http + ) + conn = client._connection = make_connection({}) + + errors = client.insert_rows_json( + "proj.dset.tbl", rows, row_ids=AutoRowIDs.DISABLED, + ) + + self.assertEqual(len(errors), 0) + + expected_row_data = { + "rows": [ + {"json": {"col1": "val1"}, "insertId": None}, + {"json": {"col2": "val2"}, "insertId": None}, + ] + } + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/proj/datasets/dset/tables/tbl/insertAll", + data=expected_row_data, + timeout=None, + ) + + def test_insert_rows_json_with_iterator_row_ids(self): + rows = [{"col1": "val1"}, {"col2": "val2"}, {"col3": "val3"}] + creds = _make_credentials() + http = object() + client = self._make_one( + project="default-project", credentials=creds, _http=http + ) + conn = client._connection = make_connection({}) + + row_ids_iter = map(str, itertools.count(42)) + errors = client.insert_rows_json("proj.dset.tbl", rows, row_ids=row_ids_iter) + + self.assertEqual(len(errors), 0) + expected_row_data = { + "rows": [ + {"json": {"col1": "val1"}, "insertId": "42"}, + {"json": {"col2": "val2"}, "insertId": "43"}, + {"json": {"col3": "val3"}, "insertId": "44"}, + ] + } + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/proj/datasets/dset/tables/tbl/insertAll", + data=expected_row_data, timeout=None, ) + def test_insert_rows_json_with_non_iterable_row_ids(self): + 
rows = [{"col1": "val1"}] + creds = _make_credentials() + http = object() + client = self._make_one( + project="default-project", credentials=creds, _http=http + ) + client._connection = make_connection({}) + + with self.assertRaises(TypeError) as exc: + client.insert_rows_json("proj.dset.tbl", rows, row_ids=object()) + + err_msg = str(exc.exception) + self.assertIn("row_ids", err_msg) + self.assertIn("iterable", err_msg) + + def test_insert_rows_json_with_too_few_row_ids(self): + rows = [{"col1": "val1"}, {"col2": "val2"}, {"col3": "val3"}] + creds = _make_credentials() + http = object() + client = self._make_one( + project="default-project", credentials=creds, _http=http + ) + client._connection = make_connection({}) + + insert_ids = ["10", "20"] + + error_msg_pattern = "row_ids did not generate enough IDs.*index 2" + with self.assertRaisesRegex(ValueError, error_msg_pattern): + client.insert_rows_json("proj.dset.tbl", rows, row_ids=insert_ids) + def test_insert_rows_json_w_explicit_none_insert_ids(self): rows = [{"col1": "val1"}, {"col2": "val2"}] creds = _make_credentials() @@ -5526,6 +5624,45 @@ def test_insert_rows_json_w_explicit_none_insert_ids(self): timeout=None, ) + def test_insert_rows_json_w_none_insert_ids_sequence(self): + rows = [{"col1": "val1"}, {"col2": "val2"}] + creds = _make_credentials() + http = object() + client = self._make_one( + project="default-project", credentials=creds, _http=http + ) + conn = client._connection = make_connection({}) + + uuid_patcher = mock.patch("uuid.uuid4", side_effect=map(str, range(len(rows)))) + with warnings.catch_warnings(record=True) as warned, uuid_patcher: + errors = client.insert_rows_json("proj.dset.tbl", rows, row_ids=None) + + self.assertEqual(len(errors), 0) + + # Passing row_ids=None should have resulted in a deprecation warning. 
+ matches = [ + warning + for warning in warned + if issubclass(warning.category, DeprecationWarning) + and "row_ids" in str(warning) + and "AutoRowIDs.GENERATE_UUID" in str(warning) + ] + assert matches, "The expected deprecation warning was not raised." + + # Check row data sent to the backend. + expected_row_data = { + "rows": [ + {"json": {"col1": "val1"}, "insertId": "0"}, + {"json": {"col2": "val2"}, "insertId": "1"}, + ] + } + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/proj/datasets/dset/tables/tbl/insertAll", + data=expected_row_data, + timeout=None, + ) + def test_insert_rows_w_wrong_arg(self): from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.schema import SchemaField From 145944f24fedc4d739687399a8309f9d51d43dfd Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 9 Jul 2021 14:21:09 -0500 Subject: [PATCH 258/341] docs: add docs for all enums in module (#745) --- docs/conf.py | 1 + docs/enums.rst | 6 ++++++ docs/reference.rst | 7 ++++--- 3 files changed, 11 insertions(+), 3 deletions(-) create mode 100644 docs/enums.rst diff --git a/docs/conf.py b/docs/conf.py index cb347160d..09f7ea414 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -110,6 +110,7 @@ # directories to ignore when looking for source files. exclude_patterns = [ "_build", + "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "samples/snippets/README.rst", diff --git a/docs/enums.rst b/docs/enums.rst new file mode 100644 index 000000000..57608968a --- /dev/null +++ b/docs/enums.rst @@ -0,0 +1,6 @@ +BigQuery Enums +============== + +.. automodule:: google.cloud.bigquery.enums + :members: + :undoc-members: diff --git a/docs/reference.rst b/docs/reference.rst index 52d916f96..694379cd2 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -173,10 +173,11 @@ Magics Enums ===== -.. autosummary:: - :toctree: generated +.. 
toctree:: + :maxdepth: 2 + + enums - enums.StandardSqlDataTypes Encryption Configuration ======================== From cd2f09e96c4db5f63afa4fe7179c71b8872d48a2 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Sat, 10 Jul 2021 11:05:31 +0200 Subject: [PATCH 259/341] chore(deps): update dependency google-cloud-bigquery-storage to v2.6.0 (#743) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 83ab92ee5..30a59c15a 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 google-cloud-bigquery==2.20.0 -google-cloud-bigquery-storage==2.5.0 +google-cloud-bigquery-storage==2.6.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 6b966fb07..ce02ac7ed 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.20.0 -google-cloud-bigquery-storage==2.5.0 +google-cloud-bigquery-storage==2.6.0 google-auth-oauthlib==0.4.4 grpcio==1.38.1 ipython==7.16.1; python_version < '3.7' From 7d2d3e906a9eb161911a198fb925ad79de5df934 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Sat, 10 Jul 2021 11:07:00 +0200 Subject: [PATCH 260/341] feat: add support for decimal target types (#735) * feat: add support for decimal target types * Add decimal target types support to ExternalConfig * Remove ambiguous parts of DecimalTargetType docs. 
--- google/cloud/bigquery/__init__.py | 2 + google/cloud/bigquery/enums.py | 18 +++++++ google/cloud/bigquery/external_config.py | 23 +++++++++ google/cloud/bigquery/job/load.py | 23 +++++++++ tests/data/numeric_38_12.parquet | Bin 0 -> 307 bytes tests/system/test_client.py | 54 +++++++++++++++++++++ tests/unit/job/test_load_config.py | 39 +++++++++++++++ tests/unit/test_external_config.py | 58 +++++++++++++++++++++++ 8 files changed, 217 insertions(+) create mode 100644 tests/data/numeric_38_12.parquet diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index dfe3a6320..b97224176 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -38,6 +38,7 @@ from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import enums from google.cloud.bigquery.enums import AutoRowIDs +from google.cloud.bigquery.enums import DecimalTargetType from google.cloud.bigquery.enums import KeyResultStatementKind from google.cloud.bigquery.enums import SqlTypeNames from google.cloud.bigquery.enums import StandardSqlDataTypes @@ -148,6 +149,7 @@ "AutoRowIDs", "Compression", "CreateDisposition", + "DecimalTargetType", "DestinationFormat", "DeterminismLevel", "ExternalSourceFormat", diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index dbbd02635..ef35dffe0 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -49,6 +49,24 @@ class Compression(object): """Specifies no compression.""" +class DecimalTargetType: + """The data types that could be used as a target type when converting decimal values. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#DecimalTargetType + + .. 
versionadded:: 2.21.0 + """ + + NUMERIC = "NUMERIC" + """Decimal values could be converted to NUMERIC type.""" + + BIGNUMERIC = "BIGNUMERIC" + """Decimal values could be converted to BIGNUMERIC type.""" + + STRING = "STRING" + """Decimal values could be converted to STRING type.""" + + class CreateDisposition(object): """Specifies whether the job is allowed to create new tables. The default value is :attr:`CREATE_IF_NEEDED`. diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 0c49d2d76..f1692ba50 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -22,6 +22,7 @@ import base64 import copy +from typing import FrozenSet, Iterable, Optional from google.cloud.bigquery._helpers import _to_bytes from google.cloud.bigquery._helpers import _bytes_to_json @@ -693,6 +694,28 @@ def compression(self): def compression(self, value): self._properties["compression"] = value + @property + def decimal_target_types(self) -> Optional[FrozenSet[str]]: + """Possible SQL data types to which the source decimal values are converted. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.decimal_target_types + + .. 
versionadded:: 2.21.0 + """ + prop = self._properties.get("decimalTargetTypes") + if prop is not None: + prop = frozenset(prop) + return prop + + @decimal_target_types.setter + def decimal_target_types(self, value: Optional[Iterable[str]]): + if value is not None: + self._properties["decimalTargetTypes"] = list(value) + else: + if "decimalTargetTypes" in self._properties: + del self._properties["decimalTargetTypes"] + @property def hive_partitioning(self): """Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \ diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index 41d38dd74..bdee5cb6b 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -14,6 +14,8 @@ """Classes for load jobs.""" +from typing import FrozenSet, Iterable, Optional + from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery.external_config import HivePartitioningOptions from google.cloud.bigquery.format_options import ParquetOptions @@ -121,6 +123,27 @@ def create_disposition(self): def create_disposition(self, value): self._set_sub_prop("createDisposition", value) + @property + def decimal_target_types(self) -> Optional[FrozenSet[str]]: + """Possible SQL data types to which the source decimal values are converted. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.decimal_target_types + + .. 
versionadded:: 2.21.0 + """ + prop = self._get_sub_prop("decimalTargetTypes") + if prop is not None: + prop = frozenset(prop) + return prop + + @decimal_target_types.setter + def decimal_target_types(self, value: Optional[Iterable[str]]): + if value is not None: + self._set_sub_prop("decimalTargetTypes", list(value)) + else: + self._del_sub_prop("decimalTargetTypes") + @property def destination_encryption_configuration(self): """Optional[google.cloud.bigquery.encryption_configuration.EncryptionConfiguration]: Custom diff --git a/tests/data/numeric_38_12.parquet b/tests/data/numeric_38_12.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ef4db91ea9a90b2e230b8057fb05446e3f25dfe4 GIT binary patch literal 307 zcmWG=3^EjD5cLuD(Gg_MA>{7D-4+Ihd3~*3z?EmNe|HVLj zCLm^JMpq11$pBF*$|R}d!JUy=mKtB2oRONFD9Xej$|tHI$|S)WpPZj#pvomGA?m{* z#v&;rDWk?ABg!Xf%D}-U36&NF%82b_1R^yK8=~C?(!s)@;aQMaR9Ko?qMKW!P?lO$ zoSC1eV5n!IXQ-f&RGOKSqF|DklxUKYm};JsWRPN#nv#^9W^QR@oS158Y;2ikWRaF) ToMxVuoTkZe0_d>-U~mBdp9NGq literal 0 HcmV?d00001 diff --git a/tests/system/test_client.py b/tests/system/test_client.py index ce3021399..460296b2f 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -864,6 +864,60 @@ def test_load_table_from_local_avro_file_then_dump_table(self): sorted(row_tuples, key=by_wavelength), sorted(ROWS, key=by_wavelength) ) + def test_load_table_from_local_parquet_file_decimal_types(self): + from google.cloud.bigquery.enums import DecimalTargetType + from google.cloud.bigquery.job import SourceFormat + from google.cloud.bigquery.job import WriteDisposition + + TABLE_NAME = "test_table_parquet" + + expected_rows = [ + (decimal.Decimal("123.999999999999"),), + (decimal.Decimal("99999999999999999999999999.999999999999"),), + ] + + dataset = self.temp_dataset(_make_dataset_id("load_local_parquet_then_dump")) + table_ref = dataset.table(TABLE_NAME) + table = Table(table_ref) + self.to_delete.insert(0, table) + + job_config = 
bigquery.LoadJobConfig() + job_config.source_format = SourceFormat.PARQUET + job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE + job_config.decimal_target_types = [ + DecimalTargetType.NUMERIC, + DecimalTargetType.BIGNUMERIC, + DecimalTargetType.STRING, + ] + + with open(DATA_PATH / "numeric_38_12.parquet", "rb") as parquet_file: + job = Config.CLIENT.load_table_from_file( + parquet_file, table_ref, job_config=job_config + ) + + job.result(timeout=JOB_TIMEOUT) # Retry until done. + + self.assertEqual(job.output_rows, len(expected_rows)) + + table = Config.CLIENT.get_table(table) + rows = self._fetch_single_page(table) + row_tuples = [r.values() for r in rows] + self.assertEqual(sorted(row_tuples), sorted(expected_rows)) + + # Forcing the NUMERIC type, however, should result in an error. + job_config.decimal_target_types = [DecimalTargetType.NUMERIC] + + with open(DATA_PATH / "numeric_38_12.parquet", "rb") as parquet_file: + job = Config.CLIENT.load_table_from_file( + parquet_file, table_ref, job_config=job_config + ) + + with self.assertRaises(BadRequest) as exc_info: + job.result(timeout=JOB_TIMEOUT) + + exc_msg = str(exc_info.exception) + self.assertIn("out of valid NUMERIC range", exc_msg) + def test_load_table_from_json_basic_use(self): table_schema = ( bigquery.SchemaField("name", "STRING", mode="REQUIRED"), diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index eafe7e046..190bd16dc 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -122,6 +122,45 @@ def test_create_disposition_setter(self): config.create_disposition = disposition self.assertEqual(config._properties["load"]["createDisposition"], disposition) + def test_decimal_target_types_miss(self): + config = self._get_target_class()() + self.assertIsNone(config.decimal_target_types) + + def test_decimal_target_types_hit(self): + from google.cloud.bigquery.enums import DecimalTargetType + + config = 
self._get_target_class()() + decimal_target_types = [DecimalTargetType.NUMERIC, DecimalTargetType.STRING] + config._properties["load"]["decimalTargetTypes"] = decimal_target_types + + expected = frozenset(decimal_target_types) + self.assertEqual(config.decimal_target_types, expected) + + def test_decimal_target_types_setter(self): + from google.cloud.bigquery.enums import DecimalTargetType + + decimal_target_types = (DecimalTargetType.NUMERIC, DecimalTargetType.BIGNUMERIC) + config = self._get_target_class()() + config.decimal_target_types = decimal_target_types + self.assertEqual( + config._properties["load"]["decimalTargetTypes"], + list(decimal_target_types), + ) + + def test_decimal_target_types_setter_w_none(self): + from google.cloud.bigquery.enums import DecimalTargetType + + config = self._get_target_class()() + decimal_target_types = [DecimalTargetType.BIGNUMERIC] + config._properties["load"]["decimalTargetTypes"] = decimal_target_types + + config.decimal_target_types = None + + self.assertIsNone(config.decimal_target_types) + self.assertNotIn("decimalTargetTypes", config._properties["load"]) + + config.decimal_target_types = None # No error if unsetting an unset property. 
+ def test_destination_encryption_configuration_missing(self): config = self._get_target_class()() self.assertIsNone(config.destination_encryption_configuration) diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 393df931e..1f49dba5d 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -532,6 +532,64 @@ def test_to_api_repr_parquet(self): self.assertEqual(got_resource, exp_resource) + def test_from_api_repr_decimal_target_types(self): + from google.cloud.bigquery.enums import DecimalTargetType + + resource = _copy_and_update( + self.BASE_RESOURCE, + { + "sourceFormat": "FORMAT_FOO", + "decimalTargetTypes": [DecimalTargetType.NUMERIC], + }, + ) + + ec = external_config.ExternalConfig.from_api_repr(resource) + + self._verify_base(ec) + self.assertEqual(ec.source_format, "FORMAT_FOO") + self.assertEqual( + ec.decimal_target_types, frozenset([DecimalTargetType.NUMERIC]) + ) + + # converting back to API representation should yield the same result + got_resource = ec.to_api_repr() + self.assertEqual(got_resource, resource) + + del resource["decimalTargetTypes"] + ec = external_config.ExternalConfig.from_api_repr(resource) + self.assertIsNone(ec.decimal_target_types) + + got_resource = ec.to_api_repr() + self.assertEqual(got_resource, resource) + + def test_to_api_repr_decimal_target_types(self): + from google.cloud.bigquery.enums import DecimalTargetType + + ec = external_config.ExternalConfig("FORMAT_FOO") + ec.decimal_target_types = [DecimalTargetType.NUMERIC, DecimalTargetType.STRING] + + got_resource = ec.to_api_repr() + + expected_resource = { + "sourceFormat": "FORMAT_FOO", + "decimalTargetTypes": [DecimalTargetType.NUMERIC, DecimalTargetType.STRING], + } + self.assertEqual(got_resource, expected_resource) + + def test_to_api_repr_decimal_target_types_unset(self): + from google.cloud.bigquery.enums import DecimalTargetType + + ec = external_config.ExternalConfig("FORMAT_FOO") + 
ec._properties["decimalTargetTypes"] = [DecimalTargetType.NUMERIC] + ec.decimal_target_types = None + + got_resource = ec.to_api_repr() + + expected_resource = {"sourceFormat": "FORMAT_FOO"} + self.assertEqual(got_resource, expected_resource) + + ec.decimal_target_types = None # No error if unsetting when already unset. + def _copy_and_update(d, u): d = copy.deepcopy(d) From ba86b2a6300ae5a9f3c803beeb42bda4c522e34c Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Sat, 10 Jul 2021 11:29:00 +0200 Subject: [PATCH 261/341] feat: add support for table snapshots (#740) * feat: add support for table snapshots * Add system test for table snapshots * Make test taxonomy resource name unique * Store timezone aware snapshot time on snapshots * Make copy config tests more detailed * Use unique resource ID differently for display name * Add new classes to docs --- docs/reference.rst | 2 + google/cloud/bigquery/__init__.py | 4 ++ google/cloud/bigquery/job/__init__.py | 2 + google/cloud/bigquery/job/copy_.py | 38 ++++++++++++++ google/cloud/bigquery/table.py | 37 ++++++++++++++ tests/system/test_client.py | 71 ++++++++++++++++++++++++- tests/unit/job/test_copy.py | 34 +++++++++++- tests/unit/test_table.py | 74 +++++++++++++++++++++++++++ 8 files changed, 260 insertions(+), 2 deletions(-) diff --git a/docs/reference.rst b/docs/reference.rst index 694379cd2..cb2faa5ec 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -59,6 +59,7 @@ Job-Related Types job.CreateDisposition job.DestinationFormat job.Encoding + job.OperationType job.QueryPlanEntry job.QueryPlanEntryStep job.QueryPriority @@ -90,6 +91,7 @@ Table table.RangePartitioning table.Row table.RowIterator + table.SnapshotDefinition table.Table table.TableListItem table.TableReference diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index b97224176..65dde5d94 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -61,6 +61,7 @@ from 
google.cloud.bigquery.job import ExtractJobConfig from google.cloud.bigquery.job import LoadJob from google.cloud.bigquery.job import LoadJobConfig +from google.cloud.bigquery.job import OperationType from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.job import QueryPriority @@ -87,6 +88,7 @@ from google.cloud.bigquery.table import PartitionRange from google.cloud.bigquery.table import RangePartitioning from google.cloud.bigquery.table import Row +from google.cloud.bigquery.table import SnapshotDefinition from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import TimePartitioningType @@ -115,6 +117,7 @@ "PartitionRange", "RangePartitioning", "Row", + "SnapshotDefinition", "TimePartitioning", "TimePartitioningType", # Jobs @@ -155,6 +158,7 @@ "ExternalSourceFormat", "Encoding", "KeyResultStatementKind", + "OperationType", "QueryPriority", "SchemaUpdateOption", "SourceFormat", diff --git a/google/cloud/bigquery/job/__init__.py b/google/cloud/bigquery/job/__init__.py index cdab92e05..6bdfa09be 100644 --- a/google/cloud/bigquery/job/__init__.py +++ b/google/cloud/bigquery/job/__init__.py @@ -25,6 +25,7 @@ from google.cloud.bigquery.job.base import UnknownJob from google.cloud.bigquery.job.copy_ import CopyJob from google.cloud.bigquery.job.copy_ import CopyJobConfig +from google.cloud.bigquery.job.copy_ import OperationType from google.cloud.bigquery.job.extract import ExtractJob from google.cloud.bigquery.job.extract import ExtractJobConfig from google.cloud.bigquery.job.load import LoadJob @@ -59,6 +60,7 @@ "UnknownJob", "CopyJob", "CopyJobConfig", + "OperationType", "ExtractJob", "ExtractJobConfig", "LoadJob", diff --git a/google/cloud/bigquery/job/copy_.py b/google/cloud/bigquery/job/copy_.py index 95f4b613b..c6ee98944 100644 --- a/google/cloud/bigquery/job/copy_.py +++ b/google/cloud/bigquery/job/copy_.py @@ 
-14,6 +14,8 @@ """Classes for copy jobs.""" +from typing import Optional + from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery import _helpers from google.cloud.bigquery.table import TableReference @@ -23,6 +25,25 @@ from google.cloud.bigquery.job.base import _JobReference +class OperationType: + """Different operation types supported in table copy job. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#operationtype + """ + + OPERATION_TYPE_UNSPECIFIED = "OPERATION_TYPE_UNSPECIFIED" + """Unspecified operation type.""" + + COPY = "COPY" + """The source and destination table have the same table type.""" + + SNAPSHOT = "SNAPSHOT" + """The source table type is TABLE and the destination table type is SNAPSHOT.""" + + RESTORE = "RESTORE" + """The source table type is SNAPSHOT and the destination table type is TABLE.""" + + class CopyJobConfig(_JobConfig): """Configuration options for copy jobs. @@ -85,6 +106,23 @@ def destination_encryption_configuration(self, value): api_repr = value.to_api_repr() self._set_sub_prop("destinationEncryptionConfiguration", api_repr) + @property + def operation_type(self) -> str: + """The operation to perform with this copy job. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.operation_type + """ + return self._get_sub_prop( + "operationType", OperationType.OPERATION_TYPE_UNSPECIFIED + ) + + @operation_type.setter + def operation_type(self, value: Optional[str]): + if value is None: + value = OperationType.OPERATION_TYPE_UNSPECIFIED + self._set_sub_prop("operationType", value) + class CopyJob(_AsyncJob): """Asynchronous job: copy data into a table from other tables. 
diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index a1c13c85d..765110ae6 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -321,6 +321,7 @@ class Table(object): "range_partitioning": "rangePartitioning", "time_partitioning": "timePartitioning", "schema": "schema", + "snapshot_definition": "snapshotDefinition", "streaming_buffer": "streamingBuffer", "self_link": "selfLink", "table_id": ["tableReference", "tableId"], @@ -910,6 +911,19 @@ def external_data_configuration(self, value): self._PROPERTY_TO_API_FIELD["external_data_configuration"] ] = api_repr + @property + def snapshot_definition(self) -> Optional["SnapshotDefinition"]: + """Information about the snapshot. This value is set via snapshot creation. + + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table.FIELDS.snapshot_definition + """ + snapshot_info = self._properties.get( + self._PROPERTY_TO_API_FIELD["snapshot_definition"] + ) + if snapshot_info is not None: + snapshot_info = SnapshotDefinition(snapshot_info) + return snapshot_info + @classmethod def from_string(cls, full_table_id: str) -> "Table": """Construct a table from fully-qualified table ID. @@ -1274,6 +1288,29 @@ def __init__(self, resource): ) +class SnapshotDefinition: + """Information about base table and snapshot time of the snapshot. + + See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#snapshotdefinition + + Args: + resource: Snapshot definition representation returned from the API. + """ + + def __init__(self, resource: Dict[str, Any]): + self.base_table_reference = None + if "baseTableReference" in resource: + self.base_table_reference = TableReference.from_api_repr( + resource["baseTableReference"] + ) + + self.snapshot_time = None + if "snapshotTime" in resource: + self.snapshot_time = google.cloud._helpers._rfc3339_to_datetime( + resource["snapshotTime"] + ) + + class Row(object): """A BigQuery row. 
diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 460296b2f..7234333a2 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -394,7 +394,7 @@ def test_create_table_with_real_custom_policy(self): taxonomy_parent = f"projects/{Config.CLIENT.project}/locations/us" new_taxonomy = datacatalog_types.Taxonomy( - display_name="Custom test taxonomy", + display_name="Custom test taxonomy" + unique_resource_id(), description="This taxonomy is ony used for a test.", activated_policy_types=[ datacatalog_types.Taxonomy.PolicyType.FINE_GRAINED_ACCESS_CONTROL @@ -2370,6 +2370,75 @@ def test_parameterized_types_round_trip(self): self.assertEqual(tuple(s._key()[:2] for s in table2.schema), fields) + def test_table_snapshots(self): + from google.cloud.bigquery import CopyJobConfig + from google.cloud.bigquery import OperationType + + client = Config.CLIENT + + source_table_path = f"{client.project}.{Config.DATASET}.test_table" + snapshot_table_path = f"{source_table_path}_snapshot" + + # Create the table before loading so that the column order is predictable. + schema = [ + bigquery.SchemaField("foo", "INTEGER"), + bigquery.SchemaField("bar", "STRING"), + ] + source_table = helpers.retry_403(Config.CLIENT.create_table)( + Table(source_table_path, schema=schema) + ) + self.to_delete.insert(0, source_table) + + # Populate the table with initial data. + rows = [{"foo": 1, "bar": "one"}, {"foo": 2, "bar": "two"}] + load_job = Config.CLIENT.load_table_from_json(rows, source_table) + load_job.result() + + # Now create a snapshot before modifying the original table data. + copy_config = CopyJobConfig() + copy_config.operation_type = OperationType.SNAPSHOT + + copy_job = client.copy_table( + sources=source_table_path, + destination=snapshot_table_path, + job_config=copy_config, + ) + copy_job.result() + + snapshot_table = client.get_table(snapshot_table_path) + self.to_delete.insert(0, snapshot_table) + + # Modify data in original table. 
+ sql = f'INSERT INTO `{source_table_path}`(foo, bar) VALUES (3, "three")' + query_job = client.query(sql) + query_job.result() + + # List rows from the source table and compare them to rows from the snapshot. + rows_iter = client.list_rows(source_table_path) + rows = sorted(row.values() for row in rows_iter) + assert rows == [(1, "one"), (2, "two"), (3, "three")] + + rows_iter = client.list_rows(snapshot_table_path) + rows = sorted(row.values() for row in rows_iter) + assert rows == [(1, "one"), (2, "two")] + + # Now restore the table from the snapshot and it should again contain the old + # set of rows. + copy_config = CopyJobConfig() + copy_config.operation_type = OperationType.RESTORE + copy_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE + + copy_job = client.copy_table( + sources=snapshot_table_path, + destination=source_table_path, + job_config=copy_config, + ) + copy_job.result() + + rows_iter = client.list_rows(source_table_path) + rows = sorted(row.values() for row in rows_iter) + assert rows == [(1, "one"), (2, "two")] + def temp_dataset(self, dataset_id, location=None): project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) diff --git a/tests/unit/job/test_copy.py b/tests/unit/job/test_copy.py index fb0c87391..992efcf6b 100644 --- a/tests/unit/job/test_copy.py +++ b/tests/unit/job/test_copy.py @@ -28,18 +28,34 @@ def _get_target_class(): return CopyJobConfig + def test_ctor_defaults(self): + from google.cloud.bigquery.job import OperationType + + config = self._make_one() + + assert config.create_disposition is None + assert config.write_disposition is None + assert config.destination_encryption_configuration is None + assert config.operation_type == OperationType.OPERATION_TYPE_UNSPECIFIED + def test_ctor_w_properties(self): from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import OperationType from google.cloud.bigquery.job import WriteDisposition 
create_disposition = CreateDisposition.CREATE_NEVER write_disposition = WriteDisposition.WRITE_TRUNCATE + snapshot_operation = OperationType.SNAPSHOT + config = self._get_target_class()( - create_disposition=create_disposition, write_disposition=write_disposition + create_disposition=create_disposition, + write_disposition=write_disposition, + operation_type=snapshot_operation, ) self.assertEqual(config.create_disposition, create_disposition) self.assertEqual(config.write_disposition, write_disposition) + self.assertEqual(config.operation_type, snapshot_operation) def test_to_api_repr_with_encryption(self): from google.cloud.bigquery.encryption_configuration import ( @@ -70,6 +86,22 @@ def test_to_api_repr_with_encryption_none(self): resource, {"copy": {"destinationEncryptionConfiguration": None}} ) + def test_operation_type_setting_none(self): + from google.cloud.bigquery.job import OperationType + + config = self._make_one(operation_type=OperationType.SNAPSHOT) + + # Setting it to None is the same as setting it to OPERATION_TYPE_UNSPECIFIED. 
+ config.operation_type = None + assert config.operation_type == OperationType.OPERATION_TYPE_UNSPECIFIED + + def test_operation_type_setting_non_none(self): + from google.cloud.bigquery.job import OperationType + + config = self._make_one(operation_type=None) + config.operation_type = OperationType.RESTORE + assert config.operation_type == OperationType.RESTORE + class TestCopyJob(_Base): JOB_TYPE = "copy" diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index f4038835c..b30f16fe0 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -684,6 +684,40 @@ def test_props_set_by_server(self): self.assertEqual(table.full_table_id, TABLE_FULL_ID) self.assertEqual(table.table_type, "TABLE") + def test_snapshot_definition_not_set(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + assert table.snapshot_definition is None + + def test_snapshot_definition_set(self): + from google.cloud._helpers import UTC + from google.cloud.bigquery.table import SnapshotDefinition + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + table._properties["snapshotDefinition"] = { + "baseTableReference": { + "projectId": "project_x", + "datasetId": "dataset_y", + "tableId": "table_z", + }, + "snapshotTime": "2010-09-28T10:20:30.123Z", + } + + snapshot = table.snapshot_definition + + assert isinstance(snapshot, SnapshotDefinition) + assert snapshot.base_table_reference.path == ( + "/projects/project_x/datasets/dataset_y/tables/table_z" + ) + assert snapshot.snapshot_time == datetime.datetime( + 2010, 9, 28, 10, 20, 30, 123000, tzinfo=UTC + ) + def test_description_setter_bad_value(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -1509,6 +1543,46 @@ def test_to_api_repr(self): self.assertEqual(table.to_api_repr(), resource) 
+class TestSnapshotDefinition: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import SnapshotDefinition + + return SnapshotDefinition + + @classmethod + def _make_one(cls, *args, **kwargs): + klass = cls._get_target_class() + return klass(*args, **kwargs) + + def test_ctor_empty_resource(self): + instance = self._make_one(resource={}) + assert instance.base_table_reference is None + assert instance.snapshot_time is None + + def test_ctor_full_resource(self): + from google.cloud._helpers import UTC + from google.cloud.bigquery.table import TableReference + + resource = { + "baseTableReference": { + "projectId": "my-project", + "datasetId": "your-dataset", + "tableId": "our-table", + }, + "snapshotTime": "2005-06-07T19:35:02.123Z", + } + instance = self._make_one(resource) + + expected_table_ref = TableReference.from_string( + "my-project.your-dataset.our-table" + ) + assert instance.base_table_reference == expected_table_ref + + expected_time = datetime.datetime(2005, 6, 7, 19, 35, 2, 123000, tzinfo=UTC) + assert instance.snapshot_time == expected_time + + class TestRow(unittest.TestCase): def test_row(self): from google.cloud.bigquery.table import Row From 67bc5fbd306be7cdffd216f3791d4024acfa95b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Br=C3=A6dstrup?= <3591721+LinuxChristian@users.noreply.github.com> Date: Mon, 12 Jul 2021 21:21:24 +0200 Subject: [PATCH 262/341] fix: use pandas function to check for NaN (#750) * fix: use pandas function to check for NaN Starting with pandas 1.0, an experimental pandas.NA value (singleton) is available to represent scalar missing values as opposed to numpy.nan. Comparing the variable with itself results in a pandas.NA value that doesn't support type-casting to boolean. Using the built-in pandas.isna function handles all pandas supported NaN values. 
* tests: Skip tests if pandas below required version * tests: compare expected and actual directly as lists * Fix pytest.mark.skipif spelling Co-authored-by: Peter Lamut --- google/cloud/bigquery/_pandas_helpers.py | 2 +- tests/unit/test__pandas_helpers.py | 40 ++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index e93a99eba..285c0e83c 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -780,7 +780,7 @@ def dataframe_to_json_generator(dataframe): output = {} for column, value in zip(dataframe.columns, row): # Omit NaN values. - if value != value: + if pandas.isna(value): continue output[column] = value yield output diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 39a3d845b..aa87e28f5 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -19,6 +19,7 @@ import operator import queue import warnings +import pkg_resources import mock @@ -47,6 +48,14 @@ except ImportError: # pragma: NO COVER bigquery_storage = None +PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") + +if pandas is not None: + PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version +else: + # Set to less than MIN version. 
+ PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") + skip_if_no_bignumeric = pytest.mark.skipif( not _BIGNUMERIC_SUPPORT, reason="BIGNUMERIC support requires pyarrow>=3.0.0", @@ -734,6 +743,37 @@ def test_list_columns_and_indexes_with_named_index_same_as_column_name( assert columns_and_indexes == expected +@pytest.mark.skipif( + pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, + reason="Requires `pandas version >= 1.0.0` which introduces pandas.NA", +) +def test_dataframe_to_json_generator(module_under_test): + utcnow = datetime.datetime.utcnow() + df_data = collections.OrderedDict( + [ + ("a_series", [pandas.NA, 2, 3, 4]), + ("b_series", [0.1, float("NaN"), 0.3, 0.4]), + ("c_series", ["a", "b", pandas.NA, "d"]), + ("d_series", [utcnow, utcnow, utcnow, pandas.NaT]), + ("e_series", [True, False, True, None]), + ] + ) + dataframe = pandas.DataFrame( + df_data, index=pandas.Index([4, 5, 6, 7], name="a_index") + ) + + dataframe = dataframe.astype({"a_series": pandas.Int64Dtype()}) + + rows = module_under_test.dataframe_to_json_generator(dataframe) + expected = [ + {"b_series": 0.1, "c_series": "a", "d_series": utcnow, "e_series": True}, + {"a_series": 2, "c_series": "b", "d_series": utcnow, "e_series": False}, + {"a_series": 3, "b_series": 0.3, "d_series": utcnow, "e_series": True}, + {"a_series": 4, "b_series": 0.4, "c_series": "d"}, + ] + assert list(rows) == expected + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_list_columns_and_indexes_with_named_index(module_under_test): df_data = collections.OrderedDict( From 7771f34421654575c9eb2dbdef7ba0e9384beb39 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 13 Jul 2021 19:06:15 +0200 Subject: [PATCH 263/341] chore: release 2.21.0 (#753) Supersedes #711. ## [2.21.0](https://www.github.com/googleapis/python-bigquery/compare/v2.20.0...v2.21.0) (2021-07-13) ### Features * Add max_results parameter to some of the `QueryJob` methods. 
([#698](https://www.github.com/googleapis/python-bigquery/issues/698)) ([2a9618f](https://www.github.com/googleapis/python-bigquery/commit/2a9618f4daaa4a014161e1a2f7376844eec9e8da)) * Add support for decimal target types. ([#735](https://www.github.com/googleapis/python-bigquery/issues/735)) ([7d2d3e9](https://www.github.com/googleapis/python-bigquery/commit/7d2d3e906a9eb161911a198fb925ad79de5df934)) * Add support for table snapshots. ([#740](https://www.github.com/googleapis/python-bigquery/issues/740)) ([ba86b2a](https://www.github.com/googleapis/python-bigquery/commit/ba86b2a6300ae5a9f3c803beeb42bda4c522e34c)) * Enable unsetting policy tags on schema fields. ([#703](https://www.github.com/googleapis/python-bigquery/issues/703)) ([18bb443](https://www.github.com/googleapis/python-bigquery/commit/18bb443c7acd0a75dcb57d9aebe38b2d734ff8c7)) * Make it easier to disable best-effort deduplication with streaming inserts. ([#734](https://www.github.com/googleapis/python-bigquery/issues/734)) ([1246da8](https://www.github.com/googleapis/python-bigquery/commit/1246da86b78b03ca1aa2c45ec71649e294cfb2f1)) * Support passing struct data to the DB API. ([#718](https://www.github.com/googleapis/python-bigquery/issues/718)) ([38b3ef9](https://www.github.com/googleapis/python-bigquery/commit/38b3ef96c3dedc139b84f0ff06885141ae7ce78c)) ### Bug Fixes * Inserting non-finite floats with `insert_rows()`. ([#728](https://www.github.com/googleapis/python-bigquery/issues/728)) ([d047419](https://www.github.com/googleapis/python-bigquery/commit/d047419879e807e123296da2eee89a5253050166)) * Use `pandas` function to check for `NaN`. ([#750](https://www.github.com/googleapis/python-bigquery/issues/750)) ([67bc5fb](https://www.github.com/googleapis/python-bigquery/commit/67bc5fbd306be7cdffd216f3791d4024acfa95b3)) ### Documentation * Add docs for all enums in module. 
([#745](https://www.github.com/googleapis/python-bigquery/issues/745)) ([145944f](https://www.github.com/googleapis/python-bigquery/commit/145944f24fedc4d739687399a8309f9d51d43dfd)) * Omit mention of Python 2.7 in `CONTRIBUTING.rst`. ([#706](https://www.github.com/googleapis/python-bigquery/issues/706)) ([27d6839](https://www.github.com/googleapis/python-bigquery/commit/27d6839ee8a40909e4199cfa0da8b6b64705b2e9)) --- CHANGELOG.md | 24 ++++++++++++++++++++++++ docs/conf.py | 1 - google/cloud/bigquery/version.py | 2 +- 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b08cd98c7..7344542b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,30 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.21.0](https://www.github.com/googleapis/python-bigquery/compare/v2.20.0...v2.21.0) (2021-07-12) + + +### Features + +* Add max_results parameter to some of the `QueryJob` methods. ([#698](https://www.github.com/googleapis/python-bigquery/issues/698)) ([2a9618f](https://www.github.com/googleapis/python-bigquery/commit/2a9618f4daaa4a014161e1a2f7376844eec9e8da)) +* Add support for decimal target types. ([#735](https://www.github.com/googleapis/python-bigquery/issues/735)) ([7d2d3e9](https://www.github.com/googleapis/python-bigquery/commit/7d2d3e906a9eb161911a198fb925ad79de5df934)) +* Add support for table snapshots. ([#740](https://www.github.com/googleapis/python-bigquery/issues/740)) ([ba86b2a](https://www.github.com/googleapis/python-bigquery/commit/ba86b2a6300ae5a9f3c803beeb42bda4c522e34c)) +* Enable unsetting policy tags on schema fields. ([#703](https://www.github.com/googleapis/python-bigquery/issues/703)) ([18bb443](https://www.github.com/googleapis/python-bigquery/commit/18bb443c7acd0a75dcb57d9aebe38b2d734ff8c7)) +* Make it easier to disable best-effort deduplication with streaming inserts. 
([#734](https://www.github.com/googleapis/python-bigquery/issues/734)) ([1246da8](https://www.github.com/googleapis/python-bigquery/commit/1246da86b78b03ca1aa2c45ec71649e294cfb2f1)) +* Support passing struct data to the DB API. ([#718](https://www.github.com/googleapis/python-bigquery/issues/718)) ([38b3ef9](https://www.github.com/googleapis/python-bigquery/commit/38b3ef96c3dedc139b84f0ff06885141ae7ce78c)) + + +### Bug Fixes + +* Inserting non-finite floats with `insert_rows()`. ([#728](https://www.github.com/googleapis/python-bigquery/issues/728)) ([d047419](https://www.github.com/googleapis/python-bigquery/commit/d047419879e807e123296da2eee89a5253050166)) +* Use `pandas` function to check for `NaN`. ([#750](https://www.github.com/googleapis/python-bigquery/issues/750)) ([67bc5fb](https://www.github.com/googleapis/python-bigquery/commit/67bc5fbd306be7cdffd216f3791d4024acfa95b3)) + + +### Documentation + +* Add docs for all enums in module. ([#745](https://www.github.com/googleapis/python-bigquery/issues/745)) ([145944f](https://www.github.com/googleapis/python-bigquery/commit/145944f24fedc4d739687399a8309f9d51d43dfd)) +* Omit mention of Python 2.7 in `CONTRIBUTING.rst`. ([#706](https://www.github.com/googleapis/python-bigquery/issues/706)) ([27d6839](https://www.github.com/googleapis/python-bigquery/commit/27d6839ee8a40909e4199cfa0da8b6b64705b2e9)) + ## [2.20.0](https://www.github.com/googleapis/python-bigquery/compare/v2.19.0...v2.20.0) (2021-06-07) diff --git a/docs/conf.py b/docs/conf.py index 09f7ea414..cb347160d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -110,7 +110,6 @@ # directories to ignore when looking for source files. 
exclude_patterns = [ "_build", - "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "samples/snippets/README.rst", diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 9fea4fece..563b0e160 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.20.0" +__version__ = "2.21.0" From dea92d36ffa4d4dcf5cbbddbbf439df049da9558 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 13 Jul 2021 19:42:22 +0200 Subject: [PATCH 264/341] chore(deps): update dependency google-cloud-bigquery to v2.21.0 (#755) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | `==2.20.0` -> `==2.21.0` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.21.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.21.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.21.0/compatibility-slim/2.20.0)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.21.0/confidence-slim/2.20.0)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery ### [`v2.21.0`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​2210-httpswwwgithubcomgoogleapispython-bigquerycomparev2200v2210-2021-07-12) ##### Features - Add max_results parameter to some of the `QueryJob` methods. ([#​698](https://www.github.com/googleapis/python-bigquery/issues/698)) ([2a9618f](https://www.github.com/googleapis/python-bigquery/commit/2a9618f4daaa4a014161e1a2f7376844eec9e8da)) - Add support for decimal target types. ([#​735](https://www.github.com/googleapis/python-bigquery/issues/735)) ([7d2d3e9](https://www.github.com/googleapis/python-bigquery/commit/7d2d3e906a9eb161911a198fb925ad79de5df934)) - Add support for table snapshots. ([#​740](https://www.github.com/googleapis/python-bigquery/issues/740)) ([ba86b2a](https://www.github.com/googleapis/python-bigquery/commit/ba86b2a6300ae5a9f3c803beeb42bda4c522e34c)) - Enable unsetting policy tags on schema fields. ([#​703](https://www.github.com/googleapis/python-bigquery/issues/703)) ([18bb443](https://www.github.com/googleapis/python-bigquery/commit/18bb443c7acd0a75dcb57d9aebe38b2d734ff8c7)) - Make it easier to disable best-effort deduplication with streaming inserts. ([#​734](https://www.github.com/googleapis/python-bigquery/issues/734)) ([1246da8](https://www.github.com/googleapis/python-bigquery/commit/1246da86b78b03ca1aa2c45ec71649e294cfb2f1)) - Support passing struct data to the DB API. ([#​718](https://www.github.com/googleapis/python-bigquery/issues/718)) ([38b3ef9](https://www.github.com/googleapis/python-bigquery/commit/38b3ef96c3dedc139b84f0ff06885141ae7ce78c)) ##### Bug Fixes - Inserting non-finite floats with `insert_rows()`. ([#​728](https://www.github.com/googleapis/python-bigquery/issues/728)) ([d047419](https://www.github.com/googleapis/python-bigquery/commit/d047419879e807e123296da2eee89a5253050166)) - Use `pandas` function to check for `NaN`. 
([#​750](https://www.github.com/googleapis/python-bigquery/issues/750)) ([67bc5fb](https://www.github.com/googleapis/python-bigquery/commit/67bc5fbd306be7cdffd216f3791d4024acfa95b3)) ##### Documentation - Add docs for all enums in module. ([#​745](https://www.github.com/googleapis/python-bigquery/issues/745)) ([145944f](https://www.github.com/googleapis/python-bigquery/commit/145944f24fedc4d739687399a8309f9d51d43dfd)) - Omit mention of Python 2.7 in `CONTRIBUTING.rst`. ([#​706](https://www.github.com/googleapis/python-bigquery/issues/706)) ([27d6839](https://www.github.com/googleapis/python-bigquery/commit/27d6839ee8a40909e4199cfa0da8b6b64705b2e9))
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 30a59c15a..c7aa209ad 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.20.0 +google-cloud-bigquery==2.21.0 google-cloud-bigquery-storage==2.6.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index ce02ac7ed..b62c84c33 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.20.0 +google-cloud-bigquery==2.21.0 google-cloud-bigquery-storage==2.6.0 google-auth-oauthlib==0.4.4 grpcio==1.38.1 From 481b86449e37e58a4f354343ed14f4dfd6ef60dd Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 14 Jul 2021 15:24:57 +0000 Subject: [PATCH 265/341] build(python): exit with success status if no samples found (#759) Source-Link: https://github.com/googleapis/synthtool/commit/53ea3896a52f87c758e79b5a19fa338c83925a98 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:e1793a23ae0ee9aafb2e3a53b564a351f74790dbe3c2d75f8fc3b8c43e5c036c --- .github/.OwlBot.lock.yaml | 2 +- 
.kokoro/test-samples-impl.sh | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index e2b39f946..a5d3697f2 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:99d90d097e4a4710cc8658ee0b5b963f4426d0e424819787c3ac1405c9a26719 + digest: sha256:e1793a23ae0ee9aafb2e3a53b564a351f74790dbe3c2d75f8fc3b8c43e5c036c diff --git a/.kokoro/test-samples-impl.sh b/.kokoro/test-samples-impl.sh index cf5de74c1..311a8d54b 100755 --- a/.kokoro/test-samples-impl.sh +++ b/.kokoro/test-samples-impl.sh @@ -20,9 +20,9 @@ set -eo pipefail # Enables `**` to include files nested inside sub-folders shopt -s globstar -# Exit early if samples directory doesn't exist -if [ ! -d "./samples" ]; then - echo "No tests run. `./samples` not found" +# Exit early if samples don't exist +if ! find samples -name 'requirements.txt' | grep -q .; then + echo "No tests run. 
'./samples/**/requirements.txt' not found" exit 0 fi From 5437d443c3e89cf0458771662c9ef2d2f2f8e4d8 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 14 Jul 2021 16:16:13 +0000 Subject: [PATCH 266/341] chore: release 2.21.0 (#760) :robot: I have created a release \*beep\* \*boop\* --- ## [2.21.0](https://www.github.com/googleapis/python-bigquery/compare/v2.20.0...v2.21.0) (2021-07-14) ### Features * add always_use_jwt_access ([#714](https://www.github.com/googleapis/python-bigquery/issues/714)) ([92fbd4a](https://www.github.com/googleapis/python-bigquery/commit/92fbd4ade37e0be49dc278080ef73c83eafeea18)) * add max_results parameter to some of the QueryJob methods ([#698](https://www.github.com/googleapis/python-bigquery/issues/698)) ([2a9618f](https://www.github.com/googleapis/python-bigquery/commit/2a9618f4daaa4a014161e1a2f7376844eec9e8da)) * add support for decimal target types ([#735](https://www.github.com/googleapis/python-bigquery/issues/735)) ([7d2d3e9](https://www.github.com/googleapis/python-bigquery/commit/7d2d3e906a9eb161911a198fb925ad79de5df934)) * add support for table snapshots ([#740](https://www.github.com/googleapis/python-bigquery/issues/740)) ([ba86b2a](https://www.github.com/googleapis/python-bigquery/commit/ba86b2a6300ae5a9f3c803beeb42bda4c522e34c)) * enable unsetting policy tags on schema fields ([#703](https://www.github.com/googleapis/python-bigquery/issues/703)) ([18bb443](https://www.github.com/googleapis/python-bigquery/commit/18bb443c7acd0a75dcb57d9aebe38b2d734ff8c7)) * make it easier to disable best-effort deduplication with streaming inserts ([#734](https://www.github.com/googleapis/python-bigquery/issues/734)) ([1246da8](https://www.github.com/googleapis/python-bigquery/commit/1246da86b78b03ca1aa2c45ec71649e294cfb2f1)) * Support passing struct data to the DB API ([#718](https://www.github.com/googleapis/python-bigquery/issues/718)) 
([38b3ef9](https://www.github.com/googleapis/python-bigquery/commit/38b3ef96c3dedc139b84f0ff06885141ae7ce78c)) ### Bug Fixes * inserting non-finite floats with insert_rows() ([#728](https://www.github.com/googleapis/python-bigquery/issues/728)) ([d047419](https://www.github.com/googleapis/python-bigquery/commit/d047419879e807e123296da2eee89a5253050166)) * use pandas function to check for NaN ([#750](https://www.github.com/googleapis/python-bigquery/issues/750)) ([67bc5fb](https://www.github.com/googleapis/python-bigquery/commit/67bc5fbd306be7cdffd216f3791d4024acfa95b3)) ### Documentation * add docs for all enums in module ([#745](https://www.github.com/googleapis/python-bigquery/issues/745)) ([145944f](https://www.github.com/googleapis/python-bigquery/commit/145944f24fedc4d739687399a8309f9d51d43dfd)) * omit mention of Python 2.7 in `CONTRIBUTING.rst` ([#706](https://www.github.com/googleapis/python-bigquery/issues/706)) ([27d6839](https://www.github.com/googleapis/python-bigquery/commit/27d6839ee8a40909e4199cfa0da8b6b64705b2e9)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). 
--- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7344542b4..5fba4c517 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history + ## [2.21.0](https://www.github.com/googleapis/python-bigquery/compare/v2.20.0...v2.21.0) (2021-07-12) From 5deef6f1c548791ccbe6e8daf7fd60876d727a7c Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 14 Jul 2021 12:49:33 -0400 Subject: [PATCH 267/341] build(python): remove python 3.7 from kokoro Dockerfile (#762) Source-Link: https://github.com/googleapis/synthtool/commit/e44dc0c742b1230887a73552357e0c18dcc30b92 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:5ff7446edeaede81c3ed58b23a4e76a5403fba1350ce28478045657303b6479d Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 2 +- .kokoro/docker/docs/Dockerfile | 35 ++-------------------------------- 2 files changed, 3 insertions(+), 34 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index a5d3697f2..cb06536da 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:e1793a23ae0ee9aafb2e3a53b564a351f74790dbe3c2d75f8fc3b8c43e5c036c + digest: sha256:5ff7446edeaede81c3ed58b23a4e76a5403fba1350ce28478045657303b6479d diff --git a/.kokoro/docker/docs/Dockerfile b/.kokoro/docker/docs/Dockerfile index 412b0b56a..4e1b1fb8b 100644 --- a/.kokoro/docker/docs/Dockerfile +++ b/.kokoro/docker/docs/Dockerfile @@ -40,6 +40,7 @@ RUN apt-get update \ libssl-dev \ libsqlite3-dev \ portaudio19-dev \ + python3-distutils \ redis-server \ software-properties-common \ ssh \ @@ -59,40 +60,8 @@ RUN apt-get update \ && rm -rf /var/lib/apt/lists/* \ && rm -f /var/cache/apt/archives/*.deb - -COPY fetch_gpg_keys.sh /tmp -# Install the desired versions of Python. 
-RUN set -ex \ - && export GNUPGHOME="$(mktemp -d)" \ - && echo "disable-ipv6" >> "${GNUPGHOME}/dirmngr.conf" \ - && /tmp/fetch_gpg_keys.sh \ - && for PYTHON_VERSION in 3.7.8 3.8.5; do \ - wget --no-check-certificate -O python-${PYTHON_VERSION}.tar.xz "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz" \ - && wget --no-check-certificate -O python-${PYTHON_VERSION}.tar.xz.asc "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz.asc" \ - && gpg --batch --verify python-${PYTHON_VERSION}.tar.xz.asc python-${PYTHON_VERSION}.tar.xz \ - && rm -r python-${PYTHON_VERSION}.tar.xz.asc \ - && mkdir -p /usr/src/python-${PYTHON_VERSION} \ - && tar -xJC /usr/src/python-${PYTHON_VERSION} --strip-components=1 -f python-${PYTHON_VERSION}.tar.xz \ - && rm python-${PYTHON_VERSION}.tar.xz \ - && cd /usr/src/python-${PYTHON_VERSION} \ - && ./configure \ - --enable-shared \ - # This works only on Python 2.7 and throws a warning on every other - # version, but seems otherwise harmless. - --enable-unicode=ucs4 \ - --with-system-ffi \ - --without-ensurepip \ - && make -j$(nproc) \ - && make install \ - && ldconfig \ - ; done \ - && rm -rf "${GNUPGHOME}" \ - && rm -rf /usr/src/python* \ - && rm -rf ~/.cache/ - RUN wget -O /tmp/get-pip.py 'https://bootstrap.pypa.io/get-pip.py' \ - && python3.7 /tmp/get-pip.py \ && python3.8 /tmp/get-pip.py \ && rm /tmp/get-pip.py -CMD ["python3.7"] +CMD ["python3.8"] From 45b93089f5398740413104285cc8acfd5ebc9c08 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 14 Jul 2021 21:20:27 +0200 Subject: [PATCH 268/341] fix: avoid possible job already exists error (#751) * fix: avoid possible job already exists error If job create request fails, a query job might still have started successfully. This commit handles this edge case and returns such query job if one can be found. 
* Catch only Conflict errors on query job create --- google/cloud/bigquery/client.py | 26 +++++++++++- tests/unit/test_client.py | 75 +++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 2a02c7629..de259abce 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -3190,6 +3190,7 @@ def query( If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.QueryJobConfig` class. """ + job_id_given = job_id is not None job_id = _make_job_id(job_id, job_id_prefix) if project is None: @@ -3221,9 +3222,30 @@ def query( job_ref = job._JobReference(job_id, project=project, location=location) query_job = job.QueryJob(job_ref, query, client=self, job_config=job_config) - query_job._begin(retry=retry, timeout=timeout) - return query_job + try: + query_job._begin(retry=retry, timeout=timeout) + except core_exceptions.Conflict as create_exc: + # The thought is if someone is providing their own job IDs and they get + # their job ID generation wrong, this could end up returning results for + # the wrong query. We thus only try to recover if job ID was not given. 
+ if job_id_given: + raise create_exc + + try: + query_job = self.get_job( + job_id, + project=project, + location=location, + retry=retry, + timeout=timeout, + ) + except core_exceptions.GoogleAPIError: # (includes RetryError) + raise create_exc + else: + return query_job + else: + return query_job def insert_rows( self, diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index dffe7bdba..2be8daab6 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -4617,6 +4617,81 @@ def test_query_w_query_parameters(self): }, ) + def test_query_job_rpc_fail_w_random_error(self): + from google.api_core.exceptions import Unknown + from google.cloud.bigquery.job import QueryJob + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + job_create_error = Unknown("Not sure what went wrong.") + job_begin_patcher = mock.patch.object( + QueryJob, "_begin", side_effect=job_create_error + ) + with job_begin_patcher: + with pytest.raises(Unknown, match="Not sure what went wrong."): + client.query("SELECT 1;", job_id="123") + + def test_query_job_rpc_fail_w_conflict_job_id_given(self): + from google.api_core.exceptions import Conflict + from google.cloud.bigquery.job import QueryJob + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + job_create_error = Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + QueryJob, "_begin", side_effect=job_create_error + ) + with job_begin_patcher: + with pytest.raises(Conflict, match="Job already exists."): + client.query("SELECT 1;", job_id="123") + + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails(self): + from google.api_core.exceptions import Conflict + from google.api_core.exceptions import DataLoss + from google.cloud.bigquery.job import QueryJob + + creds = _make_credentials() + http = object() + client = 
self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + job_create_error = Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + QueryJob, "_begin", side_effect=job_create_error + ) + get_job_patcher = mock.patch.object( + client, "get_job", side_effect=DataLoss("we lost yor job, sorry") + ) + + with job_begin_patcher, get_job_patcher: + # If get job request fails, the original exception should be raised. + with pytest.raises(Conflict, match="Job already exists."): + client.query("SELECT 1;", job_id=None) + + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_succeeds(self): + from google.api_core.exceptions import Conflict + from google.cloud.bigquery.job import QueryJob + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + job_create_error = Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + QueryJob, "_begin", side_effect=job_create_error + ) + get_job_patcher = mock.patch.object( + client, "get_job", return_value=mock.sentinel.query_job + ) + + with job_begin_patcher, get_job_patcher: + result = client.query("SELECT 1;", job_id=None) + + assert result is mock.sentinel.query_job + def test_insert_rows_w_timeout(self): from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table From 2bb8de680d07b6b5b0bfd67ad2ef559886d38dca Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 14 Jul 2021 17:29:00 -0400 Subject: [PATCH 269/341] chore: expand range to allow 2.x versions (#768) api-core, cloud-core, and resumable-media will all be releasing Python3-only 2.x versions shortly. Closes #767. 
--- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index fcb1dd966..71958ccf9 100644 --- a/setup.py +++ b/setup.py @@ -30,10 +30,10 @@ release_status = "Development Status :: 5 - Production/Stable" dependencies = [ "grpcio >= 1.38.1, < 2.0dev", # https://github.com/googleapis/python-bigquery/issues/695 - "google-api-core[grpc] >= 1.29.0, < 2.0.0dev", + "google-api-core[grpc] >= 1.29.0, < 3.0.0dev", "proto-plus >= 1.10.0", - "google-cloud-core >= 1.4.1, < 2.0dev", - "google-resumable-media >= 0.6.0, < 2.0dev", + "google-cloud-core >= 1.4.1, < 3.0dev", + "google-resumable-media >= 0.6.0, < 3.0dev", "packaging >= 14.3", "protobuf >= 3.12.0", "requests >= 2.18.0, < 3.0.0dev", From 87a09fa3f2a9ab35728a1ac925f9d5f2e6616c65 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 14 Jul 2021 17:30:13 -0500 Subject: [PATCH 270/341] deps: allow 2.x versions of `google-api-core`, `google-cloud-core`, `google-resumable-media` (#770) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit **Note**: PR is empty because this is purely to make sure CHANGELOG is updated. Follow-up to https://github.com/googleapis/python-bigquery/pull/768 Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #767 🦕 From c45a7380871af3dfbd3c45524cb606c60e1a01d1 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 15 Jul 2021 04:19:25 -0500 Subject: [PATCH 271/341] feat: add `LoadJobConfig.projection_fields` to select DATASTORE_BACKUP fields (#736) * feat: add LoadJobConfig.projection_fields to select DATASTORE_BACKUP fields * add type annotations * annotate setter too Co-authored-by: Peter Lamut --- google/cloud/bigquery/job/load.py | 24 ++++++++++++++++++++++-- tests/unit/job/test_load_config.py | 11 +++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index bdee5cb6b..f1b045412 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -14,7 +14,7 @@ """Classes for load jobs.""" -from typing import FrozenSet, Iterable, Optional +from typing import FrozenSet, List, Iterable, Optional from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery.external_config import HivePartitioningOptions @@ -25,7 +25,6 @@ from google.cloud.bigquery.table import RangePartitioning from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import TimePartitioning - from google.cloud.bigquery.job.base import _AsyncJob from google.cloud.bigquery.job.base import _JobConfig from google.cloud.bigquery.job.base import _JobReference @@ -300,6 +299,27 @@ def null_marker(self): def null_marker(self, value): self._set_sub_prop("nullMarker", value) + @property + def projection_fields(self) -> Optional[List[str]]: + """Optional[List[str]]: If + :attr:`google.cloud.bigquery.job.LoadJobConfig.source_format` is set to + "DATASTORE_BACKUP", indicates which 
entity properties to load into + BigQuery from a Cloud Datastore backup. + + Property names are case sensitive and must be top-level properties. If + no properties are specified, BigQuery loads all properties. If any + named property isn't found in the Cloud Datastore backup, an invalid + error is returned in the job result. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.projection_fields + """ + return self._get_sub_prop("projectionFields") + + @projection_fields.setter + def projection_fields(self, value: Optional[List[str]]): + self._set_sub_prop("projectionFields", value) + @property def quote_character(self): """Optional[str]: Character used to quote data sections (CSV only). diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index 190bd16dc..cbe087dac 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -424,6 +424,17 @@ def test_null_marker_setter(self): config.null_marker = null_marker self.assertEqual(config._properties["load"]["nullMarker"], null_marker) + def test_projection_fields_miss(self): + config = self._get_target_class()() + self.assertIsNone(config.projection_fields) + + def test_projection_fields_hit(self): + config = self._get_target_class()() + fields = ["email", "postal_code"] + config.projection_fields = fields + self.assertEqual(config._properties["load"]["projectionFields"], fields) + self.assertEqual(config.projection_fields, fields) + def test_quote_character_missing(self): config = self._get_target_class()() self.assertIsNone(config.quote_character) From 36fe86f41c1a8f46167284f752a6d6bbf886a04b Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 15 Jul 2021 19:37:17 +0200 Subject: [PATCH 272/341] feat: add support for more detailed DML stats (#758) * feat: add support for more detailed DML stats * Move is None check of DmlStats one level higher --- docs/reference.rst | 1 + google/cloud/bigquery/__init__.py | 2 
+ google/cloud/bigquery/job/__init__.py | 2 + google/cloud/bigquery/job/query.py | 37 ++++++++++++++++ tests/system/test_client.py | 56 +++++++++++++++++++++++ tests/unit/job/test_query.py | 64 +++++++++++++++++++++++++++ tests/unit/job/test_query_stats.py | 37 ++++++++++++++++ 7 files changed, 199 insertions(+) diff --git a/docs/reference.rst b/docs/reference.rst index cb2faa5ec..8c38d0c44 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -58,6 +58,7 @@ Job-Related Types job.Compression job.CreateDisposition job.DestinationFormat + job.DmlStats job.Encoding job.OperationType job.QueryPlanEntry diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 65dde5d94..ced8cefae 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -56,6 +56,7 @@ from google.cloud.bigquery.job import CopyJobConfig from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import DestinationFormat +from google.cloud.bigquery.job import DmlStats from google.cloud.bigquery.job import Encoding from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import ExtractJobConfig @@ -142,6 +143,7 @@ "BigtableOptions", "BigtableColumnFamily", "BigtableColumn", + "DmlStats", "CSVOptions", "GoogleSheetsOptions", "ParquetOptions", diff --git a/google/cloud/bigquery/job/__init__.py b/google/cloud/bigquery/job/__init__.py index 6bdfa09be..4c16d0e20 100644 --- a/google/cloud/bigquery/job/__init__.py +++ b/google/cloud/bigquery/job/__init__.py @@ -31,6 +31,7 @@ from google.cloud.bigquery.job.load import LoadJob from google.cloud.bigquery.job.load import LoadJobConfig from google.cloud.bigquery.job.query import _contains_order_by +from google.cloud.bigquery.job.query import DmlStats from google.cloud.bigquery.job.query import QueryJob from google.cloud.bigquery.job.query import QueryJobConfig from google.cloud.bigquery.job.query import QueryPlanEntry @@ -66,6 +67,7 @@ "LoadJob", 
"LoadJobConfig", "_contains_order_by", + "DmlStats", "QueryJob", "QueryJobConfig", "QueryPlanEntry", diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 6ff9f2647..d588e9b5a 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -114,6 +114,35 @@ def _to_api_repr_table_defs(value): return {k: ExternalConfig.to_api_repr(v) for k, v in value.items()} +class DmlStats(typing.NamedTuple): + """Detailed statistics for DML statements. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/DmlStats + """ + + inserted_row_count: int = 0 + """Number of inserted rows. Populated by DML INSERT and MERGE statements.""" + + deleted_row_count: int = 0 + """Number of deleted rows. populated by DML DELETE, MERGE and TRUNCATE statements. + """ + + updated_row_count: int = 0 + """Number of updated rows. Populated by DML UPDATE and MERGE statements.""" + + @classmethod + def from_api_repr(cls, stats: Dict[str, str]) -> "DmlStats": + # NOTE: The field order here must match the order of fields set at the + # class level. + api_fields = ("insertedRowCount", "deletedRowCount", "updatedRowCount") + + args = ( + int(stats.get(api_field, default_val)) + for api_field, default_val in zip(api_fields, cls.__new__.__defaults__) + ) + return cls(*args) + + class ScriptOptions: """Options controlling the execution of scripts. 
@@ -1079,6 +1108,14 @@ def estimated_bytes_processed(self): result = int(result) return result + @property + def dml_stats(self) -> Optional[DmlStats]: + stats = self._job_statistics().get("dmlStats") + if stats is None: + return None + else: + return DmlStats.from_api_repr(stats) + def _blocking_poll(self, timeout=None, **kwargs): self._done_timeout = timeout self._transport_timeout = timeout diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 7234333a2..cbca73619 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -1521,6 +1521,62 @@ def test_query_statistics(self): self.assertGreater(stages_with_inputs, 0) self.assertGreater(len(plan), stages_with_inputs) + def test_dml_statistics(self): + table_schema = ( + bigquery.SchemaField("foo", "STRING"), + bigquery.SchemaField("bar", "INTEGER"), + ) + + dataset_id = _make_dataset_id("bq_system_test") + self.temp_dataset(dataset_id) + table_id = "{}.{}.test_dml_statistics".format(Config.CLIENT.project, dataset_id) + + # Create the table before loading so that the column order is deterministic. + table = helpers.retry_403(Config.CLIENT.create_table)( + Table(table_id, schema=table_schema) + ) + self.to_delete.insert(0, table) + + # Insert a few rows and check the stats. + sql = f""" + INSERT INTO `{table_id}` + VALUES ("one", 1), ("two", 2), ("three", 3), ("four", 4); + """ + query_job = Config.CLIENT.query(sql) + query_job.result() + + assert query_job.dml_stats is not None + assert query_job.dml_stats.inserted_row_count == 4 + assert query_job.dml_stats.updated_row_count == 0 + assert query_job.dml_stats.deleted_row_count == 0 + + # Update some of the rows. 
+ sql = f""" + UPDATE `{table_id}` + SET bar = bar + 1 + WHERE bar > 2; + """ + query_job = Config.CLIENT.query(sql) + query_job.result() + + assert query_job.dml_stats is not None + assert query_job.dml_stats.inserted_row_count == 0 + assert query_job.dml_stats.updated_row_count == 2 + assert query_job.dml_stats.deleted_row_count == 0 + + # Now delete a few rows and check the stats. + sql = f""" + DELETE FROM `{table_id}` + WHERE foo != "two"; + """ + query_job = Config.CLIENT.query(sql) + query_job.result() + + assert query_job.dml_stats is not None + assert query_job.dml_stats.inserted_row_count == 0 + assert query_job.dml_stats.updated_row_count == 0 + assert query_job.dml_stats.deleted_row_count == 3 + def test_dbapi_w_standard_sql_types(self): for sql, expected in helpers.STANDARD_SQL_EXAMPLES: Config.CURSOR.execute(sql) diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 4665933ea..482f7f3af 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -110,6 +110,24 @@ def _verify_table_definitions(self, job, config): self.assertIsNotNone(expected_ec) self.assertEqual(found_ec.to_api_repr(), expected_ec) + def _verify_dml_stats_resource_properties(self, job, resource): + query_stats = resource.get("statistics", {}).get("query", {}) + + if "dmlStats" in query_stats: + resource_dml_stats = query_stats["dmlStats"] + job_dml_stats = job.dml_stats + assert str(job_dml_stats.inserted_row_count) == resource_dml_stats.get( + "insertedRowCount", "0" + ) + assert str(job_dml_stats.updated_row_count) == resource_dml_stats.get( + "updatedRowCount", "0" + ) + assert str(job_dml_stats.deleted_row_count) == resource_dml_stats.get( + "deletedRowCount", "0" + ) + else: + assert job.dml_stats is None + def _verify_configuration_properties(self, job, configuration): if "dryRun" in configuration: self.assertEqual(job.dry_run, configuration["dryRun"]) @@ -118,6 +136,7 @@ def _verify_configuration_properties(self, job, configuration): 
def _verifyResourceProperties(self, job, resource): self._verifyReadonlyResourceProperties(job, resource) + self._verify_dml_stats_resource_properties(job, resource) configuration = resource.get("configuration", {}) self._verify_configuration_properties(job, configuration) @@ -130,16 +149,19 @@ def _verifyResourceProperties(self, job, resource): self._verify_table_definitions(job, query_config) self.assertEqual(job.query, query_config["query"]) + if "createDisposition" in query_config: self.assertEqual(job.create_disposition, query_config["createDisposition"]) else: self.assertIsNone(job.create_disposition) + if "defaultDataset" in query_config: ds_ref = job.default_dataset ds_ref = {"projectId": ds_ref.project, "datasetId": ds_ref.dataset_id} self.assertEqual(ds_ref, query_config["defaultDataset"]) else: self.assertIsNone(job.default_dataset) + if "destinationTable" in query_config: table = job.destination tb_ref = { @@ -150,14 +172,17 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(tb_ref, query_config["destinationTable"]) else: self.assertIsNone(job.destination) + if "priority" in query_config: self.assertEqual(job.priority, query_config["priority"]) else: self.assertIsNone(job.priority) + if "writeDisposition" in query_config: self.assertEqual(job.write_disposition, query_config["writeDisposition"]) else: self.assertIsNone(job.write_disposition) + if "destinationEncryptionConfiguration" in query_config: self.assertIsNotNone(job.destination_encryption_configuration) self.assertEqual( @@ -166,6 +191,7 @@ def _verifyResourceProperties(self, job, resource): ) else: self.assertIsNone(job.destination_encryption_configuration) + if "schemaUpdateOptions" in query_config: self.assertEqual( job.schema_update_options, query_config["schemaUpdateOptions"] @@ -190,6 +216,7 @@ def test_ctor_defaults(self): self.assertIsNone(job.create_disposition) self.assertIsNone(job.default_dataset) self.assertIsNone(job.destination) + 
self.assertIsNone(job.dml_stats) self.assertIsNone(job.flatten_results) self.assertIsNone(job.priority) self.assertIsNone(job.use_query_cache) @@ -278,6 +305,26 @@ def test_from_api_repr_with_encryption(self): self.assertIs(job._client, client) self._verifyResourceProperties(job, RESOURCE) + def test_from_api_repr_with_dml_stats(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"query": {"query": self.QUERY}}, + "statistics": { + "query": { + "dmlStats": {"insertedRowCount": "15", "updatedRowCount": "2"}, + }, + }, + } + klass = self._get_target_class() + + job = klass.from_api_repr(RESOURCE, client=client) + + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + def test_from_api_repr_w_properties(self): from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import SchemaUpdateOption @@ -815,6 +862,23 @@ def test_estimated_bytes_processed(self): query_stats["estimatedBytesProcessed"] = str(est_bytes) self.assertEqual(job.estimated_bytes_processed, est_bytes) + def test_dml_stats(self): + from google.cloud.bigquery.job.query import DmlStats + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + assert job.dml_stats is None + + statistics = job._properties["statistics"] = {} + assert job.dml_stats is None + + query_stats = statistics["query"] = {} + assert job.dml_stats is None + + query_stats["dmlStats"] = {"insertedRowCount": "35"} + assert isinstance(job.dml_stats, DmlStats) + assert job.dml_stats.inserted_row_count == 35 + def test_result(self): from google.cloud.bigquery.table import RowIterator diff --git a/tests/unit/job/test_query_stats.py b/tests/unit/job/test_query_stats.py index 09a0efc45..e70eb097c 100644 --- a/tests/unit/job/test_query_stats.py +++ b/tests/unit/job/test_query_stats.py @@ 
-15,6 +15,43 @@ from .helpers import _Base +class TestDmlStats: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import DmlStats + + return DmlStats + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor_defaults(self): + dml_stats = self._make_one() + assert dml_stats.inserted_row_count == 0 + assert dml_stats.deleted_row_count == 0 + assert dml_stats.updated_row_count == 0 + + def test_from_api_repr_partial_stats(self): + klass = self._get_target_class() + result = klass.from_api_repr({"deletedRowCount": "12"}) + + assert isinstance(result, klass) + assert result.inserted_row_count == 0 + assert result.deleted_row_count == 12 + assert result.updated_row_count == 0 + + def test_from_api_repr_full_stats(self): + klass = self._get_target_class() + result = klass.from_api_repr( + {"updatedRowCount": "4", "insertedRowCount": "7", "deletedRowCount": "25"} + ) + + assert isinstance(result, klass) + assert result.inserted_row_count == 7 + assert result.deleted_row_count == 25 + assert result.updated_row_count == 4 + + class TestQueryPlanEntryStep(_Base): KIND = "KIND" SUBSTEPS = ("SUB1", "SUB2") From 4ff8bed5c3f13df1930afee244ed776b21551800 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 16 Jul 2021 03:18:14 -0500 Subject: [PATCH 273/341] refactor: omit `read_session` with latest google-cloud-bigquery-storage (#748) * refactor: omit `read_session` with latest google-cloud-bigquery-storage `read_session` is unnecessary as of `google-cloud-bigquery-storage>=2.6.0`. This will allow us to more loudly deprecate the use of `rows(read_session)`. Rather than require 2.6.0, version switches will allow us to keep our requirements range wider. Will want to give this version some time to bake before making it required. 
* optimize _verify_bq_storage_version * fix failing tests due to optimization * fix unit tests * create BQStorageVersions class for version comparisons * add type annotations Also, use packaging directly, since that's all pkg_resources does https://github.com/pypa/setuptools/blob/a4dbe3457d89cf67ee3aa571fdb149e6eb544e88/pkg_resources/__init__.py\#L112 * allow legacy versions * fix coverage * fix coverage * add tests for version helpers --- google/cloud/bigquery/_helpers.py | 74 +++++++++++++++++------- google/cloud/bigquery/_pandas_helpers.py | 10 +++- google/cloud/bigquery/client.py | 4 +- google/cloud/bigquery/table.py | 2 +- tests/unit/test__helpers.py | 39 +++++++++++-- tests/unit/test__pandas_helpers.py | 69 ++++++++++++++++++++++ tests/unit/test_client.py | 4 +- tests/unit/test_magics.py | 2 +- tests/unit/test_table.py | 2 +- 9 files changed, 174 insertions(+), 32 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 77054542a..bf0f80e22 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -26,7 +26,7 @@ from google.cloud._helpers import _RFC3339_MICROS from google.cloud._helpers import _RFC3339_NO_FRACTION from google.cloud._helpers import _to_bytes -import pkg_resources +import packaging.version from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError @@ -41,31 +41,65 @@ re.VERBOSE, ) -_MIN_BQ_STORAGE_VERSION = pkg_resources.parse_version("2.0.0") +_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0") +_BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") -def _verify_bq_storage_version(): - """Verify that a recent enough version of BigQuery Storage extra is installed. +class BQStorageVersions: + """Version comparisons for google-cloud-bigqueyr-storage package.""" - The function assumes that google-cloud-bigquery-storage extra is installed, and - should thus be used in places where this assumption holds. 
+ def __init__(self): + self._installed_version = None - Because `pip` can install an outdated version of this extra despite the constraints - in setup.py, the the calling code can use this helper to verify the version - compatibility at runtime. - """ - from google.cloud import bigquery_storage + @property + def installed_version(self) -> packaging.version.Version: + """Return the parsed version of google-cloud-bigquery-storage.""" + if self._installed_version is None: + from google.cloud import bigquery_storage - installed_version = pkg_resources.parse_version( - getattr(bigquery_storage, "__version__", "legacy") - ) + self._installed_version = packaging.version.parse( + # Use 0.0.0, since it is earlier than any released version. + # Legacy versions also have the same property, but + # creating a LegacyVersion has been deprecated. + # https://github.com/pypa/packaging/issues/321 + getattr(bigquery_storage, "__version__", "0.0.0") + ) - if installed_version < _MIN_BQ_STORAGE_VERSION: - msg = ( - "Dependency google-cloud-bigquery-storage is outdated, please upgrade " - f"it to version >= 2.0.0 (version found: {installed_version})." - ) - raise LegacyBigQueryStorageError(msg) + return self._installed_version + + @property + def is_read_session_optional(self) -> bool: + """True if read_session is optional to rows(). + + See: https://github.com/googleapis/python-bigquery-storage/pull/228 + """ + return self.installed_version >= _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION + + def verify_version(self): + """Verify that a recent enough version of BigQuery Storage extra is + installed. + + The function assumes that google-cloud-bigquery-storage extra is + installed, and should thus be used in places where this assumption + holds. + + Because `pip` can install an outdated version of this extra despite the + constraints in `setup.py`, the calling code can use this helper to + verify the version compatibility at runtime. 
+ + Raises: + LegacyBigQueryStorageError: + If the google-cloud-bigquery-storage package is outdated. + """ + if self.installed_version < _MIN_BQ_STORAGE_VERSION: + msg = ( + "Dependency google-cloud-bigquery-storage is outdated, please upgrade " + f"it to version >= 2.0.0 (version found: {self.installed_version})." + ) + raise LegacyBigQueryStorageError(msg) + + +BQ_STORAGE_VERSIONS = BQStorageVersions() def _not_null(value, field): diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 285c0e83c..2ff96da4d 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -41,6 +41,7 @@ # Having BQ Storage available implies that pyarrow >=1.0.0 is available, too. _ARROW_COMPRESSION_SUPPORT = True +from google.cloud.bigquery import _helpers from google.cloud.bigquery import schema @@ -590,7 +591,14 @@ def _bqstorage_page_to_dataframe(column_names, dtypes, page): def _download_table_bqstorage_stream( download_state, bqstorage_client, session, stream, worker_queue, page_to_item ): - rowstream = bqstorage_client.read_rows(stream.name).rows(session) + reader = bqstorage_client.read_rows(stream.name) + + # Avoid deprecation warnings for passing in unnecessary read session. 
+ # https://github.com/googleapis/python-bigquery-storage/issues/229 + if _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional: + rowstream = reader.rows() + else: + rowstream = reader.rows(session) for page in rowstream.pages: if download_state.done: diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index de259abce..8572ba911 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -61,7 +61,7 @@ from google.cloud.bigquery._helpers import _get_sub_prop from google.cloud.bigquery._helpers import _record_field_to_json from google.cloud.bigquery._helpers import _str_or_none -from google.cloud.bigquery._helpers import _verify_bq_storage_version +from google.cloud.bigquery._helpers import BQ_STORAGE_VERSIONS from google.cloud.bigquery._helpers import _verify_job_config_type from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _pandas_helpers @@ -508,7 +508,7 @@ def _ensure_bqstorage_client( return None try: - _verify_bq_storage_version() + BQ_STORAGE_VERSIONS.verify_version() except LegacyBigQueryStorageError as exc: warnings.warn(str(exc)) return None diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 765110ae6..2d9c15f50 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1565,7 +1565,7 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): return False try: - _helpers._verify_bq_storage_version() + _helpers.BQ_STORAGE_VERSIONS.verify_version() except LegacyBigQueryStorageError as exc: warnings.warn(str(exc)) return False diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index c62947d37..af026ccbe 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -26,11 +26,17 @@ @unittest.skipIf(bigquery_storage is None, "Requires `google-cloud-bigquery-storage`") -class Test_verify_bq_storage_version(unittest.TestCase): +class 
TestBQStorageVersions(unittest.TestCase): + def _object_under_test(self): + from google.cloud.bigquery import _helpers + + return _helpers.BQStorageVersions() + def _call_fut(self): - from google.cloud.bigquery._helpers import _verify_bq_storage_version + from google.cloud.bigquery import _helpers - return _verify_bq_storage_version() + _helpers.BQ_STORAGE_VERSIONS._installed_version = None + return _helpers.BQ_STORAGE_VERSIONS.verify_version() def test_raises_no_error_w_recent_bqstorage(self): from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError @@ -53,10 +59,35 @@ def test_raises_error_w_unknown_bqstorage_version(self): with mock.patch("google.cloud.bigquery_storage", autospec=True) as fake_module: del fake_module.__version__ - error_pattern = r"version found: legacy" + error_pattern = r"version found: 0.0.0" with self.assertRaisesRegex(LegacyBigQueryStorageError, error_pattern): self._call_fut() + def test_installed_version_returns_cached(self): + versions = self._object_under_test() + versions._installed_version = object() + assert versions.installed_version is versions._installed_version + + def test_installed_version_returns_parsed_version(self): + versions = self._object_under_test() + + with mock.patch("google.cloud.bigquery_storage.__version__", new="1.2.3"): + version = versions.installed_version + + assert version.major == 1 + assert version.minor == 2 + assert version.micro == 3 + + def test_is_read_session_optional_true(self): + versions = self._object_under_test() + with mock.patch("google.cloud.bigquery_storage.__version__", new="2.6.0"): + assert versions.is_read_session_optional + + def test_is_read_session_optional_false(self): + versions = self._object_under_test() + with mock.patch("google.cloud.bigquery_storage.__version__", new="2.5.0"): + assert not versions.is_read_session_optional + class Test_not_null(unittest.TestCase): def _call_fut(self, value, field): diff --git a/tests/unit/test__pandas_helpers.py 
b/tests/unit/test__pandas_helpers.py index aa87e28f5..0ba671cd9 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -40,11 +40,14 @@ import pytz from google import api_core +from google.cloud.bigquery import _helpers from google.cloud.bigquery import schema from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT try: from google.cloud import bigquery_storage + + _helpers.BQ_STORAGE_VERSIONS.verify_version() except ImportError: # pragma: NO COVER bigquery_storage = None @@ -1311,6 +1314,72 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): assert schema_arg == expected_schema_arg +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test__download_table_bqstorage_stream_includes_read_session( + monkeypatch, module_under_test +): + import google.cloud.bigquery_storage_v1.reader + import google.cloud.bigquery_storage_v1.types + + monkeypatch.setattr(_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None) + monkeypatch.setattr(bigquery_storage, "__version__", "2.5.0") + bqstorage_client = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + reader = mock.create_autospec( + google.cloud.bigquery_storage_v1.reader.ReadRowsStream, instance=True + ) + bqstorage_client.read_rows.return_value = reader + session = google.cloud.bigquery_storage_v1.types.ReadSession() + + module_under_test._download_table_bqstorage_stream( + module_under_test._DownloadState(), + bqstorage_client, + session, + google.cloud.bigquery_storage_v1.types.ReadStream(name="test"), + queue.Queue(), + mock.Mock(), + ) + + reader.rows.assert_called_once_with(session) + + +@pytest.mark.skipif( + bigquery_storage is None + or not _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional, + reason="Requires `google-cloud-bigquery-storage` >= 2.6.0", +) +def test__download_table_bqstorage_stream_omits_read_session( + monkeypatch, module_under_test +): + 
import google.cloud.bigquery_storage_v1.reader + import google.cloud.bigquery_storage_v1.types + + monkeypatch.setattr(_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None) + monkeypatch.setattr(bigquery_storage, "__version__", "2.6.0") + bqstorage_client = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + reader = mock.create_autospec( + google.cloud.bigquery_storage_v1.reader.ReadRowsStream, instance=True + ) + bqstorage_client.read_rows.return_value = reader + session = google.cloud.bigquery_storage_v1.types.ReadSession() + + module_under_test._download_table_bqstorage_stream( + module_under_test._DownloadState(), + bqstorage_client, + session, + google.cloud.bigquery_storage_v1.types.ReadStream(name="test"), + queue.Queue(), + mock.Mock(), + ) + + reader.rows.assert_called_once_with() + + @pytest.mark.parametrize( "stream_count,maxsize_kwarg,expected_call_count,expected_maxsize", [ diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 2be8daab6..6b62eb85b 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -663,7 +663,7 @@ def test_ensure_bqstorage_client_obsolete_dependency(self): client = self._make_one(project=self.PROJECT, credentials=creds) patcher = mock.patch( - "google.cloud.bigquery.client._verify_bq_storage_version", + "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", side_effect=LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: @@ -700,7 +700,7 @@ def test_ensure_bqstorage_client_existing_client_check_fails(self): mock_storage_client = mock.sentinel.mock_storage_client patcher = mock.patch( - "google.cloud.bigquery.client._verify_bq_storage_version", + "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", side_effect=LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: diff --git a/tests/unit/test_magics.py 
b/tests/unit/test_magics.py index 5e9bf28a9..d030482cc 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -368,7 +368,7 @@ def test__make_bqstorage_client_true_obsolete_dependency(): ) patcher = mock.patch( - "google.cloud.bigquery.client._verify_bq_storage_version", + "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", side_effect=LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index b30f16fe0..37650cd27 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1889,7 +1889,7 @@ def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self): iterator = self._make_one(first_page_response=None) # not cached patcher = mock.patch( - "google.cloud.bigquery.table._helpers._verify_bq_storage_version", + "google.cloud.bigquery.table._helpers.BQ_STORAGE_VERSIONS.verify_version", side_effect=LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: From 22fd848cae4af1148040e1faa31dd15a4d674687 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 16 Jul 2021 12:02:15 -0500 Subject: [PATCH 274/341] docs: add loading data from Firestore backup sample (#737) Follow-up to https://github.com/googleapis/python-bigquery/pull/736 To be included here: https://cloud.google.com/bigquery/docs/loading-data-cloud-firestore Also * Use `google-cloud-testutils` for cleanup as described in https://github.com/googleapis/python-test-utils/pull/39 --- samples/snippets/conftest.py | 39 +++++-------- samples/snippets/load_table_uri_firestore.py | 55 +++++++++++++++++++ .../snippets/load_table_uri_firestore_test.py | 21 +++++++ samples/snippets/requirements-test.txt | 1 + samples/snippets/test_update_with_dml.py | 4 +- 5 files changed, 94 insertions(+), 26 deletions(-) create mode 100644 samples/snippets/load_table_uri_firestore.py create mode 
100644 samples/snippets/load_table_uri_firestore_test.py diff --git a/samples/snippets/conftest.py b/samples/snippets/conftest.py index cb11eb68f..000e5f85c 100644 --- a/samples/snippets/conftest.py +++ b/samples/snippets/conftest.py @@ -12,38 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -import datetime -import random - from google.cloud import bigquery import pytest +import test_utils.prefixer -RESOURCE_PREFIX = "python_bigquery_samples_snippets" -RESOURCE_DATE_FORMAT = "%Y%m%d_%H%M%S" -RESOURCE_DATE_LENGTH = 4 + 2 + 2 + 1 + 2 + 2 + 2 - - -def resource_prefix() -> str: - timestamp = datetime.datetime.utcnow().strftime(RESOURCE_DATE_FORMAT) - random_string = hex(random.randrange(1000000))[2:] - return f"{RESOURCE_PREFIX}_{timestamp}_{random_string}" - - -def resource_name_to_date(resource_name: str): - start_date = len(RESOURCE_PREFIX) + 1 - date_string = resource_name[start_date : start_date + RESOURCE_DATE_LENGTH] - return datetime.datetime.strptime(date_string, RESOURCE_DATE_FORMAT) +prefixer = test_utils.prefixer.Prefixer("python-bigquery", "samples/snippets") @pytest.fixture(scope="session", autouse=True) def cleanup_datasets(bigquery_client: bigquery.Client): - yesterday = datetime.datetime.utcnow() - datetime.timedelta(days=1) for dataset in bigquery_client.list_datasets(): - if ( - dataset.dataset_id.startswith(RESOURCE_PREFIX) - and resource_name_to_date(dataset.dataset_id) < yesterday - ): + if prefixer.should_cleanup(dataset.dataset_id): bigquery_client.delete_dataset( dataset, delete_contents=True, not_found_ok=True ) @@ -62,7 +42,7 @@ def project_id(bigquery_client): @pytest.fixture(scope="session") def dataset_id(bigquery_client: bigquery.Client, project_id: str): - dataset_id = resource_prefix() + dataset_id = prefixer.create_prefix() full_dataset_id = f"{project_id}.{dataset_id}" dataset = bigquery.Dataset(full_dataset_id) bigquery_client.create_dataset(dataset) @@ -70,6 +50,17 @@ 
def dataset_id(bigquery_client: bigquery.Client, project_id: str): bigquery_client.delete_dataset(dataset, delete_contents=True, not_found_ok=True) +@pytest.fixture +def random_table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): + """Create a new table ID each time, so random_table_id can be used as + target for load jobs. + """ + random_table_id = prefixer.create_prefix() + full_table_id = f"{project_id}.{dataset_id}.{random_table_id}" + yield full_table_id + bigquery_client.delete_table(full_table_id, not_found_ok=True) + + @pytest.fixture def bigquery_client_patch(monkeypatch, bigquery_client): monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client) diff --git a/samples/snippets/load_table_uri_firestore.py b/samples/snippets/load_table_uri_firestore.py new file mode 100644 index 000000000..bf9d01349 --- /dev/null +++ b/samples/snippets/load_table_uri_firestore.py @@ -0,0 +1,55 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_firestore(table_id): + orig_table_id = table_id + # [START bigquery_load_table_gcs_firestore] + # TODO(developer): Set table_id to the ID of the table to create. 
+ table_id = "your-project.your_dataset.your_table_name" + + # TODO(developer): Set uri to the path of the kind export metadata + uri = ( + "gs://cloud-samples-data/bigquery/us-states" + "/2021-07-02T16:04:48_70344/all_namespaces/kind_us-states" + "/all_namespaces_kind_us-states.export_metadata" + ) + + # TODO(developer): Set projection_fields to a list of document properties + # to import. Leave unset or set to `None` for all fields. + projection_fields = ["name", "post_abbr"] + + # [END bigquery_load_table_gcs_firestore] + table_id = orig_table_id + + # [START bigquery_load_table_gcs_firestore] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + job_config = bigquery.LoadJobConfig( + source_format=bigquery.SourceFormat.DATASTORE_BACKUP, + projection_fields=projection_fields, + ) + + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + + load_job.result() # Waits for the job to complete. + + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_firestore] diff --git a/samples/snippets/load_table_uri_firestore_test.py b/samples/snippets/load_table_uri_firestore_test.py new file mode 100644 index 000000000..ffa02cdf9 --- /dev/null +++ b/samples/snippets/load_table_uri_firestore_test.py @@ -0,0 +1,21 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import load_table_uri_firestore + + +def test_load_table_uri_firestore(capsys, random_table_id): + load_table_uri_firestore.load_table_uri_firestore(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index b0cf76724..9e9d4e40f 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,2 +1,3 @@ +google-cloud-testutils==0.3.0 pytest==6.2.4 mock==4.0.3 diff --git a/samples/snippets/test_update_with_dml.py b/samples/snippets/test_update_with_dml.py index 3cca7a649..912fd76e2 100644 --- a/samples/snippets/test_update_with_dml.py +++ b/samples/snippets/test_update_with_dml.py @@ -15,13 +15,13 @@ from google.cloud import bigquery import pytest -from conftest import resource_prefix +from conftest import prefixer import update_with_dml @pytest.fixture def table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): - table_id = f"{resource_prefix()}_update_with_dml" + table_id = f"{prefixer.create_prefix()}_update_with_dml" yield table_id full_table_id = f"{project_id}.{dataset_id}.{table_id}" bigquery_client.delete_table(full_table_id, not_found_ok=True) From b8b5433898ec881f8da1303614780a660d94733a Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Mon, 19 Jul 2021 10:00:41 +0200 Subject: [PATCH 275/341] feat: add standard sql table type, update scalar type enums (#777) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add standard sql table type, update scalar type enums Committer: @shollyman PiperOrigin-RevId: 385164907 Source-Link: https://github.com/googleapis/googleapis/commit/9ae82b82bdb634058af4b2bafe53c37b8566f68d Source-Link: 
https://github.com/googleapis/googleapis-gen/commit/bc1724b0b544bdcd9b5b2f4e3d8676f75adacfdf * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/master/packages/owl-bot/README.md * fix: exclude copying microgenerated '.coveragerc' * fix: add 'INTERVAL'/'JSON' to _SQL_SCALAR_TYPES Co-authored-by: Owl Bot Co-authored-by: Tres Seaver --- google/cloud/bigquery/enums.py | 2 ++ google/cloud/bigquery_v2/__init__.py | 2 ++ google/cloud/bigquery_v2/types/__init__.py | 2 ++ .../cloud/bigquery_v2/types/standard_sql.py | 19 ++++++++++++++++++- owlbot.py | 1 + 5 files changed, 25 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index ef35dffe0..0da01d665 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -191,9 +191,11 @@ class KeyResultStatementKind: "DATE", "TIME", "DATETIME", + "INTERVAL", "GEOGRAPHY", "NUMERIC", "BIGNUMERIC", + "JSON", ) ) diff --git a/google/cloud/bigquery_v2/__init__.py b/google/cloud/bigquery_v2/__init__.py index 476bd5747..f9957efa9 100644 --- a/google/cloud/bigquery_v2/__init__.py +++ b/google/cloud/bigquery_v2/__init__.py @@ -26,6 +26,7 @@ from .types.standard_sql import StandardSqlDataType from .types.standard_sql import StandardSqlField from .types.standard_sql import StandardSqlStructType +from .types.standard_sql import StandardSqlTableType from .types.table_reference import TableReference __all__ = ( @@ -40,5 +41,6 @@ "StandardSqlDataType", "StandardSqlField", "StandardSqlStructType", + "StandardSqlTableType", "TableReference", ) diff --git a/google/cloud/bigquery_v2/types/__init__.py b/google/cloud/bigquery_v2/types/__init__.py index 9c850dca1..83bbb3a54 100644 --- a/google/cloud/bigquery_v2/types/__init__.py +++ b/google/cloud/bigquery_v2/types/__init__.py @@ -27,6 +27,7 @@ StandardSqlDataType, StandardSqlField, StandardSqlStructType, + StandardSqlTableType, ) from .table_reference import TableReference @@ -42,5 +43,6 @@ 
"StandardSqlDataType", "StandardSqlField", "StandardSqlStructType", + "StandardSqlTableType", "TableReference", ) diff --git a/google/cloud/bigquery_v2/types/standard_sql.py b/google/cloud/bigquery_v2/types/standard_sql.py index b2191a417..7a845fc48 100644 --- a/google/cloud/bigquery_v2/types/standard_sql.py +++ b/google/cloud/bigquery_v2/types/standard_sql.py @@ -18,7 +18,12 @@ __protobuf__ = proto.module( package="google.cloud.bigquery.v2", - manifest={"StandardSqlDataType", "StandardSqlField", "StandardSqlStructType",}, + manifest={ + "StandardSqlDataType", + "StandardSqlField", + "StandardSqlStructType", + "StandardSqlTableType", + }, ) @@ -54,9 +59,11 @@ class TypeKind(proto.Enum): DATE = 10 TIME = 20 DATETIME = 21 + INTERVAL = 26 GEOGRAPHY = 22 NUMERIC = 23 BIGNUMERIC = 24 + JSON = 25 ARRAY = 16 STRUCT = 17 @@ -97,4 +104,14 @@ class StandardSqlStructType(proto.Message): fields = proto.RepeatedField(proto.MESSAGE, number=1, message="StandardSqlField",) +class StandardSqlTableType(proto.Message): + r"""A table type + Attributes: + columns (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]): + The columns in this table type + """ + + columns = proto.RepeatedField(proto.MESSAGE, number=1, message="StandardSqlField",) + + __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/owlbot.py b/owlbot.py index 476c5ee5d..09845480a 100644 --- a/owlbot.py +++ b/owlbot.py @@ -70,6 +70,7 @@ library, excludes=[ "*.tar.gz", + ".coveragerc", "docs/index.rst", f"docs/bigquery_{library.name}/*_service.rst", f"docs/bigquery_{library.name}/services.rst", From 8c7b839a6ac1491c1c3b6b0e8755f4b70ed72ee3 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 19 Jul 2021 22:39:44 +0200 Subject: [PATCH 276/341] feat: add support for user defined Table View Functions (#724) * Add auxiliary classes for TVF routines * Add return_table_type property to Routine * Add system test for TVF routines * Use the generated StandardSqlTableType class * Update docs with new changes * Add 
missing space in misc. Sphinx directives --- docs/reference.rst | 1 + google/cloud/bigquery/__init__.py | 2 + google/cloud/bigquery/job/query.py | 14 +-- google/cloud/bigquery/routine/__init__.py | 2 + google/cloud/bigquery/routine/routine.py | 45 ++++++++ google/cloud/bigquery/table.py | 14 +-- tests/system/test_client.py | 79 ++++++++++++++ tests/unit/routine/test_routine.py | 127 ++++++++++++++++++++++ 8 files changed, 270 insertions(+), 14 deletions(-) diff --git a/docs/reference.rst b/docs/reference.rst index 8c38d0c44..8a5bff9a4 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -118,6 +118,7 @@ Routine routine.Routine routine.RoutineArgument routine.RoutineReference + routine.RoutineType Schema ====== diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index ced8cefae..222aadcc9 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -85,6 +85,7 @@ from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineArgument from google.cloud.bigquery.routine import RoutineReference +from google.cloud.bigquery.routine import RoutineType from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import PartitionRange from google.cloud.bigquery.table import RangePartitioning @@ -162,6 +163,7 @@ "KeyResultStatementKind", "OperationType", "QueryPriority", + "RoutineType", "SchemaUpdateOption", "SourceFormat", "SqlTypeNames", diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index d588e9b5a..2cb7ee28e 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1386,12 +1386,12 @@ def to_arrow( This argument does nothing if ``bqstorage_client`` is supplied. - ..versionadded:: 1.24.0 + .. versionadded:: 1.24.0 max_results (Optional[int]): Maximum number of rows to include in the result. No limit by default. - ..versionadded:: 2.21.0 + .. 
versionadded:: 2.21.0 Returns: pyarrow.Table @@ -1403,7 +1403,7 @@ def to_arrow( ValueError: If the :mod:`pyarrow` library cannot be imported. - ..versionadded:: 1.17.0 + .. versionadded:: 1.17.0 """ query_result = wait_for_query(self, progress_bar_type, max_results=max_results) return query_result.to_arrow( @@ -1452,7 +1452,7 @@ def to_dataframe( :func:`~google.cloud.bigquery.table.RowIterator.to_dataframe` for details. - ..versionadded:: 1.11.0 + .. versionadded:: 1.11.0 create_bqstorage_client (Optional[bool]): If ``True`` (default), create a BigQuery Storage API client using the default API settings. The BigQuery Storage API @@ -1461,18 +1461,18 @@ def to_dataframe( This argument does nothing if ``bqstorage_client`` is supplied. - ..versionadded:: 1.24.0 + .. versionadded:: 1.24.0 date_as_object (Optional[bool]): If ``True`` (default), cast dates to objects. If ``False``, convert to datetime64[ns] dtype. - ..versionadded:: 1.26.0 + .. versionadded:: 1.26.0 max_results (Optional[int]): Maximum number of rows to include in the result. No limit by default. - ..versionadded:: 2.21.0 + .. 
versionadded:: 2.21.0 Returns: A :class:`~pandas.DataFrame` populated with row data and column diff --git a/google/cloud/bigquery/routine/__init__.py b/google/cloud/bigquery/routine/__init__.py index d1c79b05e..7353073c8 100644 --- a/google/cloud/bigquery/routine/__init__.py +++ b/google/cloud/bigquery/routine/__init__.py @@ -19,6 +19,7 @@ from google.cloud.bigquery.routine.routine import Routine from google.cloud.bigquery.routine.routine import RoutineArgument from google.cloud.bigquery.routine.routine import RoutineReference +from google.cloud.bigquery.routine.routine import RoutineType __all__ = ( @@ -26,4 +27,5 @@ "Routine", "RoutineArgument", "RoutineReference", + "RoutineType", ) diff --git a/google/cloud/bigquery/routine/routine.py b/google/cloud/bigquery/routine/routine.py index bbc0a7693..a776212c3 100644 --- a/google/cloud/bigquery/routine/routine.py +++ b/google/cloud/bigquery/routine/routine.py @@ -21,6 +21,21 @@ import google.cloud._helpers from google.cloud.bigquery import _helpers import google.cloud.bigquery_v2.types +from google.cloud.bigquery_v2.types import StandardSqlTableType + + +class RoutineType: + """The fine-grained type of the routine. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#routinetype + + .. 
versionadded:: 2.22.0 + """ + + ROUTINE_TYPE_UNSPECIFIED = "ROUTINE_TYPE_UNSPECIFIED" + SCALAR_FUNCTION = "SCALAR_FUNCTION" + PROCEDURE = "PROCEDURE" + TABLE_VALUED_FUNCTION = "TABLE_VALUED_FUNCTION" class Routine(object): @@ -48,6 +63,7 @@ class Routine(object): "modified": "lastModifiedTime", "reference": "routineReference", "return_type": "returnType", + "return_table_type": "returnTableType", "type_": "routineType", "description": "description", "determinism_level": "determinismLevel", @@ -204,6 +220,35 @@ def return_type(self, value): resource = None self._properties[self._PROPERTY_TO_API_FIELD["return_type"]] = resource + @property + def return_table_type(self) -> StandardSqlTableType: + """The return type of a Table Valued Function (TVF) routine. + + .. versionadded:: 2.22.0 + """ + resource = self._properties.get( + self._PROPERTY_TO_API_FIELD["return_table_type"] + ) + if not resource: + return resource + + output = google.cloud.bigquery_v2.types.StandardSqlTableType() + raw_protobuf = json_format.ParseDict( + resource, output._pb, ignore_unknown_fields=True + ) + return type(output).wrap(raw_protobuf) + + @return_table_type.setter + def return_table_type(self, value): + if not value: + resource = None + else: + resource = { + "columns": [json_format.MessageToDict(col._pb) for col in value.columns] + } + + self._properties[self._PROPERTY_TO_API_FIELD["return_table_type"]] = resource + @property def imported_libraries(self): """List[str]: The path of the imported JavaScript libraries. diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 2d9c15f50..18d969a3f 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1684,7 +1684,7 @@ def to_arrow( This argument does nothing if ``bqstorage_client`` is supplied. - ..versionadded:: 1.24.0 + .. versionadded:: 1.24.0 Returns: pyarrow.Table @@ -1695,7 +1695,7 @@ def to_arrow( Raises: ValueError: If the :mod:`pyarrow` library cannot be imported. 
- ..versionadded:: 1.17.0 + .. versionadded:: 1.17.0 """ if pyarrow is None: raise ValueError(_NO_PYARROW_ERROR) @@ -1775,7 +1775,7 @@ def to_dataframe_iterable( created by the server. If ``max_queue_size`` is :data:`None`, the queue size is infinite. - ..versionadded:: 2.14.0 + .. versionadded:: 2.14.0 Returns: pandas.DataFrame: @@ -1861,7 +1861,7 @@ def to_dataframe( Use the :func:`tqdm.tqdm_gui` function to display a progress bar as a graphical dialog box. - ..versionadded:: 1.11.0 + .. versionadded:: 1.11.0 create_bqstorage_client (Optional[bool]): If ``True`` (default), create a BigQuery Storage API client using the default API settings. The BigQuery Storage API @@ -1870,13 +1870,13 @@ def to_dataframe( This argument does nothing if ``bqstorage_client`` is supplied. - ..versionadded:: 1.24.0 + .. versionadded:: 1.24.0 date_as_object (Optional[bool]): If ``True`` (default), cast dates to objects. If ``False``, convert to datetime64[ns] dtype. - ..versionadded:: 1.26.0 + .. versionadded:: 1.26.0 Returns: pandas.DataFrame: @@ -2010,7 +2010,7 @@ def to_dataframe_iterable( ) -> Iterator["pandas.DataFrame"]: """Create an iterable of pandas DataFrames, to process the table as a stream. - ..versionadded:: 2.21.0 + .. 
versionadded:: 2.21.0 Args: bqstorage_client: diff --git a/tests/system/test_client.py b/tests/system/test_client.py index cbca73619..ceb62b8cd 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2228,6 +2228,85 @@ def test_create_routine(self): assert len(rows) == 1 assert rows[0].max_value == 100.0 + def test_create_tvf_routine(self): + from google.cloud.bigquery import Routine, RoutineArgument, RoutineType + + StandardSqlDataType = bigquery_v2.types.StandardSqlDataType + StandardSqlField = bigquery_v2.types.StandardSqlField + StandardSqlTableType = bigquery_v2.types.StandardSqlTableType + + INT64 = StandardSqlDataType.TypeKind.INT64 + STRING = StandardSqlDataType.TypeKind.STRING + + client = Config.CLIENT + + dataset = self.temp_dataset(_make_dataset_id("create_tvf_routine")) + routine_ref = dataset.routine("test_tvf_routine") + + routine_body = """ + SELECT int_col, str_col + FROM ( + UNNEST([1, 2, 3]) int_col + JOIN + (SELECT str_col FROM UNNEST(["one", "two", "three"]) str_col) + ON TRUE + ) + WHERE int_col > threshold + """ + + return_table_type = StandardSqlTableType( + columns=[ + StandardSqlField( + name="int_col", type=StandardSqlDataType(type_kind=INT64), + ), + StandardSqlField( + name="str_col", type=StandardSqlDataType(type_kind=STRING), + ), + ] + ) + + routine_args = [ + RoutineArgument( + name="threshold", data_type=StandardSqlDataType(type_kind=INT64), + ) + ] + + routine_def = Routine( + routine_ref, + type_=RoutineType.TABLE_VALUED_FUNCTION, + arguments=routine_args, + return_table_type=return_table_type, + body=routine_body, + ) + + # Create TVF routine. + client.delete_routine(routine_ref, not_found_ok=True) + routine = client.create_routine(routine_def) + + assert routine.body == routine_body + assert routine.return_table_type == return_table_type + assert routine.arguments == routine_args + + # Execute the routine to see if it's working as expected. 
+ query_job = client.query( + f""" + SELECT int_col, str_col + FROM `{routine.reference}`(1) + ORDER BY int_col, str_col ASC + """ + ) + + result_rows = [tuple(row) for row in query_job.result()] + expected = [ + (2, "one"), + (2, "three"), + (2, "two"), + (3, "one"), + (3, "three"), + (3, "two"), + ] + assert result_rows == expected + def test_create_table_rows_fetch_nested_schema(self): table_name = "test_table" dataset = self.temp_dataset(_make_dataset_id("create_table_nested_schema")) diff --git a/tests/unit/routine/test_routine.py b/tests/unit/routine/test_routine.py index 0a59e7c5f..fdaf13324 100644 --- a/tests/unit/routine/test_routine.py +++ b/tests/unit/routine/test_routine.py @@ -156,12 +156,86 @@ def test_from_api_repr(target_class): assert actual_routine.return_type == bigquery_v2.types.StandardSqlDataType( type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 ) + assert actual_routine.return_table_type is None assert actual_routine.type_ == "SCALAR_FUNCTION" assert actual_routine._properties["someNewField"] == "someValue" assert actual_routine.description == "A routine description." 
assert actual_routine.determinism_level == "DETERMINISTIC" +def test_from_api_repr_tvf_function(target_class): + from google.cloud.bigquery.routine import RoutineArgument + from google.cloud.bigquery.routine import RoutineReference + from google.cloud.bigquery.routine import RoutineType + + StandardSqlDataType = bigquery_v2.types.StandardSqlDataType + StandardSqlField = bigquery_v2.types.StandardSqlField + StandardSqlTableType = bigquery_v2.types.StandardSqlTableType + + creation_time = datetime.datetime( + 2010, 5, 19, 16, 0, 0, tzinfo=google.cloud._helpers.UTC + ) + modified_time = datetime.datetime( + 2011, 10, 1, 16, 0, 0, tzinfo=google.cloud._helpers.UTC + ) + resource = { + "routineReference": { + "projectId": "my-project", + "datasetId": "my_dataset", + "routineId": "my_routine", + }, + "etag": "abcdefg", + "creationTime": str(google.cloud._helpers._millis(creation_time)), + "lastModifiedTime": str(google.cloud._helpers._millis(modified_time)), + "definitionBody": "SELECT x FROM UNNEST([1,2,3]) x WHERE x > a", + "arguments": [{"name": "a", "dataType": {"typeKind": "INT64"}}], + "language": "SQL", + "returnTableType": { + "columns": [{"name": "int_col", "type": {"typeKind": "INT64"}}] + }, + "routineType": "TABLE_VALUED_FUNCTION", + "someNewField": "someValue", + "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISTIC, + } + actual_routine = target_class.from_api_repr(resource) + + assert actual_routine.project == "my-project" + assert actual_routine.dataset_id == "my_dataset" + assert actual_routine.routine_id == "my_routine" + assert ( + actual_routine.path + == "/projects/my-project/datasets/my_dataset/routines/my_routine" + ) + assert actual_routine.reference == RoutineReference.from_string( + "my-project.my_dataset.my_routine" + ) + assert actual_routine.etag == "abcdefg" + assert actual_routine.created == creation_time + assert actual_routine.modified == modified_time + assert actual_routine.arguments == [ 
+ RoutineArgument( + name="a", + data_type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.INT64), + ) + ] + assert actual_routine.body == "SELECT x FROM UNNEST([1,2,3]) x WHERE x > a" + assert actual_routine.language == "SQL" + assert actual_routine.return_type is None + assert actual_routine.return_table_type == StandardSqlTableType( + columns=[ + StandardSqlField( + name="int_col", + type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.INT64), + ) + ] + ) + assert actual_routine.type_ == RoutineType.TABLE_VALUED_FUNCTION + assert actual_routine._properties["someNewField"] == "someValue" + assert actual_routine.description == "A routine description." + assert actual_routine.determinism_level == "DETERMINISTIC" + + def test_from_api_repr_w_minimal_resource(target_class): from google.cloud.bigquery.routine import RoutineReference @@ -261,6 +335,24 @@ def test_from_api_repr_w_unknown_fields(target_class): ["return_type"], {"returnType": {"typeKind": "INT64"}}, ), + ( + { + "definitionBody": "SELECT x FROM UNNEST([1,2,3]) x WHERE x > 1", + "language": "SQL", + "returnTableType": { + "columns": [{"name": "int_col", "type": {"typeKind": "INT64"}}] + }, + "routineType": "TABLE_VALUED_FUNCTION", + "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED, + }, + ["return_table_type"], + { + "returnTableType": { + "columns": [{"name": "int_col", "type": {"typeKind": "INT64"}}] + } + }, + ), ( { "arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}], @@ -361,6 +453,41 @@ def test_set_return_type_w_none(object_under_test): assert object_under_test._properties["returnType"] is None +def test_set_return_table_type_w_none(object_under_test): + object_under_test.return_table_type = None + assert object_under_test.return_table_type is None + assert object_under_test._properties["returnTableType"] is None + + +def test_set_return_table_type_w_not_none(object_under_test): + 
StandardSqlDataType = bigquery_v2.types.StandardSqlDataType + StandardSqlField = bigquery_v2.types.StandardSqlField + StandardSqlTableType = bigquery_v2.types.StandardSqlTableType + + table_type = StandardSqlTableType( + columns=[ + StandardSqlField( + name="int_col", + type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.INT64), + ), + StandardSqlField( + name="str_col", + type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.STRING), + ), + ] + ) + + object_under_test.return_table_type = table_type + + assert object_under_test.return_table_type == table_type + assert object_under_test._properties["returnTableType"] == { + "columns": [ + {"name": "int_col", "type": {"typeKind": "INT64"}}, + {"name": "str_col", "type": {"typeKind": "STRING"}}, + ] + } + + def test_set_description_w_none(object_under_test): object_under_test.description = None assert object_under_test.description is None From 5802092bef4cc1627f4568694fd56b6aa16507ff Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 19 Jul 2021 20:58:25 +0000 Subject: [PATCH 277/341] chore: release 2.22.0 (#771) :robot: I have created a release \*beep\* \*boop\* --- ## [2.22.0](https://www.github.com/googleapis/python-bigquery/compare/v2.21.0...v2.22.0) (2021-07-19) ### Features * add `LoadJobConfig.projection_fields` to select DATASTORE_BACKUP fields ([#736](https://www.github.com/googleapis/python-bigquery/issues/736)) ([c45a738](https://www.github.com/googleapis/python-bigquery/commit/c45a7380871af3dfbd3c45524cb606c60e1a01d1)) * add standard sql table type, update scalar type enums ([#777](https://www.github.com/googleapis/python-bigquery/issues/777)) ([b8b5433](https://www.github.com/googleapis/python-bigquery/commit/b8b5433898ec881f8da1303614780a660d94733a)) * add support for more detailed DML stats ([#758](https://www.github.com/googleapis/python-bigquery/issues/758)) 
([36fe86f](https://www.github.com/googleapis/python-bigquery/commit/36fe86f41c1a8f46167284f752a6d6bbf886a04b)) * add support for user defined Table View Functions ([#724](https://www.github.com/googleapis/python-bigquery/issues/724)) ([8c7b839](https://www.github.com/googleapis/python-bigquery/commit/8c7b839a6ac1491c1c3b6b0e8755f4b70ed72ee3)) ### Bug Fixes * avoid possible job already exists error ([#751](https://www.github.com/googleapis/python-bigquery/issues/751)) ([45b9308](https://www.github.com/googleapis/python-bigquery/commit/45b93089f5398740413104285cc8acfd5ebc9c08)) ### Dependencies * allow 2.x versions of `google-api-core`, `google-cloud-core`, `google-resumable-media` ([#770](https://www.github.com/googleapis/python-bigquery/issues/770)) ([87a09fa](https://www.github.com/googleapis/python-bigquery/commit/87a09fa3f2a9ab35728a1ac925f9d5f2e6616c65)) ### Documentation * add loading data from Firestore backup sample ([#737](https://www.github.com/googleapis/python-bigquery/issues/737)) ([22fd848](https://www.github.com/googleapis/python-bigquery/commit/22fd848cae4af1148040e1faa31dd15a4d674687)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). 
--- CHANGELOG.md | 25 +++++++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5fba4c517..2439d64b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,31 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.22.0](https://www.github.com/googleapis/python-bigquery/compare/v2.21.0...v2.22.0) (2021-07-19) + + +### Features + +* add `LoadJobConfig.projection_fields` to select DATASTORE_BACKUP fields ([#736](https://www.github.com/googleapis/python-bigquery/issues/736)) ([c45a738](https://www.github.com/googleapis/python-bigquery/commit/c45a7380871af3dfbd3c45524cb606c60e1a01d1)) +* add standard sql table type, update scalar type enums ([#777](https://www.github.com/googleapis/python-bigquery/issues/777)) ([b8b5433](https://www.github.com/googleapis/python-bigquery/commit/b8b5433898ec881f8da1303614780a660d94733a)) +* add support for more detailed DML stats ([#758](https://www.github.com/googleapis/python-bigquery/issues/758)) ([36fe86f](https://www.github.com/googleapis/python-bigquery/commit/36fe86f41c1a8f46167284f752a6d6bbf886a04b)) +* add support for user defined Table View Functions ([#724](https://www.github.com/googleapis/python-bigquery/issues/724)) ([8c7b839](https://www.github.com/googleapis/python-bigquery/commit/8c7b839a6ac1491c1c3b6b0e8755f4b70ed72ee3)) + + +### Bug Fixes + +* avoid possible job already exists error ([#751](https://www.github.com/googleapis/python-bigquery/issues/751)) ([45b9308](https://www.github.com/googleapis/python-bigquery/commit/45b93089f5398740413104285cc8acfd5ebc9c08)) + + +### Dependencies + +* allow 2.x versions of `google-api-core`, `google-cloud-core`, `google-resumable-media` ([#770](https://www.github.com/googleapis/python-bigquery/issues/770)) ([87a09fa](https://www.github.com/googleapis/python-bigquery/commit/87a09fa3f2a9ab35728a1ac925f9d5f2e6616c65)) + + +### Documentation + +* add loading data from 
Firestore backup sample ([#737](https://www.github.com/googleapis/python-bigquery/issues/737)) ([22fd848](https://www.github.com/googleapis/python-bigquery/commit/22fd848cae4af1148040e1faa31dd15a4d674687)) + ## [2.21.0](https://www.github.com/googleapis/python-bigquery/compare/v2.20.0...v2.21.0) (2021-07-12) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 563b0e160..2db0ca518 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.21.0" +__version__ = "2.22.0" From 7a55a7789a5d3f8f5e4f1293e1cdccc374ea03b7 Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Tue, 20 Jul 2021 02:07:00 -0600 Subject: [PATCH 278/341] chore: add note to preserve >1, <3 version range for google-api-core, google-cloud-core (#784) * fix(deps): pin 'google-{api,cloud}-core', 'google-auth' to allow 2.x versions * Update setup.py --- setup.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 71958ccf9..0ca19b576 100644 --- a/setup.py +++ b/setup.py @@ -30,9 +30,15 @@ release_status = "Development Status :: 5 - Production/Stable" dependencies = [ "grpcio >= 1.38.1, < 2.0dev", # https://github.com/googleapis/python-bigquery/issues/695 - "google-api-core[grpc] >= 1.29.0, < 3.0.0dev", + # NOTE: Maintainers, please do not require google-api-core>=2.x.x + # Until this issue is closed + # https://github.com/googleapis/google-cloud-python/issues/10566 + "google-api-core[grpc] >= 1.29.0, <3.0.0dev", "proto-plus >= 1.10.0", - "google-cloud-core >= 1.4.1, < 3.0dev", + # NOTE: Maintainers, please do not require google-cloud-core>=2.x.x + # Until this issue is closed + # https://github.com/googleapis/google-cloud-python/issues/10566 + "google-cloud-core >= 1.4.1, <3.0.0dev", "google-resumable-media >= 0.6.0, < 
3.0dev", "packaging >= 14.3", "protobuf >= 3.12.0", From d1cbc3817a4b93f61356bd14ba51fb176e5d0269 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 20 Jul 2021 10:07:30 +0200 Subject: [PATCH 279/341] chore(deps): update dependency google-cloud-bigquery to v2.22.0 (#783) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index c7aa209ad..d70ac3fa4 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.21.0 +google-cloud-bigquery==2.22.0 google-cloud-bigquery-storage==2.6.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index b62c84c33..7b4721eac 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.21.0 +google-cloud-bigquery==2.22.0 google-cloud-bigquery-storage==2.6.0 google-auth-oauthlib==0.4.4 grpcio==1.38.1 From e403721af1373eb1f1a1c7be5b2182e3819ed1f9 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 21 Jul 2021 18:59:49 +0200 Subject: [PATCH 280/341] fix: issue a warning if buggy pyarrow is detected (#787) Some pyarrow versions can cause issues when loading data from a dataframe. This commit detects if such a pyarrow version is installed and warns the user. 
--- google/cloud/bigquery/client.py | 15 +++++++++++++ tests/unit/test_client.py | 37 +++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 8572ba911..273cf5f77 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -27,6 +27,7 @@ import json import math import os +import packaging.version import tempfile from typing import Any, BinaryIO, Dict, Iterable, Optional, Sequence, Tuple, Union import uuid @@ -34,6 +35,8 @@ try: import pyarrow + + _PYARROW_VERSION = packaging.version.parse(pyarrow.__version__) except ImportError: # pragma: NO COVER pyarrow = None @@ -118,6 +121,9 @@ # https://github.com/googleapis/python-bigquery/issues/438 _MIN_GET_QUERY_RESULTS_TIMEOUT = 120 +# https://github.com/googleapis/python-bigquery/issues/781#issuecomment-883497414 +_PYARROW_BAD_VERSIONS = frozenset([packaging.version.Version("2.0.0")]) + class Project(object): """Wrapper for resource describing a BigQuery project. @@ -2609,6 +2615,15 @@ def load_table_from_dataframe( try: if job_config.source_format == job.SourceFormat.PARQUET: + if _PYARROW_VERSION in _PYARROW_BAD_VERSIONS: + msg = ( + "Loading dataframe data in PARQUET format with pyarrow " + f"{_PYARROW_VERSION} can result in data corruption. It is " + "therefore *strongly* advised to use a different pyarrow " + "version or a different source format. 
" + "See: https://github.com/googleapis/python-bigquery/issues/781" + ) + warnings.warn(msg, category=RuntimeWarning) if job_config.schema: if parquet_compression == "snappy": # adjust the default value diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 6b62eb85b..c1aba9b67 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -27,6 +27,7 @@ import warnings import mock +import packaging import requests import pytest import pytz @@ -7510,6 +7511,42 @@ def test_load_table_from_dataframe_wo_pyarrow_raises_error(self): parquet_compression="gzip", ) + def test_load_table_from_dataframe_w_bad_pyarrow_issues_warning(self): + pytest.importorskip("pandas", reason="Requires `pandas`") + pytest.importorskip("pyarrow", reason="Requires `pyarrow`") + + client = self._make_client() + records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] + dataframe = pandas.DataFrame(records) + + pyarrow_version_patch = mock.patch( + "google.cloud.bigquery.client._PYARROW_VERSION", + packaging.version.parse("2.0.0"), # A known bad version of pyarrow. 
+ ) + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=google.api_core.exceptions.NotFound("Table not found"), + ) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + with load_patch, get_table_patch, pyarrow_version_patch: + with warnings.catch_warnings(record=True) as warned: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, location=self.LOCATION, + ) + + expected_warnings = [ + warning for warning in warned if "pyarrow" in str(warning).lower() + ] + assert len(expected_warnings) == 1 + assert issubclass(expected_warnings[0].category, RuntimeWarning) + msg = str(expected_warnings[0].message) + assert "pyarrow 2.0.0" in msg + assert "data corruption" in msg + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nulls(self): From e58702967d572e83b4c774278818302594a511b7 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 21 Jul 2021 19:37:06 +0200 Subject: [PATCH 281/341] feat: add Samples section to CONTRIBUTING.rst (#785) Source-Link: https://github.com/googleapis/synthtool/commit/52e4e46eff2a0b70e3ff5506a02929d089d077d4 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:6186535cbdbf6b9fe61f00294929221d060634dae4a0795c1cefdbc995b2d605 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 2 +- CONTRIBUTING.rst | 24 ++++++++++++++++++++++++ samples/geography/noxfile.py | 5 +++-- samples/snippets/noxfile.py | 5 +++-- 4 files changed, 31 insertions(+), 5 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index cb06536da..d57f74204 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: 
sha256:5ff7446edeaede81c3ed58b23a4e76a5403fba1350ce28478045657303b6479d + digest: sha256:6186535cbdbf6b9fe61f00294929221d060634dae4a0795c1cefdbc995b2d605 diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 102355b3a..2faf5aed3 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -177,6 +177,30 @@ Build the docs via: $ nox -s docs +************************* +Samples and code snippets +************************* + +Code samples and snippets live in the `samples/` catalogue. Feel free to +provide more examples, but make sure to write tests for those examples. +Each folder containing example code requires its own `noxfile.py` script +which automates testing. If you decide to create a new folder, you can +base it on the `samples/snippets` folder (providing `noxfile.py` and +the requirements files). + +The tests will run against a real Google Cloud Project, so you should +configure them just like the System Tests. + +- To run sample tests, you can execute:: + + # Run all tests in a folder + $ cd samples/snippets + $ nox -s py-3.8 + + # Run a single sample test + $ cd samples/snippets + $ nox -s py-3.8 -- -k + ******************************************** Note About ``README`` as it pertains to PyPI ******************************************** diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index 160fe7286..9fc7f1782 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -28,8 +28,9 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -# Copy `noxfile_config.py` to your directory and modify it instead. +BLACK_VERSION = "black==19.10b0" +# Copy `noxfile_config.py` to your directory and modify it instead. # `TEST_CONFIG` dict is a configuration hook that allows users to # modify the test configurations. 
The values here should be in sync @@ -159,7 +160,7 @@ def lint(session: nox.sessions.Session) -> None: @nox.session def blacken(session: nox.sessions.Session) -> None: - session.install("black") + session.install(BLACK_VERSION) python_files = [path for path in os.listdir(".") if path.endswith(".py")] session.run("black", *python_files) diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index 160fe7286..9fc7f1782 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -28,8 +28,9 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -# Copy `noxfile_config.py` to your directory and modify it instead. +BLACK_VERSION = "black==19.10b0" +# Copy `noxfile_config.py` to your directory and modify it instead. # `TEST_CONFIG` dict is a configuration hook that allows users to # modify the test configurations. The values here should be in sync @@ -159,7 +160,7 @@ def lint(session: nox.sessions.Session) -> None: @nox.session def blacken(session: nox.sessions.Session) -> None: - session.install("black") + session.install(BLACK_VERSION) python_files = [path for path in os.listdir(".") if path.endswith(".py")] session.run("black", *python_files) From 46e65a6338b7c59acad895edebb97fd2e841d4a3 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 22 Jul 2021 08:44:11 -0500 Subject: [PATCH 282/341] chore: release 2.22.1 (#794) Release-As: 2.22.1 From be9b242f2180f5b795dfb3a168a97af1682999fd Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 22 Jul 2021 10:21:02 -0500 Subject: [PATCH 283/341] docs: add sample to delete job metadata (#798) Planned to be included in https://cloud.google.com/bigquery/docs/managing-jobs --- samples/snippets/conftest.py | 25 ++++++++++++++++ samples/snippets/delete_job.py | 44 +++++++++++++++++++++++++++++ samples/snippets/delete_job_test.py | 33 ++++++++++++++++++++++ tests/system/test_client.py | 17 ----------- 4 files changed, 102 insertions(+), 17 deletions(-) 
create mode 100644 samples/snippets/delete_job.py create mode 100644 samples/snippets/delete_job_test.py diff --git a/samples/snippets/conftest.py b/samples/snippets/conftest.py index 000e5f85c..74984f902 100644 --- a/samples/snippets/conftest.py +++ b/samples/snippets/conftest.py @@ -50,6 +50,31 @@ def dataset_id(bigquery_client: bigquery.Client, project_id: str): bigquery_client.delete_dataset(dataset, delete_contents=True, not_found_ok=True) +@pytest.fixture(scope="session") +def dataset_id_us_east1(bigquery_client: bigquery.Client, project_id: str): + dataset_id = prefixer.create_prefix() + full_dataset_id = f"{project_id}.{dataset_id}" + dataset = bigquery.Dataset(full_dataset_id) + dataset.location = "us-east1" + bigquery_client.create_dataset(dataset) + yield dataset_id + bigquery_client.delete_dataset(dataset, delete_contents=True, not_found_ok=True) + + +@pytest.fixture(scope="session") +def table_id_us_east1( + bigquery_client: bigquery.Client, project_id: str, dataset_id_us_east1: str +): + table_id = prefixer.create_prefix() + full_table_id = f"{project_id}.{dataset_id_us_east1}.{table_id}" + table = bigquery.Table( + full_table_id, schema=[bigquery.SchemaField("string_col", "STRING")] + ) + bigquery_client.create_table(table) + yield full_table_id + bigquery_client.delete_table(table, not_found_ok=True) + + @pytest.fixture def random_table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): """Create a new table ID each time, so random_table_id can be used as diff --git a/samples/snippets/delete_job.py b/samples/snippets/delete_job.py new file mode 100644 index 000000000..abed0c90d --- /dev/null +++ b/samples/snippets/delete_job.py @@ -0,0 +1,44 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def delete_job_metadata(job_id: str, location: str): + orig_job_id = job_id + orig_location = location + # [START bigquery_delete_job] + from google.cloud import bigquery + from google.api_core import exceptions + + # TODO(developer): Set the job ID to the ID of the job whose metadata you + # wish to delete. + job_id = "abcd-efgh-ijkl-mnop" + + # TODO(developer): Set the location to the region or multi-region + # containing the job. + location = "us-east1" + + # [END bigquery_delete_job] + job_id = orig_job_id + location = orig_location + + # [START bigquery_delete_job] + client = bigquery.Client() + + client.delete_job_metadata(job_id, location=location) + + try: + client.get_job(job_id, location=location) + except exceptions.NotFound: + print(f"Job metadata for job {location}:{job_id} was deleted.") + # [END bigquery_delete_job] diff --git a/samples/snippets/delete_job_test.py b/samples/snippets/delete_job_test.py new file mode 100644 index 000000000..c9baa817d --- /dev/null +++ b/samples/snippets/delete_job_test.py @@ -0,0 +1,33 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery + +import delete_job + + +def test_delete_job_metadata( + capsys, bigquery_client: bigquery.Client, table_id_us_east1: str +): + query_job: bigquery.QueryJob = bigquery_client.query( + f"SELECT COUNT(*) FROM `{table_id_us_east1}`", location="us-east1", + ) + query_job.result() + assert query_job.job_id is not None + + delete_job.delete_job_metadata(query_job.job_id, "us-east1") + + out, _ = capsys.readouterr() + assert "deleted" in out + assert f"us-east1:{query_job.job_id}" in out diff --git a/tests/system/test_client.py b/tests/system/test_client.py index ceb62b8cd..2536af9fc 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -63,7 +63,6 @@ from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi, enums @@ -506,22 +505,6 @@ def test_delete_dataset_delete_contents_false(self): with self.assertRaises(exceptions.BadRequest): Config.CLIENT.delete_dataset(dataset) - def test_delete_job_metadata(self): - dataset_id = _make_dataset_id("us_east1") - self.temp_dataset(dataset_id, location="us-east1") - full_table_id = f"{Config.CLIENT.project}.{dataset_id}.test_delete_job_metadata" - table = Table(full_table_id, schema=[SchemaField("col", "STRING")]) - Config.CLIENT.create_table(table) - query_job: bigquery.QueryJob = Config.CLIENT.query( - f"SELECT COUNT(*) FROM `{full_table_id}`", location="us-east1", - ) - query_job.result() - self.assertIsNotNone(Config.CLIENT.get_job(query_job)) - - Config.CLIENT.delete_job_metadata(query_job) - with self.assertRaises(NotFound): - Config.CLIENT.get_job(query_job) - def 
test_get_table_w_public_dataset(self): public = "bigquery-public-data" dataset_id = "samples" From f0990f2cd27b3a71040d67b4d335f3daef1120cd Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 22 Jul 2021 17:43:54 +0200 Subject: [PATCH 284/341] chore(deps): update dependency grpcio to v1.39.0 (#796) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 7b4721eac..3d9dce718 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.22.0 google-cloud-bigquery-storage==2.6.0 google-auth-oauthlib==0.4.4 -grpcio==1.38.1 +grpcio==1.39.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' From b804373277c1c1baa3370ebfb4783503b7ff360f Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 22 Jul 2021 14:36:30 -0400 Subject: [PATCH 285/341] fix: use a larger chunk size when loading data (#799) * The chunk size used for data uploads was too small (1MB). Now it's 100MB. 
* fix: The chunk size used for data uploads was too small --- google/cloud/bigquery/client.py | 2 +- tests/unit/test_client.py | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 273cf5f77..742ecac2e 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -98,7 +98,7 @@ from google.cloud.bigquery.table import RowIterator -_DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB +_DEFAULT_CHUNKSIZE = 100 * 1024 * 1024 # 100 MB _MAX_MULTIPART_SIZE = 5 * 1024 * 1024 _DEFAULT_NUM_RETRIES = 6 _BASE_UPLOAD_TEMPLATE = "{host}/upload/bigquery/v2/projects/{project}/jobs?uploadType=" diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index c1aba9b67..535685511 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -8076,3 +8076,23 @@ def test_schema_to_json_with_file_object(self): client.schema_to_json(schema_list, fake_file) assert file_content == json.loads(fake_file.getvalue()) + + +def test_upload_chunksize(client): + with mock.patch("google.cloud.bigquery.client.ResumableUpload") as RU: + upload = RU.return_value + + upload.finished = False + + def transmit_next_chunk(transport): + upload.finished = True + result = mock.MagicMock() + result.json.return_value = {} + return result + + upload.transmit_next_chunk = transmit_next_chunk + f = io.BytesIO() + client.load_table_from_file(f, "foo.bar") + + chunk_size = RU.call_args_list[0][0][1] + assert chunk_size == 100 * (1 << 20) From f20ee503f395b0443b570efb56c75b0b40d31179 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 22 Jul 2021 14:26:01 -0500 Subject: [PATCH 286/341] chore: release 2.22.1 (#797) * chore: release 2.22.1 * remove misc Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Tim Swast --- CHANGELOG.md | 14 ++++++++++++++ 
google/cloud/bigquery/version.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2439d64b0..7dbc5d4da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,20 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.22.1](https://www.github.com/googleapis/python-bigquery/compare/v2.22.0...v2.22.1) (2021-07-22) + + +### Bug Fixes + +* issue a warning if buggy pyarrow is detected ([#787](https://www.github.com/googleapis/python-bigquery/issues/787)) ([e403721](https://www.github.com/googleapis/python-bigquery/commit/e403721af1373eb1f1a1c7be5b2182e3819ed1f9)) +* use a larger chunk size when loading data ([#799](https://www.github.com/googleapis/python-bigquery/issues/799)) ([b804373](https://www.github.com/googleapis/python-bigquery/commit/b804373277c1c1baa3370ebfb4783503b7ff360f)) + + +### Documentation + +* add Samples section to CONTRIBUTING.rst ([#785](https://www.github.com/googleapis/python-bigquery/issues/785)) ([e587029](https://www.github.com/googleapis/python-bigquery/commit/e58702967d572e83b4c774278818302594a511b7)) +* add sample to delete job metadata ([#798](https://www.github.com/googleapis/python-bigquery/issues/798)) ([be9b242](https://www.github.com/googleapis/python-bigquery/commit/be9b242f2180f5b795dfb3a168a97af1682999fd)) + ## [2.22.0](https://www.github.com/googleapis/python-bigquery/compare/v2.21.0...v2.22.0) (2021-07-19) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 2db0ca518..dbc524478 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.22.0" +__version__ = "2.22.1" From 7e0e2bafc4c3f98a4246100f504fd78a01a28e7d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 23 Jul 2021 00:48:22 +0200 Subject: [PATCH 287/341] chore(deps): update dependency google-cloud-bigquery to v2.22.1 (#800) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index d70ac3fa4..3a83eda64 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.22.0 +google-cloud-bigquery==2.22.1 google-cloud-bigquery-storage==2.6.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 3d9dce718..ffa689a9e 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.22.0 +google-cloud-bigquery==2.22.1 google-cloud-bigquery-storage==2.6.0 google-auth-oauthlib==0.4.4 grpcio==1.39.0 From 419d36d6b1887041e5795dbc8fc808890e91ab11 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 23 Jul 2021 16:42:21 +0200 Subject: [PATCH 288/341] fix: retry ChunkedEncodingError by default (#802) --- google/cloud/bigquery/retry.py | 1 + tests/unit/test_retry.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index 5e9075fe1..2df4de08b 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -27,6 +27,7 @@ exceptions.TooManyRequests, exceptions.InternalServerError, exceptions.BadGateway, + requests.exceptions.ChunkedEncodingError, requests.exceptions.ConnectionError, auth_exceptions.TransportError, ) diff --git a/tests/unit/test_retry.py b/tests/unit/test_retry.py index 0bef1e5e1..6fb7f93fd 100644 --- a/tests/unit/test_retry.py +++ b/tests/unit/test_retry.py @@ -51,6 
+51,10 @@ def test_w_unstructured_requests_connectionerror(self): exc = requests.exceptions.ConnectionError() self.assertTrue(self._call_fut(exc)) + def test_w_unstructured_requests_chunked_encoding_error(self): + exc = requests.exceptions.ChunkedEncodingError() + self.assertTrue(self._call_fut(exc)) + def test_w_auth_transporterror(self): from google.auth.exceptions import TransportError From 16af7b0c93f3945af95123f4f9affd55ffa1f98d Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 23 Jul 2021 15:32:40 +0000 Subject: [PATCH 289/341] chore: fix kokoro config for samples (#804) Source-Link: https://github.com/googleapis/synthtool/commit/dd05f9d12f134871c9e45282349c9856fbebecdd Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:aea14a583128771ae8aefa364e1652f3c56070168ef31beb203534222d842b8b --- .github/.OwlBot.lock.yaml | 2 +- .kokoro/samples/python3.6/periodic-head.cfg | 2 +- .kokoro/samples/python3.7/periodic-head.cfg | 2 +- .kokoro/samples/python3.8/periodic-head.cfg | 2 +- .kokoro/samples/python3.9/periodic-head.cfg | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index d57f74204..9ee60f7e4 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:6186535cbdbf6b9fe61f00294929221d060634dae4a0795c1cefdbc995b2d605 + digest: sha256:aea14a583128771ae8aefa364e1652f3c56070168ef31beb203534222d842b8b diff --git a/.kokoro/samples/python3.6/periodic-head.cfg b/.kokoro/samples/python3.6/periodic-head.cfg index f9cfcd33e..5aa01bab5 100644 --- a/.kokoro/samples/python3.6/periodic-head.cfg +++ b/.kokoro/samples/python3.6/periodic-head.cfg @@ -7,5 +7,5 @@ env_vars: { env_vars: { key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-pubsub/.kokoro/test-samples-against-head.sh" + value: 
"github/python-bigquery/.kokoro/test-samples-against-head.sh" } diff --git a/.kokoro/samples/python3.7/periodic-head.cfg b/.kokoro/samples/python3.7/periodic-head.cfg index f9cfcd33e..5aa01bab5 100644 --- a/.kokoro/samples/python3.7/periodic-head.cfg +++ b/.kokoro/samples/python3.7/periodic-head.cfg @@ -7,5 +7,5 @@ env_vars: { env_vars: { key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-pubsub/.kokoro/test-samples-against-head.sh" + value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" } diff --git a/.kokoro/samples/python3.8/periodic-head.cfg b/.kokoro/samples/python3.8/periodic-head.cfg index f9cfcd33e..5aa01bab5 100644 --- a/.kokoro/samples/python3.8/periodic-head.cfg +++ b/.kokoro/samples/python3.8/periodic-head.cfg @@ -7,5 +7,5 @@ env_vars: { env_vars: { key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-pubsub/.kokoro/test-samples-against-head.sh" + value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" } diff --git a/.kokoro/samples/python3.9/periodic-head.cfg b/.kokoro/samples/python3.9/periodic-head.cfg index f9cfcd33e..5aa01bab5 100644 --- a/.kokoro/samples/python3.9/periodic-head.cfg +++ b/.kokoro/samples/python3.9/periodic-head.cfg @@ -7,5 +7,5 @@ env_vars: { env_vars: { key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-pubsub/.kokoro/test-samples-against-head.sh" + value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" } From eef6c8e4cc6fbd9c442605447e60242f67d48a7e Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Sun, 25 Jul 2021 09:51:55 -0400 Subject: [PATCH 290/341] test: Stop creating extra datasets (#791) --- tests/system/conftest.py | 20 +++- tests/system/test_client.py | 222 +++++++++++++++++------------------- 2 files changed, 122 insertions(+), 120 deletions(-) diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 4eef60e92..7b389013f 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -14,13 +14,25 @@ import pytest +from google.cloud import bigquery 
+import test_utils.prefixer + from . import helpers +prefixer = test_utils.prefixer.Prefixer("python-bigquery", "tests/system") + + +@pytest.fixture(scope="session", autouse=True) +def cleanup_datasets(bigquery_client: bigquery.Client): + for dataset in bigquery_client.list_datasets(): + if prefixer.should_cleanup(dataset.dataset_id): + bigquery_client.delete_dataset( + dataset, delete_contents=True, not_found_ok=True + ) + @pytest.fixture(scope="session") def bigquery_client(): - from google.cloud import bigquery - return bigquery.Client() @@ -33,10 +45,10 @@ def bqstorage_client(bigquery_client): @pytest.fixture(scope="session") def dataset_id(bigquery_client): - dataset_id = f"bqsystem_{helpers.temp_suffix()}" + dataset_id = prefixer.create_prefix() bigquery_client.create_dataset(dataset_id) yield dataset_id - bigquery_client.delete_dataset(dataset_id, delete_contents=True) + bigquery_client.delete_dataset(dataset_id, delete_contents=True, not_found_ok=True) @pytest.fixture diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 2536af9fc..baa2b6ad8 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -153,7 +153,6 @@ class Config(object): CLIENT: Optional[bigquery.Client] = None CURSOR = None - DATASET = None def setUpModule(): @@ -163,9 +162,7 @@ def setUpModule(): class TestBigQuery(unittest.TestCase): def setUp(self): - Config.DATASET = _make_dataset_id("bq_system_tests") - dataset = Config.CLIENT.create_dataset(Config.DATASET) - self.to_delete = [dataset] + self.to_delete = [] def tearDown(self): policy_tag_client = PolicyTagManagerClient() @@ -1605,20 +1602,6 @@ def test_dbapi_fetchall_from_script(self): row_tuples = [r.values() for r in rows] self.assertEqual(row_tuples, [(5, "foo"), (6, "bar"), (7, "baz")]) - def test_dbapi_create_view(self): - - query = """ - CREATE VIEW {}.dbapi_create_view - AS SELECT name, SUM(number) AS total - FROM `bigquery-public-data.usa_names.usa_1910_2013` - GROUP BY name; - 
""".format( - Config.DATASET - ) - - Config.CURSOR.execute(query) - self.assertEqual(Config.CURSOR.rowcount, 0, "expected 0 rows") - @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @@ -2459,104 +2442,6 @@ def test_list_rows_page_size(self): page = next(pages) self.assertEqual(page.num_items, num_last_page) - def test_parameterized_types_round_trip(self): - client = Config.CLIENT - table_id = f"{Config.DATASET}.test_parameterized_types_round_trip" - fields = ( - ("n", "NUMERIC"), - ("n9", "NUMERIC(9)"), - ("n92", "NUMERIC(9, 2)"), - ("bn", "BIGNUMERIC"), - ("bn9", "BIGNUMERIC(38)"), - ("bn92", "BIGNUMERIC(38, 22)"), - ("s", "STRING"), - ("s9", "STRING(9)"), - ("b", "BYTES"), - ("b9", "BYTES(9)"), - ) - self.to_delete.insert(0, Table(f"{client.project}.{table_id}")) - client.query( - "create table {} ({})".format( - table_id, ", ".join(" ".join(f) for f in fields) - ) - ).result() - table = client.get_table(table_id) - table_id2 = table_id + "2" - self.to_delete.insert(0, Table(f"{client.project}.{table_id2}")) - client.create_table(Table(f"{client.project}.{table_id2}", table.schema)) - table2 = client.get_table(table_id2) - - self.assertEqual(tuple(s._key()[:2] for s in table2.schema), fields) - - def test_table_snapshots(self): - from google.cloud.bigquery import CopyJobConfig - from google.cloud.bigquery import OperationType - - client = Config.CLIENT - - source_table_path = f"{client.project}.{Config.DATASET}.test_table" - snapshot_table_path = f"{source_table_path}_snapshot" - - # Create the table before loading so that the column order is predictable. - schema = [ - bigquery.SchemaField("foo", "INTEGER"), - bigquery.SchemaField("bar", "STRING"), - ] - source_table = helpers.retry_403(Config.CLIENT.create_table)( - Table(source_table_path, schema=schema) - ) - self.to_delete.insert(0, source_table) - - # Populate the table with initial data. 
- rows = [{"foo": 1, "bar": "one"}, {"foo": 2, "bar": "two"}] - load_job = Config.CLIENT.load_table_from_json(rows, source_table) - load_job.result() - - # Now create a snapshot before modifying the original table data. - copy_config = CopyJobConfig() - copy_config.operation_type = OperationType.SNAPSHOT - - copy_job = client.copy_table( - sources=source_table_path, - destination=snapshot_table_path, - job_config=copy_config, - ) - copy_job.result() - - snapshot_table = client.get_table(snapshot_table_path) - self.to_delete.insert(0, snapshot_table) - - # Modify data in original table. - sql = f'INSERT INTO `{source_table_path}`(foo, bar) VALUES (3, "three")' - query_job = client.query(sql) - query_job.result() - - # List rows from the source table and compare them to rows from the snapshot. - rows_iter = client.list_rows(source_table_path) - rows = sorted(row.values() for row in rows_iter) - assert rows == [(1, "one"), (2, "two"), (3, "three")] - - rows_iter = client.list_rows(snapshot_table_path) - rows = sorted(row.values() for row in rows_iter) - assert rows == [(1, "one"), (2, "two")] - - # Now restore the table from the snapshot and it should again contain the old - # set of rows. 
- copy_config = CopyJobConfig() - copy_config.operation_type = OperationType.RESTORE - copy_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE - - copy_job = client.copy_table( - sources=snapshot_table_path, - destination=source_table_path, - job_config=copy_config, - ) - copy_job.result() - - rows_iter = client.list_rows(source_table_path) - rows = sorted(row.values() for row in rows_iter) - assert rows == [(1, "one"), (2, "two")] - def temp_dataset(self, dataset_id, location=None): project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) @@ -2587,3 +2472,108 @@ def _table_exists(t): return True except NotFound: return False + + +def test_dbapi_create_view(dataset_id): + + query = f""" + CREATE VIEW {dataset_id}.dbapi_create_view + AS SELECT name, SUM(number) AS total + FROM `bigquery-public-data.usa_names.usa_1910_2013` + GROUP BY name; + """ + + Config.CURSOR.execute(query) + assert Config.CURSOR.rowcount == 0, "expected 0 rows" + + +def test_parameterized_types_round_trip(dataset_id): + client = Config.CLIENT + table_id = f"{dataset_id}.test_parameterized_types_round_trip" + fields = ( + ("n", "NUMERIC"), + ("n9", "NUMERIC(9)"), + ("n92", "NUMERIC(9, 2)"), + ("bn", "BIGNUMERIC"), + ("bn9", "BIGNUMERIC(38)"), + ("bn92", "BIGNUMERIC(38, 22)"), + ("s", "STRING"), + ("s9", "STRING(9)"), + ("b", "BYTES"), + ("b9", "BYTES(9)"), + ) + client.query( + "create table {} ({})".format(table_id, ", ".join(" ".join(f) for f in fields)) + ).result() + table = client.get_table(table_id) + table_id2 = table_id + "2" + client.create_table(Table(f"{client.project}.{table_id2}", table.schema)) + table2 = client.get_table(table_id2) + + assert tuple(s._key()[:2] for s in table2.schema) == fields + + +def test_table_snapshots(dataset_id): + from google.cloud.bigquery import CopyJobConfig + from google.cloud.bigquery import OperationType + + client = Config.CLIENT + + source_table_path = f"{client.project}.{dataset_id}.test_table" 
+ snapshot_table_path = f"{source_table_path}_snapshot" + + # Create the table before loading so that the column order is predictable. + schema = [ + bigquery.SchemaField("foo", "INTEGER"), + bigquery.SchemaField("bar", "STRING"), + ] + source_table = helpers.retry_403(Config.CLIENT.create_table)( + Table(source_table_path, schema=schema) + ) + + # Populate the table with initial data. + rows = [{"foo": 1, "bar": "one"}, {"foo": 2, "bar": "two"}] + load_job = Config.CLIENT.load_table_from_json(rows, source_table) + load_job.result() + + # Now create a snapshot before modifying the original table data. + copy_config = CopyJobConfig() + copy_config.operation_type = OperationType.SNAPSHOT + + copy_job = client.copy_table( + sources=source_table_path, + destination=snapshot_table_path, + job_config=copy_config, + ) + copy_job.result() + + # Modify data in original table. + sql = f'INSERT INTO `{source_table_path}`(foo, bar) VALUES (3, "three")' + query_job = client.query(sql) + query_job.result() + + # List rows from the source table and compare them to rows from the snapshot. + rows_iter = client.list_rows(source_table_path) + rows = sorted(row.values() for row in rows_iter) + assert rows == [(1, "one"), (2, "two"), (3, "three")] + + rows_iter = client.list_rows(snapshot_table_path) + rows = sorted(row.values() for row in rows_iter) + assert rows == [(1, "one"), (2, "two")] + + # Now restore the table from the snapshot and it should again contain the old + # set of rows. 
+ copy_config = CopyJobConfig() + copy_config.operation_type = OperationType.RESTORE + copy_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE + + copy_job = client.copy_table( + sources=snapshot_table_path, + destination=source_table_path, + job_config=copy_config, + ) + copy_job.result() + + rows_iter = client.list_rows(source_table_path) + rows = sorted(row.values() for row in rows_iter) + assert rows == [(1, "one"), (2, "two")] From da87fd921cc8067b187d7985c978aac8eb58d107 Mon Sep 17 00:00:00 2001 From: mgorsk1 Date: Mon, 26 Jul 2021 19:44:38 +0200 Subject: [PATCH 291/341] docs: correct docs for `LoadJobConfig.destination_table_description` (#810) Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #811. --- google/cloud/bigquery/job/load.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index f1b045412..aee055c1c 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -170,7 +170,7 @@ def destination_encryption_configuration(self, value): @property def destination_table_description(self): - """Optional[str]: Name given to destination table. + """Optional[str]: Description of the destination table. 
See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.description From c293e3c914cd0cfe3da34b99330fd6d87a5f534e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 27 Jul 2021 11:21:21 -0500 Subject: [PATCH 292/341] tests: add system tests for `to_arrow` with extreme values (#813) * tests: add system tests for `to_arrow` with extreme values * fix bad merge * revert pandas tests * revert pandas tests * fix link to decimal types Co-authored-by: Peter Lamut * use north and south pole as extreme geography points * add another row of extreme values * base64 encode bytes columns Co-authored-by: Peter Lamut --- google/cloud/bigquery/_pandas_helpers.py | 2 + tests/data/scalars.jsonl | 2 + tests/data/scalars_extreme.jsonl | 5 ++ tests/data/scalars_schema.json | 62 +++++++++++++++++ tests/system/conftest.py | 48 ++++++++++++- tests/system/test_arrow.py | 88 ++++++++++++++++++++++++ 6 files changed, 205 insertions(+), 2 deletions(-) create mode 100644 tests/data/scalars.jsonl create mode 100644 tests/data/scalars_extreme.jsonl create mode 100644 tests/data/scalars_schema.json create mode 100644 tests/system/test_arrow.py diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 2ff96da4d..b381fa5f7 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -93,6 +93,8 @@ def pyarrow_numeric(): def pyarrow_bignumeric(): + # 77th digit is partial. 
+ # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types return pyarrow.decimal256(76, 38) diff --git a/tests/data/scalars.jsonl b/tests/data/scalars.jsonl new file mode 100644 index 000000000..4419a6e9a --- /dev/null +++ b/tests/data/scalars.jsonl @@ -0,0 +1,2 @@ +{"bool_col": true, "bytes_col": "abcd", "date_col": "2021-07-21", "datetime_col": "2021-07-21 11:39:45", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "123456789", "numeric_col": "1.23456789", "bignumeric_col": "10.111213141516171819", "float64_col": "1.25", "string_col": "Hello, World", "time_col": "11:41:43.07616", "timestamp_col": "2021-07-21T17:43:43.945289Z"} +{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null} diff --git a/tests/data/scalars_extreme.jsonl b/tests/data/scalars_extreme.jsonl new file mode 100644 index 000000000..ceccd8dbc --- /dev/null +++ b/tests/data/scalars_extreme.jsonl @@ -0,0 +1,5 @@ +{"bool_col": true, "bytes_col": "DQo=\n", "date_col": "9999-12-31", "datetime_col": "9999-12-31 23:59:59.999999", "geography_col": "POINT(-135.0000 90.0000)", "int64_col": "9223372036854775807", "numeric_col": "9.9999999999999999999999999999999999999E+28", "bignumeric_col": "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "+inf", "string_col": "Hello, World", "time_col": "23:59:59.99999", "timestamp_col": "9999-12-31T23:59:59.999999Z"} +{"bool_col": false, "bytes_col": "8J+Zgw==\n", "date_col": "0001-01-01", "datetime_col": "0001-01-01 00:00:00", "geography_col": "POINT(45.0000 -90.0000)", "int64_col": "-9223372036854775808", "numeric_col": "-9.9999999999999999999999999999999999999E+28", "bignumeric_col": "-9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": 
"-inf", "string_col": "Hello, World", "time_col": "00:00:00", "timestamp_col": "0001-01-01T00:00:00.000000Z"} +{"bool_col": true, "bytes_col": "AA==\n", "date_col": "1900-01-01", "datetime_col": "1900-01-01 00:00:00", "geography_col": "POINT(-180.0000 0.0000)", "int64_col": "-1", "numeric_col": "0.000000001", "bignumeric_col": "-0.00000000000000000000000000000000000001", "float64_col": "nan", "string_col": "こんにちは", "time_col": "00:00:00.000001", "timestamp_col": "1900-01-01T00:00:00.000000Z"} +{"bool_col": false, "bytes_col": "", "date_col": "1970-01-01", "datetime_col": "1970-01-01 00:00:00", "geography_col": "POINT(0 0)", "int64_col": "0", "numeric_col": "0.0", "bignumeric_col": "0.0", "float64_col": 0.0, "string_col": "", "time_col": "12:00:00", "timestamp_col": "1970-01-01T00:00:00.000000Z"} +{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null} diff --git a/tests/data/scalars_schema.json b/tests/data/scalars_schema.json new file mode 100644 index 000000000..00bd150fd --- /dev/null +++ b/tests/data/scalars_schema.json @@ -0,0 +1,62 @@ +[ + { + "mode": "NULLABLE", + "name": "timestamp_col", + "type": "TIMESTAMP" + }, + { + "mode": "NULLABLE", + "name": "time_col", + "type": "TIME" + }, + { + "mode": "NULLABLE", + "name": "float64_col", + "type": "FLOAT" + }, + { + "mode": "NULLABLE", + "name": "datetime_col", + "type": "DATETIME" + }, + { + "mode": "NULLABLE", + "name": "bignumeric_col", + "type": "BIGNUMERIC" + }, + { + "mode": "NULLABLE", + "name": "numeric_col", + "type": "NUMERIC" + }, + { + "mode": "NULLABLE", + "name": "geography_col", + "type": "GEOGRAPHY" + }, + { + "mode": "NULLABLE", + "name": "date_col", + "type": "DATE" + }, + { + "mode": "NULLABLE", + "name": "string_col", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "bool_col", + "type": 
"BOOLEAN" + }, + { + "mode": "NULLABLE", + "name": "bytes_col", + "type": "BYTES" + }, + { + "mode": "NULLABLE", + "name": "int64_col", + "type": "INTEGER" + } +] diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 7b389013f..cc2c2a4dc 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -12,15 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pytest +import pathlib -from google.cloud import bigquery +import pytest import test_utils.prefixer +from google.cloud import bigquery +from google.cloud.bigquery import enums from . import helpers + prefixer = test_utils.prefixer.Prefixer("python-bigquery", "tests/system") +DATA_DIR = pathlib.Path(__file__).parent.parent / "data" + @pytest.fixture(scope="session", autouse=True) def cleanup_datasets(bigquery_client: bigquery.Client): @@ -36,6 +41,11 @@ def bigquery_client(): return bigquery.Client() +@pytest.fixture(scope="session") +def project_id(bigquery_client: bigquery.Client): + return bigquery_client.project + + @pytest.fixture(scope="session") def bqstorage_client(bigquery_client): from google.cloud import bigquery_storage @@ -54,3 +64,37 @@ def dataset_id(bigquery_client): @pytest.fixture def table_id(dataset_id): return f"{dataset_id}.table_{helpers.temp_suffix()}" + + +@pytest.fixture(scope="session") +def scalars_table(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): + schema = bigquery_client.schema_from_json(DATA_DIR / "scalars_schema.json") + job_config = bigquery.LoadJobConfig() + job_config.schema = schema + job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON + full_table_id = f"{project_id}.{dataset_id}.scalars" + with open(DATA_DIR / "scalars.jsonl", "rb") as data_file: + job = bigquery_client.load_table_from_file( + data_file, full_table_id, job_config=job_config + ) + job.result() + yield full_table_id + bigquery_client.delete_table(full_table_id) + + 
+@pytest.fixture(scope="session") +def scalars_extreme_table( + bigquery_client: bigquery.Client, project_id: str, dataset_id: str +): + schema = bigquery_client.schema_from_json(DATA_DIR / "scalars_schema.json") + job_config = bigquery.LoadJobConfig() + job_config.schema = schema + job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON + full_table_id = f"{project_id}.{dataset_id}.scalars_extreme" + with open(DATA_DIR / "scalars_extreme.jsonl", "rb") as data_file: + job = bigquery_client.load_table_from_file( + data_file, full_table_id, job_config=job_config + ) + job.result() + yield full_table_id + bigquery_client.delete_table(full_table_id) diff --git a/tests/system/test_arrow.py b/tests/system/test_arrow.py new file mode 100644 index 000000000..f97488e39 --- /dev/null +++ b/tests/system/test_arrow.py @@ -0,0 +1,88 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""System tests for Arrow connector.""" + +import pytest + +pyarrow = pytest.importorskip( + "pyarrow", minversion="3.0.0" +) # Needs decimal256 for BIGNUMERIC columns. + + +@pytest.mark.parametrize( + ("max_results", "scalars_table_name"), + ( + (None, "scalars_table"), # Use BQ Storage API. + (10, "scalars_table"), # Use REST API. + (None, "scalars_extreme_table"), # Use BQ Storage API. + (10, "scalars_extreme_table"), # Use REST API. 
+ ), +) +def test_list_rows_nullable_scalars_dtypes( + bigquery_client, + scalars_table, + scalars_extreme_table, + max_results, + scalars_table_name, +): + table_id = scalars_table + if scalars_table_name == "scalars_extreme_table": + table_id = scalars_extreme_table + arrow_table = bigquery_client.list_rows( + table_id, max_results=max_results, + ).to_arrow() + + schema = arrow_table.schema + bignumeric_type = schema.field("bignumeric_col").type + # 77th digit is partial. + # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types + assert bignumeric_type.precision in {76, 77} + assert bignumeric_type.scale == 38 + + bool_type = schema.field("bool_col").type + assert bool_type.equals(pyarrow.bool_()) + + bytes_type = schema.field("bytes_col").type + assert bytes_type.equals(pyarrow.binary()) + + date_type = schema.field("date_col").type + assert date_type.equals(pyarrow.date32()) + + datetime_type = schema.field("datetime_col").type + assert datetime_type.unit == "us" + assert datetime_type.tz is None + + float64_type = schema.field("float64_col").type + assert float64_type.equals(pyarrow.float64()) + + geography_type = schema.field("geography_col").type + assert geography_type.equals(pyarrow.string()) + + int64_type = schema.field("int64_col").type + assert int64_type.equals(pyarrow.int64()) + + numeric_type = schema.field("numeric_col").type + assert numeric_type.precision == 38 + assert numeric_type.scale == 9 + + string_type = schema.field("string_col").type + assert string_type.equals(pyarrow.string()) + + time_type = schema.field("time_col").type + assert time_type.equals(pyarrow.time64("us")) + + timestamp_type = schema.field("timestamp_col").type + assert timestamp_type.unit == "us" + assert timestamp_type.tz is not None From 3b70891135f5fe32dcd12210ff4faa51ac53742d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 27 Jul 2021 13:04:10 -0500 Subject: [PATCH 293/341] chore: protect v3.x.x branch (#816) * chore: protect 
v3.x.x branch In preparation for breaking changes. * force pattern to be a string * simplify branch name --- .github/sync-repo-settings.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index b18fb9c29..2697f214c 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -3,7 +3,7 @@ branchProtectionRules: # Identifies the protection rule pattern. Name of the branch to be protected. # Defaults to `master` -- pattern: master +- pattern: '{master,v3}' requiredStatusCheckContexts: - 'Kokoro' - 'Kokoro snippets-3.8' From 3c1be149e76b1d1d8879fdcf0924ddb1c1839e94 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 27 Jul 2021 20:08:37 +0200 Subject: [PATCH 294/341] fix: no longer raise a warning in `to_dataframe` if `max_results` set (#815) That warning should only be used when BQ Storage client is explicitly passed in to RowIterator methods when max_results value is also set. --- google/cloud/bigquery/table.py | 30 +++++-- tests/unit/test_table.py | 160 +++++++++++++++++++++++++++++++-- 2 files changed, 179 insertions(+), 11 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 18d969a3f..daade1ac6 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1552,11 +1552,6 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): return False if self.max_results is not None: - warnings.warn( - "Cannot use bqstorage_client if max_results is set, " - "reverting to fetching data with the REST endpoint.", - stacklevel=2, - ) return False try: @@ -1604,6 +1599,25 @@ def total_rows(self): """int: The total number of rows in the table.""" return self._total_rows + def _maybe_warn_max_results( + self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"], + ): + """Issue a warning if BQ Storage client is not ``None`` with ``max_results`` set. 
+ + This helper method should be used directly in the relevant top-level public + methods, so that the warning is issued for the correct line in user code. + + Args: + bqstorage_client: + The BigQuery Storage client intended to use for downloading result rows. + """ + if bqstorage_client is not None and self.max_results is not None: + warnings.warn( + "Cannot use bqstorage_client if max_results is set, " + "reverting to fetching data with the REST endpoint.", + stacklevel=3, + ) + def _to_page_iterable( self, bqstorage_download, tabledata_list_download, bqstorage_client=None ): @@ -1700,6 +1714,8 @@ def to_arrow( if pyarrow is None: raise ValueError(_NO_PYARROW_ERROR) + self._maybe_warn_max_results(bqstorage_client) + if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): create_bqstorage_client = False bqstorage_client = None @@ -1790,6 +1806,8 @@ def to_dataframe_iterable( if dtypes is None: dtypes = {} + self._maybe_warn_max_results(bqstorage_client) + column_names = [field.name for field in self._schema] bqstorage_download = functools.partial( _pandas_helpers.download_dataframe_bqstorage, @@ -1896,6 +1914,8 @@ def to_dataframe( if dtypes is None: dtypes = {} + self._maybe_warn_max_results(bqstorage_client) + if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): create_bqstorage_client = False bqstorage_client = None diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 37650cd27..4b1fd833b 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -15,6 +15,7 @@ import datetime import logging import time +import types import unittest import warnings @@ -1862,6 +1863,15 @@ def test__validate_bqstorage_returns_false_when_completely_cached(self): ) ) + def test__validate_bqstorage_returns_false_if_max_results_set(self): + iterator = self._make_one( + max_results=10, first_page_response=None # not cached + ) + result = iterator._validate_bqstorage( + bqstorage_client=None, 
create_bqstorage_client=True + ) + self.assertFalse(result) + def test__validate_bqstorage_returns_false_if_missing_dependency(self): iterator = self._make_one(first_page_response=None) # not cached @@ -2105,7 +2115,7 @@ def test_to_arrow_w_empty_table(self): @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) - def test_to_arrow_max_results_w_create_bqstorage_warning(self): + def test_to_arrow_max_results_w_explicit_bqstorage_client_warning(self): from google.cloud.bigquery.schema import SchemaField schema = [ @@ -2119,6 +2129,7 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) mock_client = _mock_client() + mock_bqstorage_client = mock.sentinel.bq_storage_client row_iterator = self._make_one( client=mock_client, @@ -2129,7 +2140,7 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): ) with warnings.catch_warnings(record=True) as warned: - row_iterator.to_arrow(create_bqstorage_client=True) + row_iterator.to_arrow(bqstorage_client=mock_bqstorage_client) matches = [ warning @@ -2139,6 +2150,49 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): and "REST" in str(warning) ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") + self.assertIn( + __file__, str(matches[0]), msg="Warning emitted with incorrect stacklevel" + ) + mock_client._ensure_bqstorage_client.assert_not_called() + + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_to_arrow_max_results_w_create_bqstorage_client_no_warning(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ] + path = 
"/foo" + api_request = mock.Mock(return_value={"rows": rows}) + mock_client = _mock_client() + + row_iterator = self._make_one( + client=mock_client, + api_request=api_request, + path=path, + schema=schema, + max_results=42, + ) + + with warnings.catch_warnings(record=True) as warned: + row_iterator.to_arrow(create_bqstorage_client=True) + + matches = [ + warning + for warning in warned + if warning.category is UserWarning + and "cannot use bqstorage_client" in str(warning).lower() + and "REST" in str(warning) + ] + self.assertFalse(matches) mock_client._ensure_bqstorage_client.assert_not_called() @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @@ -2372,7 +2426,6 @@ def test_to_arrow_w_pyarrow_none(self): @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_iterable(self): from google.cloud.bigquery.schema import SchemaField - import types schema = [ SchemaField("name", "STRING", mode="REQUIRED"), @@ -2415,7 +2468,6 @@ def test_to_dataframe_iterable(self): @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_iterable_with_dtypes(self): from google.cloud.bigquery.schema import SchemaField - import types schema = [ SchemaField("name", "STRING", mode="REQUIRED"), @@ -2527,6 +2579,61 @@ def test_to_dataframe_iterable_w_bqstorage(self): # Don't close the client if it was passed in. 
bqstorage_client._transport.grpc_channel.close.assert_not_called() + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_to_dataframe_iterable_w_bqstorage_max_results_warning(self): + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + + iterator_schema = [ + schema.SchemaField("name", "STRING", mode="REQUIRED"), + schema.SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + path = "/foo" + api_request = mock.Mock( + side_effect=[ + { + "rows": [{"f": [{"v": "Bengt"}, {"v": "32"}]}], + "pageToken": "NEXTPAGE", + }, + {"rows": [{"f": [{"v": "Sven"}, {"v": "33"}]}]}, + ] + ) + row_iterator = mut.RowIterator( + _mock_client(), + api_request, + path, + iterator_schema, + table=mut.TableReference.from_string("proj.dset.tbl"), + selected_fields=iterator_schema, + max_results=25, + ) + + with warnings.catch_warnings(record=True) as warned: + dfs = row_iterator.to_dataframe_iterable(bqstorage_client=bqstorage_client) + + # Was a warning emitted? + matches = [ + warning + for warning in warned + if warning.category is UserWarning + and "cannot use bqstorage_client" in str(warning).lower() + and "REST" in str(warning) + ] + assert len(matches) == 1, "User warning was not emitted." + assert __file__ in str(matches[0]), "Warning emitted with incorrect stacklevel" + + # Basic check of what we got as a result. 
+ dataframes = list(dfs) + assert len(dataframes) == 2 + assert isinstance(dataframes[0], pandas.DataFrame) + assert isinstance(dataframes[1], pandas.DataFrame) + @mock.patch("google.cloud.bigquery.table.pandas", new=None) def test_to_dataframe_iterable_error_if_pandas_is_none(self): from google.cloud.bigquery.schema import SchemaField @@ -2926,7 +3033,7 @@ def test_to_dataframe_max_results_w_bqstorage_warning(self): self.assertEqual(len(matches), 1, msg="User warning was not emitted.") @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe_max_results_w_create_bqstorage_warning(self): + def test_to_dataframe_max_results_w_explicit_bqstorage_client_warning(self): from google.cloud.bigquery.schema import SchemaField schema = [ @@ -2940,6 +3047,7 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) mock_client = _mock_client() + mock_bqstorage_client = mock.sentinel.bq_storage_client row_iterator = self._make_one( client=mock_client, @@ -2950,7 +3058,7 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): ) with warnings.catch_warnings(record=True) as warned: - row_iterator.to_dataframe(create_bqstorage_client=True) + row_iterator.to_dataframe(bqstorage_client=mock_bqstorage_client) matches = [ warning @@ -2960,6 +3068,46 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): and "REST" in str(warning) ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") + self.assertIn( + __file__, str(matches[0]), msg="Warning emitted with incorrect stacklevel" + ) + mock_client._ensure_bqstorage_client.assert_not_called() + + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_max_results_w_create_bqstorage_client_no_warning(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", 
mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + mock_client = _mock_client() + + row_iterator = self._make_one( + client=mock_client, + api_request=api_request, + path=path, + schema=schema, + max_results=42, + ) + + with warnings.catch_warnings(record=True) as warned: + row_iterator.to_dataframe(create_bqstorage_client=True) + + matches = [ + warning + for warning in warned + if warning.category is UserWarning + and "cannot use bqstorage_client" in str(warning).lower() + and "REST" in str(warning) + ] + self.assertFalse(matches) mock_client._ensure_bqstorage_client.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") From fe7a902e8b3e723ace335c9b499aea6d180a025b Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 27 Jul 2021 18:14:09 +0000 Subject: [PATCH 295/341] feat: Update proto definitions for bigquery/v2 to support new proto fields for BQML. (#817) PiperOrigin-RevId: 387137741 Source-Link: https://github.com/googleapis/googleapis/commit/8962c92e97495d0795b427d4aa4326b0d06e33eb Source-Link: https://github.com/googleapis/googleapis-gen/commit/102f1b4277cc5a049663535d9eeb77831b67de25 --- google/cloud/bigquery_v2/types/model.py | 104 ++++++++++++++++-- .../bigquery_v2/types/table_reference.py | 12 ++ 2 files changed, 107 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigquery_v2/types/model.py b/google/cloud/bigquery_v2/types/model.py index 17e101d25..706418401 100644 --- a/google/cloud/bigquery_v2/types/model.py +++ b/google/cloud/bigquery_v2/types/model.py @@ -96,6 +96,8 @@ class Model(proto.Message): Output only. Label columns that were used to train this model. The output of the model will have a `predicted_` prefix to these columns. + best_trial_id (int): + The best trial_id across all training runs. 
""" class ModelType(proto.Enum): @@ -113,6 +115,7 @@ class ModelType(proto.Enum): ARIMA = 11 AUTOML_REGRESSOR = 12 AUTOML_CLASSIFIER = 13 + ARIMA_PLUS = 19 class LossType(proto.Enum): r"""Loss metric to evaluate model training performance.""" @@ -151,6 +154,7 @@ class DataFrequency(proto.Enum): WEEKLY = 5 DAILY = 6 HOURLY = 7 + PER_MINUTE = 8 class HolidayRegion(proto.Enum): r"""Type of supported holiday regions for time series forecasting @@ -285,7 +289,7 @@ class RegressionMetrics(proto.Message): median_absolute_error (google.protobuf.wrappers_pb2.DoubleValue): Median absolute error. r_squared (google.protobuf.wrappers_pb2.DoubleValue): - R^2 score. + R^2 score. This corresponds to r2_score in ML.EVALUATE. """ mean_absolute_error = proto.Field( @@ -528,7 +532,7 @@ class ClusteringMetrics(proto.Message): Mean of squared distances between each sample to its cluster centroid. clusters (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster]): - [Beta] Information for all clusters. + Information for all clusters. """ class Cluster(proto.Message): @@ -697,10 +701,29 @@ class ArimaSingleModelForecastingMetrics(proto.Message): Is arima model fitted with drift or not. It is always false when d is not 1. time_series_id (str): - The id to indicate different time series. + The time_series_id value for this time series. It will be + one of the unique values from the time_series_id_column + specified during ARIMA model training. Only present when + time_series_id_column training option was used. + time_series_ids (Sequence[str]): + The tuple of time_series_ids identifying this time series. + It will be one of the unique tuples of values present in the + time_series_id_columns specified during ARIMA model + training. Only present when time_series_id_columns training + option was used and the order of values here are same as the + order of time_series_id_columns. 
seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): Seasonal periods. Repeated because multiple periods are supported for one time series. + has_holiday_effect (google.protobuf.wrappers_pb2.BoolValue): + If true, holiday_effect is a part of time series + decomposition result. + has_spikes_and_dips (google.protobuf.wrappers_pb2.BoolValue): + If true, spikes_and_dips is a part of time series + decomposition result. + has_step_changes (google.protobuf.wrappers_pb2.BoolValue): + If true, step_changes is a part of time series decomposition + result. """ non_seasonal_order = proto.Field( @@ -711,9 +734,19 @@ class ArimaSingleModelForecastingMetrics(proto.Message): ) has_drift = proto.Field(proto.BOOL, number=3,) time_series_id = proto.Field(proto.STRING, number=4,) + time_series_ids = proto.RepeatedField(proto.STRING, number=9,) seasonal_periods = proto.RepeatedField( proto.ENUM, number=5, enum="Model.SeasonalPeriod.SeasonalPeriodType", ) + has_holiday_effect = proto.Field( + proto.MESSAGE, number=6, message=wrappers_pb2.BoolValue, + ) + has_spikes_and_dips = proto.Field( + proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue, + ) + has_step_changes = proto.Field( + proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue, + ) non_seasonal_order = proto.RepeatedField( proto.MESSAGE, number=1, message="Model.ArimaOrder", @@ -901,7 +934,7 @@ class TrainingRun(proto.Message): """ class TrainingOptions(proto.Message): - r""" + r"""Options used in model training. Attributes: max_iterations (int): The maximum number of iterations in training. @@ -972,8 +1005,9 @@ class TrainingOptions(proto.Message): num_clusters (int): Number of clusters for clustering models. model_uri (str): - [Beta] Google Cloud Storage URI from which the model was - imported. Only applicable for imported models. + Google Cloud Storage URI from which the model + was imported. Only applicable for imported + models. 
optimization_strategy (google.cloud.bigquery_v2.types.Model.OptimizationStrategy): Optimization strategy for training linear regression models. @@ -1030,8 +1064,11 @@ class TrainingOptions(proto.Message): If a valid value is specified, then holiday effects modeling is enabled. time_series_id_column (str): - The id column that will be used to indicate - different time series to forecast in parallel. + The time series id column that was used + during ARIMA model training. + time_series_id_columns (Sequence[str]): + The time series id columns that were used + during ARIMA model training. horizon (int): The number of periods ahead that need to be forecasted. @@ -1042,6 +1079,15 @@ class TrainingOptions(proto.Message): output feature name is A.b. auto_arima_max_order (int): The max value of non-seasonal p and q. + decompose_time_series (google.protobuf.wrappers_pb2.BoolValue): + If true, perform decompose time series and + save the results. + clean_spikes_and_dips (google.protobuf.wrappers_pb2.BoolValue): + If true, clean spikes and dips in the input + time series. + adjust_step_changes (google.protobuf.wrappers_pb2.BoolValue): + If true, detect step changes and make data + adjustment in the input time series. 
""" max_iterations = proto.Field(proto.INT64, number=1,) @@ -1120,9 +1166,19 @@ class TrainingOptions(proto.Message): proto.ENUM, number=42, enum="Model.HolidayRegion", ) time_series_id_column = proto.Field(proto.STRING, number=43,) + time_series_id_columns = proto.RepeatedField(proto.STRING, number=51,) horizon = proto.Field(proto.INT64, number=44,) preserve_input_structs = proto.Field(proto.BOOL, number=45,) auto_arima_max_order = proto.Field(proto.INT64, number=46,) + decompose_time_series = proto.Field( + proto.MESSAGE, number=50, message=wrappers_pb2.BoolValue, + ) + clean_spikes_and_dips = proto.Field( + proto.MESSAGE, number=52, message=wrappers_pb2.BoolValue, + ) + adjust_step_changes = proto.Field( + proto.MESSAGE, number=53, message=wrappers_pb2.BoolValue, + ) class IterationResult(proto.Message): r"""Information about a single iteration of the training run. @@ -1218,10 +1274,29 @@ class ArimaModelInfo(proto.Message): Whether Arima model fitted with drift or not. It is always false when d is not 1. time_series_id (str): - The id to indicate different time series. + The time_series_id value for this time series. It will be + one of the unique values from the time_series_id_column + specified during ARIMA model training. Only present when + time_series_id_column training option was used. + time_series_ids (Sequence[str]): + The tuple of time_series_ids identifying this time series. + It will be one of the unique tuples of values present in the + time_series_id_columns specified during ARIMA model + training. Only present when time_series_id_columns training + option was used and the order of values here are same as the + order of time_series_id_columns. seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): Seasonal periods. Repeated because multiple periods are supported for one time series. 
+ has_holiday_effect (google.protobuf.wrappers_pb2.BoolValue): + If true, holiday_effect is a part of time series + decomposition result. + has_spikes_and_dips (google.protobuf.wrappers_pb2.BoolValue): + If true, spikes_and_dips is a part of time series + decomposition result. + has_step_changes (google.protobuf.wrappers_pb2.BoolValue): + If true, step_changes is a part of time series decomposition + result. """ non_seasonal_order = proto.Field( @@ -1237,11 +1312,21 @@ class ArimaModelInfo(proto.Message): ) has_drift = proto.Field(proto.BOOL, number=4,) time_series_id = proto.Field(proto.STRING, number=5,) + time_series_ids = proto.RepeatedField(proto.STRING, number=10,) seasonal_periods = proto.RepeatedField( proto.ENUM, number=6, enum="Model.SeasonalPeriod.SeasonalPeriodType", ) + has_holiday_effect = proto.Field( + proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue, + ) + has_spikes_and_dips = proto.Field( + proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue, + ) + has_step_changes = proto.Field( + proto.MESSAGE, number=9, message=wrappers_pb2.BoolValue, + ) arima_model_info = proto.RepeatedField( proto.MESSAGE, @@ -1319,6 +1404,7 @@ class ArimaModelInfo(proto.Message): label_columns = proto.RepeatedField( proto.MESSAGE, number=11, message=standard_sql.StandardSqlField, ) + best_trial_id = proto.Field(proto.INT64, number=19,) class GetModelRequest(proto.Message): diff --git a/google/cloud/bigquery_v2/types/table_reference.py b/google/cloud/bigquery_v2/types/table_reference.py index a0a8ee4c9..d56e5b09f 100644 --- a/google/cloud/bigquery_v2/types/table_reference.py +++ b/google/cloud/bigquery_v2/types/table_reference.py @@ -36,11 +36,23 @@ class TableReference(proto.Message): maximum length is 1,024 characters. Certain operations allow suffixing of the table ID with a partition decorator, such as ``sample_table$20190123``. 
+ project_id_alternative (Sequence[str]): + The alternative field that will be used when ESF is not able + to translate the received data to the project_id field. + dataset_id_alternative (Sequence[str]): + The alternative field that will be used when ESF is not able + to translate the received data to the dataset_id field. + table_id_alternative (Sequence[str]): + The alternative field that will be used when ESF is not able + to translate the received data to the table_id field. """ project_id = proto.Field(proto.STRING, number=1,) dataset_id = proto.Field(proto.STRING, number=2,) table_id = proto.Field(proto.STRING, number=3,) + project_id_alternative = proto.RepeatedField(proto.STRING, number=4,) + dataset_id_alternative = proto.RepeatedField(proto.STRING, number=5,) + table_id_alternative = proto.RepeatedField(proto.STRING, number=6,) __all__ = tuple(sorted(__protobuf__.manifest)) From 02bbdaebb40be771124d397cb45545f1bf697548 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 27 Jul 2021 13:50:04 -0500 Subject: [PATCH 296/341] chore: release 2.23.0 (#819) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 18 ++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7dbc5d4da..966a8744a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,24 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.23.0](https://www.github.com/googleapis/python-bigquery/compare/v2.22.1...v2.23.0) (2021-07-27) + + +### Features + +* Update proto definitions for bigquery/v2 to support new proto fields for BQML.
([#817](https://www.github.com/googleapis/python-bigquery/issues/817)) ([fe7a902](https://www.github.com/googleapis/python-bigquery/commit/fe7a902e8b3e723ace335c9b499aea6d180a025b)) + + +### Bug Fixes + +* no longer raise a warning in `to_dataframe` if `max_results` set ([#815](https://www.github.com/googleapis/python-bigquery/issues/815)) ([3c1be14](https://www.github.com/googleapis/python-bigquery/commit/3c1be149e76b1d1d8879fdcf0924ddb1c1839e94)) +* retry ChunkedEncodingError by default ([#802](https://www.github.com/googleapis/python-bigquery/issues/802)) ([419d36d](https://www.github.com/googleapis/python-bigquery/commit/419d36d6b1887041e5795dbc8fc808890e91ab11)) + + +### Documentation + +* correct docs for `LoadJobConfig.destination_table_description` ([#810](https://www.github.com/googleapis/python-bigquery/issues/810)) ([da87fd9](https://www.github.com/googleapis/python-bigquery/commit/da87fd921cc8067b187d7985c978aac8eb58d107)) + ### [2.22.1](https://www.github.com/googleapis/python-bigquery/compare/v2.22.0...v2.22.1) (2021-07-22) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index dbc524478..416bf20ed 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.22.1" +__version__ = "2.23.0" From 42b66d34b979c87cc98b8984a8abe74edda753ac Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 28 Jul 2021 16:30:44 +0200 Subject: [PATCH 297/341] chore(deps): update dependency google-cloud-bigquery to v2.23.0 (#820) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 3a83eda64..0f9c3a2e3 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.22.1 +google-cloud-bigquery==2.23.0 google-cloud-bigquery-storage==2.6.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index ffa689a9e..81ef4df2f 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.22.1 +google-cloud-bigquery==2.23.0 google-cloud-bigquery-storage==2.6.0 google-auth-oauthlib==0.4.4 grpcio==1.39.0 From d9378af13add879118a1d004529b811f72c325d6 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 28 Jul 2021 17:18:18 +0200 Subject: [PATCH 298/341] fix: `insert_rows()` accepts float column values as strings again (#824) --- google/cloud/bigquery/_helpers.py | 12 +++++++----- tests/unit/test__helpers.py | 24 ++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index bf0f80e22..0a1f71444 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -19,6 +19,7 @@ import decimal import math import re +from typing import Union from google.cloud._helpers import UTC from google.cloud._helpers import _date_from_iso8601_date @@ -338,14 +339,15 @@ def _int_to_json(value): return value -def _float_to_json(value): +def _float_to_json(value) -> 
Union[None, str, float]: """Coerce 'value' to an JSON-compatible representation.""" if value is None: return None - elif math.isnan(value) or math.isinf(value): - return str(value) - else: - return float(value) + + if isinstance(value, str): + value = float(value) + + return str(value) if (math.isnan(value) or math.isinf(value)) else float(value) def _decimal_to_json(value): diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index af026ccbe..f8d00e67d 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -690,21 +690,45 @@ def _call_fut(self, value): def test_w_none(self): self.assertEqual(self._call_fut(None), None) + def test_w_non_numeric(self): + with self.assertRaises(TypeError): + self._call_fut(object()) + + def test_w_integer(self): + result = self._call_fut(123) + self.assertIsInstance(result, float) + self.assertEqual(result, 123.0) + def test_w_float(self): self.assertEqual(self._call_fut(1.23), 1.23) + def test_w_float_as_string(self): + self.assertEqual(self._call_fut("1.23"), 1.23) + def test_w_nan(self): result = self._call_fut(float("nan")) self.assertEqual(result.lower(), "nan") + def test_w_nan_as_string(self): + result = self._call_fut("NaN") + self.assertEqual(result.lower(), "nan") + def test_w_infinity(self): result = self._call_fut(float("inf")) self.assertEqual(result.lower(), "inf") + def test_w_infinity_as_string(self): + result = self._call_fut("inf") + self.assertEqual(result.lower(), "inf") + def test_w_negative_infinity(self): result = self._call_fut(float("-inf")) self.assertEqual(result.lower(), "-inf") + def test_w_negative_infinity_as_string(self): + result = self._call_fut("-inf") + self.assertEqual(result.lower(), "-inf") + class Test_decimal_to_json(unittest.TestCase): def _call_fut(self, value): From a505440e810d377dbb97e33412580089d67db9ba Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 28 Jul 2021 10:45:37 -0500 
Subject: [PATCH 299/341] chore: release 2.23.1 (#825) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 966a8744a..be4eab769 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.23.1](https://www.github.com/googleapis/python-bigquery/compare/v2.23.0...v2.23.1) (2021-07-28) + + +### Bug Fixes + +* `insert_rows()` accepts float column values as strings again ([#824](https://www.github.com/googleapis/python-bigquery/issues/824)) ([d9378af](https://www.github.com/googleapis/python-bigquery/commit/d9378af13add879118a1d004529b811f72c325d6)) + ## [2.23.0](https://www.github.com/googleapis/python-bigquery/compare/v2.22.1...v2.23.0) (2021-07-27) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 416bf20ed..0195d572c 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.23.0" +__version__ = "2.23.1" From c541c69355cd4c3f37576b4f22955a1f8ebc82f0 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 28 Jul 2021 13:03:13 -0500 Subject: [PATCH 300/341] chore: add second protection rule for v3 branch (#828) --- .github/sync-repo-settings.yaml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index 2697f214c..cc69b2551 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -3,7 +3,16 @@ branchProtectionRules: # Identifies the protection rule pattern. Name of the branch to be protected. 
# Defaults to `master` -- pattern: '{master,v3}' +- pattern: master + requiredStatusCheckContexts: + - 'Kokoro' + - 'Kokoro snippets-3.8' + - 'cla/google' + - 'Samples - Lint' + - 'Samples - Python 3.6' + - 'Samples - Python 3.7' + - 'Samples - Python 3.8' +- pattern: v3 requiredStatusCheckContexts: - 'Kokoro' - 'Kokoro snippets-3.8' From 48e8a3535a13abe97ccc76e1fa42ca3a179ba496 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 28 Jul 2021 21:43:43 +0200 Subject: [PATCH 301/341] chore(deps): update dependency google-cloud-bigquery to v2.23.1 (#827) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 0f9c3a2e3..6f6e670ab 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.23.0 +google-cloud-bigquery==2.23.1 google-cloud-bigquery-storage==2.6.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 81ef4df2f..dd36b5fe4 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.23.0 +google-cloud-bigquery==2.23.1 google-cloud-bigquery-storage==2.6.0 google-auth-oauthlib==0.4.4 grpcio==1.39.0 From d8c25ac139d53d0e689ee77ba46560dc63b4d9fa Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 29 Jul 2021 03:59:03 -0500 Subject: [PATCH 302/341] test: retry getting rows after streaming them in `test_insert_rows_from_dataframe` (#832) --- tests/system/test_pandas.py | 40 ++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index ddf5eaf43..821b375e1 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -21,6 +21,7 @@ import io import operator +import 
google.api_core.retry import pkg_resources import pytest import pytz @@ -41,6 +42,10 @@ PANDAS_INT64_VERSION = pkg_resources.parse_version("1.0.0") +class MissingDataError(Exception): + pass + + def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_id): """Test that a DataFrame with dtypes that map well to BigQuery types can be uploaded without specifying a schema. @@ -666,19 +671,6 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id): ) for errors in chunk_errors: assert not errors - - # Use query to fetch rows instead of listing directly from the table so - # that we get values from the streaming buffer. - rows = list( - bigquery_client.query( - "SELECT * FROM `{}.{}.{}`".format( - table.project, table.dataset_id, table.table_id - ) - ) - ) - - sorted_rows = sorted(rows, key=operator.attrgetter("int_col")) - row_tuples = [r.values() for r in sorted_rows] expected = [ # Pandas often represents NULL values as NaN. Convert to None for # easier comparison. @@ -686,7 +678,27 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id): for data_row in dataframe.itertuples(index=False) ] - assert len(row_tuples) == len(expected) + # Use query to fetch rows instead of listing directly from the table so + # that we get values from the streaming buffer "within a few seconds". 
+ # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability + @google.api_core.retry.Retry( + predicate=google.api_core.retry.if_exception_type(MissingDataError) + ) + def get_rows(): + rows = list( + bigquery_client.query( + "SELECT * FROM `{}.{}.{}`".format( + table.project, table.dataset_id, table.table_id + ) + ) + ) + if len(rows) != len(expected): + raise MissingDataError() + return rows + + rows = get_rows() + sorted_rows = sorted(rows, key=operator.attrgetter("int_col")) + row_tuples = [r.values() for r in sorted_rows] for row, expected_row in zip(row_tuples, expected): assert ( From 8149d9e3116e6f5340b9a15eb2c46deaaa24920b Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 29 Jul 2021 13:00:02 +0200 Subject: [PATCH 303/341] chore(deps): update dependency pyarrow to v5 (#834) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index dd36b5fe4..73badd1f3 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -8,5 +8,5 @@ matplotlib==3.3.4; python_version < '3.7' matplotlib==3.4.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' pandas==1.2.0; python_version >= '3.7' -pyarrow==4.0.1 +pyarrow==5.0.0 pytz==2021.1 From b9349adb2b54e26a45dbb69c10a948f5fc015a3c Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 29 Jul 2021 13:00:38 +0200 Subject: [PATCH 304/341] chore(deps): update dependency google-cloud-bigquery-storage to v2.6.2 (#795) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 6f6e670ab..eca0275a5 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 google-cloud-bigquery==2.23.1 
-google-cloud-bigquery-storage==2.6.0 +google-cloud-bigquery-storage==2.6.2 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 73badd1f3..8f4ea0406 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.23.1 -google-cloud-bigquery-storage==2.6.0 +google-cloud-bigquery-storage==2.6.2 google-auth-oauthlib==0.4.4 grpcio==1.39.0 ipython==7.16.1; python_version < '3.7' From 80e3a61c60419fb19b70b664c6415cd01ba82f5b Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 29 Jul 2021 16:42:35 +0200 Subject: [PATCH 305/341] deps: expand pyarrow pins to support 5.x releases (#833) --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 0ca19b576..e9deaf117 100644 --- a/setup.py +++ b/setup.py @@ -54,10 +54,10 @@ # grpc.Channel.close() method isn't added until 1.32.0. # https://github.com/grpc/grpc/pull/15254 "grpcio >= 1.38.1, < 2.0dev", - "pyarrow >= 1.0.0, < 5.0dev", + "pyarrow >= 1.0.0, < 6.0dev", ], - "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 5.0dev"], - "bignumeric_type": ["pyarrow >= 3.0.0, < 5.0dev"], + "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 6.0dev"], + "bignumeric_type": ["pyarrow >= 3.0.0, < 6.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ "opentelemetry-api >= 0.11b0", From 40ef77f376db0db9be23de1a3657be9571f5b48f Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 29 Jul 2021 10:04:05 -0500 Subject: [PATCH 306/341] chore: release 2.23.2 (#835) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index be4eab769..0c08e7910 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: 
https://pypi.org/project/google-cloud-bigquery/#history +### [2.23.2](https://www.github.com/googleapis/python-bigquery/compare/v2.23.1...v2.23.2) (2021-07-29) + + +### Dependencies + +* expand pyarrow pins to support 5.x releases ([#833](https://www.github.com/googleapis/python-bigquery/issues/833)) ([80e3a61](https://www.github.com/googleapis/python-bigquery/commit/80e3a61c60419fb19b70b664c6415cd01ba82f5b)) + ### [2.23.1](https://www.github.com/googleapis/python-bigquery/compare/v2.23.0...v2.23.1) (2021-07-28) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 0195d572c..0460e7bb9 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.23.1" +__version__ = "2.23.2" From 55687b89cc5ab04d1ff5ffeb31e6a4bf3b9eff79 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 29 Jul 2021 19:57:59 +0200 Subject: [PATCH 307/341] chore(deps): update dependency google-auth-oauthlib to v0.4.5 (#839) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 8f4ea0406..d7a99a8bd 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ google-cloud-bigquery==2.23.1 google-cloud-bigquery-storage==2.6.2 -google-auth-oauthlib==0.4.4 +google-auth-oauthlib==0.4.5 grpcio==1.39.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' From 85ce81cfd2e7199fa9016065c7329acb6079528c Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 29 Jul 2021 21:36:10 +0200 Subject: [PATCH 308/341] chore(deps): update dependency google-cloud-bigquery to v2.23.2 (#838) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 
deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index eca0275a5..5aa967b24 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.23.1 +google-cloud-bigquery==2.23.2 google-cloud-bigquery-storage==2.6.2 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index d7a99a8bd..4f2eaf90b 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.23.1 +google-cloud-bigquery==2.23.2 google-cloud-bigquery-storage==2.6.2 google-auth-oauthlib==0.4.5 grpcio==1.39.0 From 20df24b70e8934196200d0335c7f5afbdd08ea37 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 3 Aug 2021 03:14:34 +0200 Subject: [PATCH 309/341] chore(deps): update dependency google-cloud-testutils to v1 (#845) --- samples/snippets/requirements-test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 9e9d4e40f..b8dee50d0 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==0.3.0 +google-cloud-testutils==1.0.0 pytest==6.2.4 mock==4.0.3 From 7016f69b6064be101a359bc093ea74fc2a305ac7 Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Mon, 2 Aug 2021 19:20:21 -0600 Subject: [PATCH 310/341] chore: require CODEOWNER review and up to date branches (#846) These two lines bring the rules on this repo in line with the defaults: https://github.com/googleapis/repo-automation-bots/blob/63c858e539e1f4d9bb8ea66e12f9c0a0de5fef55/packages/sync-repo-settings/src/required-checks.json#L40-L50 --- .github/sync-repo-settings.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/sync-repo-settings.yaml 
b/.github/sync-repo-settings.yaml index cc69b2551..8634a3043 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -4,6 +4,8 @@ branchProtectionRules: # Identifies the protection rule pattern. Name of the branch to be protected. # Defaults to `master` - pattern: master + requiresCodeOwnerReviews: true + requiresStrictStatusChecks: true requiredStatusCheckContexts: - 'Kokoro' - 'Kokoro snippets-3.8' @@ -13,6 +15,8 @@ branchProtectionRules: - 'Samples - Python 3.7' - 'Samples - Python 3.8' - pattern: v3 + requiresCodeOwnerReviews: true + requiresStrictStatusChecks: true requiredStatusCheckContexts: - 'Kokoro' - 'Kokoro snippets-3.8' From cf0b0d862e01e9309407b2ac1a48f0bfe23d520d Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Thu, 5 Aug 2021 08:59:15 -0600 Subject: [PATCH 311/341] chore: add api-bigquery as a samples owner (#852) --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index ae570eb01..76112476b 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -8,4 +8,4 @@ * @googleapis/api-bigquery @googleapis/yoshi-python # The python-samples-reviewers team is the default owner for samples changes -/samples/ @googleapis/python-samples-owners +/samples/ @googleapis/api-bigquery @googleapis/python-samples-owners From 30770fd0575fbd5aaa70c14196a4cc54627aecd2 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 6 Aug 2021 12:14:24 -0500 Subject: [PATCH 312/341] fix: increase default retry deadline to 10 minutes (#859) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The backend API has a timeout of 4 minutes, so the default of 2 minutes was not allowing for any retries to happen in some cases. Thank you for opening a Pull Request! 
Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #853 🦕 --- google/cloud/bigquery/retry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index 2df4de08b..bab28aacb 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -47,7 +47,7 @@ def _should_retry(exc): return reason in _RETRYABLE_REASONS -DEFAULT_RETRY = retry.Retry(predicate=_should_retry) +DEFAULT_RETRY = retry.Retry(predicate=_should_retry, deadline=600.0) """The default retry object. Any method with a ``retry`` parameter will be retried automatically, From e2cbcaa75a5da2bcd520d9116ead90b02d7326fd Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 6 Aug 2021 22:34:42 +0200 Subject: [PATCH 313/341] process: add yoshi-python to samples CODEOWNERS (#858) Closes #857. 
--- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 76112476b..6763f258c 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -8,4 +8,4 @@ * @googleapis/api-bigquery @googleapis/yoshi-python # The python-samples-reviewers team is the default owner for samples changes -/samples/ @googleapis/api-bigquery @googleapis/python-samples-owners +/samples/ @googleapis/api-bigquery @googleapis/python-samples-owners @googleapis/yoshi-python From 9694a4dd1544e06209d091d9a36d086ea794b3b0 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 9 Aug 2021 12:24:20 -0500 Subject: [PATCH 314/341] chore: release 2.23.3 (#860) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Tim Swast --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c08e7910..856f1ecd1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.23.3](https://www.github.com/googleapis/python-bigquery/compare/v2.23.2...v2.23.3) (2021-08-06) + + +### Bug Fixes + +* increase default retry deadline to 10 minutes ([#859](https://www.github.com/googleapis/python-bigquery/issues/859)) ([30770fd](https://www.github.com/googleapis/python-bigquery/commit/30770fd0575fbd5aaa70c14196a4cc54627aecd2)) + ### [2.23.2](https://www.github.com/googleapis/python-bigquery/compare/v2.23.1...v2.23.2) (2021-07-29) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 0460e7bb9..df992a051 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.23.2" +__version__ = "2.23.3" From 9c6614f939604d3ac99b2945c802df277b629d1b Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 9 Aug 2021 20:10:11 +0200 Subject: [PATCH 315/341] chore(deps): update dependency google-cloud-bigquery to v2.23.3 (#866) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | `==2.23.2` -> `==2.23.3` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.23.3/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.23.3/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.23.3/compatibility-slim/2.23.2)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.23.3/confidence-slim/2.23.2)](https://docs.renovatebot.com/merge-confidence/) | *** ### Release Notes
googleapis/python-bigquery ### [`v2.23.3`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​2233-httpswwwgithubcomgoogleapispython-bigquerycomparev2232v2233-2021-08-06) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.23.2...v2.23.3)
*** ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. *** * \[ ] If you want to rebase/retry this PR, check this box. *** This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 5aa967b24..d55d0f254 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.23.2 +google-cloud-bigquery==2.23.3 google-cloud-bigquery-storage==2.6.2 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 4f2eaf90b..69f537de4 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.23.2 +google-cloud-bigquery==2.23.3 google-cloud-bigquery-storage==2.6.2 google-auth-oauthlib==0.4.5 grpcio==1.39.0 From 7f7b1a808d50558772a0deb534ca654da65d629e Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 10 Aug 2021 19:21:41 +0200 Subject: [PATCH 316/341] feat: add support for transaction statistics (#849) * feat: add support for transaction statistics * Hoist transaction_info into base job class * Add versionadded directive to new property and class * Include new class in docs reference --- docs/reference.rst | 1 + google/cloud/bigquery/__init__.py | 2 ++ google/cloud/bigquery/job/__init__.py | 2 ++ google/cloud/bigquery/job/base.py | 29 +++++++++++++++++++++++ 
tests/system/test_client.py | 34 +++++++++++++++++++++++++++ tests/unit/job/helpers.py | 1 + tests/unit/job/test_base.py | 14 +++++++++++ tests/unit/job/test_query.py | 29 +++++++++++++++++++++++ 8 files changed, 112 insertions(+) diff --git a/docs/reference.rst b/docs/reference.rst index 8a5bff9a4..5ac596370 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -68,6 +68,7 @@ Job-Related Types job.SourceFormat job.WriteDisposition job.SchemaUpdateOption + job.TransactionInfo Dataset diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 222aadcc9..a7a0da3dd 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -70,6 +70,7 @@ from google.cloud.bigquery.job import ScriptOptions from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.job import UnknownJob +from google.cloud.bigquery.job import TransactionInfo from google.cloud.bigquery.job import WriteDisposition from google.cloud.bigquery.model import Model from google.cloud.bigquery.model import ModelReference @@ -149,6 +150,7 @@ "GoogleSheetsOptions", "ParquetOptions", "ScriptOptions", + "TransactionInfo", "DEFAULT_RETRY", # Enum Constants "enums", diff --git a/google/cloud/bigquery/job/__init__.py b/google/cloud/bigquery/job/__init__.py index 4c16d0e20..f51311b0b 100644 --- a/google/cloud/bigquery/job/__init__.py +++ b/google/cloud/bigquery/job/__init__.py @@ -22,6 +22,7 @@ from google.cloud.bigquery.job.base import ReservationUsage from google.cloud.bigquery.job.base import ScriptStatistics from google.cloud.bigquery.job.base import ScriptStackFrame +from google.cloud.bigquery.job.base import TransactionInfo from google.cloud.bigquery.job.base import UnknownJob from google.cloud.bigquery.job.copy_ import CopyJob from google.cloud.bigquery.job.copy_ import CopyJobConfig @@ -81,5 +82,6 @@ "QueryPriority", "SchemaUpdateOption", "SourceFormat", + "TransactionInfo", "WriteDisposition", ] diff --git 
a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index 20ad81c0b..e5fc592a6 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -19,6 +19,7 @@ import http import threading import typing +from typing import Dict, Optional from google.api_core import exceptions import google.api_core.future.polling @@ -88,6 +89,22 @@ def _error_result_to_exception(error_result): ) +class TransactionInfo(typing.NamedTuple): + """[Alpha] Information of a multi-statement transaction. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#TransactionInfo + + .. versionadded:: 2.24.0 + """ + + transaction_id: str + """Output only. ID of the transaction.""" + + @classmethod + def from_api_repr(cls, transaction_info: Dict[str, str]) -> "TransactionInfo": + return cls(transaction_info["transactionId"]) + + class _JobReference(object): """A reference to a job. @@ -336,6 +353,18 @@ def reservation_usage(self): for usage in usage_stats_raw ] + @property + def transaction_info(self) -> Optional[TransactionInfo]: + """Information of the multi-statement transaction if this job is part of one. + + .. versionadded:: 2.24.0 + """ + info = self._properties.get("statistics", {}).get("transactionInfo") + if info is None: + return None + else: + return TransactionInfo.from_api_repr(info) + @property def error_result(self): """Error information about the job as a whole. 
diff --git a/tests/system/test_client.py b/tests/system/test_client.py index baa2b6ad8..f540611a6 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -1557,6 +1557,40 @@ def test_dml_statistics(self): assert query_job.dml_stats.updated_row_count == 0 assert query_job.dml_stats.deleted_row_count == 3 + def test_transaction_info(self): + table_schema = ( + bigquery.SchemaField("foo", "STRING"), + bigquery.SchemaField("bar", "INTEGER"), + ) + + dataset_id = _make_dataset_id("bq_system_test") + self.temp_dataset(dataset_id) + table_id = f"{Config.CLIENT.project}.{dataset_id}.test_dml_statistics" + + # Create the table before loading so that the column order is deterministic. + table = helpers.retry_403(Config.CLIENT.create_table)( + Table(table_id, schema=table_schema) + ) + self.to_delete.insert(0, table) + + # Insert a few rows and check the stats. + sql = f""" + BEGIN TRANSACTION; + INSERT INTO `{table_id}` + VALUES ("one", 1), ("two", 2), ("three", 3), ("four", 4); + + UPDATE `{table_id}` + SET bar = bar + 1 + WHERE bar > 2; + COMMIT TRANSACTION; + """ + query_job = Config.CLIENT.query(sql) + query_job.result() + + # Transaction ID set by the server should be accessible + assert query_job.transaction_info is not None + assert query_job.transaction_info.transaction_id != "" + def test_dbapi_w_standard_sql_types(self): for sql, expected in helpers.STANDARD_SQL_EXAMPLES: Config.CURSOR.execute(sql) diff --git a/tests/unit/job/helpers.py b/tests/unit/job/helpers.py index ea071c5ac..c792214e7 100644 --- a/tests/unit/job/helpers.py +++ b/tests/unit/job/helpers.py @@ -162,6 +162,7 @@ def _verifyInitialReadonlyProperties(self, job): self.assertIsNone(job.created) self.assertIsNone(job.started) self.assertIsNone(job.ended) + self.assertIsNone(job.transaction_info) # derived from resource['status'] self.assertIsNone(job.error_result) diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index 405ad6ee5..0ac1d05b5 100644 --- 
a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -227,6 +227,20 @@ def test_script_statistics(self): self.assertEqual(stack_frame.end_column, 14) self.assertEqual(stack_frame.text, "QUERY TEXT") + def test_transaction_info(self): + from google.cloud.bigquery.job.base import TransactionInfo + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + assert job.transaction_info is None + + statistics = job._properties["statistics"] = {} + assert job.transaction_info is None + + statistics["transactionInfo"] = {"transactionId": "123-abc-xyz"} + assert isinstance(job.transaction_info, TransactionInfo) + assert job.transaction_info.transaction_id == "123-abc-xyz" + def test_num_child_jobs(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 482f7f3af..d41370520 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -128,6 +128,18 @@ def _verify_dml_stats_resource_properties(self, job, resource): else: assert job.dml_stats is None + def _verify_transaction_info_resource_properties(self, job, resource): + resource_stats = resource.get("statistics", {}) + + if "transactionInfo" in resource_stats: + resource_transaction_info = resource_stats["transactionInfo"] + job_transaction_info = job.transaction_info + assert job_transaction_info.transaction_id == resource_transaction_info.get( + "transactionId" + ) + else: + assert job.transaction_info is None + def _verify_configuration_properties(self, job, configuration): if "dryRun" in configuration: self.assertEqual(job.dry_run, configuration["dryRun"]) @@ -137,6 +149,7 @@ def _verify_configuration_properties(self, job, configuration): def _verifyResourceProperties(self, job, resource): self._verifyReadonlyResourceProperties(job, resource) self._verify_dml_stats_resource_properties(job, resource) + 
self._verify_transaction_info_resource_properties(job, resource) configuration = resource.get("configuration", {}) self._verify_configuration_properties(job, configuration) @@ -325,6 +338,22 @@ def test_from_api_repr_with_dml_stats(self): self.assertIs(job._client, client) self._verifyResourceProperties(job, RESOURCE) + def test_from_api_repr_with_transaction_info(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"query": {"query": self.QUERY}}, + "statistics": {"transactionInfo": {"transactionId": "1a2b-3c4d"}}, + } + klass = self._get_target_class() + + job = klass.from_api_repr(RESOURCE, client=client) + + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + def test_from_api_repr_w_properties(self): from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import SchemaUpdateOption From 443b8ab28c19bdd0bd3cad39db33cb7bc8ad8741 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 10 Aug 2021 20:02:10 +0200 Subject: [PATCH 317/341] chore(deps): update dependency google-cloud-bigquery-storage to v2.6.3 (#863) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery-storage](https://togithub.com/googleapis/python-bigquery-storage) | `==2.6.2` -> `==2.6.3` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.6.3/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.6.3/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | 
[![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.6.3/compatibility-slim/2.6.2)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.6.3/confidence-slim/2.6.2)](https://docs.renovatebot.com/merge-confidence/) | *** ### Release Notes
googleapis/python-bigquery-storage ### [`v2.6.3`](https://togithub.com/googleapis/python-bigquery-storage/blob/master/CHANGELOG.md#​263-httpswwwgithubcomgoogleapispython-bigquery-storagecomparev262v263-2021-08-06) [Compare Source](https://togithub.com/googleapis/python-bigquery-storage/compare/v2.6.2...v2.6.3)
*** ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. *** * \[x] If you want to rebase/retry this PR, check this box. *** This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index d55d0f254..d3e599101 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 google-cloud-bigquery==2.23.3 -google-cloud-bigquery-storage==2.6.2 +google-cloud-bigquery-storage==2.6.3 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 69f537de4..1545ed96e 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.23.3 -google-cloud-bigquery-storage==2.6.2 +google-cloud-bigquery-storage==2.6.3 google-auth-oauthlib==0.4.5 grpcio==1.39.0 ipython==7.16.1; python_version < '3.7' From aee814c6a48758325609b6fdfc35e2378461786e Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 11 Aug 2021 12:29:39 +0200 Subject: [PATCH 318/341] chore: fix INSTALL_LIBRARY_FROM_SOURCE in noxfile.py (#869) Source-Link: https://github.com/googleapis/synthtool/commit/6252f2cd074c38f37b44abe5e96d128733eb1b61 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:50e35228649c47b6ca82aa0be3ff9eb2afce51c82b66c4a03fe4afeb5ff6c0fc 
Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 2 +- samples/geography/noxfile.py | 5 ++++- samples/snippets/noxfile.py | 5 ++++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 9ee60f7e4..649877dc4 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:aea14a583128771ae8aefa364e1652f3c56070168ef31beb203534222d842b8b + digest: sha256:50e35228649c47b6ca82aa0be3ff9eb2afce51c82b66c4a03fe4afeb5ff6c0fc diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index 9fc7f1782..7dbea0914 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -94,7 +94,10 @@ def get_pytest_env_vars() -> Dict[str, str]: TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS]) -INSTALL_LIBRARY_FROM_SOURCE = bool(os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False)) +INSTALL_LIBRARY_FROM_SOURCE = os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False) in ( + "True", + "true", +) # # Style Checks # diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index 9fc7f1782..7dbea0914 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -94,7 +94,10 @@ def get_pytest_env_vars() -> Dict[str, str]: TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS]) -INSTALL_LIBRARY_FROM_SOURCE = bool(os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False)) +INSTALL_LIBRARY_FROM_SOURCE = os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False) in ( + "True", + "true", +) # # Style Checks # From c1a3d4435739a21d25aa154145e36d3a7c42eeb6 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 11 Aug 2021 16:28:43 +0200 Subject: [PATCH 319/341] feat: make the same `Table*` instances equal to each other (#867) * feat: make the same Table instances equal to each other * Table equality should ignore metadata 
differences * Compare instances through tableReference property * Make Table instances hashable * Make Table* classes interchangeable If these classes reference the same table, they are now considered equal. --- google/cloud/bigquery/table.py | 42 +++++- tests/unit/test_table.py | 225 ++++++++++++++++++++++++++++++--- 2 files changed, 244 insertions(+), 23 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index daade1ac6..d23885ebf 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -255,9 +255,16 @@ def _key(self): return (self._project, self._dataset_id, self._table_id) def __eq__(self, other): - if not isinstance(other, TableReference): + if isinstance(other, (Table, TableListItem)): + return ( + self.project == other.project + and self.dataset_id == other.dataset_id + and self.table_id == other.table_id + ) + elif isinstance(other, TableReference): + return self._key() == other._key() + else: return NotImplemented - return self._key() == other._key() def __ne__(self, other): return not self == other @@ -1011,6 +1018,24 @@ def _build_resource(self, filter_fields): """Generate a resource for ``update``.""" return _helpers._build_resource_from_properties(self, filter_fields) + def __eq__(self, other): + if isinstance(other, Table): + return ( + self._properties["tableReference"] + == other._properties["tableReference"] + ) + elif isinstance(other, (TableReference, TableListItem)): + return ( + self.project == other.project + and self.dataset_id == other.dataset_id + and self.table_id == other.table_id + ) + else: + return NotImplemented + + def __hash__(self): + return hash((self.project, self.dataset_id, self.table_id)) + def __repr__(self): return "Table({})".format(repr(self.reference)) @@ -1229,6 +1254,19 @@ def to_api_repr(self) -> dict: """ return copy.deepcopy(self._properties) + def __eq__(self, other): + if isinstance(other, (Table, TableReference, TableListItem)): + return ( + 
self.project == other.project + and self.dataset_id == other.dataset_id + and self.table_id == other.table_id + ) + else: + return NotImplemented + + def __hash__(self): + return hash((self.project, self.dataset_id, self.table_id)) + def _row_from_mapping(mapping, schema): """Convert a mapping to a row tuple using the schema. diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 4b1fd833b..a5badc66c 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -115,8 +115,6 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_ctor_defaults(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset_ref = DatasetReference("project_1", "dataset_1") table_ref = self._make_one(dataset_ref, "table_1") @@ -124,8 +122,6 @@ def test_ctor_defaults(self): self.assertEqual(table_ref.table_id, "table_1") def test_to_api_repr(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset_ref = DatasetReference("project_1", "dataset_1") table_ref = self._make_one(dataset_ref, "table_1") @@ -137,7 +133,6 @@ def test_to_api_repr(self): ) def test_from_api_repr(self): - from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import TableReference dataset_ref = DatasetReference("project_1", "dataset_1") @@ -204,8 +199,6 @@ def test_from_string_ignores_default_project(self): self.assertEqual(got.table_id, "string_table") def test___eq___wrong_type(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset_ref = DatasetReference("project_1", "dataset_1") table = self._make_one(dataset_ref, "table_1") other = object() @@ -213,8 +206,6 @@ def test___eq___wrong_type(self): self.assertEqual(table, mock.ANY) def test___eq___project_mismatch(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") other_dataset = DatasetReference("project_2", "dataset_1") table = 
self._make_one(dataset, "table_1") @@ -222,8 +213,6 @@ def test___eq___project_mismatch(self): self.assertNotEqual(table, other) def test___eq___dataset_mismatch(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") other_dataset = DatasetReference("project_1", "dataset_2") table = self._make_one(dataset, "table_1") @@ -231,24 +220,18 @@ def test___eq___dataset_mismatch(self): self.assertNotEqual(table, other) def test___eq___table_mismatch(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") table = self._make_one(dataset, "table_1") other = self._make_one(dataset, "table_2") self.assertNotEqual(table, other) def test___eq___equality(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") table = self._make_one(dataset, "table_1") other = self._make_one(dataset, "table_1") self.assertEqual(table, other) def test___hash__set_equality(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") table1 = self._make_one(dataset, "table1") table2 = self._make_one(dataset, "table2") @@ -257,8 +240,6 @@ def test___hash__set_equality(self): self.assertEqual(set_one, set_two) def test___hash__not_equals(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") table1 = self._make_one(dataset, "table1") table2 = self._make_one(dataset, "table2") @@ -294,8 +275,6 @@ def _get_target_class(): return Table def _make_one(self, *args, **kw): - from google.cloud.bigquery.dataset import DatasetReference - if len(args) == 0: dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -581,6 +560,68 @@ def test_num_rows_getter(self): with self.assertRaises(ValueError): getattr(table, "num_rows") + def 
test__eq__wrong_type(self): + table = self._make_one("project_foo.dataset_bar.table_baz") + + class TableWannabe: + pass + + not_a_table = TableWannabe() + not_a_table._properties = table._properties + + assert table != not_a_table # Can't fake it. + + def test__eq__same_table_basic(self): + table_1 = self._make_one("project_foo.dataset_bar.table_baz") + table_2 = self._make_one("project_foo.dataset_bar.table_baz") + assert table_1 == table_2 + + def test__eq__same_table_multiple_properties(self): + from google.cloud.bigquery import SchemaField + + table_1 = self._make_one("project_foo.dataset_bar.table_baz") + table_1.require_partition_filter = True + table_1.labels = {"first": "one", "second": "two"} + + table_1.schema = [ + SchemaField("name", "STRING", "REQUIRED"), + SchemaField("age", "INTEGER", "NULLABLE"), + ] + + table_2 = self._make_one("project_foo.dataset_bar.table_baz") + table_2.require_partition_filter = True + table_2.labels = {"first": "one", "second": "two"} + table_2.schema = [ + SchemaField("name", "STRING", "REQUIRED"), + SchemaField("age", "INTEGER", "NULLABLE"), + ] + + assert table_1 == table_2 + + def test__eq__same_table_property_different(self): + table_1 = self._make_one("project_foo.dataset_bar.table_baz") + table_1.description = "This is table baz" + + table_2 = self._make_one("project_foo.dataset_bar.table_baz") + table_2.description = "This is also table baz" + + assert table_1 == table_2 # Still equal, only table reference is important. 
+ + def test__eq__different_table(self): + table_1 = self._make_one("project_foo.dataset_bar.table_baz") + table_2 = self._make_one("project_foo.dataset_bar.table_baz_2") + + assert table_1 != table_2 + + def test_hashable(self): + table_1 = self._make_one("project_foo.dataset_bar.table_baz") + table_1.description = "This is a table" + + table_1b = self._make_one("project_foo.dataset_bar.table_baz") + table_1b.description = "Metadata is irrelevant for hashes" + + assert hash(table_1) == hash(table_1b) + def test_schema_setter_non_sequence(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -1543,6 +1584,148 @@ def test_to_api_repr(self): table = self._make_one(resource) self.assertEqual(table.to_api_repr(), resource) + def test__eq__wrong_type(self): + resource = { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + table = self._make_one(resource) + + class FakeTableListItem: + project = "project_foo" + dataset_id = "dataset_bar" + table_id = "table_baz" + + not_a_table = FakeTableListItem() + + assert table != not_a_table # Can't fake it. + + def test__eq__same_table(self): + resource = { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + table_1 = self._make_one(resource) + table_2 = self._make_one(resource) + + assert table_1 == table_2 + + def test__eq__same_table_property_different(self): + table_ref_resource = { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + + resource_1 = {"tableReference": table_ref_resource, "friendlyName": "Table One"} + table_1 = self._make_one(resource_1) + + resource_2 = {"tableReference": table_ref_resource, "friendlyName": "Table Two"} + table_2 = self._make_one(resource_2) + + assert table_1 == table_2 # Still equal, only table reference is important. 
+ + def test__eq__different_table(self): + resource_1 = { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + table_1 = self._make_one(resource_1) + + resource_2 = { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_quux", + } + } + table_2 = self._make_one(resource_2) + + assert table_1 != table_2 + + def test_hashable(self): + resource = { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + table_item = self._make_one(resource) + table_item_2 = self._make_one(resource) + + assert hash(table_item) == hash(table_item_2) + + +class TestTableClassesInterchangeability: + @staticmethod + def _make_table(*args, **kwargs): + from google.cloud.bigquery.table import Table + + return Table(*args, **kwargs) + + @staticmethod + def _make_table_ref(*args, **kwargs): + from google.cloud.bigquery.table import TableReference + + return TableReference(*args, **kwargs) + + @staticmethod + def _make_table_list_item(*args, **kwargs): + from google.cloud.bigquery.table import TableListItem + + return TableListItem(*args, **kwargs) + + def test_table_eq_table_ref(self): + + table = self._make_table("project_foo.dataset_bar.table_baz") + dataset_ref = DatasetReference("project_foo", "dataset_bar") + table_ref = self._make_table_ref(dataset_ref, "table_baz") + + assert table == table_ref + assert table_ref == table + + def test_table_eq_table_list_item(self): + table = self._make_table("project_foo.dataset_bar.table_baz") + table_list_item = self._make_table_list_item( + { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + ) + + assert table == table_list_item + assert table_list_item == table + + def test_table_ref_eq_table_list_item(self): + + dataset_ref = DatasetReference("project_foo", "dataset_bar") + table_ref = 
self._make_table_ref(dataset_ref, "table_baz") + table_list_item = self._make_table_list_item( + { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + ) + + assert table_ref == table_list_item + assert table_list_item == table_ref + class TestSnapshotDefinition: @staticmethod From 93d15e2e5405c2cc6d158c4e5737361344193dbc Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 11 Aug 2021 10:12:23 -0500 Subject: [PATCH 320/341] feat: support `ScalarQueryParameterType` for `type_` argument in `ScalarQueryParameter` constructor (#850) Follow-up to https://github.com/googleapis/python-bigquery/pull/840/files#r679880582 Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) --- docs/conf.py | 1 + docs/reference.rst | 1 + google/cloud/bigquery/enums.py | 24 +++++++++---------- google/cloud/bigquery/query.py | 42 ++++++++++++++++++++++++---------- tests/unit/test_query.py | 13 +++++++++++ 5 files changed, 57 insertions(+), 24 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index cb347160d..09f7ea414 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -110,6 +110,7 @@ # directories to ignore when looking for source files. 
exclude_patterns = [ "_build", + "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "samples/snippets/README.rst", diff --git a/docs/reference.rst b/docs/reference.rst index 5ac596370..d8738e67b 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -138,6 +138,7 @@ Query query.ArrayQueryParameter query.ScalarQueryParameter + query.ScalarQueryParameterType query.StructQueryParameter query.UDFResource diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 0da01d665..d67cebd4c 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -259,23 +259,23 @@ class SqlTypeNames(str, enum.Enum): class SqlParameterScalarTypes: """Supported scalar SQL query parameter types as type objects.""" - STRING = ScalarQueryParameterType("STRING") + BOOL = ScalarQueryParameterType("BOOL") + BOOLEAN = ScalarQueryParameterType("BOOL") + BIGDECIMAL = ScalarQueryParameterType("BIGNUMERIC") + BIGNUMERIC = ScalarQueryParameterType("BIGNUMERIC") BYTES = ScalarQueryParameterType("BYTES") - INTEGER = ScalarQueryParameterType("INT64") - INT64 = ScalarQueryParameterType("INT64") + DATE = ScalarQueryParameterType("DATE") + DATETIME = ScalarQueryParameterType("DATETIME") + DECIMAL = ScalarQueryParameterType("NUMERIC") FLOAT = ScalarQueryParameterType("FLOAT64") FLOAT64 = ScalarQueryParameterType("FLOAT64") - NUMERIC = ScalarQueryParameterType("NUMERIC") - BIGNUMERIC = ScalarQueryParameterType("BIGNUMERIC") - DECIMAL = ScalarQueryParameterType("NUMERIC") - BIGDECIMAL = ScalarQueryParameterType("BIGNUMERIC") - BOOLEAN = ScalarQueryParameterType("BOOL") - BOOL = ScalarQueryParameterType("BOOL") GEOGRAPHY = ScalarQueryParameterType("GEOGRAPHY") - TIMESTAMP = ScalarQueryParameterType("TIMESTAMP") - DATE = ScalarQueryParameterType("DATE") + INT64 = ScalarQueryParameterType("INT64") + INTEGER = ScalarQueryParameterType("INT64") + NUMERIC = ScalarQueryParameterType("NUMERIC") + STRING = ScalarQueryParameterType("STRING") TIME = 
ScalarQueryParameterType("TIME") - DATETIME = ScalarQueryParameterType("DATETIME") + TIMESTAMP = ScalarQueryParameterType("TIMESTAMP") class WriteDisposition(object): diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index d1e9a45a5..1f449f189 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -16,7 +16,9 @@ from collections import OrderedDict import copy -from typing import Union +import datetime +import decimal +from typing import Optional, Union from google.cloud.bigquery.table import _parse_schema_resource from google.cloud.bigquery._helpers import _rows_from_json @@ -24,6 +26,11 @@ from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_PARAM +_SCALAR_VALUE_TYPE = Optional[ + Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date] +] + + class UDFResource(object): """Describe a single user-defined function (UDF) resource. @@ -325,35 +332,46 @@ class ScalarQueryParameter(_AbstractQueryParameter): """Named / positional query parameters for scalar values. Args: - name (Optional[str]): + name: Parameter name, used via ``@foo`` syntax. If None, the parameter can only be addressed via position (``?``). - type_ (str): - Name of parameter type. One of 'STRING', 'INT64', - 'FLOAT64', 'NUMERIC', 'BIGNUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or - 'DATE'. + type_: + Name of parameter type. See + :class:`google.cloud.bigquery.enums.SqlTypeNames` and + :class:`google.cloud.bigquery.enums.SqlParameterScalarTypes` for + supported types. - value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]): + value: The scalar parameter value. 
""" - def __init__(self, name, type_, value): + def __init__( + self, + name: Optional[str], + type_: Optional[Union[str, ScalarQueryParameterType]], + value: _SCALAR_VALUE_TYPE, + ): self.name = name - self.type_ = type_ + if isinstance(type_, ScalarQueryParameterType): + self.type_ = type_._type + else: + self.type_ = type_ self.value = value @classmethod - def positional(cls, type_: str, value) -> "ScalarQueryParameter": + def positional( + cls, type_: Union[str, ScalarQueryParameterType], value: _SCALAR_VALUE_TYPE + ) -> "ScalarQueryParameter": """Factory for positional paramater. Args: - type_ (str): + type_: Name of parameter type. One of 'STRING', 'INT64', 'FLOAT64', 'NUMERIC', 'BIGNUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. - value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]): + value: The scalar parameter value. Returns: diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 9483fe8dd..69a6772e5 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -13,6 +13,7 @@ # limitations under the License. 
import datetime +import decimal import unittest import mock @@ -430,6 +431,18 @@ def test_positional(self): self.assertEqual(param.type_, "INT64") self.assertEqual(param.value, 123) + def test_ctor_w_scalar_query_parameter_type(self): + from google.cloud.bigquery import enums + + param = self._make_one( + name="foo", + type_=enums.SqlParameterScalarTypes.BIGNUMERIC, + value=decimal.Decimal("123.456"), + ) + self.assertEqual(param.name, "foo") + self.assertEqual(param.type_, "BIGNUMERIC") + self.assertEqual(param.value, decimal.Decimal("123.456")) + def test_from_api_repr_w_name(self): RESOURCE = { "name": "foo", From 519d99c20e7d1101f76981f3de036fdf3c7a4ecc Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 11 Aug 2021 14:24:28 -0400 Subject: [PATCH 321/341] feat: retry failed query jobs in `result()` (#837) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #539 🦕 Previously, we only retried failed API requests. Now, we retry failed jobs (according to the predicate of the `Retry` object passed to `job.result()`). 
--- google/cloud/bigquery/client.py | 110 +++++++++---- google/cloud/bigquery/job/query.py | 84 ++++++++-- google/cloud/bigquery/retry.py | 20 +++ tests/system/test_job_retry.py | 72 +++++++++ tests/unit/test_job_retry.py | 247 +++++++++++++++++++++++++++++ tests/unit/test_retry.py | 24 +++ 6 files changed, 518 insertions(+), 39 deletions(-) create mode 100644 tests/system/test_job_retry.py create mode 100644 tests/unit/test_job_retry.py diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 742ecac2e..8142c59cd 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -86,7 +86,7 @@ from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.model import _model_arg_to_model_ref from google.cloud.bigquery.query import _QueryResults -from google.cloud.bigquery.retry import DEFAULT_RETRY +from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.schema import SchemaField @@ -3163,6 +3163,7 @@ def query( project: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: float = None, + job_retry: retries.Retry = DEFAULT_JOB_RETRY, ) -> job.QueryJob: """Run a SQL query. @@ -3192,21 +3193,52 @@ def query( Project ID of the project of where to run the job. Defaults to the client's project. retry (Optional[google.api_core.retry.Retry]): - How to retry the RPC. + How to retry the RPC. This only applies to making RPC + calls. It isn't used to retry failed jobs. This has + a reasonable default that should only be overridden + with care. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + job_retry (Optional[google.api_core.retry.Retry]): + How to retry failed jobs. The default retries + rate-limit-exceeded errors. Passing ``None`` disables + job retry. 
+ + Not all jobs can be retried. If ``job_id`` is + provided, then the job returned by the query will not + be retryable, and an exception will be raised if a + non-``None`` (and non-default) value for ``job_retry`` + is also provided. + + Note that errors aren't detected until ``result()`` is + called on the job returned. The ``job_retry`` + specified here becomes the default ``job_retry`` for + ``result()``, where it can also be specified. Returns: google.cloud.bigquery.job.QueryJob: A new query job instance. Raises: TypeError: - If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.QueryJobConfig` - class. + If ``job_config`` is not an instance of + :class:`~google.cloud.bigquery.job.QueryJobConfig` + class, or if both ``job_id`` and non-``None`` non-default + ``job_retry`` are provided. """ job_id_given = job_id is not None - job_id = _make_job_id(job_id, job_id_prefix) + if ( + job_id_given + and job_retry is not None + and job_retry is not DEFAULT_JOB_RETRY + ): + raise TypeError( + "`job_retry` was provided, but the returned job is" + " not retryable, because a custom `job_id` was" + " provided." 
+ ) + + job_id_save = job_id if project is None: project = self.project @@ -3214,8 +3246,6 @@ def query( if location is None: location = self.location - job_config = copy.deepcopy(job_config) - if self._default_query_job_config: if job_config: _verify_job_config_type( @@ -3225,6 +3255,8 @@ def query( # that is in the default, # should be filled in with the default # the incoming therefore has precedence + # + # Note that _fill_from_default doesn't mutate the receiver job_config = job_config._fill_from_default( self._default_query_job_config ) @@ -3233,34 +3265,54 @@ def query( self._default_query_job_config, google.cloud.bigquery.job.QueryJobConfig, ) - job_config = copy.deepcopy(self._default_query_job_config) + job_config = self._default_query_job_config - job_ref = job._JobReference(job_id, project=project, location=location) - query_job = job.QueryJob(job_ref, query, client=self, job_config=job_config) + # Note that we haven't modified the original job_config (or + # _default_query_job_config) up to this point. + job_config_save = job_config - try: - query_job._begin(retry=retry, timeout=timeout) - except core_exceptions.Conflict as create_exc: - # The thought is if someone is providing their own job IDs and they get - # their job ID generation wrong, this could end up returning results for - # the wrong query. We thus only try to recover if job ID was not given. 
- if job_id_given: - raise create_exc + def do_query(): + # Make a copy now, so that original doesn't get changed by the process + # below and to facilitate retry + job_config = copy.deepcopy(job_config_save) + + job_id = _make_job_id(job_id_save, job_id_prefix) + job_ref = job._JobReference(job_id, project=project, location=location) + query_job = job.QueryJob(job_ref, query, client=self, job_config=job_config) try: - query_job = self.get_job( - job_id, - project=project, - location=location, - retry=retry, - timeout=timeout, - ) - except core_exceptions.GoogleAPIError: # (includes RetryError) - raise create_exc + query_job._begin(retry=retry, timeout=timeout) + except core_exceptions.Conflict as create_exc: + # The thought is if someone is providing their own job IDs and they get + # their job ID generation wrong, this could end up returning results for + # the wrong query. We thus only try to recover if job ID was not given. + if job_id_given: + raise create_exc + + try: + query_job = self.get_job( + job_id, + project=project, + location=location, + retry=retry, + timeout=timeout, + ) + except core_exceptions.GoogleAPIError: # (includes RetryError) + raise create_exc + else: + return query_job else: return query_job - else: - return query_job + + future = do_query() + # The future might be in a failed state now, but if it's + # unrecoverable, we'll find out when we ask for it's result, at which + # point, we may retry. 
+ if not job_id_given: + future._retry_do_query = do_query # in case we have to retry later + future._job_retry = job_retry + + return future def insert_rows( self, diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 2cb7ee28e..3ab47b0f9 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -36,7 +36,7 @@ from google.cloud.bigquery.query import ScalarQueryParameter from google.cloud.bigquery.query import StructQueryParameter from google.cloud.bigquery.query import UDFResource -from google.cloud.bigquery.retry import DEFAULT_RETRY +from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.table import _EmptyRowIterator from google.cloud.bigquery.table import RangePartitioning @@ -1260,6 +1260,7 @@ def result( retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None, start_index: int = None, + job_retry: "retries.Retry" = DEFAULT_JOB_RETRY, ) -> Union["RowIterator", _EmptyRowIterator]: """Start the job and wait for it to complete and get the result. @@ -1270,9 +1271,13 @@ def result( max_results (Optional[int]): The maximum total number of rows from this request. retry (Optional[google.api_core.retry.Retry]): - How to retry the call that retrieves rows. If the job state is - ``DONE``, retrying is aborted early even if the results are not - available, as this will not change anymore. + How to retry the call that retrieves rows. This only + applies to making RPC calls. It isn't used to retry + failed jobs. This has a reasonable default that + should only be overridden with care. If the job state + is ``DONE``, retrying is aborted early even if the + results are not available, as this will not change + anymore. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. 
@@ -1280,6 +1285,16 @@ def result( applies to each individual request. start_index (Optional[int]): The zero-based index of the starting row to read. + job_retry (Optional[google.api_core.retry.Retry]): + How to retry failed jobs. The default retries + rate-limit-exceeded errors. Passing ``None`` disables + job retry. + + Not all jobs can be retried. If ``job_id`` was + provided to the query that created this job, then the + job returned by the query will not be retryable, and + an exception will be raised if non-``None`` + non-default ``job_retry`` is also provided. Returns: google.cloud.bigquery.table.RowIterator: @@ -1295,17 +1310,66 @@ def result( Raises: google.cloud.exceptions.GoogleAPICallError: - If the job failed. + If the job failed and retries aren't successful. concurrent.futures.TimeoutError: If the job did not complete in the given timeout. + TypeError: + If Non-``None`` and non-default ``job_retry`` is + provided and the job is not retryable. """ try: - super(QueryJob, self).result(retry=retry, timeout=timeout) + retry_do_query = getattr(self, "_retry_do_query", None) + if retry_do_query is not None: + if job_retry is DEFAULT_JOB_RETRY: + job_retry = self._job_retry + else: + if job_retry is not None and job_retry is not DEFAULT_JOB_RETRY: + raise TypeError( + "`job_retry` was provided, but this job is" + " not retryable, because a custom `job_id` was" + " provided to the query that created this job." + ) + + first = True + + def do_get_result(): + nonlocal first + + if first: + first = False + else: + # Note that we won't get here if retry_do_query is + # None, because we won't use a retry. + + # The orinal job is failed. Create a new one. 
+ job = retry_do_query() + + # If it's already failed, we might as well stop: + if job.done() and job.exception() is not None: + raise job.exception() + + # Become the new job: + self.__dict__.clear() + self.__dict__.update(job.__dict__) + + # This shouldn't be necessary, because once we have a good + # job, it should stay good,and we shouldn't have to retry. + # But let's be paranoid. :) + self._retry_do_query = retry_do_query + self._job_retry = job_retry + + super(QueryJob, self).result(retry=retry, timeout=timeout) + + # Since the job could already be "done" (e.g. got a finished job + # via client.get_job), the superclass call to done() might not + # set the self._query_results cache. + self._reload_query_results(retry=retry, timeout=timeout) + + if retry_do_query is not None and job_retry is not None: + do_get_result = job_retry(do_get_result) + + do_get_result() - # Since the job could already be "done" (e.g. got a finished job - # via client.get_job), the superclass call to done() might not - # set the self._query_results cache. - self._reload_query_results(retry=retry, timeout=timeout) except exceptions.GoogleAPICallError as exc: exc.message += self._format_for_exception(self.query, self.job_id) exc.query_job = self diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index bab28aacb..e9286055c 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -32,6 +32,8 @@ auth_exceptions.TransportError, ) +_DEFAULT_JOB_DEADLINE = 60.0 * 10.0 # seconds + def _should_retry(exc): """Predicate for determining when to retry. @@ -56,3 +58,21 @@ def _should_retry(exc): on ``DEFAULT_RETRY``. For example, to change the deadline to 30 seconds, pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. 
""" + +job_retry_reasons = "rateLimitExceeded", "backendError" + + +def _job_should_retry(exc): + if not hasattr(exc, "errors") or len(exc.errors) == 0: + return False + + reason = exc.errors[0]["reason"] + return reason in job_retry_reasons + + +DEFAULT_JOB_RETRY = retry.Retry( + predicate=_job_should_retry, deadline=_DEFAULT_JOB_DEADLINE +) +""" +The default job retry object. +""" diff --git a/tests/system/test_job_retry.py b/tests/system/test_job_retry.py new file mode 100644 index 000000000..520545493 --- /dev/null +++ b/tests/system/test_job_retry.py @@ -0,0 +1,72 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import contextlib +import threading +import time + +import google.api_core.exceptions +import google.cloud.bigquery +import pytest + + +def thread(func): + thread = threading.Thread(target=func, daemon=True) + thread.start() + return thread + + +@pytest.mark.parametrize("job_retry_on_query", [True, False]) +def test_query_retry_539(bigquery_client, dataset_id, job_retry_on_query): + """ + Test job_retry + + See: https://github.com/googleapis/python-bigquery/issues/539 + """ + from google.api_core import exceptions + from google.api_core.retry import if_exception_type, Retry + + table_name = f"{dataset_id}.t539" + + # Without a custom retry, we fail: + with pytest.raises(google.api_core.exceptions.NotFound): + bigquery_client.query(f"select count(*) from {table_name}").result() + + retry_notfound = Retry(predicate=if_exception_type(exceptions.NotFound)) + + job_retry = dict(job_retry=retry_notfound) if job_retry_on_query else {} + job = bigquery_client.query(f"select count(*) from {table_name}", **job_retry) + job_id = job.job_id + + # We can already know that the job failed, but we're not supposed + # to find out until we call result, which is where retry happend + assert job.done() + assert job.exception() is not None + + @thread + def create_table(): + time.sleep(1) # Give the first retry attempt time to fail. 
+ with contextlib.closing(google.cloud.bigquery.Client()) as client: + client.query(f"create table {table_name} (id int64)").result() + + job_retry = {} if job_retry_on_query else dict(job_retry=retry_notfound) + [[count]] = list(job.result(**job_retry)) + assert count == 0 + + # The job was retried, and thus got a new job id + assert job.job_id != job_id + + # Make sure we don't leave a thread behind: + create_table.join() + bigquery_client.query(f"drop table {table_name}").result() diff --git a/tests/unit/test_job_retry.py b/tests/unit/test_job_retry.py new file mode 100644 index 000000000..b2095d2f2 --- /dev/null +++ b/tests/unit/test_job_retry.py @@ -0,0 +1,247 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import re + +import mock +import pytest + +import google.api_core.exceptions +import google.api_core.retry + +from .helpers import make_connection + + +# With job_retry_on_query, we're testing 4 scenarios: +# - No `job_retry` passed, retry on default rateLimitExceeded. +# - Pass NotFound retry to `query`. +# - Pass NotFound retry to `result`. +# - Pass BadRequest retry to query, with the value passed to `result` overriding. +@pytest.mark.parametrize("job_retry_on_query", [None, "Query", "Result", "Both"]) +@mock.patch("time.sleep") +def test_retry_failed_jobs(sleep, client, job_retry_on_query): + """ + Test retry of job failures, as opposed to API-invocation failures. 
+ """ + + retry_notfound = google.api_core.retry.Retry( + predicate=google.api_core.retry.if_exception_type( + google.api_core.exceptions.NotFound + ) + ) + retry_badrequest = google.api_core.retry.Retry( + predicate=google.api_core.retry.if_exception_type( + google.api_core.exceptions.BadRequest + ) + ) + + if job_retry_on_query is None: + reason = "rateLimitExceeded" + else: + reason = "notFound" + + err = dict(reason=reason) + responses = [ + dict(status=dict(state="DONE", errors=[err], errorResult=err)), + dict(status=dict(state="DONE", errors=[err], errorResult=err)), + dict(status=dict(state="DONE", errors=[err], errorResult=err)), + dict(status=dict(state="DONE")), + dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"), + ] + + def api_request(method, path, query_params=None, data=None, **kw): + response = responses.pop(0) + if data: + response["jobReference"] = data["jobReference"] + else: + response["jobReference"] = dict( + jobId=path.split("/")[-1], projectId="PROJECT" + ) + return response + + conn = client._connection = make_connection() + conn.api_request.side_effect = api_request + + if job_retry_on_query == "Query": + job_retry = dict(job_retry=retry_notfound) + elif job_retry_on_query == "Both": + # This will be overridden in `result` + job_retry = dict(job_retry=retry_badrequest) + else: + job_retry = {} + job = client.query("select 1", **job_retry) + + orig_job_id = job.job_id + job_retry = ( + dict(job_retry=retry_notfound) + if job_retry_on_query in ("Result", "Both") + else {} + ) + result = job.result(**job_retry) + assert result.total_rows == 1 + assert not responses # We made all the calls we expected to. + + # The job adjusts it's job id based on the id of the last attempt. 
+ assert job.job_id != orig_job_id + assert job.job_id == conn.mock_calls[3][2]["data"]["jobReference"]["jobId"] + + # We had to sleep three times + assert len(sleep.mock_calls) == 3 + + # Sleeps are random, however they're more than 0 + assert min(c[1][0] for c in sleep.mock_calls) > 0 + + # They're at most 2 * (multiplier**(number of sleeps - 1)) * initial + # The default multiplier is 2 + assert max(c[1][0] for c in sleep.mock_calls) <= 8 + + # We can ask for the result again: + responses = [ + dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"), + ] + orig_job_id = job.job_id + result = job.result() + assert result.total_rows == 1 + assert not responses # We made all the calls we expected to. + + # We wouldn't (and didn't) fail, because we're dealing with a successful job. + # So the job id hasn't changed. + assert job.job_id == orig_job_id + + +# With job_retry_on_query, we're testing 4 scenarios: +# - Pass None retry to `query`. +# - Pass None retry to `result`. +@pytest.mark.parametrize("job_retry_on_query", ["Query", "Result"]) +@mock.patch("time.sleep") +def test_disable_retry_failed_jobs(sleep, client, job_retry_on_query): + """ + Test retry of job failures, as opposed to API-invocation failures. 
+ """ + err = dict(reason="rateLimitExceeded") + responses = [dict(status=dict(state="DONE", errors=[err], errorResult=err))] * 3 + + def api_request(method, path, query_params=None, data=None, **kw): + response = responses.pop(0) + response["jobReference"] = data["jobReference"] + return response + + conn = client._connection = make_connection() + conn.api_request.side_effect = api_request + + if job_retry_on_query == "Query": + job_retry = dict(job_retry=None) + else: + job_retry = {} + job = client.query("select 1", **job_retry) + + orig_job_id = job.job_id + job_retry = dict(job_retry=None) if job_retry_on_query == "Result" else {} + with pytest.raises(google.api_core.exceptions.Forbidden): + job.result(**job_retry) + + assert job.job_id == orig_job_id + assert len(sleep.mock_calls) == 0 + + +@mock.patch("google.api_core.retry.datetime_helpers") +@mock.patch("time.sleep") +def test_retry_failed_jobs_after_retry_failed(sleep, datetime_helpers, client): + """ + If at first you don't succeed, maybe you will later. :) + """ + conn = client._connection = make_connection() + + datetime_helpers.utcnow.return_value = datetime.datetime(2021, 7, 29, 10, 43, 2) + + err = dict(reason="rateLimitExceeded") + + def api_request(method, path, query_params=None, data=None, **kw): + calls = sleep.mock_calls + if calls: + datetime_helpers.utcnow.return_value += datetime.timedelta( + seconds=calls[-1][1][0] + ) + response = dict(status=dict(state="DONE", errors=[err], errorResult=err)) + response["jobReference"] = data["jobReference"] + return response + + conn.api_request.side_effect = api_request + + job = client.query("select 1") + orig_job_id = job.job_id + + with pytest.raises(google.api_core.exceptions.RetryError): + job.result() + + # We never got a successful job, so the job id never changed: + assert job.job_id == orig_job_id + + # We failed because we couldn't succeed after 120 seconds. 
+ # But we can try again: + err2 = dict(reason="backendError") # We also retry on this + responses = [ + dict(status=dict(state="DONE", errors=[err2], errorResult=err2)), + dict(status=dict(state="DONE", errors=[err], errorResult=err)), + dict(status=dict(state="DONE", errors=[err2], errorResult=err2)), + dict(status=dict(state="DONE")), + dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"), + ] + + def api_request(method, path, query_params=None, data=None, **kw): + calls = sleep.mock_calls + datetime_helpers.utcnow.return_value += datetime.timedelta( + seconds=calls[-1][1][0] + ) + response = responses.pop(0) + if data: + response["jobReference"] = data["jobReference"] + else: + response["jobReference"] = dict( + jobId=path.split("/")[-1], projectId="PROJECT" + ) + return response + + conn.api_request.side_effect = api_request + result = job.result() + assert result.total_rows == 1 + assert not responses # We made all the calls we expected to. + assert job.job_id != orig_job_id + + +def test_raises_on_job_retry_on_query_with_non_retryable_jobs(client): + with pytest.raises( + TypeError, + match=re.escape( + "`job_retry` was provided, but the returned job is" + " not retryable, because a custom `job_id` was" + " provided." + ), + ): + client.query("select 42", job_id=42, job_retry=google.api_core.retry.Retry()) + + +def test_raises_on_job_retry_on_result_with_non_retryable_jobs(client): + client._connection = make_connection({}) + job = client.query("select 42", job_id=42) + with pytest.raises( + TypeError, + match=re.escape( + "`job_retry` was provided, but this job is" + " not retryable, because a custom `job_id` was" + " provided to the query that created this job." 
+ ), + ): + job.result(job_retry=google.api_core.retry.Retry()) diff --git a/tests/unit/test_retry.py b/tests/unit/test_retry.py index 6fb7f93fd..c7c25e036 100644 --- a/tests/unit/test_retry.py +++ b/tests/unit/test_retry.py @@ -86,3 +86,27 @@ def test_w_unstructured_bad_gateway(self): exc = BadGateway("testing") self.assertTrue(self._call_fut(exc)) + + +def test_DEFAULT_JOB_RETRY_predicate(): + from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY + from google.api_core.exceptions import ClientError + + assert not DEFAULT_JOB_RETRY._predicate(TypeError()) + assert not DEFAULT_JOB_RETRY._predicate(ClientError("fail")) + assert not DEFAULT_JOB_RETRY._predicate( + ClientError("fail", errors=[dict(reason="idk")]) + ) + + assert DEFAULT_JOB_RETRY._predicate( + ClientError("fail", errors=[dict(reason="rateLimitExceeded")]) + ) + assert DEFAULT_JOB_RETRY._predicate( + ClientError("fail", errors=[dict(reason="backendError")]) + ) + + +def test_DEFAULT_JOB_RETRY_deadline(): + from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY + + assert DEFAULT_JOB_RETRY._deadline == 600 From ad9c8026f0e667f13dd754279f9dc40d06f4fa78 Mon Sep 17 00:00:00 2001 From: Grimmer Date: Thu, 12 Aug 2021 03:23:48 +0800 Subject: [PATCH 322/341] fix: make unicode characters working well in load_table_from_json (#865) Co-authored-by: Tim Swast Co-authored-by: Tres Seaver --- google/cloud/bigquery/client.py | 2 +- tests/unit/test_client.py | 36 +++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 8142c59cd..cbac82548 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -2762,7 +2762,7 @@ def load_table_from_json( destination = _table_arg_to_table_ref(destination, default_project=self.project) - data_str = "\n".join(json.dumps(item) for item in json_rows) + data_str = "\n".join(json.dumps(item, ensure_ascii=False) for item in json_rows) encoded_str 
= data_str.encode() data_file = io.BytesIO(encoded_str) return self.load_table_from_file( diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 535685511..671dd8da1 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -7775,6 +7775,42 @@ def test_load_table_from_json_w_invalid_job_config(self): err_msg = str(exc.value) assert "Expected an instance of LoadJobConfig" in err_msg + def test_load_table_from_json_unicode_emoji_data_case(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + + client = self._make_client() + + emoji = "\U0001F3E6" + json_row = {"emoji": emoji} + json_rows = [json_row] + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + with load_patch as load_table_from_file: + client.load_table_from_json(json_rows, self.TABLE_REF) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + size=mock.ANY, + num_retries=_DEFAULT_NUM_RETRIES, + job_id=mock.ANY, + job_id_prefix=None, + location=client.location, + project=client.project, + job_config=mock.ANY, + timeout=None, + ) + + sent_data_file = load_table_from_file.mock_calls[0][1][1] + + # make sure json_row's unicode characters are only encoded one time + expected_bytes = b'{"emoji": "' + emoji.encode("utf8") + b'"}' + assert sent_data_file.getvalue() == expected_bytes + # Low-level tests @classmethod From cf6f0e923d385817c9aff447255ecfa4b9b4c72d Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 11 Aug 2021 19:46:11 +0000 Subject: [PATCH 323/341] chore: release 2.24.0 (#868) :robot: I have created a release \*beep\* \*boop\* --- ## [2.24.0](https://www.github.com/googleapis/python-bigquery/compare/v2.23.3...v2.24.0) (2021-08-11) ### Features * add support for transaction statistics ([#849](https://www.github.com/googleapis/python-bigquery/issues/849)) 
([7f7b1a8](https://www.github.com/googleapis/python-bigquery/commit/7f7b1a808d50558772a0deb534ca654da65d629e)) * make the same `Table*` instances equal to each other ([#867](https://www.github.com/googleapis/python-bigquery/issues/867)) ([c1a3d44](https://www.github.com/googleapis/python-bigquery/commit/c1a3d4435739a21d25aa154145e36d3a7c42eeb6)) * retry failed query jobs in `result()` ([#837](https://www.github.com/googleapis/python-bigquery/issues/837)) ([519d99c](https://www.github.com/googleapis/python-bigquery/commit/519d99c20e7d1101f76981f3de036fdf3c7a4ecc)) * support `ScalarQueryParameterType` for `type_` argument in `ScalarQueryParameter` constructor ([#850](https://www.github.com/googleapis/python-bigquery/issues/850)) ([93d15e2](https://www.github.com/googleapis/python-bigquery/commit/93d15e2e5405c2cc6d158c4e5737361344193dbc)) ### Bug Fixes * make unicode characters working well in load_table_from_json ([#865](https://www.github.com/googleapis/python-bigquery/issues/865)) ([ad9c802](https://www.github.com/googleapis/python-bigquery/commit/ad9c8026f0e667f13dd754279f9dc40d06f4fa78)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). 
--- CHANGELOG.md | 15 +++++++++++++++ docs/conf.py | 1 - google/cloud/bigquery/version.py | 2 +- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 856f1ecd1..83b409015 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,21 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.24.0](https://www.github.com/googleapis/python-bigquery/compare/v2.23.3...v2.24.0) (2021-08-11) + + +### Features + +* add support for transaction statistics ([#849](https://www.github.com/googleapis/python-bigquery/issues/849)) ([7f7b1a8](https://www.github.com/googleapis/python-bigquery/commit/7f7b1a808d50558772a0deb534ca654da65d629e)) +* make the same `Table*` instances equal to each other ([#867](https://www.github.com/googleapis/python-bigquery/issues/867)) ([c1a3d44](https://www.github.com/googleapis/python-bigquery/commit/c1a3d4435739a21d25aa154145e36d3a7c42eeb6)) +* retry failed query jobs in `result()` ([#837](https://www.github.com/googleapis/python-bigquery/issues/837)) ([519d99c](https://www.github.com/googleapis/python-bigquery/commit/519d99c20e7d1101f76981f3de036fdf3c7a4ecc)) +* support `ScalarQueryParameterType` for `type_` argument in `ScalarQueryParameter` constructor ([#850](https://www.github.com/googleapis/python-bigquery/issues/850)) ([93d15e2](https://www.github.com/googleapis/python-bigquery/commit/93d15e2e5405c2cc6d158c4e5737361344193dbc)) + + +### Bug Fixes + +* make unicode characters working well in load_table_from_json ([#865](https://www.github.com/googleapis/python-bigquery/issues/865)) ([ad9c802](https://www.github.com/googleapis/python-bigquery/commit/ad9c8026f0e667f13dd754279f9dc40d06f4fa78)) + ### [2.23.3](https://www.github.com/googleapis/python-bigquery/compare/v2.23.2...v2.23.3) (2021-08-06) diff --git a/docs/conf.py b/docs/conf.py index 09f7ea414..cb347160d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -110,7 +110,6 @@ # directories to ignore when looking for source files. 
exclude_patterns = [ "_build", - "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "samples/snippets/README.rst", diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index df992a051..84f6b4643 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.23.3" +__version__ = "2.24.0" From c44d45bc0481aeef2e39ba3392666125bdd2715d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 12 Aug 2021 12:15:45 +0200 Subject: [PATCH 324/341] chore(deps): update dependency google-cloud-bigquery to v2.24.0 (#873) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index d3e599101..dfee339d4 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.23.3 +google-cloud-bigquery==2.24.0 google-cloud-bigquery-storage==2.6.3 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 1545ed96e..264899dff 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.23.3 +google-cloud-bigquery==2.24.0 google-cloud-bigquery-storage==2.6.3 google-auth-oauthlib==0.4.5 grpcio==1.39.0 From e3704c3494b90112cb30b091bcacb443bf148383 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 13 Aug 2021 02:00:19 -0500 Subject: [PATCH 325/341] test: refactor `list_rows` tests and add test for scalars (#829) * test: refactor `list_rows` tests and add test for scalars * fix JSON formatting * add TODO for INTERVAL Arrow support * format tests --- tests/data/scalars.jsonl | 4 +- tests/data/scalars_extreme.jsonl | 10 +-- 
tests/data/scalars_schema.json | 54 +++++++++------ tests/system/test_arrow.py | 36 ++++++++-- tests/system/test_client.py | 48 ------------- tests/system/test_list_rows.py | 112 +++++++++++++++++++++++++++++++ 6 files changed, 181 insertions(+), 83 deletions(-) create mode 100644 tests/system/test_list_rows.py diff --git a/tests/data/scalars.jsonl b/tests/data/scalars.jsonl index 4419a6e9a..e06139e5c 100644 --- a/tests/data/scalars.jsonl +++ b/tests/data/scalars.jsonl @@ -1,2 +1,2 @@ -{"bool_col": true, "bytes_col": "abcd", "date_col": "2021-07-21", "datetime_col": "2021-07-21 11:39:45", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "123456789", "numeric_col": "1.23456789", "bignumeric_col": "10.111213141516171819", "float64_col": "1.25", "string_col": "Hello, World", "time_col": "11:41:43.07616", "timestamp_col": "2021-07-21T17:43:43.945289Z"} -{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null} +{"bool_col": true, "bytes_col": "SGVsbG8sIFdvcmxkIQ==", "date_col": "2021-07-21", "datetime_col": "2021-07-21 11:39:45", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "123456789", "interval_col": "P7Y11M9DT4H15M37.123456S", "numeric_col": "1.23456789", "bignumeric_col": "10.111213141516171819", "float64_col": "1.25", "rowindex": 0, "string_col": "Hello, World!", "time_col": "11:41:43.07616", "timestamp_col": "2021-07-21T17:43:43.945289Z"} +{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "interval_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "rowindex": 1, "string_col": null, "time_col": null, "timestamp_col": null} diff --git a/tests/data/scalars_extreme.jsonl b/tests/data/scalars_extreme.jsonl index ceccd8dbc..d0a33fdba 100644 --- 
a/tests/data/scalars_extreme.jsonl +++ b/tests/data/scalars_extreme.jsonl @@ -1,5 +1,5 @@ -{"bool_col": true, "bytes_col": "DQo=\n", "date_col": "9999-12-31", "datetime_col": "9999-12-31 23:59:59.999999", "geography_col": "POINT(-135.0000 90.0000)", "int64_col": "9223372036854775807", "numeric_col": "9.9999999999999999999999999999999999999E+28", "bignumeric_col": "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "+inf", "string_col": "Hello, World", "time_col": "23:59:59.99999", "timestamp_col": "9999-12-31T23:59:59.999999Z"} -{"bool_col": false, "bytes_col": "8J+Zgw==\n", "date_col": "0001-01-01", "datetime_col": "0001-01-01 00:00:00", "geography_col": "POINT(45.0000 -90.0000)", "int64_col": "-9223372036854775808", "numeric_col": "-9.9999999999999999999999999999999999999E+28", "bignumeric_col": "-9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "-inf", "string_col": "Hello, World", "time_col": "00:00:00", "timestamp_col": "0001-01-01T00:00:00.000000Z"} -{"bool_col": true, "bytes_col": "AA==\n", "date_col": "1900-01-01", "datetime_col": "1900-01-01 00:00:00", "geography_col": "POINT(-180.0000 0.0000)", "int64_col": "-1", "numeric_col": "0.000000001", "bignumeric_col": "-0.00000000000000000000000000000000000001", "float64_col": "nan", "string_col": "こんにちは", "time_col": "00:00:00.000001", "timestamp_col": "1900-01-01T00:00:00.000000Z"} -{"bool_col": false, "bytes_col": "", "date_col": "1970-01-01", "datetime_col": "1970-01-01 00:00:00", "geography_col": "POINT(0 0)", "int64_col": "0", "numeric_col": "0.0", "bignumeric_col": "0.0", "float64_col": 0.0, "string_col": "", "time_col": "12:00:00", "timestamp_col": "1970-01-01T00:00:00.000000Z"} -{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, 
"timestamp_col": null} +{"bool_col": true, "bytes_col": "DQo=\n", "date_col": "9999-12-31", "datetime_col": "9999-12-31 23:59:59.999999", "geography_col": "POINT(-135.0000 90.0000)", "int64_col": "9223372036854775807", "interval_col": "P-10000Y0M-3660000DT-87840000H0M0S", "numeric_col": "9.9999999999999999999999999999999999999E+28", "bignumeric_col": "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "+inf", "rowindex": 0, "string_col": "Hello, World", "time_col": "23:59:59.999999", "timestamp_col": "9999-12-31T23:59:59.999999Z"} +{"bool_col": false, "bytes_col": "8J+Zgw==\n", "date_col": "0001-01-01", "datetime_col": "0001-01-01 00:00:00", "geography_col": "POINT(45.0000 -90.0000)", "int64_col": "-9223372036854775808", "interval_col": "P10000Y0M3660000DT87840000H0M0S", "numeric_col": "-9.9999999999999999999999999999999999999E+28", "bignumeric_col": "-9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "-inf", "rowindex": 1, "string_col": "Hello, World", "time_col": "00:00:00", "timestamp_col": "0001-01-01T00:00:00.000000Z"} +{"bool_col": true, "bytes_col": "AA==\n", "date_col": "1900-01-01", "datetime_col": "1900-01-01 00:00:00", "geography_col": "POINT(-180.0000 0.0000)", "int64_col": "-1", "interval_col": "P0Y0M0DT0H0M0.000001S", "numeric_col": "0.000000001", "bignumeric_col": "-0.00000000000000000000000000000000000001", "float64_col": "nan", "rowindex": 2, "string_col": "こんにちは", "time_col": "00:00:00.000001", "timestamp_col": "1900-01-01T00:00:00.000000Z"} +{"bool_col": false, "bytes_col": "", "date_col": "1970-01-01", "datetime_col": "1970-01-01 00:00:00", "geography_col": "POINT(0 0)", "int64_col": "0", "interval_col": "P0Y0M0DT0H0M0S", "numeric_col": "0.0", "bignumeric_col": "0.0", "float64_col": 0.0, "rowindex": 3, "string_col": "", "time_col": "12:00:00", "timestamp_col": "1970-01-01T00:00:00.000000Z"} +{"bool_col": null, "bytes_col": null, "date_col": null, 
"datetime_col": null, "geography_col": null, "int64_col": null, "interval_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "rowindex": 4, "string_col": null, "time_col": null, "timestamp_col": null} diff --git a/tests/data/scalars_schema.json b/tests/data/scalars_schema.json index 00bd150fd..676d37d56 100644 --- a/tests/data/scalars_schema.json +++ b/tests/data/scalars_schema.json @@ -1,33 +1,33 @@ [ { "mode": "NULLABLE", - "name": "timestamp_col", - "type": "TIMESTAMP" + "name": "bool_col", + "type": "BOOLEAN" }, { "mode": "NULLABLE", - "name": "time_col", - "type": "TIME" + "name": "bignumeric_col", + "type": "BIGNUMERIC" }, { "mode": "NULLABLE", - "name": "float64_col", - "type": "FLOAT" + "name": "bytes_col", + "type": "BYTES" }, { "mode": "NULLABLE", - "name": "datetime_col", - "type": "DATETIME" + "name": "date_col", + "type": "DATE" }, { "mode": "NULLABLE", - "name": "bignumeric_col", - "type": "BIGNUMERIC" + "name": "datetime_col", + "type": "DATETIME" }, { "mode": "NULLABLE", - "name": "numeric_col", - "type": "NUMERIC" + "name": "float64_col", + "type": "FLOAT" }, { "mode": "NULLABLE", @@ -36,27 +36,37 @@ }, { "mode": "NULLABLE", - "name": "date_col", - "type": "DATE" + "name": "int64_col", + "type": "INTEGER" }, { "mode": "NULLABLE", - "name": "string_col", - "type": "STRING" + "name": "interval_col", + "type": "INTERVAL" }, { "mode": "NULLABLE", - "name": "bool_col", - "type": "BOOLEAN" + "name": "numeric_col", + "type": "NUMERIC" + }, + { + "mode": "REQUIRED", + "name": "rowindex", + "type": "INTEGER" }, { "mode": "NULLABLE", - "name": "bytes_col", - "type": "BYTES" + "name": "string_col", + "type": "STRING" }, { "mode": "NULLABLE", - "name": "int64_col", - "type": "INTEGER" + "name": "time_col", + "type": "TIME" + }, + { + "mode": "NULLABLE", + "name": "timestamp_col", + "type": "TIMESTAMP" } ] diff --git a/tests/system/test_arrow.py b/tests/system/test_arrow.py index f97488e39..12f7af9cb 100644 --- 
a/tests/system/test_arrow.py +++ b/tests/system/test_arrow.py @@ -14,8 +14,14 @@ """System tests for Arrow connector.""" +from typing import Optional + import pytest +from google.cloud import bigquery +from google.cloud.bigquery import enums + + pyarrow = pytest.importorskip( "pyarrow", minversion="3.0.0" ) # Needs decimal256 for BIGNUMERIC columns. @@ -31,17 +37,35 @@ ), ) def test_list_rows_nullable_scalars_dtypes( - bigquery_client, - scalars_table, - scalars_extreme_table, - max_results, - scalars_table_name, + bigquery_client: bigquery.Client, + scalars_table: str, + scalars_extreme_table: str, + max_results: Optional[int], + scalars_table_name: str, ): table_id = scalars_table if scalars_table_name == "scalars_extreme_table": table_id = scalars_extreme_table + + # TODO(GH#836): Avoid INTERVAL columns until they are supported by the + # BigQuery Storage API and pyarrow. + schema = [ + bigquery.SchemaField("bool_col", enums.SqlTypeNames.BOOLEAN), + bigquery.SchemaField("bignumeric_col", enums.SqlTypeNames.BIGNUMERIC), + bigquery.SchemaField("bytes_col", enums.SqlTypeNames.BYTES), + bigquery.SchemaField("date_col", enums.SqlTypeNames.DATE), + bigquery.SchemaField("datetime_col", enums.SqlTypeNames.DATETIME), + bigquery.SchemaField("float64_col", enums.SqlTypeNames.FLOAT64), + bigquery.SchemaField("geography_col", enums.SqlTypeNames.GEOGRAPHY), + bigquery.SchemaField("int64_col", enums.SqlTypeNames.INT64), + bigquery.SchemaField("numeric_col", enums.SqlTypeNames.NUMERIC), + bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING), + bigquery.SchemaField("time_col", enums.SqlTypeNames.TIME), + bigquery.SchemaField("timestamp_col", enums.SqlTypeNames.TIMESTAMP), + ] + arrow_table = bigquery_client.list_rows( - table_id, max_results=max_results, + table_id, max_results=max_results, selected_fields=schema, ).to_arrow() schema = arrow_table.schema diff --git a/tests/system/test_client.py b/tests/system/test_client.py index f540611a6..06ef40126 100644 --- 
a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2428,54 +2428,6 @@ def test_nested_table_to_arrow(self): self.assertTrue(pyarrow.types.is_list(record_col[1].type)) self.assertTrue(pyarrow.types.is_int64(record_col[1].type.value_type)) - def test_list_rows_empty_table(self): - from google.cloud.bigquery.table import RowIterator - - dataset_id = _make_dataset_id("empty_table") - dataset = self.temp_dataset(dataset_id) - table_ref = dataset.table("empty_table") - table = Config.CLIENT.create_table(bigquery.Table(table_ref)) - - # It's a bit silly to list rows for an empty table, but this does - # happen as the result of a DDL query from an IPython magic command. - rows = Config.CLIENT.list_rows(table) - self.assertIsInstance(rows, RowIterator) - self.assertEqual(tuple(rows), ()) - - def test_list_rows_page_size(self): - from google.cloud.bigquery.job import SourceFormat - from google.cloud.bigquery.job import WriteDisposition - - num_items = 7 - page_size = 3 - num_pages, num_last_page = divmod(num_items, page_size) - - SF = bigquery.SchemaField - schema = [SF("string_col", "STRING", mode="NULLABLE")] - to_insert = [{"string_col": "item%d" % i} for i in range(num_items)] - rows = [json.dumps(row) for row in to_insert] - body = io.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) - - table_id = "test_table" - dataset = self.temp_dataset(_make_dataset_id("nested_df")) - table = dataset.table(table_id) - self.to_delete.insert(0, table) - job_config = bigquery.LoadJobConfig() - job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE - job_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON - job_config.schema = schema - # Load a table using a local JSON file from memory. 
- Config.CLIENT.load_table_from_file(body, table, job_config=job_config).result() - - df = Config.CLIENT.list_rows(table, selected_fields=schema, page_size=page_size) - pages = df.pages - - for i in range(num_pages): - page = next(pages) - self.assertEqual(page.num_items, page_size) - page = next(pages) - self.assertEqual(page.num_items, num_last_page) - def temp_dataset(self, dataset_id, location=None): project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) diff --git a/tests/system/test_list_rows.py b/tests/system/test_list_rows.py new file mode 100644 index 000000000..70388059e --- /dev/null +++ b/tests/system/test_list_rows.py @@ -0,0 +1,112 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import decimal + +from google.cloud import bigquery +from google.cloud.bigquery import enums + + +def test_list_rows_empty_table(bigquery_client: bigquery.Client, table_id: str): + from google.cloud.bigquery.table import RowIterator + + table = bigquery_client.create_table(table_id) + + # It's a bit silly to list rows for an empty table, but this does + # happen as the result of a DDL query from an IPython magic command. 
+ rows = bigquery_client.list_rows(table) + assert isinstance(rows, RowIterator) + assert tuple(rows) == () + + +def test_list_rows_page_size(bigquery_client: bigquery.Client, table_id: str): + num_items = 7 + page_size = 3 + num_pages, num_last_page = divmod(num_items, page_size) + + to_insert = [{"string_col": "item%d" % i, "rowindex": i} for i in range(num_items)] + bigquery_client.load_table_from_json(to_insert, table_id).result() + + df = bigquery_client.list_rows( + table_id, + selected_fields=[bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING)], + page_size=page_size, + ) + pages = df.pages + + for i in range(num_pages): + page = next(pages) + assert page.num_items == page_size + page = next(pages) + assert page.num_items == num_last_page + + +def test_list_rows_scalars(bigquery_client: bigquery.Client, scalars_table: str): + rows = sorted( + bigquery_client.list_rows(scalars_table), key=lambda row: row["rowindex"] + ) + row = rows[0] + assert row["bool_col"] # True + assert row["bytes_col"] == b"Hello, World!" + assert row["date_col"] == datetime.date(2021, 7, 21) + assert row["datetime_col"] == datetime.datetime(2021, 7, 21, 11, 39, 45) + assert row["geography_col"] == "POINT(-122.0838511 37.3860517)" + assert row["int64_col"] == 123456789 + assert row["numeric_col"] == decimal.Decimal("1.23456789") + assert row["bignumeric_col"] == decimal.Decimal("10.111213141516171819") + assert row["float64_col"] == 1.25 + assert row["string_col"] == "Hello, World!" 
+ assert row["time_col"] == datetime.time(11, 41, 43, 76160) + assert row["timestamp_col"] == datetime.datetime( + 2021, 7, 21, 17, 43, 43, 945289, tzinfo=datetime.timezone.utc + ) + + nullrow = rows[1] + for column, value in nullrow.items(): + if column == "rowindex": + assert value == 1 + else: + assert value is None + + +def test_list_rows_scalars_extreme( + bigquery_client: bigquery.Client, scalars_extreme_table: str +): + rows = sorted( + bigquery_client.list_rows(scalars_extreme_table), + key=lambda row: row["rowindex"], + ) + row = rows[0] + assert row["bool_col"] # True + assert row["bytes_col"] == b"\r\n" + assert row["date_col"] == datetime.date(9999, 12, 31) + assert row["datetime_col"] == datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + assert row["geography_col"] == "POINT(-135 90)" + assert row["int64_col"] == 9223372036854775807 + assert row["numeric_col"] == decimal.Decimal(f"9.{'9' * 37}E+28") + assert row["bignumeric_col"] == decimal.Decimal(f"9.{'9' * 75}E+37") + assert row["float64_col"] == float("Inf") + assert row["string_col"] == "Hello, World" + assert row["time_col"] == datetime.time(23, 59, 59, 999999) + assert row["timestamp_col"] == datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc + ) + + nullrow = rows[4] + for column, value in nullrow.items(): + if column == "rowindex": + assert value == 4 + else: + assert value is None From cd21df1716e9ab163c779a716d94a850a6b2d253 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 13 Aug 2021 11:17:31 -0400 Subject: [PATCH 326/341] chore: drop mention of Python 2.7 from templates (#877) Source-Link: https://github.com/googleapis/synthtool/commit/facee4cc1ea096cd8bcc008bb85929daa7c414c0 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:9743664022bd63a8084be67f144898314c7ca12f0a03e422ac17c733c129d803 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 2 +- docs/conf.py | 1 + 
samples/geography/noxfile.py | 6 +++--- samples/snippets/noxfile.py | 6 +++--- scripts/readme-gen/templates/install_deps.tmpl.rst | 2 +- 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 649877dc4..a9fcd07cc 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:50e35228649c47b6ca82aa0be3ff9eb2afce51c82b66c4a03fe4afeb5ff6c0fc + digest: sha256:9743664022bd63a8084be67f144898314c7ca12f0a03e422ac17c733c129d803 diff --git a/docs/conf.py b/docs/conf.py index cb347160d..09f7ea414 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -110,6 +110,7 @@ # directories to ignore when looking for source files. exclude_patterns = [ "_build", + "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "samples/snippets/README.rst", diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index 7dbea0914..b008613f0 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -39,7 +39,7 @@ TEST_CONFIG = { # You can opt out from the test for specific Python versions. - "ignored_versions": ["2.7"], + "ignored_versions": [], # Old samples are opted out of enforcing Python type hints # All new samples should feature them "enforce_type_hints": False, @@ -86,8 +86,8 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. -# All versions used to tested samples. -ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9"] +# All versions used to test samples. +ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9"] # Any default versions that should be ignored. 
IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index 7dbea0914..b008613f0 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -39,7 +39,7 @@ TEST_CONFIG = { # You can opt out from the test for specific Python versions. - "ignored_versions": ["2.7"], + "ignored_versions": [], # Old samples are opted out of enforcing Python type hints # All new samples should feature them "enforce_type_hints": False, @@ -86,8 +86,8 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. -# All versions used to tested samples. -ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9"] +# All versions used to test samples. +ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/scripts/readme-gen/templates/install_deps.tmpl.rst b/scripts/readme-gen/templates/install_deps.tmpl.rst index a0406dba8..275d64989 100644 --- a/scripts/readme-gen/templates/install_deps.tmpl.rst +++ b/scripts/readme-gen/templates/install_deps.tmpl.rst @@ -12,7 +12,7 @@ Install Dependencies .. _Python Development Environment Setup Guide: https://cloud.google.com/python/setup -#. Create a virtualenv. Samples are compatible with Python 2.7 and 3.4+. +#. Create a virtualenv. Samples are compatible with Python 3.6+. .. 
code-block:: bash From 2cb3563ee863edef7eaf5d04d739bcfe7bc6438e Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Fri, 13 Aug 2021 12:54:09 -0600 Subject: [PATCH 327/341] fix: remove pytz dependency and require pyarrow>=3.0.0 (#875) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: remove pytz dependency * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * fix(deps): require pyarrow>=3.0.0 * remove version check for pyarrow * require pyarrow 3.0 in pandas extra * remove _BIGNUMERIC_SUPPORT references from tests Co-authored-by: Owl Bot Co-authored-by: Dina Graves Portman Co-authored-by: Tim Swast --- docs/snippets.py | 5 +- google/cloud/bigquery/_pandas_helpers.py | 17 ++---- google/cloud/bigquery/table.py | 3 +- samples/client_query_w_timestamp_params.py | 3 +- setup.py | 4 +- testing/constraints-3.6.txt | 2 +- tests/system/test_client.py | 14 ++--- tests/system/test_pandas.py | 42 ++++++------- tests/unit/job/test_base.py | 4 +- tests/unit/test__pandas_helpers.py | 69 ++++++++-------------- tests/unit/test_client.py | 21 ++++--- tests/unit/test_table.py | 17 ++---- 12 files changed, 78 insertions(+), 123 deletions(-) diff --git a/docs/snippets.py b/docs/snippets.py index 3f9b9a88c..c62001fc0 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -363,7 +363,6 @@ def test_update_table_expiration(client, to_delete): # [START bigquery_update_table_expiration] import datetime - import pytz # from google.cloud import bigquery # client = bigquery.Client() @@ -375,7 +374,9 @@ def test_update_table_expiration(client, to_delete): assert table.expires is None # set table to expire 5 days from now - expiration = datetime.datetime.now(pytz.utc) + datetime.timedelta(days=5) + expiration = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta( + days=5 + ) table.expires = expiration table = 
client.update_table(table, ["expires"]) # API request diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index b381fa5f7..f49980645 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -20,8 +20,6 @@ import queue import warnings -from packaging import version - try: import pandas except ImportError: # pragma: NO COVER @@ -110,6 +108,7 @@ def pyarrow_timestamp(): # This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py # When modifying it be sure to update it there as well. BQ_TO_ARROW_SCALARS = { + "BIGNUMERIC": pyarrow_bignumeric, "BOOL": pyarrow.bool_, "BOOLEAN": pyarrow.bool_, "BYTES": pyarrow.binary, @@ -146,23 +145,15 @@ def pyarrow_timestamp(): pyarrow.date64().id: "DATETIME", # because millisecond resolution pyarrow.binary().id: "BYTES", pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() - # The exact scale and precision don't matter, see below. - pyarrow.decimal128(38, scale=9).id: "NUMERIC", - } - - if version.parse(pyarrow.__version__) >= version.parse("3.0.0"): - BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric # The exact decimal's scale and precision are not important, as only # the type ID matters, and it's the same for all decimal256 instances. 
- ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" - _BIGNUMERIC_SUPPORT = True - else: - _BIGNUMERIC_SUPPORT = False + pyarrow.decimal128(38, scale=9).id: "NUMERIC", + pyarrow.decimal256(76, scale=38).id: "BIGNUMERIC", + } else: # pragma: NO COVER BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER ARROW_SCALAR_IDS_TO_BQ = {} # pragma: NO_COVER - _BIGNUMERIC_SUPPORT = False # pragma: NO COVER def bq_to_arrow_struct_data_type(field): diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index d23885ebf..62f888001 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -20,7 +20,6 @@ import datetime import functools import operator -import pytz import typing from typing import Any, Dict, Iterable, Iterator, Optional, Tuple import warnings @@ -1969,7 +1968,7 @@ def to_dataframe( # Pandas, we set the timestamp_as_object parameter to True, if necessary. types_to_check = { pyarrow.timestamp("us"), - pyarrow.timestamp("us", tz=pytz.UTC), + pyarrow.timestamp("us", tz=datetime.timezone.utc), } for column in record_batch: diff --git a/samples/client_query_w_timestamp_params.py b/samples/client_query_w_timestamp_params.py index ca8eec0b5..41a27770e 100644 --- a/samples/client_query_w_timestamp_params.py +++ b/samples/client_query_w_timestamp_params.py @@ -18,7 +18,6 @@ def client_query_w_timestamp_params(): # [START bigquery_query_params_timestamps] import datetime - import pytz from google.cloud import bigquery # Construct a BigQuery client object. @@ -30,7 +29,7 @@ def client_query_w_timestamp_params(): bigquery.ScalarQueryParameter( "ts_value", "TIMESTAMP", - datetime.datetime(2016, 12, 7, 8, 0, tzinfo=pytz.UTC), + datetime.datetime(2016, 12, 7, 8, 0, tzinfo=datetime.timezone.utc), ) ] ) diff --git a/setup.py b/setup.py index e9deaf117..a1b3b61a0 100644 --- a/setup.py +++ b/setup.py @@ -54,9 +54,9 @@ # grpc.Channel.close() method isn't added until 1.32.0. 
# https://github.com/grpc/grpc/pull/15254 "grpcio >= 1.38.1, < 2.0dev", - "pyarrow >= 1.0.0, < 6.0dev", + "pyarrow >= 3.0.0, < 6.0dev", ], - "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 6.0dev"], + "pandas": ["pandas>=0.23.0", "pyarrow >= 3.0.0, < 6.0dev"], "bignumeric_type": ["pyarrow >= 3.0.0, < 6.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index af6e82efd..ce012f0d7 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -16,7 +16,7 @@ opentelemetry-sdk==0.11b0 pandas==0.23.0 proto-plus==1.10.0 protobuf==3.12.0 -pyarrow==1.0.0 +pyarrow==3.0.0 requests==2.18.0 six==1.13.0 tqdm==4.7.4 diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 06ef40126..4250111b4 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -30,7 +30,6 @@ import psutil import pytest -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from . import helpers try: @@ -1972,15 +1971,12 @@ def test_query_w_query_params(self): "expected": {"friends": [phred_name, bharney_name]}, "query_parameters": [with_friends_param], }, + { + "sql": "SELECT @bignum_param", + "expected": bignum, + "query_parameters": [bignum_param], + }, ] - if _BIGNUMERIC_SUPPORT: - examples.append( - { - "sql": "SELECT @bignum_param", - "expected": bignum, - "query_parameters": [bignum_param], - } - ) for example in examples: jconfig = QueryJobConfig() diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 821b375e1..371dcea71 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -24,10 +24,8 @@ import google.api_core.retry import pkg_resources import pytest -import pytz from google.cloud import bigquery -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from . 
import helpers @@ -64,7 +62,7 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i datetime.datetime(2012, 3, 14, 15, 16), ], dtype="datetime64[ns]", - ).dt.tz_localize(pytz.utc), + ).dt.tz_localize(datetime.timezone.utc), ), ( "dt_col", @@ -189,12 +187,11 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), ) - if _BIGNUMERIC_SUPPORT: - scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) table_schema = scalars_schema + ( # TODO: Array columns can't be read due to NULLABLE versus REPEATED @@ -216,12 +213,11 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): ("geo_col", nulls), ("int_col", nulls), ("num_col", nulls), + ("bignum_col", nulls), ("str_col", nulls), ("time_col", nulls), ("ts_col", nulls), ] - if _BIGNUMERIC_SUPPORT: - df_data.append(("bignum_col", nulls)) df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) @@ -297,12 +293,11 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), ) - if _BIGNUMERIC_SUPPORT: - scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) table_schema = scalars_schema + ( # TODO: Array columns can't be read due to NULLABLE versus REPEATED @@ -340,6 +335,14 @@ def 
test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id decimal.Decimal("99999999999999999999999999999.999999999"), ], ), + ( + "bignum_col", + [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], + ), ("str_col", ["abc", None, "def"]), ( "time_col", @@ -348,23 +351,14 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id ( "ts_col", [ - datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc), None, - datetime.datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc), + datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc + ), ], ), ] - if _BIGNUMERIC_SUPPORT: - df_data.append( - ( - "bignum_col", - [ - decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), - None, - decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), - ], - ) - ) df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) @@ -484,10 +478,10 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv( ( "ts_col", [ - datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc), None, datetime.datetime( - 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc ), ], ), diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index 0ac1d05b5..c3f7854e3 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -295,11 +295,11 @@ def test_user_email(self): @staticmethod def _datetime_and_millis(): import datetime - import pytz from google.cloud._helpers import _millis now = datetime.datetime.utcnow().replace( - microsecond=123000, tzinfo=pytz.UTC # stats timestamps have ms precision + microsecond=123000, + tzinfo=datetime.timezone.utc, # stats timestamps have 
ms precision ) return now, _millis(now) diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 0ba671cd9..b9cb56572 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -37,12 +37,10 @@ # used in test parameterization. pyarrow = mock.Mock() import pytest -import pytz from google import api_core from google.cloud.bigquery import _helpers from google.cloud.bigquery import schema -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT try: from google.cloud import bigquery_storage @@ -60,11 +58,6 @@ PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") -skip_if_no_bignumeric = pytest.mark.skipif( - not _BIGNUMERIC_SUPPORT, reason="BIGNUMERIC support requires pyarrow>=3.0.0", -) - - @pytest.fixture def module_under_test(): from google.cloud.bigquery import _pandas_helpers @@ -153,9 +146,7 @@ def test_all_(): ("FLOAT", "NULLABLE", pyarrow.types.is_float64), ("FLOAT64", "NULLABLE", pyarrow.types.is_float64), ("NUMERIC", "NULLABLE", is_numeric), - pytest.param( - "BIGNUMERIC", "NULLABLE", is_bignumeric, marks=skip_if_no_bignumeric, - ), + ("BIGNUMERIC", "NULLABLE", is_bignumeric), ("BOOLEAN", "NULLABLE", pyarrow.types.is_boolean), ("BOOL", "NULLABLE", pyarrow.types.is_boolean), ("TIMESTAMP", "NULLABLE", is_timestamp), @@ -234,11 +225,10 @@ def test_all_(): "REPEATED", all_(pyarrow.types.is_list, lambda type_: is_numeric(type_.value_type)), ), - pytest.param( + ( "BIGNUMERIC", "REPEATED", all_(pyarrow.types.is_list, lambda type_: is_bignumeric(type_.value_type)), - marks=skip_if_no_bignumeric, ), ( "BOOLEAN", @@ -312,6 +302,7 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): schema.SchemaField("field05", "FLOAT"), schema.SchemaField("field06", "FLOAT64"), schema.SchemaField("field07", "NUMERIC"), + schema.SchemaField("field08", "BIGNUMERIC"), schema.SchemaField("field09", "BOOLEAN"), schema.SchemaField("field10", "BOOL"), schema.SchemaField("field11", 
"TIMESTAMP"), @@ -321,9 +312,6 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): schema.SchemaField("field15", "GEOGRAPHY"), ) - if _BIGNUMERIC_SUPPORT: - fields += (schema.SchemaField("field08", "BIGNUMERIC"),) - field = schema.SchemaField("ignored_name", bq_type, mode="NULLABLE", fields=fields) actual = module_under_test.bq_to_arrow_data_type(field) @@ -335,6 +323,7 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): pyarrow.field("field05", pyarrow.float64()), pyarrow.field("field06", pyarrow.float64()), pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field08", module_under_test.pyarrow_bignumeric()), pyarrow.field("field09", pyarrow.bool_()), pyarrow.field("field10", pyarrow.bool_()), pyarrow.field("field11", module_under_test.pyarrow_timestamp()), @@ -343,8 +332,6 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): pyarrow.field("field14", module_under_test.pyarrow_datetime()), pyarrow.field("field15", pyarrow.string()), ) - if _BIGNUMERIC_SUPPORT: - expected += (pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),) expected = pyarrow.struct(expected) assert pyarrow.types.is_struct(actual) @@ -363,6 +350,7 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): schema.SchemaField("field05", "FLOAT"), schema.SchemaField("field06", "FLOAT64"), schema.SchemaField("field07", "NUMERIC"), + schema.SchemaField("field08", "BIGNUMERIC"), schema.SchemaField("field09", "BOOLEAN"), schema.SchemaField("field10", "BOOL"), schema.SchemaField("field11", "TIMESTAMP"), @@ -372,9 +360,6 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): schema.SchemaField("field15", "GEOGRAPHY"), ) - if _BIGNUMERIC_SUPPORT: - fields += (schema.SchemaField("field08", "BIGNUMERIC"),) - field = schema.SchemaField("ignored_name", bq_type, mode="REPEATED", fields=fields) actual = module_under_test.bq_to_arrow_data_type(field) @@ -386,6 +371,7 @@ 
def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): pyarrow.field("field05", pyarrow.float64()), pyarrow.field("field06", pyarrow.float64()), pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field08", module_under_test.pyarrow_bignumeric()), pyarrow.field("field09", pyarrow.bool_()), pyarrow.field("field10", pyarrow.bool_()), pyarrow.field("field11", module_under_test.pyarrow_timestamp()), @@ -394,8 +380,6 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): pyarrow.field("field14", module_under_test.pyarrow_datetime()), pyarrow.field("field15", pyarrow.string()), ) - if _BIGNUMERIC_SUPPORT: - expected += (pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),) expected_value_type = pyarrow.struct(expected) assert pyarrow.types.is_list(actual) @@ -441,7 +425,7 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): decimal.Decimal("999.123456789"), ], ), - pytest.param( + ( "BIGNUMERIC", [ decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), @@ -449,17 +433,18 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), decimal.Decimal("3.141592653589793238462643383279"), ], - marks=skip_if_no_bignumeric, ), ("BOOLEAN", [True, None, False, None]), ("BOOL", [False, None, True, None]), ( "TIMESTAMP", [ - datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc), None, - datetime.datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc), - datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc + ), + datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc), ], ), ( @@ -938,6 +923,7 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): schema.SchemaField("field05", "FLOAT", mode="REQUIRED"), 
schema.SchemaField("field06", "FLOAT64", mode="REQUIRED"), schema.SchemaField("field07", "NUMERIC", mode="REQUIRED"), + schema.SchemaField("field08", "BIGNUMERIC", mode="REQUIRED"), schema.SchemaField("field09", "BOOLEAN", mode="REQUIRED"), schema.SchemaField("field10", "BOOL", mode="REQUIRED"), schema.SchemaField("field11", "TIMESTAMP", mode="REQUIRED"), @@ -946,8 +932,6 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): schema.SchemaField("field14", "DATETIME", mode="REQUIRED"), schema.SchemaField("field15", "GEOGRAPHY", mode="REQUIRED"), ) - if _BIGNUMERIC_SUPPORT: - bq_schema += (schema.SchemaField("field08", "BIGNUMERIC", mode="REQUIRED"),) data = { "field01": ["hello", "world"], @@ -957,11 +941,15 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): "field05": [1.25, 9.75], "field06": [-1.75, -3.5], "field07": [decimal.Decimal("1.2345"), decimal.Decimal("6.7891")], + "field08": [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], "field09": [True, False], "field10": [False, True], "field11": [ - datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), - datetime.datetime(2012, 12, 21, 9, 7, 42, tzinfo=pytz.utc), + datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc), + datetime.datetime(2012, 12, 21, 9, 7, 42, tzinfo=datetime.timezone.utc), ], "field12": [datetime.date(9999, 12, 31), datetime.date(1970, 1, 1)], "field13": [datetime.time(23, 59, 59, 999999), datetime.time(12, 0, 0)], @@ -971,11 +959,6 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): ], "field15": ["POINT(30 10)", "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))"], } - if _BIGNUMERIC_SUPPORT: - data["field08"] = [ - decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), - decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), - ] dataframe = pandas.DataFrame(data) arrow_table = module_under_test.dataframe_to_arrow(dataframe, bq_schema) @@ -1210,11 +1193,8 @@ 
def test_augment_schema_type_detection_succeeds(module_under_test): schema.SchemaField("bytes_field", field_type=None, mode="NULLABLE"), schema.SchemaField("string_field", field_type=None, mode="NULLABLE"), schema.SchemaField("numeric_field", field_type=None, mode="NULLABLE"), + schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"), ) - if _BIGNUMERIC_SUPPORT: - current_schema += ( - schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"), - ) with warnings.catch_warnings(record=True) as warned: augmented_schema = module_under_test.augment_schema(dataframe, current_schema) @@ -1236,13 +1216,10 @@ def test_augment_schema_type_detection_succeeds(module_under_test): schema.SchemaField("bytes_field", field_type="BYTES", mode="NULLABLE"), schema.SchemaField("string_field", field_type="STRING", mode="NULLABLE"), schema.SchemaField("numeric_field", field_type="NUMERIC", mode="NULLABLE"), + schema.SchemaField( + "bignumeric_field", field_type="BIGNUMERIC", mode="NULLABLE" + ), ) - if _BIGNUMERIC_SUPPORT: - expected_schema += ( - schema.SchemaField( - "bignumeric_field", field_type="BIGNUMERIC", mode="NULLABLE" - ), - ) by_name = operator.attrgetter("name") assert sorted(augmented_schema, key=by_name) == sorted(expected_schema, key=by_name) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 671dd8da1..ca0dca975 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -30,7 +30,6 @@ import packaging import requests import pytest -import pytz import pkg_resources try: @@ -5018,16 +5017,24 @@ def test_insert_rows_w_repeated_fields(self): ( 12, [ - datetime.datetime(2018, 12, 1, 12, 0, 0, tzinfo=pytz.utc), - datetime.datetime(2018, 12, 1, 13, 0, 0, tzinfo=pytz.utc), + datetime.datetime( + 2018, 12, 1, 12, 0, 0, tzinfo=datetime.timezone.utc + ), + datetime.datetime( + 2018, 12, 1, 13, 0, 0, tzinfo=datetime.timezone.utc + ), ], [1.25, 2.5], ), { "score": 13, "times": [ - datetime.datetime(2018, 12, 2, 12, 
0, 0, tzinfo=pytz.utc), - datetime.datetime(2018, 12, 2, 13, 0, 0, tzinfo=pytz.utc), + datetime.datetime( + 2018, 12, 2, 12, 0, 0, tzinfo=datetime.timezone.utc + ), + datetime.datetime( + 2018, 12, 2, 13, 0, 0, tzinfo=datetime.timezone.utc + ), ], "distances": [-1.25, -2.5], }, @@ -6974,7 +6981,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self): datetime.datetime(2012, 3, 14, 15, 16), ], dtype="datetime64[ns]", - ).dt.tz_localize(pytz.utc), + ).dt.tz_localize(datetime.timezone.utc), ), ] ) @@ -7306,7 +7313,7 @@ def test_load_table_from_dataframe_w_partial_schema(self): datetime.datetime(2012, 3, 14, 15, 16), ], dtype="datetime64[ns]", - ).dt.tz_localize(pytz.utc), + ).dt.tz_localize(datetime.timezone.utc), ), ("string_col", ["abc", None, "def"]), ("bytes_col", [b"abc", b"def", None]), diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index a5badc66c..50d573345 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -20,9 +20,7 @@ import warnings import mock -import pkg_resources import pytest -import pytz import google.api_core.exceptions from test_utils.imports import maybe_fail_import @@ -44,11 +42,8 @@ try: import pyarrow import pyarrow.types - - PYARROW_VERSION = pkg_resources.parse_version(pyarrow.__version__) except ImportError: # pragma: NO COVER pyarrow = None - PYARROW_VERSION = pkg_resources.parse_version("0.0.1") try: from tqdm import tqdm @@ -58,9 +53,6 @@ from google.cloud.bigquery.dataset import DatasetReference -PYARROW_TIMESTAMP_VERSION = pkg_resources.parse_version("2.0.0") - - def _mock_client(): from google.cloud.bigquery import client @@ -914,7 +906,9 @@ def test_mview_last_refresh_time(self): } self.assertEqual( table.mview_last_refresh_time, - datetime.datetime(2020, 11, 30, 15, 57, 22, 496000, tzinfo=pytz.utc), + datetime.datetime( + 2020, 11, 30, 15, 57, 22, 496000, tzinfo=datetime.timezone.utc + ), ) def test_mview_enable_refresh(self): @@ -2878,10 +2872,7 @@ def 
test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): df = row_iterator.to_dataframe(create_bqstorage_client=False) - tzinfo = None - if PYARROW_VERSION >= PYARROW_TIMESTAMP_VERSION: - tzinfo = datetime.timezone.utc - + tzinfo = datetime.timezone.utc self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 2) # verify the number of rows self.assertEqual(list(df.columns), ["some_timestamp"]) From 936660bdf48eb65844b39bc567146968895225d7 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 19 Aug 2021 10:01:53 -0400 Subject: [PATCH 328/341] chore: release 2.24.1 (#879) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 83b409015..5a3e74fd0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.24.1](https://www.github.com/googleapis/python-bigquery/compare/v2.24.0...v2.24.1) (2021-08-13) + + +### Bug Fixes + +* remove pytz dependency and require pyarrow>=3.0.0 ([#875](https://www.github.com/googleapis/python-bigquery/issues/875)) ([2cb3563](https://www.github.com/googleapis/python-bigquery/commit/2cb3563ee863edef7eaf5d04d739bcfe7bc6438e)) + ## [2.24.0](https://www.github.com/googleapis/python-bigquery/compare/v2.23.3...v2.24.0) (2021-08-11) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 84f6b4643..96f84438a 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.24.0" +__version__ = "2.24.1" From 5c5b4b852e8818f885014bca3769c4b7c13183cd Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 19 Aug 2021 17:29:00 +0200 Subject: [PATCH 329/341] chore(deps): update dependency google-cloud-bigquery to v2.24.1 (#887) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index dfee339d4..ac804c81c 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.24.0 +google-cloud-bigquery==2.24.1 google-cloud-bigquery-storage==2.6.3 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 264899dff..484e10516 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.24.0 +google-cloud-bigquery==2.24.1 google-cloud-bigquery-storage==2.6.3 google-auth-oauthlib==0.4.5 grpcio==1.39.0 From 16f65e6ae15979217ceea6c6d398c9057a363a13 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 24 Aug 2021 10:29:04 -0400 Subject: [PATCH 330/341] feat: Support using GeoPandas for GEOGRAPHY columns (#848) --- docs/conf.py | 2 + docs/usage/pandas.rst | 15 ++ google/cloud/bigquery/_pandas_helpers.py | 71 ++++++- google/cloud/bigquery/job/query.py | 119 ++++++++++- google/cloud/bigquery/table.py | 196 +++++++++++++++++- owlbot.py | 4 + samples/geography/requirements.txt | 44 ++++ samples/geography/to_geodataframe.py | 32 +++ samples/geography/to_geodataframe_test.py | 25 +++ setup.py | 1 + testing/constraints-3.6.txt | 4 +- tests/system/test_client.py | 3 - tests/system/test_pandas.py | 143 +++++++++++++ tests/unit/job/test_query_pandas.py | 130 ++++++++++-- tests/unit/test__pandas_helpers.py | 100 +++++++++ tests/unit/test_table.py | 242 ++++++++++++++++++++++ 16 
files changed, 1102 insertions(+), 29 deletions(-) create mode 100644 samples/geography/to_geodataframe.py create mode 100644 samples/geography/to_geodataframe_test.py diff --git a/docs/conf.py b/docs/conf.py index 09f7ea414..59a2d8fb3 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -366,6 +366,8 @@ "grpc": ("https://grpc.github.io/grpc/python/", None), "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), + "pandas": ("http://pandas.pydata.org/pandas-docs/dev", None), + "geopandas": ("https://geopandas.org/", None), } diff --git a/docs/usage/pandas.rst b/docs/usage/pandas.rst index 9db98dfbb..92eee67cf 100644 --- a/docs/usage/pandas.rst +++ b/docs/usage/pandas.rst @@ -37,6 +37,21 @@ To retrieve table rows as a :class:`pandas.DataFrame`: :start-after: [START bigquery_list_rows_dataframe] :end-before: [END bigquery_list_rows_dataframe] + +Retrieve BigQuery GEOGRAPHY data as a GeoPandas GeoDataFrame +------------------------------------------------------------ + +`GeoPandas `_ adds geospatial analytics +capabilities to Pandas. To retrieve query results containing +GEOGRAPHY data as a :class:`geopandas.GeoDataFrame`: + +.. 
literalinclude:: ../samples/geography/to_geodataframe.py + :language: python + :dedent: 4 + :start-after: [START bigquery_query_results_geodataframe] + :end-before: [END bigquery_query_results_geodataframe] + + Load a Pandas DataFrame to a BigQuery Table ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index f49980645..ab58b1729 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -24,6 +24,36 @@ import pandas except ImportError: # pragma: NO COVER pandas = None +else: + import numpy + +try: + # _BaseGeometry is used to detect shapely objevys in `bq_to_arrow_array` + from shapely.geometry.base import BaseGeometry as _BaseGeometry +except ImportError: # pragma: NO COVER + # No shapely, use NoneType for _BaseGeometry as a placeholder. + _BaseGeometry = type(None) +else: + if pandas is not None: # pragma: NO COVER + + def _to_wkb(): + # Create a closure that: + # - Adds a not-null check. This allows the returned function to + # be used directly with apply, unlike `shapely.wkb.dumps`. + # - Avoid extra work done by `shapely.wkb.dumps` that we don't need. + # - Caches the WKBWriter (and write method lookup :) ) + # - Avoids adding WKBWriter, lgeos, and notnull to the module namespace. + from shapely.geos import WKBWriter, lgeos + + write = WKBWriter(lgeos).write + notnull = pandas.notnull + + def _to_wkb(v): + return write(v) if notnull(v) else v + + return _to_wkb + + _to_wkb = _to_wkb() try: import pyarrow @@ -69,6 +99,7 @@ "uint8": "INTEGER", "uint16": "INTEGER", "uint32": "INTEGER", + "geometry": "GEOGRAPHY", } @@ -193,14 +224,16 @@ def bq_to_arrow_data_type(field): return data_type_constructor() -def bq_to_arrow_field(bq_field): +def bq_to_arrow_field(bq_field, array_type=None): """Return the Arrow field, corresponding to a given BigQuery column. Returns: None: if the Arrow type cannot be determined. 
""" arrow_type = bq_to_arrow_data_type(bq_field) - if arrow_type: + if arrow_type is not None: + if array_type is not None: + arrow_type = array_type # For GEOGRAPHY, at least initially is_nullable = bq_field.mode.upper() == "NULLABLE" return pyarrow.field(bq_field.name, arrow_type, nullable=is_nullable) @@ -225,7 +258,24 @@ def bq_to_arrow_schema(bq_schema): def bq_to_arrow_array(series, bq_field): - arrow_type = bq_to_arrow_data_type(bq_field) + if bq_field.field_type.upper() == "GEOGRAPHY": + arrow_type = None + first = _first_valid(series) + if first is not None: + if series.dtype.name == "geometry" or isinstance(first, _BaseGeometry): + arrow_type = pyarrow.binary() + # Convert shapey geometry to WKB binary format: + series = series.apply(_to_wkb) + elif isinstance(first, bytes): + arrow_type = pyarrow.binary() + elif series.dtype.name == "geometry": + # We have a GeoSeries containing all nulls, convert it to a pandas series + series = pandas.Series(numpy.array(series)) + + if arrow_type is None: + arrow_type = bq_to_arrow_data_type(bq_field) + else: + arrow_type = bq_to_arrow_data_type(bq_field) field_type_upper = bq_field.field_type.upper() if bq_field.field_type else "" @@ -279,6 +329,12 @@ def list_columns_and_indexes(dataframe): return columns_and_indexes +def _first_valid(series): + first_valid_index = series.first_valid_index() + if first_valid_index is not None: + return series.at[first_valid_index] + + def dataframe_to_bq_schema(dataframe, bq_schema): """Convert a pandas DataFrame schema to a BigQuery schema. @@ -319,6 +375,13 @@ def dataframe_to_bq_schema(dataframe, bq_schema): # Otherwise, try to automatically determine the type based on the # pandas dtype. 
bq_type = _PANDAS_DTYPE_TO_BQ.get(dtype.name) + if bq_type is None: + sample_data = _first_valid(dataframe[column]) + if ( + isinstance(sample_data, _BaseGeometry) + and sample_data is not None # Paranoia + ): + bq_type = "GEOGRAPHY" bq_field = schema.SchemaField(column, bq_type) bq_schema_out.append(bq_field) @@ -450,11 +513,11 @@ def dataframe_to_arrow(dataframe, bq_schema): arrow_names = [] arrow_fields = [] for bq_field in bq_schema: - arrow_fields.append(bq_to_arrow_field(bq_field)) arrow_names.append(bq_field.name) arrow_arrays.append( bq_to_arrow_array(get_column_or_index(dataframe, bq_field.name), bq_field) ) + arrow_fields.append(bq_to_arrow_field(bq_field, arrow_arrays[-1].type)) if all((field is not None for field in arrow_fields)): return pyarrow.Table.from_arrays( diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 3ab47b0f9..0cb4798be 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -53,6 +53,7 @@ # Assumption: type checks are only used by library developers and CI environments # that have all optional dependencies installed, thus no conditional imports. import pandas + import geopandas import pyarrow from google.api_core import retry as retries from google.cloud import bigquery_storage @@ -1487,6 +1488,7 @@ def to_dataframe( create_bqstorage_client: bool = True, date_as_object: bool = True, max_results: Optional[int] = None, + geography_as_object: bool = False, ) -> "pandas.DataFrame": """Return a pandas DataFrame from a QueryJob @@ -1538,13 +1540,27 @@ def to_dataframe( .. versionadded:: 2.21.0 + geography_as_object (Optional[bool]): + If ``True``, convert GEOGRAPHY data to :mod:`shapely` + geometry objects. If ``False`` (default), don't cast + geography data to :mod:`shapely` geometry objects. + + .. versionadded:: 2.24.0 + Returns: - A :class:`~pandas.DataFrame` populated with row data and column - headers from the query results. 
The column headers are derived - from the destination table's schema. + pandas.DataFrame: + A :class:`~pandas.DataFrame` populated with row data + and column headers from the query results. The column + headers are derived from the destination table's + schema. Raises: - ValueError: If the `pandas` library cannot be imported. + ValueError: + If the :mod:`pandas` library cannot be imported, or + the :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. Also if + `geography_as_object` is `True`, but the + :mod:`shapely` library cannot be imported. """ query_result = wait_for_query(self, progress_bar_type, max_results=max_results) return query_result.to_dataframe( @@ -1553,6 +1569,101 @@ def to_dataframe( progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, date_as_object=date_as_object, + geography_as_object=geography_as_object, + ) + + # If changing the signature of this method, make sure to apply the same + # changes to table.RowIterator.to_dataframe(), except for the max_results parameter + # that should only exist here in the QueryJob method. + def to_geodataframe( + self, + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + dtypes: Dict[str, Any] = None, + progress_bar_type: str = None, + create_bqstorage_client: bool = True, + date_as_object: bool = True, + max_results: Optional[int] = None, + geography_column: Optional[str] = None, + ) -> "geopandas.GeoDataFrame": + """Return a GeoPandas GeoDataFrame from a QueryJob + + Args: + bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): + A BigQuery Storage API client. If supplied, use the faster + BigQuery Storage API to fetch rows from BigQuery. This + API is a billable API. + + This method requires the ``fastavro`` and + ``google-cloud-bigquery-storage`` libraries. + + Reading from a specific partition or snapshot is not + currently supported by this method. 
+ + dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): + A dictionary of column names pandas ``dtype``s. The provided + ``dtype`` is used when constructing the series for the column + specified. Otherwise, the default pandas behavior is used. + + progress_bar_type (Optional[str]): + If set, use the `tqdm `_ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + See + :func:`~google.cloud.bigquery.table.RowIterator.to_dataframe` + for details. + + .. versionadded:: 1.11.0 + create_bqstorage_client (Optional[bool]): + If ``True`` (default), create a BigQuery Storage API client + using the default API settings. The BigQuery Storage API + is a faster way to fetch rows from BigQuery. See the + ``bqstorage_client`` parameter for more information. + + This argument does nothing if ``bqstorage_client`` is supplied. + + .. versionadded:: 1.24.0 + + date_as_object (Optional[bool]): + If ``True`` (default), cast dates to objects. If ``False``, convert + to datetime64[ns] dtype. + + .. versionadded:: 1.26.0 + + max_results (Optional[int]): + Maximum number of rows to include in the result. No limit by default. + + .. versionadded:: 2.21.0 + + geography_column (Optional[str]): + If there are more than one GEOGRAPHY column, + identifies which one to use to construct a GeoPandas + GeoDataFrame. This option can be ommitted if there's + only one GEOGRAPHY column. + + Returns: + geopandas.GeoDataFrame: + A :class:`geopandas.GeoDataFrame` populated with row + data and column headers from the query results. The + column headers are derived from the destination + table's schema. + + Raises: + ValueError: + If the :mod:`geopandas` library cannot be imported, or the + :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. + + .. 
versionadded:: 2.24.0 + """ + query_result = wait_for_query(self, progress_bar_type, max_results=max_results) + return query_result.to_geodataframe( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + geography_column=geography_column, ) def __iter__(self): diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 62f888001..609c0b57e 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -29,6 +29,20 @@ except ImportError: # pragma: NO COVER pandas = None +try: + import geopandas +except ImportError: + geopandas = None +else: + _COORDINATE_REFERENCE_SYSTEM = "EPSG:4326" + +try: + import shapely.geos +except ImportError: + shapely = None +else: + _read_wkt = shapely.geos.WKTReader(shapely.geos.lgeos).read + try: import pyarrow except ImportError: # pragma: NO COVER @@ -52,6 +66,7 @@ # Unconditionally import optional dependencies again to tell pytype that # they are not None, avoiding false "no attribute" errors. import pandas + import geopandas import pyarrow from google.cloud import bigquery_storage @@ -60,6 +75,14 @@ "The pandas library is not installed, please install " "pandas to use the to_dataframe() function." ) +_NO_GEOPANDAS_ERROR = ( + "The geopandas library is not installed, please install " + "geopandas to use the to_geodataframe() function." +) +_NO_SHAPELY_ERROR = ( + "The shapely library is not installed, please install " + "shapely to use the geography_as_object option." +) _NO_PYARROW_ERROR = ( "The pyarrow library is not installed, please install " "pyarrow to use the to_arrow() function." @@ -1878,6 +1901,7 @@ def to_dataframe( progress_bar_type: str = None, create_bqstorage_client: bool = True, date_as_object: bool = True, + geography_as_object: bool = False, ) -> "pandas.DataFrame": """Create a pandas DataFrame by loading all pages of a query. 
@@ -1933,6 +1957,13 @@ def to_dataframe( .. versionadded:: 1.26.0 + geography_as_object (Optional[bool]): + If ``True``, convert GEOGRAPHY data to :mod:`shapely` + geometry objects. If ``False`` (default), don't cast + geography data to :mod:`shapely` geometry objects. + + .. versionadded:: 2.24.0 + Returns: pandas.DataFrame: A :class:`~pandas.DataFrame` populated with row data and column @@ -1941,13 +1972,18 @@ def to_dataframe( Raises: ValueError: - If the :mod:`pandas` library cannot be imported, or the - :mod:`google.cloud.bigquery_storage_v1` module is - required but cannot be imported. + If the :mod:`pandas` library cannot be imported, or + the :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. Also if + `geography_as_object` is `True`, but the + :mod:`shapely` library cannot be imported. """ if pandas is None: raise ValueError(_NO_PANDAS_ERROR) + if geography_as_object and shapely is None: + raise ValueError(_NO_SHAPELY_ERROR) + if dtypes is None: dtypes = {} @@ -1988,8 +2024,136 @@ def to_dataframe( for column in dtypes: df[column] = pandas.Series(df[column], dtype=dtypes[column]) + if geography_as_object: + for field in self.schema: + if field.field_type.upper() == "GEOGRAPHY": + df[field.name] = df[field.name].dropna().apply(_read_wkt) + return df + # If changing the signature of this method, make sure to apply the same + # changes to job.QueryJob.to_geodataframe() + def to_geodataframe( + self, + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + dtypes: Dict[str, Any] = None, + progress_bar_type: str = None, + create_bqstorage_client: bool = True, + date_as_object: bool = True, + geography_column: Optional[str] = None, + ) -> "geopandas.GeoDataFrame": + """Create a GeoPandas GeoDataFrame by loading all pages of a query. + + Args: + bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): + A BigQuery Storage API client. 
If supplied, use the faster + BigQuery Storage API to fetch rows from BigQuery. + + This method requires the ``pyarrow`` and + ``google-cloud-bigquery-storage`` libraries. + + This method only exposes a subset of the capabilities of the + BigQuery Storage API. For full access to all features + (projections, filters, snapshots) use the Storage API directly. + + dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): + A dictionary of column names pandas ``dtype``s. The provided + ``dtype`` is used when constructing the series for the column + specified. Otherwise, the default pandas behavior is used. + progress_bar_type (Optional[str]): + If set, use the `tqdm `_ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + Possible values of ``progress_bar_type`` include: + + ``None`` + No progress bar. + ``'tqdm'`` + Use the :func:`tqdm.tqdm` function to print a progress bar + to :data:`sys.stderr`. + ``'tqdm_notebook'`` + Use the :func:`tqdm.tqdm_notebook` function to display a + progress bar as a Jupyter notebook widget. + ``'tqdm_gui'`` + Use the :func:`tqdm.tqdm_gui` function to display a + progress bar as a graphical dialog box. + + create_bqstorage_client (Optional[bool]): + If ``True`` (default), create a BigQuery Storage API client + using the default API settings. The BigQuery Storage API + is a faster way to fetch rows from BigQuery. See the + ``bqstorage_client`` parameter for more information. + + This argument does nothing if ``bqstorage_client`` is supplied. + + date_as_object (Optional[bool]): + If ``True`` (default), cast dates to objects. If ``False``, convert + to datetime64[ns] dtype. + + geography_column (Optional[str]): + If there are more than one GEOGRAPHY column, + identifies which one to use to construct a geopandas + GeoDataFrame. This option can be ommitted if there's + only one GEOGRAPHY column. 
+ + Returns: + geopandas.GeoDataFrame: + A :class:`geopandas.GeoDataFrame` populated with row + data and column headers from the query results. The + column headers are derived from the destination + table's schema. + + Raises: + ValueError: + If the :mod:`geopandas` library cannot be imported, or the + :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. + + .. versionadded:: 2.24.0 + """ + if geopandas is None: + raise ValueError(_NO_GEOPANDAS_ERROR) + + geography_columns = set( + field.name + for field in self.schema + if field.field_type.upper() == "GEOGRAPHY" + ) + if not geography_columns: + raise TypeError( + "There must be at least one GEOGRAPHY column" + " to create a GeoDataFrame" + ) + + if geography_column: + if geography_column not in geography_columns: + raise ValueError( + f"The given geography column, {geography_column}, doesn't name" + f" a GEOGRAPHY column in the result." + ) + elif len(geography_columns) == 1: + [geography_column] = geography_columns + else: + raise ValueError( + "There is more than one GEOGRAPHY column in the result. " + "The geography_column argument must be used to specify which " + "one to use to create a GeoDataFrame" + ) + + df = self.to_dataframe( + bqstorage_client, + dtypes, + progress_bar_type, + create_bqstorage_client, + date_as_object, + geography_as_object=True, + ) + + return geopandas.GeoDataFrame( + df, crs=_COORDINATE_REFERENCE_SYSTEM, geometry=geography_column + ) + class _EmptyRowIterator(RowIterator): """An empty row iterator. @@ -2042,6 +2206,7 @@ def to_dataframe( progress_bar_type=None, create_bqstorage_client=True, date_as_object=True, + geography_as_object=False, ) -> "pandas.DataFrame": """Create an empty dataframe. 
@@ -2059,6 +2224,31 @@ def to_dataframe( raise ValueError(_NO_PANDAS_ERROR) return pandas.DataFrame() + def to_geodataframe( + self, + bqstorage_client=None, + dtypes=None, + progress_bar_type=None, + create_bqstorage_client=True, + date_as_object=True, + geography_column: Optional[str] = None, + ) -> "pandas.DataFrame": + """Create an empty dataframe. + + Args: + bqstorage_client (Any): Ignored. Added for compatibility with RowIterator. + dtypes (Any): Ignored. Added for compatibility with RowIterator. + progress_bar_type (Any): Ignored. Added for compatibility with RowIterator. + create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator. + date_as_object (bool): Ignored. Added for compatibility with RowIterator. + + Returns: + pandas.DataFrame: An empty :class:`~pandas.DataFrame`. + """ + if geopandas is None: + raise ValueError(_NO_GEOPANDAS_ERROR) + return geopandas.GeoDataFrame(crs=_COORDINATE_REFERENCE_SYSTEM) + def to_dataframe_iterable( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, diff --git a/owlbot.py b/owlbot.py index 09845480a..ea9904cdb 100644 --- a/owlbot.py +++ b/owlbot.py @@ -97,6 +97,10 @@ samples=True, microgenerator=True, split_system_tests=True, + intersphinx_dependencies={ + "pandas": 'http://pandas.pydata.org/pandas-docs/dev', + "geopandas": "https://geopandas.org/", + } ) # BigQuery has a custom multiprocessing note diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index ac804c81c..7a76b4033 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,48 @@ +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +cffi==1.14.6 +charset-normalizer==2.0.4 +click==8.0.1 +click-plugins==1.1.1 +cligj==0.7.2 +dataclasses==0.6; python_version < '3.7' +Fiona==1.8.20 geojson==2.5.0 +geopandas==0.9.0 +google-api-core==1.31.2 +google-auth==1.35.0 google-cloud-bigquery==2.24.1 google-cloud-bigquery-storage==2.6.3 
+google-cloud-core==1.7.2 +google-crc32c==1.1.2 +google-resumable-media==1.3.3 +googleapis-common-protos==1.53.0 +grpcio==1.39.0 +idna==3.2 +importlib-metadata==4.6.4 +libcst==0.3.20 +munch==2.5.0 +mypy-extensions==0.4.3 +numpy==1.19.5 +packaging==21.0 +pandas==1.1.5 +proto-plus==1.19.0 +protobuf==3.17.3 +pyarrow==5.0.0 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycparser==2.20 +pyparsing==2.4.7 +pyproj==3.0.1 +python-dateutil==2.8.2 +pytz==2021.1 +PyYAML==5.4.1 +requests==2.26.0 +rsa==4.7.2 Shapely==1.7.1 +six==1.16.0 +typing-extensions==3.10.0.0 +typing-inspect==0.7.1 +urllib3==1.26.6 +zipp==3.5.0 diff --git a/samples/geography/to_geodataframe.py b/samples/geography/to_geodataframe.py new file mode 100644 index 000000000..fa8073fef --- /dev/null +++ b/samples/geography/to_geodataframe.py @@ -0,0 +1,32 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from google.cloud import bigquery + +client = bigquery.Client() + + +def get_austin_service_requests_as_geography(): + # [START bigquery_query_results_geodataframe] + + sql = """ + SELECT created_date, complaint_description, + ST_GEOGPOINT(longitude, latitude) as location + FROM bigquery-public-data.austin_311.311_service_requests + LIMIT 10 + """ + + df = client.query(sql).to_geodataframe() + # [END bigquery_query_results_geodataframe] + return df diff --git a/samples/geography/to_geodataframe_test.py b/samples/geography/to_geodataframe_test.py new file mode 100644 index 000000000..7a2ba6937 --- /dev/null +++ b/samples/geography/to_geodataframe_test.py @@ -0,0 +1,25 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +from .to_geodataframe import get_austin_service_requests_as_geography + + +def test_get_austin_service_requests_as_geography(): + geopandas = pytest.importorskip("geopandas") + df = get_austin_service_requests_as_geography() + assert isinstance(df, geopandas.GeoDataFrame) + assert len(list(df)) == 3 # verify the number of columns + assert len(df) == 10 # verify the number of rows diff --git a/setup.py b/setup.py index a1b3b61a0..e7515493d 100644 --- a/setup.py +++ b/setup.py @@ -56,6 +56,7 @@ "grpcio >= 1.38.1, < 2.0dev", "pyarrow >= 3.0.0, < 6.0dev", ], + "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.6.0, <2.0dev"], "pandas": ["pandas>=0.23.0", "pyarrow >= 3.0.0, < 6.0dev"], "bignumeric_type": ["pyarrow >= 3.0.0, < 6.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index ce012f0d7..be1a992fa 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -5,6 +5,7 @@ # # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", # Then this file should have foo==1.14.0 +geopandas==0.9.0 google-api-core==1.29.0 google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 @@ -13,10 +14,11 @@ grpcio==1.38.1 opentelemetry-api==0.11b0 opentelemetry-instrumentation==0.11b0 opentelemetry-sdk==0.11b0 -pandas==0.23.0 +pandas==0.24.2 proto-plus==1.10.0 protobuf==3.12.0 pyarrow==3.0.0 requests==2.18.0 +shapely==1.6.0 six==1.13.0 tqdm==4.7.4 diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 4250111b4..9da45ee6e 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2360,9 +2360,6 @@ def test_create_table_rows_fetch_nested_schema(self): self.assertEqual(found[7], e_favtime) self.assertEqual(found[8], decimal.Decimal(expected["FavoriteNumber"])) - def _fetch_dataframe(self, query): - return Config.CLIENT.query(query).result().to_dataframe() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( 
bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 371dcea71..836f93210 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -798,3 +798,146 @@ def test_list_rows_max_results_w_bqstorage(bigquery_client): dataframe = row_iterator.to_dataframe(bqstorage_client=bqstorage_client) assert len(dataframe.index) == 100 + + +def test_to_dataframe_geography_as_objects(bigquery_client, dataset_id): + wkt = pytest.importorskip("shapely.wkt") + bigquery_client.query( + f"create table {dataset_id}.lake (name string, geog geography)" + ).result() + bigquery_client.query( + f""" + insert into {dataset_id}.lake (name, geog) values + ('foo', st_geogfromtext('point(0 0)')), + ('bar', st_geogfromtext('point(0 1)')), + ('baz', null) + """ + ).result() + df = bigquery_client.query( + f"select * from {dataset_id}.lake order by name" + ).to_dataframe(geography_as_object=True) + assert list(df["name"]) == ["bar", "baz", "foo"] + assert df["geog"][0] == wkt.loads("point(0 1)") + assert pandas.isna(df["geog"][1]) + assert df["geog"][2] == wkt.loads("point(0 0)") + + +def test_to_geodataframe(bigquery_client, dataset_id): + geopandas = pytest.importorskip("geopandas") + from shapely import wkt + + bigquery_client.query( + f"create table {dataset_id}.geolake (name string, geog geography)" + ).result() + bigquery_client.query( + f""" + insert into {dataset_id}.geolake (name, geog) values + ('foo', st_geogfromtext('point(0 0)')), + ('bar', st_geogfromtext('polygon((0 0, 1 0, 1 1, 0 0))')), + ('baz', null) + """ + ).result() + df = bigquery_client.query( + f"select * from {dataset_id}.geolake order by name" + ).to_geodataframe() + assert df["geog"][0] == wkt.loads("polygon((0 0, 1 0, 1 1, 0 0))") + assert pandas.isna(df["geog"][1]) + assert df["geog"][2] == wkt.loads("point(0 0)") + assert isinstance(df, geopandas.GeoDataFrame) + assert isinstance(df["geog"], 
geopandas.GeoSeries) + assert df.area[0] == 0.5 + assert pandas.isna(df.area[1]) + assert df.area[2] == 0.0 + assert df.crs.srs == "EPSG:4326" + assert df.crs.name == "WGS 84" + assert df.geog.crs.srs == "EPSG:4326" + assert df.geog.crs.name == "WGS 84" + + +def test_load_geodataframe(bigquery_client, dataset_id): + geopandas = pytest.importorskip("geopandas") + import pandas + from shapely import wkt + from google.cloud.bigquery.schema import SchemaField + + df = geopandas.GeoDataFrame( + pandas.DataFrame( + dict( + name=["foo", "bar"], + geo1=[None, None], + geo2=[None, wkt.loads("Point(1 1)")], + ) + ), + geometry="geo1", + ) + + table_id = f"{dataset_id}.lake_from_gp" + bigquery_client.load_table_from_dataframe(df, table_id).result() + + table = bigquery_client.get_table(table_id) + assert table.schema == [ + SchemaField("name", "STRING", "NULLABLE"), + SchemaField("geo1", "GEOGRAPHY", "NULLABLE"), + SchemaField("geo2", "GEOGRAPHY", "NULLABLE"), + ] + assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ + ["bar", None, "POINT(1 1)"], + ["foo", None, None], + ] + + +def test_load_dataframe_w_shapely(bigquery_client, dataset_id): + wkt = pytest.importorskip("shapely.wkt") + from google.cloud.bigquery.schema import SchemaField + + df = pandas.DataFrame( + dict(name=["foo", "bar"], geo=[None, wkt.loads("Point(1 1)")]) + ) + + table_id = f"{dataset_id}.lake_from_shapes" + bigquery_client.load_table_from_dataframe(df, table_id).result() + + table = bigquery_client.get_table(table_id) + assert table.schema == [ + SchemaField("name", "STRING", "NULLABLE"), + SchemaField("geo", "GEOGRAPHY", "NULLABLE"), + ] + assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ + ["bar", "POINT(1 1)"], + ["foo", None], + ] + + bigquery_client.load_table_from_dataframe(df, table_id).result() + assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ + ["bar", "POINT(1 1)"], + ["bar", "POINT(1 1)"], + ["foo", None], + ["foo", None], + ] + + +def 
test_load_dataframe_w_wkb(bigquery_client, dataset_id): + wkt = pytest.importorskip("shapely.wkt") + from shapely import wkb + from google.cloud.bigquery.schema import SchemaField + + df = pandas.DataFrame( + dict(name=["foo", "bar"], geo=[None, wkb.dumps(wkt.loads("Point(1 1)"))]) + ) + + table_id = f"{dataset_id}.lake_from_wkb" + # We create the table first, to inform the interpretation of the wkb data + bigquery_client.query( + f"create table {table_id} (name string, geo GEOGRAPHY)" + ).result() + bigquery_client.load_table_from_dataframe(df, table_id).result() + + table = bigquery_client.get_table(table_id) + assert table.schema == [ + SchemaField("name", "STRING", "NULLABLE"), + SchemaField("geo", "GEOGRAPHY", "NULLABLE"), + ] + assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ + ["bar", "POINT(1 1)"], + ["foo", None], + ] diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index c537802f4..b5af90c0b 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -23,6 +23,14 @@ import pandas except (ImportError, AttributeError): # pragma: NO COVER pandas = None +try: + import shapely +except (ImportError, AttributeError): # pragma: NO COVER + shapely = None +try: + import geopandas +except (ImportError, AttributeError): # pragma: NO COVER + geopandas = None try: import pyarrow except (ImportError, AttributeError): # pragma: NO COVER @@ -425,38 +433,41 @@ def test_to_arrow_w_tqdm_wo_query_plan(): result_patch_tqdm.assert_called() -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_to_dataframe(): +def _make_job(schema=(), rows=()): from google.cloud.bigquery.job import QueryJob as target_class begun_resource = _make_job_resource(job_type="query") query_resource = { "jobComplete": True, "jobReference": begun_resource["jobReference"], - "totalRows": "4", + "totalRows": str(len(rows)), "schema": { "fields": [ - {"name": "name", "type": "STRING", "mode": 
"NULLABLE"}, - {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + dict(name=field[0], type=field[1], mode=field[2]) for field in schema ] }, } - tabledata_resource = { - "rows": [ - {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, - {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, - {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, - {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, - ] - } + tabledata_resource = {"rows": [{"f": [{"v": v} for v in row]} for row in rows]} done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} connection = _make_connection( begun_resource, query_resource, done_resource, tabledata_resource ) client = _make_client(connection=connection) - job = target_class.from_api_repr(begun_resource, client) + return target_class.from_api_repr(begun_resource, client) + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_to_dataframe(): + job = _make_job( + (("name", "STRING", "NULLABLE"), ("age", "INTEGER", "NULLABLE")), + ( + ("Phred Phlyntstone", "32"), + ("Bharney Rhubble", "33"), + ("Wylma Phlyntstone", "29"), + ("Bhettye Rhubble", "27"), + ), + ) df = job.to_dataframe(create_bqstorage_client=False) assert isinstance(df, pandas.DataFrame) @@ -868,3 +879,94 @@ def test_to_dataframe_w_tqdm_max_results(): result_patch_tqdm.assert_called_with( timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=3 ) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(shapely is None, reason="Requires `shapely`") +def test_to_dataframe_geography_as_object(): + job = _make_job( + (("name", "STRING", "NULLABLE"), ("geog", "GEOGRAPHY", "NULLABLE")), + ( + ("Phred Phlyntstone", "Point(0 0)"), + ("Bharney Rhubble", "Point(0 1)"), + ("Wylma Phlyntstone", None), + ), + ) + df = job.to_dataframe(create_bqstorage_client=False, geography_as_object=True) + + assert isinstance(df, pandas.DataFrame) + assert len(df) == 3 # verify the number of rows + assert list(df) == ["name", 
"geog"] # verify the column names + assert [v.__class__.__name__ for v in df.geog] == [ + "Point", + "Point", + "float", + ] # float because nan + + +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +def test_to_geodataframe(): + job = _make_job( + (("name", "STRING", "NULLABLE"), ("geog", "GEOGRAPHY", "NULLABLE")), + ( + ("Phred Phlyntstone", "Point(0 0)"), + ("Bharney Rhubble", "Point(0 1)"), + ("Wylma Phlyntstone", None), + ), + ) + df = job.to_geodataframe(create_bqstorage_client=False) + + assert isinstance(df, geopandas.GeoDataFrame) + assert len(df) == 3 # verify the number of rows + assert list(df) == ["name", "geog"] # verify the column names + assert [v.__class__.__name__ for v in df.geog] == [ + "Point", + "Point", + "NoneType", + ] # float because nan + assert isinstance(df.geog, geopandas.GeoSeries) + + +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +@mock.patch("google.cloud.bigquery.job.query.wait_for_query") +def test_query_job_to_geodataframe_delegation(wait_for_query): + """ + QueryJob.to_geodataframe just delegates to RowIterator.to_geodataframe. + + This test just demonstrates that. We don't need to test all the + variations, which are tested for RowIterator. 
+ """ + import numpy + + job = _make_job() + bqstorage_client = object() + dtypes = dict(xxx=numpy.dtype("int64")) + progress_bar_type = "normal" + create_bqstorage_client = False + date_as_object = False + max_results = 42 + geography_column = "g" + + df = job.to_geodataframe( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + max_results=max_results, + geography_column=geography_column, + ) + + wait_for_query.assert_called_once_with( + job, progress_bar_type, max_results=max_results + ) + row_iterator = wait_for_query.return_value + row_iterator.to_geodataframe.assert_called_once_with( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + geography_column=geography_column, + ) + assert df is row_iterator.to_geodataframe.return_value diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index b9cb56572..a9b0ae21f 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -36,6 +36,11 @@ # Mock out pyarrow when missing, because methods from pyarrow.types are # used in test parameterization. 
pyarrow = mock.Mock() +try: + import geopandas +except ImportError: # pragma: NO COVER + geopandas = None + import pytest from google import api_core @@ -584,6 +589,60 @@ def test_bq_to_arrow_array_w_special_floats(module_under_test): assert roundtrip[3] is None +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test_bq_to_arrow_array_w_geography_dtype(module_under_test): + from shapely import wkb, wkt + + bq_field = schema.SchemaField("field_name", "GEOGRAPHY") + + series = geopandas.GeoSeries([None, wkt.loads("point(0 0)")]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + # The result is binary, because we use wkb format + assert array.type == pyarrow.binary() + assert array.to_pylist() == [None, wkb.dumps(series[1])] + + # All na: + series = geopandas.GeoSeries([None, None]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + assert array.type == pyarrow.string() + assert array.to_pylist() == list(series) + + +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test_bq_to_arrow_array_w_geography_type_shapely_data(module_under_test): + from shapely import wkb, wkt + + bq_field = schema.SchemaField("field_name", "GEOGRAPHY") + + series = pandas.Series([None, wkt.loads("point(0 0)")]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + # The result is binary, because we use wkb format + assert array.type == pyarrow.binary() + assert array.to_pylist() == [None, wkb.dumps(series[1])] + + # All na: + series = pandas.Series([None, None]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + assert array.type == pyarrow.string() + assert array.to_pylist() == list(series) + + +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), 
reason="Requires `pyarrow`") +def test_bq_to_arrow_array_w_geography_type_wkb_data(module_under_test): + from shapely import wkb, wkt + + bq_field = schema.SchemaField("field_name", "GEOGRAPHY") + + series = pandas.Series([None, wkb.dumps(wkt.loads("point(0 0)"))]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + # The result is binary, because we use wkb format + assert array.type == pyarrow.binary() + assert array.to_pylist() == list(series) + + @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_schema_w_unknown_type(module_under_test): fields = ( @@ -1158,6 +1217,28 @@ def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): assert "struct_field" in str(expected_warnings[0]) +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +def test_dataframe_to_bq_schema_geography(module_under_test): + from shapely import wkt + + df = geopandas.GeoDataFrame( + pandas.DataFrame( + dict( + name=["foo", "bar"], + geo1=[None, None], + geo2=[None, wkt.loads("Point(1 1)")], + ) + ), + geometry="geo1", + ) + bq_schema = module_under_test.dataframe_to_bq_schema(df, []) + assert bq_schema == ( + schema.SchemaField("name", "STRING"), + schema.SchemaField("geo1", "GEOGRAPHY"), + schema.SchemaField("geo2", "GEOGRAPHY"), + ) + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_augment_schema_type_detection_succeeds(module_under_test): @@ -1554,3 +1635,22 @@ def test_download_dataframe_row_iterator_dict_sequence_schema(module_under_test) def test_table_data_listpage_to_dataframe_skips_stop_iteration(module_under_test): dataframe = module_under_test._row_iterator_page_to_dataframe([], [], {}) assert isinstance(dataframe, pandas.DataFrame) + + +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test_bq_to_arrow_field_type_override(module_under_test): 
+ # When loading pandas data, we may need to override the type + # decision based on data contents, because GEOGRAPHY data can be + # stored as either text or binary. + + assert ( + module_under_test.bq_to_arrow_field(schema.SchemaField("g", "GEOGRAPHY")).type + == pyarrow.string() + ) + + assert ( + module_under_test.bq_to_arrow_field( + schema.SchemaField("g", "GEOGRAPHY"), pyarrow.binary(), + ).type + == pyarrow.binary() + ) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 50d573345..1ce930ee4 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -14,6 +14,7 @@ import datetime import logging +import re import time import types import unittest @@ -39,6 +40,11 @@ except (ImportError, AttributeError): # pragma: NO COVER pandas = None +try: + import geopandas +except (ImportError, AttributeError): # pragma: NO COVER + geopandas = None + try: import pyarrow import pyarrow.types @@ -1842,6 +1848,27 @@ def test_to_dataframe_iterable(self): self.assertEqual(len(df), 0) # Verify the number of rows. self.assertEqual(len(df.columns), 0) + @mock.patch("google.cloud.bigquery.table.geopandas", new=None) + def test_to_geodataframe_if_geopandas_is_none(self): + row_iterator = self._make_one() + with self.assertRaisesRegex( + ValueError, + re.escape( + "The geopandas library is not installed, please install " + "geopandas to use the to_geodataframe() function." 
+ ), + ): + row_iterator.to_geodataframe(create_bqstorage_client=False) + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe(self): + row_iterator = self._make_one() + df = row_iterator.to_geodataframe(create_bqstorage_client=False) + self.assertIsInstance(df, geopandas.GeoDataFrame) + self.assertEqual(len(df), 0) # verify the number of rows + self.assertEqual(df.crs.srs, "EPSG:4326") + self.assertEqual(df.crs.name, "WGS 84") + class TestRowIterator(unittest.TestCase): def _class_under_test(self): @@ -1879,6 +1906,16 @@ def _make_one( client, api_request, path, schema, table=table, **kwargs ) + def _make_one_from_data(self, schema=(), rows=()): + from google.cloud.bigquery.schema import SchemaField + + schema = [SchemaField(*a) for a in schema] + rows = [{"f": [{"v": v} for v in row]} for row in rows] + + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + return self._make_one(_mock_client(), api_request, path, schema) + def test_constructor(self): from google.cloud.bigquery.table import _item_to_row from google.cloud.bigquery.table import _rows_page_start @@ -3170,6 +3207,18 @@ def test_to_dataframe_error_if_pandas_is_none(self): with self.assertRaises(ValueError): row_iterator.to_dataframe() + @unittest.skipIf(pandas is None, "Requires `pandas`") + @mock.patch("google.cloud.bigquery.table.shapely", new=None) + def test_to_dataframe_error_if_shapely_is_none(self): + with self.assertRaisesRegex( + ValueError, + re.escape( + "The shapely library is not installed, please install " + "shapely to use the geography_as_object option." + ), + ): + self._make_one_from_data().to_dataframe(geography_as_object=True) + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_max_results_w_bqstorage_warning(self): from google.cloud.bigquery.schema import SchemaField @@ -3927,6 +3976,199 @@ def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): # Don't close the client if it was passed in. 
bqstorage_client._transport.grpc_channel.close.assert_not_called() + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_dataframe_geography_as_object(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY")), + ( + ("foo", "Point(0 0)"), + ("bar", None), + ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))"), + ), + ) + df = row_iterator.to_dataframe( + create_bqstorage_client=False, geography_as_object=True, + ) + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 3) # verify the number of rows + self.assertEqual(list(df), ["name", "geog"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.geog.dtype.name, "object") + self.assertIsInstance(df.geog, pandas.Series) + self.assertEqual( + [v.__class__.__name__ for v in df.geog], ["Point", "float", "Polygon"] + ) + + @mock.patch("google.cloud.bigquery.table.geopandas", new=None) + def test_to_geodataframe_error_if_geopandas_is_none(self): + with self.assertRaisesRegex( + ValueError, + re.escape( + "The geopandas library is not installed, please install " + "geopandas to use the to_geodataframe() function." 
+ ), + ): + self._make_one_from_data().to_geodataframe() + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY")), + ( + ("foo", "Point(0 0)"), + ("bar", None), + ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))"), + ), + ) + df = row_iterator.to_geodataframe(create_bqstorage_client=False) + self.assertIsInstance(df, geopandas.GeoDataFrame) + self.assertEqual(len(df), 3) # verify the number of rows + self.assertEqual(list(df), ["name", "geog"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.geog.dtype.name, "geometry") + self.assertIsInstance(df.geog, geopandas.GeoSeries) + self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(df.crs.srs, "EPSG:4326") + self.assertEqual(df.crs.name, "WGS 84") + self.assertEqual(df.geog.crs.srs, "EPSG:4326") + self.assertEqual(df.geog.crs.name, "WGS 84") + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe_ambiguous_geog(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), () + ) + with self.assertRaisesRegex( + ValueError, + re.escape( + "There is more than one GEOGRAPHY column in the result. " + "The geography_column argument must be used to specify which " + "one to use to create a GeoDataFrame" + ), + ): + row_iterator.to_geodataframe(create_bqstorage_client=False) + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe_bad_geography_column(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), () + ) + with self.assertRaisesRegex( + ValueError, + re.escape( + "The given geography column, xxx, doesn't name" + " a GEOGRAPHY column in the result." 
+ ), + ): + row_iterator.to_geodataframe( + create_bqstorage_client=False, geography_column="xxx" + ) + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe_no_geog(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "STRING")), () + ) + with self.assertRaisesRegex( + TypeError, + re.escape( + "There must be at least one GEOGRAPHY column" + " to create a GeoDataFrame" + ), + ): + row_iterator.to_geodataframe(create_bqstorage_client=False) + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe_w_geography_column(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), + ( + ("foo", "Point(0 0)", "Point(1 1)"), + ("bar", None, "Point(2 2)"), + ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))", "Point(3 3)"), + ), + ) + df = row_iterator.to_geodataframe( + create_bqstorage_client=False, geography_column="geog" + ) + self.assertIsInstance(df, geopandas.GeoDataFrame) + self.assertEqual(len(df), 3) # verify the number of rows + self.assertEqual(list(df), ["name", "geog", "geog2"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.geog.dtype.name, "geometry") + self.assertEqual(df.geog2.dtype.name, "object") + self.assertIsInstance(df.geog, geopandas.GeoSeries) + self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + self.assertEqual( + [v.__class__.__name__ for v in df.geog], ["Point", "NoneType", "Polygon"] + ) + + # Geog2 isn't a GeoSeries, but it contains geomentries: + self.assertIsInstance(df.geog2, pandas.Series) + self.assertEqual( + [v.__class__.__name__ for v in df.geog2], ["Point", "Point", "Point"] + ) + # and can easily be converted to a GeoSeries + self.assertEqual( + list(map(str, geopandas.GeoSeries(df.geog2).area)), ["0.0", "0.0", "0.0"] + ) + + @unittest.skipIf(geopandas 
is None, "Requires `geopandas`") + @mock.patch("google.cloud.bigquery.table.RowIterator.to_dataframe") + def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): + """ + RowIterator.to_geodataframe just delegates to RowIterator.to_dataframe. + + This test just demonstrates that. We don't need to test all the + variations, which are tested for to_dataframe. + """ + import numpy + from shapely import wkt + + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("g", "GEOGRAPHY")) + ) + bqstorage_client = object() + dtypes = dict(xxx=numpy.dtype("int64")) + progress_bar_type = "normal" + create_bqstorage_client = False + date_as_object = False + geography_column = "g" + + to_dataframe.return_value = pandas.DataFrame( + dict(name=["foo"], g=[wkt.loads("point(0 0)")],) + ) + + df = row_iterator.to_geodataframe( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + geography_column=geography_column, + ) + + to_dataframe.assert_called_once_with( + bqstorage_client, + dtypes, + progress_bar_type, + create_bqstorage_client, + date_as_object, + geography_as_object=True, + ) + + self.assertIsInstance(df, geopandas.GeoDataFrame) + self.assertEqual(len(df), 1) # verify the number of rows + self.assertEqual(list(df), ["name", "g"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.g.dtype.name, "geometry") + self.assertIsInstance(df.g, geopandas.GeoSeries) + self.assertEqual(list(map(str, df.area)), ["0.0"]) + self.assertEqual(list(map(str, df.g.area)), ["0.0"]) + self.assertEqual([v.__class__.__name__ for v in df.g], ["Point"]) + class TestPartitionRange(unittest.TestCase): def _get_target_class(self): From aa4876e226aa54a43d3e20d401675403a41d71f8 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 24 Aug 2021 09:33:04 -0600 Subject: [PATCH 331/341] test: Add test of datetime and time 
pandas load (#895) --- tests/system/test_pandas.py | 64 ++++++++++++++++++++++++++++++------- 1 file changed, 53 insertions(+), 11 deletions(-) diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 836f93210..93ce23481 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -279,8 +279,6 @@ def test_load_table_from_dataframe_w_required(bigquery_client, dataset_id): def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id): # Schema with all scalar types. - # TODO: Uploading DATETIME columns currently fails, thus that field type - # is temporarily removed from the test. # See: # https://github.com/googleapis/python-bigquery/issues/61 # https://issuetracker.google.com/issues/151765076 @@ -288,7 +286,7 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id bigquery.SchemaField("bool_col", "BOOLEAN"), bigquery.SchemaField("bytes_col", "BYTES"), bigquery.SchemaField("date_col", "DATE"), - # bigquery.SchemaField("dt_col", "DATETIME"), + bigquery.SchemaField("dt_col", "DATETIME"), bigquery.SchemaField("float_col", "FLOAT"), bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), @@ -313,14 +311,14 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id ("bool_col", [True, None, False]), ("bytes_col", [b"abc", None, b"def"]), ("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)]), - # ( - # "dt_col", - # [ - # datetime.datetime(1, 1, 1, 0, 0, 0), - # None, - # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), - # ], - # ), + ( + "dt_col", + [ + datetime.datetime(1, 1, 1, 0, 0, 0), + None, + datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + ], + ), ("float_col", [float("-inf"), float("nan"), float("inf")]), ( "geo_col", @@ -800,6 +798,50 @@ def test_list_rows_max_results_w_bqstorage(bigquery_client): assert len(dataframe.index) == 100 +def test_upload_time_and_datetime_56(bigquery_client, 
dataset_id): + df = pandas.DataFrame( + dict( + dt=[ + datetime.datetime(2020, 1, 8, 8, 0, 0), + datetime.datetime( + 2020, + 1, + 8, + 8, + 0, + 0, + tzinfo=datetime.timezone(datetime.timedelta(hours=-7)), + ), + ], + t=[datetime.time(0, 0, 10, 100001), None], + ) + ) + table = f"{dataset_id}.test_upload_time_and_datetime" + bigquery_client.load_table_from_dataframe(df, table).result() + data = list(map(list, bigquery_client.list_rows(table))) + assert data == [ + [ + datetime.datetime(2020, 1, 8, 8, 0, tzinfo=datetime.timezone.utc), + datetime.time(0, 0, 10, 100001), + ], + [datetime.datetime(2020, 1, 8, 15, 0, tzinfo=datetime.timezone.utc), None], + ] + + from google.cloud.bigquery import job, schema + + table = f"{dataset_id}.test_upload_time_and_datetime_dt" + config = job.LoadJobConfig( + schema=[schema.SchemaField("dt", "DATETIME"), schema.SchemaField("t", "TIME")] + ) + + bigquery_client.load_table_from_dataframe(df, table, job_config=config).result() + data = list(map(list, bigquery_client.list_rows(table))) + assert data == [ + [datetime.datetime(2020, 1, 8, 8, 0), datetime.time(0, 0, 10, 100001)], + [datetime.datetime(2020, 1, 8, 15, 0), None], + ] + + def test_to_dataframe_geography_as_objects(bigquery_client, dataset_id): wkt = pytest.importorskip("shapely.wkt") bigquery_client.query( From f319d2596e7146ef355053a2a178d2e6a921e651 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 24 Aug 2021 15:36:00 -0600 Subject: [PATCH 332/341] chore: release 2.25.0 (#898) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Jim Fulton --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a3e74fd0..7a5727ee7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 
[2.25.0](https://www.github.com/googleapis/python-bigquery/compare/v2.24.1...v2.25.0) (2021-08-24) + + +### Features + +* Support using GeoPandas for GEOGRAPHY columns ([#848](https://www.github.com/googleapis/python-bigquery/issues/848)) ([16f65e6](https://www.github.com/googleapis/python-bigquery/commit/16f65e6ae15979217ceea6c6d398c9057a363a13)) + ### [2.24.1](https://www.github.com/googleapis/python-bigquery/compare/v2.24.0...v2.24.1) (2021-08-13) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 96f84438a..f882cac3a 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.24.1" +__version__ = "2.25.0" From fbbf72cd8d9629594b32ae981f7b6f4815fc3647 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 25 Aug 2021 15:44:47 +0200 Subject: [PATCH 333/341] chore(deps): update dependency numpy to v1.21.2 (#899) * chore(deps): update dependency numpy to v1.21.2 * Update samples/geography/requirements.txt Co-authored-by: Leah E. 
Cole <6719667+leahecole@users.noreply.github.com> --- samples/geography/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 7a76b4033..82a45e3e8 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -24,7 +24,8 @@ importlib-metadata==4.6.4 libcst==0.3.20 munch==2.5.0 mypy-extensions==0.4.3 -numpy==1.19.5 +numpy==1.19.5; python_version < "3.7" +numpy==1.21.2; python_version > "3.6" packaging==21.0 pandas==1.1.5 proto-plus==1.19.0 From 72a52f0253125a45e3162c5a32c0dbfe9e127466 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 25 Aug 2021 16:28:49 +0200 Subject: [PATCH 334/341] chore(deps): update dependency google-cloud-core to v2 (#904) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 82a45e3e8..853306d71 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -14,7 +14,7 @@ google-api-core==1.31.2 google-auth==1.35.0 google-cloud-bigquery==2.24.1 google-cloud-bigquery-storage==2.6.3 -google-cloud-core==1.7.2 +google-cloud-core==2.0.0 google-crc32c==1.1.2 google-resumable-media==1.3.3 googleapis-common-protos==1.53.0 From 1cb3e55253e824e3a1da5201f6ec09065fb6b627 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 25 Aug 2021 16:52:09 +0200 Subject: [PATCH 335/341] fix: use REST API in cell magic when requested (#892) Fixes #876. The `--use_rest_api` option did not work as expected and this commit fixes it. **PR checklist:** - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) --- google/cloud/bigquery/magics/magics.py | 12 +++++++++--- tests/unit/test_magics.py | 21 ++++++++++++++++----- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 2b8c2928e..d368bbeaa 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -671,7 +671,9 @@ def _cell_magic(line, query): _handle_error(ex, args.destination_var) return - result = rows.to_dataframe(bqstorage_client=bqstorage_client) + result = rows.to_dataframe( + bqstorage_client=bqstorage_client, create_bqstorage_client=False, + ) if args.destination_var: IPython.get_ipython().push({args.destination_var: result}) return @@ -728,11 +730,15 @@ def _cell_magic(line, query): if max_results: result = query_job.result(max_results=max_results).to_dataframe( - bqstorage_client=bqstorage_client, progress_bar_type=progress_bar + bqstorage_client=None, + create_bqstorage_client=False, + progress_bar_type=progress_bar, ) else: result = query_job.to_dataframe( - bqstorage_client=bqstorage_client, progress_bar_type=progress_bar + bqstorage_client=bqstorage_client, + create_bqstorage_client=False, + progress_bar_type=progress_bar, ) if args.destination_var: diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index d030482cc..88c92a070 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -660,7 +660,9 @@ def warning_match(warning): assert client_info.user_agent == "ipython-" + IPython.__version__ query_job_mock.to_dataframe.assert_called_once_with( - bqstorage_client=bqstorage_instance_mock, progress_bar_type="tqdm" + bqstorage_client=bqstorage_instance_mock, + create_bqstorage_client=mock.ANY, + 
progress_bar_type="tqdm", ) assert isinstance(return_value, pandas.DataFrame) @@ -703,7 +705,9 @@ def test_bigquery_magic_with_rest_client_requested(monkeypatch): bqstorage_mock.assert_not_called() query_job_mock.to_dataframe.assert_called_once_with( - bqstorage_client=None, progress_bar_type="tqdm" + bqstorage_client=None, + create_bqstorage_client=False, + progress_bar_type="tqdm", ) assert isinstance(return_value, pandas.DataFrame) @@ -757,7 +761,12 @@ def test_bigquery_magic_w_max_results_valid_calls_queryjob_result(): client_query_mock.return_value = query_job_mock ip.run_cell_magic("bigquery", "--max_results=5", sql) - query_job_mock.result.assert_called_with(max_results=5) + query_job_mock.result.assert_called_with(max_results=5) + query_job_mock.result.return_value.to_dataframe.assert_called_once_with( + bqstorage_client=None, + create_bqstorage_client=False, + progress_bar_type=mock.ANY, + ) @pytest.mark.usefixtures("ipython_interactive") @@ -929,7 +938,7 @@ def test_bigquery_magic_w_table_id_and_bqstorage_client(): ip.run_cell_magic("bigquery", "--max_results=5", table_id) row_iterator_mock.to_dataframe.assert_called_once_with( - bqstorage_client=bqstorage_instance_mock + bqstorage_client=bqstorage_instance_mock, create_bqstorage_client=mock.ANY, ) @@ -1246,7 +1255,9 @@ def test_bigquery_magic_w_progress_bar_type_w_context_setter(monkeypatch): bqstorage_mock.assert_not_called() query_job_mock.to_dataframe.assert_called_once_with( - bqstorage_client=None, progress_bar_type=magics.context.progress_bar_type + bqstorage_client=None, + create_bqstorage_client=False, + progress_bar_type=magics.context.progress_bar_type, ) assert isinstance(return_value, pandas.DataFrame) From b508809c0f887575274309a463e763c56ddd017d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 25 Aug 2021 10:12:25 -0500 Subject: [PATCH 336/341] fix: populate default `timeout` and retry after client-side timeout (#896) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit This addresses internal issue 195337762 where sometimes query job creation can take longer than expected and retrying the API call can be faster than waiting for the first query job request to fail. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #889 Towards https://github.com/googleapis/python-bigquery/issues/779 🦕 --- google/cloud/bigquery/client.py | 123 +++++++-------- google/cloud/bigquery/retry.py | 8 + noxfile.py | 4 - tests/unit/test_client.py | 242 +++++++++++++++++------------- tests/unit/test_create_dataset.py | 19 +-- tests/unit/test_delete_dataset.py | 7 +- tests/unit/test_list_datasets.py | 11 +- tests/unit/test_list_jobs.py | 19 +-- tests/unit/test_list_models.py | 12 +- tests/unit/test_list_projects.py | 11 +- tests/unit/test_list_routines.py | 12 +- tests/unit/test_list_tables.py | 16 +- tests/unit/test_magics.py | 5 +- tests/unit/test_retry.py | 12 ++ 14 files changed, 282 insertions(+), 219 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index cbac82548..023346ffa 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -76,17 +76,24 @@ from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job from google.cloud.bigquery.job import ( + CopyJob, + CopyJobConfig, + ExtractJob, + ExtractJobConfig, + LoadJob, LoadJobConfig, QueryJob, QueryJobConfig, - CopyJobConfig, - ExtractJobConfig, ) from google.cloud.bigquery.model import Model 
from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.model import _model_arg_to_model_ref from google.cloud.bigquery.query import _QueryResults -from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY +from google.cloud.bigquery.retry import ( + DEFAULT_JOB_RETRY, + DEFAULT_RETRY, + DEFAULT_TIMEOUT, +) from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.schema import SchemaField @@ -245,7 +252,7 @@ def get_service_account_email( self, project: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> str: """Get the email address of the project's BigQuery service account @@ -292,7 +299,7 @@ def list_projects( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """List projects for the project associated with this client. @@ -358,7 +365,7 @@ def list_datasets( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """List datasets for the project associated with this client. @@ -549,7 +556,7 @@ def create_dataset( dataset: Union[str, Dataset, DatasetReference], exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Dataset: """API call: create the dataset via a POST request. @@ -624,7 +631,7 @@ def create_routine( routine: Routine, exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Routine: """[Beta] Create a routine via a POST request. 
@@ -679,7 +686,7 @@ def create_table( table: Union[str, Table, TableReference], exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Table: """API call: create a table via a PUT request @@ -751,7 +758,7 @@ def get_dataset( self, dataset_ref: Union[DatasetReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Dataset: """Fetch the dataset referenced by ``dataset_ref`` @@ -795,7 +802,7 @@ def get_iam_policy( table: Union[Table, TableReference], requested_policy_version: int = 1, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Policy: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -825,7 +832,7 @@ def set_iam_policy( policy: Policy, updateMask: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Policy: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -858,7 +865,7 @@ def test_iam_permissions( table: Union[Table, TableReference], permissions: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Dict[str, Any]: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -883,7 +890,7 @@ def get_model( self, model_ref: Union[ModelReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Model: """[Beta] Fetch the model referenced by ``model_ref``. @@ -926,7 +933,7 @@ def get_routine( self, routine_ref: Union[Routine, RoutineReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Routine: """[Beta] Get the routine referenced by ``routine_ref``. 
@@ -970,7 +977,7 @@ def get_table( self, table: Union[Table, TableReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Table: """Fetch the table referenced by ``table``. @@ -1012,7 +1019,7 @@ def update_dataset( dataset: Dataset, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Dataset: """Change some fields of a dataset. @@ -1082,7 +1089,7 @@ def update_model( model: Model, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Model: """[Beta] Change some fields of a model. @@ -1146,7 +1153,7 @@ def update_routine( routine: Routine, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Routine: """[Beta] Change some fields of a routine. @@ -1220,7 +1227,7 @@ def update_table( table: Table, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Table: """Change some fields of a table. @@ -1286,7 +1293,7 @@ def list_models( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """[Beta] List models in the dataset. @@ -1363,7 +1370,7 @@ def list_routines( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """[Beta] List routines in the dataset. @@ -1440,7 +1447,7 @@ def list_tables( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """List tables in the dataset. 
@@ -1515,7 +1522,7 @@ def delete_dataset( dataset: Union[Dataset, DatasetReference, str], delete_contents: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """Delete a dataset. @@ -1574,7 +1581,7 @@ def delete_model( self, model: Union[Model, ModelReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """[Beta] Delete a model @@ -1624,12 +1631,12 @@ def delete_model( def delete_job_metadata( self, - job_id, - project=None, - location=None, - retry=DEFAULT_RETRY, - timeout=None, - not_found_ok=False, + job_id: Union[str, LoadJob, CopyJob, ExtractJob, QueryJob], + project: Optional[str] = None, + location: Optional[str] = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = DEFAULT_TIMEOUT, + not_found_ok: bool = False, ): """[Beta] Delete job metadata from job history. @@ -1637,26 +1644,20 @@ def delete_job_metadata( :func:`~google.cloud.bigquery.client.Client.cancel_job` instead. Args: - job_id (Union[ \ - str, \ - google.cloud.bigquery.job.LoadJob, \ - google.cloud.bigquery.job.CopyJob, \ - google.cloud.bigquery.job.ExtractJob, \ - google.cloud.bigquery.job.QueryJob \ - ]): Job identifier. + job_id: Job or job identifier. Keyword Arguments: - project (Optional[str]): + project: ID of the project which owns the job (defaults to the client's project). - location (Optional[str]): + location: Location where the job was run. Ignored if ``job_id`` is a job object. - retry (Optional[google.api_core.retry.Retry]): + retry: How to retry the RPC. - timeout (Optional[float]): + timeout: The number of seconds to wait for the underlying HTTP transport before using ``retry``. - not_found_ok (Optional[bool]): + not_found_ok: Defaults to ``False``. If ``True``, ignore "not found" errors when deleting the job. 
""" @@ -1697,7 +1698,7 @@ def delete_routine( self, routine: Union[Routine, RoutineReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """[Beta] Delete a routine. @@ -1751,7 +1752,7 @@ def delete_table( self, table: Union[Table, TableReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """Delete a table @@ -1804,7 +1805,7 @@ def _get_query_results( project: str = None, timeout_ms: int = None, location: str = None, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> _QueryResults: """Get the query results object for a query job. @@ -1893,7 +1894,7 @@ def create_job( self, job_config: dict, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Create a new job. Args: @@ -1990,7 +1991,7 @@ def get_job( project: str = None, location: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Fetch a job for the project associated with this client. @@ -2064,7 +2065,7 @@ def cancel_job( project: str = None, location: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Attempt to cancel a job from a job ID. 
@@ -2141,7 +2142,7 @@ def list_jobs( all_users: bool = None, state_filter: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, min_creation_time: datetime.datetime = None, max_creation_time: datetime.datetime = None, page_size: int = None, @@ -2256,7 +2257,7 @@ def load_table_from_uri( project: str = None, job_config: LoadJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Starts a job for loading data into a table from CloudStorage. @@ -2340,7 +2341,7 @@ def load_table_from_file( location: str = None, project: str = None, job_config: LoadJobConfig = None, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of this table from a file-like object. @@ -2443,7 +2444,7 @@ def load_table_from_dataframe( project: str = None, job_config: LoadJobConfig = None, parquet_compression: str = "snappy", - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of a table from a pandas DataFrame. @@ -2678,7 +2679,7 @@ def load_table_from_json( location: str = None, project: str = None, job_config: LoadJobConfig = None, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of a table from a JSON string or dict. @@ -2961,7 +2962,7 @@ def copy_table( project: str = None, job_config: CopyJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.CopyJob: """Copy one or more tables to another table. @@ -3064,7 +3065,7 @@ def extract_table( project: str = None, job_config: ExtractJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, source_type: str = "Table", ) -> job.ExtractJob: """Start a job to extract a table into Cloud Storage files. 
@@ -3162,7 +3163,7 @@ def query( location: str = None, project: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, job_retry: retries.Retry = DEFAULT_JOB_RETRY, ) -> job.QueryJob: """Run a SQL query. @@ -3444,7 +3445,7 @@ def insert_rows_json( ignore_unknown_values: bool = None, template_suffix: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Sequence[dict]: """Insert rows into a table without applying local type conversions. @@ -3579,7 +3580,7 @@ def list_partitions( self, table: Union[Table, TableReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Sequence[str]: """List the partitions in a table. @@ -3629,7 +3630,7 @@ def list_rows( start_index: int = None, page_size: int = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> RowIterator: """List the rows of the table. @@ -3741,7 +3742,7 @@ def _list_rows_from_query_results( start_index: int = None, page_size: int = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> RowIterator: """List the rows of a completed query. See diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index e9286055c..830582322 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -29,6 +29,7 @@ exceptions.BadGateway, requests.exceptions.ChunkedEncodingError, requests.exceptions.ConnectionError, + requests.exceptions.Timeout, auth_exceptions.TransportError, ) @@ -59,6 +60,13 @@ def _should_retry(exc): pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. """ +DEFAULT_TIMEOUT = 5.0 * 60.0 +"""The default API timeout. + +This is the time to wait per request. To adjust the total wait time, set a +deadline on the retry object. 
+""" + job_retry_reasons = "rateLimitExceeded", "backendError" diff --git a/noxfile.py b/noxfile.py index 0dfe7bf93..9077924e9 100644 --- a/noxfile.py +++ b/noxfile.py @@ -160,10 +160,6 @@ def snippets(session): if os.environ.get("RUN_SNIPPETS_TESTS", "true") == "false": session.skip("RUN_SNIPPETS_TESTS is set to false, skipping") - # Sanity check: Only run snippets tests if the environment variable is set. - if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""): - session.skip("Credentials must be set via environment variable.") - constraints_path = str( CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" ) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index ca0dca975..e9204f1de 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -56,6 +56,7 @@ import google.cloud._helpers from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT try: from google.cloud import bigquery_storage @@ -367,7 +368,7 @@ def test__get_query_results_miss_w_client_location(self): method="GET", path="/projects/PROJECT/queries/nothere", query_params={"maxResults": 0, "location": self.LOCATION}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test__get_query_results_hit(self): @@ -428,7 +429,9 @@ def test_get_service_account_email_w_alternate_project(self): service_account_email = client.get_service_account_email(project=project) final_attributes.assert_called_once_with({"path": path}, client, None) - conn.api_request.assert_called_once_with(method="GET", path=path, timeout=None) + conn.api_request.assert_called_once_with( + method="GET", path=path, timeout=DEFAULT_TIMEOUT + ) self.assertEqual(service_account_email, email) def test_get_service_account_email_w_custom_retry(self): @@ -771,7 +774,7 @@ def test_create_routine_w_conflict(self): } } conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, 
timeout=None, + method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT, ) @unittest.skipIf(opentelemetry is None, "Requires `opentelemetry`") @@ -807,7 +810,7 @@ def test_span_status_is_set(self): } } conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=None, + method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT, ) def test_create_routine_w_conflict_exists_ok(self): @@ -843,11 +846,13 @@ def test_create_routine_w_conflict_exists_ok(self): self.assertEqual(actual_routine.routine_id, "minimal_routine") conn.api_request.assert_has_calls( [ - mock.call(method="POST", path=path, data=resource, timeout=None,), + mock.call( + method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT, + ), mock.call( method="GET", path="/projects/test-routine-project/datasets/test_routines/routines/minimal_routine", - timeout=None, + timeout=DEFAULT_TIMEOUT, ), ] ) @@ -923,7 +928,7 @@ def test_create_table_w_custom_property(self): "newAlphaProperty": "unreleased property", "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got._properties["newAlphaProperty"], "unreleased property") self.assertEqual(got.table_id, self.TABLE_ID) @@ -964,7 +969,7 @@ def test_create_table_w_encryption_configuration(self): "labels": {}, "encryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -1000,7 +1005,7 @@ def test_create_table_w_day_partition_and_expire(self): "timePartitioning": {"type": "DAY", "expirationMs": "100"}, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(table.time_partitioning.type_, "DAY") self.assertEqual(table.time_partitioning.expiration_ms, 100) @@ -1081,7 +1086,7 @@ def test_create_table_w_schema_and_query(self): "view": {"query": query, "useLegacySql": False}, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, 
self.TABLE_ID) self.assertEqual(got.project, self.PROJECT) @@ -1136,7 +1141,7 @@ def test_create_table_w_external(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) self.assertEqual(got.project, self.PROJECT) @@ -1175,7 +1180,7 @@ def test_create_table_w_reference(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -1209,7 +1214,7 @@ def test_create_table_w_fully_qualified_string(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -1241,7 +1246,7 @@ def test_create_table_w_string(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -1276,7 +1281,7 @@ def test_create_table_alreadyexists_w_exists_ok_false(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_create_table_alreadyexists_w_exists_ok_true(self): @@ -1319,9 +1324,9 @@ def test_create_table_alreadyexists_w_exists_ok_true(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), - mock.call(method="GET", path=get_path, timeout=None), + mock.call(method="GET", path=get_path, timeout=DEFAULT_TIMEOUT), ] ) @@ -1394,7 +1399,7 @@ def test_get_model_w_string(self): final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) conn.api_request.assert_called_once_with( - method="GET", path="/%s" % path, timeout=None + method="GET", path="/%s" % path, timeout=DEFAULT_TIMEOUT ) self.assertEqual(got.model_id, self.MODEL_ID) @@ -1503,7 +1508,7 @@ def test_get_table_sets_user_agent(self): "User-Agent": expected_user_agent, }, data=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertIn("my-application/1.2.3", expected_user_agent) @@ -1846,7 +1851,7 @@ def test_update_dataset_w_custom_property(self): data={"newAlphaProperty": "unreleased property"}, path=path, headers=None, - 
timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(dataset.dataset_id, self.DS_ID) @@ -2136,7 +2141,7 @@ def test_update_table_w_custom_property(self): path="/%s" % path, data={"newAlphaProperty": "unreleased property"}, headers=None, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual( updated_table._properties["newAlphaProperty"], "unreleased property" @@ -2171,7 +2176,7 @@ def test_update_table_only_use_legacy_sql(self): path="/%s" % path, data={"view": {"useLegacySql": True}}, headers=None, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql) @@ -2269,7 +2274,7 @@ def test_update_table_w_query(self): "schema": schema_resource, }, headers=None, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_update_table_w_schema_None(self): @@ -2398,7 +2403,7 @@ def test_delete_job_metadata_not_found(self): method="DELETE", path="/projects/client-proj/jobs/my-job/delete", query_params={"location": "client-loc"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_delete_job_metadata_with_id(self): @@ -2412,7 +2417,7 @@ def test_delete_job_metadata_with_id(self): method="DELETE", path="/projects/param-proj/jobs/my-job/delete", query_params={"location": "param-loc"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_delete_job_metadata_with_resource(self): @@ -2437,7 +2442,7 @@ def test_delete_job_metadata_with_resource(self): method="DELETE", path="/projects/job-based-proj/jobs/query_job/delete", query_params={"location": "us-east1"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_delete_model(self): @@ -2492,7 +2497,9 @@ def test_delete_model_w_not_found_ok_false(self): with self.assertRaises(google.api_core.exceptions.NotFound): client.delete_model("{}.{}".format(self.DS_ID, self.MODEL_ID)) - conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) + conn.api_request.assert_called_with( + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT + ) def 
test_delete_model_w_not_found_ok_true(self): path = "/projects/{}/datasets/{}/models/{}".format( @@ -2513,7 +2520,9 @@ def test_delete_model_w_not_found_ok_true(self): final_attributes.assert_called_once_with({"path": path}, client, None) - conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) + conn.api_request.assert_called_with( + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT + ) def test_delete_routine(self): from google.cloud.bigquery.routine import Routine @@ -2567,7 +2576,7 @@ def test_delete_routine_w_not_found_ok_false(self): final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_with( - method="DELETE", path=path, timeout=None, + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT, ) def test_delete_routine_w_not_found_ok_true(self): @@ -2589,7 +2598,7 @@ def test_delete_routine_w_not_found_ok_true(self): final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_with( - method="DELETE", path=path, timeout=None, + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT, ) def test_delete_table(self): @@ -2653,7 +2662,9 @@ def test_delete_table_w_not_found_ok_false(self): final_attributes.assert_called_once_with({"path": path}, client, None) - conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) + conn.api_request.assert_called_with( + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT + ) def test_delete_table_w_not_found_ok_true(self): path = "/projects/{}/datasets/{}/tables/{}".format( @@ -2675,7 +2686,9 @@ def test_delete_table_w_not_found_ok_true(self): final_attributes.assert_called_once_with({"path": path}, client, None) - conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) + conn.api_request.assert_called_with( + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT + ) def _create_job_helper(self, job_config): from google.cloud.bigquery import _helpers @@ -2697,7 +2710,7 @@ def 
_create_job_helper(self, job_config): method="POST", path="/projects/%s/jobs" % self.PROJECT, data=RESOURCE, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_create_job_load_config(self): @@ -2846,7 +2859,7 @@ def test_create_job_query_config_w_rateLimitExceeded_error(self): method="POST", path="/projects/PROJECT/jobs", data=data_without_destination, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), ) @@ -2886,7 +2899,7 @@ def test_get_job_miss_w_explict_project(self): method="GET", path="/projects/OTHER_PROJECT/jobs/NONESUCH", query_params={"projection": "full"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_get_job_miss_w_client_location(self): @@ -2904,7 +2917,7 @@ def test_get_job_miss_w_client_location(self): method="GET", path="/projects/client-proj/jobs/NONESUCH", query_params={"projection": "full", "location": "client-loc"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_get_job_hit_w_timeout(self): @@ -2973,7 +2986,7 @@ def test_cancel_job_miss_w_explict_project(self): method="POST", path="/projects/OTHER_PROJECT/jobs/NONESUCH/cancel", query_params={"projection": "full", "location": self.LOCATION}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_cancel_job_miss_w_client_location(self): @@ -2992,7 +3005,7 @@ def test_cancel_job_miss_w_client_location(self): method="POST", path="/projects/OTHER_PROJECT/jobs/NONESUCH/cancel", query_params={"projection": "full", "location": self.LOCATION}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_cancel_job_hit(self): @@ -3028,7 +3041,7 @@ def test_cancel_job_hit(self): method="POST", path="/projects/job-based-proj/jobs/query_job/cancel", query_params={"projection": "full", "location": "asia-northeast1"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_cancel_job_w_timeout(self): @@ -3154,7 +3167,7 @@ def test_load_table_from_uri_w_explicit_project(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def 
test_load_table_from_uri_w_client_location(self): @@ -3198,7 +3211,7 @@ def test_load_table_from_uri_w_client_location(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_load_table_from_uri_w_invalid_job_config(self): @@ -3486,7 +3499,7 @@ def test_copy_table_w_multiple_sources(self): method="POST", path="/projects/%s/jobs" % self.PROJECT, data=expected_resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertIsInstance(job, CopyJob) self.assertIs(job._client, client) @@ -3548,7 +3561,7 @@ def test_copy_table_w_explicit_project(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_copy_table_w_client_location(self): @@ -3598,7 +3611,7 @@ def test_copy_table_w_client_location(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_copy_table_w_source_strings(self): @@ -3691,7 +3704,7 @@ def test_copy_table_w_valid_job_config(self): method="POST", path="/projects/%s/jobs" % self.PROJECT, data=RESOURCE, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertIsInstance(job._configuration, CopyJobConfig) @@ -3797,7 +3810,7 @@ def test_extract_table_w_explicit_project(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_extract_table_w_client_location(self): @@ -3841,7 +3854,7 @@ def test_extract_table_w_client_location(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_extract_table_generated_job_id(self): @@ -3884,7 +3897,7 @@ def test_extract_table_generated_job_id(self): self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") self.assertIsInstance(req["data"]["jobReference"]["jobId"], str) - self.assertIsNone(req["timeout"]) + 
self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) # Check the job resource. self.assertIsInstance(job, ExtractJob) @@ -3929,7 +3942,7 @@ def test_extract_table_w_destination_uris(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) # Check the job resource. self.assertIsInstance(job, ExtractJob) @@ -4099,7 +4112,7 @@ def test_query_defaults(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) sent = req["data"] self.assertIsInstance(sent["jobReference"]["jobId"], str) sent_config = sent["configuration"]["query"] @@ -4152,7 +4165,7 @@ def test_query_w_explicit_project(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_query_w_explicit_job_config(self): @@ -4208,7 +4221,10 @@ def test_query_w_explicit_job_config(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) # the original config object should not have been modified @@ -4252,7 +4268,10 @@ def test_query_preserving_explicit_job_config(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) # the original config object should not have been modified @@ -4304,7 +4323,10 @@ def test_query_preserving_explicit_default_job_config(self): # Check that query actually starts the job. 
conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) # the original default config object should not have been modified @@ -4389,7 +4411,10 @@ def test_query_w_explicit_job_config_override(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) def test_query_w_client_default_config_no_incoming(self): @@ -4430,7 +4455,10 @@ def test_query_w_client_default_config_no_incoming(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) def test_query_w_invalid_default_job_config(self): @@ -4475,7 +4503,7 @@ def test_query_w_client_location(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_query_detect_location(self): @@ -4546,7 +4574,7 @@ def test_query_w_udf_resources(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) sent = req["data"] self.assertIsInstance(sent["jobReference"]["jobId"], str) sent_config = sent["configuration"]["query"] @@ -4602,7 +4630,7 @@ def test_query_w_query_parameters(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) sent = req["data"] 
self.assertEqual(sent["jobReference"]["jobId"], JOB) sent_config = sent["configuration"]["query"] @@ -4794,7 +4822,7 @@ def _row_data(row): self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/%s" % PATH) self.assertEqual(req["data"], SENT) - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) def test_insert_rows_w_list_of_dictionaries(self): import datetime @@ -4862,7 +4890,7 @@ def _row_data(row): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT ) def test_insert_rows_w_list_of_Rows(self): @@ -4907,7 +4935,7 @@ def _row_data(row): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT ) def test_insert_rows_w_skip_invalid_and_ignore_unknown(self): @@ -4984,7 +5012,7 @@ def _row_data(row): errors[0]["errors"][0], RESPONSE["insertErrors"][0]["errors"][0] ) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT ) def test_insert_rows_w_repeated_fields(self): @@ -5085,7 +5113,7 @@ def test_insert_rows_w_repeated_fields(self): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None, + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_w_record_schema(self): @@ -5151,7 +5179,7 @@ def test_insert_rows_w_record_schema(self): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT ) def 
test_insert_rows_w_explicit_none_insert_ids(self): @@ -5185,7 +5213,7 @@ def _row_data(row): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/{}".format(PATH), data=SENT, timeout=None, + method="POST", path="/{}".format(PATH), data=SENT, timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_errors(self): @@ -5269,7 +5297,7 @@ def test_insert_rows_w_numeric(self): project, ds_id, table_id ), data=sent, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @unittest.skipIf(pandas is None, "Requires `pandas`") @@ -5461,7 +5489,10 @@ def test_insert_rows_from_dataframe_many_columns(self): ] } expected_call = mock.call( - method="POST", path=API_PATH, data=EXPECTED_SENT_DATA, timeout=None + method="POST", + path=API_PATH, + data=EXPECTED_SENT_DATA, + timeout=DEFAULT_TIMEOUT, ) actual_calls = conn.api_request.call_args_list @@ -5514,7 +5545,10 @@ def test_insert_rows_from_dataframe_w_explicit_none_insert_ids(self): actual_calls = conn.api_request.call_args_list assert len(actual_calls) == 1 assert actual_calls[0] == mock.call( - method="POST", path=API_PATH, data=EXPECTED_SENT_DATA, timeout=None + method="POST", + path=API_PATH, + data=EXPECTED_SENT_DATA, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_default_behavior(self): @@ -5594,7 +5628,7 @@ def test_insert_rows_json_w_explicitly_requested_autogenerated_insert_ids(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected_row_data, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_w_explicitly_disabled_insert_ids(self): @@ -5624,7 +5658,7 @@ def test_insert_rows_json_w_explicitly_disabled_insert_ids(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected_row_data, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_with_iterator_row_ids(self): @@ -5651,7 +5685,7 @@ def test_insert_rows_json_with_iterator_row_ids(self): method="POST", 
path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected_row_data, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_with_non_iterable_row_ids(self): @@ -5704,7 +5738,7 @@ def test_insert_rows_json_w_explicit_none_insert_ids(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_w_none_insert_ids_sequence(self): @@ -5743,7 +5777,7 @@ def test_insert_rows_json_w_none_insert_ids_sequence(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected_row_data, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_w_wrong_arg(self): @@ -5938,7 +5972,7 @@ def test_list_rows_w_start_index_w_page_size(self): "maxResults": 2, "formatOptions.useInt64Timestamp": True, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), mock.call( method="GET", @@ -5948,7 +5982,7 @@ def test_list_rows_w_start_index_w_page_size(self): "maxResults": 2, "formatOptions.useInt64Timestamp": True, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), ] ) @@ -6099,7 +6133,7 @@ def test_list_rows_repeated_fields(self): "selectedFields": "color,struct", "formatOptions.useInt64Timestamp": True, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_list_rows_w_record_schema(self): @@ -6169,7 +6203,7 @@ def test_list_rows_w_record_schema(self): method="GET", path="/%s" % PATH, query_params={"formatOptions.useInt64Timestamp": True}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_list_rows_with_missing_schema(self): @@ -6224,7 +6258,7 @@ def test_list_rows_with_missing_schema(self): row_iter = client.list_rows(table) conn.api_request.assert_called_once_with( - method="GET", path=table_path, timeout=None + method="GET", path=table_path, timeout=DEFAULT_TIMEOUT ) conn.api_request.reset_mock() self.assertEqual(row_iter.total_rows, 2, msg=repr(table)) @@ -6234,7 +6268,7 @@ def test_list_rows_with_missing_schema(self): 
method="GET", path=tabledata_path, query_params={"formatOptions.useInt64Timestamp": True}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(row_iter.total_rows, 3, msg=repr(table)) self.assertEqual(rows[0].name, "Phred Phlyntstone", msg=repr(table)) @@ -6407,7 +6441,7 @@ def test_load_table_from_file_resumable(self): file_obj, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) @@ -6440,7 +6474,7 @@ def test_load_table_from_file_w_explicit_project(self): file_obj, expected_resource, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project="other-project", ) @@ -6474,7 +6508,7 @@ def test_load_table_from_file_w_client_location(self): file_obj, expected_resource, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project="other-project", ) @@ -6536,7 +6570,7 @@ def test_load_table_from_file_resumable_metadata(self): file_obj, expected_config, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) @@ -6565,7 +6599,7 @@ def test_load_table_from_file_multipart(self): self.EXPECTED_CONFIGURATION, file_obj_size, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project=self.PROJECT, ) @@ -6590,7 +6624,7 @@ def test_load_table_from_file_with_retries(self): file_obj, self.EXPECTED_CONFIGURATION, num_retries, - None, + DEFAULT_TIMEOUT, project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) @@ -6627,7 +6661,7 @@ def test_load_table_from_file_with_readable_gzip(self): gzip_file, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) @@ -6750,7 +6784,7 @@ def test_load_table_from_dataframe(self): location=None, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_file = load_table_from_file.mock_calls[0][1][1] @@ -6808,7 +6842,7 @@ def 
test_load_table_from_dataframe_w_client_location(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_file = load_table_from_file.mock_calls[0][1][1] @@ -6862,7 +6896,7 @@ def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(sel location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -6918,7 +6952,7 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7012,7 +7046,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7073,7 +7107,7 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7120,7 +7154,7 @@ def test_load_table_from_dataframe_unknown_table(self): location=None, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @unittest.skipIf( @@ -7162,7 +7196,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7210,7 +7244,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ 
-7272,7 +7306,7 @@ def test_load_table_from_dataframe_struct_fields(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7347,7 +7381,7 @@ def test_load_table_from_dataframe_w_partial_schema(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7442,7 +7476,7 @@ def test_load_table_from_dataframe_w_partial_schema_missing_types(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) assert warned # there should be at least one warning @@ -7592,7 +7626,7 @@ def test_load_table_from_dataframe_w_nulls(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7658,7 +7692,7 @@ def test_load_table_from_dataframe_with_csv_source_format(self): location=None, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_file = load_table_from_file.mock_calls[0][1][1] @@ -7696,7 +7730,7 @@ def test_load_table_from_json_basic_use(self): location=client.location, project=client.project, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7749,7 +7783,7 @@ def test_load_table_from_json_non_default_args(self): location="EU", project="project-x", job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7809,7 +7843,7 @@ def test_load_table_from_json_unicode_emoji_data_case(self): location=client.location, project=client.project, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_data_file = load_table_from_file.mock_calls[0][1][1] diff --git 
a/tests/unit/test_create_dataset.py b/tests/unit/test_create_dataset.py index d07aaed4f..67b21225d 100644 --- a/tests/unit/test_create_dataset.py +++ b/tests/unit/test_create_dataset.py @@ -15,6 +15,7 @@ from google.cloud.bigquery.dataset import Dataset, DatasetReference from .helpers import make_connection, dataset_polymorphic, make_client import google.cloud.bigquery.dataset +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT import mock import pytest @@ -111,7 +112,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], "labels": LABELS, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -143,7 +144,7 @@ def test_create_dataset_w_custom_property(client, PROJECT, DS_ID): "newAlphaProperty": "unreleased property", "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -176,7 +177,7 @@ def test_create_dataset_w_client_location_wo_dataset_location(PROJECT, DS_ID, LO "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -211,7 +212,7 @@ def test_create_dataset_w_client_location_w_dataset_location(PROJECT, DS_ID, LOC "labels": {}, "location": OTHER_LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -241,7 +242,7 @@ def test_create_dataset_w_reference(PROJECT, DS_ID, LOCATION): "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -271,7 +272,7 @@ def test_create_dataset_w_fully_qualified_string(PROJECT, DS_ID, LOCATION): "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -306,7 +307,7 @@ def test_create_dataset_w_string(PROJECT, DS_ID, LOCATION): "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -356,8 +357,8 @@ def test_create_dataset_alreadyexists_w_exists_ok_true(PROJECT, DS_ID, LOCATION) "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), - mock.call(method="GET", path=get_path, 
timeout=None), + mock.call(method="GET", path=get_path, timeout=DEFAULT_TIMEOUT), ] ) diff --git a/tests/unit/test_delete_dataset.py b/tests/unit/test_delete_dataset.py index 3a65e031c..b48beb147 100644 --- a/tests/unit/test_delete_dataset.py +++ b/tests/unit/test_delete_dataset.py @@ -14,6 +14,7 @@ from .helpers import make_connection, make_client, dataset_polymorphic import google.api_core.exceptions +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT import pytest @@ -40,7 +41,7 @@ def test_delete_dataset_delete_contents( method="DELETE", path="/%s" % PATH, query_params={"deleteContents": "true"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -61,7 +62,7 @@ def test_delete_dataset_w_not_found_ok_false(PROJECT, DS_ID): client.delete_dataset(DS_ID) conn.api_request.assert_called_with( - method="DELETE", path=path, query_params={}, timeout=None + method="DELETE", path=path, query_params={}, timeout=DEFAULT_TIMEOUT ) @@ -74,5 +75,5 @@ def test_delete_dataset_w_not_found_ok_true(PROJECT, DS_ID): ) client.delete_dataset(DS_ID, not_found_ok=True) conn.api_request.assert_called_with( - method="DELETE", path=path, query_params={}, timeout=None + method="DELETE", path=path, query_params={}, timeout=DEFAULT_TIMEOUT ) diff --git a/tests/unit/test_list_datasets.py b/tests/unit/test_list_datasets.py index 7793a7ba6..6f0b55c5e 100644 --- a/tests/unit/test_list_datasets.py +++ b/tests/unit/test_list_datasets.py @@ -1,11 +1,11 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -15,6 +15,7 @@ import mock import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from .helpers import make_connection @@ -65,7 +66,7 @@ def test_list_datasets_defaults(client, PROJECT, extra, query): assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params=query, timeout=None + method="GET", path="/%s" % PATH, query_params=query, timeout=DEFAULT_TIMEOUT ) @@ -120,5 +121,5 @@ def test_list_datasets_explicit_response_missing_datasets_key(client, PROJECT): "maxResults": 3, "pageToken": TOKEN, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) diff --git a/tests/unit/test_list_jobs.py b/tests/unit/test_list_jobs.py index f348be724..1fb40d446 100644 --- a/tests/unit/test_list_jobs.py +++ b/tests/unit/test_list_jobs.py @@ -1,11 +1,11 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -17,6 +17,7 @@ import mock import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from .helpers import make_connection @@ -136,7 +137,7 @@ def test_list_jobs_defaults(client, PROJECT, DS_ID, extra, query): method="GET", path="/%s" % PATH, query_params=dict({"projection": "full"}, **query), - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -185,7 +186,7 @@ def test_list_jobs_load_job_wo_sourceUris(client, PROJECT, DS_ID): method="GET", path="/%s" % PATH, query_params={"projection": "full"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -220,7 +221,7 @@ def test_list_jobs_explicit_missing(client, PROJECT): "allUsers": True, "stateFilter": "done", }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -233,7 +234,7 @@ def test_list_jobs_w_project(client, PROJECT): method="GET", path="/projects/other-project/jobs", query_params={"projection": "full"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -269,7 +270,7 @@ def test_list_jobs_w_time_filter(client, PROJECT): "minCreationTime": "1", "maxCreationTime": str(end_time_millis), }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -286,6 +287,6 @@ def test_list_jobs_w_parent_job_filter(client, PROJECT): method="GET", path="/projects/%s/jobs" % PROJECT, query_params={"projection": "full", "parentJobId": "parent-job-123"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) conn.api_request.reset_mock() diff --git a/tests/unit/test_list_models.py b/tests/unit/test_list_models.py index 4ede9a7dd..b14852338 100644 --- a/tests/unit/test_list_models.py +++ b/tests/unit/test_list_models.py @@ -1,20 +1,22 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from .helpers import make_connection, dataset_polymorphic import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT +from .helpers import make_connection, dataset_polymorphic + def test_list_models_empty_w_timeout(client, PROJECT, DS_ID): path = "/projects/{}/datasets/{}/models".format(PROJECT, DS_ID) @@ -82,7 +84,7 @@ def test_list_models_defaults( assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params=query, timeout=None + method="GET", path="/%s" % PATH, query_params=query, timeout=DEFAULT_TIMEOUT ) diff --git a/tests/unit/test_list_projects.py b/tests/unit/test_list_projects.py index a88540dd5..190612b44 100644 --- a/tests/unit/test_list_projects.py +++ b/tests/unit/test_list_projects.py @@ -1,11 +1,11 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -15,6 +15,7 @@ import mock import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from .helpers import make_connection @@ -66,7 +67,7 @@ def test_list_projects_defaults(client, PROJECT, extra, query): assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/projects", query_params=query, timeout=None + method="GET", path="/projects", query_params=query, timeout=DEFAULT_TIMEOUT ) @@ -115,5 +116,5 @@ def test_list_projects_explicit_response_missing_projects_key(client): method="GET", path="/projects", query_params={"maxResults": 3, "pageToken": TOKEN}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) diff --git a/tests/unit/test_list_routines.py b/tests/unit/test_list_routines.py index 069966542..80e62d6bd 100644 --- a/tests/unit/test_list_routines.py +++ b/tests/unit/test_list_routines.py @@ -1,20 +1,22 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from .helpers import make_connection, dataset_polymorphic import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT +from .helpers import make_connection, dataset_polymorphic + def test_list_routines_empty_w_timeout(client): conn = client._connection = make_connection({}) @@ -85,7 +87,7 @@ def test_list_routines_defaults( assert actual_token == token conn.api_request.assert_called_once_with( - method="GET", path=path, query_params=query, timeout=None + method="GET", path=path, query_params=query, timeout=DEFAULT_TIMEOUT ) diff --git a/tests/unit/test_list_tables.py b/tests/unit/test_list_tables.py index 45d15bed3..8360f6605 100644 --- a/tests/unit/test_list_tables.py +++ b/tests/unit/test_list_tables.py @@ -1,21 +1,23 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from .helpers import make_connection, dataset_polymorphic -import google.cloud.bigquery.dataset import pytest +import google.cloud.bigquery.dataset +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT +from .helpers import make_connection, dataset_polymorphic + @dataset_polymorphic def test_list_tables_empty_w_timeout( @@ -89,7 +91,7 @@ def test_list_tables_defaults(make_dataset, get_reference, client, PROJECT, DS_I assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={}, timeout=None + method="GET", path="/%s" % PATH, query_params={}, timeout=DEFAULT_TIMEOUT ) @@ -150,7 +152,7 @@ def test_list_tables_explicit(client, PROJECT, DS_ID): method="GET", path="/%s" % PATH, query_params={"maxResults": 3, "pageToken": TOKEN}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index 88c92a070..36cbf4993 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -32,6 +32,7 @@ from google.cloud.bigquery import job from google.cloud.bigquery import table from google.cloud.bigquery.magics import magics +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from tests.unit.helpers import make_connection from test_utils.imports import maybe_fail_import @@ -185,7 +186,7 @@ def test_context_with_default_connection(): method="POST", path="/projects/project-from-env/jobs", data=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) query_results_call = mock.call( method="GET", @@ -249,7 +250,7 @@ def test_context_with_custom_connection(): method="POST", path="/projects/project-from-env/jobs", data=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) query_results_call = mock.call( method="GET", diff --git a/tests/unit/test_retry.py b/tests/unit/test_retry.py index c7c25e036..e0a992f78 100644 --- a/tests/unit/test_retry.py +++ b/tests/unit/test_retry.py @@ -55,6 +55,18 @@ def test_w_unstructured_requests_chunked_encoding_error(self): exc = 
requests.exceptions.ChunkedEncodingError() self.assertTrue(self._call_fut(exc)) + def test_w_unstructured_requests_connecttimeout(self): + exc = requests.exceptions.ConnectTimeout() + self.assertTrue(self._call_fut(exc)) + + def test_w_unstructured_requests_readtimeout(self): + exc = requests.exceptions.ReadTimeout() + self.assertTrue(self._call_fut(exc)) + + def test_w_unstructured_requests_timeout(self): + exc = requests.exceptions.Timeout() + self.assertTrue(self._call_fut(exc)) + def test_w_auth_transporterror(self): from google.auth.exceptions import TransportError From 4fc7c693283e94b44d388f8c7991a1ad78fcde45 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 25 Aug 2021 17:34:24 +0200 Subject: [PATCH 337/341] chore(deps): update dependency google-cloud-bigquery to v2.25.0 (#907) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | `==2.24.1` -> `==2.25.0` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.0/compatibility-slim/2.24.1)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.0/confidence-slim/2.24.1)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery ### [`v2.25.0`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​2250-httpswwwgithubcomgoogleapispython-bigquerycomparev2241v2250-2021-08-24) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.24.1...v2.25.0) ##### Features - Support using GeoPandas for GEOGRAPHY columns ([#​848](https://www.togithub.com/googleapis/python-bigquery/issues/848)) ([16f65e6](https://www.github.com/googleapis/python-bigquery/commit/16f65e6ae15979217ceea6c6d398c9057a363a13)) ##### [2.24.1](https://www.github.com/googleapis/python-bigquery/compare/v2.24.0...v2.24.1) (2021-08-13) ##### Bug Fixes - remove pytz dependency and require pyarrow>=3.0.0 ([#​875](https://www.togithub.com/googleapis/python-bigquery/issues/875)) ([2cb3563](https://www.github.com/googleapis/python-bigquery/commit/2cb3563ee863edef7eaf5d04d739bcfe7bc6438e))
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 853306d71..d810e1241 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -12,7 +12,7 @@ geojson==2.5.0 geopandas==0.9.0 google-api-core==1.31.2 google-auth==1.35.0 -google-cloud-bigquery==2.24.1 +google-cloud-bigquery==2.25.0 google-cloud-bigquery-storage==2.6.3 google-cloud-core==2.0.0 google-crc32c==1.1.2 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 484e10516..07760b666 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.24.1 +google-cloud-bigquery==2.25.0 google-cloud-bigquery-storage==2.6.3 google-auth-oauthlib==0.4.5 grpcio==1.39.0 From 121c2c2005225fae8a89ed231026e7ac64625532 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 25 Aug 2021 17:56:26 +0200 Subject: [PATCH 338/341] chore(deps): update dependency pandas to v1.3.2 (#900) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | 
Adoption | Passing | Confidence | |---|---|---|---|---|---| | [pandas](https://pandas.pydata.org) ([source](https://togithub.com/pandas-dev/pandas)) | `==1.1.5` -> `==1.3.2` | [![age](https://badges.renovateapi.com/packages/pypi/pandas/1.3.2/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/pandas/1.3.2/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/pandas/1.3.2/compatibility-slim/1.1.5)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/pandas/1.3.2/confidence-slim/1.1.5)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
pandas-dev/pandas ### [`v1.3.2`](https://togithub.com/pandas-dev/pandas/releases/v1.3.2) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.3.1...v1.3.2) This is a patch release in the 1.3.x series and includes some regression fixes and bug fixes. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.3.2/whatsnew/v1.3.2.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.3.1`](https://togithub.com/pandas-dev/pandas/releases/v1.3.1) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.3.0...v1.3.1) This is the first patch release in the 1.3.x series and includes some regression fixes and bug fixes. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.3.1/whatsnew/v1.3.1.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.3.0`](https://togithub.com/pandas-dev/pandas/releases/v1.3.0) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.5...v1.3.0) This release includes some new features, bug fixes, and performance improvements. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.3.0/whatsnew/v1.3.0.html) for a list of all the changes. 
The release will be available on the defaults and conda-forge channels: conda install -c conda-forge pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.2.5`](https://togithub.com/pandas-dev/pandas/releases/v1.2.5) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.4...v1.2.5) This is a patch release in the 1.2.x series and includes some regression fixes. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.5/whatsnew/v1.2.5.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.2.4`](https://togithub.com/pandas-dev/pandas/releases/v1.2.4) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.3...v1.2.4) This is a patch release in the 1.2.x series and includes some regression fixes. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.4/whatsnew/v1.2.4.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.2.3`](https://togithub.com/pandas-dev/pandas/releases/v1.2.3) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.2...v1.2.3) This is a patch release in the 1.2.x series and includes some regression fixes. We recommend that all users upgrade to this version. 
See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.3/whatsnew/v1.2.3.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.2.2`](https://togithub.com/pandas-dev/pandas/releases/v1.2.2) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.1...v1.2.2) This is a patch release in the 1.2.x series and includes some regression fixes and bug fixes. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.2/whatsnew/v1.2.2.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.2.1`](https://togithub.com/pandas-dev/pandas/releases/v1.2.1) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.0...v1.2.1) This is the first patch release in the 1.2.x series and includes some regression fixes and bug fixes. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.1/whatsnew/v1.2.1.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). 
### [`v1.2.0`](https://togithub.com/pandas-dev/pandas/releases/v1.2.0) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.1.5...v1.2.0) This release includes some new features, bug fixes, and performance improvements. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.0/whatsnew/v1.2.0.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install -c conda-forge pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues).
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Renovate will not automatically rebase this PR, because other commits have been found. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/geography/requirements.txt | 3 ++- samples/snippets/requirements.txt | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index d810e1241..b5fe247cb 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -27,7 +27,8 @@ mypy-extensions==0.4.3 numpy==1.19.5; python_version < "3.7" numpy==1.21.2; python_version > "3.6" packaging==21.0 -pandas==1.1.5 +pandas==1.1.5; python_version < '3.7' +pandas==1.3.2; python_version >= '3.7' proto-plus==1.19.0 protobuf==3.17.3 pyarrow==5.0.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 07760b666..d75c747fb 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -7,6 +7,6 @@ ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' matplotlib==3.4.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' -pandas==1.2.0; python_version >= '3.7' +pandas==1.3.2; python_version >= '3.7' pyarrow==5.0.0 pytz==2021.1 From a3a85dac90211599b2260da0d514d19647085575 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 25 Aug 2021 11:38:24 -0500 Subject: [PATCH 339/341] chore: group all renovate PRs together (#911) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
This excludes `renovate.json` from templated updates. If this works well, we can update the core templates (perhaps with a configuration option to `py_library`). Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --- owlbot.py | 28 +++++++++++++++++----------- renovate.json | 2 +- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/owlbot.py b/owlbot.py index ea9904cdb..8664b658a 100644 --- a/owlbot.py +++ b/owlbot.py @@ -63,7 +63,7 @@ s.replace( library / f"google/cloud/bigquery_{library.name}/types/standard_sql.py", r"type_ ", - "type " + "type ", ) s.move( @@ -78,8 +78,8 @@ "noxfile.py", "setup.py", f"scripts/fixup_bigquery_{library.name}_keywords.py", - f"google/cloud/bigquery/__init__.py", - f"google/cloud/bigquery/py.typed", + "google/cloud/bigquery/__init__.py", + "google/cloud/bigquery/py.typed", # There are no public API endpoints for the generated ModelServiceClient, # thus there's no point in generating it and its tests. f"google/cloud/bigquery_{library.name}/services/**", @@ -98,9 +98,9 @@ microgenerator=True, split_system_tests=True, intersphinx_dependencies={ - "pandas": 'http://pandas.pydata.org/pandas-docs/dev', + "pandas": "http://pandas.pydata.org/pandas-docs/dev", "geopandas": "https://geopandas.org/", - } + }, ) # BigQuery has a custom multiprocessing note @@ -113,7 +113,11 @@ # Include custom SNIPPETS_TESTS job for performance. # https://github.com/googleapis/python-bigquery/issues/191 ".kokoro/presubmit/presubmit.cfg", - ] + # Group all renovate PRs together. 
If this works well, remove this and + # update the shared templates (possibly with configuration option to + # py_library.) + "renovate.json", + ], ) # ---------------------------------------------------------------------------- @@ -125,14 +129,14 @@ s.replace( "docs/conf.py", r'\{"members": True\}', - '{"members": True, "inherited-members": True}' + '{"members": True, "inherited-members": True}', ) # Tell Sphinx to ingore autogenerated docs files. s.replace( "docs/conf.py", r'"samples/snippets/README\.rst",', - '\g<0>\n "bigquery_v2/services.rst", # generated by the code generator', + '\\g<0>\n "bigquery_v2/services.rst", # generated by the code generator', ) # ---------------------------------------------------------------------------- @@ -140,13 +144,14 @@ # ---------------------------------------------------------------------------- # Add .pytype to .gitignore -s.replace(".gitignore", r"\.pytest_cache", "\g<0>\n.pytype") +s.replace(".gitignore", r"\.pytest_cache", "\\g<0>\n.pytype") # Add pytype config to setup.cfg s.replace( "setup.cfg", r"universal = 1", - textwrap.dedent(""" \g<0> + textwrap.dedent( + """ \\g<0> [pytype] python_version = 3.8 @@ -160,7 +165,8 @@ # There's some issue with finding some pyi files, thus disabling. # The issue https://github.com/google/pytype/issues/150 is closed, but the # error still occurs for some reason. 
- pyi-error""") + pyi-error""" + ), ) s.shell.run(["nox", "-s", "blacken"], hide_output=False) diff --git a/renovate.json b/renovate.json index c04895563..713c60bb4 100644 --- a/renovate.json +++ b/renovate.json @@ -1,6 +1,6 @@ { "extends": [ - "config:base", ":preserveSemverRanges" + "config:base", "group:all", ":preserveSemverRanges" ], "ignorePaths": [".pre-commit-config.yaml"], "pip_requirements": { From 109a5365d7c1e388a49809e653a51c1d77ddb0a2 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 25 Aug 2021 17:34:14 +0000 Subject: [PATCH 340/341] chore: release 2.25.1 (#912) :robot: I have created a release \*beep\* \*boop\* --- ### [2.25.1](https://www.github.com/googleapis/python-bigquery/compare/v2.25.0...v2.25.1) (2021-08-25) ### Bug Fixes * populate default `timeout` and retry after client-side timeout ([#896](https://www.github.com/googleapis/python-bigquery/issues/896)) ([b508809](https://www.github.com/googleapis/python-bigquery/commit/b508809c0f887575274309a463e763c56ddd017d)) * use REST API in cell magic when requested ([#892](https://www.github.com/googleapis/python-bigquery/issues/892)) ([1cb3e55](https://www.github.com/googleapis/python-bigquery/commit/1cb3e55253e824e3a1da5201f6ec09065fb6b627)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). 
--- CHANGELOG.md | 8 ++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7a5727ee7..8a21df6fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,14 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.25.1](https://www.github.com/googleapis/python-bigquery/compare/v2.25.0...v2.25.1) (2021-08-25) + + +### Bug Fixes + +* populate default `timeout` and retry after client-side timeout ([#896](https://www.github.com/googleapis/python-bigquery/issues/896)) ([b508809](https://www.github.com/googleapis/python-bigquery/commit/b508809c0f887575274309a463e763c56ddd017d)) +* use REST API in cell magic when requested ([#892](https://www.github.com/googleapis/python-bigquery/issues/892)) ([1cb3e55](https://www.github.com/googleapis/python-bigquery/commit/1cb3e55253e824e3a1da5201f6ec09065fb6b627)) + ## [2.25.0](https://www.github.com/googleapis/python-bigquery/compare/v2.24.1...v2.25.0) (2021-08-24) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index f882cac3a..21cbec9fe 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.25.0" +__version__ = "2.25.1" From f55864ec3d6381f2b31598428a64822fdc73cb56 Mon Sep 17 00:00:00 2001 From: pallabiwrites <87546424+pallabiwrites@users.noreply.github.com> Date: Thu, 26 Aug 2021 08:40:47 +0530 Subject: [PATCH 341/341] docs: update docstring for bigquery_create_routine sample (#883) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixed language issues. Thank you for opening a Pull Request! 
Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --- samples/create_routine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/create_routine.py b/samples/create_routine.py index 012c7927a..1cb4a80b4 100644 --- a/samples/create_routine.py +++ b/samples/create_routine.py @@ -22,7 +22,7 @@ def create_routine(routine_id): # Construct a BigQuery client object. client = bigquery.Client() - # TODO(developer): Choose a fully-qualified ID for the routine. + # TODO(developer): Choose a fully qualified ID for the routine. # routine_id = "my-project.my_dataset.my_routine" routine = bigquery.Routine(