From bf44e7b67d2de41c13053a4550484b9ea049db3e Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Fri, 29 Jan 2021 00:34:08 -0800 Subject: [PATCH 001/230] fix: don't try to close closed cursors (#498) --- google/cloud/bigquery/dbapi/connection.py | 3 ++- tests/unit/test_dbapi_connection.py | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/dbapi/connection.py b/google/cloud/bigquery/dbapi/connection.py index 300c77dc9..459fc82aa 100644 --- a/google/cloud/bigquery/dbapi/connection.py +++ b/google/cloud/bigquery/dbapi/connection.py @@ -76,7 +76,8 @@ def close(self): self._bqstorage_client._transport.grpc_channel.close() for cursor_ in self._cursors_created: - cursor_.close() + if not cursor_._closed: + cursor_.close() def commit(self): """No-op, but for consistency raise an error if connection is closed.""" diff --git a/tests/unit/test_dbapi_connection.py b/tests/unit/test_dbapi_connection.py index edec559b2..74da318bf 100644 --- a/tests/unit/test_dbapi_connection.py +++ b/tests/unit/test_dbapi_connection.py @@ -176,6 +176,22 @@ def test_close_closes_all_created_cursors(self): self.assertTrue(cursor_1._closed) self.assertTrue(cursor_2._closed) + def test_close_closes_only_open_created_cursors(self): + connection = self._make_one(client=self._mock_client()) + cursor_1 = connection.cursor() + cursor_2 = connection.cursor() + self.assertFalse(cursor_1._closed) + self.assertFalse(cursor_2._closed) + + cursor_1.close() + self.assertTrue(cursor_1._closed) + cursor_1.close = mock.MagicMock() + + connection.close() + + self.assertFalse(cursor_1.close.called) + self.assertTrue(cursor_2._closed) + def test_does_not_keep_cursor_instances_alive(self): from google.cloud.bigquery.dbapi import Cursor From 2299cc648d5a8fa55fb08ddb58bae4675f0a13aa Mon Sep 17 00:00:00 2001 From: Justin Beckwith Date: Fri, 29 Jan 2021 08:09:02 -0800 Subject: [PATCH 002/230] build: migrate to flakybot (#500) --- .kokoro/test-samples.sh | 8 ++++---- 
.kokoro/trampoline_v2.sh | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.kokoro/test-samples.sh b/.kokoro/test-samples.sh index c5653a81d..3ce8994cb 100755 --- a/.kokoro/test-samples.sh +++ b/.kokoro/test-samples.sh @@ -87,11 +87,11 @@ for file in samples/**/requirements.txt; do python3.6 -m nox -s "$RUN_TESTS_SESSION" EXIT=$? - # If this is a periodic build, send the test log to the Build Cop Bot. - # See https://github.com/googleapis/repo-automation-bots/tree/master/packages/buildcop. + # If this is a periodic build, send the test log to the FlakyBot. + # See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot. if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then - chmod +x $KOKORO_GFILE_DIR/linux_amd64/buildcop - $KOKORO_GFILE_DIR/linux_amd64/buildcop + chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot + $KOKORO_GFILE_DIR/linux_amd64/flakybot fi if [[ $EXIT -ne 0 ]]; then diff --git a/.kokoro/trampoline_v2.sh b/.kokoro/trampoline_v2.sh index 719bcd5ba..4af6cdc26 100755 --- a/.kokoro/trampoline_v2.sh +++ b/.kokoro/trampoline_v2.sh @@ -159,7 +159,7 @@ if [[ -n "${KOKORO_BUILD_ID:-}" ]]; then "KOKORO_GITHUB_COMMIT" "KOKORO_GITHUB_PULL_REQUEST_NUMBER" "KOKORO_GITHUB_PULL_REQUEST_COMMIT" - # For Build Cop Bot + # For FlakyBot "KOKORO_GITHUB_COMMIT_URL" "KOKORO_GITHUB_PULL_REQUEST_URL" ) From 475a5c9e96886bfbd2047c1a59ab5f9ab7b4998c Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 1 Feb 2021 19:37:14 +0100 Subject: [PATCH 003/230] chore(deps): update dependency pyarrow to v3 (#490) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 599b6d52f..979506199 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -7,5 +7,5 @@ ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.3 pandas==1.1.5; python_version < '3.7' pandas==1.2.0; 
python_version >= '3.7' -pyarrow==2.0.0 +pyarrow==3.0.0 pytz==2020.5 From 5caa14fd8562fde40ba79eab48db869db7dfdcf5 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 1 Feb 2021 19:37:49 +0100 Subject: [PATCH 004/230] chore(deps): update dependency google-cloud-bigquery to v2.7.0 (#491) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 338cf2e89..6f9306af2 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,3 +1,3 @@ geojson==2.5.0 -google-cloud-bigquery==2.6.2 +google-cloud-bigquery==2.7.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 979506199..bc3985ebd 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.6.2 +google-cloud-bigquery==2.7.0 google-cloud-bigquery-storage==2.2.1 google-auth-oauthlib==0.4.2 grpcio==1.35.0 From 1cf9f4f360a80837889e3f12138677e72eb78881 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 1 Feb 2021 19:38:33 +0100 Subject: [PATCH 005/230] chore(deps): update dependency matplotlib to v3.3.4 (#495) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index bc3985ebd..00d28fa0b 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -4,7 +4,7 @@ google-auth-oauthlib==0.4.2 grpcio==1.35.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' -matplotlib==3.3.3 +matplotlib==3.3.4 pandas==1.1.5; python_version < '3.7' pandas==1.2.0; python_version >= '3.7' pyarrow==3.0.0 From c090323c3cbb973f0ba3c0c332a57d0612825b38 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Mon, 1 Feb 2021 
10:41:52 -0800 Subject: [PATCH 006/230] chore: update shared templates (#468) * changes without context autosynth cannot find the source of changes triggered by earlier changes in this repository, or by version upgrades to tools such as linters. * chore: add config / docs for 'pre-commit' support Source-Author: Tres Seaver Source-Date: Tue Dec 1 16:01:20 2020 -0500 Source-Repo: googleapis/synthtool Source-Sha: 32af6da519a6b042e3da62008e2a75e991efb6b4 Source-Link: https://github.com/googleapis/synthtool/commit/32af6da519a6b042e3da62008e2a75e991efb6b4 * chore(deps): update precommit hook pre-commit/pre-commit-hooks to v3.3.0 Source-Author: WhiteSource Renovate Source-Date: Wed Dec 2 17:18:24 2020 +0100 Source-Repo: googleapis/synthtool Source-Sha: 69629b64b83c6421d616be2b8e11795738ec8a6c Source-Link: https://github.com/googleapis/synthtool/commit/69629b64b83c6421d616be2b8e11795738ec8a6c * chore: update noxfile.py.j2 * Update noxfile.py.j2 add changes from @glasnt to the template template to ensure that enforcing type hinting doesn't fail for repos with the sample noxfile (aka all samples repos) See https://github.com/GoogleCloudPlatform/python-docs-samples/pull/4869/files for context * fix typo Source-Author: Leah E. 
Cole <6719667+leahecole@users.noreply.github.com> Source-Date: Thu Dec 3 13:44:30 2020 -0800 Source-Repo: googleapis/synthtool Source-Sha: 18c5dbdb4ac8cf75d4d8174e7b4558f48e76f8a1 Source-Link: https://github.com/googleapis/synthtool/commit/18c5dbdb4ac8cf75d4d8174e7b4558f48e76f8a1 * chore(deps): update precommit hook pre-commit/pre-commit-hooks to v3.4.0 Co-authored-by: Tres Seaver Source-Author: WhiteSource Renovate Source-Date: Wed Dec 16 18:13:24 2020 +0100 Source-Repo: googleapis/synthtool Source-Sha: aa255b15d52b6d8950cca48cfdf58f7d27a60c8a Source-Link: https://github.com/googleapis/synthtool/commit/aa255b15d52b6d8950cca48cfdf58f7d27a60c8a * docs(python): document adding Python 3.9 support, dropping 3.5 support Closes #787 Source-Author: Tres Seaver Source-Date: Thu Dec 17 16:08:02 2020 -0500 Source-Repo: googleapis/synthtool Source-Sha: b670a77a454f415d247907908e8ee7943e06d718 Source-Link: https://github.com/googleapis/synthtool/commit/b670a77a454f415d247907908e8ee7943e06d718 * chore: exclude `.nox` directories from linting The samples tests create `.nox` directories with all dependencies installed. These directories should be excluded from linting. I've tested this change locally, and it significantly speeds up linting on my machine. 
Source-Author: Tim Swast Source-Date: Tue Dec 22 13:04:04 2020 -0600 Source-Repo: googleapis/synthtool Source-Sha: 373861061648b5fe5e0ac4f8a38b32d639ee93e4 Source-Link: https://github.com/googleapis/synthtool/commit/373861061648b5fe5e0ac4f8a38b32d639ee93e4 * chore(python): fix column sizing issue in docs Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Thu Jan 7 11:58:32 2021 -0700 Source-Repo: googleapis/synthtool Source-Sha: f15b57ccfd71106c2299e9b89835fe6e55015662 Source-Link: https://github.com/googleapis/synthtool/commit/f15b57ccfd71106c2299e9b89835fe6e55015662 * chore(python): use 'http' in LICENSE Co-authored-by: Tim Swast Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Thu Jan 7 13:05:12 2021 -0700 Source-Repo: googleapis/synthtool Source-Sha: 41a4e56982620d3edcf110d76f4fcdfdec471ac8 Source-Link: https://github.com/googleapis/synthtool/commit/41a4e56982620d3edcf110d76f4fcdfdec471ac8 * chore(python): skip docfx in main presubmit * chore(python): skip docfx in main presubmit * fix: properly template the repo name Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Fri Jan 8 10:32:13 2021 -0700 Source-Repo: googleapis/synthtool Source-Sha: fb53b6fb373b7c3edf4e55f3e8036bc6d73fa483 Source-Link: https://github.com/googleapis/synthtool/commit/fb53b6fb373b7c3edf4e55f3e8036bc6d73fa483 * chore: add missing quotation mark Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Mon Jan 11 09:43:06 2021 -0700 Source-Repo: googleapis/synthtool Source-Sha: 16ec872dd898d7de6e1822badfac32484b5d9031 Source-Link: https://github.com/googleapis/synthtool/commit/16ec872dd898d7de6e1822badfac32484b5d9031 --- .flake8 | 1 + .kokoro/docs/docs-presubmit.cfg | 2 +- .pre-commit-config.yaml | 17 ++++++ CONTRIBUTING.rst | 21 +++++--- LICENSE | 7 +-- docs/_static/custom.css | 7 ++- samples/geography/noxfile.py | 19 ++++--- samples/snippets/noxfile.py | 19 
++++--- synth.metadata | 92 +++++++++++++++++++++++++++++++-- 9 files changed, 154 insertions(+), 31 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.flake8 b/.flake8 index ed9316381..29227d4cf 100644 --- a/.flake8 +++ b/.flake8 @@ -26,6 +26,7 @@ exclude = *_pb2.py # Standard linting exemptions. + **/.nox/** __pycache__, .git, *.pyc, diff --git a/.kokoro/docs/docs-presubmit.cfg b/.kokoro/docs/docs-presubmit.cfg index 5c216b4bc..08adb2e28 100644 --- a/.kokoro/docs/docs-presubmit.cfg +++ b/.kokoro/docs/docs-presubmit.cfg @@ -25,4 +25,4 @@ env_vars: { env_vars: { key: "NOX_SESSION" value: "docs docfx" -} \ No newline at end of file +} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..a9024b15d --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,17 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.4.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml +- repo: https://github.com/psf/black + rev: 19.10b0 + hooks: + - id: black +- repo: https://gitlab.com/pycqa/flake8 + rev: 3.8.4 + hooks: + - id: flake8 diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index b3b802b49..15bcd2e28 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -21,8 +21,8 @@ In order to add a feature: - The feature must be documented in both the API and narrative documentation. -- The feature must work fully on the following CPython versions: 2.7, - 3.5, 3.6, 3.7 and 3.8 on both UNIX and Windows. +- The feature must work fully on the following CPython versions: + 3.6, 3.7, 3.8 and 3.9 on both UNIX and Windows. 
- The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should @@ -111,6 +111,16 @@ Coding Style should point to the official ``googleapis`` checkout and the the branch should be the main branch on that remote (``master``). +- This repository contains configuration for the + `pre-commit `__ tool, which automates checking + our linters during a commit. If you have it installed on your ``$PATH``, + you can enable enforcing those checks via: + +.. code-block:: bash + + $ pre-commit install + pre-commit installed at .git/hooks/pre-commit + Exceptions to PEP8: - Many unit tests use a helper method, ``_call_fut`` ("FUT" is short for @@ -192,25 +202,24 @@ Supported Python Versions We support: -- `Python 3.5`_ - `Python 3.6`_ - `Python 3.7`_ - `Python 3.8`_ +- `Python 3.9`_ -.. _Python 3.5: https://docs.python.org/3.5/ .. _Python 3.6: https://docs.python.org/3.6/ .. _Python 3.7: https://docs.python.org/3.7/ .. _Python 3.8: https://docs.python.org/3.8/ +.. _Python 3.9: https://docs.python.org/3.9/ Supported versions can be found in our ``noxfile.py`` `config`_. .. _config: https://github.com/googleapis/python-bigquery/blob/master/noxfile.py -Python 2.7 support is deprecated. All code changes should maintain Python 2.7 compatibility until January 1, 2020. We also explicitly decided to support Python 3 beginning with version -3.5. Reasons for this include: +3.6. Reasons for this include: - Encouraging use of newest versions of Python 3 - Taking the lead of `prominent`_ open-source `projects`_ diff --git a/LICENSE b/LICENSE index a8ee855de..d64569567 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,7 @@ - Apache License + + Apache License Version 2.0, January 2004 - https://www.apache.org/licenses/ + http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION @@ -192,7 +193,7 @@ you may not use this file except in compliance with the License. 
You may obtain a copy of the License at - https://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, diff --git a/docs/_static/custom.css b/docs/_static/custom.css index 0abaf229f..bcd37bbd3 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -1,4 +1,9 @@ div#python2-eol { border-color: red; border-width: medium; -} \ No newline at end of file +} + +/* Ensure minimum width for 'Parameters' / 'Returns' column */ +dl.field-list > dt { + min-width: 100px +} diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index ab2c49227..bbd25fcdb 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -17,6 +17,7 @@ import os from pathlib import Path import sys +from typing import Callable, Dict, List, Optional import nox @@ -65,7 +66,7 @@ TEST_CONFIG.update(TEST_CONFIG_OVERRIDE) -def get_pytest_env_vars(): +def get_pytest_env_vars() -> Dict[str, str]: """Returns a dict for pytest invocation.""" ret = {} @@ -94,7 +95,7 @@ def get_pytest_env_vars(): # -def _determine_local_import_names(start_dir): +def _determine_local_import_names(start_dir: str) -> List[str]: """Determines all import names that should be considered "local". 
This is used when running the linter to insure that import order is @@ -132,7 +133,7 @@ def _determine_local_import_names(start_dir): @nox.session -def lint(session): +def lint(session: nox.sessions.Session) -> None: if not TEST_CONFIG["enforce_type_hints"]: session.install("flake8", "flake8-import-order") else: @@ -153,7 +154,7 @@ def lint(session): @nox.session -def blacken(session): +def blacken(session: nox.sessions.Session) -> None: session.install("black") python_files = [path for path in os.listdir(".") if path.endswith(".py")] @@ -168,7 +169,9 @@ def blacken(session): PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"] -def _session_tests(session, post_install=None): +def _session_tests( + session: nox.sessions.Session, post_install: Callable = None +) -> None: """Runs py.test for a particular project.""" if os.path.exists("requirements.txt"): session.install("-r", "requirements.txt") @@ -194,7 +197,7 @@ def _session_tests(session, post_install=None): @nox.session(python=ALL_VERSIONS) -def py(session): +def py(session: nox.sessions.Session) -> None: """Runs py.test for a sample using the specified version of Python.""" if session.python in TESTED_VERSIONS: _session_tests(session) @@ -209,7 +212,7 @@ def py(session): # -def _get_repo_root(): +def _get_repo_root() -> Optional[str]: """ Returns the root folder of the project. """ # Get root of this repository. Assume we don't have directories nested deeper than 10 items. 
p = Path(os.getcwd()) @@ -232,7 +235,7 @@ def _get_repo_root(): @nox.session @nox.parametrize("path", GENERATED_READMES) -def readmegen(session, path): +def readmegen(session: nox.sessions.Session, path: str) -> None: """(Re-)generates the readme for a sample.""" session.install("jinja2", "pyyaml") dir_ = os.path.dirname(path) diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index ab2c49227..bbd25fcdb 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -17,6 +17,7 @@ import os from pathlib import Path import sys +from typing import Callable, Dict, List, Optional import nox @@ -65,7 +66,7 @@ TEST_CONFIG.update(TEST_CONFIG_OVERRIDE) -def get_pytest_env_vars(): +def get_pytest_env_vars() -> Dict[str, str]: """Returns a dict for pytest invocation.""" ret = {} @@ -94,7 +95,7 @@ def get_pytest_env_vars(): # -def _determine_local_import_names(start_dir): +def _determine_local_import_names(start_dir: str) -> List[str]: """Determines all import names that should be considered "local". 
This is used when running the linter to insure that import order is @@ -132,7 +133,7 @@ def _determine_local_import_names(start_dir): @nox.session -def lint(session): +def lint(session: nox.sessions.Session) -> None: if not TEST_CONFIG["enforce_type_hints"]: session.install("flake8", "flake8-import-order") else: @@ -153,7 +154,7 @@ def lint(session): @nox.session -def blacken(session): +def blacken(session: nox.sessions.Session) -> None: session.install("black") python_files = [path for path in os.listdir(".") if path.endswith(".py")] @@ -168,7 +169,9 @@ def blacken(session): PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"] -def _session_tests(session, post_install=None): +def _session_tests( + session: nox.sessions.Session, post_install: Callable = None +) -> None: """Runs py.test for a particular project.""" if os.path.exists("requirements.txt"): session.install("-r", "requirements.txt") @@ -194,7 +197,7 @@ def _session_tests(session, post_install=None): @nox.session(python=ALL_VERSIONS) -def py(session): +def py(session: nox.sessions.Session) -> None: """Runs py.test for a sample using the specified version of Python.""" if session.python in TESTED_VERSIONS: _session_tests(session) @@ -209,7 +212,7 @@ def py(session): # -def _get_repo_root(): +def _get_repo_root() -> Optional[str]: """ Returns the root folder of the project. """ # Get root of this repository. Assume we don't have directories nested deeper than 10 items. 
p = Path(os.getcwd()) @@ -232,7 +235,7 @@ def _get_repo_root(): @nox.session @nox.parametrize("path", GENERATED_READMES) -def readmegen(session, path): +def readmegen(session: nox.sessions.Session, path: str) -> None: """(Re-)generates the readme for a sample.""" session.install("jinja2", "pyyaml") dir_ = os.path.dirname(path) diff --git a/synth.metadata b/synth.metadata index 6b7854860..eb9009391 100644 --- a/synth.metadata +++ b/synth.metadata @@ -3,8 +3,8 @@ { "git": { "name": ".", - "remote": "git@github.com:tswast/python-bigquery.git", - "sha": "5a422eb20c57dae66c5716fd319b66432d3edce6" + "remote": "https://github.com/googleapis/python-bigquery.git", + "sha": "2788736b80a4c4ac0ae3029aeb28bcefd34f2db7" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "c2de32114ec484aa708d32012d1fa8d75232daf5" + "sha": "16ec872dd898d7de6e1822badfac32484b5d9031" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "c2de32114ec484aa708d32012d1fa8d75232daf5" + "sha": "16ec872dd898d7de6e1822badfac32484b5d9031" } } ], @@ -40,5 +40,89 @@ "generator": "bazel" } } + ], + "generatedFiles": [ + ".flake8", + ".github/CONTRIBUTING.md", + ".github/ISSUE_TEMPLATE/bug_report.md", + ".github/ISSUE_TEMPLATE/feature_request.md", + ".github/ISSUE_TEMPLATE/support_request.md", + ".github/PULL_REQUEST_TEMPLATE.md", + ".github/release-please.yml", + ".github/snippet-bot.yml", + ".gitignore", + ".kokoro/build.sh", + ".kokoro/continuous/common.cfg", + ".kokoro/continuous/continuous.cfg", + ".kokoro/docker/docs/Dockerfile", + ".kokoro/docker/docs/fetch_gpg_keys.sh", + ".kokoro/docs/common.cfg", + ".kokoro/docs/docs-presubmit.cfg", + ".kokoro/docs/docs.cfg", + ".kokoro/populate-secrets.sh", + ".kokoro/presubmit/common.cfg", + ".kokoro/presubmit/system-3.8.cfg", + ".kokoro/publish-docs.sh", + ".kokoro/release.sh", + ".kokoro/release/common.cfg", + ".kokoro/release/release.cfg", + 
".kokoro/samples/lint/common.cfg", + ".kokoro/samples/lint/continuous.cfg", + ".kokoro/samples/lint/periodic.cfg", + ".kokoro/samples/lint/presubmit.cfg", + ".kokoro/samples/python3.6/common.cfg", + ".kokoro/samples/python3.6/continuous.cfg", + ".kokoro/samples/python3.6/periodic.cfg", + ".kokoro/samples/python3.6/presubmit.cfg", + ".kokoro/samples/python3.7/common.cfg", + ".kokoro/samples/python3.7/continuous.cfg", + ".kokoro/samples/python3.7/periodic.cfg", + ".kokoro/samples/python3.7/presubmit.cfg", + ".kokoro/samples/python3.8/common.cfg", + ".kokoro/samples/python3.8/continuous.cfg", + ".kokoro/samples/python3.8/periodic.cfg", + ".kokoro/samples/python3.8/presubmit.cfg", + ".kokoro/test-samples.sh", + ".kokoro/trampoline.sh", + ".kokoro/trampoline_v2.sh", + ".pre-commit-config.yaml", + ".trampolinerc", + "CODE_OF_CONDUCT.md", + "CONTRIBUTING.rst", + "LICENSE", + "MANIFEST.in", + "docs/_static/custom.css", + "docs/_templates/layout.html", + "docs/bigquery_v2/services.rst", + "docs/bigquery_v2/types.rst", + "docs/conf.py", + "google/cloud/bigquery_v2/__init__.py", + "google/cloud/bigquery_v2/proto/encryption_config.proto", + "google/cloud/bigquery_v2/proto/model.proto", + "google/cloud/bigquery_v2/proto/model_reference.proto", + "google/cloud/bigquery_v2/proto/standard_sql.proto", + "google/cloud/bigquery_v2/proto/table_reference.proto", + "google/cloud/bigquery_v2/py.typed", + "google/cloud/bigquery_v2/types/__init__.py", + "google/cloud/bigquery_v2/types/encryption_config.py", + "google/cloud/bigquery_v2/types/model.py", + "google/cloud/bigquery_v2/types/model_reference.py", + "google/cloud/bigquery_v2/types/standard_sql.py", + "google/cloud/bigquery_v2/types/table_reference.py", + "mypy.ini", + "renovate.json", + "samples/AUTHORING_GUIDE.md", + "samples/CONTRIBUTING.md", + "samples/geography/noxfile.py", + "samples/snippets/noxfile.py", + "scripts/decrypt-secrets.sh", + "scripts/readme-gen/readme_gen.py", + "scripts/readme-gen/templates/README.tmpl.rst", + 
"scripts/readme-gen/templates/auth.tmpl.rst", + "scripts/readme-gen/templates/auth_api_key.tmpl.rst", + "scripts/readme-gen/templates/install_deps.tmpl.rst", + "scripts/readme-gen/templates/install_portaudio.tmpl.rst", + "setup.cfg", + "testing/.gitignore" ] } \ No newline at end of file From 64e1c0581b915e36756ea465936939390da7d818 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 1 Feb 2021 19:52:02 +0100 Subject: [PATCH 007/230] chore(deps): update dependency pytz to v2021 (#502) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [pytz](http://pythonhosted.org/pytz) | `==2020.5` -> `==2021.1` | [![age](https://badges.renovateapi.com/packages/pypi/pytz/2021.1/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/pytz/2021.1/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/pytz/2021.1/compatibility-slim/2020.5)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/pytz/2021.1/confidence-slim/2020.5)](https://docs.renovatebot.com/merge-confidence/) | --- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). 
--- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 00d28fa0b..7087121b5 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -8,4 +8,4 @@ matplotlib==3.3.4 pandas==1.1.5; python_version < '3.7' pandas==1.2.0; python_version >= '3.7' pyarrow==3.0.0 -pytz==2020.5 +pytz==2021.1 From 3138d41b60be1b0419cc6bc456b381801b418089 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Wed, 3 Feb 2021 09:41:41 -0800 Subject: [PATCH 008/230] test: add samples Python 3.9 test session (#506) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * changes without context autosynth cannot find the source of changes triggered by earlier changes in this repository, or by version upgrades to tools such as linters. * feat: sync v1beta1 GKE API fix: deprecate SetLocations; use UpdateCluster feat: support for sysctls config in Linux nodes feat: support for node kubelet config controlling CPU manager policy, CFS quota feat: support for Customer Managed Encryption in nodes feat: support for SSDs as ephemeral storage feat: support for node reservation affinity feat: support for Gvisor in nodes fix: deprecate basic auth fields (removed in 1.19 clusters) feat: support for NodeLocalDNS feat: support for ConfigConnector feat: support for the Compute Engine Persistent Disk CSI driver feat: support for KALM feat: support for private cluster VPC peering and master global access feat: support for CloudRun load balancers feat: support using routes for pod IPs feat: support for Shielded Nodes feat: support for release channels feat: support for Workload Identity feat: support for Cluster Telemetry feat: support for Cloud TPU feat: support for receiving upgrade notifications feat: support for Confidential Nodes feat: support for disabling default sNAT feat: support for selecting Kubernetes datapath 
model feat: support for encrypting etcd databases feat: support for configuration of master components fix: deprecate Operation.cluster_conditions and operation_conditions; use error feat: support updating NodePool locations feat: support for node Surge Upgrades feat: support for specifying Cluster Autoscaling profile. feat: support for Node Auto Provisioning feat: support for specifying node disk size and type fix: deprecated StatusCondition.code; use canonical_code docs: many minor documentation clarifications docs: some output only fields now annotated as such PiperOrigin-RevId: 344443035 Source-Author: Google APIs Source-Date: Thu Nov 26 11:27:06 2020 -0800 Source-Repo: googleapis/googleapis Source-Sha: df4fd38d040c5c8a0869936205bca13fb64b2cff Source-Link: https://github.com/googleapis/googleapis/commit/df4fd38d040c5c8a0869936205bca13fb64b2cff * chore: add 3.9 to noxfile template Since the python-docs-samples noxfile-template doesn't sync with this, I wanted to make sure the noxfile template matched the most recent change [here](https://github.com/GoogleCloudPlatform/python-docs-samples/pull/4968/files) cc @tmatsuo Source-Author: Leah E. Cole <6719667+leahecole@users.noreply.github.com> Source-Date: Fri Jan 15 17:24:05 2021 -0800 Source-Repo: googleapis/synthtool Source-Sha: 56ddc68f36b32341e9f22c2c59b4ce6aa3ba635f Source-Link: https://github.com/googleapis/synthtool/commit/56ddc68f36b32341e9f22c2c59b4ce6aa3ba635f * build(python): make `NOX_SESSION` optional I added this accidentally in #889. `NOX_SESSION` should be passed down if it is set but not marked required. 
Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Tue Jan 19 09:38:04 2021 -0700 Source-Repo: googleapis/synthtool Source-Sha: ba960d730416fe05c50547e975ce79fcee52c671 Source-Link: https://github.com/googleapis/synthtool/commit/ba960d730416fe05c50547e975ce79fcee52c671 * chore: Add header checker config to python library synth Now that we have it working in [python-docs-samples](https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/.github/header-checker-lint.yml) we should consider adding it to the 🐍 libraries :) Source-Author: Leah E. Cole <6719667+leahecole@users.noreply.github.com> Source-Date: Mon Jan 25 13:24:08 2021 -0800 Source-Repo: googleapis/synthtool Source-Sha: 573f7655311b553a937f9123bee17bf78497db95 Source-Link: https://github.com/googleapis/synthtool/commit/573f7655311b553a937f9123bee17bf78497db95 * build: migrate to flakybot Source-Author: Justin Beckwith Source-Date: Thu Jan 28 22:22:38 2021 -0800 Source-Repo: googleapis/synthtool Source-Sha: d1bb9173100f62c0cfc8f3138b62241e7f47ca6a Source-Link: https://github.com/googleapis/synthtool/commit/d1bb9173100f62c0cfc8f3138b62241e7f47ca6a * remove tarball Co-authored-by: Tim Swast --- .github/header-checker-lint.yml | 15 +++++++++++++++ .trampolinerc | 1 - samples/geography/noxfile.py | 2 +- samples/snippets/noxfile.py | 2 +- synth.metadata | 7 ++++--- 5 files changed, 21 insertions(+), 6 deletions(-) create mode 100644 .github/header-checker-lint.yml diff --git a/.github/header-checker-lint.yml b/.github/header-checker-lint.yml new file mode 100644 index 000000000..fc281c05b --- /dev/null +++ b/.github/header-checker-lint.yml @@ -0,0 +1,15 @@ +{"allowedCopyrightHolders": ["Google LLC"], + "allowedLicenses": ["Apache-2.0", "MIT", "BSD-3"], + "ignoreFiles": ["**/requirements.txt", "**/requirements-test.txt"], + "sourceFileExtensions": [ + "ts", + "js", + "java", + "sh", + "Dockerfile", + "yaml", + "py", + "html", + "txt" + ] +} \ No newline at end of file 
diff --git a/.trampolinerc b/.trampolinerc index c7d663ae9..383b6ec89 100644 --- a/.trampolinerc +++ b/.trampolinerc @@ -18,7 +18,6 @@ required_envvars+=( "STAGING_BUCKET" "V2_STAGING_BUCKET" - "NOX_SESSION" ) # Add env vars which are passed down into the container here. diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index bbd25fcdb..f2320ea00 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -82,7 +82,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to tested samples. -ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8"] +ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index bbd25fcdb..f2320ea00 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -82,7 +82,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to tested samples. -ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8"] +ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9"] # Any default versions that should be ignored. 
IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/synth.metadata b/synth.metadata index eb9009391..f6dcca132 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "2788736b80a4c4ac0ae3029aeb28bcefd34f2db7" + "sha": "64e1c0581b915e36756ea465936939390da7d818" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "16ec872dd898d7de6e1822badfac32484b5d9031" + "sha": "d1bb9173100f62c0cfc8f3138b62241e7f47ca6a" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "16ec872dd898d7de6e1822badfac32484b5d9031" + "sha": "d1bb9173100f62c0cfc8f3138b62241e7f47ca6a" } } ], @@ -48,6 +48,7 @@ ".github/ISSUE_TEMPLATE/feature_request.md", ".github/ISSUE_TEMPLATE/support_request.md", ".github/PULL_REQUEST_TEMPLATE.md", + ".github/header-checker-lint.yml", ".github/release-please.yml", ".github/snippet-bot.yml", ".gitignore", From 1823cadee3acf95c516d0479400e4175349ea199 Mon Sep 17 00:00:00 2001 From: arithmetic1728 <58957152+arithmetic1728@users.noreply.github.com> Date: Fri, 5 Feb 2021 07:53:15 -0800 Subject: [PATCH 009/230] feat: add mtls support to client (#492) * feat: add mtls feature --- google/cloud/bigquery/_http.py | 21 +++++++++++++++++++-- google/cloud/bigquery/client.py | 25 +++++++++++++++++++------ tests/system/test_client.py | 6 ++++++ tests/unit/helpers.py | 2 ++ tests/unit/test__http.py | 14 ++++++++++++++ tests/unit/test_client.py | 23 +++++++++++++++++++---- 6 files changed, 79 insertions(+), 12 deletions(-) diff --git a/google/cloud/bigquery/_http.py b/google/cloud/bigquery/_http.py index 8ee633e64..ede26cc70 100644 --- a/google/cloud/bigquery/_http.py +++ b/google/cloud/bigquery/_http.py @@ -14,11 +14,23 @@ """Create / interact with Google BigQuery connections.""" +import os +import pkg_resources + from google.cloud import _http from 
google.cloud.bigquery import __version__ +# TODO: Increase the minimum version of google-cloud-core to 1.6.0 +# and remove this logic. See: +# https://github.com/googleapis/python-bigquery/issues/509 +if os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE") == "true": # pragma: NO COVER + release = pkg_resources.get_distribution("google-cloud-core").parsed_version + if release < pkg_resources.parse_version("1.6.0"): + raise ImportError("google-cloud-core >= 1.6.0 is required to use mTLS feature") + + class Connection(_http.JSONConnection): """A connection to Google BigQuery via the JSON REST API. @@ -26,13 +38,18 @@ class Connection(_http.JSONConnection): client (google.cloud.bigquery.client.Client): The client that owns the current connection. client_info (Optional[google.api_core.client_info.ClientInfo]): Instance used to generate user agent. + + api_endpoint (str): The api_endpoint to use. If None, the library will decide what endpoint to use. """ DEFAULT_API_ENDPOINT = "https://bigquery.googleapis.com" + DEFAULT_API_MTLS_ENDPOINT = "https://bigquery.mtls.googleapis.com" - def __init__(self, client, client_info=None, api_endpoint=DEFAULT_API_ENDPOINT): + def __init__(self, client, client_info=None, api_endpoint=None): super(Connection, self).__init__(client, client_info) - self.API_BASE_URL = api_endpoint + self.API_BASE_URL = api_endpoint or self.DEFAULT_API_ENDPOINT + self.API_BASE_MTLS_URL = self.DEFAULT_API_MTLS_ENDPOINT + self.ALLOW_AUTO_SWITCH_TO_MTLS_URL = api_endpoint is None self._client_info.gapic_version = __version__ self._client_info.client_library_version = __version__ diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index b270075a9..f8c0d7c93 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -78,10 +78,7 @@ _DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB _MAX_MULTIPART_SIZE = 5 * 1024 * 1024 _DEFAULT_NUM_RETRIES = 6 -_BASE_UPLOAD_TEMPLATE = ( - 
"https://bigquery.googleapis.com/upload/bigquery/v2/projects/" - "{project}/jobs?uploadType=" -) +_BASE_UPLOAD_TEMPLATE = "{host}/upload/bigquery/v2/projects/{project}/jobs?uploadType=" _MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "multipart" _RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "resumable" _GENERIC_CONTENT_TYPE = "*/*" @@ -2547,7 +2544,15 @@ def _initiate_resumable_upload( if project is None: project = self.project - upload_url = _RESUMABLE_URL_TEMPLATE.format(project=project) + # TODO: Increase the minimum version of google-cloud-core to 1.6.0 + # and remove this logic. See: + # https://github.com/googleapis/python-bigquery/issues/509 + hostname = ( + self._connection.API_BASE_URL + if not hasattr(self._connection, "get_api_base_url_for_mtls") + else self._connection.get_api_base_url_for_mtls() + ) + upload_url = _RESUMABLE_URL_TEMPLATE.format(host=hostname, project=project) # TODO: modify ResumableUpload to take a retry.Retry object # that it can use for the initial RPC. @@ -2616,7 +2621,15 @@ def _do_multipart_upload( if project is None: project = self.project - upload_url = _MULTIPART_URL_TEMPLATE.format(project=project) + # TODO: Increase the minimum version of google-cloud-core to 1.6.0 + # and remove this logic. 
See: + # https://github.com/googleapis/python-bigquery/issues/509 + hostname = ( + self._connection.API_BASE_URL + if not hasattr(self._connection, "get_api_base_url_for_mtls") + else self._connection.get_api_base_url_for_mtls() + ) + upload_url = _MULTIPART_URL_TEMPLATE.format(host=hostname, project=project) upload = MultipartUpload(upload_url, headers=headers) if num_retries is not None: diff --git a/tests/system/test_client.py b/tests/system/test_client.py index aa1a03160..85c044bad 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -28,6 +28,7 @@ import uuid import psutil +import pytest import pytz import pkg_resources @@ -132,6 +133,8 @@ else: PYARROW_INSTALLED_VERSION = None +MTLS_TESTING = os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE") == "true" + def _has_rows(result): return len(result) > 0 @@ -2651,6 +2654,9 @@ def test_insert_rows_nested_nested_dictionary(self): expected_rows = [("Some value", record)] self.assertEqual(row_tuples, expected_rows) + @pytest.mark.skipif( + MTLS_TESTING, reason="mTLS testing has no permission to the max-value.js file" + ) def test_create_routine(self): routine_name = "test_routine" dataset = self.temp_dataset(_make_dataset_id("create_routine")) diff --git a/tests/unit/helpers.py b/tests/unit/helpers.py index eea345e89..b51b0bbb7 100644 --- a/tests/unit/helpers.py +++ b/tests/unit/helpers.py @@ -21,6 +21,8 @@ def make_connection(*responses): mock_conn = mock.create_autospec(google.cloud.bigquery._http.Connection) mock_conn.user_agent = "testing 1.2.3" mock_conn.api_request.side_effect = list(responses) + [NotFound("miss")] + mock_conn.API_BASE_URL = "https://bigquery.googleapis.com" + mock_conn.get_api_base_url_for_mtls = mock.Mock(return_value=mock_conn.API_BASE_URL) return mock_conn diff --git a/tests/unit/test__http.py b/tests/unit/test__http.py index 78e59cb30..09f6d29d7 100644 --- a/tests/unit/test__http.py +++ b/tests/unit/test__http.py @@ -32,6 +32,9 @@ def _get_target_class(): return 
Connection def _make_one(self, *args, **kw): + if "api_endpoint" not in kw: + kw["api_endpoint"] = "https://bigquery.googleapis.com" + return self._get_target_class()(*args, **kw) def test_build_api_url_no_extra_query_params(self): @@ -138,3 +141,14 @@ def test_extra_headers_replace(self): url=expected_uri, timeout=self._get_default_timeout(), ) + + def test_ctor_mtls(self): + conn = self._make_one(object(), api_endpoint=None) + self.assertEqual(conn.ALLOW_AUTO_SWITCH_TO_MTLS_URL, True) + self.assertEqual(conn.API_BASE_URL, "https://bigquery.googleapis.com") + self.assertEqual(conn.API_BASE_MTLS_URL, "https://bigquery.mtls.googleapis.com") + + conn = self._make_one(object(), api_endpoint="http://foo") + self.assertEqual(conn.ALLOW_AUTO_SWITCH_TO_MTLS_URL, False) + self.assertEqual(conn.API_BASE_URL, "http://foo") + self.assertEqual(conn.API_BASE_MTLS_URL, "https://bigquery.mtls.googleapis.com") diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 625256e6e..66add9c0a 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -2057,6 +2057,7 @@ def test_get_table_sets_user_agent(self): url=mock.ANY, method=mock.ANY, headers=mock.ANY, data=mock.ANY ) http.reset_mock() + http.is_mtls = False mock_response.status_code = 200 mock_response.json.return_value = self._make_table_resource() user_agent_override = client_info.ClientInfo(user_agent="my-application/1.2.3") @@ -4425,7 +4426,7 @@ def _mock_transport(self, status_code, headers, content=b""): fake_transport.request.return_value = fake_response return fake_transport - def _initiate_resumable_upload_helper(self, num_retries=None): + def _initiate_resumable_upload_helper(self, num_retries=None, mtls=False): from google.resumable_media.requests import ResumableUpload from google.cloud.bigquery.client import _DEFAULT_CHUNKSIZE from google.cloud.bigquery.client import _GENERIC_CONTENT_TYPE @@ -4440,6 +4441,8 @@ def _initiate_resumable_upload_helper(self, num_retries=None): fake_transport 
= self._mock_transport(http.client.OK, response_headers) client = self._make_one(project=self.PROJECT, _http=fake_transport) conn = client._connection = make_connection() + if mtls: + conn.get_api_base_url_for_mtls = mock.Mock(return_value="https://foo.mtls") # Create some mock arguments and call the method under test. data = b"goodbye gudbi gootbee" @@ -4454,8 +4457,10 @@ def _initiate_resumable_upload_helper(self, num_retries=None): # Check the returned values. self.assertIsInstance(upload, ResumableUpload) + + host_name = "https://foo.mtls" if mtls else "https://bigquery.googleapis.com" upload_url = ( - f"https://bigquery.googleapis.com/upload/bigquery/v2/projects/{self.PROJECT}" + f"{host_name}/upload/bigquery/v2/projects/{self.PROJECT}" "/jobs?uploadType=resumable" ) self.assertEqual(upload.upload_url, upload_url) @@ -4494,11 +4499,14 @@ def _initiate_resumable_upload_helper(self, num_retries=None): def test__initiate_resumable_upload(self): self._initiate_resumable_upload_helper() + def test__initiate_resumable_upload_mtls(self): + self._initiate_resumable_upload_helper(mtls=True) + def test__initiate_resumable_upload_with_retry(self): self._initiate_resumable_upload_helper(num_retries=11) def _do_multipart_upload_success_helper( - self, get_boundary, num_retries=None, project=None + self, get_boundary, num_retries=None, project=None, mtls=False ): from google.cloud.bigquery.client import _get_upload_headers from google.cloud.bigquery.job import LoadJob @@ -4508,6 +4516,8 @@ def _do_multipart_upload_success_helper( fake_transport = self._mock_transport(http.client.OK, {}) client = self._make_one(project=self.PROJECT, _http=fake_transport) conn = client._connection = make_connection() + if mtls: + conn.get_api_base_url_for_mtls = mock.Mock(return_value="https://foo.mtls") if project is None: project = self.PROJECT @@ -4530,8 +4540,9 @@ def _do_multipart_upload_success_helper( self.assertEqual(stream.tell(), size) get_boundary.assert_called_once_with() + 
host_name = "https://foo.mtls" if mtls else "https://bigquery.googleapis.com" upload_url = ( - f"https://bigquery.googleapis.com/upload/bigquery/v2/projects/{project}" + f"{host_name}/upload/bigquery/v2/projects/{project}" "/jobs?uploadType=multipart" ) payload = ( @@ -4556,6 +4567,10 @@ def _do_multipart_upload_success_helper( def test__do_multipart_upload(self, get_boundary): self._do_multipart_upload_success_helper(get_boundary) + @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") + def test__do_multipart_upload_mtls(self, get_boundary): + self._do_multipart_upload_success_helper(get_boundary, mtls=True) + @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") def test__do_multipart_upload_with_retry(self, get_boundary): self._do_multipart_upload_success_helper(get_boundary, num_retries=8) From 1c6681aba872c00afb16a904a2ba9bae8e9618d3 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Mon, 8 Feb 2021 09:16:42 -0800 Subject: [PATCH 010/230] chore(python): include py.typed files in release (#511) A py.typed file must be included in the released package for it to be considered typed by type checkers. https://www.python.org/dev/peps/pep-0561/#packaging-type-information. See https://github.com/googleapis/python-secret-manager/issues/79 Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Fri Feb 5 17:32:06 2021 -0700 Source-Repo: googleapis/synthtool Source-Sha: 33366574ffb9e11737b3547eb6f020ecae0536e8 Source-Link: https://github.com/googleapis/synthtool/commit/33366574ffb9e11737b3547eb6f020ecae0536e8 --- MANIFEST.in | 4 ++-- synth.metadata | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index e9e29d120..e783f4c62 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -16,10 +16,10 @@ # Generated by synthtool. DO NOT EDIT! 
include README.rst LICENSE -recursive-include google *.json *.proto +recursive-include google *.json *.proto py.typed recursive-include tests * global-exclude *.py[co] global-exclude __pycache__ # Exclude scripts for samples readmegen -prune scripts/readme-gen \ No newline at end of file +prune scripts/readme-gen diff --git a/synth.metadata b/synth.metadata index f6dcca132..1c5fecaf8 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "64e1c0581b915e36756ea465936939390da7d818" + "sha": "1823cadee3acf95c516d0479400e4175349ea199" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "d1bb9173100f62c0cfc8f3138b62241e7f47ca6a" + "sha": "33366574ffb9e11737b3547eb6f020ecae0536e8" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "d1bb9173100f62c0cfc8f3138b62241e7f47ca6a" + "sha": "33366574ffb9e11737b3547eb6f020ecae0536e8" } } ], From 1773dae8dc30b37a74d7ee727e475b45b3b3d2e7 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 10 Feb 2021 08:28:01 +0000 Subject: [PATCH 011/230] chore: release 2.8.0 (#510) :robot: I have created a release \*beep\* \*boop\* --- ## [2.8.0](https://www.github.com/googleapis/python-bigquery/compare/v2.7.0...v2.8.0) (2021-02-08) ### Features * add mtls support to client ([#492](https://www.github.com/googleapis/python-bigquery/issues/492)) ([1823cad](https://www.github.com/googleapis/python-bigquery/commit/1823cadee3acf95c516d0479400e4175349ea199)) ### Bug Fixes * don't try to close closed cursors ([#498](https://www.github.com/googleapis/python-bigquery/issues/498)) ([bf44e7b](https://www.github.com/googleapis/python-bigquery/commit/bf44e7b67d2de41c13053a4550484b9ea049db3e)) --- This PR was generated with [Release 
Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- CHANGELOG.md | 12 ++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a58510c66..768b7b036 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,18 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.8.0](https://www.github.com/googleapis/python-bigquery/compare/v2.7.0...v2.8.0) (2021-02-08) + + +### Features + +* Add mTLS support to client. ([#492](https://www.github.com/googleapis/python-bigquery/issues/492)) ([1823cad](https://www.github.com/googleapis/python-bigquery/commit/1823cadee3acf95c516d0479400e4175349ea199)) + + +### Bug Fixes + +* Don't try to close closed cursors. ([#498](https://www.github.com/googleapis/python-bigquery/issues/498)) ([bf44e7b](https://www.github.com/googleapis/python-bigquery/commit/bf44e7b67d2de41c13053a4550484b9ea049db3e)) + ## [2.7.0](https://www.github.com/googleapis/python-bigquery/compare/v2.6.2...v2.7.0) (2021-01-27) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index d962613e0..0a9aecb37 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.7.0" +__version__ = "2.8.0" From 1de05f6cb1cea27cbfa5bc39dc428755464de130 Mon Sep 17 00:00:00 2001 From: arithmetic1728 <58957152+arithmetic1728@users.noreply.github.com> Date: Thu, 11 Feb 2021 13:08:39 -0800 Subject: [PATCH 012/230] test: update system test for mtls (#518) --- noxfile.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 942525ca9..df36d237e 100644 --- a/noxfile.py +++ b/noxfile.py @@ -113,7 +113,11 @@ def system(session): session.install( "mock", "pytest", "psutil", "google-cloud-testutils", "-c", constraints_path ) - session.install("google-cloud-storage", "-c", constraints_path) + if os.environ.get("GOOGLE_API_USE_CLIENT_CERTIFICATE", "") == "true": + # mTLS test requires pyopenssl and latest google-cloud-storage + session.install("google-cloud-storage", "pyopenssl") + else: + session.install("google-cloud-storage", "-c", constraints_path) session.install("-e", ".[all]", "-c", constraints_path) session.install("ipython", "-c", constraints_path) From 9902789791d0237dbda4695c781b8a056dbeddc0 Mon Sep 17 00:00:00 2001 From: shollyman Date: Tue, 16 Feb 2021 06:49:16 -0800 Subject: [PATCH 013/230] chore: add PARQUET to DestinationFormat enum (#521) --- google/cloud/bigquery/enums.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 2268808fd..db463afdc 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -72,6 +72,9 @@ class DestinationFormat(object): AVRO = "AVRO" """Specifies Avro format.""" + PARQUET = "PARQUET" + """Specifies Parquet format.""" + class Encoding(object): """The character encoding of the data. The default is :attr:`UTF_8`. 
From eedf93b6636c5ff1bd810c6038cfeaea8ccb64d8 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 16 Feb 2021 08:58:02 -0600 Subject: [PATCH 014/230] docs: clarify `%%bigquery`` magics and fix broken link (#508) --- docs/usage/index.rst | 4 ++-- google/cloud/bigquery/magics/magics.py | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/docs/usage/index.rst b/docs/usage/index.rst index ff4c9d7f1..1d3cc9f64 100644 --- a/docs/usage/index.rst +++ b/docs/usage/index.rst @@ -29,7 +29,7 @@ Integrations with Other Libraries pandas -See also, the :mod:`google.cloud.bigquery.magics` module for integrations -with Jupyter. +See also, the :mod:`google.cloud.bigquery.magics.magics` module for +integrations with Jupyter. diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 8f343ddcc..6ae7cae12 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -14,6 +14,15 @@ """IPython Magics +To use these magics, you must first register them. Run the ``%load_ext`` magic +in a Jupyter notebook cell. + +.. code:: + + %load_ext google.cloud.bigquery + +This makes the ``%%bigquery`` magic available. + .. 
function:: %%bigquery IPython cell magic to run a query and display the result as a DataFrame From 3e9430faff7f071600acef295cb5feefe767b954 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 16 Feb 2021 09:25:22 -0600 Subject: [PATCH 015/230] chore: remove redundant view code samples (#437) --- docs/snippets.py | 126 ----------------------------------------------- 1 file changed, 126 deletions(-) diff --git a/docs/snippets.py b/docs/snippets.py index 8c106e63d..3f9b9a88c 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -478,132 +478,6 @@ def test_update_table_cmek(client, to_delete): # [END bigquery_update_table_cmek] -@pytest.mark.skip( - reason=( - "update_table() is flaky " - "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589" - ) -) -def test_manage_views(client, to_delete): - project = client.project - source_dataset_id = "source_dataset_{}".format(_millis()) - source_dataset_ref = bigquery.DatasetReference(project, source_dataset_id) - source_dataset = bigquery.Dataset(source_dataset_ref) - source_dataset = client.create_dataset(source_dataset) - to_delete.append(source_dataset) - - job_config = bigquery.LoadJobConfig() - job_config.schema = [ - bigquery.SchemaField("name", "STRING"), - bigquery.SchemaField("post_abbr", "STRING"), - ] - job_config.skip_leading_rows = 1 - uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv" - source_table_id = "us_states" - load_job = client.load_table_from_uri( - uri, source_dataset.table(source_table_id), job_config=job_config - ) - load_job.result() - - shared_dataset_id = "shared_dataset_{}".format(_millis()) - shared_dataset_ref = bigquery.DatasetReference(project, shared_dataset_id) - shared_dataset = bigquery.Dataset(shared_dataset_ref) - shared_dataset = client.create_dataset(shared_dataset) - to_delete.append(shared_dataset) - - # [START bigquery_create_view] - # from google.cloud import bigquery - # client = bigquery.Client() - # project = 'my-project' - # source_dataset_id = 
'my_source_dataset' - # source_table_id = 'us_states' - # shared_dataset_ref = bigquery.DatasetReference(project, 'my_shared_dataset') - - # This example shows how to create a shared view of a source table of - # US States. The source table contains all 50 states, while the view will - # contain only states with names starting with 'W'. - view_ref = shared_dataset_ref.table("my_shared_view") - view = bigquery.Table(view_ref) - sql_template = 'SELECT name, post_abbr FROM `{}.{}.{}` WHERE name LIKE "W%"' - view.view_query = sql_template.format(project, source_dataset_id, source_table_id) - view = client.create_table(view) # API request - - print("Successfully created view at {}".format(view.full_table_id)) - # [END bigquery_create_view] - - # [START bigquery_update_view_query] - # from google.cloud import bigquery - # client = bigquery.Client() - # project = 'my-project' - # source_dataset_id = 'my_source_dataset' - # source_table_id = 'us_states' - # shared_dataset_ref = bigquery.DatasetReference(project, 'my_shared_dataset') - - # This example shows how to update a shared view of a source table of - # US States. The view's query will be updated to contain only states with - # names starting with 'M'. 
- view_ref = shared_dataset_ref.table("my_shared_view") - view = bigquery.Table(view_ref) - sql_template = 'SELECT name, post_abbr FROM `{}.{}.{}` WHERE name LIKE "M%"' - view.view_query = sql_template.format(project, source_dataset_id, source_table_id) - view = client.update_table(view, ["view_query"]) # API request - # [END bigquery_update_view_query] - - # [START bigquery_get_view] - # from google.cloud import bigquery - # client = bigquery.Client() - # shared_dataset_id = 'my_shared_dataset' - project = client.project - shared_dataset_ref = bigquery.DatasetReference(project, shared_dataset_id) - view_ref = shared_dataset_ref.table("my_shared_view") - view = client.get_table(view_ref) # API Request - - # Display view properties - print("View at {}".format(view.full_table_id)) - print("View Query:\n{}".format(view.view_query)) - # [END bigquery_get_view] - assert view.view_query is not None - - analyst_group_email = "example-analyst-group@google.com" - # [START bigquery_grant_view_access] - # from google.cloud import bigquery - # client = bigquery.Client() - - # Assign access controls to the dataset containing the view - # shared_dataset_id = 'my_shared_dataset' - # analyst_group_email = 'data_analysts@example.com' - project = client.project - shared_dataset_ref = bigquery.DatasetReference(project, shared_dataset_id) - shared_dataset = client.get_dataset(shared_dataset_ref) # API request - access_entries = shared_dataset.access_entries - access_entries.append( - bigquery.AccessEntry("READER", "groupByEmail", analyst_group_email) - ) - shared_dataset.access_entries = access_entries - shared_dataset = client.update_dataset( - shared_dataset, ["access_entries"] - ) # API request - - # Authorize the view to access the source dataset - # project = 'my-project' - # source_dataset_id = 'my_source_dataset' - project = client.project - source_dataset_ref = bigquery.DatasetReference(project, source_dataset_id) - source_dataset = client.get_dataset(source_dataset_ref) # API 
request - view_reference = { - "projectId": project, - "datasetId": shared_dataset_id, - "tableId": "my_shared_view", - } - access_entries = source_dataset.access_entries - access_entries.append(bigquery.AccessEntry(None, "view", view_reference)) - source_dataset.access_entries = access_entries - source_dataset = client.update_dataset( - source_dataset, ["access_entries"] - ) # API request - # [END bigquery_grant_view_access] - - def test_load_table_add_column(client, to_delete): dataset_id = "load_table_add_column_{}".format(_millis()) project = client.project From edd3328fffa3040b2cd3a3c668c90a0e43e4c94c Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 17 Feb 2021 17:52:56 +0100 Subject: [PATCH 016/230] feat: add determinism level for javascript UDFs (#522) * feat: add determinism level for javascript UDFs * Add enum-like class for routine determinism level --- docs/reference.rst | 1 + google/cloud/bigquery/__init__.py | 2 + google/cloud/bigquery/enums.py | 17 ++++++++ google/cloud/bigquery/routine/__init__.py | 29 +++++++++++++ .../cloud/bigquery/{ => routine}/routine.py | 12 ++++++ tests/system/test_client.py | 1 + tests/unit/routine/test_routine.py | 41 ++++++++++++++++++- 7 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 google/cloud/bigquery/routine/__init__.py rename google/cloud/bigquery/{ => routine}/routine.py (97%) diff --git a/docs/reference.rst b/docs/reference.rst index 3643831cb..6b802e2a5 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -110,6 +110,7 @@ Routine .. 
autosummary:: :toctree: generated + routine.DeterminismLevel routine.Routine routine.RoutineArgument routine.RoutineReference diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 41f987228..29d375b03 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -70,6 +70,7 @@ from google.cloud.bigquery.query import StructQueryParameter from google.cloud.bigquery.query import UDFResource from google.cloud.bigquery.retry import DEFAULT_RETRY +from google.cloud.bigquery.routine import DeterminismLevel from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineArgument from google.cloud.bigquery.routine import RoutineReference @@ -134,6 +135,7 @@ "Compression", "CreateDisposition", "DestinationFormat", + "DeterminismLevel", "ExternalSourceFormat", "Encoding", "QueryPriority", diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index db463afdc..e353b3132 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -234,3 +234,20 @@ class WriteDisposition(object): WRITE_EMPTY = "WRITE_EMPTY" """If the table already exists and contains data, a 'duplicate' error is returned in the job result.""" + + +class DeterminismLevel: + """Specifies determinism level for JavaScript user-defined functions (UDFs). 
+ + https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#DeterminismLevel + """ + + DETERMINISM_LEVEL_UNSPECIFIED = "DETERMINISM_LEVEL_UNSPECIFIED" + """The determinism of the UDF is unspecified.""" + + DETERMINISTIC = "DETERMINISTIC" + """The UDF is deterministic, meaning that 2 function calls with the same inputs + always produce the same result, even across 2 query runs.""" + + NOT_DETERMINISTIC = "NOT_DETERMINISTIC" + """The UDF is not deterministic.""" diff --git a/google/cloud/bigquery/routine/__init__.py b/google/cloud/bigquery/routine/__init__.py new file mode 100644 index 000000000..d1c79b05e --- /dev/null +++ b/google/cloud/bigquery/routine/__init__.py @@ -0,0 +1,29 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""User-Defined Routines.""" + + +from google.cloud.bigquery.enums import DeterminismLevel +from google.cloud.bigquery.routine.routine import Routine +from google.cloud.bigquery.routine.routine import RoutineArgument +from google.cloud.bigquery.routine.routine import RoutineReference + + +__all__ = ( + "DeterminismLevel", + "Routine", + "RoutineArgument", + "RoutineReference", +) diff --git a/google/cloud/bigquery/routine.py b/google/cloud/bigquery/routine/routine.py similarity index 97% rename from google/cloud/bigquery/routine.py rename to google/cloud/bigquery/routine/routine.py index f26f20886..103799e8f 100644 --- a/google/cloud/bigquery/routine.py +++ b/google/cloud/bigquery/routine/routine.py @@ -50,6 +50,7 @@ class Routine(object): "return_type": "returnType", "type_": "routineType", "description": "description", + "determinism_level": "determinismLevel", } def __init__(self, routine_ref, **kwargs): @@ -253,6 +254,17 @@ def description(self): def description(self, value): self._properties[self._PROPERTY_TO_API_FIELD["description"]] = value + @property + def determinism_level(self): + """Optional[str]: (experimental) The determinism level of the JavaScript UDF + if defined. + """ + return self._properties.get(self._PROPERTY_TO_API_FIELD["determinism_level"]) + + @determinism_level.setter + def determinism_level(self, value): + self._properties[self._PROPERTY_TO_API_FIELD["determinism_level"]] = value + @classmethod def from_api_repr(cls, resource): """Factory: construct a routine given its API representation. 
diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 85c044bad..60c3b3fa8 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2682,6 +2682,7 @@ def test_create_routine(self): ) ] routine.body = "return maxValue(arr)" + routine.determinism_level = bigquery.DeterminismLevel.DETERMINISTIC query_string = "SELECT `{}`([-100.0, 3.14, 100.0, 42.0]) as max_value;".format( str(routine.reference) ) diff --git a/tests/unit/routine/test_routine.py b/tests/unit/routine/test_routine.py index b02ace1db..0a59e7c5f 100644 --- a/tests/unit/routine/test_routine.py +++ b/tests/unit/routine/test_routine.py @@ -18,6 +18,7 @@ import pytest import google.cloud._helpers +from google.cloud import bigquery from google.cloud import bigquery_v2 @@ -73,6 +74,7 @@ def test_ctor_w_properties(target_class): ) type_ = "SCALAR_FUNCTION" description = "A routine description." + determinism_level = bigquery.DeterminismLevel.NOT_DETERMINISTIC actual_routine = target_class( routine_id, @@ -82,6 +84,7 @@ def test_ctor_w_properties(target_class): return_type=return_type, type_=type_, description=description, + determinism_level=determinism_level, ) ref = RoutineReference.from_string(routine_id) @@ -92,6 +95,9 @@ def test_ctor_w_properties(target_class): assert actual_routine.return_type == return_type assert actual_routine.type_ == type_ assert actual_routine.description == description + assert ( + actual_routine.determinism_level == bigquery.DeterminismLevel.NOT_DETERMINISTIC + ) def test_from_api_repr(target_class): @@ -120,6 +126,7 @@ def test_from_api_repr(target_class): "routineType": "SCALAR_FUNCTION", "someNewField": "someValue", "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISTIC, } actual_routine = target_class.from_api_repr(resource) @@ -152,6 +159,7 @@ def test_from_api_repr(target_class): assert actual_routine.type_ == "SCALAR_FUNCTION" assert actual_routine._properties["someNewField"] == 
"someValue" assert actual_routine.description == "A routine description." + assert actual_routine.determinism_level == "DETERMINISTIC" def test_from_api_repr_w_minimal_resource(target_class): @@ -177,6 +185,7 @@ def test_from_api_repr_w_minimal_resource(target_class): assert actual_routine.return_type is None assert actual_routine.type_ is None assert actual_routine.description is None + assert actual_routine.determinism_level is None def test_from_api_repr_w_unknown_fields(target_class): @@ -208,6 +217,7 @@ def test_from_api_repr_w_unknown_fields(target_class): "returnType": {"typeKind": "INT64"}, "routineType": "SCALAR_FUNCTION", "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED, }, ["arguments"], {"arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}]}, @@ -220,6 +230,7 @@ def test_from_api_repr_w_unknown_fields(target_class): "returnType": {"typeKind": "INT64"}, "routineType": "SCALAR_FUNCTION", "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED, }, ["body"], {"definitionBody": "x * 3"}, @@ -232,6 +243,7 @@ def test_from_api_repr_w_unknown_fields(target_class): "returnType": {"typeKind": "INT64"}, "routineType": "SCALAR_FUNCTION", "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED, }, ["language"], {"language": "SQL"}, @@ -244,6 +256,7 @@ def test_from_api_repr_w_unknown_fields(target_class): "returnType": {"typeKind": "INT64"}, "routineType": "SCALAR_FUNCTION", "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED, }, ["return_type"], {"returnType": {"typeKind": "INT64"}}, @@ -256,6 +269,7 @@ def test_from_api_repr_w_unknown_fields(target_class): "returnType": {"typeKind": "INT64"}, "routineType": "SCALAR_FUNCTION", "description": "A routine description.", + "determinismLevel": 
bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED, }, ["type_"], {"routineType": "SCALAR_FUNCTION"}, @@ -268,13 +282,37 @@ def test_from_api_repr_w_unknown_fields(target_class): "returnType": {"typeKind": "INT64"}, "routineType": "SCALAR_FUNCTION", "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED, }, ["description"], {"description": "A routine description."}, ), + ( + { + "arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}], + "definitionBody": "x * 3", + "language": "SQL", + "returnType": {"typeKind": "INT64"}, + "routineType": "SCALAR_FUNCTION", + "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED, + }, + ["determinism_level"], + { + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED + }, + ), ( {}, - ["arguments", "language", "body", "type_", "return_type", "description"], + [ + "arguments", + "language", + "body", + "type_", + "return_type", + "description", + "determinism_level", + ], { "arguments": None, "definitionBody": None, @@ -282,6 +320,7 @@ def test_from_api_repr_w_unknown_fields(target_class): "returnType": None, "routineType": None, "description": None, + "determinismLevel": None, }, ), ( From 4ffb4e067abdaa54dad6eff49a7fbdb0fa358637 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 17 Feb 2021 18:30:58 +0100 Subject: [PATCH 017/230] feat: expose reservation usage stats on jobs (#524) * feat: expose reservation usage stats on jobs * Add ReservationUsage to job types in docs * Remove redundant space in docstring. 
--- docs/reference.rst | 1 + google/cloud/bigquery/job/__init__.py | 2 ++ google/cloud/bigquery/job/base.py | 27 +++++++++++++++++++++++++++ tests/unit/job/test_base.py | 24 ++++++++++++++++++++++++ 4 files changed, 54 insertions(+) diff --git a/docs/reference.rst b/docs/reference.rst index 6b802e2a5..52d916f96 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -62,6 +62,7 @@ Job-Related Types job.QueryPlanEntry job.QueryPlanEntryStep job.QueryPriority + job.ReservationUsage job.SourceFormat job.WriteDisposition job.SchemaUpdateOption diff --git a/google/cloud/bigquery/job/__init__.py b/google/cloud/bigquery/job/__init__.py index 26ecf8d3c..4945841d9 100644 --- a/google/cloud/bigquery/job/__init__.py +++ b/google/cloud/bigquery/job/__init__.py @@ -19,6 +19,7 @@ from google.cloud.bigquery.job.base import _DONE_STATE from google.cloud.bigquery.job.base import _JobConfig from google.cloud.bigquery.job.base import _JobReference +from google.cloud.bigquery.job.base import ReservationUsage from google.cloud.bigquery.job.base import ScriptStatistics from google.cloud.bigquery.job.base import ScriptStackFrame from google.cloud.bigquery.job.base import UnknownJob @@ -51,6 +52,7 @@ "_DONE_STATE", "_JobConfig", "_JobReference", + "ReservationUsage", "ScriptStatistics", "ScriptStackFrame", "UnknownJob", diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index 5ba01aa67..d8f5d6528 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -14,6 +14,7 @@ """Base classes and helpers for job classes.""" +from collections import namedtuple import copy import http import threading @@ -73,6 +74,16 @@ def _error_result_to_exception(error_result): ) +ReservationUsage = namedtuple("ReservationUsage", "name slot_ms") +ReservationUsage.__doc__ = "Job resource usage for a reservation." +ReservationUsage.name.__doc__ = ( + 'Reservation name or "unreserved" for on-demand resources usage.' 
+) +ReservationUsage.slot_ms.__doc__ = ( + "Total slot milliseconds used by the reservation for a particular job." +) + + class _JobReference(object): """A reference to a job. @@ -305,6 +316,22 @@ def _job_statistics(self): statistics = self._properties.get("statistics", {}) return statistics.get(self._JOB_TYPE, {}) + @property + def reservation_usage(self): + """Job resource usage breakdown by reservation. + + Returns: + List[google.cloud.bigquery.job.ReservationUsage]: + Reservation usage stats. Can be empty if not set from the server. + """ + usage_stats_raw = _helpers._get_sub_prop( + self._properties, ["statistics", "reservationUsage"], default=() + ) + return [ + ReservationUsage(name=usage["name"], slot_ms=int(usage["slotMs"])) + for usage in usage_stats_raw + ] + @property def error_result(self): """Error information about the job as a whole. diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index 44bbc2c77..bbeffba50 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -319,6 +319,30 @@ def test_ended(self): stats["endTime"] = millis self.assertEqual(job.ended, now) + def test_reservation_usage_no_stats(self): + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties["statistics"] = {} + self.assertEqual(job.reservation_usage, []) + + def test_reservation_usage_stats_exist(self): + from google.cloud.bigquery.job import ReservationUsage + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + job._properties["statistics"] = { + "reservationUsage": [ + {"name": "slot_foo", "slotMs": "42"}, + {"name": "slot_bar", "slotMs": "123"}, + ], + } + + expected = [ + ReservationUsage(name="slot_foo", slot_ms=42), + ReservationUsage(name="slot_bar", slot_ms=123), + ] + self.assertEqual(job.reservation_usage, expected) + def test__job_statistics(self): statistics = {"foo": "bar"} client = _make_client(project=self.PROJECT) From 
01e851d00fc17a780375580776753d78f6d74174 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Wed, 17 Feb 2021 12:06:05 -0800 Subject: [PATCH 018/230] docs: update python contributing guide (#514) This PR was generated using Autosynth. :rainbow: Synth log will be available here: https://source.cloud.google.com/results/invocations/9d4e6069-5c18-4f79-97fb-99ebae377691/targets - [ ] To automatically regenerate this PR, check this box. Source-Link: https://github.com/googleapis/synthtool/commit/4679e7e415221f03ff2a71e3ffad75b9ec41d87e PiperOrigin-RevId: 344443035 Source-Link: https://github.com/googleapis/googleapis/commit/df4fd38d040c5c8a0869936205bca13fb64b2cff --- CONTRIBUTING.rst | 22 ++++++++++++++++++---- synth.metadata | 7 ++++--- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 15bcd2e28..a0e330e44 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -70,9 +70,14 @@ We use `nox `__ to instrument our tests. - To test your changes, run unit tests with ``nox``:: $ nox -s unit-2.7 - $ nox -s unit-3.7 + $ nox -s unit-3.8 $ ... +- Args to pytest can be passed through the nox command separated by a `--`. For + example, to run a single test:: + + $ nox -s unit-3.8 -- -k + .. note:: The unit tests and system tests are described in the @@ -93,8 +98,12 @@ On Debian/Ubuntu:: ************ Coding Style ************ +- We use the automatic code formatter ``black``. You can run it using + the nox session ``blacken``. This will eliminate many lint errors. Run via:: + + $ nox -s blacken -- PEP8 compliance, with exceptions defined in the linter configuration. +- PEP8 compliance is required, with exceptions defined in the linter configuration. 
If you have ``nox`` installed, you can test that you have not introduced any non-compliant code via:: @@ -133,13 +142,18 @@ Running System Tests - To run system tests, you can execute:: - $ nox -s system-3.7 + # Run all system tests + $ nox -s system-3.8 $ nox -s system-2.7 + # Run a single system test + $ nox -s system-3.8 -- -k + + .. note:: System tests are only configured to run under Python 2.7 and - Python 3.7. For expediency, we do not run them in older versions + Python 3.8. For expediency, we do not run them in older versions of Python 3. This alone will not run the tests. You'll need to change some local diff --git a/synth.metadata b/synth.metadata index 1c5fecaf8..f91ffab69 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "1823cadee3acf95c516d0479400e4175349ea199" + "sha": "1c6681aba872c00afb16a904a2ba9bae8e9618d3" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "33366574ffb9e11737b3547eb6f020ecae0536e8" + "sha": "4679e7e415221f03ff2a71e3ffad75b9ec41d87e" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "33366574ffb9e11737b3547eb6f020ecae0536e8" + "sha": "4679e7e415221f03ff2a71e3ffad75b9ec41d87e" } } ], @@ -92,6 +92,7 @@ "CONTRIBUTING.rst", "LICENSE", "MANIFEST.in", + "bigquery-v2-py.tar.gz", "docs/_static/custom.css", "docs/_templates/layout.html", "docs/bigquery_v2/services.rst", From 4c9947d7578ee9d03d36b464dd4b627545bfb09e Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Thu, 18 Feb 2021 08:18:03 -0800 Subject: [PATCH 019/230] chore: update automation naming, smaller generated code fixes (#505) This PR was generated using Autosynth. 
:rainbow: Synth log will be available here: https://source.cloud.google.com/results/invocations/9d4e6069-5c18-4f79-97fb-99ebae377691/targets - [ ] To automatically regenerate this PR, check this box. PiperOrigin-RevId: 350246057 Source-Link: https://github.com/googleapis/googleapis/commit/520682435235d9c503983a360a2090025aa47cd1 PiperOrigin-RevId: 347055288 Source-Link: https://github.com/googleapis/googleapis/commit/dd372aa22ded7a8ba6f0e03a80e06358a3fa0907 --- .coveragerc | 34 +-- .gitignore | 4 +- .kokoro/build.sh | 10 + docs/bigquery_v2/services.rst | 6 - docs/bigquery_v2/types.rst | 1 + google/cloud/bigquery_v2/types/__init__.py | 1 - .../bigquery_v2/types/encryption_config.py | 2 +- google/cloud/bigquery_v2/types/model.py | 216 +++++++++--------- .../cloud/bigquery_v2/types/standard_sql.py | 10 +- synth.metadata | 12 +- synth.py | 2 + 11 files changed, 144 insertions(+), 154 deletions(-) delete mode 100644 docs/bigquery_v2/services.rst diff --git a/.coveragerc b/.coveragerc index 0d8e6297d..23861a8eb 100644 --- a/.coveragerc +++ b/.coveragerc @@ -1,38 +1,18 @@ -# -*- coding: utf-8 -*- -# -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Generated by synthtool. DO NOT EDIT! 
[run] branch = True -omit = - google/cloud/__init__.py [report] fail_under = 100 show_missing = True +omit = + google/cloud/bigquery/__init__.py exclude_lines = # Re-enable the standard pragma pragma: NO COVER # Ignore debug-only repr def __repr__ - # Ignore abstract methods - raise NotImplementedError -omit = - */gapic/*.py - */proto/*.py - */core/*.py - */site-packages/*.py - google/cloud/__init__.py + # Ignore pkg_resources exceptions. + # This is added at the module level as a safeguard for if someone + # generates the code and tries to run it without pip installing. This + # makes it virtually impossible to test properly. + except pkg_resources.DistributionNotFound diff --git a/.gitignore b/.gitignore index b9daa52f1..b4243ced7 100644 --- a/.gitignore +++ b/.gitignore @@ -50,8 +50,10 @@ docs.metadata # Virtual environment env/ + +# Test logs coverage.xml -sponge_log.xml +*sponge_log.xml # System test environment variables. system_tests/local_test_setup diff --git a/.kokoro/build.sh b/.kokoro/build.sh index 058f363e1..302cc1e1a 100755 --- a/.kokoro/build.sh +++ b/.kokoro/build.sh @@ -40,6 +40,16 @@ python3 -m pip uninstall --yes --quiet nox-automation python3 -m pip install --upgrade --quiet nox python3 -m nox --version +# If this is a continuous build, send the test log to the FlakyBot. +# See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot. +if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"continuous"* ]]; then + cleanup() { + chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot + $KOKORO_GFILE_DIR/linux_amd64/flakybot + } + trap cleanup EXIT HUP +fi + # If NOX_SESSION is set, it only runs the specified session, # otherwise run all the sessions. 
if [[ -n "${NOX_SESSION:-}" ]]; then diff --git a/docs/bigquery_v2/services.rst b/docs/bigquery_v2/services.rst deleted file mode 100644 index 65fbb438c..000000000 --- a/docs/bigquery_v2/services.rst +++ /dev/null @@ -1,6 +0,0 @@ -Services for Google Cloud Bigquery v2 API -========================================= - -.. automodule:: google.cloud.bigquery_v2.services.model_service - :members: - :inherited-members: diff --git a/docs/bigquery_v2/types.rst b/docs/bigquery_v2/types.rst index 41b906514..c36a83e0b 100644 --- a/docs/bigquery_v2/types.rst +++ b/docs/bigquery_v2/types.rst @@ -3,4 +3,5 @@ Types for Google Cloud Bigquery v2 API .. automodule:: google.cloud.bigquery_v2.types :members: + :undoc-members: :show-inheritance: diff --git a/google/cloud/bigquery_v2/types/__init__.py b/google/cloud/bigquery_v2/types/__init__.py index 1e354641a..00dc837c9 100644 --- a/google/cloud/bigquery_v2/types/__init__.py +++ b/google/cloud/bigquery_v2/types/__init__.py @@ -32,7 +32,6 @@ ListModelsResponse, ) - __all__ = ( "EncryptionConfiguration", "ModelReference", diff --git a/google/cloud/bigquery_v2/types/encryption_config.py b/google/cloud/bigquery_v2/types/encryption_config.py index 6fb90f340..2d801bde3 100644 --- a/google/cloud/bigquery_v2/types/encryption_config.py +++ b/google/cloud/bigquery_v2/types/encryption_config.py @@ -30,7 +30,7 @@ class EncryptionConfiguration(proto.Message): r""" Attributes: - kms_key_name (~.wrappers.StringValue): + kms_key_name (google.protobuf.wrappers_pb2.StringValue): Optional. Describes the Cloud KMS encryption key that will be used to protect destination BigQuery table. The BigQuery Service Account diff --git a/google/cloud/bigquery_v2/types/model.py b/google/cloud/bigquery_v2/types/model.py index c3530dec2..8ae158b64 100644 --- a/google/cloud/bigquery_v2/types/model.py +++ b/google/cloud/bigquery_v2/types/model.py @@ -45,7 +45,7 @@ class Model(proto.Message): Attributes: etag (str): Output only. A hash of this resource. 
- model_reference (~.gcb_model_reference.ModelReference): + model_reference (google.cloud.bigquery_v2.types.ModelReference): Required. Unique identifier for this model. creation_time (int): Output only. The time when this model was @@ -58,7 +58,7 @@ class Model(proto.Message): model. friendly_name (str): Optional. A descriptive name for this model. - labels (Sequence[~.gcb_model.Model.LabelsEntry]): + labels (Sequence[google.cloud.bigquery_v2.types.Model.LabelsEntry]): The labels associated with this model. You can use these to organize and group your models. Label keys and values can be no longer than 63 @@ -81,22 +81,22 @@ class Model(proto.Message): Output only. The geographic location where the model resides. This value is inherited from the dataset. - encryption_configuration (~.encryption_config.EncryptionConfiguration): + encryption_configuration (google.cloud.bigquery_v2.types.EncryptionConfiguration): Custom encryption configuration (e.g., Cloud KMS keys). This shows the encryption configuration of the model data while stored in BigQuery storage. This field can be used with PatchModel to update encryption key for an already encrypted model. - model_type (~.gcb_model.Model.ModelType): + model_type (google.cloud.bigquery_v2.types.Model.ModelType): Output only. Type of the model resource. - training_runs (Sequence[~.gcb_model.Model.TrainingRun]): + training_runs (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun]): Output only. Information for all training runs in increasing order of start_time. - feature_columns (Sequence[~.standard_sql.StandardSqlField]): + feature_columns (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]): Output only. Input feature columns that were used to train this model. - label_columns (Sequence[~.standard_sql.StandardSqlField]): + label_columns (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]): Output only. Label columns that were used to train this model. 
The output of the model will have a `predicted_` prefix to these columns. @@ -280,15 +280,15 @@ class RegressionMetrics(proto.Message): matrix factorization models. Attributes: - mean_absolute_error (~.wrappers.DoubleValue): + mean_absolute_error (google.protobuf.wrappers_pb2.DoubleValue): Mean absolute error. - mean_squared_error (~.wrappers.DoubleValue): + mean_squared_error (google.protobuf.wrappers_pb2.DoubleValue): Mean squared error. - mean_squared_log_error (~.wrappers.DoubleValue): + mean_squared_log_error (google.protobuf.wrappers_pb2.DoubleValue): Mean squared log error. - median_absolute_error (~.wrappers.DoubleValue): + median_absolute_error (google.protobuf.wrappers_pb2.DoubleValue): Median absolute error. - r_squared (~.wrappers.DoubleValue): + r_squared (google.protobuf.wrappers_pb2.DoubleValue): R^2 score. """ @@ -319,33 +319,33 @@ class AggregateClassificationMetrics(proto.Message): by counting the total number of correctly predicted rows. Attributes: - precision (~.wrappers.DoubleValue): + precision (google.protobuf.wrappers_pb2.DoubleValue): Precision is the fraction of actual positive predictions that had positive actual labels. For multiclass this is a macro-averaged metric treating each class as a binary classifier. - recall (~.wrappers.DoubleValue): + recall (google.protobuf.wrappers_pb2.DoubleValue): Recall is the fraction of actual positive labels that were given a positive prediction. For multiclass this is a macro-averaged metric. - accuracy (~.wrappers.DoubleValue): + accuracy (google.protobuf.wrappers_pb2.DoubleValue): Accuracy is the fraction of predictions given the correct label. For multiclass this is a micro-averaged metric. - threshold (~.wrappers.DoubleValue): + threshold (google.protobuf.wrappers_pb2.DoubleValue): Threshold at which the metrics are computed. For binary classification models this is the positive class threshold. For multi-class classfication models this is the confidence threshold. 
- f1_score (~.wrappers.DoubleValue): + f1_score (google.protobuf.wrappers_pb2.DoubleValue): The F1 score is an average of recall and precision. For multiclass this is a macro- averaged metric. - log_loss (~.wrappers.DoubleValue): + log_loss (google.protobuf.wrappers_pb2.DoubleValue): Logarithmic Loss. For multiclass this is a macro-averaged metric. - roc_auc (~.wrappers.DoubleValue): + roc_auc (google.protobuf.wrappers_pb2.DoubleValue): Area Under a ROC Curve. For multiclass this is a macro-averaged metric. """ @@ -369,9 +369,9 @@ class BinaryClassificationMetrics(proto.Message): models. Attributes: - aggregate_classification_metrics (~.gcb_model.Model.AggregateClassificationMetrics): + aggregate_classification_metrics (google.cloud.bigquery_v2.types.Model.AggregateClassificationMetrics): Aggregate classification metrics. - binary_confusion_matrix_list (Sequence[~.gcb_model.Model.BinaryClassificationMetrics.BinaryConfusionMatrix]): + binary_confusion_matrix_list (Sequence[google.cloud.bigquery_v2.types.Model.BinaryClassificationMetrics.BinaryConfusionMatrix]): Binary confusion matrix at multiple thresholds. positive_label (str): @@ -384,27 +384,27 @@ class BinaryConfusionMatrix(proto.Message): r"""Confusion matrix for binary classification models. Attributes: - positive_class_threshold (~.wrappers.DoubleValue): + positive_class_threshold (google.protobuf.wrappers_pb2.DoubleValue): Threshold value used when computing each of the following metric. - true_positives (~.wrappers.Int64Value): + true_positives (google.protobuf.wrappers_pb2.Int64Value): Number of true samples predicted as true. - false_positives (~.wrappers.Int64Value): + false_positives (google.protobuf.wrappers_pb2.Int64Value): Number of false samples predicted as true. - true_negatives (~.wrappers.Int64Value): + true_negatives (google.protobuf.wrappers_pb2.Int64Value): Number of true samples predicted as false. 
- false_negatives (~.wrappers.Int64Value): + false_negatives (google.protobuf.wrappers_pb2.Int64Value): Number of false samples predicted as false. - precision (~.wrappers.DoubleValue): + precision (google.protobuf.wrappers_pb2.DoubleValue): The fraction of actual positive predictions that had positive actual labels. - recall (~.wrappers.DoubleValue): + recall (google.protobuf.wrappers_pb2.DoubleValue): The fraction of actual positive labels that were given a positive prediction. - f1_score (~.wrappers.DoubleValue): + f1_score (google.protobuf.wrappers_pb2.DoubleValue): The equally weighted average of recall and precision. - accuracy (~.wrappers.DoubleValue): + accuracy (google.protobuf.wrappers_pb2.DoubleValue): The fraction of predictions given the correct label. """ @@ -462,9 +462,9 @@ class MultiClassClassificationMetrics(proto.Message): models. Attributes: - aggregate_classification_metrics (~.gcb_model.Model.AggregateClassificationMetrics): + aggregate_classification_metrics (google.cloud.bigquery_v2.types.Model.AggregateClassificationMetrics): Aggregate classification metrics. - confusion_matrix_list (Sequence[~.gcb_model.Model.MultiClassClassificationMetrics.ConfusionMatrix]): + confusion_matrix_list (Sequence[google.cloud.bigquery_v2.types.Model.MultiClassClassificationMetrics.ConfusionMatrix]): Confusion matrix at different thresholds. """ @@ -472,10 +472,10 @@ class ConfusionMatrix(proto.Message): r"""Confusion matrix for multi-class classification models. Attributes: - confidence_threshold (~.wrappers.DoubleValue): + confidence_threshold (google.protobuf.wrappers_pb2.DoubleValue): Confidence threshold used when computing the entries of the confusion matrix. - rows (Sequence[~.gcb_model.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row]): + rows (Sequence[google.cloud.bigquery_v2.types.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row]): One row per actual label. """ @@ -487,7 +487,7 @@ class Entry(proto.Message): The predicted label. 
For confidence_threshold > 0, we will also add an entry indicating the number of items under the confidence threshold. - item_count (~.wrappers.Int64Value): + item_count (google.protobuf.wrappers_pb2.Int64Value): Number of items being predicted as this label. """ @@ -504,7 +504,7 @@ class Row(proto.Message): Attributes: actual_label (str): The original label of this row. - entries (Sequence[~.gcb_model.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry]): + entries (Sequence[google.cloud.bigquery_v2.types.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry]): Info describing predicted label distribution. """ @@ -540,12 +540,12 @@ class ClusteringMetrics(proto.Message): r"""Evaluation metrics for clustering models. Attributes: - davies_bouldin_index (~.wrappers.DoubleValue): + davies_bouldin_index (google.protobuf.wrappers_pb2.DoubleValue): Davies-Bouldin index. - mean_squared_distance (~.wrappers.DoubleValue): + mean_squared_distance (google.protobuf.wrappers_pb2.DoubleValue): Mean of squared distances between each sample to its cluster centroid. - clusters (Sequence[~.gcb_model.Model.ClusteringMetrics.Cluster]): + clusters (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster]): [Beta] Information for all clusters. """ @@ -555,10 +555,10 @@ class Cluster(proto.Message): Attributes: centroid_id (int): Centroid id. - feature_values (Sequence[~.gcb_model.Model.ClusteringMetrics.Cluster.FeatureValue]): + feature_values (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster.FeatureValue]): Values of highly variant features for this cluster. - count (~.wrappers.Int64Value): + count (google.protobuf.wrappers_pb2.Int64Value): Count of training data rows that were assigned to this cluster. """ @@ -569,10 +569,10 @@ class FeatureValue(proto.Message): Attributes: feature_column (str): The feature column name. 
- numerical_value (~.wrappers.DoubleValue): + numerical_value (google.protobuf.wrappers_pb2.DoubleValue): The numerical feature value. This is the centroid value for this feature. - categorical_value (~.gcb_model.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue): + categorical_value (google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue): The categorical feature value. """ @@ -580,7 +580,7 @@ class CategoricalValue(proto.Message): r"""Representative value of a categorical feature. Attributes: - category_counts (Sequence[~.gcb_model.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount]): + category_counts (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount]): Counts of all categories for the categorical feature. If there are more than ten categories, we return top ten (by count) and return one more CategoryCount with category @@ -594,7 +594,7 @@ class CategoryCount(proto.Message): Attributes: category (str): The name of category. - count (~.wrappers.Int64Value): + count (google.protobuf.wrappers_pb2.Int64Value): The count of training samples matching the category within the cluster. """ @@ -654,23 +654,23 @@ class RankingMetrics(proto.Message): feedback_type=implicit. Attributes: - mean_average_precision (~.wrappers.DoubleValue): + mean_average_precision (google.protobuf.wrappers_pb2.DoubleValue): Calculates a precision per user for all the items by ranking them and then averages all the precisions across all the users. - mean_squared_error (~.wrappers.DoubleValue): + mean_squared_error (google.protobuf.wrappers_pb2.DoubleValue): Similar to the mean squared error computed in regression and explicit recommendation models except instead of computing the rating directly, the output from evaluate is computed against a preference which is 1 or 0 depending on if the rating exists or not. 
- normalized_discounted_cumulative_gain (~.wrappers.DoubleValue): + normalized_discounted_cumulative_gain (google.protobuf.wrappers_pb2.DoubleValue): A metric to determine the goodness of a ranking calculated from the predicted confidence by comparing it to an ideal rank measured by the original ratings. - average_rank (~.wrappers.DoubleValue): + average_rank (google.protobuf.wrappers_pb2.DoubleValue): Determines the goodness of a ranking by computing the percentile rank from the predicted confidence and dividing it by the original rank. @@ -696,11 +696,11 @@ class ArimaForecastingMetrics(proto.Message): r"""Model evaluation metrics for ARIMA forecasting models. Attributes: - non_seasonal_order (Sequence[~.gcb_model.Model.ArimaOrder]): + non_seasonal_order (Sequence[google.cloud.bigquery_v2.types.Model.ArimaOrder]): Non-seasonal order. - arima_fitting_metrics (Sequence[~.gcb_model.Model.ArimaFittingMetrics]): + arima_fitting_metrics (Sequence[google.cloud.bigquery_v2.types.Model.ArimaFittingMetrics]): Arima model fitting metrics. - seasonal_periods (Sequence[~.gcb_model.Model.SeasonalPeriod.SeasonalPeriodType]): + seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): Seasonal periods. Repeated because multiple periods are supported for one time series. has_drift (Sequence[bool]): @@ -709,7 +709,7 @@ class ArimaForecastingMetrics(proto.Message): time_series_id (Sequence[str]): Id to differentiate different time series for the large-scale case. - arima_single_model_forecasting_metrics (Sequence[~.gcb_model.Model.ArimaForecastingMetrics.ArimaSingleModelForecastingMetrics]): + arima_single_model_forecasting_metrics (Sequence[google.cloud.bigquery_v2.types.Model.ArimaForecastingMetrics.ArimaSingleModelForecastingMetrics]): Repeated as there can be many metric sets (one for each model) in auto-arima and the large-scale case. @@ -720,16 +720,16 @@ class ArimaSingleModelForecastingMetrics(proto.Message): model. 
Attributes: - non_seasonal_order (~.gcb_model.Model.ArimaOrder): + non_seasonal_order (google.cloud.bigquery_v2.types.Model.ArimaOrder): Non-seasonal order. - arima_fitting_metrics (~.gcb_model.Model.ArimaFittingMetrics): + arima_fitting_metrics (google.cloud.bigquery_v2.types.Model.ArimaFittingMetrics): Arima fitting metrics. has_drift (bool): Is arima model fitted with drift or not. It is always false when d is not 1. time_series_id (str): The id to indicate different time series. - seasonal_periods (Sequence[~.gcb_model.Model.SeasonalPeriod.SeasonalPeriodType]): + seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): Seasonal periods. Repeated because multiple periods are supported for one time series. """ @@ -779,21 +779,21 @@ class EvaluationMetrics(proto.Message): imported models. Attributes: - regression_metrics (~.gcb_model.Model.RegressionMetrics): + regression_metrics (google.cloud.bigquery_v2.types.Model.RegressionMetrics): Populated for regression models and explicit feedback type matrix factorization models. - binary_classification_metrics (~.gcb_model.Model.BinaryClassificationMetrics): + binary_classification_metrics (google.cloud.bigquery_v2.types.Model.BinaryClassificationMetrics): Populated for binary classification/classifier models. - multi_class_classification_metrics (~.gcb_model.Model.MultiClassClassificationMetrics): + multi_class_classification_metrics (google.cloud.bigquery_v2.types.Model.MultiClassClassificationMetrics): Populated for multi-class classification/classifier models. - clustering_metrics (~.gcb_model.Model.ClusteringMetrics): + clustering_metrics (google.cloud.bigquery_v2.types.Model.ClusteringMetrics): Populated for clustering models. - ranking_metrics (~.gcb_model.Model.RankingMetrics): + ranking_metrics (google.cloud.bigquery_v2.types.Model.RankingMetrics): Populated for implicit feedback type matrix factorization models. 
- arima_forecasting_metrics (~.gcb_model.Model.ArimaForecastingMetrics): + arima_forecasting_metrics (google.cloud.bigquery_v2.types.Model.ArimaForecastingMetrics): Populated for ARIMA models. """ @@ -835,10 +835,10 @@ class DataSplitResult(proto.Message): and evaluation data tables that were used to train the model. Attributes: - training_table (~.table_reference.TableReference): + training_table (google.cloud.bigquery_v2.types.TableReference): Table reference of the training data after split. - evaluation_table (~.table_reference.TableReference): + evaluation_table (google.cloud.bigquery_v2.types.TableReference): Table reference of the evaluation data after split. """ @@ -893,7 +893,7 @@ class GlobalExplanation(proto.Message): features after training. Attributes: - explanations (Sequence[~.gcb_model.Model.GlobalExplanation.Explanation]): + explanations (Sequence[google.cloud.bigquery_v2.types.Model.GlobalExplanation.Explanation]): A list of the top global explanations. Sorted by absolute value of attribution in descending order. @@ -913,7 +913,7 @@ class Explanation(proto.Message): be formatted like .. Overall size of feature name will always be truncated to first 120 characters. - attribution (~.wrappers.DoubleValue): + attribution (google.protobuf.wrappers_pb2.DoubleValue): Attribution of feature. """ @@ -933,22 +933,22 @@ class TrainingRun(proto.Message): r"""Information about a single training query run for the model. Attributes: - training_options (~.gcb_model.Model.TrainingRun.TrainingOptions): + training_options (google.cloud.bigquery_v2.types.Model.TrainingRun.TrainingOptions): Options that were used for this training run, includes user specified and default options that were used. - start_time (~.timestamp.Timestamp): + start_time (google.protobuf.timestamp_pb2.Timestamp): The start time of this training run. 
- results (Sequence[~.gcb_model.Model.TrainingRun.IterationResult]): + results (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult]): Output of each iteration run, results.size() <= max_iterations. - evaluation_metrics (~.gcb_model.Model.EvaluationMetrics): + evaluation_metrics (google.cloud.bigquery_v2.types.Model.EvaluationMetrics): The evaluation metrics over training/eval data that were computed at the end of training. - data_split_result (~.gcb_model.Model.DataSplitResult): + data_split_result (google.cloud.bigquery_v2.types.Model.DataSplitResult): Data split result of the training run. Only set when the input data is actually split. - global_explanations (Sequence[~.gcb_model.Model.GlobalExplanation]): + global_explanations (Sequence[google.cloud.bigquery_v2.types.Model.GlobalExplanation]): Global explanations for important features of the model. For multi-class models, there is one entry for each label class. For other models, @@ -962,30 +962,30 @@ class TrainingOptions(proto.Message): max_iterations (int): The maximum number of iterations in training. Used only for iterative training algorithms. - loss_type (~.gcb_model.Model.LossType): + loss_type (google.cloud.bigquery_v2.types.Model.LossType): Type of loss function used during training run. learn_rate (float): Learning rate in training. Used only for iterative training algorithms. - l1_regularization (~.wrappers.DoubleValue): + l1_regularization (google.protobuf.wrappers_pb2.DoubleValue): L1 regularization coefficient. - l2_regularization (~.wrappers.DoubleValue): + l2_regularization (google.protobuf.wrappers_pb2.DoubleValue): L2 regularization coefficient. - min_relative_progress (~.wrappers.DoubleValue): + min_relative_progress (google.protobuf.wrappers_pb2.DoubleValue): When early_stop is true, stops training when accuracy improvement is less than 'min_relative_progress'. Used only for iterative training algorithms. 
- warm_start (~.wrappers.BoolValue): + warm_start (google.protobuf.wrappers_pb2.BoolValue): Whether to train a model from the last checkpoint. - early_stop (~.wrappers.BoolValue): + early_stop (google.protobuf.wrappers_pb2.BoolValue): Whether to stop early when the loss doesn't improve significantly any more (compared to min_relative_progress). Used only for iterative training algorithms. input_label_columns (Sequence[str]): Name of input label columns in training data. - data_split_method (~.gcb_model.Model.DataSplitMethod): + data_split_method (google.cloud.bigquery_v2.types.Model.DataSplitMethod): The data split type for training and evaluation, e.g. RANDOM. data_split_eval_fraction (float): @@ -1007,13 +1007,13 @@ class TrainingOptions(proto.Message): and the rest are eval data. It respects the order in Orderable data types: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties - learn_rate_strategy (~.gcb_model.Model.LearnRateStrategy): + learn_rate_strategy (google.cloud.bigquery_v2.types.Model.LearnRateStrategy): The strategy to determine learn rate for the current iteration. initial_learn_rate (float): Specifies the initial learning rate for the line search learn rate strategy. - label_class_weights (Sequence[~.gcb_model.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry]): + label_class_weights (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry]): Weights associated with each label class, for rebalancing the training data. Only applicable for classification models. @@ -1023,21 +1023,21 @@ class TrainingOptions(proto.Message): item_column (str): Item column specified for matrix factorization models. - distance_type (~.gcb_model.Model.DistanceType): + distance_type (google.cloud.bigquery_v2.types.Model.DistanceType): Distance type for clustering models. num_clusters (int): Number of clusters for clustering models. 
model_uri (str): [Beta] Google Cloud Storage URI from which the model was imported. Only applicable for imported models. - optimization_strategy (~.gcb_model.Model.OptimizationStrategy): + optimization_strategy (google.cloud.bigquery_v2.types.Model.OptimizationStrategy): Optimization strategy for training linear regression models. hidden_units (Sequence[int]): Hidden units for dnn models. batch_size (int): Batch size for dnn models. - dropout (~.wrappers.DoubleValue): + dropout (google.protobuf.wrappers_pb2.DoubleValue): Dropout probability for dnn models. max_tree_depth (int): Maximum depth of a tree for boosted tree @@ -1046,18 +1046,18 @@ class TrainingOptions(proto.Message): Subsample fraction of the training data to grow tree to prevent overfitting for boosted tree models. - min_split_loss (~.wrappers.DoubleValue): + min_split_loss (google.protobuf.wrappers_pb2.DoubleValue): Minimum split loss for boosted tree models. num_factors (int): Num factors specified for matrix factorization models. - feedback_type (~.gcb_model.Model.FeedbackType): + feedback_type (google.cloud.bigquery_v2.types.Model.FeedbackType): Feedback type that specifies which algorithm to run for matrix factorization. - wals_alpha (~.wrappers.DoubleValue): + wals_alpha (google.protobuf.wrappers_pb2.DoubleValue): Hyperparameter for matrix factoration when implicit feedback type is specified. - kmeans_initialization_method (~.gcb_model.Model.KmeansEnums.KmeansInitializationMethod): + kmeans_initialization_method (google.cloud.bigquery_v2.types.Model.KmeansEnums.KmeansInitializationMethod): The method used to initialize the centroids for kmeans algorithm. kmeans_initialization_column (str): @@ -1071,16 +1071,16 @@ class TrainingOptions(proto.Message): for ARIMA model. auto_arima (bool): Whether to enable auto ARIMA or not. 
- non_seasonal_order (~.gcb_model.Model.ArimaOrder): + non_seasonal_order (google.cloud.bigquery_v2.types.Model.ArimaOrder): A specification of the non-seasonal part of the ARIMA model: the three components (p, d, q) are the AR order, the degree of differencing, and the MA order. - data_frequency (~.gcb_model.Model.DataFrequency): + data_frequency (google.cloud.bigquery_v2.types.Model.DataFrequency): The data frequency of a time series. include_drift (bool): Include drift when fitting an ARIMA model. - holiday_region (~.gcb_model.Model.HolidayRegion): + holiday_region (google.cloud.bigquery_v2.types.Model.HolidayRegion): The geographical region based on which the holidays are considered in time series modeling. If a valid value is specified, then holiday @@ -1226,23 +1226,23 @@ class IterationResult(proto.Message): r"""Information about a single iteration of the training run. Attributes: - index (~.wrappers.Int32Value): + index (google.protobuf.wrappers_pb2.Int32Value): Index of the iteration, 0 based. - duration_ms (~.wrappers.Int64Value): + duration_ms (google.protobuf.wrappers_pb2.Int64Value): Time taken to run the iteration in milliseconds. - training_loss (~.wrappers.DoubleValue): + training_loss (google.protobuf.wrappers_pb2.DoubleValue): Loss computed on the training data at the end of iteration. - eval_loss (~.wrappers.DoubleValue): + eval_loss (google.protobuf.wrappers_pb2.DoubleValue): Loss computed on the eval data at the end of iteration. learn_rate (float): Learn rate used for this iteration. - cluster_infos (Sequence[~.gcb_model.Model.TrainingRun.IterationResult.ClusterInfo]): + cluster_infos (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult.ClusterInfo]): Information about top clusters for clustering models. 
- arima_result (~.gcb_model.Model.TrainingRun.IterationResult.ArimaResult): + arima_result (google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult.ArimaResult): """ @@ -1252,10 +1252,10 @@ class ClusterInfo(proto.Message): Attributes: centroid_id (int): Centroid id. - cluster_radius (~.wrappers.DoubleValue): + cluster_radius (google.protobuf.wrappers_pb2.DoubleValue): Cluster radius, the average distance from centroid to each point assigned to the cluster. - cluster_size (~.wrappers.Int64Value): + cluster_size (google.protobuf.wrappers_pb2.Int64Value): Cluster size, the total number of points assigned to the cluster. """ @@ -1276,11 +1276,11 @@ class ArimaResult(proto.Message): iteration results. Attributes: - arima_model_info (Sequence[~.gcb_model.Model.TrainingRun.IterationResult.ArimaResult.ArimaModelInfo]): + arima_model_info (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult.ArimaResult.ArimaModelInfo]): This message is repeated because there are multiple arima models fitted in auto-arima. For non-auto-arima model, its size is one. - seasonal_periods (Sequence[~.gcb_model.Model.SeasonalPeriod.SeasonalPeriodType]): + seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): Seasonal periods. Repeated because multiple periods are supported for one time series. """ @@ -1314,18 +1314,18 @@ class ArimaModelInfo(proto.Message): r"""Arima model information. Attributes: - non_seasonal_order (~.gcb_model.Model.ArimaOrder): + non_seasonal_order (google.cloud.bigquery_v2.types.Model.ArimaOrder): Non-seasonal order. - arima_coefficients (~.gcb_model.Model.TrainingRun.IterationResult.ArimaResult.ArimaCoefficients): + arima_coefficients (google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult.ArimaResult.ArimaCoefficients): Arima coefficients. 
- arima_fitting_metrics (~.gcb_model.Model.ArimaFittingMetrics): + arima_fitting_metrics (google.cloud.bigquery_v2.types.Model.ArimaFittingMetrics): Arima fitting metrics. has_drift (bool): Whether Arima model fitted with drift or not. It is always false when d is not 1. time_series_id (str): The id to indicate different time series. - seasonal_periods (Sequence[~.gcb_model.Model.SeasonalPeriod.SeasonalPeriodType]): + seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): Seasonal periods. Repeated because multiple periods are supported for one time series. """ @@ -1482,7 +1482,7 @@ class PatchModelRequest(proto.Message): Required. Dataset ID of the model to patch. model_id (str): Required. Model ID of the model to patch. - model (~.gcb_model.Model): + model (google.cloud.bigquery_v2.types.Model): Required. Patched model. Follows RFC5789 patch semantics. Missing fields are not updated. To clear a field, explicitly @@ -1525,7 +1525,7 @@ class ListModelsRequest(proto.Message): Required. Project ID of the models to list. dataset_id (str): Required. Dataset ID of the models to list. - max_results (~.wrappers.UInt32Value): + max_results (google.protobuf.wrappers_pb2.UInt32Value): The maximum number of results to return in a single response page. Leverage the page tokens to iterate through the entire collection. @@ -1547,7 +1547,7 @@ class ListModelsResponse(proto.Message): r""" Attributes: - models (Sequence[~.gcb_model.Model]): + models (Sequence[google.cloud.bigquery_v2.types.Model]): Models in the requested dataset. Only the following fields are populated: model_reference, model_type, creation_time, last_modified_time and labels. 
diff --git a/google/cloud/bigquery_v2/types/standard_sql.py b/google/cloud/bigquery_v2/types/standard_sql.py index 80e4632f7..3bc6afedc 100644 --- a/google/cloud/bigquery_v2/types/standard_sql.py +++ b/google/cloud/bigquery_v2/types/standard_sql.py @@ -33,13 +33,13 @@ class StandardSqlDataType(proto.Message): array_element_type="DATE"}} ]}} Attributes: - type_kind (~.standard_sql.StandardSqlDataType.TypeKind): + type_kind (google.cloud.bigquery_v2.types.StandardSqlDataType.TypeKind): Required. The top level type of this field. Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY"). - array_element_type (~.standard_sql.StandardSqlDataType): + array_element_type (google.cloud.bigquery_v2.types.StandardSqlDataType): The type of the array's elements, if type_kind = "ARRAY". - struct_type (~.standard_sql.StandardSqlStructType): + struct_type (google.cloud.bigquery_v2.types.StandardSqlStructType): The fields of this struct, in order, if type_kind = "STRUCT". """ @@ -80,7 +80,7 @@ class StandardSqlField(proto.Message): name (str): Optional. The name of this field. Can be absent for struct fields. - type (~.standard_sql.StandardSqlDataType): + type (google.cloud.bigquery_v2.types.StandardSqlDataType): Optional. The type of this parameter. 
Absent if not explicitly specified (e.g., CREATE FUNCTION statement can omit the return type; in @@ -97,7 +97,7 @@ class StandardSqlStructType(proto.Message): r""" Attributes: - fields (Sequence[~.standard_sql.StandardSqlField]): + fields (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]): """ diff --git a/synth.metadata b/synth.metadata index f91ffab69..dc183a72e 100644 --- a/synth.metadata +++ b/synth.metadata @@ -11,22 +11,22 @@ "git": { "name": "googleapis", "remote": "https://github.com/googleapis/googleapis.git", - "sha": "df4fd38d040c5c8a0869936205bca13fb64b2cff", - "internalRef": "344443035" + "sha": "e13001be33d69042a9505e698f792587a804a5cf", + "internalRef": "358152223" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "4679e7e415221f03ff2a71e3ffad75b9ec41d87e" + "sha": "4dca4132c6d63788c6675e1b1e11e7b9225f8694" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "4679e7e415221f03ff2a71e3ffad75b9ec41d87e" + "sha": "4dca4132c6d63788c6675e1b1e11e7b9225f8694" } } ], @@ -42,6 +42,7 @@ } ], "generatedFiles": [ + ".coveragerc", ".flake8", ".github/CONTRIBUTING.md", ".github/ISSUE_TEMPLATE/bug_report.md", @@ -95,6 +96,7 @@ "bigquery-v2-py.tar.gz", "docs/_static/custom.css", "docs/_templates/layout.html", + "docs/bigquery_v2/model_service.rst", "docs/bigquery_v2/services.rst", "docs/bigquery_v2/types.rst", "docs/conf.py", @@ -127,4 +129,4 @@ "setup.cfg", "testing/.gitignore" ] -} \ No newline at end of file +} diff --git a/synth.py b/synth.py index 341c5832f..3ab271c96 100644 --- a/synth.py +++ b/synth.py @@ -33,6 +33,8 @@ library, excludes=[ "docs/index.rst", + "docs/bigquery_v2/*_service.rst", + "docs/bigquery_v2/services.rst", "README.rst", "noxfile.py", "setup.py", From 1862de798e09b81c9bbbf06b00a438b5f57daf79 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 18 Feb 2021 10:52:56 
-0600 Subject: [PATCH 020/230] chore: release 2.9.0 (#526) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 14 ++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 768b7b036..51fad831e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,20 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.9.0](https://www.github.com/googleapis/python-bigquery/compare/v2.8.0...v2.9.0) (2021-02-18) + + +### Features + +* add determinism level for javascript UDFs ([#522](https://www.github.com/googleapis/python-bigquery/issues/522)) ([edd3328](https://www.github.com/googleapis/python-bigquery/commit/edd3328fffa3040b2cd3a3c668c90a0e43e4c94c)) +* expose reservation usage stats on jobs ([#524](https://www.github.com/googleapis/python-bigquery/issues/524)) ([4ffb4e0](https://www.github.com/googleapis/python-bigquery/commit/4ffb4e067abdaa54dad6eff49a7fbdb0fa358637)) + + +### Documentation + +* clarify `%%bigquery`` magics and fix broken link ([#508](https://www.github.com/googleapis/python-bigquery/issues/508)) ([eedf93b](https://www.github.com/googleapis/python-bigquery/commit/eedf93b6636c5ff1bd810c6038cfeaea8ccb64d8)) +* update python contributing guide ([#514](https://www.github.com/googleapis/python-bigquery/issues/514)) ([01e851d](https://www.github.com/googleapis/python-bigquery/commit/01e851d00fc17a780375580776753d78f6d74174)) + ## [2.8.0](https://www.github.com/googleapis/python-bigquery/compare/v2.7.0...v2.8.0) (2021-02-08) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 0a9aecb37..b2a8c5535 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.8.0" +__version__ = "2.9.0" From 60fbf287b0d34d5db2e61cce7a5b42735ed43d0e Mon Sep 17 00:00:00 2001 From: Alan Yee Date: Fri, 19 Feb 2021 03:20:15 -0800 Subject: [PATCH 021/230] docs(bigquery): Add alternative approach to setting credentials (#517) * docs(bigquery): Add alternative approach to setting credentials * docs(bigquery): Add alternative approach to setting credentials Correction: json object rather than string * Remove trailing space Co-authored-by: Peter Lamut --- samples/snippets/authenticate_service_account.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/samples/snippets/authenticate_service_account.py b/samples/snippets/authenticate_service_account.py index 58cd2b542..c07848bee 100644 --- a/samples/snippets/authenticate_service_account.py +++ b/samples/snippets/authenticate_service_account.py @@ -30,6 +30,11 @@ def main(): key_path, scopes=["https://www.googleapis.com/auth/cloud-platform"], ) + # Alternatively, use service_account.Credentials.from_service_account_info() + # to set credentials directly via a json object rather than set a filepath + # TODO(developer): Set key_json to the content of the service account key file. 
+ # credentials = service_account.Credentials.from_service_account_info(key_json) + client = bigquery.Client(credentials=credentials, project=credentials.project_id,) # [END bigquery_client_json_credentials] return client From 02147545c23cc135c14747971239e480bead4f9b Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 19 Feb 2021 19:53:11 +0100 Subject: [PATCH 022/230] chore(deps): update dependency google-cloud-bigquery to v2.9.0 (#515) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 6f9306af2..e9fcfca03 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,3 +1,3 @@ geojson==2.5.0 -google-cloud-bigquery==2.7.0 +google-cloud-bigquery==2.9.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 7087121b5..9def04cb8 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.7.0 +google-cloud-bigquery==2.9.0 google-cloud-bigquery-storage==2.2.1 google-auth-oauthlib==0.4.2 grpcio==1.35.0 From bb9a94c3c8414d49f1e3bed31a810b371e3011be Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 19 Feb 2021 20:06:05 +0100 Subject: [PATCH 023/230] chore(deps): update dependency google-cloud-bigquery-storage to v2.3.0 (#529) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery-storage](https://togithub.com/googleapis/python-bigquery-storage) | `==2.2.1` -> `==2.3.0` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.3.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | 
[![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.3.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.3.0/compatibility-slim/2.2.1)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.3.0/confidence-slim/2.2.1)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery-storage ### [`v2.3.0`](https://togithub.com/googleapis/python-bigquery-storage/blob/master/CHANGELOG.md#​230-httpswwwgithubcomgoogleapispython-bigquery-storagecomparev221v230-2021-02-18) [Compare Source](https://togithub.com/googleapis/python-bigquery-storage/compare/v2.2.1...v2.3.0) ##### Features - add `client_cert_source_for_mtls` argument to transports ([#​135](https://www.github.com/googleapis/python-bigquery-storage/issues/135)) ([072850d](https://www.github.com/googleapis/python-bigquery-storage/commit/072850dd341909fdc22f330117a17e48da12fdd1)) ##### Documentation - update python contributing guide ([#​140](https://www.github.com/googleapis/python-bigquery-storage/issues/140)) ([1671056](https://www.github.com/googleapis/python-bigquery-storage/commit/1671056bfe181660440b1bf4415005e3eed01eb2)) ##### [2.2.1](https://www.github.com/googleapis/python-bigquery-storage/compare/v2.2.0...v2.2.1) (2021-01-25) ##### Documentation - remove required session variable to fix publish ([#​124](https://www.github.com/googleapis/python-bigquery-storage/issues/124)) ([19a105c](https://www.github.com/googleapis/python-bigquery-storage/commit/19a105cb9c868bb1a9e63966609a2488876f511b))
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 9def04cb8..c638178fc 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.9.0 -google-cloud-bigquery-storage==2.2.1 +google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.2 grpcio==1.35.0 ipython==7.16.1; python_version < '3.7' From 696c443f0a6740be0767e12b706a7771bc1460c3 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 23 Feb 2021 10:14:26 +0100 Subject: [PATCH 024/230] docs: explain retry behavior for DONE jobs (#532) * docs: explain retry behavior for DONE jobs * Update google/cloud/bigquery/job/query.py Co-authored-by: Tim Swast --- google/cloud/bigquery/job/base.py | 8 ++++++-- google/cloud/bigquery/job/query.py | 7 +++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index d8f5d6528..f24e972c8 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -614,7 +614,9 @@ def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): """Checks if the job is complete. Args: - retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. 
+ retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. If the job state is ``DONE``, retrying is aborted + early, as the job will not change anymore. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -635,7 +637,9 @@ def result(self, retry=DEFAULT_RETRY, timeout=None): """Start the job and wait for it to complete and get the result. Args: - retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. If the job state is ``DONE``, retrying is aborted + early, as the job will not change anymore. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index d87f87f52..b3ca8d940 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -979,7 +979,8 @@ def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): Args: retry (Optional[google.api_core.retry.Retry]): - How to retry the call that retrieves query results. + How to retry the call that retrieves query results. If the job state is + ``DONE``, retrying is aborted early, as the job will not change anymore. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -1128,7 +1129,9 @@ def result( max_results (Optional[int]): The maximum total number of rows from this request. retry (Optional[google.api_core.retry.Retry]): - How to retry the call that retrieves rows. + How to retry the call that retrieves rows. If the job state is + ``DONE``, retrying is aborted early even if the results are not + available, as this will not change anymore. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. 
From cc3394f80934419eb00c2029bb81c92a696e7d88 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 23 Feb 2021 18:33:11 +0100 Subject: [PATCH 025/230] feat: add BIGNUMERIC support (#527) * feat: add support of BIGNUMERIC * feat: add BIGNUMERIC support * Add bignumeric_type extra * Add additional BIGNUMERIC tests * Prevent import time error if no BIGNUMERIC support * Add/improve a few comments * Add feature flag for BIGNUMERIC support Co-authored-by: HemangChothani --- google/cloud/bigquery/_pandas_helpers.py | 18 +- google/cloud/bigquery/dbapi/_helpers.py | 12 +- google/cloud/bigquery/dbapi/types.py | 2 +- google/cloud/bigquery/query.py | 8 +- google/cloud/bigquery/schema.py | 1 + setup.py | 2 + tests/system/test_client.py | 162 ++++++++++------ tests/unit/test__pandas_helpers.py | 228 +++++++++++++++-------- tests/unit/test_dbapi__helpers.py | 14 ++ tests/unit/test_query.py | 10 + 10 files changed, 305 insertions(+), 152 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 162c58b4b..7ad416e08 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -20,6 +20,7 @@ import queue import warnings +from packaging import version try: import pandas @@ -80,6 +81,10 @@ def pyarrow_numeric(): return pyarrow.decimal128(38, 9) +def pyarrow_bignumeric(): + return pyarrow.decimal256(76, 38) + + def pyarrow_time(): return pyarrow.time64("us") @@ -128,14 +133,23 @@ def pyarrow_timestamp(): pyarrow.date64().id: "DATETIME", # because millisecond resolution pyarrow.binary().id: "BYTES", pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() + # The exact scale and precision don't matter, see below. pyarrow.decimal128(38, scale=9).id: "NUMERIC", - # The exact decimal's scale and precision are not important, as only - # the type ID matters, and it's the same for all decimal128 instances.
} + if version.parse(pyarrow.__version__) >= version.parse("3.0.0"): + BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric + # The exact decimal's scale and precision are not important, as only + # the type ID matters, and it's the same for all decimal256 instances. + ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" + _BIGNUMERIC_SUPPORT = True + else: + _BIGNUMERIC_SUPPORT = False + else: # pragma: NO COVER BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER ARROW_SCALAR_IDS_TO_BQ = {} # pragma: NO_COVER + _BIGNUMERIC_SUPPORT = False # pragma: NO COVER def bq_to_arrow_struct_data_type(field): diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 95b5869e5..6b36d6e43 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -19,6 +19,11 @@ import functools import numbers +try: + import pyarrow +except ImportError: # pragma: NO COVER + pyarrow = None + from google.cloud import bigquery from google.cloud.bigquery import table from google.cloud.bigquery.dbapi import exceptions @@ -184,7 +189,12 @@ def bigquery_scalar_type(value): elif isinstance(value, numbers.Real): return "FLOAT64" elif isinstance(value, decimal.Decimal): - return "NUMERIC" + # We check for NUMERIC before BIGNUMERIC in order to support pyarrow < 3.0. 
+ scalar_object = pyarrow.scalar(value) + if isinstance(scalar_object, pyarrow.Decimal128Scalar): + return "NUMERIC" + else: + return "BIGNUMERIC" elif isinstance(value, str): return "STRING" elif isinstance(value, bytes): diff --git a/google/cloud/bigquery/dbapi/types.py b/google/cloud/bigquery/dbapi/types.py index 14917820c..20eca9b00 100644 --- a/google/cloud/bigquery/dbapi/types.py +++ b/google/cloud/bigquery/dbapi/types.py @@ -78,7 +78,7 @@ def __eq__(self, other): STRING = "STRING" BINARY = _DBAPITypeObject("BYTES", "RECORD", "STRUCT") NUMBER = _DBAPITypeObject( - "INTEGER", "INT64", "FLOAT", "FLOAT64", "NUMERIC", "BOOLEAN", "BOOL" + "INTEGER", "INT64", "FLOAT", "FLOAT64", "NUMERIC", "BIGNUMERIC", "BOOLEAN", "BOOL" ) DATETIME = _DBAPITypeObject("TIMESTAMP", "DATE", "TIME", "DATETIME") ROWID = "ROWID" diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index f2ed6337e..ecec73e99 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -83,7 +83,7 @@ class ScalarQueryParameter(_AbstractQueryParameter): type_ (str): Name of parameter type. One of 'STRING', 'INT64', - 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or + 'FLOAT64', 'NUMERIC', 'BIGNUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]): @@ -102,7 +102,7 @@ def positional(cls, type_, value): Args: type_ (str): Name of parameter type. One of 'STRING', 'INT64', - 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or + 'FLOAT64', 'NUMERIC', 'BIGNUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]): @@ -186,7 +186,7 @@ class ArrayQueryParameter(_AbstractQueryParameter): array_type (str): Name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. 
+ `'FLOAT64'`, `'NUMERIC'`, `'BIGNUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. values (List[appropriate scalar type]): The parameter array values. """ @@ -203,7 +203,7 @@ def positional(cls, array_type, values): Args: array_type (str): Name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + `'FLOAT64'`, `'NUMERIC'`, `'BIGNUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. values (List[appropriate scalar type]): The parameter array values. diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index c76aded02..9be27f3e8 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -32,6 +32,7 @@ "FLOAT": types.StandardSqlDataType.TypeKind.FLOAT64, "FLOAT64": types.StandardSqlDataType.TypeKind.FLOAT64, "NUMERIC": types.StandardSqlDataType.TypeKind.NUMERIC, + "BIGNUMERIC": types.StandardSqlDataType.TypeKind.BIGNUMERIC, "BOOLEAN": types.StandardSqlDataType.TypeKind.BOOL, "BOOL": types.StandardSqlDataType.TypeKind.BOOL, "GEOGRAPHY": types.StandardSqlDataType.TypeKind.GEOGRAPHY, diff --git a/setup.py b/setup.py index ea2df4843..31b6a3ff7 100644 --- a/setup.py +++ b/setup.py @@ -33,6 +33,7 @@ "proto-plus >= 1.10.0", "google-cloud-core >= 1.4.1, < 2.0dev", "google-resumable-media >= 0.6.0, < 2.0dev", + "packaging >= 14.3", "protobuf >= 3.12.0", ] extras = { @@ -48,6 +49,7 @@ "pyarrow >= 1.0.0, < 4.0dev", ], "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 4.0dev",], + "bignumeric_type": ["pyarrow >= 3.0.0, < 4.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ "opentelemetry-api==0.11b0", diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 60c3b3fa8..684a42c30 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -65,6 +65,7 @@ from google.api_core.iam import Policy from google.cloud import bigquery from google.cloud import bigquery_v2 +from google.cloud.bigquery._pandas_helpers import 
_BIGNUMERIC_SUPPORT from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table @@ -891,6 +892,9 @@ def test_load_table_from_dataframe_w_nulls(self): bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), ) + if _BIGNUMERIC_SUPPORT: + scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) + table_schema = scalars_schema + ( # TODO: Array columns can't be read due to NULLABLE versus REPEATED # mode mismatch. See: @@ -902,21 +906,22 @@ def test_load_table_from_dataframe_w_nulls(self): ) num_rows = 100 nulls = [None] * num_rows - df_data = collections.OrderedDict( - [ - ("bool_col", nulls), - ("bytes_col", nulls), - ("date_col", nulls), - ("dt_col", nulls), - ("float_col", nulls), - ("geo_col", nulls), - ("int_col", nulls), - ("num_col", nulls), - ("str_col", nulls), - ("time_col", nulls), - ("ts_col", nulls), - ] - ) + df_data = [ + ("bool_col", nulls), + ("bytes_col", nulls), + ("date_col", nulls), + ("dt_col", nulls), + ("float_col", nulls), + ("geo_col", nulls), + ("int_col", nulls), + ("num_col", nulls), + ("str_col", nulls), + ("time_col", nulls), + ("ts_col", nulls), + ] + if _BIGNUMERIC_SUPPORT: + df_data.append(("bignum_col", nulls)) + df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) dataset_id = _make_dataset_id("bq_load_test") @@ -1003,6 +1008,9 @@ def test_load_table_from_dataframe_w_explicit_schema(self): bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), ) + if _BIGNUMERIC_SUPPORT: + scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) + table_schema = scalars_schema + ( # TODO: Array columns can't be read due to NULLABLE versus REPEATED # mode mismatch. 
See: @@ -1012,57 +1020,65 @@ def test_load_table_from_dataframe_w_explicit_schema(self): # https://jira.apache.org/jira/browse/ARROW-2587 # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema), ) - df_data = collections.OrderedDict( - [ - ("bool_col", [True, None, False]), - ("bytes_col", [b"abc", None, b"def"]), - ( - "date_col", - [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)], - ), - # ( - # "dt_col", - # [ - # datetime.datetime(1, 1, 1, 0, 0, 0), - # None, - # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), - # ], - # ), - ("float_col", [float("-inf"), float("nan"), float("inf")]), - ( - "geo_col", - [ - "POINT(30 10)", - None, - "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", - ], - ), - ("int_col", [-9223372036854775808, None, 9223372036854775807]), - ( - "num_col", - [ - decimal.Decimal("-99999999999999999999999999999.999999999"), - None, - decimal.Decimal("99999999999999999999999999999.999999999"), - ], - ), - ("str_col", [u"abc", None, u"def"]), - ( - "time_col", - [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)], - ), + + df_data = [ + ("bool_col", [True, None, False]), + ("bytes_col", [b"abc", None, b"def"]), + ("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)]), + # ( + # "dt_col", + # [ + # datetime.datetime(1, 1, 1, 0, 0, 0), + # None, + # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + # ], + # ), + ("float_col", [float("-inf"), float("nan"), float("inf")]), + ( + "geo_col", + [ + "POINT(30 10)", + None, + "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", + ], + ), + ("int_col", [-9223372036854775808, None, 9223372036854775807]), + ( + "num_col", + [ + decimal.Decimal("-99999999999999999999999999999.999999999"), + None, + decimal.Decimal("99999999999999999999999999999.999999999"), + ], + ), + ("str_col", [u"abc", None, u"def"]), + ( + "time_col", + [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)], + ), + ( + "ts_col", + [ + datetime.datetime(1, 1, 1, 0, 0, 
0, tzinfo=pytz.utc), + None, + datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc + ), + ], + ), + ] + if _BIGNUMERIC_SUPPORT: + df_data.append( ( - "ts_col", + "bignum_col", [ - datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), None, - datetime.datetime( - 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc - ), + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), ], - ), - ] - ) + ) + ) + df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) dataset_id = _make_dataset_id("bq_load_test") @@ -1172,6 +1188,7 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv(self): bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), @@ -1210,6 +1227,14 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv(self): decimal.Decimal("99999999999999999999999999999.999999999"), ], ), + ( + "bignum_col", + [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], + ), ("str_col", [u"abc", None, u"def"]), ( "time_col", @@ -2157,6 +2182,10 @@ def test_query_w_query_params(self): pi_numeric_param = ScalarQueryParameter( name="pi_numeric_param", type_="NUMERIC", value=pi_numeric ) + bignum = decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)) + bignum_param = ScalarQueryParameter( + name="bignum_param", type_="BIGNUMERIC", value=bignum + ) truthy = True truthy_param = ScalarQueryParameter(name="truthy", type_="BOOL", value=truthy) beef = b"DEADBEEF" @@ -2302,6 +2331,15 @@ def test_query_w_query_params(self): "query_parameters": [with_friends_param], }, ] + if _BIGNUMERIC_SUPPORT: + 
examples.append( + { + "sql": "SELECT @bignum_param", + "expected": bignum, + "query_parameters": [bignum_param], + } + ) + for example in examples: jconfig = QueryJobConfig() jconfig.query_parameters = example["query_parameters"] diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index ef0c40e1a..abd725820 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -39,6 +39,12 @@ from google import api_core from google.cloud.bigquery import schema +from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT + + +skip_if_no_bignumeric = pytest.mark.skipif( + not _BIGNUMERIC_SUPPORT, reason="BIGNUMERIC support requires pyarrow>=3.0.0", +) @pytest.fixture @@ -70,6 +76,15 @@ def is_numeric(type_): )(type_) +def is_bignumeric(type_): + # See: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#numeric-type + return all_( + pyarrow.types.is_decimal, + lambda type_: type_.precision == 76, + lambda type_: type_.scale == 38, + )(type_) + + def is_timestamp(type_): # See: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp-type return all_( @@ -120,6 +135,9 @@ def test_all_(): ("FLOAT", "NULLABLE", pyarrow.types.is_float64), ("FLOAT64", "NULLABLE", pyarrow.types.is_float64), ("NUMERIC", "NULLABLE", is_numeric), + pytest.param( + "BIGNUMERIC", "NULLABLE", is_bignumeric, marks=skip_if_no_bignumeric, + ), ("BOOLEAN", "NULLABLE", pyarrow.types.is_boolean), ("BOOL", "NULLABLE", pyarrow.types.is_boolean), ("TIMESTAMP", "NULLABLE", is_timestamp), @@ -198,6 +216,12 @@ def test_all_(): "REPEATED", all_(pyarrow.types.is_list, lambda type_: is_numeric(type_.value_type)), ), + pytest.param( + "BIGNUMERIC", + "REPEATED", + all_(pyarrow.types.is_list, lambda type_: is_bignumeric(type_.value_type)), + marks=skip_if_no_bignumeric, + ), ( "BOOLEAN", "REPEATED", @@ -270,34 +294,41 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): 
schema.SchemaField("field05", "FLOAT"), schema.SchemaField("field06", "FLOAT64"), schema.SchemaField("field07", "NUMERIC"), - schema.SchemaField("field08", "BOOLEAN"), - schema.SchemaField("field09", "BOOL"), - schema.SchemaField("field10", "TIMESTAMP"), - schema.SchemaField("field11", "DATE"), - schema.SchemaField("field12", "TIME"), - schema.SchemaField("field13", "DATETIME"), - schema.SchemaField("field14", "GEOGRAPHY"), + schema.SchemaField("field09", "BOOLEAN"), + schema.SchemaField("field10", "BOOL"), + schema.SchemaField("field11", "TIMESTAMP"), + schema.SchemaField("field12", "DATE"), + schema.SchemaField("field13", "TIME"), + schema.SchemaField("field14", "DATETIME"), + schema.SchemaField("field15", "GEOGRAPHY"), ) + + if _BIGNUMERIC_SUPPORT: + fields += (schema.SchemaField("field08", "BIGNUMERIC"),) + field = schema.SchemaField("ignored_name", bq_type, mode="NULLABLE", fields=fields) actual = module_under_test.bq_to_arrow_data_type(field) - expected = pyarrow.struct( - ( - pyarrow.field("field01", pyarrow.string()), - pyarrow.field("field02", pyarrow.binary()), - pyarrow.field("field03", pyarrow.int64()), - pyarrow.field("field04", pyarrow.int64()), - pyarrow.field("field05", pyarrow.float64()), - pyarrow.field("field06", pyarrow.float64()), - pyarrow.field("field07", module_under_test.pyarrow_numeric()), - pyarrow.field("field08", pyarrow.bool_()), - pyarrow.field("field09", pyarrow.bool_()), - pyarrow.field("field10", module_under_test.pyarrow_timestamp()), - pyarrow.field("field11", pyarrow.date32()), - pyarrow.field("field12", module_under_test.pyarrow_time()), - pyarrow.field("field13", module_under_test.pyarrow_datetime()), - pyarrow.field("field14", pyarrow.string()), - ) + + expected = ( + pyarrow.field("field01", pyarrow.string()), + pyarrow.field("field02", pyarrow.binary()), + pyarrow.field("field03", pyarrow.int64()), + pyarrow.field("field04", pyarrow.int64()), + pyarrow.field("field05", pyarrow.float64()), + pyarrow.field("field06", 
pyarrow.float64()), + pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field09", pyarrow.bool_()), + pyarrow.field("field10", pyarrow.bool_()), + pyarrow.field("field11", module_under_test.pyarrow_timestamp()), + pyarrow.field("field12", pyarrow.date32()), + pyarrow.field("field13", module_under_test.pyarrow_time()), + pyarrow.field("field14", module_under_test.pyarrow_datetime()), + pyarrow.field("field15", pyarrow.string()), ) + if _BIGNUMERIC_SUPPORT: + expected += (pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),) + expected = pyarrow.struct(expected) + assert pyarrow.types.is_struct(actual) assert actual.num_fields == len(fields) assert actual.equals(expected) @@ -314,34 +345,41 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): schema.SchemaField("field05", "FLOAT"), schema.SchemaField("field06", "FLOAT64"), schema.SchemaField("field07", "NUMERIC"), - schema.SchemaField("field08", "BOOLEAN"), - schema.SchemaField("field09", "BOOL"), - schema.SchemaField("field10", "TIMESTAMP"), - schema.SchemaField("field11", "DATE"), - schema.SchemaField("field12", "TIME"), - schema.SchemaField("field13", "DATETIME"), - schema.SchemaField("field14", "GEOGRAPHY"), + schema.SchemaField("field09", "BOOLEAN"), + schema.SchemaField("field10", "BOOL"), + schema.SchemaField("field11", "TIMESTAMP"), + schema.SchemaField("field12", "DATE"), + schema.SchemaField("field13", "TIME"), + schema.SchemaField("field14", "DATETIME"), + schema.SchemaField("field15", "GEOGRAPHY"), ) + + if _BIGNUMERIC_SUPPORT: + fields += (schema.SchemaField("field08", "BIGNUMERIC"),) + field = schema.SchemaField("ignored_name", bq_type, mode="REPEATED", fields=fields) actual = module_under_test.bq_to_arrow_data_type(field) - expected_value_type = pyarrow.struct( - ( - pyarrow.field("field01", pyarrow.string()), - pyarrow.field("field02", pyarrow.binary()), - pyarrow.field("field03", pyarrow.int64()), - pyarrow.field("field04", 
pyarrow.int64()), - pyarrow.field("field05", pyarrow.float64()), - pyarrow.field("field06", pyarrow.float64()), - pyarrow.field("field07", module_under_test.pyarrow_numeric()), - pyarrow.field("field08", pyarrow.bool_()), - pyarrow.field("field09", pyarrow.bool_()), - pyarrow.field("field10", module_under_test.pyarrow_timestamp()), - pyarrow.field("field11", pyarrow.date32()), - pyarrow.field("field12", module_under_test.pyarrow_time()), - pyarrow.field("field13", module_under_test.pyarrow_datetime()), - pyarrow.field("field14", pyarrow.string()), - ) + + expected = ( + pyarrow.field("field01", pyarrow.string()), + pyarrow.field("field02", pyarrow.binary()), + pyarrow.field("field03", pyarrow.int64()), + pyarrow.field("field04", pyarrow.int64()), + pyarrow.field("field05", pyarrow.float64()), + pyarrow.field("field06", pyarrow.float64()), + pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field09", pyarrow.bool_()), + pyarrow.field("field10", pyarrow.bool_()), + pyarrow.field("field11", module_under_test.pyarrow_timestamp()), + pyarrow.field("field12", pyarrow.date32()), + pyarrow.field("field13", module_under_test.pyarrow_time()), + pyarrow.field("field14", module_under_test.pyarrow_datetime()), + pyarrow.field("field15", pyarrow.string()), ) + if _BIGNUMERIC_SUPPORT: + expected += (pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),) + expected_value_type = pyarrow.struct(expected) + assert pyarrow.types.is_list(actual) assert pyarrow.types.is_struct(actual.value_type) assert actual.value_type.num_fields == len(fields) @@ -385,6 +423,16 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): decimal.Decimal("999.123456789"), ], ), + pytest.param( + "BIGNUMERIC", + [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + decimal.Decimal("3.141592653589793238462643383279"), + ], + marks=skip_if_no_bignumeric, + ), ("BOOLEAN", [True, 
None, False, None]), ("BOOL", [False, None, True, None]), ( @@ -841,41 +889,45 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): schema.SchemaField("field05", "FLOAT", mode="REQUIRED"), schema.SchemaField("field06", "FLOAT64", mode="REQUIRED"), schema.SchemaField("field07", "NUMERIC", mode="REQUIRED"), - schema.SchemaField("field08", "BOOLEAN", mode="REQUIRED"), - schema.SchemaField("field09", "BOOL", mode="REQUIRED"), - schema.SchemaField("field10", "TIMESTAMP", mode="REQUIRED"), - schema.SchemaField("field11", "DATE", mode="REQUIRED"), - schema.SchemaField("field12", "TIME", mode="REQUIRED"), - schema.SchemaField("field13", "DATETIME", mode="REQUIRED"), - schema.SchemaField("field14", "GEOGRAPHY", mode="REQUIRED"), - ) - dataframe = pandas.DataFrame( - { - "field01": ["hello", "world"], - "field02": [b"abd", b"efg"], - "field03": [1, 2], - "field04": [3, 4], - "field05": [1.25, 9.75], - "field06": [-1.75, -3.5], - "field07": [decimal.Decimal("1.2345"), decimal.Decimal("6.7891")], - "field08": [True, False], - "field09": [False, True], - "field10": [ - datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), - datetime.datetime(2012, 12, 21, 9, 7, 42, tzinfo=pytz.utc), - ], - "field11": [datetime.date(9999, 12, 31), datetime.date(1970, 1, 1)], - "field12": [datetime.time(23, 59, 59, 999999), datetime.time(12, 0, 0)], - "field13": [ - datetime.datetime(1970, 1, 1, 0, 0, 0), - datetime.datetime(2012, 12, 21, 9, 7, 42), - ], - "field14": [ - "POINT(30 10)", - "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", - ], - } + schema.SchemaField("field09", "BOOLEAN", mode="REQUIRED"), + schema.SchemaField("field10", "BOOL", mode="REQUIRED"), + schema.SchemaField("field11", "TIMESTAMP", mode="REQUIRED"), + schema.SchemaField("field12", "DATE", mode="REQUIRED"), + schema.SchemaField("field13", "TIME", mode="REQUIRED"), + schema.SchemaField("field14", "DATETIME", mode="REQUIRED"), + schema.SchemaField("field15", "GEOGRAPHY", mode="REQUIRED"), ) + if 
_BIGNUMERIC_SUPPORT: + bq_schema += (schema.SchemaField("field08", "BIGNUMERIC", mode="REQUIRED"),) + + data = { + "field01": ["hello", "world"], + "field02": [b"abd", b"efg"], + "field03": [1, 2], + "field04": [3, 4], + "field05": [1.25, 9.75], + "field06": [-1.75, -3.5], + "field07": [decimal.Decimal("1.2345"), decimal.Decimal("6.7891")], + "field09": [True, False], + "field10": [False, True], + "field11": [ + datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + datetime.datetime(2012, 12, 21, 9, 7, 42, tzinfo=pytz.utc), + ], + "field12": [datetime.date(9999, 12, 31), datetime.date(1970, 1, 1)], + "field13": [datetime.time(23, 59, 59, 999999), datetime.time(12, 0, 0)], + "field14": [ + datetime.datetime(1970, 1, 1, 0, 0, 0), + datetime.datetime(2012, 12, 21, 9, 7, 42), + ], + "field15": ["POINT(30 10)", "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))"], + } + if _BIGNUMERIC_SUPPORT: + data["field08"] = [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ] + dataframe = pandas.DataFrame(data) arrow_table = module_under_test.dataframe_to_arrow(dataframe, bq_schema) arrow_schema = arrow_table.schema @@ -1089,6 +1141,7 @@ def test_augment_schema_type_detection_succeeds(module_under_test): "bytes_field": b"some bytes", "string_field": u"some characters", "numeric_field": decimal.Decimal("123.456"), + "bignumeric_field": decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), } ] ) @@ -1109,6 +1162,10 @@ def test_augment_schema_type_detection_succeeds(module_under_test): schema.SchemaField("string_field", field_type=None, mode="NULLABLE"), schema.SchemaField("numeric_field", field_type=None, mode="NULLABLE"), ) + if _BIGNUMERIC_SUPPORT: + current_schema += ( + schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"), + ) with warnings.catch_warnings(record=True) as warned: augmented_schema = module_under_test.augment_schema(dataframe, current_schema) @@ -1131,6 +1188,13 @@ def 
test_augment_schema_type_detection_succeeds(module_under_test): schema.SchemaField("string_field", field_type="STRING", mode="NULLABLE"), schema.SchemaField("numeric_field", field_type="NUMERIC", mode="NULLABLE"), ) + if _BIGNUMERIC_SUPPORT: + expected_schema += ( + schema.SchemaField( + "bignumeric_field", field_type="BIGNUMERIC", mode="NULLABLE" + ), + ) + by_name = operator.attrgetter("name") assert sorted(augmented_schema, key=by_name) == sorted(expected_schema, key=by_name) diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index fffa46aa8..c28c014d4 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -25,6 +25,7 @@ import google.cloud._helpers from google.cloud.bigquery import table +from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions from tests.unit.helpers import _to_pyarrow @@ -51,6 +52,14 @@ def test_scalar_to_query_parameter(self): "TIMESTAMP", ), ] + if _BIGNUMERIC_SUPPORT: + expected_types.append( + ( + decimal.Decimal("1.1234567890123456789012345678901234567890"), + "BIGNUMERIC", + ) + ) + for value, expected_type in expected_types: msg = "value: {} expected_type: {}".format(value, expected_type) parameter = _helpers.scalar_to_query_parameter(value) @@ -104,6 +113,11 @@ def test_array_to_query_parameter_valid_argument(self): ), ] + if _BIGNUMERIC_SUPPORT: + expected_types.append( + ([decimal.Decimal("{d38}.{d38}".format(d38="9" * 38))], "BIGNUMERIC") + ) + for values, expected_type in expected_types: msg = "value: {} expected_type: {}".format(values, expected_type) parameter = _helpers.array_to_query_parameter(values) diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index cf268daf1..ae2c29d09 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -166,6 +166,16 @@ def test_to_api_repr_w_numeric(self): param = 
klass.positional(type_="NUMERIC", value="123456789.123456789") self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_bignumeric(self): + big_num_string = "{d38}.{d38}".format(d38="9" * 38) + EXPECTED = { + "parameterType": {"type": "BIGNUMERIC"}, + "parameterValue": {"value": big_num_string}, + } + klass = self._get_target_class() + param = klass.positional(type_="BIGNUMERIC", value=big_num_string) + self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_bool(self): EXPECTED = { "parameterType": {"type": "BOOL"}, From c1d15f4e5da4b7e10c00afffd59a5c7f3ded027a Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 24 Feb 2021 16:26:00 +0100 Subject: [PATCH 026/230] fix: error using empty array of structs parameter (#474) * fix: error using empty array of structs parameter * Add QueryParameterType classes * Use query parameter types with ArrayQueryParameter * Adjust system test to changed ArrayQueryParameter * Clarify a comment about an assertion Co-authored-by: Tim Swast * Clarify when name/descr. 
is omitted from API repr * Rename subtypes to fields * Add fields property to StructQueryParameterType * Add a check for empty struct fields * Define scalar SQL parameter types as type objects Co-authored-by: Tim Swast --- google/cloud/bigquery/__init__.py | 6 + google/cloud/bigquery/enums.py | 21 ++ google/cloud/bigquery/query.py | 301 +++++++++++++++++++++-- tests/system/test_client.py | 15 ++ tests/unit/test_query.py | 383 +++++++++++++++++++++++++++++- 5 files changed, 711 insertions(+), 15 deletions(-) diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 29d375b03..f609468f5 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -66,8 +66,11 @@ from google.cloud.bigquery.model import Model from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.query import ArrayQueryParameter +from google.cloud.bigquery.query import ArrayQueryParameterType from google.cloud.bigquery.query import ScalarQueryParameter +from google.cloud.bigquery.query import ScalarQueryParameterType from google.cloud.bigquery.query import StructQueryParameter +from google.cloud.bigquery.query import StructQueryParameterType from google.cloud.bigquery.query import UDFResource from google.cloud.bigquery.retry import DEFAULT_RETRY from google.cloud.bigquery.routine import DeterminismLevel @@ -93,6 +96,9 @@ "ArrayQueryParameter", "ScalarQueryParameter", "StructQueryParameter", + "ArrayQueryParameterType", + "ScalarQueryParameterType", + "StructQueryParameterType", # Datasets "Dataset", "DatasetReference", diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index e353b3132..b378f091b 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -18,6 +18,7 @@ import itertools from google.cloud.bigquery_v2 import types as gapic_types +from google.cloud.bigquery.query import ScalarQueryParameterType class Compression(object): @@ -215,6 +216,26 @@ class 
SqlTypeNames(str, enum.Enum): DATETIME = "DATETIME" +class SqlParameterScalarTypes: + """Supported scalar SQL query parameter types as type objects.""" + + STRING = ScalarQueryParameterType("STRING") + BYTES = ScalarQueryParameterType("BYTES") + INTEGER = ScalarQueryParameterType("INT64") + INT64 = ScalarQueryParameterType("INT64") + FLOAT = ScalarQueryParameterType("FLOAT64") + FLOAT64 = ScalarQueryParameterType("FLOAT64") + NUMERIC = ScalarQueryParameterType("NUMERIC") + BIGNUMERIC = ScalarQueryParameterType("BIGNUMERIC") + BOOLEAN = ScalarQueryParameterType("BOOL") + BOOL = ScalarQueryParameterType("BOOL") + GEOGRAPHY = ScalarQueryParameterType("GEOGRAPHY") + TIMESTAMP = ScalarQueryParameterType("TIMESTAMP") + DATE = ScalarQueryParameterType("DATE") + TIME = ScalarQueryParameterType("TIME") + DATETIME = ScalarQueryParameterType("DATETIME") + + class WriteDisposition(object): """Specifies the action that occurs if destination table already exists. diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index ecec73e99..42547cd73 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -48,6 +48,239 @@ def __ne__(self, other): return not self == other +class _AbstractQueryParameterType: + """Base class for representing query parameter types. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/QueryParameter#queryparametertype + """ + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter type from JSON resource. + + Args: + resource (Dict): JSON mapping of parameter + + Returns: + google.cloud.bigquery.query.QueryParameterType: Instance + """ + raise NotImplementedError + + def to_api_repr(self): + """Construct JSON API representation for the parameter type. + + Returns: + Dict: JSON mapping + """ + raise NotImplementedError + + +class ScalarQueryParameterType(_AbstractQueryParameterType): + """Type representation for scalar query parameters. 
+ + Args: + type_ (str): + One of 'STRING', 'INT64', 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', + 'DATETIME', or 'DATE'. + name (Optional[str]): + The name of the query parameter. Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. + description (Optional[str]): + The query parameter description. Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. + """ + + def __init__(self, type_, *, name=None, description=None): + self._type = type_ + self.name = name + self.description = description + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter type from JSON resource. + + Args: + resource (Dict): JSON mapping of parameter + + Returns: + google.cloud.bigquery.query.ScalarQueryParameterType: Instance + """ + type_ = resource["type"] + return cls(type_) + + def to_api_repr(self): + """Construct JSON API representation for the parameter type. + + Returns: + Dict: JSON mapping + """ + # Name and description are only used if the type is a field inside a struct + # type, but it's StructQueryParameterType's responsibilty to use these two + # attributes in the API representation when needed. Here we omit them. + return {"type": self._type} + + def __repr__(self): + name = f", name={self.name!r}" if self.name is not None else "" + description = ( + f", description={self.description!r}" + if self.description is not None + else "" + ) + return f"{self.__class__.__name__}({self._type!r}{name}{description})" + + +class ArrayQueryParameterType(_AbstractQueryParameterType): + """Type representation for array query parameters. + + Args: + array_type (Union[ScalarQueryParameterType, StructQueryParameterType]): + The type of array elements. + name (Optional[str]): + The name of the query parameter. Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. + description (Optional[str]): + The query parameter description. 
Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. + """ + + def __init__(self, array_type, *, name=None, description=None): + self._array_type = array_type + self.name = name + self.description = description + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter type from JSON resource. + + Args: + resource (Dict): JSON mapping of parameter + + Returns: + google.cloud.bigquery.query.ArrayQueryParameterType: Instance + """ + array_item_type = resource["arrayType"]["type"] + + if array_item_type in {"STRUCT", "RECORD"}: + klass = StructQueryParameterType + else: + klass = ScalarQueryParameterType + + item_type_instance = klass.from_api_repr(resource["arrayType"]) + return cls(item_type_instance) + + def to_api_repr(self): + """Construct JSON API representation for the parameter type. + + Returns: + Dict: JSON mapping + """ + # Name and description are only used if the type is a field inside a struct + # type, but it's StructQueryParameterType's responsibilty to use these two + # attributes in the API representation when needed. Here we omit them. + return { + "type": "ARRAY", + "arrayType": self._array_type.to_api_repr(), + } + + def __repr__(self): + name = f", name={self.name!r}" if self.name is not None else "" + description = ( + f", description={self.description!r}" + if self.description is not None + else "" + ) + return f"{self.__class__.__name__}({self._array_type!r}{name}{description})" + + +class StructQueryParameterType(_AbstractQueryParameterType): + """Type representation for struct query parameters. + + Args: + fields (Iterable[Union[ \ + ArrayQueryParameterType, ScalarQueryParameterType, StructQueryParameterType \ + ]]): + An non-empty iterable describing the struct's field types. + name (Optional[str]): + The name of the query parameter. Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. 
+ description (Optional[str]): + The query parameter description. Primarily used if the type is + one of the subfields in ``StructQueryParameterType`` instance. + """ + + def __init__(self, *fields, name=None, description=None): + if not fields: + raise ValueError("Struct type must have at least one field defined.") + + self._fields = fields # fields is a tuple (immutable), no shallow copy needed + self.name = name + self.description = description + + @property + def fields(self): + return self._fields # no copy needed, self._fields is an immutable sequence + + @classmethod + def from_api_repr(cls, resource): + """Factory: construct parameter type from JSON resource. + + Args: + resource (Dict): JSON mapping of parameter + + Returns: + google.cloud.bigquery.query.StructQueryParameterType: Instance + """ + fields = [] + + for struct_field in resource["structTypes"]: + type_repr = struct_field["type"] + if type_repr["type"] in {"STRUCT", "RECORD"}: + klass = StructQueryParameterType + elif type_repr["type"] == "ARRAY": + klass = ArrayQueryParameterType + else: + klass = ScalarQueryParameterType + + type_instance = klass.from_api_repr(type_repr) + type_instance.name = struct_field.get("name") + type_instance.description = struct_field.get("description") + fields.append(type_instance) + + return cls(*fields) + + def to_api_repr(self): + """Construct JSON API representation for the parameter type. 
+ + Returns: + Dict: JSON mapping + """ + fields = [] + + for field in self._fields: + item = {"type": field.to_api_repr()} + if field.name is not None: + item["name"] = field.name + if field.description is not None: + item["description"] = field.description + + fields.append(item) + + return { + "type": "STRUCT", + "structTypes": fields, + } + + def __repr__(self): + name = f", name={self.name!r}" if self.name is not None else "" + description = ( + f", description={self.description!r}" + if self.description is not None + else "" + ) + items = ", ".join(repr(field) for field in self._fields) + return f"{self.__class__.__name__}({items}{name}{description})" + + class _AbstractQueryParameter(object): """Base class for named / positional query parameters. """ @@ -184,28 +417,43 @@ class ArrayQueryParameter(_AbstractQueryParameter): Parameter name, used via ``@foo`` syntax. If None, the parameter can only be addressed via position (``?``). - array_type (str): - Name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'NUMERIC'`, `'BIGNUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + array_type (Union[str, ScalarQueryParameterType, StructQueryParameterType]): + The type of array elements. If given as a string, it must be one of + `'STRING'`, `'INT64'`, `'FLOAT64'`, `'NUMERIC'`, `'BIGNUMERIC'`, `'BOOL'`, + `'TIMESTAMP'`, `'DATE'`, or `'STRUCT'`/`'RECORD'`. + If the type is ``'STRUCT'``/``'RECORD'`` and ``values`` is empty, + the exact item type cannot be deduced, thus a ``StructQueryParameterType`` + instance needs to be passed in. - values (List[appropriate scalar type]): The parameter array values. + values (List[appropriate type]): The parameter array values. 
""" def __init__(self, name, array_type, values): self.name = name - self.array_type = array_type self.values = values + if isinstance(array_type, str): + if not values and array_type in {"RECORD", "STRUCT"}: + raise ValueError( + "Missing detailed struct item type info for an empty array, " + "please provide a StructQueryParameterType instance." + ) + self.array_type = array_type + @classmethod def positional(cls, array_type, values): """Factory for positional parameters. Args: - array_type (str): - Name of type of array elements. One of `'STRING'`, `'INT64'`, - `'FLOAT64'`, `'NUMERIC'`, `'BIGNUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`. + array_type (Union[str, ScalarQueryParameterType, StructQueryParameterType]): + The type of array elements. If given as a string, it must be one of + `'STRING'`, `'INT64'`, `'FLOAT64'`, `'NUMERIC'`, `'BIGNUMERIC'`, + `'BOOL'`, `'TIMESTAMP'`, `'DATE'`, or `'STRUCT'`/`'RECORD'`. + If the type is ``'STRUCT'``/``'RECORD'`` and ``values`` is empty, + the exact item type cannot be deduced, thus a ``StructQueryParameterType`` + instance needs to be passed in. - values (List[appropriate scalar type]): The parameter array values. + values (List[appropriate type]): The parameter array values. Returns: google.cloud.bigquery.query.ArrayQueryParameter: Instance without name @@ -263,22 +511,40 @@ def to_api_repr(self): Dict: JSON mapping """ values = self.values - if self.array_type == "RECORD" or self.array_type == "STRUCT": + + if self.array_type in {"RECORD", "STRUCT"} or isinstance( + self.array_type, StructQueryParameterType + ): reprs = [value.to_api_repr() for value in values] - a_type = reprs[0]["parameterType"] a_values = [repr_["parameterValue"] for repr_ in reprs] + + if reprs: + a_type = reprs[0]["parameterType"] + else: + # This assertion always evaluates to True because the + # constructor disallows STRUCT/RECORD type defined as a + # string with empty values. 
+ assert isinstance(self.array_type, StructQueryParameterType) + a_type = self.array_type.to_api_repr() else: - a_type = {"type": self.array_type} - converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.array_type) + # Scalar array item type. + if isinstance(self.array_type, str): + a_type = {"type": self.array_type} + else: + a_type = self.array_type.to_api_repr() + + converter = _SCALAR_VALUE_TO_JSON_PARAM.get(a_type["type"]) if converter is not None: values = [converter(value) for value in values] a_values = [{"value": value} for value in values] + resource = { "parameterType": {"type": "ARRAY", "arrayType": a_type}, "parameterValue": {"arrayValues": a_values}, } if self.name is not None: resource["name"] = self.name + return resource def _key(self): @@ -289,7 +555,14 @@ def _key(self): Returns: Tuple: The contents of this :class:`~google.cloud.bigquery.query.ArrayQueryParameter`. """ - return (self.name, self.array_type.upper(), self.values) + if isinstance(self.array_type, str): + item_type = self.array_type + elif isinstance(self.array_type, ScalarQueryParameterType): + item_type = self.array_type._type + else: + item_type = "STRUCT" + + return (self.name, item_type.upper(), self.values) def __eq__(self, other): if not isinstance(other, ArrayQueryParameter): diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 684a42c30..ed48b0bfe 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2168,7 +2168,9 @@ def test_query_w_query_params(self): from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.query import ArrayQueryParameter from google.cloud.bigquery.query import ScalarQueryParameter + from google.cloud.bigquery.query import ScalarQueryParameterType from google.cloud.bigquery.query import StructQueryParameter + from google.cloud.bigquery.query import StructQueryParameterType question = "What is the answer to life, the universe, and everything?" 
question_param = ScalarQueryParameter( @@ -2227,6 +2229,14 @@ def test_query_w_query_params(self): characters_param = ArrayQueryParameter( name=None, array_type="RECORD", values=[phred_param, bharney_param] ) + empty_struct_array_param = ArrayQueryParameter( + name="empty_array_param", + values=[], + array_type=StructQueryParameterType( + ScalarQueryParameterType(name="foo", type_="INT64"), + ScalarQueryParameterType(name="bar", type_="STRING"), + ), + ) hero_param = StructQueryParameter("hero", phred_name_param, phred_age_param) sidekick_param = StructQueryParameter( "sidekick", bharney_name_param, bharney_age_param @@ -2317,6 +2327,11 @@ def test_query_w_query_params(self): ], "query_parameters": [characters_param], }, + { + "sql": "SELECT @empty_array_param", + "expected": [], + "query_parameters": [empty_struct_array_param], + }, { "sql": "SELECT @roles", "expected": { diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index ae2c29d09..c8be2911f 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -43,6 +43,318 @@ def test___eq__(self): self.assertNotEqual(udf, wrong_type) +class Test__AbstractQueryParameterType(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import _AbstractQueryParameterType + + return _AbstractQueryParameterType + + @classmethod + def _make_one(cls, *args, **kw): + return cls._get_target_class()(*args, **kw) + + def test_from_api_virtual(self): + klass = self._get_target_class() + with self.assertRaises(NotImplementedError): + klass.from_api_repr({}) + + def test_to_api_virtual(self): + param_type = self._make_one() + with self.assertRaises(NotImplementedError): + param_type.to_api_repr() + + +class Test_ScalarQueryParameterType(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import ScalarQueryParameterType + + return ScalarQueryParameterType + + def _make_one(self, *args, **kw): + return 
self._get_target_class()(*args, **kw) + + def test_from_api_repr(self): + klass = self._get_target_class() + result = klass.from_api_repr({"type": "BOOLEAN"}) + self.assertEqual(result._type, "BOOLEAN") + self.assertIsNone(result.name) + self.assertIsNone(result.description) + + def test_to_api_repr(self): + param_type = self._make_one("BYTES", name="foo", description="bar") + result = param_type.to_api_repr() + self.assertEqual(result, {"type": "BYTES"}) + + def test_repr_no_optional_attrs(self): + param_type = self._make_one("BYTES") + self.assertEqual(repr(param_type), "ScalarQueryParameterType('BYTES')") + + def test_repr_all_optional_attrs(self): + param_type = self._make_one("BYTES", name="foo", description="this is foo") + self.assertEqual( + repr(param_type), + "ScalarQueryParameterType('BYTES', name='foo', description='this is foo')", + ) + + +class Test_ArrayQueryParameterType(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import ArrayQueryParameterType + + return ArrayQueryParameterType + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_from_api_repr(self): + from google.cloud.bigquery.query import StructQueryParameterType + + api_resource = { + "type": "ARRAY", + "arrayType": { + "type": "STRUCT", + "structTypes": [ + { + "name": "weight", + "type": {"type": "INTEGER"}, + "description": "in kg", + }, + {"name": "last_name", "type": {"type": "STRING"}}, + ], + }, + } + + klass = self._get_target_class() + result = klass.from_api_repr(api_resource) + + self.assertIsNone(result.name) + self.assertIsNone(result.description) + item_type = result._array_type + self.assertIsInstance(item_type, StructQueryParameterType) + + self.assertIsNone(item_type.name) + self.assertIsNone(item_type.description) + + field = item_type.fields[0] + self.assertEqual(field.name, "weight") + self.assertEqual(field.description, "in kg") + self.assertEqual(field._type, "INTEGER") + + 
field = item_type.fields[1] + self.assertEqual(field.name, "last_name") + self.assertIsNone(field.description) + self.assertEqual(field._type, "STRING") + + def test_to_api_repr(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + from google.cloud.bigquery.query import StructQueryParameterType + + array_item_type = StructQueryParameterType( + ScalarQueryParameterType("INTEGER", name="weight", description="in kg"), + ScalarQueryParameterType("STRING", name="last_name"), + ) + param_type = self._make_one(array_item_type, name="foo", description="bar") + + result = param_type.to_api_repr() + + expected_result = { + "type": "ARRAY", + "arrayType": { + "type": "STRUCT", + "structTypes": [ + { + "name": "weight", + "type": {"type": "INTEGER"}, + "description": "in kg", + }, + {"name": "last_name", "type": {"type": "STRING"}}, + ], + }, + } + self.assertEqual(result, expected_result) + + def test_repr_no_optional_attrs(self): + param_type = self._make_one("BOOLEAN") + self.assertEqual(repr(param_type), "ArrayQueryParameterType('BOOLEAN')") + + def test_repr_all_optional_attrs(self): + param_type = self._make_one("INT64", name="bar", description="this is bar") + self.assertEqual( + repr(param_type), + "ArrayQueryParameterType('INT64', name='bar', description='this is bar')", + ) + + +class Test_StructQueryParameterType(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.query import StructQueryParameterType + + return StructQueryParameterType + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_raises_error_without_any_fields(self): + with self.assertRaisesRegex(ValueError, ".*at least one field.*"): + self._make_one() + + def test_from_api_repr(self): + from google.cloud.bigquery.query import ArrayQueryParameterType + from google.cloud.bigquery.query import ScalarQueryParameterType + + api_resource = { + "type": "STRUCT", + "structTypes": [ + { + "name": "age", + 
"type": {"type": "INTEGER"}, + "description": "in years", + }, + { + "name": "aliases", + "type": {"type": "ARRAY", "arrayType": {"type": "STRING"}}, + }, + { + "description": "a nested struct", + "type": { + "type": "STRUCT", + "structTypes": [ + {"type": {"type": "DATE"}, "name": "nested_date"}, + { + "type": {"type": "BOOLEAN"}, + "description": "nested bool field", + }, + ], + }, + }, + ], + } + + klass = self._get_target_class() + result = klass.from_api_repr(api_resource) + + self.assertIsNone(result.name) + self.assertIsNone(result.description) + self.assertEqual(len(result.fields), 3) + + field = result.fields[0] + self.assertIsInstance(field, ScalarQueryParameterType) + self.assertEqual(field.name, "age") + self.assertEqual(field.description, "in years") + + field = result.fields[1] + self.assertIsInstance(field, ArrayQueryParameterType) + self.assertEqual(field.name, "aliases") + self.assertIsNone(field.description) + self.assertIsInstance(field._array_type, ScalarQueryParameterType) + self.assertEqual(field._array_type._type, "STRING") + + field = result.fields[2] + self.assertIsInstance(field, self._get_target_class()) + self.assertIsNone(field.name) + self.assertEqual(field.description, "a nested struct") + + date_field = field.fields[0] + self.assertEqual(date_field._type, "DATE") + self.assertEqual(date_field.name, "nested_date") + self.assertIsNone(date_field.description) + + bool_field = field.fields[1] + self.assertEqual(bool_field._type, "BOOLEAN") + self.assertIsNone(bool_field.name) + self.assertEqual(bool_field.description, "nested bool field") + + def test_to_api_repr(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + + int_type = ScalarQueryParameterType("INTEGER", description="in years") + date_type = ScalarQueryParameterType("DATE", name="day_of_birth") + param_type = self._make_one(int_type, date_type, name="foo", description="bar") + + result = param_type.to_api_repr() + + expected_result = { + "type": "STRUCT", 
+ "structTypes": [ + {"type": {"type": "INTEGER"}, "description": "in years"}, + {"name": "day_of_birth", "type": {"type": "DATE"}}, + ], + } + self.assertEqual(result, expected_result) + + def test_to_api_repr_nested(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + + struct_class = self._get_target_class() + + int_type = ScalarQueryParameterType("INTEGER", description="in years") + nested_struct_type = struct_class( + ScalarQueryParameterType("DATE", name="nested_date"), + ScalarQueryParameterType("BOOLEAN", description="nested bool field"), + name="nested", + ) + param_type = self._make_one( + int_type, nested_struct_type, name="foo", description="bar" + ) + + result = param_type.to_api_repr() + + expected_result = { + "type": "STRUCT", + "structTypes": [ + {"type": {"type": "INTEGER"}, "description": "in years"}, + { + "name": "nested", + "type": { + "type": "STRUCT", + "structTypes": [ + {"type": {"type": "DATE"}, "name": "nested_date"}, + { + "type": {"type": "BOOLEAN"}, + "description": "nested bool field", + }, + ], + }, + }, + ], + } + self.assertEqual(result, expected_result) + + def test_repr_no_optional_attrs(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + + param_type = self._make_one( + ScalarQueryParameterType("BOOLEAN"), ScalarQueryParameterType("STRING") + ) + expected = ( + "StructQueryParameterType(" + "ScalarQueryParameterType('BOOLEAN'), ScalarQueryParameterType('STRING')" + ")" + ) + self.assertEqual(repr(param_type), expected) + + def test_repr_all_optional_attrs(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + + param_type = self._make_one( + ScalarQueryParameterType("BOOLEAN"), + ScalarQueryParameterType("STRING"), + name="data_record", + description="this is it", + ) + expected = ( + "StructQueryParameterType(" + "ScalarQueryParameterType('BOOLEAN'), ScalarQueryParameterType('STRING'), " + "name='data_record', description='this is it'" + ")" + ) + 
self.assertEqual(repr(param_type), expected) + + class Test__AbstractQueryParameter(unittest.TestCase): @staticmethod def _get_target_class(): @@ -340,6 +652,10 @@ def test_ctor(self): self.assertEqual(param.array_type, "INT64") self.assertEqual(param.values, [1, 2]) + def test_ctor_empty_struct_array_wo_type_info(self): + with self.assertRaisesRegex(ValueError, r"(?i)missing.*struct.*type info.*"): + self._make_one(name="foo", array_type="STRUCT", values=[]) + def test___eq__(self): param = self._make_one(name="foo", array_type="INT64", values=[123]) self.assertEqual(param, param) @@ -467,6 +783,19 @@ def test_to_api_repr_wo_name(self): param = klass.positional(array_type="INT64", values=[1, 2]) self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_array_type_as_type_instance(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + + EXPECTED = { + "parameterType": {"type": "ARRAY", "arrayType": {"type": "BOOLEAN"}}, + "parameterValue": {"arrayValues": [{"value": "true"}, {"value": "false"}]}, + } + klass = self._get_target_class() + param = klass.positional( + array_type=ScalarQueryParameterType("BOOLEAN"), values=[True, False], + ) + self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_unknown_type(self): EXPECTED = { "parameterType": {"type": "ARRAY", "arrayType": {"type": "UNKNOWN"}}, @@ -503,6 +832,31 @@ def test_to_api_repr_w_record_type(self): param = klass.positional(array_type="RECORD", values=[struct]) self.assertEqual(param.to_api_repr(), EXPECTED) + def test_to_api_repr_w_empty_array_of_records_type(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + from google.cloud.bigquery.query import StructQueryParameterType + + EXPECTED = { + "parameterType": { + "type": "ARRAY", + "arrayType": { + "type": "STRUCT", + "structTypes": [ + {"name": "foo", "type": {"type": "STRING"}}, + {"name": "bar", "type": {"type": "INT64"}}, + ], + }, + }, + "parameterValue": {"arrayValues": []}, + 
} + item_type = StructQueryParameterType( + ScalarQueryParameterType("STRING", name="foo"), + ScalarQueryParameterType("INT64", name="bar"), + ) + klass = self._get_target_class() + param = klass.positional(array_type=item_type, values=[]) + self.assertEqual(param.to_api_repr(), EXPECTED) + def test___eq___wrong_type(self): field = self._make_one("test", "STRING", ["value"]) other = object() @@ -547,11 +901,38 @@ def test___ne___different_values(self): field2 = self._make_one("test", "INT64", [12]) self.assertNotEqual(field1, field2) - def test___repr__(self): + def test___repr__array_type_str(self): field1 = self._make_one("field1", "STRING", ["value"]) expected = "ArrayQueryParameter('field1', 'STRING', ['value'])" self.assertEqual(repr(field1), expected) + def test___repr__array_type_scalar_type_instance(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + + int_items = self._make_one( + "int_items", ScalarQueryParameterType("INTEGER"), [64] + ) + expected = "ArrayQueryParameter('int_items', 'INTEGER', [64])" + self.assertEqual(repr(int_items), expected) + + def test___repr__array_type_struct_type_instance(self): + from google.cloud.bigquery.query import ScalarQueryParameterType + from google.cloud.bigquery.query import StructQueryParameterType + + struct_items = self._make_one( + "struct_items", + StructQueryParameterType( + ScalarQueryParameterType("INTEGER", name="age"), + ScalarQueryParameterType("STRING", name="last_name"), + ), + [{"age": 18, "last_name": "Doe"}], + ) + expected = ( + "ArrayQueryParameter('struct_items', 'STRUCT', " + "[{'age': 18, 'last_name': 'Doe'}])" + ) + self.assertEqual(repr(struct_items), expected) + class Test_StructQueryParameter(unittest.TestCase): @staticmethod From 699498c8d1ea76dcc7e6347781fc699159dc9214 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Thu, 25 Feb 2021 08:42:04 -0800 Subject: [PATCH 027/230] chore: exclude tarball from code generation (#512) This PR was generated using 
Autosynth. :rainbow: Synth log will be available here: https://source.cloud.google.com/results/invocations/ca115e36-5d95-4acd-a2d8-7ac2f22a7261/targets - [x] To automatically regenerate this PR, check this box. --- synth.metadata | 3 +-- synth.py | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/synth.metadata b/synth.metadata index dc183a72e..9412653c6 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "1c6681aba872c00afb16a904a2ba9bae8e9618d3" + "sha": "1823cadee3acf95c516d0479400e4175349ea199" } }, { @@ -93,7 +93,6 @@ "CONTRIBUTING.rst", "LICENSE", "MANIFEST.in", - "bigquery-v2-py.tar.gz", "docs/_static/custom.css", "docs/_templates/layout.html", "docs/bigquery_v2/model_service.rst", diff --git a/synth.py b/synth.py index 3ab271c96..3c6440600 100644 --- a/synth.py +++ b/synth.py @@ -32,6 +32,7 @@ s.move( library, excludes=[ + "*.tar.gz", "docs/index.rst", "docs/bigquery_v2/*_service.rst", "docs/bigquery_v2/services.rst", From d7632799769248b09a8558ba18f5025ebdd9675a Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 25 Feb 2021 19:32:01 +0100 Subject: [PATCH 028/230] fix: QueryJob.exception() *returns* the errors, not raises them (#467) * fix: QueryJob.exception() should *return* errors * Reload query job on error, raise any reload errors * Catch errors on reloading failed query jobs * Add additional unit test * Increase retry deadline to mitigate test flakiness * Store the more informative exception in done() --- google/cloud/bigquery/job/query.py | 26 ++++++++-- tests/unit/job/test_base.py | 2 +- tests/unit/job/test_query.py | 83 +++++++++++++++++++++++++++++- 3 files changed, 104 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index b3ca8d940..5c1118500 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -989,7 +989,8 @@ def 
done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): unfinished jobs before checking. Default ``True``. Returns: - bool: True if the job is complete, False otherwise. + bool: ``True`` if the job is complete or if fetching its status resulted in + an error, ``False`` otherwise. """ # Do not refresh if the state is already done, as the job will not # change once complete. @@ -997,17 +998,34 @@ def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): if not reload or is_done: return is_done - self._reload_query_results(retry=retry, timeout=timeout) - # If an explicit timeout is not given, fall back to the transport timeout # stored in _blocking_poll() in the process of polling for job completion. transport_timeout = timeout if timeout is not None else self._transport_timeout + try: + self._reload_query_results(retry=retry, timeout=transport_timeout) + except exceptions.GoogleAPIError as exc: + # Reloading also updates error details on self, thus no need for an + # explicit self.set_exception() call if reloading succeeds. + try: + self.reload(retry=retry, timeout=transport_timeout) + except exceptions.GoogleAPIError: + # Use the query results reload exception, as it generally contains + # much more useful error information. + self.set_exception(exc) + return True + else: + return self.state == _DONE_STATE + # Only reload the job once we know the query is complete. # This will ensure that fields such as the destination table are # correctly populated. 
if self._query_results.complete: - self.reload(retry=retry, timeout=transport_timeout) + try: + self.reload(retry=retry, timeout=transport_timeout) + except exceptions.GoogleAPIError as exc: + self.set_exception(exc) + return True return self.state == _DONE_STATE diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index bbeffba50..405ad6ee5 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -967,7 +967,7 @@ def test_result_w_retry_wo_state(self): custom_predicate = mock.Mock() custom_predicate.return_value = True custom_retry = google.api_core.retry.Retry( - predicate=custom_predicate, initial=0.001, maximum=0.001, deadline=0.001, + predicate=custom_predicate, initial=0.001, maximum=0.001, deadline=0.1, ) self.assertIs(job.result(retry=custom_retry), job) diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index a4ab11ab6..655a121e6 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -16,6 +16,7 @@ import copy import http import textwrap +import types import freezegun from google.api_core import exceptions @@ -308,7 +309,7 @@ def test_cancelled(self): self.assertTrue(job.cancelled()) - def test_done(self): + def test_done_job_complete(self): client = _make_client(project=self.PROJECT) resource = self._make_resource(ended=True) job = self._get_target_class().from_api_repr(resource, client) @@ -356,6 +357,84 @@ def test_done_w_timeout_and_longer_internal_api_timeout(self): call_args = fake_reload.call_args self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) + def test_done_w_query_results_error_reload_ok_job_finished(self): + client = _make_client(project=self.PROJECT) + bad_request_error = exceptions.BadRequest("Error in query") + client._get_query_results = mock.Mock(side_effect=bad_request_error) + + resource = self._make_resource(ended=False) + job = self._get_target_class().from_api_repr(resource, client) + job._exception = None + + def 
fake_reload(self, *args, **kwargs): + self._properties["status"]["state"] = "DONE" + self.set_exception(copy.copy(bad_request_error)) + + fake_reload_method = types.MethodType(fake_reload, job) + + with mock.patch.object(job, "reload", new=fake_reload_method): + is_done = job.done() + + assert is_done + assert isinstance(job._exception, exceptions.BadRequest) + + def test_done_w_query_results_error_reload_ok_job_still_running(self): + client = _make_client(project=self.PROJECT) + retry_error = exceptions.RetryError("Too many retries", cause=TimeoutError) + client._get_query_results = mock.Mock(side_effect=retry_error) + + resource = self._make_resource(ended=False) + job = self._get_target_class().from_api_repr(resource, client) + job._exception = None + + def fake_reload(self, *args, **kwargs): + self._properties["status"]["state"] = "RUNNING" + + fake_reload_method = types.MethodType(fake_reload, job) + + with mock.patch.object(job, "reload", new=fake_reload_method): + is_done = job.done() + + assert not is_done + assert job._exception is None + + def test_done_w_query_results_error_reload_error(self): + client = _make_client(project=self.PROJECT) + bad_request_error = exceptions.BadRequest("Error in query") + client._get_query_results = mock.Mock(side_effect=bad_request_error) + + resource = self._make_resource(ended=False) + job = self._get_target_class().from_api_repr(resource, client) + reload_error = exceptions.DataLoss("Oops, sorry!") + job.reload = mock.Mock(side_effect=reload_error) + job._exception = None + + is_done = job.done() + + assert is_done + assert job._exception is bad_request_error + + def test_done_w_job_query_results_ok_reload_error(self): + client = _make_client(project=self.PROJECT) + query_results = google.cloud.bigquery.query._QueryResults( + properties={ + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": "12345"}, + } + ) + client._get_query_results = mock.Mock(return_value=query_results) + + resource = 
self._make_resource(ended=False) + job = self._get_target_class().from_api_repr(resource, client) + retry_error = exceptions.RetryError("Too many retries", cause=TimeoutError) + job.reload = mock.Mock(side_effect=retry_error) + job._exception = None + + is_done = job.done() + + assert is_done + assert job._exception is retry_error + def test_query_plan(self): from google.cloud._helpers import _RFC3339_MICROS from google.cloud.bigquery.job import QueryPlanEntry @@ -973,7 +1052,7 @@ def test_result_w_retry(self): initial=0.001, maximum=0.001, multiplier=1.0, - deadline=0.001, + deadline=0.1, predicate=custom_predicate, ) From 81df4ba518dd82d6ef2519adb7803d4c90119a8b Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 25 Feb 2021 18:46:03 +0000 Subject: [PATCH 029/230] chore: release 2.10.0 (#533) :robot: I have created a release \*beep\* \*boop\* --- ## [2.10.0](https://www.github.com/googleapis/python-bigquery/compare/v2.9.0...v2.10.0) (2021-02-25) ### Features * add BIGNUMERIC support ([#527](https://www.github.com/googleapis/python-bigquery/issues/527)) ([cc3394f](https://www.github.com/googleapis/python-bigquery/commit/cc3394f80934419eb00c2029bb81c92a696e7d88)) ### Bug Fixes * error using empty array of structs parameter ([#474](https://www.github.com/googleapis/python-bigquery/issues/474)) ([c1d15f4](https://www.github.com/googleapis/python-bigquery/commit/c1d15f4e5da4b7e10c00afffd59a5c7f3ded027a)) * QueryJob.exception() *returns* the errors, not raises them ([#467](https://www.github.com/googleapis/python-bigquery/issues/467)) ([d763279](https://www.github.com/googleapis/python-bigquery/commit/d7632799769248b09a8558ba18f5025ebdd9675a)) ### Documentation * **bigquery:** Add alternative approach to setting credentials ([#517](https://www.github.com/googleapis/python-bigquery/issues/517)) ([60fbf28](https://www.github.com/googleapis/python-bigquery/commit/60fbf287b0d34d5db2e61cce7a5b42735ed43d0e)) 
* explain retry behavior for DONE jobs ([#532](https://www.github.com/googleapis/python-bigquery/issues/532)) ([696c443](https://www.github.com/googleapis/python-bigquery/commit/696c443f0a6740be0767e12b706a7771bc1460c3)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- CHANGELOG.md | 19 +++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 51fad831e..9afd523a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,25 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.10.0](https://www.github.com/googleapis/python-bigquery/compare/v2.9.0...v2.10.0) (2021-02-25) + + +### Features + +* add BIGNUMERIC support ([#527](https://www.github.com/googleapis/python-bigquery/issues/527)) ([cc3394f](https://www.github.com/googleapis/python-bigquery/commit/cc3394f80934419eb00c2029bb81c92a696e7d88)) + + +### Bug Fixes + +* error using empty array of structs parameter ([#474](https://www.github.com/googleapis/python-bigquery/issues/474)) ([c1d15f4](https://www.github.com/googleapis/python-bigquery/commit/c1d15f4e5da4b7e10c00afffd59a5c7f3ded027a)) +* QueryJob.exception() *returns* the errors, not raises them ([#467](https://www.github.com/googleapis/python-bigquery/issues/467)) ([d763279](https://www.github.com/googleapis/python-bigquery/commit/d7632799769248b09a8558ba18f5025ebdd9675a)) + + +### Documentation + +* **bigquery:** Add alternative approach to setting credentials ([#517](https://www.github.com/googleapis/python-bigquery/issues/517)) ([60fbf28](https://www.github.com/googleapis/python-bigquery/commit/60fbf287b0d34d5db2e61cce7a5b42735ed43d0e)) +* explain retry behavior for DONE jobs ([#532](https://www.github.com/googleapis/python-bigquery/issues/532)) 
([696c443](https://www.github.com/googleapis/python-bigquery/commit/696c443f0a6740be0767e12b706a7771bc1460c3)) + ## [2.9.0](https://www.github.com/googleapis/python-bigquery/compare/v2.8.0...v2.9.0) (2021-02-18) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index b2a8c5535..13e710fcc 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.9.0" +__version__ = "2.10.0" From 1ba69273b25341783c46c4564e7ee632e421569b Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 1 Mar 2021 17:36:08 +0100 Subject: [PATCH 030/230] chore(deps): update dependency google-cloud-bigquery to v2.10.0 (#535) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index e9fcfca03..8ff7fa850 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,3 +1,3 @@ geojson==2.5.0 -google-cloud-bigquery==2.9.0 +google-cloud-bigquery==2.10.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index c638178fc..150fe2993 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.9.0 +google-cloud-bigquery==2.10.0 google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.2 grpcio==1.35.0 From 6f44c9e7e33bde83e75650055221167f4a845ccc Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 1 Mar 2021 17:36:31 +0100 Subject: [PATCH 031/230] chore(deps): update dependency grpcio to v1.36.0 (#536) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 
150fe2993..d645d8a1d 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.10.0 google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.2 -grpcio==1.35.0 +grpcio==1.36.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4 From 3917260bee6a9cf87bd5e2cdf23bf4c4e310ff32 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 4 Mar 2021 00:24:05 +0100 Subject: [PATCH 032/230] chore(deps): update dependency grpcio to v1.36.1 (#541) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [grpcio](https://grpc.io) | `==1.36.0` -> `==1.36.1` | [![age](https://badges.renovateapi.com/packages/pypi/grpcio/1.36.1/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/grpcio/1.36.1/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/grpcio/1.36.1/compatibility-slim/1.36.0)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/grpcio/1.36.1/confidence-slim/1.36.0)](https://docs.renovatebot.com/merge-confidence/) | --- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). 
View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index d645d8a1d..a80b7fa05 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.10.0 google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.2 -grpcio==1.36.0 +grpcio==1.36.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4 From f0259eb7ed4ff254ee238e87651992ff93481dae Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 5 Mar 2021 05:06:51 +0100 Subject: [PATCH 033/230] chore(deps): update dependency google-auth-oauthlib to v0.4.3 (#542) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index a80b7fa05..8ccbec38e 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ google-cloud-bigquery==2.10.0 google-cloud-bigquery-storage==2.3.0 -google-auth-oauthlib==0.4.2 +google-auth-oauthlib==0.4.3 grpcio==1.36.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' From 816934b29a7917e4278063e19c56cd0d38b5569f Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Tue, 9 Mar 2021 11:57:40 -0800 Subject: [PATCH 034/230] chore: upgrade gapic-generator-python to 0.42.2 (#543) PiperOrigin-RevId: 361662015 Source-Author: Google APIs Source-Date: Mon Mar 8 14:47:18 2021 -0800 Source-Repo: googleapis/googleapis Source-Sha: 28a591963253d52ce3a25a918cafbdd9928de8cf Source-Link: https://github.com/googleapis/googleapis/commit/28a591963253d52ce3a25a918cafbdd9928de8cf --- google/cloud/bigquery_v2/types/__init__.py | 28 +++++++++++----------- synth.metadata | 10 ++++---- 2 
files changed, 18 insertions(+), 20 deletions(-) diff --git a/google/cloud/bigquery_v2/types/__init__.py b/google/cloud/bigquery_v2/types/__init__.py index 00dc837c9..b76e65c65 100644 --- a/google/cloud/bigquery_v2/types/__init__.py +++ b/google/cloud/bigquery_v2/types/__init__.py @@ -16,6 +16,14 @@ # from .encryption_config import EncryptionConfiguration +from .model import ( + DeleteModelRequest, + GetModelRequest, + ListModelsRequest, + ListModelsResponse, + Model, + PatchModelRequest, +) from .model_reference import ModelReference from .standard_sql import ( StandardSqlDataType, @@ -23,26 +31,18 @@ StandardSqlStructType, ) from .table_reference import TableReference -from .model import ( - Model, - GetModelRequest, - PatchModelRequest, - DeleteModelRequest, - ListModelsRequest, - ListModelsResponse, -) __all__ = ( "EncryptionConfiguration", + "DeleteModelRequest", + "GetModelRequest", + "ListModelsRequest", + "ListModelsResponse", + "Model", + "PatchModelRequest", "ModelReference", "StandardSqlDataType", "StandardSqlField", "StandardSqlStructType", "TableReference", - "Model", - "GetModelRequest", - "PatchModelRequest", - "DeleteModelRequest", - "ListModelsRequest", - "ListModelsResponse", ) diff --git a/synth.metadata b/synth.metadata index 9412653c6..cab985521 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,15 +4,15 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "1823cadee3acf95c516d0479400e4175349ea199" + "sha": "f0259eb7ed4ff254ee238e87651992ff93481dae" } }, { "git": { "name": "googleapis", "remote": "https://github.com/googleapis/googleapis.git", - "sha": "e13001be33d69042a9505e698f792587a804a5cf", - "internalRef": "358152223" + "sha": "28a591963253d52ce3a25a918cafbdd9928de8cf", + "internalRef": "361662015" } }, { @@ -95,8 +95,6 @@ "MANIFEST.in", "docs/_static/custom.css", "docs/_templates/layout.html", - "docs/bigquery_v2/model_service.rst", - "docs/bigquery_v2/services.rst", 
"docs/bigquery_v2/types.rst", "docs/conf.py", "google/cloud/bigquery_v2/__init__.py", @@ -128,4 +126,4 @@ "setup.cfg", "testing/.gitignore" ] -} +} \ No newline at end of file From d5c7e11a1dc2a149d74294bfadbae62d70573e69 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 9 Mar 2021 21:02:56 +0100 Subject: [PATCH 035/230] feat: add context manager support to client (#540) --- google/cloud/bigquery/client.py | 6 ++++++ tests/unit/test_client.py | 22 ++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index f8c0d7c93..bdbcb767c 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -3423,6 +3423,12 @@ def schema_to_json(self, schema_list, destination): with open(destination, mode="w") as file_obj: return self._schema_to_json_file_object(json_schema_list, file_obj) + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.close() + # pylint: disable=unused-argument def _item_to_project(iterator, resource): diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 66add9c0a..6c3263ea5 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -7218,6 +7218,28 @@ def test_list_rows_error(self): with self.assertRaises(TypeError): client.list_rows(1) + def test_context_manager_enter_returns_itself(self): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + with mock.patch.object(client, "close"), client as context_var: + pass + + self.assertIs(client, context_var) + + def test_context_manager_exit_closes_client(self): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + fake_close = mock.Mock() + with mock.patch.object(client, "close", fake_close): + with client: + pass + + fake_close.assert_called_once() + class 
Test_make_job_id(unittest.TestCase): def _call_fut(self, job_id, prefix=None): From 3ce826e8805e7df4933ada29677c2c88709cd539 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 9 Mar 2021 20:16:09 +0000 Subject: [PATCH 036/230] chore: release 2.11.0 (#545) :robot: I have created a release \*beep\* \*boop\* --- ## [2.11.0](https://www.github.com/googleapis/python-bigquery/compare/v2.10.0...v2.11.0) (2021-03-09) ### Features * add context manager support to client ([#540](https://www.github.com/googleapis/python-bigquery/issues/540)) ([d5c7e11](https://www.github.com/googleapis/python-bigquery/commit/d5c7e11a1dc2a149d74294bfadbae62d70573e69)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9afd523a4..512d38108 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.11.0](https://www.github.com/googleapis/python-bigquery/compare/v2.10.0...v2.11.0) (2021-03-09) + + +### Features + +* add context manager support to client ([#540](https://www.github.com/googleapis/python-bigquery/issues/540)) ([d5c7e11](https://www.github.com/googleapis/python-bigquery/commit/d5c7e11a1dc2a149d74294bfadbae62d70573e69)) + ## [2.10.0](https://www.github.com/googleapis/python-bigquery/compare/v2.9.0...v2.10.0) (2021-02-25) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 13e710fcc..e6e357434 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.10.0" +__version__ = "2.11.0" From a3ab9efdd0758829845cfcb6ca0ac1f03ab44f64 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 10 Mar 2021 21:31:51 +0100 Subject: [PATCH 037/230] feat: make QueryJob.done() method more performant (#544) --- google/cloud/bigquery/job/query.py | 91 ++++++++++++------------------ tests/unit/job/test_query.py | 54 ++++-------------- 2 files changed, 45 insertions(+), 100 deletions(-) diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 5c1118500..491983f8e 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -19,6 +19,7 @@ import re from google.api_core import exceptions +from google.api_core.future import polling as polling_future import requests from google.cloud.bigquery.dataset import Dataset @@ -42,7 +43,6 @@ from google.cloud.bigquery._tqdm_helpers import wait_for_query from google.cloud.bigquery.job.base import _AsyncJob -from google.cloud.bigquery.job.base import _DONE_STATE from google.cloud.bigquery.job.base import _JobConfig from google.cloud.bigquery.job.base import _JobReference @@ -974,61 +974,6 @@ def estimated_bytes_processed(self): result = int(result) return result - def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): - """Refresh the job and checks if it is complete. - - Args: - retry (Optional[google.api_core.retry.Retry]): - How to retry the call that retrieves query results. If the job state is - ``DONE``, retrying is aborted early, as the job will not change anymore. - timeout (Optional[float]): - The number of seconds to wait for the underlying HTTP transport - before using ``retry``. - reload (Optional[bool]): - If ``True``, make an API call to refresh the job state of - unfinished jobs before checking. Default ``True``. - - Returns: - bool: ``True`` if the job is complete or if fetching its status resulted in - an error, ``False`` otherwise. 
- """ - # Do not refresh if the state is already done, as the job will not - # change once complete. - is_done = self.state == _DONE_STATE - if not reload or is_done: - return is_done - - # If an explicit timeout is not given, fall back to the transport timeout - # stored in _blocking_poll() in the process of polling for job completion. - transport_timeout = timeout if timeout is not None else self._transport_timeout - - try: - self._reload_query_results(retry=retry, timeout=transport_timeout) - except exceptions.GoogleAPIError as exc: - # Reloading also updates error details on self, thus no need for an - # explicit self.set_exception() call if reloading succeeds. - try: - self.reload(retry=retry, timeout=transport_timeout) - except exceptions.GoogleAPIError: - # Use the query results reload exception, as it generally contains - # much more useful error information. - self.set_exception(exc) - return True - else: - return self.state == _DONE_STATE - - # Only reload the job once we know the query is complete. - # This will ensure that fields such as the destination table are - # correctly populated. - if self._query_results.complete: - try: - self.reload(retry=retry, timeout=transport_timeout) - except exceptions.GoogleAPIError as exc: - self.set_exception(exc) - return True - - return self.state == _DONE_STATE - def _blocking_poll(self, timeout=None, **kwargs): self._done_timeout = timeout self._transport_timeout = timeout @@ -1130,6 +1075,40 @@ def _reload_query_results(self, retry=DEFAULT_RETRY, timeout=None): timeout=transport_timeout, ) + def _done_or_raise(self, retry=DEFAULT_RETRY, timeout=None): + """Check if the query has finished running and raise if it's not. + + If the query has finished, also reload the job itself. + """ + # If an explicit timeout is not given, fall back to the transport timeout + # stored in _blocking_poll() in the process of polling for job completion. 
+ transport_timeout = timeout if timeout is not None else self._transport_timeout + + try: + self._reload_query_results(retry=retry, timeout=transport_timeout) + except exceptions.GoogleAPIError as exc: + # Reloading also updates error details on self, thus no need for an + # explicit self.set_exception() call if reloading succeeds. + try: + self.reload(retry=retry, timeout=transport_timeout) + except exceptions.GoogleAPIError: + # Use the query results reload exception, as it generally contains + # much more useful error information. + self.set_exception(exc) + finally: + return + + # Only reload the job once we know the query is complete. + # This will ensure that fields such as the destination table are + # correctly populated. + if not self._query_results.complete: + raise polling_future._OperationNotComplete() + else: + try: + self.reload(retry=retry, timeout=transport_timeout) + except exceptions.GoogleAPIError as exc: + self.set_exception(exc) + def result( self, page_size=None, diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 655a121e6..4665933ea 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -309,16 +309,7 @@ def test_cancelled(self): self.assertTrue(job.cancelled()) - def test_done_job_complete(self): - client = _make_client(project=self.PROJECT) - resource = self._make_resource(ended=True) - job = self._get_target_class().from_api_repr(resource, client) - job._query_results = google.cloud.bigquery.query._QueryResults.from_api_repr( - {"jobComplete": True, "jobReference": resource["jobReference"]} - ) - self.assertTrue(job.done()) - - def test_done_w_timeout(self): + def test__done_or_raise_w_timeout(self): client = _make_client(project=self.PROJECT) resource = self._make_resource(ended=False) job = self._get_target_class().from_api_repr(resource, client) @@ -326,7 +317,7 @@ def test_done_w_timeout(self): with mock.patch.object( client, "_get_query_results" ) as fake_get_results, 
mock.patch.object(job, "reload") as fake_reload: - job.done(timeout=42) + job._done_or_raise(timeout=42) fake_get_results.assert_called_once() call_args = fake_get_results.call_args @@ -335,7 +326,7 @@ def test_done_w_timeout(self): call_args = fake_reload.call_args self.assertEqual(call_args.kwargs.get("timeout"), 42) - def test_done_w_timeout_and_longer_internal_api_timeout(self): + def test__done_or_raise_w_timeout_and_longer_internal_api_timeout(self): client = _make_client(project=self.PROJECT) resource = self._make_resource(ended=False) job = self._get_target_class().from_api_repr(resource, client) @@ -344,7 +335,7 @@ def test_done_w_timeout_and_longer_internal_api_timeout(self): with mock.patch.object( client, "_get_query_results" ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload: - job.done(timeout=5.5) + job._done_or_raise(timeout=5.5) # The expected timeout used is simply the given timeout, as the latter # is shorter than the job's internal done timeout. @@ -357,7 +348,7 @@ def test_done_w_timeout_and_longer_internal_api_timeout(self): call_args = fake_reload.call_args self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout) - def test_done_w_query_results_error_reload_ok_job_finished(self): + def test__done_or_raise_w_query_results_error_reload_ok(self): client = _make_client(project=self.PROJECT) bad_request_error = exceptions.BadRequest("Error in query") client._get_query_results = mock.Mock(side_effect=bad_request_error) @@ -373,32 +364,11 @@ def fake_reload(self, *args, **kwargs): fake_reload_method = types.MethodType(fake_reload, job) with mock.patch.object(job, "reload", new=fake_reload_method): - is_done = job.done() + job._done_or_raise() - assert is_done assert isinstance(job._exception, exceptions.BadRequest) - def test_done_w_query_results_error_reload_ok_job_still_running(self): - client = _make_client(project=self.PROJECT) - retry_error = exceptions.RetryError("Too many retries", cause=TimeoutError) - 
client._get_query_results = mock.Mock(side_effect=retry_error) - - resource = self._make_resource(ended=False) - job = self._get_target_class().from_api_repr(resource, client) - job._exception = None - - def fake_reload(self, *args, **kwargs): - self._properties["status"]["state"] = "RUNNING" - - fake_reload_method = types.MethodType(fake_reload, job) - - with mock.patch.object(job, "reload", new=fake_reload_method): - is_done = job.done() - - assert not is_done - assert job._exception is None - - def test_done_w_query_results_error_reload_error(self): + def test__done_or_raise_w_query_results_error_reload_error(self): client = _make_client(project=self.PROJECT) bad_request_error = exceptions.BadRequest("Error in query") client._get_query_results = mock.Mock(side_effect=bad_request_error) @@ -409,12 +379,11 @@ def test_done_w_query_results_error_reload_error(self): job.reload = mock.Mock(side_effect=reload_error) job._exception = None - is_done = job.done() + job._done_or_raise() - assert is_done assert job._exception is bad_request_error - def test_done_w_job_query_results_ok_reload_error(self): + def test__done_or_raise_w_job_query_results_ok_reload_error(self): client = _make_client(project=self.PROJECT) query_results = google.cloud.bigquery.query._QueryResults( properties={ @@ -430,9 +399,8 @@ def test_done_w_job_query_results_ok_reload_error(self): job.reload = mock.Mock(side_effect=retry_error) job._exception = None - is_done = job.done() + job._done_or_raise() - assert is_done assert job._exception is retry_error def test_query_plan(self): @@ -1905,8 +1873,6 @@ def test_reload_w_timeout(self): ) def test_iter(self): - import types - begun_resource = self._make_resource() query_resource = { "jobComplete": True, From 0d7212cec1e786b88b5825318406ac64e30e2a9d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 10 Mar 2021 21:39:07 +0100 Subject: [PATCH 038/230] chore(deps): update dependency google-cloud-bigquery to v2.11.0 (#546) --- 
samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 8ff7fa850..34896627e 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,3 +1,3 @@ geojson==2.5.0 -google-cloud-bigquery==2.10.0 +google-cloud-bigquery==2.11.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 8ccbec38e..b55d2b3a4 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.10.0 +google-cloud-bigquery==2.11.0 google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.3 grpcio==1.36.1 From efdf1c653770f7c03c17e31e3c2f279bb685637b Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 15 Mar 2021 09:52:04 -0500 Subject: [PATCH 039/230] refactor: split pandas system tests to new module (#548) Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Follow-up to https://github.com/googleapis/python-bigquery/pull/448 Towards #366 --- tests/system/conftest.py | 39 ++ tests/system/helpers.py | 94 ++++ tests/system/test_client.py | 953 ++---------------------------------- tests/system/test_pandas.py | 801 ++++++++++++++++++++++++++++++ 4 files changed, 969 insertions(+), 918 deletions(-) create mode 100644 tests/system/conftest.py create mode 100644 tests/system/helpers.py create mode 100644 tests/system/test_pandas.py diff --git a/tests/system/conftest.py b/tests/system/conftest.py new file mode 100644 index 000000000..4b5fcb543 --- /dev/null +++ b/tests/system/conftest.py @@ -0,0 +1,39 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from . 
import helpers + + +@pytest.fixture(scope="session") +def bigquery_client(): + from google.cloud import bigquery + + return bigquery.Client() + + +@pytest.fixture(scope="session") +def bqstorage_client(bigquery_client): + from google.cloud import bigquery_storage + + return bigquery_storage.BigQueryReadClient(credentials=bigquery_client._credentials) + + +@pytest.fixture +def dataset_id(bigquery_client): + dataset_id = f"bqsystem_{helpers.temp_suffix()}" + bigquery_client.create_dataset(dataset_id) + yield dataset_id + bigquery_client.delete_dataset(dataset_id, delete_contents=True) diff --git a/tests/system/helpers.py b/tests/system/helpers.py new file mode 100644 index 000000000..76e609345 --- /dev/null +++ b/tests/system/helpers.py @@ -0,0 +1,94 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import decimal +import uuid + +import google.api_core.exceptions +import test_utils.retry + +from google.cloud._helpers import UTC + + +_naive = datetime.datetime(2016, 12, 5, 12, 41, 9) +_naive_microseconds = datetime.datetime(2016, 12, 5, 12, 41, 9, 250000) +_stamp = "%s %s" % (_naive.date().isoformat(), _naive.time().isoformat()) +_stamp_microseconds = _stamp + ".250000" +_zoned = _naive.replace(tzinfo=UTC) +_zoned_microseconds = _naive_microseconds.replace(tzinfo=UTC) +_numeric = decimal.Decimal("123456789.123456789") + + +# Examples of most data types to test with query() and DB-API. 
+STANDARD_SQL_EXAMPLES = [ + ("SELECT 1", 1), + ("SELECT 1.3", 1.3), + ("SELECT TRUE", True), + ('SELECT "ABC"', "ABC"), + ('SELECT CAST("foo" AS BYTES)', b"foo"), + ('SELECT TIMESTAMP "%s"' % (_stamp,), _zoned), + ('SELECT TIMESTAMP "%s"' % (_stamp_microseconds,), _zoned_microseconds,), + ('SELECT DATETIME(TIMESTAMP "%s")' % (_stamp,), _naive), + ('SELECT DATETIME(TIMESTAMP "%s")' % (_stamp_microseconds,), _naive_microseconds,), + ('SELECT DATE(TIMESTAMP "%s")' % (_stamp,), _naive.date()), + ('SELECT TIME(TIMESTAMP "%s")' % (_stamp,), _naive.time()), + ('SELECT NUMERIC "%s"' % (_numeric,), _numeric), + ("SELECT (1, 2)", {"_field_1": 1, "_field_2": 2}), + ( + "SELECT ((1, 2), (3, 4), 5)", + { + "_field_1": {"_field_1": 1, "_field_2": 2}, + "_field_2": {"_field_1": 3, "_field_2": 4}, + "_field_3": 5, + }, + ), + ("SELECT [1, 2, 3]", [1, 2, 3]), + ( + "SELECT ([1, 2], 3, [4, 5])", + {"_field_1": [1, 2], "_field_2": 3, "_field_3": [4, 5]}, + ), + ( + "SELECT [(1, 2, 3), (4, 5, 6)]", + [ + {"_field_1": 1, "_field_2": 2, "_field_3": 3}, + {"_field_1": 4, "_field_2": 5, "_field_3": 6}, + ], + ), + ( + "SELECT [([1, 2, 3], 4), ([5, 6], 7)]", + [{"_field_1": [1, 2, 3], "_field_2": 4}, {"_field_1": [5, 6], "_field_2": 7}], + ), + ("SELECT ARRAY(SELECT STRUCT([1, 2]))", [{"_field_1": [1, 2]}]), + ("SELECT ST_GeogPoint(1, 2)", "POINT(1 2)"), +] + + +def temp_suffix(): + now = datetime.datetime.now() + return f"{now.strftime('%Y%m%d%H%M%S')}_{uuid.uuid4().hex[:8]}" + + +def _rate_limit_exceeded(forbidden): + """Predicate: pass only exceptions with 'rateLimitExceeded' as reason.""" + return any(error["reason"] == "rateLimitExceeded" for error in forbidden._errors) + + +# We need to wait to stay within the rate limits. +# The alternative outcome is a 403 Forbidden response from upstream, which +# they return instead of the more appropriate 429. 
+# See https://cloud.google.com/bigquery/quota-policy +retry_403 = test_utils.retry.RetryErrors( + google.api_core.exceptions.Forbidden, error_predicate=_rate_limit_exceeded, +) diff --git a/tests/system/test_client.py b/tests/system/test_client.py index ed48b0bfe..133f609a6 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -13,7 +13,6 @@ # limitations under the License. import base64 -import collections import concurrent.futures import csv import datetime @@ -29,9 +28,11 @@ import psutil import pytest -import pytz import pkg_resources +from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT +from . import helpers + try: from google.cloud import bigquery_storage except ImportError: # pragma: NO COVER @@ -42,10 +43,6 @@ except ImportError: # pragma: NO COVER fastavro = None -try: - import pandas -except ImportError: # pragma: NO COVER - pandas = None try: import pyarrow import pyarrow.types @@ -56,7 +53,6 @@ from google.api_core.exceptions import BadRequest from google.api_core.exceptions import ClientError from google.api_core.exceptions import Conflict -from google.api_core.exceptions import Forbidden from google.api_core.exceptions import GoogleAPICallError from google.api_core.exceptions import NotFound from google.api_core.exceptions import InternalServerError @@ -65,7 +61,6 @@ from google.api_core.iam import Policy from google.cloud import bigquery from google.cloud import bigquery_v2 -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import Table @@ -121,14 +116,8 @@ (TooManyRequests, InternalServerError, ServiceUnavailable) ) -PANDAS_MINIMUM_VERSION = pkg_resources.parse_version("1.0.0") PYARROW_MINIMUM_VERSION = pkg_resources.parse_version("0.17.0") -if pandas: - PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version -else: - 
PANDAS_INSTALLED_VERSION = None - if pyarrow: PYARROW_INSTALLED_VERSION = pkg_resources.get_distribution("pyarrow").parsed_version else: @@ -154,18 +143,6 @@ def _load_json_schema(filename="schema.json"): return _parse_schema_resource(json.load(schema_file)) -def _rate_limit_exceeded(forbidden): - """Predicate: pass only exceptions with 'rateLimitExceeded' as reason.""" - return any(error["reason"] == "rateLimitExceeded" for error in forbidden._errors) - - -# We need to wait to stay within the rate limits. -# The alternative outcome is a 403 Forbidden response from upstream, which -# they return instead of the more appropriate 429. -# See https://cloud.google.com/bigquery/quota-policy -retry_403 = RetryErrors(Forbidden, error_predicate=_rate_limit_exceeded) - - class Config(object): """Run-time configuration to be modified at set-up. @@ -262,7 +239,7 @@ def test_get_dataset(self): dataset_arg = Dataset(dataset_ref) dataset_arg.friendly_name = "Friendly" dataset_arg.description = "Description" - dataset = retry_403(client.create_dataset)(dataset_arg) + dataset = helpers.retry_403(client.create_dataset)(dataset_arg) self.to_delete.append(dataset) dataset_ref = bigquery.DatasetReference(project, dataset_id) @@ -345,7 +322,7 @@ def test_create_table(self): table_arg = Table(dataset.table(table_id), schema=SCHEMA) self.assertFalse(_table_exists(table_arg)) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) @@ -380,7 +357,7 @@ def test_create_table_with_policy(self): table_arg = Table(dataset.table(table_id), schema=schema) self.assertFalse(_table_exists(table_arg)) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) @@ -416,7 +393,7 @@ def 
test_create_table_w_time_partitioning_w_clustering_fields(self): table_arg.time_partitioning = TimePartitioning(field="transaction_time") table_arg.clustering_fields = ["user_email", "store_code"] - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) @@ -430,7 +407,7 @@ def test_delete_dataset_with_string(self): dataset_id = _make_dataset_id("delete_table_true_with_string") project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) - retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_ref)) + helpers.retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_ref)) self.assertTrue(_dataset_exists(dataset_ref)) Config.CLIENT.delete_dataset(dataset_id) self.assertFalse(_dataset_exists(dataset_ref)) @@ -439,11 +416,11 @@ def test_delete_dataset_delete_contents_true(self): dataset_id = _make_dataset_id("delete_table_true_with_content") project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) - dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_ref)) + dataset = helpers.retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_ref)) table_id = "test_table" table_arg = Table(dataset.table(table_id), schema=SCHEMA) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) Config.CLIENT.delete_dataset(dataset, delete_contents=True) self.assertFalse(_table_exists(table)) @@ -455,7 +432,7 @@ def test_delete_dataset_delete_contents_false(self): table_id = "test_table" table_arg = Table(dataset.table(table_id), schema=SCHEMA) - retry_403(Config.CLIENT.create_table)(table_arg) + helpers.retry_403(Config.CLIENT.create_table)(table_arg) with self.assertRaises(exceptions.BadRequest): Config.CLIENT.delete_dataset(dataset) @@ -504,7 +481,7 @@ def test_list_tables(self): ] for table_name in 
tables_to_create: table = Table(dataset.table(table_name), schema=SCHEMA) - created_table = retry_403(Config.CLIENT.create_table)(table) + created_table = helpers.retry_403(Config.CLIENT.create_table)(table) self.to_delete.insert(0, created_table) # Retrieve the tables. @@ -534,7 +511,7 @@ def test_update_table(self): TABLE_NAME = "test_table" table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA) self.assertFalse(_table_exists(table_arg)) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) self.assertIsNone(table.friendly_name) @@ -574,7 +551,7 @@ def test_update_table_schema(self): TABLE_NAME = "test_table" table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA) self.assertFalse(_table_exists(table_arg)) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) voter = bigquery.SchemaField("voter", "BOOLEAN", mode="NULLABLE") @@ -674,7 +651,7 @@ def test_insert_rows_then_dump_table(self): ] table_arg = Table(dataset.table(TABLE_ID), schema=schema) self.assertFalse(_table_exists(table_arg)) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) @@ -732,413 +709,6 @@ def test_load_table_from_local_avro_file_then_dump_table(self): sorted(row_tuples, key=by_wavelength), sorted(ROWS, key=by_wavelength) ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_w_automatic_schema(self): - """Test that a DataFrame with dtypes that map well to BigQuery types - can be uploaded without specifying a schema. 
- - https://github.com/googleapis/google-cloud-python/issues/9044 - """ - df_data = collections.OrderedDict( - [ - ("bool_col", pandas.Series([True, False, True], dtype="bool")), - ( - "ts_col", - pandas.Series( - [ - datetime.datetime(2010, 1, 2, 3, 44, 50), - datetime.datetime(2011, 2, 3, 14, 50, 59), - datetime.datetime(2012, 3, 14, 15, 16), - ], - dtype="datetime64[ns]", - ).dt.tz_localize(pytz.utc), - ), - ( - "dt_col", - pandas.Series( - [ - datetime.datetime(2010, 1, 2, 3, 44, 50), - datetime.datetime(2011, 2, 3, 14, 50, 59), - datetime.datetime(2012, 3, 14, 15, 16), - ], - dtype="datetime64[ns]", - ), - ), - ("float32_col", pandas.Series([1.0, 2.0, 3.0], dtype="float32")), - ("float64_col", pandas.Series([4.0, 5.0, 6.0], dtype="float64")), - ("int8_col", pandas.Series([-12, -11, -10], dtype="int8")), - ("int16_col", pandas.Series([-9, -8, -7], dtype="int16")), - ("int32_col", pandas.Series([-6, -5, -4], dtype="int32")), - ("int64_col", pandas.Series([-3, -2, -1], dtype="int64")), - ("uint8_col", pandas.Series([0, 1, 2], dtype="uint8")), - ("uint16_col", pandas.Series([3, 4, 5], dtype="uint16")), - ("uint32_col", pandas.Series([6, 7, 8], dtype="uint32")), - ] - ) - dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) - - dataset_id = _make_dataset_id("bq_load_test") - self.temp_dataset(dataset_id) - table_id = "{}.{}.load_table_from_dataframe_w_automatic_schema".format( - Config.CLIENT.project, dataset_id - ) - - load_job = Config.CLIENT.load_table_from_dataframe(dataframe, table_id) - load_job.result() - - table = Config.CLIENT.get_table(table_id) - self.assertEqual( - tuple(table.schema), - ( - bigquery.SchemaField("bool_col", "BOOLEAN"), - bigquery.SchemaField("ts_col", "TIMESTAMP"), - # BigQuery does not support uploading DATETIME values from - # Parquet files. 
See: - # https://github.com/googleapis/google-cloud-python/issues/9996 - bigquery.SchemaField("dt_col", "TIMESTAMP"), - bigquery.SchemaField("float32_col", "FLOAT"), - bigquery.SchemaField("float64_col", "FLOAT"), - bigquery.SchemaField("int8_col", "INTEGER"), - bigquery.SchemaField("int16_col", "INTEGER"), - bigquery.SchemaField("int32_col", "INTEGER"), - bigquery.SchemaField("int64_col", "INTEGER"), - bigquery.SchemaField("uint8_col", "INTEGER"), - bigquery.SchemaField("uint16_col", "INTEGER"), - bigquery.SchemaField("uint32_col", "INTEGER"), - ), - ) - self.assertEqual(table.num_rows, 3) - - @unittest.skipIf( - pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIMUM_VERSION, - "Only `pandas version >=1.0.0` is supported", - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_w_nullable_int64_datatype(self): - """Test that a DataFrame containing column with None-type values and int64 datatype - can be uploaded if a BigQuery schema is specified. 
- - https://github.com/googleapis/python-bigquery/issues/22 - """ - - dataset_id = _make_dataset_id("bq_load_test") - self.temp_dataset(dataset_id) - table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format( - Config.CLIENT.project, dataset_id - ) - table_schema = (bigquery.SchemaField("x", "INTEGER", mode="NULLABLE"),) - table = retry_403(Config.CLIENT.create_table)( - Table(table_id, schema=table_schema) - ) - self.to_delete.insert(0, table) - - df_data = collections.OrderedDict( - [("x", pandas.Series([1, 2, None, 4], dtype="Int64"))] - ) - dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) - load_job = Config.CLIENT.load_table_from_dataframe(dataframe, table_id) - load_job.result() - table = Config.CLIENT.get_table(table_id) - self.assertEqual(tuple(table.schema), (bigquery.SchemaField("x", "INTEGER"),)) - self.assertEqual(table.num_rows, 4) - - @unittest.skipIf( - pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIMUM_VERSION, - "Only `pandas version >=1.0.0` is supported", - ) - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(self): - """Test that a DataFrame containing column with None-type values and int64 datatype - can be uploaded without specifying a schema. 
- - https://github.com/googleapis/python-bigquery/issues/22 - """ - - dataset_id = _make_dataset_id("bq_load_test") - self.temp_dataset(dataset_id) - table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format( - Config.CLIENT.project, dataset_id - ) - df_data = collections.OrderedDict( - [("x", pandas.Series([1, 2, None, 4], dtype="Int64"))] - ) - dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) - load_job = Config.CLIENT.load_table_from_dataframe(dataframe, table_id) - load_job.result() - table = Config.CLIENT.get_table(table_id) - self.assertEqual(tuple(table.schema), (bigquery.SchemaField("x", "INTEGER"),)) - self.assertEqual(table.num_rows, 4) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_w_nulls(self): - """Test that a DataFrame with null columns can be uploaded if a - BigQuery schema is specified. - - See: https://github.com/googleapis/google-cloud-python/issues/7370 - """ - # Schema with all scalar types. - scalars_schema = ( - bigquery.SchemaField("bool_col", "BOOLEAN"), - bigquery.SchemaField("bytes_col", "BYTES"), - bigquery.SchemaField("date_col", "DATE"), - bigquery.SchemaField("dt_col", "DATETIME"), - bigquery.SchemaField("float_col", "FLOAT"), - bigquery.SchemaField("geo_col", "GEOGRAPHY"), - bigquery.SchemaField("int_col", "INTEGER"), - bigquery.SchemaField("num_col", "NUMERIC"), - bigquery.SchemaField("str_col", "STRING"), - bigquery.SchemaField("time_col", "TIME"), - bigquery.SchemaField("ts_col", "TIMESTAMP"), - ) - if _BIGNUMERIC_SUPPORT: - scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) - - table_schema = scalars_schema + ( - # TODO: Array columns can't be read due to NULLABLE versus REPEATED - # mode mismatch. See: - # https://issuetracker.google.com/133415569#comment3 - # bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"), - # TODO: Support writing StructArrays to Parquet. 
See: - # https://jira.apache.org/jira/browse/ARROW-2587 - # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema), - ) - num_rows = 100 - nulls = [None] * num_rows - df_data = [ - ("bool_col", nulls), - ("bytes_col", nulls), - ("date_col", nulls), - ("dt_col", nulls), - ("float_col", nulls), - ("geo_col", nulls), - ("int_col", nulls), - ("num_col", nulls), - ("str_col", nulls), - ("time_col", nulls), - ("ts_col", nulls), - ] - if _BIGNUMERIC_SUPPORT: - df_data.append(("bignum_col", nulls)) - df_data = collections.OrderedDict(df_data) - dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) - - dataset_id = _make_dataset_id("bq_load_test") - self.temp_dataset(dataset_id) - table_id = "{}.{}.load_table_from_dataframe_w_nulls".format( - Config.CLIENT.project, dataset_id - ) - - # Create the table before loading so that schema mismatch errors are - # identified. - table = retry_403(Config.CLIENT.create_table)( - Table(table_id, schema=table_schema) - ) - self.to_delete.insert(0, table) - - job_config = bigquery.LoadJobConfig(schema=table_schema) - load_job = Config.CLIENT.load_table_from_dataframe( - dataframe, table_id, job_config=job_config - ) - load_job.result() - - table = Config.CLIENT.get_table(table) - self.assertEqual(tuple(table.schema), table_schema) - self.assertEqual(table.num_rows, num_rows) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_w_required(self): - """Test that a DataFrame with required columns can be uploaded if a - BigQuery schema is specified. 
- - See: https://github.com/googleapis/google-cloud-python/issues/8093 - """ - table_schema = ( - bigquery.SchemaField("name", "STRING", mode="REQUIRED"), - bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), - ) - - records = [{"name": "Chip", "age": 2}, {"name": "Dale", "age": 3}] - dataframe = pandas.DataFrame(records, columns=["name", "age"]) - job_config = bigquery.LoadJobConfig(schema=table_schema) - dataset_id = _make_dataset_id("bq_load_test") - self.temp_dataset(dataset_id) - table_id = "{}.{}.load_table_from_dataframe_w_required".format( - Config.CLIENT.project, dataset_id - ) - - # Create the table before loading so that schema mismatch errors are - # identified. - table = retry_403(Config.CLIENT.create_table)( - Table(table_id, schema=table_schema) - ) - self.to_delete.insert(0, table) - - job_config = bigquery.LoadJobConfig(schema=table_schema) - load_job = Config.CLIENT.load_table_from_dataframe( - dataframe, table_id, job_config=job_config - ) - load_job.result() - - table = Config.CLIENT.get_table(table) - self.assertEqual(tuple(table.schema), table_schema) - self.assertEqual(table.num_rows, 2) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_load_table_from_dataframe_w_explicit_schema(self): - # Schema with all scalar types. - # TODO: Uploading DATETIME columns currently fails, thus that field type - # is temporarily removed from the test. 
- # See: - # https://github.com/googleapis/python-bigquery/issues/61 - # https://issuetracker.google.com/issues/151765076 - scalars_schema = ( - bigquery.SchemaField("bool_col", "BOOLEAN"), - bigquery.SchemaField("bytes_col", "BYTES"), - bigquery.SchemaField("date_col", "DATE"), - # bigquery.SchemaField("dt_col", "DATETIME"), - bigquery.SchemaField("float_col", "FLOAT"), - bigquery.SchemaField("geo_col", "GEOGRAPHY"), - bigquery.SchemaField("int_col", "INTEGER"), - bigquery.SchemaField("num_col", "NUMERIC"), - bigquery.SchemaField("str_col", "STRING"), - bigquery.SchemaField("time_col", "TIME"), - bigquery.SchemaField("ts_col", "TIMESTAMP"), - ) - if _BIGNUMERIC_SUPPORT: - scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) - - table_schema = scalars_schema + ( - # TODO: Array columns can't be read due to NULLABLE versus REPEATED - # mode mismatch. See: - # https://issuetracker.google.com/133415569#comment3 - # bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"), - # TODO: Support writing StructArrays to Parquet. 
See: - # https://jira.apache.org/jira/browse/ARROW-2587 - # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema), - ) - - df_data = [ - ("bool_col", [True, None, False]), - ("bytes_col", [b"abc", None, b"def"]), - ("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)]), - # ( - # "dt_col", - # [ - # datetime.datetime(1, 1, 1, 0, 0, 0), - # None, - # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), - # ], - # ), - ("float_col", [float("-inf"), float("nan"), float("inf")]), - ( - "geo_col", - [ - "POINT(30 10)", - None, - "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", - ], - ), - ("int_col", [-9223372036854775808, None, 9223372036854775807]), - ( - "num_col", - [ - decimal.Decimal("-99999999999999999999999999999.999999999"), - None, - decimal.Decimal("99999999999999999999999999999.999999999"), - ], - ), - ("str_col", [u"abc", None, u"def"]), - ( - "time_col", - [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)], - ), - ( - "ts_col", - [ - datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), - None, - datetime.datetime( - 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc - ), - ], - ), - ] - if _BIGNUMERIC_SUPPORT: - df_data.append( - ( - "bignum_col", - [ - decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), - None, - decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), - ], - ) - ) - df_data = collections.OrderedDict(df_data) - dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) - - dataset_id = _make_dataset_id("bq_load_test") - self.temp_dataset(dataset_id) - table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema".format( - Config.CLIENT.project, dataset_id - ) - - job_config = bigquery.LoadJobConfig(schema=table_schema) - load_job = Config.CLIENT.load_table_from_dataframe( - dataframe, table_id, job_config=job_config - ) - load_job.result() - - table = Config.CLIENT.get_table(table_id) - self.assertEqual(tuple(table.schema), table_schema) - self.assertEqual(table.num_rows, 
3) - - @unittest.skipIf( - pyarrow is None or PYARROW_INSTALLED_VERSION < PYARROW_MINIMUM_VERSION, - "Only `pyarrow version >=0.17.0` is supported", - ) - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_load_table_from_dataframe_w_struct_datatype(self): - """Test that a DataFrame with struct datatype can be uploaded if a - BigQuery schema is specified. - - https://github.com/googleapis/python-bigquery/issues/21 - """ - dataset_id = _make_dataset_id("bq_load_test") - self.temp_dataset(dataset_id) - table_id = "{}.{}.load_table_from_dataframe_w_struct_datatype".format( - Config.CLIENT.project, dataset_id - ) - table_schema = [ - bigquery.SchemaField( - "bar", - "RECORD", - fields=[ - bigquery.SchemaField("id", "INTEGER", mode="REQUIRED"), - bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), - ], - mode="REQUIRED", - ), - ] - table = retry_403(Config.CLIENT.create_table)( - Table(table_id, schema=table_schema) - ) - self.to_delete.insert(0, table) - - df_data = [{"id": 1, "age": 21}, {"id": 2, "age": 22}, {"id": 2, "age": 23}] - dataframe = pandas.DataFrame(data={"bar": df_data}, columns=["bar"]) - - load_job = Config.CLIENT.load_table_from_dataframe(dataframe, table_id) - load_job.result() - - table = Config.CLIENT.get_table(table_id) - self.assertEqual(table.schema, table_schema) - self.assertEqual(table.num_rows, 3) - def test_load_table_from_json_basic_use(self): table_schema = ( bigquery.SchemaField("name", "STRING", mode="REQUIRED"), @@ -1160,7 +730,7 @@ def test_load_table_from_json_basic_use(self): # Create the table before loading so that schema mismatch errors are # identified. 
- table = retry_403(Config.CLIENT.create_table)( + table = helpers.retry_403(Config.CLIENT.create_table)( Table(table_id, schema=table_schema) ) self.to_delete.insert(0, table) @@ -1175,149 +745,6 @@ def test_load_table_from_json_basic_use(self): self.assertEqual(tuple(table.schema), table_schema) self.assertEqual(table.num_rows, 2) - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_load_table_from_dataframe_w_explicit_schema_source_format_csv(self): - from google.cloud.bigquery.job import SourceFormat - - table_schema = ( - bigquery.SchemaField("bool_col", "BOOLEAN"), - bigquery.SchemaField("bytes_col", "BYTES"), - bigquery.SchemaField("date_col", "DATE"), - bigquery.SchemaField("dt_col", "DATETIME"), - bigquery.SchemaField("float_col", "FLOAT"), - bigquery.SchemaField("geo_col", "GEOGRAPHY"), - bigquery.SchemaField("int_col", "INTEGER"), - bigquery.SchemaField("num_col", "NUMERIC"), - bigquery.SchemaField("bignum_col", "BIGNUMERIC"), - bigquery.SchemaField("str_col", "STRING"), - bigquery.SchemaField("time_col", "TIME"), - bigquery.SchemaField("ts_col", "TIMESTAMP"), - ) - df_data = collections.OrderedDict( - [ - ("bool_col", [True, None, False]), - ("bytes_col", ["abc", None, "def"]), - ( - "date_col", - [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)], - ), - ( - "dt_col", - [ - datetime.datetime(1, 1, 1, 0, 0, 0), - None, - datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), - ], - ), - ("float_col", [float("-inf"), float("nan"), float("inf")]), - ( - "geo_col", - [ - "POINT(30 10)", - None, - "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", - ], - ), - ("int_col", [-9223372036854775808, None, 9223372036854775807]), - ( - "num_col", - [ - decimal.Decimal("-99999999999999999999999999999.999999999"), - None, - decimal.Decimal("99999999999999999999999999999.999999999"), - ], - ), - ( - "bignum_col", - [ - decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), - None, - decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), - ], - ), - 
("str_col", [u"abc", None, u"def"]), - ( - "time_col", - [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)], - ), - ( - "ts_col", - [ - datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), - None, - datetime.datetime( - 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc - ), - ], - ), - ] - ) - dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) - - dataset_id = _make_dataset_id("bq_load_test") - self.temp_dataset(dataset_id) - table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema_csv".format( - Config.CLIENT.project, dataset_id - ) - - job_config = bigquery.LoadJobConfig( - schema=table_schema, source_format=SourceFormat.CSV - ) - load_job = Config.CLIENT.load_table_from_dataframe( - dataframe, table_id, job_config=job_config - ) - load_job.result() - - table = Config.CLIENT.get_table(table_id) - self.assertEqual(tuple(table.schema), table_schema) - self.assertEqual(table.num_rows, 3) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_load_table_from_dataframe_w_explicit_schema_source_format_csv_floats(self): - from google.cloud.bigquery.job import SourceFormat - - table_schema = (bigquery.SchemaField("float_col", "FLOAT"),) - df_data = collections.OrderedDict( - [ - ( - "float_col", - [ - 0.14285714285714285, - 0.51428571485748, - 0.87128748, - 1.807960649, - 2.0679610649, - 2.4406779661016949, - 3.7148514257, - 3.8571428571428572, - 1.51251252e40, - ], - ), - ] - ) - dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) - - dataset_id = _make_dataset_id("bq_load_test") - self.temp_dataset(dataset_id) - table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema_csv".format( - Config.CLIENT.project, dataset_id - ) - - job_config = bigquery.LoadJobConfig( - schema=table_schema, source_format=SourceFormat.CSV - ) - load_job = Config.CLIENT.load_table_from_dataframe( - dataframe, table_id, job_config=job_config - ) - load_job.result() - - table = 
Config.CLIENT.get_table(table_id) - rows = self._fetch_single_page(table) - floats = [r.values()[0] for r in rows] - self.assertEqual(tuple(table.schema), table_schema) - self.assertEqual(table.num_rows, 9) - self.assertEqual(floats, df_data["float_col"]) - def test_load_table_from_json_schema_autodetect(self): json_rows = [ {"name": "John", "age": 18, "birthday": "2001-10-15", "is_awesome": False}, @@ -1339,7 +766,7 @@ def test_load_table_from_json_schema_autodetect(self): bigquery.SchemaField("is_awesome", "BOOLEAN", mode="NULLABLE"), ) # create the table before loading so that the column order is predictable - table = retry_403(Config.CLIENT.create_table)( + table = helpers.retry_403(Config.CLIENT.create_table)( Table(table_id, schema=table_schema) ) self.to_delete.insert(0, table) @@ -1374,7 +801,7 @@ def test_load_avro_from_uri_then_dump_table(self): dataset = self.temp_dataset(_make_dataset_id("bq_load_test")) table_arg = dataset.table(table_name) - table = retry_403(Config.CLIENT.create_table)(Table(table_arg)) + table = helpers.retry_403(Config.CLIENT.create_table)(Table(table_arg)) self.to_delete.insert(0, table) config = bigquery.LoadJobConfig() @@ -1405,7 +832,7 @@ def test_load_table_from_uri_then_dump_table(self): dataset = self.temp_dataset(_make_dataset_id("load_gcs_then_dump")) table_arg = Table(dataset.table(TABLE_ID), schema=SCHEMA) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) config = bigquery.LoadJobConfig() @@ -1623,7 +1050,7 @@ def test_get_set_iam_policy(self): table_ref = Table(dataset.table(table_id)) self.assertFalse(_table_exists(table_ref)) - table = retry_403(Config.CLIENT.create_table)(table_ref) + table = helpers.retry_403(Config.CLIENT.create_table)(table_ref) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) @@ -1648,7 +1075,7 @@ def test_test_iam_permissions(self): table_ref = 
Table(dataset.table(table_id)) self.assertFalse(_table_exists(table_ref)) - table = retry_403(Config.CLIENT.create_table)(table_ref) + table = helpers.retry_403(Config.CLIENT.create_table)(table_ref) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) @@ -1672,7 +1099,7 @@ def test_job_cancel(self): dataset = self.temp_dataset(DATASET_ID) table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) job = Config.CLIENT.query(QUERY, job_id_prefix=JOB_ID_PREFIX) @@ -1743,75 +1170,12 @@ def test_query_w_legacy_sql_types(self): self.assertEqual(len(rows[0]), 1) self.assertEqual(rows[0][0], example["expected"]) - def _generate_standard_sql_types_examples(self): - naive = datetime.datetime(2016, 12, 5, 12, 41, 9) - naive_microseconds = datetime.datetime(2016, 12, 5, 12, 41, 9, 250000) - stamp = "%s %s" % (naive.date().isoformat(), naive.time().isoformat()) - stamp_microseconds = stamp + ".250000" - zoned = naive.replace(tzinfo=UTC) - zoned_microseconds = naive_microseconds.replace(tzinfo=UTC) - numeric = decimal.Decimal("123456789.123456789") - return [ - {"sql": "SELECT 1", "expected": 1}, - {"sql": "SELECT 1.3", "expected": 1.3}, - {"sql": "SELECT TRUE", "expected": True}, - {"sql": 'SELECT "ABC"', "expected": "ABC"}, - {"sql": 'SELECT CAST("foo" AS BYTES)', "expected": b"foo"}, - {"sql": 'SELECT TIMESTAMP "%s"' % (stamp,), "expected": zoned}, - { - "sql": 'SELECT TIMESTAMP "%s"' % (stamp_microseconds,), - "expected": zoned_microseconds, - }, - {"sql": 'SELECT DATETIME(TIMESTAMP "%s")' % (stamp,), "expected": naive}, - { - "sql": 'SELECT DATETIME(TIMESTAMP "%s")' % (stamp_microseconds,), - "expected": naive_microseconds, - }, - {"sql": 'SELECT DATE(TIMESTAMP "%s")' % (stamp,), "expected": naive.date()}, - {"sql": 'SELECT TIME(TIMESTAMP "%s")' % (stamp,), "expected": naive.time()}, - {"sql": 'SELECT 
NUMERIC "%s"' % (numeric,), "expected": numeric}, - {"sql": "SELECT (1, 2)", "expected": {"_field_1": 1, "_field_2": 2}}, - { - "sql": "SELECT ((1, 2), (3, 4), 5)", - "expected": { - "_field_1": {"_field_1": 1, "_field_2": 2}, - "_field_2": {"_field_1": 3, "_field_2": 4}, - "_field_3": 5, - }, - }, - {"sql": "SELECT [1, 2, 3]", "expected": [1, 2, 3]}, - { - "sql": "SELECT ([1, 2], 3, [4, 5])", - "expected": {"_field_1": [1, 2], "_field_2": 3, "_field_3": [4, 5]}, - }, - { - "sql": "SELECT [(1, 2, 3), (4, 5, 6)]", - "expected": [ - {"_field_1": 1, "_field_2": 2, "_field_3": 3}, - {"_field_1": 4, "_field_2": 5, "_field_3": 6}, - ], - }, - { - "sql": "SELECT [([1, 2, 3], 4), ([5, 6], 7)]", - "expected": [ - {u"_field_1": [1, 2, 3], u"_field_2": 4}, - {u"_field_1": [5, 6], u"_field_2": 7}, - ], - }, - { - "sql": "SELECT ARRAY(SELECT STRUCT([1, 2]))", - "expected": [{u"_field_1": [1, 2]}], - }, - {"sql": "SELECT ST_GeogPoint(1, 2)", "expected": "POINT(1 2)"}, - ] - def test_query_w_standard_sql_types(self): - examples = self._generate_standard_sql_types_examples() - for example in examples: - rows = list(Config.CLIENT.query(example["sql"])) + for sql, expected in helpers.STANDARD_SQL_EXAMPLES: + rows = list(Config.CLIENT.query(sql)) self.assertEqual(len(rows), 1) self.assertEqual(len(rows[0]), 1) - self.assertEqual(rows[0][0], example["expected"]) + self.assertEqual(rows[0][0], expected) def test_query_w_failed_query(self): from google.api_core.exceptions import BadRequest @@ -1950,13 +1314,12 @@ def test_query_statistics(self): self.assertGreater(len(plan), stages_with_inputs) def test_dbapi_w_standard_sql_types(self): - examples = self._generate_standard_sql_types_examples() - for example in examples: - Config.CURSOR.execute(example["sql"]) + for sql, expected in helpers.STANDARD_SQL_EXAMPLES: + Config.CURSOR.execute(sql) self.assertEqual(Config.CURSOR.rowcount, 1) row = Config.CURSOR.fetchone() self.assertEqual(len(row), 1) - self.assertEqual(row[0], 
example["expected"]) + self.assertEqual(row[0], expected) row = Config.CURSOR.fetchone() self.assertIsNone(row) @@ -2107,7 +1470,7 @@ def _load_table_for_dml(self, rows, dataset_id, table_id): greeting = bigquery.SchemaField("greeting", "STRING", mode="NULLABLE") table_ref = dataset.table(table_id) table_arg = Table(table_ref, schema=[greeting]) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) with _NamedTemporaryFile() as temp: @@ -2480,152 +1843,6 @@ def test_query_iter(self): row_tuples = [r.values() for r in query_job] self.assertEqual(row_tuples, [(1,)]) - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_query_results_to_dataframe(self): - QUERY = """ - SELECT id, author, time_ts, dead - FROM `bigquery-public-data.hacker_news.comments` - LIMIT 10 - """ - - df = Config.CLIENT.query(QUERY).result().to_dataframe() - - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 10) # verify the number of rows - column_names = ["id", "author", "time_ts", "dead"] - self.assertEqual(list(df), column_names) # verify the column names - exp_datatypes = { - "id": int, - "author": str, - "time_ts": pandas.Timestamp, - "dead": bool, - } - for index, row in df.iterrows(): - for col in column_names: - # all the schema fields are nullable, so None is acceptable - if not row[col] is None: - self.assertIsInstance(row[col], exp_datatypes[col]) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - def test_query_results_to_dataframe_w_bqstorage(self): - query = """ - SELECT id, author, time_ts, dead - FROM `bigquery-public-data.hacker_news.comments` - LIMIT 10 - """ - - bqstorage_client = bigquery_storage.BigQueryReadClient( - credentials=Config.CLIENT._credentials - ) - - df = Config.CLIENT.query(query).result().to_dataframe(bqstorage_client) - 
- self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 10) # verify the number of rows - column_names = ["id", "author", "time_ts", "dead"] - self.assertEqual(list(df), column_names) - exp_datatypes = { - "id": int, - "author": str, - "time_ts": pandas.Timestamp, - "dead": bool, - } - for index, row in df.iterrows(): - for col in column_names: - # all the schema fields are nullable, so None is acceptable - if not row[col] is None: - self.assertIsInstance(row[col], exp_datatypes[col]) - - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_insert_rows_from_dataframe(self): - SF = bigquery.SchemaField - schema = [ - SF("float_col", "FLOAT", mode="REQUIRED"), - SF("int_col", "INTEGER", mode="REQUIRED"), - SF("bool_col", "BOOLEAN", mode="REQUIRED"), - SF("string_col", "STRING", mode="NULLABLE"), - ] - - dataframe = pandas.DataFrame( - [ - { - "float_col": 1.11, - "bool_col": True, - "string_col": "my string", - "int_col": 10, - }, - { - "float_col": 2.22, - "bool_col": False, - "string_col": "another string", - "int_col": 20, - }, - { - "float_col": 3.33, - "bool_col": False, - "string_col": "another string", - "int_col": 30, - }, - { - "float_col": 4.44, - "bool_col": True, - "string_col": "another string", - "int_col": 40, - }, - { - "float_col": 5.55, - "bool_col": False, - "string_col": "another string", - "int_col": 50, - }, - { - "float_col": 6.66, - "bool_col": True, - # Include a NaN value, because pandas often uses NaN as a - # NULL value indicator. 
- "string_col": float("NaN"), - "int_col": 60, - }, - ] - ) - - table_id = "test_table" - dataset = self.temp_dataset(_make_dataset_id("issue_7553")) - table_arg = Table(dataset.table(table_id), schema=schema) - table = retry_403(Config.CLIENT.create_table)(table_arg) - self.to_delete.insert(0, table) - - chunk_errors = Config.CLIENT.insert_rows_from_dataframe( - table, dataframe, chunk_size=3 - ) - for errors in chunk_errors: - assert not errors - - # Use query to fetch rows instead of listing directly from the table so - # that we get values from the streaming buffer. - rows = list( - Config.CLIENT.query( - "SELECT * FROM `{}.{}.{}`".format( - table.project, table.dataset_id, table.table_id - ) - ) - ) - - sorted_rows = sorted(rows, key=operator.attrgetter("int_col")) - row_tuples = [r.values() for r in sorted_rows] - expected = [ - tuple(None if col != col else col for col in data_row) - for data_row in dataframe.itertuples(index=False) - ] - - assert len(row_tuples) == len(expected) - - for row, expected_row in zip(row_tuples, expected): - self.assertCountEqual(row, expected_row) # column order does not matter - def test_insert_rows_nested_nested(self): # See #2951 SF = bigquery.SchemaField @@ -2656,7 +1873,7 @@ def test_insert_rows_nested_nested(self): table_id = "test_table" dataset = self.temp_dataset(_make_dataset_id("issue_2951")) table_arg = Table(dataset.table(table_id), schema=schema) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) Config.CLIENT.insert_rows(table, to_insert) @@ -2696,7 +1913,7 @@ def test_insert_rows_nested_nested_dictionary(self): table_id = "test_table" dataset = self.temp_dataset(_make_dataset_id("issue_2951")) table_arg = Table(dataset.table(table_id), schema=schema) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) 
Config.CLIENT.insert_rows(table, to_insert) @@ -2740,8 +1957,8 @@ def test_create_routine(self): str(routine.reference) ) - routine = retry_403(Config.CLIENT.create_routine)(routine) - query_job = retry_403(Config.CLIENT.query)(query_string) + routine = helpers.retry_403(Config.CLIENT.create_routine)(routine) + query_job = helpers.retry_403(Config.CLIENT.query)(query_string) rows = list(query_job.result()) assert len(rows) == 1 @@ -2752,7 +1969,7 @@ def test_create_table_rows_fetch_nested_schema(self): dataset = self.temp_dataset(_make_dataset_id("create_table_nested_schema")) schema = _load_json_schema() table_arg = Table(dataset.table(table_name), schema=schema) - table = retry_403(Config.CLIENT.create_table)(table_arg) + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) self.to_delete.insert(0, table) self.assertTrue(_table_exists(table)) self.assertEqual(table.table_id, table_name) @@ -2872,85 +2089,6 @@ def test_nested_table_to_arrow(self): self.assertTrue(pyarrow.types.is_list(record_col[1].type)) self.assertTrue(pyarrow.types.is_int64(record_col[1].type.value_type)) - @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_nested_table_to_dataframe(self): - from google.cloud.bigquery.job import SourceFormat - from google.cloud.bigquery.job import WriteDisposition - - SF = bigquery.SchemaField - schema = [ - SF("string_col", "STRING", mode="NULLABLE"), - SF( - "record_col", - "RECORD", - mode="NULLABLE", - fields=[ - SF("nested_string", "STRING", mode="NULLABLE"), - SF("nested_repeated", "INTEGER", mode="REPEATED"), - SF( - "nested_record", - "RECORD", - mode="NULLABLE", - fields=[SF("nested_nested_string", "STRING", mode="NULLABLE")], - ), - ], - ), - SF("bigfloat_col", "FLOAT", mode="NULLABLE"), - SF("smallfloat_col", "FLOAT", mode="NULLABLE"), - ] - record = { - "nested_string": "another string value", - "nested_repeated": [0, 1, 2], - "nested_record": {"nested_nested_string": "some deep insight"}, - } - to_insert = [ - { - 
"string_col": "Some value", - "record_col": record, - "bigfloat_col": 3.14, - "smallfloat_col": 2.72, - } - ] - rows = [json.dumps(row) for row in to_insert] - body = io.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) - table_id = "test_table" - dataset = self.temp_dataset(_make_dataset_id("nested_df")) - table = dataset.table(table_id) - self.to_delete.insert(0, table) - job_config = bigquery.LoadJobConfig() - job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE - job_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON - job_config.schema = schema - # Load a table using a local JSON file from memory. - Config.CLIENT.load_table_from_file(body, table, job_config=job_config).result() - - df = Config.CLIENT.list_rows(table, selected_fields=schema).to_dataframe( - dtypes={"smallfloat_col": "float16"} - ) - - self.assertIsInstance(df, pandas.DataFrame) - self.assertEqual(len(df), 1) # verify the number of rows - exp_columns = ["string_col", "record_col", "bigfloat_col", "smallfloat_col"] - self.assertEqual(list(df), exp_columns) # verify the column names - row = df.iloc[0] - # verify the row content - self.assertEqual(row["string_col"], "Some value") - expected_keys = tuple(sorted(record.keys())) - row_keys = tuple(sorted(row["record_col"].keys())) - self.assertEqual(row_keys, expected_keys) - # Can't compare numpy arrays, which pyarrow encodes the embedded - # repeated column to, so convert to list. 
- self.assertEqual(list(row["record_col"]["nested_repeated"]), [0, 1, 2]) - # verify that nested data can be accessed with indices/keys - self.assertEqual(row["record_col"]["nested_repeated"][0], 0) - self.assertEqual( - row["record_col"]["nested_record"]["nested_nested_string"], - "some deep insight", - ) - # verify dtypes - self.assertEqual(df.dtypes["bigfloat_col"].name, "float64") - self.assertEqual(df.dtypes["smallfloat_col"].name, "float16") - def test_list_rows_empty_table(self): from google.cloud.bigquery.table import RowIterator @@ -2999,34 +2137,13 @@ def test_list_rows_page_size(self): page = next(pages) self.assertEqual(page.num_items, num_last_page) - @unittest.skipIf(pandas is None, "Requires `pandas`") - @unittest.skipIf( - bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" - ) - def test_list_rows_max_results_w_bqstorage(self): - table_ref = DatasetReference("bigquery-public-data", "utility_us").table( - "country_code_iso" - ) - bqstorage_client = bigquery_storage.BigQueryReadClient( - credentials=Config.CLIENT._credentials - ) - - row_iterator = Config.CLIENT.list_rows( - table_ref, - selected_fields=[bigquery.SchemaField("country_name", "STRING")], - max_results=100, - ) - dataframe = row_iterator.to_dataframe(bqstorage_client=bqstorage_client) - - self.assertEqual(len(dataframe.index), 100) - def temp_dataset(self, dataset_id, location=None): project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) dataset = Dataset(dataset_ref) if location: dataset.location = location - dataset = retry_403(Config.CLIENT.create_dataset)(dataset) + dataset = helpers.retry_403(Config.CLIENT.create_dataset)(dataset) self.to_delete.append(dataset) return dataset diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py new file mode 100644 index 000000000..1164e36da --- /dev/null +++ b/tests/system/test_pandas.py @@ -0,0 +1,801 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, 
Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""System tests for pandas connector.""" + +import collections +import datetime +import decimal +import json +import io +import operator + +import pkg_resources +import pytest +import pytz + +from google.cloud import bigquery +from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT +from . import helpers + + +bigquery_storage = pytest.importorskip( + "google.cloud.bigquery_storage", minversion="2.0.0" +) +pandas = pytest.importorskip("pandas", minversion="0.23.0") +pyarrow = pytest.importorskip("pyarrow", minversion="1.0.0") + + +PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version +PANDAS_INT64_VERSION = pkg_resources.parse_version("1.0.0") + + +def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_id): + """Test that a DataFrame with dtypes that map well to BigQuery types + can be uploaded without specifying a schema. 
+ + https://github.com/googleapis/google-cloud-python/issues/9044 + """ + df_data = collections.OrderedDict( + [ + ("bool_col", pandas.Series([True, False, True], dtype="bool")), + ( + "ts_col", + pandas.Series( + [ + datetime.datetime(2010, 1, 2, 3, 44, 50), + datetime.datetime(2011, 2, 3, 14, 50, 59), + datetime.datetime(2012, 3, 14, 15, 16), + ], + dtype="datetime64[ns]", + ).dt.tz_localize(pytz.utc), + ), + ( + "dt_col", + pandas.Series( + [ + datetime.datetime(2010, 1, 2, 3, 44, 50), + datetime.datetime(2011, 2, 3, 14, 50, 59), + datetime.datetime(2012, 3, 14, 15, 16), + ], + dtype="datetime64[ns]", + ), + ), + ("float32_col", pandas.Series([1.0, 2.0, 3.0], dtype="float32")), + ("float64_col", pandas.Series([4.0, 5.0, 6.0], dtype="float64")), + ("int8_col", pandas.Series([-12, -11, -10], dtype="int8")), + ("int16_col", pandas.Series([-9, -8, -7], dtype="int16")), + ("int32_col", pandas.Series([-6, -5, -4], dtype="int32")), + ("int64_col", pandas.Series([-3, -2, -1], dtype="int64")), + ("uint8_col", pandas.Series([0, 1, 2], dtype="uint8")), + ("uint16_col", pandas.Series([3, 4, 5], dtype="uint16")), + ("uint32_col", pandas.Series([6, 7, 8], dtype="uint32")), + ] + ) + dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) + + table_id = "{}.{}.load_table_from_dataframe_w_automatic_schema".format( + bigquery_client.project, dataset_id + ) + + load_job = bigquery_client.load_table_from_dataframe(dataframe, table_id) + load_job.result() + + table = bigquery_client.get_table(table_id) + assert tuple(table.schema) == ( + bigquery.SchemaField("bool_col", "BOOLEAN"), + bigquery.SchemaField("ts_col", "TIMESTAMP"), + # BigQuery does not support uploading DATETIME values from + # Parquet files. 
See: + # https://github.com/googleapis/google-cloud-python/issues/9996 + bigquery.SchemaField("dt_col", "TIMESTAMP"), + bigquery.SchemaField("float32_col", "FLOAT"), + bigquery.SchemaField("float64_col", "FLOAT"), + bigquery.SchemaField("int8_col", "INTEGER"), + bigquery.SchemaField("int16_col", "INTEGER"), + bigquery.SchemaField("int32_col", "INTEGER"), + bigquery.SchemaField("int64_col", "INTEGER"), + bigquery.SchemaField("uint8_col", "INTEGER"), + bigquery.SchemaField("uint16_col", "INTEGER"), + bigquery.SchemaField("uint32_col", "INTEGER"), + ) + assert table.num_rows == 3 + + +@pytest.mark.skipif( + PANDAS_INSTALLED_VERSION < PANDAS_INT64_VERSION, + reason="Only `pandas version >=1.0.0` is supported", +) +def test_load_table_from_dataframe_w_nullable_int64_datatype( + bigquery_client, dataset_id +): + """Test that a DataFrame containing column with None-type values and int64 datatype + can be uploaded if a BigQuery schema is specified. + + https://github.com/googleapis/python-bigquery/issues/22 + """ + table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format( + bigquery_client.project, dataset_id + ) + table_schema = (bigquery.SchemaField("x", "INTEGER", mode="NULLABLE"),) + table = helpers.retry_403(bigquery_client.create_table)( + bigquery.Table(table_id, schema=table_schema) + ) + + df_data = collections.OrderedDict( + [("x", pandas.Series([1, 2, None, 4], dtype="Int64"))] + ) + dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) + load_job = bigquery_client.load_table_from_dataframe(dataframe, table_id) + load_job.result() + table = bigquery_client.get_table(table_id) + assert tuple(table.schema) == (bigquery.SchemaField("x", "INTEGER"),) + assert table.num_rows == 4 + + +@pytest.mark.skipif( + PANDAS_INSTALLED_VERSION < PANDAS_INT64_VERSION, + reason="Only `pandas version >=1.0.0` is supported", +) +def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema( + bigquery_client, dataset_id +): + """Test that 
a DataFrame containing column with None-type values and int64 datatype + can be uploaded without specifying a schema. + + https://github.com/googleapis/python-bigquery/issues/22 + """ + + table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format( + bigquery_client.project, dataset_id + ) + df_data = collections.OrderedDict( + [("x", pandas.Series([1, 2, None, 4], dtype="Int64"))] + ) + dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) + load_job = bigquery_client.load_table_from_dataframe(dataframe, table_id) + load_job.result() + table = bigquery_client.get_table(table_id) + assert tuple(table.schema) == (bigquery.SchemaField("x", "INTEGER"),) + assert table.num_rows == 4 + + +def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): + """Test that a DataFrame with null columns can be uploaded if a + BigQuery schema is specified. + + See: https://github.com/googleapis/google-cloud-python/issues/7370 + """ + # Schema with all scalar types. + scalars_schema = ( + bigquery.SchemaField("bool_col", "BOOLEAN"), + bigquery.SchemaField("bytes_col", "BYTES"), + bigquery.SchemaField("date_col", "DATE"), + bigquery.SchemaField("dt_col", "DATETIME"), + bigquery.SchemaField("float_col", "FLOAT"), + bigquery.SchemaField("geo_col", "GEOGRAPHY"), + bigquery.SchemaField("int_col", "INTEGER"), + bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("str_col", "STRING"), + bigquery.SchemaField("time_col", "TIME"), + bigquery.SchemaField("ts_col", "TIMESTAMP"), + ) + if _BIGNUMERIC_SUPPORT: + scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) + + table_schema = scalars_schema + ( + # TODO: Array columns can't be read due to NULLABLE versus REPEATED + # mode mismatch. See: + # https://issuetracker.google.com/133415569#comment3 + # bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"), + # TODO: Support writing StructArrays to Parquet. 
See: + # https://jira.apache.org/jira/browse/ARROW-2587 + # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema), + ) + num_rows = 100 + nulls = [None] * num_rows + df_data = [ + ("bool_col", nulls), + ("bytes_col", nulls), + ("date_col", nulls), + ("dt_col", nulls), + ("float_col", nulls), + ("geo_col", nulls), + ("int_col", nulls), + ("num_col", nulls), + ("str_col", nulls), + ("time_col", nulls), + ("ts_col", nulls), + ] + if _BIGNUMERIC_SUPPORT: + df_data.append(("bignum_col", nulls)) + df_data = collections.OrderedDict(df_data) + dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) + + table_id = "{}.{}.load_table_from_dataframe_w_nulls".format( + bigquery_client.project, dataset_id + ) + + # Create the table before loading so that schema mismatch errors are + # identified. + table = helpers.retry_403(bigquery_client.create_table)( + bigquery.Table(table_id, schema=table_schema) + ) + + job_config = bigquery.LoadJobConfig(schema=table_schema) + load_job = bigquery_client.load_table_from_dataframe( + dataframe, table_id, job_config=job_config + ) + load_job.result() + + table = bigquery_client.get_table(table) + assert tuple(table.schema) == table_schema + assert table.num_rows == num_rows + + +def test_load_table_from_dataframe_w_required(bigquery_client, dataset_id): + """Test that a DataFrame with required columns can be uploaded if a + BigQuery schema is specified. 
+ + See: https://github.com/googleapis/google-cloud-python/issues/8093 + """ + table_schema = ( + bigquery.SchemaField("name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + ) + + records = [{"name": "Chip", "age": 2}, {"name": "Dale", "age": 3}] + dataframe = pandas.DataFrame(records, columns=["name", "age"]) + job_config = bigquery.LoadJobConfig(schema=table_schema) + table_id = "{}.{}.load_table_from_dataframe_w_required".format( + bigquery_client.project, dataset_id + ) + + # Create the table before loading so that schema mismatch errors are + # identified. + table = helpers.retry_403(bigquery_client.create_table)( + bigquery.Table(table_id, schema=table_schema) + ) + + job_config = bigquery.LoadJobConfig(schema=table_schema) + load_job = bigquery_client.load_table_from_dataframe( + dataframe, table_id, job_config=job_config + ) + load_job.result() + + table = bigquery_client.get_table(table) + assert tuple(table.schema) == table_schema + assert table.num_rows == 2 + + +def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id): + # Schema with all scalar types. + # TODO: Uploading DATETIME columns currently fails, thus that field type + # is temporarily removed from the test. 
+ # See: + # https://github.com/googleapis/python-bigquery/issues/61 + # https://issuetracker.google.com/issues/151765076 + scalars_schema = ( + bigquery.SchemaField("bool_col", "BOOLEAN"), + bigquery.SchemaField("bytes_col", "BYTES"), + bigquery.SchemaField("date_col", "DATE"), + # bigquery.SchemaField("dt_col", "DATETIME"), + bigquery.SchemaField("float_col", "FLOAT"), + bigquery.SchemaField("geo_col", "GEOGRAPHY"), + bigquery.SchemaField("int_col", "INTEGER"), + bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("str_col", "STRING"), + bigquery.SchemaField("time_col", "TIME"), + bigquery.SchemaField("ts_col", "TIMESTAMP"), + ) + if _BIGNUMERIC_SUPPORT: + scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) + + table_schema = scalars_schema + ( + # TODO: Array columns can't be read due to NULLABLE versus REPEATED + # mode mismatch. See: + # https://issuetracker.google.com/133415569#comment3 + # bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"), + # TODO: Support writing StructArrays to Parquet. 
See: + # https://jira.apache.org/jira/browse/ARROW-2587 + # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema), + ) + + df_data = [ + ("bool_col", [True, None, False]), + ("bytes_col", [b"abc", None, b"def"]), + ("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)]), + # ( + # "dt_col", + # [ + # datetime.datetime(1, 1, 1, 0, 0, 0), + # None, + # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + # ], + # ), + ("float_col", [float("-inf"), float("nan"), float("inf")]), + ( + "geo_col", + ["POINT(30 10)", None, "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))"], + ), + ("int_col", [-9223372036854775808, None, 9223372036854775807]), + ( + "num_col", + [ + decimal.Decimal("-99999999999999999999999999999.999999999"), + None, + decimal.Decimal("99999999999999999999999999999.999999999"), + ], + ), + ("str_col", ["abc", None, "def"]), + ( + "time_col", + [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)], + ), + ( + "ts_col", + [ + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + None, + datetime.datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc), + ], + ), + ] + if _BIGNUMERIC_SUPPORT: + df_data.append( + ( + "bignum_col", + [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], + ) + ) + df_data = collections.OrderedDict(df_data) + dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) + + table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema".format( + bigquery_client.project, dataset_id + ) + + job_config = bigquery.LoadJobConfig(schema=table_schema) + load_job = bigquery_client.load_table_from_dataframe( + dataframe, table_id, job_config=job_config + ) + load_job.result() + + table = bigquery_client.get_table(table_id) + assert tuple(table.schema) == table_schema + assert table.num_rows == 3 + + +def test_load_table_from_dataframe_w_struct_datatype(bigquery_client, dataset_id): + """Test that a 
DataFrame with struct datatype can be uploaded if a + BigQuery schema is specified. + + https://github.com/googleapis/python-bigquery/issues/21 + """ + table_id = "{}.{}.load_table_from_dataframe_w_struct_datatype".format( + bigquery_client.project, dataset_id + ) + table_schema = [ + bigquery.SchemaField( + "bar", + "RECORD", + fields=[ + bigquery.SchemaField("id", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + ], + mode="REQUIRED", + ), + ] + table = helpers.retry_403(bigquery_client.create_table)( + bigquery.Table(table_id, schema=table_schema) + ) + + df_data = [{"id": 1, "age": 21}, {"id": 2, "age": 22}, {"id": 2, "age": 23}] + dataframe = pandas.DataFrame(data={"bar": df_data}, columns=["bar"]) + + load_job = bigquery_client.load_table_from_dataframe(dataframe, table_id) + load_job.result() + + table = bigquery_client.get_table(table_id) + assert table.schema == table_schema + assert table.num_rows == 3 + + +def test_load_table_from_dataframe_w_explicit_schema_source_format_csv( + bigquery_client, dataset_id +): + from google.cloud.bigquery.job import SourceFormat + + table_schema = ( + bigquery.SchemaField("bool_col", "BOOLEAN"), + bigquery.SchemaField("bytes_col", "BYTES"), + bigquery.SchemaField("date_col", "DATE"), + bigquery.SchemaField("dt_col", "DATETIME"), + bigquery.SchemaField("float_col", "FLOAT"), + bigquery.SchemaField("geo_col", "GEOGRAPHY"), + bigquery.SchemaField("int_col", "INTEGER"), + bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), + bigquery.SchemaField("str_col", "STRING"), + bigquery.SchemaField("time_col", "TIME"), + bigquery.SchemaField("ts_col", "TIMESTAMP"), + ) + df_data = collections.OrderedDict( + [ + ("bool_col", [True, None, False]), + ("bytes_col", ["abc", None, "def"]), + ("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)],), + ( + "dt_col", + [ + datetime.datetime(1, 1, 1, 0, 0, 0), + None, + datetime.datetime(9999, 
12, 31, 23, 59, 59, 999999), + ], + ), + ("float_col", [float("-inf"), float("nan"), float("inf")]), + ( + "geo_col", + [ + "POINT(30 10)", + None, + "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))", + ], + ), + ("int_col", [-9223372036854775808, None, 9223372036854775807]), + ( + "num_col", + [ + decimal.Decimal("-99999999999999999999999999999.999999999"), + None, + decimal.Decimal("99999999999999999999999999999.999999999"), + ], + ), + ( + "bignum_col", + [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], + ), + ("str_col", ["abc", None, "def"]), + ( + "time_col", + [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)], + ), + ( + "ts_col", + [ + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + None, + datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc + ), + ], + ), + ] + ) + dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) + + table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema_csv".format( + bigquery_client.project, dataset_id + ) + + job_config = bigquery.LoadJobConfig( + schema=table_schema, source_format=SourceFormat.CSV + ) + load_job = bigquery_client.load_table_from_dataframe( + dataframe, table_id, job_config=job_config + ) + load_job.result() + + table = bigquery_client.get_table(table_id) + assert tuple(table.schema) == table_schema + assert table.num_rows == 3 + + +def test_load_table_from_dataframe_w_explicit_schema_source_format_csv_floats( + bigquery_client, dataset_id +): + from google.cloud.bigquery.job import SourceFormat + + table_schema = (bigquery.SchemaField("float_col", "FLOAT"),) + df_data = collections.OrderedDict( + [ + ( + "float_col", + [ + 0.14285714285714285, + 0.51428571485748, + 0.87128748, + 1.807960649, + 2.0679610649, + 2.4406779661016949, + 3.7148514257, + 3.8571428571428572, + 1.51251252e40, + ], + ), + ] + ) + dataframe = pandas.DataFrame(df_data, dtype="object", 
columns=df_data.keys()) + + table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema_csv".format( + bigquery_client.project, dataset_id + ) + + job_config = bigquery.LoadJobConfig( + schema=table_schema, source_format=SourceFormat.CSV + ) + load_job = bigquery_client.load_table_from_dataframe( + dataframe, table_id, job_config=job_config + ) + load_job.result() + + table = bigquery_client.get_table(table_id) + rows = bigquery_client.list_rows(table_id) + floats = [r.values()[0] for r in rows] + assert tuple(table.schema) == table_schema + assert table.num_rows == 9 + assert floats == df_data["float_col"] + + +def test_query_results_to_dataframe(bigquery_client): + QUERY = """ + SELECT id, author, time_ts, dead + FROM `bigquery-public-data.hacker_news.comments` + LIMIT 10 + """ + + df = bigquery_client.query(QUERY).result().to_dataframe() + + assert isinstance(df, pandas.DataFrame) + assert len(df) == 10 # verify the number of rows + column_names = ["id", "author", "time_ts", "dead"] + assert list(df) == column_names # verify the column names + exp_datatypes = { + "id": int, + "author": str, + "time_ts": pandas.Timestamp, + "dead": bool, + } + for _, row in df.iterrows(): + for col in column_names: + # all the schema fields are nullable, so None is acceptable + if not row[col] is None: + assert isinstance(row[col], exp_datatypes[col]) + + +def test_query_results_to_dataframe_w_bqstorage(bigquery_client): + query = """ + SELECT id, author, time_ts, dead + FROM `bigquery-public-data.hacker_news.comments` + LIMIT 10 + """ + + bqstorage_client = bigquery_storage.BigQueryReadClient( + credentials=bigquery_client._credentials + ) + + df = bigquery_client.query(query).result().to_dataframe(bqstorage_client) + + assert isinstance(df, pandas.DataFrame) + assert len(df) == 10 # verify the number of rows + column_names = ["id", "author", "time_ts", "dead"] + assert list(df) == column_names + exp_datatypes = { + "id": int, + "author": str, + "time_ts": pandas.Timestamp, + 
"dead": bool, + } + for index, row in df.iterrows(): + for col in column_names: + # all the schema fields are nullable, so None is acceptable + if not row[col] is None: + assert isinstance(row[col], exp_datatypes[col]) + + +def test_insert_rows_from_dataframe(bigquery_client, dataset_id): + SF = bigquery.SchemaField + schema = [ + SF("float_col", "FLOAT", mode="REQUIRED"), + SF("int_col", "INTEGER", mode="REQUIRED"), + SF("bool_col", "BOOLEAN", mode="REQUIRED"), + SF("string_col", "STRING", mode="NULLABLE"), + ] + + dataframe = pandas.DataFrame( + [ + { + "float_col": 1.11, + "bool_col": True, + "string_col": "my string", + "int_col": 10, + }, + { + "float_col": 2.22, + "bool_col": False, + "string_col": "another string", + "int_col": 20, + }, + { + "float_col": 3.33, + "bool_col": False, + "string_col": "another string", + "int_col": 30, + }, + { + "float_col": 4.44, + "bool_col": True, + "string_col": "another string", + "int_col": 40, + }, + { + "float_col": 5.55, + "bool_col": False, + "string_col": "another string", + "int_col": 50, + }, + { + "float_col": 6.66, + "bool_col": True, + # Include a NaN value, because pandas often uses NaN as a + # NULL value indicator. + "string_col": float("NaN"), + "int_col": 60, + }, + ] + ) + + table_id = f"{bigquery_client.project}.{dataset_id}.test_insert_rows_from_dataframe" + table_arg = bigquery.Table(table_id, schema=schema) + table = helpers.retry_403(bigquery_client.create_table)(table_arg) + + chunk_errors = bigquery_client.insert_rows_from_dataframe( + table, dataframe, chunk_size=3 + ) + for errors in chunk_errors: + assert not errors + + # Use query to fetch rows instead of listing directly from the table so + # that we get values from the streaming buffer. 
+ rows = list( + bigquery_client.query( + "SELECT * FROM `{}.{}.{}`".format( + table.project, table.dataset_id, table.table_id + ) + ) + ) + + sorted_rows = sorted(rows, key=operator.attrgetter("int_col")) + row_tuples = [r.values() for r in sorted_rows] + expected = [ + # Pandas often represents NULL values as NaN. Convert to None for + # easier comparison. + tuple(None if col != col else col for col in data_row) + for data_row in dataframe.itertuples(index=False) + ] + + assert len(row_tuples) == len(expected) + + for row, expected_row in zip(row_tuples, expected): + assert ( + # Use Counter to verify the same number of values in each, because + # column order does not matter. + collections.Counter(row) + == collections.Counter(expected_row) + ) + + +def test_nested_table_to_dataframe(bigquery_client, dataset_id): + from google.cloud.bigquery.job import SourceFormat + from google.cloud.bigquery.job import WriteDisposition + + SF = bigquery.SchemaField + schema = [ + SF("string_col", "STRING", mode="NULLABLE"), + SF( + "record_col", + "RECORD", + mode="NULLABLE", + fields=[ + SF("nested_string", "STRING", mode="NULLABLE"), + SF("nested_repeated", "INTEGER", mode="REPEATED"), + SF( + "nested_record", + "RECORD", + mode="NULLABLE", + fields=[SF("nested_nested_string", "STRING", mode="NULLABLE")], + ), + ], + ), + SF("bigfloat_col", "FLOAT", mode="NULLABLE"), + SF("smallfloat_col", "FLOAT", mode="NULLABLE"), + ] + record = { + "nested_string": "another string value", + "nested_repeated": [0, 1, 2], + "nested_record": {"nested_nested_string": "some deep insight"}, + } + to_insert = [ + { + "string_col": "Some value", + "record_col": record, + "bigfloat_col": 3.14, + "smallfloat_col": 2.72, + } + ] + rows = [json.dumps(row) for row in to_insert] + body = io.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) + table_id = f"{bigquery_client.project}.{dataset_id}.test_nested_table_to_dataframe" + job_config = bigquery.LoadJobConfig() + job_config.write_disposition = 
WriteDisposition.WRITE_TRUNCATE + job_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON + job_config.schema = schema + # Load a table using a local JSON file from memory. + bigquery_client.load_table_from_file(body, table_id, job_config=job_config).result() + + df = bigquery_client.list_rows(table_id, selected_fields=schema).to_dataframe( + dtypes={"smallfloat_col": "float16"} + ) + + assert isinstance(df, pandas.DataFrame) + assert len(df) == 1 # verify the number of rows + exp_columns = ["string_col", "record_col", "bigfloat_col", "smallfloat_col"] + assert list(df) == exp_columns # verify the column names + row = df.iloc[0] + # verify the row content + assert row["string_col"] == "Some value" + expected_keys = tuple(sorted(record.keys())) + row_keys = tuple(sorted(row["record_col"].keys())) + assert row_keys == expected_keys + # Can't compare numpy arrays, which pyarrow encodes the embedded + # repeated column to, so convert to list. + assert list(row["record_col"]["nested_repeated"]) == [0, 1, 2] + # verify that nested data can be accessed with indices/keys + assert row["record_col"]["nested_repeated"][0] == 0 + assert ( + row["record_col"]["nested_record"]["nested_nested_string"] + == "some deep insight" + ) + # verify dtypes + assert df.dtypes["bigfloat_col"].name == "float64" + assert df.dtypes["smallfloat_col"].name == "float16" + + +def test_list_rows_max_results_w_bqstorage(bigquery_client): + table_ref = bigquery.DatasetReference("bigquery-public-data", "utility_us").table( + "country_code_iso" + ) + bqstorage_client = bigquery_storage.BigQueryReadClient( + credentials=bigquery_client._credentials + ) + + row_iterator = bigquery_client.list_rows( + table_ref, + selected_fields=[bigquery.SchemaField("country_name", "STRING")], + max_results=100, + ) + with pytest.warns( + UserWarning, match="Cannot use bqstorage_client if max_results is set" + ): + dataframe = row_iterator.to_dataframe(bqstorage_client=bqstorage_client) + + assert 
len(dataframe.index) == 100 From a460f938f1d31c23067a1e09bf6227dd18e92364 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Tue, 16 Mar 2021 08:13:39 -0700 Subject: [PATCH 040/230] chore: add pre-commit-config to renovate ignore paths (#552) Disable renovate PRs on the .pre-commit-config.yaml which is templated from synthtool. https://docs.renovatebot.com/configuration-options/#ignorepaths Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Mon Mar 15 09:05:39 2021 -0600 Source-Repo: googleapis/synthtool Source-Sha: 2c54c473779ea731128cea61a3a6c975a08a5378 Source-Link: https://github.com/googleapis/synthtool/commit/2c54c473779ea731128cea61a3a6c975a08a5378 --- renovate.json | 3 ++- synth.metadata | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/renovate.json b/renovate.json index 4fa949311..f08bc22c9 100644 --- a/renovate.json +++ b/renovate.json @@ -1,5 +1,6 @@ { "extends": [ "config:base", ":preserveSemverRanges" - ] + ], + "ignorePaths": [".pre-commit-config.yaml"] } diff --git a/synth.metadata b/synth.metadata index cab985521..9f81d3045 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "f0259eb7ed4ff254ee238e87651992ff93481dae" + "sha": "efdf1c653770f7c03c17e31e3c2f279bb685637b" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "4dca4132c6d63788c6675e1b1e11e7b9225f8694" + "sha": "2c54c473779ea731128cea61a3a6c975a08a5378" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "4dca4132c6d63788c6675e1b1e11e7b9225f8694" + "sha": "2c54c473779ea731128cea61a3a6c975a08a5378" } } ], From 1b946ba23ee7df86114c6acb338ec34e6c92af6d Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 16 Mar 2021 16:24:21 +0100 Subject: [PATCH 041/230] fix: remove DB-API dependency on pyarrow with 
decimal query parameters (#551) * fix: DB API pyarrow dependency with decimal values DB API should gracefully handle the case when the optional pyarrow dependency is not installed. * Blacken DB API helpers tests * Refine the logic for recognizing NUMERIC Decimals --- google/cloud/bigquery/dbapi/_helpers.py | 23 ++++++--- tests/unit/test_dbapi__helpers.py | 66 +++++++++++++++++-------- 2 files changed, 60 insertions(+), 29 deletions(-) diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 6b36d6e43..69694c98c 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -19,16 +19,15 @@ import functools import numbers -try: - import pyarrow -except ImportError: # pragma: NO COVER - pyarrow = None - from google.cloud import bigquery from google.cloud.bigquery import table from google.cloud.bigquery.dbapi import exceptions +_NUMERIC_SERVER_MIN = decimal.Decimal("-9.9999999999999999999999999999999999999E+28") +_NUMERIC_SERVER_MAX = decimal.Decimal("9.9999999999999999999999999999999999999E+28") + + def scalar_to_query_parameter(value, name=None): """Convert a scalar value into a query parameter. @@ -189,12 +188,20 @@ def bigquery_scalar_type(value): elif isinstance(value, numbers.Real): return "FLOAT64" elif isinstance(value, decimal.Decimal): - # We check for NUMERIC before BIGNUMERIC in order to support pyarrow < 3.0. - scalar_object = pyarrow.scalar(value) - if isinstance(scalar_object, pyarrow.Decimal128Scalar): + vtuple = value.as_tuple() + # NUMERIC values have precision of 38 (number of digits) and scale of 9 (number + # of fractional digits), and their max absolute value must be strictly smaller + # than 1.0E+29. 
+ # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types + if ( + len(vtuple.digits) <= 38 # max precision: 38 + and vtuple.exponent >= -9 # max scale: 9 + and _NUMERIC_SERVER_MIN <= value <= _NUMERIC_SERVER_MAX + ): return "NUMERIC" else: return "BIGNUMERIC" + elif isinstance(value, str): return "STRING" elif isinstance(value, bytes): diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index c28c014d4..9a505c1ec 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -25,7 +25,6 @@ import google.cloud._helpers from google.cloud.bigquery import table -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions from tests.unit.helpers import _to_pyarrow @@ -39,9 +38,8 @@ def test_scalar_to_query_parameter(self): (123, "INT64"), (-123456789, "INT64"), (1.25, "FLOAT64"), - (decimal.Decimal("1.25"), "NUMERIC"), (b"I am some bytes", "BYTES"), - (u"I am a string", "STRING"), + ("I am a string", "STRING"), (datetime.date(2017, 4, 1), "DATE"), (datetime.time(12, 34, 56), "TIME"), (datetime.datetime(2012, 3, 4, 5, 6, 7), "DATETIME"), @@ -51,14 +49,17 @@ def test_scalar_to_query_parameter(self): ), "TIMESTAMP", ), + (decimal.Decimal("1.25"), "NUMERIC"), + (decimal.Decimal("9.9999999999999999999999999999999999999E+28"), "NUMERIC"), + (decimal.Decimal("1.0E+29"), "BIGNUMERIC"), # more than max NUMERIC value + (decimal.Decimal("1.123456789"), "NUMERIC"), + (decimal.Decimal("1.1234567891"), "BIGNUMERIC"), # scale > 9 + (decimal.Decimal("12345678901234567890123456789.012345678"), "NUMERIC"), + ( + decimal.Decimal("12345678901234567890123456789012345678"), + "BIGNUMERIC", # larger than max NUMERIC value, despite precision <=38 + ), ] - if _BIGNUMERIC_SUPPORT: - expected_types.append( - ( - decimal.Decimal("1.1234567890123456789012345678901234567890"), - "BIGNUMERIC", - ) - ) 
for value, expected_type in expected_types: msg = "value: {} expected_type: {}".format(value, expected_type) @@ -71,6 +72,33 @@ def test_scalar_to_query_parameter(self): self.assertEqual(named_parameter.type_, expected_type, msg=msg) self.assertEqual(named_parameter.value, value, msg=msg) + def test_decimal_to_query_parameter(self): # TODO: merge with previous test + + expected_types = [ + (decimal.Decimal("9.9999999999999999999999999999999999999E+28"), "NUMERIC"), + (decimal.Decimal("1.0E+29"), "BIGNUMERIC"), # more than max value + (decimal.Decimal("1.123456789"), "NUMERIC"), + (decimal.Decimal("1.1234567891"), "BIGNUMERIC"), # scale > 9 + (decimal.Decimal("12345678901234567890123456789.012345678"), "NUMERIC"), + ( + decimal.Decimal("12345678901234567890123456789012345678"), + "BIGNUMERIC", # larger than max size, even if precision <=38 + ), + ] + + for value, expected_type in expected_types: + msg = f"value: {value} expected_type: {expected_type}" + + parameter = _helpers.scalar_to_query_parameter(value) + self.assertIsNone(parameter.name, msg=msg) + self.assertEqual(parameter.type_, expected_type, msg=msg) + self.assertEqual(parameter.value, value, msg=msg) + + named_parameter = _helpers.scalar_to_query_parameter(value, name="myvar") + self.assertEqual(named_parameter.name, "myvar", msg=msg) + self.assertEqual(named_parameter.type_, expected_type, msg=msg) + self.assertEqual(named_parameter.value, value, msg=msg) + def test_scalar_to_query_parameter_w_unexpected_type(self): with self.assertRaises(exceptions.ProgrammingError): _helpers.scalar_to_query_parameter(value={"a": "dictionary"}) @@ -89,8 +117,9 @@ def test_array_to_query_parameter_valid_argument(self): ([123, -456, 0], "INT64"), ([1.25, 2.50], "FLOAT64"), ([decimal.Decimal("1.25")], "NUMERIC"), + ([decimal.Decimal("{d38}.{d38}".format(d38="9" * 38))], "BIGNUMERIC"), ([b"foo", b"bar"], "BYTES"), - ([u"foo", u"bar"], "STRING"), + (["foo", "bar"], "STRING"), ([datetime.date(2017, 4, 1), datetime.date(2018, 
4, 1)], "DATE"), ([datetime.time(12, 34, 56), datetime.time(10, 20, 30)], "TIME"), ( @@ -113,11 +142,6 @@ def test_array_to_query_parameter_valid_argument(self): ), ] - if _BIGNUMERIC_SUPPORT: - expected_types.append( - ([decimal.Decimal("{d38}.{d38}".format(d38="9" * 38))], "BIGNUMERIC") - ) - for values, expected_type in expected_types: msg = "value: {} expected_type: {}".format(values, expected_type) parameter = _helpers.array_to_query_parameter(values) @@ -134,7 +158,7 @@ def test_array_to_query_parameter_empty_argument(self): _helpers.array_to_query_parameter([]) def test_array_to_query_parameter_unsupported_sequence(self): - unsupported_iterables = [{10, 20, 30}, u"foo", b"bar", bytearray([65, 75, 85])] + unsupported_iterables = [{10, 20, 30}, "foo", b"bar", bytearray([65, 75, 85])] for iterable in unsupported_iterables: with self.assertRaises(exceptions.ProgrammingError): _helpers.array_to_query_parameter(iterable) @@ -144,7 +168,7 @@ def test_array_to_query_parameter_sequence_w_invalid_elements(self): _helpers.array_to_query_parameter([object(), 2, 7]) def test_to_query_parameters_w_dict(self): - parameters = {"somebool": True, "somestring": u"a-string-value"} + parameters = {"somebool": True, "somestring": "a-string-value"} query_parameters = _helpers.to_query_parameters(parameters) query_parameter_tuples = [] for param in query_parameters: @@ -154,7 +178,7 @@ def test_to_query_parameters_w_dict(self): sorted( [ ("somebool", "BOOL", True), - ("somestring", "STRING", u"a-string-value"), + ("somestring", "STRING", "a-string-value"), ] ), ) @@ -177,14 +201,14 @@ def test_to_query_parameters_w_dict_dict_param(self): _helpers.to_query_parameters(parameters) def test_to_query_parameters_w_list(self): - parameters = [True, u"a-string-value"] + parameters = [True, "a-string-value"] query_parameters = _helpers.to_query_parameters(parameters) query_parameter_tuples = [] for param in query_parameters: query_parameter_tuples.append((param.name, param.type_, 
param.value)) self.assertSequenceEqual( sorted(query_parameter_tuples), - sorted([(None, "BOOL", True), (None, "STRING", u"a-string-value")]), + sorted([(None, "BOOL", True), (None, "STRING", "a-string-value")]), ) def test_to_query_parameters_w_list_array_param(self): From af542f275aa5c09d34fd75cf172c3dd1db1520a4 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 16 Mar 2021 15:36:02 +0000 Subject: [PATCH 042/230] chore: release 2.12.0 (#547) :robot: I have created a release \*beep\* \*boop\* --- ## [2.12.0](https://www.github.com/googleapis/python-bigquery/compare/v2.11.0...v2.12.0) (2021-03-16) ### Features * make QueryJob.done() method more performant ([#544](https://www.github.com/googleapis/python-bigquery/issues/544)) ([a3ab9ef](https://www.github.com/googleapis/python-bigquery/commit/a3ab9efdd0758829845cfcb6ca0ac1f03ab44f64)) ### Bug Fixes * remove DB-API dependency on pyarrow with decimal query parameters ([#551](https://www.github.com/googleapis/python-bigquery/issues/551)) ([1b946ba](https://www.github.com/googleapis/python-bigquery/commit/1b946ba23ee7df86114c6acb338ec34e6c92af6d)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). 
--- CHANGELOG.md | 12 ++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 512d38108..e5ed7bc9d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,18 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.12.0](https://www.github.com/googleapis/python-bigquery/compare/v2.11.0...v2.12.0) (2021-03-16) + + +### Features + +* make QueryJob.done() method more performant ([#544](https://www.github.com/googleapis/python-bigquery/issues/544)) ([a3ab9ef](https://www.github.com/googleapis/python-bigquery/commit/a3ab9efdd0758829845cfcb6ca0ac1f03ab44f64)) + + +### Bug Fixes + +* remove DB-API dependency on pyarrow with decimal query parameters ([#551](https://www.github.com/googleapis/python-bigquery/issues/551)) ([1b946ba](https://www.github.com/googleapis/python-bigquery/commit/1b946ba23ee7df86114c6acb338ec34e6c92af6d)) + ## [2.11.0](https://www.github.com/googleapis/python-bigquery/compare/v2.10.0...v2.11.0) (2021-03-09) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index e6e357434..67e043bde 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.11.0" +__version__ = "2.12.0" From 4b14a29503987fa36d8c796508f080c64bca3eab Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 16 Mar 2021 18:26:04 +0100 Subject: [PATCH 043/230] test: remove duplicate test for Decimal scalars (#553) --- tests/unit/test_dbapi__helpers.py | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index 9a505c1ec..4b2724de0 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -72,33 +72,6 @@ def test_scalar_to_query_parameter(self): self.assertEqual(named_parameter.type_, expected_type, msg=msg) self.assertEqual(named_parameter.value, value, msg=msg) - def test_decimal_to_query_parameter(self): # TODO: merge with previous test - - expected_types = [ - (decimal.Decimal("9.9999999999999999999999999999999999999E+28"), "NUMERIC"), - (decimal.Decimal("1.0E+29"), "BIGNUMERIC"), # more than max value - (decimal.Decimal("1.123456789"), "NUMERIC"), - (decimal.Decimal("1.1234567891"), "BIGNUMERIC"), # scale > 9 - (decimal.Decimal("12345678901234567890123456789.012345678"), "NUMERIC"), - ( - decimal.Decimal("12345678901234567890123456789012345678"), - "BIGNUMERIC", # larger than max size, even if precision <=38 - ), - ] - - for value, expected_type in expected_types: - msg = f"value: {value} expected_type: {expected_type}" - - parameter = _helpers.scalar_to_query_parameter(value) - self.assertIsNone(parameter.name, msg=msg) - self.assertEqual(parameter.type_, expected_type, msg=msg) - self.assertEqual(parameter.value, value, msg=msg) - - named_parameter = _helpers.scalar_to_query_parameter(value, name="myvar") - self.assertEqual(named_parameter.name, "myvar", msg=msg) - self.assertEqual(named_parameter.type_, expected_type, msg=msg) - self.assertEqual(named_parameter.value, value, msg=msg) - def test_scalar_to_query_parameter_w_unexpected_type(self): with 
self.assertRaises(exceptions.ProgrammingError): _helpers.scalar_to_query_parameter(value={"a": "dictionary"}) From 7447f05f6a92bc4c047dff9d0377598b7af15f18 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 16 Mar 2021 18:49:35 +0100 Subject: [PATCH 044/230] chore(deps): update dependency google-cloud-bigquery to v2.12.0 (#554) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 34896627e..ef9264454 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,3 +1,3 @@ geojson==2.5.0 -google-cloud-bigquery==2.11.0 +google-cloud-bigquery==2.12.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index b55d2b3a4..db1c4b66a 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.11.0 +google-cloud-bigquery==2.12.0 google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.3 grpcio==1.36.1 From 84e646e6b7087a1626e56ad51eeb130f4ddfa2fb Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 19 Mar 2021 12:54:53 -0500 Subject: [PATCH 045/230] fix: avoid policy tags 403 error in `load_table_from_dataframe` (#557) * WIP: fix: don't set policy tags in load job from dataframe * copy fields parameter for struct support * update tests to allow missing description property * fix load from dataframe test on python 3.6 Also, check that sent schema matches DataFrame order, not table order --- google/cloud/bigquery/client.py | 13 +++- google/cloud/bigquery/schema.py | 43 ++++++----- tests/unit/job/test_load_config.py | 12 +-- tests/unit/test_client.py | 113 +++++++++++++++++++---------- tests/unit/test_external_config.py | 9 +-- tests/unit/test_schema.py | 109 +++++++++++----------------- 6 files changed, 150 insertions(+), 149 deletions(-) diff 
--git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index bdbcb767c..305d60d3b 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -2291,9 +2291,18 @@ def load_table_from_dataframe( name for name, _ in _pandas_helpers.list_columns_and_indexes(dataframe) ) - # schema fields not present in the dataframe are not needed job_config.schema = [ - field for field in table.schema if field.name in columns_and_indexes + # Field description and policy tags are not needed to + # serialize a data frame. + SchemaField( + field.name, + field.field_type, + mode=field.mode, + fields=field.fields, + ) + # schema fields not present in the dataframe are not needed + for field in table.schema + if field.name in columns_and_indexes ] job_config.schema = _pandas_helpers.dataframe_to_bq_schema( diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 9be27f3e8..680dcc138 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -19,6 +19,7 @@ from google.cloud.bigquery_v2 import types +_DEFAULT_VALUE = object() _STRUCT_TYPES = ("RECORD", "STRUCT") # SQL types reference: @@ -73,14 +74,18 @@ def __init__( name, field_type, mode="NULLABLE", - description=None, + description=_DEFAULT_VALUE, fields=(), policy_tags=None, ): - self._name = name - self._field_type = field_type - self._mode = mode - self._description = description + self._properties = { + "name": name, + "type": field_type, + } + if mode is not None: + self._properties["mode"] = mode.upper() + if description is not _DEFAULT_VALUE: + self._properties["description"] = description self._fields = tuple(fields) self._policy_tags = policy_tags @@ -98,7 +103,7 @@ def from_api_repr(cls, api_repr): """ # Handle optional properties with default values mode = api_repr.get("mode", "NULLABLE") - description = api_repr.get("description") + description = api_repr.get("description", _DEFAULT_VALUE) fields = api_repr.get("fields", ()) 
return cls( @@ -113,7 +118,7 @@ def from_api_repr(cls, api_repr): @property def name(self): """str: The name of the field.""" - return self._name + return self._properties["name"] @property def field_type(self): @@ -122,7 +127,7 @@ def field_type(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type """ - return self._field_type + return self._properties["type"] @property def mode(self): @@ -131,17 +136,17 @@ def mode(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode """ - return self._mode + return self._properties.get("mode") @property def is_nullable(self): """bool: whether 'mode' is 'nullable'.""" - return self._mode == "NULLABLE" + return self.mode == "NULLABLE" @property def description(self): """Optional[str]: description for the field.""" - return self._description + return self._properties.get("description") @property def fields(self): @@ -164,13 +169,7 @@ def to_api_repr(self): Returns: Dict: A dictionary representing the SchemaField in a serialized form. """ - # Put together the basic representation. See http://bit.ly/2hOAT5u. - answer = { - "mode": self.mode.upper(), - "name": self.name, - "type": self.field_type.upper(), - "description": self.description, - } + answer = self._properties.copy() # If this is a RECORD type, then sub-fields are also included, # add this to the serialized representation. @@ -193,10 +192,10 @@ def _key(self): Tuple: The contents of this :class:`~google.cloud.bigquery.schema.SchemaField`. 
""" return ( - self._name, - self._field_type.upper(), - self._mode.upper(), - self._description, + self.name, + self.field_type.upper(), + self.mode.upper(), + self.description, self._fields, self._policy_tags, ) diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index c18f51bff..63f15ec5a 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -434,13 +434,11 @@ def test_schema_setter_fields(self): "name": "full_name", "type": "STRING", "mode": "REQUIRED", - "description": None, } age_repr = { "name": "age", "type": "INTEGER", "mode": "REQUIRED", - "description": None, } self.assertEqual( config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} @@ -449,24 +447,18 @@ def test_schema_setter_fields(self): def test_schema_setter_valid_mappings_list(self): config = self._get_target_class()() - schema = [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, - ] - config.schema = schema - full_name_repr = { "name": "full_name", "type": "STRING", "mode": "REQUIRED", - "description": None, } age_repr = { "name": "age", "type": "INTEGER", "mode": "REQUIRED", - "description": None, } + schema = [full_name_repr, age_repr] + config.schema = schema self.assertEqual( config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} ) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 6c3263ea5..26ef340de 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1596,18 +1596,8 @@ def test_create_table_w_schema_and_query(self): { "schema": { "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": None, - }, + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, ] }, "view": 
{"query": query}, @@ -1641,18 +1631,8 @@ def test_create_table_w_schema_and_query(self): }, "schema": { "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": None, - }, + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, ] }, "view": {"query": query, "useLegacySql": False}, @@ -2602,7 +2582,7 @@ def test_update_table(self): "name": "age", "type": "INTEGER", "mode": "REQUIRED", - "description": None, + "description": "New field description", }, ] }, @@ -2613,8 +2593,10 @@ def test_update_table(self): } ) schema = [ - SchemaField("full_name", "STRING", mode="REQUIRED"), - SchemaField("age", "INTEGER", mode="REQUIRED"), + SchemaField("full_name", "STRING", mode="REQUIRED", description=None), + SchemaField( + "age", "INTEGER", mode="REQUIRED", description="New field description" + ), ] creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -2647,7 +2629,7 @@ def test_update_table(self): "name": "age", "type": "INTEGER", "mode": "REQUIRED", - "description": None, + "description": "New field description", }, ] }, @@ -2773,13 +2755,24 @@ def test_update_table_w_query(self): "name": "age", "type": "INTEGER", "mode": "REQUIRED", - "description": None, + "description": "this is a column", }, + {"name": "country", "type": "STRING", "mode": "NULLABLE"}, ] } schema = [ - SchemaField("full_name", "STRING", mode="REQUIRED"), - SchemaField("age", "INTEGER", mode="REQUIRED"), + SchemaField( + "full_name", + "STRING", + mode="REQUIRED", + # Explicitly unset the description. + description=None, + ), + SchemaField( + "age", "INTEGER", mode="REQUIRED", description="this is a column" + ), + # Omit the description to not make updates to it. 
+ SchemaField("country", "STRING"), ] resource = self._make_table_resource() resource.update( @@ -7658,18 +7651,47 @@ def test_load_table_from_file_w_invalid_job_config(self): def test_load_table_from_dataframe(self): from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES from google.cloud.bigquery import job - from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.schema import PolicyTagList, SchemaField client = self._make_client() - records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] - dataframe = pandas.DataFrame(records) + records = [ + {"id": 1, "age": 100, "accounts": [2, 3]}, + {"id": 2, "age": 60, "accounts": [5]}, + {"id": 3, "age": 40, "accounts": []}, + ] + # Mixup column order so that we can verify sent schema matches the + # serialized order, not the table column order. + column_order = ["age", "accounts", "id"] + dataframe = pandas.DataFrame(records, columns=column_order) + table_fields = { + "id": SchemaField( + "id", + "INTEGER", + mode="REQUIRED", + description="integer column", + policy_tags=PolicyTagList(names=("foo", "bar")), + ), + "age": SchemaField( + "age", + "INTEGER", + mode="NULLABLE", + description="age column", + policy_tags=PolicyTagList(names=("baz",)), + ), + "accounts": SchemaField( + "accounts", "INTEGER", mode="REPEATED", description="array column", + ), + } + get_table_schema = [ + table_fields["id"], + table_fields["age"], + table_fields["accounts"], + ] get_table_patch = mock.patch( "google.cloud.bigquery.client.Client.get_table", autospec=True, - return_value=mock.Mock( - schema=[SchemaField("id", "INTEGER"), SchemaField("age", "INTEGER")] - ), + return_value=mock.Mock(schema=get_table_schema), ) load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True @@ -7695,8 +7717,21 @@ def test_load_table_from_dataframe(self): sent_file = load_table_from_file.mock_calls[0][1][1] assert sent_file.closed - sent_config = 
load_table_from_file.mock_calls[0][2]["job_config"] - assert sent_config.source_format == job.SourceFormat.PARQUET + sent_config = load_table_from_file.mock_calls[0][2]["job_config"].to_api_repr()[ + "load" + ] + assert sent_config["sourceFormat"] == job.SourceFormat.PARQUET + for field_index, field in enumerate(sent_config["schema"]["fields"]): + assert field["name"] == column_order[field_index] + table_field = table_fields[field["name"]] + assert field["name"] == table_field.name + assert field["type"] == table_field.field_type + assert field["mode"] == table_field.mode + assert len(field.get("fields", [])) == len(table_field.fields) + # Omit unnecessary fields when they come from getting the table + # (not passed in via job_config) + assert "description" not in field + assert "policyTags" not in field @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 4b6ef5118..4ca2e9012 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -77,14 +77,7 @@ def test_to_api_repr_base(self): ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] exp_schema = { - "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - } - ] + "fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}] } got_resource = ec.to_api_repr() exp_resource = { diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 71bf6b5ae..87baaf379 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -35,19 +35,19 @@ def _make_one(self, *args, **kw): def test_constructor_defaults(self): field = self._make_one("test", "STRING") - self.assertEqual(field._name, "test") - self.assertEqual(field._field_type, "STRING") - self.assertEqual(field._mode, "NULLABLE") - self.assertIsNone(field._description) - 
self.assertEqual(field._fields, ()) + self.assertEqual(field.name, "test") + self.assertEqual(field.field_type, "STRING") + self.assertEqual(field.mode, "NULLABLE") + self.assertIsNone(field.description) + self.assertEqual(field.fields, ()) def test_constructor_explicit(self): field = self._make_one("test", "STRING", mode="REQUIRED", description="Testing") - self.assertEqual(field._name, "test") - self.assertEqual(field._field_type, "STRING") - self.assertEqual(field._mode, "REQUIRED") - self.assertEqual(field._description, "Testing") - self.assertEqual(field._fields, ()) + self.assertEqual(field.name, "test") + self.assertEqual(field.field_type, "STRING") + self.assertEqual(field.mode, "REQUIRED") + self.assertEqual(field.description, "Testing") + self.assertEqual(field.fields, ()) def test_constructor_subfields(self): sub_field1 = self._make_one("area_code", "STRING") @@ -55,13 +55,13 @@ def test_constructor_subfields(self): field = self._make_one( "phone_number", "RECORD", fields=[sub_field1, sub_field2] ) - self.assertEqual(field._name, "phone_number") - self.assertEqual(field._field_type, "RECORD") - self.assertEqual(field._mode, "NULLABLE") - self.assertIsNone(field._description) - self.assertEqual(len(field._fields), 2) - self.assertIs(field._fields[0], sub_field1) - self.assertIs(field._fields[1], sub_field2) + self.assertEqual(field.name, "phone_number") + self.assertEqual(field.field_type, "RECORD") + self.assertEqual(field.mode, "NULLABLE") + self.assertIsNone(field.description) + self.assertEqual(len(field.fields), 2) + self.assertEqual(field.fields[0], sub_field1) + self.assertEqual(field.fields[1], sub_field2) def test_constructor_with_policy_tags(self): from google.cloud.bigquery.schema import PolicyTagList @@ -70,12 +70,12 @@ def test_constructor_with_policy_tags(self): field = self._make_one( "test", "STRING", mode="REQUIRED", description="Testing", policy_tags=policy ) - self.assertEqual(field._name, "test") - self.assertEqual(field._field_type, 
"STRING") - self.assertEqual(field._mode, "REQUIRED") - self.assertEqual(field._description, "Testing") - self.assertEqual(field._fields, ()) - self.assertEqual(field._policy_tags, policy) + self.assertEqual(field.name, "test") + self.assertEqual(field.field_type, "STRING") + self.assertEqual(field.mode, "REQUIRED") + self.assertEqual(field.description, "Testing") + self.assertEqual(field.fields, ()) + self.assertEqual(field.policy_tags, policy) def test_to_api_repr(self): from google.cloud.bigquery.schema import PolicyTagList @@ -92,7 +92,6 @@ def test_to_api_repr(self): "mode": "NULLABLE", "name": "foo", "type": "INTEGER", - "description": None, "policyTags": {"names": ["foo", "bar"]}, }, ) @@ -104,18 +103,10 @@ def test_to_api_repr_with_subfield(self): self.assertEqual( field.to_api_repr(), { - "fields": [ - { - "mode": "NULLABLE", - "name": "bar", - "type": "INTEGER", - "description": None, - } - ], + "fields": [{"mode": "NULLABLE", "name": "bar", "type": "INTEGER"}], "mode": "REQUIRED", "name": "foo", "type": record_type, - "description": None, }, ) @@ -168,17 +159,17 @@ def test_from_api_repr_defaults(self): def test_name_property(self): name = "lemon-ness" schema_field = self._make_one(name, "INTEGER") - self.assertIs(schema_field.name, name) + self.assertEqual(schema_field.name, name) def test_field_type_property(self): field_type = "BOOLEAN" schema_field = self._make_one("whether", field_type) - self.assertIs(schema_field.field_type, field_type) + self.assertEqual(schema_field.field_type, field_type) def test_mode_property(self): mode = "REPEATED" schema_field = self._make_one("again", "FLOAT", mode=mode) - self.assertIs(schema_field.mode, mode) + self.assertEqual(schema_field.mode, mode) def test_is_nullable(self): mode = "NULLABLE" @@ -193,14 +184,14 @@ def test_is_not_nullable(self): def test_description_property(self): description = "It holds some data." 
schema_field = self._make_one("do", "TIMESTAMP", description=description) - self.assertIs(schema_field.description, description) + self.assertEqual(schema_field.description, description) def test_fields_property(self): sub_field1 = self._make_one("one", "STRING") sub_field2 = self._make_one("fish", "INTEGER") fields = (sub_field1, sub_field2) schema_field = self._make_one("boat", "RECORD", fields=fields) - self.assertIs(schema_field.fields, fields) + self.assertEqual(schema_field.fields, fields) def test_to_standard_sql_simple_type(self): sql_type = self._get_standard_sql_data_type_class() @@ -532,17 +523,10 @@ def test_defaults(self): resource = self._call_fut([full_name, age]) self.assertEqual(len(resource), 2) self.assertEqual( - resource[0], - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, + resource[0], {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, ) self.assertEqual( - resource[1], - {"name": "age", "type": "INTEGER", "mode": "REQUIRED", "description": None}, + resource[1], {"name": "age", "type": "INTEGER", "mode": "REQUIRED"} ) def test_w_description(self): @@ -552,7 +536,13 @@ def test_w_description(self): full_name = SchemaField( "full_name", "STRING", mode="REQUIRED", description=DESCRIPTION ) - age = SchemaField("age", "INTEGER", mode="REQUIRED") + age = SchemaField( + "age", + "INTEGER", + mode="REQUIRED", + # Explicitly unset description. 
+ description=None, + ) resource = self._call_fut([full_name, age]) self.assertEqual(len(resource), 2) self.assertEqual( @@ -581,13 +571,7 @@ def test_w_subfields(self): resource = self._call_fut([full_name, phone]) self.assertEqual(len(resource), 2) self.assertEqual( - resource[0], - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, + resource[0], {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, ) self.assertEqual( resource[1], @@ -595,20 +579,9 @@ def test_w_subfields(self): "name": "phone", "type": "RECORD", "mode": "REPEATED", - "description": None, "fields": [ - { - "name": "type", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, - { - "name": "number", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, + {"name": "type", "type": "STRING", "mode": "REQUIRED"}, + {"name": "number", "type": "STRING", "mode": "REQUIRED"}, ], }, ) From 97ee6ec6cd4bc9f833cd506dc6d244d103654cfd Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 22 Mar 2021 03:53:09 -0500 Subject: [PATCH 046/230] fix: avoid overly strict dependency on pyarrow 3.x (#564) Exclude "bignumeric_type" from the "all" extra --- setup.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 31b6a3ff7..99d3804ed 100644 --- a/setup.py +++ b/setup.py @@ -48,7 +48,7 @@ "grpcio >= 1.32.0, < 2.0dev", "pyarrow >= 1.0.0, < 4.0dev", ], - "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 4.0dev",], + "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 4.0dev"], "bignumeric_type": ["pyarrow >= 3.0.0, < 4.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ @@ -61,6 +61,11 @@ all_extras = [] for extra in extras: + # Exclude this extra from all to avoid overly strict dependencies on core + # libraries such as pyarrow. 
+ # https://github.com/googleapis/python-bigquery/issues/563 + if extra in {"bignumeric_type"}: + continue all_extras.extend(extras[extra]) extras["all"] = all_extras From d93986e0259952257f2571f60719b52099c29c0c Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 22 Mar 2021 03:54:39 -0500 Subject: [PATCH 047/230] feat: add `ExternalConfig.connection_id` property to connect to external sources (#560) * feat: add `ExternalConfig.connection_id` property to connect to external sources * add tests * fix unit tests --- google/cloud/bigquery/external_config.py | 17 +++++++++++++++++ tests/unit/test_external_config.py | 8 ++++++++ 2 files changed, 25 insertions(+) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 112dfdba4..59e4960f9 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -760,6 +760,23 @@ def schema(self): prop = self._properties.get("schema", {}) return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])] + @property + def connection_id(self): + """Optional[str]: [Experimental] ID of a BigQuery Connection API + resource. + + .. WARNING:: + + This feature is experimental. Pre-GA features may have limited + support, and changes to pre-GA features may not be compatible with + other pre-GA versions. 
+ """ + return self._properties.get("connectionId") + + @connection_id.setter + def connection_id(self, value): + self._properties["connectionId"] = value + @schema.setter def schema(self, value): prop = value diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 4ca2e9012..648a8717e 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -74,6 +74,7 @@ def test_to_api_repr_base(self): ec.autodetect = True ec.ignore_unknown_values = False ec.compression = "compression" + ec.connection_id = "path/to/connection" ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] exp_schema = { @@ -87,10 +88,17 @@ def test_to_api_repr_base(self): "autodetect": True, "ignoreUnknownValues": False, "compression": "compression", + "connectionId": "path/to/connection", "schema": exp_schema, } self.assertEqual(got_resource, exp_resource) + def test_connection_id(self): + ec = external_config.ExternalConfig("") + self.assertIsNone(ec.connection_id) + ec.connection_id = "path/to/connection" + self.assertEqual(ec.connection_id, "path/to/connection") + def test_schema_None(self): ec = external_config.ExternalConfig("") ec.schema = None From d2d532c4949b8e3ca674d90c24daafeaa7bb2bce Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Mon, 22 Mar 2021 07:41:26 -0700 Subject: [PATCH 048/230] chore(python): add kokoro configs for periodic builds against head (#565) This change should be non-destructive. Note for library repo maintainers: After applying this change, you can easily add (or change) periodic builds against head by adding config files in google3. See python-pubsub repo for example. 
Source-Author: Takashi Matsuo Source-Date: Fri Mar 19 11:17:59 2021 -0700 Source-Repo: googleapis/synthtool Source-Sha: 79c8dd7ee768292f933012d3a69a5b4676404cda Source-Link: https://github.com/googleapis/synthtool/commit/79c8dd7ee768292f933012d3a69a5b4676404cda --- .kokoro/samples/python3.6/periodic-head.cfg | 11 +++ .kokoro/samples/python3.7/periodic-head.cfg | 11 +++ .kokoro/samples/python3.8/periodic-head.cfg | 11 +++ .kokoro/test-samples-against-head.sh | 28 ++++++ .kokoro/test-samples-impl.sh | 102 ++++++++++++++++++++ .kokoro/test-samples.sh | 96 +++--------------- synth.metadata | 11 ++- 7 files changed, 187 insertions(+), 83 deletions(-) create mode 100644 .kokoro/samples/python3.6/periodic-head.cfg create mode 100644 .kokoro/samples/python3.7/periodic-head.cfg create mode 100644 .kokoro/samples/python3.8/periodic-head.cfg create mode 100755 .kokoro/test-samples-against-head.sh create mode 100755 .kokoro/test-samples-impl.sh diff --git a/.kokoro/samples/python3.6/periodic-head.cfg b/.kokoro/samples/python3.6/periodic-head.cfg new file mode 100644 index 000000000..f9cfcd33e --- /dev/null +++ b/.kokoro/samples/python3.6/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-pubsub/.kokoro/test-samples-against-head.sh" +} diff --git a/.kokoro/samples/python3.7/periodic-head.cfg b/.kokoro/samples/python3.7/periodic-head.cfg new file mode 100644 index 000000000..f9cfcd33e --- /dev/null +++ b/.kokoro/samples/python3.7/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-pubsub/.kokoro/test-samples-against-head.sh" +} diff --git a/.kokoro/samples/python3.8/periodic-head.cfg b/.kokoro/samples/python3.8/periodic-head.cfg 
new file mode 100644 index 000000000..f9cfcd33e --- /dev/null +++ b/.kokoro/samples/python3.8/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-pubsub/.kokoro/test-samples-against-head.sh" +} diff --git a/.kokoro/test-samples-against-head.sh b/.kokoro/test-samples-against-head.sh new file mode 100755 index 000000000..689948a23 --- /dev/null +++ b/.kokoro/test-samples-against-head.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# A customized test runner for samples. +# +# For periodic builds, you can specify this file for testing against head. + +# `-e` enables the script to automatically fail when a command fails +# `-o pipefail` sets the exit code to the rightmost comment to exit with a non-zero +set -eo pipefail +# Enables `**` to include files nested inside sub-folders +shopt -s globstar + +cd github/python-bigquery + +exec .kokoro/test-samples-impl.sh diff --git a/.kokoro/test-samples-impl.sh b/.kokoro/test-samples-impl.sh new file mode 100755 index 000000000..cf5de74c1 --- /dev/null +++ b/.kokoro/test-samples-impl.sh @@ -0,0 +1,102 @@ +#!/bin/bash +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# `-e` enables the script to automatically fail when a command fails +# `-o pipefail` sets the exit code to the rightmost comment to exit with a non-zero +set -eo pipefail +# Enables `**` to include files nested inside sub-folders +shopt -s globstar + +# Exit early if samples directory doesn't exist +if [ ! -d "./samples" ]; then + echo "No tests run. `./samples` not found" + exit 0 +fi + +# Disable buffering, so that the logs stream through. +export PYTHONUNBUFFERED=1 + +# Debug: show build environment +env | grep KOKORO + +# Install nox +python3.6 -m pip install --upgrade --quiet nox + +# Use secrets acessor service account to get secrets +if [[ -f "${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" ]]; then + gcloud auth activate-service-account \ + --key-file="${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" \ + --project="cloud-devrel-kokoro-resources" +fi + +# This script will create 3 files: +# - testing/test-env.sh +# - testing/service-account.json +# - testing/client-secrets.json +./scripts/decrypt-secrets.sh + +source ./testing/test-env.sh +export GOOGLE_APPLICATION_CREDENTIALS=$(pwd)/testing/service-account.json + +# For cloud-run session, we activate the service account for gcloud sdk. +gcloud auth activate-service-account \ + --key-file "${GOOGLE_APPLICATION_CREDENTIALS}" + +export GOOGLE_CLIENT_SECRETS=$(pwd)/testing/client-secrets.json + +echo -e "\n******************** TESTING PROJECTS ********************" + +# Switch to 'fail at end' to allow all tests to complete before exiting. 
+set +e +# Use RTN to return a non-zero value if the test fails. +RTN=0 +ROOT=$(pwd) +# Find all requirements.txt in the samples directory (may break on whitespace). +for file in samples/**/requirements.txt; do + cd "$ROOT" + # Navigate to the project folder. + file=$(dirname "$file") + cd "$file" + + echo "------------------------------------------------------------" + echo "- testing $file" + echo "------------------------------------------------------------" + + # Use nox to execute the tests for the project. + python3.6 -m nox -s "$RUN_TESTS_SESSION" + EXIT=$? + + # If this is a periodic build, send the test log to the FlakyBot. + # See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot. + if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then + chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot + $KOKORO_GFILE_DIR/linux_amd64/flakybot + fi + + if [[ $EXIT -ne 0 ]]; then + RTN=1 + echo -e "\n Testing failed: Nox returned a non-zero exit code. \n" + else + echo -e "\n Testing completed.\n" + fi + +done +cd "$ROOT" + +# Workaround for Kokoro permissions issue: delete secrets +rm testing/{test-env.sh,client-secrets.json,service-account.json} + +exit "$RTN" diff --git a/.kokoro/test-samples.sh b/.kokoro/test-samples.sh index 3ce8994cb..62ef534cd 100755 --- a/.kokoro/test-samples.sh +++ b/.kokoro/test-samples.sh @@ -13,6 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +# The default test runner for samples. +# +# For periodic builds, we rewinds the repo to the latest release, and +# run test-samples-impl.sh. # `-e` enables the script to automatically fail when a command fails # `-o pipefail` sets the exit code to the rightmost comment to exit with a non-zero @@ -24,87 +28,19 @@ cd github/python-bigquery # Run periodic samples tests at latest release if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then + # preserving the test runner implementation. 
+ cp .kokoro/test-samples-impl.sh "${TMPDIR}/test-samples-impl.sh" + echo "--- IMPORTANT IMPORTANT IMPORTANT ---" + echo "Now we rewind the repo back to the latest release..." LATEST_RELEASE=$(git describe --abbrev=0 --tags) git checkout $LATEST_RELEASE -fi - -# Exit early if samples directory doesn't exist -if [ ! -d "./samples" ]; then - echo "No tests run. `./samples` not found" - exit 0 -fi - -# Disable buffering, so that the logs stream through. -export PYTHONUNBUFFERED=1 - -# Debug: show build environment -env | grep KOKORO - -# Install nox -python3.6 -m pip install --upgrade --quiet nox - -# Use secrets acessor service account to get secrets -if [[ -f "${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" ]]; then - gcloud auth activate-service-account \ - --key-file="${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" \ - --project="cloud-devrel-kokoro-resources" -fi - -# This script will create 3 files: -# - testing/test-env.sh -# - testing/service-account.json -# - testing/client-secrets.json -./scripts/decrypt-secrets.sh - -source ./testing/test-env.sh -export GOOGLE_APPLICATION_CREDENTIALS=$(pwd)/testing/service-account.json - -# For cloud-run session, we activate the service account for gcloud sdk. -gcloud auth activate-service-account \ - --key-file "${GOOGLE_APPLICATION_CREDENTIALS}" - -export GOOGLE_CLIENT_SECRETS=$(pwd)/testing/client-secrets.json - -echo -e "\n******************** TESTING PROJECTS ********************" - -# Switch to 'fail at end' to allow all tests to complete before exiting. -set +e -# Use RTN to return a non-zero value if the test fails. -RTN=0 -ROOT=$(pwd) -# Find all requirements.txt in the samples directory (may break on whitespace). -for file in samples/**/requirements.txt; do - cd "$ROOT" - # Navigate to the project folder. 
- file=$(dirname "$file") - cd "$file" - - echo "------------------------------------------------------------" - echo "- testing $file" - echo "------------------------------------------------------------" - - # Use nox to execute the tests for the project. - python3.6 -m nox -s "$RUN_TESTS_SESSION" - EXIT=$? - - # If this is a periodic build, send the test log to the FlakyBot. - # See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot. - if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then - chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot - $KOKORO_GFILE_DIR/linux_amd64/flakybot + echo "The current head is: " + echo $(git rev-parse --verify HEAD) + echo "--- IMPORTANT IMPORTANT IMPORTANT ---" + # move back the test runner implementation if there's no file. + if [ ! -f .kokoro/test-samples-impl.sh ]; then + cp "${TMPDIR}/test-samples-impl.sh" .kokoro/test-samples-impl.sh fi +fi - if [[ $EXIT -ne 0 ]]; then - RTN=1 - echo -e "\n Testing failed: Nox returned a non-zero exit code. 
\n" - else - echo -e "\n Testing completed.\n" - fi - -done -cd "$ROOT" - -# Workaround for Kokoro permissions issue: delete secrets -rm testing/{test-env.sh,client-secrets.json,service-account.json} - -exit "$RTN" +exec .kokoro/test-samples-impl.sh diff --git a/synth.metadata b/synth.metadata index 9f81d3045..2425b03fb 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "efdf1c653770f7c03c17e31e3c2f279bb685637b" + "sha": "84e646e6b7087a1626e56ad51eeb130f4ddfa2fb" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "2c54c473779ea731128cea61a3a6c975a08a5378" + "sha": "79c8dd7ee768292f933012d3a69a5b4676404cda" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "2c54c473779ea731128cea61a3a6c975a08a5378" + "sha": "79c8dd7ee768292f933012d3a69a5b4676404cda" } } ], @@ -74,16 +74,21 @@ ".kokoro/samples/lint/presubmit.cfg", ".kokoro/samples/python3.6/common.cfg", ".kokoro/samples/python3.6/continuous.cfg", + ".kokoro/samples/python3.6/periodic-head.cfg", ".kokoro/samples/python3.6/periodic.cfg", ".kokoro/samples/python3.6/presubmit.cfg", ".kokoro/samples/python3.7/common.cfg", ".kokoro/samples/python3.7/continuous.cfg", + ".kokoro/samples/python3.7/periodic-head.cfg", ".kokoro/samples/python3.7/periodic.cfg", ".kokoro/samples/python3.7/presubmit.cfg", ".kokoro/samples/python3.8/common.cfg", ".kokoro/samples/python3.8/continuous.cfg", + ".kokoro/samples/python3.8/periodic-head.cfg", ".kokoro/samples/python3.8/periodic.cfg", ".kokoro/samples/python3.8/presubmit.cfg", + ".kokoro/test-samples-against-head.sh", + ".kokoro/test-samples-impl.sh", ".kokoro/test-samples.sh", ".kokoro/trampoline.sh", ".kokoro/trampoline_v2.sh", From 8f274e8fad7308eca09c055d17d31f58fdc86909 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" 
<55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 22 Mar 2021 09:49:45 -0500 Subject: [PATCH 049/230] chore: release 2.13.0 (#568) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 13 +++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e5ed7bc9d..9cdcdf5fb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,19 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.13.0](https://www.github.com/googleapis/python-bigquery/compare/v2.12.0...v2.13.0) (2021-03-22) + + +### Features + +* add `ExternalConfig.connection_id` property to connect to external sources ([#560](https://www.github.com/googleapis/python-bigquery/issues/560)) ([d93986e](https://www.github.com/googleapis/python-bigquery/commit/d93986e0259952257f2571f60719b52099c29c0c)) + + +### Bug Fixes + +* avoid overly strict dependency on pyarrow 3.x ([#564](https://www.github.com/googleapis/python-bigquery/issues/564)) ([97ee6ec](https://www.github.com/googleapis/python-bigquery/commit/97ee6ec6cd4bc9f833cd506dc6d244d103654cfd)) +* avoid policy tags 403 error in `load_table_from_dataframe` ([#557](https://www.github.com/googleapis/python-bigquery/issues/557)) ([84e646e](https://www.github.com/googleapis/python-bigquery/commit/84e646e6b7087a1626e56ad51eeb130f4ddfa2fb)) + ## [2.12.0](https://www.github.com/googleapis/python-bigquery/compare/v2.11.0...v2.12.0) (2021-03-16) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 67e043bde..b6000e20f 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.12.0" +__version__ = "2.13.0" From a3edb8b921e029e2c03d33302d408ad5d4e9d4ad Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 23 Mar 2021 15:06:24 -0500 Subject: [PATCH 050/230] fix: add ConnectionError to default retry (#571) --- google/cloud/bigquery/retry.py | 8 ++++---- setup.py | 1 + testing/constraints-3.6.txt | 1 + tests/unit/test_retry.py | 9 +++++++++ 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index 4bc4b757f..20a8e7b13 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -14,6 +14,7 @@ from google.api_core import exceptions from google.api_core import retry +import requests.exceptions _RETRYABLE_REASONS = frozenset( @@ -21,9 +22,11 @@ ) _UNSTRUCTURED_RETRYABLE_TYPES = ( + ConnectionError, exceptions.TooManyRequests, exceptions.InternalServerError, exceptions.BadGateway, + requests.exceptions.ConnectionError, ) @@ -33,10 +36,7 @@ def _should_retry(exc): We retry if and only if the 'reason' is 'backendError' or 'rateLimitExceeded'. """ - if not hasattr(exc, "errors"): - return False - - if len(exc.errors) == 0: + if not hasattr(exc, "errors") or len(exc.errors) == 0: # Check for unstructured error returns, e.g. 
from GFE return isinstance(exc, _UNSTRUCTURED_RETRYABLE_TYPES) diff --git a/setup.py b/setup.py index 99d3804ed..12a9bde31 100644 --- a/setup.py +++ b/setup.py @@ -35,6 +35,7 @@ "google-resumable-media >= 0.6.0, < 2.0dev", "packaging >= 14.3", "protobuf >= 3.12.0", + "requests >= 2.18.0, < 3.0.0dev", ] extras = { "bqstorage": [ diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index c4a5c51be..322373eba 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -17,5 +17,6 @@ pandas==0.23.0 proto-plus==1.10.0 protobuf==3.12.0 pyarrow==1.0.0 +requests==2.18.0 six==1.13.0 tqdm==4.7.4 diff --git a/tests/unit/test_retry.py b/tests/unit/test_retry.py index d9f867cb3..318a54d34 100644 --- a/tests/unit/test_retry.py +++ b/tests/unit/test_retry.py @@ -15,6 +15,7 @@ import unittest import mock +import requests.exceptions class Test_should_retry(unittest.TestCase): @@ -42,6 +43,14 @@ def test_w_rateLimitExceeded(self): exc = mock.Mock(errors=[{"reason": "rateLimitExceeded"}], spec=["errors"]) self.assertTrue(self._call_fut(exc)) + def test_w_unstructured_connectionerror(self): + exc = ConnectionError() + self.assertTrue(self._call_fut(exc)) + + def test_w_unstructured_requests_connectionerror(self): + exc = requests.exceptions.ConnectionError() + self.assertTrue(self._call_fut(exc)) + def test_w_unstructured_too_many_requests(self): from google.api_core.exceptions import TooManyRequests From 0fc3a55eb3051ed114f8b3d3d8cdec054994cd84 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 23 Mar 2021 21:06:52 +0100 Subject: [PATCH 051/230] chore(deps): update dependency google-cloud-bigquery to v2.13.0 (#570) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index ef9264454..c5f60911e 100644 --- a/samples/geography/requirements.txt +++ 
b/samples/geography/requirements.txt @@ -1,3 +1,3 @@ geojson==2.5.0 -google-cloud-bigquery==2.12.0 +google-cloud-bigquery==2.13.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index db1c4b66a..abbe6fde4 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.12.0 +google-cloud-bigquery==2.13.0 google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.3 grpcio==1.36.1 From e175d3a26f68e1bc5148bf055089dbfc1b83c76a Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 23 Mar 2021 20:20:07 +0000 Subject: [PATCH 052/230] chore: release 2.13.1 (#572) :robot: I have created a release \*beep\* \*boop\* --- ### [2.13.1](https://www.github.com/googleapis/python-bigquery/compare/v2.13.0...v2.13.1) (2021-03-23) ### Bug Fixes * add ConnectionError to default retry ([#571](https://www.github.com/googleapis/python-bigquery/issues/571)) ([a3edb8b](https://www.github.com/googleapis/python-bigquery/commit/a3edb8b921e029e2c03d33302d408ad5d4e9d4ad)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). 
--- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9cdcdf5fb..5dc2c8838 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.13.1](https://www.github.com/googleapis/python-bigquery/compare/v2.13.0...v2.13.1) (2021-03-23) + + +### Bug Fixes + +* add ConnectionError to default retry ([#571](https://www.github.com/googleapis/python-bigquery/issues/571)) ([a3edb8b](https://www.github.com/googleapis/python-bigquery/commit/a3edb8b921e029e2c03d33302d408ad5d4e9d4ad)) + ## [2.13.0](https://www.github.com/googleapis/python-bigquery/compare/v2.12.0...v2.13.0) (2021-03-22) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index b6000e20f..2330d0c2c 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.13.0" +__version__ = "2.13.1" From 907a1e08007d6f71ddec3a2259631cf476f7d311 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Wed, 31 Mar 2021 08:00:39 -0700 Subject: [PATCH 053/230] chore(deps): update precommit hook pycqa/flake8 to v3.9.0 (#574) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [pycqa/flake8](https://gitlab.com/pycqa/flake8) | repository | minor | `3.8.4` -> `3.9.0` | --- ### Release Notes
pycqa/flake8 ### [`v3.9.0`](https://gitlab.com/pycqa/flake8/compare/3.8.4...3.9.0) [Compare Source](https://gitlab.com/pycqa/flake8/compare/3.8.4...3.9.0)
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/synthtool). Source-Author: WhiteSource Renovate Source-Date: Tue Mar 23 17:38:03 2021 +0100 Source-Repo: googleapis/synthtool Source-Sha: f5c5904fb0c6aa3b3730eadf4e5a4485afc65726 Source-Link: https://github.com/googleapis/synthtool/commit/f5c5904fb0c6aa3b3730eadf4e5a4485afc65726 --- .pre-commit-config.yaml | 2 +- synth.metadata | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a9024b15d..32302e488 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,6 +12,6 @@ repos: hooks: - id: black - repo: https://gitlab.com/pycqa/flake8 - rev: 3.8.4 + rev: 3.9.0 hooks: - id: flake8 diff --git a/synth.metadata b/synth.metadata index 2425b03fb..3b34bf519 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "84e646e6b7087a1626e56ad51eeb130f4ddfa2fb" + "sha": "e175d3a26f68e1bc5148bf055089dbfc1b83c76a" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "79c8dd7ee768292f933012d3a69a5b4676404cda" + "sha": "f5c5904fb0c6aa3b3730eadf4e5a4485afc65726" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "79c8dd7ee768292f933012d3a69a5b4676404cda" + 
"sha": "f5c5904fb0c6aa3b3730eadf4e5a4485afc65726" } } ], From 1756f404c5201645bedda43d5cf06d469acd30c0 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 31 Mar 2021 17:01:09 +0200 Subject: [PATCH 054/230] chore(deps): update dependency google-auth-oauthlib to v0.4.4 (#578) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index abbe6fde4..9f6073c8f 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ google-cloud-bigquery==2.13.0 google-cloud-bigquery-storage==2.3.0 -google-auth-oauthlib==0.4.3 +google-auth-oauthlib==0.4.4 grpcio==1.36.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' From b425f7ccb9f67224a309924896d2faf611c633c9 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 31 Mar 2021 17:38:03 +0200 Subject: [PATCH 055/230] chore(deps): update dependency matplotlib to v3.4.1 (#576) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [matplotlib](https://matplotlib.org) ([source](https://togithub.com/matplotlib/matplotlib)) | `==3.3.4` -> `==3.4.1` | [![age](https://badges.renovateapi.com/packages/pypi/matplotlib/3.4.1/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/matplotlib/3.4.1/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/matplotlib/3.4.1/compatibility-slim/3.3.4)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/matplotlib/3.4.1/confidence-slim/3.3.4)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
matplotlib/matplotlib ### [`v3.4.1`](https://togithub.com/matplotlib/matplotlib/releases/v3.4.1) [Compare Source](https://togithub.com/matplotlib/matplotlib/compare/v3.4.0...v3.4.1) This is the first bugfix release of the 3.4.x series. This release contains several critical bug-fixes: - fix errorbar when specifying fillstyle - fix Inkscape cleanup at exit on Windows for tests - fix legends of colour-mapped scatter plots - fix positioning of annotation fancy arrows - fix size and color rendering for 3D scatter plots - fix suptitle manual positioning when using constrained layout - respect antialiasing settings in cairo backends as well ### [`v3.4.0`](https://togithub.com/matplotlib/matplotlib/releases/v3.4.0) [Compare Source](https://togithub.com/matplotlib/matplotlib/compare/v3.3.4...v3.4.0) Highlights of this release include: - Figure and Axes creation / management - New subfigure functionality - Single-line string notation for `subplot_mosaic` - Changes to behavior of Axes creation methods (`gca`, `add_axes`, `add_subplot`) - `add_subplot`/`add_axes` gained an _axes_class_ parameter - Subplot and subplot2grid can now work with constrained layout - Plotting methods - `axline` supports transform parameter - New automatic labeling for bar charts - A list of hatches can be specified to `bar` and `barh` - Setting `BarContainer` orientation - Contour plots now default to using `ScalarFormatter` - `Axes.errorbar` cycles non-color properties correctly - `errorbar` _errorevery_ parameter matches _markevery_ - `hexbin` supports data reference for _C_ parameter - Support callable for formatting of Sankey labels - `Axes.spines` access shortcuts - New `stairs` method and `StepPatch` artist - Added _orientation_ parameter for stem plots - Angles on Bracket arrow styles - `TickedStroke` patheffect - Colors and colormaps - Collection color specification and mapping - Transparency (alpha) can be set as an array in collections - pcolormesh has improved transparency handling by 
enabling snapping - IPython representations for Colormap objects - `Colormap.set_extremes` and `Colormap.with_extremes` - Get under/over/bad colors of Colormap objects - New `cm.unregister_cmap` function - New `CenteredNorm` for symmetrical data around a center - New `FuncNorm` for arbitrary normalizations - GridSpec-based colorbars can now be positioned above or to the left of the main axes - Titles, ticks, and labels - supxlabel and supylabel - Shared-axes `subplots` tick label visibility is now correct for top or left labels - An iterable object with labels can be passed to `Axes.plot` - Fonts and Text - Text transform can rotate text direction - `matplotlib.mathtext` now supports _overset_ and _underset_ LaTeX symbols - _math_fontfamily_ parameter to change `Text` font family - `TextArea`/`AnchoredText` support _horizontalalignment_ - PDF supports URLs on Text artists - rcParams improvements - New rcParams for dates: set converter and whether to use interval_multiples - Date formatters now respect _usetex_ rcParam - Setting _image.cmap_ to a Colormap - Tick and tick label colors can be set independently using rcParams - 3D Axes improvements - Errorbar method in 3D Axes - Stem plots in 3D Axes - 3D Collection properties are now modifiable - Panning in 3D Axes - Interactive tool improvements - New `RangeSlider` widget - Sliders can now snap to arbitrary values - Pausing and Resuming Animations - Sphinx extensions - `plot_directive` _caption_ option - Backend-specific improvements - Consecutive rasterized draws now merged - Support raw/rgba frame format in `FFMpegFileWriter` - nbAgg/WebAgg support middle-click and double-click - nbAgg support binary communication - Indexed color for PNG images in PDF files when possible - Improved font subsettings in PDF/PS - Kerning added to strings in PDFs - Fully-fractional HiDPI in QtAgg - wxAgg supports fullscreen toggle
--- ### Renovate configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Renovate will not automatically rebase this PR, because other commits have been found. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/snippets/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 9f6073c8f..6024d7655 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -4,7 +4,8 @@ google-auth-oauthlib==0.4.4 grpcio==1.36.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' -matplotlib==3.3.4 +matplotlib==3.3.4; python_version < '3.7' +matplotlib==3.4.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' pandas==1.2.0; python_version >= '3.7' pyarrow==3.0.0 From 14eb2da54ae46c5a0947f04540f8fcb86a2c2cdc Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 5 Apr 2021 15:47:58 +0200 Subject: [PATCH 056/230] chore: loosen opentelemetry dependencies (#587) --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 12a9bde31..607ffb63f 100644 --- a/setup.py +++ b/setup.py @@ -53,9 +53,9 @@ "bignumeric_type": ["pyarrow >= 3.0.0, < 4.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ - "opentelemetry-api==0.11b0", - "opentelemetry-sdk==0.11b0", - "opentelemetry-instrumentation==0.11b0", + "opentelemetry-api >= 0.11b0", + "opentelemetry-sdk >= 0.11b0", + "opentelemetry-instrumentation >= 
0.11b0", ], } From c1195147a6e9220f26558a301427dd447646da3a Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Tue, 6 Apr 2021 04:26:17 -0600 Subject: [PATCH 057/230] chore: use gcp-sphinx-docfx-yaml (#584) Porting change in https://github.com/googleapis/synthtool/pull/1011 --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index df36d237e..a738d8c00 100644 --- a/noxfile.py +++ b/noxfile.py @@ -275,7 +275,7 @@ def docfx(session): """Build the docfx yaml files for this library.""" session.install("-e", ".") - session.install("sphinx", "alabaster", "recommonmark", "sphinx-docfx-yaml") + session.install("sphinx", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml") shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) session.run( From 973e23649b59973494e5c706504bc833453155a8 Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Wed, 7 Apr 2021 09:01:43 -0700 Subject: [PATCH 058/230] chore: Add license headers for python config files (#592) Source-Author: Anthonios Partheniou Source-Date: Tue Apr 6 11:32:03 2021 -0400 Source-Repo: googleapis/synthtool Source-Sha: 5b5bf6d519b2d658d9f2e483d9f6f3d0ba8ee6bc Source-Link: https://github.com/googleapis/synthtool/commit/5b5bf6d519b2d658d9f2e483d9f6f3d0ba8ee6bc --- .pre-commit-config.yaml | 14 ++++++++++++++ docs/conf.py | 13 +++++++++++++ synth.metadata | 6 +++--- 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 32302e488..8912e9b5d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,3 +1,17 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# # See https://pre-commit.com for more information # See https://pre-commit.com/hooks.html for more hooks repos: diff --git a/docs/conf.py b/docs/conf.py index 37e0c46af..fdea01aad 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,4 +1,17 @@ # -*- coding: utf-8 -*- +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# # google-cloud-bigquery documentation build configuration file # diff --git a/synth.metadata b/synth.metadata index 3b34bf519..114359b88 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "e175d3a26f68e1bc5148bf055089dbfc1b83c76a" + "sha": "c1195147a6e9220f26558a301427dd447646da3a" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "f5c5904fb0c6aa3b3730eadf4e5a4485afc65726" + "sha": "5b5bf6d519b2d658d9f2e483d9f6f3d0ba8ee6bc" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "f5c5904fb0c6aa3b3730eadf4e5a4485afc65726" + "sha": "5b5bf6d519b2d658d9f2e483d9f6f3d0ba8ee6bc" } } ], From be4961257f077b96b595cfcd6553650bd4c618ad Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 7 Apr 2021 20:14:18 +0200 Subject: [PATCH 059/230] chore(deps): update dependency grpcio to v1.37.0 (#596) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 6024d7655..734cdf445 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.13.0 google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.4 -grpcio==1.36.1 +grpcio==1.37.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' From 8f4c0b84dac3840532d7865247b8ad94b625b897 Mon Sep 17 00:00:00 2001 From: Kevin Deggelman Date: Thu, 8 Apr 2021 07:16:06 -0700 Subject: [PATCH 060/230] docs: update the description of the return value of `_QueryResults.rows()` (#594) Updated the description of the return value of `rows` to be more accurate. 
--- google/cloud/bigquery/query.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 42547cd73..495c4effb 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -815,7 +815,7 @@ def total_rows(self): https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.total_rows Returns: - Optional[int}: Count generated on the server (None until set by the server). + Optional[int]: Count generated on the server (None until set by the server). """ total_rows = self._properties.get("totalRows") if total_rows is not None: @@ -858,7 +858,7 @@ def rows(self): Returns: Optional[List[google.cloud.bigquery.table.Row]]: - Fields describing the schema (None until set by the server). + Rows containing the results of the query. """ return _rows_from_json(self._properties.get("rows", ()), self.schema) From c8b5581ea3c94005d69755c4a3b5a0d8900f3fe2 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Mon, 12 Apr 2021 07:20:24 -0600 Subject: [PATCH 061/230] feat: accept DatasetListItem where DatasetReference is accepted (#597) * split out and pytestify list_tables tests. Also, exercise dataset polymorphism in some of the tests. * list_tables now accepts DatasetListItem objects * Get coverage to 100% But why do we run coverage on test code? * lint * Update exception text for DatasetListItem * Bypass opentelemetry tracing in unit tests. * Got rid of opentelemetry tracing checks. They aren't needed. * abstracted dataset-argument handling And applied it to `list_tables` and `list_models`. * Converted list_model tests to pytest and included check for dataset polymorphism * removed unneeded blanl lines. * Made list_routines accept DatasetListItem and conveted list_routines tests to pytest. * create_dataset accepts DatasetListItem Also converted create_dataset tests to pytest. (And fixed some long lines.) 
* Converted list_routine tests to pytest * include string dataset representation in dataset polymorphism. * removed some unused imports * Updated delete_dataset tests - Polymorphoc on dataset - pytest * black * lint * We don't actually need to avoid opentelemetry And a 3.6 test dependened on it. * fixed docstrings to include DatasetListItem in dataset polymorphic APIs. --- google/cloud/bigquery/client.py | 61 +- tests/unit/conftest.py | 23 + tests/unit/helpers.py | 49 + tests/unit/test_client.py | 3266 +++++++++++------------------ tests/unit/test_create_dataset.py | 349 +++ tests/unit/test_delete_dataset.py | 64 + tests/unit/test_list_models.py | 72 + tests/unit/test_list_routines.py | 75 + tests/unit/test_list_tables.py | 145 ++ 9 files changed, 2005 insertions(+), 2099 deletions(-) create mode 100644 tests/unit/conftest.py create mode 100644 tests/unit/test_create_dataset.py create mode 100644 tests/unit/test_delete_dataset.py create mode 100644 tests/unit/test_list_models.py create mode 100644 tests/unit/test_list_routines.py create mode 100644 tests/unit/test_list_tables.py diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 305d60d3b..10127e10d 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -449,6 +449,22 @@ def _create_bqstorage_client(self): return bigquery_storage.BigQueryReadClient(credentials=self._credentials) + def _dataset_from_arg(self, dataset): + if isinstance(dataset, str): + dataset = DatasetReference.from_string( + dataset, default_project=self.project + ) + + if not isinstance(dataset, (Dataset, DatasetReference)): + if isinstance(dataset, DatasetListItem): + dataset = dataset.reference + else: + raise TypeError( + "dataset must be a Dataset, DatasetReference, DatasetListItem," + " or string" + ) + return dataset + def create_dataset( self, dataset, exists_ok=False, retry=DEFAULT_RETRY, timeout=None ): @@ -461,6 +477,7 @@ def create_dataset( dataset (Union[ \ 
google.cloud.bigquery.dataset.Dataset, \ google.cloud.bigquery.dataset.DatasetReference, \ + google.cloud.bigquery.dataset.DatasetListItem, \ str, \ ]): A :class:`~google.cloud.bigquery.dataset.Dataset` to create. @@ -491,10 +508,7 @@ def create_dataset( >>> dataset = client.create_dataset(dataset) """ - if isinstance(dataset, str): - dataset = DatasetReference.from_string( - dataset, default_project=self.project - ) + dataset = self._dataset_from_arg(dataset) if isinstance(dataset, DatasetReference): dataset = Dataset(dataset) @@ -1133,6 +1147,7 @@ def list_models( dataset (Union[ \ google.cloud.bigquery.dataset.Dataset, \ google.cloud.bigquery.dataset.DatasetReference, \ + google.cloud.bigquery.dataset.DatasetListItem, \ str, \ ]): A reference to the dataset whose models to list from the @@ -1160,13 +1175,7 @@ def list_models( :class:`~google.cloud.bigquery.model.Model` contained within the requested dataset. """ - if isinstance(dataset, str): - dataset = DatasetReference.from_string( - dataset, default_project=self.project - ) - - if not isinstance(dataset, (Dataset, DatasetReference)): - raise TypeError("dataset must be a Dataset, DatasetReference, or string") + dataset = self._dataset_from_arg(dataset) path = "%s/models" % dataset.path span_attributes = {"path": path} @@ -1210,6 +1219,7 @@ def list_routines( dataset (Union[ \ google.cloud.bigquery.dataset.Dataset, \ google.cloud.bigquery.dataset.DatasetReference, \ + google.cloud.bigquery.dataset.DatasetListItem, \ str, \ ]): A reference to the dataset whose routines to list from the @@ -1237,14 +1247,7 @@ def list_routines( :class:`~google.cloud.bigquery.routine.Routine`s contained within the requested dataset, limited by ``max_results``. 
""" - if isinstance(dataset, str): - dataset = DatasetReference.from_string( - dataset, default_project=self.project - ) - - if not isinstance(dataset, (Dataset, DatasetReference)): - raise TypeError("dataset must be a Dataset, DatasetReference, or string") - + dataset = self._dataset_from_arg(dataset) path = "{}/routines".format(dataset.path) span_attributes = {"path": path} @@ -1288,6 +1291,7 @@ def list_tables( dataset (Union[ \ google.cloud.bigquery.dataset.Dataset, \ google.cloud.bigquery.dataset.DatasetReference, \ + google.cloud.bigquery.dataset.DatasetListItem, \ str, \ ]): A reference to the dataset whose tables to list from the @@ -1315,14 +1319,7 @@ def list_tables( :class:`~google.cloud.bigquery.table.TableListItem` contained within the requested dataset. """ - if isinstance(dataset, str): - dataset = DatasetReference.from_string( - dataset, default_project=self.project - ) - - if not isinstance(dataset, (Dataset, DatasetReference)): - raise TypeError("dataset must be a Dataset, DatasetReference, or string") - + dataset = self._dataset_from_arg(dataset) path = "%s/tables" % dataset.path span_attributes = {"path": path} @@ -1365,6 +1362,7 @@ def delete_dataset( dataset (Union[ \ google.cloud.bigquery.dataset.Dataset, \ google.cloud.bigquery.dataset.DatasetReference, \ + google.cloud.bigquery.dataset.DatasetListItem, \ str, \ ]): A reference to the dataset to delete. If a string is passed @@ -1384,14 +1382,7 @@ def delete_dataset( Defaults to ``False``. If ``True``, ignore "not found" errors when deleting the dataset. 
""" - if isinstance(dataset, str): - dataset = DatasetReference.from_string( - dataset, default_project=self.project - ) - - if not isinstance(dataset, (Dataset, DatasetReference)): - raise TypeError("dataset must be a Dataset or a DatasetReference") - + dataset = self._dataset_from_arg(dataset) params = {} path = dataset.path if delete_contents: diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py new file mode 100644 index 000000000..07fc9b4ad --- /dev/null +++ b/tests/unit/conftest.py @@ -0,0 +1,23 @@ +import pytest + +from .helpers import make_client + + +@pytest.fixture +def client(): + yield make_client() + + +@pytest.fixture +def PROJECT(): + yield "PROJECT" + + +@pytest.fixture +def DS_ID(): + yield "DATASET_ID" + + +@pytest.fixture +def LOCATION(): + yield "us-central" diff --git a/tests/unit/helpers.py b/tests/unit/helpers.py index b51b0bbb7..67aeaca35 100644 --- a/tests/unit/helpers.py +++ b/tests/unit/helpers.py @@ -12,6 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import google.cloud.bigquery.client +import google.cloud.bigquery.dataset +import mock +import pytest + def make_connection(*responses): import google.cloud.bigquery._http @@ -31,3 +36,47 @@ def _to_pyarrow(value): import pyarrow return pyarrow.array([value])[0] + + +def make_client(project="PROJECT", **kw): + credentials = mock.Mock(spec=google.auth.credentials.Credentials) + return google.cloud.bigquery.client.Client(project, credentials, **kw) + + +def make_dataset_reference_string(project, ds_id): + return f"{project}.{ds_id}" + + +def make_dataset(project, ds_id): + return google.cloud.bigquery.dataset.Dataset( + google.cloud.bigquery.dataset.DatasetReference(project, ds_id) + ) + + +def make_dataset_list_item(project, ds_id): + return google.cloud.bigquery.dataset.DatasetListItem( + dict(datasetReference=dict(projectId=project, datasetId=ds_id)) + ) + + +def identity(x): + return x + + +def get_reference(x): + return x.reference + + +dataset_like = [ + (google.cloud.bigquery.dataset.DatasetReference, identity), + (make_dataset, identity), + (make_dataset_list_item, get_reference), + ( + make_dataset_reference_string, + google.cloud.bigquery.dataset.DatasetReference.from_string, + ), +] + +dataset_polymorphic = pytest.mark.parametrize( + "make_dataset,get_reference", dataset_like +) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 26ef340de..96e51678f 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -856,2505 +856,1643 @@ def fail_bqstorage_import(name, globals, locals, fromlist, level): ] assert matching_warnings, "Missing dependency warning not raised." 
- def test_create_dataset_minimal(self): - from google.cloud.bigquery.dataset import Dataset + def test_create_routine_w_minimal_resource(self): + from google.cloud.bigquery.routine import Routine + from google.cloud.bigquery.routine import RoutineReference - PATH = "projects/%s/datasets" % self.PROJECT - RESOURCE = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - } creds = _make_credentials() + path = "/projects/test-routine-project/datasets/test_routines/routines" + resource = { + "routineReference": { + "projectId": "test-routine-project", + "datasetId": "test_routines", + "routineId": "minimal_routine", + } + } client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(RESOURCE) - - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - before = Dataset(ds_ref) + conn = client._connection = make_connection(resource) + full_routine_id = "test-routine-project.test_routines.minimal_routine" + routine = Routine(full_routine_id) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - after = client.create_dataset(before, timeout=7.5) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + actual_routine = client.create_routine(routine, timeout=7.5) - self.assertEqual(after.dataset_id, self.DS_ID) - self.assertEqual(after.project, self.PROJECT) - self.assertEqual(after.etag, RESOURCE["etag"]) - self.assertEqual(after.full_dataset_id, RESOURCE["id"]) + final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % PATH, - data={ - "datasetReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - }, - "labels": {}, - }, - timeout=7.5, + method="POST", path=path, data=resource, timeout=7.5, + ) + self.assertEqual( + actual_routine.reference, 
RoutineReference.from_string(full_routine_id) ) - def test_create_dataset_w_attrs(self): - from google.cloud.bigquery.dataset import Dataset, AccessEntry + def test_create_routine_w_conflict(self): + from google.cloud.bigquery.routine import Routine - PATH = "projects/%s/datasets" % self.PROJECT - DESCRIPTION = "DESC" - FRIENDLY_NAME = "FN" - LOCATION = "US" - USER_EMAIL = "phred@example.com" - LABELS = {"color": "red"} - VIEW = { - "projectId": "my-proj", - "datasetId": "starry-skies", - "tableId": "northern-hemisphere", + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = make_connection( + google.api_core.exceptions.AlreadyExists("routine already exists") + ) + path = "/projects/test-routine-project/datasets/test_routines/routines" + full_routine_id = "test-routine-project.test_routines.minimal_routine" + routine = Routine(full_routine_id) + + with pytest.raises(google.api_core.exceptions.AlreadyExists): + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.create_routine(routine) + + final_attributes.assert_called_once_with({"path": path}, client, None) + + resource = { + "routineReference": { + "projectId": "test-routine-project", + "datasetId": "test_routines", + "routineId": "minimal_routine", + } } - RESOURCE = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "description": DESCRIPTION, - "friendlyName": FRIENDLY_NAME, - "location": LOCATION, - "defaultTableExpirationMs": "3600", - "labels": LABELS, - "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], + conn.api_request.assert_called_once_with( + method="POST", path=path, data=resource, timeout=None, + ) + + @unittest.skipIf(opentelemetry is None, "Requires `opentelemetry`") + def test_span_status_is_set(self): + from 
google.cloud.bigquery.routine import Routine + + tracer_provider = TracerProvider() + memory_exporter = InMemorySpanExporter() + span_processor = SimpleExportSpanProcessor(memory_exporter) + tracer_provider.add_span_processor(span_processor) + trace.set_tracer_provider(tracer_provider) + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = make_connection( + google.api_core.exceptions.AlreadyExists("routine already exists") + ) + path = "/projects/test-routine-project/datasets/test_routines/routines" + full_routine_id = "test-routine-project.test_routines.minimal_routine" + routine = Routine(full_routine_id) + + with pytest.raises(google.api_core.exceptions.AlreadyExists): + client.create_routine(routine) + + span_list = memory_exporter.get_finished_spans() + self.assertTrue(span_list[0].status is not None) + + resource = { + "routineReference": { + "projectId": "test-routine-project", + "datasetId": "test_routines", + "routineId": "minimal_routine", + } } + conn.api_request.assert_called_once_with( + method="POST", path=path, data=resource, timeout=None, + ) + + def test_create_routine_w_conflict_exists_ok(self): + from google.cloud.bigquery.routine import Routine + creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(RESOURCE) - entries = [ - AccessEntry("OWNER", "userByEmail", USER_EMAIL), - AccessEntry(None, "view", VIEW), - ] + resource = { + "routineReference": { + "projectId": "test-routine-project", + "datasetId": "test_routines", + "routineId": "minimal_routine", + } + } + path = "/projects/test-routine-project/datasets/test_routines/routines" - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - before = Dataset(ds_ref) - before.access_entries = entries - before.description = DESCRIPTION - before.friendly_name = FRIENDLY_NAME - before.default_table_expiration_ms = 3600 - before.location = 
LOCATION - before.labels = LABELS + conn = client._connection = make_connection( + google.api_core.exceptions.AlreadyExists("routine already exists"), resource + ) + full_routine_id = "test-routine-project.test_routines.minimal_routine" + routine = Routine(full_routine_id) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - after = client.create_dataset(before) + actual_routine = client.create_routine(routine, exists_ok=True) - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + final_attributes.assert_called_with( + {"path": "%s/minimal_routine" % path}, client, None + ) + + self.assertEqual(actual_routine.project, "test-routine-project") + self.assertEqual(actual_routine.dataset_id, "test_routines") + self.assertEqual(actual_routine.routine_id, "minimal_routine") + conn.api_request.assert_has_calls( + [ + mock.call(method="POST", path=path, data=resource, timeout=None,), + mock.call( + method="GET", + path="/projects/test-routine-project/datasets/test_routines/routines/minimal_routine", + timeout=None, + ), + ] + ) + + def test_create_table_w_day_partition(self): + from google.cloud.bigquery.table import Table + from google.cloud.bigquery.table import TimePartitioning + + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() + conn = client._connection = make_connection(resource) + table = Table(self.TABLE_REF) + table.time_partitioning = TimePartitioning() + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + got = client.create_table(table, timeout=7.5) - self.assertEqual(after.dataset_id, self.DS_ID) - self.assertEqual(after.project, self.PROJECT) - self.assertEqual(after.etag, RESOURCE["etag"]) - self.assertEqual(after.full_dataset_id, RESOURCE["id"]) - 
self.assertEqual(after.description, DESCRIPTION) - self.assertEqual(after.friendly_name, FRIENDLY_NAME) - self.assertEqual(after.location, LOCATION) - self.assertEqual(after.default_table_expiration_ms, 3600) - self.assertEqual(after.labels, LABELS) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + ) conn.api_request.assert_called_once_with( method="POST", - path="/%s" % PATH, + path="/%s" % path, data={ - "datasetReference": { + "tableReference": { "projectId": self.PROJECT, "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, - "description": DESCRIPTION, - "friendlyName": FRIENDLY_NAME, - "location": LOCATION, - "defaultTableExpirationMs": "3600", - "access": [ - {"role": "OWNER", "userByEmail": USER_EMAIL}, - {"view": VIEW}, - ], - "labels": LABELS, + "timePartitioning": {"type": "DAY"}, + "labels": {}, }, - timeout=None, + timeout=7.5, ) + self.assertEqual(table.time_partitioning.type_, "DAY") + self.assertEqual(got.table_id, self.TABLE_ID) - def test_create_dataset_w_custom_property(self): + def test_create_table_w_custom_property(self): # The library should handle sending properties to the API that are not # yet part of the library - from google.cloud.bigquery.dataset import Dataset + from google.cloud.bigquery.table import Table - path = "/projects/%s/datasets" % self.PROJECT - resource = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "newAlphaProperty": "unreleased property", - } + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() + resource["newAlphaProperty"] = "unreleased property" conn = client._connection = make_connection(resource) - - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - before = Dataset(ds_ref) - before._properties["newAlphaProperty"] = "unreleased property" + table = 
Table(self.TABLE_REF) + table._properties["newAlphaProperty"] = "unreleased property" with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - after = client.create_dataset(before) - - final_attributes.assert_called_once_with({"path": path}, client, None) + got = client.create_table(table) - self.assertEqual(after.dataset_id, self.DS_ID) - self.assertEqual(after.project, self.PROJECT) - self.assertEqual(after._properties["newAlphaProperty"], "unreleased property") + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + ) conn.api_request.assert_called_once_with( method="POST", - path=path, + path="/%s" % path, data={ - "datasetReference": { + "tableReference": { "projectId": self.PROJECT, "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, "newAlphaProperty": "unreleased property", "labels": {}, }, timeout=None, ) + self.assertEqual(got._properties["newAlphaProperty"], "unreleased property") + self.assertEqual(got.table_id, self.TABLE_ID) - def test_create_dataset_w_client_location_wo_dataset_location(self): - from google.cloud.bigquery.dataset import Dataset + def test_create_table_w_encryption_configuration(self): + from google.cloud.bigquery.encryption_configuration import ( + EncryptionConfiguration, + ) + from google.cloud.bigquery.table import Table - PATH = "projects/%s/datasets" % self.PROJECT - RESOURCE = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "location": self.LOCATION, - } + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() + conn = client._connection = make_connection(resource) + table = 
Table(self.TABLE_REF) + table.encryption_configuration = EncryptionConfiguration( + kms_key_name=self.KMS_KEY_NAME ) - conn = client._connection = make_connection(RESOURCE) - - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - before = Dataset(ds_ref) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - after = client.create_dataset(before) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + got = client.create_table(table) - self.assertEqual(after.dataset_id, self.DS_ID) - self.assertEqual(after.project, self.PROJECT) - self.assertEqual(after.etag, RESOURCE["etag"]) - self.assertEqual(after.full_dataset_id, RESOURCE["id"]) - self.assertEqual(after.location, self.LOCATION) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + ) conn.api_request.assert_called_once_with( method="POST", - path="/%s" % PATH, + path="/%s" % path, data={ - "datasetReference": { + "tableReference": { "projectId": self.PROJECT, "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, "labels": {}, - "location": self.LOCATION, + "encryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME}, }, timeout=None, ) + self.assertEqual(got.table_id, self.TABLE_ID) - def test_create_dataset_w_client_location_w_dataset_location(self): - from google.cloud.bigquery.dataset import Dataset + def test_create_table_w_day_partition_and_expire(self): + from google.cloud.bigquery.table import Table + from google.cloud.bigquery.table import TimePartitioning - PATH = "projects/%s/datasets" % self.PROJECT - OTHER_LOCATION = "EU" - RESOURCE = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "location": OTHER_LOCATION, - } + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() - client = self._make_one( - 
project=self.PROJECT, credentials=creds, location=self.LOCATION - ) - conn = client._connection = make_connection(RESOURCE) - - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - before = Dataset(ds_ref) - before.location = OTHER_LOCATION + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() + conn = client._connection = make_connection(resource) + table = Table(self.TABLE_REF) + table.time_partitioning = TimePartitioning(expiration_ms=100) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - after = client.create_dataset(before) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + got = client.create_table(table) - self.assertEqual(after.dataset_id, self.DS_ID) - self.assertEqual(after.project, self.PROJECT) - self.assertEqual(after.etag, RESOURCE["etag"]) - self.assertEqual(after.full_dataset_id, RESOURCE["id"]) - self.assertEqual(after.location, OTHER_LOCATION) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + ) conn.api_request.assert_called_once_with( method="POST", - path="/%s" % PATH, + path="/%s" % path, data={ - "datasetReference": { + "tableReference": { "projectId": self.PROJECT, "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, + "timePartitioning": {"type": "DAY", "expirationMs": "100"}, "labels": {}, - "location": OTHER_LOCATION, }, timeout=None, ) + self.assertEqual(table.time_partitioning.type_, "DAY") + self.assertEqual(table.time_partitioning.expiration_ms, 100) + self.assertEqual(got.table_id, self.TABLE_ID) - def test_create_dataset_w_reference(self): - path = "/projects/%s/datasets" % self.PROJECT - resource = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "location": self.LOCATION, - } + def 
test_create_table_w_schema_and_query(self): + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table + + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) + query = "SELECT * from %s:%s" % (self.DS_ID, self.TABLE_ID) creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() + resource.update( + { + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ] + }, + "view": {"query": query}, + } ) + schema = [ + SchemaField("full_name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] conn = client._connection = make_connection(resource) + table = Table(self.TABLE_REF, schema=schema) + table.view_query = query + with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - dataset = client.create_dataset(DatasetReference(self.PROJECT, self.DS_ID)) - - final_attributes.assert_called_once_with({"path": path}, client, None) + got = client.create_table(table) - self.assertEqual(dataset.dataset_id, self.DS_ID) - self.assertEqual(dataset.project, self.PROJECT) - self.assertEqual(dataset.etag, resource["etag"]) - self.assertEqual(dataset.full_dataset_id, resource["id"]) - self.assertEqual(dataset.location, self.LOCATION) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + ) conn.api_request.assert_called_once_with( method="POST", - path=path, + path="/%s" % path, data={ - "datasetReference": { + "tableReference": { "projectId": self.PROJECT, "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", 
"type": "INTEGER", "mode": "REQUIRED"}, + ] }, + "view": {"query": query, "useLegacySql": False}, "labels": {}, - "location": self.LOCATION, }, timeout=None, ) + self.assertEqual(got.table_id, self.TABLE_ID) + self.assertEqual(got.project, self.PROJECT) + self.assertEqual(got.dataset_id, self.DS_ID) + self.assertEqual(got.schema, schema) + self.assertEqual(got.view_query, query) - def test_create_dataset_w_fully_qualified_string(self): - path = "/projects/%s/datasets" % self.PROJECT - resource = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "location": self.LOCATION, - } + def test_create_table_w_external(self): + from google.cloud.bigquery.external_config import ExternalConfig + from google.cloud.bigquery.job import SourceFormat + from google.cloud.bigquery.table import Table + + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() + resource.update( + { + "externalDataConfiguration": { + "sourceFormat": SourceFormat.CSV, + "autodetect": True, + } + } ) conn = client._connection = make_connection(resource) + table = Table(self.TABLE_REF) + ec = ExternalConfig("CSV") + ec.autodetect = True + table.external_data_configuration = ec + with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - dataset = client.create_dataset("{}.{}".format(self.PROJECT, self.DS_ID)) - - final_attributes.assert_called_once_with({"path": path}, client, None) + got = client.create_table(table) - self.assertEqual(dataset.dataset_id, self.DS_ID) - self.assertEqual(dataset.project, self.PROJECT) - self.assertEqual(dataset.etag, resource["etag"]) - self.assertEqual(dataset.full_dataset_id, 
resource["id"]) - self.assertEqual(dataset.location, self.LOCATION) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + ) conn.api_request.assert_called_once_with( method="POST", - path=path, + path="/%s" % path, data={ - "datasetReference": { + "tableReference": { "projectId": self.PROJECT, "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "externalDataConfiguration": { + "sourceFormat": SourceFormat.CSV, + "autodetect": True, }, "labels": {}, - "location": self.LOCATION, }, timeout=None, ) + self.assertEqual(got.table_id, self.TABLE_ID) + self.assertEqual(got.project, self.PROJECT) + self.assertEqual(got.dataset_id, self.DS_ID) + self.assertEqual( + got.external_data_configuration.source_format, SourceFormat.CSV + ) + self.assertEqual(got.external_data_configuration.autodetect, True) - def test_create_dataset_w_string(self): - path = "/projects/%s/datasets" % self.PROJECT - resource = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "%s:%s" % (self.PROJECT, self.DS_ID), - "location": self.LOCATION, - } + def test_create_table_w_reference(self): + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION - ) + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() conn = client._connection = make_connection(resource) + with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - dataset = client.create_dataset(self.DS_ID) - - final_attributes.assert_called_once_with({"path": path}, client, None) + got = client.create_table(self.TABLE_REF) - self.assertEqual(dataset.dataset_id, self.DS_ID) - self.assertEqual(dataset.project, self.PROJECT) - self.assertEqual(dataset.etag, resource["etag"]) - 
self.assertEqual(dataset.full_dataset_id, resource["id"]) - self.assertEqual(dataset.location, self.LOCATION) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": self.TABLE_REF.dataset_id}, + client, + None, + ) conn.api_request.assert_called_once_with( method="POST", - path=path, + path="/%s" % path, data={ - "datasetReference": { + "tableReference": { "projectId": self.PROJECT, "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, }, "labels": {}, - "location": self.LOCATION, }, timeout=None, ) + self.assertEqual(got.table_id, self.TABLE_ID) - def test_create_dataset_alreadyexists_w_exists_ok_false(self): - creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION - ) - client._connection = make_connection( - google.api_core.exceptions.AlreadyExists("dataset already exists") - ) - - with pytest.raises(google.api_core.exceptions.AlreadyExists): - client.create_dataset(self.DS_ID) - - def test_create_dataset_alreadyexists_w_exists_ok_true(self): - post_path = "/projects/{}/datasets".format(self.PROJECT) - get_path = "/projects/{}/datasets/{}".format(self.PROJECT, self.DS_ID) - resource = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "id": "{}:{}".format(self.PROJECT, self.DS_ID), - "location": self.LOCATION, - } + def test_create_table_w_fully_qualified_string(self): + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION - ) - conn = client._connection = make_connection( - google.api_core.exceptions.AlreadyExists("dataset already exists"), resource - ) + client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() + conn = client._connection = make_connection(resource) with mock.patch( 
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - dataset = client.create_dataset(self.DS_ID, exists_ok=True) - - final_attributes.assert_called_with({"path": get_path}, client, None) - - self.assertEqual(dataset.dataset_id, self.DS_ID) - self.assertEqual(dataset.project, self.PROJECT) - self.assertEqual(dataset.etag, resource["etag"]) - self.assertEqual(dataset.full_dataset_id, resource["id"]) - self.assertEqual(dataset.location, self.LOCATION) + got = client.create_table( + "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.TABLE_ID) + ) - conn.api_request.assert_has_calls( - [ - mock.call( - method="POST", - path=post_path, - data={ - "datasetReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - }, - "labels": {}, - "location": self.LOCATION, - }, - timeout=None, - ), - mock.call(method="GET", path=get_path, timeout=None), - ] + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": self.TABLE_REF.dataset_id}, + client, + None, ) - def test_create_routine_w_minimal_resource(self): - from google.cloud.bigquery.routine import Routine - from google.cloud.bigquery.routine import RoutineReference + conn.api_request.assert_called_once_with( + method="POST", + path="/%s" % path, + data={ + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "labels": {}, + }, + timeout=None, + ) + self.assertEqual(got.table_id, self.TABLE_ID) + def test_create_table_w_string(self): + path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) creds = _make_credentials() - path = "/projects/test-routine-project/datasets/test_routines/routines" - resource = { - "routineReference": { - "projectId": "test-routine-project", - "datasetId": "test_routines", - "routineId": "minimal_routine", - } - } client = self._make_one(project=self.PROJECT, credentials=creds) + resource = self._make_table_resource() conn = client._connection = 
make_connection(resource) - full_routine_id = "test-routine-project.test_routines.minimal_routine" - routine = Routine(full_routine_id) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - actual_routine = client.create_routine(routine, timeout=7.5) - - final_attributes.assert_called_once_with({"path": path}, client, None) + got = client.create_table("{}.{}".format(self.DS_ID, self.TABLE_ID)) - conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=7.5, - ) - self.assertEqual( - actual_routine.reference, RoutineReference.from_string(full_routine_id) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "dataset_id": self.TABLE_REF.dataset_id}, + client, + None, ) - def test_create_routine_w_conflict(self): - from google.cloud.bigquery.routine import Routine + conn.api_request.assert_called_once_with( + method="POST", + path="/%s" % path, + data={ + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "labels": {}, + }, + timeout=None, + ) + self.assertEqual(got.table_id, self.TABLE_ID) + def test_create_table_alreadyexists_w_exists_ok_false(self): + post_path = "/projects/{}/datasets/{}/tables".format(self.PROJECT, self.DS_ID) creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) + client = self._make_one( + project=self.PROJECT, credentials=creds, location=self.LOCATION + ) conn = client._connection = make_connection( - google.api_core.exceptions.AlreadyExists("routine already exists") + google.api_core.exceptions.AlreadyExists("table already exists") ) - path = "/projects/test-routine-project/datasets/test_routines/routines" - full_routine_id = "test-routine-project.test_routines.minimal_routine" - routine = Routine(full_routine_id) with pytest.raises(google.api_core.exceptions.AlreadyExists): with mock.patch( 
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - client.create_routine(routine) - - final_attributes.assert_called_once_with({"path": path}, client, None) - - resource = { - "routineReference": { - "projectId": "test-routine-project", - "datasetId": "test_routines", - "routineId": "minimal_routine", - } - } - conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=None, - ) - - @unittest.skipIf(opentelemetry is None, "Requires `opentelemetry`") - def test_span_status_is_set(self): - from google.cloud.bigquery.routine import Routine - - tracer_provider = TracerProvider() - memory_exporter = InMemorySpanExporter() - span_processor = SimpleExportSpanProcessor(memory_exporter) - tracer_provider.add_span_processor(span_processor) - trace.set_tracer_provider(tracer_provider) + client.create_table("{}.{}".format(self.DS_ID, self.TABLE_ID)) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection( - google.api_core.exceptions.AlreadyExists("routine already exists") + final_attributes.assert_called_with( + {"path": post_path, "dataset_id": self.TABLE_REF.dataset_id}, client, None, ) - path = "/projects/test-routine-project/datasets/test_routines/routines" - full_routine_id = "test-routine-project.test_routines.minimal_routine" - routine = Routine(full_routine_id) - - with pytest.raises(google.api_core.exceptions.AlreadyExists): - client.create_routine(routine) - - span_list = memory_exporter.get_finished_spans() - self.assertTrue(span_list[0].status is not None) - resource = { - "routineReference": { - "projectId": "test-routine-project", - "datasetId": "test_routines", - "routineId": "minimal_routine", - } - } conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=None, + method="POST", + path=post_path, + data={ + "tableReference": { + "projectId": self.PROJECT, + 
"datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "labels": {}, + }, + timeout=None, ) - def test_create_routine_w_conflict_exists_ok(self): - from google.cloud.bigquery.routine import Routine - + def test_create_table_alreadyexists_w_exists_ok_true(self): + post_path = "/projects/{}/datasets/{}/tables".format(self.PROJECT, self.DS_ID) + get_path = "/projects/{}/datasets/{}/tables/{}".format( + self.PROJECT, self.DS_ID, self.TABLE_ID + ) + resource = self._make_table_resource() creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = { - "routineReference": { - "projectId": "test-routine-project", - "datasetId": "test_routines", - "routineId": "minimal_routine", - } - } - path = "/projects/test-routine-project/datasets/test_routines/routines" - + client = self._make_one( + project=self.PROJECT, credentials=creds, location=self.LOCATION + ) conn = client._connection = make_connection( - google.api_core.exceptions.AlreadyExists("routine already exists"), resource + google.api_core.exceptions.AlreadyExists("table already exists"), resource ) - full_routine_id = "test-routine-project.test_routines.minimal_routine" - routine = Routine(full_routine_id) + with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - actual_routine = client.create_routine(routine, exists_ok=True) + got = client.create_table( + "{}.{}".format(self.DS_ID, self.TABLE_ID), exists_ok=True + ) - final_attributes.assert_called_with( - {"path": "%s/minimal_routine" % path}, client, None - ) + final_attributes.assert_called_with({"path": get_path}, client, None) + + self.assertEqual(got.project, self.PROJECT) + self.assertEqual(got.dataset_id, self.DS_ID) + self.assertEqual(got.table_id, self.TABLE_ID) - self.assertEqual(actual_routine.project, "test-routine-project") - self.assertEqual(actual_routine.dataset_id, "test_routines") - self.assertEqual(actual_routine.routine_id, 
"minimal_routine") conn.api_request.assert_has_calls( [ - mock.call(method="POST", path=path, data=resource, timeout=None,), mock.call( - method="GET", - path="/projects/test-routine-project/datasets/test_routines/routines/minimal_routine", + method="POST", + path=post_path, + data={ + "tableReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "tableId": self.TABLE_ID, + }, + "labels": {}, + }, timeout=None, ), + mock.call(method="GET", path=get_path, timeout=None), ] ) - def test_create_table_w_day_partition(self): - from google.cloud.bigquery.table import Table - from google.cloud.bigquery.table import TimePartitioning + def test_close(self): + creds = _make_credentials() + http = mock.Mock() + http._auth_request.session = mock.Mock() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) + client.close() + + http.close.assert_called_once() + http._auth_request.session.close.assert_called_once() + + def test_get_model(self): + path = "projects/%s/datasets/%s/models/%s" % ( + self.PROJECT, + self.DS_ID, + self.MODEL_ID, + ) creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = self._make_table_resource() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + resource = { + "modelReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "modelId": self.MODEL_ID, + } + } conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF) - table.time_partitioning = TimePartitioning() + + model_ref = DatasetReference(self.PROJECT, self.DS_ID).model(self.MODEL_ID) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - got = client.create_table(table, timeout=7.5) + got = client.get_model(model_ref, timeout=7.5) - final_attributes.assert_called_once_with( - {"path": 
"/%s" % path, "dataset_id": table.dataset_id}, client, None - ) + final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "timePartitioning": {"type": "DAY"}, - "labels": {}, - }, - timeout=7.5, + method="GET", path="/%s" % path, timeout=7.5 ) - self.assertEqual(table.time_partitioning.type_, "DAY") - self.assertEqual(got.table_id, self.TABLE_ID) - - def test_create_table_w_custom_property(self): - # The library should handle sending properties to the API that are not - # yet part of the library - from google.cloud.bigquery.table import Table + self.assertEqual(got.model_id, self.MODEL_ID) - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) + def test_get_model_w_string(self): + path = "projects/%s/datasets/%s/models/%s" % ( + self.PROJECT, + self.DS_ID, + self.MODEL_ID, + ) creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = self._make_table_resource() - resource["newAlphaProperty"] = "unreleased property" + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + resource = { + "modelReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "modelId": self.MODEL_ID, + } + } conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF) - table._properties["newAlphaProperty"] = "unreleased property" + + model_id = "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.MODEL_ID) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - got = client.create_table(table) + got = client.get_model(model_id) - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None - ) + 
final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "newAlphaProperty": "unreleased property", - "labels": {}, - }, - timeout=None, + method="GET", path="/%s" % path, timeout=None ) - self.assertEqual(got._properties["newAlphaProperty"], "unreleased property") - self.assertEqual(got.table_id, self.TABLE_ID) + self.assertEqual(got.model_id, self.MODEL_ID) - def test_create_table_w_encryption_configuration(self): - from google.cloud.bigquery.encryption_configuration import ( - EncryptionConfiguration, - ) - from google.cloud.bigquery.table import Table + def test_get_routine(self): + from google.cloud.bigquery.routine import Routine + from google.cloud.bigquery.routine import RoutineReference - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = self._make_table_resource() - conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF) - table.encryption_configuration = EncryptionConfiguration( - kms_key_name=self.KMS_KEY_NAME - ) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.create_table(table) - - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None - ) - - conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, + full_routine_id = "test-routine-project.test_routines.minimal_routine" + routines = [ + full_routine_id, + Routine(full_routine_id), + RoutineReference.from_string(full_routine_id), + ] + for routine 
in routines: + creds = _make_credentials() + resource = { + "etag": "im-an-etag", + "routineReference": { + "projectId": "test-routine-project", + "datasetId": "test_routines", + "routineId": "minimal_routine", }, - "labels": {}, - "encryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME}, - }, - timeout=None, - ) - self.assertEqual(got.table_id, self.TABLE_ID) - - def test_create_table_w_day_partition_and_expire(self): - from google.cloud.bigquery.table import Table - from google.cloud.bigquery.table import TimePartitioning + "routineType": "SCALAR_FUNCTION", + } + path = "/projects/test-routine-project/datasets/test_routines/routines/minimal_routine" - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = self._make_table_resource() - conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF) - table.time_partitioning = TimePartitioning(expiration_ms=100) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.create_table(table) + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = make_connection(resource) - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None - ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + actual_routine = client.get_routine(routine, timeout=7.5) - conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "timePartitioning": {"type": "DAY", "expirationMs": "100"}, - "labels": {}, - }, - timeout=None, - ) - self.assertEqual(table.time_partitioning.type_, "DAY") - 
self.assertEqual(table.time_partitioning.expiration_ms, 100) - self.assertEqual(got.table_id, self.TABLE_ID) + final_attributes.assert_called_once_with({"path": path}, client, None) - def test_create_table_w_schema_and_query(self): - from google.cloud.bigquery.schema import SchemaField - from google.cloud.bigquery.table import Table + conn.api_request.assert_called_once_with( + method="GET", path=path, timeout=7.5, + ) + self.assertEqual( + actual_routine.reference, + RoutineReference.from_string(full_routine_id), + msg="routine={}".format(repr(routine)), + ) + self.assertEqual( + actual_routine.etag, + "im-an-etag", + msg="routine={}".format(repr(routine)), + ) + self.assertEqual( + actual_routine.type_, + "SCALAR_FUNCTION", + msg="routine={}".format(repr(routine)), + ) - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - query = "SELECT * from %s:%s" % (self.DS_ID, self.TABLE_ID) + def test_get_table(self): + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) resource = self._make_table_resource() - resource.update( - { - "schema": { - "fields": [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, - ] - }, - "view": {"query": query}, - } - ) - schema = [ - SchemaField("full_name", "STRING", mode="REQUIRED"), - SchemaField("age", "INTEGER", mode="REQUIRED"), - ] conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF, schema=schema) - table.view_query = query - with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - got = client.create_table(table) + table = client.get_table(self.TABLE_REF, timeout=7.5) - final_attributes.assert_called_once_with( - 
{"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None - ) + final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "schema": { - "fields": [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, - ] - }, - "view": {"query": query, "useLegacySql": False}, - "labels": {}, - }, - timeout=None, + method="GET", path="/%s" % path, timeout=7.5 ) - self.assertEqual(got.table_id, self.TABLE_ID) - self.assertEqual(got.project, self.PROJECT) - self.assertEqual(got.dataset_id, self.DS_ID) - self.assertEqual(got.schema, schema) - self.assertEqual(got.view_query, query) - - def test_create_table_w_external(self): - from google.cloud.bigquery.external_config import ExternalConfig - from google.cloud.bigquery.job import SourceFormat - from google.cloud.bigquery.table import Table + self.assertEqual(table.table_id, self.TABLE_ID) - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) + def test_get_table_sets_user_agent(self): creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = self._make_table_resource() - resource.update( - { - "externalDataConfiguration": { - "sourceFormat": SourceFormat.CSV, - "autodetect": True, - } - } + http = mock.create_autospec(requests.Session) + mock_response = http.request( + url=mock.ANY, method=mock.ANY, headers=mock.ANY, data=mock.ANY ) - conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF) - ec = ExternalConfig("CSV") - ec.autodetect = True - table.external_data_configuration = ec - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.create_table(table) - - 
final_attributes.assert_called_once_with( - {"path": "/%s" % path, "dataset_id": table.dataset_id}, client, None + http.reset_mock() + http.is_mtls = False + mock_response.status_code = 200 + mock_response.json.return_value = self._make_table_resource() + user_agent_override = client_info.ClientInfo(user_agent="my-application/1.2.3") + client = self._make_one( + project=self.PROJECT, + credentials=creds, + client_info=user_agent_override, + _http=http, ) - conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "externalDataConfiguration": { - "sourceFormat": SourceFormat.CSV, - "autodetect": True, - }, - "labels": {}, + client.get_table(self.TABLE_REF) + + expected_user_agent = user_agent_override.to_user_agent() + http.request.assert_called_once_with( + url=mock.ANY, + method="GET", + headers={ + "X-Goog-API-Client": expected_user_agent, + "Accept-Encoding": "gzip", + "User-Agent": expected_user_agent, }, + data=mock.ANY, timeout=None, ) - self.assertEqual(got.table_id, self.TABLE_ID) - self.assertEqual(got.project, self.PROJECT) - self.assertEqual(got.dataset_id, self.DS_ID) - self.assertEqual( - got.external_data_configuration.source_format, SourceFormat.CSV - ) - self.assertEqual(got.external_data_configuration.autodetect, True) - - def test_create_table_w_reference(self): - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = self._make_table_resource() - conn = client._connection = make_connection(resource) + self.assertIn("my-application/1.2.3", expected_user_agent) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.create_table(self.TABLE_REF) - - final_attributes.assert_called_once_with( - {"path": "/%s" % 
path, "dataset_id": self.TABLE_REF.dataset_id}, - client, - None, - ) - - conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "labels": {}, - }, - timeout=None, - ) - self.assertEqual(got.table_id, self.TABLE_ID) - - def test_create_table_w_fully_qualified_string(self): - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = self._make_table_resource() - conn = client._connection = make_connection(resource) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.create_table( - "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.TABLE_ID) - ) - - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "dataset_id": self.TABLE_REF.dataset_id}, - client, - None, - ) - - conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "labels": {}, - }, - timeout=None, - ) - self.assertEqual(got.table_id, self.TABLE_ID) - - def test_create_table_w_string(self): - path = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - resource = self._make_table_resource() - conn = client._connection = make_connection(resource) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.create_table("{}.{}".format(self.DS_ID, self.TABLE_ID)) - - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "dataset_id": self.TABLE_REF.dataset_id}, - client, - None, - ) - - 
conn.api_request.assert_called_once_with( - method="POST", - path="/%s" % path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "labels": {}, - }, - timeout=None, - ) - self.assertEqual(got.table_id, self.TABLE_ID) - - def test_create_table_alreadyexists_w_exists_ok_false(self): - post_path = "/projects/{}/datasets/{}/tables".format(self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION - ) - conn = client._connection = make_connection( - google.api_core.exceptions.AlreadyExists("table already exists") - ) - - with pytest.raises(google.api_core.exceptions.AlreadyExists): - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - client.create_table("{}.{}".format(self.DS_ID, self.TABLE_ID)) - - final_attributes.assert_called_with( - {"path": post_path, "dataset_id": self.TABLE_REF.dataset_id}, client, None, - ) - - conn.api_request.assert_called_once_with( - method="POST", - path=post_path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "labels": {}, - }, - timeout=None, - ) - - def test_create_table_alreadyexists_w_exists_ok_true(self): - post_path = "/projects/{}/datasets/{}/tables".format(self.PROJECT, self.DS_ID) - get_path = "/projects/{}/datasets/{}/tables/{}".format( - self.PROJECT, self.DS_ID, self.TABLE_ID - ) - resource = self._make_table_resource() - creds = _make_credentials() - client = self._make_one( - project=self.PROJECT, credentials=creds, location=self.LOCATION - ) - conn = client._connection = make_connection( - google.api_core.exceptions.AlreadyExists("table already exists"), resource - ) - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.create_table( - 
"{}.{}".format(self.DS_ID, self.TABLE_ID), exists_ok=True - ) - - final_attributes.assert_called_with({"path": get_path}, client, None) - - self.assertEqual(got.project, self.PROJECT) - self.assertEqual(got.dataset_id, self.DS_ID) - self.assertEqual(got.table_id, self.TABLE_ID) - - conn.api_request.assert_has_calls( - [ - mock.call( - method="POST", - path=post_path, - data={ - "tableReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": self.TABLE_ID, - }, - "labels": {}, - }, - timeout=None, - ), - mock.call(method="GET", path=get_path, timeout=None), - ] - ) - - def test_close(self): - creds = _make_credentials() - http = mock.Mock() - http._auth_request.session = mock.Mock() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - - client.close() - - http.close.assert_called_once() - http._auth_request.session.close.assert_called_once() - - def test_get_model(self): - path = "projects/%s/datasets/%s/models/%s" % ( - self.PROJECT, - self.DS_ID, - self.MODEL_ID, - ) - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - resource = { - "modelReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "modelId": self.MODEL_ID, - } - } - conn = client._connection = make_connection(resource) - - model_ref = DatasetReference(self.PROJECT, self.DS_ID).model(self.MODEL_ID) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.get_model(model_ref, timeout=7.5) - - final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) - - conn.api_request.assert_called_once_with( - method="GET", path="/%s" % path, timeout=7.5 - ) - self.assertEqual(got.model_id, self.MODEL_ID) - - def test_get_model_w_string(self): - path = "projects/%s/datasets/%s/models/%s" % ( - self.PROJECT, - self.DS_ID, - self.MODEL_ID, - ) - creds = _make_credentials() - http 
= object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - resource = { - "modelReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "modelId": self.MODEL_ID, - } - } - conn = client._connection = make_connection(resource) - - model_id = "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.MODEL_ID) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - got = client.get_model(model_id) - - final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) - - conn.api_request.assert_called_once_with( - method="GET", path="/%s" % path, timeout=None - ) - self.assertEqual(got.model_id, self.MODEL_ID) - - def test_get_routine(self): - from google.cloud.bigquery.routine import Routine - from google.cloud.bigquery.routine import RoutineReference - - full_routine_id = "test-routine-project.test_routines.minimal_routine" - routines = [ - full_routine_id, - Routine(full_routine_id), - RoutineReference.from_string(full_routine_id), - ] - for routine in routines: - creds = _make_credentials() - resource = { - "etag": "im-an-etag", - "routineReference": { - "projectId": "test-routine-project", - "datasetId": "test_routines", - "routineId": "minimal_routine", - }, - "routineType": "SCALAR_FUNCTION", - } - path = "/projects/test-routine-project/datasets/test_routines/routines/minimal_routine" - - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource) - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - actual_routine = client.get_routine(routine, timeout=7.5) - - final_attributes.assert_called_once_with({"path": path}, client, None) - - conn.api_request.assert_called_once_with( - method="GET", path=path, timeout=7.5, - ) - self.assertEqual( - actual_routine.reference, - RoutineReference.from_string(full_routine_id), - 
msg="routine={}".format(repr(routine)), - ) - self.assertEqual( - actual_routine.etag, - "im-an-etag", - msg="routine={}".format(repr(routine)), - ) - self.assertEqual( - actual_routine.type_, - "SCALAR_FUNCTION", - msg="routine={}".format(repr(routine)), - ) - - def test_get_table(self): - path = "projects/%s/datasets/%s/tables/%s" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, - ) - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - resource = self._make_table_resource() - conn = client._connection = make_connection(resource) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - table = client.get_table(self.TABLE_REF, timeout=7.5) - - final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) - - conn.api_request.assert_called_once_with( - method="GET", path="/%s" % path, timeout=7.5 - ) - self.assertEqual(table.table_id, self.TABLE_ID) - - def test_get_table_sets_user_agent(self): - creds = _make_credentials() - http = mock.create_autospec(requests.Session) - mock_response = http.request( - url=mock.ANY, method=mock.ANY, headers=mock.ANY, data=mock.ANY - ) - http.reset_mock() - http.is_mtls = False - mock_response.status_code = 200 - mock_response.json.return_value = self._make_table_resource() - user_agent_override = client_info.ClientInfo(user_agent="my-application/1.2.3") - client = self._make_one( - project=self.PROJECT, - credentials=creds, - client_info=user_agent_override, - _http=http, - ) - - client.get_table(self.TABLE_REF) - - expected_user_agent = user_agent_override.to_user_agent() - http.request.assert_called_once_with( - url=mock.ANY, - method="GET", - headers={ - "X-Goog-API-Client": expected_user_agent, - "Accept-Encoding": "gzip", - "User-Agent": expected_user_agent, - }, - data=mock.ANY, - timeout=None, - ) - self.assertIn("my-application/1.2.3", expected_user_agent) - - def 
test_get_iam_policy(self): - from google.cloud.bigquery.iam import BIGQUERY_DATA_OWNER_ROLE - from google.cloud.bigquery.iam import BIGQUERY_DATA_EDITOR_ROLE - from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE - from google.api_core.iam import Policy - - PATH = "/projects/{}/datasets/{}/tables/{}:getIamPolicy".format( - self.PROJECT, self.DS_ID, self.TABLE_ID, - ) - BODY = {"options": {"requestedPolicyVersion": 1}} - ETAG = "CARDI" - VERSION = 1 - OWNER1 = "user:phred@example.com" - OWNER2 = "group:cloud-logs@google.com" - EDITOR1 = "domain:google.com" - EDITOR2 = "user:phred@example.com" - VIEWER1 = "serviceAccount:1234-abcdef@service.example.com" - VIEWER2 = "user:phred@example.com" - RETURNED = { - "resourceId": PATH, - "etag": ETAG, - "version": VERSION, - "bindings": [ - {"role": BIGQUERY_DATA_OWNER_ROLE, "members": [OWNER1, OWNER2]}, - {"role": BIGQUERY_DATA_EDITOR_ROLE, "members": [EDITOR1, EDITOR2]}, - {"role": BIGQUERY_DATA_VIEWER_ROLE, "members": [VIEWER1, VIEWER2]}, - ], - } - EXPECTED = { - binding["role"]: set(binding["members"]) for binding in RETURNED["bindings"] - } - - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = make_connection(RETURNED) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - policy = client.get_iam_policy(self.TABLE_REF, timeout=7.5) - - final_attributes.assert_called_once_with({"path": PATH}, client, None) - - conn.api_request.assert_called_once_with( - method="POST", path=PATH, data=BODY, timeout=7.5 - ) - - self.assertIsInstance(policy, Policy) - self.assertEqual(policy.etag, RETURNED["etag"]) - self.assertEqual(policy.version, RETURNED["version"]) - self.assertEqual(dict(policy), EXPECTED) - - def test_get_iam_policy_w_invalid_table(self): - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, 
credentials=creds, _http=http) - - table_resource_string = "projects/{}/datasets/{}/tables/{}".format( - self.PROJECT, self.DS_ID, self.TABLE_ID, - ) - - with self.assertRaises(TypeError): - client.get_iam_policy(table_resource_string) - - def test_get_iam_policy_w_invalid_version(self): - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - - with self.assertRaises(ValueError): - client.get_iam_policy(self.TABLE_REF, requested_policy_version=2) - - def test_set_iam_policy(self): - from google.cloud.bigquery.iam import BIGQUERY_DATA_OWNER_ROLE - from google.cloud.bigquery.iam import BIGQUERY_DATA_EDITOR_ROLE - from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE - from google.api_core.iam import Policy - - PATH = "/projects/%s/datasets/%s/tables/%s:setIamPolicy" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, - ) - ETAG = "foo" - VERSION = 1 - OWNER1 = "user:phred@example.com" - OWNER2 = "group:cloud-logs@google.com" - EDITOR1 = "domain:google.com" - EDITOR2 = "user:phred@example.com" - VIEWER1 = "serviceAccount:1234-abcdef@service.example.com" - VIEWER2 = "user:phred@example.com" - BINDINGS = [ - {"role": BIGQUERY_DATA_OWNER_ROLE, "members": [OWNER1, OWNER2]}, - {"role": BIGQUERY_DATA_EDITOR_ROLE, "members": [EDITOR1, EDITOR2]}, - {"role": BIGQUERY_DATA_VIEWER_ROLE, "members": [VIEWER1, VIEWER2]}, - ] - MASK = "bindings,etag" - RETURNED = {"etag": ETAG, "version": VERSION, "bindings": BINDINGS} - - policy = Policy() - for binding in BINDINGS: - policy[binding["role"]] = binding["members"] - - BODY = {"policy": policy.to_api_repr(), "updateMask": MASK} - - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = make_connection(RETURNED) - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - returned_policy = 
client.set_iam_policy( - self.TABLE_REF, policy, updateMask=MASK, timeout=7.5 - ) - - final_attributes.assert_called_once_with({"path": PATH}, client, None) - - conn.api_request.assert_called_once_with( - method="POST", path=PATH, data=BODY, timeout=7.5 - ) - self.assertEqual(returned_policy.etag, ETAG) - self.assertEqual(returned_policy.version, VERSION) - self.assertEqual(dict(returned_policy), dict(policy)) - - def test_set_iam_policy_no_mask(self): - from google.api_core.iam import Policy - - PATH = "/projects/%s/datasets/%s/tables/%s:setIamPolicy" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, - ) - RETURNED = {"etag": "foo", "version": 1, "bindings": []} - - policy = Policy() - BODY = {"policy": policy.to_api_repr()} - - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = make_connection(RETURNED) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - client.set_iam_policy(self.TABLE_REF, policy, timeout=7.5) - - final_attributes.assert_called_once_with({"path": PATH}, client, None) - - conn.api_request.assert_called_once_with( - method="POST", path=PATH, data=BODY, timeout=7.5 - ) - - def test_set_iam_policy_invalid_policy(self): - from google.api_core.iam import Policy - - policy = Policy() - invalid_policy_repr = policy.to_api_repr() - - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - - with self.assertRaises(TypeError): - client.set_iam_policy(self.TABLE_REF, invalid_policy_repr) - - def test_set_iam_policy_w_invalid_table(self): - from google.api_core.iam import Policy - - policy = Policy() - - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - - table_resource_string = "projects/%s/datasets/%s/tables/%s" % ( - self.PROJECT, - 
self.DS_ID, - self.TABLE_ID, - ) - - with self.assertRaises(TypeError): - client.set_iam_policy(table_resource_string, policy) - - def test_test_iam_permissions(self): - PATH = "/projects/%s/datasets/%s/tables/%s:testIamPermissions" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, - ) - - PERMISSIONS = ["bigquery.tables.get", "bigquery.tables.update"] - BODY = {"permissions": PERMISSIONS} - RETURNED = {"permissions": PERMISSIONS} - - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = make_connection(RETURNED) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - client.test_iam_permissions(self.TABLE_REF, PERMISSIONS, timeout=7.5) - - final_attributes.assert_called_once_with({"path": PATH}, client, None) - - conn.api_request.assert_called_once_with( - method="POST", path=PATH, data=BODY, timeout=7.5 - ) - - def test_test_iam_permissions_w_invalid_table(self): - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - - table_resource_string = "projects/%s/datasets/%s/tables/%s" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, - ) - - PERMISSIONS = ["bigquery.tables.get", "bigquery.tables.update"] - - with self.assertRaises(TypeError): - client.test_iam_permissions(table_resource_string, PERMISSIONS) - - def test_update_dataset_w_invalid_field(self): - from google.cloud.bigquery.dataset import Dataset - - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - with self.assertRaises(ValueError): - client.update_dataset( - Dataset("{}.{}".format(self.PROJECT, self.DS_ID)), ["foo"] - ) - - def test_update_dataset(self): - from google.cloud.bigquery.dataset import Dataset, AccessEntry - - PATH = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) - DESCRIPTION = "DESCRIPTION" - 
FRIENDLY_NAME = "TITLE" - LOCATION = "loc" - LABELS = {"priority": "high"} - ACCESS = [{"role": "OWNER", "userByEmail": "phred@example.com"}] - EXP = 17 - RESOURCE = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "etag": "etag", - "description": DESCRIPTION, - "friendlyName": FRIENDLY_NAME, - "location": LOCATION, - "defaultTableExpirationMs": EXP, - "labels": LABELS, - "access": ACCESS, - } - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(RESOURCE, RESOURCE) - ds = Dataset(DatasetReference(self.PROJECT, self.DS_ID)) - ds.description = DESCRIPTION - ds.friendly_name = FRIENDLY_NAME - ds.location = LOCATION - ds.default_table_expiration_ms = EXP - ds.labels = LABELS - ds.access_entries = [AccessEntry("OWNER", "userByEmail", "phred@example.com")] - fields = [ - "description", - "friendly_name", - "location", - "labels", - "access_entries", - ] - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - ds2 = client.update_dataset(ds, fields=fields, timeout=7.5,) - - final_attributes.assert_called_once_with( - {"path": "/%s" % PATH, "fields": fields}, client, None - ) - - conn.api_request.assert_called_once_with( - method="PATCH", - data={ - "description": DESCRIPTION, - "friendlyName": FRIENDLY_NAME, - "location": LOCATION, - "labels": LABELS, - "access": ACCESS, - }, - path="/" + PATH, - headers=None, - timeout=7.5, - ) - self.assertEqual(ds2.description, ds.description) - self.assertEqual(ds2.friendly_name, ds.friendly_name) - self.assertEqual(ds2.location, ds.location) - self.assertEqual(ds2.labels, ds.labels) - self.assertEqual(ds2.access_entries, ds.access_entries) - - # ETag becomes If-Match header. 
- ds._properties["etag"] = "etag" - client.update_dataset(ds, []) - req = conn.api_request.call_args - self.assertEqual(req[1]["headers"]["If-Match"], "etag") - - def test_update_dataset_w_custom_property(self): - # The library should handle sending properties to the API that are not - # yet part of the library - from google.cloud.bigquery.dataset import Dataset - - path = "/projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) - resource = { - "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, - "newAlphaProperty": "unreleased property", - } - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource) - dataset = Dataset(DatasetReference(self.PROJECT, self.DS_ID)) - dataset._properties["newAlphaProperty"] = "unreleased property" - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - dataset = client.update_dataset(dataset, ["newAlphaProperty"]) - - final_attributes.assert_called_once_with( - {"path": path, "fields": ["newAlphaProperty"]}, client, None - ) - - conn.api_request.assert_called_once_with( - method="PATCH", - data={"newAlphaProperty": "unreleased property"}, - path=path, - headers=None, - timeout=None, - ) - - self.assertEqual(dataset.dataset_id, self.DS_ID) - self.assertEqual(dataset.project, self.PROJECT) - self.assertEqual(dataset._properties["newAlphaProperty"], "unreleased property") - - def test_update_model(self): - from google.cloud.bigquery.model import Model - - path = "projects/%s/datasets/%s/models/%s" % ( - self.PROJECT, - self.DS_ID, - self.MODEL_ID, - ) - description = "description" - title = "title" - expires = datetime.datetime( - 2012, 12, 21, 16, 0, 0, tzinfo=google.cloud._helpers.UTC - ) - resource = { - "modelReference": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "modelId": self.MODEL_ID, - }, - "description": description, - "etag": 
"etag", - "expirationTime": str(google.cloud._helpers._millis(expires)), - "friendlyName": title, - "labels": {"x": "y"}, - } - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource, resource) - model_id = "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.MODEL_ID) - model = Model(model_id) - model.description = description - model.friendly_name = title - model.expires = expires - model.labels = {"x": "y"} - fields = ["description", "friendly_name", "labels", "expires"] - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - updated_model = client.update_model(model, fields, timeout=7.5) - - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": fields}, client, None - ) - - sent = { - "description": description, - "expirationTime": str(google.cloud._helpers._millis(expires)), - "friendlyName": title, - "labels": {"x": "y"}, - } - conn.api_request.assert_called_once_with( - method="PATCH", data=sent, path="/" + path, headers=None, timeout=7.5 - ) - self.assertEqual(updated_model.model_id, model.model_id) - self.assertEqual(updated_model.description, model.description) - self.assertEqual(updated_model.friendly_name, model.friendly_name) - self.assertEqual(updated_model.labels, model.labels) - self.assertEqual(updated_model.expires, model.expires) - - # ETag becomes If-Match header. 
- model._proto.etag = "etag" - client.update_model(model, []) - req = conn.api_request.call_args - self.assertEqual(req[1]["headers"]["If-Match"], "etag") - - def test_update_routine(self): - from google.cloud.bigquery.routine import Routine - from google.cloud.bigquery.routine import RoutineArgument - - full_routine_id = "routines-project.test_routines.updated_routine" - resource = { - "routineReference": { - "projectId": "routines-project", - "datasetId": "test_routines", - "routineId": "updated_routine", - }, - "routineType": "SCALAR_FUNCTION", - "language": "SQL", - "definitionBody": "x * 3", - "arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}], - "returnType": None, - "someNewField": "someValue", - } - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource, resource) - routine = Routine(full_routine_id) - routine.arguments = [ - RoutineArgument( - name="x", - data_type=bigquery_v2.types.StandardSqlDataType( - type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 - ), - ) - ] - routine.body = "x * 3" - routine.language = "SQL" - routine.type_ = "SCALAR_FUNCTION" - routine._properties["someNewField"] = "someValue" - fields = [ - "arguments", - "language", - "body", - "type_", - "return_type", - "someNewField", - ] - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - actual_routine = client.update_routine(routine, fields, timeout=7.5,) - - final_attributes.assert_called_once_with( - {"path": routine.path, "fields": fields}, client, None - ) - - # TODO: routineReference isn't needed when the Routines API supports - # partial updates. 
- sent = resource - conn.api_request.assert_called_once_with( - method="PUT", - data=sent, - path="/projects/routines-project/datasets/test_routines/routines/updated_routine", - headers=None, - timeout=7.5, - ) - self.assertEqual(actual_routine.arguments, routine.arguments) - self.assertEqual(actual_routine.body, routine.body) - self.assertEqual(actual_routine.language, routine.language) - self.assertEqual(actual_routine.type_, routine.type_) - - # ETag becomes If-Match header. - routine._properties["etag"] = "im-an-etag" - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - client.update_routine(routine, []) - - final_attributes.assert_called_once_with( - {"path": routine.path, "fields": []}, client, None - ) - - req = conn.api_request.call_args - self.assertEqual(req[1]["headers"]["If-Match"], "im-an-etag") - - def test_update_table(self): - from google.cloud.bigquery.schema import SchemaField - from google.cloud.bigquery.table import Table - - path = "projects/%s/datasets/%s/tables/%s" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, - ) - description = "description" - title = "title" - resource = self._make_table_resource() - resource.update( - { - "schema": { - "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": "New field description", - }, - ] - }, - "etag": "etag", - "description": description, - "friendlyName": title, - "labels": {"x": "y"}, - } - ) - schema = [ - SchemaField("full_name", "STRING", mode="REQUIRED", description=None), - SchemaField( - "age", "INTEGER", mode="REQUIRED", description="New field description" - ), - ] - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource, resource) - table = Table(self.TABLE_REF, schema=schema) - 
table.description = description - table.friendly_name = title - table.labels = {"x": "y"} - fields = ["schema", "description", "friendly_name", "labels"] - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - updated_table = client.update_table(table, fields, timeout=7.5) - span_path = "/%s" % path + def test_get_iam_policy(self): + from google.cloud.bigquery.iam import BIGQUERY_DATA_OWNER_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_EDITOR_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE + from google.api_core.iam import Policy - final_attributes.assert_called_once_with( - {"path": span_path, "fields": fields}, client, None + PATH = "/projects/{}/datasets/{}/tables/{}:getIamPolicy".format( + self.PROJECT, self.DS_ID, self.TABLE_ID, ) - - sent = { - "schema": { - "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": "New field description", - }, - ] - }, - "description": description, - "friendlyName": title, - "labels": {"x": "y"}, + BODY = {"options": {"requestedPolicyVersion": 1}} + ETAG = "CARDI" + VERSION = 1 + OWNER1 = "user:phred@example.com" + OWNER2 = "group:cloud-logs@google.com" + EDITOR1 = "domain:google.com" + EDITOR2 = "user:phred@example.com" + VIEWER1 = "serviceAccount:1234-abcdef@service.example.com" + VIEWER2 = "user:phred@example.com" + RETURNED = { + "resourceId": PATH, + "etag": ETAG, + "version": VERSION, + "bindings": [ + {"role": BIGQUERY_DATA_OWNER_ROLE, "members": [OWNER1, OWNER2]}, + {"role": BIGQUERY_DATA_EDITOR_ROLE, "members": [EDITOR1, EDITOR2]}, + {"role": BIGQUERY_DATA_VIEWER_ROLE, "members": [VIEWER1, VIEWER2]}, + ], + } + EXPECTED = { + binding["role"]: set(binding["members"]) for binding in RETURNED["bindings"] } - conn.api_request.assert_called_once_with( - method="PATCH", data=sent, path="/" + 
path, headers=None, timeout=7.5 - ) - self.assertEqual(updated_table.description, table.description) - self.assertEqual(updated_table.friendly_name, table.friendly_name) - self.assertEqual(updated_table.schema, table.schema) - self.assertEqual(updated_table.labels, table.labels) - # ETag becomes If-Match header. - table._properties["etag"] = "etag" + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RETURNED) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - client.update_table(table, []) + policy = client.get_iam_policy(self.TABLE_REF, timeout=7.5) - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": []}, client, None + final_attributes.assert_called_once_with({"path": PATH}, client, None) + + conn.api_request.assert_called_once_with( + method="POST", path=PATH, data=BODY, timeout=7.5 ) - req = conn.api_request.call_args - self.assertEqual(req[1]["headers"]["If-Match"], "etag") + self.assertIsInstance(policy, Policy) + self.assertEqual(policy.etag, RETURNED["etag"]) + self.assertEqual(policy.version, RETURNED["version"]) + self.assertEqual(dict(policy), EXPECTED) - def test_update_table_w_custom_property(self): - from google.cloud.bigquery.table import Table + def test_get_iam_policy_w_invalid_table(self): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - path = "projects/%s/datasets/%s/tables/%s" % ( + table_resource_string = "projects/{}/datasets/{}/tables/{}".format( + self.PROJECT, self.DS_ID, self.TABLE_ID, + ) + + with self.assertRaises(TypeError): + client.get_iam_policy(table_resource_string) + + def test_get_iam_policy_w_invalid_version(self): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + 
+ with self.assertRaises(ValueError): + client.get_iam_policy(self.TABLE_REF, requested_policy_version=2) + + def test_set_iam_policy(self): + from google.cloud.bigquery.iam import BIGQUERY_DATA_OWNER_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_EDITOR_ROLE + from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE + from google.api_core.iam import Policy + + PATH = "/projects/%s/datasets/%s/tables/%s:setIamPolicy" % ( self.PROJECT, self.DS_ID, self.TABLE_ID, ) - resource = self._make_table_resource() - resource["newAlphaProperty"] = "unreleased property" + ETAG = "foo" + VERSION = 1 + OWNER1 = "user:phred@example.com" + OWNER2 = "group:cloud-logs@google.com" + EDITOR1 = "domain:google.com" + EDITOR2 = "user:phred@example.com" + VIEWER1 = "serviceAccount:1234-abcdef@service.example.com" + VIEWER2 = "user:phred@example.com" + BINDINGS = [ + {"role": BIGQUERY_DATA_OWNER_ROLE, "members": [OWNER1, OWNER2]}, + {"role": BIGQUERY_DATA_EDITOR_ROLE, "members": [EDITOR1, EDITOR2]}, + {"role": BIGQUERY_DATA_VIEWER_ROLE, "members": [VIEWER1, VIEWER2]}, + ] + MASK = "bindings,etag" + RETURNED = {"etag": ETAG, "version": VERSION, "bindings": BINDINGS} + + policy = Policy() + for binding in BINDINGS: + policy[binding["role"]] = binding["members"] + + BODY = {"policy": policy.to_api_repr(), "updateMask": MASK} + creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF) - table._properties["newAlphaProperty"] = "unreleased property" + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RETURNED) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - updated_table = client.update_table(table, ["newAlphaProperty"]) + returned_policy = client.set_iam_policy( + self.TABLE_REF, policy, 
updateMask=MASK, timeout=7.5 + ) - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": ["newAlphaProperty"]}, client, None, + final_attributes.assert_called_once_with({"path": PATH}, client, None) + + conn.api_request.assert_called_once_with( + method="POST", path=PATH, data=BODY, timeout=7.5 + ) + self.assertEqual(returned_policy.etag, ETAG) + self.assertEqual(returned_policy.version, VERSION) + self.assertEqual(dict(returned_policy), dict(policy)) + + def test_set_iam_policy_no_mask(self): + from google.api_core.iam import Policy + + PATH = "/projects/%s/datasets/%s/tables/%s:setIamPolicy" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, ) + RETURNED = {"etag": "foo", "version": 1, "bindings": []} + + policy = Policy() + BODY = {"policy": policy.to_api_repr()} + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RETURNED) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + client.set_iam_policy(self.TABLE_REF, policy, timeout=7.5) + + final_attributes.assert_called_once_with({"path": PATH}, client, None) conn.api_request.assert_called_once_with( - method="PATCH", - path="/%s" % path, - data={"newAlphaProperty": "unreleased property"}, - headers=None, - timeout=None, + method="POST", path=PATH, data=BODY, timeout=7.5 ) - self.assertEqual( - updated_table._properties["newAlphaProperty"], "unreleased property" + + def test_set_iam_policy_invalid_policy(self): + from google.api_core.iam import Policy + + policy = Policy() + invalid_policy_repr = policy.to_api_repr() + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + with self.assertRaises(TypeError): + client.set_iam_policy(self.TABLE_REF, invalid_policy_repr) + + def test_set_iam_policy_w_invalid_table(self): + from 
google.api_core.iam import Policy + + policy = Policy() + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + table_resource_string = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, ) - def test_update_table_only_use_legacy_sql(self): - from google.cloud.bigquery.table import Table + with self.assertRaises(TypeError): + client.set_iam_policy(table_resource_string, policy) - path = "projects/%s/datasets/%s/tables/%s" % ( + def test_test_iam_permissions(self): + PATH = "/projects/%s/datasets/%s/tables/%s:testIamPermissions" % ( self.PROJECT, self.DS_ID, self.TABLE_ID, ) - resource = self._make_table_resource() - resource["view"] = {"useLegacySql": True} + + PERMISSIONS = ["bigquery.tables.get", "bigquery.tables.update"] + BODY = {"permissions": PERMISSIONS} + RETURNED = {"permissions": PERMISSIONS} + creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF) - table.view_use_legacy_sql = True + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(RETURNED) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - updated_table = client.update_table(table, ["view_use_legacy_sql"]) + client.test_iam_permissions(self.TABLE_REF, PERMISSIONS, timeout=7.5) - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": ["view_use_legacy_sql"]}, client, None, - ) + final_attributes.assert_called_once_with({"path": PATH}, client, None) conn.api_request.assert_called_once_with( - method="PATCH", - path="/%s" % path, - data={"view": {"useLegacySql": True}}, - headers=None, - timeout=None, + method="POST", path=PATH, data=BODY, timeout=7.5 ) - 
self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql) - def test_update_table_w_query(self): - import datetime - from google.cloud._helpers import UTC - from google.cloud._helpers import _millis - from google.cloud.bigquery.schema import SchemaField - from google.cloud.bigquery.table import Table + def test_test_iam_permissions_w_invalid_table(self): + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - path = "projects/%s/datasets/%s/tables/%s" % ( + table_resource_string = "projects/%s/datasets/%s/tables/%s" % ( self.PROJECT, self.DS_ID, self.TABLE_ID, ) - query = "select fullname, age from person_ages" - location = "EU" - exp_time = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC) - schema_resource = { - "fields": [ - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": None, - }, - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": "this is a column", - }, - {"name": "country", "type": "STRING", "mode": "NULLABLE"}, - ] + + PERMISSIONS = ["bigquery.tables.get", "bigquery.tables.update"] + + with self.assertRaises(TypeError): + client.test_iam_permissions(table_resource_string, PERMISSIONS) + + def test_update_dataset_w_invalid_field(self): + from google.cloud.bigquery.dataset import Dataset + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + with self.assertRaises(ValueError): + client.update_dataset( + Dataset("{}.{}".format(self.PROJECT, self.DS_ID)), ["foo"] + ) + + def test_update_dataset(self): + from google.cloud.bigquery.dataset import Dataset, AccessEntry + + PATH = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) + DESCRIPTION = "DESCRIPTION" + FRIENDLY_NAME = "TITLE" + LOCATION = "loc" + LABELS = {"priority": "high"} + ACCESS = [{"role": "OWNER", "userByEmail": "phred@example.com"}] + EXP = 17 + RESOURCE = { + "datasetReference": 
{"projectId": self.PROJECT, "datasetId": self.DS_ID}, + "etag": "etag", + "description": DESCRIPTION, + "friendlyName": FRIENDLY_NAME, + "location": LOCATION, + "defaultTableExpirationMs": EXP, + "labels": LABELS, + "access": ACCESS, } - schema = [ - SchemaField( - "full_name", - "STRING", - mode="REQUIRED", - # Explicitly unset the description. - description=None, - ), - SchemaField( - "age", "INTEGER", mode="REQUIRED", description="this is a column" - ), - # Omit the description to not make updates to it. - SchemaField("country", "STRING"), - ] - resource = self._make_table_resource() - resource.update( - { - "schema": schema_resource, - "view": {"query": query, "useLegacySql": True}, - "location": location, - "expirationTime": _millis(exp_time), - } - ) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource) - table = Table(self.TABLE_REF, schema=schema) - table.expires = exp_time - table.view_query = query - table.view_use_legacy_sql = True - updated_properties = ["schema", "view_query", "expires", "view_use_legacy_sql"] + conn = client._connection = make_connection(RESOURCE, RESOURCE) + ds = Dataset(DatasetReference(self.PROJECT, self.DS_ID)) + ds.description = DESCRIPTION + ds.friendly_name = FRIENDLY_NAME + ds.location = LOCATION + ds.default_table_expiration_ms = EXP + ds.labels = LABELS + ds.access_entries = [AccessEntry("OWNER", "userByEmail", "phred@example.com")] + fields = [ + "description", + "friendly_name", + "location", + "labels", + "access_entries", + ] + with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - updated_table = client.update_table(table, updated_properties) + ds2 = client.update_dataset(ds, fields=fields, timeout=7.5,) final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": updated_properties}, client, None, + {"path": "/%s" % PATH, "fields": fields}, client, 
None ) - self.assertEqual(updated_table.schema, table.schema) - self.assertEqual(updated_table.view_query, table.view_query) - self.assertEqual(updated_table.expires, table.expires) - self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql) - self.assertEqual(updated_table.location, location) - conn.api_request.assert_called_once_with( method="PATCH", - path="/%s" % path, data={ - "view": {"query": query, "useLegacySql": True}, - "expirationTime": str(_millis(exp_time)), - "schema": schema_resource, + "description": DESCRIPTION, + "friendlyName": FRIENDLY_NAME, + "location": LOCATION, + "labels": LABELS, + "access": ACCESS, }, + path="/" + PATH, headers=None, - timeout=None, + timeout=7.5, ) + self.assertEqual(ds2.description, ds.description) + self.assertEqual(ds2.friendly_name, ds.friendly_name) + self.assertEqual(ds2.location, ds.location) + self.assertEqual(ds2.labels, ds.labels) + self.assertEqual(ds2.access_entries, ds.access_entries) - def test_update_table_w_schema_None(self): - # Simulate deleting schema: not sure if back-end will actually - # allow this operation, but the spec says it is optional. 
- path = "projects/%s/datasets/%s/tables/%s" % ( - self.PROJECT, - self.DS_ID, - self.TABLE_ID, - ) - resource1 = self._make_table_resource() - resource1.update( - { - "schema": { - "fields": [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, - ] - } - } - ) - resource2 = self._make_table_resource() - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(resource1, resource2) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - table = client.get_table( - # Test with string for table ID - "{}.{}.{}".format( - self.TABLE_REF.project, - self.TABLE_REF.dataset_id, - self.TABLE_REF.table_id, - ) - ) + # ETag becomes If-Match header. + ds._properties["etag"] = "etag" + client.update_dataset(ds, []) + req = conn.api_request.call_args + self.assertEqual(req[1]["headers"]["If-Match"], "etag") - final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) + def test_update_dataset_w_custom_property(self): + # The library should handle sending properties to the API that are not + # yet part of the library + from google.cloud.bigquery.dataset import Dataset - table.schema = None + path = "/projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) + resource = { + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + "newAlphaProperty": "unreleased property", + } + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + conn = client._connection = make_connection(resource) + dataset = Dataset(DatasetReference(self.PROJECT, self.DS_ID)) + dataset._properties["newAlphaProperty"] = "unreleased property" with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - updated_table = client.update_table(table, ["schema"]) + dataset = 
client.update_dataset(dataset, ["newAlphaProperty"]) final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": ["schema"]}, client, None + {"path": path, "fields": ["newAlphaProperty"]}, client, None ) - self.assertEqual(len(conn.api_request.call_args_list), 2) - req = conn.api_request.call_args_list[1] - self.assertEqual(req[1]["method"], "PATCH") - sent = {"schema": None} - self.assertEqual(req[1]["data"], sent) - self.assertEqual(req[1]["path"], "/%s" % path) - self.assertEqual(len(updated_table.schema), 0) + conn.api_request.assert_called_once_with( + method="PATCH", + data={"newAlphaProperty": "unreleased property"}, + path=path, + headers=None, + timeout=None, + ) - def test_update_table_delete_property(self): - from google.cloud.bigquery.table import Table + self.assertEqual(dataset.dataset_id, self.DS_ID) + self.assertEqual(dataset.project, self.PROJECT) + self.assertEqual(dataset._properties["newAlphaProperty"], "unreleased property") - description = "description" - title = "title" - path = "projects/%s/datasets/%s/tables/%s" % ( + def test_update_model(self): + from google.cloud.bigquery.model import Model + + path = "projects/%s/datasets/%s/models/%s" % ( self.PROJECT, self.DS_ID, - self.TABLE_ID, + self.MODEL_ID, ) - resource1 = self._make_table_resource() - resource1.update({"description": description, "friendlyName": title}) - resource2 = self._make_table_resource() - resource2["description"] = None + description = "description" + title = "title" + expires = datetime.datetime( + 2012, 12, 21, 16, 0, 0, tzinfo=google.cloud._helpers.UTC + ) + resource = { + "modelReference": { + "projectId": self.PROJECT, + "datasetId": self.DS_ID, + "modelId": self.MODEL_ID, + }, + "description": description, + "etag": "etag", + "expirationTime": str(google.cloud._helpers._millis(expires)), + "friendlyName": title, + "labels": {"x": "y"}, + } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = 
client._connection = make_connection(resource1, resource2) - table = Table(self.TABLE_REF) - table.description = description - table.friendly_name = title - + conn = client._connection = make_connection(resource, resource) + model_id = "{}.{}.{}".format(self.PROJECT, self.DS_ID, self.MODEL_ID) + model = Model(model_id) + model.description = description + model.friendly_name = title + model.expires = expires + model.labels = {"x": "y"} + fields = ["description", "friendly_name", "labels", "expires"] with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - table2 = client.update_table(table, ["description", "friendly_name"]) + updated_model = client.update_model(model, fields, timeout=7.5) final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": ["description", "friendly_name"]}, - client, - None, + {"path": "/%s" % path, "fields": fields}, client, None ) - self.assertEqual(table2.description, table.description) - table2.description = None - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - table3 = client.update_table(table2, ["description"]) - - final_attributes.assert_called_once_with( - {"path": "/%s" % path, "fields": ["description"]}, client, None + sent = { + "description": description, + "expirationTime": str(google.cloud._helpers._millis(expires)), + "friendlyName": title, + "labels": {"x": "y"}, + } + conn.api_request.assert_called_once_with( + method="PATCH", data=sent, path="/" + path, headers=None, timeout=7.5 ) + self.assertEqual(updated_model.model_id, model.model_id) + self.assertEqual(updated_model.description, model.description) + self.assertEqual(updated_model.friendly_name, model.friendly_name) + self.assertEqual(updated_model.labels, model.labels) + self.assertEqual(updated_model.expires, model.expires) - self.assertEqual(len(conn.api_request.call_args_list), 2) - req = 
conn.api_request.call_args_list[1] - self.assertEqual(req[1]["method"], "PATCH") - self.assertEqual(req[1]["path"], "/%s" % path) - sent = {"description": None} - self.assertEqual(req[1]["data"], sent) - self.assertIsNone(table3.description) + # ETag becomes If-Match header. + model._proto.etag = "etag" + client.update_model(model, []) + req = conn.api_request.call_args + self.assertEqual(req[1]["headers"]["If-Match"], "etag") - def test_list_tables_empty_w_timeout(self): - path = "/projects/{}/datasets/{}/tables".format(self.PROJECT, self.DS_ID) + def test_update_routine(self): + from google.cloud.bigquery.routine import Routine + from google.cloud.bigquery.routine import RoutineArgument + + full_routine_id = "routines-project.test_routines.updated_routine" + resource = { + "routineReference": { + "projectId": "routines-project", + "datasetId": "test_routines", + "routineId": "updated_routine", + }, + "routineType": "SCALAR_FUNCTION", + "language": "SQL", + "definitionBody": "x * 3", + "arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}], + "returnType": None, + "someNewField": "someValue", + } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection({}) + conn = client._connection = make_connection(resource, resource) + routine = Routine(full_routine_id) + routine.arguments = [ + RoutineArgument( + name="x", + data_type=bigquery_v2.types.StandardSqlDataType( + type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 + ), + ) + ] + routine.body = "x * 3" + routine.language = "SQL" + routine.type_ = "SCALAR_FUNCTION" + routine._properties["someNewField"] = "someValue" + fields = [ + "arguments", + "language", + "body", + "type_", + "return_type", + "someNewField", + ] - dataset = DatasetReference(self.PROJECT, self.DS_ID) - iterator = client.list_tables(dataset, timeout=7.5) - self.assertIs(iterator.dataset, dataset) with mock.patch( 
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = next(iterator.pages) + actual_routine = client.update_routine(routine, fields, timeout=7.5,) - final_attributes.assert_called_once_with({"path": path}, client, None) - tables = list(page) - token = iterator.next_page_token + final_attributes.assert_called_once_with( + {"path": routine.path, "fields": fields}, client, None + ) - self.assertEqual(tables, []) - self.assertIsNone(token) + # TODO: routineReference isn't needed when the Routines API supports + # partial updates. + sent = resource conn.api_request.assert_called_once_with( - method="GET", path=path, query_params={}, timeout=7.5 + method="PUT", + data=sent, + path="/projects/routines-project/datasets/test_routines/routines/updated_routine", + headers=None, + timeout=7.5, ) + self.assertEqual(actual_routine.arguments, routine.arguments) + self.assertEqual(actual_routine.body, routine.body) + self.assertEqual(actual_routine.language, routine.language) + self.assertEqual(actual_routine.type_, routine.type_) - def test_list_models_empty_w_timeout(self): - path = "/projects/{}/datasets/{}/models".format(self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection({}) - - dataset_id = "{}.{}".format(self.PROJECT, self.DS_ID) - iterator = client.list_models(dataset_id, timeout=7.5) + # ETag becomes If-Match header. 
+ routine._properties["etag"] = "im-an-etag" with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": path}, client, None) - models = list(page) - token = iterator.next_page_token + client.update_routine(routine, []) - self.assertEqual(models, []) - self.assertIsNone(token) - conn.api_request.assert_called_once_with( - method="GET", path=path, query_params={}, timeout=7.5 + final_attributes.assert_called_once_with( + {"path": routine.path, "fields": []}, client, None ) - def test_list_models_defaults(self): - from google.cloud.bigquery.model import Model + req = conn.api_request.call_args + self.assertEqual(req[1]["headers"]["If-Match"], "im-an-etag") - MODEL_1 = "model_one" - MODEL_2 = "model_two" - PATH = "projects/%s/datasets/%s/models" % (self.PROJECT, self.DS_ID) - TOKEN = "TOKEN" - DATA = { - "nextPageToken": TOKEN, - "models": [ - { - "modelReference": { - "modelId": MODEL_1, - "datasetId": self.DS_ID, - "projectId": self.PROJECT, - } - }, - { - "modelReference": { - "modelId": MODEL_2, - "datasetId": self.DS_ID, - "projectId": self.PROJECT, - } - }, - ], - } + def test_update_table(self): + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + description = "description" + title = "title" + resource = self._make_table_resource() + resource.update( + { + "schema": { + "fields": [ + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, + }, + { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": "New field description", + }, + ] + }, + "etag": "etag", + "description": description, + "friendlyName": title, + "labels": {"x": "y"}, + } + ) + schema = [ + SchemaField("full_name", "STRING", mode="REQUIRED", 
description=None), + SchemaField( + "age", "INTEGER", mode="REQUIRED", description="New field description" + ), + ] creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(DATA) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - - iterator = client.list_models(dataset) - self.assertIs(iterator.dataset, dataset) + conn = client._connection = make_connection(resource, resource) + table = Table(self.TABLE_REF, schema=schema) + table.description = description + table.friendly_name = title + table.labels = {"x": "y"} + fields = ["schema", "description", "friendly_name", "labels"] with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - models = list(page) - token = iterator.next_page_token + updated_table = client.update_table(table, fields, timeout=7.5) + span_path = "/%s" % path - self.assertEqual(len(models), len(DATA["models"])) - for found, expected in zip(models, DATA["models"]): - self.assertIsInstance(found, Model) - self.assertEqual(found.model_id, expected["modelReference"]["modelId"]) - self.assertEqual(token, TOKEN) + final_attributes.assert_called_once_with( + {"path": span_path, "fields": fields}, client, None + ) + sent = { + "schema": { + "fields": [ + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, + }, + { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": "New field description", + }, + ] + }, + "description": description, + "friendlyName": title, + "labels": {"x": "y"}, + } conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={}, timeout=None + method="PATCH", data=sent, path="/" + path, headers=None, timeout=7.5 ) + self.assertEqual(updated_table.description, table.description) + 
self.assertEqual(updated_table.friendly_name, table.friendly_name) + self.assertEqual(updated_table.schema, table.schema) + self.assertEqual(updated_table.labels, table.labels) - def test_list_models_wrong_type(self): - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - with self.assertRaises(TypeError): - client.list_models(DatasetReference(self.PROJECT, self.DS_ID).model("foo")) - - def test_list_routines_empty_w_timeout(self): - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection({}) - - iterator = client.list_routines("test-routines.test_routines", timeout=7.5) + # ETag becomes If-Match header. + table._properties["etag"] = "etag" with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = next(iterator.pages) + client.update_table(table, []) final_attributes.assert_called_once_with( - {"path": "/projects/test-routines/datasets/test_routines/routines"}, - client, - None, - ) - routines = list(page) - token = iterator.next_page_token - - self.assertEqual(routines, []) - self.assertIsNone(token) - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/test-routines/datasets/test_routines/routines", - query_params={}, - timeout=7.5, + {"path": "/%s" % path, "fields": []}, client, None ) - def test_list_routines_defaults(self): - from google.cloud.bigquery.routine import Routine + req = conn.api_request.call_args + self.assertEqual(req[1]["headers"]["If-Match"], "etag") - project_id = "test-routines" - dataset_id = "test_routines" - path = "/projects/test-routines/datasets/test_routines/routines" - routine_1 = "routine_one" - routine_2 = "routine_two" - token = "TOKEN" - resource = { - "nextPageToken": token, - "routines": [ - { - "routineReference": { - "routineId": routine_1, - "datasetId": dataset_id, - "projectId": project_id, - } - }, - { - 
"routineReference": { - "routineId": routine_2, - "datasetId": dataset_id, - "projectId": project_id, - } - }, - ], - } + def test_update_table_w_custom_property(self): + from google.cloud.bigquery.table import Table + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + resource = self._make_table_resource() + resource["newAlphaProperty"] = "unreleased property" creds = _make_credentials() - client = self._make_one(project=project_id, credentials=creds) + client = self._make_one(project=self.PROJECT, credentials=creds) conn = client._connection = make_connection(resource) - dataset = DatasetReference(client.project, dataset_id) + table = Table(self.TABLE_REF) + table._properties["newAlphaProperty"] = "unreleased property" - iterator = client.list_routines(dataset) - self.assertIs(iterator.dataset, dataset) with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": path}, client, None) - routines = list(page) - actual_token = iterator.next_page_token + updated_table = client.update_table(table, ["newAlphaProperty"]) - self.assertEqual(len(routines), len(resource["routines"])) - for found, expected in zip(routines, resource["routines"]): - self.assertIsInstance(found, Routine) - self.assertEqual( - found.routine_id, expected["routineReference"]["routineId"] - ) - self.assertEqual(actual_token, token) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": ["newAlphaProperty"]}, client, None, + ) conn.api_request.assert_called_once_with( - method="GET", path=path, query_params={}, timeout=None + method="PATCH", + path="/%s" % path, + data={"newAlphaProperty": "unreleased property"}, + headers=None, + timeout=None, + ) + self.assertEqual( + updated_table._properties["newAlphaProperty"], "unreleased property" ) - def test_list_routines_wrong_type(self): - creds = 
_make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - with self.assertRaises(TypeError): - client.list_routines( - DatasetReference(self.PROJECT, self.DS_ID).table("foo") - ) - - def test_list_tables_defaults(self): - from google.cloud.bigquery.table import TableListItem - - TABLE_1 = "table_one" - TABLE_2 = "table_two" - PATH = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - TOKEN = "TOKEN" - DATA = { - "nextPageToken": TOKEN, - "tables": [ - { - "kind": "bigquery#table", - "id": "%s:%s.%s" % (self.PROJECT, self.DS_ID, TABLE_1), - "tableReference": { - "tableId": TABLE_1, - "datasetId": self.DS_ID, - "projectId": self.PROJECT, - }, - "type": "TABLE", - }, - { - "kind": "bigquery#table", - "id": "%s:%s.%s" % (self.PROJECT, self.DS_ID, TABLE_2), - "tableReference": { - "tableId": TABLE_2, - "datasetId": self.DS_ID, - "projectId": self.PROJECT, - }, - "type": "TABLE", - }, - ], - } + def test_update_table_only_use_legacy_sql(self): + from google.cloud.bigquery.table import Table + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + resource = self._make_table_resource() + resource["view"] = {"useLegacySql": True} creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(DATA) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - - iterator = client.list_tables(dataset) - self.assertIs(iterator.dataset, dataset) + conn = client._connection = make_connection(resource) + table = Table(self.TABLE_REF) + table.view_use_legacy_sql = True with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - tables = list(page) - token = iterator.next_page_token + updated_table = client.update_table(table, ["view_use_legacy_sql"]) - 
self.assertEqual(len(tables), len(DATA["tables"])) - for found, expected in zip(tables, DATA["tables"]): - self.assertIsInstance(found, TableListItem) - self.assertEqual(found.full_table_id, expected["id"]) - self.assertEqual(found.table_type, expected["type"]) - self.assertEqual(token, TOKEN) + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": ["view_use_legacy_sql"]}, client, None, + ) conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={}, timeout=None + method="PATCH", + path="/%s" % path, + data={"view": {"useLegacySql": True}}, + headers=None, + timeout=None, ) + self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql) - def test_list_tables_explicit(self): - from google.cloud.bigquery.table import TableListItem + def test_update_table_w_query(self): + import datetime + from google.cloud._helpers import UTC + from google.cloud._helpers import _millis + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table - TABLE_1 = "table_one" - TABLE_2 = "table_two" - PATH = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID) - TOKEN = "TOKEN" - DATA = { - "tables": [ + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + query = "select fullname, age from person_ages" + location = "EU" + exp_time = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC) + schema_resource = { + "fields": [ { - "kind": "bigquery#dataset", - "id": "%s:%s.%s" % (self.PROJECT, self.DS_ID, TABLE_1), - "tableReference": { - "tableId": TABLE_1, - "datasetId": self.DS_ID, - "projectId": self.PROJECT, - }, - "type": "TABLE", + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": None, }, { - "kind": "bigquery#dataset", - "id": "%s:%s.%s" % (self.PROJECT, self.DS_ID, TABLE_2), - "tableReference": { - "tableId": TABLE_2, - "datasetId": self.DS_ID, - "projectId": self.PROJECT, - }, - 
"type": "TABLE", + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": "this is a column", }, + {"name": "country", "type": "STRING", "mode": "NULLABLE"}, ] } - + schema = [ + SchemaField( + "full_name", + "STRING", + mode="REQUIRED", + # Explicitly unset the description. + description=None, + ), + SchemaField( + "age", "INTEGER", mode="REQUIRED", description="this is a column" + ), + # Omit the description to not make updates to it. + SchemaField("country", "STRING"), + ] + resource = self._make_table_resource() + resource.update( + { + "schema": schema_resource, + "view": {"query": query, "useLegacySql": True}, + "location": location, + "expirationTime": _millis(exp_time), + } + ) creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(DATA) - dataset = DatasetReference(self.PROJECT, self.DS_ID) - - iterator = client.list_tables( - # Test with string for dataset ID. - self.DS_ID, - max_results=3, - page_token=TOKEN, - ) - self.assertEqual(iterator.dataset, dataset) + conn = client._connection = make_connection(resource) + table = Table(self.TABLE_REF, schema=schema) + table.expires = exp_time + table.view_query = query + table.view_use_legacy_sql = True + updated_properties = ["schema", "view_query", "expires", "view_use_legacy_sql"] with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - page = next(iterator.pages) + updated_table = client.update_table(table, updated_properties) - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - tables = list(page) - token = iterator.next_page_token + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": updated_properties}, client, None, + ) - self.assertEqual(len(tables), len(DATA["tables"])) - for found, expected in zip(tables, DATA["tables"]): - self.assertIsInstance(found, TableListItem) - 
self.assertEqual(found.full_table_id, expected["id"]) - self.assertEqual(found.table_type, expected["type"]) - self.assertIsNone(token) + self.assertEqual(updated_table.schema, table.schema) + self.assertEqual(updated_table.view_query, table.view_query) + self.assertEqual(updated_table.expires, table.expires) + self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql) + self.assertEqual(updated_table.location, location) conn.api_request.assert_called_once_with( - method="GET", - path="/%s" % PATH, - query_params={"maxResults": 3, "pageToken": TOKEN}, + method="PATCH", + path="/%s" % path, + data={ + "view": {"query": query, "useLegacySql": True}, + "expirationTime": str(_millis(exp_time)), + "schema": schema_resource, + }, + headers=None, timeout=None, ) - def test_list_tables_wrong_type(self): + def test_update_table_w_schema_None(self): + # Simulate deleting schema: not sure if back-end will actually + # allow this operation, but the spec says it is optional. + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + resource1 = self._make_table_resource() + resource1.update( + { + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ] + } + } + ) + resource2 = self._make_table_resource() creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - with self.assertRaises(TypeError): - client.list_tables(DatasetReference(self.PROJECT, self.DS_ID).table("foo")) - - def test_delete_dataset(self): - from google.cloud.bigquery.dataset import Dataset - from google.cloud.bigquery.dataset import DatasetReference + conn = client._connection = make_connection(resource1, resource2) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + table = client.get_table( + # Test with string for table ID + "{}.{}.{}".format( + 
self.TABLE_REF.project, + self.TABLE_REF.dataset_id, + self.TABLE_REF.table_id, + ) + ) - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - datasets = (ds_ref, Dataset(ds_ref), "{}.{}".format(self.PROJECT, self.DS_ID)) - PATH = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection(*([{}] * len(datasets))) - for arg in datasets: - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - client.delete_dataset(arg, timeout=7.5) + final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) - final_attributes.assert_called_once_with( - {"path": "/%s" % PATH}, client, None - ) + table.schema = None - conn.api_request.assert_called_with( - method="DELETE", path="/%s" % PATH, query_params={}, timeout=7.5 - ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + updated_table = client.update_table(table, ["schema"]) - def test_delete_dataset_delete_contents(self): - from google.cloud.bigquery.dataset import Dataset + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": ["schema"]}, client, None + ) - PATH = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - conn = client._connection = make_connection({}, {}) - ds_ref = DatasetReference(self.PROJECT, self.DS_ID) - for arg in (ds_ref, Dataset(ds_ref)): - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - client.delete_dataset(arg, delete_contents=True) + self.assertEqual(len(conn.api_request.call_args_list), 2) + req = conn.api_request.call_args_list[1] + self.assertEqual(req[1]["method"], "PATCH") + sent = {"schema": None} + 
self.assertEqual(req[1]["data"], sent) + self.assertEqual(req[1]["path"], "/%s" % path) + self.assertEqual(len(updated_table.schema), 0) - final_attributes.assert_called_once_with( - {"path": "/%s" % PATH, "deleteContents": True}, client, None - ) - conn.api_request.assert_called_with( - method="DELETE", - path="/%s" % PATH, - query_params={"deleteContents": "true"}, - timeout=None, - ) + def test_update_table_delete_property(self): + from google.cloud.bigquery.table import Table - def test_delete_dataset_wrong_type(self): + description = "description" + title = "title" + path = "projects/%s/datasets/%s/tables/%s" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + resource1 = self._make_table_resource() + resource1.update({"description": description, "friendlyName": title}) + resource2 = self._make_table_resource() + resource2["description"] = None creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) - with self.assertRaises(TypeError): - client.delete_dataset( - DatasetReference(self.PROJECT, self.DS_ID).table("foo") - ) - - def test_delete_dataset_w_not_found_ok_false(self): - path = "/projects/{}/datasets/{}".format(self.PROJECT, self.DS_ID) - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = make_connection( - google.api_core.exceptions.NotFound("dataset not found") - ) - - with self.assertRaises(google.api_core.exceptions.NotFound): - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - client.delete_dataset(self.DS_ID) + conn = client._connection = make_connection(resource1, resource2) + table = Table(self.TABLE_REF) + table.description = description + table.friendly_name = title - final_attributes.assert_called_once_with({"path": path}, client, None) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + 
) as final_attributes: + table2 = client.update_table(table, ["description", "friendly_name"]) - conn.api_request.assert_called_with( - method="DELETE", path=path, query_params={}, timeout=None + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": ["description", "friendly_name"]}, + client, + None, ) - def test_delete_dataset_w_not_found_ok_true(self): - path = "/projects/{}/datasets/{}".format(self.PROJECT, self.DS_ID) - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = make_connection( - google.api_core.exceptions.NotFound("dataset not found") - ) + self.assertEqual(table2.description, table.description) + table2.description = None with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - client.delete_dataset(self.DS_ID, not_found_ok=True) - - final_attributes.assert_called_once_with({"path": path}, client, None) + table3 = client.update_table(table2, ["description"]) - conn.api_request.assert_called_with( - method="DELETE", path=path, query_params={}, timeout=None + final_attributes.assert_called_once_with( + {"path": "/%s" % path, "fields": ["description"]}, client, None ) + self.assertEqual(len(conn.api_request.call_args_list), 2) + req = conn.api_request.call_args_list[1] + self.assertEqual(req[1]["method"], "PATCH") + self.assertEqual(req[1]["path"], "/%s" % path) + sent = {"description": None} + self.assertEqual(req[1]["data"], sent) + self.assertIsNone(table3.description) + def test_delete_model(self): from google.cloud.bigquery.model import Model diff --git a/tests/unit/test_create_dataset.py b/tests/unit/test_create_dataset.py new file mode 100644 index 000000000..3eb8f1072 --- /dev/null +++ b/tests/unit/test_create_dataset.py @@ -0,0 +1,349 @@ +from google.cloud.bigquery.dataset import Dataset, DatasetReference +from .helpers import make_connection, 
dataset_polymorphic, make_client +import google.cloud.bigquery.dataset +import mock +import pytest + + +@dataset_polymorphic +def test_create_dataset_minimal(make_dataset, get_reference, client, PROJECT, DS_ID): + PATH = "projects/%s/datasets" % PROJECT + RESOURCE = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + } + conn = client._connection = make_connection(RESOURCE) + + dataset = make_dataset(PROJECT, DS_ID) + after = client.create_dataset(dataset, timeout=7.5) + + assert after.dataset_id == DS_ID + assert after.project == PROJECT + assert after.etag == RESOURCE["etag"] + assert after.full_dataset_id == RESOURCE["id"] + + conn.api_request.assert_called_once_with( + method="POST", + path="/%s" % PATH, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + }, + timeout=7.5, + ) + + +def test_create_dataset_w_attrs(client, PROJECT, DS_ID): + from google.cloud.bigquery.dataset import AccessEntry + + PATH = "projects/%s/datasets" % PROJECT + DESCRIPTION = "DESC" + FRIENDLY_NAME = "FN" + LOCATION = "US" + USER_EMAIL = "phred@example.com" + LABELS = {"color": "red"} + VIEW = { + "projectId": "my-proj", + "datasetId": "starry-skies", + "tableId": "northern-hemisphere", + } + RESOURCE = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + "description": DESCRIPTION, + "friendlyName": FRIENDLY_NAME, + "location": LOCATION, + "defaultTableExpirationMs": "3600", + "labels": LABELS, + "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], + } + conn = client._connection = make_connection(RESOURCE) + entries = [ + AccessEntry("OWNER", "userByEmail", USER_EMAIL), + AccessEntry(None, "view", VIEW), + ] + + ds_ref = DatasetReference(PROJECT, DS_ID) + before = Dataset(ds_ref) + before.access_entries = entries + before.description = DESCRIPTION + before.friendly_name = 
FRIENDLY_NAME + before.default_table_expiration_ms = 3600 + before.location = LOCATION + before.labels = LABELS + after = client.create_dataset(before) + + assert after.dataset_id == DS_ID + assert after.project == PROJECT + assert after.etag == RESOURCE["etag"] + assert after.full_dataset_id == RESOURCE["id"] + assert after.description == DESCRIPTION + assert after.friendly_name == FRIENDLY_NAME + assert after.location == LOCATION + assert after.default_table_expiration_ms == 3600 + assert after.labels == LABELS + + conn.api_request.assert_called_once_with( + method="POST", + path="/%s" % PATH, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "description": DESCRIPTION, + "friendlyName": FRIENDLY_NAME, + "location": LOCATION, + "defaultTableExpirationMs": "3600", + "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], + "labels": LABELS, + }, + timeout=None, + ) + + +def test_create_dataset_w_custom_property(client, PROJECT, DS_ID): + # The library should handle sending properties to the API that are not + # yet part of the library + + path = "/projects/%s/datasets" % PROJECT + resource = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "newAlphaProperty": "unreleased property", + } + conn = client._connection = make_connection(resource) + + ds_ref = DatasetReference(PROJECT, DS_ID) + before = Dataset(ds_ref) + before._properties["newAlphaProperty"] = "unreleased property" + after = client.create_dataset(before) + + assert after.dataset_id == DS_ID + assert after.project == PROJECT + assert after._properties["newAlphaProperty"] == "unreleased property" + + conn.api_request.assert_called_once_with( + method="POST", + path=path, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "newAlphaProperty": "unreleased property", + "labels": {}, + }, + timeout=None, + ) + + +def test_create_dataset_w_client_location_wo_dataset_location(PROJECT, DS_ID, LOCATION): + PATH = 
"projects/%s/datasets" % PROJECT + RESOURCE = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + "location": LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection(RESOURCE) + + ds_ref = DatasetReference(PROJECT, DS_ID) + before = Dataset(ds_ref) + after = client.create_dataset(before) + + assert after.dataset_id == DS_ID + assert after.project == PROJECT + assert after.etag == RESOURCE["etag"] + assert after.full_dataset_id == RESOURCE["id"] + assert after.location == LOCATION + + conn.api_request.assert_called_once_with( + method="POST", + path="/%s" % PATH, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": LOCATION, + }, + timeout=None, + ) + + +def test_create_dataset_w_client_location_w_dataset_location(PROJECT, DS_ID, LOCATION): + PATH = "projects/%s/datasets" % PROJECT + OTHER_LOCATION = "EU" + RESOURCE = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + "location": OTHER_LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection(RESOURCE) + + ds_ref = DatasetReference(PROJECT, DS_ID) + before = Dataset(ds_ref) + before.location = OTHER_LOCATION + after = client.create_dataset(before) + + assert after.dataset_id == DS_ID + assert after.project == PROJECT + assert after.etag == RESOURCE["etag"] + assert after.full_dataset_id == RESOURCE["id"] + assert after.location == OTHER_LOCATION + + conn.api_request.assert_called_once_with( + method="POST", + path="/%s" % PATH, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": OTHER_LOCATION, + }, + timeout=None, + ) + + +def test_create_dataset_w_reference(PROJECT, DS_ID, LOCATION): + path = "/projects/%s/datasets" % PROJECT + resource = { + "datasetReference": {"projectId": 
PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + "location": LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection(resource) + dataset = client.create_dataset(DatasetReference(PROJECT, DS_ID)) + + assert dataset.dataset_id == DS_ID + assert dataset.project == PROJECT + assert dataset.etag == resource["etag"] + assert dataset.full_dataset_id == resource["id"] + assert dataset.location == LOCATION + + conn.api_request.assert_called_once_with( + method="POST", + path=path, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": LOCATION, + }, + timeout=None, + ) + + +def test_create_dataset_w_fully_qualified_string(PROJECT, DS_ID, LOCATION): + path = "/projects/%s/datasets" % PROJECT + resource = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + "location": LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection(resource) + dataset = client.create_dataset("{}.{}".format(PROJECT, DS_ID)) + + assert dataset.dataset_id == DS_ID + assert dataset.project == PROJECT + assert dataset.etag == resource["etag"] + assert dataset.full_dataset_id == resource["id"] + assert dataset.location == LOCATION + + conn.api_request.assert_called_once_with( + method="POST", + path=path, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": LOCATION, + }, + timeout=None, + ) + + +def test_create_dataset_w_string(PROJECT, DS_ID, LOCATION): + path = "/projects/%s/datasets" % PROJECT + resource = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "%s:%s" % (PROJECT, DS_ID), + "location": LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection(resource) + with mock.patch( + 
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + dataset = client.create_dataset(DS_ID) + + final_attributes.assert_called_once_with({"path": path}, client, None) + + assert dataset.dataset_id == DS_ID + assert dataset.project == PROJECT + assert dataset.etag == resource["etag"] + assert dataset.full_dataset_id == resource["id"] + assert dataset.location == LOCATION + + conn.api_request.assert_called_once_with( + method="POST", + path=path, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": LOCATION, + }, + timeout=None, + ) + + +def test_create_dataset_alreadyexists_w_exists_ok_false(PROJECT, DS_ID, LOCATION): + client = make_client(location=LOCATION) + client._connection = make_connection( + google.api_core.exceptions.AlreadyExists("dataset already exists") + ) + + with pytest.raises(google.api_core.exceptions.AlreadyExists): + client.create_dataset(DS_ID) + + +def test_create_dataset_alreadyexists_w_exists_ok_true(PROJECT, DS_ID, LOCATION): + post_path = "/projects/{}/datasets".format(PROJECT) + get_path = "/projects/{}/datasets/{}".format(PROJECT, DS_ID) + resource = { + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "etag": "etag", + "id": "{}:{}".format(PROJECT, DS_ID), + "location": LOCATION, + } + client = make_client(location=LOCATION) + conn = client._connection = make_connection( + google.api_core.exceptions.AlreadyExists("dataset already exists"), resource + ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + dataset = client.create_dataset(DS_ID, exists_ok=True) + + final_attributes.assert_called_with({"path": get_path}, client, None) + + assert dataset.dataset_id == DS_ID + assert dataset.project == PROJECT + assert dataset.etag == resource["etag"] + assert dataset.full_dataset_id == resource["id"] + assert dataset.location == LOCATION + + 
conn.api_request.assert_has_calls( + [ + mock.call( + method="POST", + path=post_path, + data={ + "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, + "labels": {}, + "location": LOCATION, + }, + timeout=None, + ), + mock.call(method="GET", path=get_path, timeout=None), + ] + ) diff --git a/tests/unit/test_delete_dataset.py b/tests/unit/test_delete_dataset.py new file mode 100644 index 000000000..c57b517e0 --- /dev/null +++ b/tests/unit/test_delete_dataset.py @@ -0,0 +1,64 @@ +from .helpers import make_connection, make_client, dataset_polymorphic +import google.api_core.exceptions +import pytest + + +@dataset_polymorphic +def test_delete_dataset(make_dataset, get_reference, client, PROJECT, DS_ID): + dataset = make_dataset(PROJECT, DS_ID) + PATH = "projects/%s/datasets/%s" % (PROJECT, DS_ID) + conn = client._connection = make_connection({}) + client.delete_dataset(dataset, timeout=7.5) + conn.api_request.assert_called_with( + method="DELETE", path="/%s" % PATH, query_params={}, timeout=7.5 + ) + + +@dataset_polymorphic +def test_delete_dataset_delete_contents( + make_dataset, get_reference, client, PROJECT, DS_ID +): + PATH = "projects/%s/datasets/%s" % (PROJECT, DS_ID) + conn = client._connection = make_connection({}) + dataset = make_dataset(PROJECT, DS_ID) + client.delete_dataset(dataset, delete_contents=True) + conn.api_request.assert_called_with( + method="DELETE", + path="/%s" % PATH, + query_params={"deleteContents": "true"}, + timeout=None, + ) + + +def test_delete_dataset_wrong_type(client): + with pytest.raises(TypeError): + client.delete_dataset(42) + + +def test_delete_dataset_w_not_found_ok_false(PROJECT, DS_ID): + path = "/projects/{}/datasets/{}".format(PROJECT, DS_ID) + http = object() + client = make_client(_http=http) + conn = client._connection = make_connection( + google.api_core.exceptions.NotFound("dataset not found") + ) + + with pytest.raises(google.api_core.exceptions.NotFound): + client.delete_dataset(DS_ID) + + 
conn.api_request.assert_called_with( + method="DELETE", path=path, query_params={}, timeout=None + ) + + +def test_delete_dataset_w_not_found_ok_true(PROJECT, DS_ID): + path = "/projects/{}/datasets/{}".format(PROJECT, DS_ID) + http = object() + client = make_client(_http=http) + conn = client._connection = make_connection( + google.api_core.exceptions.NotFound("dataset not found") + ) + client.delete_dataset(DS_ID, not_found_ok=True) + conn.api_request.assert_called_with( + method="DELETE", path=path, query_params={}, timeout=None + ) diff --git a/tests/unit/test_list_models.py b/tests/unit/test_list_models.py new file mode 100644 index 000000000..534a4b54c --- /dev/null +++ b/tests/unit/test_list_models.py @@ -0,0 +1,72 @@ +from .helpers import make_connection, dataset_polymorphic +import pytest + + +def test_list_models_empty_w_timeout(client, PROJECT, DS_ID): + path = "/projects/{}/datasets/{}/models".format(PROJECT, DS_ID) + conn = client._connection = make_connection({}) + + dataset_id = "{}.{}".format(PROJECT, DS_ID) + iterator = client.list_models(dataset_id, timeout=7.5) + page = next(iterator.pages) + models = list(page) + token = iterator.next_page_token + + assert models == [] + assert token is None + conn.api_request.assert_called_once_with( + method="GET", path=path, query_params={}, timeout=7.5 + ) + + +@dataset_polymorphic +def test_list_models_defaults(make_dataset, get_reference, client, PROJECT, DS_ID): + from google.cloud.bigquery.model import Model + + MODEL_1 = "model_one" + MODEL_2 = "model_two" + PATH = "projects/%s/datasets/%s/models" % (PROJECT, DS_ID) + TOKEN = "TOKEN" + DATA = { + "nextPageToken": TOKEN, + "models": [ + { + "modelReference": { + "modelId": MODEL_1, + "datasetId": DS_ID, + "projectId": PROJECT, + } + }, + { + "modelReference": { + "modelId": MODEL_2, + "datasetId": DS_ID, + "projectId": PROJECT, + } + }, + ], + } + + conn = client._connection = make_connection(DATA) + dataset = make_dataset(PROJECT, DS_ID) + + iterator = 
client.list_models(dataset) + assert iterator.dataset == get_reference(dataset) + page = next(iterator.pages) + models = list(page) + token = iterator.next_page_token + + assert len(models) == len(DATA["models"]) + for found, expected in zip(models, DATA["models"]): + assert isinstance(found, Model) + assert found.model_id == expected["modelReference"]["modelId"] + assert token == TOKEN + + conn.api_request.assert_called_once_with( + method="GET", path="/%s" % PATH, query_params={}, timeout=None + ) + + +def test_list_models_wrong_type(client): + with pytest.raises(TypeError): + client.list_models(42) diff --git a/tests/unit/test_list_routines.py b/tests/unit/test_list_routines.py new file mode 100644 index 000000000..82719fce6 --- /dev/null +++ b/tests/unit/test_list_routines.py @@ -0,0 +1,75 @@ +from .helpers import make_connection, dataset_polymorphic +import pytest + + +def test_list_routines_empty_w_timeout(client): + conn = client._connection = make_connection({}) + + iterator = client.list_routines("test-routines.test_routines", timeout=7.5) + page = next(iterator.pages) + routines = list(page) + token = iterator.next_page_token + + assert routines == [] + assert token is None + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/test-routines/datasets/test_routines/routines", + query_params={}, + timeout=7.5, + ) + + +@dataset_polymorphic +def test_list_routines_defaults(make_dataset, get_reference, client, PROJECT): + from google.cloud.bigquery.routine import Routine + + project_id = PROJECT + dataset_id = "test_routines" + path = f"/projects/{PROJECT}/datasets/test_routines/routines" + routine_1 = "routine_one" + routine_2 = "routine_two" + token = "TOKEN" + resource = { + "nextPageToken": token, + "routines": [ + { + "routineReference": { + "routineId": routine_1, + "datasetId": dataset_id, + "projectId": project_id, + } + }, + { + "routineReference": { + "routineId": routine_2, + "datasetId": dataset_id, + "projectId": 
project_id, + } + }, + ], + } + + conn = client._connection = make_connection(resource) + dataset = make_dataset(client.project, dataset_id) + + iterator = client.list_routines(dataset) + assert iterator.dataset == get_reference(dataset) + page = next(iterator.pages) + routines = list(page) + actual_token = iterator.next_page_token + + assert len(routines) == len(resource["routines"]) + for found, expected in zip(routines, resource["routines"]): + assert isinstance(found, Routine) + assert found.routine_id == expected["routineReference"]["routineId"] + assert actual_token == token + + conn.api_request.assert_called_once_with( + method="GET", path=path, query_params={}, timeout=None + ) + + +def test_list_routines_wrong_type(client): + with pytest.raises(TypeError): + client.list_routines(42) diff --git a/tests/unit/test_list_tables.py b/tests/unit/test_list_tables.py new file mode 100644 index 000000000..fdd3aa857 --- /dev/null +++ b/tests/unit/test_list_tables.py @@ -0,0 +1,145 @@ +from .helpers import make_connection, dataset_polymorphic +import google.cloud.bigquery.dataset +import pytest + + +@dataset_polymorphic +def test_list_tables_empty_w_timeout( + make_dataset, get_reference, client, PROJECT, DS_ID +): + path = "/projects/{}/datasets/{}/tables".format(PROJECT, DS_ID) + conn = client._connection = make_connection({}) + + dataset = make_dataset(PROJECT, DS_ID) + iterator = client.list_tables(dataset, timeout=7.5) + assert iterator.dataset == get_reference(dataset) + page = next(iterator.pages) + tables = list(page) + token = iterator.next_page_token + + assert tables == [] + assert token is None + conn.api_request.assert_called_once_with( + method="GET", path=path, query_params={}, timeout=7.5 + ) + + +@dataset_polymorphic +def test_list_tables_defaults(make_dataset, get_reference, client, PROJECT, DS_ID): + from google.cloud.bigquery.table import TableListItem + + TABLE_1 = "table_one" + TABLE_2 = "table_two" + PATH = "projects/%s/datasets/%s/tables" % 
(PROJECT, DS_ID) + TOKEN = "TOKEN" + DATA = { + "nextPageToken": TOKEN, + "tables": [ + { + "kind": "bigquery#table", + "id": "%s:%s.%s" % (PROJECT, DS_ID, TABLE_1), + "tableReference": { + "tableId": TABLE_1, + "datasetId": DS_ID, + "projectId": PROJECT, + }, + "type": "TABLE", + }, + { + "kind": "bigquery#table", + "id": "%s:%s.%s" % (PROJECT, DS_ID, TABLE_2), + "tableReference": { + "tableId": TABLE_2, + "datasetId": DS_ID, + "projectId": PROJECT, + }, + "type": "TABLE", + }, + ], + } + + conn = client._connection = make_connection(DATA) + dataset = make_dataset(PROJECT, DS_ID) + + iterator = client.list_tables(dataset) + assert iterator.dataset == get_reference(dataset) + page = next(iterator.pages) + tables = list(page) + token = iterator.next_page_token + + assert len(tables) == len(DATA["tables"]) + for found, expected in zip(tables, DATA["tables"]): + assert isinstance(found, TableListItem) + assert found.full_table_id == expected["id"] + assert found.table_type == expected["type"] + assert token == TOKEN + + conn.api_request.assert_called_once_with( + method="GET", path="/%s" % PATH, query_params={}, timeout=None + ) + + +def test_list_tables_explicit(client, PROJECT, DS_ID): + from google.cloud.bigquery.table import TableListItem + + TABLE_1 = "table_one" + TABLE_2 = "table_two" + PATH = "projects/%s/datasets/%s/tables" % (PROJECT, DS_ID) + TOKEN = "TOKEN" + DATA = { + "tables": [ + { + "kind": "bigquery#dataset", + "id": "%s:%s.%s" % (PROJECT, DS_ID, TABLE_1), + "tableReference": { + "tableId": TABLE_1, + "datasetId": DS_ID, + "projectId": PROJECT, + }, + "type": "TABLE", + }, + { + "kind": "bigquery#dataset", + "id": "%s:%s.%s" % (PROJECT, DS_ID, TABLE_2), + "tableReference": { + "tableId": TABLE_2, + "datasetId": DS_ID, + "projectId": PROJECT, + }, + "type": "TABLE", + }, + ] + } + + conn = client._connection = make_connection(DATA) + dataset = google.cloud.bigquery.dataset.DatasetReference(PROJECT, DS_ID) + + iterator = client.list_tables( + # Test 
with string for dataset ID. + DS_ID, + max_results=3, + page_token=TOKEN, + ) + assert iterator.dataset == dataset + page = next(iterator.pages) + tables = list(page) + token = iterator.next_page_token + + assert len(tables) == len(DATA["tables"]) + for found, expected in zip(tables, DATA["tables"]): + assert isinstance(found, TableListItem) + assert found.full_table_id == expected["id"] + assert found.table_type == expected["type"] + assert token is None + + conn.api_request.assert_called_once_with( + method="GET", + path="/%s" % PATH, + query_params={"maxResults": 3, "pageToken": TOKEN}, + timeout=None, + ) + + +def test_list_tables_wrong_type(client): + with pytest.raises(TypeError): + client.list_tables(42) From dde9dc5114c2311fb76fafc5b222fff561e8abf1 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 12 Apr 2021 19:00:25 +0200 Subject: [PATCH 062/230] feat: use pyarrow stream compression, if available (#593) * feat: use pyarrow stream compression, if available * Remove unnecessary pyarrow version check Arrow stream compression requires pyarrow>=1.0.0, but that's already guaranteed by a version pin in setup.py if bqstorage extra is installed. 
* Remove unused pyarrow version parsing in tests * Only use arrow compression in tests if available --- google/cloud/bigquery/_pandas_helpers.py | 13 ++++ google/cloud/bigquery/dbapi/cursor.py | 14 +++++ tests/system/test_client.py | 8 --- tests/unit/job/test_query_pandas.py | 78 ++++++++++++++++++++++-- tests/unit/test_dbapi_cursor.py | 47 ++++++++++++++ 5 files changed, 146 insertions(+), 14 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 7ad416e08..412f32754 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -33,6 +33,14 @@ except ImportError: # pragma: NO COVER pyarrow = None +try: + from google.cloud.bigquery_storage import ArrowSerializationOptions +except ImportError: + _ARROW_COMPRESSION_SUPPORT = False +else: + # Having BQ Storage available implies that pyarrow >=1.0.0 is available, too. + _ARROW_COMPRESSION_SUPPORT = True + from google.cloud.bigquery import schema @@ -631,6 +639,11 @@ def _download_table_bqstorage( for field in selected_fields: requested_session.read_options.selected_fields.append(field.name) + if _ARROW_COMPRESSION_SUPPORT: + requested_session.read_options.arrow_serialization_options.buffer_compression = ( + ArrowSerializationOptions.CompressionCodec.LZ4_FRAME + ) + session = bqstorage_client.create_read_session( parent="projects/{}".format(project_id), read_session=requested_session, diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py index e90bcc2c0..ee09158d8 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -19,6 +19,14 @@ import copy import logging +try: + from google.cloud.bigquery_storage import ArrowSerializationOptions +except ImportError: + _ARROW_COMPRESSION_SUPPORT = False +else: + # Having BQ Storage available implies that pyarrow >=1.0.0 is available, too. 
+ _ARROW_COMPRESSION_SUPPORT = True + from google.cloud.bigquery import job from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions @@ -255,6 +263,12 @@ def _bqstorage_fetch(self, bqstorage_client): table=table_reference.to_bqstorage(), data_format=bigquery_storage.types.DataFormat.ARROW, ) + + if _ARROW_COMPRESSION_SUPPORT: + requested_session.read_options.arrow_serialization_options.buffer_compression = ( + ArrowSerializationOptions.CompressionCodec.LZ4_FRAME + ) + read_session = bqstorage_client.create_read_session( parent="projects/{}".format(table_reference.project), read_session=requested_session, diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 133f609a6..024441012 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -28,7 +28,6 @@ import psutil import pytest -import pkg_resources from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from . import helpers @@ -116,13 +115,6 @@ (TooManyRequests, InternalServerError, ServiceUnavailable) ) -PYARROW_MINIMUM_VERSION = pkg_resources.parse_version("0.17.0") - -if pyarrow: - PYARROW_INSTALLED_VERSION = pkg_resources.get_distribution("pyarrow").parsed_version -else: - PYARROW_INSTALLED_VERSION = None - MTLS_TESTING = os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE") == "true" diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index d1600ad43..0f9623203 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -41,6 +41,22 @@ from .helpers import _make_job_resource +@pytest.fixture +def table_read_options_kwarg(): + # Create a BigQuery Storage table read options object with pyarrow compression + # enabled if a recent-enough version of google-cloud-bigquery-storage dependency is + # installed to support the compression. 
+ if not hasattr(bigquery_storage, "ArrowSerializationOptions"): + return {} + + read_options = bigquery_storage.ReadSession.TableReadOptions( + arrow_serialization_options=bigquery_storage.ArrowSerializationOptions( + buffer_compression=bigquery_storage.ArrowSerializationOptions.CompressionCodec.LZ4_FRAME + ) + ) + return {"read_options": read_options} + + @pytest.mark.parametrize( "query,expected", ( @@ -82,7 +98,7 @@ def test__contains_order_by(query, expected): "SelecT name, age froM table OrdeR \n\t BY other_column;", ), ) -def test_to_dataframe_bqstorage_preserve_order(query): +def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg): from google.cloud.bigquery.job import QueryJob as target_class job_resource = _make_job_resource( @@ -123,8 +139,10 @@ def test_to_dataframe_bqstorage_preserve_order(query): destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( **job_resource["configuration"]["query"]["destinationTable"] ) - expected_session = bigquery_storage.types.ReadSession( - table=destination_table, data_format=bigquery_storage.types.DataFormat.ARROW, + expected_session = bigquery_storage.ReadSession( + table=destination_table, + data_format=bigquery_storage.DataFormat.ARROW, + **table_read_options_kwarg, ) bqstorage_client.create_read_session.assert_called_once_with( parent="projects/test-project", @@ -431,7 +449,7 @@ def test_to_dataframe_ddl_query(): @pytest.mark.skipif( bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) -def test_to_dataframe_bqstorage(): +def test_to_dataframe_bqstorage(table_read_options_kwarg): from google.cloud.bigquery.job import QueryJob as target_class resource = _make_job_resource(job_type="query", ended=True) @@ -468,8 +486,10 @@ def test_to_dataframe_bqstorage(): destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( **resource["configuration"]["query"]["destinationTable"] ) - expected_session = 
bigquery_storage.types.ReadSession( - table=destination_table, data_format=bigquery_storage.types.DataFormat.ARROW, + expected_session = bigquery_storage.ReadSession( + table=destination_table, + data_format=bigquery_storage.DataFormat.ARROW, + **table_read_options_kwarg, ) bqstorage_client.create_read_session.assert_called_once_with( parent=f"projects/{client.project}", @@ -478,6 +498,52 @@ def test_to_dataframe_bqstorage(): ) +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_to_dataframe_bqstorage_no_pyarrow_compression(): + from google.cloud.bigquery.job import QueryJob as target_class + + resource = _make_job_resource(job_type="query", ended=True) + query_resource = { + "jobComplete": True, + "jobReference": resource["jobReference"], + "totalRows": "4", + "schema": {"fields": [{"name": "name", "type": "STRING", "mode": "NULLABLE"}]}, + } + connection = _make_connection(query_resource) + client = _make_client(connection=connection) + job = target_class.from_api_repr(resource, client) + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() + session.avro_schema.schema = json.dumps( + { + "type": "record", + "name": "__root__", + "fields": [{"name": "name", "type": ["null", "string"]}], + } + ) + bqstorage_client.create_read_session.return_value = session + + with mock.patch( + "google.cloud.bigquery._pandas_helpers._ARROW_COMPRESSION_SUPPORT", new=False + ): + job.to_dataframe(bqstorage_client=bqstorage_client) + + destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format( + **resource["configuration"]["query"]["destinationTable"] + ) + expected_session = bigquery_storage.ReadSession( + table=destination_table, data_format=bigquery_storage.DataFormat.ARROW, + ) + bqstorage_client.create_read_session.assert_called_once_with( + 
parent=f"projects/{client.project}", + read_session=expected_session, + max_stream_count=0, + ) + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_to_dataframe_column_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index cbd6f6909..0f44e3895 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -123,6 +123,7 @@ def _mock_job( schema=schema, num_dml_affected_rows=num_dml_affected_rows, ) + mock_job.destination.project = "P" mock_job.destination.to_bqstorage.return_value = ( "projects/P/datasets/DS/tables/T" ) @@ -380,6 +381,52 @@ def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self): # the default client was not used mock_client.list_rows.assert_not_called() + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_fetchall_w_bqstorage_client_no_arrow_compression(self): + from google.cloud.bigquery import dbapi + from google.cloud.bigquery import table + + # Use unordered data to also test any non-determenistic key order in dicts. + row_data = [table.Row([1.2, 1.1], {"bar": 1, "foo": 0})] + bqstorage_streamed_rows = [{"bar": _to_pyarrow(1.2), "foo": _to_pyarrow(1.1)}] + + mock_client = self._mock_client(rows=row_data) + mock_bqstorage_client = self._mock_bqstorage_client( + stream_count=1, rows=bqstorage_streamed_rows, + ) + + connection = dbapi.connect( + client=mock_client, bqstorage_client=mock_bqstorage_client, + ) + cursor = connection.cursor() + cursor.execute("SELECT foo, bar FROM some_table") + + with mock.patch( + "google.cloud.bigquery.dbapi.cursor._ARROW_COMPRESSION_SUPPORT", new=False + ): + rows = cursor.fetchall() + + mock_client.list_rows.assert_not_called() # The default client was not used. + + # Check the BQ Storage session config. 
+ expected_session = bigquery_storage.ReadSession( + table="projects/P/datasets/DS/tables/T", + data_format=bigquery_storage.DataFormat.ARROW, + ) + mock_bqstorage_client.create_read_session.assert_called_once_with( + parent="projects/P", read_session=expected_session, max_stream_count=1 + ) + + # Check the data returned. + field_value = op.itemgetter(1) + sorted_row_data = [sorted(row.items(), key=field_value) for row in rows] + expected_row_data = [[("foo", 1.1), ("bar", 1.2)]] + + self.assertEqual(sorted_row_data, expected_row_data) + def test_execute_custom_job_id(self): from google.cloud.bigquery.dbapi import connect From 9fb6f2f22cf2d69c31e10bbde460f319fa56698f Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Tue, 13 Apr 2021 08:06:04 -0700 Subject: [PATCH 063/230] chore: add constraints file check for python samples (#601) This PR was generated using Autosynth. :rainbow: Synth log will be available here: https://source.cloud.google.com/results/invocations/b7a528df-1b0b-42e0-a583-e53b45ee05fc/targets - [ ] To automatically regenerate this PR, check this box. (May take up to 24 hours.) 
Source-Link: https://github.com/googleapis/synthtool/commit/0a071b3460344886297a304253bf924aa68ddb7e --- .github/header-checker-lint.yml | 2 +- renovate.json | 5 ++++- samples/geography/noxfile.py | 10 ++++++++-- samples/snippets/noxfile.py | 10 ++++++++-- synth.metadata | 6 +++--- 5 files changed, 24 insertions(+), 9 deletions(-) diff --git a/.github/header-checker-lint.yml b/.github/header-checker-lint.yml index fc281c05b..6fe78aa79 100644 --- a/.github/header-checker-lint.yml +++ b/.github/header-checker-lint.yml @@ -1,6 +1,6 @@ {"allowedCopyrightHolders": ["Google LLC"], "allowedLicenses": ["Apache-2.0", "MIT", "BSD-3"], - "ignoreFiles": ["**/requirements.txt", "**/requirements-test.txt"], + "ignoreFiles": ["**/requirements.txt", "**/requirements-test.txt", "**/__init__.py", "samples/**/constraints.txt", "samples/**/constraints-test.txt"], "sourceFileExtensions": [ "ts", "js", diff --git a/renovate.json b/renovate.json index f08bc22c9..c04895563 100644 --- a/renovate.json +++ b/renovate.json @@ -2,5 +2,8 @@ "extends": [ "config:base", ":preserveSemverRanges" ], - "ignorePaths": [".pre-commit-config.yaml"] + "ignorePaths": [".pre-commit-config.yaml"], + "pip_requirements": { + "fileMatch": ["requirements-test.txt", "samples/[\\S/]*constraints.txt", "samples/[\\S/]*constraints-test.txt"] + } } diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index f2320ea00..be1a3f251 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -174,10 +174,16 @@ def _session_tests( ) -> None: """Runs py.test for a particular project.""" if os.path.exists("requirements.txt"): - session.install("-r", "requirements.txt") + if os.path.exists("constraints.txt"): + session.install("-r", "requirements.txt", "-c", "constraints.txt") + else: + session.install("-r", "requirements.txt") if os.path.exists("requirements-test.txt"): - session.install("-r", "requirements-test.txt") + if os.path.exists("constraints-test.txt"): + session.install("-r", 
"requirements-test.txt", "-c", "constraints-test.txt") + else: + session.install("-r", "requirements-test.txt") if INSTALL_LIBRARY_FROM_SOURCE: session.install("-e", _get_repo_root()) diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index f2320ea00..be1a3f251 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -174,10 +174,16 @@ def _session_tests( ) -> None: """Runs py.test for a particular project.""" if os.path.exists("requirements.txt"): - session.install("-r", "requirements.txt") + if os.path.exists("constraints.txt"): + session.install("-r", "requirements.txt", "-c", "constraints.txt") + else: + session.install("-r", "requirements.txt") if os.path.exists("requirements-test.txt"): - session.install("-r", "requirements-test.txt") + if os.path.exists("constraints-test.txt"): + session.install("-r", "requirements-test.txt", "-c", "constraints-test.txt") + else: + session.install("-r", "requirements-test.txt") if INSTALL_LIBRARY_FROM_SOURCE: session.install("-e", _get_repo_root()) diff --git a/synth.metadata b/synth.metadata index 114359b88..7221c0f0f 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "c1195147a6e9220f26558a301427dd447646da3a" + "sha": "8f4c0b84dac3840532d7865247b8ad94b625b897" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "5b5bf6d519b2d658d9f2e483d9f6f3d0ba8ee6bc" + "sha": "0a071b3460344886297a304253bf924aa68ddb7e" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "5b5bf6d519b2d658d9f2e483d9f6f3d0ba8ee6bc" + "sha": "0a071b3460344886297a304253bf924aa68ddb7e" } } ], From df48cc5a0be99ad39d5835652d1b7422209afc5d Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 13 Apr 2021 09:20:17 -0600 Subject: [PATCH 064/230] fix: missing license headers in new test files (#604) 
--- tests/unit/conftest.py | 14 ++++++++++++++ tests/unit/test_create_dataset.py | 14 ++++++++++++++ tests/unit/test_delete_dataset.py | 14 ++++++++++++++ tests/unit/test_list_models.py | 14 ++++++++++++++ tests/unit/test_list_routines.py | 14 ++++++++++++++ tests/unit/test_list_tables.py | 14 ++++++++++++++ 6 files changed, 84 insertions(+) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 07fc9b4ad..7a67ea6b5 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -1,3 +1,17 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import pytest from .helpers import make_client diff --git a/tests/unit/test_create_dataset.py b/tests/unit/test_create_dataset.py index 3eb8f1072..d07aaed4f 100644 --- a/tests/unit/test_create_dataset.py +++ b/tests/unit/test_create_dataset.py @@ -1,3 +1,17 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from google.cloud.bigquery.dataset import Dataset, DatasetReference from .helpers import make_connection, dataset_polymorphic, make_client import google.cloud.bigquery.dataset diff --git a/tests/unit/test_delete_dataset.py b/tests/unit/test_delete_dataset.py index c57b517e0..3a65e031c 100644 --- a/tests/unit/test_delete_dataset.py +++ b/tests/unit/test_delete_dataset.py @@ -1,3 +1,17 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .helpers import make_connection, make_client, dataset_polymorphic import google.api_core.exceptions import pytest diff --git a/tests/unit/test_list_models.py b/tests/unit/test_list_models.py index 534a4b54c..56aa66126 100644 --- a/tests/unit/test_list_models.py +++ b/tests/unit/test_list_models.py @@ -1,3 +1,17 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from .helpers import make_connection, dataset_polymorphic import pytest diff --git a/tests/unit/test_list_routines.py b/tests/unit/test_list_routines.py index 82719fce6..714ede0d4 100644 --- a/tests/unit/test_list_routines.py +++ b/tests/unit/test_list_routines.py @@ -1,3 +1,17 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .helpers import make_connection, dataset_polymorphic import pytest diff --git a/tests/unit/test_list_tables.py b/tests/unit/test_list_tables.py index fdd3aa857..9acee9580 100644 --- a/tests/unit/test_list_tables.py +++ b/tests/unit/test_list_tables.py @@ -1,3 +1,17 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ from .helpers import make_connection, dataset_polymorphic import google.cloud.bigquery.dataset import pytest From c741c381c2248eb69cebb20e675bb088d27bb636 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 13 Apr 2021 18:50:05 +0200 Subject: [PATCH 065/230] refactor: simplify OrderedDict arguments in lexer (#598) Python 3.6+ guarantees that kwargs order is preserved, thus we don't need to assure the order by passing them as a list of tuples. --- .../bigquery/magics/line_arg_parser/lexer.py | 119 ++++++------------ 1 file changed, 37 insertions(+), 82 deletions(-) diff --git a/google/cloud/bigquery/magics/line_arg_parser/lexer.py b/google/cloud/bigquery/magics/line_arg_parser/lexer.py index 0cb63292c..5a6ee1a83 100644 --- a/google/cloud/bigquery/magics/line_arg_parser/lexer.py +++ b/google/cloud/bigquery/magics/line_arg_parser/lexer.py @@ -49,90 +49,45 @@ # the value of an option other than "--params", we do not really care about its # structure, and thus do not want to use any of the "Python tokens" for pattern matching. # -# Since token definition order is important, an OrderedDict is needed with tightly -# controlled member definitions (i.e. passed as a sequence, and *not* via kwargs). +# Token definition order is important, thus an OrderedDict is used. In addition, PEP 468 +# guarantees us that the order of kwargs is preserved in Python 3.6+. 
token_types = OrderedDict( - [ - ( - "state_parse_pos_args", - OrderedDict( - [ - ( - "GOTO_PARSE_NON_PARAMS_OPTIONS", - r"(?P(?=--))", # double dash - starting the options list - ), - ( - "DEST_VAR", - r"(?P[^\d\W]\w*)", # essentially a Python ID - ), - ] - ), - ), - ( - "state_parse_non_params_options", - OrderedDict( - [ - ( - "GOTO_PARSE_PARAMS_OPTION", - r"(?P(?=--params(?:\s|=|--|$)))", # the --params option - ), - ("OPTION_SPEC", r"(?P--\w+)"), - ("OPTION_EQ", r"(?P=)"), - ("OPT_VAL", r"(?P\S+?(?=\s|--|$))"), - ] - ), - ), - ( - "state_parse_params_option", - OrderedDict( - [ - ( - "PY_STRING", - r"(?P(?:{})|(?:{}))".format( - r"'(?:[^'\\]|\.)*'", - r'"(?:[^"\\]|\.)*"', # single and double quoted strings - ), - ), - ("PARAMS_OPT_SPEC", r"(?P--params(?=\s|=|--|$))"), - ("PARAMS_OPT_EQ", r"(?P=)"), - ( - "GOTO_PARSE_NON_PARAMS_OPTIONS", - r"(?P(?=--\w+))", # found another option spec - ), - ("PY_BOOL", r"(?PTrue|False)"), - ("DOLLAR_PY_ID", r"(?P\$[^\d\W]\w*)"), - ( - "PY_NUMBER", - r"(?P-?[1-9]\d*(?:\.\d+)?(:?[e|E][+-]?\d+)?)", - ), - ("SQUOTE", r"(?P')"), - ("DQUOTE", r'(?P")'), - ("COLON", r"(?P:)"), - ("COMMA", r"(?P,)"), - ("LCURL", r"(?P\{)"), - ("RCURL", r"(?P})"), - ("LSQUARE", r"(?P\[)"), - ("RSQUARE", r"(?P])"), - ("LPAREN", r"(?P\()"), - ("RPAREN", r"(?P\))"), - ] - ), - ), - ( - "common", - OrderedDict( - [ - ("WS", r"(?P\s+)"), - ("EOL", r"(?P$)"), - ( - # anything not a whitespace or matched by something else - "UNKNOWN", - r"(?P\S+)", - ), - ] - ), + state_parse_pos_args=OrderedDict( + GOTO_PARSE_NON_PARAMS_OPTIONS=r"(?P(?=--))", # double dash - starting the options list + DEST_VAR=r"(?P[^\d\W]\w*)", # essentially a Python ID + ), + state_parse_non_params_options=OrderedDict( + GOTO_PARSE_PARAMS_OPTION=r"(?P(?=--params(?:\s|=|--|$)))", # the --params option + OPTION_SPEC=r"(?P--\w+)", + OPTION_EQ=r"(?P=)", + OPT_VAL=r"(?P\S+?(?=\s|--|$))", + ), + state_parse_params_option=OrderedDict( + PY_STRING=r"(?P(?:{})|(?:{}))".format( # single and double 
quoted strings + r"'(?:[^'\\]|\.)*'", r'"(?:[^"\\]|\.)*"' ), - ] + PARAMS_OPT_SPEC=r"(?P--params(?=\s|=|--|$))", + PARAMS_OPT_EQ=r"(?P=)", + GOTO_PARSE_NON_PARAMS_OPTIONS=r"(?P(?=--\w+))", # found another option spec + PY_BOOL=r"(?PTrue|False)", + DOLLAR_PY_ID=r"(?P\$[^\d\W]\w*)", + PY_NUMBER=r"(?P-?[1-9]\d*(?:\.\d+)?(:?[e|E][+-]?\d+)?)", + SQUOTE=r"(?P')", + DQUOTE=r'(?P")', + COLON=r"(?P:)", + COMMA=r"(?P,)", + LCURL=r"(?P\{)", + RCURL=r"(?P})", + LSQUARE=r"(?P\[)", + RSQUARE=r"(?P])", + LPAREN=r"(?P\()", + RPAREN=r"(?P\))", + ), + common=OrderedDict( + WS=r"(?P\s+)", + EOL=r"(?P$)", + UNKNOWN=r"(?P\S+)", # anything not a whitespace or matched by something else + ), ) From 8089bdbd146e856e9e4d47bc1329f633cb4b9671 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 13 Apr 2021 19:17:39 +0200 Subject: [PATCH 066/230] chore(deps): update dependency mock to v4.0.3 (#605) --- samples/geography/requirements-test.txt | 2 +- samples/snippets/requirements-test.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 676ff949e..a5da1a77d 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ pytest==5.4.3 -mock==4.0.2 +mock==4.0.3 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 676ff949e..a5da1a77d 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,2 +1,2 @@ pytest==5.4.3 -mock==4.0.2 +mock==4.0.3 From ff2ec3abe418a443cd07751c08e654f94e8b3155 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 13 Apr 2021 12:49:39 -0500 Subject: [PATCH 067/230] docs: add sample to run DML query (#591) * docs: add sample to run DML query * cleanup leftover datasets before test run * fix import order --- samples/snippets/conftest.py | 40 ++++++++++++ samples/snippets/test_update_with_dml.py | 36 +++++++++++ 
samples/snippets/update_with_dml.py | 82 ++++++++++++++++++++++++ samples/snippets/user_sessions_data.json | 10 +++ 4 files changed, 168 insertions(+) create mode 100644 samples/snippets/test_update_with_dml.py create mode 100644 samples/snippets/update_with_dml.py create mode 100644 samples/snippets/user_sessions_data.json diff --git a/samples/snippets/conftest.py b/samples/snippets/conftest.py index d22a33318..31c6ba104 100644 --- a/samples/snippets/conftest.py +++ b/samples/snippets/conftest.py @@ -12,10 +12,35 @@ # See the License for the specific language governing permissions and # limitations under the License. +import datetime +import random + from google.cloud import bigquery import pytest +RESOURCE_PREFIX = "python_bigquery_samples_snippets" + + +def resource_prefix() -> str: + timestamp = datetime.datetime.utcnow().strftime("%Y%m%d_%H%M%S") + random_string = hex(random.randrange(1000000))[2:] + return f"{RESOURCE_PREFIX}_{timestamp}_{random_string}" + + +@pytest.fixture(scope="session", autouse=True) +def cleanup_datasets(bigquery_client: bigquery.Client): + yesterday = datetime.datetime.utcnow() - datetime.timedelta(days=1) + for dataset in bigquery_client.list_datasets(): + if ( + dataset.dataset_id.startswith(RESOURCE_PREFIX) + and dataset.created < yesterday + ): + bigquery_client.delete_dataset( + dataset, delete_contents=True, not_found_ok=True + ) + + @pytest.fixture(scope="session") def bigquery_client(): bigquery_client = bigquery.Client() @@ -25,3 +50,18 @@ def bigquery_client(): @pytest.fixture(scope="session") def project_id(bigquery_client): return bigquery_client.project + + +@pytest.fixture(scope="session") +def dataset_id(bigquery_client: bigquery.Client, project_id: str): + dataset_id = resource_prefix() + full_dataset_id = f"{project_id}.{dataset_id}" + dataset = bigquery.Dataset(full_dataset_id) + bigquery_client.create_dataset(dataset) + yield dataset_id + bigquery_client.delete_dataset(dataset, delete_contents=True, 
not_found_ok=True) + + +@pytest.fixture +def bigquery_client_patch(monkeypatch, bigquery_client): + monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client) diff --git a/samples/snippets/test_update_with_dml.py b/samples/snippets/test_update_with_dml.py new file mode 100644 index 000000000..3cca7a649 --- /dev/null +++ b/samples/snippets/test_update_with_dml.py @@ -0,0 +1,36 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery +import pytest + +from conftest import resource_prefix +import update_with_dml + + +@pytest.fixture +def table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): + table_id = f"{resource_prefix()}_update_with_dml" + yield table_id + full_table_id = f"{project_id}.{dataset_id}.{table_id}" + bigquery_client.delete_table(full_table_id, not_found_ok=True) + + +def test_update_with_dml(bigquery_client_patch, dataset_id, table_id): + override_values = { + "dataset_id": dataset_id, + "table_id": table_id, + } + num_rows = update_with_dml.run_sample(override_values=override_values) + assert num_rows > 0 diff --git a/samples/snippets/update_with_dml.py b/samples/snippets/update_with_dml.py new file mode 100644 index 000000000..7fd09dd80 --- /dev/null +++ b/samples/snippets/update_with_dml.py @@ -0,0 +1,82 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the 
License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START bigquery_update_with_dml] +import pathlib + +from google.cloud import bigquery +from google.cloud.bigquery import enums + + +def load_from_newline_delimited_json( + client: bigquery.Client, + filepath: pathlib.Path, + project_id: str, + dataset_id: str, + table_id: str, +): + full_table_id = f"{project_id}.{dataset_id}.{table_id}" + job_config = bigquery.LoadJobConfig() + job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON + job_config.schema = [ + bigquery.SchemaField("id", enums.SqlTypeNames.STRING), + bigquery.SchemaField("user_id", enums.SqlTypeNames.INTEGER), + bigquery.SchemaField("login_time", enums.SqlTypeNames.TIMESTAMP), + bigquery.SchemaField("logout_time", enums.SqlTypeNames.TIMESTAMP), + bigquery.SchemaField("ip_address", enums.SqlTypeNames.STRING), + ] + + with open(filepath, "rb") as json_file: + load_job = client.load_table_from_file( + json_file, full_table_id, job_config=job_config + ) + + # Wait for load job to finish. + load_job.result() + + +def update_with_dml( + client: bigquery.Client, project_id: str, dataset_id: str, table_id: str +): + query_text = f""" + UPDATE `{project_id}.{dataset_id}.{table_id}` + SET ip_address = REGEXP_REPLACE(ip_address, r"(\\.[0-9]+)$", ".0") + WHERE TRUE + """ + query_job = client.query(query_text) + + # Wait for query job to finish. 
+ query_job.result() + + print(f"DML query modified {query_job.num_dml_affected_rows} rows.") + return query_job.num_dml_affected_rows + + +def run_sample(override_values={}): + client = bigquery.Client() + filepath = pathlib.Path(__file__).parent / "user_sessions_data.json" + project_id = client.project + dataset_id = "sample_db" + table_id = "UserSessions" + # [END bigquery_update_with_dml] + # To facilitate testing, we replace values with alternatives + # provided by the testing harness. + dataset_id = override_values.get("dataset_id", dataset_id) + table_id = override_values.get("table_id", table_id) + # [START bigquery_update_with_dml] + load_from_newline_delimited_json(client, filepath, project_id, dataset_id, table_id) + return update_with_dml(client, project_id, dataset_id, table_id) + + +# [END bigquery_update_with_dml] diff --git a/samples/snippets/user_sessions_data.json b/samples/snippets/user_sessions_data.json new file mode 100644 index 000000000..7ea3715ad --- /dev/null +++ b/samples/snippets/user_sessions_data.json @@ -0,0 +1,10 @@ +{"id":"2ad525d6-c832-4c3d-b7fe-59d104885519","user_id":"38","login_time":"1.47766087E9","logout_time":"1.477661109E9","ip_address":"192.0.2.12"} +{"id":"53d65e20-6ea9-4650-98d9-a2111fbd1122","user_id":"88","login_time":"1.47707544E9","logout_time":"1.477075519E9","ip_address":"192.0.2.88"} +{"id":"5e6c3021-d5e7-4ccd-84b2-adfa9176d13d","user_id":"39","login_time":"1.474022869E9","logout_time":"1.474022961E9","ip_address":"203.0.113.52"} +{"id":"6196eefa-1498-4567-8ef0-498845b888d9","user_id":"52","login_time":"1.478604612E9","logout_time":"1.478604691E9","ip_address":"203.0.113.169"} +{"id":"70656dc5-7e0f-49cf-9e00-f06ed93c1f5b","user_id":"46","login_time":"1.474089924E9","logout_time":"1.474090227E9","ip_address":"192.0.2.10"} +{"id":"aafa5eef-ad49-49a7-9a0f-fbc7fd639bd3","user_id":"40","login_time":"1.478031161E9","logout_time":"1.478031388E9","ip_address":"203.0.113.18"} 
+{"id":"d2792fc2-24dd-4260-9456-3fbe6cdfdd90","user_id":"5","login_time":"1.481259081E9","logout_time":"1.481259247E9","ip_address":"192.0.2.140"} +{"id":"d835dc49-32f9-4790-b4eb-dddee62e0dcc","user_id":"62","login_time":"1.478892977E9","logout_time":"1.478893219E9","ip_address":"203.0.113.83"} +{"id":"f4a0d3c7-351f-471c-8e11-e093e7a6ce75","user_id":"89","login_time":"1.459031555E9","logout_time":"1.459031831E9","ip_address":"203.0.113.233"} +{"id":"f6e9f526-5b22-4679-9c3e-56a636e815bb","user_id":"97","login_time":"1.482426034E9","logout_time":"1.482426415E9","ip_address":"203.0.113.167"} From e7a54374e65869dc3ee117e6fb4629bec3fce3aa Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 14 Apr 2021 16:01:42 +0200 Subject: [PATCH 068/230] chore(deps): update dependency pytest to v6 (#606) --- samples/geography/requirements-test.txt | 2 +- samples/snippets/requirements-test.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index a5da1a77d..299d90b65 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==5.4.3 +pytest==6.2.3 mock==4.0.3 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index a5da1a77d..299d90b65 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==5.4.3 +pytest==6.2.3 mock==4.0.3 From 9239d1a0bf3a9fccb607122ae17e695a980dc965 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 14 Apr 2021 16:03:06 +0200 Subject: [PATCH 069/230] chore(deps): update dependency google-cloud-bigquery to v2.13.1 (#573) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index c5f60911e..6939c07e0 100644 --- 
a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,3 +1,3 @@ geojson==2.5.0 -google-cloud-bigquery==2.13.0 +google-cloud-bigquery==2.13.1 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 734cdf445..74a18981e 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.13.0 +google-cloud-bigquery==2.13.1 google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.4 grpcio==1.37.0 From f95f415d3441b3928f6cc705cb8a75603d790fd6 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 14 Apr 2021 23:08:17 +0200 Subject: [PATCH 070/230] feat: add `max_queue_size` argument to `RowIterator.to_dataframe_iterable` (#575) * feat: add max_queue_size option for BQ Storage API The new parameter allows configuring the maximum size of the internal queue used to hold result pages when query data is streamed over the BigQuery Storage API. * Slightly simplify bits of page streaming logic * Only retain max_queue_size where most relevant * Adjust tests, add support for infinite queue size * Remove deleted param's description --- google/cloud/bigquery/_pandas_helpers.py | 28 +++++++--- google/cloud/bigquery/table.py | 32 +++++++++--- tests/unit/test__pandas_helpers.py | 66 ++++++++++++++++++++++++ 3 files changed, 110 insertions(+), 16 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 412f32754..7553726fa 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -53,6 +53,8 @@ _PROGRESS_INTERVAL = 0.2 # Maximum time between download status checks, in seconds. 
+_MAX_QUEUE_SIZE_DEFAULT = object() # max queue size sentinel for BQ Storage downloads + _PANDAS_DTYPE_TO_BQ = { "bool": "BOOLEAN", "datetime64[ns, UTC]": "TIMESTAMP", @@ -616,6 +618,7 @@ def _download_table_bqstorage( preserve_order=False, selected_fields=None, page_to_item=None, + max_queue_size=_MAX_QUEUE_SIZE_DEFAULT, ): """Use (faster, but billable) BQ Storage API to construct DataFrame.""" @@ -667,7 +670,17 @@ def _download_table_bqstorage( download_state = _DownloadState() # Create a queue to collect frames as they are created in each thread. - worker_queue = queue.Queue() + # + # The queue needs to be bounded by default, because if the user code processes the + # fetched result pages too slowly, while at the same time new pages are rapidly being + # fetched from the server, the queue can grow to the point where the process runs + # out of memory. + if max_queue_size is _MAX_QUEUE_SIZE_DEFAULT: + max_queue_size = total_streams + elif max_queue_size is None: + max_queue_size = 0 # unbounded + + worker_queue = queue.Queue(maxsize=max_queue_size) with concurrent.futures.ThreadPoolExecutor(max_workers=total_streams) as pool: try: @@ -708,15 +721,12 @@ def _download_table_bqstorage( continue # Return any remaining values after the workers finished. - while not worker_queue.empty(): # pragma: NO COVER + while True: # pragma: NO COVER try: - # Include a timeout because even though the queue is - # non-empty, it doesn't guarantee that a subsequent call to - # get() will not block. 
- frame = worker_queue.get(timeout=_PROGRESS_INTERVAL) + frame = worker_queue.get_nowait() yield frame except queue.Empty: # pragma: NO COVER - continue + break finally: # No need for a lock because reading/replacing a variable is # defined to be an atomic operation in the Python language @@ -729,7 +739,7 @@ def _download_table_bqstorage( def download_arrow_bqstorage( - project_id, table, bqstorage_client, preserve_order=False, selected_fields=None + project_id, table, bqstorage_client, preserve_order=False, selected_fields=None, ): return _download_table_bqstorage( project_id, @@ -749,6 +759,7 @@ def download_dataframe_bqstorage( dtypes, preserve_order=False, selected_fields=None, + max_queue_size=_MAX_QUEUE_SIZE_DEFAULT, ): page_to_item = functools.partial(_bqstorage_page_to_dataframe, column_names, dtypes) return _download_table_bqstorage( @@ -758,6 +769,7 @@ def download_dataframe_bqstorage( preserve_order=preserve_order, selected_fields=selected_fields, page_to_item=page_to_item, + max_queue_size=max_queue_size, ) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index a2366b806..bd5bca30f 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1490,13 +1490,12 @@ def _to_page_iterable( if not self._validate_bqstorage(bqstorage_client, False): bqstorage_client = None - if bqstorage_client is not None: - for item in bqstorage_download(): - yield item - return - - for item in tabledata_list_download(): - yield item + result_pages = ( + bqstorage_download() + if bqstorage_client is not None + else tabledata_list_download() + ) + yield from result_pages def _to_arrow_iterable(self, bqstorage_client=None): """Create an iterable of arrow RecordBatches, to process the table as a stream.""" @@ -1622,7 +1621,12 @@ def to_arrow( arrow_schema = _pandas_helpers.bq_to_arrow_schema(self._schema) return pyarrow.Table.from_batches(record_batches, schema=arrow_schema) - def to_dataframe_iterable(self, 
bqstorage_client=None, dtypes=None): + def to_dataframe_iterable( + self, + bqstorage_client=None, + dtypes=None, + max_queue_size=_pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, + ): """Create an iterable of pandas DataFrames, to process the table as a stream. Args: @@ -1642,6 +1646,17 @@ def to_dataframe_iterable(self, bqstorage_client=None, dtypes=None): ``dtype`` is used when constructing the series for the column specified. Otherwise, the default pandas behavior is used. + max_queue_size (Optional[int]): + The maximum number of result pages to hold in the internal queue when + streaming query results over the BigQuery Storage API. Ignored if + Storage API is not used. + + By default, the max queue size is set to the number of BQ Storage streams + created by the server. If ``max_queue_size`` is :data:`None`, the queue + size is infinite. + + ..versionadded:: 2.14.0 + Returns: pandas.DataFrame: A generator of :class:`~pandas.DataFrame`. @@ -1665,6 +1680,7 @@ def to_dataframe_iterable(self, bqstorage_client=None, dtypes=None): dtypes, preserve_order=self._preserve_order, selected_fields=self._selected_fields, + max_queue_size=max_queue_size, ) tabledata_list_download = functools.partial( _pandas_helpers.download_dataframe_row_iterator, diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index abd725820..43692f4af 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -17,6 +17,7 @@ import decimal import functools import operator +import queue import warnings import mock @@ -41,6 +42,11 @@ from google.cloud.bigquery import schema from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT +try: + from google.cloud import bigquery_storage +except ImportError: # pragma: NO COVER + bigquery_storage = None + skip_if_no_bignumeric = pytest.mark.skipif( not _BIGNUMERIC_SUPPORT, reason="BIGNUMERIC support requires pyarrow>=3.0.0", @@ -1265,6 +1271,66 @@ def 
test_dataframe_to_parquet_dict_sequence_schema(module_under_test): assert schema_arg == expected_schema_arg +@pytest.mark.parametrize( + "stream_count,maxsize_kwarg,expected_call_count,expected_maxsize", + [ + (3, {"max_queue_size": 2}, 3, 2), # custom queue size + (4, {}, 4, 4), # default queue size + (7, {"max_queue_size": None}, 7, 0), # infinite queue size + ], +) +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test__download_table_bqstorage( + module_under_test, + stream_count, + maxsize_kwarg, + expected_call_count, + expected_maxsize, +): + from google.cloud.bigquery import dataset + from google.cloud.bigquery import table + + queue_used = None # A reference to the queue used by code under test. + + bqstorage_client = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + fake_session = mock.Mock(streams=["stream/s{i}" for i in range(stream_count)]) + bqstorage_client.create_read_session.return_value = fake_session + + table_ref = table.TableReference( + dataset.DatasetReference("project-x", "dataset-y"), "table-z", + ) + + def fake_download_stream( + download_state, bqstorage_client, session, stream, worker_queue, page_to_item + ): + nonlocal queue_used + queue_used = worker_queue + try: + worker_queue.put_nowait("result_page") + except queue.Full: # pragma: NO COVER + pass + + download_stream = mock.Mock(side_effect=fake_download_stream) + + with mock.patch.object( + module_under_test, "_download_table_bqstorage_stream", new=download_stream + ): + result_gen = module_under_test._download_table_bqstorage( + "some-project", table_ref, bqstorage_client, **maxsize_kwarg + ) + list(result_gen) + + # Timing-safe, as the method under test should block until the pool shutdown is + # complete, at which point all download stream workers have already been submitted + # to the thread pool. 
+ assert download_stream.call_count == stream_count # once for each stream + assert queue_used.maxsize == expected_maxsize + + @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_unknown_field_type(module_under_test): fake_page = api_core.page_iterator.Page( From b5a928e5fc6405e08a986e39e3308f86f3f4817f Mon Sep 17 00:00:00 2001 From: Yoshi Automation Bot Date: Thu, 15 Apr 2021 07:55:15 -0700 Subject: [PATCH 071/230] chore: generate PyPI token in secrets manager, fix spacing in docs (via synth) (#612) * docs(python): add empty lines between methods Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Wed Apr 14 14:41:09 2021 -0600 Source-Repo: googleapis/synthtool Source-Sha: 721339ab60a6eb63b889978b3d9b295dcb3be370 Source-Link: https://github.com/googleapis/synthtool/commit/721339ab60a6eb63b889978b3d9b295dcb3be370 * build: use PyPI API token in secret manager Migrate python libraries onto the PyPI API token stored in secret manager. A PyPI API token is limited in scope to uploading new releases. 
https://pypi.org/help/#apitoken Verified that this works with [build](https://fusion2.corp.google.com/invocations/14bae126-83fa-4328-8da9-d390ed99315c/targets/cloud-devrel%2Fclient-libraries%2Fpython%2Fgoogleapis%2Fpython-vision%2Frelease%2Frelease;config=default/log) on https://github.com/googleapis/python-vision/pull/136 Source-Author: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Source-Date: Wed Apr 14 17:46:06 2021 -0600 Source-Repo: googleapis/synthtool Source-Sha: 043cc620d6a6111816d9e09f2a97208565fde958 Source-Link: https://github.com/googleapis/synthtool/commit/043cc620d6a6111816d9e09f2a97208565fde958 --- .kokoro/release.sh | 4 ++-- .kokoro/release/common.cfg | 14 ++------------ docs/_static/custom.css | 13 ++++++++++++- synth.metadata | 6 +++--- 4 files changed, 19 insertions(+), 18 deletions(-) diff --git a/.kokoro/release.sh b/.kokoro/release.sh index 0e58f0640..3abba6e06 100755 --- a/.kokoro/release.sh +++ b/.kokoro/release.sh @@ -26,7 +26,7 @@ python3 -m pip install --upgrade twine wheel setuptools export PYTHONUNBUFFERED=1 # Move into the package, build the distribution and upload. 
-TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google_cloud_pypi_password") +TWINE_PASSWORD=$(cat "${KOKORO_GFILE_DIR}/secret_manager/google-cloud-pypi-token") cd github/python-bigquery python3 setup.py sdist bdist_wheel -twine upload --username gcloudpypi --password "${TWINE_PASSWORD}" dist/* +twine upload --username __token__ --password "${TWINE_PASSWORD}" dist/* diff --git a/.kokoro/release/common.cfg b/.kokoro/release/common.cfg index 18b417709..922d7fe50 100644 --- a/.kokoro/release/common.cfg +++ b/.kokoro/release/common.cfg @@ -23,18 +23,8 @@ env_vars: { value: "github/python-bigquery/.kokoro/release.sh" } -# Fetch PyPI password -before_action { - fetch_keystore { - keystore_resource { - keystore_config_id: 73713 - keyname: "google_cloud_pypi_password" - } - } -} - # Tokens needed to report release status back to GitHub env_vars: { key: "SECRET_MANAGER_KEYS" - value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem" -} \ No newline at end of file + value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem,google-cloud-pypi-token" +} diff --git a/docs/_static/custom.css b/docs/_static/custom.css index bcd37bbd3..b0a295464 100644 --- a/docs/_static/custom.css +++ b/docs/_static/custom.css @@ -1,9 +1,20 @@ div#python2-eol { border-color: red; border-width: medium; -} +} /* Ensure minimum width for 'Parameters' / 'Returns' column */ dl.field-list > dt { min-width: 100px } + +/* Insert space between methods for readability */ +dl.method { + padding-top: 10px; + padding-bottom: 10px +} + +/* Insert empty space between classes */ +dl.class { + padding-bottom: 50px +} diff --git a/synth.metadata b/synth.metadata index 7221c0f0f..b031618b0 100644 --- a/synth.metadata +++ b/synth.metadata @@ -4,7 +4,7 @@ "git": { "name": ".", "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": 
"8f4c0b84dac3840532d7865247b8ad94b625b897" + "sha": "f95f415d3441b3928f6cc705cb8a75603d790fd6" } }, { @@ -19,14 +19,14 @@ "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "0a071b3460344886297a304253bf924aa68ddb7e" + "sha": "043cc620d6a6111816d9e09f2a97208565fde958" } }, { "git": { "name": "synthtool", "remote": "https://github.com/googleapis/synthtool.git", - "sha": "0a071b3460344886297a304253bf924aa68ddb7e" + "sha": "043cc620d6a6111816d9e09f2a97208565fde958" } } ], From 72d4c4a462f111cfc56e5b878fa641819638d8f5 Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Thu, 15 Apr 2021 14:56:38 -0400 Subject: [PATCH 072/230] chore: prevent normalization of semver versioning (#611) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 607ffb63f..46a128426 100644 --- a/setup.py +++ b/setup.py @@ -100,7 +100,7 @@ setuptools.setup( name=name, - version=version, + version=setuptools.sic(version), description=description, long_description=readme, author="Google LLC", From f75dcdf3943b87daba60011c9a3b42e34ff81910 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 15 Apr 2021 18:40:04 -0500 Subject: [PATCH 073/230] feat: accept job object as argument to `get_job` and `cancel_job` (#617) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows one to more easily cancel or get updated metadata for an existing job from the client class. Ensures that project ID and location are correctly populated. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #616 🦕 --- google/cloud/bigquery/client.py | 63 ++++++++++++++++++++++++++++++--- tests/system/test_client.py | 11 +++--- tests/unit/test_client.py | 43 ++++++++++++++-------- 3 files changed, 93 insertions(+), 24 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 10127e10d..8211e23a3 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -1734,12 +1734,20 @@ def get_job( https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get Args: - job_id (str): Unique job identifier. + job_id (Union[ \ + str, \ + google.cloud.bigquery.job.LoadJob, \ + google.cloud.bigquery.job.CopyJob, \ + google.cloud.bigquery.job.ExtractJob, \ + google.cloud.bigquery.job.QueryJob \ + ]): Job identifier. Keyword Arguments: project (Optional[str]): ID of the project which owns the job (defaults to the client's project). - location (Optional[str]): Location where the job was run. + location (Optional[str]): + Location where the job was run. Ignored if ``job_id`` is a job + object. retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. timeout (Optional[float]): @@ -1757,6 +1765,10 @@ def get_job( """ extra_params = {"projection": "full"} + project, location, job_id = _extract_job_reference( + job_id, project=project, location=location + ) + if project is None: project = self.project @@ -1791,12 +1803,20 @@ def cancel_job( https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/cancel Args: - job_id (str): Unique job identifier. + job_id (Union[ \ + str, \ + google.cloud.bigquery.job.LoadJob, \ + google.cloud.bigquery.job.CopyJob, \ + google.cloud.bigquery.job.ExtractJob, \ + google.cloud.bigquery.job.QueryJob \ + ]): Job identifier. 
Keyword Arguments: project (Optional[str]): ID of the project which owns the job (defaults to the client's project). - location (Optional[str]): Location where the job was run. + location (Optional[str]): + Location where the job was run. Ignored if ``job_id`` is a job + object. retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. timeout (Optional[float]): @@ -1814,6 +1834,10 @@ def cancel_job( """ extra_params = {"projection": "full"} + project, location, job_id = _extract_job_reference( + job_id, project=project, location=location + ) + if project is None: project = self.project @@ -3518,6 +3542,37 @@ def _item_to_table(iterator, resource): return TableListItem(resource) +def _extract_job_reference(job, project=None, location=None): + """Extract fully-qualified job reference from a job-like object. + + Args: + job_id (Union[ \ + str, \ + google.cloud.bigquery.job.LoadJob, \ + google.cloud.bigquery.job.CopyJob, \ + google.cloud.bigquery.job.ExtractJob, \ + google.cloud.bigquery.job.QueryJob \ + ]): Job identifier. + project (Optional[str]): + Project where the job was run. Ignored if ``job_id`` is a job + object. + location (Optional[str]): + Location where the job was run. Ignored if ``job_id`` is a job + object. + + Returns: + Tuple[str, str, str]: ``(project, location, job_id)`` + """ + if hasattr(job, "job_id"): + project = job.project + job_id = job.job_id + location = job.location + else: + job_id = job + + return (project, location, job_id) + + def _make_job_id(job_id, prefix=None): """Construct an ID for a new job. 
diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 024441012..f31d994ca 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -189,7 +189,9 @@ def test_get_service_account_email(self): def _create_bucket(self, bucket_name, location=None): storage_client = storage.Client() bucket = storage_client.bucket(bucket_name) - retry_storage_errors(bucket.create)(location=location) + retry_storage_errors(storage_client.create_bucket)( + bucket_name, location=location + ) self.to_delete.append(bucket) return bucket @@ -872,7 +874,7 @@ def test_load_table_from_file_w_explicit_location(self): job_id = load_job.job_id # Can get the job from the EU. - load_job = client.get_job(job_id, location="EU") + load_job = client.get_job(load_job) self.assertEqual(job_id, load_job.job_id) self.assertEqual("EU", load_job.location) self.assertTrue(load_job.exists()) @@ -889,7 +891,7 @@ def test_load_table_from_file_w_explicit_location(self): # Can cancel the job from the EU. self.assertTrue(load_job.cancel()) - load_job = client.cancel_job(job_id, location="EU") + load_job = client.cancel_job(load_job) self.assertEqual(job_id, load_job.job_id) self.assertEqual("EU", load_job.location) @@ -1204,8 +1206,7 @@ def test_query_w_timeout(self): # Even though the query takes >1 second, the call to getQueryResults # should succeed. 
self.assertFalse(query_job.done(timeout=1)) - - Config.CLIENT.cancel_job(query_job.job_id, location=query_job.location) + self.assertIsNotNone(Config.CLIENT.cancel_job(query_job)) def test_query_w_page_size(self): page_size = 45 diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 96e51678f..c5e742c9e 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -2933,31 +2933,30 @@ def test_get_job_miss_w_explict_project(self): conn = client._connection = make_connection() with self.assertRaises(NotFound): - client.get_job(JOB_ID, project=OTHER_PROJECT, location=self.LOCATION) + client.get_job(JOB_ID, project=OTHER_PROJECT) conn.api_request.assert_called_once_with( method="GET", path="/projects/OTHER_PROJECT/jobs/NONESUCH", - query_params={"projection": "full", "location": self.LOCATION}, + query_params={"projection": "full"}, timeout=None, ) def test_get_job_miss_w_client_location(self): from google.cloud.exceptions import NotFound - OTHER_PROJECT = "OTHER_PROJECT" JOB_ID = "NONESUCH" creds = _make_credentials() - client = self._make_one(self.PROJECT, creds, location=self.LOCATION) + client = self._make_one("client-proj", creds, location="client-loc") conn = client._connection = make_connection() with self.assertRaises(NotFound): - client.get_job(JOB_ID, project=OTHER_PROJECT) + client.get_job(JOB_ID) conn.api_request.assert_called_once_with( method="GET", - path="/projects/OTHER_PROJECT/jobs/NONESUCH", - query_params={"projection": "full", "location": self.LOCATION}, + path="/projects/client-proj/jobs/NONESUCH", + query_params={"projection": "full", "location": "client-loc"}, timeout=None, ) @@ -2971,7 +2970,11 @@ def test_get_job_hit_w_timeout(self): QUERY = "SELECT * from test_dataset:test_table" ASYNC_QUERY_DATA = { "id": "{}:{}".format(self.PROJECT, JOB_ID), - "jobReference": {"projectId": self.PROJECT, "jobId": "query_job"}, + "jobReference": { + "projectId": "resource-proj", + "jobId": "query_job", + "location": "us-east1", 
+ }, "state": "DONE", "configuration": { "query": { @@ -2989,18 +2992,21 @@ def test_get_job_hit_w_timeout(self): creds = _make_credentials() client = self._make_one(self.PROJECT, creds) conn = client._connection = make_connection(ASYNC_QUERY_DATA) + job_from_resource = QueryJob.from_api_repr(ASYNC_QUERY_DATA, client) - job = client.get_job(JOB_ID, timeout=7.5) + job = client.get_job(job_from_resource, timeout=7.5) self.assertIsInstance(job, QueryJob) self.assertEqual(job.job_id, JOB_ID) + self.assertEqual(job.project, "resource-proj") + self.assertEqual(job.location, "us-east1") self.assertEqual(job.create_disposition, CreateDisposition.CREATE_IF_NEEDED) self.assertEqual(job.write_disposition, WriteDisposition.WRITE_TRUNCATE) conn.api_request.assert_called_once_with( method="GET", - path="/projects/PROJECT/jobs/query_job", - query_params={"projection": "full"}, + path="/projects/resource-proj/jobs/query_job", + query_params={"projection": "full", "location": "us-east1"}, timeout=7.5, ) @@ -3049,7 +3055,11 @@ def test_cancel_job_hit(self): QUERY = "SELECT * from test_dataset:test_table" QUERY_JOB_RESOURCE = { "id": "{}:{}".format(self.PROJECT, JOB_ID), - "jobReference": {"projectId": self.PROJECT, "jobId": "query_job"}, + "jobReference": { + "projectId": "job-based-proj", + "jobId": "query_job", + "location": "asia-northeast1", + }, "state": "RUNNING", "configuration": {"query": {"query": QUERY}}, } @@ -3057,17 +3067,20 @@ def test_cancel_job_hit(self): creds = _make_credentials() client = self._make_one(self.PROJECT, creds) conn = client._connection = make_connection(RESOURCE) + job_from_resource = QueryJob.from_api_repr(QUERY_JOB_RESOURCE, client) - job = client.cancel_job(JOB_ID) + job = client.cancel_job(job_from_resource) self.assertIsInstance(job, QueryJob) self.assertEqual(job.job_id, JOB_ID) + self.assertEqual(job.project, "job-based-proj") + self.assertEqual(job.location, "asia-northeast1") self.assertEqual(job.query, QUERY) 
conn.api_request.assert_called_once_with( method="POST", - path="/projects/PROJECT/jobs/query_job/cancel", - query_params={"projection": "full"}, + path="/projects/job-based-proj/jobs/query_job/cancel", + query_params={"projection": "full", "location": "asia-northeast1"}, timeout=None, ) From e0b373d0e721a70656ed8faceb7f5c70f642d144 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 16 Apr 2021 17:19:36 +0200 Subject: [PATCH 074/230] feat: DB API cursors are now iterable (#618) * feat: make DB API Cursors iterable * Raise error if obtaining iterator of closed Cursor --- google/cloud/bigquery/dbapi/_helpers.py | 2 +- google/cloud/bigquery/dbapi/cursor.py | 4 ++++ tests/unit/test_dbapi_cursor.py | 24 ++++++++++++++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 69694c98c..beb3c5e71 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -276,7 +276,7 @@ def decorate_public_methods(klass): """Apply ``_raise_on_closed()`` decorator to public instance methods. """ for name in dir(klass): - if name.startswith("_"): + if name.startswith("_") and name != "__iter__": continue member = getattr(klass, name) diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py index ee09158d8..7e5449718 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -365,6 +365,10 @@ def setinputsizes(self, sizes): def setoutputsize(self, size, column=None): """No-op, but for consistency raise an error if cursor is closed.""" + def __iter__(self): + self._try_fetch() + return iter(self._query_data) + def _format_operation_list(operation, parameters): """Formats parameters in operation in the way BigQuery expects. 
diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index 0f44e3895..8ca4e9b6c 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -178,6 +178,7 @@ def test_raises_error_if_closed(self): "fetchone", "setinputsizes", "setoutputsize", + "__iter__", ) for method in method_names: @@ -611,6 +612,29 @@ def test_executemany_w_dml(self): self.assertIsNone(cursor.description) self.assertEqual(cursor.rowcount, 12) + def test_is_iterable(self): + from google.cloud.bigquery import dbapi + + connection = dbapi.connect( + self._mock_client(rows=[("hello", "there", 7), ("good", "bye", -3)]) + ) + cursor = connection.cursor() + cursor.execute("SELECT foo, bar, baz FROM hello_world WHERE baz < 42;") + + rows_iter = iter(cursor) + + row = next(rows_iter) + self.assertEqual(row, ("hello", "there", 7)) + row = next(rows_iter) + self.assertEqual(row, ("good", "bye", -3)) + self.assertRaises(StopIteration, next, rows_iter) + + self.assertEqual( + list(cursor), + [], + "Iterating again over the same results should produce no rows.", + ) + def test__format_operation_w_dict(self): from google.cloud.bigquery.dbapi import cursor From 6502a602337ae562652a20b20270949f2c9d5073 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 16 Apr 2021 17:52:07 +0200 Subject: [PATCH 075/230] fix: consistent percents handling in DB API query (#619) Fixes #608. Percents in the query string are now always de-escaped, regardless of whether any query parameters are passed or not. In addition, misformatting placeholders that don't match parameter values now consistently raise `ProgrammingError`. **PR checklist:** - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) --- google/cloud/bigquery/dbapi/cursor.py | 6 +-- tests/unit/test_dbapi_cursor.py | 53 +++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py index 7e5449718..ca78d3907 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -393,7 +393,7 @@ def _format_operation_list(operation, parameters): try: return operation % tuple(formatted_params) - except TypeError as exc: + except (TypeError, ValueError) as exc: raise exceptions.ProgrammingError(exc) @@ -423,7 +423,7 @@ def _format_operation_dict(operation, parameters): try: return operation % formatted_params - except KeyError as exc: + except (KeyError, ValueError, TypeError) as exc: raise exceptions.ProgrammingError(exc) @@ -445,7 +445,7 @@ def _format_operation(operation, parameters=None): ``parameters`` argument. """ if parameters is None or len(parameters) == 0: - return operation + return operation.replace("%%", "%") # Still do percent de-escaping. 
if isinstance(parameters, collections_abc.Mapping): return _format_operation_dict(operation, parameters) diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index 8ca4e9b6c..039ef3b4c 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -657,6 +657,14 @@ def test__format_operation_w_wrong_dict(self): {"somevalue-not-here": "hi", "othervalue": "world"}, ) + def test__format_operation_w_redundant_dict_key(self): + from google.cloud.bigquery.dbapi import cursor + + formatted_operation = cursor._format_operation( + "SELECT %(somevalue)s;", {"somevalue": "foo", "value-not-used": "bar"} + ) + self.assertEqual(formatted_operation, "SELECT @`somevalue`;") + def test__format_operation_w_sequence(self): from google.cloud.bigquery.dbapi import cursor @@ -676,8 +684,53 @@ def test__format_operation_w_too_short_sequence(self): ("hello",), ) + def test__format_operation_w_too_long_sequence(self): + from google.cloud.bigquery import dbapi + from google.cloud.bigquery.dbapi import cursor + + self.assertRaises( + dbapi.ProgrammingError, + cursor._format_operation, + "SELECT %s, %s;", + ("hello", "world", "everyone"), + ) + def test__format_operation_w_empty_dict(self): from google.cloud.bigquery.dbapi import cursor formatted_operation = cursor._format_operation("SELECT '%f'", {}) self.assertEqual(formatted_operation, "SELECT '%f'") + + def test__format_operation_wo_params_single_percent(self): + from google.cloud.bigquery.dbapi import cursor + + formatted_operation = cursor._format_operation("SELECT '%'", {}) + self.assertEqual(formatted_operation, "SELECT '%'") + + def test__format_operation_wo_params_double_percents(self): + from google.cloud.bigquery.dbapi import cursor + + formatted_operation = cursor._format_operation("SELECT '%%'", {}) + self.assertEqual(formatted_operation, "SELECT '%'") + + def test__format_operation_unescaped_percent_w_dict_param(self): + from google.cloud.bigquery import dbapi + from 
google.cloud.bigquery.dbapi import cursor + + self.assertRaises( + dbapi.ProgrammingError, + cursor._format_operation, + "SELECT %(foo)s, '100 %';", + {"foo": "bar"}, + ) + + def test__format_operation_unescaped_percent_w_list_param(self): + from google.cloud.bigquery import dbapi + from google.cloud.bigquery.dbapi import cursor + + self.assertRaises( + dbapi.ProgrammingError, + cursor._format_operation, + "SELECT %s, %s, '100 %';", + ["foo", "bar"], + ) From 6ee582413c9b83fe8c853393d20090ed9d2e8b77 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 23 Apr 2021 22:49:42 +0200 Subject: [PATCH 076/230] chore: add unit test nox session w/o extras (#623) --- noxfile.py | 12 ++++++++++-- tests/unit/test__pandas_helpers.py | 1 + tests/unit/test_client.py | 7 ++++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/noxfile.py b/noxfile.py index a738d8c00..bde3b990e 100644 --- a/noxfile.py +++ b/noxfile.py @@ -31,6 +31,7 @@ # 'docfx' is excluded since it only needs to run in 'docs-presubmit' nox.options.sessions = [ + "unit_noextras", "unit", "system", "snippets", @@ -42,7 +43,7 @@ ] -def default(session): +def default(session, install_extras=True): """Default unit test session. This is intended to be run **without** an interpreter set, so @@ -65,7 +66,8 @@ def default(session): constraints_path, ) - session.install("-e", ".[all]", "-c", constraints_path) + install_target = ".[all]" if install_extras else "." 
+ session.install("-e", install_target, "-c", constraints_path) session.install("ipython", "-c", constraints_path) @@ -90,6 +92,12 @@ def unit(session): default(session) +@nox.session(python=UNIT_TEST_PYTHON_VERSIONS[-1]) +def unit_noextras(session): + """Run the unit test suite.""" + default(session, install_extras=False) + + @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) def system(session): """Run the system test suite.""" diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 43692f4af..39a3d845b 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -1464,6 +1464,7 @@ def test_download_dataframe_row_iterator_dict_sequence_schema(module_under_test) result = next(results_gen) +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_table_data_listpage_to_dataframe_skips_stop_iteration(module_under_test): dataframe = module_under_test._row_iterator_page_to_dataframe([], [], {}) assert isinstance(dataframe, pandas.DataFrame) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index c5e742c9e..860f25f35 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -65,7 +65,12 @@ from tests.unit.helpers import make_connection PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") -PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version + +if pandas is not None: + PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version +else: + # Set to less than MIN version. 
+ PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") def _make_credentials(): From 34ecc3f1ca0ff073330c0c605673d89b43af7ed9 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 23 Apr 2021 22:54:32 +0200 Subject: [PATCH 077/230] feat: retry google.auth TransportError by default (#624) --- google/cloud/bigquery/retry.py | 2 ++ tests/unit/test_retry.py | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index 20a8e7b13..5e9075fe1 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -14,6 +14,7 @@ from google.api_core import exceptions from google.api_core import retry +from google.auth import exceptions as auth_exceptions import requests.exceptions @@ -27,6 +28,7 @@ exceptions.InternalServerError, exceptions.BadGateway, requests.exceptions.ConnectionError, + auth_exceptions.TransportError, ) diff --git a/tests/unit/test_retry.py b/tests/unit/test_retry.py index 318a54d34..0bef1e5e1 100644 --- a/tests/unit/test_retry.py +++ b/tests/unit/test_retry.py @@ -51,6 +51,12 @@ def test_w_unstructured_requests_connectionerror(self): exc = requests.exceptions.ConnectionError() self.assertTrue(self._call_fut(exc)) + def test_w_auth_transporterror(self): + from google.auth.exceptions import TransportError + + exc = TransportError("testing") + self.assertTrue(self._call_fut(exc)) + def test_w_unstructured_too_many_requests(self): from google.api_core.exceptions import TooManyRequests From f8d4aaa335a0eef915e73596fc9b43b11d11be9f Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 23 Apr 2021 23:49:26 +0200 Subject: [PATCH 078/230] feat: add type hints for public methods (#613) * feat: add type hint for public methods * feat: add bigquery-storage in requirement file * feat: add pandas in requirement file * feat: add return type hint * feat: remove pandas import as a string * Use the latest pytype version (2021.4.9) * Silence false import and module attribute errors * 
Fix misc. pytype warnings and false postiives * Make changes to generated files persistent * Make final cleanup of client.py * Change import ignores to more specific errors * Silence false positive type warning in job config * Silence noisy _helper type warnings * Silence false positives for resumable media code * Add pytype to nox.options.sessions * Hide for-type-check-only imports behind a flag * Remove obsolete skipIf decorator from two tests inspect.signature() was added in Python 3.3, and the library only needs to suppport Python3.6+. * Install dependencies in pytype session This avoids numerous unnecessary import and module attribute errors, rendering lots of pytype directive comments obsolete. * Be more specific about to_dataframe()'s return type * Add missing return type for _get_query_results() * Be more specific about pandas/pyarrow return types * Exclude typing-only imports from coverage checks Co-authored-by: HemangChothani Co-authored-by: Tim Swast --- .gitignore | 1 + google/cloud/bigquery/_http.py | 3 +- google/cloud/bigquery/_pandas_helpers.py | 2 + google/cloud/bigquery/client.py | 549 +++++++++++------- google/cloud/bigquery/dataset.py | 16 +- google/cloud/bigquery/external_config.py | 28 +- google/cloud/bigquery/job/base.py | 43 +- google/cloud/bigquery/job/extract.py | 2 +- google/cloud/bigquery/job/load.py | 2 +- google/cloud/bigquery/job/query.py | 55 +- .../bigquery/magics/line_arg_parser/lexer.py | 2 +- google/cloud/bigquery/model.py | 10 +- google/cloud/bigquery/query.py | 20 +- google/cloud/bigquery/routine/routine.py | 22 +- google/cloud/bigquery/schema.py | 13 +- google/cloud/bigquery/table.py | 75 ++- noxfile.py | 11 + samples/geography/requirements.txt | 1 + setup.cfg | 14 + synth.py | 29 + tests/unit/test_signature_compatibility.py | 8 - 21 files changed, 575 insertions(+), 331 deletions(-) diff --git a/.gitignore b/.gitignore index b4243ced7..99c3a1444 100644 --- a/.gitignore +++ b/.gitignore @@ -29,6 +29,7 @@ pip-log.txt .nox .cache 
.pytest_cache +.pytype # Mac diff --git a/google/cloud/bigquery/_http.py b/google/cloud/bigquery/_http.py index ede26cc70..81e7922e6 100644 --- a/google/cloud/bigquery/_http.py +++ b/google/cloud/bigquery/_http.py @@ -17,8 +17,7 @@ import os import pkg_resources -from google.cloud import _http - +from google.cloud import _http # pytype: disable=import-error from google.cloud.bigquery import __version__ diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 7553726fa..e93a99eba 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -373,6 +373,7 @@ def augment_schema(dataframe, current_bq_schema): Returns: Optional[Sequence[google.cloud.bigquery.schema.SchemaField]] """ + # pytype: disable=attribute-error augmented_schema = [] unknown_type_fields = [] @@ -406,6 +407,7 @@ def augment_schema(dataframe, current_bq_schema): return None return augmented_schema + # pytype: enable=attribute-error def dataframe_to_arrow(dataframe, bq_schema): diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 8211e23a3..5aa8608a5 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -19,6 +19,7 @@ from collections import abc as collections_abc import copy +import datetime import functools import gzip import io @@ -27,6 +28,7 @@ import math import os import tempfile +from typing import Any, BinaryIO, Dict, Iterable, Optional, Sequence, Tuple, Union import uuid import warnings @@ -35,17 +37,18 @@ except ImportError: # pragma: NO COVER pyarrow = None -from google import resumable_media +from google import resumable_media # type: ignore from google.resumable_media.requests import MultipartUpload from google.resumable_media.requests import ResumableUpload import google.api_core.client_options -import google.api_core.exceptions +import google.api_core.exceptions as core_exceptions from google.api_core.iam import Policy from 
google.api_core import page_iterator +from google.api_core import retry as retries import google.cloud._helpers -from google.cloud import exceptions -from google.cloud.client import ClientWithProject +from google.cloud import exceptions # pytype: disable=import-error +from google.cloud.client import ClientWithProject # pytype: disable=import-error from google.cloud.bigquery._helpers import _del_sub_prop from google.cloud.bigquery._helpers import _get_sub_prop @@ -59,6 +62,13 @@ from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job +from google.cloud.bigquery.job import ( + LoadJobConfig, + QueryJob, + QueryJobConfig, + CopyJobConfig, + ExtractJobConfig, +) from google.cloud.bigquery.model import Model from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.model import _model_arg_to_model_ref @@ -216,8 +226,11 @@ def close(self): self._http.close() def get_service_account_email( - self, project=None, retry=DEFAULT_RETRY, timeout=None - ): + self, + project: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> str: """Get the email address of the project's BigQuery service account Note: @@ -259,8 +272,12 @@ def get_service_account_email( return api_response["email"] def list_projects( - self, max_results=None, page_token=None, retry=DEFAULT_RETRY, timeout=None - ): + self, + max_results: int = None, + page_token: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> page_iterator.Iterator: """List projects for the project associated with this client. 
See @@ -313,14 +330,14 @@ def api_request(*args, **kwargs): def list_datasets( self, - project=None, - include_all=False, - filter=None, - max_results=None, - page_token=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + project: str = None, + include_all: bool = False, + filter: str = None, + max_results: int = None, + page_token: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> page_iterator.Iterator: """List datasets for the project associated with this client. See @@ -390,7 +407,7 @@ def api_request(*args, **kwargs): extra_params=extra_params, ) - def dataset(self, dataset_id, project=None): + def dataset(self, dataset_id: str, project: str = None) -> DatasetReference: """Deprecated: Construct a reference to a dataset. .. deprecated:: 1.24.0 @@ -466,8 +483,12 @@ def _dataset_from_arg(self, dataset): return dataset def create_dataset( - self, dataset, exists_ok=False, retry=DEFAULT_RETRY, timeout=None - ): + self, + dataset: Union[str, Dataset, DatasetReference], + exists_ok: bool = False, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Dataset: """API call: create the dataset via a POST request. See @@ -531,14 +552,18 @@ def create_dataset( timeout=timeout, ) return Dataset.from_api_repr(api_response) - except google.api_core.exceptions.Conflict: + except core_exceptions.Conflict: if not exists_ok: raise return self.get_dataset(dataset.reference, retry=retry) def create_routine( - self, routine, exists_ok=False, retry=DEFAULT_RETRY, timeout=None - ): + self, + routine: Routine, + exists_ok: bool = False, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Routine: """[Beta] Create a routine via a POST request. 
See @@ -582,12 +607,18 @@ def create_routine( timeout=timeout, ) return Routine.from_api_repr(api_response) - except google.api_core.exceptions.Conflict: + except core_exceptions.Conflict: if not exists_ok: raise return self.get_routine(routine.reference, retry=retry) - def create_table(self, table, exists_ok=False, retry=DEFAULT_RETRY, timeout=None): + def create_table( + self, + table: Union[str, Table, TableReference], + exists_ok: bool = False, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Table: """API call: create a table via a PUT request See @@ -636,7 +667,7 @@ def create_table(self, table, exists_ok=False, retry=DEFAULT_RETRY, timeout=None timeout=timeout, ) return Table.from_api_repr(api_response) - except google.api_core.exceptions.Conflict: + except core_exceptions.Conflict: if not exists_ok: raise return self.get_table(table.reference, retry=retry) @@ -654,7 +685,12 @@ def _call_api( return call() return call() - def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY, timeout=None): + def get_dataset( + self, + dataset_ref: Union[DatasetReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Dataset: """Fetch the dataset referenced by ``dataset_ref`` Args: @@ -693,8 +729,12 @@ def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY, timeout=None): return Dataset.from_api_repr(api_response) def get_iam_policy( - self, table, requested_policy_version=1, retry=DEFAULT_RETRY, timeout=None, - ): + self, + table: Union[Table, TableReference], + requested_policy_version: int = 1, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Policy: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -718,8 +758,13 @@ def get_iam_policy( return Policy.from_api_repr(response) def set_iam_policy( - self, table, policy, updateMask=None, retry=DEFAULT_RETRY, timeout=None, - ): + self, + table: Union[Table, TableReference], + policy: 
Policy, + updateMask: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Policy: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -747,8 +792,12 @@ def set_iam_policy( return Policy.from_api_repr(response) def test_iam_permissions( - self, table, permissions, retry=DEFAULT_RETRY, timeout=None, - ): + self, + table: Union[Table, TableReference], + permissions: Sequence[str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Dict[str, Any]: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -768,7 +817,12 @@ def test_iam_permissions( return response - def get_model(self, model_ref, retry=DEFAULT_RETRY, timeout=None): + def get_model( + self, + model_ref: Union[ModelReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Model: """[Beta] Fetch the model referenced by ``model_ref``. Args: @@ -806,7 +860,12 @@ def get_model(self, model_ref, retry=DEFAULT_RETRY, timeout=None): ) return Model.from_api_repr(api_response) - def get_routine(self, routine_ref, retry=DEFAULT_RETRY, timeout=None): + def get_routine( + self, + routine_ref: Union[Routine, RoutineReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Routine: """[Beta] Get the routine referenced by ``routine_ref``. Args: @@ -845,7 +904,12 @@ def get_routine(self, routine_ref, retry=DEFAULT_RETRY, timeout=None): ) return Routine.from_api_repr(api_response) - def get_table(self, table, retry=DEFAULT_RETRY, timeout=None): + def get_table( + self, + table: Union[Table, TableReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Table: """Fetch the table referenced by ``table``. 
Args: @@ -881,7 +945,13 @@ def get_table(self, table, retry=DEFAULT_RETRY, timeout=None): ) return Table.from_api_repr(api_response) - def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY, timeout=None): + def update_dataset( + self, + dataset: Dataset, + fields: Sequence[str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Dataset: """Change some fields of a dataset. Use ``fields`` to specify which fields to update. At least one field @@ -945,7 +1015,13 @@ def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY, timeout=None): ) return Dataset.from_api_repr(api_response) - def update_model(self, model, fields, retry=DEFAULT_RETRY, timeout=None): + def update_model( + self, + model: Model, + fields: Sequence[str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Model: """[Beta] Change some fields of a model. Use ``fields`` to specify which fields to update. At least one field @@ -1003,7 +1079,13 @@ def update_model(self, model, fields, retry=DEFAULT_RETRY, timeout=None): ) return Model.from_api_repr(api_response) - def update_routine(self, routine, fields, retry=DEFAULT_RETRY, timeout=None): + def update_routine( + self, + routine: Routine, + fields: Sequence[str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Routine: """[Beta] Change some fields of a routine. Use ``fields`` to specify which fields to update. At least one field @@ -1071,7 +1153,13 @@ def update_routine(self, routine, fields, retry=DEFAULT_RETRY, timeout=None): ) return Routine.from_api_repr(api_response) - def update_table(self, table, fields, retry=DEFAULT_RETRY, timeout=None): + def update_table( + self, + table: Table, + fields: Sequence[str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Table: """Change some fields of a table. Use ``fields`` to specify which fields to update. 
At least one field @@ -1132,12 +1220,12 @@ def update_table(self, table, fields, retry=DEFAULT_RETRY, timeout=None): def list_models( self, - dataset, - max_results=None, - page_token=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + dataset: Union[Dataset, DatasetReference, str], + max_results: int = None, + page_token: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> page_iterator.Iterator: """[Beta] List models in the dataset. See @@ -1204,12 +1292,12 @@ def api_request(*args, **kwargs): def list_routines( self, - dataset, - max_results=None, - page_token=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + dataset: Union[Dataset, DatasetReference, str], + max_results: int = None, + page_token: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> page_iterator.Iterator: """[Beta] List routines in the dataset. See @@ -1276,12 +1364,12 @@ def api_request(*args, **kwargs): def list_tables( self, - dataset, - max_results=None, - page_token=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + dataset: Union[Dataset, DatasetReference, str], + max_results: int = None, + page_token: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> page_iterator.Iterator: """List tables in the dataset. See @@ -1347,12 +1435,12 @@ def api_request(*args, **kwargs): def delete_dataset( self, - dataset, - delete_contents=False, - retry=DEFAULT_RETRY, - timeout=None, - not_found_ok=False, - ): + dataset: Union[Dataset, DatasetReference, str], + delete_contents: bool = False, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + not_found_ok: bool = False, + ) -> None: """Delete a dataset. 
See @@ -1401,13 +1489,17 @@ def delete_dataset( query_params=params, timeout=timeout, ) - except google.api_core.exceptions.NotFound: + except core_exceptions.NotFound: if not not_found_ok: raise def delete_model( - self, model, retry=DEFAULT_RETRY, timeout=None, not_found_ok=False - ): + self, + model: Union[Model, ModelReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + not_found_ok: bool = False, + ) -> None: """[Beta] Delete a model See @@ -1449,13 +1541,17 @@ def delete_model( path=path, timeout=timeout, ) - except google.api_core.exceptions.NotFound: + except core_exceptions.NotFound: if not not_found_ok: raise def delete_routine( - self, routine, retry=DEFAULT_RETRY, timeout=None, not_found_ok=False - ): + self, + routine: Union[Routine, RoutineReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + not_found_ok: bool = False, + ) -> None: """[Beta] Delete a routine. See @@ -1499,13 +1595,17 @@ def delete_routine( path=path, timeout=timeout, ) - except google.api_core.exceptions.NotFound: + except core_exceptions.NotFound: if not not_found_ok: raise def delete_table( - self, table, retry=DEFAULT_RETRY, timeout=None, not_found_ok=False - ): + self, + table: Union[Table, TableReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + not_found_ok: bool = False, + ) -> None: """Delete a table See @@ -1545,13 +1645,19 @@ def delete_table( path=path, timeout=timeout, ) - except google.api_core.exceptions.NotFound: + except core_exceptions.NotFound: if not not_found_ok: raise def _get_query_results( - self, job_id, retry, project=None, timeout_ms=None, location=None, timeout=None, - ): + self, + job_id: str, + retry: retries.Retry, + project: str = None, + timeout_ms: int = None, + location: str = None, + timeout: float = None, + ) -> _QueryResults: """Get the query results object for a query job. 
Args: @@ -1609,7 +1715,7 @@ def _get_query_results( ) return _QueryResults.from_api_repr(resource) - def job_from_resource(self, resource): + def job_from_resource(self, resource: dict) -> job.UnknownJob: """Detect correct job type from resource and instantiate. Args: @@ -1635,7 +1741,12 @@ def job_from_resource(self, resource): return job.QueryJob.from_api_repr(resource, self) return job.UnknownJob.from_api_repr(resource, self) - def create_job(self, job_config, retry=DEFAULT_RETRY, timeout=None): + def create_job( + self, + job_config: dict, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Create a new job. Args: job_config (dict): configuration job representation returned from the API. @@ -1726,8 +1837,13 @@ def create_job(self, job_config, retry=DEFAULT_RETRY, timeout=None): raise TypeError("Invalid job configuration received.") def get_job( - self, job_id, project=None, location=None, retry=DEFAULT_RETRY, timeout=None - ): + self, + job_id: str, + project: str = None, + location: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Fetch a job for the project associated with this client. See @@ -1795,8 +1911,13 @@ def get_job( return self.job_from_resource(resource) def cancel_job( - self, job_id, project=None, location=None, retry=DEFAULT_RETRY, timeout=None - ): + self, + job_id: str, + project: str = None, + location: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Attempt to cancel a job from a job ID. 
See @@ -1865,17 +1986,17 @@ def cancel_job( def list_jobs( self, - project=None, - parent_job=None, - max_results=None, - page_token=None, - all_users=None, - state_filter=None, - retry=DEFAULT_RETRY, - timeout=None, - min_creation_time=None, - max_creation_time=None, - ): + project: str = None, + parent_job: Optional[Union[QueryJob, str]] = None, + max_results: int = None, + page_token: str = None, + all_users: bool = None, + state_filter: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + min_creation_time: datetime.datetime = None, + max_creation_time: datetime.datetime = None, + ) -> page_iterator.Iterator: """List jobs for the project associated with this client. See @@ -1926,7 +2047,7 @@ def list_jobs( Iterable of job instances. """ if isinstance(parent_job, job._AsyncJob): - parent_job = parent_job.job_id + parent_job = parent_job.job_id # pytype: disable=attribute-error extra_params = { "allUsers": all_users, @@ -1975,16 +2096,16 @@ def api_request(*args, **kwargs): def load_table_from_uri( self, - source_uris, - destination, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - job_config=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + source_uris: Union[str, Sequence[str]], + destination: Union[Table, TableReference, str], + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + job_config: LoadJobConfig = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> job.LoadJob: """Starts a job for loading data into a table from CloudStorage. 
See @@ -2057,18 +2178,18 @@ def load_table_from_uri( def load_table_from_file( self, - file_obj, - destination, - rewind=False, - size=None, - num_retries=_DEFAULT_NUM_RETRIES, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - job_config=None, - timeout=None, - ): + file_obj: BinaryIO, + destination: Union[Table, TableReference, str], + rewind: bool = False, + size: int = None, + num_retries: int = _DEFAULT_NUM_RETRIES, + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + job_config: LoadJobConfig = None, + timeout: float = None, + ) -> job.LoadJob: """Upload the contents of this table from a file-like object. Similar to :meth:`load_table_from_uri`, this method creates, starts and @@ -2162,16 +2283,16 @@ def load_table_from_file( def load_table_from_dataframe( self, dataframe, - destination, - num_retries=_DEFAULT_NUM_RETRIES, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - job_config=None, - parquet_compression="snappy", - timeout=None, - ): + destination: Union[Table, TableReference, str], + num_retries: int = _DEFAULT_NUM_RETRIES, + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + job_config: LoadJobConfig = None, + parquet_compression: str = "snappy", + timeout: float = None, + ) -> job.LoadJob: """Upload the contents of a table from a pandas DataFrame. 
Similar to :meth:`load_table_from_uri`, this method creates, starts and @@ -2299,7 +2420,7 @@ def load_table_from_dataframe( ): try: table = self.get_table(destination) - except google.api_core.exceptions.NotFound: + except core_exceptions.NotFound: table = None else: columns_and_indexes = frozenset( @@ -2388,16 +2509,16 @@ def load_table_from_dataframe( def load_table_from_json( self, - json_rows, - destination, - num_retries=_DEFAULT_NUM_RETRIES, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - job_config=None, - timeout=None, - ): + json_rows: Iterable[Dict[str, Any]], + destination: Union[Table, TableReference, str], + num_retries: int = _DEFAULT_NUM_RETRIES, + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + job_config: LoadJobConfig = None, + timeout: float = None, + ) -> job.LoadJob: """Upload the contents of a table from a JSON string or dict. Args: @@ -2669,16 +2790,18 @@ def _do_multipart_upload( def copy_table( self, - sources, - destination, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - job_config=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + sources: Union[ + Table, TableReference, str, Sequence[Union[Table, TableReference, str]] + ], + destination: Union[Table, TableReference, str], + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + job_config: CopyJobConfig = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> job.CopyJob: """Copy one or more tables to another table. 
See @@ -2772,17 +2895,17 @@ def copy_table( def extract_table( self, - source, - destination_uris, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - job_config=None, - retry=DEFAULT_RETRY, - timeout=None, - source_type="Table", - ): + source: Union[Table, TableReference, Model, ModelReference, str], + destination_uris: Union[str, Sequence[str]], + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + job_config: ExtractJobConfig = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + source_type: str = "Table", + ) -> job.ExtractJob: """Start a job to extract a table into Cloud Storage files. See @@ -2871,15 +2994,15 @@ def extract_table( def query( self, - query, - job_config=None, - job_id=None, - job_id_prefix=None, - location=None, - project=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + query: str, + job_config: QueryJobConfig = None, + job_id: str = None, + job_id_prefix: str = None, + location: str = None, + project: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> job.QueryJob: """Run a SQL query. See @@ -2956,7 +3079,13 @@ def query( return query_job - def insert_rows(self, table, rows, selected_fields=None, **kwargs): + def insert_rows( + self, + table: Union[Table, TableReference, str], + rows: Union[Iterable[Tuple], Iterable[Dict]], + selected_fields: Sequence[SchemaField] = None, + **kwargs: dict, + ) -> Sequence[dict]: """Insert rows into a table via the streaming API. See @@ -2979,7 +3108,7 @@ def insert_rows(self, table, rows, selected_fields=None, **kwargs): selected_fields (Sequence[google.cloud.bigquery.schema.SchemaField]): The fields to return. Required if ``table`` is a :class:`~google.cloud.bigquery.table.TableReference`. - kwargs (Dict): + kwargs (dict): Keyword arguments to :meth:`~google.cloud.bigquery.client.Client.insert_rows_json`. 
@@ -3019,8 +3148,13 @@ def insert_rows(self, table, rows, selected_fields=None, **kwargs): return self.insert_rows_json(table, json_rows, **kwargs) def insert_rows_from_dataframe( - self, table, dataframe, selected_fields=None, chunk_size=500, **kwargs - ): + self, + table: Union[Table, TableReference, str], + dataframe, + selected_fields: Sequence[SchemaField] = None, + chunk_size: int = 500, + **kwargs: Dict, + ) -> Sequence[Sequence[dict]]: """Insert rows into a table from a dataframe via the streaming API. Args: @@ -3068,15 +3202,15 @@ def insert_rows_from_dataframe( def insert_rows_json( self, - table, - json_rows, - row_ids=None, - skip_invalid_rows=None, - ignore_unknown_values=None, - template_suffix=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + table: Union[Table, TableReference, str], + json_rows: Sequence[Dict], + row_ids: Sequence[str] = None, + skip_invalid_rows: bool = None, + ignore_unknown_values: bool = None, + template_suffix: str = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Sequence[dict]: """Insert rows into a table without applying local type conversions. See @@ -3172,7 +3306,12 @@ def insert_rows_json( return errors - def list_partitions(self, table, retry=DEFAULT_RETRY, timeout=None): + def list_partitions( + self, + table: Union[Table, TableReference, str], + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> Sequence[str]: """List the partitions in a table. 
Args: @@ -3214,15 +3353,15 @@ def list_partitions(self, table, retry=DEFAULT_RETRY, timeout=None): def list_rows( self, - table, - selected_fields=None, - max_results=None, - page_token=None, - start_index=None, - page_size=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + table: Union[Table, TableListItem, TableReference, str], + selected_fields: Sequence[SchemaField] = None, + max_results: int = None, + page_token: str = None, + start_index: int = None, + page_size: int = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> RowIterator: """List the rows of the table. See @@ -3323,18 +3462,18 @@ def list_rows( def _list_rows_from_query_results( self, - job_id, - location, - project, - schema, - total_rows=None, - destination=None, - max_results=None, - start_index=None, - page_size=None, - retry=DEFAULT_RETRY, - timeout=None, - ): + job_id: str, + location: str, + project: str, + schema: SchemaField, + total_rows: int = None, + destination: Union[Table, TableReference, TableListItem, str] = None, + max_results: int = None, + start_index: int = None, + page_size: int = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = None, + ) -> RowIterator: """List the rows of a completed query. See https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults @@ -3419,7 +3558,7 @@ def _schema_to_json_file_object(self, schema_list, file_obj): """ json.dump(schema_list, file_obj, indent=2, sort_keys=True) - def schema_from_json(self, file_or_path): + def schema_from_json(self, file_or_path: Union[str, BinaryIO]): """Takes a file object or file path that contains json that describes a table schema. 
@@ -3432,7 +3571,9 @@ def schema_from_json(self, file_or_path): with open(file_or_path) as file_obj: return self._schema_from_json_file_object(file_obj) - def schema_to_json(self, schema_list, destination): + def schema_to_json( + self, schema_list: Sequence[SchemaField], destination: Union[str, BinaryIO] + ): """Takes a list of schema field objects. Serializes the list of schema field objects as json to a file. @@ -3606,7 +3747,7 @@ def _check_mode(stream): mode = getattr(stream, "mode", None) if isinstance(stream, gzip.GzipFile): - if mode != gzip.READ: + if mode != gzip.READ: # pytype: disable=module-attr raise ValueError( "Cannot upload gzip files opened in write mode: use " "gzip.GzipFile(filename, mode='rb')" diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index 2d3a4755f..21e56f305 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -220,7 +220,7 @@ def to_api_repr(self): return resource @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "AccessEntry": """Factory: construct an access entry given its API representation Args: @@ -288,7 +288,7 @@ def path(self): routine = _get_routine_reference @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "DatasetReference": """Factory: construct a dataset reference given its API representation Args: @@ -304,7 +304,9 @@ def from_api_repr(cls, resource): return cls(project, dataset_id) @classmethod - def from_string(cls, dataset_id, default_project=None): + def from_string( + cls, dataset_id: str, default_project: str = None + ) -> "DatasetReference": """Construct a dataset reference from dataset ID string. 
Args: @@ -350,7 +352,7 @@ def from_string(cls, dataset_id, default_project=None): return cls(output_project_id, output_dataset_id) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this dataset reference Returns: @@ -640,7 +642,7 @@ def default_encryption_configuration(self, value): self._properties["defaultEncryptionConfiguration"] = api_repr @classmethod - def from_string(cls, full_dataset_id): + def from_string(cls, full_dataset_id: str) -> "Dataset": """Construct a dataset from fully-qualified dataset ID. Args: @@ -664,7 +666,7 @@ def from_string(cls, full_dataset_id): return cls(DatasetReference.from_string(full_dataset_id)) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "Dataset": """Factory: construct a dataset given its API representation Args: @@ -689,7 +691,7 @@ def from_api_repr(cls, resource): dataset._properties = copy.deepcopy(resource) return dataset - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this dataset Returns: diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 59e4960f9..ef4d569fa 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -149,7 +149,7 @@ def type_(self): def type_(self, value): self._properties["type"] = value - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. Returns: @@ -159,7 +159,7 @@ def to_api_repr(self): return copy.deepcopy(self._properties) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "BigtableColumn": """Factory: construct a :class:`~.external_config.BigtableColumn` instance given its API representation. 
@@ -251,7 +251,7 @@ def columns(self): def columns(self, value): self._properties["columns"] = [col.to_api_repr() for col in value] - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. Returns: @@ -261,7 +261,7 @@ def to_api_repr(self): return copy.deepcopy(self._properties) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "BigtableColumnFamily": """Factory: construct a :class:`~.external_config.BigtableColumnFamily` instance given its API representation. @@ -333,7 +333,7 @@ def column_families(self): def column_families(self, value): self._properties["columnFamilies"] = [cf.to_api_repr() for cf in value] - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. Returns: @@ -343,7 +343,7 @@ def to_api_repr(self): return copy.deepcopy(self._properties) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "BigtableOptions": """Factory: construct a :class:`~.external_config.BigtableOptions` instance given its API representation. @@ -450,7 +450,7 @@ def skip_leading_rows(self): def skip_leading_rows(self, value): self._properties["skipLeadingRows"] = str(value) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. Returns: @@ -459,7 +459,7 @@ def to_api_repr(self): return copy.deepcopy(self._properties) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "CSVOptions": """Factory: construct a :class:`~.external_config.CSVOptions` instance given its API representation. @@ -513,7 +513,7 @@ def range(self): def range(self, value): self._properties["range"] = value - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. 
Returns: @@ -522,7 +522,7 @@ def to_api_repr(self): return copy.deepcopy(self._properties) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "GoogleSheetsOptions": """Factory: construct a :class:`~.external_config.GoogleSheetsOptions` instance given its API representation. @@ -601,7 +601,7 @@ def require_partition_filter(self): def require_partition_filter(self, value): self._properties["requirePartitionFilter"] = value - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. Returns: @@ -610,7 +610,7 @@ def to_api_repr(self): return copy.deepcopy(self._properties) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "HivePartitioningOptions": """Factory: construct a :class:`~.external_config.HivePartitioningOptions` instance given its API representation. @@ -784,7 +784,7 @@ def schema(self, value): prop = {"fields": [field.to_api_repr() for field in value]} self._properties["schema"] = prop - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of this object. Returns: @@ -799,7 +799,7 @@ def to_api_repr(self): return config @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "ExternalConfig": """Factory: construct an :class:`~.external_config.ExternalConfig` instance given its API representation. 
diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index f24e972c8..20ad81c0b 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -18,6 +18,7 @@ import copy import http import threading +import typing from google.api_core import exceptions import google.api_core.future.polling @@ -25,6 +26,9 @@ from google.cloud.bigquery import _helpers from google.cloud.bigquery.retry import DEFAULT_RETRY +if typing.TYPE_CHECKING: # pragma: NO COVER + from google.api_core import retry as retries + _DONE_STATE = "DONE" _STOPPED_REASON = "stopped" @@ -466,7 +470,9 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): ) self._set_properties(api_response) - def exists(self, client=None, retry=DEFAULT_RETRY, timeout=None): + def exists( + self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + ) -> bool: """API call: test for the existence of the job via a GET request See @@ -509,7 +515,9 @@ def exists(self, client=None, retry=DEFAULT_RETRY, timeout=None): else: return True - def reload(self, client=None, retry=DEFAULT_RETRY, timeout=None): + def reload( + self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + ): """API call: refresh job properties via a GET request. See @@ -544,7 +552,9 @@ def reload(self, client=None, retry=DEFAULT_RETRY, timeout=None): ) self._set_properties(api_response) - def cancel(self, client=None, retry=DEFAULT_RETRY, timeout=None): + def cancel( + self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + ) -> bool: """API call: cancel job via a POST request See @@ -610,7 +620,12 @@ def _set_future_result(self): else: self.set_result(self) - def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): + def done( + self, + retry: "retries.Retry" = DEFAULT_RETRY, + timeout: float = None, + reload: bool = True, + ) -> bool: """Checks if the job is complete. 
Args: @@ -633,7 +648,9 @@ def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True): self.reload(retry=retry, timeout=timeout) return self.state == _DONE_STATE - def result(self, retry=DEFAULT_RETRY, timeout=None): + def result( + self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + ) -> "_AsyncJob": """Start the job and wait for it to complete and get the result. Args: @@ -788,7 +805,7 @@ def _del_sub_prop(self, key): """ _helpers._del_sub_prop(self._properties, [self._job_type, key]) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of the job config. Returns: @@ -818,7 +835,10 @@ def _fill_from_default(self, default_job_config): + repr(default_job_config._job_type) ) - new_job_config = self.__class__() + # cls is one of the job config subclasses that provides the job_type argument to + # this base class on instantiation, thus missing-parameter warning is a false + # positive here. + new_job_config = self.__class__() # pytype: disable=missing-parameter default_job_properties = copy.deepcopy(default_job_config._properties) for key in self._properties: @@ -831,7 +851,7 @@ def _fill_from_default(self, default_job_config): return new_job_config @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "_JobConfig": """Factory: construct a job configuration given its API representation Args: @@ -842,7 +862,10 @@ def from_api_repr(cls, resource): Returns: google.cloud.bigquery.job._JobConfig: Configuration parsed from ``resource``. """ - job_config = cls() + # cls is one of the job config subclasses that provides the job_type argument to + # this base class on instantiation, thus missing-parameter warning is a false + # positive here. 
+ job_config = cls() # pytype: disable=missing-parameter job_config._properties = resource return job_config @@ -929,7 +952,7 @@ class UnknownJob(_AsyncJob): """A job whose type cannot be determined.""" @classmethod - def from_api_repr(cls, resource, client): + def from_api_repr(cls, resource: dict, client) -> "UnknownJob": """Construct an UnknownJob from the JSON representation. Args: diff --git a/google/cloud/bigquery/job/extract.py b/google/cloud/bigquery/job/extract.py index a6e262a32..3373bcdef 100644 --- a/google/cloud/bigquery/job/extract.py +++ b/google/cloud/bigquery/job/extract.py @@ -241,7 +241,7 @@ def to_api_repr(self): } @classmethod - def from_api_repr(cls, resource, client): + def from_api_repr(cls, resource: dict, client) -> "ExtractJob": """Factory: construct a job given its API representation .. note: diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index e784af0a6..b8174af3e 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -733,7 +733,7 @@ def to_api_repr(self): } @classmethod - def from_api_repr(cls, resource, client): + def from_api_repr(cls, resource: dict, client) -> "LoadJob": """Factory: construct a job given its API representation .. 
note: diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 491983f8e..f52f9c621 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -17,6 +17,8 @@ import concurrent.futures import copy import re +import typing +from typing import Any, Dict, Union from google.api_core import exceptions from google.api_core.future import polling as polling_future @@ -46,6 +48,15 @@ from google.cloud.bigquery.job.base import _JobConfig from google.cloud.bigquery.job.base import _JobReference +if typing.TYPE_CHECKING: # pragma: NO COVER + # Assumption: type checks are only used by library developers and CI environments + # that have all optional dependencies installed, thus no conditional imports. + import pandas + import pyarrow + from google.api_core import retry as retries + from google.cloud import bigquery_storage + from google.cloud.bigquery.table import RowIterator + _CONTAINS_ORDER_BY = re.compile(r"ORDER\s+BY", re.IGNORECASE) _TIMEOUT_BUFFER_SECS = 0.1 @@ -491,7 +502,7 @@ def schema_update_options(self): def schema_update_options(self, values): self._set_sub_prop("schemaUpdateOptions", values) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Build an API representation of the query job config. Returns: @@ -718,7 +729,7 @@ def to_api_repr(self): } @classmethod - def from_api_repr(cls, resource, client): + def from_api_repr(cls, resource: dict, client) -> "QueryJob": """Factory: construct a job given its API representation Args: @@ -1036,7 +1047,9 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None): exc.query_job = self raise - def _reload_query_results(self, retry=DEFAULT_RETRY, timeout=None): + def _reload_query_results( + self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None + ): """Refresh the cached query results. 
Args: @@ -1111,12 +1124,12 @@ def _done_or_raise(self, retry=DEFAULT_RETRY, timeout=None): def result( self, - page_size=None, - max_results=None, - retry=DEFAULT_RETRY, - timeout=None, - start_index=None, - ): + page_size: int = None, + max_results: int = None, + retry: "retries.Retry" = DEFAULT_RETRY, + timeout: float = None, + start_index: int = None, + ) -> Union["RowIterator", _EmptyRowIterator]: """Start the job and wait for it to complete and get the result. Args: @@ -1196,10 +1209,10 @@ def result( # changes to table.RowIterator.to_arrow() def to_arrow( self, - progress_bar_type=None, - bqstorage_client=None, - create_bqstorage_client=True, - ): + progress_bar_type: str = None, + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + create_bqstorage_client: bool = True, + ) -> "pyarrow.Table": """[Beta] Create a class:`pyarrow.Table` by loading all pages of a table or query. @@ -1265,12 +1278,12 @@ def to_arrow( # changes to table.RowIterator.to_dataframe() def to_dataframe( self, - bqstorage_client=None, - dtypes=None, - progress_bar_type=None, - create_bqstorage_client=True, - date_as_object=True, - ): + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + dtypes: Dict[str, Any] = None, + progress_bar_type: str = None, + create_bqstorage_client: bool = True, + date_as_object: bool = True, + ) -> "pandas.DataFrame": """Return a pandas DataFrame from a QueryJob Args: @@ -1350,7 +1363,7 @@ def __init__(self, kind, substeps): self.substeps = list(substeps) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "QueryPlanEntryStep": """Factory: construct instance from the JSON repr. Args: @@ -1380,7 +1393,7 @@ def __init__(self): self._properties = {} @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "QueryPlanEntry": """Factory: construct instance from the JSON repr. 
Args: diff --git a/google/cloud/bigquery/magics/line_arg_parser/lexer.py b/google/cloud/bigquery/magics/line_arg_parser/lexer.py index 5a6ee1a83..cd809c389 100644 --- a/google/cloud/bigquery/magics/line_arg_parser/lexer.py +++ b/google/cloud/bigquery/magics/line_arg_parser/lexer.py @@ -98,7 +98,7 @@ def _generate_next_value_(name, start, count, last_values): return name -TokenType = AutoStrEnum( +TokenType = AutoStrEnum( # pytype: disable=wrong-arg-types "TokenType", [ (name, enum.auto()) diff --git a/google/cloud/bigquery/model.py b/google/cloud/bigquery/model.py index 55846bd1a..2d3f6660f 100644 --- a/google/cloud/bigquery/model.py +++ b/google/cloud/bigquery/model.py @@ -279,7 +279,7 @@ def encryption_configuration(self, value): self._properties["encryptionConfiguration"] = api_repr @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "Model": """Factory: construct a model resource given its API representation Args: @@ -322,7 +322,7 @@ def _build_resource(self, filter_fields): def __repr__(self): return "Model(reference={})".format(repr(self.reference)) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this model. Returns: @@ -389,7 +389,9 @@ def from_api_repr(cls, resource): return ref @classmethod - def from_string(cls, model_id, default_project=None): + def from_string( + cls, model_id: str, default_project: str = None + ) -> "ModelReference": """Construct a model reference from model ID string. Args: @@ -417,7 +419,7 @@ def from_string(cls, model_id, default_project=None): {"projectId": proj, "datasetId": dset, "modelId": model} ) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this model reference. 
Returns: diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 495c4effb..3751eb124 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -286,7 +286,7 @@ class _AbstractQueryParameter(object): """ @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "ScalarQueryParameter": """Factory: construct parameter from JSON resource. Args: @@ -297,7 +297,7 @@ def from_api_repr(cls, resource): """ raise NotImplementedError - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct JSON API representation for the parameter. Returns: @@ -329,7 +329,7 @@ def __init__(self, name, type_, value): self.value = value @classmethod - def positional(cls, type_, value): + def positional(cls, type_: str, value) -> "ScalarQueryParameter": """Factory for positional paramater. Args: @@ -347,7 +347,7 @@ def positional(cls, type_, value): return cls(None, type_, value) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "ScalarQueryParameter": """Factory: construct parameter from JSON resource. Args: @@ -369,7 +369,7 @@ def from_api_repr(cls, resource): return cls(name, type_, converted) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct JSON API representation for the parameter. Returns: @@ -441,7 +441,7 @@ def __init__(self, name, array_type, values): self.array_type = array_type @classmethod - def positional(cls, array_type, values): + def positional(cls, array_type: str, values: list) -> "ArrayQueryParameter": """Factory for positional parameters. Args: @@ -490,7 +490,7 @@ def _from_api_repr_scalar(cls, resource): return cls(name, array_type, converted) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "ArrayQueryParameter": """Factory: construct parameter from JSON resource. 
Args: @@ -504,7 +504,7 @@ def from_api_repr(cls, resource): return cls._from_api_repr_struct(resource) return cls._from_api_repr_scalar(resource) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct JSON API representation for the parameter. Returns: @@ -623,7 +623,7 @@ def positional(cls, *sub_params): return cls(None, *sub_params) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "StructQueryParameter": """Factory: construct parameter from JSON resource. Args: @@ -663,7 +663,7 @@ def from_api_repr(cls, resource): instance.struct_values[key] = converted return instance - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct JSON API representation for the parameter. Returns: diff --git a/google/cloud/bigquery/routine/routine.py b/google/cloud/bigquery/routine/routine.py index 103799e8f..bbc0a7693 100644 --- a/google/cloud/bigquery/routine/routine.py +++ b/google/cloud/bigquery/routine/routine.py @@ -266,7 +266,7 @@ def determinism_level(self, value): self._properties[self._PROPERTY_TO_API_FIELD["determinism_level"]] = value @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "Routine": """Factory: construct a routine given its API representation. Args: @@ -281,7 +281,7 @@ def from_api_repr(cls, resource): ref._properties = resource return ref - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this routine. Returns: @@ -387,7 +387,7 @@ def data_type(self, value): self._properties[self._PROPERTY_TO_API_FIELD["data_type"]] = resource @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "RoutineArgument": """Factory: construct a routine argument given its API representation. 
Args: @@ -401,7 +401,7 @@ def from_api_repr(cls, resource): ref._properties = resource return ref - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this routine argument. Returns: @@ -438,17 +438,17 @@ def __init__(self): @property def project(self): """str: ID of the project containing the routine.""" - return self._properties["projectId"] + return self._properties["projectId"] # pytype: disable=key-error @property def dataset_id(self): """str: ID of dataset containing the routine.""" - return self._properties["datasetId"] + return self._properties["datasetId"] # pytype: disable=key-error @property def routine_id(self): """str: The routine ID.""" - return self._properties["routineId"] + return self._properties["routineId"] # pytype: disable=key-error @property def path(self): @@ -460,7 +460,7 @@ def path(self): ) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "RoutineReference": """Factory: construct a routine reference given its API representation. Args: @@ -476,7 +476,9 @@ def from_api_repr(cls, resource): return ref @classmethod - def from_string(cls, routine_id, default_project=None): + def from_string( + cls, routine_id: str, default_project: str = None + ) -> "RoutineReference": """Factory: construct a routine reference from routine ID string. Args: @@ -504,7 +506,7 @@ def from_string(cls, routine_id, default_project=None): {"projectId": proj, "datasetId": dset, "routineId": routine} ) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this routine reference. 
Returns: diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 680dcc138..cb221d6de 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -90,7 +90,7 @@ def __init__( self._policy_tags = policy_tags @classmethod - def from_api_repr(cls, api_repr): + def from_api_repr(cls, api_repr: dict) -> "SchemaField": """Return a ``SchemaField`` object deserialized from a dictionary. Args: @@ -163,7 +163,7 @@ def policy_tags(self): """ return self._policy_tags - def to_api_repr(self): + def to_api_repr(self) -> dict: """Return a dictionary representing this schema field. Returns: @@ -194,13 +194,14 @@ def _key(self): return ( self.name, self.field_type.upper(), - self.mode.upper(), + # Mode is always str, if not given it defaults to a str value + self.mode.upper(), # pytype: disable=attribute-error self.description, self._fields, self._policy_tags, ) - def to_standard_sql(self): + def to_standard_sql(self) -> types.StandardSqlField: """Return the field as the standard SQL field representation object. Returns: @@ -375,7 +376,7 @@ def __repr__(self): return "PolicyTagList{}".format(self._key()) @classmethod - def from_api_repr(cls, api_repr): + def from_api_repr(cls, api_repr: dict) -> "PolicyTagList": """Return a :class:`PolicyTagList` object deserialized from a dict. This method creates a new ``PolicyTagList`` instance that points to @@ -398,7 +399,7 @@ def from_api_repr(cls, api_repr): names = api_repr.get("names", ()) return cls(names=names) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Return a dictionary representing this object. 
This method returns the properties dict of the ``PolicyTagList`` diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index bd5bca30f..5ab649a25 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -22,6 +22,8 @@ import logging import operator import pytz +import typing +from typing import Any, Dict, Iterable, Tuple import warnings try: @@ -47,6 +49,13 @@ from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +if typing.TYPE_CHECKING: # pragma: NO COVER + # Unconditionally import optional dependencies again to tell pytype that + # they are not None, avoiding false "no attribute" errors. + import pandas + import pyarrow + from google.cloud import bigquery_storage + _LOGGER = logging.getLogger(__name__) @@ -143,7 +152,9 @@ def path(self): ) @classmethod - def from_string(cls, table_id, default_project=None): + def from_string( + cls, table_id: str, default_project: str = None + ) -> "TableReference": """Construct a table reference from table ID string. Args: @@ -182,7 +193,7 @@ def from_string(cls, table_id, default_project=None): ) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "TableReference": """Factory: construct a table reference given its API representation Args: @@ -200,7 +211,7 @@ def from_api_repr(cls, resource): table_id = resource["tableId"] return cls(DatasetReference(project, dataset_id), table_id) - def to_api_repr(self): + def to_api_repr(self) -> dict: """Construct the API resource representation of this table reference. Returns: @@ -212,7 +223,7 @@ def to_api_repr(self): "tableId": self._table_id, } - def to_bqstorage(self): + def to_bqstorage(self) -> str: """Construct a BigQuery Storage API representation of this table. 
Install the ``google-cloud-bigquery-storage`` package to use this @@ -847,7 +858,7 @@ def external_data_configuration(self, value): self._properties["externalDataConfiguration"] = api_repr @classmethod - def from_string(cls, full_table_id): + def from_string(cls, full_table_id: str) -> "Table": """Construct a table from fully-qualified table ID. Args: @@ -871,7 +882,7 @@ def from_string(cls, full_table_id): return cls(TableReference.from_string(full_table_id)) @classmethod - def from_api_repr(cls, resource): + def from_api_repr(cls, resource: dict) -> "Table": """Factory: construct a table given its API representation Args: @@ -907,7 +918,7 @@ def from_api_repr(cls, resource): return table - def to_api_repr(self): + def to_api_repr(self) -> dict: """Constructs the API resource of this table Returns: @@ -915,7 +926,7 @@ def to_api_repr(self): """ return copy.deepcopy(self._properties) - def to_bqstorage(self): + def to_bqstorage(self) -> str: """Construct a BigQuery Storage API representation of this table. Returns: @@ -1104,7 +1115,7 @@ def clustering_fields(self): return list(prop.get("fields", ())) @classmethod - def from_string(cls, full_table_id): + def from_string(cls, full_table_id: str) -> "TableListItem": """Construct a table from fully-qualified table ID. Args: @@ -1129,7 +1140,7 @@ def from_string(cls, full_table_id): {"tableReference": TableReference.from_string(full_table_id).to_api_repr()} ) - def to_bqstorage(self): + def to_bqstorage(self) -> str: """Construct a BigQuery Storage API representation of this table. Returns: @@ -1137,7 +1148,7 @@ def to_bqstorage(self): """ return self.reference.to_bqstorage() - def to_api_repr(self): + def to_api_repr(self) -> dict: """Constructs the API resource of this table Returns: @@ -1231,7 +1242,7 @@ def values(self): """ return copy.deepcopy(self._xxx_values) - def keys(self): + def keys(self) -> Iterable[str]: """Return the keys for using a row as a dict. 
Returns: @@ -1244,7 +1255,7 @@ def keys(self): """ return self._xxx_field_to_index.keys() - def items(self): + def items(self) -> Iterable[Tuple[str, Any]]: """Return items as ``(key, value)`` pairs. Returns: @@ -1259,7 +1270,7 @@ def items(self): for key, index in self._xxx_field_to_index.items(): yield (key, copy.deepcopy(self._xxx_values[index])) - def get(self, key, default=None): + def get(self, key: str, default: Any = None) -> Any: """Return a value for key, with a default value if it does not exist. Args: @@ -1520,10 +1531,10 @@ def _to_arrow_iterable(self, bqstorage_client=None): # changes to job.QueryJob.to_arrow() def to_arrow( self, - progress_bar_type=None, - bqstorage_client=None, - create_bqstorage_client=True, - ): + progress_bar_type: str = None, + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + create_bqstorage_client: bool = True, + ) -> "pyarrow.Table": """[Beta] Create a class:`pyarrow.Table` by loading all pages of a table or query. @@ -1623,10 +1634,10 @@ def to_arrow( def to_dataframe_iterable( self, - bqstorage_client=None, - dtypes=None, - max_queue_size=_pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, - ): + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + dtypes: Dict[str, Any] = None, + max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, + ) -> "pandas.DataFrame": """Create an iterable of pandas DataFrames, to process the table as a stream. Args: @@ -1698,12 +1709,12 @@ def to_dataframe_iterable( # changes to job.QueryJob.to_dataframe() def to_dataframe( self, - bqstorage_client=None, - dtypes=None, - progress_bar_type=None, - create_bqstorage_client=True, - date_as_object=True, - ): + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + dtypes: Dict[str, Any] = None, + progress_bar_type: str = None, + create_bqstorage_client: bool = True, + date_as_object: bool = True, + ) -> "pandas.DataFrame": """Create a pandas DataFrame by loading all pages of a query. 
Args: @@ -1831,7 +1842,7 @@ def to_arrow( progress_bar_type=None, bqstorage_client=None, create_bqstorage_client=True, - ): + ) -> "pyarrow.Table": """[Beta] Create an empty class:`pyarrow.Table`. Args: @@ -1853,7 +1864,7 @@ def to_dataframe( progress_bar_type=None, create_bqstorage_client=True, date_as_object=True, - ): + ) -> "pandas.DataFrame": """Create an empty dataframe. Args: @@ -2164,7 +2175,7 @@ def require_partition_filter(self, value): self._properties["requirePartitionFilter"] = value @classmethod - def from_api_repr(cls, api_repr): + def from_api_repr(cls, api_repr: dict) -> "TimePartitioning": """Return a :class:`TimePartitioning` object deserialized from a dict. This method creates a new ``TimePartitioning`` instance that points to @@ -2192,7 +2203,7 @@ def from_api_repr(cls, api_repr): instance._properties = api_repr return instance - def to_api_repr(self): + def to_api_repr(self) -> dict: """Return a dictionary representing this object. This method returns the properties dict of the ``TimePartitioning`` diff --git a/noxfile.py b/noxfile.py index bde3b990e..7ba081660 100644 --- a/noxfile.py +++ b/noxfile.py @@ -21,6 +21,7 @@ import nox +PYTYPE_VERSION = "pytype==2021.4.9" BLACK_VERSION = "black==19.10b0" BLACK_PATHS = ("docs", "google", "samples", "tests", "noxfile.py", "setup.py") @@ -39,6 +40,7 @@ "lint", "lint_setup_py", "blacken", + "pytype", "docs", ] @@ -98,6 +100,15 @@ def unit_noextras(session): default(session, install_extras=False) +@nox.session(python=DEFAULT_PYTHON_VERSION) +def pytype(session): + """Run type checks.""" + session.install("-e", ".[all]") + session.install("ipython") + session.install(PYTYPE_VERSION) + session.run("pytype") + + @nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS) def system(session): """Run the system test suite.""" diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 6939c07e0..96819343c 100644 --- a/samples/geography/requirements.txt +++ 
b/samples/geography/requirements.txt @@ -1,3 +1,4 @@ geojson==2.5.0 google-cloud-bigquery==2.13.1 +google-cloud-bigquery-storage==2.1.0 Shapely==1.7.1 diff --git a/setup.cfg b/setup.cfg index c3a2b39f6..8eefc4435 100644 --- a/setup.cfg +++ b/setup.cfg @@ -17,3 +17,17 @@ # Generated by synthtool. DO NOT EDIT! [bdist_wheel] universal = 1 + +[pytype] +python_version = 3.8 +inputs = + google/cloud/ +exclude = + tests/ + google/cloud/bigquery_v2/ +output = .pytype/ +disable = + # There's some issue with finding some pyi files, thus disabling. + # The issue https://github.com/google/pytype/issues/150 is closed, but the + # error still occurs for some reason. + pyi-error diff --git a/synth.py b/synth.py index 3c6440600..d99f368cc 100644 --- a/synth.py +++ b/synth.py @@ -13,6 +13,7 @@ # limitations under the License. """This script is used to synthesize generated parts of this library.""" +import textwrap import synthtool as s from synthtool import gcp @@ -120,4 +121,32 @@ '\g<0>\n "bigquery_v2/services.rst", # generated by the code generator', ) +# ---------------------------------------------------------------------------- +# pytype-related changes +# ---------------------------------------------------------------------------- + +# Add .pytype to .gitignore +s.replace(".gitignore", r"\.pytest_cache", "\g<0>\n.pytype") + +# Add pytype config to setup.cfg +s.replace( + "setup.cfg", + r"universal = 1", + textwrap.dedent(""" \g<0> + + [pytype] + python_version = 3.8 + inputs = + google/cloud/ + exclude = + tests/ + google/cloud/bigquery_v2/ + output = .pytype/ + disable = + # There's some issue with finding some pyi files, thus disabling. + # The issue https://github.com/google/pytype/issues/150 is closed, but the + # error still occurs for some reason. 
+ pyi-error""") +) + s.shell.run(["nox", "-s", "blacken"], hide_output=False) diff --git a/tests/unit/test_signature_compatibility.py b/tests/unit/test_signature_compatibility.py index 6002ae3e8..e5016b0e5 100644 --- a/tests/unit/test_signature_compatibility.py +++ b/tests/unit/test_signature_compatibility.py @@ -31,20 +31,12 @@ def row_iterator_class(): return RowIterator -@pytest.mark.skipif( - not hasattr(inspect, "signature"), - reason="inspect.signature() is not availalbe in older Python versions", -) def test_to_arrow_method_signatures_match(query_job_class, row_iterator_class): sig = inspect.signature(query_job_class.to_arrow) sig2 = inspect.signature(row_iterator_class.to_arrow) assert sig == sig2 -@pytest.mark.skipif( - not hasattr(inspect, "signature"), - reason="inspect.signature() is not availalbe in older Python versions", -) def test_to_dataframe_method_signatures_match(query_job_class, row_iterator_class): sig = inspect.signature(query_job_class.to_dataframe) sig2 = inspect.signature(row_iterator_class.to_dataframe) From 0abb56669c097c59fbffce007c702e7a55f2d9c1 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Mon, 26 Apr 2021 02:34:02 -0500 Subject: [PATCH 079/230] feat: add `Client.delete_job_metadata` method to remove job metadata (#610) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Note: this only removes job metadata. Use `Client.cancel_job` to stop a running job. Also, this feature is in preview and has not rolled out to all regions yet Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Towards internal issue 176186229 🦕 --- google/cloud/bigquery/client.py | 71 +++++++++++++++++++++++++++++++++ tests/system/test_client.py | 22 +++++++++- tests/unit/test_client.py | 60 ++++++++++++++++++++++++++++ 3 files changed, 151 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 5aa8608a5..8d0acb867 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -1545,6 +1545,77 @@ def delete_model( if not not_found_ok: raise + def delete_job_metadata( + self, + job_id, + project=None, + location=None, + retry=DEFAULT_RETRY, + timeout=None, + not_found_ok=False, + ): + """[Beta] Delete job metadata from job history. + + Note: This does not stop a running job. Use + :func:`~google.cloud.bigquery.client.Client.cancel_job` instead. + + Args: + job_id (Union[ \ + str, \ + google.cloud.bigquery.job.LoadJob, \ + google.cloud.bigquery.job.CopyJob, \ + google.cloud.bigquery.job.ExtractJob, \ + google.cloud.bigquery.job.QueryJob \ + ]): Job identifier. + + Keyword Arguments: + project (Optional[str]): + ID of the project which owns the job (defaults to the client's project). + location (Optional[str]): + Location where the job was run. Ignored if ``job_id`` is a job + object. + retry (Optional[google.api_core.retry.Retry]): + How to retry the RPC. + timeout (Optional[float]): + The number of seconds to wait for the underlying HTTP transport + before using ``retry``. + not_found_ok (Optional[bool]): + Defaults to ``False``. If ``True``, ignore "not found" errors + when deleting the job. 
+ """ + extra_params = {} + + project, location, job_id = _extract_job_reference( + job_id, project=project, location=location + ) + + if project is None: + project = self.project + + if location is None: + location = self.location + + # Location is always required for jobs.delete() + extra_params["location"] = location + + path = f"/projects/{project}/jobs/{job_id}/delete" + + span_attributes = {"path": path, "job_id": job_id, "location": location} + + try: + self._call_api( + retry, + span_name="BigQuery.deleteJob", + span_attributes=span_attributes, + method="DELETE", + path=path, + query_params=extra_params, + timeout=timeout, + ) + except google.api_core.exceptions.NotFound: + if not not_found_ok: + raise + def delete_routine( self, routine: Union[Routine, RoutineReference, str], diff --git a/tests/system/test_client.py b/tests/system/test_client.py index f31d994ca..e71788a43 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -25,6 +25,7 @@ import time import unittest import uuid +from typing import Optional import psutil import pytest @@ -62,6 +63,7 @@ from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi, enums @@ -123,7 +125,7 @@ def _has_rows(result): def _make_dataset_id(prefix): - return "%s%s" % (prefix, unique_resource_id()) + return f"python_bigquery_tests_system_{prefix}{unique_resource_id()}" def _load_json_schema(filename="schema.json"): @@ -142,7 +144,7 @@ class Config(object): global state. 
""" - CLIENT = None + CLIENT: Optional[bigquery.Client] = None CURSOR = None DATASET = None @@ -430,6 +432,22 @@ def test_delete_dataset_delete_contents_false(self): with self.assertRaises(exceptions.BadRequest): Config.CLIENT.delete_dataset(dataset) + def test_delete_job_metadata(self): + dataset_id = _make_dataset_id("us_east1") + self.temp_dataset(dataset_id, location="us-east1") + full_table_id = f"{Config.CLIENT.project}.{dataset_id}.test_delete_job_metadata" + table = Table(full_table_id, schema=[SchemaField("col", "STRING")]) + Config.CLIENT.create_table(table) + query_job: bigquery.QueryJob = Config.CLIENT.query( + f"SELECT COUNT(*) FROM `{full_table_id}`", location="us-east1", + ) + query_job.result() + self.assertIsNotNone(Config.CLIENT.get_job(query_job)) + + Config.CLIENT.delete_job_metadata(query_job) + with self.assertRaises(NotFound): + Config.CLIENT.get_job(query_job) + def test_get_table_w_public_dataset(self): public = "bigquery-public-data" dataset_id = "samples" diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 860f25f35..8f535145b 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -2498,6 +2498,66 @@ def test_update_table_delete_property(self): self.assertEqual(req[1]["data"], sent) self.assertIsNone(table3.description) + def test_delete_job_metadata_not_found(self): + creds = _make_credentials() + client = self._make_one("client-proj", creds, location="client-loc") + conn = client._connection = make_connection( + google.api_core.exceptions.NotFound("job not found"), + google.api_core.exceptions.NotFound("job not found"), + ) + + with self.assertRaises(google.api_core.exceptions.NotFound): + client.delete_job_metadata("my-job") + + conn.api_request.reset_mock() + client.delete_job_metadata("my-job", not_found_ok=True) + + conn.api_request.assert_called_once_with( + method="DELETE", + path="/projects/client-proj/jobs/my-job/delete", + query_params={"location": "client-loc"}, + timeout=None, + ) + + 
def test_delete_job_metadata_with_id(self): + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds) + conn = client._connection = make_connection({}) + + client.delete_job_metadata("my-job", project="param-proj", location="param-loc") + + conn.api_request.assert_called_once_with( + method="DELETE", + path="/projects/param-proj/jobs/my-job/delete", + query_params={"location": "param-loc"}, + timeout=None, + ) + + def test_delete_job_metadata_with_resource(self): + from google.cloud.bigquery.job import QueryJob + + query_resource = { + "jobReference": { + "projectId": "job-based-proj", + "jobId": "query_job", + "location": "us-east1", + }, + "configuration": {"query": {}}, + } + creds = _make_credentials() + client = self._make_one(self.PROJECT, creds) + conn = client._connection = make_connection(query_resource) + job_from_resource = QueryJob.from_api_repr(query_resource, client) + + client.delete_job_metadata(job_from_resource) + + conn.api_request.assert_called_once_with( + method="DELETE", + path="/projects/job-based-proj/jobs/query_job/delete", + query_params={"location": "us-east1"}, + timeout=None, + ) + def test_delete_model(self): from google.cloud.bigquery.model import Model From 1cff487e912d3cc3414968c28cf1e6554361a9e3 Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Mon, 26 Apr 2021 10:31:29 -0400 Subject: [PATCH 080/230] chore(revert): revert preventing normalization (#625) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 46a128426..607ffb63f 100644 --- a/setup.py +++ b/setup.py @@ -100,7 +100,7 @@ setuptools.setup( name=name, - version=setuptools.sic(version), + version=version, description=description, long_description=readme, author="Google LLC", From 33a871f06329f9bf5a6a92fab9ead65bf2bee75d Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 26 Apr 2021 16:35:16 +0200 Subject: [PATCH 081/230] fix: unsetting clustering fileds on 
Table is now possible (#622) * fix: unsetting clustering fields from Table * Remove unused stuff from table.py * Use _PROPERTY_TO_API_FIELD in Table properties * Clarify why a property is set to explicit None --- google/cloud/bigquery/table.py | 204 +++++++++++++++++++++------------ tests/system/test_client.py | 25 ++++ tests/unit/test_table.py | 8 +- 3 files changed, 160 insertions(+), 77 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 5ab649a25..b91c91a39 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -19,7 +19,6 @@ import copy import datetime import functools -import logging import operator import pytz import typing @@ -57,12 +56,6 @@ from google.cloud import bigquery_storage -_LOGGER = logging.getLogger(__name__) - -_NO_BQSTORAGE_ERROR = ( - "The google-cloud-bigquery-storage library is not installed, " - "please install google-cloud-bigquery-storage to use bqstorage features." -) _NO_PANDAS_ERROR = ( "The pandas library is not installed, please install " "pandas to use the to_dataframe() function." 
@@ -302,16 +295,36 @@ class Table(object): """ _PROPERTY_TO_API_FIELD = { + "clustering_fields": "clustering", + "created": "creationTime", + "dataset_id": ["tableReference", "datasetId"], + "description": "description", "encryption_configuration": "encryptionConfiguration", + "etag": "etag", "expires": "expirationTime", "external_data_configuration": "externalDataConfiguration", "friendly_name": "friendlyName", + "full_table_id": "id", + "labels": "labels", + "location": "location", + "modified": "lastModifiedTime", "mview_enable_refresh": "materializedView", + "mview_last_refresh_time": ["materializedView", "lastRefreshTime"], "mview_query": "materializedView", "mview_refresh_interval": "materializedView", + "num_bytes": "numBytes", + "num_rows": "numRows", "partition_expiration": "timePartitioning", "partitioning_type": "timePartitioning", + "project": ["tableReference", "projectId"], + "range_partitioning": "rangePartitioning", + "time_partitioning": "timePartitioning", + "schema": "schema", + "streaming_buffer": "streamingBuffer", + "self_link": "selfLink", + "table_id": ["tableReference", "tableId"], "time_partitioning": "timePartitioning", + "type": "type", "view_use_legacy_sql": "view", "view_query": "view", "require_partition_filter": "requirePartitionFilter", @@ -327,17 +340,23 @@ def __init__(self, table_ref, schema=None): @property def project(self): """str: Project bound to the table.""" - return self._properties["tableReference"]["projectId"] + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["project"] + ) @property def dataset_id(self): """str: ID of dataset containing the table.""" - return self._properties["tableReference"]["datasetId"] + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["dataset_id"] + ) @property def table_id(self): """str: ID of the table.""" - return self._properties["tableReference"]["tableId"] + return _helpers._get_sub_prop( + self._properties, 
self._PROPERTY_TO_API_FIELD["table_id"] + ) reference = property(_reference_getter) @@ -356,11 +375,15 @@ def require_partition_filter(self): partition filter that can be used for partition elimination to be specified. """ - return self._properties.get("requirePartitionFilter") + return self._properties.get( + self._PROPERTY_TO_API_FIELD["require_partition_filter"] + ) @require_partition_filter.setter def require_partition_filter(self, value): - self._properties["requirePartitionFilter"] = value + self._properties[ + self._PROPERTY_TO_API_FIELD["require_partition_filter"] + ] = value @property def schema(self): @@ -376,7 +399,7 @@ def schema(self): is not a :class:`~google.cloud.bigquery.schema.SchemaField` instance or a compatible mapping representation of the field. """ - prop = self._properties.get("schema") + prop = self._properties.get(self._PROPERTY_TO_API_FIELD["schema"]) if not prop: return [] else: @@ -384,11 +407,13 @@ def schema(self): @schema.setter def schema(self, value): + api_field = self._PROPERTY_TO_API_FIELD["schema"] + if value is None: - self._properties["schema"] = None + self._properties[api_field] = None else: value = _to_schema_fields(value) - self._properties["schema"] = {"fields": _build_schema_resource(value)} + self._properties[api_field] = {"fields": _build_schema_resource(value)} @property def labels(self): @@ -401,13 +426,13 @@ def labels(self): Raises: ValueError: If ``value`` type is invalid. """ - return self._properties.setdefault("labels", {}) + return self._properties.setdefault(self._PROPERTY_TO_API_FIELD["labels"], {}) @labels.setter def labels(self, value): if not isinstance(value, dict): raise ValueError("Pass a dict") - self._properties["labels"] = value + self._properties[self._PROPERTY_TO_API_FIELD["labels"]] = value @property def encryption_configuration(self): @@ -421,7 +446,9 @@ def encryption_configuration(self): `_ in the BigQuery documentation. 
""" - prop = self._properties.get("encryptionConfiguration") + prop = self._properties.get( + self._PROPERTY_TO_API_FIELD["encryption_configuration"] + ) if prop is not None: prop = EncryptionConfiguration.from_api_repr(prop) return prop @@ -431,14 +458,16 @@ def encryption_configuration(self, value): api_repr = value if value is not None: api_repr = value.to_api_repr() - self._properties["encryptionConfiguration"] = api_repr + self._properties[ + self._PROPERTY_TO_API_FIELD["encryption_configuration"] + ] = api_repr @property def created(self): """Union[datetime.datetime, None]: Datetime at which the table was created (:data:`None` until set from the server). """ - creation_time = self._properties.get("creationTime") + creation_time = self._properties.get(self._PROPERTY_TO_API_FIELD["created"]) if creation_time is not None: # creation_time will be in milliseconds. return google.cloud._helpers._datetime_from_microseconds( @@ -450,14 +479,14 @@ def etag(self): """Union[str, None]: ETag for the table resource (:data:`None` until set from the server). """ - return self._properties.get("etag") + return self._properties.get(self._PROPERTY_TO_API_FIELD["etag"]) @property def modified(self): """Union[datetime.datetime, None]: Datetime at which the table was last modified (:data:`None` until set from the server). """ - modified_time = self._properties.get("lastModifiedTime") + modified_time = self._properties.get(self._PROPERTY_TO_API_FIELD["modified"]) if modified_time is not None: # modified_time will be in milliseconds. return google.cloud._helpers._datetime_from_microseconds( @@ -469,21 +498,25 @@ def num_bytes(self): """Union[int, None]: The size of the table in bytes (:data:`None` until set from the server). 
""" - return _helpers._int_or_none(self._properties.get("numBytes")) + return _helpers._int_or_none( + self._properties.get(self._PROPERTY_TO_API_FIELD["num_bytes"]) + ) @property def num_rows(self): """Union[int, None]: The number of rows in the table (:data:`None` until set from the server). """ - return _helpers._int_or_none(self._properties.get("numRows")) + return _helpers._int_or_none( + self._properties.get(self._PROPERTY_TO_API_FIELD["num_rows"]) + ) @property def self_link(self): """Union[str, None]: URL for the table resource (:data:`None` until set from the server). """ - return self._properties.get("selfLink") + return self._properties.get(self._PROPERTY_TO_API_FIELD["self_link"]) @property def full_table_id(self): @@ -492,7 +525,7 @@ def full_table_id(self): In the format ``project-id:dataset_id.table_id``. """ - return self._properties.get("id") + return self._properties.get(self._PROPERTY_TO_API_FIELD["full_table_id"]) @property def table_type(self): @@ -502,7 +535,7 @@ def table_type(self): Possible values are ``'TABLE'``, ``'VIEW'``, ``'MATERIALIZED_VIEW'`` or ``'EXTERNAL'``. """ - return self._properties.get("type") + return self._properties.get(self._PROPERTY_TO_API_FIELD["type"]) @property def range_partitioning(self): @@ -523,7 +556,9 @@ def range_partitioning(self): :class:`~google.cloud.bigquery.table.RangePartitioning` or :data:`None`. 
""" - resource = self._properties.get("rangePartitioning") + resource = self._properties.get( + self._PROPERTY_TO_API_FIELD["range_partitioning"] + ) if resource is not None: return RangePartitioning(_properties=resource) @@ -536,7 +571,7 @@ def range_partitioning(self, value): raise ValueError( "Expected value to be RangePartitioning or None, got {}.".format(value) ) - self._properties["rangePartitioning"] = resource + self._properties[self._PROPERTY_TO_API_FIELD["range_partitioning"]] = resource @property def time_partitioning(self): @@ -553,7 +588,7 @@ def time_partitioning(self): :class:`~google.cloud.bigquery.table.TimePartitioning` or :data:`None`. """ - prop = self._properties.get("timePartitioning") + prop = self._properties.get(self._PROPERTY_TO_API_FIELD["time_partitioning"]) if prop is not None: return TimePartitioning.from_api_repr(prop) @@ -566,7 +601,7 @@ def time_partitioning(self, value): raise ValueError( "value must be google.cloud.bigquery.table.TimePartitioning " "or None" ) - self._properties["timePartitioning"] = api_repr + self._properties[self._PROPERTY_TO_API_FIELD["time_partitioning"]] = api_repr @property def partitioning_type(self): @@ -591,9 +626,10 @@ def partitioning_type(self, value): PendingDeprecationWarning, stacklevel=2, ) + api_field = self._PROPERTY_TO_API_FIELD["partitioning_type"] if self.time_partitioning is None: - self._properties["timePartitioning"] = {} - self._properties["timePartitioning"]["type"] = value + self._properties[api_field] = {} + self._properties[api_field]["type"] = value @property def partition_expiration(self): @@ -620,9 +656,11 @@ def partition_expiration(self, value): PendingDeprecationWarning, stacklevel=2, ) + api_field = self._PROPERTY_TO_API_FIELD["partition_expiration"] + if self.time_partitioning is None: - self._properties["timePartitioning"] = {"type": TimePartitioningType.DAY} - self._properties["timePartitioning"]["expirationMs"] = str(value) + self._properties[api_field] = {"type": 
TimePartitioningType.DAY} + self._properties[api_field]["expirationMs"] = str(value) @property def clustering_fields(self): @@ -637,7 +675,7 @@ def clustering_fields(self): BigQuery supports clustering for both partitioned and non-partitioned tables. """ - prop = self._properties.get("clustering") + prop = self._properties.get(self._PROPERTY_TO_API_FIELD["clustering_fields"]) if prop is not None: return list(prop.get("fields", ())) @@ -647,12 +685,15 @@ def clustering_fields(self, value): (Defaults to :data:`None`). """ + api_field = self._PROPERTY_TO_API_FIELD["clustering_fields"] + if value is not None: - prop = self._properties.setdefault("clustering", {}) + prop = self._properties.setdefault(api_field, {}) prop["fields"] = value else: - if "clustering" in self._properties: - del self._properties["clustering"] + # In order to allow unsetting clustering fields completely, we explicitly + # set this property to None (as opposed to merely removing the key). + self._properties[api_field] = None @property def description(self): @@ -662,13 +703,13 @@ def description(self): Raises: ValueError: For invalid value types. """ - return self._properties.get("description") + return self._properties.get(self._PROPERTY_TO_API_FIELD["description"]) @description.setter def description(self, value): if not isinstance(value, str) and value is not None: raise ValueError("Pass a string, or None") - self._properties["description"] = value + self._properties[self._PROPERTY_TO_API_FIELD["description"]] = value @property def expires(self): @@ -678,7 +719,7 @@ def expires(self): Raises: ValueError: For invalid value types. """ - expiration_time = self._properties.get("expirationTime") + expiration_time = self._properties.get(self._PROPERTY_TO_API_FIELD["expires"]) if expiration_time is not None: # expiration_time will be in milliseconds.
return google.cloud._helpers._datetime_from_microseconds( @@ -690,7 +731,9 @@ def expires(self, value): if not isinstance(value, datetime.datetime) and value is not None: raise ValueError("Pass a datetime, or None") value_ms = google.cloud._helpers._millis_from_datetime(value) - self._properties["expirationTime"] = _helpers._str_or_none(value_ms) + self._properties[ + self._PROPERTY_TO_API_FIELD["expires"] + ] = _helpers._str_or_none(value_ms) @property def friendly_name(self): @@ -699,13 +742,13 @@ def friendly_name(self): Raises: ValueError: For invalid value types. """ - return self._properties.get("friendlyName") + return self._properties.get(self._PROPERTY_TO_API_FIELD["friendly_name"]) @friendly_name.setter def friendly_name(self, value): if not isinstance(value, str) and value is not None: raise ValueError("Pass a string, or None") - self._properties["friendlyName"] = value + self._properties[self._PROPERTY_TO_API_FIELD["friendly_name"]] = value @property def location(self): @@ -713,7 +756,7 @@ def location(self): Defaults to :data:`None`. """ - return self._properties.get("location") + return self._properties.get(self._PROPERTY_TO_API_FIELD["location"]) @property def view_query(self): @@ -726,14 +769,17 @@ def view_query(self): Raises: ValueError: For invalid value types. """ - return _helpers._get_sub_prop(self._properties, ["view", "query"]) + api_field = self._PROPERTY_TO_API_FIELD["view_query"] + return _helpers._get_sub_prop(self._properties, [api_field, "query"]) @view_query.setter def view_query(self, value): if not isinstance(value, str): raise ValueError("Pass a string") - _helpers._set_sub_prop(self._properties, ["view", "query"], value) - view = self._properties["view"] + + api_field = self._PROPERTY_TO_API_FIELD["view_query"] + _helpers._set_sub_prop(self._properties, [api_field, "query"], value) + view = self._properties[api_field] # The service defaults useLegacySql to True, but this # client uses Standard SQL by default. 
if view.get("useLegacySql") is None: @@ -742,7 +788,7 @@ def view_query(self, value): @view_query.deleter def view_query(self): """Delete SQL query defining the table as a view.""" - self._properties.pop("view", None) + self._properties.pop(self._PROPERTY_TO_API_FIELD["view_query"], None) view_use_legacy_sql = property(_view_use_legacy_sql_getter) @@ -750,27 +796,29 @@ def view_query(self): def view_use_legacy_sql(self, value): if not isinstance(value, bool): raise ValueError("Pass a boolean") - if self._properties.get("view") is None: - self._properties["view"] = {} - self._properties["view"]["useLegacySql"] = value + + api_field = self._PROPERTY_TO_API_FIELD["view_query"] + if self._properties.get(api_field) is None: + self._properties[api_field] = {} + self._properties[api_field]["useLegacySql"] = value @property def mview_query(self): """Optional[str]: SQL query defining the table as a materialized view (defaults to :data:`None`). """ - return _helpers._get_sub_prop(self._properties, ["materializedView", "query"]) + api_field = self._PROPERTY_TO_API_FIELD["mview_query"] + return _helpers._get_sub_prop(self._properties, [api_field, "query"]) @mview_query.setter def mview_query(self, value): - _helpers._set_sub_prop( - self._properties, ["materializedView", "query"], str(value) - ) + api_field = self._PROPERTY_TO_API_FIELD["mview_query"] + _helpers._set_sub_prop(self._properties, [api_field, "query"], str(value)) @mview_query.deleter def mview_query(self): """Delete SQL query defining the table as a materialized view.""" - self._properties.pop("materializedView", None) + self._properties.pop(self._PROPERTY_TO_API_FIELD["mview_query"], None) @property def mview_last_refresh_time(self): @@ -778,7 +826,7 @@ def mview_last_refresh_time(self): refreshed (:data:`None` until set from the server). 
""" refresh_time = _helpers._get_sub_prop( - self._properties, ["materializedView", "lastRefreshTime"] + self._properties, self._PROPERTY_TO_API_FIELD["mview_last_refresh_time"] ) if refresh_time is not None: # refresh_time will be in milliseconds. @@ -791,14 +839,14 @@ def mview_enable_refresh(self): """Optional[bool]: Enable automatic refresh of the materialized view when the base table is updated. The default value is :data:`True`. """ - return _helpers._get_sub_prop( - self._properties, ["materializedView", "enableRefresh"] - ) + api_field = self._PROPERTY_TO_API_FIELD["mview_enable_refresh"] + return _helpers._get_sub_prop(self._properties, [api_field, "enableRefresh"]) @mview_enable_refresh.setter def mview_enable_refresh(self, value): + api_field = self._PROPERTY_TO_API_FIELD["mview_enable_refresh"] return _helpers._set_sub_prop( - self._properties, ["materializedView", "enableRefresh"], value + self._properties, [api_field, "enableRefresh"], value ) @property @@ -807,8 +855,9 @@ def mview_refresh_interval(self): materialized view will be refreshed. The default value is 1800000 milliseconds (30 minutes). """ + api_field = self._PROPERTY_TO_API_FIELD["mview_refresh_interval"] refresh_interval = _helpers._get_sub_prop( - self._properties, ["materializedView", "refreshIntervalMs"] + self._properties, [api_field, "refreshIntervalMs"] ) if refresh_interval is not None: return datetime.timedelta(milliseconds=int(refresh_interval)) @@ -820,10 +869,9 @@ def mview_refresh_interval(self, value): else: refresh_interval_ms = str(value // datetime.timedelta(milliseconds=1)) + api_field = self._PROPERTY_TO_API_FIELD["mview_refresh_interval"] _helpers._set_sub_prop( - self._properties, - ["materializedView", "refreshIntervalMs"], - refresh_interval_ms, + self._properties, [api_field, "refreshIntervalMs"], refresh_interval_ms, ) @property @@ -831,7 +879,7 @@ def streaming_buffer(self): """google.cloud.bigquery.StreamingBuffer: Information about a table's streaming buffer. 
""" - sb = self._properties.get("streamingBuffer") + sb = self._properties.get(self._PROPERTY_TO_API_FIELD["streaming_buffer"]) if sb is not None: return StreamingBuffer(sb) @@ -843,7 +891,9 @@ def external_data_configuration(self): Raises: ValueError: For invalid value types. """ - prop = self._properties.get("externalDataConfiguration") + prop = self._properties.get( + self._PROPERTY_TO_API_FIELD["external_data_configuration"] + ) if prop is not None: prop = ExternalConfig.from_api_repr(prop) return prop @@ -855,7 +905,9 @@ def external_data_configuration(self, value): api_repr = value if value is not None: api_repr = value.to_api_repr() - self._properties["externalDataConfiguration"] = api_repr + self._properties[ + self._PROPERTY_TO_API_FIELD["external_data_configuration"] + ] = api_repr @classmethod def from_string(cls, full_table_id: str) -> "Table": @@ -908,9 +960,15 @@ def from_api_repr(cls, resource: dict) -> "Table": "Resource lacks required identity information:" '["tableReference"]["tableId"]' ) - project_id = resource["tableReference"]["projectId"] - table_id = resource["tableReference"]["tableId"] - dataset_id = resource["tableReference"]["datasetId"] + project_id = _helpers._get_sub_prop( + resource, cls._PROPERTY_TO_API_FIELD["project"] + ) + table_id = _helpers._get_sub_prop( + resource, cls._PROPERTY_TO_API_FIELD["table_id"] + ) + dataset_id = _helpers._get_sub_prop( + resource, cls._PROPERTY_TO_API_FIELD["dataset_id"] + ) dataset_ref = dataset.DatasetReference(project_id, dataset_id) table = cls(dataset_ref.table(table_id)) diff --git a/tests/system/test_client.py b/tests/system/test_client.py index e71788a43..7c8ef50fa 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -90,6 +90,12 @@ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), ] +CLUSTERING_SCHEMA = [ + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", 
"INTEGER", mode="REQUIRED"), + bigquery.SchemaField("body_height_cm", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("date_of_birth", "DATE", mode="REQUIRED"), +] TIME_PARTITIONING_CLUSTERING_FIELDS_SCHEMA = [ bigquery.SchemaField("transaction_time", "TIMESTAMP", mode="REQUIRED"), bigquery.SchemaField("transaction_id", "INTEGER", mode="REQUIRED"), @@ -579,6 +585,25 @@ def test_update_table_schema(self): self.assertEqual(found.field_type, expected.field_type) self.assertEqual(found.mode, expected.mode) + def test_update_table_clustering_configuration(self): + dataset = self.temp_dataset(_make_dataset_id("update_table")) + + TABLE_NAME = "test_table" + table_arg = Table(dataset.table(TABLE_NAME), schema=CLUSTERING_SCHEMA) + self.assertFalse(_table_exists(table_arg)) + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + self.assertTrue(_table_exists(table)) + + table.clustering_fields = ["full_name", "date_of_birth"] + table2 = Config.CLIENT.update_table(table, ["clustering_fields"]) + self.assertEqual(table2.clustering_fields, ["full_name", "date_of_birth"]) + + table2.clustering_fields = None + table3 = Config.CLIENT.update_table(table2, ["clustering_fields"]) + self.assertIsNone(table3.clustering_fields, None) + @staticmethod def _fetch_single_page(table, selected_fields=None): iterator = Config.CLIENT.list_rows(table, selected_fields=selected_fields) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 3373528e0..ce4a15761 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1210,8 +1210,8 @@ def test_clustering_fields_setter_w_none(self): table._properties["clustering"] = {"fields": fields} table.clustering_fields = None - self.assertEqual(table.clustering_fields, None) - self.assertFalse("clustering" in table._properties) + self.assertIsNone(table.clustering_fields) + self.assertTrue("clustering" in table._properties) # None stored explicitly def 
test_clustering_fields_setter_w_none_noop(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) @@ -1219,8 +1219,8 @@ def test_clustering_fields_setter_w_none_noop(self): table = self._make_one(table_ref) table.clustering_fields = None - self.assertEqual(table.clustering_fields, None) - self.assertFalse("clustering" in table._properties) + self.assertIsNone(table.clustering_fields) + self.assertTrue("clustering" in table._properties) # None stored explicitly def test_encryption_configuration_setter(self): # Previously, the EncryptionConfiguration class was in the table module, not the From a3224337dac217ec07df83bf0ad570b7aa6d2ec9 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 26 Apr 2021 15:56:03 +0000 Subject: [PATCH 082/230] chore: release 2.14.0 (#602) :robot: I have created a release \*beep\* \*boop\* --- ## [2.14.0](https://www.github.com/googleapis/python-bigquery/compare/v2.13.1...v2.14.0) (2021-04-26) ### Features * accept DatasetListItem where DatasetReference is accepted ([#597](https://www.github.com/googleapis/python-bigquery/issues/597)) ([c8b5581](https://www.github.com/googleapis/python-bigquery/commit/c8b5581ea3c94005d69755c4a3b5a0d8900f3fe2)) * accept job object as argument to `get_job` and `cancel_job` ([#617](https://www.github.com/googleapis/python-bigquery/issues/617)) ([f75dcdf](https://www.github.com/googleapis/python-bigquery/commit/f75dcdf3943b87daba60011c9a3b42e34ff81910)) * add `Client.delete_job_metadata` method to remove job metadata ([#610](https://www.github.com/googleapis/python-bigquery/issues/610)) ([0abb566](https://www.github.com/googleapis/python-bigquery/commit/0abb56669c097c59fbffce007c702e7a55f2d9c1)) * add `max_queue_size` argument to `RowIterator.to_dataframe_iterable` ([#575](https://www.github.com/googleapis/python-bigquery/issues/575)) 
([f95f415](https://www.github.com/googleapis/python-bigquery/commit/f95f415d3441b3928f6cc705cb8a75603d790fd6)) * add type hints for public methods ([#613](https://www.github.com/googleapis/python-bigquery/issues/613)) ([f8d4aaa](https://www.github.com/googleapis/python-bigquery/commit/f8d4aaa335a0eef915e73596fc9b43b11d11be9f)) * DB API cursors are now iterable ([#618](https://www.github.com/googleapis/python-bigquery/issues/618)) ([e0b373d](https://www.github.com/googleapis/python-bigquery/commit/e0b373d0e721a70656ed8faceb7f5c70f642d144)) * retry google.auth TransportError by default ([#624](https://www.github.com/googleapis/python-bigquery/issues/624)) ([34ecc3f](https://www.github.com/googleapis/python-bigquery/commit/34ecc3f1ca0ff073330c0c605673d89b43af7ed9)) * use pyarrow stream compression, if available ([#593](https://www.github.com/googleapis/python-bigquery/issues/593)) ([dde9dc5](https://www.github.com/googleapis/python-bigquery/commit/dde9dc5114c2311fb76fafc5b222fff561e8abf1)) ### Bug Fixes * consistent percents handling in DB API query ([#619](https://www.github.com/googleapis/python-bigquery/issues/619)) ([6502a60](https://www.github.com/googleapis/python-bigquery/commit/6502a602337ae562652a20b20270949f2c9d5073)) * missing license headers in new test files ([#604](https://www.github.com/googleapis/python-bigquery/issues/604)) ([df48cc5](https://www.github.com/googleapis/python-bigquery/commit/df48cc5a0be99ad39d5835652d1b7422209afc5d)) * unsetting clustering fields on Table is now possible ([#622](https://www.github.com/googleapis/python-bigquery/issues/622)) ([33a871f](https://www.github.com/googleapis/python-bigquery/commit/33a871f06329f9bf5a6a92fab9ead65bf2bee75d)) ### Documentation * add sample to run DML query ([#591](https://www.github.com/googleapis/python-bigquery/issues/591)) ([ff2ec3a](https://www.github.com/googleapis/python-bigquery/commit/ff2ec3abe418a443cd07751c08e654f94e8b3155)) * update the description of the return value of
`_QueryResults.rows()` ([#594](https://www.github.com/googleapis/python-bigquery/issues/594)) ([8f4c0b8](https://www.github.com/googleapis/python-bigquery/commit/8f4c0b84dac3840532d7865247b8ad94b625b897)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- CHANGELOG.md | 27 +++++++++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5dc2c8838..9aee40510 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,33 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.14.0](https://www.github.com/googleapis/python-bigquery/compare/v2.13.1...v2.14.0) (2021-04-26) + + +### Features + +* accept DatasetListItem where DatasetReference is accepted ([#597](https://www.github.com/googleapis/python-bigquery/issues/597)) ([c8b5581](https://www.github.com/googleapis/python-bigquery/commit/c8b5581ea3c94005d69755c4a3b5a0d8900f3fe2)) +* accept job object as argument to `get_job` and `cancel_job` ([#617](https://www.github.com/googleapis/python-bigquery/issues/617)) ([f75dcdf](https://www.github.com/googleapis/python-bigquery/commit/f75dcdf3943b87daba60011c9a3b42e34ff81910)) +* add `Client.delete_job_metadata` method to remove job metadata ([#610](https://www.github.com/googleapis/python-bigquery/issues/610)) ([0abb566](https://www.github.com/googleapis/python-bigquery/commit/0abb56669c097c59fbffce007c702e7a55f2d9c1)) +* add `max_queue_size` argument to `RowIterator.to_dataframe_iterable` ([#575](https://www.github.com/googleapis/python-bigquery/issues/575)) ([f95f415](https://www.github.com/googleapis/python-bigquery/commit/f95f415d3441b3928f6cc705cb8a75603d790fd6)) +* add type hints for public methods ([#613](https://www.github.com/googleapis/python-bigquery/issues/613)) 
([f8d4aaa](https://www.github.com/googleapis/python-bigquery/commit/f8d4aaa335a0eef915e73596fc9b43b11d11be9f)) +* DB API cursors are now iterable ([#618](https://www.github.com/googleapis/python-bigquery/issues/618)) ([e0b373d](https://www.github.com/googleapis/python-bigquery/commit/e0b373d0e721a70656ed8faceb7f5c70f642d144)) +* retry google.auth TransportError by default ([#624](https://www.github.com/googleapis/python-bigquery/issues/624)) ([34ecc3f](https://www.github.com/googleapis/python-bigquery/commit/34ecc3f1ca0ff073330c0c605673d89b43af7ed9)) +* use pyarrow stream compression, if available ([#593](https://www.github.com/googleapis/python-bigquery/issues/593)) ([dde9dc5](https://www.github.com/googleapis/python-bigquery/commit/dde9dc5114c2311fb76fafc5b222fff561e8abf1)) + + +### Bug Fixes + +* consistent percents handling in DB API query ([#619](https://www.github.com/googleapis/python-bigquery/issues/619)) ([6502a60](https://www.github.com/googleapis/python-bigquery/commit/6502a602337ae562652a20b20270949f2c9d5073)) +* missing license headers in new test files ([#604](https://www.github.com/googleapis/python-bigquery/issues/604)) ([df48cc5](https://www.github.com/googleapis/python-bigquery/commit/df48cc5a0be99ad39d5835652d1b7422209afc5d)) +* unsetting clustering fields on Table is now possible ([#622](https://www.github.com/googleapis/python-bigquery/issues/622)) ([33a871f](https://www.github.com/googleapis/python-bigquery/commit/33a871f06329f9bf5a6a92fab9ead65bf2bee75d)) + + +### Documentation + +* add sample to run DML query ([#591](https://www.github.com/googleapis/python-bigquery/issues/591)) ([ff2ec3a](https://www.github.com/googleapis/python-bigquery/commit/ff2ec3abe418a443cd07751c08e654f94e8b3155)) +* update the description of the return value of `_QueryResults.rows()` ([#594](https://www.github.com/googleapis/python-bigquery/issues/594)) ([8f4c0b8](https://www.github.com/googleapis/python-bigquery/commit/8f4c0b84dac3840532d7865247b8ad94b625b897)) + 
### [2.13.1](https://www.github.com/googleapis/python-bigquery/compare/v2.13.0...v2.13.1) (2021-03-23) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 2330d0c2c..ba8b4e8af 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.13.1" +__version__ = "2.14.0" From f4e34c09be696f41a097823db309a67d34db6efa Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 27 Apr 2021 17:08:05 +0200 Subject: [PATCH 083/230] chore(deps): update dependency google-cloud-bigquery to v2.14.0 (#627) [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | `==2.13.1` -> `==2.14.0` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.14.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.14.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.14.0/compatibility-slim/2.13.1)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.14.0/confidence-slim/2.13.1)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery ### [`v2.14.0`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​2140-httpswwwgithubcomgoogleapispython-bigquerycomparev2131v2140-2021-04-26) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.13.1...v2.14.0) ##### Features - accept DatasetListItem where DatasetReference is accepted ([#​597](https://www.github.com/googleapis/python-bigquery/issues/597)) ([c8b5581](https://www.github.com/googleapis/python-bigquery/commit/c8b5581ea3c94005d69755c4a3b5a0d8900f3fe2)) - accept job object as argument to `get_job` and `cancel_job` ([#​617](https://www.github.com/googleapis/python-bigquery/issues/617)) ([f75dcdf](https://www.github.com/googleapis/python-bigquery/commit/f75dcdf3943b87daba60011c9a3b42e34ff81910)) - add `Client.delete_job_metadata` method to remove job metadata ([#​610](https://www.github.com/googleapis/python-bigquery/issues/610)) ([0abb566](https://www.github.com/googleapis/python-bigquery/commit/0abb56669c097c59fbffce007c702e7a55f2d9c1)) - add `max_queue_size` argument to `RowIterator.to_dataframe_iterable` ([#​575](https://www.github.com/googleapis/python-bigquery/issues/575)) ([f95f415](https://www.github.com/googleapis/python-bigquery/commit/f95f415d3441b3928f6cc705cb8a75603d790fd6)) - add type hints for public methods ([#​613](https://www.github.com/googleapis/python-bigquery/issues/613)) ([f8d4aaa](https://www.github.com/googleapis/python-bigquery/commit/f8d4aaa335a0eef915e73596fc9b43b11d11be9f)) - DB API cursors are now iterable ([#​618](https://www.github.com/googleapis/python-bigquery/issues/618)) ([e0b373d](https://www.github.com/googleapis/python-bigquery/commit/e0b373d0e721a70656ed8faceb7f5c70f642d144)) - retry google.auth TransportError by default ([#​624](https://www.github.com/googleapis/python-bigquery/issues/624)) ([34ecc3f](https://www.github.com/googleapis/python-bigquery/commit/34ecc3f1ca0ff073330c0c605673d89b43af7ed9)) - use pyarrow stream compression, if 
available ([#​593](https://www.github.com/googleapis/python-bigquery/issues/593)) ([dde9dc5](https://www.github.com/googleapis/python-bigquery/commit/dde9dc5114c2311fb76fafc5b222fff561e8abf1)) ##### Bug Fixes - consistent percents handling in DB API query ([#​619](https://www.github.com/googleapis/python-bigquery/issues/619)) ([6502a60](https://www.github.com/googleapis/python-bigquery/commit/6502a602337ae562652a20b20270949f2c9d5073)) - missing license headers in new test files ([#​604](https://www.github.com/googleapis/python-bigquery/issues/604)) ([df48cc5](https://www.github.com/googleapis/python-bigquery/commit/df48cc5a0be99ad39d5835652d1b7422209afc5d)) - unsetting clustering fields on Table is now possible ([#​622](https://www.github.com/googleapis/python-bigquery/issues/622)) ([33a871f](https://www.github.com/googleapis/python-bigquery/commit/33a871f06329f9bf5a6a92fab9ead65bf2bee75d)) ##### Documentation - add sample to run DML query ([#​591](https://www.github.com/googleapis/python-bigquery/issues/591)) ([ff2ec3a](https://www.github.com/googleapis/python-bigquery/commit/ff2ec3abe418a443cd07751c08e654f94e8b3155)) - update the description of the return value of `_QueryResults.rows()` ([#​594](https://www.github.com/googleapis/python-bigquery/issues/594)) ([8f4c0b8](https://www.github.com/googleapis/python-bigquery/commit/8f4c0b84dac3840532d7865247b8ad94b625b897)) ##### [2.13.1](https://www.github.com/googleapis/python-bigquery/compare/v2.13.0...v2.13.1) (2021-03-23) ##### Bug Fixes - add ConnectionError to default retry ([#​571](https://www.github.com/googleapis/python-bigquery/issues/571)) ([a3edb8b](https://www.github.com/googleapis/python-bigquery/commit/a3edb8b921e029e2c03d33302d408ad5d4e9d4ad))
--- ### Configuration :date: **Schedule**: At any time (no schedule defined). :vertical_traffic_light: **Automerge**: Disabled by config. Please merge this manually once you are satisfied. :recycle: **Rebasing**: Renovate will not automatically rebase this PR, because other commits have been found. :no_bell: **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/geography/requirements.txt | 2 +- samples/snippets/conftest.py | 12 ++++++++++-- samples/snippets/requirements.txt | 2 +- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 96819343c..7e017e283 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.13.1 +google-cloud-bigquery==2.14.0 google-cloud-bigquery-storage==2.1.0 Shapely==1.7.1 diff --git a/samples/snippets/conftest.py b/samples/snippets/conftest.py index 31c6ba104..0d0299ee5 100644 --- a/samples/snippets/conftest.py +++ b/samples/snippets/conftest.py @@ -20,21 +20,29 @@ RESOURCE_PREFIX = "python_bigquery_samples_snippets" +RESOURCE_DATE_FORMAT = "%Y%m%d_%H%M%S" +RESOURCE_DATE_LENGTH = 4 + 2 + 2 + 1 + 2 + 2 + 2 def resource_prefix() -> str: - timestamp = datetime.datetime.utcnow().strftime("%Y%m%d_%H%M%S") + timestamp = datetime.datetime.utcnow().strftime(RESOURCE_DATE_FORMAT) random_string = hex(random.randrange(1000000))[2:] return f"{RESOURCE_PREFIX}_{timestamp}_{random_string}" +def resource_name_to_date(resource_name: str): + start_date = len(RESOURCE_PREFIX) + 1 + date_string = resource_name[start_date : start_date + RESOURCE_DATE_LENGTH] + return 
datetime.strptime(date_string, RESOURCE_DATE_FORMAT) + + @pytest.fixture(scope="session", autouse=True) def cleanup_datasets(bigquery_client: bigquery.Client): yesterday = datetime.datetime.utcnow() - datetime.timedelta(days=1) for dataset in bigquery_client.list_datasets(): if ( dataset.dataset_id.startswith(RESOURCE_PREFIX) - and dataset.created < yesterday + and resource_name_to_date(dataset.dataset_id) < yesterday ): bigquery_client.delete_dataset( dataset, delete_contents=True, not_found_ok=True diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 74a18981e..d7e60f77d 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.13.1 +google-cloud-bigquery==2.14.0 google-cloud-bigquery-storage==2.3.0 google-auth-oauthlib==0.4.4 grpcio==1.37.0 From 4396e70771af6889d3242c37c5ff2e80241023a2 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 27 Apr 2021 14:15:52 -0600 Subject: [PATCH 084/230] fix: The DB API Binary function accepts bytes data (#630) * fix: The DB API Binary function accepts bytes data * Binary should accept bytes-like objects. * check for an integer before converting to bytes. Because we don't want to accidentally create a giant bytes. * blackened. * Fixed exception string. * parameterized binary tests and rearranged imports. * typo * Blackened --- google/cloud/bigquery/dbapi/types.py | 20 +++++++++++++---- tests/unit/test_dbapi_types.py | 32 ++++++++++++++++++++++++---- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigquery/dbapi/types.py b/google/cloud/bigquery/dbapi/types.py index 20eca9b00..717593ae1 100644 --- a/google/cloud/bigquery/dbapi/types.py +++ b/google/cloud/bigquery/dbapi/types.py @@ -30,16 +30,28 @@ TimestampFromTicks = datetime.datetime.fromtimestamp -def Binary(string): +def Binary(data): """Contruct a DB-API binary value. Args: - string (str): A string to encode as a binary value. 
+ data (bytes-like): An object containing binary data and that + can be converted to bytes with the `bytes` builtin. Returns: - bytes: The UTF-8 encoded bytes representing the string. + bytes: The binary data as a bytes object. """ - return string.encode("utf-8") + if isinstance(data, int): + # This is not the conversion we're looking for, because it + # will simply create a bytes object of the given size. + raise TypeError("cannot convert `int` object to binary") + + try: + return bytes(data) + except TypeError: + if isinstance(data, str): + return data.encode("utf-8") + else: + raise def TimeFromTicks(ticks, tz=None): diff --git a/tests/unit/test_dbapi_types.py b/tests/unit/test_dbapi_types.py index e05660ffe..cf282c68b 100644 --- a/tests/unit/test_dbapi_types.py +++ b/tests/unit/test_dbapi_types.py @@ -15,6 +15,8 @@ import datetime import unittest +import pytest + import google.cloud._helpers from google.cloud.bigquery.dbapi import types @@ -26,10 +28,6 @@ def test_binary_type(self): self.assertEqual("STRUCT", types.BINARY) self.assertNotEqual("STRING", types.BINARY) - def test_binary_constructor(self): - self.assertEqual(types.Binary(u"hello"), b"hello") - self.assertEqual(types.Binary(u"\u1f60"), u"\u1f60".encode("utf-8")) - def test_timefromticks(self): somedatetime = datetime.datetime( 2017, 2, 18, 12, 47, 26, tzinfo=google.cloud._helpers.UTC @@ -40,3 +38,29 @@ def test_timefromticks(self): types.TimeFromTicks(ticks, google.cloud._helpers.UTC), datetime.time(12, 47, 26, tzinfo=google.cloud._helpers.UTC), ) + + +class CustomBinary: + def __bytes__(self): + return b"Google" + + +@pytest.mark.parametrize( + "raw,expected", + [ + (u"hello", b"hello"), + (u"\u1f60", u"\u1f60".encode("utf-8")), + (b"hello", b"hello"), + (bytearray(b"hello"), b"hello"), + (memoryview(b"hello"), b"hello"), + (CustomBinary(), b"Google"), + ], +) +def test_binary_constructor(raw, expected): + assert types.Binary(raw) == expected + + +@pytest.mark.parametrize("bad", (42, 42.0, None)) 
+def test_invalid_binary_constructor(bad): + with pytest.raises(TypeError): + types.Binary(bad) From 7196817e1a4ee6dfde4875a06f1ffb9bbdb8e2ed Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 28 Apr 2021 16:28:10 +0200 Subject: [PATCH 085/230] chore(deps): update dependency google-cloud-bigquery-storage to v2.4.0 (#595) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 7e017e283..f46b141fd 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 google-cloud-bigquery==2.14.0 -google-cloud-bigquery-storage==2.1.0 +google-cloud-bigquery-storage==2.4.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index d7e60f77d..f7b5cebe9 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.14.0 -google-cloud-bigquery-storage==2.3.0 +google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 grpcio==1.37.0 ipython==7.16.1; python_version < '3.7' From c0851861ab1936e7444b5ae8970ded773482db43 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 28 Apr 2021 15:44:25 -0500 Subject: [PATCH 086/230] chore: add yoshi to CODEOWNERS (#634) --- .github/CODEOWNERS | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 10f4ee7c0..ae570eb01 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -5,8 +5,7 @@ # https://help.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners#codeowners-syntax # The @googleapis/api-bigquery is the default owner for changes in this repo -* @googleapis/api-bigquery +* @googleapis/api-bigquery @googleapis/yoshi-python # The python-samples-reviewers team is the default owner for samples changes 
/samples/ @googleapis/python-samples-owners - From 8bcf397fbe2527e06317741875a059b109cfcd9c Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 29 Apr 2021 07:19:56 -0600 Subject: [PATCH 087/230] feat: Extended DB API parameter syntax to optionally provide parameter types (#626) * Added explicit type documentation. * Extended query-parameter system for specifying parameter types.assed. * Serialize non-floats (e.g. Decimals) using in FLOAT64 parameters. Co-authored-by: Tim Swast * De-reference aliases in SqlParameterScalarTypes when checking types Co-authored-by: Tim Swast --- docs/dbapi.rst | 37 ++++++++ google/cloud/bigquery/_helpers.py | 2 +- google/cloud/bigquery/dbapi/_helpers.py | 107 ++++++++++++++++-------- google/cloud/bigquery/dbapi/cursor.py | 93 ++++++++++++++++++-- google/cloud/bigquery/magics/magics.py | 2 +- tests/unit/test__helpers.py | 15 ++++ tests/unit/test_dbapi__helpers.py | 94 +++++++++++++++++++-- tests/unit/test_dbapi_cursor.py | 106 +++++++++++++++++++++-- 8 files changed, 396 insertions(+), 60 deletions(-) diff --git a/docs/dbapi.rst b/docs/dbapi.rst index ca0256d3c..41ec85833 100644 --- a/docs/dbapi.rst +++ b/docs/dbapi.rst @@ -4,3 +4,40 @@ DB-API Reference .. automodule:: google.cloud.bigquery.dbapi :members: :show-inheritance: + + +DB-API Query-Parameter Syntax +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The BigQuery DB-API uses the `qmark` `parameter style +`_ for +unnamed/positional parameters and the `pyformat` parameter style for +named parameters. + +An example of a query using unnamed parameters:: + + insert into people (name, income) values (?, ?) + +and using named parameters:: + + insert into people (name, income) values (%(name)s, %(income)s) + +Providing explicit type information +----------------------------------- + +BigQuery requires type information for parameters. The BigQuery +DB-API can usually determine parameter types for parameters based on +provided values.
Sometimes, however, types can't be determined (for +example when `None` is passed) or are determined incorrectly (for +example when passing a floating-point value to a numeric column). + +The BigQuery DB-API provides an extended parameter syntax. For named +parameters, a BigQuery type is provided after the name separated by a +colon, as in:: + + insert into people (name, income) values (%(name:string)s, %(income:numeric)s) + +For unnamed parameters, use the named syntax with a type, but no +name, as in:: + + insert into people (name, income) values (%(:string)s, %(:numeric)s) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index daa14b92a..ad8e3f003 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -275,7 +275,7 @@ def _int_to_json(value): def _float_to_json(value): """Coerce 'value' to an JSON-compatible representation.""" - return value + return value if value is None else float(value) def _decimal_to_json(value): diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index beb3c5e71..3b0d8134c 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -20,7 +20,7 @@ import numbers from google.cloud import bigquery -from google.cloud.bigquery import table +from google.cloud.bigquery import table, enums from google.cloud.bigquery.dbapi import exceptions @@ -28,7 +28,28 @@ _NUMERIC_SERVER_MAX = decimal.Decimal("9.9999999999999999999999999999999999999E+28") -def scalar_to_query_parameter(value, name=None): +def _parameter_type(name, value, query_parameter_type=None, value_doc=""): + if query_parameter_type: + try: + parameter_type = getattr( + enums.SqlParameterScalarTypes, query_parameter_type.upper() + )._type + except AttributeError: + raise exceptions.ProgrammingError( + f"The given parameter type, {query_parameter_type}," + f" for {name} is not a valid BigQuery scalar type."
+ ) + else: + parameter_type = bigquery_scalar_type(value) + if parameter_type is None: + raise exceptions.ProgrammingError( + f"Encountered parameter {name} with " + f"{value_doc} value {value} of unexpected type." + ) + return parameter_type + + +def scalar_to_query_parameter(value, name=None, query_parameter_type=None): """Convert a scalar value into a query parameter. Args: @@ -37,6 +58,7 @@ def scalar_to_query_parameter(value, name=None): name (str): (Optional) Name of the query parameter. + query_parameter_type (Optional[str]): Given type for the parameter. Returns: google.cloud.bigquery.ScalarQueryParameter: @@ -47,24 +69,19 @@ def scalar_to_query_parameter(value, name=None): google.cloud.bigquery.dbapi.exceptions.ProgrammingError: if the type cannot be determined. """ - parameter_type = bigquery_scalar_type(value) - - if parameter_type is None: - raise exceptions.ProgrammingError( - "encountered parameter {} with value {} of unexpected type".format( - name, value - ) - ) - return bigquery.ScalarQueryParameter(name, parameter_type, value) + return bigquery.ScalarQueryParameter( + name, _parameter_type(name, value, query_parameter_type), value + ) -def array_to_query_parameter(value, name=None): +def array_to_query_parameter(value, name=None, query_parameter_type=None): """Convert an array-like value into a query parameter. Args: value (Sequence[Any]): The elements of the array (should not be a string-like Sequence). name (Optional[str]): Name of the query parameter. + query_parameter_type (Optional[str]): Given type for the parameter. 
Returns: A query parameter corresponding with the type and value of the plain @@ -80,29 +97,30 @@ def array_to_query_parameter(value, name=None): "not string-like.".format(name) ) - if not value: + if query_parameter_type or value: + array_type = _parameter_type( + name, + value[0] if value else None, + query_parameter_type, + value_doc="array element ", + ) + else: raise exceptions.ProgrammingError( "Encountered an empty array-like value of parameter {}, cannot " "determine array elements type.".format(name) ) - # Assume that all elements are of the same type, and let the backend handle - # any type incompatibilities among the array elements - array_type = bigquery_scalar_type(value[0]) - if array_type is None: - raise exceptions.ProgrammingError( - "Encountered unexpected first array element of parameter {}, " - "cannot determine array elements type.".format(name) - ) - return bigquery.ArrayQueryParameter(name, array_type, value) -def to_query_parameters_list(parameters): +def to_query_parameters_list(parameters, parameter_types): """Converts a sequence of parameter values into query parameters. Args: parameters (Sequence[Any]): Sequence of query parameter values. + parameter_types: + A list of parameter types, one for each parameter. + Unknown types are provided as None. 
Returns: List[google.cloud.bigquery.query._AbstractQueryParameter]: @@ -110,23 +128,27 @@ def to_query_parameters_list(parameters): """ result = [] - for value in parameters: + for value, type_ in zip(parameters, parameter_types): if isinstance(value, collections_abc.Mapping): raise NotImplementedError("STRUCT-like parameter values are not supported.") elif array_like(value): - param = array_to_query_parameter(value) + param = array_to_query_parameter(value, None, type_) else: - param = scalar_to_query_parameter(value) + param = scalar_to_query_parameter(value, None, type_) + result.append(param) return result -def to_query_parameters_dict(parameters): +def to_query_parameters_dict(parameters, query_parameter_types): """Converts a dictionary of parameter values into query parameters. Args: parameters (Mapping[str, Any]): Dictionary of query parameter values. + parameter_types: + A dictionary of parameter types. It needn't have a key for each + parameter. Returns: List[google.cloud.bigquery.query._AbstractQueryParameter]: @@ -140,21 +162,38 @@ def to_query_parameters_dict(parameters): "STRUCT-like parameter values are not supported " "(parameter {}).".format(name) ) - elif array_like(value): - param = array_to_query_parameter(value, name=name) else: - param = scalar_to_query_parameter(value, name=name) + query_parameter_type = query_parameter_types.get(name) + if array_like(value): + param = array_to_query_parameter( + value, name=name, query_parameter_type=query_parameter_type + ) + else: + param = scalar_to_query_parameter( + value, name=name, query_parameter_type=query_parameter_type, + ) + result.append(param) return result -def to_query_parameters(parameters): +def to_query_parameters(parameters, parameter_types): """Converts DB-API parameter values into query parameters. Args: parameters (Union[Mapping[str, Any], Sequence[Any]]): A dictionary or sequence of query parameter values. 
+ parameter_types (Union[Mapping[str, str], Sequence[str]]): + A dictionary or list of parameter types. + + If parameters is a mapping, then this must be a dictionary + of parameter types. It needn't have a key for each + parameter. + + If parameters is a sequence, then this must be a list of + parameter types, one for each parameter. Unknown types + are provided as None. Returns: List[google.cloud.bigquery.query._AbstractQueryParameter]: @@ -164,9 +203,9 @@ def to_query_parameters(parameters): return [] if isinstance(parameters, collections_abc.Mapping): - return to_query_parameters_dict(parameters) - - return to_query_parameters_list(parameters) + return to_query_parameters_dict(parameters, parameter_types) + else: + return to_query_parameters_list(parameters, parameter_types) def bigquery_scalar_type(value): diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py index ca78d3907..f74781df9 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -18,6 +18,7 @@ from collections import abc as collections_abc import copy import logging +import re try: from google.cloud.bigquery_storage import ArrowSerializationOptions @@ -161,6 +162,14 @@ def execute(self, operation, parameters=None, job_id=None, job_config=None): job_config (google.cloud.bigquery.job.QueryJobConfig): (Optional) Extra configuration options for the query job. """ + formatted_operation, parameter_types = _format_operation(operation, parameters) + self._execute( + formatted_operation, parameters, job_id, job_config, parameter_types + ) + + def _execute( + self, formatted_operation, parameters, job_id, job_config, parameter_types + ): self._query_data = None self._query_job = None client = self.connection._client @@ -169,8 +178,7 @@ def execute(self, operation, parameters=None, job_id=None, job_config=None): # query parameters was not one of the standard options.
Convert both # the query and the parameters to the format expected by the client # libraries. - formatted_operation = _format_operation(operation, parameters=parameters) - query_parameters = _helpers.to_query_parameters(parameters) + query_parameters = _helpers.to_query_parameters(parameters, parameter_types) if client._default_query_job_config: if job_config: @@ -209,8 +217,19 @@ def executemany(self, operation, seq_of_parameters): seq_of_parameters (Union[Sequence[Mapping[str, Any], Sequence[Any]]]): Sequence of many sets of parameter values. """ - for parameters in seq_of_parameters: - self.execute(operation, parameters) + if seq_of_parameters: + # There's no reason to format the line more than once, as + # the operation only barely depends on the parameters. So + # we just use the first set of parameters. If there are + # different numbers or types of parameters, we'll error + # anyway. + formatted_operation, parameter_types = _format_operation( + operation, seq_of_parameters[0] + ) + for parameters in seq_of_parameters: + self._execute( + formatted_operation, parameters, None, None, parameter_types + ) def _try_fetch(self, size=None): """Try to start fetching data, if not yet started. @@ -427,7 +446,7 @@ def _format_operation_dict(operation, parameters): raise exceptions.ProgrammingError(exc) -def _format_operation(operation, parameters=None): +def _format_operation(operation, parameters): """Formats parameters in operation in way BigQuery expects. Args: @@ -445,9 +464,67 @@ def _format_operation(operation, parameters=None): ``parameters`` argument. """ if parameters is None or len(parameters) == 0: - return operation.replace("%%", "%") # Still do percent de-escaping. + return operation.replace("%%", "%"), None # Still do percent de-escaping. + + operation, parameter_types = _extract_types(operation) + if parameter_types is None: + raise exceptions.ProgrammingError( + f"Parameters were provided, but {repr(operation)} has no placeholders." 
+ ) if isinstance(parameters, collections_abc.Mapping): - return _format_operation_dict(operation, parameters) + return _format_operation_dict(operation, parameters), parameter_types + + return _format_operation_list(operation, parameters), parameter_types + + +def _extract_types( + operation, extra_type_sub=re.compile(r"(%*)%(?:\(([^:)]*)(?::(\w+))?\))?s").sub +): + """Remove type information from parameter placeholders. + + For every parameter of the form %(name:type)s, replace with %(name)s and add the + item name->type to dict that's returned. + + Returns operation without type information and a dictionary of names and types. + """ + parameter_types = None + + def repl(m): + nonlocal parameter_types + prefix, name, type_ = m.groups() + if len(prefix) % 2: + # The prefix has an odd number of %s, the last of which + # escapes the % we're looking for, so we don't want to + # change anything. + return m.group(0) + + try: + if name: + if not parameter_types: + parameter_types = {} + if type_: + if name in parameter_types: + if type_ != parameter_types[name]: + raise exceptions.ProgrammingError( + f"Conflicting types for {name}: " + f"{parameter_types[name]} and {type_}." + ) + else: + parameter_types[name] = type_ + else: + if not isinstance(parameter_types, dict): + raise TypeError() + + return f"{prefix}%({name})s" + else: + if parameter_types is None: + parameter_types = [] + parameter_types.append(type_) + return f"{prefix}%s" + except (AttributeError, TypeError): + raise exceptions.ProgrammingError( + f"{repr(operation)} mixes named and unamed parameters." 
+ ) - return _format_operation_list(operation, parameters) + return extra_type_sub(repl, operation), parameter_types diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 6ae7cae12..474d9a74a 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -615,7 +615,7 @@ def _cell_magic(line, query): ) raise NameError(msg) - params = _helpers.to_query_parameters(ast.literal_eval(params_option_value)) + params = _helpers.to_query_parameters(ast.literal_eval(params_option_value), {}) project = args.project or context.project diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 0fdf1142f..2437f3568 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -1159,3 +1159,18 @@ def fake_isinstance(instance, target_class): "google.cloud.bigquery.schema.isinstance", side_effect=fake_isinstance ) return patcher + + +def test_decimal_as_float_api_repr(): + """Make sure decimals get converted to float.""" + import google.cloud.bigquery.query + from decimal import Decimal + + param = google.cloud.bigquery.query.ScalarQueryParameter( + "x", "FLOAT64", Decimal(42) + ) + assert param.to_api_repr() == { + "parameterType": {"type": "FLOAT64"}, + "parameterValue": {"value": 42.0}, + "name": "x", + } diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index 4b2724de0..250ba46d9 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -18,13 +18,15 @@ import operator as op import unittest +import pytest + try: import pyarrow except ImportError: # pragma: NO COVER pyarrow = None import google.cloud._helpers -from google.cloud.bigquery import table +from google.cloud.bigquery import table, enums from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.dbapi import exceptions from tests.unit.helpers import _to_pyarrow @@ -142,7 +144,7 @@ def 
test_array_to_query_parameter_sequence_w_invalid_elements(self): def test_to_query_parameters_w_dict(self): parameters = {"somebool": True, "somestring": "a-string-value"} - query_parameters = _helpers.to_query_parameters(parameters) + query_parameters = _helpers.to_query_parameters(parameters, {}) query_parameter_tuples = [] for param in query_parameters: query_parameter_tuples.append((param.name, param.type_, param.value)) @@ -158,7 +160,7 @@ def test_to_query_parameters_w_dict(self): def test_to_query_parameters_w_dict_array_param(self): parameters = {"somelist": [10, 20]} - query_parameters = _helpers.to_query_parameters(parameters) + query_parameters = _helpers.to_query_parameters(parameters, {}) self.assertEqual(len(query_parameters), 1) param = query_parameters[0] @@ -171,11 +173,11 @@ def test_to_query_parameters_w_dict_dict_param(self): parameters = {"my_param": {"foo": "bar"}} with self.assertRaises(NotImplementedError): - _helpers.to_query_parameters(parameters) + _helpers.to_query_parameters(parameters, {}) def test_to_query_parameters_w_list(self): parameters = [True, "a-string-value"] - query_parameters = _helpers.to_query_parameters(parameters) + query_parameters = _helpers.to_query_parameters(parameters, [None, None]) query_parameter_tuples = [] for param in query_parameters: query_parameter_tuples.append((param.name, param.type_, param.value)) @@ -186,7 +188,7 @@ def test_to_query_parameters_w_list(self): def test_to_query_parameters_w_list_array_param(self): parameters = [[10, 20]] - query_parameters = _helpers.to_query_parameters(parameters) + query_parameters = _helpers.to_query_parameters(parameters, [None]) self.assertEqual(len(query_parameters), 1) param = query_parameters[0] @@ -199,10 +201,10 @@ def test_to_query_parameters_w_list_dict_param(self): parameters = [{"foo": "bar"}] with self.assertRaises(NotImplementedError): - _helpers.to_query_parameters(parameters) + _helpers.to_query_parameters(parameters, [None]) def 
test_to_query_parameters_none_argument(self): - query_parameters = _helpers.to_query_parameters(None) + query_parameters = _helpers.to_query_parameters(None, None) self.assertEqual(query_parameters, []) @@ -338,3 +340,79 @@ def test_custom_on_closed_error_type(self): with self.assertRaisesRegex(RuntimeError, "I'm closed!"): instance.instance_method() + + +VALID_BQ_TYPES = [ + (name, getattr(enums.SqlParameterScalarTypes, name)._type) + for name in dir(enums.SqlParameterScalarTypes) + if not name.startswith("_") +] + + +@pytest.mark.parametrize("alias, type_", VALID_BQ_TYPES) +def test_scalar_to_query_parameter_honors_given_type(alias, type_): + from google.cloud import bigquery + + assert _helpers.scalar_to_query_parameter(1.23, None, alias) == ( + bigquery.ScalarQueryParameter(None, type_, 1.23) + ) + assert _helpers.scalar_to_query_parameter(None, "foo", alias) == ( + bigquery.ScalarQueryParameter("foo", type_, None) + ) + + +def test_scalar_to_query_parameter_honors_given_type_errors_on_invalid(): + with pytest.raises( + google.cloud.bigquery.dbapi.exceptions.ProgrammingError, + match="The given parameter type, INT, for foo is not a valid BigQuery scalar type.", + ): + _helpers.scalar_to_query_parameter(None, "foo", "INT") + + +@pytest.mark.parametrize("alias, type_", VALID_BQ_TYPES) +def test_array_to_query_parameter_honors_given_type(alias, type_): + from google.cloud import bigquery + + assert _helpers.array_to_query_parameter([1.23], None, alias) == ( + bigquery.ArrayQueryParameter(None, type_, [1.23]) + ) + assert _helpers.array_to_query_parameter((), "foo", alias) == ( + bigquery.ArrayQueryParameter("foo", type_, ()) + ) + + +def test_array_to_query_parameter_honors_given_type_errors_on_invalid(): + with pytest.raises( + google.cloud.bigquery.dbapi.exceptions.ProgrammingError, + match="The given parameter type, INT, for foo is not a valid BigQuery scalar type.", + ): + _helpers.array_to_query_parameter((), "foo", "INT") + + +def 
test_to_query_parameters_dict_w_types(): + from google.cloud import bigquery + + assert sorted( + _helpers.to_query_parameters( + dict(i=1, x=1.2, y=None, z=[]), dict(x="numeric", y="string", z="float64") + ), + key=lambda p: p.name, + ) == [ + bigquery.ScalarQueryParameter("i", "INT64", 1), + bigquery.ScalarQueryParameter("x", "NUMERIC", 1.2), + bigquery.ScalarQueryParameter("y", "STRING", None), + bigquery.ArrayQueryParameter("z", "FLOAT64", []), + ] + + +def test_to_query_parameters_list_w_types(): + from google.cloud import bigquery + + assert _helpers.to_query_parameters( + [1, 1.2, None, []], [None, "numeric", "string", "float64"] + ) == [ + bigquery.ScalarQueryParameter(None, "INT64", 1), + bigquery.ScalarQueryParameter(None, "NUMERIC", 1.2), + bigquery.ScalarQueryParameter(None, "STRING", None), + bigquery.ArrayQueryParameter(None, "FLOAT64", []), + ] diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index 039ef3b4c..5afe269ef 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -12,10 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import mock import operator as op import unittest -import mock +import pytest + try: import pyarrow @@ -612,6 +614,15 @@ def test_executemany_w_dml(self): self.assertIsNone(cursor.description) self.assertEqual(cursor.rowcount, 12) + def test_executemany_empty(self): + from google.cloud.bigquery.dbapi import connect + + connection = connect(self._mock_client(rows=[], num_dml_affected_rows=12)) + cursor = connection.cursor() + cursor.executemany((), ()) + self.assertIsNone(cursor.description) + self.assertEqual(cursor.rowcount, -1) + def test_is_iterable(self): from google.cloud.bigquery import dbapi @@ -638,13 +649,15 @@ def test_is_iterable(self): def test__format_operation_w_dict(self): from google.cloud.bigquery.dbapi import cursor - formatted_operation = cursor._format_operation( - "SELECT %(somevalue)s, %(a `weird` one)s;", + parameter_types = {} + formatted_operation, parameter_types = cursor._format_operation( + "SELECT %(somevalue)s, %(a `weird` one:STRING)s;", {"somevalue": "hi", "a `weird` one": "world"}, ) self.assertEqual( formatted_operation, "SELECT @`somevalue`, @`a \\`weird\\` one`;" ) + self.assertEqual(parameter_types, {"a `weird` one": "STRING"}) def test__format_operation_w_wrong_dict(self): from google.cloud.bigquery import dbapi @@ -660,7 +673,7 @@ def test__format_operation_w_wrong_dict(self): def test__format_operation_w_redundant_dict_key(self): from google.cloud.bigquery.dbapi import cursor - formatted_operation = cursor._format_operation( + formatted_operation, _ = cursor._format_operation( "SELECT %(somevalue)s;", {"somevalue": "foo", "value-not-used": "bar"} ) self.assertEqual(formatted_operation, "SELECT @`somevalue`;") @@ -668,7 +681,7 @@ def test__format_operation_w_redundant_dict_key(self): def test__format_operation_w_sequence(self): from google.cloud.bigquery.dbapi import cursor - formatted_operation = cursor._format_operation( + formatted_operation, _ = cursor._format_operation( "SELECT %s, %s;", ("hello", "world") ) 
self.assertEqual(formatted_operation, "SELECT ?, ?;") @@ -698,19 +711,19 @@ def test__format_operation_w_too_long_sequence(self): def test__format_operation_w_empty_dict(self): from google.cloud.bigquery.dbapi import cursor - formatted_operation = cursor._format_operation("SELECT '%f'", {}) + formatted_operation, _ = cursor._format_operation("SELECT '%f'", {}) self.assertEqual(formatted_operation, "SELECT '%f'") def test__format_operation_wo_params_single_percent(self): from google.cloud.bigquery.dbapi import cursor - formatted_operation = cursor._format_operation("SELECT '%'", {}) + formatted_operation, _ = cursor._format_operation("SELECT '%'", {}) self.assertEqual(formatted_operation, "SELECT '%'") def test__format_operation_wo_params_double_percents(self): from google.cloud.bigquery.dbapi import cursor - formatted_operation = cursor._format_operation("SELECT '%%'", {}) + formatted_operation, _ = cursor._format_operation("SELECT '%%'", {}) self.assertEqual(formatted_operation, "SELECT '%'") def test__format_operation_unescaped_percent_w_dict_param(self): @@ -734,3 +747,80 @@ def test__format_operation_unescaped_percent_w_list_param(self): "SELECT %s, %s, '100 %';", ["foo", "bar"], ) + + def test__format_operation_no_placeholders(self): + from google.cloud.bigquery import dbapi + from google.cloud.bigquery.dbapi import cursor + + self.assertRaises( + dbapi.ProgrammingError, + cursor._format_operation, + "SELECT 42", + ["foo", "bar"], + ) + + +@pytest.mark.parametrize( + "inp,expect", + [ + ("", ("", None)), + ("values(%(foo)s, %(bar)s)", ("values(%(foo)s, %(bar)s)", {})), + ( + "values('%%(oof:INT64)s', %(foo)s, %(bar)s)", + ("values('%%(oof:INT64)s', %(foo)s, %(bar)s)", {}), + ), + ( + "values(%(foo:INT64)s, %(bar)s)", + ("values(%(foo)s, %(bar)s)", dict(foo="INT64")), + ), + ( + "values('%%(oof:INT64)s, %(foo:INT64)s, %(foo)s)", + ("values('%%(oof:INT64)s, %(foo)s, %(foo)s)", dict(foo="INT64")), + ), + ( + "values(%(foo:INT64)s, %(foo:INT64)s)", + 
("values(%(foo)s, %(foo)s)", dict(foo="INT64")), + ), + ( + "values(%(foo:INT64)s, %(bar:NUMERIC)s) 100 %", + ("values(%(foo)s, %(bar)s) 100 %", dict(foo="INT64", bar="NUMERIC")), + ), + (" %s %()s %(:int64)s ", (" %s %s %s ", [None, None, "int64"])), + (" %%s %s %()s %(:int64)s ", (" %%s %s %s %s ", [None, None, "int64"])), + ( + "values(%%%(foo:INT64)s, %(bar)s)", + ("values(%%%(foo)s, %(bar)s)", dict(foo="INT64")), + ), + ( + "values(%%%%(foo:INT64)s, %(bar)s)", + ("values(%%%%(foo:INT64)s, %(bar)s)", dict()), + ), + ( + "values(%%%%%(foo:INT64)s, %(bar)s)", + ("values(%%%%%(foo)s, %(bar)s)", dict(foo="INT64")), + ), + ], +) +def test__extract_types(inp, expect): + from google.cloud.bigquery.dbapi.cursor import _extract_types as et + + assert et(inp) == expect + + +@pytest.mark.parametrize( + "match,inp", + [ + ( + "Conflicting types for foo: numeric and int64.", + " %(foo:numeric)s %(foo:int64)s ", + ), + (r"' %s %\(foo\)s ' mixes named and unamed parameters.", " %s %(foo)s "), + (r"' %\(foo\)s %s ' mixes named and unamed parameters.", " %(foo)s %s "), + ], +) +def test__extract_types_fail(match, inp): + from google.cloud.bigquery.dbapi.cursor import _extract_types as et + from google.cloud.bigquery.dbapi import exceptions + + with pytest.raises(exceptions.ProgrammingError, match=match): + et(inp) From 5df63fd9253cd0475cfb7cefd89a7729c6c5abf2 Mon Sep 17 00:00:00 2001 From: "google-cloud-policy-bot[bot]" <80869356+google-cloud-policy-bot[bot]@users.noreply.github.com> Date: Thu, 29 Apr 2021 10:35:16 -0600 Subject: [PATCH 088/230] chore: add SECURITY.md (#636) Co-authored-by: google-cloud-policy-bot[bot] <80869356+google-cloud-policy-bot[bot]@users.noreply.github.com> --- SECURITY.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 SECURITY.md diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 000000000..8b58ae9c0 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,7 @@ +# Security Policy + +To report a security issue, please use 
[g.co/vulnz](https://g.co/vulnz). + +The Google Security Team will respond within 5 working days of your report on g.co/vulnz. + +We use g.co/vulnz for our intake, and do coordination and disclosure here using GitHub Security Advisory to privately discuss and fix the issue. From aa59023317b1c63720fb717b3544f755652da58d Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 29 Apr 2021 10:35:37 -0600 Subject: [PATCH 089/230] fix: add DECIMAL and BIGDECIMAL as aliases for NUMERIC and BIGNUMERIC (#638) * Added decimal types to SqlTypeNames and SqlParameterScalarTypes * Go ahead and alias on the client To convey to the observant that these are aliases, even though they could be used (more or less) directly. * Make sure that DECIMAL data are converted when making API calls. This is mainly as a backstop -- DECIMAL requests should be converted to NUMERIC. * blacken --- google/cloud/bigquery/_helpers.py | 5 +++++ google/cloud/bigquery/enums.py | 6 ++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index ad8e3f003..4fe29291d 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -363,6 +363,11 @@ def _time_to_json(value): "DATETIME": _datetime_to_json, "DATE": _date_to_json, "TIME": _time_to_json, + # Make sure DECIMAL and BIGDECIMAL are handled, even though + # requests for them should be converted to NUMERIC. Better safe + # than sorry. 
+ "DECIMAL": _decimal_to_json, + "BIGDECIMAL": _decimal_to_json, } diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index b378f091b..787c2449d 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -203,8 +203,8 @@ class SqlTypeNames(str, enum.Enum): INT64 = "INTEGER" FLOAT = "FLOAT" FLOAT64 = "FLOAT" - NUMERIC = "NUMERIC" - BIGNUMERIC = "BIGNUMERIC" + DECIMAL = NUMERIC = "NUMERIC" + BIGDECIMAL = BIGNUMERIC = "BIGNUMERIC" BOOLEAN = "BOOLEAN" BOOL = "BOOLEAN" GEOGRAPHY = "GEOGRAPHY" # NOTE: not available in legacy types @@ -227,6 +227,8 @@ class SqlParameterScalarTypes: FLOAT64 = ScalarQueryParameterType("FLOAT64") NUMERIC = ScalarQueryParameterType("NUMERIC") BIGNUMERIC = ScalarQueryParameterType("BIGNUMERIC") + DECIMAL = ScalarQueryParameterType("NUMERIC") + BIGDECIMAL = ScalarQueryParameterType("BIGNUMERIC") BOOLEAN = ScalarQueryParameterType("BOOL") BOOL = ScalarQueryParameterType("BOOL") GEOGRAPHY = ScalarQueryParameterType("GEOGRAPHY") From 6e6cfdf213a8e762fc0718ec7806f511b00f36bd Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 29 Apr 2021 17:20:06 +0000 Subject: [PATCH 090/230] chore: release 2.15.0 (#637) :robot: I have created a release \*beep\* \*boop\* --- ## [2.15.0](https://www.github.com/googleapis/python-bigquery/compare/v2.14.0...v2.15.0) (2021-04-29) ### Features * Extended DB API parameter syntax to optionally provide parameter types ([#626](https://www.github.com/googleapis/python-bigquery/issues/626)) ([8bcf397](https://www.github.com/googleapis/python-bigquery/commit/8bcf397fbe2527e06317741875a059b109cfcd9c)) ### Bug Fixes * add DECIMAL and BIGDECIMAL as aliases for NUMERIC and BIGNUMERIC ([#638](https://www.github.com/googleapis/python-bigquery/issues/638)) ([aa59023](https://www.github.com/googleapis/python-bigquery/commit/aa59023317b1c63720fb717b3544f755652da58d)) * The DB API Binary function accepts bytes data 
([#630](https://www.github.com/googleapis/python-bigquery/issues/630)) ([4396e70](https://www.github.com/googleapis/python-bigquery/commit/4396e70771af6889d3242c37c5ff2e80241023a2)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). --- CHANGELOG.md | 13 +++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9aee40510..6a222a710 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,19 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.15.0](https://www.github.com/googleapis/python-bigquery/compare/v2.14.0...v2.15.0) (2021-04-29) + + +### Features + +* Extended DB API parameter syntax to optionally provide parameter types ([#626](https://www.github.com/googleapis/python-bigquery/issues/626)) ([8bcf397](https://www.github.com/googleapis/python-bigquery/commit/8bcf397fbe2527e06317741875a059b109cfcd9c)) + + +### Bug Fixes + +* add DECIMAL and BIGDECIMAL as aliases for NUMERIC and BIGNUMERIC ([#638](https://www.github.com/googleapis/python-bigquery/issues/638)) ([aa59023](https://www.github.com/googleapis/python-bigquery/commit/aa59023317b1c63720fb717b3544f755652da58d)) +* The DB API Binary function accepts bytes data ([#630](https://www.github.com/googleapis/python-bigquery/issues/630)) ([4396e70](https://www.github.com/googleapis/python-bigquery/commit/4396e70771af6889d3242c37c5ff2e80241023a2)) + ## [2.14.0](https://www.github.com/googleapis/python-bigquery/compare/v2.13.1...v2.14.0) (2021-04-26) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index ba8b4e8af..a8381fff6 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.14.0" +__version__ = "2.15.0" From 28485871dfff01ed18cd6ee56f36a7e373c6733d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 30 Apr 2021 16:20:25 +0200 Subject: [PATCH 091/230] chore(deps): update dependency google-cloud-bigquery to v2.15.0 (#639) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index f46b141fd..324ece4ef 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.14.0 +google-cloud-bigquery==2.15.0 google-cloud-bigquery-storage==2.4.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index f7b5cebe9..077896cb3 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.14.0 +google-cloud-bigquery==2.15.0 google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 grpcio==1.37.0 From 471a76117b9f6353e343a2f493aee181e19c2f79 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 30 Apr 2021 16:45:10 +0200 Subject: [PATCH 092/230] chore(deps): update dependency pyarrow to v4 (#641) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 077896cb3..7e04b06b5 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -8,5 +8,5 @@ matplotlib==3.3.4; python_version < '3.7' matplotlib==3.4.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' pandas==1.2.0; python_version >= '3.7' -pyarrow==3.0.0 +pyarrow==4.0.0 pytz==2021.1 From 6a48e80bc7d347f381b181f4cf81fef105d0ad0d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 30 Apr 2021 16:45:32 +0200 Subject: [PATCH 
093/230] chore(deps): update dependency grpcio to v1.37.1 (#640) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 7e04b06b5..04883477a 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.15.0 google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 -grpcio==1.37.0 +grpcio==1.37.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' From be3c49a72f0e04de4055f5ca7a99f821c2c8f240 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 4 May 2021 19:32:02 +0200 Subject: [PATCH 094/230] chore(deps): update dependency pytest to v6.2.4 (#647) --- samples/geography/requirements-test.txt | 2 +- samples/snippets/requirements-test.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 299d90b65..b0cf76724 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==6.2.3 +pytest==6.2.4 mock==4.0.3 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 299d90b65..b0cf76724 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==6.2.3 +pytest==6.2.4 mock==4.0.3 From 6cc6876eb0e5bf49fdc047256a945dcf1b289576 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 5 May 2021 14:41:46 +0200 Subject: [PATCH 095/230] feat: add with_name() to ScalarQueryParameterType (#644) * feat: add with_name() to ScalarQueryParameterType * Clarify unsetting a name, add extra test --- google/cloud/bigquery/query.py | 15 +++++++++++++++ tests/unit/test_query.py | 20 ++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git 
a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 3751eb124..d1e9a45a5 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -16,6 +16,7 @@ from collections import OrderedDict import copy +from typing import Union from google.cloud.bigquery.table import _parse_schema_resource from google.cloud.bigquery._helpers import _rows_from_json @@ -119,6 +120,20 @@ def to_api_repr(self): # attributes in the API representation when needed. Here we omit them. return {"type": self._type} + def with_name(self, new_name: Union[str, None]): + """Return a copy of the instance with ``name`` set to ``new_name``. + + Args: + new_name (Union[str, None]): + The new name of the query parameter type. If ``None``, the existing + name is cleared. + + Returns: + google.cloud.bigquery.query.ScalarQueryParameterType: + A new instance with updated name. + """ + return type(self)(self._type, name=new_name, description=self.description) + def __repr__(self): name = f", name={self.name!r}" if self.name is not None else "" description = ( diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index c8be2911f..90fc30b20 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -98,6 +98,26 @@ def test_repr_all_optional_attrs(self): "ScalarQueryParameterType('BYTES', name='foo', description='this is foo')", ) + def test_with_name_returns_copy_w_changed_name(self): + param_type = self._make_one("BOOLEAN", name=None, description="Some checkbox.") + modified_type = param_type.with_name("allow_emails") + + self.assertIsNot(modified_type, param_type) # Result is a copy. + self.assertEqual(modified_type.name, "allow_emails") + + # The rest of the fields should have been preserved. 
+ self.assertEqual(modified_type._type, param_type._type) + self.assertEqual(modified_type.description, param_type.description) + + def test_with_name_clearing_the_value(self): + param_type = self._make_one( + "BOOLEAN", name="allow_emails", description="Some checkbox." + ) + modified_type = param_type.with_name(None) + + self.assertIsNone(modified_type.name) + self.assertEqual(param_type.name, "allow_emails") # original unchanged + class Test_ArrayQueryParameterType(unittest.TestCase): @staticmethod From 9e1d3869c2024fe7a8af57ff59838d904ca5db03 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 5 May 2021 15:49:10 +0200 Subject: [PATCH 096/230] deps: expand supported pyarrow versions to v4 (#643) * deps: expand supported pyarrow versions to v4 * Expand *all* pyarrow pins. * Constrain pyarrow to v4.0.0+ in Python 3.9 tests --- setup.py | 6 +++--- testing/constraints-3.9.txt | 7 +++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 607ffb63f..6a6202ef9 100644 --- a/setup.py +++ b/setup.py @@ -47,10 +47,10 @@ # grpc.Channel.close() method isn't added until 1.32.0. 
# https://github.com/grpc/grpc/pull/15254 "grpcio >= 1.32.0, < 2.0dev", - "pyarrow >= 1.0.0, < 4.0dev", + "pyarrow >= 1.0.0, < 5.0dev", ], - "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 4.0dev"], - "bignumeric_type": ["pyarrow >= 3.0.0, < 4.0dev"], + "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 5.0dev"], + "bignumeric_type": ["pyarrow >= 3.0.0, < 5.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ "opentelemetry-api >= 0.11b0", diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index e69de29bb..39dc6250e 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -0,0 +1,7 @@ +# This constraints file is used to make sure that the latest dependency versions +# we claim to support in setup.py are indeed installed in test sessions in the most +# recent Python version supported (3.9 at the time of writing - 2021-05-05). +# +# NOTE: Not comprehensive yet, will eventually be maintained semi-automatically by +# the renovate bot. +pyarrow>=4.0.0 From a6a4eeac8f832cf9e24b0a4391b9848587fb6d29 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 5 May 2021 17:17:19 +0200 Subject: [PATCH 097/230] chore: use file paths for --cov args in noxfile (#648) --- noxfile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/noxfile.py b/noxfile.py index 7ba081660..654bbd093 100644 --- a/noxfile.py +++ b/noxfile.py @@ -77,8 +77,8 @@ def default(session, install_extras=True): session.run( "py.test", "--quiet", - "--cov=google.cloud.bigquery", - "--cov=tests.unit", + "--cov=google/cloud/bigquery", + "--cov=tests/unit", "--cov-append", "--cov-config=.coveragerc", "--cov-report=", From 144ceeaac0167f774b86c39a042a2de2b8b4d356 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 6 May 2021 11:36:10 +0200 Subject: [PATCH 098/230] chore: release 2.16.0 (#649) Co-authored-by: release-please[bot] 
<55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 12 ++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a222a710..15d594c1b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,18 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.16.0](https://www.github.com/googleapis/python-bigquery/compare/v2.15.0...v2.16.0) (2021-05-05) + + +### Features + +* add with_name() to ScalarQueryParameterType ([#644](https://www.github.com/googleapis/python-bigquery/issues/644)) ([6cc6876](https://www.github.com/googleapis/python-bigquery/commit/6cc6876eb0e5bf49fdc047256a945dcf1b289576)) + + +### Dependencies + +* expand supported pyarrow versions to v4 ([#643](https://www.github.com/googleapis/python-bigquery/issues/643)) ([9e1d386](https://www.github.com/googleapis/python-bigquery/commit/9e1d3869c2024fe7a8af57ff59838d904ca5db03)) + ## [2.15.0](https://www.github.com/googleapis/python-bigquery/compare/v2.14.0...v2.15.0) (2021-04-29) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index a8381fff6..a93d72c2b 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.15.0" +__version__ = "2.16.0" From e24d47e72a3fdb8b6fe3d387abc31f79c5a32bc0 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 7 May 2021 16:55:55 +0200 Subject: [PATCH 099/230] chore: avoid pytype error caused by attrs==21.1.0 (#656) --- noxfile.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/noxfile.py b/noxfile.py index 654bbd093..dc77be3b7 100644 --- a/noxfile.py +++ b/noxfile.py @@ -103,6 +103,10 @@ def unit_noextras(session): @nox.session(python=DEFAULT_PYTHON_VERSION) def pytype(session): """Run type checks.""" + # An indirect dependency attrs==21.1.0 breaks the check, and installing a less + # recent version avoids the error until a possibly better fix is found. + # https://github.com/googleapis/python-bigquery/issues/655 + session.install("attrs==20.3.0") session.install("-e", ".[all]") session.install("ipython") session.install(PYTYPE_VERSION) From 240d1d2ba125b83c4277b2f2cd6724d66cd95bb9 Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Fri, 7 May 2021 16:14:02 -0600 Subject: [PATCH 100/230] chore: use 3.8 for blacken session (#653) The Autosynth build now has 3.8: https://github.com/googleapis/synthtool/commit/fd33d7df9ecfc79cc6dbe552b497a4fb36f2e635#diff-f80f936e0eac73417c05535c764a44906afd70a37096ea3c58934a9f6f1e7fcd Should fix unexpected style in #651 --- noxfile.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/noxfile.py b/noxfile.py index dc77be3b7..a52025635 100644 --- a/noxfile.py +++ b/noxfile.py @@ -257,15 +257,12 @@ def lint_setup_py(session): session.run("python", "setup.py", "check", "--restructuredtext", "--strict") -@nox.session(python="3.6") +@nox.session(python=DEFAULT_PYTHON_VERSION) def blacken(session): """Run black. Format code to uniform standard. - - This currently uses Python 3.6 due to the automated Kokoro run of synthtool. - That run uses an image that doesn't have 3.6 installed. 
Before updating this - check the state of the `gcp_ubuntu_config` we use for that Kokoro run. """ + session.install(BLACK_VERSION) session.run("black", *BLACK_PATHS) From 615d139be15bbbaea1517eb4a5d75b93055c6663 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Sun, 9 May 2021 04:05:30 -0400 Subject: [PATCH 101/230] chore: add library type to .repo-metadata.json (#658) --- .repo-metadata.json | 1 + 1 file changed, 1 insertion(+) diff --git a/.repo-metadata.json b/.repo-metadata.json index f50dbbeb2..f132056d5 100644 --- a/.repo-metadata.json +++ b/.repo-metadata.json @@ -6,6 +6,7 @@ "issue_tracker": "https://issuetracker.google.com/savedsearches/559654", "release_level": "ga", "language": "python", + "library_type": "GAPIC_COMBO", "repo": "googleapis/python-bigquery", "distribution_name": "google-cloud-bigquery", "api_id": "bigquery.googleapis.com", From aeadc8c2d614bb9f0883ec901fca48930f3aaf19 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 12 May 2021 04:22:20 -0600 Subject: [PATCH 102/230] fix: executemany rowcount only reflected the last execution (#660) --- google/cloud/bigquery/dbapi/cursor.py | 4 ++++ tests/unit/test_dbapi_cursor.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py index f74781df9..c8fc49378 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -218,6 +218,7 @@ def executemany(self, operation, seq_of_parameters): Sequence of many sets of parameter values. """ if seq_of_parameters: + rowcount = 0 # There's no reason to format the line more than once, as # the operation only barely depends on the parameters. So # we just use the first set of parameters. 
If there are @@ -230,6 +231,9 @@ def executemany(self, operation, seq_of_parameters): self._execute( formatted_operation, parameters, None, None, parameter_types ) + rowcount += self.rowcount + + self.rowcount = rowcount def _try_fetch(self, size=None): """Try to start fetching data, if not yet started. diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index 5afe269ef..55e453254 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -612,7 +612,7 @@ def test_executemany_w_dml(self): (("test",), ("anothertest",)), ) self.assertIsNone(cursor.description) - self.assertEqual(cursor.rowcount, 12) + self.assertEqual(cursor.rowcount, 24) # 24 because 2 * 12 because cumulative. def test_executemany_empty(self): from google.cloud.bigquery.dbapi import connect From c6ba15593f4d7541793e45295d9e531fd214094a Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 13 May 2021 09:17:21 +0200 Subject: [PATCH 103/230] chore: release 2.16.1 (#662) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 15d594c1b..ef184dffb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.16.1](https://www.github.com/googleapis/python-bigquery/compare/v2.16.0...v2.16.1) (2021-05-12) + + +### Bug Fixes + +* executemany rowcount only reflected the last execution ([#660](https://www.github.com/googleapis/python-bigquery/issues/660)) ([aeadc8c](https://www.github.com/googleapis/python-bigquery/commit/aeadc8c2d614bb9f0883ec901fca48930f3aaf19)) + ## [2.16.0](https://www.github.com/googleapis/python-bigquery/compare/v2.15.0...v2.16.0) (2021-05-05) diff --git a/google/cloud/bigquery/version.py 
b/google/cloud/bigquery/version.py index a93d72c2b..61e0c0a83 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.16.0" +__version__ = "2.16.1" From 506b268cab218d6a123c82d431f99dc36a2dc35a Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 13 May 2021 12:30:51 +0200 Subject: [PATCH 104/230] chore(deps): update dependency google-cloud-bigquery to v2.16.1 (#652) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 324ece4ef..e494fbaae 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.15.0 +google-cloud-bigquery==2.16.1 google-cloud-bigquery-storage==2.4.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 04883477a..2dfee39b5 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.15.0 +google-cloud-bigquery==2.16.1 google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 grpcio==1.37.1 From e8838a75f50a62f8a1189d2fcde3ee78f13eb1d1 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Thu, 13 May 2021 10:59:52 -0400 Subject: [PATCH 105/230] chore: migrate to owl bot (#663) * chore: migrate to owl bot * chore: copy files from googleapis-gen f2de93abafa306b2ebadf1d10d947db8bcf2bf15 * chore: run the post processor --- .github/.OwlBot.lock.yaml | 4 ++ .github/.OwlBot.yaml | 26 ++++++++ .pre-commit-config.yaml | 2 +- CONTRIBUTING.rst | 16 +---- synth.py => owlbot.py | 110 +++++++++++++++---------------- synth.metadata | 134 -------------------------------------- 6 files changed, 85 
insertions(+), 207 deletions(-) create mode 100644 .github/.OwlBot.lock.yaml create mode 100644 .github/.OwlBot.yaml rename synth.py => owlbot.py (60%) delete mode 100644 synth.metadata diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml new file mode 100644 index 000000000..d49860b32 --- /dev/null +++ b/.github/.OwlBot.lock.yaml @@ -0,0 +1,4 @@ +docker: + digest: sha256:457583330eec64daa02aeb7a72a04d33e7be2428f646671ce4045dcbc0191b1e + image: gcr.io/repo-automation-bots/owlbot-python:latest + diff --git a/.github/.OwlBot.yaml b/.github/.OwlBot.yaml new file mode 100644 index 000000000..2b6451c19 --- /dev/null +++ b/.github/.OwlBot.yaml @@ -0,0 +1,26 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +docker: + image: gcr.io/repo-automation-bots/owlbot-python:latest + +deep-remove-regex: + - /owl-bot-staging + +deep-copy-regex: + - source: /google/cloud/bigquery/(v.*)/.*-py/(.*) + dest: /owl-bot-staging/$1/$2 + +begin-after-commit-hash: f2de93abafa306b2ebadf1d10d947db8bcf2bf15 + diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8912e9b5d..1bbd78783 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -26,6 +26,6 @@ repos: hooks: - id: black - repo: https://gitlab.com/pycqa/flake8 - rev: 3.9.0 + rev: 3.9.1 hooks: - id: flake8 diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index a0e330e44..20ba9e62e 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -160,21 +160,7 @@ Running System Tests auth settings and change some configuration in your project to run all the tests. -- System tests will be run against an actual project and - so you'll need to provide some environment variables to facilitate - authentication to your project: - - - ``GOOGLE_APPLICATION_CREDENTIALS``: The path to a JSON key file; - Such a file can be downloaded directly from the developer's console by clicking - "Generate new JSON key". See private key - `docs `__ - for more details. - -- Once you have downloaded your json keys, set the environment variable - ``GOOGLE_APPLICATION_CREDENTIALS`` to the absolute path of the json file:: - - $ export GOOGLE_APPLICATION_CREDENTIALS="/Users//path/to/app_credentials.json" - +- System tests will be run against an actual project. You should use local credentials from gcloud when possible. See `Best practices for application authentication `__. Some tests require a service account. For those tests see `Authenticating as a service account `__. 
************* Test Coverage diff --git a/synth.py b/owlbot.py similarity index 60% rename from synth.py rename to owlbot.py index d99f368cc..f45c24fbb 100644 --- a/synth.py +++ b/owlbot.py @@ -19,36 +19,61 @@ from synthtool import gcp from synthtool.languages import python -gapic = gcp.GAPICBazel() common = gcp.CommonTemplates() -version = "v2" -library = gapic.py_library( - service="bigquery", - version=version, - bazel_target=f"//google/cloud/bigquery/{version}:bigquery-{version}-py", - include_protos=True, -) - -s.move( - library, - excludes=[ - "*.tar.gz", - "docs/index.rst", - "docs/bigquery_v2/*_service.rst", - "docs/bigquery_v2/services.rst", - "README.rst", - "noxfile.py", - "setup.py", - "scripts/fixup_bigquery_v2_keywords.py", - library / f"google/cloud/bigquery/__init__.py", - library / f"google/cloud/bigquery/py.typed", - # There are no public API endpoints for the generated ModelServiceClient, - # thus there's no point in generating it and its tests. - library / f"google/cloud/bigquery_{version}/services/**", - library / f"tests/unit/gapic/bigquery_{version}/**", - ], -) +default_version = "v2" + +for library in s.get_staging_dirs(default_version): + # Do not expose ModelServiceClient, as there is no public API endpoint for the + # models service. + s.replace( + library / f"google/cloud/bigquery_{library.name}/__init__.py", + r"from \.services\.model_service import ModelServiceClient", + "", + ) + s.replace( + library / f"google/cloud/bigquery_{library.name}/__init__.py", + r"""["']ModelServiceClient["'],""", + "", + ) + + # Adjust Model docstring so that Sphinx does not think that "predicted_" is + # a reference to something, issuing a false warning. + s.replace( + library / f"google/cloud/bigquery_{library.name}/types/model.py", + r'will have a "predicted_"', + "will have a `predicted_`", + ) + + # Avoid breaking change due to change in field renames. 
+ # https://github.com/googleapis/python-bigquery/issues/319 + s.replace( + library / f"google/cloud/bigquery_{library.name}/types/standard_sql.py", + r"type_ ", + "type " + ) + + s.move( + library, + excludes=[ + "*.tar.gz", + "docs/index.rst", + f"docs/bigquery_{library.name}/*_service.rst", + f"docs/bigquery_{library.name}/services.rst", + "README.rst", + "noxfile.py", + "setup.py", + f"scripts/fixup_bigquery_{library.name}_keywords.py", + f"google/cloud/bigquery/__init__.py", + f"google/cloud/bigquery/py.typed", + # There are no public API endpoints for the generated ModelServiceClient, + # thus there's no point in generating it and its tests. + f"google/cloud/bigquery_{library.name}/services/**", + f"tests/unit/gapic/bigquery_{library.name}/**", + ], + ) + +s.remove_staging_dirs() # ---------------------------------------------------------------------------- # Add templated files @@ -79,41 +104,12 @@ python.py_samples() -# Do not expose ModelServiceClient, as there is no public API endpoint for the -# models service. -s.replace( - "google/cloud/bigquery_v2/__init__.py", - r"from \.services\.model_service import ModelServiceClient", - "", -) -s.replace( - "google/cloud/bigquery_v2/__init__.py", - r"""["']ModelServiceClient["'],""", - "", -) - -# Adjust Model docstring so that Sphinx does not think that "predicted_" is -# a reference to something, issuing a false warning. -s.replace( - "google/cloud/bigquery_v2/types/model.py", - r'will have a "predicted_"', - "will have a `predicted_`", -) - s.replace( "docs/conf.py", r'\{"members": True\}', '{"members": True, "inherited-members": True}' ) -# Avoid breaking change due to change in field renames. -# https://github.com/googleapis/python-bigquery/issues/319 -s.replace( - "google/cloud/bigquery_v2/types/standard_sql.py", - r"type_ ", - "type " -) - # Tell Sphinx to ingore autogenerated docs files. 
s.replace( "docs/conf.py", diff --git a/synth.metadata b/synth.metadata deleted file mode 100644 index b031618b0..000000000 --- a/synth.metadata +++ /dev/null @@ -1,134 +0,0 @@ -{ - "sources": [ - { - "git": { - "name": ".", - "remote": "https://github.com/googleapis/python-bigquery.git", - "sha": "f95f415d3441b3928f6cc705cb8a75603d790fd6" - } - }, - { - "git": { - "name": "googleapis", - "remote": "https://github.com/googleapis/googleapis.git", - "sha": "28a591963253d52ce3a25a918cafbdd9928de8cf", - "internalRef": "361662015" - } - }, - { - "git": { - "name": "synthtool", - "remote": "https://github.com/googleapis/synthtool.git", - "sha": "043cc620d6a6111816d9e09f2a97208565fde958" - } - }, - { - "git": { - "name": "synthtool", - "remote": "https://github.com/googleapis/synthtool.git", - "sha": "043cc620d6a6111816d9e09f2a97208565fde958" - } - } - ], - "destinations": [ - { - "client": { - "source": "googleapis", - "apiName": "bigquery", - "apiVersion": "v2", - "language": "python", - "generator": "bazel" - } - } - ], - "generatedFiles": [ - ".coveragerc", - ".flake8", - ".github/CONTRIBUTING.md", - ".github/ISSUE_TEMPLATE/bug_report.md", - ".github/ISSUE_TEMPLATE/feature_request.md", - ".github/ISSUE_TEMPLATE/support_request.md", - ".github/PULL_REQUEST_TEMPLATE.md", - ".github/header-checker-lint.yml", - ".github/release-please.yml", - ".github/snippet-bot.yml", - ".gitignore", - ".kokoro/build.sh", - ".kokoro/continuous/common.cfg", - ".kokoro/continuous/continuous.cfg", - ".kokoro/docker/docs/Dockerfile", - ".kokoro/docker/docs/fetch_gpg_keys.sh", - ".kokoro/docs/common.cfg", - ".kokoro/docs/docs-presubmit.cfg", - ".kokoro/docs/docs.cfg", - ".kokoro/populate-secrets.sh", - ".kokoro/presubmit/common.cfg", - ".kokoro/presubmit/system-3.8.cfg", - ".kokoro/publish-docs.sh", - ".kokoro/release.sh", - ".kokoro/release/common.cfg", - ".kokoro/release/release.cfg", - ".kokoro/samples/lint/common.cfg", - ".kokoro/samples/lint/continuous.cfg", - 
".kokoro/samples/lint/periodic.cfg", - ".kokoro/samples/lint/presubmit.cfg", - ".kokoro/samples/python3.6/common.cfg", - ".kokoro/samples/python3.6/continuous.cfg", - ".kokoro/samples/python3.6/periodic-head.cfg", - ".kokoro/samples/python3.6/periodic.cfg", - ".kokoro/samples/python3.6/presubmit.cfg", - ".kokoro/samples/python3.7/common.cfg", - ".kokoro/samples/python3.7/continuous.cfg", - ".kokoro/samples/python3.7/periodic-head.cfg", - ".kokoro/samples/python3.7/periodic.cfg", - ".kokoro/samples/python3.7/presubmit.cfg", - ".kokoro/samples/python3.8/common.cfg", - ".kokoro/samples/python3.8/continuous.cfg", - ".kokoro/samples/python3.8/periodic-head.cfg", - ".kokoro/samples/python3.8/periodic.cfg", - ".kokoro/samples/python3.8/presubmit.cfg", - ".kokoro/test-samples-against-head.sh", - ".kokoro/test-samples-impl.sh", - ".kokoro/test-samples.sh", - ".kokoro/trampoline.sh", - ".kokoro/trampoline_v2.sh", - ".pre-commit-config.yaml", - ".trampolinerc", - "CODE_OF_CONDUCT.md", - "CONTRIBUTING.rst", - "LICENSE", - "MANIFEST.in", - "docs/_static/custom.css", - "docs/_templates/layout.html", - "docs/bigquery_v2/types.rst", - "docs/conf.py", - "google/cloud/bigquery_v2/__init__.py", - "google/cloud/bigquery_v2/proto/encryption_config.proto", - "google/cloud/bigquery_v2/proto/model.proto", - "google/cloud/bigquery_v2/proto/model_reference.proto", - "google/cloud/bigquery_v2/proto/standard_sql.proto", - "google/cloud/bigquery_v2/proto/table_reference.proto", - "google/cloud/bigquery_v2/py.typed", - "google/cloud/bigquery_v2/types/__init__.py", - "google/cloud/bigquery_v2/types/encryption_config.py", - "google/cloud/bigquery_v2/types/model.py", - "google/cloud/bigquery_v2/types/model_reference.py", - "google/cloud/bigquery_v2/types/standard_sql.py", - "google/cloud/bigquery_v2/types/table_reference.py", - "mypy.ini", - "renovate.json", - "samples/AUTHORING_GUIDE.md", - "samples/CONTRIBUTING.md", - "samples/geography/noxfile.py", - "samples/snippets/noxfile.py", - 
"scripts/decrypt-secrets.sh", - "scripts/readme-gen/readme_gen.py", - "scripts/readme-gen/templates/README.tmpl.rst", - "scripts/readme-gen/templates/auth.tmpl.rst", - "scripts/readme-gen/templates/auth_api_key.tmpl.rst", - "scripts/readme-gen/templates/install_deps.tmpl.rst", - "scripts/readme-gen/templates/install_portaudio.tmpl.rst", - "setup.cfg", - "testing/.gitignore" - ] -} \ No newline at end of file From 82f6c32ab9e75c86d7e27439016e634a484e4e9e Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Sun, 16 May 2021 11:14:01 +0000 Subject: [PATCH 106/230] chore: new owl bot post processor docker image (#665) gcr.io/repo-automation-bots/owlbot-python:latest@sha256:4c981a6b6f2b8914a448d7b3a01688365be03e3ed26dfee399a6aa77fb112eaa --- .github/.OwlBot.lock.yaml | 5 ++--- .pre-commit-config.yaml | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index d49860b32..864c17653 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,4 +1,3 @@ docker: - digest: sha256:457583330eec64daa02aeb7a72a04d33e7be2428f646671ce4045dcbc0191b1e - image: gcr.io/repo-automation-bots/owlbot-python:latest - + image: gcr.io/repo-automation-bots/owlbot-python:latest + digest: sha256:4c981a6b6f2b8914a448d7b3a01688365be03e3ed26dfee399a6aa77fb112eaa diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1bbd78783..4f00c7cff 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -26,6 +26,6 @@ repos: hooks: - id: black - repo: https://gitlab.com/pycqa/flake8 - rev: 3.9.1 + rev: 3.9.2 hooks: - id: flake8 From bd7dbdae5c972b16bafc53c67911eeaa3255a880 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 20 May 2021 09:56:53 +0200 Subject: [PATCH 107/230] feat: detect obsolete BQ Storage extra at runtime (#666) * feat: detect obsolete BQ Storage extra at runtime * Cover the changes with unit tests * Skip BQ Storage version 
tests if extra missing * Rename and improve _create_bqstorage_client() The method is renamed to _ensure_bqstorage_client() and now performs a check if BQ Storage dependency is recent enough. * Remove BQ Storage check from dbapi.Cursor The check is now performed in dbapi.Connection, which is sufficient. * Remove BQ Storage check in _pandas_helpers The methods in higher layers already do the same check before a BQ Storage client instance is passed to _pandas_helpers._download_table_bqstorage() helper. * Simplify BQ Storage client factory in magics Lean more heavily on client._ensure_bqstorage_client() to de-duplicate logic. * Cover missing code lines with tests --- google/cloud/bigquery/__init__.py | 3 + google/cloud/bigquery/_helpers.py | 30 +++++++++ google/cloud/bigquery/client.py | 57 +++++++++++++++-- google/cloud/bigquery/dbapi/connection.py | 6 +- google/cloud/bigquery/exceptions.py | 21 +++++++ google/cloud/bigquery/magics/magics.py | 11 ++-- google/cloud/bigquery/table.py | 14 ++++- tests/unit/test__helpers.py | 38 ++++++++++++ tests/unit/test_client.py | 76 +++++++++++++++++++++-- tests/unit/test_dbapi_connection.py | 20 +++++- tests/unit/test_dbapi_cursor.py | 12 +++- tests/unit/test_magics.py | 44 ++++++++++++- tests/unit/test_table.py | 61 +++++++++++++++--- 13 files changed, 357 insertions(+), 36 deletions(-) create mode 100644 google/cloud/bigquery/exceptions.py diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index f609468f5..ec08b2c84 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -39,6 +39,7 @@ from google.cloud.bigquery import enums from google.cloud.bigquery.enums import SqlTypeNames from google.cloud.bigquery.enums import StandardSqlDataTypes +from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions from 
google.cloud.bigquery.external_config import BigtableColumnFamily @@ -152,6 +153,8 @@ "WriteDisposition", # EncryptionConfiguration "EncryptionConfiguration", + # Custom exceptions + "LegacyBigQueryStorageError", ] diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 4fe29291d..7602483c2 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -25,6 +25,10 @@ from google.cloud._helpers import _RFC3339_MICROS from google.cloud._helpers import _RFC3339_NO_FRACTION from google.cloud._helpers import _to_bytes +import pkg_resources + +from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + _RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f" _TIMEONLY_WO_MICROS = "%H:%M:%S" @@ -36,6 +40,32 @@ re.VERBOSE, ) +_MIN_BQ_STORAGE_VERSION = pkg_resources.parse_version("2.0.0") + + +def _verify_bq_storage_version(): + """Verify that a recent enough version of BigQuery Storage extra is installed. + + The function assumes that google-cloud-bigquery-storage extra is installed, and + should thus be used in places where this assumption holds. + + Because `pip` can install an outdated version of this extra despite the constraints + in setup.py, the the calling code can use this helper to verify the version + compatibility at runtime. + """ + from google.cloud import bigquery_storage + + installed_version = pkg_resources.parse_version( + getattr(bigquery_storage, "__version__", "legacy") + ) + + if installed_version < _MIN_BQ_STORAGE_VERSION: + msg = ( + "Dependency google-cloud-bigquery-storage is outdated, please upgrade " + f"it to version >= 2.0.0 (version found: {installed_version})." 
+ ) + raise LegacyBigQueryStorageError(msg) + def _not_null(value, field): """Check whether 'value' should be coerced to 'field' type.""" diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 8d0acb867..7ef3795a8 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -50,16 +50,25 @@ from google.cloud import exceptions # pytype: disable=import-error from google.cloud.client import ClientWithProject # pytype: disable=import-error +try: + from google.cloud.bigquery_storage_v1.services.big_query_read.client import ( + DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO, + ) +except ImportError: + DEFAULT_BQSTORAGE_CLIENT_INFO = None + from google.cloud.bigquery._helpers import _del_sub_prop from google.cloud.bigquery._helpers import _get_sub_prop from google.cloud.bigquery._helpers import _record_field_to_json from google.cloud.bigquery._helpers import _str_or_none +from google.cloud.bigquery._helpers import _verify_bq_storage_version from google.cloud.bigquery._helpers import _verify_job_config_type from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _pandas_helpers from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job from google.cloud.bigquery.job import ( @@ -445,15 +454,38 @@ def dataset(self, dataset_id: str, project: str = None) -> DatasetReference: ) return DatasetReference(project, dataset_id) - def _create_bqstorage_client(self): + def _ensure_bqstorage_client( + self, + bqstorage_client: Optional[ + "google.cloud.bigquery_storage.BigQueryReadClient" + ] = None, + client_options: Optional[google.api_core.client_options.ClientOptions] = None, + client_info: Optional[ + 
"google.api_core.gapic_v1.client_info.ClientInfo" + ] = DEFAULT_BQSTORAGE_CLIENT_INFO, + ) -> Optional["google.cloud.bigquery_storage.BigQueryReadClient"]: """Create a BigQuery Storage API client using this client's credentials. - If a client cannot be created due to missing dependencies, raise a - warning and return ``None``. + If a client cannot be created due to a missing or outdated dependency + `google-cloud-bigquery-storage`, raise a warning and return ``None``. + + If the `bqstorage_client` argument is not ``None``, still perform the version + check and return the argument back to the caller if the check passes. If it + fails, raise a warning and return ``None``. + + Args: + bqstorage_client: + An existing BigQuery Storage client instance to check for version + compatibility. If ``None``, a new instance is created and returned. + client_options: + Custom options used with a new BigQuery Storage client instance if one + is created. + client_info: + The client info used with a new BigQuery Storage client instance if one + is created. Returns: - Optional[google.cloud.bigquery_storage.BigQueryReadClient]: - A BigQuery Storage API client. + A BigQuery Storage API client. 
""" try: from google.cloud import bigquery_storage @@ -464,7 +496,20 @@ def _create_bqstorage_client(self): ) return None - return bigquery_storage.BigQueryReadClient(credentials=self._credentials) + try: + _verify_bq_storage_version() + except LegacyBigQueryStorageError as exc: + warnings.warn(str(exc)) + return None + + if bqstorage_client is None: + bqstorage_client = bigquery_storage.BigQueryReadClient( + credentials=self._credentials, + client_options=client_options, + client_info=client_info, + ) + + return bqstorage_client def _dataset_from_arg(self, dataset): if isinstance(dataset, str): diff --git a/google/cloud/bigquery/dbapi/connection.py b/google/cloud/bigquery/dbapi/connection.py index 459fc82aa..66dee7dfb 100644 --- a/google/cloud/bigquery/dbapi/connection.py +++ b/google/cloud/bigquery/dbapi/connection.py @@ -47,12 +47,14 @@ def __init__(self, client=None, bqstorage_client=None): else: self._owns_client = False + # A warning is already raised by the BQ Storage client factory factory if + # instantiation fails, or if the given BQ Storage client instance is outdated. if bqstorage_client is None: - # A warning is already raised by the factory if instantiation fails. - bqstorage_client = client._create_bqstorage_client() + bqstorage_client = client._ensure_bqstorage_client() self._owns_bqstorage_client = bqstorage_client is not None else: self._owns_bqstorage_client = False + bqstorage_client = client._ensure_bqstorage_client(bqstorage_client) self._client = client self._bqstorage_client = bqstorage_client diff --git a/google/cloud/bigquery/exceptions.py b/google/cloud/bigquery/exceptions.py new file mode 100644 index 000000000..6e5c27eb1 --- /dev/null +++ b/google/cloud/bigquery/exceptions.py @@ -0,0 +1,21 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class BigQueryError(Exception): + """Base class for all custom exceptions defined by the BigQuery client.""" + + +class LegacyBigQueryStorageError(BigQueryError): + """Raised when too old a version of BigQuery Storage extra is detected at runtime.""" diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 474d9a74a..2b8c2928e 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -644,7 +644,7 @@ def _cell_magic(line, query): bqstorage_client_options.api_endpoint = args.bqstorage_api_endpoint bqstorage_client = _make_bqstorage_client( - use_bqstorage_api, context.credentials, bqstorage_client_options, + client, use_bqstorage_api, bqstorage_client_options, ) close_transports = functools.partial(_close_transports, client, bqstorage_client) @@ -762,12 +762,12 @@ def _split_args_line(line): return params_option_value, rest_of_args -def _make_bqstorage_client(use_bqstorage_api, credentials, client_options): +def _make_bqstorage_client(client, use_bqstorage_api, client_options): if not use_bqstorage_api: return None try: - from google.cloud import bigquery_storage + from google.cloud import bigquery_storage # noqa: F401 except ImportError as err: customized_error = ImportError( "The default BigQuery Storage API client cannot be used, install " @@ -785,10 +785,9 @@ def _make_bqstorage_client(use_bqstorage_api, credentials, client_options): ) raise customized_error from err - return bigquery_storage.BigQueryReadClient( - credentials=credentials, - 
client_info=gapic_client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT), + return client._ensure_bqstorage_client( client_options=client_options, + client_info=gapic_client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT), ) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index b91c91a39..b12209252 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -41,6 +41,7 @@ import google.cloud._helpers from google.cloud.bigquery import _helpers from google.cloud.bigquery import _pandas_helpers +from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields @@ -1519,6 +1520,17 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): ) return False + try: + from google.cloud import bigquery_storage # noqa: F401 + except ImportError: + return False + + try: + _helpers._verify_bq_storage_version() + except LegacyBigQueryStorageError as exc: + warnings.warn(str(exc)) + return False + return True def _get_next_page_response(self): @@ -1655,7 +1667,7 @@ def to_arrow( owns_bqstorage_client = False if not bqstorage_client and create_bqstorage_client: - bqstorage_client = self.client._create_bqstorage_client() + bqstorage_client = self.client._ensure_bqstorage_client() owns_bqstorage_client = bqstorage_client is not None try: diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 2437f3568..0ac76d424 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -19,6 +19,44 @@ import mock +try: + from google.cloud import bigquery_storage +except ImportError: # pragma: NO COVER + bigquery_storage = None + + +@unittest.skipIf(bigquery_storage is None, "Requires `google-cloud-bigquery-storage`") +class Test_verify_bq_storage_version(unittest.TestCase): + def _call_fut(self): + 
from google.cloud.bigquery._helpers import _verify_bq_storage_version + + return _verify_bq_storage_version() + + def test_raises_no_error_w_recent_bqstorage(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + with mock.patch("google.cloud.bigquery_storage.__version__", new="2.0.0"): + try: + self._call_fut() + except LegacyBigQueryStorageError: # pragma: NO COVER + self.fail("Legacy error raised with a non-legacy dependency version.") + + def test_raises_error_w_legacy_bqstorage(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + with mock.patch("google.cloud.bigquery_storage.__version__", new="1.9.9"): + with self.assertRaises(LegacyBigQueryStorageError): + self._call_fut() + + def test_raises_error_w_unknown_bqstorage_version(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + with mock.patch("google.cloud.bigquery_storage", autospec=True) as fake_module: + del fake_module.__version__ + error_pattern = r"version found: legacy" + with self.assertRaisesRegex(LegacyBigQueryStorageError, error_pattern): + self._call_fut() + class Test_not_null(unittest.TestCase): def _call_fut(self, value, field): diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 8f535145b..1346a1ef6 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -822,7 +822,7 @@ def test_get_dataset(self): @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) - def test_create_bqstorage_client(self): + def test_ensure_bqstorage_client_creating_new_instance(self): mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) mock_client_instance = object() mock_client.return_value = mock_client_instance @@ -832,12 +832,19 @@ def test_create_bqstorage_client(self): with mock.patch( "google.cloud.bigquery_storage.BigQueryReadClient", mock_client ): - bqstorage_client = client._create_bqstorage_client() + bqstorage_client = 
client._ensure_bqstorage_client( + client_options=mock.sentinel.client_options, + client_info=mock.sentinel.client_info, + ) self.assertIs(bqstorage_client, mock_client_instance) - mock_client.assert_called_once_with(credentials=creds) + mock_client.assert_called_once_with( + credentials=creds, + client_options=mock.sentinel.client_options, + client_info=mock.sentinel.client_info, + ) - def test_create_bqstorage_client_missing_dependency(self): + def test_ensure_bqstorage_client_missing_dependency(self): creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -850,7 +857,7 @@ def fail_bqstorage_import(name, globals, locals, fromlist, level): no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) with no_bqstorage, warnings.catch_warnings(record=True) as warned: - bqstorage_client = client._create_bqstorage_client() + bqstorage_client = client._ensure_bqstorage_client() self.assertIsNone(bqstorage_client) matching_warnings = [ @@ -861,6 +868,65 @@ def fail_bqstorage_import(name, globals, locals, fromlist, level): ] assert matching_warnings, "Missing dependency warning not raised." + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_ensure_bqstorage_client_obsolete_dependency(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + + patcher = mock.patch( + "google.cloud.bigquery.client._verify_bq_storage_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + bqstorage_client = client._ensure_bqstorage_client() + + self.assertIsNone(bqstorage_client) + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." 
+ + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_ensure_bqstorage_client_existing_client_check_passes(self): + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + mock_storage_client = mock.sentinel.mock_storage_client + + bqstorage_client = client._ensure_bqstorage_client( + bqstorage_client=mock_storage_client + ) + + self.assertIs(bqstorage_client, mock_storage_client) + + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_ensure_bqstorage_client_existing_client_check_fails(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + mock_storage_client = mock.sentinel.mock_storage_client + + patcher = mock.patch( + "google.cloud.bigquery.client._verify_bq_storage_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + bqstorage_client = client._ensure_bqstorage_client(mock_storage_client) + + self.assertIsNone(bqstorage_client) + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." 
+ def test_create_routine_w_minimal_resource(self): from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference diff --git a/tests/unit/test_dbapi_connection.py b/tests/unit/test_dbapi_connection.py index 74da318bf..0576cad38 100644 --- a/tests/unit/test_dbapi_connection.py +++ b/tests/unit/test_dbapi_connection.py @@ -51,7 +51,7 @@ def test_ctor_wo_bqstorage_client(self): from google.cloud.bigquery.dbapi import Connection mock_client = self._mock_client() - mock_client._create_bqstorage_client.return_value = None + mock_client._ensure_bqstorage_client.return_value = None connection = self._make_one(client=mock_client) self.assertIsInstance(connection, Connection) @@ -66,9 +66,15 @@ def test_ctor_w_bqstorage_client(self): mock_client = self._mock_client() mock_bqstorage_client = self._mock_bqstorage_client() + mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client + connection = self._make_one( client=mock_client, bqstorage_client=mock_bqstorage_client, ) + + mock_client._ensure_bqstorage_client.assert_called_once_with( + mock_bqstorage_client + ) self.assertIsInstance(connection, Connection) self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, mock_bqstorage_client) @@ -92,9 +98,11 @@ def test_connect_w_client(self): mock_client = self._mock_client() mock_bqstorage_client = self._mock_bqstorage_client() - mock_client._create_bqstorage_client.return_value = mock_bqstorage_client + mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client connection = connect(client=mock_client) + + mock_client._ensure_bqstorage_client.assert_called_once_with() self.assertIsInstance(connection, Connection) self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, mock_bqstorage_client) @@ -108,9 +116,15 @@ def test_connect_w_both_clients(self): mock_client = self._mock_client() mock_bqstorage_client = self._mock_bqstorage_client() 
+ mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client + connection = connect( client=mock_client, bqstorage_client=mock_bqstorage_client, ) + + mock_client._ensure_bqstorage_client.assert_called_once_with( + mock_bqstorage_client + ) self.assertIsInstance(connection, Connection) self.assertIs(connection._client, mock_client) self.assertIs(connection._bqstorage_client, mock_bqstorage_client) @@ -140,7 +154,7 @@ def test_close_closes_all_created_bigquery_clients(self): return_value=client, ) bqstorage_client_patcher = mock.patch.object( - client, "_create_bqstorage_client", return_value=bqstorage_client, + client, "_ensure_bqstorage_client", return_value=bqstorage_client, ) with client_patcher, bqstorage_client_patcher: diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index 55e453254..a2d6693d0 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -72,7 +72,7 @@ def _mock_client( mock_client._default_query_job_config = default_query_job_config # Assure that the REST client gets used, not the BQ Storage client. 
- mock_client._create_bqstorage_client.return_value = None + mock_client._ensure_bqstorage_client.return_value = None return mock_client @@ -311,6 +311,7 @@ def test_fetchall_w_bqstorage_client_fetch_success(self): mock_bqstorage_client = self._mock_bqstorage_client( stream_count=1, rows=bqstorage_streamed_rows, ) + mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client connection = dbapi.connect( client=mock_client, bqstorage_client=mock_bqstorage_client, @@ -341,6 +342,7 @@ def test_fetchall_w_bqstorage_client_fetch_no_rows(self): mock_client = self._mock_client(rows=[]) mock_bqstorage_client = self._mock_bqstorage_client(stream_count=0) + mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client connection = dbapi.connect( client=mock_client, bqstorage_client=mock_bqstorage_client, @@ -365,7 +367,11 @@ def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self): row_data = [table.Row([1.1, 1.2], {"foo": 0, "bar": 1})] + def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): + return bqstorage_client + mock_client = self._mock_client(rows=row_data) + mock_client._ensure_bqstorage_client.side_effect = fake_ensure_bqstorage_client mock_bqstorage_client = self._mock_bqstorage_client( stream_count=1, rows=row_data, ) @@ -396,7 +402,11 @@ def test_fetchall_w_bqstorage_client_no_arrow_compression(self): row_data = [table.Row([1.2, 1.1], {"bar": 1, "foo": 0})] bqstorage_streamed_rows = [{"bar": _to_pyarrow(1.2), "foo": _to_pyarrow(1.1)}] + def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): + return bqstorage_client + mock_client = self._mock_client(rows=row_data) + mock_client._ensure_bqstorage_client.side_effect = fake_ensure_bqstorage_client mock_bqstorage_client = self._mock_bqstorage_client( stream_count=1, rows=bqstorage_streamed_rows, ) diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index ff41fe720..5e9bf28a9 100644 --- a/tests/unit/test_magics.py +++ 
b/tests/unit/test_magics.py @@ -317,7 +317,10 @@ def test__make_bqstorage_client_false(): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) - got = magics._make_bqstorage_client(False, credentials_mock, {}) + test_client = bigquery.Client( + project="test_project", credentials=credentials_mock, location="test_location" + ) + got = magics._make_bqstorage_client(test_client, False, {}) assert got is None @@ -328,7 +331,10 @@ def test__make_bqstorage_client_true(): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) - got = magics._make_bqstorage_client(True, credentials_mock, {}) + test_client = bigquery.Client( + project="test_project", credentials=credentials_mock, location="test_location" + ) + got = magics._make_bqstorage_client(test_client, True, {}) assert isinstance(got, bigquery_storage.BigQueryReadClient) @@ -336,15 +342,46 @@ def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage): credentials_mock = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + test_client = bigquery.Client( + project="test_project", credentials=credentials_mock, location="test_location" + ) with pytest.raises(ImportError) as exc_context, missing_bq_storage: - magics._make_bqstorage_client(True, credentials_mock, {}) + magics._make_bqstorage_client(test_client, True, {}) error_msg = str(exc_context.value) assert "google-cloud-bigquery-storage" in error_msg assert "pyarrow" in error_msg +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test__make_bqstorage_client_true_obsolete_dependency(): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + credentials_mock = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + test_client = bigquery.Client( + project="test_project", credentials=credentials_mock, location="test_location" + ) + + patcher = 
mock.patch( + "google.cloud.bigquery.client._verify_bq_storage_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + got = magics._make_bqstorage_client(test_client, True, {}) + + assert got is None + + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." + + @pytest.mark.skipif( bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) @@ -887,6 +924,7 @@ def test_bigquery_magic_w_table_id_and_bqstorage_client(): table_id = "bigquery-public-data.samples.shakespeare" with default_patch, client_patch as client_mock, bqstorage_client_patch: + client_mock()._ensure_bqstorage_client.return_value = bqstorage_instance_mock client_mock().list_rows.return_value = row_iterator_mock ip.run_cell_magic("bigquery", "--max_results=5", table_id) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index ce4a15761..0f2ab00c1 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -24,6 +24,7 @@ import pytz import google.api_core.exceptions +from test_utils.imports import maybe_fail_import try: from google.cloud import bigquery_storage @@ -1768,6 +1769,48 @@ def test__validate_bqstorage_returns_false_when_completely_cached(self): ) ) + def test__validate_bqstorage_returns_false_if_missing_dependency(self): + iterator = self._make_one(first_page_response=None) # not cached + + def fail_bqstorage_import(name, globals, locals, fromlist, level): + # NOTE: *very* simplified, assuming a straightforward absolute import + return "bigquery_storage" in name or ( + fromlist is not None and "bigquery_storage" in fromlist + ) + + no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import) + + with no_bqstorage: + result = iterator._validate_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + + self.assertFalse(result) + + 
@unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self): + from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError + + iterator = self._make_one(first_page_response=None) # not cached + + patcher = mock.patch( + "google.cloud.bigquery.table._helpers._verify_bq_storage_version", + side_effect=LegacyBigQueryStorageError("BQ Storage too old"), + ) + with patcher, warnings.catch_warnings(record=True) as warned: + result = iterator._validate_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + + self.assertFalse(result) + + matching_warnings = [ + warning for warning in warned if "BQ Storage too old" in str(warning) + ] + assert matching_warnings, "Obsolete dependency warning not raised." + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_to_arrow(self): from google.cloud.bigquery.schema import SchemaField @@ -2003,7 +2046,7 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): and "REST" in str(warning) ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") - mock_client._create_bqstorage_client.assert_not_called() + mock_client._ensure_bqstorage_client.assert_not_called() @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( @@ -2099,7 +2142,7 @@ def test_to_arrow_w_bqstorage_creates_client(self): bqstorage_client._transport = mock.create_autospec( big_query_read_grpc_transport.BigQueryReadGrpcTransport ) - mock_client._create_bqstorage_client.return_value = bqstorage_client + mock_client._ensure_bqstorage_client.return_value = bqstorage_client session = bigquery_storage.types.ReadSession() bqstorage_client.create_read_session.return_value = session row_iterator = mut.RowIterator( @@ -2114,11 +2157,11 @@ def test_to_arrow_w_bqstorage_creates_client(self): table=mut.TableReference.from_string("proj.dset.tbl"), ) 
row_iterator.to_arrow(create_bqstorage_client=True) - mock_client._create_bqstorage_client.assert_called_once() + mock_client._ensure_bqstorage_client.assert_called_once() bqstorage_client._transport.grpc_channel.close.assert_called_once() @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") - def test_to_arrow_create_bqstorage_client_wo_bqstorage(self): + def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): from google.cloud.bigquery.schema import SchemaField schema = [ @@ -2133,14 +2176,14 @@ def test_to_arrow_create_bqstorage_client_wo_bqstorage(self): api_request = mock.Mock(return_value={"rows": rows}) mock_client = _mock_client() - mock_client._create_bqstorage_client.return_value = None + mock_client._ensure_bqstorage_client.return_value = None row_iterator = self._make_one(mock_client, api_request, path, schema) tbl = row_iterator.to_arrow(create_bqstorage_client=True) # The client attempted to create a BQ Storage client, and even though # that was not possible, results were still returned without errors. 
- mock_client._create_bqstorage_client.assert_called_once() + mock_client._ensure_bqstorage_client.assert_called_once() self.assertIsInstance(tbl, pyarrow.Table) self.assertEqual(tbl.num_rows, 2) @@ -2824,7 +2867,7 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): and "REST" in str(warning) ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") - mock_client._create_bqstorage_client.assert_not_called() + mock_client._ensure_bqstorage_client.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf( @@ -2839,7 +2882,7 @@ def test_to_dataframe_w_bqstorage_creates_client(self): bqstorage_client._transport = mock.create_autospec( big_query_read_grpc_transport.BigQueryReadGrpcTransport ) - mock_client._create_bqstorage_client.return_value = bqstorage_client + mock_client._ensure_bqstorage_client.return_value = bqstorage_client session = bigquery_storage.types.ReadSession() bqstorage_client.create_read_session.return_value = session row_iterator = mut.RowIterator( @@ -2854,7 +2897,7 @@ def test_to_dataframe_w_bqstorage_creates_client(self): table=mut.TableReference.from_string("proj.dset.tbl"), ) row_iterator.to_dataframe(create_bqstorage_client=True) - mock_client._create_bqstorage_client.assert_called_once() + mock_client._ensure_bqstorage_client.assert_called_once() bqstorage_client._transport.grpc_channel.close.assert_called_once() @unittest.skipIf(pandas is None, "Requires `pandas`") From e983bda009e006e7544089ea2ceb15f01831dffb Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 20 May 2021 22:24:04 +0000 Subject: [PATCH 108/230] chore: upgrade gapic-generator-python to 0.46.3 (#664) PiperOrigin-RevId: 373649163 Source-Link: https://github.com/googleapis/googleapis/commit/7e1b14e6c7a9ab96d2db7e4a131981f162446d34 Source-Link: https://github.com/googleapis/googleapis-gen/commit/0a3c7d272d697796db75857bac73905c68e498c3 --- 
google/cloud/bigquery_v2/__init__.py | 2 - google/cloud/bigquery_v2/gapic_metadata.json | 63 +++ google/cloud/bigquery_v2/types/__init__.py | 2 - .../bigquery_v2/types/encryption_config.py | 10 +- google/cloud/bigquery_v2/types/model.py | 444 ++++++------------ .../bigquery_v2/types/model_reference.py | 11 +- .../cloud/bigquery_v2/types/standard_sql.py | 9 +- .../bigquery_v2/types/table_reference.py | 11 +- owlbot.py | 17 +- tests/__init__.py | 15 + tests/unit/__init__.py | 4 +- tests/unit/gapic/__init__.py | 15 + 12 files changed, 271 insertions(+), 332 deletions(-) create mode 100644 google/cloud/bigquery_v2/gapic_metadata.json create mode 100644 tests/unit/gapic/__init__.py diff --git a/google/cloud/bigquery_v2/__init__.py b/google/cloud/bigquery_v2/__init__.py index ebcc26bef..476bd5747 100644 --- a/google/cloud/bigquery_v2/__init__.py +++ b/google/cloud/bigquery_v2/__init__.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - # Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -29,7 +28,6 @@ from .types.standard_sql import StandardSqlStructType from .types.table_reference import TableReference - __all__ = ( "DeleteModelRequest", "EncryptionConfiguration", diff --git a/google/cloud/bigquery_v2/gapic_metadata.json b/google/cloud/bigquery_v2/gapic_metadata.json new file mode 100644 index 000000000..3251a2630 --- /dev/null +++ b/google/cloud/bigquery_v2/gapic_metadata.json @@ -0,0 +1,63 @@ + { + "comment": "This file maps proto services/RPCs to the corresponding library clients/methods", + "language": "python", + "libraryPackage": "google.cloud.bigquery_v2", + "protoPackage": "google.cloud.bigquery.v2", + "schema": "1.0", + "services": { + "ModelService": { + "clients": { + "grpc": { + "libraryClient": "ModelServiceClient", + "rpcs": { + "DeleteModel": { + "methods": [ + "delete_model" + ] + }, + "GetModel": { + "methods": [ + "get_model" + ] + }, + "ListModels": { + "methods": [ + "list_models" + ] + }, + "PatchModel": { + 
"methods": [ + "patch_model" + ] + } + } + }, + "grpc-async": { + "libraryClient": "ModelServiceAsyncClient", + "rpcs": { + "DeleteModel": { + "methods": [ + "delete_model" + ] + }, + "GetModel": { + "methods": [ + "get_model" + ] + }, + "ListModels": { + "methods": [ + "list_models" + ] + }, + "PatchModel": { + "methods": [ + "patch_model" + ] + } + } + } + } + } + } +} diff --git a/google/cloud/bigquery_v2/types/__init__.py b/google/cloud/bigquery_v2/types/__init__.py index b76e65c65..9c850dca1 100644 --- a/google/cloud/bigquery_v2/types/__init__.py +++ b/google/cloud/bigquery_v2/types/__init__.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - # Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # - from .encryption_config import EncryptionConfiguration from .model import ( DeleteModelRequest, diff --git a/google/cloud/bigquery_v2/types/encryption_config.py b/google/cloud/bigquery_v2/types/encryption_config.py index 2d801bde3..4b9139733 100644 --- a/google/cloud/bigquery_v2/types/encryption_config.py +++ b/google/cloud/bigquery_v2/types/encryption_config.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - # Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,11 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. # - import proto # type: ignore - -from google.protobuf import wrappers_pb2 as wrappers # type: ignore +from google.protobuf import wrappers_pb2 # type: ignore __protobuf__ = proto.module( @@ -28,7 +25,6 @@ class EncryptionConfiguration(proto.Message): r""" - Attributes: kms_key_name (google.protobuf.wrappers_pb2.StringValue): Optional. Describes the Cloud KMS encryption @@ -38,7 +34,9 @@ class EncryptionConfiguration(proto.Message): this encryption key. 
""" - kms_key_name = proto.Field(proto.MESSAGE, number=1, message=wrappers.StringValue,) + kms_key_name = proto.Field( + proto.MESSAGE, number=1, message=wrappers_pb2.StringValue, + ) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery_v2/types/model.py b/google/cloud/bigquery_v2/types/model.py index 8ae158b64..17e101d25 100644 --- a/google/cloud/bigquery_v2/types/model.py +++ b/google/cloud/bigquery_v2/types/model.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - # Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,16 +13,14 @@ # See the License for the specific language governing permissions and # limitations under the License. # - import proto # type: ignore - from google.cloud.bigquery_v2.types import encryption_config from google.cloud.bigquery_v2.types import model_reference as gcb_model_reference from google.cloud.bigquery_v2.types import standard_sql from google.cloud.bigquery_v2.types import table_reference -from google.protobuf import timestamp_pb2 as timestamp # type: ignore -from google.protobuf import wrappers_pb2 as wrappers # type: ignore +from google.protobuf import timestamp_pb2 # type: ignore +from google.protobuf import wrappers_pb2 # type: ignore __protobuf__ = proto.module( @@ -41,7 +38,6 @@ class Model(proto.Message): r""" - Attributes: etag (str): Output only. A hash of this resource. 
@@ -251,7 +247,7 @@ class FeedbackType(proto.Enum): EXPLICIT = 2 class SeasonalPeriod(proto.Message): - r"""""" + r""" """ class SeasonalPeriodType(proto.Enum): r"""""" @@ -264,7 +260,7 @@ class SeasonalPeriodType(proto.Enum): YEARLY = 6 class KmeansEnums(proto.Message): - r"""""" + r""" """ class KmeansInitializationMethod(proto.Enum): r"""Indicates the method used to initialize the centroids for @@ -293,22 +289,20 @@ class RegressionMetrics(proto.Message): """ mean_absolute_error = proto.Field( - proto.MESSAGE, number=1, message=wrappers.DoubleValue, + proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, ) - mean_squared_error = proto.Field( - proto.MESSAGE, number=2, message=wrappers.DoubleValue, + proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, ) - mean_squared_log_error = proto.Field( - proto.MESSAGE, number=3, message=wrappers.DoubleValue, + proto.MESSAGE, number=3, message=wrappers_pb2.DoubleValue, ) - median_absolute_error = proto.Field( - proto.MESSAGE, number=4, message=wrappers.DoubleValue, + proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue, + ) + r_squared = proto.Field( + proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, ) - - r_squared = proto.Field(proto.MESSAGE, number=5, message=wrappers.DoubleValue,) class AggregateClassificationMetrics(proto.Message): r"""Aggregate metrics for classification/classifier models. For @@ -350,19 +344,25 @@ class AggregateClassificationMetrics(proto.Message): is a macro-averaged metric. 
""" - precision = proto.Field(proto.MESSAGE, number=1, message=wrappers.DoubleValue,) - - recall = proto.Field(proto.MESSAGE, number=2, message=wrappers.DoubleValue,) - - accuracy = proto.Field(proto.MESSAGE, number=3, message=wrappers.DoubleValue,) - - threshold = proto.Field(proto.MESSAGE, number=4, message=wrappers.DoubleValue,) - - f1_score = proto.Field(proto.MESSAGE, number=5, message=wrappers.DoubleValue,) - - log_loss = proto.Field(proto.MESSAGE, number=6, message=wrappers.DoubleValue,) - - roc_auc = proto.Field(proto.MESSAGE, number=7, message=wrappers.DoubleValue,) + precision = proto.Field( + proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, + ) + recall = proto.Field(proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue,) + accuracy = proto.Field( + proto.MESSAGE, number=3, message=wrappers_pb2.DoubleValue, + ) + threshold = proto.Field( + proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue, + ) + f1_score = proto.Field( + proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, + ) + log_loss = proto.Field( + proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, + ) + roc_auc = proto.Field( + proto.MESSAGE, number=7, message=wrappers_pb2.DoubleValue, + ) class BinaryClassificationMetrics(proto.Message): r"""Evaluation metrics for binary classification/classifier @@ -382,7 +382,6 @@ class BinaryClassificationMetrics(proto.Message): class BinaryConfusionMatrix(proto.Message): r"""Confusion matrix for binary classification models. 
- Attributes: positive_class_threshold (google.protobuf.wrappers_pb2.DoubleValue): Threshold value used when computing each of @@ -410,52 +409,43 @@ class BinaryConfusionMatrix(proto.Message): """ positive_class_threshold = proto.Field( - proto.MESSAGE, number=1, message=wrappers.DoubleValue, + proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, ) - true_positives = proto.Field( - proto.MESSAGE, number=2, message=wrappers.Int64Value, + proto.MESSAGE, number=2, message=wrappers_pb2.Int64Value, ) - false_positives = proto.Field( - proto.MESSAGE, number=3, message=wrappers.Int64Value, + proto.MESSAGE, number=3, message=wrappers_pb2.Int64Value, ) - true_negatives = proto.Field( - proto.MESSAGE, number=4, message=wrappers.Int64Value, + proto.MESSAGE, number=4, message=wrappers_pb2.Int64Value, ) - false_negatives = proto.Field( - proto.MESSAGE, number=5, message=wrappers.Int64Value, + proto.MESSAGE, number=5, message=wrappers_pb2.Int64Value, ) - precision = proto.Field( - proto.MESSAGE, number=6, message=wrappers.DoubleValue, + proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, + ) + recall = proto.Field( + proto.MESSAGE, number=7, message=wrappers_pb2.DoubleValue, ) - - recall = proto.Field(proto.MESSAGE, number=7, message=wrappers.DoubleValue,) - f1_score = proto.Field( - proto.MESSAGE, number=8, message=wrappers.DoubleValue, + proto.MESSAGE, number=8, message=wrappers_pb2.DoubleValue, ) - accuracy = proto.Field( - proto.MESSAGE, number=9, message=wrappers.DoubleValue, + proto.MESSAGE, number=9, message=wrappers_pb2.DoubleValue, ) aggregate_classification_metrics = proto.Field( proto.MESSAGE, number=1, message="Model.AggregateClassificationMetrics", ) - binary_confusion_matrix_list = proto.RepeatedField( proto.MESSAGE, number=2, message="Model.BinaryClassificationMetrics.BinaryConfusionMatrix", ) - - positive_label = proto.Field(proto.STRING, number=3) - - negative_label = proto.Field(proto.STRING, number=4) + positive_label = proto.Field(proto.STRING, 
number=3,) + negative_label = proto.Field(proto.STRING, number=4,) class MultiClassClassificationMetrics(proto.Message): r"""Evaluation metrics for multi-class classification/classifier @@ -470,7 +460,6 @@ class MultiClassClassificationMetrics(proto.Message): class ConfusionMatrix(proto.Message): r"""Confusion matrix for multi-class classification models. - Attributes: confidence_threshold (google.protobuf.wrappers_pb2.DoubleValue): Confidence threshold used when computing the @@ -481,7 +470,6 @@ class ConfusionMatrix(proto.Message): class Entry(proto.Message): r"""A single entry in the confusion matrix. - Attributes: predicted_label (str): The predicted label. For confidence_threshold > 0, we will @@ -492,15 +480,13 @@ class Entry(proto.Message): label. """ - predicted_label = proto.Field(proto.STRING, number=1) - + predicted_label = proto.Field(proto.STRING, number=1,) item_count = proto.Field( - proto.MESSAGE, number=2, message=wrappers.Int64Value, + proto.MESSAGE, number=2, message=wrappers_pb2.Int64Value, ) class Row(proto.Message): r"""A single row in the confusion matrix. - Attributes: actual_label (str): The original label of this row. @@ -508,8 +494,7 @@ class Row(proto.Message): Info describing predicted label distribution. 
""" - actual_label = proto.Field(proto.STRING, number=1) - + actual_label = proto.Field(proto.STRING, number=1,) entries = proto.RepeatedField( proto.MESSAGE, number=2, @@ -517,9 +502,8 @@ class Row(proto.Message): ) confidence_threshold = proto.Field( - proto.MESSAGE, number=1, message=wrappers.DoubleValue, + proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, ) - rows = proto.RepeatedField( proto.MESSAGE, number=2, @@ -529,7 +513,6 @@ class Row(proto.Message): aggregate_classification_metrics = proto.Field( proto.MESSAGE, number=1, message="Model.AggregateClassificationMetrics", ) - confusion_matrix_list = proto.RepeatedField( proto.MESSAGE, number=2, @@ -538,7 +521,6 @@ class Row(proto.Message): class ClusteringMetrics(proto.Message): r"""Evaluation metrics for clustering models. - Attributes: davies_bouldin_index (google.protobuf.wrappers_pb2.DoubleValue): Davies-Bouldin index. @@ -551,7 +533,6 @@ class ClusteringMetrics(proto.Message): class Cluster(proto.Message): r"""Message containing the information about one cluster. - Attributes: centroid_id (int): Centroid id. @@ -565,7 +546,6 @@ class Cluster(proto.Message): class FeatureValue(proto.Message): r"""Representative value of a single feature within the cluster. - Attributes: feature_column (str): The feature column name. @@ -578,7 +558,6 @@ class FeatureValue(proto.Message): class CategoricalValue(proto.Message): r"""Representative value of a categorical feature. - Attributes: category_counts (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount]): Counts of all categories for the categorical feature. If @@ -590,7 +569,6 @@ class CategoricalValue(proto.Message): class CategoryCount(proto.Message): r"""Represents the count of a single category within the cluster. - Attributes: category (str): The name of category. @@ -599,10 +577,9 @@ class CategoryCount(proto.Message): category within the cluster. 
""" - category = proto.Field(proto.STRING, number=1) - + category = proto.Field(proto.STRING, number=1,) count = proto.Field( - proto.MESSAGE, number=2, message=wrappers.Int64Value, + proto.MESSAGE, number=2, message=wrappers_pb2.Int64Value, ) category_counts = proto.RepeatedField( @@ -611,15 +588,13 @@ class CategoryCount(proto.Message): message="Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount", ) - feature_column = proto.Field(proto.STRING, number=1) - + feature_column = proto.Field(proto.STRING, number=1,) numerical_value = proto.Field( proto.MESSAGE, number=2, oneof="value", - message=wrappers.DoubleValue, + message=wrappers_pb2.DoubleValue, ) - categorical_value = proto.Field( proto.MESSAGE, number=3, @@ -627,24 +602,22 @@ class CategoryCount(proto.Message): message="Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue", ) - centroid_id = proto.Field(proto.INT64, number=1) - + centroid_id = proto.Field(proto.INT64, number=1,) feature_values = proto.RepeatedField( proto.MESSAGE, number=2, message="Model.ClusteringMetrics.Cluster.FeatureValue", ) - - count = proto.Field(proto.MESSAGE, number=3, message=wrappers.Int64Value,) + count = proto.Field( + proto.MESSAGE, number=3, message=wrappers_pb2.Int64Value, + ) davies_bouldin_index = proto.Field( - proto.MESSAGE, number=1, message=wrappers.DoubleValue, + proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, ) - mean_squared_distance = proto.Field( - proto.MESSAGE, number=2, message=wrappers.DoubleValue, + proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, ) - clusters = proto.RepeatedField( proto.MESSAGE, number=3, message="Model.ClusteringMetrics.Cluster", ) @@ -677,24 +650,20 @@ class RankingMetrics(proto.Message): """ mean_average_precision = proto.Field( - proto.MESSAGE, number=1, message=wrappers.DoubleValue, + proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue, ) - mean_squared_error = proto.Field( - proto.MESSAGE, number=2, 
message=wrappers.DoubleValue, + proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, ) - normalized_discounted_cumulative_gain = proto.Field( - proto.MESSAGE, number=3, message=wrappers.DoubleValue, + proto.MESSAGE, number=3, message=wrappers_pb2.DoubleValue, ) - average_rank = proto.Field( - proto.MESSAGE, number=4, message=wrappers.DoubleValue, + proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue, ) class ArimaForecastingMetrics(proto.Message): r"""Model evaluation metrics for ARIMA forecasting models. - Attributes: non_seasonal_order (Sequence[google.cloud.bigquery_v2.types.Model.ArimaOrder]): Non-seasonal order. @@ -737,15 +706,11 @@ class ArimaSingleModelForecastingMetrics(proto.Message): non_seasonal_order = proto.Field( proto.MESSAGE, number=1, message="Model.ArimaOrder", ) - arima_fitting_metrics = proto.Field( proto.MESSAGE, number=2, message="Model.ArimaFittingMetrics", ) - - has_drift = proto.Field(proto.BOOL, number=3) - - time_series_id = proto.Field(proto.STRING, number=4) - + has_drift = proto.Field(proto.BOOL, number=3,) + time_series_id = proto.Field(proto.STRING, number=4,) seasonal_periods = proto.RepeatedField( proto.ENUM, number=5, enum="Model.SeasonalPeriod.SeasonalPeriodType", ) @@ -753,19 +718,14 @@ class ArimaSingleModelForecastingMetrics(proto.Message): non_seasonal_order = proto.RepeatedField( proto.MESSAGE, number=1, message="Model.ArimaOrder", ) - arima_fitting_metrics = proto.RepeatedField( proto.MESSAGE, number=2, message="Model.ArimaFittingMetrics", ) - seasonal_periods = proto.RepeatedField( proto.ENUM, number=3, enum="Model.SeasonalPeriod.SeasonalPeriodType", ) - - has_drift = proto.RepeatedField(proto.BOOL, number=4) - - time_series_id = proto.RepeatedField(proto.STRING, number=5) - + has_drift = proto.RepeatedField(proto.BOOL, number=4,) + time_series_id = proto.RepeatedField(proto.STRING, number=5,) arima_single_model_forecasting_metrics = proto.RepeatedField( proto.MESSAGE, number=6, @@ -800,29 +760,24 @@ class 
EvaluationMetrics(proto.Message): regression_metrics = proto.Field( proto.MESSAGE, number=1, oneof="metrics", message="Model.RegressionMetrics", ) - binary_classification_metrics = proto.Field( proto.MESSAGE, number=2, oneof="metrics", message="Model.BinaryClassificationMetrics", ) - multi_class_classification_metrics = proto.Field( proto.MESSAGE, number=3, oneof="metrics", message="Model.MultiClassClassificationMetrics", ) - clustering_metrics = proto.Field( proto.MESSAGE, number=4, oneof="metrics", message="Model.ClusteringMetrics", ) - ranking_metrics = proto.Field( proto.MESSAGE, number=5, oneof="metrics", message="Model.RankingMetrics", ) - arima_forecasting_metrics = proto.Field( proto.MESSAGE, number=6, @@ -846,7 +801,6 @@ class DataSplitResult(proto.Message): training_table = proto.Field( proto.MESSAGE, number=1, message=table_reference.TableReference, ) - evaluation_table = proto.Field( proto.MESSAGE, number=2, message=table_reference.TableReference, ) @@ -864,15 +818,12 @@ class ArimaOrder(proto.Message): Order of the moving-average part. """ - p = proto.Field(proto.INT64, number=1) - - d = proto.Field(proto.INT64, number=2) - - q = proto.Field(proto.INT64, number=3) + p = proto.Field(proto.INT64, number=1,) + d = proto.Field(proto.INT64, number=2,) + q = proto.Field(proto.INT64, number=3,) class ArimaFittingMetrics(proto.Message): r"""ARIMA model fitting metrics. - Attributes: log_likelihood (float): Log-likelihood. @@ -882,11 +833,9 @@ class ArimaFittingMetrics(proto.Message): Variance. 
""" - log_likelihood = proto.Field(proto.DOUBLE, number=1) - - aic = proto.Field(proto.DOUBLE, number=2) - - variance = proto.Field(proto.DOUBLE, number=3) + log_likelihood = proto.Field(proto.DOUBLE, number=1,) + aic = proto.Field(proto.DOUBLE, number=2,) + variance = proto.Field(proto.DOUBLE, number=3,) class GlobalExplanation(proto.Message): r"""Global explanations containing the top most important @@ -906,7 +855,6 @@ class GlobalExplanation(proto.Message): class Explanation(proto.Message): r"""Explanation for a single feature. - Attributes: feature_name (str): Full name of the feature. For non-numerical features, will @@ -917,21 +865,18 @@ class Explanation(proto.Message): Attribution of feature. """ - feature_name = proto.Field(proto.STRING, number=1) - + feature_name = proto.Field(proto.STRING, number=1,) attribution = proto.Field( - proto.MESSAGE, number=2, message=wrappers.DoubleValue, + proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, ) explanations = proto.RepeatedField( proto.MESSAGE, number=1, message="Model.GlobalExplanation.Explanation", ) - - class_label = proto.Field(proto.STRING, number=2) + class_label = proto.Field(proto.STRING, number=2,) class TrainingRun(proto.Message): r"""Information about a single training query run for the model. - Attributes: training_options (google.cloud.bigquery_v2.types.Model.TrainingRun.TrainingOptions): Options that were used for this training run, @@ -957,7 +902,6 @@ class TrainingRun(proto.Message): class TrainingOptions(proto.Message): r""" - Attributes: max_iterations (int): The maximum number of iterations in training. @@ -1100,131 +1044,88 @@ class TrainingOptions(proto.Message): The max value of non-seasonal p and q. 
""" - max_iterations = proto.Field(proto.INT64, number=1) - + max_iterations = proto.Field(proto.INT64, number=1,) loss_type = proto.Field(proto.ENUM, number=2, enum="Model.LossType",) - - learn_rate = proto.Field(proto.DOUBLE, number=3) - + learn_rate = proto.Field(proto.DOUBLE, number=3,) l1_regularization = proto.Field( - proto.MESSAGE, number=4, message=wrappers.DoubleValue, + proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue, ) - l2_regularization = proto.Field( - proto.MESSAGE, number=5, message=wrappers.DoubleValue, + proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, ) - min_relative_progress = proto.Field( - proto.MESSAGE, number=6, message=wrappers.DoubleValue, + proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, ) - warm_start = proto.Field( - proto.MESSAGE, number=7, message=wrappers.BoolValue, + proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue, ) - early_stop = proto.Field( - proto.MESSAGE, number=8, message=wrappers.BoolValue, + proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue, ) - - input_label_columns = proto.RepeatedField(proto.STRING, number=9) - + input_label_columns = proto.RepeatedField(proto.STRING, number=9,) data_split_method = proto.Field( proto.ENUM, number=10, enum="Model.DataSplitMethod", ) - - data_split_eval_fraction = proto.Field(proto.DOUBLE, number=11) - - data_split_column = proto.Field(proto.STRING, number=12) - + data_split_eval_fraction = proto.Field(proto.DOUBLE, number=11,) + data_split_column = proto.Field(proto.STRING, number=12,) learn_rate_strategy = proto.Field( proto.ENUM, number=13, enum="Model.LearnRateStrategy", ) - - initial_learn_rate = proto.Field(proto.DOUBLE, number=16) - - label_class_weights = proto.MapField(proto.STRING, proto.DOUBLE, number=17) - - user_column = proto.Field(proto.STRING, number=18) - - item_column = proto.Field(proto.STRING, number=19) - + initial_learn_rate = proto.Field(proto.DOUBLE, number=16,) + label_class_weights = proto.MapField(proto.STRING, 
proto.DOUBLE, number=17,) + user_column = proto.Field(proto.STRING, number=18,) + item_column = proto.Field(proto.STRING, number=19,) distance_type = proto.Field( proto.ENUM, number=20, enum="Model.DistanceType", ) - - num_clusters = proto.Field(proto.INT64, number=21) - - model_uri = proto.Field(proto.STRING, number=22) - + num_clusters = proto.Field(proto.INT64, number=21,) + model_uri = proto.Field(proto.STRING, number=22,) optimization_strategy = proto.Field( proto.ENUM, number=23, enum="Model.OptimizationStrategy", ) - - hidden_units = proto.RepeatedField(proto.INT64, number=24) - - batch_size = proto.Field(proto.INT64, number=25) - + hidden_units = proto.RepeatedField(proto.INT64, number=24,) + batch_size = proto.Field(proto.INT64, number=25,) dropout = proto.Field( - proto.MESSAGE, number=26, message=wrappers.DoubleValue, + proto.MESSAGE, number=26, message=wrappers_pb2.DoubleValue, ) - - max_tree_depth = proto.Field(proto.INT64, number=27) - - subsample = proto.Field(proto.DOUBLE, number=28) - + max_tree_depth = proto.Field(proto.INT64, number=27,) + subsample = proto.Field(proto.DOUBLE, number=28,) min_split_loss = proto.Field( - proto.MESSAGE, number=29, message=wrappers.DoubleValue, + proto.MESSAGE, number=29, message=wrappers_pb2.DoubleValue, ) - - num_factors = proto.Field(proto.INT64, number=30) - + num_factors = proto.Field(proto.INT64, number=30,) feedback_type = proto.Field( proto.ENUM, number=31, enum="Model.FeedbackType", ) - wals_alpha = proto.Field( - proto.MESSAGE, number=32, message=wrappers.DoubleValue, + proto.MESSAGE, number=32, message=wrappers_pb2.DoubleValue, ) - kmeans_initialization_method = proto.Field( proto.ENUM, number=33, enum="Model.KmeansEnums.KmeansInitializationMethod", ) - - kmeans_initialization_column = proto.Field(proto.STRING, number=34) - - time_series_timestamp_column = proto.Field(proto.STRING, number=35) - - time_series_data_column = proto.Field(proto.STRING, number=36) - - auto_arima = proto.Field(proto.BOOL, 
number=37) - + kmeans_initialization_column = proto.Field(proto.STRING, number=34,) + time_series_timestamp_column = proto.Field(proto.STRING, number=35,) + time_series_data_column = proto.Field(proto.STRING, number=36,) + auto_arima = proto.Field(proto.BOOL, number=37,) non_seasonal_order = proto.Field( proto.MESSAGE, number=38, message="Model.ArimaOrder", ) - data_frequency = proto.Field( proto.ENUM, number=39, enum="Model.DataFrequency", ) - - include_drift = proto.Field(proto.BOOL, number=41) - + include_drift = proto.Field(proto.BOOL, number=41,) holiday_region = proto.Field( proto.ENUM, number=42, enum="Model.HolidayRegion", ) - - time_series_id_column = proto.Field(proto.STRING, number=43) - - horizon = proto.Field(proto.INT64, number=44) - - preserve_input_structs = proto.Field(proto.BOOL, number=45) - - auto_arima_max_order = proto.Field(proto.INT64, number=46) + time_series_id_column = proto.Field(proto.STRING, number=43,) + horizon = proto.Field(proto.INT64, number=44,) + preserve_input_structs = proto.Field(proto.BOOL, number=45,) + auto_arima_max_order = proto.Field(proto.INT64, number=46,) class IterationResult(proto.Message): r"""Information about a single iteration of the training run. - Attributes: index (google.protobuf.wrappers_pb2.Int32Value): Index of the iteration, 0 based. @@ -1248,7 +1149,6 @@ class IterationResult(proto.Message): class ClusterInfo(proto.Message): r"""Information about a single cluster for clustering model. - Attributes: centroid_id (int): Centroid id. @@ -1260,14 +1160,12 @@ class ClusterInfo(proto.Message): assigned to the cluster. 
""" - centroid_id = proto.Field(proto.INT64, number=1) - + centroid_id = proto.Field(proto.INT64, number=1,) cluster_radius = proto.Field( - proto.MESSAGE, number=2, message=wrappers.DoubleValue, + proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue, ) - cluster_size = proto.Field( - proto.MESSAGE, number=3, message=wrappers.Int64Value, + proto.MESSAGE, number=3, message=wrappers_pb2.Int64Value, ) class ArimaResult(proto.Message): @@ -1287,7 +1185,6 @@ class ArimaResult(proto.Message): class ArimaCoefficients(proto.Message): r"""Arima coefficients. - Attributes: auto_regressive_coefficients (Sequence[float]): Auto-regressive coefficients, an array of @@ -1301,18 +1198,15 @@ class ArimaCoefficients(proto.Message): """ auto_regressive_coefficients = proto.RepeatedField( - proto.DOUBLE, number=1 + proto.DOUBLE, number=1, ) - moving_average_coefficients = proto.RepeatedField( - proto.DOUBLE, number=2 + proto.DOUBLE, number=2, ) - - intercept_coefficient = proto.Field(proto.DOUBLE, number=3) + intercept_coefficient = proto.Field(proto.DOUBLE, number=3,) class ArimaModelInfo(proto.Message): r"""Arima model information. - Attributes: non_seasonal_order (google.cloud.bigquery_v2.types.Model.ArimaOrder): Non-seasonal order. 
@@ -1333,21 +1227,16 @@ class ArimaModelInfo(proto.Message): non_seasonal_order = proto.Field( proto.MESSAGE, number=1, message="Model.ArimaOrder", ) - arima_coefficients = proto.Field( proto.MESSAGE, number=2, message="Model.TrainingRun.IterationResult.ArimaResult.ArimaCoefficients", ) - arima_fitting_metrics = proto.Field( proto.MESSAGE, number=3, message="Model.ArimaFittingMetrics", ) - - has_drift = proto.Field(proto.BOOL, number=4) - - time_series_id = proto.Field(proto.STRING, number=5) - + has_drift = proto.Field(proto.BOOL, number=4,) + time_series_id = proto.Field(proto.STRING, number=5,) seasonal_periods = proto.RepeatedField( proto.ENUM, number=6, @@ -1359,35 +1248,30 @@ class ArimaModelInfo(proto.Message): number=1, message="Model.TrainingRun.IterationResult.ArimaResult.ArimaModelInfo", ) - seasonal_periods = proto.RepeatedField( proto.ENUM, number=2, enum="Model.SeasonalPeriod.SeasonalPeriodType", ) - index = proto.Field(proto.MESSAGE, number=1, message=wrappers.Int32Value,) - + index = proto.Field( + proto.MESSAGE, number=1, message=wrappers_pb2.Int32Value, + ) duration_ms = proto.Field( - proto.MESSAGE, number=4, message=wrappers.Int64Value, + proto.MESSAGE, number=4, message=wrappers_pb2.Int64Value, ) - training_loss = proto.Field( - proto.MESSAGE, number=5, message=wrappers.DoubleValue, + proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue, ) - eval_loss = proto.Field( - proto.MESSAGE, number=6, message=wrappers.DoubleValue, + proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue, ) - - learn_rate = proto.Field(proto.DOUBLE, number=7) - + learn_rate = proto.Field(proto.DOUBLE, number=7,) cluster_infos = proto.RepeatedField( proto.MESSAGE, number=8, message="Model.TrainingRun.IterationResult.ClusterInfo", ) - arima_result = proto.Field( proto.MESSAGE, number=9, @@ -1397,57 +1281,41 @@ class ArimaModelInfo(proto.Message): training_options = proto.Field( proto.MESSAGE, number=1, message="Model.TrainingRun.TrainingOptions", ) - - start_time = 
proto.Field(proto.MESSAGE, number=8, message=timestamp.Timestamp,) - + start_time = proto.Field( + proto.MESSAGE, number=8, message=timestamp_pb2.Timestamp, + ) results = proto.RepeatedField( proto.MESSAGE, number=6, message="Model.TrainingRun.IterationResult", ) - evaluation_metrics = proto.Field( proto.MESSAGE, number=7, message="Model.EvaluationMetrics", ) - data_split_result = proto.Field( proto.MESSAGE, number=9, message="Model.DataSplitResult", ) - global_explanations = proto.RepeatedField( proto.MESSAGE, number=10, message="Model.GlobalExplanation", ) - etag = proto.Field(proto.STRING, number=1) - + etag = proto.Field(proto.STRING, number=1,) model_reference = proto.Field( proto.MESSAGE, number=2, message=gcb_model_reference.ModelReference, ) - - creation_time = proto.Field(proto.INT64, number=5) - - last_modified_time = proto.Field(proto.INT64, number=6) - - description = proto.Field(proto.STRING, number=12) - - friendly_name = proto.Field(proto.STRING, number=14) - - labels = proto.MapField(proto.STRING, proto.STRING, number=15) - - expiration_time = proto.Field(proto.INT64, number=16) - - location = proto.Field(proto.STRING, number=13) - + creation_time = proto.Field(proto.INT64, number=5,) + last_modified_time = proto.Field(proto.INT64, number=6,) + description = proto.Field(proto.STRING, number=12,) + friendly_name = proto.Field(proto.STRING, number=14,) + labels = proto.MapField(proto.STRING, proto.STRING, number=15,) + expiration_time = proto.Field(proto.INT64, number=16,) + location = proto.Field(proto.STRING, number=13,) encryption_configuration = proto.Field( proto.MESSAGE, number=17, message=encryption_config.EncryptionConfiguration, ) - model_type = proto.Field(proto.ENUM, number=7, enum=ModelType,) - training_runs = proto.RepeatedField(proto.MESSAGE, number=9, message=TrainingRun,) - feature_columns = proto.RepeatedField( proto.MESSAGE, number=10, message=standard_sql.StandardSqlField, ) - label_columns = proto.RepeatedField( proto.MESSAGE, 
number=11, message=standard_sql.StandardSqlField, ) @@ -1455,7 +1323,6 @@ class ArimaModelInfo(proto.Message): class GetModelRequest(proto.Message): r""" - Attributes: project_id (str): Required. Project ID of the requested model. @@ -1465,16 +1332,13 @@ class GetModelRequest(proto.Message): Required. Model ID of the requested model. """ - project_id = proto.Field(proto.STRING, number=1) - - dataset_id = proto.Field(proto.STRING, number=2) - - model_id = proto.Field(proto.STRING, number=3) + project_id = proto.Field(proto.STRING, number=1,) + dataset_id = proto.Field(proto.STRING, number=2,) + model_id = proto.Field(proto.STRING, number=3,) class PatchModelRequest(proto.Message): r""" - Attributes: project_id (str): Required. Project ID of the model to patch. @@ -1489,18 +1353,14 @@ class PatchModelRequest(proto.Message): set to default value. """ - project_id = proto.Field(proto.STRING, number=1) - - dataset_id = proto.Field(proto.STRING, number=2) - - model_id = proto.Field(proto.STRING, number=3) - + project_id = proto.Field(proto.STRING, number=1,) + dataset_id = proto.Field(proto.STRING, number=2,) + model_id = proto.Field(proto.STRING, number=3,) model = proto.Field(proto.MESSAGE, number=4, message="Model",) class DeleteModelRequest(proto.Message): r""" - Attributes: project_id (str): Required. Project ID of the model to delete. @@ -1510,16 +1370,13 @@ class DeleteModelRequest(proto.Message): Required. Model ID of the model to delete. """ - project_id = proto.Field(proto.STRING, number=1) - - dataset_id = proto.Field(proto.STRING, number=2) - - model_id = proto.Field(proto.STRING, number=3) + project_id = proto.Field(proto.STRING, number=1,) + dataset_id = proto.Field(proto.STRING, number=2,) + model_id = proto.Field(proto.STRING, number=3,) class ListModelsRequest(proto.Message): r""" - Attributes: project_id (str): Required. Project ID of the models to list. 
@@ -1534,18 +1391,16 @@ class ListModelsRequest(proto.Message): request the next page of results """ - project_id = proto.Field(proto.STRING, number=1) - - dataset_id = proto.Field(proto.STRING, number=2) - - max_results = proto.Field(proto.MESSAGE, number=3, message=wrappers.UInt32Value,) - - page_token = proto.Field(proto.STRING, number=4) + project_id = proto.Field(proto.STRING, number=1,) + dataset_id = proto.Field(proto.STRING, number=2,) + max_results = proto.Field( + proto.MESSAGE, number=3, message=wrappers_pb2.UInt32Value, + ) + page_token = proto.Field(proto.STRING, number=4,) class ListModelsResponse(proto.Message): r""" - Attributes: models (Sequence[google.cloud.bigquery_v2.types.Model]): Models in the requested dataset. Only the following fields @@ -1560,8 +1415,7 @@ def raw_page(self): return self models = proto.RepeatedField(proto.MESSAGE, number=1, message="Model",) - - next_page_token = proto.Field(proto.STRING, number=2) + next_page_token = proto.Field(proto.STRING, number=2,) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery_v2/types/model_reference.py b/google/cloud/bigquery_v2/types/model_reference.py index e3891d6c1..a9ebad613 100644 --- a/google/cloud/bigquery_v2/types/model_reference.py +++ b/google/cloud/bigquery_v2/types/model_reference.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - # Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # - import proto # type: ignore @@ -25,7 +23,6 @@ class ModelReference(proto.Message): r"""Id path of a model. - Attributes: project_id (str): Required. The ID of the project containing @@ -39,11 +36,9 @@ class ModelReference(proto.Message): maximum length is 1,024 characters. 
""" - project_id = proto.Field(proto.STRING, number=1) - - dataset_id = proto.Field(proto.STRING, number=2) - - model_id = proto.Field(proto.STRING, number=3) + project_id = proto.Field(proto.STRING, number=1,) + dataset_id = proto.Field(proto.STRING, number=2,) + model_id = proto.Field(proto.STRING, number=3,) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/google/cloud/bigquery_v2/types/standard_sql.py b/google/cloud/bigquery_v2/types/standard_sql.py index 3bc6afedc..b2191a417 100644 --- a/google/cloud/bigquery_v2/types/standard_sql.py +++ b/google/cloud/bigquery_v2/types/standard_sql.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - # Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # - import proto # type: ignore @@ -63,11 +61,9 @@ class TypeKind(proto.Enum): STRUCT = 17 type_kind = proto.Field(proto.ENUM, number=1, enum=TypeKind,) - array_element_type = proto.Field( proto.MESSAGE, number=2, oneof="sub_type", message="StandardSqlDataType", ) - struct_type = proto.Field( proto.MESSAGE, number=3, oneof="sub_type", message="StandardSqlStructType", ) @@ -75,7 +71,6 @@ class TypeKind(proto.Enum): class StandardSqlField(proto.Message): r"""A field or a column. - Attributes: name (str): Optional. The name of this field. Can be @@ -88,14 +83,12 @@ class StandardSqlField(proto.Message): this "type" field). 
""" - name = proto.Field(proto.STRING, number=1) - + name = proto.Field(proto.STRING, number=1,) type = proto.Field(proto.MESSAGE, number=2, message="StandardSqlDataType",) class StandardSqlStructType(proto.Message): r""" - Attributes: fields (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]): diff --git a/google/cloud/bigquery_v2/types/table_reference.py b/google/cloud/bigquery_v2/types/table_reference.py index d213e8bb6..a0a8ee4c9 100644 --- a/google/cloud/bigquery_v2/types/table_reference.py +++ b/google/cloud/bigquery_v2/types/table_reference.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - # Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,7 +13,6 @@ # See the License for the specific language governing permissions and # limitations under the License. # - import proto # type: ignore @@ -25,7 +23,6 @@ class TableReference(proto.Message): r""" - Attributes: project_id (str): Required. The ID of the project containing @@ -41,11 +38,9 @@ class TableReference(proto.Message): as ``sample_table$20190123``. """ - project_id = proto.Field(proto.STRING, number=1) - - dataset_id = proto.Field(proto.STRING, number=2) - - table_id = proto.Field(proto.STRING, number=3) + project_id = proto.Field(proto.STRING, number=1,) + dataset_id = proto.Field(proto.STRING, number=2,) + table_id = proto.Field(proto.STRING, number=3,) __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/owlbot.py b/owlbot.py index f45c24fbb..476c5ee5d 100644 --- a/owlbot.py +++ b/owlbot.py @@ -24,19 +24,32 @@ default_version = "v2" for library in s.get_staging_dirs(default_version): - # Do not expose ModelServiceClient, as there is no public API endpoint for the - # models service. + # Do not expose ModelServiceClient and ModelServiceAsyncClient, as there + # is no public API endpoint for the models service. 
s.replace( library / f"google/cloud/bigquery_{library.name}/__init__.py", r"from \.services\.model_service import ModelServiceClient", "", ) + + s.replace( + library / f"google/cloud/bigquery_{library.name}/__init__.py", + r"from \.services\.model_service import ModelServiceAsyncClient", + "", + ) + s.replace( library / f"google/cloud/bigquery_{library.name}/__init__.py", r"""["']ModelServiceClient["'],""", "", ) + s.replace( + library / f"google/cloud/bigquery_{library.name}/__init__.py", + r"""["']ModelServiceAsyncClient["'],""", + "", + ) + # Adjust Model docstring so that Sphinx does not think that "predicted_" is # a reference to something, issuing a false warning. s.replace( diff --git a/tests/__init__.py b/tests/__init__.py index e69de29bb..4de65971c 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py index df379f1e9..4de65971c 100644 --- a/tests/unit/__init__.py +++ b/tests/unit/__init__.py @@ -1,4 +1,5 @@ -# Copyright 2016 Google LLC +# -*- coding: utf-8 -*- +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,3 +12,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. +# diff --git a/tests/unit/gapic/__init__.py b/tests/unit/gapic/__init__.py new file mode 100644 index 000000000..4de65971c --- /dev/null +++ b/tests/unit/gapic/__init__.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# From 49557da28b3c1fe2210f7d9e44235ef2d2eeb564 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 21 May 2021 00:26:03 +0200 Subject: [PATCH 109/230] chore(deps): update dependency grpcio to v1.38.0 (#674) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [grpcio](https://grpc.io) | `==1.37.1` -> `==1.38.0` | [![age](https://badges.renovateapi.com/packages/pypi/grpcio/1.38.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/grpcio/1.38.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/grpcio/1.38.0/compatibility-slim/1.37.1)](https://docs.renovatebot.com/merge-confidence/) | 
[![confidence](https://badges.renovateapi.com/packages/pypi/grpcio/1.38.0/confidence-slim/1.37.1)](https://docs.renovatebot.com/merge-confidence/) | --- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻️ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 2dfee39b5..fcb481f48 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.16.1 google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 -grpcio==1.37.1 +grpcio==1.38.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' From 591cdd851bb1321b048a05a378a0ef48d3ade462 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 21 May 2021 11:03:35 +0200 Subject: [PATCH 110/230] fix(tests): invalid path to strptime() (#672) --- samples/snippets/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/conftest.py b/samples/snippets/conftest.py index 0d0299ee5..cb11eb68f 100644 --- a/samples/snippets/conftest.py +++ b/samples/snippets/conftest.py @@ -33,7 +33,7 @@ def resource_prefix() -> str: def resource_name_to_date(resource_name: str): start_date = len(RESOURCE_PREFIX) + 1 date_string = resource_name[start_date : start_date + RESOURCE_DATE_LENGTH] - return 
datetime.strptime(date_string, RESOURCE_DATE_FORMAT) + return datetime.datetime.strptime(date_string, RESOURCE_DATE_FORMAT) @pytest.fixture(scope="session", autouse=True) From 45421e73bfcddb244822e6a5cd43be6bd1ca2256 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Fri, 21 May 2021 10:50:55 -0600 Subject: [PATCH 111/230] feat: Support parameterized NUMERIC, BIGNUMERIC, STRING, and BYTES types (#673) * parse parameterized schema info * Fixed SchemaField repr/key * Fix code duplication between _parse_schema_resource and from_api_repr Move new parameterized-type code from _parse_schema_resource to from_api_repr and implement _parse_schema_resource in terms of from_api_repr. * empty schemas are lists now, just like non-empty schemas. * changed new parameterized-type tests to use from_api_repr Because that's more direct and it uncovered duplicate code. * parameterized the from_api_repr tests and added to_api_repr tests * Test BYTES and _key (repr) too. * Added a round-trip parameterized types schema tests * handle BYTES in _key/repr * blacken * Move _get_int close to use * Updated documentation. * Oops, forgot BIGNUMERIC * Improve argument doc and better argument name to __get_int * doom tables before creating them. * Use max_length in the Python for the REST api maxLength --- google/cloud/bigquery/schema.py | 71 +++++++++++++----- tests/system/test_client.py | 29 ++++++++ tests/unit/test_query.py | 4 +- tests/unit/test_schema.py | 123 ++++++++++++++++++++++++++++++++ 4 files changed, 209 insertions(+), 18 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index cb221d6de..919d78b23 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -67,6 +67,15 @@ class SchemaField(object): policy_tags (Optional[PolicyTagList]): The policy tag list for the field. + precision (Optional[int]): + Precision (number of digits) of fields with NUMERIC or BIGNUMERIC type.
+ + scale (Optional[int]): + Scale (digits after decimal) of fields with NUMERIC or BIGNUMERIC type. + + max_length (Optional[int]): + Maximum length of fields with STRING or BYTES type. + """ def __init__( @@ -77,6 +86,9 @@ def __init__( description=_DEFAULT_VALUE, fields=(), policy_tags=None, + precision=_DEFAULT_VALUE, + scale=_DEFAULT_VALUE, + max_length=_DEFAULT_VALUE, ): self._properties = { "name": name, @@ -86,9 +98,22 @@ def __init__( self._properties["mode"] = mode.upper() if description is not _DEFAULT_VALUE: self._properties["description"] = description + if precision is not _DEFAULT_VALUE: + self._properties["precision"] = precision + if scale is not _DEFAULT_VALUE: + self._properties["scale"] = scale + if max_length is not _DEFAULT_VALUE: + self._properties["maxLength"] = max_length self._fields = tuple(fields) self._policy_tags = policy_tags + @staticmethod + def __get_int(api_repr, name): + v = api_repr.get(name, _DEFAULT_VALUE) + if v is not _DEFAULT_VALUE: + v = int(v) + return v + @classmethod def from_api_repr(cls, api_repr: dict) -> "SchemaField": """Return a ``SchemaField`` object deserialized from a dictionary.
@@ -113,6 +138,9 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": description=description, name=api_repr["name"], policy_tags=PolicyTagList.from_api_repr(api_repr.get("policyTags")), + precision=cls.__get_int(api_repr, "precision"), + scale=cls.__get_int(api_repr, "scale"), + max_length=cls.__get_int(api_repr, "maxLength"), ) @property @@ -148,6 +176,21 @@ def description(self): """Optional[str]: description for the field.""" return self._properties.get("description") + @property + def precision(self): + """Optional[int]: Precision (number of digits) for the NUMERIC field.""" + return self._properties.get("precision") + + @property + def scale(self): + """Optional[int]: Scale (digits after decimal) for the NUMERIC field.""" + return self._properties.get("scale") + + @property + def max_length(self): + """Optional[int]: Maximum length for the STRING or BYTES field.""" + return self._properties.get("maxLength") + @property def fields(self): """Optional[tuple]: Subfields contained in this field. @@ -191,9 +234,19 @@ def _key(self): Returns: Tuple: The contents of this :class:`~google.cloud.bigquery.schema.SchemaField`. """ + field_type = self.field_type.upper() + if field_type == "STRING" or field_type == "BYTES": + if self.max_length is not None: + field_type = f"{field_type}({self.max_length})" + elif field_type.endswith("NUMERIC"): + if self.precision is not None: + if self.scale is not None: + field_type = f"{field_type}({self.precision}, {self.scale})" + else: + field_type = f"{field_type}({self.precision})" return ( self.name, - self.field_type.upper(), + field_type, # Mode is always str, if not given it defaults to a str value self.mode.upper(), # pytype: disable=attribute-error self.description, @@ -269,21 +322,7 @@ def _parse_schema_resource(info): Optional[Sequence[google.cloud.bigquery.schema.SchemaField`]: A list of parsed fields, or ``None`` if no "fields" key found. 
""" - if "fields" not in info: - return () - - schema = [] - for r_field in info["fields"]: - name = r_field["name"] - field_type = r_field["type"] - mode = r_field.get("mode", "NULLABLE") - description = r_field.get("description") - sub_fields = _parse_schema_resource(r_field) - policy_tags = PolicyTagList.from_api_repr(r_field.get("policyTags")) - schema.append( - SchemaField(name, field_type, mode, description, sub_fields, policy_tags) - ) - return schema + return [SchemaField.from_api_repr(f) for f in info.get("fields", ())] def _build_schema_resource(fields): diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 7c8ef50fa..b4b0c053d 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2173,6 +2173,35 @@ def test_list_rows_page_size(self): page = next(pages) self.assertEqual(page.num_items, num_last_page) + def test_parameterized_types_round_trip(self): + client = Config.CLIENT + table_id = f"{Config.DATASET}.test_parameterized_types_round_trip" + fields = ( + ("n", "NUMERIC"), + ("n9", "NUMERIC(9)"), + ("n92", "NUMERIC(9, 2)"), + ("bn", "BIGNUMERIC"), + ("bn9", "BIGNUMERIC(38)"), + ("bn92", "BIGNUMERIC(38, 22)"), + ("s", "STRING"), + ("s9", "STRING(9)"), + ("b", "BYTES"), + ("b9", "BYTES(9)"), + ) + self.to_delete.insert(0, Table(f"{client.project}.{table_id}")) + client.query( + "create table {} ({})".format( + table_id, ", ".join(" ".join(f) for f in fields) + ) + ).result() + table = client.get_table(table_id) + table_id2 = table_id + "2" + self.to_delete.insert(0, Table(f"{client.project}.{table_id2}")) + client.create_table(Table(f"{client.project}.{table_id2}", table.schema)) + table2 = client.get_table(table_id2) + + self.assertEqual(tuple(s._key()[:2] for s in table2.schema), fields) + def temp_dataset(self, dataset_id, location=None): project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 
90fc30b20..9483fe8dd 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -1302,7 +1302,7 @@ def _verifySchema(self, query, resource): self.assertEqual(found.description, expected.get("description")) self.assertEqual(found.fields, expected.get("fields", ())) else: - self.assertEqual(query.schema, ()) + self.assertEqual(query.schema, []) def test_ctor_defaults(self): query = self._make_one(self._make_resource()) @@ -1312,7 +1312,7 @@ def test_ctor_defaults(self): self.assertIsNone(query.page_token) self.assertEqual(query.project, self.PROJECT) self.assertEqual(query.rows, []) - self.assertEqual(query.schema, ()) + self.assertEqual(query.schema, []) self.assertIsNone(query.total_rows) self.assertIsNone(query.total_bytes_processed) diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 87baaf379..29c3bace5 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -15,6 +15,7 @@ import unittest import mock +import pytest class TestSchemaField(unittest.TestCase): @@ -715,3 +716,125 @@ def test___hash__not_equals(self): set_one = {policy1} set_two = {policy2} self.assertNotEqual(set_one, set_two) + + +@pytest.mark.parametrize( + "api,expect,key2", + [ + ( + dict(name="n", type="NUMERIC"), + ("n", "NUMERIC", None, None, None), + ("n", "NUMERIC"), + ), + ( + dict(name="n", type="NUMERIC", precision=9), + ("n", "NUMERIC", 9, None, None), + ("n", "NUMERIC(9)"), + ), + ( + dict(name="n", type="NUMERIC", precision=9, scale=2), + ("n", "NUMERIC", 9, 2, None), + ("n", "NUMERIC(9, 2)"), + ), + ( + dict(name="n", type="BIGNUMERIC"), + ("n", "BIGNUMERIC", None, None, None), + ("n", "BIGNUMERIC"), + ), + ( + dict(name="n", type="BIGNUMERIC", precision=40), + ("n", "BIGNUMERIC", 40, None, None), + ("n", "BIGNUMERIC(40)"), + ), + ( + dict(name="n", type="BIGNUMERIC", precision=40, scale=2), + ("n", "BIGNUMERIC", 40, 2, None), + ("n", "BIGNUMERIC(40, 2)"), + ), + ( + dict(name="n", type="STRING"), + ("n", "STRING", None, None, None), 
+ ("n", "STRING"), + ), + ( + dict(name="n", type="STRING", maxLength=9), + ("n", "STRING", None, None, 9), + ("n", "STRING(9)"), + ), + ( + dict(name="n", type="BYTES"), + ("n", "BYTES", None, None, None), + ("n", "BYTES"), + ), + ( + dict(name="n", type="BYTES", maxLength=9), + ("n", "BYTES", None, None, 9), + ("n", "BYTES(9)"), + ), + ], +) +def test_from_api_repr_parameterized(api, expect, key2): + from google.cloud.bigquery.schema import SchemaField + + field = SchemaField.from_api_repr(api) + + assert ( + field.name, + field.field_type, + field.precision, + field.scale, + field.max_length, + ) == expect + + assert field._key()[:2] == key2 + + +@pytest.mark.parametrize( + "field,api", + [ + ( + dict(name="n", field_type="NUMERIC"), + dict(name="n", type="NUMERIC", mode="NULLABLE"), + ), + ( + dict(name="n", field_type="NUMERIC", precision=9), + dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9), + ), + ( + dict(name="n", field_type="NUMERIC", precision=9, scale=2), + dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9, scale=2), + ), + ( + dict(name="n", field_type="BIGNUMERIC"), + dict(name="n", type="BIGNUMERIC", mode="NULLABLE"), + ), + ( + dict(name="n", field_type="BIGNUMERIC", precision=40), + dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40), + ), + ( + dict(name="n", field_type="BIGNUMERIC", precision=40, scale=2), + dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40, scale=2), + ), + ( + dict(name="n", field_type="STRING"), + dict(name="n", type="STRING", mode="NULLABLE"), + ), + ( + dict(name="n", field_type="STRING", max_length=9), + dict(name="n", type="STRING", mode="NULLABLE", maxLength=9), + ), + ( + dict(name="n", field_type="BYTES"), + dict(name="n", type="BYTES", mode="NULLABLE"), + ), + ( + dict(name="n", field_type="BYTES", max_length=9), + dict(name="n", type="BYTES", mode="NULLABLE", maxLength=9), + ), + ], +) +def test_to_api_repr_parameterized(field, api): + from google.cloud.bigquery.schema 
import SchemaField + + assert SchemaField(**field).to_api_repr() == api From 168e699dc091521edbd769825ede3b8c3ec9d2d9 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Fri, 21 May 2021 11:37:20 -0600 Subject: [PATCH 112/230] chore: release 2.17.0 (#670) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 13 +++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ef184dffb..2a0227118 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,19 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.17.0](https://www.github.com/googleapis/python-bigquery/compare/v2.16.1...v2.17.0) (2021-05-21) + + +### Features + +* detect obsolete BQ Storage extra at runtime ([#666](https://www.github.com/googleapis/python-bigquery/issues/666)) ([bd7dbda](https://www.github.com/googleapis/python-bigquery/commit/bd7dbdae5c972b16bafc53c67911eeaa3255a880)) +* Support parameterized NUMERIC, BIGNUMERIC, STRING, and BYTES types ([#673](https://www.github.com/googleapis/python-bigquery/issues/673)) ([45421e7](https://www.github.com/googleapis/python-bigquery/commit/45421e73bfcddb244822e6a5cd43be6bd1ca2256)) + + +### Bug Fixes + +* **tests:** invalid path to strptime() ([#672](https://www.github.com/googleapis/python-bigquery/issues/672)) ([591cdd8](https://www.github.com/googleapis/python-bigquery/commit/591cdd851bb1321b048a05a378a0ef48d3ade462)) + ### [2.16.1](https://www.github.com/googleapis/python-bigquery/compare/v2.16.0...v2.16.1) (2021-05-12) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 61e0c0a83..422b383cc 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.16.1" +__version__ = "2.17.0" From eed50b0facee6b71d8334fc70279286d5a9400da Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Sat, 22 May 2021 08:42:02 +0200 Subject: [PATCH 113/230] chore(deps): update dependency google-cloud-bigquery to v2.17.0 (#675) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | `==2.16.1` -> `==2.17.0` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.17.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.17.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.17.0/compatibility-slim/2.16.1)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.17.0/confidence-slim/2.16.1)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
<details> <summary>googleapis/python-bigquery</summary> ### [`v2.17.0`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​2170-httpswwwgithubcomgoogleapispython-bigquerycomparev2161v2170-2021-05-21) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.16.1...v2.17.0) ##### Features - detect obsolete BQ Storage extra at runtime ([#​666](https://www.github.com/googleapis/python-bigquery/issues/666)) ([bd7dbda](https://www.github.com/googleapis/python-bigquery/commit/bd7dbdae5c972b16bafc53c67911eeaa3255a880)) - Support parameterized NUMERIC, BIGNUMERIC, STRING, and BYTES types ([#​673](https://www.github.com/googleapis/python-bigquery/issues/673)) ([45421e7](https://www.github.com/googleapis/python-bigquery/commit/45421e73bfcddb244822e6a5cd43be6bd1ca2256)) ##### Bug Fixes - **tests:** invalid path to strptime() ([#​672](https://www.github.com/googleapis/python-bigquery/issues/672)) ([591cdd8](https://www.github.com/googleapis/python-bigquery/commit/591cdd851bb1321b048a05a378a0ef48d3ade462)) ##### [2.16.1](https://www.github.com/googleapis/python-bigquery/compare/v2.16.0...v2.16.1) (2021-05-12) ##### Bug Fixes - executemany rowcount only reflected the last execution ([#​660](https://www.github.com/googleapis/python-bigquery/issues/660)) ([aeadc8c](https://www.github.com/googleapis/python-bigquery/commit/aeadc8c2d614bb9f0883ec901fca48930f3aaf19)) </details>
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻️ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index e494fbaae..9fbe85970 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.16.1 +google-cloud-bigquery==2.17.0 google-cloud-bigquery-storage==2.4.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index fcb481f48..2ed5b0df8 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.16.1 +google-cloud-bigquery==2.17.0 google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 grpcio==1.38.0 From fcda42a72f999d9aa70a5a9f0296027e2868d17a Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Sat, 22 May 2021 09:18:05 +0000 Subject: [PATCH 114/230] chore: new owl bot post processor docker image (#677) gcr.io/repo-automation-bots/owlbot-python:latest@sha256:3c3a445b3ddc99ccd5d31edc4b4519729635d20693900db32c4f587ed51f7479 --- .github/.OwlBot.lock.yaml | 2 +- samples/geography/noxfile.py | 9 ++++++++- samples/snippets/noxfile.py | 9 ++++++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff 
--git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 864c17653..46e3f021c 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:4c981a6b6f2b8914a448d7b3a01688365be03e3ed26dfee399a6aa77fb112eaa + digest: sha256:3c3a445b3ddc99ccd5d31edc4b4519729635d20693900db32c4f587ed51f7479 diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index be1a3f251..160fe7286 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -48,6 +48,10 @@ # to use your own Cloud project. "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # If you need to use a specific version of pip, + # change pip_version_override to the string representation + # of the version number, for example, "20.2.4" + "pip_version_override": None, # A dictionary you want to inject into your test. Don't put any # secrets here. These values will override predefined values. "envs": {}, @@ -172,6 +176,9 @@ def blacken(session: nox.sessions.Session) -> None: def _session_tests( session: nox.sessions.Session, post_install: Callable = None ) -> None: + if TEST_CONFIG["pip_version_override"]: + pip_version = TEST_CONFIG["pip_version_override"] + session.install(f"pip=={pip_version}") """Runs py.test for a particular project.""" if os.path.exists("requirements.txt"): if os.path.exists("constraints.txt"): @@ -198,7 +205,7 @@ def _session_tests( # on travis where slow and flaky tests are excluded. # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html success_codes=[0, 5], - env=get_pytest_env_vars() + env=get_pytest_env_vars(), ) diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index be1a3f251..160fe7286 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -48,6 +48,10 @@ # to use your own Cloud project. 
"gcloud_project_env": "GOOGLE_CLOUD_PROJECT", # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # If you need to use a specific version of pip, + # change pip_version_override to the string representation + # of the version number, for example, "20.2.4" + "pip_version_override": None, # A dictionary you want to inject into your test. Don't put any # secrets here. These values will override predefined values. "envs": {}, @@ -172,6 +176,9 @@ def blacken(session: nox.sessions.Session) -> None: def _session_tests( session: nox.sessions.Session, post_install: Callable = None ) -> None: + if TEST_CONFIG["pip_version_override"]: + pip_version = TEST_CONFIG["pip_version_override"] + session.install(f"pip=={pip_version}") """Runs py.test for a particular project.""" if os.path.exists("requirements.txt"): if os.path.exists("constraints.txt"): @@ -198,7 +205,7 @@ def _session_tests( # on travis where slow and flaky tests are excluded. # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html success_codes=[0, 5], - env=get_pytest_env_vars() + env=get_pytest_env_vars(), ) From 27e4d34a257e2110a3928931c7ea58b2607e85d0 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Mon, 24 May 2021 02:16:36 -0400 Subject: [PATCH 115/230] chore: delete unused protos (#676) --- .../bigquery_v2/proto/encryption_config.proto | 32 - .../proto/encryption_config_pb2.py | 104 - .../bigquery_v2/proto/location_metadata.proto | 34 - google/cloud/bigquery_v2/proto/model.proto | 1208 ----- google/cloud/bigquery_v2/proto/model_pb2.py | 4298 ----------------- .../bigquery_v2/proto/model_reference.proto | 38 - .../bigquery_v2/proto/model_reference_pb2.py | 142 - .../bigquery_v2/proto/standard_sql.proto | 112 - .../bigquery_v2/proto/standard_sql_pb2.py | 442 -- .../bigquery_v2/proto/table_reference.proto | 39 - 10 files changed, 6449 deletions(-) delete mode 100644 google/cloud/bigquery_v2/proto/encryption_config.proto delete mode 100644 
google/cloud/bigquery_v2/proto/encryption_config_pb2.py delete mode 100644 google/cloud/bigquery_v2/proto/location_metadata.proto delete mode 100644 google/cloud/bigquery_v2/proto/model.proto delete mode 100644 google/cloud/bigquery_v2/proto/model_pb2.py delete mode 100644 google/cloud/bigquery_v2/proto/model_reference.proto delete mode 100644 google/cloud/bigquery_v2/proto/model_reference_pb2.py delete mode 100644 google/cloud/bigquery_v2/proto/standard_sql.proto delete mode 100644 google/cloud/bigquery_v2/proto/standard_sql_pb2.py delete mode 100644 google/cloud/bigquery_v2/proto/table_reference.proto diff --git a/google/cloud/bigquery_v2/proto/encryption_config.proto b/google/cloud/bigquery_v2/proto/encryption_config.proto deleted file mode 100644 index 1c0512a17..000000000 --- a/google/cloud/bigquery_v2/proto/encryption_config.proto +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright 2020 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package google.cloud.bigquery.v2; - -import "google/api/field_behavior.proto"; -import "google/protobuf/wrappers.proto"; -import "google/api/annotations.proto"; - -option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; -option java_outer_classname = "EncryptionConfigProto"; -option java_package = "com.google.cloud.bigquery.v2"; - -message EncryptionConfiguration { - // Optional. 
Describes the Cloud KMS encryption key that will be used to - // protect destination BigQuery table. The BigQuery Service Account associated - // with your project requires access to this encryption key. - google.protobuf.StringValue kms_key_name = 1 [(google.api.field_behavior) = OPTIONAL]; -} diff --git a/google/cloud/bigquery_v2/proto/encryption_config_pb2.py b/google/cloud/bigquery_v2/proto/encryption_config_pb2.py deleted file mode 100644 index 5ae21ea6f..000000000 --- a/google/cloud/bigquery_v2/proto/encryption_config_pb2.py +++ /dev/null @@ -1,104 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: google/cloud/bigquery_v2/proto/encryption_config.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2 -from google.protobuf import wrappers_pb2 as google_dot_protobuf_dot_wrappers__pb2 -from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/bigquery_v2/proto/encryption_config.proto", - package="google.cloud.bigquery.v2", - syntax="proto3", - serialized_options=b"\n\034com.google.cloud.bigquery.v2B\025EncryptionConfigProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery", - create_key=_descriptor._internal_create_key, - serialized_pb=b'\n6google/cloud/bigquery_v2/proto/encryption_config.proto\x12\x18google.cloud.bigquery.v2\x1a\x1fgoogle/api/field_behavior.proto\x1a\x1egoogle/protobuf/wrappers.proto\x1a\x1cgoogle/api/annotations.proto"R\n\x17\x45ncryptionConfiguration\x12\x37\n\x0ckms_key_name\x18\x01 
\x01(\x0b\x32\x1c.google.protobuf.StringValueB\x03\xe0\x41\x01\x42w\n\x1c\x63om.google.cloud.bigquery.v2B\x15\x45ncryptionConfigProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3', - dependencies=[ - google_dot_api_dot_field__behavior__pb2.DESCRIPTOR, - google_dot_protobuf_dot_wrappers__pb2.DESCRIPTOR, - google_dot_api_dot_annotations__pb2.DESCRIPTOR, - ], -) - - -_ENCRYPTIONCONFIGURATION = _descriptor.Descriptor( - name="EncryptionConfiguration", - full_name="google.cloud.bigquery.v2.EncryptionConfiguration", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="kms_key_name", - full_name="google.cloud.bigquery.v2.EncryptionConfiguration.kms_key_name", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\001", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=179, - serialized_end=261, -) - -_ENCRYPTIONCONFIGURATION.fields_by_name[ - "kms_key_name" -].message_type = google_dot_protobuf_dot_wrappers__pb2._STRINGVALUE -DESCRIPTOR.message_types_by_name["EncryptionConfiguration"] = _ENCRYPTIONCONFIGURATION -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -EncryptionConfiguration = _reflection.GeneratedProtocolMessageType( - "EncryptionConfiguration", - (_message.Message,), - { - "DESCRIPTOR": _ENCRYPTIONCONFIGURATION, - "__module__": "google.cloud.bigquery_v2.proto.encryption_config_pb2", - "__doc__": """Encryption configuration. - - Attributes: - kms_key_name: - Optional. 
Describes the Cloud KMS encryption key that will be - used to protect destination BigQuery table. The BigQuery - Service Account associated with your project requires access - to this encryption key. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.EncryptionConfiguration) - }, -) -_sym_db.RegisterMessage(EncryptionConfiguration) - - -DESCRIPTOR._options = None -_ENCRYPTIONCONFIGURATION.fields_by_name["kms_key_name"]._options = None -# @@protoc_insertion_point(module_scope) diff --git a/google/cloud/bigquery_v2/proto/location_metadata.proto b/google/cloud/bigquery_v2/proto/location_metadata.proto deleted file mode 100644 index 95a3133c5..000000000 --- a/google/cloud/bigquery_v2/proto/location_metadata.proto +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2019 Google LLC. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -syntax = "proto3"; - -package google.cloud.bigquery.v2; - -import "google/api/annotations.proto"; - -option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; -option java_outer_classname = "LocationMetadataProto"; -option java_package = "com.google.cloud.bigquery.v2"; - - -// BigQuery-specific metadata about a location. This will be set on -// google.cloud.location.Location.metadata in Cloud Location API -// responses. -message LocationMetadata { - // The legacy BigQuery location ID, e.g. “EU” for the “europe” location. 
- // This is for any API consumers that need the legacy “US” and “EU” locations. - string legacy_location_id = 1; -} diff --git a/google/cloud/bigquery_v2/proto/model.proto b/google/cloud/bigquery_v2/proto/model.proto deleted file mode 100644 index 2d400dddd..000000000 --- a/google/cloud/bigquery_v2/proto/model.proto +++ /dev/null @@ -1,1208 +0,0 @@ -// Copyright 2020 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package google.cloud.bigquery.v2; - -import "google/api/client.proto"; -import "google/api/field_behavior.proto"; -import "google/cloud/bigquery/v2/encryption_config.proto"; -import "google/cloud/bigquery/v2/model_reference.proto"; -import "google/cloud/bigquery/v2/standard_sql.proto"; -import "google/cloud/bigquery/v2/table_reference.proto"; -import "google/protobuf/empty.proto"; -import "google/protobuf/timestamp.proto"; -import "google/protobuf/wrappers.proto"; -import "google/api/annotations.proto"; - -option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; -option java_outer_classname = "ModelProto"; -option java_package = "com.google.cloud.bigquery.v2"; - -service ModelService { - option (google.api.default_host) = "bigquery.googleapis.com"; - option (google.api.oauth_scopes) = - "https://www.googleapis.com/auth/bigquery," - "https://www.googleapis.com/auth/bigquery.readonly," - "https://www.googleapis.com/auth/cloud-platform," - 
"https://www.googleapis.com/auth/cloud-platform.read-only"; - - // Gets the specified model resource by model ID. - rpc GetModel(GetModelRequest) returns (Model) { - option (google.api.method_signature) = "project_id,dataset_id,model_id"; - } - - // Lists all models in the specified dataset. Requires the READER dataset - // role. - rpc ListModels(ListModelsRequest) returns (ListModelsResponse) { - option (google.api.method_signature) = "project_id,dataset_id,max_results"; - } - - // Patch specific fields in the specified model. - rpc PatchModel(PatchModelRequest) returns (Model) { - option (google.api.method_signature) = "project_id,dataset_id,model_id,model"; - } - - // Deletes the model specified by modelId from the dataset. - rpc DeleteModel(DeleteModelRequest) returns (google.protobuf.Empty) { - option (google.api.method_signature) = "project_id,dataset_id,model_id"; - } -} - -message Model { - message SeasonalPeriod { - enum SeasonalPeriodType { - SEASONAL_PERIOD_TYPE_UNSPECIFIED = 0; - - // No seasonality - NO_SEASONALITY = 1; - - // Daily period, 24 hours. - DAILY = 2; - - // Weekly period, 7 days. - WEEKLY = 3; - - // Monthly period, 30 days or irregular. - MONTHLY = 4; - - // Quarterly period, 90 days or irregular. - QUARTERLY = 5; - - // Yearly period, 365 days or irregular. - YEARLY = 6; - } - - - } - - message KmeansEnums { - // Indicates the method used to initialize the centroids for KMeans - // clustering algorithm. - enum KmeansInitializationMethod { - KMEANS_INITIALIZATION_METHOD_UNSPECIFIED = 0; - - // Initializes the centroids randomly. - RANDOM = 1; - - // Initializes the centroids using data specified in - // kmeans_initialization_column. - CUSTOM = 2; - - // Initializes with kmeans++. - KMEANS_PLUS_PLUS = 3; - } - - - } - - // Evaluation metrics for regression and explicit feedback type matrix - // factorization models. - message RegressionMetrics { - // Mean absolute error. 
- google.protobuf.DoubleValue mean_absolute_error = 1; - - // Mean squared error. - google.protobuf.DoubleValue mean_squared_error = 2; - - // Mean squared log error. - google.protobuf.DoubleValue mean_squared_log_error = 3; - - // Median absolute error. - google.protobuf.DoubleValue median_absolute_error = 4; - - // R^2 score. - google.protobuf.DoubleValue r_squared = 5; - } - - // Aggregate metrics for classification/classifier models. For multi-class - // models, the metrics are either macro-averaged or micro-averaged. When - // macro-averaged, the metrics are calculated for each label and then an - // unweighted average is taken of those values. When micro-averaged, the - // metric is calculated globally by counting the total number of correctly - // predicted rows. - message AggregateClassificationMetrics { - // Precision is the fraction of actual positive predictions that had - // positive actual labels. For multiclass this is a macro-averaged - // metric treating each class as a binary classifier. - google.protobuf.DoubleValue precision = 1; - - // Recall is the fraction of actual positive labels that were given a - // positive prediction. For multiclass this is a macro-averaged metric. - google.protobuf.DoubleValue recall = 2; - - // Accuracy is the fraction of predictions given the correct label. For - // multiclass this is a micro-averaged metric. - google.protobuf.DoubleValue accuracy = 3; - - // Threshold at which the metrics are computed. For binary - // classification models this is the positive class threshold. - // For multi-class classfication models this is the confidence - // threshold. - google.protobuf.DoubleValue threshold = 4; - - // The F1 score is an average of recall and precision. For multiclass - // this is a macro-averaged metric. - google.protobuf.DoubleValue f1_score = 5; - - // Logarithmic Loss. For multiclass this is a macro-averaged metric. - google.protobuf.DoubleValue log_loss = 6; - - // Area Under a ROC Curve. 
For multiclass this is a macro-averaged - // metric. - google.protobuf.DoubleValue roc_auc = 7; - } - - // Evaluation metrics for binary classification/classifier models. - message BinaryClassificationMetrics { - // Confusion matrix for binary classification models. - message BinaryConfusionMatrix { - // Threshold value used when computing each of the following metric. - google.protobuf.DoubleValue positive_class_threshold = 1; - - // Number of true samples predicted as true. - google.protobuf.Int64Value true_positives = 2; - - // Number of false samples predicted as true. - google.protobuf.Int64Value false_positives = 3; - - // Number of true samples predicted as false. - google.protobuf.Int64Value true_negatives = 4; - - // Number of false samples predicted as false. - google.protobuf.Int64Value false_negatives = 5; - - // The fraction of actual positive predictions that had positive actual - // labels. - google.protobuf.DoubleValue precision = 6; - - // The fraction of actual positive labels that were given a positive - // prediction. - google.protobuf.DoubleValue recall = 7; - - // The equally weighted average of recall and precision. - google.protobuf.DoubleValue f1_score = 8; - - // The fraction of predictions given the correct label. - google.protobuf.DoubleValue accuracy = 9; - } - - // Aggregate classification metrics. - AggregateClassificationMetrics aggregate_classification_metrics = 1; - - // Binary confusion matrix at multiple thresholds. - repeated BinaryConfusionMatrix binary_confusion_matrix_list = 2; - - // Label representing the positive class. - string positive_label = 3; - - // Label representing the negative class. - string negative_label = 4; - } - - // Evaluation metrics for multi-class classification/classifier models. - message MultiClassClassificationMetrics { - // Confusion matrix for multi-class classification models. - message ConfusionMatrix { - // A single entry in the confusion matrix. - message Entry { - // The predicted label. 
For confidence_threshold > 0, we will - // also add an entry indicating the number of items under the - // confidence threshold. - string predicted_label = 1; - - // Number of items being predicted as this label. - google.protobuf.Int64Value item_count = 2; - } - - // A single row in the confusion matrix. - message Row { - // The original label of this row. - string actual_label = 1; - - // Info describing predicted label distribution. - repeated Entry entries = 2; - } - - // Confidence threshold used when computing the entries of the - // confusion matrix. - google.protobuf.DoubleValue confidence_threshold = 1; - - // One row per actual label. - repeated Row rows = 2; - } - - // Aggregate classification metrics. - AggregateClassificationMetrics aggregate_classification_metrics = 1; - - // Confusion matrix at different thresholds. - repeated ConfusionMatrix confusion_matrix_list = 2; - } - - // Evaluation metrics for clustering models. - message ClusteringMetrics { - // Message containing the information about one cluster. - message Cluster { - // Representative value of a single feature within the cluster. - message FeatureValue { - // Representative value of a categorical feature. - message CategoricalValue { - // Represents the count of a single category within the cluster. - message CategoryCount { - // The name of category. - string category = 1; - - // The count of training samples matching the category within the - // cluster. - google.protobuf.Int64Value count = 2; - } - - // Counts of all categories for the categorical feature. If there are - // more than ten categories, we return top ten (by count) and return - // one more CategoryCount with category "_OTHER_" and count as - // aggregate counts of remaining categories. - repeated CategoryCount category_counts = 1; - } - - // The feature column name. - string feature_column = 1; - - oneof value { - // The numerical feature value. This is the centroid value for this - // feature. 
- google.protobuf.DoubleValue numerical_value = 2; - - // The categorical feature value. - CategoricalValue categorical_value = 3; - } - } - - // Centroid id. - int64 centroid_id = 1; - - // Values of highly variant features for this cluster. - repeated FeatureValue feature_values = 2; - - // Count of training data rows that were assigned to this cluster. - google.protobuf.Int64Value count = 3; - } - - // Davies-Bouldin index. - google.protobuf.DoubleValue davies_bouldin_index = 1; - - // Mean of squared distances between each sample to its cluster centroid. - google.protobuf.DoubleValue mean_squared_distance = 2; - - // [Beta] Information for all clusters. - repeated Cluster clusters = 3; - } - - // Evaluation metrics used by weighted-ALS models specified by - // feedback_type=implicit. - message RankingMetrics { - // Calculates a precision per user for all the items by ranking them and - // then averages all the precisions across all the users. - google.protobuf.DoubleValue mean_average_precision = 1; - - // Similar to the mean squared error computed in regression and explicit - // recommendation models except instead of computing the rating directly, - // the output from evaluate is computed against a preference which is 1 or 0 - // depending on if the rating exists or not. - google.protobuf.DoubleValue mean_squared_error = 2; - - // A metric to determine the goodness of a ranking calculated from the - // predicted confidence by comparing it to an ideal rank measured by the - // original ratings. - google.protobuf.DoubleValue normalized_discounted_cumulative_gain = 3; - - // Determines the goodness of a ranking by computing the percentile rank - // from the predicted confidence and dividing it by the original rank. - google.protobuf.DoubleValue average_rank = 4; - } - - // Model evaluation metrics for ARIMA forecasting models. - message ArimaForecastingMetrics { - // Model evaluation metrics for a single ARIMA forecasting model. 
- message ArimaSingleModelForecastingMetrics { - // Non-seasonal order. - ArimaOrder non_seasonal_order = 1; - - // Arima fitting metrics. - ArimaFittingMetrics arima_fitting_metrics = 2; - - // Is arima model fitted with drift or not. It is always false when d - // is not 1. - bool has_drift = 3; - - // The id to indicate different time series. - string time_series_id = 4; - - // Seasonal periods. Repeated because multiple periods are supported - // for one time series. - repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 5; - } - - // Non-seasonal order. - repeated ArimaOrder non_seasonal_order = 1; - - // Arima model fitting metrics. - repeated ArimaFittingMetrics arima_fitting_metrics = 2; - - // Seasonal periods. Repeated because multiple periods are supported for one - // time series. - repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 3; - - // Whether Arima model fitted with drift or not. It is always false when d - // is not 1. - repeated bool has_drift = 4; - - // Id to differentiate different time series for the large-scale case. - repeated string time_series_id = 5; - - // Repeated as there can be many metric sets (one for each model) in - // auto-arima and the large-scale case. - repeated ArimaSingleModelForecastingMetrics arima_single_model_forecasting_metrics = 6; - } - - // Evaluation metrics of a model. These are either computed on all training - // data or just the eval data based on whether eval data was used during - // training. These are not present for imported models. - message EvaluationMetrics { - oneof metrics { - // Populated for regression models and explicit feedback type matrix - // factorization models. - RegressionMetrics regression_metrics = 1; - - // Populated for binary classification/classifier models. - BinaryClassificationMetrics binary_classification_metrics = 2; - - // Populated for multi-class classification/classifier models. 
- MultiClassClassificationMetrics multi_class_classification_metrics = 3; - - // Populated for clustering models. - ClusteringMetrics clustering_metrics = 4; - - // Populated for implicit feedback type matrix factorization models. - RankingMetrics ranking_metrics = 5; - - // Populated for ARIMA models. - ArimaForecastingMetrics arima_forecasting_metrics = 6; - } - } - - // Data split result. This contains references to the training and evaluation - // data tables that were used to train the model. - message DataSplitResult { - // Table reference of the training data after split. - TableReference training_table = 1; - - // Table reference of the evaluation data after split. - TableReference evaluation_table = 2; - } - - // Arima order, can be used for both non-seasonal and seasonal parts. - message ArimaOrder { - // Order of the autoregressive part. - int64 p = 1; - - // Order of the differencing part. - int64 d = 2; - - // Order of the moving-average part. - int64 q = 3; - } - - // ARIMA model fitting metrics. - message ArimaFittingMetrics { - // Log-likelihood. - double log_likelihood = 1; - - // AIC. - double aic = 2; - - // Variance. - double variance = 3; - } - - // Global explanations containing the top most important features - // after training. - message GlobalExplanation { - // Explanation for a single feature. - message Explanation { - // Full name of the feature. For non-numerical features, will be - // formatted like .. Overall size of - // feature name will always be truncated to first 120 characters. - string feature_name = 1; - - // Attribution of feature. - google.protobuf.DoubleValue attribution = 2; - } - - // A list of the top global explanations. Sorted by absolute value of - // attribution in descending order. - repeated Explanation explanations = 1; - - // Class label for this set of global explanations. Will be empty/null for - // binary logistic and linear regression models. Sorted alphabetically in - // descending order. 
- string class_label = 2; - } - - // Information about a single training query run for the model. - message TrainingRun { - message TrainingOptions { - // The maximum number of iterations in training. Used only for iterative - // training algorithms. - int64 max_iterations = 1; - - // Type of loss function used during training run. - LossType loss_type = 2; - - // Learning rate in training. Used only for iterative training algorithms. - double learn_rate = 3; - - // L1 regularization coefficient. - google.protobuf.DoubleValue l1_regularization = 4; - - // L2 regularization coefficient. - google.protobuf.DoubleValue l2_regularization = 5; - - // When early_stop is true, stops training when accuracy improvement is - // less than 'min_relative_progress'. Used only for iterative training - // algorithms. - google.protobuf.DoubleValue min_relative_progress = 6; - - // Whether to train a model from the last checkpoint. - google.protobuf.BoolValue warm_start = 7; - - // Whether to stop early when the loss doesn't improve significantly - // any more (compared to min_relative_progress). Used only for iterative - // training algorithms. - google.protobuf.BoolValue early_stop = 8; - - // Name of input label columns in training data. - repeated string input_label_columns = 9; - - // The data split type for training and evaluation, e.g. RANDOM. - DataSplitMethod data_split_method = 10; - - // The fraction of evaluation data over the whole input data. The rest - // of data will be used as training data. The format should be double. - // Accurate to two decimal places. - // Default value is 0.2. - double data_split_eval_fraction = 11; - - // The column to split data with. This column won't be used as a - // feature. - // 1. When data_split_method is CUSTOM, the corresponding column should - // be boolean. The rows with true value tag are eval data, and the false - // are training data. - // 2. 
When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION - // rows (from smallest to largest) in the corresponding column are used - // as training data, and the rest are eval data. It respects the order - // in Orderable data types: - // https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties - string data_split_column = 12; - - // The strategy to determine learn rate for the current iteration. - LearnRateStrategy learn_rate_strategy = 13; - - // Specifies the initial learning rate for the line search learn rate - // strategy. - double initial_learn_rate = 16; - - // Weights associated with each label class, for rebalancing the - // training data. Only applicable for classification models. - map label_class_weights = 17; - - // User column specified for matrix factorization models. - string user_column = 18; - - // Item column specified for matrix factorization models. - string item_column = 19; - - // Distance type for clustering models. - DistanceType distance_type = 20; - - // Number of clusters for clustering models. - int64 num_clusters = 21; - - // [Beta] Google Cloud Storage URI from which the model was imported. Only - // applicable for imported models. - string model_uri = 22; - - // Optimization strategy for training linear regression models. - OptimizationStrategy optimization_strategy = 23; - - // Hidden units for dnn models. - repeated int64 hidden_units = 24; - - // Batch size for dnn models. - int64 batch_size = 25; - - // Dropout probability for dnn models. - google.protobuf.DoubleValue dropout = 26; - - // Maximum depth of a tree for boosted tree models. - int64 max_tree_depth = 27; - - // Subsample fraction of the training data to grow tree to prevent - // overfitting for boosted tree models. - double subsample = 28; - - // Minimum split loss for boosted tree models. - google.protobuf.DoubleValue min_split_loss = 29; - - // Num factors specified for matrix factorization models. 
- int64 num_factors = 30; - - // Feedback type that specifies which algorithm to run for matrix - // factorization. - FeedbackType feedback_type = 31; - - // Hyperparameter for matrix factoration when implicit feedback type is - // specified. - google.protobuf.DoubleValue wals_alpha = 32; - - // The method used to initialize the centroids for kmeans algorithm. - KmeansEnums.KmeansInitializationMethod kmeans_initialization_method = 33; - - // The column used to provide the initial centroids for kmeans algorithm - // when kmeans_initialization_method is CUSTOM. - string kmeans_initialization_column = 34; - - // Column to be designated as time series timestamp for ARIMA model. - string time_series_timestamp_column = 35; - - // Column to be designated as time series data for ARIMA model. - string time_series_data_column = 36; - - // Whether to enable auto ARIMA or not. - bool auto_arima = 37; - - // A specification of the non-seasonal part of the ARIMA model: the three - // components (p, d, q) are the AR order, the degree of differencing, and - // the MA order. - ArimaOrder non_seasonal_order = 38; - - // The data frequency of a time series. - DataFrequency data_frequency = 39; - - // Include drift when fitting an ARIMA model. - bool include_drift = 41; - - // The geographical region based on which the holidays are considered in - // time series modeling. If a valid value is specified, then holiday - // effects modeling is enabled. - HolidayRegion holiday_region = 42; - - // The id column that will be used to indicate different time series to - // forecast in parallel. - string time_series_id_column = 43; - - // The number of periods ahead that need to be forecasted. - int64 horizon = 44; - - // Whether to preserve the input structs in output feature names. - // Suppose there is a struct A with field b. - // When false (default), the output feature name is A_b. - // When true, the output feature name is A.b. 
- bool preserve_input_structs = 45; - - // The max value of non-seasonal p and q. - int64 auto_arima_max_order = 46; - } - - // Information about a single iteration of the training run. - message IterationResult { - // Information about a single cluster for clustering model. - message ClusterInfo { - // Centroid id. - int64 centroid_id = 1; - - // Cluster radius, the average distance from centroid - // to each point assigned to the cluster. - google.protobuf.DoubleValue cluster_radius = 2; - - // Cluster size, the total number of points assigned to the cluster. - google.protobuf.Int64Value cluster_size = 3; - } - - // (Auto-)arima fitting result. Wrap everything in ArimaResult for easier - // refactoring if we want to use model-specific iteration results. - message ArimaResult { - // Arima coefficients. - message ArimaCoefficients { - // Auto-regressive coefficients, an array of double. - repeated double auto_regressive_coefficients = 1; - - // Moving-average coefficients, an array of double. - repeated double moving_average_coefficients = 2; - - // Intercept coefficient, just a double not an array. - double intercept_coefficient = 3; - } - - // Arima model information. - message ArimaModelInfo { - // Non-seasonal order. - ArimaOrder non_seasonal_order = 1; - - // Arima coefficients. - ArimaCoefficients arima_coefficients = 2; - - // Arima fitting metrics. - ArimaFittingMetrics arima_fitting_metrics = 3; - - // Whether Arima model fitted with drift or not. It is always false - // when d is not 1. - bool has_drift = 4; - - // The id to indicate different time series. - string time_series_id = 5; - - // Seasonal periods. Repeated because multiple periods are supported - // for one time series. - repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 6; - } - - // This message is repeated because there are multiple arima models - // fitted in auto-arima. For non-auto-arima model, its size is one. 
- repeated ArimaModelInfo arima_model_info = 1; - - // Seasonal periods. Repeated because multiple periods are supported for - // one time series. - repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 2; - } - - // Index of the iteration, 0 based. - google.protobuf.Int32Value index = 1; - - // Time taken to run the iteration in milliseconds. - google.protobuf.Int64Value duration_ms = 4; - - // Loss computed on the training data at the end of iteration. - google.protobuf.DoubleValue training_loss = 5; - - // Loss computed on the eval data at the end of iteration. - google.protobuf.DoubleValue eval_loss = 6; - - // Learn rate used for this iteration. - double learn_rate = 7; - - // Information about top clusters for clustering models. - repeated ClusterInfo cluster_infos = 8; - - ArimaResult arima_result = 9; - } - - // Options that were used for this training run, includes - // user specified and default options that were used. - TrainingOptions training_options = 1; - - // The start time of this training run. - google.protobuf.Timestamp start_time = 8; - - // Output of each iteration run, results.size() <= max_iterations. - repeated IterationResult results = 6; - - // The evaluation metrics over training/eval data that were computed at the - // end of training. - EvaluationMetrics evaluation_metrics = 7; - - // Data split result of the training run. Only set when the input data is - // actually split. - DataSplitResult data_split_result = 9; - - // Global explanations for important features of the model. For multi-class - // models, there is one entry for each label class. For other models, there - // is only one entry in the list. - repeated GlobalExplanation global_explanations = 10; - } - - // Indicates the type of the Model. - enum ModelType { - MODEL_TYPE_UNSPECIFIED = 0; - - // Linear regression model. - LINEAR_REGRESSION = 1; - - // Logistic regression based classification model. - LOGISTIC_REGRESSION = 2; - - // K-means clustering model. 
- KMEANS = 3; - - // Matrix factorization model. - MATRIX_FACTORIZATION = 4; - - // [Beta] DNN classifier model. - DNN_CLASSIFIER = 5; - - // [Beta] An imported TensorFlow model. - TENSORFLOW = 6; - - // [Beta] DNN regressor model. - DNN_REGRESSOR = 7; - - // [Beta] Boosted tree regressor model. - BOOSTED_TREE_REGRESSOR = 9; - - // [Beta] Boosted tree classifier model. - BOOSTED_TREE_CLASSIFIER = 10; - - // [Beta] ARIMA model. - ARIMA = 11; - - // [Beta] AutoML Tables regression model. - AUTOML_REGRESSOR = 12; - - // [Beta] AutoML Tables classification model. - AUTOML_CLASSIFIER = 13; - } - - // Loss metric to evaluate model training performance. - enum LossType { - LOSS_TYPE_UNSPECIFIED = 0; - - // Mean squared loss, used for linear regression. - MEAN_SQUARED_LOSS = 1; - - // Mean log loss, used for logistic regression. - MEAN_LOG_LOSS = 2; - } - - // Distance metric used to compute the distance between two points. - enum DistanceType { - DISTANCE_TYPE_UNSPECIFIED = 0; - - // Eculidean distance. - EUCLIDEAN = 1; - - // Cosine distance. - COSINE = 2; - } - - // Indicates the method to split input data into multiple tables. - enum DataSplitMethod { - DATA_SPLIT_METHOD_UNSPECIFIED = 0; - - // Splits data randomly. - RANDOM = 1; - - // Splits data with the user provided tags. - CUSTOM = 2; - - // Splits data sequentially. - SEQUENTIAL = 3; - - // Data split will be skipped. - NO_SPLIT = 4; - - // Splits data automatically: Uses NO_SPLIT if the data size is small. - // Otherwise uses RANDOM. - AUTO_SPLIT = 5; - } - - // Type of supported data frequency for time series forecasting models. - enum DataFrequency { - DATA_FREQUENCY_UNSPECIFIED = 0; - - // Automatically inferred from timestamps. - AUTO_FREQUENCY = 1; - - // Yearly data. - YEARLY = 2; - - // Quarterly data. - QUARTERLY = 3; - - // Monthly data. - MONTHLY = 4; - - // Weekly data. - WEEKLY = 5; - - // Daily data. - DAILY = 6; - - // Hourly data. 
- HOURLY = 7; - } - - // Type of supported holiday regions for time series forecasting models. - enum HolidayRegion { - // Holiday region unspecified. - HOLIDAY_REGION_UNSPECIFIED = 0; - - // Global. - GLOBAL = 1; - - // North America. - NA = 2; - - // Japan and Asia Pacific: Korea, Greater China, India, Australia, and New - // Zealand. - JAPAC = 3; - - // Europe, the Middle East and Africa. - EMEA = 4; - - // Latin America and the Caribbean. - LAC = 5; - - // United Arab Emirates - AE = 6; - - // Argentina - AR = 7; - - // Austria - AT = 8; - - // Australia - AU = 9; - - // Belgium - BE = 10; - - // Brazil - BR = 11; - - // Canada - CA = 12; - - // Switzerland - CH = 13; - - // Chile - CL = 14; - - // China - CN = 15; - - // Colombia - CO = 16; - - // Czechoslovakia - CS = 17; - - // Czech Republic - CZ = 18; - - // Germany - DE = 19; - - // Denmark - DK = 20; - - // Algeria - DZ = 21; - - // Ecuador - EC = 22; - - // Estonia - EE = 23; - - // Egypt - EG = 24; - - // Spain - ES = 25; - - // Finland - FI = 26; - - // France - FR = 27; - - // Great Britain (United Kingdom) - GB = 28; - - // Greece - GR = 29; - - // Hong Kong - HK = 30; - - // Hungary - HU = 31; - - // Indonesia - ID = 32; - - // Ireland - IE = 33; - - // Israel - IL = 34; - - // India - IN = 35; - - // Iran - IR = 36; - - // Italy - IT = 37; - - // Japan - JP = 38; - - // Korea (South) - KR = 39; - - // Latvia - LV = 40; - - // Morocco - MA = 41; - - // Mexico - MX = 42; - - // Malaysia - MY = 43; - - // Nigeria - NG = 44; - - // Netherlands - NL = 45; - - // Norway - NO = 46; - - // New Zealand - NZ = 47; - - // Peru - PE = 48; - - // Philippines - PH = 49; - - // Pakistan - PK = 50; - - // Poland - PL = 51; - - // Portugal - PT = 52; - - // Romania - RO = 53; - - // Serbia - RS = 54; - - // Russian Federation - RU = 55; - - // Saudi Arabia - SA = 56; - - // Sweden - SE = 57; - - // Singapore - SG = 58; - - // Slovenia - SI = 59; - - // Slovakia - SK = 60; - - // Thailand - TH = 61; - - // Turkey - 
TR = 62; - - // Taiwan - TW = 63; - - // Ukraine - UA = 64; - - // United States - US = 65; - - // Venezuela - VE = 66; - - // Viet Nam - VN = 67; - - // South Africa - ZA = 68; - } - - // Indicates the learning rate optimization strategy to use. - enum LearnRateStrategy { - LEARN_RATE_STRATEGY_UNSPECIFIED = 0; - - // Use line search to determine learning rate. - LINE_SEARCH = 1; - - // Use a constant learning rate. - CONSTANT = 2; - } - - // Indicates the optimization strategy used for training. - enum OptimizationStrategy { - OPTIMIZATION_STRATEGY_UNSPECIFIED = 0; - - // Uses an iterative batch gradient descent algorithm. - BATCH_GRADIENT_DESCENT = 1; - - // Uses a normal equation to solve linear regression problem. - NORMAL_EQUATION = 2; - } - - // Indicates the training algorithm to use for matrix factorization models. - enum FeedbackType { - FEEDBACK_TYPE_UNSPECIFIED = 0; - - // Use weighted-als for implicit feedback problems. - IMPLICIT = 1; - - // Use nonweighted-als for explicit feedback problems. - EXPLICIT = 2; - } - - // Output only. A hash of this resource. - string etag = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; - - // Required. Unique identifier for this model. - ModelReference model_reference = 2 [(google.api.field_behavior) = REQUIRED]; - - // Output only. The time when this model was created, in millisecs since the epoch. - int64 creation_time = 5 [(google.api.field_behavior) = OUTPUT_ONLY]; - - // Output only. The time when this model was last modified, in millisecs since the epoch. - int64 last_modified_time = 6 [(google.api.field_behavior) = OUTPUT_ONLY]; - - // Optional. A user-friendly description of this model. - string description = 12 [(google.api.field_behavior) = OPTIONAL]; - - // Optional. A descriptive name for this model. - string friendly_name = 14 [(google.api.field_behavior) = OPTIONAL]; - - // The labels associated with this model. You can use these to organize - // and group your models. 
Label keys and values can be no longer - // than 63 characters, can only contain lowercase letters, numeric - // characters, underscores and dashes. International characters are allowed. - // Label values are optional. Label keys must start with a letter and each - // label in the list must have a different key. - map labels = 15; - - // Optional. The time when this model expires, in milliseconds since the epoch. - // If not present, the model will persist indefinitely. Expired models - // will be deleted and their storage reclaimed. The defaultTableExpirationMs - // property of the encapsulating dataset can be used to set a default - // expirationTime on newly created models. - int64 expiration_time = 16 [(google.api.field_behavior) = OPTIONAL]; - - // Output only. The geographic location where the model resides. This value - // is inherited from the dataset. - string location = 13 [(google.api.field_behavior) = OUTPUT_ONLY]; - - // Custom encryption configuration (e.g., Cloud KMS keys). This shows the - // encryption configuration of the model data while stored in BigQuery - // storage. This field can be used with PatchModel to update encryption key - // for an already encrypted model. - EncryptionConfiguration encryption_configuration = 17; - - // Output only. Type of the model resource. - ModelType model_type = 7 [(google.api.field_behavior) = OUTPUT_ONLY]; - - // Output only. Information for all training runs in increasing order of start_time. - repeated TrainingRun training_runs = 9 [(google.api.field_behavior) = OUTPUT_ONLY]; - - // Output only. Input feature columns that were used to train this model. - repeated StandardSqlField feature_columns = 10 [(google.api.field_behavior) = OUTPUT_ONLY]; - - // Output only. Label columns that were used to train this model. - // The output of the model will have a "predicted_" prefix to these columns. 
- repeated StandardSqlField label_columns = 11 [(google.api.field_behavior) = OUTPUT_ONLY]; -} - -message GetModelRequest { - // Required. Project ID of the requested model. - string project_id = 1 [(google.api.field_behavior) = REQUIRED]; - - // Required. Dataset ID of the requested model. - string dataset_id = 2 [(google.api.field_behavior) = REQUIRED]; - - // Required. Model ID of the requested model. - string model_id = 3 [(google.api.field_behavior) = REQUIRED]; -} - -message PatchModelRequest { - // Required. Project ID of the model to patch. - string project_id = 1 [(google.api.field_behavior) = REQUIRED]; - - // Required. Dataset ID of the model to patch. - string dataset_id = 2 [(google.api.field_behavior) = REQUIRED]; - - // Required. Model ID of the model to patch. - string model_id = 3 [(google.api.field_behavior) = REQUIRED]; - - // Required. Patched model. - // Follows RFC5789 patch semantics. Missing fields are not updated. - // To clear a field, explicitly set to default value. - Model model = 4 [(google.api.field_behavior) = REQUIRED]; -} - -message DeleteModelRequest { - // Required. Project ID of the model to delete. - string project_id = 1 [(google.api.field_behavior) = REQUIRED]; - - // Required. Dataset ID of the model to delete. - string dataset_id = 2 [(google.api.field_behavior) = REQUIRED]; - - // Required. Model ID of the model to delete. - string model_id = 3 [(google.api.field_behavior) = REQUIRED]; -} - -message ListModelsRequest { - // Required. Project ID of the models to list. - string project_id = 1 [(google.api.field_behavior) = REQUIRED]; - - // Required. Dataset ID of the models to list. - string dataset_id = 2 [(google.api.field_behavior) = REQUIRED]; - - // The maximum number of results to return in a single response page. - // Leverage the page tokens to iterate through the entire collection. 
- google.protobuf.UInt32Value max_results = 3; - - // Page token, returned by a previous call to request the next page of - // results - string page_token = 4; -} - -message ListModelsResponse { - // Models in the requested dataset. Only the following fields are populated: - // model_reference, model_type, creation_time, last_modified_time and - // labels. - repeated Model models = 1; - - // A token to request the next page of results. - string next_page_token = 2; -} diff --git a/google/cloud/bigquery_v2/proto/model_pb2.py b/google/cloud/bigquery_v2/proto/model_pb2.py deleted file mode 100644 index 7b66be8f7..000000000 --- a/google/cloud/bigquery_v2/proto/model_pb2.py +++ /dev/null @@ -1,4298 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: google/cloud/bigquery_v2/proto/model.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from google.api import client_pb2 as google_dot_api_dot_client__pb2 -from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2 -from google.cloud.bigquery_v2.proto import ( - encryption_config_pb2 as google_dot_cloud_dot_bigquery__v2_dot_proto_dot_encryption__config__pb2, -) -from google.cloud.bigquery_v2.proto import ( - model_reference_pb2 as google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__reference__pb2, -) -from google.cloud.bigquery_v2.proto import ( - standard_sql_pb2 as google_dot_cloud_dot_bigquery__v2_dot_proto_dot_standard__sql__pb2, -) -from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 -from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2 -from google.protobuf import wrappers_pb2 as 
google_dot_protobuf_dot_wrappers__pb2 -from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/bigquery_v2/proto/model.proto", - package="google.cloud.bigquery.v2", - syntax="proto3", - serialized_options=b"\n\034com.google.cloud.bigquery.v2B\nModelProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery", - create_key=_descriptor._internal_create_key, - serialized_pb=b'\n*google/cloud/bigquery_v2/proto/model.proto\x12\x18google.cloud.bigquery.v2\x1a\x17google/api/client.proto\x1a\x1fgoogle/api/field_behavior.proto\x1a\x36google/cloud/bigquery_v2/proto/encryption_config.proto\x1a\x34google/cloud/bigquery_v2/proto/model_reference.proto\x1a\x31google/cloud/bigquery_v2/proto/standard_sql.proto\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1egoogle/protobuf/wrappers.proto\x1a\x1cgoogle/api/annotations.proto"\x9b\x35\n\x05Model\x12\x11\n\x04\x65tag\x18\x01 \x01(\tB\x03\xe0\x41\x03\x12\x46\n\x0fmodel_reference\x18\x02 \x01(\x0b\x32(.google.cloud.bigquery.v2.ModelReferenceB\x03\xe0\x41\x02\x12\x1a\n\rcreation_time\x18\x05 \x01(\x03\x42\x03\xe0\x41\x03\x12\x1f\n\x12last_modified_time\x18\x06 \x01(\x03\x42\x03\xe0\x41\x03\x12\x18\n\x0b\x64\x65scription\x18\x0c \x01(\tB\x03\xe0\x41\x01\x12\x1a\n\rfriendly_name\x18\x0e \x01(\tB\x03\xe0\x41\x01\x12;\n\x06labels\x18\x0f \x03(\x0b\x32+.google.cloud.bigquery.v2.Model.LabelsEntry\x12\x1c\n\x0f\x65xpiration_time\x18\x10 \x01(\x03\x42\x03\xe0\x41\x01\x12\x15\n\x08location\x18\r \x01(\tB\x03\xe0\x41\x03\x12S\n\x18\x65ncryption_configuration\x18\x11 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.EncryptionConfiguration\x12\x42\n\nmodel_type\x18\x07 \x01(\x0e\x32).google.cloud.bigquery.v2.Model.ModelTypeB\x03\xe0\x41\x03\x12G\n\rtraining_runs\x18\t \x03(\x0b\x32+.google.cloud.bigquery.v2.Model.TrainingRunB\x03\xe0\x41\x03\x12H\n\x0f\x66\x65\x61ture_columns\x18\n 
\x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlFieldB\x03\xe0\x41\x03\x12\x46\n\rlabel_columns\x18\x0b \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlFieldB\x03\xe0\x41\x03\x1aq\n\x0bKmeansEnums"b\n\x1aKmeansInitializationMethod\x12,\n(KMEANS_INITIALIZATION_METHOD_UNSPECIFIED\x10\x00\x12\n\n\x06RANDOM\x10\x01\x12\n\n\x06\x43USTOM\x10\x02\x1a\xb4\x02\n\x11RegressionMetrics\x12\x39\n\x13mean_absolute_error\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x38\n\x12mean_squared_error\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12<\n\x16mean_squared_log_error\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15median_absolute_error\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\tr_squared\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\xef\x02\n\x1e\x41ggregateClassificationMetrics\x12/\n\tprecision\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12,\n\x06recall\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x61\x63\x63uracy\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\tthreshold\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x66\x31_score\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08log_loss\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12-\n\x07roc_auc\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x9f\x06\n\x1b\x42inaryClassificationMetrics\x12h\n aggregate_classification_metrics\x18\x01 \x01(\x0b\x32>.google.cloud.bigquery.v2.Model.AggregateClassificationMetrics\x12w\n\x1c\x62inary_confusion_matrix_list\x18\x02 \x03(\x0b\x32Q.google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix\x12\x16\n\x0epositive_label\x18\x03 \x01(\t\x12\x16\n\x0enegative_label\x18\x04 \x01(\t\x1a\xec\x03\n\x15\x42inaryConfusionMatrix\x12>\n\x18positive_class_threshold\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x33\n\x0etrue_positives\x18\x02 
\x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x34\n\x0f\x66\x61lse_positives\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x33\n\x0etrue_negatives\x18\x04 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x34\n\x0f\x66\x61lse_negatives\x18\x05 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12/\n\tprecision\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12,\n\x06recall\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x66\x31_score\x18\x08 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x61\x63\x63uracy\x18\t \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x87\x05\n\x1fMultiClassClassificationMetrics\x12h\n aggregate_classification_metrics\x18\x01 \x01(\x0b\x32>.google.cloud.bigquery.v2.Model.AggregateClassificationMetrics\x12n\n\x15\x63onfusion_matrix_list\x18\x02 \x03(\x0b\x32O.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix\x1a\x89\x03\n\x0f\x43onfusionMatrix\x12:\n\x14\x63onfidence_threshold\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x61\n\x04rows\x18\x02 \x03(\x0b\x32S.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row\x1aQ\n\x05\x45ntry\x12\x17\n\x0fpredicted_label\x18\x01 \x01(\t\x12/\n\nitem_count\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a\x83\x01\n\x03Row\x12\x14\n\x0c\x61\x63tual_label\x18\x01 \x01(\t\x12\x66\n\x07\x65ntries\x18\x02 \x03(\x0b\x32U.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry\x1a\xcb\x06\n\x11\x43lusteringMetrics\x12:\n\x14\x64\x61vies_bouldin_index\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15mean_squared_distance\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12K\n\x08\x63lusters\x18\x03 \x03(\x0b\x32\x39.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster\x1a\xef\x04\n\x07\x43luster\x12\x13\n\x0b\x63\x65ntroid_id\x18\x01 \x01(\x03\x12^\n\x0e\x66\x65\x61ture_values\x18\x02 
\x03(\x0b\x32\x46.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue\x12*\n\x05\x63ount\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a\xc2\x03\n\x0c\x46\x65\x61tureValue\x12\x16\n\x0e\x66\x65\x61ture_column\x18\x01 \x01(\t\x12\x37\n\x0fnumerical_value\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValueH\x00\x12t\n\x11\x63\x61tegorical_value\x18\x03 \x01(\x0b\x32W.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValueH\x00\x1a\xe1\x01\n\x10\x43\x61tegoricalValue\x12~\n\x0f\x63\x61tegory_counts\x18\x01 \x03(\x0b\x32\x65.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount\x1aM\n\rCategoryCount\x12\x10\n\x08\x63\x61tegory\x18\x01 \x01(\t\x12*\n\x05\x63ount\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64ValueB\x07\n\x05value\x1a\x95\x03\n\x11\x45valuationMetrics\x12O\n\x12regression_metrics\x18\x01 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.RegressionMetricsH\x00\x12\x64\n\x1d\x62inary_classification_metrics\x18\x02 \x01(\x0b\x32;.google.cloud.bigquery.v2.Model.BinaryClassificationMetricsH\x00\x12m\n"multi_class_classification_metrics\x18\x03 \x01(\x0b\x32?.google.cloud.bigquery.v2.Model.MultiClassClassificationMetricsH\x00\x12O\n\x12\x63lustering_metrics\x18\x04 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.ClusteringMetricsH\x00\x42\t\n\x07metrics\x1a\xab\x0f\n\x0bTrainingRun\x12U\n\x10training_options\x18\x01 \x01(\x0b\x32;.google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions\x12.\n\nstart_time\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12L\n\x07results\x18\x06 \x03(\x0b\x32;.google.cloud.bigquery.v2.Model.TrainingRun.IterationResult\x12M\n\x12\x65valuation_metrics\x18\x07 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.EvaluationMetrics\x1a\x9d\t\n\x0fTrainingOptions\x12\x16\n\x0emax_iterations\x18\x01 \x01(\x03\x12;\n\tloss_type\x18\x02 \x01(\x0e\x32(.google.cloud.bigquery.v2.Model.LossType\x12\x12\n\nlearn_rate\x18\x03 
\x01(\x01\x12\x37\n\x11l1_regularization\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x37\n\x11l2_regularization\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15min_relative_progress\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\nwarm_start\x18\x07 \x01(\x0b\x32\x1a.google.protobuf.BoolValue\x12.\n\nearly_stop\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.BoolValue\x12\x1b\n\x13input_label_columns\x18\t \x03(\t\x12J\n\x11\x64\x61ta_split_method\x18\n \x01(\x0e\x32/.google.cloud.bigquery.v2.Model.DataSplitMethod\x12 \n\x18\x64\x61ta_split_eval_fraction\x18\x0b \x01(\x01\x12\x19\n\x11\x64\x61ta_split_column\x18\x0c \x01(\t\x12N\n\x13learn_rate_strategy\x18\r \x01(\x0e\x32\x31.google.cloud.bigquery.v2.Model.LearnRateStrategy\x12\x1a\n\x12initial_learn_rate\x18\x10 \x01(\x01\x12o\n\x13label_class_weights\x18\x11 \x03(\x0b\x32R.google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry\x12\x43\n\rdistance_type\x18\x14 \x01(\x0e\x32,.google.cloud.bigquery.v2.Model.DistanceType\x12\x14\n\x0cnum_clusters\x18\x15 \x01(\x03\x12\x11\n\tmodel_uri\x18\x16 \x01(\t\x12S\n\x15optimization_strategy\x18\x17 \x01(\x0e\x32\x34.google.cloud.bigquery.v2.Model.OptimizationStrategy\x12l\n\x1ckmeans_initialization_method\x18! 
\x01(\x0e\x32\x46.google.cloud.bigquery.v2.Model.KmeansEnums.KmeansInitializationMethod\x12$\n\x1ckmeans_initialization_column\x18" \x01(\t\x1a\x38\n\x16LabelClassWeightsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x01:\x02\x38\x01\x1a\xd7\x03\n\x0fIterationResult\x12*\n\x05index\x18\x01 \x01(\x0b\x32\x1b.google.protobuf.Int32Value\x12\x30\n\x0b\x64uration_ms\x18\x04 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x33\n\rtraining_loss\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\teval_loss\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x12\n\nlearn_rate\x18\x07 \x01(\x01\x12^\n\rcluster_infos\x18\x08 \x03(\x0b\x32G.google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo\x1a\x8b\x01\n\x0b\x43lusterInfo\x12\x13\n\x0b\x63\x65ntroid_id\x18\x01 \x01(\x03\x12\x34\n\x0e\x63luster_radius\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x31\n\x0c\x63luster_size\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 
\x01(\t:\x02\x38\x01"s\n\tModelType\x12\x1a\n\x16MODEL_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11LINEAR_REGRESSION\x10\x01\x12\x17\n\x13LOGISTIC_REGRESSION\x10\x02\x12\n\n\x06KMEANS\x10\x03\x12\x0e\n\nTENSORFLOW\x10\x06"O\n\x08LossType\x12\x19\n\x15LOSS_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11MEAN_SQUARED_LOSS\x10\x01\x12\x11\n\rMEAN_LOG_LOSS\x10\x02"H\n\x0c\x44istanceType\x12\x1d\n\x19\x44ISTANCE_TYPE_UNSPECIFIED\x10\x00\x12\r\n\tEUCLIDEAN\x10\x01\x12\n\n\x06\x43OSINE\x10\x02"z\n\x0f\x44\x61taSplitMethod\x12!\n\x1d\x44\x41TA_SPLIT_METHOD_UNSPECIFIED\x10\x00\x12\n\n\x06RANDOM\x10\x01\x12\n\n\x06\x43USTOM\x10\x02\x12\x0e\n\nSEQUENTIAL\x10\x03\x12\x0c\n\x08NO_SPLIT\x10\x04\x12\x0e\n\nAUTO_SPLIT\x10\x05"W\n\x11LearnRateStrategy\x12#\n\x1fLEARN_RATE_STRATEGY_UNSPECIFIED\x10\x00\x12\x0f\n\x0bLINE_SEARCH\x10\x01\x12\x0c\n\x08\x43ONSTANT\x10\x02"n\n\x14OptimizationStrategy\x12%\n!OPTIMIZATION_STRATEGY_UNSPECIFIED\x10\x00\x12\x1a\n\x16\x42\x41TCH_GRADIENT_DESCENT\x10\x01\x12\x13\n\x0fNORMAL_EQUATION\x10\x02"Z\n\x0fGetModelRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02"\x91\x01\n\x11PatchModelRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02\x12\x33\n\x05model\x18\x04 \x01(\x0b\x32\x1f.google.cloud.bigquery.v2.ModelB\x03\xe0\x41\x02"]\n\x12\x44\x65leteModelRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02"\x8c\x01\n\x11ListModelsRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x31\n\x0bmax_results\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.UInt32Value\x12\x12\n\npage_token\x18\x04 \x01(\t"^\n\x12ListModelsResponse\x12/\n\x06models\x18\x01 
\x03(\x0b\x32\x1f.google.cloud.bigquery.v2.Model\x12\x17\n\x0fnext_page_token\x18\x02 \x01(\t2\xfa\x05\n\x0cModelService\x12y\n\x08GetModel\x12).google.cloud.bigquery.v2.GetModelRequest\x1a\x1f.google.cloud.bigquery.v2.Model"!\xda\x41\x1eproject_id,dataset_id,model_id\x12\x8d\x01\n\nListModels\x12+.google.cloud.bigquery.v2.ListModelsRequest\x1a,.google.cloud.bigquery.v2.ListModelsResponse"$\xda\x41!project_id,dataset_id,max_results\x12\x83\x01\n\nPatchModel\x12+.google.cloud.bigquery.v2.PatchModelRequest\x1a\x1f.google.cloud.bigquery.v2.Model"\'\xda\x41$project_id,dataset_id,model_id,model\x12v\n\x0b\x44\x65leteModel\x12,.google.cloud.bigquery.v2.DeleteModelRequest\x1a\x16.google.protobuf.Empty"!\xda\x41\x1eproject_id,dataset_id,model_id\x1a\xe0\x01\xca\x41\x17\x62igquery.googleapis.com\xd2\x41\xc2\x01https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/bigquery.readonly,https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/cloud-platform.read-onlyBl\n\x1c\x63om.google.cloud.bigquery.v2B\nModelProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3', - dependencies=[ - google_dot_api_dot_client__pb2.DESCRIPTOR, - google_dot_api_dot_field__behavior__pb2.DESCRIPTOR, - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_encryption__config__pb2.DESCRIPTOR, - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__reference__pb2.DESCRIPTOR, - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_standard__sql__pb2.DESCRIPTOR, - google_dot_protobuf_dot_empty__pb2.DESCRIPTOR, - google_dot_protobuf_dot_timestamp__pb2.DESCRIPTOR, - google_dot_protobuf_dot_wrappers__pb2.DESCRIPTOR, - google_dot_api_dot_annotations__pb2.DESCRIPTOR, - ], -) - - -_MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD = _descriptor.EnumDescriptor( - name="KmeansInitializationMethod", - full_name="google.cloud.bigquery.v2.Model.KmeansEnums.KmeansInitializationMethod", - filename=None, - file=DESCRIPTOR, - 
create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="KMEANS_INITIALIZATION_METHOD_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="RANDOM", - index=1, - number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="CUSTOM", - index=2, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=1132, - serialized_end=1230, -) -_sym_db.RegisterEnumDescriptor(_MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD) - -_MODEL_MODELTYPE = _descriptor.EnumDescriptor( - name="ModelType", - full_name="google.cloud.bigquery.v2.Model.ModelType", - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="MODEL_TYPE_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="LINEAR_REGRESSION", - index=1, - number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="LOGISTIC_REGRESSION", - index=2, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="KMEANS", - index=3, - number=3, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="TENSORFLOW", - index=4, - number=6, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=6632, - serialized_end=6747, -) 
-_sym_db.RegisterEnumDescriptor(_MODEL_MODELTYPE) - -_MODEL_LOSSTYPE = _descriptor.EnumDescriptor( - name="LossType", - full_name="google.cloud.bigquery.v2.Model.LossType", - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="LOSS_TYPE_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="MEAN_SQUARED_LOSS", - index=1, - number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="MEAN_LOG_LOSS", - index=2, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=6749, - serialized_end=6828, -) -_sym_db.RegisterEnumDescriptor(_MODEL_LOSSTYPE) - -_MODEL_DISTANCETYPE = _descriptor.EnumDescriptor( - name="DistanceType", - full_name="google.cloud.bigquery.v2.Model.DistanceType", - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="DISTANCE_TYPE_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="EUCLIDEAN", - index=1, - number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="COSINE", - index=2, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=6830, - serialized_end=6902, -) -_sym_db.RegisterEnumDescriptor(_MODEL_DISTANCETYPE) - -_MODEL_DATASPLITMETHOD = _descriptor.EnumDescriptor( - name="DataSplitMethod", - 
full_name="google.cloud.bigquery.v2.Model.DataSplitMethod", - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="DATA_SPLIT_METHOD_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="RANDOM", - index=1, - number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="CUSTOM", - index=2, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="SEQUENTIAL", - index=3, - number=3, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="NO_SPLIT", - index=4, - number=4, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="AUTO_SPLIT", - index=5, - number=5, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=6904, - serialized_end=7026, -) -_sym_db.RegisterEnumDescriptor(_MODEL_DATASPLITMETHOD) - -_MODEL_LEARNRATESTRATEGY = _descriptor.EnumDescriptor( - name="LearnRateStrategy", - full_name="google.cloud.bigquery.v2.Model.LearnRateStrategy", - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="LEARN_RATE_STRATEGY_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="LINE_SEARCH", - index=1, - number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - 
name="CONSTANT", - index=2, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=7028, - serialized_end=7115, -) -_sym_db.RegisterEnumDescriptor(_MODEL_LEARNRATESTRATEGY) - -_MODEL_OPTIMIZATIONSTRATEGY = _descriptor.EnumDescriptor( - name="OptimizationStrategy", - full_name="google.cloud.bigquery.v2.Model.OptimizationStrategy", - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="OPTIMIZATION_STRATEGY_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="BATCH_GRADIENT_DESCENT", - index=1, - number=1, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="NORMAL_EQUATION", - index=2, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=7117, - serialized_end=7227, -) -_sym_db.RegisterEnumDescriptor(_MODEL_OPTIMIZATIONSTRATEGY) - - -_MODEL_KMEANSENUMS = _descriptor.Descriptor( - name="KmeansEnums", - full_name="google.cloud.bigquery.v2.Model.KmeansEnums", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[], - extensions=[], - nested_types=[], - enum_types=[_MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD,], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1117, - serialized_end=1230, -) - -_MODEL_REGRESSIONMETRICS = _descriptor.Descriptor( - name="RegressionMetrics", - full_name="google.cloud.bigquery.v2.Model.RegressionMetrics", - filename=None, - file=DESCRIPTOR, - containing_type=None, - 
create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="mean_absolute_error", - full_name="google.cloud.bigquery.v2.Model.RegressionMetrics.mean_absolute_error", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="mean_squared_error", - full_name="google.cloud.bigquery.v2.Model.RegressionMetrics.mean_squared_error", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="mean_squared_log_error", - full_name="google.cloud.bigquery.v2.Model.RegressionMetrics.mean_squared_log_error", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="median_absolute_error", - full_name="google.cloud.bigquery.v2.Model.RegressionMetrics.median_absolute_error", - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="r_squared", - 
full_name="google.cloud.bigquery.v2.Model.RegressionMetrics.r_squared", - index=4, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1233, - serialized_end=1541, -) - -_MODEL_AGGREGATECLASSIFICATIONMETRICS = _descriptor.Descriptor( - name="AggregateClassificationMetrics", - full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="precision", - full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.precision", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="recall", - full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.recall", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="accuracy", - full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.accuracy", - index=2, - number=3, - type=11, - cpp_type=10, - 
label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="threshold", - full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.threshold", - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="f1_score", - full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.f1_score", - index=4, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="log_loss", - full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.log_loss", - index=5, - number=6, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="roc_auc", - full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.roc_auc", - index=6, - number=7, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - 
serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1544, - serialized_end=1911, -) - -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX = _descriptor.Descriptor( - name="BinaryConfusionMatrix", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="positive_class_threshold", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.positive_class_threshold", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="true_positives", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.true_positives", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="false_positives", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.false_positives", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - 
extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="true_negatives", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.true_negatives", - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="false_negatives", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.false_negatives", - index=4, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="precision", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.precision", - index=5, - number=6, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="recall", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.recall", - index=6, - number=7, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - 
create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="f1_score", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.f1_score", - index=7, - number=8, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="accuracy", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.accuracy", - index=8, - number=9, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2221, - serialized_end=2713, -) - -_MODEL_BINARYCLASSIFICATIONMETRICS = _descriptor.Descriptor( - name="BinaryClassificationMetrics", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="aggregate_classification_metrics", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.aggregate_classification_metrics", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - 
_descriptor.FieldDescriptor( - name="binary_confusion_matrix_list", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.binary_confusion_matrix_list", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="positive_label", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.positive_label", - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="negative_label", - full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.negative_label", - index=3, - number=4, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX,], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=1914, - serialized_end=2713, -) - -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY = _descriptor.Descriptor( - name="Entry", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - 
create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="predicted_label", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry.predicted_label", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="item_count", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry.item_count", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3148, - serialized_end=3229, -) - -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW = _descriptor.Descriptor( - name="Row", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="actual_label", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row.actual_label", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - 
file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="entries", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row.entries", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3232, - serialized_end=3363, -) - -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX = _descriptor.Descriptor( - name="ConfusionMatrix", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="confidence_threshold", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.confidence_threshold", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="rows", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.rows", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - 
create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[ - _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY, - _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2970, - serialized_end=3363, -) - -_MODEL_MULTICLASSCLASSIFICATIONMETRICS = _descriptor.Descriptor( - name="MultiClassClassificationMetrics", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="aggregate_classification_metrics", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.aggregate_classification_metrics", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="confusion_matrix_list", - full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.confusion_matrix_list", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX,], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=2716, - serialized_end=3363, -) - 
-_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT = _descriptor.Descriptor( - name="CategoryCount", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="category", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount.category", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="count", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount.count", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=4123, - serialized_end=4200, -) - -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE = _descriptor.Descriptor( - name="CategoricalValue", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="category_counts", - 
full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.category_counts", - index=0, - number=1, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[ - _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3975, - serialized_end=4200, -) - -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE = _descriptor.Descriptor( - name="FeatureValue", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="feature_column", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.feature_column", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="numerical_value", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.numerical_value", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - 
_descriptor.FieldDescriptor( - name="categorical_value", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.categorical_value", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE,], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name="value", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.value", - index=0, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[], - ), - ], - serialized_start=3759, - serialized_end=4209, -) - -_MODEL_CLUSTERINGMETRICS_CLUSTER = _descriptor.Descriptor( - name="Cluster", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="centroid_id", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.centroid_id", - index=0, - number=1, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="feature_values", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.feature_values", - index=1, - number=2, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - 
enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="count", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.count", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE,], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3586, - serialized_end=4209, -) - -_MODEL_CLUSTERINGMETRICS = _descriptor.Descriptor( - name="ClusteringMetrics", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="davies_bouldin_index", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.davies_bouldin_index", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="mean_squared_distance", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.mean_squared_distance", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - 
serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="clusters", - full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.clusters", - index=2, - number=3, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[_MODEL_CLUSTERINGMETRICS_CLUSTER,], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=3366, - serialized_end=4209, -) - -_MODEL_EVALUATIONMETRICS = _descriptor.Descriptor( - name="EvaluationMetrics", - full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="regression_metrics", - full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics.regression_metrics", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="binary_classification_metrics", - full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics.binary_classification_metrics", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - 
_descriptor.FieldDescriptor( - name="multi_class_classification_metrics", - full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics.multi_class_classification_metrics", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="clustering_metrics", - full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics.clustering_metrics", - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name="metrics", - full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics.metrics", - index=0, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[], - ), - ], - serialized_start=4212, - serialized_end=4617, -) - -_MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY = _descriptor.Descriptor( - name="LabelClassWeightsEntry", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry.key", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - 
message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="value", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry.value", - index=1, - number=2, - type=1, - cpp_type=5, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=b"8\001", - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=6053, - serialized_end=6109, -) - -_MODEL_TRAININGRUN_TRAININGOPTIONS = _descriptor.Descriptor( - name="TrainingOptions", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="max_iterations", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.max_iterations", - index=0, - number=1, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="loss_type", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.loss_type", - index=1, - number=2, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - 
serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="learn_rate", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.learn_rate", - index=2, - number=3, - type=1, - cpp_type=5, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="l1_regularization", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.l1_regularization", - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="l2_regularization", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.l2_regularization", - index=4, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="min_relative_progress", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.min_relative_progress", - index=5, - number=6, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - 
name="warm_start", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.warm_start", - index=6, - number=7, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="early_stop", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.early_stop", - index=7, - number=8, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="input_label_columns", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.input_label_columns", - index=8, - number=9, - type=9, - cpp_type=9, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="data_split_method", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.data_split_method", - index=9, - number=10, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="data_split_eval_fraction", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.data_split_eval_fraction", - index=10, - number=11, - 
type=1, - cpp_type=5, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="data_split_column", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.data_split_column", - index=11, - number=12, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="learn_rate_strategy", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.learn_rate_strategy", - index=12, - number=13, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="initial_learn_rate", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.initial_learn_rate", - index=13, - number=16, - type=1, - cpp_type=5, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="label_class_weights", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.label_class_weights", - index=14, - number=17, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - 
message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="distance_type", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.distance_type", - index=15, - number=20, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="num_clusters", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.num_clusters", - index=16, - number=21, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="model_uri", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.model_uri", - index=17, - number=22, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="optimization_strategy", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.optimization_strategy", - index=18, - number=23, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - 
file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="kmeans_initialization_method", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.kmeans_initialization_method", - index=19, - number=33, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="kmeans_initialization_column", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.kmeans_initialization_column", - index=20, - number=34, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[_MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY,], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=4928, - serialized_end=6109, -) - -_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO = _descriptor.Descriptor( - name="ClusterInfo", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="centroid_id", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo.centroid_id", - index=0, - number=1, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - 
serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="cluster_radius", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo.cluster_radius", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="cluster_size", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo.cluster_size", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=6444, - serialized_end=6583, -) - -_MODEL_TRAININGRUN_ITERATIONRESULT = _descriptor.Descriptor( - name="IterationResult", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="index", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.index", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - 
_descriptor.FieldDescriptor( - name="duration_ms", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.duration_ms", - index=1, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="training_loss", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.training_loss", - index=2, - number=5, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="eval_loss", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.eval_loss", - index=3, - number=6, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="learn_rate", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.learn_rate", - index=4, - number=7, - type=1, - cpp_type=5, - label=1, - has_default_value=False, - default_value=float(0), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="cluster_infos", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.cluster_infos", - index=5, - number=8, - type=11, - 
cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO,], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=6112, - serialized_end=6583, -) - -_MODEL_TRAININGRUN = _descriptor.Descriptor( - name="TrainingRun", - full_name="google.cloud.bigquery.v2.Model.TrainingRun", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="training_options", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.training_options", - index=0, - number=1, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="start_time", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.start_time", - index=1, - number=8, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="results", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.results", - index=2, - number=6, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - 
extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="evaluation_metrics", - full_name="google.cloud.bigquery.v2.Model.TrainingRun.evaluation_metrics", - index=3, - number=7, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[ - _MODEL_TRAININGRUN_TRAININGOPTIONS, - _MODEL_TRAININGRUN_ITERATIONRESULT, - ], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=4620, - serialized_end=6583, -) - -_MODEL_LABELSENTRY = _descriptor.Descriptor( - name="LabelsEntry", - full_name="google.cloud.bigquery.v2.Model.LabelsEntry", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="key", - full_name="google.cloud.bigquery.v2.Model.LabelsEntry.key", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="value", - full_name="google.cloud.bigquery.v2.Model.LabelsEntry.value", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - 
extensions=[], - nested_types=[], - enum_types=[], - serialized_options=b"8\001", - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=6585, - serialized_end=6630, -) - -_MODEL = _descriptor.Descriptor( - name="Model", - full_name="google.cloud.bigquery.v2.Model", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="etag", - full_name="google.cloud.bigquery.v2.Model.etag", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="model_reference", - full_name="google.cloud.bigquery.v2.Model.model_reference", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="creation_time", - full_name="google.cloud.bigquery.v2.Model.creation_time", - index=2, - number=5, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="last_modified_time", - full_name="google.cloud.bigquery.v2.Model.last_modified_time", - index=3, - number=6, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - 
enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="description", - full_name="google.cloud.bigquery.v2.Model.description", - index=4, - number=12, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\001", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="friendly_name", - full_name="google.cloud.bigquery.v2.Model.friendly_name", - index=5, - number=14, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\001", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="labels", - full_name="google.cloud.bigquery.v2.Model.labels", - index=6, - number=15, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="expiration_time", - full_name="google.cloud.bigquery.v2.Model.expiration_time", - index=7, - number=16, - type=3, - cpp_type=2, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\001", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="location", 
- full_name="google.cloud.bigquery.v2.Model.location", - index=8, - number=13, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="encryption_configuration", - full_name="google.cloud.bigquery.v2.Model.encryption_configuration", - index=9, - number=17, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="model_type", - full_name="google.cloud.bigquery.v2.Model.model_type", - index=10, - number=7, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="training_runs", - full_name="google.cloud.bigquery.v2.Model.training_runs", - index=11, - number=9, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="feature_columns", - full_name="google.cloud.bigquery.v2.Model.feature_columns", - index=12, - number=10, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - 
containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="label_columns", - full_name="google.cloud.bigquery.v2.Model.label_columns", - index=13, - number=11, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\003", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[ - _MODEL_KMEANSENUMS, - _MODEL_REGRESSIONMETRICS, - _MODEL_AGGREGATECLASSIFICATIONMETRICS, - _MODEL_BINARYCLASSIFICATIONMETRICS, - _MODEL_MULTICLASSCLASSIFICATIONMETRICS, - _MODEL_CLUSTERINGMETRICS, - _MODEL_EVALUATIONMETRICS, - _MODEL_TRAININGRUN, - _MODEL_LABELSENTRY, - ], - enum_types=[ - _MODEL_MODELTYPE, - _MODEL_LOSSTYPE, - _MODEL_DISTANCETYPE, - _MODEL_DATASPLITMETHOD, - _MODEL_LEARNRATESTRATEGY, - _MODEL_OPTIMIZATIONSTRATEGY, - ], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=416, - serialized_end=7227, -) - - -_GETMODELREQUEST = _descriptor.Descriptor( - name="GetModelRequest", - full_name="google.cloud.bigquery.v2.GetModelRequest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="project_id", - full_name="google.cloud.bigquery.v2.GetModelRequest.project_id", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - 
name="dataset_id", - full_name="google.cloud.bigquery.v2.GetModelRequest.dataset_id", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="model_id", - full_name="google.cloud.bigquery.v2.GetModelRequest.model_id", - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=7229, - serialized_end=7319, -) - - -_PATCHMODELREQUEST = _descriptor.Descriptor( - name="PatchModelRequest", - full_name="google.cloud.bigquery.v2.PatchModelRequest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="project_id", - full_name="google.cloud.bigquery.v2.PatchModelRequest.project_id", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="dataset_id", - full_name="google.cloud.bigquery.v2.PatchModelRequest.dataset_id", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - 
has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="model_id", - full_name="google.cloud.bigquery.v2.PatchModelRequest.model_id", - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="model", - full_name="google.cloud.bigquery.v2.PatchModelRequest.model", - index=3, - number=4, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=7322, - serialized_end=7467, -) - - -_DELETEMODELREQUEST = _descriptor.Descriptor( - name="DeleteModelRequest", - full_name="google.cloud.bigquery.v2.DeleteModelRequest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="project_id", - full_name="google.cloud.bigquery.v2.DeleteModelRequest.project_id", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - 
serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="dataset_id", - full_name="google.cloud.bigquery.v2.DeleteModelRequest.dataset_id", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="model_id", - full_name="google.cloud.bigquery.v2.DeleteModelRequest.model_id", - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=7469, - serialized_end=7562, -) - - -_LISTMODELSREQUEST = _descriptor.Descriptor( - name="ListModelsRequest", - full_name="google.cloud.bigquery.v2.ListModelsRequest", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="project_id", - full_name="google.cloud.bigquery.v2.ListModelsRequest.project_id", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="dataset_id", - 
full_name="google.cloud.bigquery.v2.ListModelsRequest.dataset_id", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="max_results", - full_name="google.cloud.bigquery.v2.ListModelsRequest.max_results", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="page_token", - full_name="google.cloud.bigquery.v2.ListModelsRequest.page_token", - index=3, - number=4, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=7565, - serialized_end=7705, -) - - -_LISTMODELSRESPONSE = _descriptor.Descriptor( - name="ListModelsResponse", - full_name="google.cloud.bigquery.v2.ListModelsResponse", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="models", - full_name="google.cloud.bigquery.v2.ListModelsResponse.models", - index=0, - number=1, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - 
message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="next_page_token", - full_name="google.cloud.bigquery.v2.ListModelsResponse.next_page_token", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=7707, - serialized_end=7801, -) - -_MODEL_KMEANSENUMS.containing_type = _MODEL -_MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD.containing_type = _MODEL_KMEANSENUMS -_MODEL_REGRESSIONMETRICS.fields_by_name[ - "mean_absolute_error" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_REGRESSIONMETRICS.fields_by_name[ - "mean_squared_error" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_REGRESSIONMETRICS.fields_by_name[ - "mean_squared_log_error" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_REGRESSIONMETRICS.fields_by_name[ - "median_absolute_error" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_REGRESSIONMETRICS.fields_by_name[ - "r_squared" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_REGRESSIONMETRICS.containing_type = _MODEL -_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ - "precision" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ - "recall" -].message_type = 
google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ - "accuracy" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ - "threshold" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ - "f1_score" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ - "log_loss" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[ - "roc_auc" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_AGGREGATECLASSIFICATIONMETRICS.containing_type = _MODEL -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ - "positive_class_threshold" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ - "true_positives" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ - "false_positives" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ - "true_negatives" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ - "false_negatives" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ - "precision" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ - "recall" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE 
-_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ - "f1_score" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[ - "accuracy" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.containing_type = ( - _MODEL_BINARYCLASSIFICATIONMETRICS -) -_MODEL_BINARYCLASSIFICATIONMETRICS.fields_by_name[ - "aggregate_classification_metrics" -].message_type = _MODEL_AGGREGATECLASSIFICATIONMETRICS -_MODEL_BINARYCLASSIFICATIONMETRICS.fields_by_name[ - "binary_confusion_matrix_list" -].message_type = _MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX -_MODEL_BINARYCLASSIFICATIONMETRICS.containing_type = _MODEL -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY.fields_by_name[ - "item_count" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY.containing_type = ( - _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX -) -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW.fields_by_name[ - "entries" -].message_type = _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW.containing_type = ( - _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX -) -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX.fields_by_name[ - "confidence_threshold" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX.fields_by_name[ - "rows" -].message_type = _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW -_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX.containing_type = ( - _MODEL_MULTICLASSCLASSIFICATIONMETRICS -) -_MODEL_MULTICLASSCLASSIFICATIONMETRICS.fields_by_name[ - "aggregate_classification_metrics" -].message_type = _MODEL_AGGREGATECLASSIFICATIONMETRICS 
-_MODEL_MULTICLASSCLASSIFICATIONMETRICS.fields_by_name[ - "confusion_matrix_list" -].message_type = _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX -_MODEL_MULTICLASSCLASSIFICATIONMETRICS.containing_type = _MODEL -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT.fields_by_name[ - "count" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT.containing_type = ( - _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE -) -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE.fields_by_name[ - "category_counts" -].message_type = ( - _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT -) -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE.containing_type = ( - _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE -) -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name[ - "numerical_value" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name[ - "categorical_value" -].message_type = _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.containing_type = ( - _MODEL_CLUSTERINGMETRICS_CLUSTER -) -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.oneofs_by_name["value"].fields.append( - _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name["numerical_value"] -) -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name[ - "numerical_value" -].containing_oneof = _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.oneofs_by_name[ - "value" -] -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.oneofs_by_name["value"].fields.append( - _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name["categorical_value"] -) -_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name[ - "categorical_value" -].containing_oneof = 
_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.oneofs_by_name[ - "value" -] -_MODEL_CLUSTERINGMETRICS_CLUSTER.fields_by_name[ - "feature_values" -].message_type = _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE -_MODEL_CLUSTERINGMETRICS_CLUSTER.fields_by_name[ - "count" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE -_MODEL_CLUSTERINGMETRICS_CLUSTER.containing_type = _MODEL_CLUSTERINGMETRICS -_MODEL_CLUSTERINGMETRICS.fields_by_name[ - "davies_bouldin_index" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_CLUSTERINGMETRICS.fields_by_name[ - "mean_squared_distance" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_CLUSTERINGMETRICS.fields_by_name[ - "clusters" -].message_type = _MODEL_CLUSTERINGMETRICS_CLUSTER -_MODEL_CLUSTERINGMETRICS.containing_type = _MODEL -_MODEL_EVALUATIONMETRICS.fields_by_name[ - "regression_metrics" -].message_type = _MODEL_REGRESSIONMETRICS -_MODEL_EVALUATIONMETRICS.fields_by_name[ - "binary_classification_metrics" -].message_type = _MODEL_BINARYCLASSIFICATIONMETRICS -_MODEL_EVALUATIONMETRICS.fields_by_name[ - "multi_class_classification_metrics" -].message_type = _MODEL_MULTICLASSCLASSIFICATIONMETRICS -_MODEL_EVALUATIONMETRICS.fields_by_name[ - "clustering_metrics" -].message_type = _MODEL_CLUSTERINGMETRICS -_MODEL_EVALUATIONMETRICS.containing_type = _MODEL -_MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"].fields.append( - _MODEL_EVALUATIONMETRICS.fields_by_name["regression_metrics"] -) -_MODEL_EVALUATIONMETRICS.fields_by_name[ - "regression_metrics" -].containing_oneof = _MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"] -_MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"].fields.append( - _MODEL_EVALUATIONMETRICS.fields_by_name["binary_classification_metrics"] -) -_MODEL_EVALUATIONMETRICS.fields_by_name[ - "binary_classification_metrics" -].containing_oneof = _MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"] 
-_MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"].fields.append( - _MODEL_EVALUATIONMETRICS.fields_by_name["multi_class_classification_metrics"] -) -_MODEL_EVALUATIONMETRICS.fields_by_name[ - "multi_class_classification_metrics" -].containing_oneof = _MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"] -_MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"].fields.append( - _MODEL_EVALUATIONMETRICS.fields_by_name["clustering_metrics"] -) -_MODEL_EVALUATIONMETRICS.fields_by_name[ - "clustering_metrics" -].containing_oneof = _MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"] -_MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY.containing_type = ( - _MODEL_TRAININGRUN_TRAININGOPTIONS -) -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "loss_type" -].enum_type = _MODEL_LOSSTYPE -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "l1_regularization" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "l2_regularization" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "min_relative_progress" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "warm_start" -].message_type = google_dot_protobuf_dot_wrappers__pb2._BOOLVALUE -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "early_stop" -].message_type = google_dot_protobuf_dot_wrappers__pb2._BOOLVALUE -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "data_split_method" -].enum_type = _MODEL_DATASPLITMETHOD -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "learn_rate_strategy" -].enum_type = _MODEL_LEARNRATESTRATEGY -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "label_class_weights" -].message_type = _MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "distance_type" -].enum_type = _MODEL_DISTANCETYPE 
-_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "optimization_strategy" -].enum_type = _MODEL_OPTIMIZATIONSTRATEGY -_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[ - "kmeans_initialization_method" -].enum_type = _MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD -_MODEL_TRAININGRUN_TRAININGOPTIONS.containing_type = _MODEL_TRAININGRUN -_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO.fields_by_name[ - "cluster_radius" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO.fields_by_name[ - "cluster_size" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE -_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO.containing_type = ( - _MODEL_TRAININGRUN_ITERATIONRESULT -) -_MODEL_TRAININGRUN_ITERATIONRESULT.fields_by_name[ - "index" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT32VALUE -_MODEL_TRAININGRUN_ITERATIONRESULT.fields_by_name[ - "duration_ms" -].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE -_MODEL_TRAININGRUN_ITERATIONRESULT.fields_by_name[ - "training_loss" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_TRAININGRUN_ITERATIONRESULT.fields_by_name[ - "eval_loss" -].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE -_MODEL_TRAININGRUN_ITERATIONRESULT.fields_by_name[ - "cluster_infos" -].message_type = _MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO -_MODEL_TRAININGRUN_ITERATIONRESULT.containing_type = _MODEL_TRAININGRUN -_MODEL_TRAININGRUN.fields_by_name[ - "training_options" -].message_type = _MODEL_TRAININGRUN_TRAININGOPTIONS -_MODEL_TRAININGRUN.fields_by_name[ - "start_time" -].message_type = google_dot_protobuf_dot_timestamp__pb2._TIMESTAMP -_MODEL_TRAININGRUN.fields_by_name[ - "results" -].message_type = _MODEL_TRAININGRUN_ITERATIONRESULT -_MODEL_TRAININGRUN.fields_by_name[ - "evaluation_metrics" -].message_type = _MODEL_EVALUATIONMETRICS -_MODEL_TRAININGRUN.containing_type = _MODEL 
-_MODEL_LABELSENTRY.containing_type = _MODEL -_MODEL.fields_by_name[ - "model_reference" -].message_type = ( - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__reference__pb2._MODELREFERENCE -) -_MODEL.fields_by_name["labels"].message_type = _MODEL_LABELSENTRY -_MODEL.fields_by_name[ - "encryption_configuration" -].message_type = ( - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_encryption__config__pb2._ENCRYPTIONCONFIGURATION -) -_MODEL.fields_by_name["model_type"].enum_type = _MODEL_MODELTYPE -_MODEL.fields_by_name["training_runs"].message_type = _MODEL_TRAININGRUN -_MODEL.fields_by_name[ - "feature_columns" -].message_type = ( - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_standard__sql__pb2._STANDARDSQLFIELD -) -_MODEL.fields_by_name[ - "label_columns" -].message_type = ( - google_dot_cloud_dot_bigquery__v2_dot_proto_dot_standard__sql__pb2._STANDARDSQLFIELD -) -_MODEL_MODELTYPE.containing_type = _MODEL -_MODEL_LOSSTYPE.containing_type = _MODEL -_MODEL_DISTANCETYPE.containing_type = _MODEL -_MODEL_DATASPLITMETHOD.containing_type = _MODEL -_MODEL_LEARNRATESTRATEGY.containing_type = _MODEL -_MODEL_OPTIMIZATIONSTRATEGY.containing_type = _MODEL -_PATCHMODELREQUEST.fields_by_name["model"].message_type = _MODEL -_LISTMODELSREQUEST.fields_by_name[ - "max_results" -].message_type = google_dot_protobuf_dot_wrappers__pb2._UINT32VALUE -_LISTMODELSRESPONSE.fields_by_name["models"].message_type = _MODEL -DESCRIPTOR.message_types_by_name["Model"] = _MODEL -DESCRIPTOR.message_types_by_name["GetModelRequest"] = _GETMODELREQUEST -DESCRIPTOR.message_types_by_name["PatchModelRequest"] = _PATCHMODELREQUEST -DESCRIPTOR.message_types_by_name["DeleteModelRequest"] = _DELETEMODELREQUEST -DESCRIPTOR.message_types_by_name["ListModelsRequest"] = _LISTMODELSREQUEST -DESCRIPTOR.message_types_by_name["ListModelsResponse"] = _LISTMODELSRESPONSE -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -Model = _reflection.GeneratedProtocolMessageType( - "Model", - (_message.Message,), - { - 
"KmeansEnums": _reflection.GeneratedProtocolMessageType( - "KmeansEnums", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_KMEANSENUMS, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2" - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.KmeansEnums) - }, - ), - "RegressionMetrics": _reflection.GeneratedProtocolMessageType( - "RegressionMetrics", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_REGRESSIONMETRICS, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Evaluation metrics for regression and explicit feedback type matrix - factorization models. - - Attributes: - mean_absolute_error: - Mean absolute error. - mean_squared_error: - Mean squared error. - mean_squared_log_error: - Mean squared log error. - median_absolute_error: - Median absolute error. - r_squared: - R^2 score. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.RegressionMetrics) - }, - ), - "AggregateClassificationMetrics": _reflection.GeneratedProtocolMessageType( - "AggregateClassificationMetrics", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_AGGREGATECLASSIFICATIONMETRICS, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Aggregate metrics for classification/classifier models. For multi- - class models, the metrics are either macro-averaged or micro-averaged. - When macro-averaged, the metrics are calculated for each label and - then an unweighted average is taken of those values. When micro- - averaged, the metric is calculated globally by counting the total - number of correctly predicted rows. - - Attributes: - precision: - Precision is the fraction of actual positive predictions that - had positive actual labels. For multiclass this is a macro- - averaged metric treating each class as a binary classifier. - recall: - Recall is the fraction of actual positive labels that were - given a positive prediction. For multiclass this is a macro- - averaged metric. 
- accuracy: - Accuracy is the fraction of predictions given the correct - label. For multiclass this is a micro-averaged metric. - threshold: - Threshold at which the metrics are computed. For binary - classification models this is the positive class threshold. - For multi-class classfication models this is the confidence - threshold. - f1_score: - The F1 score is an average of recall and precision. For - multiclass this is a macro-averaged metric. - log_loss: - Logarithmic Loss. For multiclass this is a macro-averaged - metric. - roc_auc: - Area Under a ROC Curve. For multiclass this is a macro- - averaged metric. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.AggregateClassificationMetrics) - }, - ), - "BinaryClassificationMetrics": _reflection.GeneratedProtocolMessageType( - "BinaryClassificationMetrics", - (_message.Message,), - { - "BinaryConfusionMatrix": _reflection.GeneratedProtocolMessageType( - "BinaryConfusionMatrix", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Confusion matrix for binary classification models. - - Attributes: - positive_class_threshold: - Threshold value used when computing each of the following - metric. - true_positives: - Number of true samples predicted as true. - false_positives: - Number of false samples predicted as true. - true_negatives: - Number of true samples predicted as false. - false_negatives: - Number of false samples predicted as false. - precision: - The fraction of actual positive predictions that had positive - actual labels. - recall: - The fraction of actual positive labels that were given a - positive prediction. - f1_score: - The equally weighted average of recall and precision. - accuracy: - The fraction of predictions given the correct label. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix) - }, - ), - "DESCRIPTOR": _MODEL_BINARYCLASSIFICATIONMETRICS, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Evaluation metrics for binary classification/classifier models. - - Attributes: - aggregate_classification_metrics: - Aggregate classification metrics. - binary_confusion_matrix_list: - Binary confusion matrix at multiple thresholds. - positive_label: - Label representing the positive class. - negative_label: - Label representing the negative class. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.BinaryClassificationMetrics) - }, - ), - "MultiClassClassificationMetrics": _reflection.GeneratedProtocolMessageType( - "MultiClassClassificationMetrics", - (_message.Message,), - { - "ConfusionMatrix": _reflection.GeneratedProtocolMessageType( - "ConfusionMatrix", - (_message.Message,), - { - "Entry": _reflection.GeneratedProtocolMessageType( - "Entry", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """A single entry in the confusion matrix. - - Attributes: - predicted_label: - The predicted label. For confidence_threshold > 0, we will - also add an entry indicating the number of items under the - confidence threshold. - item_count: - Number of items being predicted as this label. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry) - }, - ), - "Row": _reflection.GeneratedProtocolMessageType( - "Row", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """A single row in the confusion matrix. 
- - Attributes: - actual_label: - The original label of this row. - entries: - Info describing predicted label distribution. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row) - }, - ), - "DESCRIPTOR": _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Confusion matrix for multi-class classification models. - - Attributes: - confidence_threshold: - Confidence threshold used when computing the entries of the - confusion matrix. - rows: - One row per actual label. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix) - }, - ), - "DESCRIPTOR": _MODEL_MULTICLASSCLASSIFICATIONMETRICS, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Evaluation metrics for multi-class classification/classifier models. - - Attributes: - aggregate_classification_metrics: - Aggregate classification metrics. - confusion_matrix_list: - Confusion matrix at different thresholds. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics) - }, - ), - "ClusteringMetrics": _reflection.GeneratedProtocolMessageType( - "ClusteringMetrics", - (_message.Message,), - { - "Cluster": _reflection.GeneratedProtocolMessageType( - "Cluster", - (_message.Message,), - { - "FeatureValue": _reflection.GeneratedProtocolMessageType( - "FeatureValue", - (_message.Message,), - { - "CategoricalValue": _reflection.GeneratedProtocolMessageType( - "CategoricalValue", - (_message.Message,), - { - "CategoryCount": _reflection.GeneratedProtocolMessageType( - "CategoryCount", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Represents the count of a single category within the cluster. - - Attributes: - category: - The name of category. - count: - The count of training samples matching the category within the - cluster. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount) - }, - ), - "DESCRIPTOR": _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Representative value of a categorical feature. - - Attributes: - category_counts: - Counts of all categories for the categorical feature. If there - are more than ten categories, we return top ten (by count) and - return one more CategoryCount with category ``*OTHER*`` and - count as aggregate counts of remaining categories. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue) - }, - ), - "DESCRIPTOR": _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Representative value of a single feature within the cluster. - - Attributes: - feature_column: - The feature column name. - numerical_value: - The numerical feature value. This is the centroid value for - this feature. - categorical_value: - The categorical feature value. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue) - }, - ), - "DESCRIPTOR": _MODEL_CLUSTERINGMETRICS_CLUSTER, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Message containing the information about one cluster. - - Attributes: - centroid_id: - Centroid id. - feature_values: - Values of highly variant features for this cluster. - count: - Count of training data rows that were assigned to this - cluster. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster) - }, - ), - "DESCRIPTOR": _MODEL_CLUSTERINGMETRICS, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Evaluation metrics for clustering models. - - Attributes: - davies_bouldin_index: - Davies-Bouldin index. - mean_squared_distance: - Mean of squared distances between each sample to its cluster - centroid. - clusters: - [Beta] Information for all clusters. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics) - }, - ), - "EvaluationMetrics": _reflection.GeneratedProtocolMessageType( - "EvaluationMetrics", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_EVALUATIONMETRICS, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Evaluation metrics of a model. 
These are either computed on all - training data or just the eval data based on whether eval data was - used during training. These are not present for imported models. - - Attributes: - regression_metrics: - Populated for regression models and explicit feedback type - matrix factorization models. - binary_classification_metrics: - Populated for binary classification/classifier models. - multi_class_classification_metrics: - Populated for multi-class classification/classifier models. - clustering_metrics: - Populated for clustering models. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.EvaluationMetrics) - }, - ), - "TrainingRun": _reflection.GeneratedProtocolMessageType( - "TrainingRun", - (_message.Message,), - { - "TrainingOptions": _reflection.GeneratedProtocolMessageType( - "TrainingOptions", - (_message.Message,), - { - "LabelClassWeightsEntry": _reflection.GeneratedProtocolMessageType( - "LabelClassWeightsEntry", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2" - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry) - }, - ), - "DESCRIPTOR": _MODEL_TRAININGRUN_TRAININGOPTIONS, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Protocol buffer. - - Attributes: - max_iterations: - The maximum number of iterations in training. Used only for - iterative training algorithms. - loss_type: - Type of loss function used during training run. - learn_rate: - Learning rate in training. Used only for iterative training - algorithms. - l1_regularization: - L1 regularization coefficient. - l2_regularization: - L2 regularization coefficient. - min_relative_progress: - When early_stop is true, stops training when accuracy - improvement is less than ‘min_relative_progress’. Used only - for iterative training algorithms. 
- warm_start: - Whether to train a model from the last checkpoint. - early_stop: - Whether to stop early when the loss doesn’t improve - significantly any more (compared to min_relative_progress). - Used only for iterative training algorithms. - input_label_columns: - Name of input label columns in training data. - data_split_method: - The data split type for training and evaluation, e.g. RANDOM. - data_split_eval_fraction: - The fraction of evaluation data over the whole input data. The - rest of data will be used as training data. The format should - be double. Accurate to two decimal places. Default value is - 0.2. - data_split_column: - The column to split data with. This column won’t be used as a - feature. 1. When data_split_method is CUSTOM, the - corresponding column should be boolean. The rows with true - value tag are eval data, and the false are training data. 2. - When data_split_method is SEQ, the first - DATA_SPLIT_EVAL_FRACTION rows (from smallest to largest) in - the corresponding column are used as training data, and the - rest are eval data. It respects the order in Orderable data - types: - https://cloud.google.com/bigquery/docs/reference/standard- - sql/data-types#data-type-properties - learn_rate_strategy: - The strategy to determine learn rate for the current - iteration. - initial_learn_rate: - Specifies the initial learning rate for the line search learn - rate strategy. - label_class_weights: - Weights associated with each label class, for rebalancing the - training data. Only applicable for classification models. - distance_type: - Distance type for clustering models. - num_clusters: - Number of clusters for clustering models. - model_uri: - [Beta] Google Cloud Storage URI from which the model was - imported. Only applicable for imported models. - optimization_strategy: - Optimization strategy for training linear regression models. - kmeans_initialization_method: - The method used to initialize the centroids for kmeans - algorithm. 
- kmeans_initialization_column: - The column used to provide the initial centroids for kmeans - algorithm when kmeans_initialization_method is CUSTOM. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions) - }, - ), - "IterationResult": _reflection.GeneratedProtocolMessageType( - "IterationResult", - (_message.Message,), - { - "ClusterInfo": _reflection.GeneratedProtocolMessageType( - "ClusterInfo", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Information about a single cluster for clustering model. - - Attributes: - centroid_id: - Centroid id. - cluster_radius: - Cluster radius, the average distance from centroid to each - point assigned to the cluster. - cluster_size: - Cluster size, the total number of points assigned to the - cluster. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo) - }, - ), - "DESCRIPTOR": _MODEL_TRAININGRUN_ITERATIONRESULT, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Information about a single iteration of the training run. - - Attributes: - index: - Index of the iteration, 0 based. - duration_ms: - Time taken to run the iteration in milliseconds. - training_loss: - Loss computed on the training data at the end of iteration. - eval_loss: - Loss computed on the eval data at the end of iteration. - learn_rate: - Learn rate used for this iteration. - cluster_infos: - Information about top clusters for clustering models. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun.IterationResult) - }, - ), - "DESCRIPTOR": _MODEL_TRAININGRUN, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Information about a single training query run for the model. 
- - Attributes: - training_options: - Options that were used for this training run, includes user - specified and default options that were used. - start_time: - The start time of this training run. - results: - Output of each iteration run, results.size() <= - max_iterations. - evaluation_metrics: - The evaluation metrics over training/eval data that were - computed at the end of training. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun) - }, - ), - "LabelsEntry": _reflection.GeneratedProtocolMessageType( - "LabelsEntry", - (_message.Message,), - { - "DESCRIPTOR": _MODEL_LABELSENTRY, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2" - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.LabelsEntry) - }, - ), - "DESCRIPTOR": _MODEL, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Protocol buffer. - - Attributes: - etag: - Output only. A hash of this resource. - model_reference: - Required. Unique identifier for this model. - creation_time: - Output only. The time when this model was created, in - millisecs since the epoch. - last_modified_time: - Output only. The time when this model was last modified, in - millisecs since the epoch. - description: - Optional. A user-friendly description of this model. - friendly_name: - Optional. A descriptive name for this model. - labels: - The labels associated with this model. You can use these to - organize and group your models. Label keys and values can be - no longer than 63 characters, can only contain lowercase - letters, numeric characters, underscores and dashes. - International characters are allowed. Label values are - optional. Label keys must start with a letter and each label - in the list must have a different key. - expiration_time: - Optional. The time when this model expires, in milliseconds - since the epoch. If not present, the model will persist - indefinitely. 
Expired models will be deleted and their storage - reclaimed. The defaultTableExpirationMs property of the - encapsulating dataset can be used to set a default - expirationTime on newly created models. - location: - Output only. The geographic location where the model resides. - This value is inherited from the dataset. - encryption_configuration: - Custom encryption configuration (e.g., Cloud KMS keys). This - shows the encryption configuration of the model data while - stored in BigQuery storage. - model_type: - Output only. Type of the model resource. - training_runs: - Output only. Information for all training runs in increasing - order of start_time. - feature_columns: - Output only. Input feature columns that were used to train - this model. - label_columns: - Output only. Label columns that were used to train this model. - The output of the model will have a ``predicted\_`` prefix to - these columns. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model) - }, -) -_sym_db.RegisterMessage(Model) -_sym_db.RegisterMessage(Model.KmeansEnums) -_sym_db.RegisterMessage(Model.RegressionMetrics) -_sym_db.RegisterMessage(Model.AggregateClassificationMetrics) -_sym_db.RegisterMessage(Model.BinaryClassificationMetrics) -_sym_db.RegisterMessage(Model.BinaryClassificationMetrics.BinaryConfusionMatrix) -_sym_db.RegisterMessage(Model.MultiClassClassificationMetrics) -_sym_db.RegisterMessage(Model.MultiClassClassificationMetrics.ConfusionMatrix) -_sym_db.RegisterMessage(Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry) -_sym_db.RegisterMessage(Model.MultiClassClassificationMetrics.ConfusionMatrix.Row) -_sym_db.RegisterMessage(Model.ClusteringMetrics) -_sym_db.RegisterMessage(Model.ClusteringMetrics.Cluster) -_sym_db.RegisterMessage(Model.ClusteringMetrics.Cluster.FeatureValue) -_sym_db.RegisterMessage(Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue) -_sym_db.RegisterMessage( - 
Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount -) -_sym_db.RegisterMessage(Model.EvaluationMetrics) -_sym_db.RegisterMessage(Model.TrainingRun) -_sym_db.RegisterMessage(Model.TrainingRun.TrainingOptions) -_sym_db.RegisterMessage(Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry) -_sym_db.RegisterMessage(Model.TrainingRun.IterationResult) -_sym_db.RegisterMessage(Model.TrainingRun.IterationResult.ClusterInfo) -_sym_db.RegisterMessage(Model.LabelsEntry) - -GetModelRequest = _reflection.GeneratedProtocolMessageType( - "GetModelRequest", - (_message.Message,), - { - "DESCRIPTOR": _GETMODELREQUEST, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Protocol buffer. - - Attributes: - project_id: - Required. Project ID of the requested model. - dataset_id: - Required. Dataset ID of the requested model. - model_id: - Required. Model ID of the requested model. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.GetModelRequest) - }, -) -_sym_db.RegisterMessage(GetModelRequest) - -PatchModelRequest = _reflection.GeneratedProtocolMessageType( - "PatchModelRequest", - (_message.Message,), - { - "DESCRIPTOR": _PATCHMODELREQUEST, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Protocol buffer. - - Attributes: - project_id: - Required. Project ID of the model to patch. - dataset_id: - Required. Dataset ID of the model to patch. - model_id: - Required. Model ID of the model to patch. - model: - Required. Patched model. Follows RFC5789 patch semantics. - Missing fields are not updated. To clear a field, explicitly - set to default value. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.PatchModelRequest) - }, -) -_sym_db.RegisterMessage(PatchModelRequest) - -DeleteModelRequest = _reflection.GeneratedProtocolMessageType( - "DeleteModelRequest", - (_message.Message,), - { - "DESCRIPTOR": _DELETEMODELREQUEST, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Protocol buffer. - - Attributes: - project_id: - Required. Project ID of the model to delete. - dataset_id: - Required. Dataset ID of the model to delete. - model_id: - Required. Model ID of the model to delete. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.DeleteModelRequest) - }, -) -_sym_db.RegisterMessage(DeleteModelRequest) - -ListModelsRequest = _reflection.GeneratedProtocolMessageType( - "ListModelsRequest", - (_message.Message,), - { - "DESCRIPTOR": _LISTMODELSREQUEST, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Protocol buffer. - - Attributes: - project_id: - Required. Project ID of the models to list. - dataset_id: - Required. Dataset ID of the models to list. - max_results: - The maximum number of results to return in a single response - page. Leverage the page tokens to iterate through the entire - collection. - page_token: - Page token, returned by a previous call to request the next - page of results - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.ListModelsRequest) - }, -) -_sym_db.RegisterMessage(ListModelsRequest) - -ListModelsResponse = _reflection.GeneratedProtocolMessageType( - "ListModelsResponse", - (_message.Message,), - { - "DESCRIPTOR": _LISTMODELSRESPONSE, - "__module__": "google.cloud.bigquery_v2.proto.model_pb2", - "__doc__": """Protocol buffer. - - Attributes: - models: - Models in the requested dataset. Only the following fields are - populated: model_reference, model_type, creation_time, - last_modified_time and labels. - next_page_token: - A token to request the next page of results. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.ListModelsResponse) - }, -) -_sym_db.RegisterMessage(ListModelsResponse) - - -DESCRIPTOR._options = None -_MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY._options = None -_MODEL_LABELSENTRY._options = None -_MODEL.fields_by_name["etag"]._options = None -_MODEL.fields_by_name["model_reference"]._options = None -_MODEL.fields_by_name["creation_time"]._options = None -_MODEL.fields_by_name["last_modified_time"]._options = None -_MODEL.fields_by_name["description"]._options = None -_MODEL.fields_by_name["friendly_name"]._options = None -_MODEL.fields_by_name["expiration_time"]._options = None -_MODEL.fields_by_name["location"]._options = None -_MODEL.fields_by_name["model_type"]._options = None -_MODEL.fields_by_name["training_runs"]._options = None -_MODEL.fields_by_name["feature_columns"]._options = None -_MODEL.fields_by_name["label_columns"]._options = None -_GETMODELREQUEST.fields_by_name["project_id"]._options = None -_GETMODELREQUEST.fields_by_name["dataset_id"]._options = None -_GETMODELREQUEST.fields_by_name["model_id"]._options = None -_PATCHMODELREQUEST.fields_by_name["project_id"]._options = None -_PATCHMODELREQUEST.fields_by_name["dataset_id"]._options = None -_PATCHMODELREQUEST.fields_by_name["model_id"]._options = None -_PATCHMODELREQUEST.fields_by_name["model"]._options = None -_DELETEMODELREQUEST.fields_by_name["project_id"]._options = None -_DELETEMODELREQUEST.fields_by_name["dataset_id"]._options = None -_DELETEMODELREQUEST.fields_by_name["model_id"]._options = None -_LISTMODELSREQUEST.fields_by_name["project_id"]._options = None -_LISTMODELSREQUEST.fields_by_name["dataset_id"]._options = None - -_MODELSERVICE = _descriptor.ServiceDescriptor( - name="ModelService", - full_name="google.cloud.bigquery.v2.ModelService", - file=DESCRIPTOR, - index=0, - 
serialized_options=b"\312A\027bigquery.googleapis.com\322A\302\001https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/bigquery.readonly,https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/cloud-platform.read-only", - create_key=_descriptor._internal_create_key, - serialized_start=7804, - serialized_end=8566, - methods=[ - _descriptor.MethodDescriptor( - name="GetModel", - full_name="google.cloud.bigquery.v2.ModelService.GetModel", - index=0, - containing_service=None, - input_type=_GETMODELREQUEST, - output_type=_MODEL, - serialized_options=b"\332A\036project_id,dataset_id,model_id", - create_key=_descriptor._internal_create_key, - ), - _descriptor.MethodDescriptor( - name="ListModels", - full_name="google.cloud.bigquery.v2.ModelService.ListModels", - index=1, - containing_service=None, - input_type=_LISTMODELSREQUEST, - output_type=_LISTMODELSRESPONSE, - serialized_options=b"\332A!project_id,dataset_id,max_results", - create_key=_descriptor._internal_create_key, - ), - _descriptor.MethodDescriptor( - name="PatchModel", - full_name="google.cloud.bigquery.v2.ModelService.PatchModel", - index=2, - containing_service=None, - input_type=_PATCHMODELREQUEST, - output_type=_MODEL, - serialized_options=b"\332A$project_id,dataset_id,model_id,model", - create_key=_descriptor._internal_create_key, - ), - _descriptor.MethodDescriptor( - name="DeleteModel", - full_name="google.cloud.bigquery.v2.ModelService.DeleteModel", - index=3, - containing_service=None, - input_type=_DELETEMODELREQUEST, - output_type=google_dot_protobuf_dot_empty__pb2._EMPTY, - serialized_options=b"\332A\036project_id,dataset_id,model_id", - create_key=_descriptor._internal_create_key, - ), - ], -) -_sym_db.RegisterServiceDescriptor(_MODELSERVICE) - -DESCRIPTOR.services_by_name["ModelService"] = _MODELSERVICE - -# @@protoc_insertion_point(module_scope) diff --git a/google/cloud/bigquery_v2/proto/model_reference.proto 
b/google/cloud/bigquery_v2/proto/model_reference.proto deleted file mode 100644 index c3d1a49a8..000000000 --- a/google/cloud/bigquery_v2/proto/model_reference.proto +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2020 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package google.cloud.bigquery.v2; - -import "google/api/field_behavior.proto"; -import "google/api/annotations.proto"; - -option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; -option java_outer_classname = "ModelReferenceProto"; -option java_package = "com.google.cloud.bigquery.v2"; - -// Id path of a model. -message ModelReference { - // Required. The ID of the project containing this model. - string project_id = 1 [(google.api.field_behavior) = REQUIRED]; - - // Required. The ID of the dataset containing this model. - string dataset_id = 2 [(google.api.field_behavior) = REQUIRED]; - - // Required. The ID of the model. The ID must contain only - // letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum - // length is 1,024 characters. 
- string model_id = 3 [(google.api.field_behavior) = REQUIRED]; -} diff --git a/google/cloud/bigquery_v2/proto/model_reference_pb2.py b/google/cloud/bigquery_v2/proto/model_reference_pb2.py deleted file mode 100644 index 2411c4863..000000000 --- a/google/cloud/bigquery_v2/proto/model_reference_pb2.py +++ /dev/null @@ -1,142 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: google/cloud/bigquery_v2/proto/model_reference.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2 -from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/bigquery_v2/proto/model_reference.proto", - package="google.cloud.bigquery.v2", - syntax="proto3", - serialized_options=b"\n\034com.google.cloud.bigquery.v2B\023ModelReferenceProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery", - create_key=_descriptor._internal_create_key, - serialized_pb=b'\n4google/cloud/bigquery_v2/proto/model_reference.proto\x12\x18google.cloud.bigquery.v2\x1a\x1fgoogle/api/field_behavior.proto\x1a\x1cgoogle/api/annotations.proto"Y\n\x0eModelReference\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02\x42u\n\x1c\x63om.google.cloud.bigquery.v2B\x13ModelReferenceProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3', - dependencies=[ - google_dot_api_dot_field__behavior__pb2.DESCRIPTOR, - google_dot_api_dot_annotations__pb2.DESCRIPTOR, - ], -) - 
- -_MODELREFERENCE = _descriptor.Descriptor( - name="ModelReference", - full_name="google.cloud.bigquery.v2.ModelReference", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="project_id", - full_name="google.cloud.bigquery.v2.ModelReference.project_id", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="dataset_id", - full_name="google.cloud.bigquery.v2.ModelReference.dataset_id", - index=1, - number=2, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="model_id", - full_name="google.cloud.bigquery.v2.ModelReference.model_id", - index=2, - number=3, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=145, - serialized_end=234, -) - -DESCRIPTOR.message_types_by_name["ModelReference"] = _MODELREFERENCE -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -ModelReference = _reflection.GeneratedProtocolMessageType( - 
"ModelReference", - (_message.Message,), - { - "DESCRIPTOR": _MODELREFERENCE, - "__module__": "google.cloud.bigquery_v2.proto.model_reference_pb2", - "__doc__": """Id path of a model. - - Attributes: - project_id: - Required. The ID of the project containing this model. - dataset_id: - Required. The ID of the dataset containing this model. - model_id: - Required. The ID of the model. The ID must contain only - letters (a-z, A-Z), numbers (0-9), or underscores (_). The - maximum length is 1,024 characters. - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.ModelReference) - }, -) -_sym_db.RegisterMessage(ModelReference) - - -DESCRIPTOR._options = None -_MODELREFERENCE.fields_by_name["project_id"]._options = None -_MODELREFERENCE.fields_by_name["dataset_id"]._options = None -_MODELREFERENCE.fields_by_name["model_id"]._options = None -# @@protoc_insertion_point(module_scope) diff --git a/google/cloud/bigquery_v2/proto/standard_sql.proto b/google/cloud/bigquery_v2/proto/standard_sql.proto deleted file mode 100644 index 1514eccbb..000000000 --- a/google/cloud/bigquery_v2/proto/standard_sql.proto +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright 2020 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -syntax = "proto3"; - -package google.cloud.bigquery.v2; - -import "google/api/field_behavior.proto"; -import "google/api/annotations.proto"; - -option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; -option java_outer_classname = "StandardSqlProto"; -option java_package = "com.google.cloud.bigquery.v2"; - -// The type of a variable, e.g., a function argument. -// Examples: -// INT64: {type_kind="INT64"} -// ARRAY: {type_kind="ARRAY", array_element_type="STRING"} -// STRUCT>: -// {type_kind="STRUCT", -// struct_type={fields=[ -// {name="x", type={type_kind="STRING"}}, -// {name="y", type={type_kind="ARRAY", array_element_type="DATE"}} -// ]}} -message StandardSqlDataType { - enum TypeKind { - // Invalid type. - TYPE_KIND_UNSPECIFIED = 0; - - // Encoded as a string in decimal format. - INT64 = 2; - - // Encoded as a boolean "false" or "true". - BOOL = 5; - - // Encoded as a number, or string "NaN", "Infinity" or "-Infinity". - FLOAT64 = 7; - - // Encoded as a string value. - STRING = 8; - - // Encoded as a base64 string per RFC 4648, section 4. - BYTES = 9; - - // Encoded as an RFC 3339 timestamp with mandatory "Z" time zone string: - // 1985-04-12T23:20:50.52Z - TIMESTAMP = 19; - - // Encoded as RFC 3339 full-date format string: 1985-04-12 - DATE = 10; - - // Encoded as RFC 3339 partial-time format string: 23:20:50.52 - TIME = 20; - - // Encoded as RFC 3339 full-date "T" partial-time: 1985-04-12T23:20:50.52 - DATETIME = 21; - - // Encoded as WKT - GEOGRAPHY = 22; - - // Encoded as a decimal string. - NUMERIC = 23; - - // Encoded as a decimal string. - BIGNUMERIC = 24; - - // Encoded as a list with types matching Type.array_type. - ARRAY = 16; - - // Encoded as a list with fields of type Type.struct_type[i]. List is used - // because a JSON object cannot have duplicate field names. - STRUCT = 17; - } - - // Required. The top level type of this field. - // Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY"). 
- TypeKind type_kind = 1 [(google.api.field_behavior) = REQUIRED]; - - oneof sub_type { - // The type of the array's elements, if type_kind = "ARRAY". - StandardSqlDataType array_element_type = 2; - - // The fields of this struct, in order, if type_kind = "STRUCT". - StandardSqlStructType struct_type = 3; - } -} - -// A field or a column. -message StandardSqlField { - // Optional. The name of this field. Can be absent for struct fields. - string name = 1 [(google.api.field_behavior) = OPTIONAL]; - - // Optional. The type of this parameter. Absent if not explicitly - // specified (e.g., CREATE FUNCTION statement can omit the return type; - // in this case the output parameter does not have this "type" field). - StandardSqlDataType type = 2 [(google.api.field_behavior) = OPTIONAL]; -} - -message StandardSqlStructType { - repeated StandardSqlField fields = 1; -} diff --git a/google/cloud/bigquery_v2/proto/standard_sql_pb2.py b/google/cloud/bigquery_v2/proto/standard_sql_pb2.py deleted file mode 100644 index bfe77f934..000000000 --- a/google/cloud/bigquery_v2/proto/standard_sql_pb2.py +++ /dev/null @@ -1,442 +0,0 @@ -# -*- coding: utf-8 -*- -# Generated by the protocol buffer compiler. DO NOT EDIT! 
-# source: google/cloud/bigquery_v2/proto/standard_sql.proto -"""Generated protocol buffer code.""" -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection -from google.protobuf import symbol_database as _symbol_database - -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2 -from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2 - - -DESCRIPTOR = _descriptor.FileDescriptor( - name="google/cloud/bigquery_v2/proto/standard_sql.proto", - package="google.cloud.bigquery.v2", - syntax="proto3", - serialized_options=b"\n\034com.google.cloud.bigquery.v2B\020StandardSqlProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery", - create_key=_descriptor._internal_create_key, - serialized_pb=b'\n1google/cloud/bigquery_v2/proto/standard_sql.proto\x12\x18google.cloud.bigquery.v2\x1a\x1fgoogle/api/field_behavior.proto\x1a\x1cgoogle/api/annotations.proto"\xcb\x03\n\x13StandardSqlDataType\x12N\n\ttype_kind\x18\x01 \x01(\x0e\x32\x36.google.cloud.bigquery.v2.StandardSqlDataType.TypeKindB\x03\xe0\x41\x02\x12K\n\x12\x61rray_element_type\x18\x02 \x01(\x0b\x32-.google.cloud.bigquery.v2.StandardSqlDataTypeH\x00\x12\x46\n\x0bstruct_type\x18\x03 \x01(\x0b\x32/.google.cloud.bigquery.v2.StandardSqlStructTypeH\x00"\xc2\x01\n\x08TypeKind\x12\x19\n\x15TYPE_KIND_UNSPECIFIED\x10\x00\x12\t\n\x05INT64\x10\x02\x12\x08\n\x04\x42OOL\x10\x05\x12\x0b\n\x07\x46LOAT64\x10\x07\x12\n\n\x06STRING\x10\x08\x12\t\n\x05\x42YTES\x10\t\x12\r\n\tTIMESTAMP\x10\x13\x12\x08\n\x04\x44\x41TE\x10\n\x12\x08\n\x04TIME\x10\x14\x12\x0c\n\x08\x44\x41TETIME\x10\x15\x12\r\n\tGEOGRAPHY\x10\x16\x12\x0b\n\x07NUMERIC\x10\x17\x12\t\n\x05\x41RRAY\x10\x10\x12\n\n\x06STRUCT\x10\x11\x42\n\n\x08sub_type"g\n\x10StandardSqlField\x12\x11\n\x04name\x18\x01 
\x01(\tB\x03\xe0\x41\x01\x12@\n\x04type\x18\x02 \x01(\x0b\x32-.google.cloud.bigquery.v2.StandardSqlDataTypeB\x03\xe0\x41\x01"S\n\x15StandardSqlStructType\x12:\n\x06\x66ields\x18\x01 \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlFieldBr\n\x1c\x63om.google.cloud.bigquery.v2B\x10StandardSqlProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3', - dependencies=[ - google_dot_api_dot_field__behavior__pb2.DESCRIPTOR, - google_dot_api_dot_annotations__pb2.DESCRIPTOR, - ], -) - - -_STANDARDSQLDATATYPE_TYPEKIND = _descriptor.EnumDescriptor( - name="TypeKind", - full_name="google.cloud.bigquery.v2.StandardSqlDataType.TypeKind", - filename=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - values=[ - _descriptor.EnumValueDescriptor( - name="TYPE_KIND_UNSPECIFIED", - index=0, - number=0, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="INT64", - index=1, - number=2, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="BOOL", - index=2, - number=5, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="FLOAT64", - index=3, - number=7, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="STRING", - index=4, - number=8, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="BYTES", - index=5, - number=9, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="TIMESTAMP", - index=6, - number=19, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="DATE", - index=7, 
- number=10, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="TIME", - index=8, - number=20, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="DATETIME", - index=9, - number=21, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="GEOGRAPHY", - index=10, - number=22, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="NUMERIC", - index=11, - number=23, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="ARRAY", - index=12, - number=16, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - _descriptor.EnumValueDescriptor( - name="STRUCT", - index=13, - number=17, - serialized_options=None, - type=None, - create_key=_descriptor._internal_create_key, - ), - ], - containing_type=None, - serialized_options=None, - serialized_start=396, - serialized_end=590, -) -_sym_db.RegisterEnumDescriptor(_STANDARDSQLDATATYPE_TYPEKIND) - - -_STANDARDSQLDATATYPE = _descriptor.Descriptor( - name="StandardSqlDataType", - full_name="google.cloud.bigquery.v2.StandardSqlDataType", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="type_kind", - full_name="google.cloud.bigquery.v2.StandardSqlDataType.type_kind", - index=0, - number=1, - type=14, - cpp_type=8, - label=1, - has_default_value=False, - default_value=0, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\002", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - 
_descriptor.FieldDescriptor( - name="array_element_type", - full_name="google.cloud.bigquery.v2.StandardSqlDataType.array_element_type", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="struct_type", - full_name="google.cloud.bigquery.v2.StandardSqlDataType.struct_type", - index=2, - number=3, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[_STANDARDSQLDATATYPE_TYPEKIND,], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name="sub_type", - full_name="google.cloud.bigquery.v2.StandardSqlDataType.sub_type", - index=0, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[], - ), - ], - serialized_start=143, - serialized_end=602, -) - - -_STANDARDSQLFIELD = _descriptor.Descriptor( - name="StandardSqlField", - full_name="google.cloud.bigquery.v2.StandardSqlField", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="name", - full_name="google.cloud.bigquery.v2.StandardSqlField.name", - index=0, - number=1, - type=9, - cpp_type=9, - label=1, - has_default_value=False, - default_value=b"".decode("utf-8"), - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\001", - 
file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - _descriptor.FieldDescriptor( - name="type", - full_name="google.cloud.bigquery.v2.StandardSqlField.type", - index=1, - number=2, - type=11, - cpp_type=10, - label=1, - has_default_value=False, - default_value=None, - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=b"\340A\001", - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=604, - serialized_end=707, -) - - -_STANDARDSQLSTRUCTTYPE = _descriptor.Descriptor( - name="StandardSqlStructType", - full_name="google.cloud.bigquery.v2.StandardSqlStructType", - filename=None, - file=DESCRIPTOR, - containing_type=None, - create_key=_descriptor._internal_create_key, - fields=[ - _descriptor.FieldDescriptor( - name="fields", - full_name="google.cloud.bigquery.v2.StandardSqlStructType.fields", - index=0, - number=1, - type=11, - cpp_type=10, - label=3, - has_default_value=False, - default_value=[], - message_type=None, - enum_type=None, - containing_type=None, - is_extension=False, - extension_scope=None, - serialized_options=None, - file=DESCRIPTOR, - create_key=_descriptor._internal_create_key, - ), - ], - extensions=[], - nested_types=[], - enum_types=[], - serialized_options=None, - is_extendable=False, - syntax="proto3", - extension_ranges=[], - oneofs=[], - serialized_start=709, - serialized_end=792, -) - -_STANDARDSQLDATATYPE.fields_by_name[ - "type_kind" -].enum_type = _STANDARDSQLDATATYPE_TYPEKIND -_STANDARDSQLDATATYPE.fields_by_name[ - "array_element_type" -].message_type = _STANDARDSQLDATATYPE -_STANDARDSQLDATATYPE.fields_by_name["struct_type"].message_type = _STANDARDSQLSTRUCTTYPE -_STANDARDSQLDATATYPE_TYPEKIND.containing_type = _STANDARDSQLDATATYPE 
-_STANDARDSQLDATATYPE.oneofs_by_name["sub_type"].fields.append( - _STANDARDSQLDATATYPE.fields_by_name["array_element_type"] -) -_STANDARDSQLDATATYPE.fields_by_name[ - "array_element_type" -].containing_oneof = _STANDARDSQLDATATYPE.oneofs_by_name["sub_type"] -_STANDARDSQLDATATYPE.oneofs_by_name["sub_type"].fields.append( - _STANDARDSQLDATATYPE.fields_by_name["struct_type"] -) -_STANDARDSQLDATATYPE.fields_by_name[ - "struct_type" -].containing_oneof = _STANDARDSQLDATATYPE.oneofs_by_name["sub_type"] -_STANDARDSQLFIELD.fields_by_name["type"].message_type = _STANDARDSQLDATATYPE -_STANDARDSQLSTRUCTTYPE.fields_by_name["fields"].message_type = _STANDARDSQLFIELD -DESCRIPTOR.message_types_by_name["StandardSqlDataType"] = _STANDARDSQLDATATYPE -DESCRIPTOR.message_types_by_name["StandardSqlField"] = _STANDARDSQLFIELD -DESCRIPTOR.message_types_by_name["StandardSqlStructType"] = _STANDARDSQLSTRUCTTYPE -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -StandardSqlDataType = _reflection.GeneratedProtocolMessageType( - "StandardSqlDataType", - (_message.Message,), - { - "DESCRIPTOR": _STANDARDSQLDATATYPE, - "__module__": "google.cloud.bigquery_v2.proto.standard_sql_pb2", - "__doc__": """The type of a variable, e.g., a function argument. Examples: INT64: - {type_kind=``INT64``} ARRAY: {type_kind=``ARRAY``, - array_element_type=``STRING``} STRUCT: - {type_kind=``STRUCT``, struct_type={fields=[ {name=``x``, - type={type_kind=``STRING``}}, {name=``y``, type={type_kind=``ARRAY``, - array_element_type=``DATE``}} ]}} - - Attributes: - type_kind: - Required. The top level type of this field. Can be any - standard SQL data type (e.g., ``INT64``, ``DATE``, ``ARRAY``). - array_element_type: - The type of the array’s elements, if type_kind = ``ARRAY``. - struct_type: - The fields of this struct, in order, if type_kind = ``STRUCT``. 
- """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.StandardSqlDataType) - }, -) -_sym_db.RegisterMessage(StandardSqlDataType) - -StandardSqlField = _reflection.GeneratedProtocolMessageType( - "StandardSqlField", - (_message.Message,), - { - "DESCRIPTOR": _STANDARDSQLFIELD, - "__module__": "google.cloud.bigquery_v2.proto.standard_sql_pb2", - "__doc__": """A field or a column. - - Attributes: - name: - Optional. The name of this field. Can be absent for struct - fields. - type: - Optional. The type of this parameter. Absent if not explicitly - specified (e.g., CREATE FUNCTION statement can omit the return - type; in this case the output parameter does not have this - ``type`` field). - """, - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.StandardSqlField) - }, -) -_sym_db.RegisterMessage(StandardSqlField) - -StandardSqlStructType = _reflection.GeneratedProtocolMessageType( - "StandardSqlStructType", - (_message.Message,), - { - "DESCRIPTOR": _STANDARDSQLSTRUCTTYPE, - "__module__": "google.cloud.bigquery_v2.proto.standard_sql_pb2" - # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.StandardSqlStructType) - }, -) -_sym_db.RegisterMessage(StandardSqlStructType) - - -DESCRIPTOR._options = None -_STANDARDSQLDATATYPE.fields_by_name["type_kind"]._options = None -_STANDARDSQLFIELD.fields_by_name["name"]._options = None -_STANDARDSQLFIELD.fields_by_name["type"]._options = None -# @@protoc_insertion_point(module_scope) diff --git a/google/cloud/bigquery_v2/proto/table_reference.proto b/google/cloud/bigquery_v2/proto/table_reference.proto deleted file mode 100644 index ba02f80c4..000000000 --- a/google/cloud/bigquery_v2/proto/table_reference.proto +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright 2020 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -syntax = "proto3"; - -package google.cloud.bigquery.v2; - -import "google/api/field_behavior.proto"; -import "google/api/annotations.proto"; - -option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery"; -option java_outer_classname = "TableReferenceProto"; -option java_package = "com.google.cloud.bigquery.v2"; - -message TableReference { - // Required. The ID of the project containing this table. - string project_id = 1 [(google.api.field_behavior) = REQUIRED]; - - // Required. The ID of the dataset containing this table. - string dataset_id = 2 [(google.api.field_behavior) = REQUIRED]; - - // Required. The ID of the table. The ID must contain only - // letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum - // length is 1,024 characters. Certain operations allow - // suffixing of the table ID with a partition decorator, such as - // `sample_table$20190123`. 
- string table_id = 3 [(google.api.field_behavior) = REQUIRED]; -} From a0a9fa23037d20699a82716ce791cbe10a54ee30 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 28 May 2021 16:56:04 +0000 Subject: [PATCH 116/230] chore: new owl bot post processor docker image (#680) Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:c66ba3c8d7bc8566f47df841f98cd0097b28fff0b1864c86f5817f4c8c3e8600 --- .github/.OwlBot.lock.yaml | 2 +- docs/conf.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 46e3f021c..da616c91a 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:3c3a445b3ddc99ccd5d31edc4b4519729635d20693900db32c4f587ed51f7479 + digest: sha256:c66ba3c8d7bc8566f47df841f98cd0097b28fff0b1864c86f5817f4c8c3e8600 diff --git a/docs/conf.py b/docs/conf.py index fdea01aad..1275fe3f1 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -364,6 +364,7 @@ "google.api_core": ("https://googleapis.dev/python/google-api-core/latest/", None,), "grpc": ("https://grpc.github.io/grpc/python/", None), "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), + "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), } From d792ce09388a6ee3706777915dd2818d4c854f79 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 2 Jun 2021 09:17:20 +0200 Subject: [PATCH 117/230] feat: add support for Parquet options (#679) * feat: add support for Parquet options For load jobs and external tables config. * Simplify ParquetOptions.to_api_repr() Co-authored by Tres Seaver. 
* Expose ParquetOptions in top level namespace * Parquet options should be reflected in options --- google/cloud/bigquery/__init__.py | 2 + google/cloud/bigquery/external_config.py | 28 ++++++- google/cloud/bigquery/format_options.py | 80 ++++++++++++++++++ google/cloud/bigquery/job/load.py | 21 +++++ tests/unit/job/test_load_config.py | 35 ++++++++ tests/unit/test_external_config.py | 100 +++++++++++++++++++++++ tests/unit/test_format_options.py | 41 ++++++++++ 7 files changed, 306 insertions(+), 1 deletion(-) create mode 100644 google/cloud/bigquery/format_options.py create mode 100644 tests/unit/test_format_options.py diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index ec08b2c84..f031cd81d 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -47,6 +47,7 @@ from google.cloud.bigquery.external_config import CSVOptions from google.cloud.bigquery.external_config import GoogleSheetsOptions from google.cloud.bigquery.external_config import ExternalSourceFormat +from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery.job import Compression from google.cloud.bigquery.job import CopyJob from google.cloud.bigquery.job import CopyJobConfig @@ -136,6 +137,7 @@ "BigtableColumn", "CSVOptions", "GoogleSheetsOptions", + "ParquetOptions", "DEFAULT_RETRY", # Enum Constants "enums", diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index ef4d569fa..0c49d2d76 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -27,6 +27,7 @@ from google.cloud.bigquery._helpers import _bytes_to_json from google.cloud.bigquery._helpers import _int_or_none from google.cloud.bigquery._helpers import _str_or_none +from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery.schema import SchemaField @@ -53,6 +54,12 @@ class ExternalSourceFormat(object): DATASTORE_BACKUP 
= "DATASTORE_BACKUP" """Specifies datastore backup format""" + ORC = "ORC" + """Specifies ORC format.""" + + PARQUET = "PARQUET" + """Specifies Parquet format.""" + BIGTABLE = "BIGTABLE" """Specifies Bigtable format.""" @@ -540,7 +547,7 @@ def from_api_repr(cls, resource: dict) -> "GoogleSheetsOptions": return config -_OPTION_CLASSES = (BigtableOptions, CSVOptions, GoogleSheetsOptions) +_OPTION_CLASSES = (BigtableOptions, CSVOptions, GoogleSheetsOptions, ParquetOptions) class HivePartitioningOptions(object): @@ -784,6 +791,25 @@ def schema(self, value): prop = {"fields": [field.to_api_repr() for field in value]} self._properties["schema"] = prop + @property + def parquet_options(self): + """Optional[google.cloud.bigquery.format_options.ParquetOptions]: Additional + properties to set if ``sourceFormat`` is set to PARQUET. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.parquet_options + """ + if self.source_format != ExternalSourceFormat.PARQUET: + return None + return self._options + + @parquet_options.setter + def parquet_options(self, value): + if self.source_format != ExternalSourceFormat.PARQUET: + msg = f"Cannot set Parquet options, source format is {self.source_format}" + raise TypeError(msg) + self._options = value + def to_api_repr(self) -> dict: """Build an API representation of this object. diff --git a/google/cloud/bigquery/format_options.py b/google/cloud/bigquery/format_options.py new file mode 100644 index 000000000..2c9a2ce20 --- /dev/null +++ b/google/cloud/bigquery/format_options.py @@ -0,0 +1,80 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +from typing import Dict + + +class ParquetOptions: + """Additional options if the PARQUET source format is used.""" + + _SOURCE_FORMAT = "PARQUET" + _RESOURCE_NAME = "parquetOptions" + + def __init__(self): + self._properties = {} + + @property + def enum_as_string(self) -> bool: + """Indicates whether to infer Parquet ENUM logical type as STRING instead of + BYTES by default. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ParquetOptions.FIELDS.enum_as_string + """ + return self._properties.get("enumAsString") + + @enum_as_string.setter + def enum_as_string(self, value: bool) -> None: + self._properties["enumAsString"] = value + + @property + def enable_list_inference(self) -> bool: + """Indicates whether to use schema inference specifically for Parquet LIST + logical type. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ParquetOptions.FIELDS.enable_list_inference + """ + return self._properties.get("enableListInference") + + @enable_list_inference.setter + def enable_list_inference(self, value: bool) -> None: + self._properties["enableListInference"] = value + + @classmethod + def from_api_repr(cls, resource: Dict[str, bool]) -> "ParquetOptions": + """Factory: construct an instance from a resource dict. + + Args: + resource (Dict[str, bool]): + Definition of a :class:`~.format_options.ParquetOptions` instance in + the same representation as is returned from the API. + + Returns: + :class:`~.format_options.ParquetOptions`: + Configuration parsed from ``resource``. 
+ """ + config = cls() + config._properties = copy.deepcopy(resource) + return config + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, bool]: + A dictionary in the format used by the BigQuery API. + """ + return copy.deepcopy(self._properties) diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index b8174af3e..41d38dd74 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -16,6 +16,7 @@ from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery.external_config import HivePartitioningOptions +from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery import _helpers from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.schema import _to_schema_fields @@ -439,6 +440,26 @@ def write_disposition(self): def write_disposition(self, value): self._set_sub_prop("writeDisposition", value) + @property + def parquet_options(self): + """Optional[google.cloud.bigquery.format_options.ParquetOptions]: Additional + properties to set if ``sourceFormat`` is set to PARQUET. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.parquet_options + """ + prop = self._get_sub_prop("parquetOptions") + if prop is not None: + prop = ParquetOptions.from_api_repr(prop) + return prop + + @parquet_options.setter + def parquet_options(self, value): + if value is not None: + self._set_sub_prop("parquetOptions", value.to_api_repr()) + else: + self._del_sub_prop("parquetOptions") + class LoadJob(_AsyncJob): """Asynchronous job for loading data into a table. 
diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index 63f15ec5a..b0729e428 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -700,3 +700,38 @@ def test_write_disposition_setter(self): self.assertEqual( config._properties["load"]["writeDisposition"], write_disposition ) + + def test_parquet_options_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.parquet_options) + + def test_parquet_options_hit(self): + config = self._get_target_class()() + config._properties["load"]["parquetOptions"] = dict( + enumAsString=True, enableListInference=False + ) + self.assertTrue(config.parquet_options.enum_as_string) + self.assertFalse(config.parquet_options.enable_list_inference) + + def test_parquet_options_setter(self): + from google.cloud.bigquery.format_options import ParquetOptions + + parquet_options = ParquetOptions.from_api_repr( + dict(enumAsString=False, enableListInference=True) + ) + config = self._get_target_class()() + + config.parquet_options = parquet_options + self.assertEqual( + config._properties["load"]["parquetOptions"], + {"enumAsString": False, "enableListInference": True}, + ) + + def test_parquet_options_setter_clearing(self): + config = self._get_target_class()() + config._properties["load"]["parquetOptions"] = dict( + enumAsString=False, enableListInference=True + ) + + config.parquet_options = None + self.assertNotIn("parquetOptions", config._properties["load"]) diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 648a8717e..7178367ea 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -425,6 +425,106 @@ def test_to_api_repr_bigtable(self): self.assertEqual(got_resource, exp_resource) + def test_parquet_options_getter(self): + from google.cloud.bigquery.format_options import ParquetOptions + + parquet_options = ParquetOptions.from_api_repr( + {"enumAsString": True, 
"enableListInference": False} + ) + ec = external_config.ExternalConfig( + external_config.ExternalSourceFormat.PARQUET + ) + + self.assertIsNone(ec.parquet_options.enum_as_string) + self.assertIsNone(ec.parquet_options.enable_list_inference) + + ec._options = parquet_options + + self.assertTrue(ec.parquet_options.enum_as_string) + self.assertFalse(ec.parquet_options.enable_list_inference) + + self.assertIs(ec.parquet_options, ec.options) + + def test_parquet_options_getter_non_parquet_format(self): + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) + self.assertIsNone(ec.parquet_options) + + def test_parquet_options_setter(self): + from google.cloud.bigquery.format_options import ParquetOptions + + parquet_options = ParquetOptions.from_api_repr( + {"enumAsString": False, "enableListInference": True} + ) + ec = external_config.ExternalConfig( + external_config.ExternalSourceFormat.PARQUET + ) + + ec.parquet_options = parquet_options + + # Setting Parquet options should be reflected in the generic options attribute. 
+ self.assertFalse(ec.options.enum_as_string) + self.assertTrue(ec.options.enable_list_inference) + + def test_parquet_options_setter_non_parquet_format(self): + from google.cloud.bigquery.format_options import ParquetOptions + + parquet_options = ParquetOptions.from_api_repr( + {"enumAsString": False, "enableListInference": True} + ) + ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV) + + with self.assertRaisesRegex(TypeError, "Cannot set.*source format is CSV"): + ec.parquet_options = parquet_options + + def test_from_api_repr_parquet(self): + from google.cloud.bigquery.format_options import ParquetOptions + + resource = _copy_and_update( + self.BASE_RESOURCE, + { + "sourceFormat": "PARQUET", + "parquetOptions": {"enumAsString": True, "enableListInference": False}, + }, + ) + + ec = external_config.ExternalConfig.from_api_repr(resource) + + self._verify_base(ec) + self.assertEqual(ec.source_format, external_config.ExternalSourceFormat.PARQUET) + self.assertIsInstance(ec.options, ParquetOptions) + self.assertTrue(ec.parquet_options.enum_as_string) + self.assertFalse(ec.parquet_options.enable_list_inference) + + got_resource = ec.to_api_repr() + + self.assertEqual(got_resource, resource) + + del resource["parquetOptions"]["enableListInference"] + ec = external_config.ExternalConfig.from_api_repr(resource) + self.assertIsNone(ec.options.enable_list_inference) + got_resource = ec.to_api_repr() + self.assertEqual(got_resource, resource) + + def test_to_api_repr_parquet(self): + from google.cloud.bigquery.format_options import ParquetOptions + + ec = external_config.ExternalConfig( + external_config.ExternalSourceFormat.PARQUET + ) + options = ParquetOptions.from_api_repr( + dict(enumAsString=False, enableListInference=True) + ) + ec._options = options + + exp_resource = { + "sourceFormat": external_config.ExternalSourceFormat.PARQUET, + "parquetOptions": {"enumAsString": False, "enableListInference": True}, + } + + got_resource = 
ec.to_api_repr() + + self.assertEqual(got_resource, exp_resource) + def _copy_and_update(d, u): d = copy.deepcopy(d) diff --git a/tests/unit/test_format_options.py b/tests/unit/test_format_options.py new file mode 100644 index 000000000..ab5f9e05c --- /dev/null +++ b/tests/unit/test_format_options.py @@ -0,0 +1,41 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class TestParquetOptions: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.format_options import ParquetOptions + + return ParquetOptions + + def test_ctor(self): + config = self._get_target_class()() + assert config.enum_as_string is None + assert config.enable_list_inference is None + + def test_from_api_repr(self): + config = self._get_target_class().from_api_repr( + {"enumAsString": False, "enableListInference": True} + ) + assert not config.enum_as_string + assert config.enable_list_inference + + def test_to_api_repr(self): + config = self._get_target_class()() + config.enum_as_string = True + config.enable_list_inference = False + + result = config.to_api_repr() + assert result == {"enumAsString": True, "enableListInference": False} From 102d867dab4d0ee64a4e69a8f071c0d040edfe71 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 2 Jun 2021 09:48:10 +0200 Subject: [PATCH 118/230] chore: release 2.18.0 (#682) Co-authored-by: release-please[bot] 
<55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a0227118..b8abc5abb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.18.0](https://www.github.com/googleapis/python-bigquery/compare/v2.17.0...v2.18.0) (2021-06-02) + + +### Features + +* add support for Parquet options ([#679](https://www.github.com/googleapis/python-bigquery/issues/679)) ([d792ce0](https://www.github.com/googleapis/python-bigquery/commit/d792ce09388a6ee3706777915dd2818d4c854f79)) + ## [2.17.0](https://www.github.com/googleapis/python-bigquery/compare/v2.16.1...v2.17.0) (2021-05-21) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 422b383cc..a613e5ea2 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.17.0" +__version__ = "2.18.0" From 0cf4e31288e3adea7f64b01d4ddbd3a1026bb056 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 3 Jun 2021 00:02:00 +0200 Subject: [PATCH 119/230] chore(deps): update dependency pyarrow to v4.0.1 (#681) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 2ed5b0df8..31a4ca5b8 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -8,5 +8,5 @@ matplotlib==3.3.4; python_version < '3.7' matplotlib==3.4.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' pandas==1.2.0; python_version >= '3.7' -pyarrow==4.0.0 +pyarrow==4.0.1 pytz==2021.1 From dea2402ef62bcc00f2a392b16330a595db38ffb7 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 3 Jun 2021 10:29:17 +0200 Subject: [PATCH 120/230] chore(deps): update dependency google-cloud-bigquery to v2.18.0 (#683) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 9fbe85970..4577dff02 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.17.0 +google-cloud-bigquery==2.18.0 google-cloud-bigquery-storage==2.4.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 31a4ca5b8..94fe39299 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.17.0 +google-cloud-bigquery==2.18.0 google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 grpcio==1.38.0 From 1f1c4b7ba4390fc4c5c8186bc22b83b45304ca06 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Sun, 6 Jun 2021 10:18:13 -0400 Subject: [PATCH 121/230] feat: 
list_tables, list_projects, list_datasets, list_models, list_routines, and list_jobs now accept a page_size parameter to control page size (#686) --- google/cloud/bigquery/client.py | 39 ++- setup.py | 2 +- testing/constraints-3.6.txt | 2 +- tests/unit/test_client.py | 494 ------------------------------- tests/unit/test_list_datasets.py | 124 ++++++++ tests/unit/test_list_jobs.py | 291 ++++++++++++++++++ tests/unit/test_list_models.py | 11 +- tests/unit/test_list_projects.py | 119 ++++++++ tests/unit/test_list_routines.py | 11 +- tests/unit/test_list_tables.py | 19 ++ 10 files changed, 605 insertions(+), 507 deletions(-) create mode 100644 tests/unit/test_list_datasets.py create mode 100644 tests/unit/test_list_jobs.py create mode 100644 tests/unit/test_list_projects.py diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 7ef3795a8..2b7a5273e 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -286,6 +286,7 @@ def list_projects( page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: float = None, + page_size: int = None, ) -> page_iterator.Iterator: """List projects for the project associated with this client. @@ -294,8 +295,8 @@ def list_projects( Args: max_results (Optional[int]): - Maximum number of projects to return, If not passed, - defaults to a value set by the API. + Maximum number of projects to return. + Defaults to a value set by the API. page_token (Optional[str]): Token representing a cursor into the projects. If not passed, @@ -310,6 +311,10 @@ def list_projects( The number of seconds to wait for the underlying HTTP transport before using ``retry``. + page_size (Optional[int]): + Maximum number of projects to return in each page. + Defaults to a value set by the API. 
+ Returns: google.api_core.page_iterator.Iterator: Iterator of :class:`~google.cloud.bigquery.client.Project` @@ -335,6 +340,7 @@ def api_request(*args, **kwargs): items_key="projects", page_token=page_token, max_results=max_results, + page_size=page_size, ) def list_datasets( @@ -346,6 +352,7 @@ def list_datasets( page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: float = None, + page_size: int = None, ) -> page_iterator.Iterator: """List datasets for the project associated with this client. @@ -375,6 +382,8 @@ def list_datasets( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + page_size (Optional[int]): + Maximum number of datasets to return per page. Returns: google.api_core.page_iterator.Iterator: @@ -414,6 +423,7 @@ def api_request(*args, **kwargs): page_token=page_token, max_results=max_results, extra_params=extra_params, + page_size=page_size, ) def dataset(self, dataset_id: str, project: str = None) -> DatasetReference: @@ -1270,6 +1280,7 @@ def list_models( page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: float = None, + page_size: int = None, ) -> page_iterator.Iterator: """[Beta] List models in the dataset. @@ -1288,7 +1299,7 @@ def list_models( to create a dataset reference from a string using :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`. max_results (Optional[int]): - Maximum number of models to return. If not passed, defaults to a + Maximum number of models to return. Defaults to a value set by the API. page_token (Optional[str]): Token representing a cursor into the models. If not passed, @@ -1301,6 +1312,9 @@ def list_models( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + page_size (Optional[int]): + Maximum number of models to return per page. + Defaults to a value set by the API. 
Returns: google.api_core.page_iterator.Iterator: @@ -1331,6 +1345,7 @@ def api_request(*args, **kwargs): items_key="models", page_token=page_token, max_results=max_results, + page_size=page_size, ) result.dataset = dataset return result @@ -1342,6 +1357,7 @@ def list_routines( page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: float = None, + page_size: int = None, ) -> page_iterator.Iterator: """[Beta] List routines in the dataset. @@ -1360,7 +1376,7 @@ def list_routines( to create a dataset reference from a string using :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`. max_results (Optional[int]): - Maximum number of routines to return. If not passed, defaults + Maximum number of routines to return. Defaults to a value set by the API. page_token (Optional[str]): Token representing a cursor into the routines. If not passed, @@ -1373,6 +1389,9 @@ def list_routines( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + page_size (Optional[int]): + Maximum number of routines to return per page. + Defaults to a value set by the API. Returns: google.api_core.page_iterator.Iterator: @@ -1403,6 +1422,7 @@ def api_request(*args, **kwargs): items_key="routines", page_token=page_token, max_results=max_results, + page_size=page_size, ) result.dataset = dataset return result @@ -1414,6 +1434,7 @@ def list_tables( page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: float = None, + page_size: int = None, ) -> page_iterator.Iterator: """List tables in the dataset. @@ -1432,7 +1453,7 @@ def list_tables( to create a dataset reference from a string using :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`. max_results (Optional[int]): - Maximum number of tables to return. If not passed, defaults + Maximum number of tables to return. Defaults to a value set by the API. page_token (Optional[str]): Token representing a cursor into the tables. 
If not passed, @@ -1445,6 +1466,9 @@ def list_tables( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + page_size (Optional[int]): + Maximum number of tables to return per page. + Defaults to a value set by the API. Returns: google.api_core.page_iterator.Iterator: @@ -1474,6 +1498,7 @@ def api_request(*args, **kwargs): items_key="tables", page_token=page_token, max_results=max_results, + page_size=page_size, ) result.dataset = dataset return result @@ -2112,6 +2137,7 @@ def list_jobs( timeout: float = None, min_creation_time: datetime.datetime = None, max_creation_time: datetime.datetime = None, + page_size: int = None, ) -> page_iterator.Iterator: """List jobs for the project associated with this client. @@ -2157,6 +2183,8 @@ def list_jobs( Max value for job creation time. If set, only jobs created before or at this timestamp are returned. If the datetime has no time zone assumes UTC time. + page_size (Optional[int]): + Maximum number of jobs to return per page. 
Returns: google.api_core.page_iterator.Iterator: @@ -2208,6 +2236,7 @@ def api_request(*args, **kwargs): page_token=page_token, max_results=max_results, extra_params=extra_params, + page_size=page_size, ) def load_table_from_uri( diff --git a/setup.py b/setup.py index 6a6202ef9..963eb73ec 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ # 'Development Status :: 5 - Production/Stable' release_status = "Development Status :: 5 - Production/Stable" dependencies = [ - "google-api-core[grpc] >= 1.23.0, < 2.0.0dev", + "google-api-core[grpc] >= 1.29.0, < 2.0.0dev", "proto-plus >= 1.10.0", "google-cloud-core >= 1.4.1, < 2.0dev", "google-resumable-media >= 0.6.0, < 2.0dev", diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index 322373eba..71c9ff49a 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -5,7 +5,7 @@ # # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", # Then this file should have foo==1.14.0 -google-api-core==1.23.0 +google-api-core==1.29.0 google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 google-resumable-media==0.6.0 diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 1346a1ef6..7a28ef248 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -471,221 +471,6 @@ def test_get_service_account_email_w_custom_retry(self): ], ) - def test_list_projects_defaults(self): - from google.cloud.bigquery.client import Project - - PROJECT_1 = "PROJECT_ONE" - PROJECT_2 = "PROJECT_TWO" - TOKEN = "TOKEN" - DATA = { - "nextPageToken": TOKEN, - "projects": [ - { - "kind": "bigquery#project", - "id": PROJECT_1, - "numericId": 1, - "projectReference": {"projectId": PROJECT_1}, - "friendlyName": "One", - }, - { - "kind": "bigquery#project", - "id": PROJECT_2, - "numericId": 2, - "projectReference": {"projectId": PROJECT_2}, - "friendlyName": "Two", - }, - ], - } - creds = _make_credentials() - client = self._make_one(PROJECT_1, creds) - conn = client._connection = 
make_connection(DATA) - iterator = client.list_projects() - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/projects"}, client, None) - projects = list(page) - token = iterator.next_page_token - - self.assertEqual(len(projects), len(DATA["projects"])) - for found, expected in zip(projects, DATA["projects"]): - self.assertIsInstance(found, Project) - self.assertEqual(found.project_id, expected["id"]) - self.assertEqual(found.numeric_id, expected["numericId"]) - self.assertEqual(found.friendly_name, expected["friendlyName"]) - self.assertEqual(token, TOKEN) - - conn.api_request.assert_called_once_with( - method="GET", path="/projects", query_params={}, timeout=None - ) - - def test_list_projects_w_timeout(self): - PROJECT_1 = "PROJECT_ONE" - TOKEN = "TOKEN" - DATA = { - "nextPageToken": TOKEN, - "projects": [], - } - creds = _make_credentials() - client = self._make_one(PROJECT_1, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_projects(timeout=7.5) - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/projects"}, client, None) - - conn.api_request.assert_called_once_with( - method="GET", path="/projects", query_params={}, timeout=7.5 - ) - - def test_list_projects_explicit_response_missing_projects_key(self): - TOKEN = "TOKEN" - DATA = {} - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_projects(max_results=3, page_token=TOKEN) - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - 
final_attributes.assert_called_once_with({"path": "/projects"}, client, None) - projects = list(page) - token = iterator.next_page_token - - self.assertEqual(len(projects), 0) - self.assertIsNone(token) - - conn.api_request.assert_called_once_with( - method="GET", - path="/projects", - query_params={"maxResults": 3, "pageToken": TOKEN}, - timeout=None, - ) - - def test_list_datasets_defaults(self): - from google.cloud.bigquery.dataset import DatasetListItem - - DATASET_1 = "dataset_one" - DATASET_2 = "dataset_two" - PATH = "projects/%s/datasets" % self.PROJECT - TOKEN = "TOKEN" - DATA = { - "nextPageToken": TOKEN, - "datasets": [ - { - "kind": "bigquery#dataset", - "id": "%s:%s" % (self.PROJECT, DATASET_1), - "datasetReference": { - "datasetId": DATASET_1, - "projectId": self.PROJECT, - }, - "friendlyName": None, - }, - { - "kind": "bigquery#dataset", - "id": "%s:%s" % (self.PROJECT, DATASET_2), - "datasetReference": { - "datasetId": DATASET_2, - "projectId": self.PROJECT, - }, - "friendlyName": "Two", - }, - ], - } - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_datasets() - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - datasets = list(page) - token = iterator.next_page_token - - self.assertEqual(len(datasets), len(DATA["datasets"])) - for found, expected in zip(datasets, DATA["datasets"]): - self.assertIsInstance(found, DatasetListItem) - self.assertEqual(found.full_dataset_id, expected["id"]) - self.assertEqual(found.friendly_name, expected["friendlyName"]) - self.assertEqual(token, TOKEN) - - conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={}, timeout=None - ) - - def test_list_datasets_w_project_and_timeout(self): - creds = 
_make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection({}) - - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - list(client.list_datasets(project="other-project", timeout=7.5)) - - final_attributes.assert_called_once_with( - {"path": "/projects/other-project/datasets"}, client, None - ) - - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/other-project/datasets", - query_params={}, - timeout=7.5, - ) - - def test_list_datasets_explicit_response_missing_datasets_key(self): - PATH = "projects/%s/datasets" % self.PROJECT - TOKEN = "TOKEN" - FILTER = "FILTER" - DATA = {} - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_datasets( - include_all=True, filter=FILTER, max_results=3, page_token=TOKEN - ) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - datasets = list(page) - token = iterator.next_page_token - - self.assertEqual(len(datasets), 0) - self.assertIsNone(token) - - conn.api_request.assert_called_once_with( - method="GET", - path="/%s" % PATH, - query_params={ - "all": True, - "filter": FILTER, - "maxResults": 3, - "pageToken": TOKEN, - }, - timeout=None, - ) - def test_dataset_with_specified_project(self): from google.cloud.bigquery.dataset import DatasetReference @@ -3239,285 +3024,6 @@ def test_cancel_job_w_timeout(self): timeout=7.5, ) - def test_list_jobs_defaults(self): - from google.cloud.bigquery.job import CopyJob - from google.cloud.bigquery.job import CreateDisposition - from google.cloud.bigquery.job import ExtractJob - from google.cloud.bigquery.job import LoadJob - from google.cloud.bigquery.job import QueryJob 
- from google.cloud.bigquery.job import WriteDisposition - - SOURCE_TABLE = "source_table" - DESTINATION_TABLE = "destination_table" - QUERY_DESTINATION_TABLE = "query_destination_table" - SOURCE_URI = "gs://test_bucket/src_object*" - DESTINATION_URI = "gs://test_bucket/dst_object*" - JOB_TYPES = { - "load_job": LoadJob, - "copy_job": CopyJob, - "extract_job": ExtractJob, - "query_job": QueryJob, - } - PATH = "projects/%s/jobs" % self.PROJECT - TOKEN = "TOKEN" - QUERY = "SELECT * from test_dataset:test_table" - ASYNC_QUERY_DATA = { - "id": "%s:%s" % (self.PROJECT, "query_job"), - "jobReference": {"projectId": self.PROJECT, "jobId": "query_job"}, - "state": "DONE", - "configuration": { - "query": { - "query": QUERY, - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": QUERY_DESTINATION_TABLE, - }, - "createDisposition": CreateDisposition.CREATE_IF_NEEDED, - "writeDisposition": WriteDisposition.WRITE_TRUNCATE, - } - }, - } - EXTRACT_DATA = { - "id": "%s:%s" % (self.PROJECT, "extract_job"), - "jobReference": {"projectId": self.PROJECT, "jobId": "extract_job"}, - "state": "DONE", - "configuration": { - "extract": { - "sourceTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": SOURCE_TABLE, - }, - "destinationUris": [DESTINATION_URI], - } - }, - } - COPY_DATA = { - "id": "%s:%s" % (self.PROJECT, "copy_job"), - "jobReference": {"projectId": self.PROJECT, "jobId": "copy_job"}, - "state": "DONE", - "configuration": { - "copy": { - "sourceTables": [ - { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": SOURCE_TABLE, - } - ], - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": DESTINATION_TABLE, - }, - } - }, - } - LOAD_DATA = { - "id": "%s:%s" % (self.PROJECT, "load_job"), - "jobReference": {"projectId": self.PROJECT, "jobId": "load_job"}, - "state": "DONE", - "configuration": { - "load": { - "destinationTable": { - "projectId": self.PROJECT, - 
"datasetId": self.DS_ID, - "tableId": SOURCE_TABLE, - }, - "sourceUris": [SOURCE_URI], - } - }, - } - DATA = { - "nextPageToken": TOKEN, - "jobs": [ASYNC_QUERY_DATA, EXTRACT_DATA, COPY_DATA, LOAD_DATA], - } - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_jobs() - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - jobs = list(page) - token = iterator.next_page_token - - self.assertEqual(len(jobs), len(DATA["jobs"])) - for found, expected in zip(jobs, DATA["jobs"]): - name = expected["jobReference"]["jobId"] - self.assertIsInstance(found, JOB_TYPES[name]) - self.assertEqual(found.job_id, name) - self.assertEqual(token, TOKEN) - - conn.api_request.assert_called_once_with( - method="GET", - path="/%s" % PATH, - query_params={"projection": "full"}, - timeout=None, - ) - - def test_list_jobs_load_job_wo_sourceUris(self): - from google.cloud.bigquery.job import LoadJob - - SOURCE_TABLE = "source_table" - JOB_TYPES = {"load_job": LoadJob} - PATH = "projects/%s/jobs" % self.PROJECT - TOKEN = "TOKEN" - LOAD_DATA = { - "id": "%s:%s" % (self.PROJECT, "load_job"), - "jobReference": {"projectId": self.PROJECT, "jobId": "load_job"}, - "state": "DONE", - "configuration": { - "load": { - "destinationTable": { - "projectId": self.PROJECT, - "datasetId": self.DS_ID, - "tableId": SOURCE_TABLE, - } - } - }, - } - DATA = {"nextPageToken": TOKEN, "jobs": [LOAD_DATA]} - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_jobs() - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - 
final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - jobs = list(page) - token = iterator.next_page_token - - self.assertEqual(len(jobs), len(DATA["jobs"])) - for found, expected in zip(jobs, DATA["jobs"]): - name = expected["jobReference"]["jobId"] - self.assertIsInstance(found, JOB_TYPES[name]) - self.assertEqual(found.job_id, name) - self.assertEqual(token, TOKEN) - - conn.api_request.assert_called_once_with( - method="GET", - path="/%s" % PATH, - query_params={"projection": "full"}, - timeout=None, - ) - - def test_list_jobs_explicit_missing(self): - PATH = "projects/%s/jobs" % self.PROJECT - DATA = {} - TOKEN = "TOKEN" - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection(DATA) - - iterator = client.list_jobs( - max_results=1000, page_token=TOKEN, all_users=True, state_filter="done" - ) - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - page = next(iterator.pages) - - final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) - jobs = list(page) - token = iterator.next_page_token - - self.assertEqual(len(jobs), 0) - self.assertIsNone(token) - - conn.api_request.assert_called_once_with( - method="GET", - path="/%s" % PATH, - query_params={ - "projection": "full", - "maxResults": 1000, - "pageToken": TOKEN, - "allUsers": True, - "stateFilter": "done", - }, - timeout=None, - ) - - def test_list_jobs_w_project(self): - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection({}) - - list(client.list_jobs(project="other-project")) - - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/other-project/jobs", - query_params={"projection": "full"}, - timeout=None, - ) - - def test_list_jobs_w_timeout(self): - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = 
client._connection = make_connection({}) - - list(client.list_jobs(timeout=7.5)) - - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/{}/jobs".format(self.PROJECT), - query_params={"projection": "full"}, - timeout=7.5, - ) - - def test_list_jobs_w_time_filter(self): - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection({}) - - # One millisecond after the unix epoch. - start_time = datetime.datetime(1970, 1, 1, 0, 0, 0, 1000) - # One millisecond after the the 2038 31-bit signed int rollover - end_time = datetime.datetime(2038, 1, 19, 3, 14, 7, 1000) - end_time_millis = (((2 ** 31) - 1) * 1000) + 1 - - list(client.list_jobs(min_creation_time=start_time, max_creation_time=end_time)) - - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/%s/jobs" % self.PROJECT, - query_params={ - "projection": "full", - "minCreationTime": "1", - "maxCreationTime": str(end_time_millis), - }, - timeout=None, - ) - - def test_list_jobs_w_parent_job_filter(self): - from google.cloud.bigquery import job - - creds = _make_credentials() - client = self._make_one(self.PROJECT, creds) - conn = client._connection = make_connection({}, {}) - - parent_job_args = ["parent-job-123", job._AsyncJob("parent-job-123", client)] - - for parent_job in parent_job_args: - list(client.list_jobs(parent_job=parent_job)) - conn.api_request.assert_called_once_with( - method="GET", - path="/projects/%s/jobs" % self.PROJECT, - query_params={"projection": "full", "parentJobId": "parent-job-123"}, - timeout=None, - ) - conn.api_request.reset_mock() - def test_load_table_from_uri(self): from google.cloud.bigquery.job import LoadJob, LoadJobConfig diff --git a/tests/unit/test_list_datasets.py b/tests/unit/test_list_datasets.py new file mode 100644 index 000000000..7793a7ba6 --- /dev/null +++ b/tests/unit/test_list_datasets.py @@ -0,0 +1,124 @@ +# Copyright 2021 Google LLC + +# Licensed under the 
Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import mock +import pytest + +from .helpers import make_connection + + +@pytest.mark.parametrize( + "extra,query", [({}, {}), (dict(page_size=42), dict(maxResults=42))] +) +def test_list_datasets_defaults(client, PROJECT, extra, query): + from google.cloud.bigquery.dataset import DatasetListItem + + DATASET_1 = "dataset_one" + DATASET_2 = "dataset_two" + PATH = "projects/%s/datasets" % PROJECT + TOKEN = "TOKEN" + DATA = { + "nextPageToken": TOKEN, + "datasets": [ + { + "kind": "bigquery#dataset", + "id": "%s:%s" % (PROJECT, DATASET_1), + "datasetReference": {"datasetId": DATASET_1, "projectId": PROJECT}, + "friendlyName": None, + }, + { + "kind": "bigquery#dataset", + "id": "%s:%s" % (PROJECT, DATASET_2), + "datasetReference": {"datasetId": DATASET_2, "projectId": PROJECT}, + "friendlyName": "Two", + }, + ], + } + conn = client._connection = make_connection(DATA) + + iterator = client.list_datasets(**extra) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + datasets = list(page) + token = iterator.next_page_token + + assert len(datasets) == len(DATA["datasets"]) + for found, expected in zip(datasets, DATA["datasets"]): + assert isinstance(found, DatasetListItem) + assert found.full_dataset_id == expected["id"] + assert found.friendly_name == 
expected["friendlyName"] + assert token == TOKEN + + conn.api_request.assert_called_once_with( + method="GET", path="/%s" % PATH, query_params=query, timeout=None + ) + + +def test_list_datasets_w_project_and_timeout(client, PROJECT): + conn = client._connection = make_connection({}) + + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + list(client.list_datasets(project="other-project", timeout=7.5)) + + final_attributes.assert_called_once_with( + {"path": "/projects/other-project/datasets"}, client, None + ) + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/other-project/datasets", + query_params={}, + timeout=7.5, + ) + + +def test_list_datasets_explicit_response_missing_datasets_key(client, PROJECT): + PATH = "projects/%s/datasets" % PROJECT + TOKEN = "TOKEN" + FILTER = "FILTER" + DATA = {} + conn = client._connection = make_connection(DATA) + + iterator = client.list_datasets( + include_all=True, filter=FILTER, max_results=3, page_token=TOKEN + ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + datasets = list(page) + token = iterator.next_page_token + + assert len(datasets) == 0 + assert token is None + + conn.api_request.assert_called_once_with( + method="GET", + path="/%s" % PATH, + query_params={ + "all": True, + "filter": FILTER, + "maxResults": 3, + "pageToken": TOKEN, + }, + timeout=None, + ) diff --git a/tests/unit/test_list_jobs.py b/tests/unit/test_list_jobs.py new file mode 100644 index 000000000..f348be724 --- /dev/null +++ b/tests/unit/test_list_jobs.py @@ -0,0 +1,291 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime + +import mock +import pytest + +from .helpers import make_connection + + +@pytest.mark.parametrize( + "extra,query", [({}, {}), (dict(page_size=42), dict(maxResults=42))] +) +def test_list_jobs_defaults(client, PROJECT, DS_ID, extra, query): + from google.cloud.bigquery.job import CopyJob + from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import ExtractJob + from google.cloud.bigquery.job import LoadJob + from google.cloud.bigquery.job import QueryJob + from google.cloud.bigquery.job import WriteDisposition + + SOURCE_TABLE = "source_table" + DESTINATION_TABLE = "destination_table" + QUERY_DESTINATION_TABLE = "query_destination_table" + SOURCE_URI = "gs://test_bucket/src_object*" + DESTINATION_URI = "gs://test_bucket/dst_object*" + JOB_TYPES = { + "load_job": LoadJob, + "copy_job": CopyJob, + "extract_job": ExtractJob, + "query_job": QueryJob, + } + PATH = "projects/%s/jobs" % PROJECT + TOKEN = "TOKEN" + QUERY = "SELECT * from test_dataset:test_table" + ASYNC_QUERY_DATA = { + "id": "%s:%s" % (PROJECT, "query_job"), + "jobReference": {"projectId": PROJECT, "jobId": "query_job"}, + "state": "DONE", + "configuration": { + "query": { + "query": QUERY, + "destinationTable": { + "projectId": PROJECT, + "datasetId": DS_ID, + "tableId": QUERY_DESTINATION_TABLE, + }, + "createDisposition": CreateDisposition.CREATE_IF_NEEDED, + "writeDisposition": WriteDisposition.WRITE_TRUNCATE, + } + }, + } + EXTRACT_DATA = { + "id": "%s:%s" % (PROJECT, "extract_job"), + "jobReference": {"projectId": PROJECT, "jobId": 
"extract_job"}, + "state": "DONE", + "configuration": { + "extract": { + "sourceTable": { + "projectId": PROJECT, + "datasetId": DS_ID, + "tableId": SOURCE_TABLE, + }, + "destinationUris": [DESTINATION_URI], + } + }, + } + COPY_DATA = { + "id": "%s:%s" % (PROJECT, "copy_job"), + "jobReference": {"projectId": PROJECT, "jobId": "copy_job"}, + "state": "DONE", + "configuration": { + "copy": { + "sourceTables": [ + {"projectId": PROJECT, "datasetId": DS_ID, "tableId": SOURCE_TABLE} + ], + "destinationTable": { + "projectId": PROJECT, + "datasetId": DS_ID, + "tableId": DESTINATION_TABLE, + }, + } + }, + } + LOAD_DATA = { + "id": "%s:%s" % (PROJECT, "load_job"), + "jobReference": {"projectId": PROJECT, "jobId": "load_job"}, + "state": "DONE", + "configuration": { + "load": { + "destinationTable": { + "projectId": PROJECT, + "datasetId": DS_ID, + "tableId": SOURCE_TABLE, + }, + "sourceUris": [SOURCE_URI], + } + }, + } + DATA = { + "nextPageToken": TOKEN, + "jobs": [ASYNC_QUERY_DATA, EXTRACT_DATA, COPY_DATA, LOAD_DATA], + } + conn = client._connection = make_connection(DATA) + + iterator = client.list_jobs(**extra) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + jobs = list(page) + token = iterator.next_page_token + + assert len(jobs) == len(DATA["jobs"]) + for found, expected in zip(jobs, DATA["jobs"]): + name = expected["jobReference"]["jobId"] + assert isinstance(found, JOB_TYPES[name]) + assert found.job_id == name + assert token == TOKEN + + conn.api_request.assert_called_once_with( + method="GET", + path="/%s" % PATH, + query_params=dict({"projection": "full"}, **query), + timeout=None, + ) + + +def test_list_jobs_load_job_wo_sourceUris(client, PROJECT, DS_ID): + from google.cloud.bigquery.job import LoadJob + + SOURCE_TABLE = "source_table" + JOB_TYPES = {"load_job": LoadJob} + PATH 
= "projects/%s/jobs" % PROJECT + TOKEN = "TOKEN" + LOAD_DATA = { + "id": "%s:%s" % (PROJECT, "load_job"), + "jobReference": {"projectId": PROJECT, "jobId": "load_job"}, + "state": "DONE", + "configuration": { + "load": { + "destinationTable": { + "projectId": PROJECT, + "datasetId": DS_ID, + "tableId": SOURCE_TABLE, + } + } + }, + } + DATA = {"nextPageToken": TOKEN, "jobs": [LOAD_DATA]} + conn = client._connection = make_connection(DATA) + + iterator = client.list_jobs() + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + jobs = list(page) + token = iterator.next_page_token + + assert len(jobs) == len(DATA["jobs"]) + for found, expected in zip(jobs, DATA["jobs"]): + name = expected["jobReference"]["jobId"] + assert isinstance(found, JOB_TYPES[name]) + assert found.job_id == name + assert token == TOKEN + + conn.api_request.assert_called_once_with( + method="GET", + path="/%s" % PATH, + query_params={"projection": "full"}, + timeout=None, + ) + + +def test_list_jobs_explicit_missing(client, PROJECT): + PATH = "projects/%s/jobs" % PROJECT + DATA = {} + TOKEN = "TOKEN" + conn = client._connection = make_connection(DATA) + + iterator = client.list_jobs( + max_results=1000, page_token=TOKEN, all_users=True, state_filter="done" + ) + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None) + jobs = list(page) + token = iterator.next_page_token + + assert len(jobs) == 0 + assert token is None + + conn.api_request.assert_called_once_with( + method="GET", + path="/%s" % PATH, + query_params={ + "projection": "full", + "maxResults": 1000, + "pageToken": TOKEN, + "allUsers": True, + "stateFilter": "done", + }, + timeout=None, + 
) + + +def test_list_jobs_w_project(client, PROJECT): + conn = client._connection = make_connection({}) + + list(client.list_jobs(project="other-project")) + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/other-project/jobs", + query_params={"projection": "full"}, + timeout=None, + ) + + +def test_list_jobs_w_timeout(client, PROJECT): + conn = client._connection = make_connection({}) + + list(client.list_jobs(timeout=7.5)) + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/{}/jobs".format(PROJECT), + query_params={"projection": "full"}, + timeout=7.5, + ) + + +def test_list_jobs_w_time_filter(client, PROJECT): + conn = client._connection = make_connection({}) + + # One millisecond after the unix epoch. + start_time = datetime.datetime(1970, 1, 1, 0, 0, 0, 1000) + # One millisecond after the the 2038 31-bit signed int rollover + end_time = datetime.datetime(2038, 1, 19, 3, 14, 7, 1000) + end_time_millis = (((2 ** 31) - 1) * 1000) + 1 + + list(client.list_jobs(min_creation_time=start_time, max_creation_time=end_time)) + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/%s/jobs" % PROJECT, + query_params={ + "projection": "full", + "minCreationTime": "1", + "maxCreationTime": str(end_time_millis), + }, + timeout=None, + ) + + +def test_list_jobs_w_parent_job_filter(client, PROJECT): + from google.cloud.bigquery import job + + conn = client._connection = make_connection({}, {}) + + parent_job_args = ["parent-job-123", job._AsyncJob("parent-job-123", client)] + + for parent_job in parent_job_args: + list(client.list_jobs(parent_job=parent_job)) + conn.api_request.assert_called_once_with( + method="GET", + path="/projects/%s/jobs" % PROJECT, + query_params={"projection": "full", "parentJobId": "parent-job-123"}, + timeout=None, + ) + conn.api_request.reset_mock() diff --git a/tests/unit/test_list_models.py b/tests/unit/test_list_models.py index 56aa66126..4ede9a7dd 100644 --- 
a/tests/unit/test_list_models.py +++ b/tests/unit/test_list_models.py @@ -33,8 +33,13 @@ def test_list_models_empty_w_timeout(client, PROJECT, DS_ID): ) +@pytest.mark.parametrize( + "extra,query", [({}, {}), (dict(page_size=42), dict(maxResults=42))] +) @dataset_polymorphic -def test_list_models_defaults(make_dataset, get_reference, client, PROJECT, DS_ID): +def test_list_models_defaults( + make_dataset, get_reference, client, PROJECT, DS_ID, extra, query, +): from google.cloud.bigquery.model import Model MODEL_1 = "model_one" @@ -64,7 +69,7 @@ def test_list_models_defaults(make_dataset, get_reference, client, PROJECT, DS_I conn = client._connection = make_connection(DATA) dataset = make_dataset(PROJECT, DS_ID) - iterator = client.list_models(dataset) + iterator = client.list_models(dataset, **extra) assert iterator.dataset == get_reference(dataset) page = next(iterator.pages) models = list(page) @@ -77,7 +82,7 @@ def test_list_models_defaults(make_dataset, get_reference, client, PROJECT, DS_I assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={}, timeout=None + method="GET", path="/%s" % PATH, query_params=query, timeout=None ) diff --git a/tests/unit/test_list_projects.py b/tests/unit/test_list_projects.py new file mode 100644 index 000000000..a88540dd5 --- /dev/null +++ b/tests/unit/test_list_projects.py @@ -0,0 +1,119 @@ +# Copyright 2021 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import mock +import pytest + +from .helpers import make_connection + + +@pytest.mark.parametrize( + "extra,query", [({}, {}), (dict(page_size=42), dict(maxResults=42))] +) +def test_list_projects_defaults(client, PROJECT, extra, query): + from google.cloud.bigquery.client import Project + + PROJECT_2 = "PROJECT_TWO" + TOKEN = "TOKEN" + DATA = { + "nextPageToken": TOKEN, + "projects": [ + { + "kind": "bigquery#project", + "id": PROJECT, + "numericId": 1, + "projectReference": {"projectId": PROJECT}, + "friendlyName": "One", + }, + { + "kind": "bigquery#project", + "id": PROJECT_2, + "numericId": 2, + "projectReference": {"projectId": PROJECT_2}, + "friendlyName": "Two", + }, + ], + } + conn = client._connection = make_connection(DATA) + iterator = client.list_projects(**extra) + + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/projects"}, client, None) + projects = list(page) + token = iterator.next_page_token + + assert len(projects) == len(DATA["projects"]) + for found, expected in zip(projects, DATA["projects"]): + assert isinstance(found, Project) + assert found.project_id == expected["id"] + assert found.numeric_id == expected["numericId"] + assert found.friendly_name == expected["friendlyName"] + assert token == TOKEN + + conn.api_request.assert_called_once_with( + method="GET", path="/projects", query_params=query, timeout=None + ) + + +def test_list_projects_w_timeout(client): + TOKEN = "TOKEN" + DATA = { + "nextPageToken": TOKEN, + "projects": [], + } + conn = client._connection = make_connection(DATA) + + iterator = client.list_projects(timeout=7.5) + + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/projects"}, client, None) + + 
conn.api_request.assert_called_once_with( + method="GET", path="/projects", query_params={}, timeout=7.5 + ) + + +def test_list_projects_explicit_response_missing_projects_key(client): + TOKEN = "TOKEN" + DATA = {} + conn = client._connection = make_connection(DATA) + + iterator = client.list_projects(max_results=3, page_token=TOKEN) + + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + page = next(iterator.pages) + + final_attributes.assert_called_once_with({"path": "/projects"}, client, None) + projects = list(page) + token = iterator.next_page_token + + assert len(projects) == 0 + assert token is None + + conn.api_request.assert_called_once_with( + method="GET", + path="/projects", + query_params={"maxResults": 3, "pageToken": TOKEN}, + timeout=None, + ) diff --git a/tests/unit/test_list_routines.py b/tests/unit/test_list_routines.py index 714ede0d4..069966542 100644 --- a/tests/unit/test_list_routines.py +++ b/tests/unit/test_list_routines.py @@ -34,8 +34,13 @@ def test_list_routines_empty_w_timeout(client): ) +@pytest.mark.parametrize( + "extra,query", [({}, {}), (dict(page_size=42), dict(maxResults=42))] +) @dataset_polymorphic -def test_list_routines_defaults(make_dataset, get_reference, client, PROJECT): +def test_list_routines_defaults( + make_dataset, get_reference, client, PROJECT, extra, query +): from google.cloud.bigquery.routine import Routine project_id = PROJECT @@ -67,7 +72,7 @@ def test_list_routines_defaults(make_dataset, get_reference, client, PROJECT): conn = client._connection = make_connection(resource) dataset = make_dataset(client.project, dataset_id) - iterator = client.list_routines(dataset) + iterator = client.list_routines(dataset, **extra) assert iterator.dataset == get_reference(dataset) page = next(iterator.pages) routines = list(page) @@ -80,7 +85,7 @@ def test_list_routines_defaults(make_dataset, get_reference, client, PROJECT): assert actual_token == token 
conn.api_request.assert_called_once_with( - method="GET", path=path, query_params={}, timeout=None + method="GET", path=path, query_params=query, timeout=None ) diff --git a/tests/unit/test_list_tables.py b/tests/unit/test_list_tables.py index 9acee9580..45d15bed3 100644 --- a/tests/unit/test_list_tables.py +++ b/tests/unit/test_list_tables.py @@ -157,3 +157,22 @@ def test_list_tables_explicit(client, PROJECT, DS_ID): def test_list_tables_wrong_type(client): with pytest.raises(TypeError): client.list_tables(42) + + +@dataset_polymorphic +def test_list_tables_page_size(make_dataset, get_reference, client, PROJECT, DS_ID): + path = "/projects/{}/datasets/{}/tables".format(PROJECT, DS_ID) + conn = client._connection = make_connection({}) + + dataset = make_dataset(PROJECT, DS_ID) + iterator = client.list_tables(dataset, timeout=7.5, page_size=42) + assert iterator.dataset == get_reference(dataset) + page = next(iterator.pages) + tables = list(page) + token = iterator.next_page_token + + assert tables == [] + assert token is None + conn.api_request.assert_called_once_with( + method="GET", path=path, query_params=dict(maxResults=42), timeout=7.5 + ) From d034a4d34be500f665bfa75c53d1badcb6750445 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Sun, 6 Jun 2021 08:57:21 -0600 Subject: [PATCH 122/230] chore: release 2.19.0 (#688) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b8abc5abb..a7d62cd36 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.19.0](https://www.github.com/googleapis/python-bigquery/compare/v2.18.0...v2.19.0) (2021-06-06) + + +### Features + +* list_tables, list_projects, list_datasets, list_models, 
list_routines, and list_jobs now accept a page_size parameter to control page size ([#686](https://www.github.com/googleapis/python-bigquery/issues/686)) ([1f1c4b7](https://www.github.com/googleapis/python-bigquery/commit/1f1c4b7ba4390fc4c5c8186bc22b83b45304ca06)) + ## [2.18.0](https://www.github.com/googleapis/python-bigquery/compare/v2.17.0...v2.18.0) (2021-06-02) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index a613e5ea2..2605c08a3 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.18.0" +__version__ = "2.19.0" From 1259e16394784315368e8be959c1ac097782b62e Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 7 Jun 2021 14:55:58 +0200 Subject: [PATCH 123/230] feat: support script options in query job config (#690) --- google/cloud/bigquery/__init__.py | 4 ++ google/cloud/bigquery/enums.py | 13 ++++ google/cloud/bigquery/job/__init__.py | 2 + google/cloud/bigquery/job/query.py | 96 ++++++++++++++++++++++++++- tests/unit/job/test_query_config.py | 56 ++++++++++++++++ 5 files changed, 170 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index f031cd81d..94f87304a 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -37,6 +37,7 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import enums +from google.cloud.bigquery.enums import KeyResultStatementKind from google.cloud.bigquery.enums import SqlTypeNames from google.cloud.bigquery.enums import StandardSqlDataTypes from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError @@ -62,6 +63,7 @@ from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.job import QueryPriority from 
google.cloud.bigquery.job import SchemaUpdateOption +from google.cloud.bigquery.job import ScriptOptions from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.job import UnknownJob from google.cloud.bigquery.job import WriteDisposition @@ -138,6 +140,7 @@ "CSVOptions", "GoogleSheetsOptions", "ParquetOptions", + "ScriptOptions", "DEFAULT_RETRY", # Enum Constants "enums", @@ -147,6 +150,7 @@ "DeterminismLevel", "ExternalSourceFormat", "Encoding", + "KeyResultStatementKind", "QueryPriority", "SchemaUpdateOption", "SourceFormat", diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 787c2449d..edf991b6f 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -142,6 +142,19 @@ class SourceFormat(object): """Specifies Orc format.""" +class KeyResultStatementKind: + """Determines which statement in the script represents the "key result". + + The "key result" is used to populate the schema and query results of the script job. 
+ + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#keyresultstatementkind + """ + + KEY_RESULT_STATEMENT_KIND_UNSPECIFIED = "KEY_RESULT_STATEMENT_KIND_UNSPECIFIED" + LAST = "LAST" + FIRST_SELECT = "FIRST_SELECT" + + _SQL_SCALAR_TYPES = frozenset( ( "INT64", diff --git a/google/cloud/bigquery/job/__init__.py b/google/cloud/bigquery/job/__init__.py index 4945841d9..cdab92e05 100644 --- a/google/cloud/bigquery/job/__init__.py +++ b/google/cloud/bigquery/job/__init__.py @@ -34,6 +34,7 @@ from google.cloud.bigquery.job.query import QueryJobConfig from google.cloud.bigquery.job.query import QueryPlanEntry from google.cloud.bigquery.job.query import QueryPlanEntryStep +from google.cloud.bigquery.job.query import ScriptOptions from google.cloud.bigquery.job.query import TimelineEntry from google.cloud.bigquery.enums import Compression from google.cloud.bigquery.enums import CreateDisposition @@ -67,6 +68,7 @@ "QueryJobConfig", "QueryPlanEntry", "QueryPlanEntryStep", + "ScriptOptions", "TimelineEntry", "Compression", "CreateDisposition", diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index f52f9c621..455ef4632 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -18,7 +18,7 @@ import copy import re import typing -from typing import Any, Dict, Union +from typing import Any, Dict, Optional, Union from google.api_core import exceptions from google.api_core.future import polling as polling_future @@ -28,6 +28,7 @@ from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +from google.cloud.bigquery.enums import KeyResultStatementKind from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery import _helpers from google.cloud.bigquery.query import _query_param_from_api_repr @@ -113,6 +114,82 @@ def 
_to_api_repr_table_defs(value): return {k: ExternalConfig.to_api_repr(v) for k, v in value.items()} +class ScriptOptions: + """Options controlling the execution of scripts. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#ScriptOptions + """ + + def __init__( + self, + statement_timeout_ms: Optional[int] = None, + statement_byte_budget: Optional[int] = None, + key_result_statement: Optional[KeyResultStatementKind] = None, + ): + self._properties = {} + self.statement_timeout_ms = statement_timeout_ms + self.statement_byte_budget = statement_byte_budget + self.key_result_statement = key_result_statement + + @classmethod + def from_api_repr(cls, resource: Dict[str, Any]) -> "ScriptOptions": + """Factory: construct instance from the JSON repr. + + Args: + resource(Dict[str: Any]): + ScriptOptions representation returned from API. + + Returns: + google.cloud.bigquery.ScriptOptions: + ScriptOptions sample parsed from ``resource``. + """ + entry = cls() + entry._properties = copy.deepcopy(resource) + return entry + + def to_api_repr(self) -> Dict[str, Any]: + """Construct the API resource representation.""" + return copy.deepcopy(self._properties) + + @property + def statement_timeout_ms(self) -> Union[int, None]: + """Timeout period for each statement in a script.""" + return _helpers._int_or_none(self._properties.get("statementTimeoutMs")) + + @statement_timeout_ms.setter + def statement_timeout_ms(self, value: Union[int, None]): + if value is not None: + value = str(value) + self._properties["statementTimeoutMs"] = value + + @property + def statement_byte_budget(self) -> Union[int, None]: + """Limit on the number of bytes billed per statement. + + Exceeding this budget results in an error. 
+ """ + return _helpers._int_or_none(self._properties.get("statementByteBudget")) + + @statement_byte_budget.setter + def statement_byte_budget(self, value: Union[int, None]): + if value is not None: + value = str(value) + self._properties["statementByteBudget"] = value + + @property + def key_result_statement(self) -> Union[KeyResultStatementKind, None]: + """Determines which statement in the script represents the "key result". + + This is used to populate the schema and query results of the script job. + Default is ``KeyResultStatementKind.LAST``. + """ + return self._properties.get("keyResultStatement") + + @key_result_statement.setter + def key_result_statement(self, value: Union[KeyResultStatementKind, None]): + self._properties["keyResultStatement"] = value + + class QueryJobConfig(_JobConfig): """Configuration options for query jobs. @@ -502,6 +579,23 @@ def schema_update_options(self): def schema_update_options(self, values): self._set_sub_prop("schemaUpdateOptions", values) + @property + def script_options(self) -> ScriptOptions: + """Connection properties which can modify the query behavior. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#scriptoptions + """ + prop = self._get_sub_prop("scriptOptions") + if prop is not None: + prop = ScriptOptions.from_api_repr(prop) + return prop + + @script_options.setter + def script_options(self, value: Union[ScriptOptions, None]): + if value is not None: + value = value.to_api_repr() + self._set_sub_prop("scriptOptions", value) + def to_api_repr(self) -> dict: """Build an API representation of the query job config. 
diff --git a/tests/unit/job/test_query_config.py b/tests/unit/job/test_query_config.py index db03d6a3b..109cf7e44 100644 --- a/tests/unit/job/test_query_config.py +++ b/tests/unit/job/test_query_config.py @@ -253,3 +253,59 @@ def test_from_api_repr_with_encryption(self): self.assertEqual( config.destination_encryption_configuration.kms_key_name, self.KMS_KEY_NAME ) + + def test_to_api_repr_with_script_options_none(self): + config = self._make_one() + config.script_options = None + + resource = config.to_api_repr() + + self.assertEqual(resource, {"query": {"scriptOptions": None}}) + self.assertIsNone(config.script_options) + + def test_to_api_repr_with_script_options(self): + from google.cloud.bigquery import KeyResultStatementKind + from google.cloud.bigquery import ScriptOptions + + config = self._make_one() + config.script_options = ScriptOptions( + statement_timeout_ms=60, + statement_byte_budget=999, + key_result_statement=KeyResultStatementKind.FIRST_SELECT, + ) + + resource = config.to_api_repr() + + expected_script_options_repr = { + "statementTimeoutMs": "60", + "statementByteBudget": "999", + "keyResultStatement": KeyResultStatementKind.FIRST_SELECT, + } + self.assertEqual( + resource, {"query": {"scriptOptions": expected_script_options_repr}} + ) + + def test_from_api_repr_with_script_options(self): + from google.cloud.bigquery import KeyResultStatementKind + from google.cloud.bigquery import ScriptOptions + + resource = { + "query": { + "scriptOptions": { + "statementTimeoutMs": "42", + "statementByteBudget": "123", + "keyResultStatement": KeyResultStatementKind.LAST, + }, + }, + } + klass = self._get_target_class() + + config = klass.from_api_repr(resource) + + script_options = config.script_options + self.assertIsInstance(script_options, ScriptOptions) + self.assertEqual(script_options.statement_timeout_ms, 42) + self.assertEqual(script_options.statement_byte_budget, 123) + self.assertEqual( + script_options.key_result_statement, 
KeyResultStatementKind.LAST + ) From ed4286dcd39f8d60c2d6552b89bc12352b9bea91 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 7 Jun 2021 14:56:55 +0200 Subject: [PATCH 124/230] chore(deps): update dependency google-cloud-bigquery to v2.19.0 (#691) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 4577dff02..b4dae32e9 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.18.0 +google-cloud-bigquery==2.19.0 google-cloud-bigquery-storage==2.4.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 94fe39299..0188bde52 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.18.0 +google-cloud-bigquery==2.19.0 google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 grpcio==1.38.0 From 23140d943fc06652a19fa8866ccf6d8ba126318d Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 7 Jun 2021 15:30:36 +0200 Subject: [PATCH 125/230] chore: release 2.20.0 (#693) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a7d62cd36..b08cd98c7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.20.0](https://www.github.com/googleapis/python-bigquery/compare/v2.19.0...v2.20.0) (2021-06-07) + + +### Features + +* support script options in query job config ([#690](https://www.github.com/googleapis/python-bigquery/issues/690)) 
([1259e16](https://www.github.com/googleapis/python-bigquery/commit/1259e16394784315368e8be959c1ac097782b62e)) + ## [2.19.0](https://www.github.com/googleapis/python-bigquery/compare/v2.18.0...v2.19.0) (2021-06-06) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 2605c08a3..9fea4fece 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.19.0" +__version__ = "2.20.0" From 790d11bdbbf50d2f30c09ba4660d420ba2212866 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 8 Jun 2021 13:38:51 +0200 Subject: [PATCH 126/230] chore(deps): update dependency google-cloud-bigquery to v2.20.0 (#694) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index b4dae32e9..80fa8e454 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.19.0 +google-cloud-bigquery==2.20.0 google-cloud-bigquery-storage==2.4.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 0188bde52..391c85ae3 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.19.0 +google-cloud-bigquery==2.20.0 google-cloud-bigquery-storage==2.4.0 google-auth-oauthlib==0.4.4 grpcio==1.38.0 From 8cfdda8e30a50780ea27bc515a43502fd402280c Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Wed, 16 Jun 2021 07:17:37 -0400 Subject: [PATCH 127/230] chore: pin sphinx to version 4.0.1 (#701) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: pin sphinx to version 4.0.1 Fixes #700 * 🦉 Updates 
from OwlBot Co-authored-by: Owl Bot --- noxfile.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/noxfile.py b/noxfile.py index a52025635..662abbd78 100644 --- a/noxfile.py +++ b/noxfile.py @@ -271,7 +271,7 @@ def blacken(session): def docs(session): """Build the docs.""" - session.install("ipython", "recommonmark", "sphinx", "sphinx_rtd_theme") + session.install("ipython", "recommonmark", "sphinx==4.0.1", "sphinx_rtd_theme") session.install("google-cloud-storage") session.install("-e", ".[all]") @@ -295,7 +295,9 @@ def docfx(session): """Build the docfx yaml files for this library.""" session.install("-e", ".") - session.install("sphinx", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml") + session.install( + "sphinx==4.0.1", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml" + ) shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) session.run( From b85c8d36e94e0aaf80ba3830ec767eea7153f14c Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 17 Jun 2021 06:02:02 +0000 Subject: [PATCH 128/230] chore: remove u'' prefixes from strings in docs/conf.py (#702) Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:58c7342b0bccf85028100adaa3d856cb4a871c22ca9c01960d996e66c40548ce --- .github/.OwlBot.lock.yaml | 2 +- docs/conf.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index da616c91a..ea06d395e 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:c66ba3c8d7bc8566f47df841f98cd0097b28fff0b1864c86f5817f4c8c3e8600 + digest: sha256:58c7342b0bccf85028100adaa3d856cb4a871c22ca9c01960d996e66c40548ce diff --git a/docs/conf.py b/docs/conf.py index 1275fe3f1..cb347160d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -80,9 +80,9 @@ master_doc = "index" # 
General information about the project. -project = u"google-cloud-bigquery" -copyright = u"2019, Google" -author = u"Google APIs" +project = "google-cloud-bigquery" +copyright = "2019, Google" +author = "Google APIs" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -282,7 +282,7 @@ ( master_doc, "google-cloud-bigquery.tex", - u"google-cloud-bigquery Documentation", + "google-cloud-bigquery Documentation", author, "manual", ) @@ -317,7 +317,7 @@ ( master_doc, "google-cloud-bigquery", - u"google-cloud-bigquery Documentation", + "google-cloud-bigquery Documentation", [author], 1, ) @@ -336,7 +336,7 @@ ( master_doc, "google-cloud-bigquery", - u"google-cloud-bigquery Documentation", + "google-cloud-bigquery Documentation", author, "google-cloud-bigquery", "google-cloud-bigquery Library", From b2a689b05b4fdc3fc10767f44534d183137b9d21 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 17 Jun 2021 22:59:26 +0200 Subject: [PATCH 129/230] test: add column ACLs test with real policy tag (#678) * test: add column ACLs test with real policy tag * Use v1 version of the datacatalog client * Install datacatalog in pre-releease tests * Adjust test to actually make it work * Make sure taxonomy is properly cleaned up --- noxfile.py | 4 +++ tests/system/test_client.py | 68 +++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/noxfile.py b/noxfile.py index 662abbd78..0dfe7bf93 100644 --- a/noxfile.py +++ b/noxfile.py @@ -142,6 +142,9 @@ def system(session): else: session.install("google-cloud-storage", "-c", constraints_path) + # Data Catalog needed for the column ACL test with a real Policy Tag. 
+ session.install("google-cloud-datacatalog", "-c", constraints_path) + session.install("-e", ".[all]", "-c", constraints_path) session.install("ipython", "-c", constraints_path) @@ -211,6 +214,7 @@ def prerelease_deps(session): session.install("--pre", "grpcio", "pandas") session.install( "freezegun", + "google-cloud-datacatalog", "google-cloud-storage", "google-cloud-testutils", "IPython", diff --git a/tests/system/test_client.py b/tests/system/test_client.py index b4b0c053d..f91004eac 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -68,6 +68,8 @@ from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi, enums from google.cloud import storage +from google.cloud.datacatalog_v1 import types as datacatalog_types +from google.cloud.datacatalog_v1 import PolicyTagManagerClient from test_utils.retry import RetryErrors from test_utils.retry import RetryInstanceState @@ -167,6 +169,8 @@ def setUp(self): self.to_delete = [dataset] def tearDown(self): + policy_tag_client = PolicyTagManagerClient() + def _still_in_use(bad_request): return any( error["reason"] == "resourceInUse" for error in bad_request._errors @@ -183,6 +187,8 @@ def _still_in_use(bad_request): retry_in_use(Config.CLIENT.delete_dataset)(doomed, delete_contents=True) elif isinstance(doomed, (Table, bigquery.TableReference)): retry_in_use(Config.CLIENT.delete_table)(doomed) + elif isinstance(doomed, datacatalog_types.Taxonomy): + policy_tag_client.delete_taxonomy(name=doomed.name) else: doomed.delete() @@ -381,6 +387,68 @@ def test_create_table_with_policy(self): table2 = Config.CLIENT.update_table(table, ["schema"]) self.assertEqual(policy_2, table2.schema[1].policy_tags) + def test_create_table_with_real_custom_policy(self): + from google.cloud.bigquery.schema import PolicyTagList + + policy_tag_client = PolicyTagManagerClient() + taxonomy_parent = f"projects/{Config.CLIENT.project}/locations/us" + + new_taxonomy = datacatalog_types.Taxonomy( + 
display_name="Custom test taxonomy", + description="This taxonomy is ony used for a test.", + activated_policy_types=[ + datacatalog_types.Taxonomy.PolicyType.FINE_GRAINED_ACCESS_CONTROL + ], + ) + + taxonomy = policy_tag_client.create_taxonomy( + parent=taxonomy_parent, taxonomy=new_taxonomy + ) + self.to_delete.insert(0, taxonomy) + + parent_policy_tag = policy_tag_client.create_policy_tag( + parent=taxonomy.name, + policy_tag=datacatalog_types.PolicyTag( + display_name="Parent policy tag", parent_policy_tag=None + ), + ) + child_policy_tag = policy_tag_client.create_policy_tag( + parent=taxonomy.name, + policy_tag=datacatalog_types.PolicyTag( + display_name="Child policy tag", + parent_policy_tag=parent_policy_tag.name, + ), + ) + + dataset = self.temp_dataset( + _make_dataset_id("create_table_with_real_custom_policy") + ) + table_id = "test_table" + policy_1 = PolicyTagList(names=[parent_policy_tag.name]) + policy_2 = PolicyTagList(names=[child_policy_tag.name]) + + schema = [ + bigquery.SchemaField( + "first_name", "STRING", mode="REQUIRED", policy_tags=policy_1 + ), + bigquery.SchemaField( + "age", "INTEGER", mode="REQUIRED", policy_tags=policy_2 + ), + ] + table_arg = Table(dataset.table(table_id), schema=schema) + self.assertFalse(_table_exists(table_arg)) + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + + self.assertTrue(_table_exists(table)) + self.assertCountEqual( + list(table.schema[0].policy_tags.names), [parent_policy_tag.name] + ) + self.assertCountEqual( + list(table.schema[1].policy_tags.names), [child_policy_tag.name] + ) + def test_create_table_w_time_partitioning_w_clustering_fields(self): from google.cloud.bigquery.table import TimePartitioning from google.cloud.bigquery.table import TimePartitioningType From 9cdeee0255f62cab0d0394430ff211403773d0a0 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 18 Jun 2021 22:35:35 +0200 Subject: [PATCH 130/230] chore: resolve deprecation warnings 
from sys tests (#705) --- tests/system/test_client.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/tests/system/test_client.py b/tests/system/test_client.py index f91004eac..c4caadbe9 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -1123,7 +1123,7 @@ def test_extract_table(self): job.result(timeout=100) self.to_delete.insert(0, destination) - got_bytes = retry_storage_errors(destination.download_as_string)() + got_bytes = retry_storage_errors(destination.download_as_bytes)() got = got_bytes.decode("utf-8") self.assertIn("Bharney Rhubble", got) @@ -2178,15 +2178,11 @@ def test_nested_table_to_arrow(self): self.assertEqual(tbl.num_rows, 1) self.assertEqual(tbl.num_columns, 3) # Columns may not appear in the requested order. - self.assertTrue( - pyarrow.types.is_float64(tbl.schema.field_by_name("float_col").type) - ) - self.assertTrue( - pyarrow.types.is_string(tbl.schema.field_by_name("string_col").type) - ) - record_col = tbl.schema.field_by_name("record_col").type + self.assertTrue(pyarrow.types.is_float64(tbl.schema.field("float_col").type)) + self.assertTrue(pyarrow.types.is_string(tbl.schema.field("string_col").type)) + record_col = tbl.schema.field("record_col").type self.assertTrue(pyarrow.types.is_struct(record_col)) - self.assertEqual(record_col.num_children, 2) + self.assertEqual(record_col.num_fields, 2) self.assertEqual(record_col[0].name, "nested_string") self.assertTrue(pyarrow.types.is_string(record_col[0].type)) self.assertEqual(record_col[1].name, "nested_repeated") From 27d6839ee8a40909e4199cfa0da8b6b64705b2e9 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Sat, 19 Jun 2021 02:26:03 +0000 Subject: [PATCH 131/230] docs: omit mention of Python 2.7 in `CONTRIBUTING.rst` (#706) Source-Link: https://github.com/googleapis/synthtool/commit/b91f129527853d5b756146a0b5044481fb4e09a8 Post-Processor: 
gcr.io/repo-automation-bots/owlbot-python:latest@sha256:b6169fc6a5207b11800a7c002d0c5c2bc6d82697185ca12e666f44031468cfcd --- .github/.OwlBot.lock.yaml | 2 +- CONTRIBUTING.rst | 7 ++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index ea06d395e..cc49c6a3d 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:58c7342b0bccf85028100adaa3d856cb4a871c22ca9c01960d996e66c40548ce + digest: sha256:b6169fc6a5207b11800a7c002d0c5c2bc6d82697185ca12e666f44031468cfcd diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 20ba9e62e..a9b389e83 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -69,7 +69,6 @@ We use `nox `__ to instrument our tests. - To test your changes, run unit tests with ``nox``:: - $ nox -s unit-2.7 $ nox -s unit-3.8 $ ... @@ -144,7 +143,6 @@ Running System Tests # Run all system tests $ nox -s system-3.8 - $ nox -s system-2.7 # Run a single system test $ nox -s system-3.8 -- -k @@ -152,9 +150,8 @@ Running System Tests .. note:: - System tests are only configured to run under Python 2.7 and - Python 3.8. For expediency, we do not run them in older versions - of Python 3. + System tests are only configured to run under Python 3.8. + For expediency, we do not run them in older versions of Python 3. This alone will not run the tests. 
You'll need to change some local auth settings and change some configuration in your project to From 0b20015c1727a2d7cd4234b18210db8d04d7ca77 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Sun, 20 Jun 2021 01:04:03 +0000 Subject: [PATCH 132/230] chore: update precommit hook pre-commit/pre-commit-hooks to v4 (#1083) (#709) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [pre-commit/pre-commit-hooks](https://togithub.com/pre-commit/pre-commit-hooks) | repository | major | `v3.4.0` -> `v4.0.1` | --- ### Release Notes
pre-commit/pre-commit-hooks ### [`v4.0.1`](https://togithub.com/pre-commit/pre-commit-hooks/releases/v4.0.1) [Compare Source](https://togithub.com/pre-commit/pre-commit-hooks/compare/v4.0.0...v4.0.1) ##### Fixes - `check-shebang-scripts-are-executable` fix entry point. - [#​602](https://togithub.com/pre-commit/pre-commit-hooks/issues/602) issue by [@​Person-93](https://togithub.com/Person-93). - [#​603](https://togithub.com/pre-commit/pre-commit-hooks/issues/603) PR by [@​scop](https://togithub.com/scop). ### [`v4.0.0`](https://togithub.com/pre-commit/pre-commit-hooks/releases/v4.0.0) [Compare Source](https://togithub.com/pre-commit/pre-commit-hooks/compare/v3.4.0...v4.0.0) ##### Features - `check-json`: report duplicate keys. - [#​558](https://togithub.com/pre-commit/pre-commit-hooks/issues/558) PR by [@​AdityaKhursale](https://togithub.com/AdityaKhursale). - [#​554](https://togithub.com/pre-commit/pre-commit-hooks/issues/554) issue by [@​adamchainz](https://togithub.com/adamchainz). - `no-commit-to-branch`: add `main` to default blocked branches. - [#​565](https://togithub.com/pre-commit/pre-commit-hooks/issues/565) PR by [@​ndevenish](https://togithub.com/ndevenish). - `check-case-conflict`: check conflicts in directory names as well. - [#​575](https://togithub.com/pre-commit/pre-commit-hooks/issues/575) PR by [@​slsyy](https://togithub.com/slsyy). - [#​70](https://togithub.com/pre-commit/pre-commit-hooks/issues/70) issue by [@​andyjack](https://togithub.com/andyjack). - `check-vcs-permalinks`: forbid other branch names. - [#​582](https://togithub.com/pre-commit/pre-commit-hooks/issues/582) PR by [@​jack1142](https://togithub.com/jack1142). - [#​581](https://togithub.com/pre-commit/pre-commit-hooks/issues/581) issue by [@​jack1142](https://togithub.com/jack1142). - `check-shebang-scripts-are-executable`: new hook which ensures shebang'd scripts are executable. 
- [#​545](https://togithub.com/pre-commit/pre-commit-hooks/issues/545) PR by [@​scop](https://togithub.com/scop). ##### Fixes - `check-executables-have-shebangs`: Short circuit shebang lookup on windows. - [#​544](https://togithub.com/pre-commit/pre-commit-hooks/issues/544) PR by [@​scop](https://togithub.com/scop). - `requirements-txt-fixer`: Fix comments which have indentation - [#​549](https://togithub.com/pre-commit/pre-commit-hooks/issues/549) PR by [@​greshilov](https://togithub.com/greshilov). - [#​548](https://togithub.com/pre-commit/pre-commit-hooks/issues/548) issue by [@​greshilov](https://togithub.com/greshilov). - `pretty-format-json`: write to stdout using UTF-8 encoding. - [#​571](https://togithub.com/pre-commit/pre-commit-hooks/issues/571) PR by [@​jack1142](https://togithub.com/jack1142). - [#​570](https://togithub.com/pre-commit/pre-commit-hooks/issues/570) issue by [@​jack1142](https://togithub.com/jack1142). - Use more inclusive language. - [#​599](https://togithub.com/pre-commit/pre-commit-hooks/issues/599) PR by [@​asottile](https://togithub.com/asottile). ##### Breaking changes - Remove deprecated hooks: `flake8`, `pyflakes`, `autopep8-wrapper`. - [#​597](https://togithub.com/pre-commit/pre-commit-hooks/issues/597) PR by [@​asottile](https://togithub.com/asottile).
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻️ **Rebasing**: Renovate will not automatically rebase this PR, because other commits have been found. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/synthtool). Source-Link: https://github.com/googleapis/synthtool/commit/333fd90856f1454380514bc59fc0936cdaf1c202 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:b8c131c558606d3cea6e18f8e87befbd448c1482319b0db3c5d5388fa6ea72e3 --- .github/.OwlBot.lock.yaml | 2 +- .pre-commit-config.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index cc49c6a3d..9602d5405 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:b6169fc6a5207b11800a7c002d0c5c2bc6d82697185ca12e666f44031468cfcd + digest: sha256:b8c131c558606d3cea6e18f8e87befbd448c1482319b0db3c5d5388fa6ea72e3 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4f00c7cff..62eb5a77d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,7 +16,7 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.4.0 + rev: v4.0.1 hooks: - id: trailing-whitespace - id: end-of-file-fixer From 18bb443c7acd0a75dcb57d9aebe38b2d734ff8c7 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 21 Jun 2021 16:18:27 +0200 Subject: [PATCH 133/230] feat: enable unsetting policy tags on schema fields (#703) * feat: enable unsetting policy tags on 
schema fields * Adjust API representation for STRUCT schema fields * De-dup logic for converting None policy tags --- google/cloud/bigquery/schema.py | 45 +++++++-- tests/system/test_client.py | 50 ++++++++++ tests/unit/job/test_load_config.py | 4 + tests/unit/test_client.py | 49 +++++++-- tests/unit/test_external_config.py | 9 +- tests/unit/test_schema.py | 154 +++++++++++++++++++++++++---- 6 files changed, 279 insertions(+), 32 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 919d78b23..157db7ce6 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -15,6 +15,7 @@ """Schemas for BigQuery tables / queries.""" import collections +from typing import Optional from google.cloud.bigquery_v2 import types @@ -105,7 +106,26 @@ def __init__( if max_length is not _DEFAULT_VALUE: self._properties["maxLength"] = max_length self._fields = tuple(fields) - self._policy_tags = policy_tags + + self._policy_tags = self._determine_policy_tags(field_type, policy_tags) + + @staticmethod + def _determine_policy_tags( + field_type: str, given_policy_tags: Optional["PolicyTagList"] + ) -> Optional["PolicyTagList"]: + """Return the given policy tags, or their suitable representation if `None`. + + Args: + field_type: The type of the schema field. + given_policy_tags: The policy tags to maybe ajdust. + """ + if given_policy_tags is not None: + return given_policy_tags + + if field_type is not None and field_type.upper() in _STRUCT_TYPES: + return None + + return PolicyTagList() @staticmethod def __get_int(api_repr, name): @@ -126,18 +146,24 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": Returns: google.cloud.biquery.schema.SchemaField: The ``SchemaField`` object. 
""" + field_type = api_repr["type"].upper() + # Handle optional properties with default values mode = api_repr.get("mode", "NULLABLE") description = api_repr.get("description", _DEFAULT_VALUE) fields = api_repr.get("fields", ()) + policy_tags = cls._determine_policy_tags( + field_type, PolicyTagList.from_api_repr(api_repr.get("policyTags")) + ) + return cls( - field_type=api_repr["type"].upper(), + field_type=field_type, fields=[cls.from_api_repr(f) for f in fields], mode=mode.upper(), description=description, name=api_repr["name"], - policy_tags=PolicyTagList.from_api_repr(api_repr.get("policyTags")), + policy_tags=policy_tags, precision=cls.__get_int(api_repr, "precision"), scale=cls.__get_int(api_repr, "scale"), max_length=cls.__get_int(api_repr, "maxLength"), @@ -218,9 +244,9 @@ def to_api_repr(self) -> dict: # add this to the serialized representation. if self.field_type.upper() in _STRUCT_TYPES: answer["fields"] = [f.to_api_repr() for f in self.fields] - - # If this contains a policy tag definition, include that as well: - if self.policy_tags is not None: + else: + # Explicitly include policy tag definition (we must not do it for RECORD + # fields, because those are not leaf fields). answer["policyTags"] = self.policy_tags.to_api_repr() # Done; return the serialized dictionary. 
@@ -244,6 +270,11 @@ def _key(self): field_type = f"{field_type}({self.precision}, {self.scale})" else: field_type = f"{field_type}({self.precision})" + + policy_tags = ( + () if self._policy_tags is None else tuple(sorted(self._policy_tags.names)) + ) + return ( self.name, field_type, @@ -251,7 +282,7 @@ def _key(self): self.mode.upper(), # pytype: disable=attribute-error self.description, self._fields, - self._policy_tags, + policy_tags, ) def to_standard_sql(self) -> types.StandardSqlField: diff --git a/tests/system/test_client.py b/tests/system/test_client.py index c4caadbe9..ce3021399 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -653,6 +653,56 @@ def test_update_table_schema(self): self.assertEqual(found.field_type, expected.field_type) self.assertEqual(found.mode, expected.mode) + def test_unset_table_schema_attributes(self): + from google.cloud.bigquery.schema import PolicyTagList + + dataset = self.temp_dataset(_make_dataset_id("unset_policy_tags")) + table_id = "test_table" + policy_tags = PolicyTagList( + names=[ + "projects/{}/locations/us/taxonomies/1/policyTags/2".format( + Config.CLIENT.project + ), + ] + ) + + schema = [ + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField( + "secret_int", + "INTEGER", + mode="REQUIRED", + description="This field is numeric", + policy_tags=policy_tags, + ), + ] + table_arg = Table(dataset.table(table_id), schema=schema) + self.assertFalse(_table_exists(table_arg)) + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + + self.assertTrue(_table_exists(table)) + self.assertEqual(policy_tags, table.schema[1].policy_tags) + + # Amend the schema to replace the policy tags + new_schema = table.schema[:] + old_field = table.schema[1] + new_schema[1] = bigquery.SchemaField( + name=old_field.name, + field_type=old_field.field_type, + mode=old_field.mode, + description=None, + fields=old_field.fields, + 
policy_tags=None, + ) + + table.schema = new_schema + updated_table = Config.CLIENT.update_table(table, ["schema"]) + + self.assertFalse(updated_table.schema[1].description) # Empty string or None. + self.assertEqual(updated_table.schema[1].policy_tags.names, ()) + def test_update_table_clustering_configuration(self): dataset = self.temp_dataset(_make_dataset_id("update_table")) diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index b0729e428..eafe7e046 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -434,11 +434,13 @@ def test_schema_setter_fields(self): "name": "full_name", "type": "STRING", "mode": "REQUIRED", + "policyTags": {"names": []}, } age_repr = { "name": "age", "type": "INTEGER", "mode": "REQUIRED", + "policyTags": {"names": []}, } self.assertEqual( config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} @@ -451,11 +453,13 @@ def test_schema_setter_valid_mappings_list(self): "name": "full_name", "type": "STRING", "mode": "REQUIRED", + "policyTags": {"names": []}, } age_repr = { "name": "age", "type": "INTEGER", "mode": "REQUIRED", + "policyTags": {"names": []}, } schema = [full_name_repr, age_repr] config.schema = schema diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 7a28ef248..f6811e207 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -1019,8 +1019,18 @@ def test_create_table_w_schema_and_query(self): { "schema": { "fields": [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "policyTags": {"names": []}, + }, + { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "policyTags": {"names": []}, + }, ] }, "view": {"query": query}, @@ -1054,8 +1064,18 @@ def test_create_table_w_schema_and_query(self): }, "schema": { "fields": [ - {"name": "full_name", 
"type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "policyTags": {"names": []}, + }, + { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "policyTags": {"names": []}, + }, ] }, "view": {"query": query, "useLegacySql": False}, @@ -2000,12 +2020,14 @@ def test_update_table(self): "type": "STRING", "mode": "REQUIRED", "description": None, + "policyTags": {"names": []}, }, { "name": "age", "type": "INTEGER", "mode": "REQUIRED", "description": "New field description", + "policyTags": {"names": []}, }, ] }, @@ -2047,12 +2069,14 @@ def test_update_table(self): "type": "STRING", "mode": "REQUIRED", "description": None, + "policyTags": {"names": []}, }, { "name": "age", "type": "INTEGER", "mode": "REQUIRED", "description": "New field description", + "policyTags": {"names": []}, }, ] }, @@ -2173,14 +2197,21 @@ def test_update_table_w_query(self): "type": "STRING", "mode": "REQUIRED", "description": None, + "policyTags": {"names": []}, }, { "name": "age", "type": "INTEGER", "mode": "REQUIRED", "description": "this is a column", + "policyTags": {"names": []}, + }, + { + "name": "country", + "type": "STRING", + "mode": "NULLABLE", + "policyTags": {"names": []}, }, - {"name": "country", "type": "STRING", "mode": "NULLABLE"}, ] } schema = [ @@ -6516,10 +6547,10 @@ def test_load_table_from_dataframe(self): assert field["type"] == table_field.field_type assert field["mode"] == table_field.mode assert len(field.get("fields", [])) == len(table_field.fields) + assert field["policyTags"]["names"] == [] # Omit unnecessary fields when they come from getting the table # (not passed in via job_config) assert "description" not in field - assert "policyTags" not in field @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @@ -7718,18 +7749,21 @@ def test_schema_to_json_with_file_path(self): "description": 
"quarter", "mode": "REQUIRED", "name": "qtr", + "policyTags": {"names": []}, "type": "STRING", }, { "description": "sales representative", "mode": "NULLABLE", "name": "rep", + "policyTags": {"names": []}, "type": "STRING", }, { "description": "total sales", "mode": "NULLABLE", "name": "sales", + "policyTags": {"names": []}, "type": "FLOAT", }, ] @@ -7762,18 +7796,21 @@ def test_schema_to_json_with_file_object(self): "description": "quarter", "mode": "REQUIRED", "name": "qtr", + "policyTags": {"names": []}, "type": "STRING", }, { "description": "sales representative", "mode": "NULLABLE", "name": "rep", + "policyTags": {"names": []}, "type": "STRING", }, { "description": "total sales", "mode": "NULLABLE", "name": "sales", + "policyTags": {"names": []}, "type": "FLOAT", }, ] diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 7178367ea..393df931e 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -78,7 +78,14 @@ def test_to_api_repr_base(self): ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] exp_schema = { - "fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}] + "fields": [ + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "policyTags": {"names": []}, + } + ] } got_resource = ec.to_api_repr() exp_resource = { diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 29c3bace5..d0b5ca54c 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from google.cloud.bigquery.schema import PolicyTagList import unittest import mock @@ -41,6 +42,7 @@ def test_constructor_defaults(self): self.assertEqual(field.mode, "NULLABLE") self.assertIsNone(field.description) self.assertEqual(field.fields, ()) + self.assertEqual(field.policy_tags, PolicyTagList()) def test_constructor_explicit(self): field = self._make_one("test", "STRING", mode="REQUIRED", description="Testing") @@ -104,7 +106,14 @@ def test_to_api_repr_with_subfield(self): self.assertEqual( field.to_api_repr(), { - "fields": [{"mode": "NULLABLE", "name": "bar", "type": "INTEGER"}], + "fields": [ + { + "mode": "NULLABLE", + "name": "bar", + "type": "INTEGER", + "policyTags": {"names": []}, + } + ], "mode": "REQUIRED", "name": "foo", "type": record_type, @@ -404,6 +413,23 @@ def test___eq___hit_w_fields(self): other = self._make_one("test", "RECORD", fields=[sub1, sub2]) self.assertEqual(field, other) + def test___eq___hit_w_policy_tags(self): + field = self._make_one( + "test", + "STRING", + mode="REQUIRED", + description="Testing", + policy_tags=PolicyTagList(names=["foo", "bar"]), + ) + other = self._make_one( + "test", + "STRING", + mode="REQUIRED", + description="Testing", + policy_tags=PolicyTagList(names=["bar", "foo"]), + ) + self.assertEqual(field, other) # Policy tags order does not matter. 
+ def test___ne___wrong_type(self): field = self._make_one("toast", "INTEGER") other = object() @@ -426,6 +452,23 @@ def test___ne___different_values(self): ) self.assertNotEqual(field1, field2) + def test___ne___different_policy_tags(self): + field = self._make_one( + "test", + "STRING", + mode="REQUIRED", + description="Testing", + policy_tags=PolicyTagList(names=["foo", "bar"]), + ) + other = self._make_one( + "test", + "STRING", + mode="REQUIRED", + description="Testing", + policy_tags=PolicyTagList(names=["foo", "baz"]), + ) + self.assertNotEqual(field, other) + def test___hash__set_equality(self): sub1 = self._make_one("sub1", "STRING") sub2 = self._make_one("sub2", "STRING") @@ -446,7 +489,7 @@ def test___hash__not_equals(self): def test___repr__(self): field1 = self._make_one("field1", "STRING") - expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, (), None)" + expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, (), ())" self.assertEqual(repr(field1), expected) @@ -524,10 +567,22 @@ def test_defaults(self): resource = self._call_fut([full_name, age]) self.assertEqual(len(resource), 2) self.assertEqual( - resource[0], {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + resource[0], + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "policyTags": {"names": []}, + }, ) self.assertEqual( - resource[1], {"name": "age", "type": "INTEGER", "mode": "REQUIRED"} + resource[1], + { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "policyTags": {"names": []}, + }, ) def test_w_description(self): @@ -553,11 +608,18 @@ def test_w_description(self): "type": "STRING", "mode": "REQUIRED", "description": DESCRIPTION, + "policyTags": {"names": []}, }, ) self.assertEqual( resource[1], - {"name": "age", "type": "INTEGER", "mode": "REQUIRED", "description": None}, + { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, + "policyTags": {"names": []}, + }, ) def test_w_subfields(self): 
@@ -572,7 +634,13 @@ def test_w_subfields(self): resource = self._call_fut([full_name, phone]) self.assertEqual(len(resource), 2) self.assertEqual( - resource[0], {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + resource[0], + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "policyTags": {"names": []}, + }, ) self.assertEqual( resource[1], @@ -581,8 +649,18 @@ def test_w_subfields(self): "type": "RECORD", "mode": "REPEATED", "fields": [ - {"name": "type", "type": "STRING", "mode": "REQUIRED"}, - {"name": "number", "type": "STRING", "mode": "REQUIRED"}, + { + "name": "type", + "type": "STRING", + "mode": "REQUIRED", + "policyTags": {"names": []}, + }, + { + "name": "number", + "type": "STRING", + "mode": "REQUIRED", + "policyTags": {"names": []}, + }, ], }, ) @@ -794,43 +872,83 @@ def test_from_api_repr_parameterized(api, expect, key2): [ ( dict(name="n", field_type="NUMERIC"), - dict(name="n", type="NUMERIC", mode="NULLABLE"), + dict(name="n", type="NUMERIC", mode="NULLABLE", policyTags={"names": []}), ), ( dict(name="n", field_type="NUMERIC", precision=9), - dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9), + dict( + name="n", + type="NUMERIC", + mode="NULLABLE", + precision=9, + policyTags={"names": []}, + ), ), ( dict(name="n", field_type="NUMERIC", precision=9, scale=2), - dict(name="n", type="NUMERIC", mode="NULLABLE", precision=9, scale=2), + dict( + name="n", + type="NUMERIC", + mode="NULLABLE", + precision=9, + scale=2, + policyTags={"names": []}, + ), ), ( dict(name="n", field_type="BIGNUMERIC"), - dict(name="n", type="BIGNUMERIC", mode="NULLABLE"), + dict( + name="n", type="BIGNUMERIC", mode="NULLABLE", policyTags={"names": []} + ), ), ( dict(name="n", field_type="BIGNUMERIC", precision=40), - dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40), + dict( + name="n", + type="BIGNUMERIC", + mode="NULLABLE", + precision=40, + policyTags={"names": []}, + ), ), ( dict(name="n", field_type="BIGNUMERIC", 
precision=40, scale=2), - dict(name="n", type="BIGNUMERIC", mode="NULLABLE", precision=40, scale=2), + dict( + name="n", + type="BIGNUMERIC", + mode="NULLABLE", + precision=40, + scale=2, + policyTags={"names": []}, + ), ), ( dict(name="n", field_type="STRING"), - dict(name="n", type="STRING", mode="NULLABLE"), + dict(name="n", type="STRING", mode="NULLABLE", policyTags={"names": []}), ), ( dict(name="n", field_type="STRING", max_length=9), - dict(name="n", type="STRING", mode="NULLABLE", maxLength=9), + dict( + name="n", + type="STRING", + mode="NULLABLE", + maxLength=9, + policyTags={"names": []}, + ), ), ( dict(name="n", field_type="BYTES"), - dict(name="n", type="BYTES", mode="NULLABLE"), + dict(name="n", type="BYTES", mode="NULLABLE", policyTags={"names": []}), ), ( dict(name="n", field_type="BYTES", max_length=9), - dict(name="n", type="BYTES", mode="NULLABLE", maxLength=9), + dict( + name="n", + type="BYTES", + mode="NULLABLE", + maxLength=9, + policyTags={"names": []}, + ), ), ], ) From ebb066f90ed3b35da0bc9f5f77e65cde35987d67 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 22 Jun 2021 18:30:12 +0000 Subject: [PATCH 134/230] chore: add kokoro 3.9 config templates (#712) Source-Link: https://github.com/googleapis/synthtool/commit/b0eb8a8b30b46a3c98d23c23107acb748c6601a1 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:df50e8d462f86d6bcb42f27ecad55bb12c404f1c65de9c6fe4c4d25120080bd6 --- .github/.OwlBot.lock.yaml | 2 +- .kokoro/samples/python3.9/common.cfg | 40 +++++++++++++++++++++ .kokoro/samples/python3.9/continuous.cfg | 6 ++++ .kokoro/samples/python3.9/periodic-head.cfg | 11 ++++++ .kokoro/samples/python3.9/periodic.cfg | 6 ++++ .kokoro/samples/python3.9/presubmit.cfg | 6 ++++ 6 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 .kokoro/samples/python3.9/common.cfg create mode 100644 .kokoro/samples/python3.9/continuous.cfg create mode 100644 
.kokoro/samples/python3.9/periodic-head.cfg create mode 100644 .kokoro/samples/python3.9/periodic.cfg create mode 100644 .kokoro/samples/python3.9/presubmit.cfg diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 9602d5405..0954585f2 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:b8c131c558606d3cea6e18f8e87befbd448c1482319b0db3c5d5388fa6ea72e3 + digest: sha256:df50e8d462f86d6bcb42f27ecad55bb12c404f1c65de9c6fe4c4d25120080bd6 diff --git a/.kokoro/samples/python3.9/common.cfg b/.kokoro/samples/python3.9/common.cfg new file mode 100644 index 000000000..f179577a5 --- /dev/null +++ b/.kokoro/samples/python3.9/common.cfg @@ -0,0 +1,40 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Build logs will be here +action { + define_artifacts { + regex: "**/*sponge_log.xml" + } +} + +# Specify which tests to run +env_vars: { + key: "RUN_TESTS_SESSION" + value: "py-3.9" +} + +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-py39" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples.sh" +} + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" +} + +# Download secrets for samples +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" + +# Download trampoline resources. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" + +# Use the trampoline script to run in docker. 
+build_file: "python-bigquery/.kokoro/trampoline.sh" \ No newline at end of file diff --git a/.kokoro/samples/python3.9/continuous.cfg b/.kokoro/samples/python3.9/continuous.cfg new file mode 100644 index 000000000..a1c8d9759 --- /dev/null +++ b/.kokoro/samples/python3.9/continuous.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/.kokoro/samples/python3.9/periodic-head.cfg b/.kokoro/samples/python3.9/periodic-head.cfg new file mode 100644 index 000000000..f9cfcd33e --- /dev/null +++ b/.kokoro/samples/python3.9/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-pubsub/.kokoro/test-samples-against-head.sh" +} diff --git a/.kokoro/samples/python3.9/periodic.cfg b/.kokoro/samples/python3.9/periodic.cfg new file mode 100644 index 000000000..50fec9649 --- /dev/null +++ b/.kokoro/samples/python3.9/periodic.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "False" +} \ No newline at end of file diff --git a/.kokoro/samples/python3.9/presubmit.cfg b/.kokoro/samples/python3.9/presubmit.cfg new file mode 100644 index 000000000..a1c8d9759 --- /dev/null +++ b/.kokoro/samples/python3.9/presubmit.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file From 92fbd4ade37e0be49dc278080ef73c83eafeea18 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 23 Jun 2021 20:24:30 +0000 Subject: [PATCH 135/230] feat: add always_use_jwt_access (#714) ... 
chore: update gapic-generator-ruby to the latest commit chore: release gapic-generator-typescript 1.5.0 Committer: @miraleung PiperOrigin-RevId: 380641501 Source-Link: https://github.com/googleapis/googleapis/commit/076f7e9f0b258bdb54338895d7251b202e8f0de3 Source-Link: https://github.com/googleapis/googleapis-gen/commit/27e4c88b4048e5f56508d4e1aa417d60a3380892 --- .coveragerc | 1 - 1 file changed, 1 deletion(-) diff --git a/.coveragerc b/.coveragerc index 23861a8eb..33ea00ba9 100644 --- a/.coveragerc +++ b/.coveragerc @@ -2,7 +2,6 @@ branch = True [report] -fail_under = 100 show_missing = True omit = google/cloud/bigquery/__init__.py From 5e9494eb51ca5d31b7277f9f5d6d2d58ea2dd018 Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Thu, 24 Jun 2021 08:08:14 -0400 Subject: [PATCH 136/230] chore: pin sphinx plugin version to working one (#715) --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 0dfe7bf93..2bc2afde1 100644 --- a/noxfile.py +++ b/noxfile.py @@ -300,7 +300,7 @@ def docfx(session): session.install("-e", ".") session.install( - "sphinx==4.0.1", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml" + "sphinx==4.0.1", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml==0.2.0" ) shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) From ab4921347972256dbf5a9737dd42f32e90a38cad Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 24 Jun 2021 20:07:55 +0200 Subject: [PATCH 137/230] chore(deps): update dependency grpcio to v1.38.1 (#713) Co-authored-by: Anthonios Partheniou --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 391c85ae3..669b3ac85 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.20.0 google-cloud-bigquery-storage==2.4.0 
google-auth-oauthlib==0.4.4 -grpcio==1.38.0 +grpcio==1.38.1 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' From b35e1ad47a93ff0997a78496cc59fcdd91345643 Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Thu, 24 Jun 2021 16:10:31 -0400 Subject: [PATCH 138/230] Revert "chore: pin sphinx plugin version to working one (#715)" (#719) This reverts commit 5e9494eb51ca5d31b7277f9f5d6d2d58ea2dd018. --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 2bc2afde1..0dfe7bf93 100644 --- a/noxfile.py +++ b/noxfile.py @@ -300,7 +300,7 @@ def docfx(session): session.install("-e", ".") session.install( - "sphinx==4.0.1", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml==0.2.0" + "sphinx==4.0.1", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml" ) shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) From 2a9618f4daaa4a014161e1a2f7376844eec9e8da Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 25 Jun 2021 08:56:40 +0200 Subject: [PATCH 139/230] feat: add max_results parameter to some of the QueryJob methods (#698) * feat: add max_results to a few QueryJob methods It is now possible to cap the number of result rows returned when invoking `to_dataframe()` or `to_arrow()` method on a `QueryJob` instance. 
* Work around a pytype complaint * Make _EmptyRowIterator a subclass of RowIterator Co-authored-by: Dan Lee <71398022+dandhlee@users.noreply.github.com> --- google/cloud/bigquery/_tqdm_helpers.py | 37 +++++++- google/cloud/bigquery/job/query.py | 22 ++++- google/cloud/bigquery/table.py | 53 ++++++++++- tests/unit/job/test_query_pandas.py | 101 ++++++++++++++++++++- tests/unit/test_signature_compatibility.py | 31 +++++-- tests/unit/test_table.py | 19 ++++ 6 files changed, 240 insertions(+), 23 deletions(-) diff --git a/google/cloud/bigquery/_tqdm_helpers.py b/google/cloud/bigquery/_tqdm_helpers.py index 2fcf2a981..99e720e2b 100644 --- a/google/cloud/bigquery/_tqdm_helpers.py +++ b/google/cloud/bigquery/_tqdm_helpers.py @@ -16,6 +16,8 @@ import concurrent.futures import time +import typing +from typing import Optional import warnings try: @@ -23,6 +25,10 @@ except ImportError: # pragma: NO COVER tqdm = None +if typing.TYPE_CHECKING: # pragma: NO COVER + from google.cloud.bigquery import QueryJob + from google.cloud.bigquery.table import RowIterator + _NO_TQDM_ERROR = ( "A progress bar was requested, but there was an error loading the tqdm " "library. Please install tqdm to use the progress bar functionality." 
@@ -32,7 +38,7 @@ def get_progress_bar(progress_bar_type, description, total, unit): - """Construct a tqdm progress bar object, if tqdm is .""" + """Construct a tqdm progress bar object, if tqdm is installed.""" if tqdm is None: if progress_bar_type is not None: warnings.warn(_NO_TQDM_ERROR, UserWarning, stacklevel=3) @@ -53,16 +59,34 @@ def get_progress_bar(progress_bar_type, description, total, unit): return None -def wait_for_query(query_job, progress_bar_type=None): - """Return query result and display a progress bar while the query running, if tqdm is installed.""" +def wait_for_query( + query_job: "QueryJob", + progress_bar_type: Optional[str] = None, + max_results: Optional[int] = None, +) -> "RowIterator": + """Return query result and display a progress bar while the query running, if tqdm is installed. + + Args: + query_job: + The job representing the execution of the query on the server. + progress_bar_type: + The type of progress bar to use to show query progress. + max_results: + The maximum number of rows the row iterator should return. + + Returns: + A row iterator over the query results. 
+ """ default_total = 1 current_stage = None start_time = time.time() + progress_bar = get_progress_bar( progress_bar_type, "Query is running", default_total, "query" ) if progress_bar is None: - return query_job.result() + return query_job.result(max_results=max_results) + i = 0 while True: if query_job.query_plan: @@ -75,7 +99,9 @@ def wait_for_query(query_job, progress_bar_type=None): ), ) try: - query_result = query_job.result(timeout=_PROGRESS_BAR_UPDATE_INTERVAL) + query_result = query_job.result( + timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=max_results + ) progress_bar.update(default_total) progress_bar.set_description( "Query complete after {:0.2f}s".format(time.time() - start_time), @@ -89,5 +115,6 @@ def wait_for_query(query_job, progress_bar_type=None): progress_bar.update(i + 1) i += 1 continue + progress_bar.close() return query_result diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 455ef4632..6ff9f2647 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1300,12 +1300,14 @@ def result( return rows # If changing the signature of this method, make sure to apply the same - # changes to table.RowIterator.to_arrow() + # changes to table.RowIterator.to_arrow(), except for the max_results parameter + # that should only exist here in the QueryJob method. def to_arrow( self, progress_bar_type: str = None, bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, create_bqstorage_client: bool = True, + max_results: Optional[int] = None, ) -> "pyarrow.Table": """[Beta] Create a class:`pyarrow.Table` by loading all pages of a table or query. @@ -1349,6 +1351,11 @@ def to_arrow( ..versionadded:: 1.24.0 + max_results (Optional[int]): + Maximum number of rows to include in the result. No limit by default. 
+ + ..versionadded:: 2.21.0 + Returns: pyarrow.Table A :class:`pyarrow.Table` populated with row data and column @@ -1361,7 +1368,7 @@ def to_arrow( ..versionadded:: 1.17.0 """ - query_result = wait_for_query(self, progress_bar_type) + query_result = wait_for_query(self, progress_bar_type, max_results=max_results) return query_result.to_arrow( progress_bar_type=progress_bar_type, bqstorage_client=bqstorage_client, @@ -1369,7 +1376,8 @@ def to_arrow( ) # If changing the signature of this method, make sure to apply the same - # changes to table.RowIterator.to_dataframe() + # changes to table.RowIterator.to_dataframe(), except for the max_results parameter + # that should only exist here in the QueryJob method. def to_dataframe( self, bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, @@ -1377,6 +1385,7 @@ def to_dataframe( progress_bar_type: str = None, create_bqstorage_client: bool = True, date_as_object: bool = True, + max_results: Optional[int] = None, ) -> "pandas.DataFrame": """Return a pandas DataFrame from a QueryJob @@ -1423,6 +1432,11 @@ def to_dataframe( ..versionadded:: 1.26.0 + max_results (Optional[int]): + Maximum number of rows to include in the result. No limit by default. + + ..versionadded:: 2.21.0 + Returns: A :class:`~pandas.DataFrame` populated with row data and column headers from the query results. The column headers are derived @@ -1431,7 +1445,7 @@ def to_dataframe( Raises: ValueError: If the `pandas` library cannot be imported. 
""" - query_result = wait_for_query(self, progress_bar_type) + query_result = wait_for_query(self, progress_bar_type, max_results=max_results) return query_result.to_dataframe( bqstorage_client=bqstorage_client, dtypes=dtypes, diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index b12209252..a1c13c85d 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -22,7 +22,7 @@ import operator import pytz import typing -from typing import Any, Dict, Iterable, Tuple +from typing import Any, Dict, Iterable, Iterator, Optional, Tuple import warnings try: @@ -1415,7 +1415,9 @@ class RowIterator(HTTPIterator): """A class for iterating through HTTP/JSON API row list responses. Args: - client (google.cloud.bigquery.Client): The API client. + client (Optional[google.cloud.bigquery.Client]): + The API client instance. This should always be non-`None`, except for + subclasses that do not use it, namely the ``_EmptyRowIterator``. api_request (Callable[google.cloud._http.JSONConnection.api_request]): The function to use to make API requests. path (str): The method path to query for the list of items. @@ -1480,7 +1482,7 @@ def __init__( self._field_to_index = _helpers._field_to_index_mapping(schema) self._page_size = page_size self._preserve_order = False - self._project = client.project + self._project = client.project if client is not None else None self._schema = schema self._selected_fields = selected_fields self._table = table @@ -1895,7 +1897,7 @@ def to_dataframe( return df -class _EmptyRowIterator(object): +class _EmptyRowIterator(RowIterator): """An empty row iterator. 
This class prevents API requests when there are no rows to fetch or rows @@ -1907,6 +1909,18 @@ class _EmptyRowIterator(object): pages = () total_rows = 0 + def __init__( + self, client=None, api_request=None, path=None, schema=(), *args, **kwargs + ): + super().__init__( + client=client, + api_request=api_request, + path=path, + schema=schema, + *args, + **kwargs, + ) + def to_arrow( self, progress_bar_type=None, @@ -1951,6 +1965,37 @@ def to_dataframe( raise ValueError(_NO_PANDAS_ERROR) return pandas.DataFrame() + def to_dataframe_iterable( + self, + bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, + dtypes: Optional[Dict[str, Any]] = None, + max_queue_size: Optional[int] = None, + ) -> Iterator["pandas.DataFrame"]: + """Create an iterable of pandas DataFrames, to process the table as a stream. + + ..versionadded:: 2.21.0 + + Args: + bqstorage_client: + Ignored. Added for compatibility with RowIterator. + + dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): + Ignored. Added for compatibility with RowIterator. + + max_queue_size: + Ignored. Added for compatibility with RowIterator. + + Returns: + An iterator yielding a single empty :class:`~pandas.DataFrame`. + + Raises: + ValueError: + If the :mod:`pandas` library cannot be imported. 
+ """ + if pandas is None: + raise ValueError(_NO_PANDAS_ERROR) + return iter((pandas.DataFrame(),)) + def __iter__(self): return iter(()) diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index 0f9623203..c537802f4 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -238,6 +238,41 @@ def test_to_arrow(): ] +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +def test_to_arrow_max_results_no_progress_bar(): + from google.cloud.bigquery import table + from google.cloud.bigquery.job import QueryJob as target_class + from google.cloud.bigquery.schema import SchemaField + + connection = _make_connection({}) + client = _make_client(connection=connection) + begun_resource = _make_job_resource(job_type="query") + job = target_class.from_api_repr(begun_resource, client) + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = table.RowIterator(client, api_request, path, schema) + + result_patch = mock.patch( + "google.cloud.bigquery.job.QueryJob.result", return_value=row_iterator, + ) + with result_patch as result_patch_tqdm: + tbl = job.to_arrow(create_bqstorage_client=False, max_results=123) + + result_patch_tqdm.assert_called_once_with(max_results=123) + + assert isinstance(tbl, pyarrow.Table) + assert tbl.num_rows == 2 + + @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") def test_to_arrow_w_tqdm_w_query_plan(): @@ -290,7 +325,9 @@ def test_to_arrow_w_tqdm_w_query_plan(): assert result_patch_tqdm.call_count == 3 assert isinstance(tbl, pyarrow.Table) assert tbl.num_rows == 2 - 
result_patch_tqdm.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL) + result_patch_tqdm.assert_called_with( + timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None + ) @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @@ -341,7 +378,9 @@ def test_to_arrow_w_tqdm_w_pending_status(): assert result_patch_tqdm.call_count == 2 assert isinstance(tbl, pyarrow.Table) assert tbl.num_rows == 2 - result_patch_tqdm.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL) + result_patch_tqdm.assert_called_with( + timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None + ) @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") @@ -716,7 +755,9 @@ def test_to_dataframe_w_tqdm_pending(): assert isinstance(df, pandas.DataFrame) assert len(df) == 4 # verify the number of rows assert list(df) == ["name", "age"] # verify the column names - result_patch_tqdm.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL) + result_patch_tqdm.assert_called_with( + timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None + ) @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @@ -774,4 +815,56 @@ def test_to_dataframe_w_tqdm(): assert isinstance(df, pandas.DataFrame) assert len(df) == 4 # verify the number of rows assert list(df), ["name", "age"] # verify the column names - result_patch_tqdm.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL) + result_patch_tqdm.assert_called_with( + timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None + ) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`") +def test_to_dataframe_w_tqdm_max_results(): + from google.cloud.bigquery import table + from google.cloud.bigquery.job import QueryJob as target_class + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery._tqdm_helpers import _PROGRESS_BAR_UPDATE_INTERVAL + + begun_resource = _make_job_resource(job_type="query") + schema = [ + 
SchemaField("name", "STRING", mode="NULLABLE"), + SchemaField("age", "INTEGER", mode="NULLABLE"), + ] + rows = [{"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}] + + connection = _make_connection({}) + client = _make_client(connection=connection) + job = target_class.from_api_repr(begun_resource, client) + + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + row_iterator = table.RowIterator(client, api_request, path, schema) + + job._properties["statistics"] = { + "query": { + "queryPlan": [ + {"name": "S00: Input", "id": "0", "status": "COMPLETE"}, + {"name": "S01: Output", "id": "1", "status": "COMPLETE"}, + ] + }, + } + reload_patch = mock.patch( + "google.cloud.bigquery.job._AsyncJob.reload", autospec=True + ) + result_patch = mock.patch( + "google.cloud.bigquery.job.QueryJob.result", + side_effect=[concurrent.futures.TimeoutError, row_iterator], + ) + + with result_patch as result_patch_tqdm, reload_patch: + job.to_dataframe( + progress_bar_type="tqdm", create_bqstorage_client=False, max_results=3 + ) + + assert result_patch_tqdm.call_count == 2 + result_patch_tqdm.assert_called_with( + timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=3 + ) diff --git a/tests/unit/test_signature_compatibility.py b/tests/unit/test_signature_compatibility.py index e5016b0e5..07b823e2c 100644 --- a/tests/unit/test_signature_compatibility.py +++ b/tests/unit/test_signature_compatibility.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from collections import OrderedDict import inspect import pytest @@ -32,12 +33,30 @@ def row_iterator_class(): def test_to_arrow_method_signatures_match(query_job_class, row_iterator_class): - sig = inspect.signature(query_job_class.to_arrow) - sig2 = inspect.signature(row_iterator_class.to_arrow) - assert sig == sig2 + query_job_sig = inspect.signature(query_job_class.to_arrow) + iterator_sig = inspect.signature(row_iterator_class.to_arrow) + + assert "max_results" in query_job_sig.parameters + + # Compare the signatures while ignoring the max_results parameter, which is + # specific to the method on QueryJob. + params = OrderedDict(query_job_sig.parameters) + del params["max_results"] + query_job_sig = query_job_sig.replace(parameters=params.values()) + + assert query_job_sig == iterator_sig def test_to_dataframe_method_signatures_match(query_job_class, row_iterator_class): - sig = inspect.signature(query_job_class.to_dataframe) - sig2 = inspect.signature(row_iterator_class.to_dataframe) - assert sig == sig2 + query_job_sig = inspect.signature(query_job_class.to_dataframe) + iterator_sig = inspect.signature(row_iterator_class.to_dataframe) + + assert "max_results" in query_job_sig.parameters + + # Compare the signatures while ignoring the max_results parameter, which is + # specific to the method on QueryJob. 
+ params = OrderedDict(query_job_sig.parameters) + del params["max_results"] + query_job_sig = query_job_sig.replace(parameters=params.values()) + + assert query_job_sig == iterator_sig diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 0f2ab00c1..f4038835c 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1571,6 +1571,25 @@ def test_to_dataframe(self): self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 0) # verify the number of rows + @mock.patch("google.cloud.bigquery.table.pandas", new=None) + def test_to_dataframe_iterable_error_if_pandas_is_none(self): + row_iterator = self._make_one() + with self.assertRaises(ValueError): + row_iterator.to_dataframe_iterable() + + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_iterable(self): + row_iterator = self._make_one() + df_iter = row_iterator.to_dataframe_iterable() + + result = list(df_iter) + + self.assertEqual(len(result), 1) + df = result[0] + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 0) # Verify the number of rows. + self.assertEqual(len(df.columns), 0) + class TestRowIterator(unittest.TestCase): def _class_under_test(self): From 36b6a852a59d7022d85446b013b535e6d705f4c3 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Fri, 25 Jun 2021 16:59:16 -0400 Subject: [PATCH 140/230] Revert "feat: add always_use_jwt_access (#714)" (#722) This reverts commit 92fbd4ade37e0be49dc278080ef73c83eafeea18. 
--- .coveragerc | 1 + 1 file changed, 1 insertion(+) diff --git a/.coveragerc b/.coveragerc index 33ea00ba9..23861a8eb 100644 --- a/.coveragerc +++ b/.coveragerc @@ -2,6 +2,7 @@ branch = True [report] +fail_under = 100 show_missing = True omit = google/cloud/bigquery/__init__.py From 3c355909b4c9e7666e047eff983e682b3f8fd348 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 25 Jun 2021 22:52:15 +0000 Subject: [PATCH 141/230] chore(python): simplify nox steps in CONTRIBUTING.rst (#721) Source-Link: https://github.com/googleapis/synthtool/commit/26558bae8976a985d73c2d98c31d8612273f907d Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:99d90d097e4a4710cc8658ee0b5b963f4426d0e424819787c3ac1405c9a26719 --- .github/.OwlBot.lock.yaml | 2 +- CONTRIBUTING.rst | 14 ++++++-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 0954585f2..e2b39f946 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:df50e8d462f86d6bcb42f27ecad55bb12c404f1c65de9c6fe4c4d25120080bd6 + digest: sha256:99d90d097e4a4710cc8658ee0b5b963f4426d0e424819787c3ac1405c9a26719 diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index a9b389e83..102355b3a 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -68,14 +68,12 @@ Using ``nox`` We use `nox `__ to instrument our tests. - To test your changes, run unit tests with ``nox``:: + $ nox -s unit - $ nox -s unit-3.8 - $ ... +- To run a single unit test:: -- Args to pytest can be passed through the nox command separated by a `--`. For - example, to run a single test:: + $ nox -s unit-3.9 -- -k - $ nox -s unit-3.8 -- -k .. 
note:: @@ -142,7 +140,7 @@ Running System Tests - To run system tests, you can execute:: # Run all system tests - $ nox -s system-3.8 + $ nox -s system # Run a single system test $ nox -s system-3.8 -- -k @@ -215,8 +213,8 @@ Supported versions can be found in our ``noxfile.py`` `config`_. .. _config: https://github.com/googleapis/python-bigquery/blob/master/noxfile.py -We also explicitly decided to support Python 3 beginning with version -3.6. Reasons for this include: +We also explicitly decided to support Python 3 beginning with version 3.6. +Reasons for this include: - Encouraging use of newest versions of Python 3 - Taking the lead of `prominent`_ open-source `projects`_ From ddd9ce714ebc496e22bc122e89f3bbee36ea15be Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 28 Jun 2021 18:26:24 +0200 Subject: [PATCH 142/230] chore: require grpcio >= 1.38.1 (#725) --- setup.py | 3 ++- testing/constraints-3.6.txt | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 963eb73ec..fcb1dd966 100644 --- a/setup.py +++ b/setup.py @@ -29,6 +29,7 @@ # 'Development Status :: 5 - Production/Stable' release_status = "Development Status :: 5 - Production/Stable" dependencies = [ + "grpcio >= 1.38.1, < 2.0dev", # https://github.com/googleapis/python-bigquery/issues/695 "google-api-core[grpc] >= 1.29.0, < 2.0.0dev", "proto-plus >= 1.10.0", "google-cloud-core >= 1.4.1, < 2.0dev", @@ -46,7 +47,7 @@ # See: https://github.com/googleapis/python-bigquery/issues/83 The # grpc.Channel.close() method isn't added until 1.32.0. 
# https://github.com/grpc/grpc/pull/15254 - "grpcio >= 1.32.0, < 2.0dev", + "grpcio >= 1.38.1, < 2.0dev", "pyarrow >= 1.0.0, < 5.0dev", ], "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 5.0dev"], diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index 71c9ff49a..af6e82efd 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -9,7 +9,7 @@ google-api-core==1.29.0 google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 google-resumable-media==0.6.0 -grpcio==1.32.0 +grpcio==1.38.1 opentelemetry-api==0.11b0 opentelemetry-instrumentation==0.11b0 opentelemetry-sdk==0.11b0 From d047419879e807e123296da2eee89a5253050166 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 1 Jul 2021 09:24:21 +0200 Subject: [PATCH 143/230] fix: inserting non-finite floats with insert_rows() (#728) --- google/cloud/bigquery/_helpers.py | 8 +++++++- tests/unit/test__helpers.py | 15 +++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 7602483c2..77054542a 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -17,6 +17,7 @@ import base64 import datetime import decimal +import math import re from google.cloud._helpers import UTC @@ -305,7 +306,12 @@ def _int_to_json(value): def _float_to_json(value): """Coerce 'value' to an JSON-compatible representation.""" - return value if value is None else float(value) + if value is None: + return None + elif math.isnan(value) or math.isinf(value): + return str(value) + else: + return float(value) def _decimal_to_json(value): diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 0ac76d424..c62947d37 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -656,9 +656,24 @@ def _call_fut(self, value): return _float_to_json(value) + def test_w_none(self): + self.assertEqual(self._call_fut(None), None) + def test_w_float(self): 
self.assertEqual(self._call_fut(1.23), 1.23) + def test_w_nan(self): + result = self._call_fut(float("nan")) + self.assertEqual(result.lower(), "nan") + + def test_w_infinity(self): + result = self._call_fut(float("inf")) + self.assertEqual(result.lower(), "inf") + + def test_w_negative_infinity(self): + result = self._call_fut(float("-inf")) + self.assertEqual(result.lower(), "-inf") + class Test_decimal_to_json(unittest.TestCase): def _call_fut(self, value): From e99abbbca8d3f234325e86857b4ba71403f4be6b Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 1 Jul 2021 12:22:26 +0200 Subject: [PATCH 144/230] chore(deps): update dependency google-cloud-bigquery-storage to v2.5.0 (#731) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery-storage](https://togithub.com/googleapis/python-bigquery-storage) | `==2.4.0` -> `==2.5.0` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.5.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.5.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.5.0/compatibility-slim/2.4.0)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.5.0/confidence-slim/2.4.0)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery-storage ### [`v2.5.0`](https://togithub.com/googleapis/python-bigquery-storage/blob/master/CHANGELOG.md#​250-httpswwwgithubcomgoogleapispython-bigquery-storagecomparev240v250-2021-06-29) [Compare Source](https://togithub.com/googleapis/python-bigquery-storage/compare/v2.4.0...v2.5.0) ##### ⚠ BREAKING CHANGES - remove default deadline for AppendRows API ([#​205](https://togithub.com/googleapis/python-bigquery-storage/issues/205)) ##### Features - Add ZSTD compression as an option for Arrow ([#​197](https://www.github.com/googleapis/python-bigquery-storage/issues/197)) ([f941446](https://www.github.com/googleapis/python-bigquery-storage/commit/f9414469fac37bf05db28230a1a6c1e3f7342e8d)) - new JSON type through BigQuery Write ([#​178](https://www.github.com/googleapis/python-bigquery-storage/issues/178)) ([a6d6afa](https://www.github.com/googleapis/python-bigquery-storage/commit/a6d6afa8654907701aab2724f940be8f63edd0ea)) ##### Bug Fixes - **deps:** add packaging requirement ([#​200](https://www.github.com/googleapis/python-bigquery-storage/issues/200)) ([f2203fe](https://www.github.com/googleapis/python-bigquery-storage/commit/f2203fefe36dd043a258adb85e970fef14cf6ebc)) - remove default deadline for AppendRows API ([#​205](https://www.github.com/googleapis/python-bigquery-storage/issues/205)) ([cd4e637](https://www.github.com/googleapis/python-bigquery-storage/commit/cd4e637c4c74f21be50c3b0ebdfeebb1dfb88cbb)) ##### Documentation - omit mention of Python 2.7 in 'CONTRIBUTING.rst' ([#​1127](https://www.github.com/googleapis/python-bigquery-storage/issues/1127)) ([#​212](https://www.github.com/googleapis/python-bigquery-storage/issues/212)) ([8bcc4cd](https://www.github.com/googleapis/python-bigquery-storage/commit/8bcc4cd298eb0f5da03ecf66670982ab41e35c88)) ##### Miscellaneous Chores - release 2.5.0 ([#​220](https://www.github.com/googleapis/python-bigquery-storage/issues/220)) 
([946c8a9](https://www.github.com/googleapis/python-bigquery-storage/commit/946c8a91c2d74c6bf37b333a4d0483f4483dcbce))
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 80fa8e454..83ab92ee5 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 google-cloud-bigquery==2.20.0 -google-cloud-bigquery-storage==2.4.0 +google-cloud-bigquery-storage==2.5.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 669b3ac85..6b966fb07 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.20.0 -google-cloud-bigquery-storage==2.4.0 +google-cloud-bigquery-storage==2.5.0 google-auth-oauthlib==0.4.4 grpcio==1.38.1 ipython==7.16.1; python_version < '3.7' From 38b3ef96c3dedc139b84f0ff06885141ae7ce78c Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 1 Jul 2021 10:49:50 -0400 Subject: [PATCH 145/230] feat: Support passing struct data to the DB API (#718) --- docs/dbapi.rst | 11 +- google/cloud/bigquery/dbapi/_helpers.py | 252 ++++++++++++++++++--- google/cloud/bigquery/dbapi/cursor.py | 28 ++- tests/system/conftest.py | 7 +- tests/system/test_pandas.py | 11 +- tests/system/test_structs.py | 31 +++ tests/unit/test_dbapi__helpers.py | 282 
+++++++++++++++++++++++- tests/unit/test_dbapi_cursor.py | 26 +++ 8 files changed, 597 insertions(+), 51 deletions(-) create mode 100644 tests/system/test_structs.py diff --git a/docs/dbapi.rst b/docs/dbapi.rst index 41ec85833..81f000bc7 100644 --- a/docs/dbapi.rst +++ b/docs/dbapi.rst @@ -25,7 +25,7 @@ and using named parameters:: Providing explicit type information ----------------------------------- -BigQuery requires type information for parameters. The The BigQuery +BigQuery requires type information for parameters. The BigQuery DB-API can usually determine parameter types for parameters based on provided values. Sometimes, however, types can't be determined (for example when `None` is passed) or are determined incorrectly (for @@ -37,7 +37,14 @@ colon, as in:: insert into people (name, income) values (%(name:string)s, %(income:numeric)s) -For unnamed parameters, use the named syntax with a type, but now +For unnamed parameters, use the named syntax with a type, but no name, as in:: insert into people (name, income) values (%(:string)s, %(:numeric)s) + +Providing type information is the *only* way to pass `struct` data:: + + cursor.execute( + "insert into points (point) values (%(:struct)s)", + [{"x": 10, "y": 20}], + ) diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py index 3b0d8134c..9c134b47c 100644 --- a/google/cloud/bigquery/dbapi/_helpers.py +++ b/google/cloud/bigquery/dbapi/_helpers.py @@ -18,18 +18,34 @@ import decimal import functools import numbers +import re +import typing from google.cloud import bigquery -from google.cloud.bigquery import table, enums +from google.cloud.bigquery import table, enums, query from google.cloud.bigquery.dbapi import exceptions _NUMERIC_SERVER_MIN = decimal.Decimal("-9.9999999999999999999999999999999999999E+28") _NUMERIC_SERVER_MAX = decimal.Decimal("9.9999999999999999999999999999999999999E+28") +type_parameters_re = re.compile( + r""" + \( + \s*[0-9]+\s* + (, + \s*[0-9]+\s* 
+ )* + \) + """, + re.VERBOSE, +) + def _parameter_type(name, value, query_parameter_type=None, value_doc=""): if query_parameter_type: + # Strip type parameters + query_parameter_type = type_parameters_re.sub("", query_parameter_type) try: parameter_type = getattr( enums.SqlParameterScalarTypes, query_parameter_type.upper() @@ -113,6 +129,197 @@ def array_to_query_parameter(value, name=None, query_parameter_type=None): return bigquery.ArrayQueryParameter(name, array_type, value) +def _parse_struct_fields( + fields, + base, + parse_struct_field=re.compile( + r""" + (?:(\w+)\s+) # field name + ([A-Z0-9<> ,()]+) # Field type + $""", + re.VERBOSE | re.IGNORECASE, + ).match, +): + # Split a string of struct fields. They're defined by commas, but + # we have to avoid splitting on commas internal to fields. For + # example: + # name string, children array> + # + # only has 2 top-level fields. + fields = fields.split(",") + fields = list(reversed(fields)) # in the off chance that there are very many + while fields: + field = fields.pop() + while fields and field.count("<") != field.count(">"): + field += "," + fields.pop() + + m = parse_struct_field(field.strip()) + if not m: + raise exceptions.ProgrammingError( + f"Invalid struct field, {field}, in {base}" + ) + yield m.group(1, 2) + + +SCALAR, ARRAY, STRUCT = "sar" + + +def _parse_type( + type_, + name, + base, + complex_query_parameter_parse=re.compile( + r""" + \s* + (ARRAY|STRUCT|RECORD) # Type + \s* + <([A-Z0-9<> ,()]+)> # Subtype(s) + \s*$ + """, + re.IGNORECASE | re.VERBOSE, + ).match, +): + if "<" not in type_: + # Scalar + + # Strip type parameters + type_ = type_parameters_re.sub("", type_).strip() + try: + type_ = getattr(enums.SqlParameterScalarTypes, type_.upper()) + except AttributeError: + raise exceptions.ProgrammingError( + f"The given parameter type, {type_}," + f"{' for ' + name if name else ''}" + f" is not a valid BigQuery scalar type, in {base}." 
+ ) + if name: + type_ = type_.with_name(name) + return SCALAR, type_ + + m = complex_query_parameter_parse(type_) + if not m: + raise exceptions.ProgrammingError(f"Invalid parameter type, {type_}") + tname, sub = m.group(1, 2) + if tname.upper() == "ARRAY": + sub_type = complex_query_parameter_type(None, sub, base) + if isinstance(sub_type, query.ArrayQueryParameterType): + raise exceptions.ProgrammingError(f"Array can't contain an array in {base}") + sub_type._complex__src = sub + return ARRAY, sub_type + else: + return STRUCT, _parse_struct_fields(sub, base) + + +def complex_query_parameter_type(name: typing.Optional[str], type_: str, base: str): + """Construct a parameter type (`StructQueryParameterType`) for a complex type + + or a non-complex type that's part of a complex type. + + Examples: + + array> + + struct>> + + This is used for computing array types. + """ + + type_type, sub_type = _parse_type(type_, name, base) + if type_type == SCALAR: + type_ = sub_type + elif type_type == ARRAY: + type_ = query.ArrayQueryParameterType(sub_type, name=name) + elif type_type == STRUCT: + fields = [ + complex_query_parameter_type(field_name, field_type, base) + for field_name, field_type in sub_type + ] + type_ = query.StructQueryParameterType(*fields, name=name) + else: # pragma: NO COVER + raise AssertionError("Bad type_type", type_type) # Can't happen :) + + return type_ + + +def complex_query_parameter( + name: typing.Optional[str], value, type_: str, base: typing.Optional[str] = None +): + """ + Construct a query parameter for a complex type (array or struct record) + + or for a subtype, which may not be complex + + Examples: + + array> + + struct>> + + """ + base = base or type_ + + type_type, sub_type = _parse_type(type_, name, base) + + if type_type == SCALAR: + param = query.ScalarQueryParameter(name, sub_type._type, value) + elif type_type == ARRAY: + if not array_like(value): + raise exceptions.ProgrammingError( + f"Array type with non-array-like value" + 
f" with type {type(value).__name__}" + ) + param = query.ArrayQueryParameter( + name, + sub_type, + value + if isinstance(sub_type, query.ScalarQueryParameterType) + else [ + complex_query_parameter(None, v, sub_type._complex__src, base) + for v in value + ], + ) + elif type_type == STRUCT: + if not isinstance(value, collections_abc.Mapping): + raise exceptions.ProgrammingError(f"Non-mapping value for type {type_}") + value_keys = set(value) + fields = [] + for field_name, field_type in sub_type: + if field_name not in value: + raise exceptions.ProgrammingError( + f"No field value for {field_name} in {type_}" + ) + value_keys.remove(field_name) + fields.append( + complex_query_parameter(field_name, value[field_name], field_type, base) + ) + if value_keys: + raise exceptions.ProgrammingError(f"Extra data keys for {type_}") + + param = query.StructQueryParameter(name, *fields) + else: # pragma: NO COVER + raise AssertionError("Bad type_type", type_type) # Can't happen :) + + return param + + +def _dispatch_parameter(type_, value, name=None): + if type_ is not None and "<" in type_: + param = complex_query_parameter(name, value, type_) + elif isinstance(value, collections_abc.Mapping): + raise NotImplementedError( + f"STRUCT-like parameter values are not supported" + f"{' (parameter ' + name + ')' if name else ''}," + f" unless an explicit type is give in the parameter placeholder" + f" (e.g. '%({name if name else ''}:struct<...>)s')." + ) + elif array_like(value): + param = array_to_query_parameter(value, name, type_) + else: + param = scalar_to_query_parameter(value, name, type_) + + return param + + def to_query_parameters_list(parameters, parameter_types): """Converts a sequence of parameter values into query parameters. @@ -126,19 +333,10 @@ def to_query_parameters_list(parameters, parameter_types): List[google.cloud.bigquery.query._AbstractQueryParameter]: A list of query parameters. 
""" - result = [] - - for value, type_ in zip(parameters, parameter_types): - if isinstance(value, collections_abc.Mapping): - raise NotImplementedError("STRUCT-like parameter values are not supported.") - elif array_like(value): - param = array_to_query_parameter(value, None, type_) - else: - param = scalar_to_query_parameter(value, None, type_) - - result.append(param) - - return result + return [ + _dispatch_parameter(type_, value) + for value, type_ in zip(parameters, parameter_types) + ] def to_query_parameters_dict(parameters, query_parameter_types): @@ -154,28 +352,10 @@ def to_query_parameters_dict(parameters, query_parameter_types): List[google.cloud.bigquery.query._AbstractQueryParameter]: A list of named query parameters. """ - result = [] - - for name, value in parameters.items(): - if isinstance(value, collections_abc.Mapping): - raise NotImplementedError( - "STRUCT-like parameter values are not supported " - "(parameter {}).".format(name) - ) - else: - query_parameter_type = query_parameter_types.get(name) - if array_like(value): - param = array_to_query_parameter( - value, name=name, query_parameter_type=query_parameter_type - ) - else: - param = scalar_to_query_parameter( - value, name=name, query_parameter_type=query_parameter_type, - ) - - result.append(param) - - return result + return [ + _dispatch_parameter(query_parameter_types.get(name), value, name) + for name, value in parameters.items() + ] def to_query_parameters(parameters, parameter_types): diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py index c8fc49378..587598d5f 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -483,7 +483,33 @@ def _format_operation(operation, parameters): def _extract_types( - operation, extra_type_sub=re.compile(r"(%*)%(?:\(([^:)]*)(?::(\w+))?\))?s").sub + operation, + extra_type_sub=re.compile( + r""" + (%*) # Extra %s. 
We'll deal with these in the replacement code + + % # Beginning of replacement, %s, %(...)s + + (?:\( # Begin of optional name and/or type + ([^:)]*) # name + (?:: # ':' introduces type + ( # start of type group + [a-zA-Z0-9<>, ]+ # First part, no parens + + (?: # start sets of parens + non-paren text + \([0-9 ,]+\) # comma-separated groups of digits in parens + # (e.g. string(10)) + (?=[, >)]) # Must be followed by ,>) or space + [a-zA-Z0-9<>, ]* # Optional non-paren chars + )* # Can be zero or more of parens and following text + ) # end of type group + )? # close type clause ":type" + \))? # End of optional name and/or type + + s # End of replacement + """, + re.VERBOSE, + ).sub, ): """Remove type information from parameter placeholders. diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 4b5fcb543..4eef60e92 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -31,9 +31,14 @@ def bqstorage_client(bigquery_client): return bigquery_storage.BigQueryReadClient(credentials=bigquery_client._credentials) -@pytest.fixture +@pytest.fixture(scope="session") def dataset_id(bigquery_client): dataset_id = f"bqsystem_{helpers.temp_suffix()}" bigquery_client.create_dataset(dataset_id) yield dataset_id bigquery_client.delete_dataset(dataset_id, delete_contents=True) + + +@pytest.fixture +def table_id(dataset_id): + return f"{dataset_id}.table_{helpers.temp_suffix()}" diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 1164e36da..ddf5eaf43 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -149,7 +149,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype( reason="Only `pandas version >=1.0.0` is supported", ) def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema( - bigquery_client, dataset_id + bigquery_client, dataset_id, table_id ): """Test that a DataFrame containing column with None-type values and int64 datatype can be uploaded without specifying a 
schema. @@ -157,9 +157,6 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema( https://github.com/googleapis/python-bigquery/issues/22 """ - table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format( - bigquery_client.project, dataset_id - ) df_data = collections.OrderedDict( [("x", pandas.Series([1, 2, None, 4], dtype="Int64"))] ) @@ -511,7 +508,7 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv( def test_load_table_from_dataframe_w_explicit_schema_source_format_csv_floats( - bigquery_client, dataset_id + bigquery_client, dataset_id, table_id ): from google.cloud.bigquery.job import SourceFormat @@ -536,10 +533,6 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv_floats( ) dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) - table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema_csv".format( - bigquery_client.project, dataset_id - ) - job_config = bigquery.LoadJobConfig( schema=table_schema, source_format=SourceFormat.CSV ) diff --git a/tests/system/test_structs.py b/tests/system/test_structs.py new file mode 100644 index 000000000..20740f614 --- /dev/null +++ b/tests/system/test_structs.py @@ -0,0 +1,31 @@ +import datetime + +import pytest + +from google.cloud.bigquery.dbapi import connect + +person_type = "struct>>" +person_type_sized = ( + "struct>>" +) + + +@pytest.mark.parametrize("person_type_decl", [person_type, person_type_sized]) +def test_structs(bigquery_client, dataset_id, person_type_decl, table_id): + conn = connect(bigquery_client) + cursor = conn.cursor() + cursor.execute(f"create table {table_id} (person {person_type_decl})") + data = dict( + name="par", + children=[ + dict(name="ch1", bdate=datetime.date(2021, 1, 1)), + dict(name="ch2", bdate=datetime.date(2021, 1, 2)), + ], + ) + cursor.execute( + f"insert into {table_id} (person) values (%(v:{person_type})s)", dict(v=data), + ) + + cursor.execute(f"select * from 
{table_id}") + [[result]] = list(cursor) + assert result == data diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index 250ba46d9..b33203354 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -16,6 +16,7 @@ import decimal import math import operator as op +import re import unittest import pytest @@ -394,11 +395,13 @@ def test_to_query_parameters_dict_w_types(): assert sorted( _helpers.to_query_parameters( - dict(i=1, x=1.2, y=None, z=[]), dict(x="numeric", y="string", z="float64") + dict(i=1, x=1.2, y=None, q="hi", z=[]), + dict(x="numeric", y="string", q="string(9)", z="float64"), ), key=lambda p: p.name, ) == [ bigquery.ScalarQueryParameter("i", "INT64", 1), + bigquery.ScalarQueryParameter("q", "STRING", "hi"), bigquery.ScalarQueryParameter("x", "NUMERIC", 1.2), bigquery.ScalarQueryParameter("y", "STRING", None), bigquery.ArrayQueryParameter("z", "FLOAT64", []), @@ -409,10 +412,285 @@ def test_to_query_parameters_list_w_types(): from google.cloud import bigquery assert _helpers.to_query_parameters( - [1, 1.2, None, []], [None, "numeric", "string", "float64"] + [1, 1.2, None, "hi", []], [None, "numeric", "string", "string(9)", "float64"] ) == [ bigquery.ScalarQueryParameter(None, "INT64", 1), bigquery.ScalarQueryParameter(None, "NUMERIC", 1.2), bigquery.ScalarQueryParameter(None, "STRING", None), + bigquery.ScalarQueryParameter(None, "STRING", "hi"), bigquery.ArrayQueryParameter(None, "FLOAT64", []), ] + + +@pytest.mark.parametrize( + "value,type_,expect", + [ + ( + [], + "ARRAY", + { + "parameterType": {"type": "ARRAY", "arrayType": {"type": "INT64"}}, + "parameterValue": {"arrayValues": []}, + }, + ), + ( + [1, 2], + "ARRAY", + { + "parameterType": {"type": "ARRAY", "arrayType": {"type": "INT64"}}, + "parameterValue": {"arrayValues": [{"value": "1"}, {"value": "2"}]}, + }, + ), + ( + dict( + name="par", + children=[ + dict(name="ch1", bdate=datetime.date(2021, 1, 1)), + dict(name="ch2", 
bdate=datetime.date(2021, 1, 2)), + ], + ), + "struct>>", + { + "parameterType": { + "structTypes": [ + {"name": "name", "type": {"type": "STRING"}}, + { + "name": "children", + "type": { + "arrayType": { + "structTypes": [ + {"name": "name", "type": {"type": "STRING"}}, + {"name": "bdate", "type": {"type": "DATE"}}, + ], + "type": "STRUCT", + }, + "type": "ARRAY", + }, + }, + ], + "type": "STRUCT", + }, + "parameterValue": { + "structValues": { + "children": { + "arrayValues": [ + { + "structValues": { + "bdate": {"value": "2021-01-01"}, + "name": {"value": "ch1"}, + } + }, + { + "structValues": { + "bdate": {"value": "2021-01-02"}, + "name": {"value": "ch2"}, + } + }, + ] + }, + "name": {"value": "par"}, + } + }, + }, + ), + ( + dict( + name="par", + children=[ + dict(name="ch1", bdate=datetime.date(2021, 1, 1)), + dict(name="ch2", bdate=datetime.date(2021, 1, 2)), + ], + ), + "struct>>", + { + "parameterType": { + "structTypes": [ + {"name": "name", "type": {"type": "STRING"}}, + { + "name": "children", + "type": { + "arrayType": { + "structTypes": [ + {"name": "name", "type": {"type": "STRING"}}, + {"name": "bdate", "type": {"type": "DATE"}}, + ], + "type": "STRUCT", + }, + "type": "ARRAY", + }, + }, + ], + "type": "STRUCT", + }, + "parameterValue": { + "structValues": { + "children": { + "arrayValues": [ + { + "structValues": { + "bdate": {"value": "2021-01-01"}, + "name": {"value": "ch1"}, + } + }, + { + "structValues": { + "bdate": {"value": "2021-01-02"}, + "name": {"value": "ch2"}, + } + }, + ] + }, + "name": {"value": "par"}, + } + }, + }, + ), + ( + ["1", "hi"], + "ARRAY", + { + "parameterType": {"type": "ARRAY", "arrayType": {"type": "STRING"}}, + "parameterValue": {"arrayValues": [{"value": "1"}, {"value": "hi"}]}, + }, + ), + ], +) +def test_complex_query_parameter_type(type_, value, expect): + from google.cloud.bigquery.dbapi._helpers import complex_query_parameter + + param = complex_query_parameter("test", value, type_).to_api_repr() + assert 
param.pop("name") == "test" + assert param == expect + + +def _expected_error_match(expect): + return "^" + re.escape(expect) + "$" + + +@pytest.mark.parametrize( + "value,type_,expect", + [ + ( + [], + "ARRAY", + "The given parameter type, INT," + " is not a valid BigQuery scalar type, in ARRAY.", + ), + ([], "x", "Invalid parameter type, x"), + ({}, "struct", "Invalid struct field, int, in struct"), + ( + {"x": 1}, + "struct", + "The given parameter type, int," + " for x is not a valid BigQuery scalar type, in struct.", + ), + ([], "x<", "Invalid parameter type, x<"), + (0, "ARRAY", "Array type with non-array-like value with type int"), + ( + [], + "ARRAY>", + "Array can't contain an array in ARRAY>", + ), + ([], "struct", "Non-mapping value for type struct"), + ({}, "struct", "No field value for x in struct"), + ({"x": 1, "y": 1}, "struct", "Extra data keys for struct"), + ([], "array>", "Invalid struct field, xxx, in array>"), + ([], "array<<>>", "Invalid parameter type, <>"), + ], +) +def test_complex_query_parameter_type_errors(type_, value, expect): + from google.cloud.bigquery.dbapi._helpers import complex_query_parameter + from google.cloud.bigquery.dbapi import exceptions + + with pytest.raises( + exceptions.ProgrammingError, match=_expected_error_match(expect), + ): + complex_query_parameter("test", value, type_) + + +@pytest.mark.parametrize( + "parameters,parameter_types,expect", + [ + ( + [[], dict(name="ch1", bdate=datetime.date(2021, 1, 1))], + ["ARRAY", "struct"], + [ + { + "parameterType": {"arrayType": {"type": "INT64"}, "type": "ARRAY"}, + "parameterValue": {"arrayValues": []}, + }, + { + "parameterType": { + "structTypes": [ + {"name": "name", "type": {"type": "STRING"}}, + {"name": "bdate", "type": {"type": "DATE"}}, + ], + "type": "STRUCT", + }, + "parameterValue": { + "structValues": { + "bdate": {"value": "2021-01-01"}, + "name": {"value": "ch1"}, + } + }, + }, + ], + ), + ( + dict(ids=[], child=dict(name="ch1", bdate=datetime.date(2021, 1, 
1))), + dict(ids="ARRAY", child="struct"), + [ + { + "name": "ids", + "parameterType": {"arrayType": {"type": "INT64"}, "type": "ARRAY"}, + "parameterValue": {"arrayValues": []}, + }, + { + "name": "child", + "parameterType": { + "structTypes": [ + {"name": "name", "type": {"type": "STRING"}}, + {"name": "bdate", "type": {"type": "DATE"}}, + ], + "type": "STRUCT", + }, + "parameterValue": { + "structValues": { + "bdate": {"value": "2021-01-01"}, + "name": {"value": "ch1"}, + } + }, + }, + ], + ), + ], +) +def test_to_query_parameters_complex_types(parameters, parameter_types, expect): + from google.cloud.bigquery.dbapi._helpers import to_query_parameters + + result = [p.to_api_repr() for p in to_query_parameters(parameters, parameter_types)] + assert result == expect + + +def test_to_query_parameters_struct_error(): + from google.cloud.bigquery.dbapi._helpers import to_query_parameters + + with pytest.raises( + NotImplementedError, + match=_expected_error_match( + "STRUCT-like parameter values are not supported, " + "unless an explicit type is give in the parameter placeholder " + "(e.g. '%(:struct<...>)s')." + ), + ): + to_query_parameters([dict(x=1)], [None]) + + with pytest.raises( + NotImplementedError, + match=_expected_error_match( + "STRUCT-like parameter values are not supported (parameter foo), " + "unless an explicit type is give in the parameter placeholder " + "(e.g. '%(foo:struct<...>)s')." 
+ ), + ): + to_query_parameters(dict(foo=dict(x=1)), {}) diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index a2d6693d0..026810aaf 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -809,6 +809,32 @@ def test__format_operation_no_placeholders(self): "values(%%%%%(foo:INT64)s, %(bar)s)", ("values(%%%%%(foo)s, %(bar)s)", dict(foo="INT64")), ), + ( + "values(%%%%%(foo:struct)s, %(bar)s)", + ("values(%%%%%(foo)s, %(bar)s)", dict(foo="struct")), + ), + ( + "values(%%%%%(foo:struct)s, %(bar)s)", + ("values(%%%%%(foo)s, %(bar)s)", dict(foo="struct")), + ), + ( + "values(%(foo:struct)s, %(bar)s)", + ( + "values(%(foo)s, %(bar)s)", + dict(foo="struct"), + ), + ), + ( + "values(%(foo:struct)s, %(bar)s)", + ( + "values(%(foo)s, %(bar)s)", + dict(foo="struct"), + ), + ), + ( + "values(%(foo:string(10))s, %(bar)s)", + ("values(%(foo)s, %(bar)s)", dict(foo="string(10)")), + ), ], ) def test__extract_types(inp, expect): From 1246da86b78b03ca1aa2c45ec71649e294cfb2f1 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 1 Jul 2021 19:17:01 +0200 Subject: [PATCH 146/230] feat: make it easier to disable best-effort deduplication with streaming inserts (#734) * feat: make it easier to disable row insert IDs * Also accept any iterables for row_ids --- google/cloud/bigquery/__init__.py | 2 + google/cloud/bigquery/client.py | 47 +++++++-- google/cloud/bigquery/enums.py | 7 ++ tests/unit/test_client.py | 153 ++++++++++++++++++++++++++++-- 4 files changed, 195 insertions(+), 14 deletions(-) diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 94f87304a..dfe3a6320 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -37,6 +37,7 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import enums +from google.cloud.bigquery.enums import AutoRowIDs from 
google.cloud.bigquery.enums import KeyResultStatementKind from google.cloud.bigquery.enums import SqlTypeNames from google.cloud.bigquery.enums import StandardSqlDataTypes @@ -144,6 +145,7 @@ "DEFAULT_RETRY", # Enum Constants "enums", + "AutoRowIDs", "Compression", "CreateDisposition", "DestinationFormat", diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 2b7a5273e..2a02c7629 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -68,6 +68,7 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.enums import AutoRowIDs from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job @@ -3349,7 +3350,7 @@ def insert_rows_json( self, table: Union[Table, TableReference, str], json_rows: Sequence[Dict], - row_ids: Sequence[str] = None, + row_ids: Union[Iterable[str], AutoRowIDs, None] = AutoRowIDs.GENERATE_UUID, skip_invalid_rows: bool = None, ignore_unknown_values: bool = None, template_suffix: str = None, @@ -3371,11 +3372,20 @@ def insert_rows_json( json_rows (Sequence[Dict]): Row data to be inserted. Keys must match the table schema fields and values must be JSON-compatible representations. - row_ids (Optional[Sequence[Optional[str]]]): + row_ids (Union[Iterable[str], AutoRowIDs, None]): Unique IDs, one per row being inserted. An ID can also be ``None``, indicating that an explicit insert ID should **not** be used for that row. If the argument is omitted altogether, unique IDs are created automatically. + + .. versionchanged:: 2.21.0 + Can also be an iterable, not just a sequence, or an + :class:`AutoRowIDs` enum member. + + .. 
deprecated:: 2.21.0 + Passing ``None`` to explicitly request autogenerating insert IDs is + deprecated, use :attr:`AutoRowIDs.GENERATE_UUID` instead. + skip_invalid_rows (Optional[bool]): Insert all valid rows of a request, even if invalid rows exist. The default value is ``False``, which causes the entire request @@ -3415,12 +3425,37 @@ def insert_rows_json( rows_info = [] data = {"rows": rows_info} - for index, row in enumerate(json_rows): + if row_ids is None: + warnings.warn( + "Passing None for row_ids is deprecated. To explicitly request " + "autogenerated insert IDs, use AutoRowIDs.GENERATE_UUID instead", + category=DeprecationWarning, + ) + row_ids = AutoRowIDs.GENERATE_UUID + + if not isinstance(row_ids, AutoRowIDs): + try: + row_ids_iter = iter(row_ids) + except TypeError: + msg = "row_ids is neither an iterable nor an AutoRowIDs enum member" + raise TypeError(msg) + + for i, row in enumerate(json_rows): info = {"json": row} - if row_ids is not None: - info["insertId"] = row_ids[index] - else: + + if row_ids is AutoRowIDs.GENERATE_UUID: info["insertId"] = str(uuid.uuid4()) + elif row_ids is AutoRowIDs.DISABLED: + info["insertId"] = None + else: + try: + insert_id = next(row_ids_iter) + except StopIteration: + msg = f"row_ids did not generate enough IDs, error at index {i}" + raise ValueError(msg) + else: + info["insertId"] = insert_id + rows_info.append(info) if skip_invalid_rows is not None: diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index edf991b6f..dbbd02635 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -21,6 +21,13 @@ from google.cloud.bigquery.query import ScalarQueryParameterType +class AutoRowIDs(enum.Enum): + """How to handle automatic insert IDs when inserting rows as a stream.""" + + DISABLED = enum.auto() + GENERATE_UUID = enum.auto() + + class Compression(object): """The compression type to use for exported files. The default value is :attr:`NONE`. 
diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index f6811e207..dffe7bdba 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -5434,7 +5434,7 @@ def test_insert_rows_from_dataframe_w_explicit_none_insert_ids(self): method="POST", path=API_PATH, data=EXPECTED_SENT_DATA, timeout=None ) - def test_insert_rows_json(self): + def test_insert_rows_json_default_behavior(self): from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table @@ -5481,8 +5481,10 @@ def test_insert_rows_json(self): method="POST", path="/%s" % PATH, data=SENT, timeout=7.5, ) - def test_insert_rows_json_with_string_id(self): - rows = [{"col1": "val1"}] + def test_insert_rows_json_w_explicitly_requested_autogenerated_insert_ids(self): + from google.cloud.bigquery import AutoRowIDs + + rows = [{"col1": "val1"}, {"col2": "val2"}] creds = _make_credentials() http = object() client = self._make_one( @@ -5490,20 +5492,116 @@ def test_insert_rows_json_with_string_id(self): ) conn = client._connection = make_connection({}) - with mock.patch("uuid.uuid4", side_effect=map(str, range(len(rows)))): - errors = client.insert_rows_json("proj.dset.tbl", rows) + uuid_patcher = mock.patch("uuid.uuid4", side_effect=map(str, range(len(rows)))) + with uuid_patcher: + errors = client.insert_rows_json( + "proj.dset.tbl", rows, row_ids=AutoRowIDs.GENERATE_UUID + ) self.assertEqual(len(errors), 0) - expected = { - "rows": [{"json": row, "insertId": str(i)} for i, row in enumerate(rows)] + + # Check row data sent to the backend. 
+ expected_row_data = { + "rows": [ + {"json": {"col1": "val1"}, "insertId": "0"}, + {"json": {"col2": "val2"}, "insertId": "1"}, + ] } conn.api_request.assert_called_once_with( method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", - data=expected, + data=expected_row_data, + timeout=None, + ) + + def test_insert_rows_json_w_explicitly_disabled_insert_ids(self): + from google.cloud.bigquery import AutoRowIDs + + rows = [{"col1": "val1"}, {"col2": "val2"}] + creds = _make_credentials() + http = object() + client = self._make_one( + project="default-project", credentials=creds, _http=http + ) + conn = client._connection = make_connection({}) + + errors = client.insert_rows_json( + "proj.dset.tbl", rows, row_ids=AutoRowIDs.DISABLED, + ) + + self.assertEqual(len(errors), 0) + + expected_row_data = { + "rows": [ + {"json": {"col1": "val1"}, "insertId": None}, + {"json": {"col2": "val2"}, "insertId": None}, + ] + } + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/proj/datasets/dset/tables/tbl/insertAll", + data=expected_row_data, + timeout=None, + ) + + def test_insert_rows_json_with_iterator_row_ids(self): + rows = [{"col1": "val1"}, {"col2": "val2"}, {"col3": "val3"}] + creds = _make_credentials() + http = object() + client = self._make_one( + project="default-project", credentials=creds, _http=http + ) + conn = client._connection = make_connection({}) + + row_ids_iter = map(str, itertools.count(42)) + errors = client.insert_rows_json("proj.dset.tbl", rows, row_ids=row_ids_iter) + + self.assertEqual(len(errors), 0) + expected_row_data = { + "rows": [ + {"json": {"col1": "val1"}, "insertId": "42"}, + {"json": {"col2": "val2"}, "insertId": "43"}, + {"json": {"col3": "val3"}, "insertId": "44"}, + ] + } + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/proj/datasets/dset/tables/tbl/insertAll", + data=expected_row_data, timeout=None, ) + def test_insert_rows_json_with_non_iterable_row_ids(self): + 
rows = [{"col1": "val1"}] + creds = _make_credentials() + http = object() + client = self._make_one( + project="default-project", credentials=creds, _http=http + ) + client._connection = make_connection({}) + + with self.assertRaises(TypeError) as exc: + client.insert_rows_json("proj.dset.tbl", rows, row_ids=object()) + + err_msg = str(exc.exception) + self.assertIn("row_ids", err_msg) + self.assertIn("iterable", err_msg) + + def test_insert_rows_json_with_too_few_row_ids(self): + rows = [{"col1": "val1"}, {"col2": "val2"}, {"col3": "val3"}] + creds = _make_credentials() + http = object() + client = self._make_one( + project="default-project", credentials=creds, _http=http + ) + client._connection = make_connection({}) + + insert_ids = ["10", "20"] + + error_msg_pattern = "row_ids did not generate enough IDs.*index 2" + with self.assertRaisesRegex(ValueError, error_msg_pattern): + client.insert_rows_json("proj.dset.tbl", rows, row_ids=insert_ids) + def test_insert_rows_json_w_explicit_none_insert_ids(self): rows = [{"col1": "val1"}, {"col2": "val2"}] creds = _make_credentials() @@ -5526,6 +5624,45 @@ def test_insert_rows_json_w_explicit_none_insert_ids(self): timeout=None, ) + def test_insert_rows_json_w_none_insert_ids_sequence(self): + rows = [{"col1": "val1"}, {"col2": "val2"}] + creds = _make_credentials() + http = object() + client = self._make_one( + project="default-project", credentials=creds, _http=http + ) + conn = client._connection = make_connection({}) + + uuid_patcher = mock.patch("uuid.uuid4", side_effect=map(str, range(len(rows)))) + with warnings.catch_warnings(record=True) as warned, uuid_patcher: + errors = client.insert_rows_json("proj.dset.tbl", rows, row_ids=None) + + self.assertEqual(len(errors), 0) + + # Passing row_ids=None should have resulted in a deprecation warning. 
+ matches = [ + warning + for warning in warned + if issubclass(warning.category, DeprecationWarning) + and "row_ids" in str(warning) + and "AutoRowIDs.GENERATE_UUID" in str(warning) + ] + assert matches, "The expected deprecation warning was not raised." + + # Check row data sent to the backend. + expected_row_data = { + "rows": [ + {"json": {"col1": "val1"}, "insertId": "0"}, + {"json": {"col2": "val2"}, "insertId": "1"}, + ] + } + conn.api_request.assert_called_once_with( + method="POST", + path="/projects/proj/datasets/dset/tables/tbl/insertAll", + data=expected_row_data, + timeout=None, + ) + def test_insert_rows_w_wrong_arg(self): from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.schema import SchemaField From 145944f24fedc4d739687399a8309f9d51d43dfd Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 9 Jul 2021 14:21:09 -0500 Subject: [PATCH 147/230] docs: add docs for all enums in module (#745) --- docs/conf.py | 1 + docs/enums.rst | 6 ++++++ docs/reference.rst | 7 ++++--- 3 files changed, 11 insertions(+), 3 deletions(-) create mode 100644 docs/enums.rst diff --git a/docs/conf.py b/docs/conf.py index cb347160d..09f7ea414 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -110,6 +110,7 @@ # directories to ignore when looking for source files. exclude_patterns = [ "_build", + "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "samples/snippets/README.rst", diff --git a/docs/enums.rst b/docs/enums.rst new file mode 100644 index 000000000..57608968a --- /dev/null +++ b/docs/enums.rst @@ -0,0 +1,6 @@ +BigQuery Enums +============== + +.. automodule:: google.cloud.bigquery.enums + :members: + :undoc-members: diff --git a/docs/reference.rst b/docs/reference.rst index 52d916f96..694379cd2 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -173,10 +173,11 @@ Magics Enums ===== -.. autosummary:: - :toctree: generated +.. 
toctree:: + :maxdepth: 2 + + enums - enums.StandardSqlDataTypes Encryption Configuration ======================== From cd2f09e96c4db5f63afa4fe7179c71b8872d48a2 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Sat, 10 Jul 2021 11:05:31 +0200 Subject: [PATCH 148/230] chore(deps): update dependency google-cloud-bigquery-storage to v2.6.0 (#743) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 83ab92ee5..30a59c15a 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 google-cloud-bigquery==2.20.0 -google-cloud-bigquery-storage==2.5.0 +google-cloud-bigquery-storage==2.6.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 6b966fb07..ce02ac7ed 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.20.0 -google-cloud-bigquery-storage==2.5.0 +google-cloud-bigquery-storage==2.6.0 google-auth-oauthlib==0.4.4 grpcio==1.38.1 ipython==7.16.1; python_version < '3.7' From 7d2d3e906a9eb161911a198fb925ad79de5df934 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Sat, 10 Jul 2021 11:07:00 +0200 Subject: [PATCH 149/230] feat: add support for decimal target types (#735) * feat: add support for decimal target types * Add decimal target types support to ExternalConfig * Remove ambiguous parts of DecimalTargetType docs. 
--- google/cloud/bigquery/__init__.py | 2 + google/cloud/bigquery/enums.py | 18 +++++++ google/cloud/bigquery/external_config.py | 23 +++++++++ google/cloud/bigquery/job/load.py | 23 +++++++++ tests/data/numeric_38_12.parquet | Bin 0 -> 307 bytes tests/system/test_client.py | 54 +++++++++++++++++++++ tests/unit/job/test_load_config.py | 39 +++++++++++++++ tests/unit/test_external_config.py | 58 +++++++++++++++++++++++ 8 files changed, 217 insertions(+) create mode 100644 tests/data/numeric_38_12.parquet diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index dfe3a6320..b97224176 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -38,6 +38,7 @@ from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import enums from google.cloud.bigquery.enums import AutoRowIDs +from google.cloud.bigquery.enums import DecimalTargetType from google.cloud.bigquery.enums import KeyResultStatementKind from google.cloud.bigquery.enums import SqlTypeNames from google.cloud.bigquery.enums import StandardSqlDataTypes @@ -148,6 +149,7 @@ "AutoRowIDs", "Compression", "CreateDisposition", + "DecimalTargetType", "DestinationFormat", "DeterminismLevel", "ExternalSourceFormat", diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index dbbd02635..ef35dffe0 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -49,6 +49,24 @@ class Compression(object): """Specifies no compression.""" +class DecimalTargetType: + """The data types that could be used as a target type when converting decimal values. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#DecimalTargetType + + .. 
versionadded:: 2.21.0 + """ + + NUMERIC = "NUMERIC" + """Decimal values could be converted to NUMERIC type.""" + + BIGNUMERIC = "BIGNUMERIC" + """Decimal values could be converted to BIGNUMERIC type.""" + + STRING = "STRING" + """Decimal values could be converted to STRING type.""" + + class CreateDisposition(object): """Specifies whether the job is allowed to create new tables. The default value is :attr:`CREATE_IF_NEEDED`. diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 0c49d2d76..f1692ba50 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -22,6 +22,7 @@ import base64 import copy +from typing import FrozenSet, Iterable, Optional from google.cloud.bigquery._helpers import _to_bytes from google.cloud.bigquery._helpers import _bytes_to_json @@ -693,6 +694,28 @@ def compression(self): def compression(self, value): self._properties["compression"] = value + @property + def decimal_target_types(self) -> Optional[FrozenSet[str]]: + """Possible SQL data types to which the source decimal values are converted. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.decimal_target_types + + .. 
versionadded:: 2.21.0 + """ + prop = self._properties.get("decimalTargetTypes") + if prop is not None: + prop = frozenset(prop) + return prop + + @decimal_target_types.setter + def decimal_target_types(self, value: Optional[Iterable[str]]): + if value is not None: + self._properties["decimalTargetTypes"] = list(value) + else: + if "decimalTargetTypes" in self._properties: + del self._properties["decimalTargetTypes"] + @property def hive_partitioning(self): """Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \ diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index 41d38dd74..bdee5cb6b 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -14,6 +14,8 @@ """Classes for load jobs.""" +from typing import FrozenSet, Iterable, Optional + from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery.external_config import HivePartitioningOptions from google.cloud.bigquery.format_options import ParquetOptions @@ -121,6 +123,27 @@ def create_disposition(self): def create_disposition(self, value): self._set_sub_prop("createDisposition", value) + @property + def decimal_target_types(self) -> Optional[FrozenSet[str]]: + """Possible SQL data types to which the source decimal values are converted. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.decimal_target_types + + .. 
versionadded:: 2.21.0 + """ + prop = self._get_sub_prop("decimalTargetTypes") + if prop is not None: + prop = frozenset(prop) + return prop + + @decimal_target_types.setter + def decimal_target_types(self, value: Optional[Iterable[str]]): + if value is not None: + self._set_sub_prop("decimalTargetTypes", list(value)) + else: + self._del_sub_prop("decimalTargetTypes") + @property def destination_encryption_configuration(self): """Optional[google.cloud.bigquery.encryption_configuration.EncryptionConfiguration]: Custom diff --git a/tests/data/numeric_38_12.parquet b/tests/data/numeric_38_12.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ef4db91ea9a90b2e230b8057fb05446e3f25dfe4 GIT binary patch literal 307 zcmWG=3^EjD5cLuD(Gg_MA>{7D-4+Ihd3~*3z?EmNe|HVLj zCLm^JMpq11$pBF*$|R}d!JUy=mKtB2oRONFD9Xej$|tHI$|S)WpPZj#pvomGA?m{* z#v&;rDWk?ABg!Xf%D}-U36&NF%82b_1R^yK8=~C?(!s)@;aQMaR9Ko?qMKW!P?lO$ zoSC1eV5n!IXQ-f&RGOKSqF|DklxUKYm};JsWRPN#nv#^9W^QR@oS158Y;2ikWRaF) ToMxVuoTkZe0_d>-U~mBdp9NGq literal 0 HcmV?d00001 diff --git a/tests/system/test_client.py b/tests/system/test_client.py index ce3021399..460296b2f 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -864,6 +864,60 @@ def test_load_table_from_local_avro_file_then_dump_table(self): sorted(row_tuples, key=by_wavelength), sorted(ROWS, key=by_wavelength) ) + def test_load_table_from_local_parquet_file_decimal_types(self): + from google.cloud.bigquery.enums import DecimalTargetType + from google.cloud.bigquery.job import SourceFormat + from google.cloud.bigquery.job import WriteDisposition + + TABLE_NAME = "test_table_parquet" + + expected_rows = [ + (decimal.Decimal("123.999999999999"),), + (decimal.Decimal("99999999999999999999999999.999999999999"),), + ] + + dataset = self.temp_dataset(_make_dataset_id("load_local_parquet_then_dump")) + table_ref = dataset.table(TABLE_NAME) + table = Table(table_ref) + self.to_delete.insert(0, table) + + job_config = 
bigquery.LoadJobConfig() + job_config.source_format = SourceFormat.PARQUET + job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE + job_config.decimal_target_types = [ + DecimalTargetType.NUMERIC, + DecimalTargetType.BIGNUMERIC, + DecimalTargetType.STRING, + ] + + with open(DATA_PATH / "numeric_38_12.parquet", "rb") as parquet_file: + job = Config.CLIENT.load_table_from_file( + parquet_file, table_ref, job_config=job_config + ) + + job.result(timeout=JOB_TIMEOUT) # Retry until done. + + self.assertEqual(job.output_rows, len(expected_rows)) + + table = Config.CLIENT.get_table(table) + rows = self._fetch_single_page(table) + row_tuples = [r.values() for r in rows] + self.assertEqual(sorted(row_tuples), sorted(expected_rows)) + + # Forcing the NUMERIC type, however, should result in an error. + job_config.decimal_target_types = [DecimalTargetType.NUMERIC] + + with open(DATA_PATH / "numeric_38_12.parquet", "rb") as parquet_file: + job = Config.CLIENT.load_table_from_file( + parquet_file, table_ref, job_config=job_config + ) + + with self.assertRaises(BadRequest) as exc_info: + job.result(timeout=JOB_TIMEOUT) + + exc_msg = str(exc_info.exception) + self.assertIn("out of valid NUMERIC range", exc_msg) + def test_load_table_from_json_basic_use(self): table_schema = ( bigquery.SchemaField("name", "STRING", mode="REQUIRED"), diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index eafe7e046..190bd16dc 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -122,6 +122,45 @@ def test_create_disposition_setter(self): config.create_disposition = disposition self.assertEqual(config._properties["load"]["createDisposition"], disposition) + def test_decimal_target_types_miss(self): + config = self._get_target_class()() + self.assertIsNone(config.decimal_target_types) + + def test_decimal_target_types_hit(self): + from google.cloud.bigquery.enums import DecimalTargetType + + config = 
self._get_target_class()() + decimal_target_types = [DecimalTargetType.NUMERIC, DecimalTargetType.STRING] + config._properties["load"]["decimalTargetTypes"] = decimal_target_types + + expected = frozenset(decimal_target_types) + self.assertEqual(config.decimal_target_types, expected) + + def test_decimal_target_types_setter(self): + from google.cloud.bigquery.enums import DecimalTargetType + + decimal_target_types = (DecimalTargetType.NUMERIC, DecimalTargetType.BIGNUMERIC) + config = self._get_target_class()() + config.decimal_target_types = decimal_target_types + self.assertEqual( + config._properties["load"]["decimalTargetTypes"], + list(decimal_target_types), + ) + + def test_decimal_target_types_setter_w_none(self): + from google.cloud.bigquery.enums import DecimalTargetType + + config = self._get_target_class()() + decimal_target_types = [DecimalTargetType.BIGNUMERIC] + config._properties["load"]["decimalTargetTypes"] = decimal_target_types + + config.decimal_target_types = None + + self.assertIsNone(config.decimal_target_types) + self.assertNotIn("decimalTargetTypes", config._properties["load"]) + + config.decimal_target_types = None # No error if unsetting an unset property. 
+ def test_destination_encryption_configuration_missing(self): config = self._get_target_class()() self.assertIsNone(config.destination_encryption_configuration) diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 393df931e..1f49dba5d 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -532,6 +532,64 @@ def test_to_api_repr_parquet(self): self.assertEqual(got_resource, exp_resource) + def test_from_api_repr_decimal_target_types(self): + from google.cloud.bigquery.enums import DecimalTargetType + + resource = _copy_and_update( + self.BASE_RESOURCE, + { + "sourceFormat": "FORMAT_FOO", + "decimalTargetTypes": [DecimalTargetType.NUMERIC], + }, + ) + + ec = external_config.ExternalConfig.from_api_repr(resource) + + self._verify_base(ec) + self.assertEqual(ec.source_format, "FORMAT_FOO") + self.assertEqual( + ec.decimal_target_types, frozenset([DecimalTargetType.NUMERIC]) + ) + + # converting back to API representation should yield the same result + got_resource = ec.to_api_repr() + self.assertEqual(got_resource, resource) + + del resource["decimalTargetTypes"] + ec = external_config.ExternalConfig.from_api_repr(resource) + self.assertIsNone(ec.decimal_target_types) + + got_resource = ec.to_api_repr() + self.assertEqual(got_resource, resource) + + def test_to_api_repr_decimal_target_types(self): + from google.cloud.bigquery.enums import DecimalTargetType + + ec = external_config.ExternalConfig("FORMAT_FOO") + ec.decimal_target_types = [DecimalTargetType.NUMERIC, DecimalTargetType.STRING] + + got_resource = ec.to_api_repr() + + expected_resource = { + "sourceFormat": "FORMAT_FOO", + "decimalTargetTypes": [DecimalTargetType.NUMERIC, DecimalTargetType.STRING], + } + self.assertEqual(got_resource, expected_resource) + + def test_to_api_repr_decimal_target_types_unset(self): + from google.cloud.bigquery.enums import DecimalTargetType + + ec = external_config.ExternalConfig("FORMAT_FOO") + 
ec._properties["decimalTargetTypes"] = [DecimalTargetType.NUMERIC] + ec.decimal_target_types = None + + got_resource = ec.to_api_repr() + + expected_resource = {"sourceFormat": "FORMAT_FOO"} + self.assertEqual(got_resource, expected_resource) + + ec.decimal_target_types = None # No error if unsetting when already unset. + def _copy_and_update(d, u): d = copy.deepcopy(d) From ba86b2a6300ae5a9f3c803beeb42bda4c522e34c Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Sat, 10 Jul 2021 11:29:00 +0200 Subject: [PATCH 150/230] feat: add support for table snapshots (#740) * feat: add support for table snapshots * Add system test for table snapshots * Make test taxonomy resource name unique * Store timezone aware snapshot time on snapshots * Make copy config tests more detailed * Use unique resource ID differently for display name * Add new classes to docs --- docs/reference.rst | 2 + google/cloud/bigquery/__init__.py | 4 ++ google/cloud/bigquery/job/__init__.py | 2 + google/cloud/bigquery/job/copy_.py | 38 ++++++++++++++ google/cloud/bigquery/table.py | 37 ++++++++++++++ tests/system/test_client.py | 71 ++++++++++++++++++++++++- tests/unit/job/test_copy.py | 34 +++++++++++- tests/unit/test_table.py | 74 +++++++++++++++++++++++++++ 8 files changed, 260 insertions(+), 2 deletions(-) diff --git a/docs/reference.rst b/docs/reference.rst index 694379cd2..cb2faa5ec 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -59,6 +59,7 @@ Job-Related Types job.CreateDisposition job.DestinationFormat job.Encoding + job.OperationType job.QueryPlanEntry job.QueryPlanEntryStep job.QueryPriority @@ -90,6 +91,7 @@ Table table.RangePartitioning table.Row table.RowIterator + table.SnapshotDefinition table.Table table.TableListItem table.TableReference diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index b97224176..65dde5d94 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -61,6 +61,7 @@ from 
google.cloud.bigquery.job import ExtractJobConfig from google.cloud.bigquery.job import LoadJob from google.cloud.bigquery.job import LoadJobConfig +from google.cloud.bigquery.job import OperationType from google.cloud.bigquery.job import QueryJob from google.cloud.bigquery.job import QueryJobConfig from google.cloud.bigquery.job import QueryPriority @@ -87,6 +88,7 @@ from google.cloud.bigquery.table import PartitionRange from google.cloud.bigquery.table import RangePartitioning from google.cloud.bigquery.table import Row +from google.cloud.bigquery.table import SnapshotDefinition from google.cloud.bigquery.table import Table from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import TimePartitioningType @@ -115,6 +117,7 @@ "PartitionRange", "RangePartitioning", "Row", + "SnapshotDefinition", "TimePartitioning", "TimePartitioningType", # Jobs @@ -155,6 +158,7 @@ "ExternalSourceFormat", "Encoding", "KeyResultStatementKind", + "OperationType", "QueryPriority", "SchemaUpdateOption", "SourceFormat", diff --git a/google/cloud/bigquery/job/__init__.py b/google/cloud/bigquery/job/__init__.py index cdab92e05..6bdfa09be 100644 --- a/google/cloud/bigquery/job/__init__.py +++ b/google/cloud/bigquery/job/__init__.py @@ -25,6 +25,7 @@ from google.cloud.bigquery.job.base import UnknownJob from google.cloud.bigquery.job.copy_ import CopyJob from google.cloud.bigquery.job.copy_ import CopyJobConfig +from google.cloud.bigquery.job.copy_ import OperationType from google.cloud.bigquery.job.extract import ExtractJob from google.cloud.bigquery.job.extract import ExtractJobConfig from google.cloud.bigquery.job.load import LoadJob @@ -59,6 +60,7 @@ "UnknownJob", "CopyJob", "CopyJobConfig", + "OperationType", "ExtractJob", "ExtractJobConfig", "LoadJob", diff --git a/google/cloud/bigquery/job/copy_.py b/google/cloud/bigquery/job/copy_.py index 95f4b613b..c6ee98944 100644 --- a/google/cloud/bigquery/job/copy_.py +++ b/google/cloud/bigquery/job/copy_.py @@ 
-14,6 +14,8 @@ """Classes for copy jobs.""" +from typing import Optional + from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery import _helpers from google.cloud.bigquery.table import TableReference @@ -23,6 +25,25 @@ from google.cloud.bigquery.job.base import _JobReference +class OperationType: + """Different operation types supported in table copy job. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#operationtype + """ + + OPERATION_TYPE_UNSPECIFIED = "OPERATION_TYPE_UNSPECIFIED" + """Unspecified operation type.""" + + COPY = "COPY" + """The source and destination table have the same table type.""" + + SNAPSHOT = "SNAPSHOT" + """The source table type is TABLE and the destination table type is SNAPSHOT.""" + + RESTORE = "RESTORE" + """The source table type is SNAPSHOT and the destination table type is TABLE.""" + + class CopyJobConfig(_JobConfig): """Configuration options for copy jobs. @@ -85,6 +106,23 @@ def destination_encryption_configuration(self, value): api_repr = value.to_api_repr() self._set_sub_prop("destinationEncryptionConfiguration", api_repr) + @property + def operation_type(self) -> str: + """The operation to perform with this copy job. + + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.operation_type + """ + return self._get_sub_prop( + "operationType", OperationType.OPERATION_TYPE_UNSPECIFIED + ) + + @operation_type.setter + def operation_type(self, value: Optional[str]): + if value is None: + value = OperationType.OPERATION_TYPE_UNSPECIFIED + self._set_sub_prop("operationType", value) + class CopyJob(_AsyncJob): """Asynchronous job: copy data into a table from other tables. 
diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index a1c13c85d..765110ae6 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -321,6 +321,7 @@ class Table(object): "range_partitioning": "rangePartitioning", "time_partitioning": "timePartitioning", "schema": "schema", + "snapshot_definition": "snapshotDefinition", "streaming_buffer": "streamingBuffer", "self_link": "selfLink", "table_id": ["tableReference", "tableId"], @@ -910,6 +911,19 @@ def external_data_configuration(self, value): self._PROPERTY_TO_API_FIELD["external_data_configuration"] ] = api_repr + @property + def snapshot_definition(self) -> Optional["SnapshotDefinition"]: + """Information about the snapshot. This value is set via snapshot creation. + + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table.FIELDS.snapshot_definition + """ + snapshot_info = self._properties.get( + self._PROPERTY_TO_API_FIELD["snapshot_definition"] + ) + if snapshot_info is not None: + snapshot_info = SnapshotDefinition(snapshot_info) + return snapshot_info + @classmethod def from_string(cls, full_table_id: str) -> "Table": """Construct a table from fully-qualified table ID. @@ -1274,6 +1288,29 @@ def __init__(self, resource): ) +class SnapshotDefinition: + """Information about base table and snapshot time of the snapshot. + + See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#snapshotdefinition + + Args: + resource: Snapshot definition representation returned from the API. + """ + + def __init__(self, resource: Dict[str, Any]): + self.base_table_reference = None + if "baseTableReference" in resource: + self.base_table_reference = TableReference.from_api_repr( + resource["baseTableReference"] + ) + + self.snapshot_time = None + if "snapshotTime" in resource: + self.snapshot_time = google.cloud._helpers._rfc3339_to_datetime( + resource["snapshotTime"] + ) + + class Row(object): """A BigQuery row. 
diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 460296b2f..7234333a2 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -394,7 +394,7 @@ def test_create_table_with_real_custom_policy(self): taxonomy_parent = f"projects/{Config.CLIENT.project}/locations/us" new_taxonomy = datacatalog_types.Taxonomy( - display_name="Custom test taxonomy", + display_name="Custom test taxonomy" + unique_resource_id(), description="This taxonomy is ony used for a test.", activated_policy_types=[ datacatalog_types.Taxonomy.PolicyType.FINE_GRAINED_ACCESS_CONTROL @@ -2370,6 +2370,75 @@ def test_parameterized_types_round_trip(self): self.assertEqual(tuple(s._key()[:2] for s in table2.schema), fields) + def test_table_snapshots(self): + from google.cloud.bigquery import CopyJobConfig + from google.cloud.bigquery import OperationType + + client = Config.CLIENT + + source_table_path = f"{client.project}.{Config.DATASET}.test_table" + snapshot_table_path = f"{source_table_path}_snapshot" + + # Create the table before loading so that the column order is predictable. + schema = [ + bigquery.SchemaField("foo", "INTEGER"), + bigquery.SchemaField("bar", "STRING"), + ] + source_table = helpers.retry_403(Config.CLIENT.create_table)( + Table(source_table_path, schema=schema) + ) + self.to_delete.insert(0, source_table) + + # Populate the table with initial data. + rows = [{"foo": 1, "bar": "one"}, {"foo": 2, "bar": "two"}] + load_job = Config.CLIENT.load_table_from_json(rows, source_table) + load_job.result() + + # Now create a snapshot before modifying the original table data. + copy_config = CopyJobConfig() + copy_config.operation_type = OperationType.SNAPSHOT + + copy_job = client.copy_table( + sources=source_table_path, + destination=snapshot_table_path, + job_config=copy_config, + ) + copy_job.result() + + snapshot_table = client.get_table(snapshot_table_path) + self.to_delete.insert(0, snapshot_table) + + # Modify data in original table. 
+ sql = f'INSERT INTO `{source_table_path}`(foo, bar) VALUES (3, "three")' + query_job = client.query(sql) + query_job.result() + + # List rows from the source table and compare them to rows from the snapshot. + rows_iter = client.list_rows(source_table_path) + rows = sorted(row.values() for row in rows_iter) + assert rows == [(1, "one"), (2, "two"), (3, "three")] + + rows_iter = client.list_rows(snapshot_table_path) + rows = sorted(row.values() for row in rows_iter) + assert rows == [(1, "one"), (2, "two")] + + # Now restore the table from the snapshot and it should again contain the old + # set of rows. + copy_config = CopyJobConfig() + copy_config.operation_type = OperationType.RESTORE + copy_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE + + copy_job = client.copy_table( + sources=snapshot_table_path, + destination=source_table_path, + job_config=copy_config, + ) + copy_job.result() + + rows_iter = client.list_rows(source_table_path) + rows = sorted(row.values() for row in rows_iter) + assert rows == [(1, "one"), (2, "two")] + def temp_dataset(self, dataset_id, location=None): project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) diff --git a/tests/unit/job/test_copy.py b/tests/unit/job/test_copy.py index fb0c87391..992efcf6b 100644 --- a/tests/unit/job/test_copy.py +++ b/tests/unit/job/test_copy.py @@ -28,18 +28,34 @@ def _get_target_class(): return CopyJobConfig + def test_ctor_defaults(self): + from google.cloud.bigquery.job import OperationType + + config = self._make_one() + + assert config.create_disposition is None + assert config.write_disposition is None + assert config.destination_encryption_configuration is None + assert config.operation_type == OperationType.OPERATION_TYPE_UNSPECIFIED + def test_ctor_w_properties(self): from google.cloud.bigquery.job import CreateDisposition + from google.cloud.bigquery.job import OperationType from google.cloud.bigquery.job import WriteDisposition 
create_disposition = CreateDisposition.CREATE_NEVER write_disposition = WriteDisposition.WRITE_TRUNCATE + snapshot_operation = OperationType.SNAPSHOT + config = self._get_target_class()( - create_disposition=create_disposition, write_disposition=write_disposition + create_disposition=create_disposition, + write_disposition=write_disposition, + operation_type=snapshot_operation, ) self.assertEqual(config.create_disposition, create_disposition) self.assertEqual(config.write_disposition, write_disposition) + self.assertEqual(config.operation_type, snapshot_operation) def test_to_api_repr_with_encryption(self): from google.cloud.bigquery.encryption_configuration import ( @@ -70,6 +86,22 @@ def test_to_api_repr_with_encryption_none(self): resource, {"copy": {"destinationEncryptionConfiguration": None}} ) + def test_operation_type_setting_none(self): + from google.cloud.bigquery.job import OperationType + + config = self._make_one(operation_type=OperationType.SNAPSHOT) + + # Setting it to None is the same as setting it to OPERATION_TYPE_UNSPECIFIED. 
+ config.operation_type = None + assert config.operation_type == OperationType.OPERATION_TYPE_UNSPECIFIED + + def test_operation_type_setting_non_none(self): + from google.cloud.bigquery.job import OperationType + + config = self._make_one(operation_type=None) + config.operation_type = OperationType.RESTORE + assert config.operation_type == OperationType.RESTORE + class TestCopyJob(_Base): JOB_TYPE = "copy" diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index f4038835c..b30f16fe0 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -684,6 +684,40 @@ def test_props_set_by_server(self): self.assertEqual(table.full_table_id, TABLE_FULL_ID) self.assertEqual(table.table_type, "TABLE") + def test_snapshot_definition_not_set(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + assert table.snapshot_definition is None + + def test_snapshot_definition_set(self): + from google.cloud._helpers import UTC + from google.cloud.bigquery.table import SnapshotDefinition + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + table._properties["snapshotDefinition"] = { + "baseTableReference": { + "projectId": "project_x", + "datasetId": "dataset_y", + "tableId": "table_z", + }, + "snapshotTime": "2010-09-28T10:20:30.123Z", + } + + snapshot = table.snapshot_definition + + assert isinstance(snapshot, SnapshotDefinition) + assert snapshot.base_table_reference.path == ( + "/projects/project_x/datasets/dataset_y/tables/table_z" + ) + assert snapshot.snapshot_time == datetime.datetime( + 2010, 9, 28, 10, 20, 30, 123000, tzinfo=UTC + ) + def test_description_setter_bad_value(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -1509,6 +1543,46 @@ def test_to_api_repr(self): self.assertEqual(table.to_api_repr(), resource) 
+class TestSnapshotDefinition: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import SnapshotDefinition + + return SnapshotDefinition + + @classmethod + def _make_one(cls, *args, **kwargs): + klass = cls._get_target_class() + return klass(*args, **kwargs) + + def test_ctor_empty_resource(self): + instance = self._make_one(resource={}) + assert instance.base_table_reference is None + assert instance.snapshot_time is None + + def test_ctor_full_resource(self): + from google.cloud._helpers import UTC + from google.cloud.bigquery.table import TableReference + + resource = { + "baseTableReference": { + "projectId": "my-project", + "datasetId": "your-dataset", + "tableId": "our-table", + }, + "snapshotTime": "2005-06-07T19:35:02.123Z", + } + instance = self._make_one(resource) + + expected_table_ref = TableReference.from_string( + "my-project.your-dataset.our-table" + ) + assert instance.base_table_reference == expected_table_ref + + expected_time = datetime.datetime(2005, 6, 7, 19, 35, 2, 123000, tzinfo=UTC) + assert instance.snapshot_time == expected_time + + class TestRow(unittest.TestCase): def test_row(self): from google.cloud.bigquery.table import Row From 67bc5fbd306be7cdffd216f3791d4024acfa95b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20Br=C3=A6dstrup?= <3591721+LinuxChristian@users.noreply.github.com> Date: Mon, 12 Jul 2021 21:21:24 +0200 Subject: [PATCH 151/230] fix: use pandas function to check for NaN (#750) * fix: use pandas function to check for NaN Starting with pandas 1.0, an experimental pandas.NA value (singleton) is available to represent scalar missing values as opposed to numpy.nan. Comparing the variable with itself results in a pandas.NA value that doesn't support type-casting to boolean. Using the build-in pandas.isna function handles all pandas supported NaN values. 
* tests: Skip tests if pandas below required version * tests: compare expected and actual directly as lists * Fix pytest.mark.skipif spelling Co-authored-by: Peter Lamut --- google/cloud/bigquery/_pandas_helpers.py | 2 +- tests/unit/test__pandas_helpers.py | 40 ++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index e93a99eba..285c0e83c 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -780,7 +780,7 @@ def dataframe_to_json_generator(dataframe): output = {} for column, value in zip(dataframe.columns, row): # Omit NaN values. - if value != value: + if pandas.isna(value): continue output[column] = value yield output diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 39a3d845b..aa87e28f5 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -19,6 +19,7 @@ import operator import queue import warnings +import pkg_resources import mock @@ -47,6 +48,14 @@ except ImportError: # pragma: NO COVER bigquery_storage = None +PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0") + +if pandas is not None: + PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version +else: + # Set to less than MIN version. 
+ PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") + skip_if_no_bignumeric = pytest.mark.skipif( not _BIGNUMERIC_SUPPORT, reason="BIGNUMERIC support requires pyarrow>=3.0.0", @@ -734,6 +743,37 @@ def test_list_columns_and_indexes_with_named_index_same_as_column_name( assert columns_and_indexes == expected +@pytest.mark.skipif( + pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION, + reason="Requires `pandas version >= 1.0.0` which introduces pandas.NA", +) +def test_dataframe_to_json_generator(module_under_test): + utcnow = datetime.datetime.utcnow() + df_data = collections.OrderedDict( + [ + ("a_series", [pandas.NA, 2, 3, 4]), + ("b_series", [0.1, float("NaN"), 0.3, 0.4]), + ("c_series", ["a", "b", pandas.NA, "d"]), + ("d_series", [utcnow, utcnow, utcnow, pandas.NaT]), + ("e_series", [True, False, True, None]), + ] + ) + dataframe = pandas.DataFrame( + df_data, index=pandas.Index([4, 5, 6, 7], name="a_index") + ) + + dataframe = dataframe.astype({"a_series": pandas.Int64Dtype()}) + + rows = module_under_test.dataframe_to_json_generator(dataframe) + expected = [ + {"b_series": 0.1, "c_series": "a", "d_series": utcnow, "e_series": True}, + {"a_series": 2, "c_series": "b", "d_series": utcnow, "e_series": False}, + {"a_series": 3, "b_series": 0.3, "d_series": utcnow, "e_series": True}, + {"a_series": 4, "b_series": 0.4, "c_series": "d"}, + ] + assert list(rows) == expected + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def test_list_columns_and_indexes_with_named_index(module_under_test): df_data = collections.OrderedDict( From 7771f34421654575c9eb2dbdef7ba0e9384beb39 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 13 Jul 2021 19:06:15 +0200 Subject: [PATCH 152/230] chore: release 2.21.0 (#753) Supersedes #711. ## [2.21.0](https://www.github.com/googleapis/python-bigquery/compare/v2.20.0...v2.21.0) (2021-07-13) ### Features * Add max_results parameter to some of the `QueryJob` methods. 
([#698](https://www.github.com/googleapis/python-bigquery/issues/698)) ([2a9618f](https://www.github.com/googleapis/python-bigquery/commit/2a9618f4daaa4a014161e1a2f7376844eec9e8da)) * Add support for decimal target types. ([#735](https://www.github.com/googleapis/python-bigquery/issues/735)) ([7d2d3e9](https://www.github.com/googleapis/python-bigquery/commit/7d2d3e906a9eb161911a198fb925ad79de5df934)) * Add support for table snapshots. ([#740](https://www.github.com/googleapis/python-bigquery/issues/740)) ([ba86b2a](https://www.github.com/googleapis/python-bigquery/commit/ba86b2a6300ae5a9f3c803beeb42bda4c522e34c)) * Enable unsetting policy tags on schema fields. ([#703](https://www.github.com/googleapis/python-bigquery/issues/703)) ([18bb443](https://www.github.com/googleapis/python-bigquery/commit/18bb443c7acd0a75dcb57d9aebe38b2d734ff8c7)) * Make it easier to disable best-effort deduplication with streaming inserts. ([#734](https://www.github.com/googleapis/python-bigquery/issues/734)) ([1246da8](https://www.github.com/googleapis/python-bigquery/commit/1246da86b78b03ca1aa2c45ec71649e294cfb2f1)) * Support passing struct data to the DB API. ([#718](https://www.github.com/googleapis/python-bigquery/issues/718)) ([38b3ef9](https://www.github.com/googleapis/python-bigquery/commit/38b3ef96c3dedc139b84f0ff06885141ae7ce78c)) ### Bug Fixes * Inserting non-finite floats with `insert_rows()`. ([#728](https://www.github.com/googleapis/python-bigquery/issues/728)) ([d047419](https://www.github.com/googleapis/python-bigquery/commit/d047419879e807e123296da2eee89a5253050166)) * Use `pandas` function to check for `NaN`. ([#750](https://www.github.com/googleapis/python-bigquery/issues/750)) ([67bc5fb](https://www.github.com/googleapis/python-bigquery/commit/67bc5fbd306be7cdffd216f3791d4024acfa95b3)) ### Documentation * Add docs for all enums in module. 
([#745](https://www.github.com/googleapis/python-bigquery/issues/745)) ([145944f](https://www.github.com/googleapis/python-bigquery/commit/145944f24fedc4d739687399a8309f9d51d43dfd)) * Omit mention of Python 2.7 in `CONTRIBUTING.rst`. ([#706](https://www.github.com/googleapis/python-bigquery/issues/706)) ([27d6839](https://www.github.com/googleapis/python-bigquery/commit/27d6839ee8a40909e4199cfa0da8b6b64705b2e9)) --- CHANGELOG.md | 24 ++++++++++++++++++++++++ docs/conf.py | 1 - google/cloud/bigquery/version.py | 2 +- 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b08cd98c7..7344542b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,30 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.21.0](https://www.github.com/googleapis/python-bigquery/compare/v2.20.0...v2.21.0) (2021-07-12) + + +### Features + +* Add max_results parameter to some of the `QueryJob` methods. ([#698](https://www.github.com/googleapis/python-bigquery/issues/698)) ([2a9618f](https://www.github.com/googleapis/python-bigquery/commit/2a9618f4daaa4a014161e1a2f7376844eec9e8da)) +* Add support for decimal target types. ([#735](https://www.github.com/googleapis/python-bigquery/issues/735)) ([7d2d3e9](https://www.github.com/googleapis/python-bigquery/commit/7d2d3e906a9eb161911a198fb925ad79de5df934)) +* Add support for table snapshots. ([#740](https://www.github.com/googleapis/python-bigquery/issues/740)) ([ba86b2a](https://www.github.com/googleapis/python-bigquery/commit/ba86b2a6300ae5a9f3c803beeb42bda4c522e34c)) +* Enable unsetting policy tags on schema fields. ([#703](https://www.github.com/googleapis/python-bigquery/issues/703)) ([18bb443](https://www.github.com/googleapis/python-bigquery/commit/18bb443c7acd0a75dcb57d9aebe38b2d734ff8c7)) +* Make it easier to disable best-effort deduplication with streaming inserts. 
([#734](https://www.github.com/googleapis/python-bigquery/issues/734)) ([1246da8](https://www.github.com/googleapis/python-bigquery/commit/1246da86b78b03ca1aa2c45ec71649e294cfb2f1)) +* Support passing struct data to the DB API. ([#718](https://www.github.com/googleapis/python-bigquery/issues/718)) ([38b3ef9](https://www.github.com/googleapis/python-bigquery/commit/38b3ef96c3dedc139b84f0ff06885141ae7ce78c)) + + +### Bug Fixes + +* Inserting non-finite floats with `insert_rows()`. ([#728](https://www.github.com/googleapis/python-bigquery/issues/728)) ([d047419](https://www.github.com/googleapis/python-bigquery/commit/d047419879e807e123296da2eee89a5253050166)) +* Use `pandas` function to check for `NaN`. ([#750](https://www.github.com/googleapis/python-bigquery/issues/750)) ([67bc5fb](https://www.github.com/googleapis/python-bigquery/commit/67bc5fbd306be7cdffd216f3791d4024acfa95b3)) + + +### Documentation + +* Add docs for all enums in module. ([#745](https://www.github.com/googleapis/python-bigquery/issues/745)) ([145944f](https://www.github.com/googleapis/python-bigquery/commit/145944f24fedc4d739687399a8309f9d51d43dfd)) +* Omit mention of Python 2.7 in `CONTRIBUTING.rst`. ([#706](https://www.github.com/googleapis/python-bigquery/issues/706)) ([27d6839](https://www.github.com/googleapis/python-bigquery/commit/27d6839ee8a40909e4199cfa0da8b6b64705b2e9)) + ## [2.20.0](https://www.github.com/googleapis/python-bigquery/compare/v2.19.0...v2.20.0) (2021-06-07) diff --git a/docs/conf.py b/docs/conf.py index 09f7ea414..cb347160d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -110,7 +110,6 @@ # directories to ignore when looking for source files. 
exclude_patterns = [ "_build", - "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "samples/snippets/README.rst", diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 9fea4fece..563b0e160 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.20.0" +__version__ = "2.21.0" From dea92d36ffa4d4dcf5cbbddbbf439df049da9558 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 13 Jul 2021 19:42:22 +0200 Subject: [PATCH 153/230] chore(deps): update dependency google-cloud-bigquery to v2.21.0 (#755) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | `==2.20.0` -> `==2.21.0` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.21.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.21.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.21.0/compatibility-slim/2.20.0)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.21.0/confidence-slim/2.20.0)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery ### [`v2.21.0`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​2210-httpswwwgithubcomgoogleapispython-bigquerycomparev2200v2210-2021-07-12) ##### Features - Add max_results parameter to some of the `QueryJob` methods. ([#​698](https://www.github.com/googleapis/python-bigquery/issues/698)) ([2a9618f](https://www.github.com/googleapis/python-bigquery/commit/2a9618f4daaa4a014161e1a2f7376844eec9e8da)) - Add support for decimal target types. ([#​735](https://www.github.com/googleapis/python-bigquery/issues/735)) ([7d2d3e9](https://www.github.com/googleapis/python-bigquery/commit/7d2d3e906a9eb161911a198fb925ad79de5df934)) - Add support for table snapshots. ([#​740](https://www.github.com/googleapis/python-bigquery/issues/740)) ([ba86b2a](https://www.github.com/googleapis/python-bigquery/commit/ba86b2a6300ae5a9f3c803beeb42bda4c522e34c)) - Enable unsetting policy tags on schema fields. ([#​703](https://www.github.com/googleapis/python-bigquery/issues/703)) ([18bb443](https://www.github.com/googleapis/python-bigquery/commit/18bb443c7acd0a75dcb57d9aebe38b2d734ff8c7)) - Make it easier to disable best-effort deduplication with streaming inserts. ([#​734](https://www.github.com/googleapis/python-bigquery/issues/734)) ([1246da8](https://www.github.com/googleapis/python-bigquery/commit/1246da86b78b03ca1aa2c45ec71649e294cfb2f1)) - Support passing struct data to the DB API. ([#​718](https://www.github.com/googleapis/python-bigquery/issues/718)) ([38b3ef9](https://www.github.com/googleapis/python-bigquery/commit/38b3ef96c3dedc139b84f0ff06885141ae7ce78c)) ##### Bug Fixes - Inserting non-finite floats with `insert_rows()`. ([#​728](https://www.github.com/googleapis/python-bigquery/issues/728)) ([d047419](https://www.github.com/googleapis/python-bigquery/commit/d047419879e807e123296da2eee89a5253050166)) - Use `pandas` function to check for `NaN`. 
([#​750](https://www.github.com/googleapis/python-bigquery/issues/750)) ([67bc5fb](https://www.github.com/googleapis/python-bigquery/commit/67bc5fbd306be7cdffd216f3791d4024acfa95b3)) ##### Documentation - Add docs for all enums in module. ([#​745](https://www.github.com/googleapis/python-bigquery/issues/745)) ([145944f](https://www.github.com/googleapis/python-bigquery/commit/145944f24fedc4d739687399a8309f9d51d43dfd)) - Omit mention of Python 2.7 in `CONTRIBUTING.rst`. ([#​706](https://www.github.com/googleapis/python-bigquery/issues/706)) ([27d6839](https://www.github.com/googleapis/python-bigquery/commit/27d6839ee8a40909e4199cfa0da8b6b64705b2e9))
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 30a59c15a..c7aa209ad 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.20.0 +google-cloud-bigquery==2.21.0 google-cloud-bigquery-storage==2.6.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index ce02ac7ed..b62c84c33 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.20.0 +google-cloud-bigquery==2.21.0 google-cloud-bigquery-storage==2.6.0 google-auth-oauthlib==0.4.4 grpcio==1.38.1 From 481b86449e37e58a4f354343ed14f4dfd6ef60dd Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 14 Jul 2021 15:24:57 +0000 Subject: [PATCH 154/230] build(python): exit with success status if no samples found (#759) Source-Link: https://github.com/googleapis/synthtool/commit/53ea3896a52f87c758e79b5a19fa338c83925a98 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:e1793a23ae0ee9aafb2e3a53b564a351f74790dbe3c2d75f8fc3b8c43e5c036c --- .github/.OwlBot.lock.yaml | 2 +- 
.kokoro/test-samples-impl.sh | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index e2b39f946..a5d3697f2 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:99d90d097e4a4710cc8658ee0b5b963f4426d0e424819787c3ac1405c9a26719 + digest: sha256:e1793a23ae0ee9aafb2e3a53b564a351f74790dbe3c2d75f8fc3b8c43e5c036c diff --git a/.kokoro/test-samples-impl.sh b/.kokoro/test-samples-impl.sh index cf5de74c1..311a8d54b 100755 --- a/.kokoro/test-samples-impl.sh +++ b/.kokoro/test-samples-impl.sh @@ -20,9 +20,9 @@ set -eo pipefail # Enables `**` to include files nested inside sub-folders shopt -s globstar -# Exit early if samples directory doesn't exist -if [ ! -d "./samples" ]; then - echo "No tests run. `./samples` not found" +# Exit early if samples don't exist +if ! find samples -name 'requirements.txt' | grep -q .; then + echo "No tests run. 
'./samples/**/requirements.txt' not found" exit 0 fi From 5437d443c3e89cf0458771662c9ef2d2f2f8e4d8 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 14 Jul 2021 16:16:13 +0000 Subject: [PATCH 155/230] chore: release 2.21.0 (#760) :robot: I have created a release \*beep\* \*boop\* --- ## [2.21.0](https://www.github.com/googleapis/python-bigquery/compare/v2.20.0...v2.21.0) (2021-07-14) ### Features * add always_use_jwt_access ([#714](https://www.github.com/googleapis/python-bigquery/issues/714)) ([92fbd4a](https://www.github.com/googleapis/python-bigquery/commit/92fbd4ade37e0be49dc278080ef73c83eafeea18)) * add max_results parameter to some of the QueryJob methods ([#698](https://www.github.com/googleapis/python-bigquery/issues/698)) ([2a9618f](https://www.github.com/googleapis/python-bigquery/commit/2a9618f4daaa4a014161e1a2f7376844eec9e8da)) * add support for decimal target types ([#735](https://www.github.com/googleapis/python-bigquery/issues/735)) ([7d2d3e9](https://www.github.com/googleapis/python-bigquery/commit/7d2d3e906a9eb161911a198fb925ad79de5df934)) * add support for table snapshots ([#740](https://www.github.com/googleapis/python-bigquery/issues/740)) ([ba86b2a](https://www.github.com/googleapis/python-bigquery/commit/ba86b2a6300ae5a9f3c803beeb42bda4c522e34c)) * enable unsetting policy tags on schema fields ([#703](https://www.github.com/googleapis/python-bigquery/issues/703)) ([18bb443](https://www.github.com/googleapis/python-bigquery/commit/18bb443c7acd0a75dcb57d9aebe38b2d734ff8c7)) * make it easier to disable best-effort deduplication with streaming inserts ([#734](https://www.github.com/googleapis/python-bigquery/issues/734)) ([1246da8](https://www.github.com/googleapis/python-bigquery/commit/1246da86b78b03ca1aa2c45ec71649e294cfb2f1)) * Support passing struct data to the DB API ([#718](https://www.github.com/googleapis/python-bigquery/issues/718)) 
([38b3ef9](https://www.github.com/googleapis/python-bigquery/commit/38b3ef96c3dedc139b84f0ff06885141ae7ce78c)) ### Bug Fixes * inserting non-finite floats with insert_rows() ([#728](https://www.github.com/googleapis/python-bigquery/issues/728)) ([d047419](https://www.github.com/googleapis/python-bigquery/commit/d047419879e807e123296da2eee89a5253050166)) * use pandas function to check for NaN ([#750](https://www.github.com/googleapis/python-bigquery/issues/750)) ([67bc5fb](https://www.github.com/googleapis/python-bigquery/commit/67bc5fbd306be7cdffd216f3791d4024acfa95b3)) ### Documentation * add docs for all enums in module ([#745](https://www.github.com/googleapis/python-bigquery/issues/745)) ([145944f](https://www.github.com/googleapis/python-bigquery/commit/145944f24fedc4d739687399a8309f9d51d43dfd)) * omit mention of Python 2.7 in `CONTRIBUTING.rst` ([#706](https://www.github.com/googleapis/python-bigquery/issues/706)) ([27d6839](https://www.github.com/googleapis/python-bigquery/commit/27d6839ee8a40909e4199cfa0da8b6b64705b2e9)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). 
--- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7344542b4..5fba4c517 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history + ## [2.21.0](https://www.github.com/googleapis/python-bigquery/compare/v2.20.0...v2.21.0) (2021-07-12) From 5deef6f1c548791ccbe6e8daf7fd60876d727a7c Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 14 Jul 2021 12:49:33 -0400 Subject: [PATCH 156/230] build(python): remove python 3.7 from kokoro Dockerfile (#762) Source-Link: https://github.com/googleapis/synthtool/commit/e44dc0c742b1230887a73552357e0c18dcc30b92 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:5ff7446edeaede81c3ed58b23a4e76a5403fba1350ce28478045657303b6479d Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 2 +- .kokoro/docker/docs/Dockerfile | 35 ++-------------------------------- 2 files changed, 3 insertions(+), 34 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index a5d3697f2..cb06536da 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:e1793a23ae0ee9aafb2e3a53b564a351f74790dbe3c2d75f8fc3b8c43e5c036c + digest: sha256:5ff7446edeaede81c3ed58b23a4e76a5403fba1350ce28478045657303b6479d diff --git a/.kokoro/docker/docs/Dockerfile b/.kokoro/docker/docs/Dockerfile index 412b0b56a..4e1b1fb8b 100644 --- a/.kokoro/docker/docs/Dockerfile +++ b/.kokoro/docker/docs/Dockerfile @@ -40,6 +40,7 @@ RUN apt-get update \ libssl-dev \ libsqlite3-dev \ portaudio19-dev \ + python3-distutils \ redis-server \ software-properties-common \ ssh \ @@ -59,40 +60,8 @@ RUN apt-get update \ && rm -rf /var/lib/apt/lists/* \ && rm -f /var/cache/apt/archives/*.deb - -COPY fetch_gpg_keys.sh /tmp -# Install the desired versions of Python. 
-RUN set -ex \ - && export GNUPGHOME="$(mktemp -d)" \ - && echo "disable-ipv6" >> "${GNUPGHOME}/dirmngr.conf" \ - && /tmp/fetch_gpg_keys.sh \ - && for PYTHON_VERSION in 3.7.8 3.8.5; do \ - wget --no-check-certificate -O python-${PYTHON_VERSION}.tar.xz "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz" \ - && wget --no-check-certificate -O python-${PYTHON_VERSION}.tar.xz.asc "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz.asc" \ - && gpg --batch --verify python-${PYTHON_VERSION}.tar.xz.asc python-${PYTHON_VERSION}.tar.xz \ - && rm -r python-${PYTHON_VERSION}.tar.xz.asc \ - && mkdir -p /usr/src/python-${PYTHON_VERSION} \ - && tar -xJC /usr/src/python-${PYTHON_VERSION} --strip-components=1 -f python-${PYTHON_VERSION}.tar.xz \ - && rm python-${PYTHON_VERSION}.tar.xz \ - && cd /usr/src/python-${PYTHON_VERSION} \ - && ./configure \ - --enable-shared \ - # This works only on Python 2.7 and throws a warning on every other - # version, but seems otherwise harmless. - --enable-unicode=ucs4 \ - --with-system-ffi \ - --without-ensurepip \ - && make -j$(nproc) \ - && make install \ - && ldconfig \ - ; done \ - && rm -rf "${GNUPGHOME}" \ - && rm -rf /usr/src/python* \ - && rm -rf ~/.cache/ - RUN wget -O /tmp/get-pip.py 'https://bootstrap.pypa.io/get-pip.py' \ - && python3.7 /tmp/get-pip.py \ && python3.8 /tmp/get-pip.py \ && rm /tmp/get-pip.py -CMD ["python3.7"] +CMD ["python3.8"] From 45b93089f5398740413104285cc8acfd5ebc9c08 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 14 Jul 2021 21:20:27 +0200 Subject: [PATCH 157/230] fix: avoid possible job already exists error (#751) * fix: avoid possible job already exists error If job create request fails, a query job might still have started successfully. This commit handles this edge case and returns such a query job if one can be found.
* Catch only Conflict errors on query job create --- google/cloud/bigquery/client.py | 26 +++++++++++- tests/unit/test_client.py | 75 +++++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 2a02c7629..de259abce 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -3190,6 +3190,7 @@ def query( If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.QueryJobConfig` class. """ + job_id_given = job_id is not None job_id = _make_job_id(job_id, job_id_prefix) if project is None: @@ -3221,9 +3222,30 @@ def query( job_ref = job._JobReference(job_id, project=project, location=location) query_job = job.QueryJob(job_ref, query, client=self, job_config=job_config) - query_job._begin(retry=retry, timeout=timeout) - return query_job + try: + query_job._begin(retry=retry, timeout=timeout) + except core_exceptions.Conflict as create_exc: + # The thought is if someone is providing their own job IDs and they get + # their job ID generation wrong, this could end up returning results for + # the wrong query. We thus only try to recover if job ID was not given. 
+ if job_id_given: + raise create_exc + + try: + query_job = self.get_job( + job_id, + project=project, + location=location, + retry=retry, + timeout=timeout, + ) + except core_exceptions.GoogleAPIError: # (includes RetryError) + raise create_exc + else: + return query_job + else: + return query_job def insert_rows( self, diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index dffe7bdba..2be8daab6 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -4617,6 +4617,81 @@ def test_query_w_query_parameters(self): }, ) + def test_query_job_rpc_fail_w_random_error(self): + from google.api_core.exceptions import Unknown + from google.cloud.bigquery.job import QueryJob + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + job_create_error = Unknown("Not sure what went wrong.") + job_begin_patcher = mock.patch.object( + QueryJob, "_begin", side_effect=job_create_error + ) + with job_begin_patcher: + with pytest.raises(Unknown, match="Not sure what went wrong."): + client.query("SELECT 1;", job_id="123") + + def test_query_job_rpc_fail_w_conflict_job_id_given(self): + from google.api_core.exceptions import Conflict + from google.cloud.bigquery.job import QueryJob + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + job_create_error = Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + QueryJob, "_begin", side_effect=job_create_error + ) + with job_begin_patcher: + with pytest.raises(Conflict, match="Job already exists."): + client.query("SELECT 1;", job_id="123") + + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails(self): + from google.api_core.exceptions import Conflict + from google.api_core.exceptions import DataLoss + from google.cloud.bigquery.job import QueryJob + + creds = _make_credentials() + http = object() + client = 
self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + job_create_error = Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + QueryJob, "_begin", side_effect=job_create_error + ) + get_job_patcher = mock.patch.object( + client, "get_job", side_effect=DataLoss("we lost yor job, sorry") + ) + + with job_begin_patcher, get_job_patcher: + # If get job request fails, the original exception should be raised. + with pytest.raises(Conflict, match="Job already exists."): + client.query("SELECT 1;", job_id=None) + + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_succeeds(self): + from google.api_core.exceptions import Conflict + from google.cloud.bigquery.job import QueryJob + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + job_create_error = Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + QueryJob, "_begin", side_effect=job_create_error + ) + get_job_patcher = mock.patch.object( + client, "get_job", return_value=mock.sentinel.query_job + ) + + with job_begin_patcher, get_job_patcher: + result = client.query("SELECT 1;", job_id=None) + + assert result is mock.sentinel.query_job + def test_insert_rows_w_timeout(self): from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table From 2bb8de680d07b6b5b0bfd67ad2ef559886d38dca Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 14 Jul 2021 17:29:00 -0400 Subject: [PATCH 158/230] chore: expand range to allow 2.x versions (#768) api-core, cloud-core, and resumable-media will all be releasing Python3-only 2.x versions shortly. Closes #767.
--- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index fcb1dd966..71958ccf9 100644 --- a/setup.py +++ b/setup.py @@ -30,10 +30,10 @@ release_status = "Development Status :: 5 - Production/Stable" dependencies = [ "grpcio >= 1.38.1, < 2.0dev", # https://github.com/googleapis/python-bigquery/issues/695 - "google-api-core[grpc] >= 1.29.0, < 2.0.0dev", + "google-api-core[grpc] >= 1.29.0, < 3.0.0dev", "proto-plus >= 1.10.0", - "google-cloud-core >= 1.4.1, < 2.0dev", - "google-resumable-media >= 0.6.0, < 2.0dev", + "google-cloud-core >= 1.4.1, < 3.0dev", + "google-resumable-media >= 0.6.0, < 3.0dev", "packaging >= 14.3", "protobuf >= 3.12.0", "requests >= 2.18.0, < 3.0.0dev", From 87a09fa3f2a9ab35728a1ac925f9d5f2e6616c65 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 14 Jul 2021 17:30:13 -0500 Subject: [PATCH 159/230] deps: allow 2.x versions of `google-api-core`, `google-cloud-core`, `google-resumable-media` (#770) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit **Note**: PR is empty because this is purely to make sure CHANGELOG is updated. Follow-up to https://github.com/googleapis/python-bigquery/pull/768 Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #767 🦕 From c45a7380871af3dfbd3c45524cb606c60e1a01d1 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 15 Jul 2021 04:19:25 -0500 Subject: [PATCH 160/230] feat: add `LoadJobConfig.projection_fields` to select DATASTORE_BACKUP fields (#736) * feat: add LoadJobConfig.projection_fields to select DATASTORE_BACKUP fields * add type annotations * annotate setter too Co-authored-by: Peter Lamut --- google/cloud/bigquery/job/load.py | 24 ++++++++++++++++++++++-- tests/unit/job/test_load_config.py | 11 +++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index bdee5cb6b..f1b045412 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -14,7 +14,7 @@ """Classes for load jobs.""" -from typing import FrozenSet, Iterable, Optional +from typing import FrozenSet, List, Iterable, Optional from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery.external_config import HivePartitioningOptions @@ -25,7 +25,6 @@ from google.cloud.bigquery.table import RangePartitioning from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.table import TimePartitioning - from google.cloud.bigquery.job.base import _AsyncJob from google.cloud.bigquery.job.base import _JobConfig from google.cloud.bigquery.job.base import _JobReference @@ -300,6 +299,27 @@ def null_marker(self): def null_marker(self, value): self._set_sub_prop("nullMarker", value) + @property + def projection_fields(self) -> Optional[List[str]]: + """Optional[List[str]]: If + :attr:`google.cloud.bigquery.job.LoadJobConfig.source_format` is set to + "DATASTORE_BACKUP", indicates which 
entity properties to load into + BigQuery from a Cloud Datastore backup. + + Property names are case sensitive and must be top-level properties. If + no properties are specified, BigQuery loads all properties. If any + named property isn't found in the Cloud Datastore backup, an invalid + error is returned in the job result. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.projection_fields + """ + return self._get_sub_prop("projectionFields") + + @projection_fields.setter + def projection_fields(self, value: Optional[List[str]]): + self._set_sub_prop("projectionFields", value) + @property def quote_character(self): """Optional[str]: Character used to quote data sections (CSV only). diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index 190bd16dc..cbe087dac 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -424,6 +424,17 @@ def test_null_marker_setter(self): config.null_marker = null_marker self.assertEqual(config._properties["load"]["nullMarker"], null_marker) + def test_projection_fields_miss(self): + config = self._get_target_class()() + self.assertIsNone(config.projection_fields) + + def test_projection_fields_hit(self): + config = self._get_target_class()() + fields = ["email", "postal_code"] + config.projection_fields = fields + self.assertEqual(config._properties["load"]["projectionFields"], fields) + self.assertEqual(config.projection_fields, fields) + def test_quote_character_missing(self): config = self._get_target_class()() self.assertIsNone(config.quote_character) From 36fe86f41c1a8f46167284f752a6d6bbf886a04b Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 15 Jul 2021 19:37:17 +0200 Subject: [PATCH 161/230] feat: add support for more detailed DML stats (#758) * feat: add support for more detailed DML stats * Move is None check of DmlStats one level higher --- docs/reference.rst | 1 + google/cloud/bigquery/__init__.py | 2 
+ google/cloud/bigquery/job/__init__.py | 2 + google/cloud/bigquery/job/query.py | 37 ++++++++++++++++ tests/system/test_client.py | 56 +++++++++++++++++++++++ tests/unit/job/test_query.py | 64 +++++++++++++++++++++++++++ tests/unit/job/test_query_stats.py | 37 ++++++++++++++++ 7 files changed, 199 insertions(+) diff --git a/docs/reference.rst b/docs/reference.rst index cb2faa5ec..8c38d0c44 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -58,6 +58,7 @@ Job-Related Types job.Compression job.CreateDisposition job.DestinationFormat + job.DmlStats job.Encoding job.OperationType job.QueryPlanEntry diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 65dde5d94..ced8cefae 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -56,6 +56,7 @@ from google.cloud.bigquery.job import CopyJobConfig from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import DestinationFormat +from google.cloud.bigquery.job import DmlStats from google.cloud.bigquery.job import Encoding from google.cloud.bigquery.job import ExtractJob from google.cloud.bigquery.job import ExtractJobConfig @@ -142,6 +143,7 @@ "BigtableOptions", "BigtableColumnFamily", "BigtableColumn", + "DmlStats", "CSVOptions", "GoogleSheetsOptions", "ParquetOptions", diff --git a/google/cloud/bigquery/job/__init__.py b/google/cloud/bigquery/job/__init__.py index 6bdfa09be..4c16d0e20 100644 --- a/google/cloud/bigquery/job/__init__.py +++ b/google/cloud/bigquery/job/__init__.py @@ -31,6 +31,7 @@ from google.cloud.bigquery.job.load import LoadJob from google.cloud.bigquery.job.load import LoadJobConfig from google.cloud.bigquery.job.query import _contains_order_by +from google.cloud.bigquery.job.query import DmlStats from google.cloud.bigquery.job.query import QueryJob from google.cloud.bigquery.job.query import QueryJobConfig from google.cloud.bigquery.job.query import QueryPlanEntry @@ -66,6 +67,7 @@ "LoadJob", 
"LoadJobConfig", "_contains_order_by", + "DmlStats", "QueryJob", "QueryJobConfig", "QueryPlanEntry", diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 6ff9f2647..d588e9b5a 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -114,6 +114,35 @@ def _to_api_repr_table_defs(value): return {k: ExternalConfig.to_api_repr(v) for k, v in value.items()} +class DmlStats(typing.NamedTuple): + """Detailed statistics for DML statements. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/DmlStats + """ + + inserted_row_count: int = 0 + """Number of inserted rows. Populated by DML INSERT and MERGE statements.""" + + deleted_row_count: int = 0 + """Number of deleted rows. populated by DML DELETE, MERGE and TRUNCATE statements. + """ + + updated_row_count: int = 0 + """Number of updated rows. Populated by DML UPDATE and MERGE statements.""" + + @classmethod + def from_api_repr(cls, stats: Dict[str, str]) -> "DmlStats": + # NOTE: The field order here must match the order of fields set at the + # class level. + api_fields = ("insertedRowCount", "deletedRowCount", "updatedRowCount") + + args = ( + int(stats.get(api_field, default_val)) + for api_field, default_val in zip(api_fields, cls.__new__.__defaults__) + ) + return cls(*args) + + class ScriptOptions: """Options controlling the execution of scripts. 
@@ -1079,6 +1108,14 @@ def estimated_bytes_processed(self): result = int(result) return result + @property + def dml_stats(self) -> Optional[DmlStats]: + stats = self._job_statistics().get("dmlStats") + if stats is None: + return None + else: + return DmlStats.from_api_repr(stats) + def _blocking_poll(self, timeout=None, **kwargs): self._done_timeout = timeout self._transport_timeout = timeout diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 7234333a2..cbca73619 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -1521,6 +1521,62 @@ def test_query_statistics(self): self.assertGreater(stages_with_inputs, 0) self.assertGreater(len(plan), stages_with_inputs) + def test_dml_statistics(self): + table_schema = ( + bigquery.SchemaField("foo", "STRING"), + bigquery.SchemaField("bar", "INTEGER"), + ) + + dataset_id = _make_dataset_id("bq_system_test") + self.temp_dataset(dataset_id) + table_id = "{}.{}.test_dml_statistics".format(Config.CLIENT.project, dataset_id) + + # Create the table before loading so that the column order is deterministic. + table = helpers.retry_403(Config.CLIENT.create_table)( + Table(table_id, schema=table_schema) + ) + self.to_delete.insert(0, table) + + # Insert a few rows and check the stats. + sql = f""" + INSERT INTO `{table_id}` + VALUES ("one", 1), ("two", 2), ("three", 3), ("four", 4); + """ + query_job = Config.CLIENT.query(sql) + query_job.result() + + assert query_job.dml_stats is not None + assert query_job.dml_stats.inserted_row_count == 4 + assert query_job.dml_stats.updated_row_count == 0 + assert query_job.dml_stats.deleted_row_count == 0 + + # Update some of the rows. 
+ sql = f""" + UPDATE `{table_id}` + SET bar = bar + 1 + WHERE bar > 2; + """ + query_job = Config.CLIENT.query(sql) + query_job.result() + + assert query_job.dml_stats is not None + assert query_job.dml_stats.inserted_row_count == 0 + assert query_job.dml_stats.updated_row_count == 2 + assert query_job.dml_stats.deleted_row_count == 0 + + # Now delete a few rows and check the stats. + sql = f""" + DELETE FROM `{table_id}` + WHERE foo != "two"; + """ + query_job = Config.CLIENT.query(sql) + query_job.result() + + assert query_job.dml_stats is not None + assert query_job.dml_stats.inserted_row_count == 0 + assert query_job.dml_stats.updated_row_count == 0 + assert query_job.dml_stats.deleted_row_count == 3 + def test_dbapi_w_standard_sql_types(self): for sql, expected in helpers.STANDARD_SQL_EXAMPLES: Config.CURSOR.execute(sql) diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 4665933ea..482f7f3af 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -110,6 +110,24 @@ def _verify_table_definitions(self, job, config): self.assertIsNotNone(expected_ec) self.assertEqual(found_ec.to_api_repr(), expected_ec) + def _verify_dml_stats_resource_properties(self, job, resource): + query_stats = resource.get("statistics", {}).get("query", {}) + + if "dmlStats" in query_stats: + resource_dml_stats = query_stats["dmlStats"] + job_dml_stats = job.dml_stats + assert str(job_dml_stats.inserted_row_count) == resource_dml_stats.get( + "insertedRowCount", "0" + ) + assert str(job_dml_stats.updated_row_count) == resource_dml_stats.get( + "updatedRowCount", "0" + ) + assert str(job_dml_stats.deleted_row_count) == resource_dml_stats.get( + "deletedRowCount", "0" + ) + else: + assert job.dml_stats is None + def _verify_configuration_properties(self, job, configuration): if "dryRun" in configuration: self.assertEqual(job.dry_run, configuration["dryRun"]) @@ -118,6 +136,7 @@ def _verify_configuration_properties(self, job, configuration): 
def _verifyResourceProperties(self, job, resource): self._verifyReadonlyResourceProperties(job, resource) + self._verify_dml_stats_resource_properties(job, resource) configuration = resource.get("configuration", {}) self._verify_configuration_properties(job, configuration) @@ -130,16 +149,19 @@ def _verifyResourceProperties(self, job, resource): self._verify_table_definitions(job, query_config) self.assertEqual(job.query, query_config["query"]) + if "createDisposition" in query_config: self.assertEqual(job.create_disposition, query_config["createDisposition"]) else: self.assertIsNone(job.create_disposition) + if "defaultDataset" in query_config: ds_ref = job.default_dataset ds_ref = {"projectId": ds_ref.project, "datasetId": ds_ref.dataset_id} self.assertEqual(ds_ref, query_config["defaultDataset"]) else: self.assertIsNone(job.default_dataset) + if "destinationTable" in query_config: table = job.destination tb_ref = { @@ -150,14 +172,17 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(tb_ref, query_config["destinationTable"]) else: self.assertIsNone(job.destination) + if "priority" in query_config: self.assertEqual(job.priority, query_config["priority"]) else: self.assertIsNone(job.priority) + if "writeDisposition" in query_config: self.assertEqual(job.write_disposition, query_config["writeDisposition"]) else: self.assertIsNone(job.write_disposition) + if "destinationEncryptionConfiguration" in query_config: self.assertIsNotNone(job.destination_encryption_configuration) self.assertEqual( @@ -166,6 +191,7 @@ def _verifyResourceProperties(self, job, resource): ) else: self.assertIsNone(job.destination_encryption_configuration) + if "schemaUpdateOptions" in query_config: self.assertEqual( job.schema_update_options, query_config["schemaUpdateOptions"] @@ -190,6 +216,7 @@ def test_ctor_defaults(self): self.assertIsNone(job.create_disposition) self.assertIsNone(job.default_dataset) self.assertIsNone(job.destination) + 
self.assertIsNone(job.dml_stats) self.assertIsNone(job.flatten_results) self.assertIsNone(job.priority) self.assertIsNone(job.use_query_cache) @@ -278,6 +305,26 @@ def test_from_api_repr_with_encryption(self): self.assertIs(job._client, client) self._verifyResourceProperties(job, RESOURCE) + def test_from_api_repr_with_dml_stats(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"query": {"query": self.QUERY}}, + "statistics": { + "query": { + "dmlStats": {"insertedRowCount": "15", "updatedRowCount": "2"}, + }, + }, + } + klass = self._get_target_class() + + job = klass.from_api_repr(RESOURCE, client=client) + + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + def test_from_api_repr_w_properties(self): from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import SchemaUpdateOption @@ -815,6 +862,23 @@ def test_estimated_bytes_processed(self): query_stats["estimatedBytesProcessed"] = str(est_bytes) self.assertEqual(job.estimated_bytes_processed, est_bytes) + def test_dml_stats(self): + from google.cloud.bigquery.job.query import DmlStats + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + assert job.dml_stats is None + + statistics = job._properties["statistics"] = {} + assert job.dml_stats is None + + query_stats = statistics["query"] = {} + assert job.dml_stats is None + + query_stats["dmlStats"] = {"insertedRowCount": "35"} + assert isinstance(job.dml_stats, DmlStats) + assert job.dml_stats.inserted_row_count == 35 + def test_result(self): from google.cloud.bigquery.table import RowIterator diff --git a/tests/unit/job/test_query_stats.py b/tests/unit/job/test_query_stats.py index 09a0efc45..e70eb097c 100644 --- a/tests/unit/job/test_query_stats.py +++ b/tests/unit/job/test_query_stats.py @@ 
-15,6 +15,43 @@ from .helpers import _Base +class TestDmlStats: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import DmlStats + + return DmlStats + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor_defaults(self): + dml_stats = self._make_one() + assert dml_stats.inserted_row_count == 0 + assert dml_stats.deleted_row_count == 0 + assert dml_stats.updated_row_count == 0 + + def test_from_api_repr_partial_stats(self): + klass = self._get_target_class() + result = klass.from_api_repr({"deletedRowCount": "12"}) + + assert isinstance(result, klass) + assert result.inserted_row_count == 0 + assert result.deleted_row_count == 12 + assert result.updated_row_count == 0 + + def test_from_api_repr_full_stats(self): + klass = self._get_target_class() + result = klass.from_api_repr( + {"updatedRowCount": "4", "insertedRowCount": "7", "deletedRowCount": "25"} + ) + + assert isinstance(result, klass) + assert result.inserted_row_count == 7 + assert result.deleted_row_count == 25 + assert result.updated_row_count == 4 + + class TestQueryPlanEntryStep(_Base): KIND = "KIND" SUBSTEPS = ("SUB1", "SUB2") From 4ff8bed5c3f13df1930afee244ed776b21551800 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 16 Jul 2021 03:18:14 -0500 Subject: [PATCH 162/230] refactor: omit `read_session` with latest google-cloud-bigquery-storage (#748) * refactor: omit `read_session` with latest google-cloud-bigquery-storage `read_session` is unnecessary as of `google-cloud-bigquery-storage>=2.6.0`. This will allow us to more loudly deprecate the use of `rows(read_session)`. Rather than require 2.6.0, version switches will allow us to keep our requirements range wider. Will want to give this version some time to bake before making it required. 
* optimize _verify_bq_storage_version * fix failing tests due to optimization * fix unit tests * create BQStorageVersions class for version comparisons * add type annotations Also, use packaging directly, since that's all pkg_resources does https://github.com/pypa/setuptools/blob/a4dbe3457d89cf67ee3aa571fdb149e6eb544e88/pkg_resources/__init__.py\#L112 * allow legacy versions * fix coverage * fix coverage * add tests for version helpers --- google/cloud/bigquery/_helpers.py | 74 +++++++++++++++++------- google/cloud/bigquery/_pandas_helpers.py | 10 +++- google/cloud/bigquery/client.py | 4 +- google/cloud/bigquery/table.py | 2 +- tests/unit/test__helpers.py | 39 +++++++++++-- tests/unit/test__pandas_helpers.py | 69 ++++++++++++++++++++++ tests/unit/test_client.py | 4 +- tests/unit/test_magics.py | 2 +- tests/unit/test_table.py | 2 +- 9 files changed, 174 insertions(+), 32 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 77054542a..bf0f80e22 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -26,7 +26,7 @@ from google.cloud._helpers import _RFC3339_MICROS from google.cloud._helpers import _RFC3339_NO_FRACTION from google.cloud._helpers import _to_bytes -import pkg_resources +import packaging.version from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError @@ -41,31 +41,65 @@ re.VERBOSE, ) -_MIN_BQ_STORAGE_VERSION = pkg_resources.parse_version("2.0.0") +_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0") +_BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0") -def _verify_bq_storage_version(): - """Verify that a recent enough version of BigQuery Storage extra is installed. +class BQStorageVersions: + """Version comparisons for google-cloud-bigquery-storage package.""" - The function assumes that google-cloud-bigquery-storage extra is installed, and - should thus be used in places where this assumption holds. 
+ def __init__(self): + self._installed_version = None - Because `pip` can install an outdated version of this extra despite the constraints - in setup.py, the the calling code can use this helper to verify the version - compatibility at runtime. - """ - from google.cloud import bigquery_storage + @property + def installed_version(self) -> packaging.version.Version: + """Return the parsed version of google-cloud-bigquery-storage.""" + if self._installed_version is None: + from google.cloud import bigquery_storage - installed_version = pkg_resources.parse_version( - getattr(bigquery_storage, "__version__", "legacy") - ) + self._installed_version = packaging.version.parse( + # Use 0.0.0, since it is earlier than any released version. + # Legacy versions also have the same property, but + # creating a LegacyVersion has been deprecated. + # https://github.com/pypa/packaging/issues/321 + getattr(bigquery_storage, "__version__", "0.0.0") + ) - if installed_version < _MIN_BQ_STORAGE_VERSION: - msg = ( - "Dependency google-cloud-bigquery-storage is outdated, please upgrade " - f"it to version >= 2.0.0 (version found: {installed_version})." - ) - raise LegacyBigQueryStorageError(msg) + return self._installed_version + + @property + def is_read_session_optional(self) -> bool: + """True if read_session is optional to rows(). + + See: https://github.com/googleapis/python-bigquery-storage/pull/228 + """ + return self.installed_version >= _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION + + def verify_version(self): + """Verify that a recent enough version of BigQuery Storage extra is + installed. + + The function assumes that google-cloud-bigquery-storage extra is + installed, and should thus be used in places where this assumption + holds. + + Because `pip` can install an outdated version of this extra despite the + constraints in `setup.py`, the calling code can use this helper to + verify the version compatibility at runtime. 
+ + Raises: + LegacyBigQueryStorageError: + If the google-cloud-bigquery-storage package is outdated. + """ + if self.installed_version < _MIN_BQ_STORAGE_VERSION: + msg = ( + "Dependency google-cloud-bigquery-storage is outdated, please upgrade " + f"it to version >= 2.0.0 (version found: {self.installed_version})." + ) + raise LegacyBigQueryStorageError(msg) + + +BQ_STORAGE_VERSIONS = BQStorageVersions() def _not_null(value, field): diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 285c0e83c..2ff96da4d 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -41,6 +41,7 @@ # Having BQ Storage available implies that pyarrow >=1.0.0 is available, too. _ARROW_COMPRESSION_SUPPORT = True +from google.cloud.bigquery import _helpers from google.cloud.bigquery import schema @@ -590,7 +591,14 @@ def _bqstorage_page_to_dataframe(column_names, dtypes, page): def _download_table_bqstorage_stream( download_state, bqstorage_client, session, stream, worker_queue, page_to_item ): - rowstream = bqstorage_client.read_rows(stream.name).rows(session) + reader = bqstorage_client.read_rows(stream.name) + + # Avoid deprecation warnings for passing in unnecessary read session. 
+ # https://github.com/googleapis/python-bigquery-storage/issues/229 + if _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional: + rowstream = reader.rows() + else: + rowstream = reader.rows(session) for page in rowstream.pages: if download_state.done: diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index de259abce..8572ba911 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -61,7 +61,7 @@ from google.cloud.bigquery._helpers import _get_sub_prop from google.cloud.bigquery._helpers import _record_field_to_json from google.cloud.bigquery._helpers import _str_or_none -from google.cloud.bigquery._helpers import _verify_bq_storage_version +from google.cloud.bigquery._helpers import BQ_STORAGE_VERSIONS from google.cloud.bigquery._helpers import _verify_job_config_type from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _pandas_helpers @@ -508,7 +508,7 @@ def _ensure_bqstorage_client( return None try: - _verify_bq_storage_version() + BQ_STORAGE_VERSIONS.verify_version() except LegacyBigQueryStorageError as exc: warnings.warn(str(exc)) return None diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 765110ae6..2d9c15f50 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1565,7 +1565,7 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): return False try: - _helpers._verify_bq_storage_version() + _helpers.BQ_STORAGE_VERSIONS.verify_version() except LegacyBigQueryStorageError as exc: warnings.warn(str(exc)) return False diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index c62947d37..af026ccbe 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -26,11 +26,17 @@ @unittest.skipIf(bigquery_storage is None, "Requires `google-cloud-bigquery-storage`") -class Test_verify_bq_storage_version(unittest.TestCase): +class 
TestBQStorageVersions(unittest.TestCase): + def _object_under_test(self): + from google.cloud.bigquery import _helpers + + return _helpers.BQStorageVersions() + def _call_fut(self): - from google.cloud.bigquery._helpers import _verify_bq_storage_version + from google.cloud.bigquery import _helpers - return _verify_bq_storage_version() + _helpers.BQ_STORAGE_VERSIONS._installed_version = None + return _helpers.BQ_STORAGE_VERSIONS.verify_version() def test_raises_no_error_w_recent_bqstorage(self): from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError @@ -53,10 +59,35 @@ def test_raises_error_w_unknown_bqstorage_version(self): with mock.patch("google.cloud.bigquery_storage", autospec=True) as fake_module: del fake_module.__version__ - error_pattern = r"version found: legacy" + error_pattern = r"version found: 0.0.0" with self.assertRaisesRegex(LegacyBigQueryStorageError, error_pattern): self._call_fut() + def test_installed_version_returns_cached(self): + versions = self._object_under_test() + versions._installed_version = object() + assert versions.installed_version is versions._installed_version + + def test_installed_version_returns_parsed_version(self): + versions = self._object_under_test() + + with mock.patch("google.cloud.bigquery_storage.__version__", new="1.2.3"): + version = versions.installed_version + + assert version.major == 1 + assert version.minor == 2 + assert version.micro == 3 + + def test_is_read_session_optional_true(self): + versions = self._object_under_test() + with mock.patch("google.cloud.bigquery_storage.__version__", new="2.6.0"): + assert versions.is_read_session_optional + + def test_is_read_session_optional_false(self): + versions = self._object_under_test() + with mock.patch("google.cloud.bigquery_storage.__version__", new="2.5.0"): + assert not versions.is_read_session_optional + class Test_not_null(unittest.TestCase): def _call_fut(self, value, field): diff --git a/tests/unit/test__pandas_helpers.py 
b/tests/unit/test__pandas_helpers.py index aa87e28f5..0ba671cd9 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -40,11 +40,14 @@ import pytz from google import api_core +from google.cloud.bigquery import _helpers from google.cloud.bigquery import schema from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT try: from google.cloud import bigquery_storage + + _helpers.BQ_STORAGE_VERSIONS.verify_version() except ImportError: # pragma: NO COVER bigquery_storage = None @@ -1311,6 +1314,72 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test): assert schema_arg == expected_schema_arg +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test__download_table_bqstorage_stream_includes_read_session( + monkeypatch, module_under_test +): + import google.cloud.bigquery_storage_v1.reader + import google.cloud.bigquery_storage_v1.types + + monkeypatch.setattr(_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None) + monkeypatch.setattr(bigquery_storage, "__version__", "2.5.0") + bqstorage_client = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + reader = mock.create_autospec( + google.cloud.bigquery_storage_v1.reader.ReadRowsStream, instance=True + ) + bqstorage_client.read_rows.return_value = reader + session = google.cloud.bigquery_storage_v1.types.ReadSession() + + module_under_test._download_table_bqstorage_stream( + module_under_test._DownloadState(), + bqstorage_client, + session, + google.cloud.bigquery_storage_v1.types.ReadStream(name="test"), + queue.Queue(), + mock.Mock(), + ) + + reader.rows.assert_called_once_with(session) + + +@pytest.mark.skipif( + bigquery_storage is None + or not _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional, + reason="Requires `google-cloud-bigquery-storage` >= 2.6.0", +) +def test__download_table_bqstorage_stream_omits_read_session( + monkeypatch, module_under_test +): + 
import google.cloud.bigquery_storage_v1.reader + import google.cloud.bigquery_storage_v1.types + + monkeypatch.setattr(_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None) + monkeypatch.setattr(bigquery_storage, "__version__", "2.6.0") + bqstorage_client = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + reader = mock.create_autospec( + google.cloud.bigquery_storage_v1.reader.ReadRowsStream, instance=True + ) + bqstorage_client.read_rows.return_value = reader + session = google.cloud.bigquery_storage_v1.types.ReadSession() + + module_under_test._download_table_bqstorage_stream( + module_under_test._DownloadState(), + bqstorage_client, + session, + google.cloud.bigquery_storage_v1.types.ReadStream(name="test"), + queue.Queue(), + mock.Mock(), + ) + + reader.rows.assert_called_once_with() + + @pytest.mark.parametrize( "stream_count,maxsize_kwarg,expected_call_count,expected_maxsize", [ diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 2be8daab6..6b62eb85b 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -663,7 +663,7 @@ def test_ensure_bqstorage_client_obsolete_dependency(self): client = self._make_one(project=self.PROJECT, credentials=creds) patcher = mock.patch( - "google.cloud.bigquery.client._verify_bq_storage_version", + "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", side_effect=LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: @@ -700,7 +700,7 @@ def test_ensure_bqstorage_client_existing_client_check_fails(self): mock_storage_client = mock.sentinel.mock_storage_client patcher = mock.patch( - "google.cloud.bigquery.client._verify_bq_storage_version", + "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", side_effect=LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: diff --git a/tests/unit/test_magics.py 
b/tests/unit/test_magics.py index 5e9bf28a9..d030482cc 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -368,7 +368,7 @@ def test__make_bqstorage_client_true_obsolete_dependency(): ) patcher = mock.patch( - "google.cloud.bigquery.client._verify_bq_storage_version", + "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version", side_effect=LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index b30f16fe0..37650cd27 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1889,7 +1889,7 @@ def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self): iterator = self._make_one(first_page_response=None) # not cached patcher = mock.patch( - "google.cloud.bigquery.table._helpers._verify_bq_storage_version", + "google.cloud.bigquery.table._helpers.BQ_STORAGE_VERSIONS.verify_version", side_effect=LegacyBigQueryStorageError("BQ Storage too old"), ) with patcher, warnings.catch_warnings(record=True) as warned: From 22fd848cae4af1148040e1faa31dd15a4d674687 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 16 Jul 2021 12:02:15 -0500 Subject: [PATCH 163/230] docs: add loading data from Firestore backup sample (#737) Follow-up to https://github.com/googleapis/python-bigquery/pull/736 To be included here: https://cloud.google.com/bigquery/docs/loading-data-cloud-firestore Also * Use `google-cloud-testutils` for cleanup as described in https://github.com/googleapis/python-test-utils/pull/39 --- samples/snippets/conftest.py | 39 +++++-------- samples/snippets/load_table_uri_firestore.py | 55 +++++++++++++++++++ .../snippets/load_table_uri_firestore_test.py | 21 +++++++ samples/snippets/requirements-test.txt | 1 + samples/snippets/test_update_with_dml.py | 4 +- 5 files changed, 94 insertions(+), 26 deletions(-) create mode 100644 samples/snippets/load_table_uri_firestore.py create mode 
100644 samples/snippets/load_table_uri_firestore_test.py diff --git a/samples/snippets/conftest.py b/samples/snippets/conftest.py index cb11eb68f..000e5f85c 100644 --- a/samples/snippets/conftest.py +++ b/samples/snippets/conftest.py @@ -12,38 +12,18 @@ # See the License for the specific language governing permissions and # limitations under the License. -import datetime -import random - from google.cloud import bigquery import pytest +import test_utils.prefixer -RESOURCE_PREFIX = "python_bigquery_samples_snippets" -RESOURCE_DATE_FORMAT = "%Y%m%d_%H%M%S" -RESOURCE_DATE_LENGTH = 4 + 2 + 2 + 1 + 2 + 2 + 2 - - -def resource_prefix() -> str: - timestamp = datetime.datetime.utcnow().strftime(RESOURCE_DATE_FORMAT) - random_string = hex(random.randrange(1000000))[2:] - return f"{RESOURCE_PREFIX}_{timestamp}_{random_string}" - - -def resource_name_to_date(resource_name: str): - start_date = len(RESOURCE_PREFIX) + 1 - date_string = resource_name[start_date : start_date + RESOURCE_DATE_LENGTH] - return datetime.datetime.strptime(date_string, RESOURCE_DATE_FORMAT) +prefixer = test_utils.prefixer.Prefixer("python-bigquery", "samples/snippets") @pytest.fixture(scope="session", autouse=True) def cleanup_datasets(bigquery_client: bigquery.Client): - yesterday = datetime.datetime.utcnow() - datetime.timedelta(days=1) for dataset in bigquery_client.list_datasets(): - if ( - dataset.dataset_id.startswith(RESOURCE_PREFIX) - and resource_name_to_date(dataset.dataset_id) < yesterday - ): + if prefixer.should_cleanup(dataset.dataset_id): bigquery_client.delete_dataset( dataset, delete_contents=True, not_found_ok=True ) @@ -62,7 +42,7 @@ def project_id(bigquery_client): @pytest.fixture(scope="session") def dataset_id(bigquery_client: bigquery.Client, project_id: str): - dataset_id = resource_prefix() + dataset_id = prefixer.create_prefix() full_dataset_id = f"{project_id}.{dataset_id}" dataset = bigquery.Dataset(full_dataset_id) bigquery_client.create_dataset(dataset) @@ -70,6 +50,17 @@ 
def dataset_id(bigquery_client: bigquery.Client, project_id: str): bigquery_client.delete_dataset(dataset, delete_contents=True, not_found_ok=True) +@pytest.fixture +def random_table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): + """Create a new table ID each time, so random_table_id can be used as + target for load jobs. + """ + random_table_id = prefixer.create_prefix() + full_table_id = f"{project_id}.{dataset_id}.{random_table_id}" + yield full_table_id + bigquery_client.delete_table(full_table_id, not_found_ok=True) + + @pytest.fixture def bigquery_client_patch(monkeypatch, bigquery_client): monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client) diff --git a/samples/snippets/load_table_uri_firestore.py b/samples/snippets/load_table_uri_firestore.py new file mode 100644 index 000000000..bf9d01349 --- /dev/null +++ b/samples/snippets/load_table_uri_firestore.py @@ -0,0 +1,55 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def load_table_uri_firestore(table_id): + orig_table_id = table_id + # [START bigquery_load_table_gcs_firestore] + # TODO(developer): Set table_id to the ID of the table to create. 
+ table_id = "your-project.your_dataset.your_table_name" + + # TODO(developer): Set uri to the path of the kind export metadata + uri = ( + "gs://cloud-samples-data/bigquery/us-states" + "/2021-07-02T16:04:48_70344/all_namespaces/kind_us-states" + "/all_namespaces_kind_us-states.export_metadata" + ) + + # TODO(developer): Set projection_fields to a list of document properties + # to import. Leave unset or set to `None` for all fields. + projection_fields = ["name", "post_abbr"] + + # [END bigquery_load_table_gcs_firestore] + table_id = orig_table_id + + # [START bigquery_load_table_gcs_firestore] + from google.cloud import bigquery + + # Construct a BigQuery client object. + client = bigquery.Client() + + job_config = bigquery.LoadJobConfig( + source_format=bigquery.SourceFormat.DATASTORE_BACKUP, + projection_fields=projection_fields, + ) + + load_job = client.load_table_from_uri( + uri, table_id, job_config=job_config + ) # Make an API request. + + load_job.result() # Waits for the job to complete. + + destination_table = client.get_table(table_id) + print("Loaded {} rows.".format(destination_table.num_rows)) + # [END bigquery_load_table_gcs_firestore] diff --git a/samples/snippets/load_table_uri_firestore_test.py b/samples/snippets/load_table_uri_firestore_test.py new file mode 100644 index 000000000..ffa02cdf9 --- /dev/null +++ b/samples/snippets/load_table_uri_firestore_test.py @@ -0,0 +1,21 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import load_table_uri_firestore + + +def test_load_table_uri_firestore(capsys, random_table_id): + load_table_uri_firestore.load_table_uri_firestore(random_table_id) + out, _ = capsys.readouterr() + assert "Loaded 50 rows." in out diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index b0cf76724..9e9d4e40f 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,2 +1,3 @@ +google-cloud-testutils==0.3.0 pytest==6.2.4 mock==4.0.3 diff --git a/samples/snippets/test_update_with_dml.py b/samples/snippets/test_update_with_dml.py index 3cca7a649..912fd76e2 100644 --- a/samples/snippets/test_update_with_dml.py +++ b/samples/snippets/test_update_with_dml.py @@ -15,13 +15,13 @@ from google.cloud import bigquery import pytest -from conftest import resource_prefix +from conftest import prefixer import update_with_dml @pytest.fixture def table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): - table_id = f"{resource_prefix()}_update_with_dml" + table_id = f"{prefixer.create_prefix()}_update_with_dml" yield table_id full_table_id = f"{project_id}.{dataset_id}.{table_id}" bigquery_client.delete_table(full_table_id, not_found_ok=True) From b8b5433898ec881f8da1303614780a660d94733a Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Mon, 19 Jul 2021 10:00:41 +0200 Subject: [PATCH 164/230] feat: add standard sql table type, update scalar type enums (#777) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add standard sql table type, update scalar type enums Committer: @shollyman PiperOrigin-RevId: 385164907 Source-Link: https://github.com/googleapis/googleapis/commit/9ae82b82bdb634058af4b2bafe53c37b8566f68d Source-Link: 
https://github.com/googleapis/googleapis-gen/commit/bc1724b0b544bdcd9b5b2f4e3d8676f75adacfdf * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/master/packages/owl-bot/README.md * fix: exclude copying microgenerated '.coveragerc' * fix: add 'INTERVAL'/'JSON' to _SQL_SCALAR_TYPES Co-authored-by: Owl Bot Co-authored-by: Tres Seaver --- google/cloud/bigquery/enums.py | 2 ++ google/cloud/bigquery_v2/__init__.py | 2 ++ google/cloud/bigquery_v2/types/__init__.py | 2 ++ .../cloud/bigquery_v2/types/standard_sql.py | 19 ++++++++++++++++++- owlbot.py | 1 + 5 files changed, 25 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index ef35dffe0..0da01d665 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -191,9 +191,11 @@ class KeyResultStatementKind: "DATE", "TIME", "DATETIME", + "INTERVAL", "GEOGRAPHY", "NUMERIC", "BIGNUMERIC", + "JSON", ) ) diff --git a/google/cloud/bigquery_v2/__init__.py b/google/cloud/bigquery_v2/__init__.py index 476bd5747..f9957efa9 100644 --- a/google/cloud/bigquery_v2/__init__.py +++ b/google/cloud/bigquery_v2/__init__.py @@ -26,6 +26,7 @@ from .types.standard_sql import StandardSqlDataType from .types.standard_sql import StandardSqlField from .types.standard_sql import StandardSqlStructType +from .types.standard_sql import StandardSqlTableType from .types.table_reference import TableReference __all__ = ( @@ -40,5 +41,6 @@ "StandardSqlDataType", "StandardSqlField", "StandardSqlStructType", + "StandardSqlTableType", "TableReference", ) diff --git a/google/cloud/bigquery_v2/types/__init__.py b/google/cloud/bigquery_v2/types/__init__.py index 9c850dca1..83bbb3a54 100644 --- a/google/cloud/bigquery_v2/types/__init__.py +++ b/google/cloud/bigquery_v2/types/__init__.py @@ -27,6 +27,7 @@ StandardSqlDataType, StandardSqlField, StandardSqlStructType, + StandardSqlTableType, ) from .table_reference import TableReference @@ -42,5 +43,6 @@ 
"StandardSqlDataType", "StandardSqlField", "StandardSqlStructType", + "StandardSqlTableType", "TableReference", ) diff --git a/google/cloud/bigquery_v2/types/standard_sql.py b/google/cloud/bigquery_v2/types/standard_sql.py index b2191a417..7a845fc48 100644 --- a/google/cloud/bigquery_v2/types/standard_sql.py +++ b/google/cloud/bigquery_v2/types/standard_sql.py @@ -18,7 +18,12 @@ __protobuf__ = proto.module( package="google.cloud.bigquery.v2", - manifest={"StandardSqlDataType", "StandardSqlField", "StandardSqlStructType",}, + manifest={ + "StandardSqlDataType", + "StandardSqlField", + "StandardSqlStructType", + "StandardSqlTableType", + }, ) @@ -54,9 +59,11 @@ class TypeKind(proto.Enum): DATE = 10 TIME = 20 DATETIME = 21 + INTERVAL = 26 GEOGRAPHY = 22 NUMERIC = 23 BIGNUMERIC = 24 + JSON = 25 ARRAY = 16 STRUCT = 17 @@ -97,4 +104,14 @@ class StandardSqlStructType(proto.Message): fields = proto.RepeatedField(proto.MESSAGE, number=1, message="StandardSqlField",) +class StandardSqlTableType(proto.Message): + r"""A table type + Attributes: + columns (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]): + The columns in this table type + """ + + columns = proto.RepeatedField(proto.MESSAGE, number=1, message="StandardSqlField",) + + __all__ = tuple(sorted(__protobuf__.manifest)) diff --git a/owlbot.py b/owlbot.py index 476c5ee5d..09845480a 100644 --- a/owlbot.py +++ b/owlbot.py @@ -70,6 +70,7 @@ library, excludes=[ "*.tar.gz", + ".coveragerc", "docs/index.rst", f"docs/bigquery_{library.name}/*_service.rst", f"docs/bigquery_{library.name}/services.rst", From 8c7b839a6ac1491c1c3b6b0e8755f4b70ed72ee3 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 19 Jul 2021 22:39:44 +0200 Subject: [PATCH 165/230] feat: add support for user defined Table View Functions (#724) * Add auxiliary classes for TVF routines * Add return_table_type property to Routine * Add system test for TVF routines * Use the generated StandardSqlTableType class * Update docs with new changes * Add 
missing space in misc. Sphinx directives --- docs/reference.rst | 1 + google/cloud/bigquery/__init__.py | 2 + google/cloud/bigquery/job/query.py | 14 +-- google/cloud/bigquery/routine/__init__.py | 2 + google/cloud/bigquery/routine/routine.py | 45 ++++++++ google/cloud/bigquery/table.py | 14 +-- tests/system/test_client.py | 79 ++++++++++++++ tests/unit/routine/test_routine.py | 127 ++++++++++++++++++++++ 8 files changed, 270 insertions(+), 14 deletions(-) diff --git a/docs/reference.rst b/docs/reference.rst index 8c38d0c44..8a5bff9a4 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -118,6 +118,7 @@ Routine routine.Routine routine.RoutineArgument routine.RoutineReference + routine.RoutineType Schema ====== diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index ced8cefae..222aadcc9 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -85,6 +85,7 @@ from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineArgument from google.cloud.bigquery.routine import RoutineReference +from google.cloud.bigquery.routine import RoutineType from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import PartitionRange from google.cloud.bigquery.table import RangePartitioning @@ -162,6 +163,7 @@ "KeyResultStatementKind", "OperationType", "QueryPriority", + "RoutineType", "SchemaUpdateOption", "SourceFormat", "SqlTypeNames", diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index d588e9b5a..2cb7ee28e 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1386,12 +1386,12 @@ def to_arrow( This argument does nothing if ``bqstorage_client`` is supplied. - ..versionadded:: 1.24.0 + .. versionadded:: 1.24.0 max_results (Optional[int]): Maximum number of rows to include in the result. No limit by default. - ..versionadded:: 2.21.0 + .. 
versionadded:: 2.21.0 Returns: pyarrow.Table @@ -1403,7 +1403,7 @@ def to_arrow( ValueError: If the :mod:`pyarrow` library cannot be imported. - ..versionadded:: 1.17.0 + .. versionadded:: 1.17.0 """ query_result = wait_for_query(self, progress_bar_type, max_results=max_results) return query_result.to_arrow( @@ -1452,7 +1452,7 @@ def to_dataframe( :func:`~google.cloud.bigquery.table.RowIterator.to_dataframe` for details. - ..versionadded:: 1.11.0 + .. versionadded:: 1.11.0 create_bqstorage_client (Optional[bool]): If ``True`` (default), create a BigQuery Storage API client using the default API settings. The BigQuery Storage API @@ -1461,18 +1461,18 @@ def to_dataframe( This argument does nothing if ``bqstorage_client`` is supplied. - ..versionadded:: 1.24.0 + .. versionadded:: 1.24.0 date_as_object (Optional[bool]): If ``True`` (default), cast dates to objects. If ``False``, convert to datetime64[ns] dtype. - ..versionadded:: 1.26.0 + .. versionadded:: 1.26.0 max_results (Optional[int]): Maximum number of rows to include in the result. No limit by default. - ..versionadded:: 2.21.0 + .. 
versionadded:: 2.21.0 Returns: A :class:`~pandas.DataFrame` populated with row data and column diff --git a/google/cloud/bigquery/routine/__init__.py b/google/cloud/bigquery/routine/__init__.py index d1c79b05e..7353073c8 100644 --- a/google/cloud/bigquery/routine/__init__.py +++ b/google/cloud/bigquery/routine/__init__.py @@ -19,6 +19,7 @@ from google.cloud.bigquery.routine.routine import Routine from google.cloud.bigquery.routine.routine import RoutineArgument from google.cloud.bigquery.routine.routine import RoutineReference +from google.cloud.bigquery.routine.routine import RoutineType __all__ = ( @@ -26,4 +27,5 @@ "Routine", "RoutineArgument", "RoutineReference", + "RoutineType", ) diff --git a/google/cloud/bigquery/routine/routine.py b/google/cloud/bigquery/routine/routine.py index bbc0a7693..a776212c3 100644 --- a/google/cloud/bigquery/routine/routine.py +++ b/google/cloud/bigquery/routine/routine.py @@ -21,6 +21,21 @@ import google.cloud._helpers from google.cloud.bigquery import _helpers import google.cloud.bigquery_v2.types +from google.cloud.bigquery_v2.types import StandardSqlTableType + + +class RoutineType: + """The fine-grained type of the routine. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#routinetype + + .. 
versionadded:: 2.22.0 + """ + + ROUTINE_TYPE_UNSPECIFIED = "ROUTINE_TYPE_UNSPECIFIED" + SCALAR_FUNCTION = "SCALAR_FUNCTION" + PROCEDURE = "PROCEDURE" + TABLE_VALUED_FUNCTION = "TABLE_VALUED_FUNCTION" class Routine(object): @@ -48,6 +63,7 @@ class Routine(object): "modified": "lastModifiedTime", "reference": "routineReference", "return_type": "returnType", + "return_table_type": "returnTableType", "type_": "routineType", "description": "description", "determinism_level": "determinismLevel", @@ -204,6 +220,35 @@ def return_type(self, value): resource = None self._properties[self._PROPERTY_TO_API_FIELD["return_type"]] = resource + @property + def return_table_type(self) -> StandardSqlTableType: + """The return type of a Table Valued Function (TVF) routine. + + .. versionadded:: 2.22.0 + """ + resource = self._properties.get( + self._PROPERTY_TO_API_FIELD["return_table_type"] + ) + if not resource: + return resource + + output = google.cloud.bigquery_v2.types.StandardSqlTableType() + raw_protobuf = json_format.ParseDict( + resource, output._pb, ignore_unknown_fields=True + ) + return type(output).wrap(raw_protobuf) + + @return_table_type.setter + def return_table_type(self, value): + if not value: + resource = None + else: + resource = { + "columns": [json_format.MessageToDict(col._pb) for col in value.columns] + } + + self._properties[self._PROPERTY_TO_API_FIELD["return_table_type"]] = resource + @property def imported_libraries(self): """List[str]: The path of the imported JavaScript libraries. diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 2d9c15f50..18d969a3f 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1684,7 +1684,7 @@ def to_arrow( This argument does nothing if ``bqstorage_client`` is supplied. - ..versionadded:: 1.24.0 + .. versionadded:: 1.24.0 Returns: pyarrow.Table @@ -1695,7 +1695,7 @@ def to_arrow( Raises: ValueError: If the :mod:`pyarrow` library cannot be imported. 
- ..versionadded:: 1.17.0 + .. versionadded:: 1.17.0 """ if pyarrow is None: raise ValueError(_NO_PYARROW_ERROR) @@ -1775,7 +1775,7 @@ def to_dataframe_iterable( created by the server. If ``max_queue_size`` is :data:`None`, the queue size is infinite. - ..versionadded:: 2.14.0 + .. versionadded:: 2.14.0 Returns: pandas.DataFrame: @@ -1861,7 +1861,7 @@ def to_dataframe( Use the :func:`tqdm.tqdm_gui` function to display a progress bar as a graphical dialog box. - ..versionadded:: 1.11.0 + .. versionadded:: 1.11.0 create_bqstorage_client (Optional[bool]): If ``True`` (default), create a BigQuery Storage API client using the default API settings. The BigQuery Storage API @@ -1870,13 +1870,13 @@ def to_dataframe( This argument does nothing if ``bqstorage_client`` is supplied. - ..versionadded:: 1.24.0 + .. versionadded:: 1.24.0 date_as_object (Optional[bool]): If ``True`` (default), cast dates to objects. If ``False``, convert to datetime64[ns] dtype. - ..versionadded:: 1.26.0 + .. versionadded:: 1.26.0 Returns: pandas.DataFrame: @@ -2010,7 +2010,7 @@ def to_dataframe_iterable( ) -> Iterator["pandas.DataFrame"]: """Create an iterable of pandas DataFrames, to process the table as a stream. - ..versionadded:: 2.21.0 + .. 
versionadded:: 2.21.0 Args: bqstorage_client: diff --git a/tests/system/test_client.py b/tests/system/test_client.py index cbca73619..ceb62b8cd 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2228,6 +2228,85 @@ def test_create_routine(self): assert len(rows) == 1 assert rows[0].max_value == 100.0 + def test_create_tvf_routine(self): + from google.cloud.bigquery import Routine, RoutineArgument, RoutineType + + StandardSqlDataType = bigquery_v2.types.StandardSqlDataType + StandardSqlField = bigquery_v2.types.StandardSqlField + StandardSqlTableType = bigquery_v2.types.StandardSqlTableType + + INT64 = StandardSqlDataType.TypeKind.INT64 + STRING = StandardSqlDataType.TypeKind.STRING + + client = Config.CLIENT + + dataset = self.temp_dataset(_make_dataset_id("create_tvf_routine")) + routine_ref = dataset.routine("test_tvf_routine") + + routine_body = """ + SELECT int_col, str_col + FROM ( + UNNEST([1, 2, 3]) int_col + JOIN + (SELECT str_col FROM UNNEST(["one", "two", "three"]) str_col) + ON TRUE + ) + WHERE int_col > threshold + """ + + return_table_type = StandardSqlTableType( + columns=[ + StandardSqlField( + name="int_col", type=StandardSqlDataType(type_kind=INT64), + ), + StandardSqlField( + name="str_col", type=StandardSqlDataType(type_kind=STRING), + ), + ] + ) + + routine_args = [ + RoutineArgument( + name="threshold", data_type=StandardSqlDataType(type_kind=INT64), + ) + ] + + routine_def = Routine( + routine_ref, + type_=RoutineType.TABLE_VALUED_FUNCTION, + arguments=routine_args, + return_table_type=return_table_type, + body=routine_body, + ) + + # Create TVF routine. + client.delete_routine(routine_ref, not_found_ok=True) + routine = client.create_routine(routine_def) + + assert routine.body == routine_body + assert routine.return_table_type == return_table_type + assert routine.arguments == routine_args + + # Execute the routine to see if it's working as expected. 
+ query_job = client.query( + f""" + SELECT int_col, str_col + FROM `{routine.reference}`(1) + ORDER BY int_col, str_col ASC + """ + ) + + result_rows = [tuple(row) for row in query_job.result()] + expected = [ + (2, "one"), + (2, "three"), + (2, "two"), + (3, "one"), + (3, "three"), + (3, "two"), + ] + assert result_rows == expected + def test_create_table_rows_fetch_nested_schema(self): table_name = "test_table" dataset = self.temp_dataset(_make_dataset_id("create_table_nested_schema")) diff --git a/tests/unit/routine/test_routine.py b/tests/unit/routine/test_routine.py index 0a59e7c5f..fdaf13324 100644 --- a/tests/unit/routine/test_routine.py +++ b/tests/unit/routine/test_routine.py @@ -156,12 +156,86 @@ def test_from_api_repr(target_class): assert actual_routine.return_type == bigquery_v2.types.StandardSqlDataType( type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64 ) + assert actual_routine.return_table_type is None assert actual_routine.type_ == "SCALAR_FUNCTION" assert actual_routine._properties["someNewField"] == "someValue" assert actual_routine.description == "A routine description." 
assert actual_routine.determinism_level == "DETERMINISTIC" +def test_from_api_repr_tvf_function(target_class): + from google.cloud.bigquery.routine import RoutineArgument + from google.cloud.bigquery.routine import RoutineReference + from google.cloud.bigquery.routine import RoutineType + + StandardSqlDataType = bigquery_v2.types.StandardSqlDataType + StandardSqlField = bigquery_v2.types.StandardSqlField + StandardSqlTableType = bigquery_v2.types.StandardSqlTableType + + creation_time = datetime.datetime( + 2010, 5, 19, 16, 0, 0, tzinfo=google.cloud._helpers.UTC + ) + modified_time = datetime.datetime( + 2011, 10, 1, 16, 0, 0, tzinfo=google.cloud._helpers.UTC + ) + resource = { + "routineReference": { + "projectId": "my-project", + "datasetId": "my_dataset", + "routineId": "my_routine", + }, + "etag": "abcdefg", + "creationTime": str(google.cloud._helpers._millis(creation_time)), + "lastModifiedTime": str(google.cloud._helpers._millis(modified_time)), + "definitionBody": "SELECT x FROM UNNEST([1,2,3]) x WHERE x > a", + "arguments": [{"name": "a", "dataType": {"typeKind": "INT64"}}], + "language": "SQL", + "returnTableType": { + "columns": [{"name": "int_col", "type": {"typeKind": "INT64"}}] + }, + "routineType": "TABLE_VALUED_FUNCTION", + "someNewField": "someValue", + "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISTIC, + } + actual_routine = target_class.from_api_repr(resource) + + assert actual_routine.project == "my-project" + assert actual_routine.dataset_id == "my_dataset" + assert actual_routine.routine_id == "my_routine" + assert ( + actual_routine.path + == "/projects/my-project/datasets/my_dataset/routines/my_routine" + ) + assert actual_routine.reference == RoutineReference.from_string( + "my-project.my_dataset.my_routine" + ) + assert actual_routine.etag == "abcdefg" + assert actual_routine.created == creation_time + assert actual_routine.modified == modified_time + assert actual_routine.arguments == [ 
+ RoutineArgument( + name="a", + data_type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.INT64), + ) + ] + assert actual_routine.body == "SELECT x FROM UNNEST([1,2,3]) x WHERE x > a" + assert actual_routine.language == "SQL" + assert actual_routine.return_type is None + assert actual_routine.return_table_type == StandardSqlTableType( + columns=[ + StandardSqlField( + name="int_col", + type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.INT64), + ) + ] + ) + assert actual_routine.type_ == RoutineType.TABLE_VALUED_FUNCTION + assert actual_routine._properties["someNewField"] == "someValue" + assert actual_routine.description == "A routine description." + assert actual_routine.determinism_level == "DETERMINISTIC" + + def test_from_api_repr_w_minimal_resource(target_class): from google.cloud.bigquery.routine import RoutineReference @@ -261,6 +335,24 @@ def test_from_api_repr_w_unknown_fields(target_class): ["return_type"], {"returnType": {"typeKind": "INT64"}}, ), + ( + { + "definitionBody": "SELECT x FROM UNNEST([1,2,3]) x WHERE x > 1", + "language": "SQL", + "returnTableType": { + "columns": [{"name": "int_col", "type": {"typeKind": "INT64"}}] + }, + "routineType": "TABLE_VALUED_FUNCTION", + "description": "A routine description.", + "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED, + }, + ["return_table_type"], + { + "returnTableType": { + "columns": [{"name": "int_col", "type": {"typeKind": "INT64"}}] + } + }, + ), ( { "arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}], @@ -361,6 +453,41 @@ def test_set_return_type_w_none(object_under_test): assert object_under_test._properties["returnType"] is None +def test_set_return_table_type_w_none(object_under_test): + object_under_test.return_table_type = None + assert object_under_test.return_table_type is None + assert object_under_test._properties["returnTableType"] is None + + +def test_set_return_table_type_w_not_none(object_under_test): + 
StandardSqlDataType = bigquery_v2.types.StandardSqlDataType + StandardSqlField = bigquery_v2.types.StandardSqlField + StandardSqlTableType = bigquery_v2.types.StandardSqlTableType + + table_type = StandardSqlTableType( + columns=[ + StandardSqlField( + name="int_col", + type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.INT64), + ), + StandardSqlField( + name="str_col", + type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.STRING), + ), + ] + ) + + object_under_test.return_table_type = table_type + + assert object_under_test.return_table_type == table_type + assert object_under_test._properties["returnTableType"] == { + "columns": [ + {"name": "int_col", "type": {"typeKind": "INT64"}}, + {"name": "str_col", "type": {"typeKind": "STRING"}}, + ] + } + + def test_set_description_w_none(object_under_test): object_under_test.description = None assert object_under_test.description is None From 5802092bef4cc1627f4568694fd56b6aa16507ff Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 19 Jul 2021 20:58:25 +0000 Subject: [PATCH 166/230] chore: release 2.22.0 (#771) :robot: I have created a release \*beep\* \*boop\* --- ## [2.22.0](https://www.github.com/googleapis/python-bigquery/compare/v2.21.0...v2.22.0) (2021-07-19) ### Features * add `LoadJobConfig.projection_fields` to select DATASTORE_BACKUP fields ([#736](https://www.github.com/googleapis/python-bigquery/issues/736)) ([c45a738](https://www.github.com/googleapis/python-bigquery/commit/c45a7380871af3dfbd3c45524cb606c60e1a01d1)) * add standard sql table type, update scalar type enums ([#777](https://www.github.com/googleapis/python-bigquery/issues/777)) ([b8b5433](https://www.github.com/googleapis/python-bigquery/commit/b8b5433898ec881f8da1303614780a660d94733a)) * add support for more detailed DML stats ([#758](https://www.github.com/googleapis/python-bigquery/issues/758)) 
([36fe86f](https://www.github.com/googleapis/python-bigquery/commit/36fe86f41c1a8f46167284f752a6d6bbf886a04b)) * add support for user defined Table View Functions ([#724](https://www.github.com/googleapis/python-bigquery/issues/724)) ([8c7b839](https://www.github.com/googleapis/python-bigquery/commit/8c7b839a6ac1491c1c3b6b0e8755f4b70ed72ee3)) ### Bug Fixes * avoid possible job already exists error ([#751](https://www.github.com/googleapis/python-bigquery/issues/751)) ([45b9308](https://www.github.com/googleapis/python-bigquery/commit/45b93089f5398740413104285cc8acfd5ebc9c08)) ### Dependencies * allow 2.x versions of `google-api-core`, `google-cloud-core`, `google-resumable-media` ([#770](https://www.github.com/googleapis/python-bigquery/issues/770)) ([87a09fa](https://www.github.com/googleapis/python-bigquery/commit/87a09fa3f2a9ab35728a1ac925f9d5f2e6616c65)) ### Documentation * add loading data from Firestore backup sample ([#737](https://www.github.com/googleapis/python-bigquery/issues/737)) ([22fd848](https://www.github.com/googleapis/python-bigquery/commit/22fd848cae4af1148040e1faa31dd15a4d674687)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). 
--- CHANGELOG.md | 25 +++++++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5fba4c517..2439d64b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,31 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.22.0](https://www.github.com/googleapis/python-bigquery/compare/v2.21.0...v2.22.0) (2021-07-19) + + +### Features + +* add `LoadJobConfig.projection_fields` to select DATASTORE_BACKUP fields ([#736](https://www.github.com/googleapis/python-bigquery/issues/736)) ([c45a738](https://www.github.com/googleapis/python-bigquery/commit/c45a7380871af3dfbd3c45524cb606c60e1a01d1)) +* add standard sql table type, update scalar type enums ([#777](https://www.github.com/googleapis/python-bigquery/issues/777)) ([b8b5433](https://www.github.com/googleapis/python-bigquery/commit/b8b5433898ec881f8da1303614780a660d94733a)) +* add support for more detailed DML stats ([#758](https://www.github.com/googleapis/python-bigquery/issues/758)) ([36fe86f](https://www.github.com/googleapis/python-bigquery/commit/36fe86f41c1a8f46167284f752a6d6bbf886a04b)) +* add support for user defined Table View Functions ([#724](https://www.github.com/googleapis/python-bigquery/issues/724)) ([8c7b839](https://www.github.com/googleapis/python-bigquery/commit/8c7b839a6ac1491c1c3b6b0e8755f4b70ed72ee3)) + + +### Bug Fixes + +* avoid possible job already exists error ([#751](https://www.github.com/googleapis/python-bigquery/issues/751)) ([45b9308](https://www.github.com/googleapis/python-bigquery/commit/45b93089f5398740413104285cc8acfd5ebc9c08)) + + +### Dependencies + +* allow 2.x versions of `google-api-core`, `google-cloud-core`, `google-resumable-media` ([#770](https://www.github.com/googleapis/python-bigquery/issues/770)) ([87a09fa](https://www.github.com/googleapis/python-bigquery/commit/87a09fa3f2a9ab35728a1ac925f9d5f2e6616c65)) + + +### Documentation + +* add loading data from 
Firestore backup sample ([#737](https://www.github.com/googleapis/python-bigquery/issues/737)) ([22fd848](https://www.github.com/googleapis/python-bigquery/commit/22fd848cae4af1148040e1faa31dd15a4d674687)) + ## [2.21.0](https://www.github.com/googleapis/python-bigquery/compare/v2.20.0...v2.21.0) (2021-07-12) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 563b0e160..2db0ca518 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.21.0" +__version__ = "2.22.0" From 7a55a7789a5d3f8f5e4f1293e1cdccc374ea03b7 Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Tue, 20 Jul 2021 02:07:00 -0600 Subject: [PATCH 167/230] chore: add note to preserve >1, <3 version range for google-api-core, google-cloud-core (#784) * fix(deps): pin 'google-{api,cloud}-core', 'google-auth' to allow 2.x versions * Update setup.py --- setup.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 71958ccf9..0ca19b576 100644 --- a/setup.py +++ b/setup.py @@ -30,9 +30,15 @@ release_status = "Development Status :: 5 - Production/Stable" dependencies = [ "grpcio >= 1.38.1, < 2.0dev", # https://github.com/googleapis/python-bigquery/issues/695 - "google-api-core[grpc] >= 1.29.0, < 3.0.0dev", + # NOTE: Maintainers, please do not require google-api-core>=2.x.x + # Until this issue is closed + # https://github.com/googleapis/google-cloud-python/issues/10566 + "google-api-core[grpc] >= 1.29.0, <3.0.0dev", "proto-plus >= 1.10.0", - "google-cloud-core >= 1.4.1, < 3.0dev", + # NOTE: Maintainers, please do not require google-cloud-core>=2.x.x + # Until this issue is closed + # https://github.com/googleapis/google-cloud-python/issues/10566 + "google-cloud-core >= 1.4.1, <3.0.0dev", "google-resumable-media >= 0.6.0, < 
3.0dev", "packaging >= 14.3", "protobuf >= 3.12.0", From d1cbc3817a4b93f61356bd14ba51fb176e5d0269 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 20 Jul 2021 10:07:30 +0200 Subject: [PATCH 168/230] chore(deps): update dependency google-cloud-bigquery to v2.22.0 (#783) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index c7aa209ad..d70ac3fa4 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.21.0 +google-cloud-bigquery==2.22.0 google-cloud-bigquery-storage==2.6.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index b62c84c33..7b4721eac 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.21.0 +google-cloud-bigquery==2.22.0 google-cloud-bigquery-storage==2.6.0 google-auth-oauthlib==0.4.4 grpcio==1.38.1 From e403721af1373eb1f1a1c7be5b2182e3819ed1f9 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 21 Jul 2021 18:59:49 +0200 Subject: [PATCH 169/230] fix: issue a warning if buggy pyarrow is detected (#787) Some pyarrow versions can cause issue when loading data from dataframe. This commit detects if such pyarrow version is installed and warns the user. 
--- google/cloud/bigquery/client.py | 15 +++++++++++++ tests/unit/test_client.py | 37 +++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 8572ba911..273cf5f77 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -27,6 +27,7 @@ import json import math import os +import packaging.version import tempfile from typing import Any, BinaryIO, Dict, Iterable, Optional, Sequence, Tuple, Union import uuid @@ -34,6 +35,8 @@ try: import pyarrow + + _PYARROW_VERSION = packaging.version.parse(pyarrow.__version__) except ImportError: # pragma: NO COVER pyarrow = None @@ -118,6 +121,9 @@ # https://github.com/googleapis/python-bigquery/issues/438 _MIN_GET_QUERY_RESULTS_TIMEOUT = 120 +# https://github.com/googleapis/python-bigquery/issues/781#issuecomment-883497414 +_PYARROW_BAD_VERSIONS = frozenset([packaging.version.Version("2.0.0")]) + class Project(object): """Wrapper for resource describing a BigQuery project. @@ -2609,6 +2615,15 @@ def load_table_from_dataframe( try: if job_config.source_format == job.SourceFormat.PARQUET: + if _PYARROW_VERSION in _PYARROW_BAD_VERSIONS: + msg = ( + "Loading dataframe data in PARQUET format with pyarrow " + f"{_PYARROW_VERSION} can result in data corruption. It is " + "therefore *strongly* advised to use a different pyarrow " + "version or a different source format. 
" + "See: https://github.com/googleapis/python-bigquery/issues/781" + ) + warnings.warn(msg, category=RuntimeWarning) if job_config.schema: if parquet_compression == "snappy": # adjust the default value diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 6b62eb85b..c1aba9b67 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -27,6 +27,7 @@ import warnings import mock +import packaging import requests import pytest import pytz @@ -7510,6 +7511,42 @@ def test_load_table_from_dataframe_wo_pyarrow_raises_error(self): parquet_compression="gzip", ) + def test_load_table_from_dataframe_w_bad_pyarrow_issues_warning(self): + pytest.importorskip("pandas", reason="Requires `pandas`") + pytest.importorskip("pyarrow", reason="Requires `pyarrow`") + + client = self._make_client() + records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}] + dataframe = pandas.DataFrame(records) + + pyarrow_version_patch = mock.patch( + "google.cloud.bigquery.client._PYARROW_VERSION", + packaging.version.parse("2.0.0"), # A known bad version of pyarrow. 
+ ) + get_table_patch = mock.patch( + "google.cloud.bigquery.client.Client.get_table", + autospec=True, + side_effect=google.api_core.exceptions.NotFound("Table not found"), + ) + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + with load_patch, get_table_patch, pyarrow_version_patch: + with warnings.catch_warnings(record=True) as warned: + client.load_table_from_dataframe( + dataframe, self.TABLE_REF, location=self.LOCATION, + ) + + expected_warnings = [ + warning for warning in warned if "pyarrow" in str(warning).lower() + ] + assert len(expected_warnings) == 1 + assert issubclass(expected_warnings[0].category, RuntimeWarning) + msg = str(expected_warnings[0].message) + assert "pyarrow 2.0.0" in msg + assert "data corruption" in msg + @unittest.skipIf(pandas is None, "Requires `pandas`") @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") def test_load_table_from_dataframe_w_nulls(self): From e58702967d572e83b4c774278818302594a511b7 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 21 Jul 2021 19:37:06 +0200 Subject: [PATCH 170/230] feat: add Samples section to CONTRIBUTING.rst (#785) Source-Link: https://github.com/googleapis/synthtool/commit/52e4e46eff2a0b70e3ff5506a02929d089d077d4 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:6186535cbdbf6b9fe61f00294929221d060634dae4a0795c1cefdbc995b2d605 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 2 +- CONTRIBUTING.rst | 24 ++++++++++++++++++++++++ samples/geography/noxfile.py | 5 +++-- samples/snippets/noxfile.py | 5 +++-- 4 files changed, 31 insertions(+), 5 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index cb06536da..d57f74204 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: 
sha256:5ff7446edeaede81c3ed58b23a4e76a5403fba1350ce28478045657303b6479d + digest: sha256:6186535cbdbf6b9fe61f00294929221d060634dae4a0795c1cefdbc995b2d605 diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 102355b3a..2faf5aed3 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -177,6 +177,30 @@ Build the docs via: $ nox -s docs +************************* +Samples and code snippets +************************* + +Code samples and snippets live in the `samples/` catalogue. Feel free to +provide more examples, but make sure to write tests for those examples. +Each folder containing example code requires its own `noxfile.py` script +which automates testing. If you decide to create a new folder, you can +base it on the `samples/snippets` folder (providing `noxfile.py` and +the requirements files). + +The tests will run against a real Google Cloud Project, so you should +configure them just like the System Tests. + +- To run sample tests, you can execute:: + + # Run all tests in a folder + $ cd samples/snippets + $ nox -s py-3.8 + + # Run a single sample test + $ cd samples/snippets + $ nox -s py-3.8 -- -k + ******************************************** Note About ``README`` as it pertains to PyPI ******************************************** diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index 160fe7286..9fc7f1782 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -28,8 +28,9 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -# Copy `noxfile_config.py` to your directory and modify it instead. +BLACK_VERSION = "black==19.10b0" +# Copy `noxfile_config.py` to your directory and modify it instead. # `TEST_CONFIG` dict is a configuration hook that allows users to # modify the test configurations. 
The values here should be in sync @@ -159,7 +160,7 @@ def lint(session: nox.sessions.Session) -> None: @nox.session def blacken(session: nox.sessions.Session) -> None: - session.install("black") + session.install(BLACK_VERSION) python_files = [path for path in os.listdir(".") if path.endswith(".py")] session.run("black", *python_files) diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index 160fe7286..9fc7f1782 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -28,8 +28,9 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -# Copy `noxfile_config.py` to your directory and modify it instead. +BLACK_VERSION = "black==19.10b0" +# Copy `noxfile_config.py` to your directory and modify it instead. # `TEST_CONFIG` dict is a configuration hook that allows users to # modify the test configurations. The values here should be in sync @@ -159,7 +160,7 @@ def lint(session: nox.sessions.Session) -> None: @nox.session def blacken(session: nox.sessions.Session) -> None: - session.install("black") + session.install(BLACK_VERSION) python_files = [path for path in os.listdir(".") if path.endswith(".py")] session.run("black", *python_files) From 46e65a6338b7c59acad895edebb97fd2e841d4a3 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 22 Jul 2021 08:44:11 -0500 Subject: [PATCH 171/230] chore: release 2.22.1 (#794) Release-As: 2.22.1 From be9b242f2180f5b795dfb3a168a97af1682999fd Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 22 Jul 2021 10:21:02 -0500 Subject: [PATCH 172/230] docs: add sample to delete job metadata (#798) Planned to be included in https://cloud.google.com/bigquery/docs/managing-jobs --- samples/snippets/conftest.py | 25 ++++++++++++++++ samples/snippets/delete_job.py | 44 +++++++++++++++++++++++++++++ samples/snippets/delete_job_test.py | 33 ++++++++++++++++++++++ tests/system/test_client.py | 17 ----------- 4 files changed, 102 insertions(+), 17 deletions(-) 
create mode 100644 samples/snippets/delete_job.py create mode 100644 samples/snippets/delete_job_test.py diff --git a/samples/snippets/conftest.py b/samples/snippets/conftest.py index 000e5f85c..74984f902 100644 --- a/samples/snippets/conftest.py +++ b/samples/snippets/conftest.py @@ -50,6 +50,31 @@ def dataset_id(bigquery_client: bigquery.Client, project_id: str): bigquery_client.delete_dataset(dataset, delete_contents=True, not_found_ok=True) +@pytest.fixture(scope="session") +def dataset_id_us_east1(bigquery_client: bigquery.Client, project_id: str): + dataset_id = prefixer.create_prefix() + full_dataset_id = f"{project_id}.{dataset_id}" + dataset = bigquery.Dataset(full_dataset_id) + dataset.location = "us-east1" + bigquery_client.create_dataset(dataset) + yield dataset_id + bigquery_client.delete_dataset(dataset, delete_contents=True, not_found_ok=True) + + +@pytest.fixture(scope="session") +def table_id_us_east1( + bigquery_client: bigquery.Client, project_id: str, dataset_id_us_east1: str +): + table_id = prefixer.create_prefix() + full_table_id = f"{project_id}.{dataset_id_us_east1}.{table_id}" + table = bigquery.Table( + full_table_id, schema=[bigquery.SchemaField("string_col", "STRING")] + ) + bigquery_client.create_table(table) + yield full_table_id + bigquery_client.delete_table(table, not_found_ok=True) + + @pytest.fixture def random_table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): """Create a new table ID each time, so random_table_id can be used as diff --git a/samples/snippets/delete_job.py b/samples/snippets/delete_job.py new file mode 100644 index 000000000..abed0c90d --- /dev/null +++ b/samples/snippets/delete_job.py @@ -0,0 +1,44 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def delete_job_metadata(job_id: str, location: str): + orig_job_id = job_id + orig_location = location + # [START bigquery_delete_job] + from google.cloud import bigquery + from google.api_core import exceptions + + # TODO(developer): Set the job ID to the ID of the job whose metadata you + # wish to delete. + job_id = "abcd-efgh-ijkl-mnop" + + # TODO(developer): Set the location to the region or multi-region + # containing the job. + location = "us-east1" + + # [END bigquery_delete_job] + job_id = orig_job_id + location = orig_location + + # [START bigquery_delete_job] + client = bigquery.Client() + + client.delete_job_metadata(job_id, location=location) + + try: + client.get_job(job_id, location=location) + except exceptions.NotFound: + print(f"Job metadata for job {location}:{job_id} was deleted.") + # [END bigquery_delete_job] diff --git a/samples/snippets/delete_job_test.py b/samples/snippets/delete_job_test.py new file mode 100644 index 000000000..c9baa817d --- /dev/null +++ b/samples/snippets/delete_job_test.py @@ -0,0 +1,33 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery + +import delete_job + + +def test_delete_job_metadata( + capsys, bigquery_client: bigquery.Client, table_id_us_east1: str +): + query_job: bigquery.QueryJob = bigquery_client.query( + f"SELECT COUNT(*) FROM `{table_id_us_east1}`", location="us-east1", + ) + query_job.result() + assert query_job.job_id is not None + + delete_job.delete_job_metadata(query_job.job_id, "us-east1") + + out, _ = capsys.readouterr() + assert "deleted" in out + assert f"us-east1:{query_job.job_id}" in out diff --git a/tests/system/test_client.py b/tests/system/test_client.py index ceb62b8cd..2536af9fc 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -63,7 +63,6 @@ from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table from google.cloud._helpers import UTC from google.cloud.bigquery import dbapi, enums @@ -506,22 +505,6 @@ def test_delete_dataset_delete_contents_false(self): with self.assertRaises(exceptions.BadRequest): Config.CLIENT.delete_dataset(dataset) - def test_delete_job_metadata(self): - dataset_id = _make_dataset_id("us_east1") - self.temp_dataset(dataset_id, location="us-east1") - full_table_id = f"{Config.CLIENT.project}.{dataset_id}.test_delete_job_metadata" - table = Table(full_table_id, schema=[SchemaField("col", "STRING")]) - Config.CLIENT.create_table(table) - query_job: bigquery.QueryJob = Config.CLIENT.query( - f"SELECT COUNT(*) FROM `{full_table_id}`", location="us-east1", - ) - query_job.result() - self.assertIsNotNone(Config.CLIENT.get_job(query_job)) - - Config.CLIENT.delete_job_metadata(query_job) - with self.assertRaises(NotFound): - Config.CLIENT.get_job(query_job) - def 
test_get_table_w_public_dataset(self): public = "bigquery-public-data" dataset_id = "samples" From f0990f2cd27b3a71040d67b4d335f3daef1120cd Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 22 Jul 2021 17:43:54 +0200 Subject: [PATCH 173/230] chore(deps): update dependency grpcio to v1.39.0 (#796) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 7b4721eac..3d9dce718 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,7 +1,7 @@ google-cloud-bigquery==2.22.0 google-cloud-bigquery-storage==2.6.0 google-auth-oauthlib==0.4.4 -grpcio==1.38.1 +grpcio==1.39.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' From b804373277c1c1baa3370ebfb4783503b7ff360f Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Thu, 22 Jul 2021 14:36:30 -0400 Subject: [PATCH 174/230] fix: use a larger chunk size when loading data (#799) * The chunk size used for data uploads was too small (1MB). Now it's 100MB. 
* fix: The chunk size used for data uploads was too small --- google/cloud/bigquery/client.py | 2 +- tests/unit/test_client.py | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 273cf5f77..742ecac2e 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -98,7 +98,7 @@ from google.cloud.bigquery.table import RowIterator -_DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB +_DEFAULT_CHUNKSIZE = 100 * 1024 * 1024 # 100 MB _MAX_MULTIPART_SIZE = 5 * 1024 * 1024 _DEFAULT_NUM_RETRIES = 6 _BASE_UPLOAD_TEMPLATE = "{host}/upload/bigquery/v2/projects/{project}/jobs?uploadType=" diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index c1aba9b67..535685511 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -8076,3 +8076,23 @@ def test_schema_to_json_with_file_object(self): client.schema_to_json(schema_list, fake_file) assert file_content == json.loads(fake_file.getvalue()) + + +def test_upload_chunksize(client): + with mock.patch("google.cloud.bigquery.client.ResumableUpload") as RU: + upload = RU.return_value + + upload.finished = False + + def transmit_next_chunk(transport): + upload.finished = True + result = mock.MagicMock() + result.json.return_value = {} + return result + + upload.transmit_next_chunk = transmit_next_chunk + f = io.BytesIO() + client.load_table_from_file(f, "foo.bar") + + chunk_size = RU.call_args_list[0][0][1] + assert chunk_size == 100 * (1 << 20) From f20ee503f395b0443b570efb56c75b0b40d31179 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 22 Jul 2021 14:26:01 -0500 Subject: [PATCH 175/230] chore: release 2.22.1 (#797) * chore: release 2.22.1 * remove misc Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Tim Swast --- CHANGELOG.md | 14 ++++++++++++++ 
google/cloud/bigquery/version.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2439d64b0..7dbc5d4da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,20 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.22.1](https://www.github.com/googleapis/python-bigquery/compare/v2.22.0...v2.22.1) (2021-07-22) + + +### Bug Fixes + +* issue a warning if buggy pyarrow is detected ([#787](https://www.github.com/googleapis/python-bigquery/issues/787)) ([e403721](https://www.github.com/googleapis/python-bigquery/commit/e403721af1373eb1f1a1c7be5b2182e3819ed1f9)) +* use a larger chunk size when loading data ([#799](https://www.github.com/googleapis/python-bigquery/issues/799)) ([b804373](https://www.github.com/googleapis/python-bigquery/commit/b804373277c1c1baa3370ebfb4783503b7ff360f)) + + +### Documentation + +* add Samples section to CONTRIBUTING.rst ([#785](https://www.github.com/googleapis/python-bigquery/issues/785)) ([e587029](https://www.github.com/googleapis/python-bigquery/commit/e58702967d572e83b4c774278818302594a511b7)) +* add sample to delete job metadata ([#798](https://www.github.com/googleapis/python-bigquery/issues/798)) ([be9b242](https://www.github.com/googleapis/python-bigquery/commit/be9b242f2180f5b795dfb3a168a97af1682999fd)) + ## [2.22.0](https://www.github.com/googleapis/python-bigquery/compare/v2.21.0...v2.22.0) (2021-07-19) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 2db0ca518..dbc524478 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.22.0" +__version__ = "2.22.1" From 7e0e2bafc4c3f98a4246100f504fd78a01a28e7d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Fri, 23 Jul 2021 00:48:22 +0200 Subject: [PATCH 176/230] chore(deps): update dependency google-cloud-bigquery to v2.22.1 (#800) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index d70ac3fa4..3a83eda64 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.22.0 +google-cloud-bigquery==2.22.1 google-cloud-bigquery-storage==2.6.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 3d9dce718..ffa689a9e 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.22.0 +google-cloud-bigquery==2.22.1 google-cloud-bigquery-storage==2.6.0 google-auth-oauthlib==0.4.4 grpcio==1.39.0 From 419d36d6b1887041e5795dbc8fc808890e91ab11 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 23 Jul 2021 16:42:21 +0200 Subject: [PATCH 177/230] fix: retry ChunkedEncodingError by default (#802) --- google/cloud/bigquery/retry.py | 1 + tests/unit/test_retry.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index 5e9075fe1..2df4de08b 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -27,6 +27,7 @@ exceptions.TooManyRequests, exceptions.InternalServerError, exceptions.BadGateway, + requests.exceptions.ChunkedEncodingError, requests.exceptions.ConnectionError, auth_exceptions.TransportError, ) diff --git a/tests/unit/test_retry.py b/tests/unit/test_retry.py index 0bef1e5e1..6fb7f93fd 100644 --- a/tests/unit/test_retry.py +++ b/tests/unit/test_retry.py @@ -51,6 
+51,10 @@ def test_w_unstructured_requests_connectionerror(self): exc = requests.exceptions.ConnectionError() self.assertTrue(self._call_fut(exc)) + def test_w_unstructured_requests_chunked_encoding_error(self): + exc = requests.exceptions.ChunkedEncodingError() + self.assertTrue(self._call_fut(exc)) + def test_w_auth_transporterror(self): from google.auth.exceptions import TransportError From 16af7b0c93f3945af95123f4f9affd55ffa1f98d Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 23 Jul 2021 15:32:40 +0000 Subject: [PATCH 178/230] chore: fix kokoro config for samples (#804) Source-Link: https://github.com/googleapis/synthtool/commit/dd05f9d12f134871c9e45282349c9856fbebecdd Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:aea14a583128771ae8aefa364e1652f3c56070168ef31beb203534222d842b8b --- .github/.OwlBot.lock.yaml | 2 +- .kokoro/samples/python3.6/periodic-head.cfg | 2 +- .kokoro/samples/python3.7/periodic-head.cfg | 2 +- .kokoro/samples/python3.8/periodic-head.cfg | 2 +- .kokoro/samples/python3.9/periodic-head.cfg | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index d57f74204..9ee60f7e4 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:6186535cbdbf6b9fe61f00294929221d060634dae4a0795c1cefdbc995b2d605 + digest: sha256:aea14a583128771ae8aefa364e1652f3c56070168ef31beb203534222d842b8b diff --git a/.kokoro/samples/python3.6/periodic-head.cfg b/.kokoro/samples/python3.6/periodic-head.cfg index f9cfcd33e..5aa01bab5 100644 --- a/.kokoro/samples/python3.6/periodic-head.cfg +++ b/.kokoro/samples/python3.6/periodic-head.cfg @@ -7,5 +7,5 @@ env_vars: { env_vars: { key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-pubsub/.kokoro/test-samples-against-head.sh" + value: 
"github/python-bigquery/.kokoro/test-samples-against-head.sh" } diff --git a/.kokoro/samples/python3.7/periodic-head.cfg b/.kokoro/samples/python3.7/periodic-head.cfg index f9cfcd33e..5aa01bab5 100644 --- a/.kokoro/samples/python3.7/periodic-head.cfg +++ b/.kokoro/samples/python3.7/periodic-head.cfg @@ -7,5 +7,5 @@ env_vars: { env_vars: { key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-pubsub/.kokoro/test-samples-against-head.sh" + value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" } diff --git a/.kokoro/samples/python3.8/periodic-head.cfg b/.kokoro/samples/python3.8/periodic-head.cfg index f9cfcd33e..5aa01bab5 100644 --- a/.kokoro/samples/python3.8/periodic-head.cfg +++ b/.kokoro/samples/python3.8/periodic-head.cfg @@ -7,5 +7,5 @@ env_vars: { env_vars: { key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-pubsub/.kokoro/test-samples-against-head.sh" + value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" } diff --git a/.kokoro/samples/python3.9/periodic-head.cfg b/.kokoro/samples/python3.9/periodic-head.cfg index f9cfcd33e..5aa01bab5 100644 --- a/.kokoro/samples/python3.9/periodic-head.cfg +++ b/.kokoro/samples/python3.9/periodic-head.cfg @@ -7,5 +7,5 @@ env_vars: { env_vars: { key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-pubsub/.kokoro/test-samples-against-head.sh" + value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" } From eef6c8e4cc6fbd9c442605447e60242f67d48a7e Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Sun, 25 Jul 2021 09:51:55 -0400 Subject: [PATCH 179/230] test: Stop creating extra datasets (#791) --- tests/system/conftest.py | 20 +++- tests/system/test_client.py | 222 +++++++++++++++++------------------- 2 files changed, 122 insertions(+), 120 deletions(-) diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 4eef60e92..7b389013f 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -14,13 +14,25 @@ import pytest +from google.cloud import bigquery 
+import test_utils.prefixer + from . import helpers +prefixer = test_utils.prefixer.Prefixer("python-bigquery", "tests/system") + + +@pytest.fixture(scope="session", autouse=True) +def cleanup_datasets(bigquery_client: bigquery.Client): + for dataset in bigquery_client.list_datasets(): + if prefixer.should_cleanup(dataset.dataset_id): + bigquery_client.delete_dataset( + dataset, delete_contents=True, not_found_ok=True + ) + @pytest.fixture(scope="session") def bigquery_client(): - from google.cloud import bigquery - return bigquery.Client() @@ -33,10 +45,10 @@ def bqstorage_client(bigquery_client): @pytest.fixture(scope="session") def dataset_id(bigquery_client): - dataset_id = f"bqsystem_{helpers.temp_suffix()}" + dataset_id = prefixer.create_prefix() bigquery_client.create_dataset(dataset_id) yield dataset_id - bigquery_client.delete_dataset(dataset_id, delete_contents=True) + bigquery_client.delete_dataset(dataset_id, delete_contents=True, not_found_ok=True) @pytest.fixture diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 2536af9fc..baa2b6ad8 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -153,7 +153,6 @@ class Config(object): CLIENT: Optional[bigquery.Client] = None CURSOR = None - DATASET = None def setUpModule(): @@ -163,9 +162,7 @@ def setUpModule(): class TestBigQuery(unittest.TestCase): def setUp(self): - Config.DATASET = _make_dataset_id("bq_system_tests") - dataset = Config.CLIENT.create_dataset(Config.DATASET) - self.to_delete = [dataset] + self.to_delete = [] def tearDown(self): policy_tag_client = PolicyTagManagerClient() @@ -1605,20 +1602,6 @@ def test_dbapi_fetchall_from_script(self): row_tuples = [r.values() for r in rows] self.assertEqual(row_tuples, [(5, "foo"), (6, "bar"), (7, "baz")]) - def test_dbapi_create_view(self): - - query = """ - CREATE VIEW {}.dbapi_create_view - AS SELECT name, SUM(number) AS total - FROM `bigquery-public-data.usa_names.usa_1910_2013` - GROUP BY name; - 
""".format( - Config.DATASET - ) - - Config.CURSOR.execute(query) - self.assertEqual(Config.CURSOR.rowcount, 0, "expected 0 rows") - @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) @@ -2459,104 +2442,6 @@ def test_list_rows_page_size(self): page = next(pages) self.assertEqual(page.num_items, num_last_page) - def test_parameterized_types_round_trip(self): - client = Config.CLIENT - table_id = f"{Config.DATASET}.test_parameterized_types_round_trip" - fields = ( - ("n", "NUMERIC"), - ("n9", "NUMERIC(9)"), - ("n92", "NUMERIC(9, 2)"), - ("bn", "BIGNUMERIC"), - ("bn9", "BIGNUMERIC(38)"), - ("bn92", "BIGNUMERIC(38, 22)"), - ("s", "STRING"), - ("s9", "STRING(9)"), - ("b", "BYTES"), - ("b9", "BYTES(9)"), - ) - self.to_delete.insert(0, Table(f"{client.project}.{table_id}")) - client.query( - "create table {} ({})".format( - table_id, ", ".join(" ".join(f) for f in fields) - ) - ).result() - table = client.get_table(table_id) - table_id2 = table_id + "2" - self.to_delete.insert(0, Table(f"{client.project}.{table_id2}")) - client.create_table(Table(f"{client.project}.{table_id2}", table.schema)) - table2 = client.get_table(table_id2) - - self.assertEqual(tuple(s._key()[:2] for s in table2.schema), fields) - - def test_table_snapshots(self): - from google.cloud.bigquery import CopyJobConfig - from google.cloud.bigquery import OperationType - - client = Config.CLIENT - - source_table_path = f"{client.project}.{Config.DATASET}.test_table" - snapshot_table_path = f"{source_table_path}_snapshot" - - # Create the table before loading so that the column order is predictable. - schema = [ - bigquery.SchemaField("foo", "INTEGER"), - bigquery.SchemaField("bar", "STRING"), - ] - source_table = helpers.retry_403(Config.CLIENT.create_table)( - Table(source_table_path, schema=schema) - ) - self.to_delete.insert(0, source_table) - - # Populate the table with initial data. 
- rows = [{"foo": 1, "bar": "one"}, {"foo": 2, "bar": "two"}] - load_job = Config.CLIENT.load_table_from_json(rows, source_table) - load_job.result() - - # Now create a snapshot before modifying the original table data. - copy_config = CopyJobConfig() - copy_config.operation_type = OperationType.SNAPSHOT - - copy_job = client.copy_table( - sources=source_table_path, - destination=snapshot_table_path, - job_config=copy_config, - ) - copy_job.result() - - snapshot_table = client.get_table(snapshot_table_path) - self.to_delete.insert(0, snapshot_table) - - # Modify data in original table. - sql = f'INSERT INTO `{source_table_path}`(foo, bar) VALUES (3, "three")' - query_job = client.query(sql) - query_job.result() - - # List rows from the source table and compare them to rows from the snapshot. - rows_iter = client.list_rows(source_table_path) - rows = sorted(row.values() for row in rows_iter) - assert rows == [(1, "one"), (2, "two"), (3, "three")] - - rows_iter = client.list_rows(snapshot_table_path) - rows = sorted(row.values() for row in rows_iter) - assert rows == [(1, "one"), (2, "two")] - - # Now restore the table from the snapshot and it should again contain the old - # set of rows. 
- copy_config = CopyJobConfig() - copy_config.operation_type = OperationType.RESTORE - copy_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE - - copy_job = client.copy_table( - sources=snapshot_table_path, - destination=source_table_path, - job_config=copy_config, - ) - copy_job.result() - - rows_iter = client.list_rows(source_table_path) - rows = sorted(row.values() for row in rows_iter) - assert rows == [(1, "one"), (2, "two")] - def temp_dataset(self, dataset_id, location=None): project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) @@ -2587,3 +2472,108 @@ def _table_exists(t): return True except NotFound: return False + + +def test_dbapi_create_view(dataset_id): + + query = f""" + CREATE VIEW {dataset_id}.dbapi_create_view + AS SELECT name, SUM(number) AS total + FROM `bigquery-public-data.usa_names.usa_1910_2013` + GROUP BY name; + """ + + Config.CURSOR.execute(query) + assert Config.CURSOR.rowcount == 0, "expected 0 rows" + + +def test_parameterized_types_round_trip(dataset_id): + client = Config.CLIENT + table_id = f"{dataset_id}.test_parameterized_types_round_trip" + fields = ( + ("n", "NUMERIC"), + ("n9", "NUMERIC(9)"), + ("n92", "NUMERIC(9, 2)"), + ("bn", "BIGNUMERIC"), + ("bn9", "BIGNUMERIC(38)"), + ("bn92", "BIGNUMERIC(38, 22)"), + ("s", "STRING"), + ("s9", "STRING(9)"), + ("b", "BYTES"), + ("b9", "BYTES(9)"), + ) + client.query( + "create table {} ({})".format(table_id, ", ".join(" ".join(f) for f in fields)) + ).result() + table = client.get_table(table_id) + table_id2 = table_id + "2" + client.create_table(Table(f"{client.project}.{table_id2}", table.schema)) + table2 = client.get_table(table_id2) + + assert tuple(s._key()[:2] for s in table2.schema) == fields + + +def test_table_snapshots(dataset_id): + from google.cloud.bigquery import CopyJobConfig + from google.cloud.bigquery import OperationType + + client = Config.CLIENT + + source_table_path = f"{client.project}.{dataset_id}.test_table" 
+ snapshot_table_path = f"{source_table_path}_snapshot" + + # Create the table before loading so that the column order is predictable. + schema = [ + bigquery.SchemaField("foo", "INTEGER"), + bigquery.SchemaField("bar", "STRING"), + ] + source_table = helpers.retry_403(Config.CLIENT.create_table)( + Table(source_table_path, schema=schema) + ) + + # Populate the table with initial data. + rows = [{"foo": 1, "bar": "one"}, {"foo": 2, "bar": "two"}] + load_job = Config.CLIENT.load_table_from_json(rows, source_table) + load_job.result() + + # Now create a snapshot before modifying the original table data. + copy_config = CopyJobConfig() + copy_config.operation_type = OperationType.SNAPSHOT + + copy_job = client.copy_table( + sources=source_table_path, + destination=snapshot_table_path, + job_config=copy_config, + ) + copy_job.result() + + # Modify data in original table. + sql = f'INSERT INTO `{source_table_path}`(foo, bar) VALUES (3, "three")' + query_job = client.query(sql) + query_job.result() + + # List rows from the source table and compare them to rows from the snapshot. + rows_iter = client.list_rows(source_table_path) + rows = sorted(row.values() for row in rows_iter) + assert rows == [(1, "one"), (2, "two"), (3, "three")] + + rows_iter = client.list_rows(snapshot_table_path) + rows = sorted(row.values() for row in rows_iter) + assert rows == [(1, "one"), (2, "two")] + + # Now restore the table from the snapshot and it should again contain the old + # set of rows. 
+ copy_config = CopyJobConfig() + copy_config.operation_type = OperationType.RESTORE + copy_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE + + copy_job = client.copy_table( + sources=snapshot_table_path, + destination=source_table_path, + job_config=copy_config, + ) + copy_job.result() + + rows_iter = client.list_rows(source_table_path) + rows = sorted(row.values() for row in rows_iter) + assert rows == [(1, "one"), (2, "two")] From da87fd921cc8067b187d7985c978aac8eb58d107 Mon Sep 17 00:00:00 2001 From: mgorsk1 Date: Mon, 26 Jul 2021 19:44:38 +0200 Subject: [PATCH 180/230] docs: correct docs for `LoadJobConfig.destination_table_description` (#810) Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #811. --- google/cloud/bigquery/job/load.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index f1b045412..aee055c1c 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -170,7 +170,7 @@ def destination_encryption_configuration(self, value): @property def destination_table_description(self): - """Optional[str]: Name given to destination table. + """Optional[str]: Description of the destination table. 
See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.description From c293e3c914cd0cfe3da34b99330fd6d87a5f534e Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 27 Jul 2021 11:21:21 -0500 Subject: [PATCH 181/230] tests: add system tests for `to_arrow` with extreme values (#813) * tests: add system tests for `to_arrow` with extreme values * fix bad merge * revert pandas tests * revert pandas tests * fix link to decimal types Co-authored-by: Peter Lamut * use north and south pole as extreme geography points * add another row of extreme values * base64 encode bytes columns Co-authored-by: Peter Lamut --- google/cloud/bigquery/_pandas_helpers.py | 2 + tests/data/scalars.jsonl | 2 + tests/data/scalars_extreme.jsonl | 5 ++ tests/data/scalars_schema.json | 62 +++++++++++++++++ tests/system/conftest.py | 48 ++++++++++++- tests/system/test_arrow.py | 88 ++++++++++++++++++++++++ 6 files changed, 205 insertions(+), 2 deletions(-) create mode 100644 tests/data/scalars.jsonl create mode 100644 tests/data/scalars_extreme.jsonl create mode 100644 tests/data/scalars_schema.json create mode 100644 tests/system/test_arrow.py diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 2ff96da4d..b381fa5f7 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -93,6 +93,8 @@ def pyarrow_numeric(): def pyarrow_bignumeric(): + # 77th digit is partial. 
+ # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types return pyarrow.decimal256(76, 38) diff --git a/tests/data/scalars.jsonl b/tests/data/scalars.jsonl new file mode 100644 index 000000000..4419a6e9a --- /dev/null +++ b/tests/data/scalars.jsonl @@ -0,0 +1,2 @@ +{"bool_col": true, "bytes_col": "abcd", "date_col": "2021-07-21", "datetime_col": "2021-07-21 11:39:45", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "123456789", "numeric_col": "1.23456789", "bignumeric_col": "10.111213141516171819", "float64_col": "1.25", "string_col": "Hello, World", "time_col": "11:41:43.07616", "timestamp_col": "2021-07-21T17:43:43.945289Z"} +{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null} diff --git a/tests/data/scalars_extreme.jsonl b/tests/data/scalars_extreme.jsonl new file mode 100644 index 000000000..ceccd8dbc --- /dev/null +++ b/tests/data/scalars_extreme.jsonl @@ -0,0 +1,5 @@ +{"bool_col": true, "bytes_col": "DQo=\n", "date_col": "9999-12-31", "datetime_col": "9999-12-31 23:59:59.999999", "geography_col": "POINT(-135.0000 90.0000)", "int64_col": "9223372036854775807", "numeric_col": "9.9999999999999999999999999999999999999E+28", "bignumeric_col": "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "+inf", "string_col": "Hello, World", "time_col": "23:59:59.99999", "timestamp_col": "9999-12-31T23:59:59.999999Z"} +{"bool_col": false, "bytes_col": "8J+Zgw==\n", "date_col": "0001-01-01", "datetime_col": "0001-01-01 00:00:00", "geography_col": "POINT(45.0000 -90.0000)", "int64_col": "-9223372036854775808", "numeric_col": "-9.9999999999999999999999999999999999999E+28", "bignumeric_col": "-9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": 
"-inf", "string_col": "Hello, World", "time_col": "00:00:00", "timestamp_col": "0001-01-01T00:00:00.000000Z"} +{"bool_col": true, "bytes_col": "AA==\n", "date_col": "1900-01-01", "datetime_col": "1900-01-01 00:00:00", "geography_col": "POINT(-180.0000 0.0000)", "int64_col": "-1", "numeric_col": "0.000000001", "bignumeric_col": "-0.00000000000000000000000000000000000001", "float64_col": "nan", "string_col": "こんにちは", "time_col": "00:00:00.000001", "timestamp_col": "1900-01-01T00:00:00.000000Z"} +{"bool_col": false, "bytes_col": "", "date_col": "1970-01-01", "datetime_col": "1970-01-01 00:00:00", "geography_col": "POINT(0 0)", "int64_col": "0", "numeric_col": "0.0", "bignumeric_col": "0.0", "float64_col": 0.0, "string_col": "", "time_col": "12:00:00", "timestamp_col": "1970-01-01T00:00:00.000000Z"} +{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null} diff --git a/tests/data/scalars_schema.json b/tests/data/scalars_schema.json new file mode 100644 index 000000000..00bd150fd --- /dev/null +++ b/tests/data/scalars_schema.json @@ -0,0 +1,62 @@ +[ + { + "mode": "NULLABLE", + "name": "timestamp_col", + "type": "TIMESTAMP" + }, + { + "mode": "NULLABLE", + "name": "time_col", + "type": "TIME" + }, + { + "mode": "NULLABLE", + "name": "float64_col", + "type": "FLOAT" + }, + { + "mode": "NULLABLE", + "name": "datetime_col", + "type": "DATETIME" + }, + { + "mode": "NULLABLE", + "name": "bignumeric_col", + "type": "BIGNUMERIC" + }, + { + "mode": "NULLABLE", + "name": "numeric_col", + "type": "NUMERIC" + }, + { + "mode": "NULLABLE", + "name": "geography_col", + "type": "GEOGRAPHY" + }, + { + "mode": "NULLABLE", + "name": "date_col", + "type": "DATE" + }, + { + "mode": "NULLABLE", + "name": "string_col", + "type": "STRING" + }, + { + "mode": "NULLABLE", + "name": "bool_col", + "type": 
"BOOLEAN" + }, + { + "mode": "NULLABLE", + "name": "bytes_col", + "type": "BYTES" + }, + { + "mode": "NULLABLE", + "name": "int64_col", + "type": "INTEGER" + } +] diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 7b389013f..cc2c2a4dc 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -12,15 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pytest +import pathlib -from google.cloud import bigquery +import pytest import test_utils.prefixer +from google.cloud import bigquery +from google.cloud.bigquery import enums from . import helpers + prefixer = test_utils.prefixer.Prefixer("python-bigquery", "tests/system") +DATA_DIR = pathlib.Path(__file__).parent.parent / "data" + @pytest.fixture(scope="session", autouse=True) def cleanup_datasets(bigquery_client: bigquery.Client): @@ -36,6 +41,11 @@ def bigquery_client(): return bigquery.Client() +@pytest.fixture(scope="session") +def project_id(bigquery_client: bigquery.Client): + return bigquery_client.project + + @pytest.fixture(scope="session") def bqstorage_client(bigquery_client): from google.cloud import bigquery_storage @@ -54,3 +64,37 @@ def dataset_id(bigquery_client): @pytest.fixture def table_id(dataset_id): return f"{dataset_id}.table_{helpers.temp_suffix()}" + + +@pytest.fixture(scope="session") +def scalars_table(bigquery_client: bigquery.Client, project_id: str, dataset_id: str): + schema = bigquery_client.schema_from_json(DATA_DIR / "scalars_schema.json") + job_config = bigquery.LoadJobConfig() + job_config.schema = schema + job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON + full_table_id = f"{project_id}.{dataset_id}.scalars" + with open(DATA_DIR / "scalars.jsonl", "rb") as data_file: + job = bigquery_client.load_table_from_file( + data_file, full_table_id, job_config=job_config + ) + job.result() + yield full_table_id + bigquery_client.delete_table(full_table_id) + + 
+@pytest.fixture(scope="session") +def scalars_extreme_table( + bigquery_client: bigquery.Client, project_id: str, dataset_id: str +): + schema = bigquery_client.schema_from_json(DATA_DIR / "scalars_schema.json") + job_config = bigquery.LoadJobConfig() + job_config.schema = schema + job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON + full_table_id = f"{project_id}.{dataset_id}.scalars_extreme" + with open(DATA_DIR / "scalars_extreme.jsonl", "rb") as data_file: + job = bigquery_client.load_table_from_file( + data_file, full_table_id, job_config=job_config + ) + job.result() + yield full_table_id + bigquery_client.delete_table(full_table_id) diff --git a/tests/system/test_arrow.py b/tests/system/test_arrow.py new file mode 100644 index 000000000..f97488e39 --- /dev/null +++ b/tests/system/test_arrow.py @@ -0,0 +1,88 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""System tests for Arrow connector.""" + +import pytest + +pyarrow = pytest.importorskip( + "pyarrow", minversion="3.0.0" +) # Needs decimal256 for BIGNUMERIC columns. + + +@pytest.mark.parametrize( + ("max_results", "scalars_table_name"), + ( + (None, "scalars_table"), # Use BQ Storage API. + (10, "scalars_table"), # Use REST API. + (None, "scalars_extreme_table"), # Use BQ Storage API. + (10, "scalars_extreme_table"), # Use REST API. 
+ ), +) +def test_list_rows_nullable_scalars_dtypes( + bigquery_client, + scalars_table, + scalars_extreme_table, + max_results, + scalars_table_name, +): + table_id = scalars_table + if scalars_table_name == "scalars_extreme_table": + table_id = scalars_extreme_table + arrow_table = bigquery_client.list_rows( + table_id, max_results=max_results, + ).to_arrow() + + schema = arrow_table.schema + bignumeric_type = schema.field("bignumeric_col").type + # 77th digit is partial. + # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types + assert bignumeric_type.precision in {76, 77} + assert bignumeric_type.scale == 38 + + bool_type = schema.field("bool_col").type + assert bool_type.equals(pyarrow.bool_()) + + bytes_type = schema.field("bytes_col").type + assert bytes_type.equals(pyarrow.binary()) + + date_type = schema.field("date_col").type + assert date_type.equals(pyarrow.date32()) + + datetime_type = schema.field("datetime_col").type + assert datetime_type.unit == "us" + assert datetime_type.tz is None + + float64_type = schema.field("float64_col").type + assert float64_type.equals(pyarrow.float64()) + + geography_type = schema.field("geography_col").type + assert geography_type.equals(pyarrow.string()) + + int64_type = schema.field("int64_col").type + assert int64_type.equals(pyarrow.int64()) + + numeric_type = schema.field("numeric_col").type + assert numeric_type.precision == 38 + assert numeric_type.scale == 9 + + string_type = schema.field("string_col").type + assert string_type.equals(pyarrow.string()) + + time_type = schema.field("time_col").type + assert time_type.equals(pyarrow.time64("us")) + + timestamp_type = schema.field("timestamp_col").type + assert timestamp_type.unit == "us" + assert timestamp_type.tz is not None From 3b70891135f5fe32dcd12210ff4faa51ac53742d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Tue, 27 Jul 2021 13:04:10 -0500 Subject: [PATCH 182/230] chore: protect v3.x.x branch (#816) * chore: protect 
v3.x.x branch In preparation for breaking changes. * force pattern to be a string * simplify branch name --- .github/sync-repo-settings.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index b18fb9c29..2697f214c 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -3,7 +3,7 @@ branchProtectionRules: # Identifies the protection rule pattern. Name of the branch to be protected. # Defaults to `master` -- pattern: master +- pattern: '{master,v3}' requiredStatusCheckContexts: - 'Kokoro' - 'Kokoro snippets-3.8' From 3c1be149e76b1d1d8879fdcf0924ddb1c1839e94 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 27 Jul 2021 20:08:37 +0200 Subject: [PATCH 183/230] fix: no longer raise a warning in `to_dataframe` if `max_results` set (#815) That warning should only be used when BQ Storage client is explicitly passed in to RowIterator methods when max_results value is also set. --- google/cloud/bigquery/table.py | 30 +++++-- tests/unit/test_table.py | 160 +++++++++++++++++++++++++++++++-- 2 files changed, 179 insertions(+), 11 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 18d969a3f..daade1ac6 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1552,11 +1552,6 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client): return False if self.max_results is not None: - warnings.warn( - "Cannot use bqstorage_client if max_results is set, " - "reverting to fetching data with the REST endpoint.", - stacklevel=2, - ) return False try: @@ -1604,6 +1599,25 @@ def total_rows(self): """int: The total number of rows in the table.""" return self._total_rows + def _maybe_warn_max_results( + self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"], + ): + """Issue a warning if BQ Storage client is not ``None`` with ``max_results`` set. 
+ + This helper method should be used directly in the relevant top-level public + methods, so that the warning is issued for the correct line in user code. + + Args: + bqstorage_client: + The BigQuery Storage client intended to use for downloading result rows. + """ + if bqstorage_client is not None and self.max_results is not None: + warnings.warn( + "Cannot use bqstorage_client if max_results is set, " + "reverting to fetching data with the REST endpoint.", + stacklevel=3, + ) + def _to_page_iterable( self, bqstorage_download, tabledata_list_download, bqstorage_client=None ): @@ -1700,6 +1714,8 @@ def to_arrow( if pyarrow is None: raise ValueError(_NO_PYARROW_ERROR) + self._maybe_warn_max_results(bqstorage_client) + if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): create_bqstorage_client = False bqstorage_client = None @@ -1790,6 +1806,8 @@ def to_dataframe_iterable( if dtypes is None: dtypes = {} + self._maybe_warn_max_results(bqstorage_client) + column_names = [field.name for field in self._schema] bqstorage_download = functools.partial( _pandas_helpers.download_dataframe_bqstorage, @@ -1896,6 +1914,8 @@ def to_dataframe( if dtypes is None: dtypes = {} + self._maybe_warn_max_results(bqstorage_client) + if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): create_bqstorage_client = False bqstorage_client = None diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 37650cd27..4b1fd833b 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -15,6 +15,7 @@ import datetime import logging import time +import types import unittest import warnings @@ -1862,6 +1863,15 @@ def test__validate_bqstorage_returns_false_when_completely_cached(self): ) ) + def test__validate_bqstorage_returns_false_if_max_results_set(self): + iterator = self._make_one( + max_results=10, first_page_response=None # not cached + ) + result = iterator._validate_bqstorage( + bqstorage_client=None, 
create_bqstorage_client=True + ) + self.assertFalse(result) + def test__validate_bqstorage_returns_false_if_missing_dependency(self): iterator = self._make_one(first_page_response=None) # not cached @@ -2105,7 +2115,7 @@ def test_to_arrow_w_empty_table(self): @unittest.skipIf( bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" ) - def test_to_arrow_max_results_w_create_bqstorage_warning(self): + def test_to_arrow_max_results_w_explicit_bqstorage_client_warning(self): from google.cloud.bigquery.schema import SchemaField schema = [ @@ -2119,6 +2129,7 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) mock_client = _mock_client() + mock_bqstorage_client = mock.sentinel.bq_storage_client row_iterator = self._make_one( client=mock_client, @@ -2129,7 +2140,7 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): ) with warnings.catch_warnings(record=True) as warned: - row_iterator.to_arrow(create_bqstorage_client=True) + row_iterator.to_arrow(bqstorage_client=mock_bqstorage_client) matches = [ warning @@ -2139,6 +2150,49 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self): and "REST" in str(warning) ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") + self.assertIn( + __file__, str(matches[0]), msg="Warning emitted with incorrect stacklevel" + ) + mock_client._ensure_bqstorage_client.assert_not_called() + + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + def test_to_arrow_max_results_w_create_bqstorage_client_no_warning(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ] + path = 
"/foo" + api_request = mock.Mock(return_value={"rows": rows}) + mock_client = _mock_client() + + row_iterator = self._make_one( + client=mock_client, + api_request=api_request, + path=path, + schema=schema, + max_results=42, + ) + + with warnings.catch_warnings(record=True) as warned: + row_iterator.to_arrow(create_bqstorage_client=True) + + matches = [ + warning + for warning in warned + if warning.category is UserWarning + and "cannot use bqstorage_client" in str(warning).lower() + and "REST" in str(warning) + ] + self.assertFalse(matches) mock_client._ensure_bqstorage_client.assert_not_called() @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @@ -2372,7 +2426,6 @@ def test_to_arrow_w_pyarrow_none(self): @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_iterable(self): from google.cloud.bigquery.schema import SchemaField - import types schema = [ SchemaField("name", "STRING", mode="REQUIRED"), @@ -2415,7 +2468,6 @@ def test_to_dataframe_iterable(self): @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_iterable_with_dtypes(self): from google.cloud.bigquery.schema import SchemaField - import types schema = [ SchemaField("name", "STRING", mode="REQUIRED"), @@ -2527,6 +2579,61 @@ def test_to_dataframe_iterable_w_bqstorage(self): # Don't close the client if it was passed in. 
bqstorage_client._transport.grpc_channel.close.assert_not_called() + @unittest.skipIf(pandas is None, "Requires `pandas`") + @unittest.skipIf( + bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" + ) + @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") + def test_to_dataframe_iterable_w_bqstorage_max_results_warning(self): + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + + iterator_schema = [ + schema.SchemaField("name", "STRING", mode="REQUIRED"), + schema.SchemaField("age", "INTEGER", mode="REQUIRED"), + ] + path = "/foo" + api_request = mock.Mock( + side_effect=[ + { + "rows": [{"f": [{"v": "Bengt"}, {"v": "32"}]}], + "pageToken": "NEXTPAGE", + }, + {"rows": [{"f": [{"v": "Sven"}, {"v": "33"}]}]}, + ] + ) + row_iterator = mut.RowIterator( + _mock_client(), + api_request, + path, + iterator_schema, + table=mut.TableReference.from_string("proj.dset.tbl"), + selected_fields=iterator_schema, + max_results=25, + ) + + with warnings.catch_warnings(record=True) as warned: + dfs = row_iterator.to_dataframe_iterable(bqstorage_client=bqstorage_client) + + # Was a warning emitted? + matches = [ + warning + for warning in warned + if warning.category is UserWarning + and "cannot use bqstorage_client" in str(warning).lower() + and "REST" in str(warning) + ] + assert len(matches) == 1, "User warning was not emitted." + assert __file__ in str(matches[0]), "Warning emitted with incorrect stacklevel" + + # Basic check of what we got as a result. 
+ dataframes = list(dfs) + assert len(dataframes) == 2 + assert isinstance(dataframes[0], pandas.DataFrame) + assert isinstance(dataframes[1], pandas.DataFrame) + @mock.patch("google.cloud.bigquery.table.pandas", new=None) def test_to_dataframe_iterable_error_if_pandas_is_none(self): from google.cloud.bigquery.schema import SchemaField @@ -2926,7 +3033,7 @@ def test_to_dataframe_max_results_w_bqstorage_warning(self): self.assertEqual(len(matches), 1, msg="User warning was not emitted.") @unittest.skipIf(pandas is None, "Requires `pandas`") - def test_to_dataframe_max_results_w_create_bqstorage_warning(self): + def test_to_dataframe_max_results_w_explicit_bqstorage_client_warning(self): from google.cloud.bigquery.schema import SchemaField schema = [ @@ -2940,6 +3047,7 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): path = "/foo" api_request = mock.Mock(return_value={"rows": rows}) mock_client = _mock_client() + mock_bqstorage_client = mock.sentinel.bq_storage_client row_iterator = self._make_one( client=mock_client, @@ -2950,7 +3058,7 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): ) with warnings.catch_warnings(record=True) as warned: - row_iterator.to_dataframe(create_bqstorage_client=True) + row_iterator.to_dataframe(bqstorage_client=mock_bqstorage_client) matches = [ warning @@ -2960,6 +3068,46 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self): and "REST" in str(warning) ] self.assertEqual(len(matches), 1, msg="User warning was not emitted.") + self.assertIn( + __file__, str(matches[0]), msg="Warning emitted with incorrect stacklevel" + ) + mock_client._ensure_bqstorage_client.assert_not_called() + + @unittest.skipIf(pandas is None, "Requires `pandas`") + def test_to_dataframe_max_results_w_create_bqstorage_client_no_warning(self): + from google.cloud.bigquery.schema import SchemaField + + schema = [ + SchemaField("name", "STRING", mode="REQUIRED"), + SchemaField("age", "INTEGER", 
mode="REQUIRED"), + ] + rows = [ + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ] + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + mock_client = _mock_client() + + row_iterator = self._make_one( + client=mock_client, + api_request=api_request, + path=path, + schema=schema, + max_results=42, + ) + + with warnings.catch_warnings(record=True) as warned: + row_iterator.to_dataframe(create_bqstorage_client=True) + + matches = [ + warning + for warning in warned + if warning.category is UserWarning + and "cannot use bqstorage_client" in str(warning).lower() + and "REST" in str(warning) + ] + self.assertFalse(matches) mock_client._ensure_bqstorage_client.assert_not_called() @unittest.skipIf(pandas is None, "Requires `pandas`") From fe7a902e8b3e723ace335c9b499aea6d180a025b Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 27 Jul 2021 18:14:09 +0000 Subject: [PATCH 184/230] feat: Update proto definitions for bigquery/v2 to support new proto fields for BQML. (#817) PiperOrigin-RevId: 387137741 Source-Link: https://github.com/googleapis/googleapis/commit/8962c92e97495d0795b427d4aa4326b0d06e33eb Source-Link: https://github.com/googleapis/googleapis-gen/commit/102f1b4277cc5a049663535d9eeb77831b67de25 --- google/cloud/bigquery_v2/types/model.py | 104 ++++++++++++++++-- .../bigquery_v2/types/table_reference.py | 12 ++ 2 files changed, 107 insertions(+), 9 deletions(-) diff --git a/google/cloud/bigquery_v2/types/model.py b/google/cloud/bigquery_v2/types/model.py index 17e101d25..706418401 100644 --- a/google/cloud/bigquery_v2/types/model.py +++ b/google/cloud/bigquery_v2/types/model.py @@ -96,6 +96,8 @@ class Model(proto.Message): Output only. Label columns that were used to train this model. The output of the model will have a `predicted_` prefix to these columns. + best_trial_id (int): + The best trial_id across all training runs. 
""" class ModelType(proto.Enum): @@ -113,6 +115,7 @@ class ModelType(proto.Enum): ARIMA = 11 AUTOML_REGRESSOR = 12 AUTOML_CLASSIFIER = 13 + ARIMA_PLUS = 19 class LossType(proto.Enum): r"""Loss metric to evaluate model training performance.""" @@ -151,6 +154,7 @@ class DataFrequency(proto.Enum): WEEKLY = 5 DAILY = 6 HOURLY = 7 + PER_MINUTE = 8 class HolidayRegion(proto.Enum): r"""Type of supported holiday regions for time series forecasting @@ -285,7 +289,7 @@ class RegressionMetrics(proto.Message): median_absolute_error (google.protobuf.wrappers_pb2.DoubleValue): Median absolute error. r_squared (google.protobuf.wrappers_pb2.DoubleValue): - R^2 score. + R^2 score. This corresponds to r2_score in ML.EVALUATE. """ mean_absolute_error = proto.Field( @@ -528,7 +532,7 @@ class ClusteringMetrics(proto.Message): Mean of squared distances between each sample to its cluster centroid. clusters (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster]): - [Beta] Information for all clusters. + Information for all clusters. """ class Cluster(proto.Message): @@ -697,10 +701,29 @@ class ArimaSingleModelForecastingMetrics(proto.Message): Is arima model fitted with drift or not. It is always false when d is not 1. time_series_id (str): - The id to indicate different time series. + The time_series_id value for this time series. It will be + one of the unique values from the time_series_id_column + specified during ARIMA model training. Only present when + time_series_id_column training option was used. + time_series_ids (Sequence[str]): + The tuple of time_series_ids identifying this time series. + It will be one of the unique tuples of values present in the + time_series_id_columns specified during ARIMA model + training. Only present when time_series_id_columns training + option was used and the order of values here are same as the + order of time_series_id_columns. 
seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): Seasonal periods. Repeated because multiple periods are supported for one time series. + has_holiday_effect (google.protobuf.wrappers_pb2.BoolValue): + If true, holiday_effect is a part of time series + decomposition result. + has_spikes_and_dips (google.protobuf.wrappers_pb2.BoolValue): + If true, spikes_and_dips is a part of time series + decomposition result. + has_step_changes (google.protobuf.wrappers_pb2.BoolValue): + If true, step_changes is a part of time series decomposition + result. """ non_seasonal_order = proto.Field( @@ -711,9 +734,19 @@ class ArimaSingleModelForecastingMetrics(proto.Message): ) has_drift = proto.Field(proto.BOOL, number=3,) time_series_id = proto.Field(proto.STRING, number=4,) + time_series_ids = proto.RepeatedField(proto.STRING, number=9,) seasonal_periods = proto.RepeatedField( proto.ENUM, number=5, enum="Model.SeasonalPeriod.SeasonalPeriodType", ) + has_holiday_effect = proto.Field( + proto.MESSAGE, number=6, message=wrappers_pb2.BoolValue, + ) + has_spikes_and_dips = proto.Field( + proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue, + ) + has_step_changes = proto.Field( + proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue, + ) non_seasonal_order = proto.RepeatedField( proto.MESSAGE, number=1, message="Model.ArimaOrder", @@ -901,7 +934,7 @@ class TrainingRun(proto.Message): """ class TrainingOptions(proto.Message): - r""" + r"""Options used in model training. Attributes: max_iterations (int): The maximum number of iterations in training. @@ -972,8 +1005,9 @@ class TrainingOptions(proto.Message): num_clusters (int): Number of clusters for clustering models. model_uri (str): - [Beta] Google Cloud Storage URI from which the model was - imported. Only applicable for imported models. + Google Cloud Storage URI from which the model + was imported. Only applicable for imported + models. 
optimization_strategy (google.cloud.bigquery_v2.types.Model.OptimizationStrategy): Optimization strategy for training linear regression models. @@ -1030,8 +1064,11 @@ class TrainingOptions(proto.Message): If a valid value is specified, then holiday effects modeling is enabled. time_series_id_column (str): - The id column that will be used to indicate - different time series to forecast in parallel. + The time series id column that was used + during ARIMA model training. + time_series_id_columns (Sequence[str]): + The time series id columns that were used + during ARIMA model training. horizon (int): The number of periods ahead that need to be forecasted. @@ -1042,6 +1079,15 @@ class TrainingOptions(proto.Message): output feature name is A.b. auto_arima_max_order (int): The max value of non-seasonal p and q. + decompose_time_series (google.protobuf.wrappers_pb2.BoolValue): + If true, perform decompose time series and + save the results. + clean_spikes_and_dips (google.protobuf.wrappers_pb2.BoolValue): + If true, clean spikes and dips in the input + time series. + adjust_step_changes (google.protobuf.wrappers_pb2.BoolValue): + If true, detect step changes and make data + adjustment in the input time series. 
""" max_iterations = proto.Field(proto.INT64, number=1,) @@ -1120,9 +1166,19 @@ class TrainingOptions(proto.Message): proto.ENUM, number=42, enum="Model.HolidayRegion", ) time_series_id_column = proto.Field(proto.STRING, number=43,) + time_series_id_columns = proto.RepeatedField(proto.STRING, number=51,) horizon = proto.Field(proto.INT64, number=44,) preserve_input_structs = proto.Field(proto.BOOL, number=45,) auto_arima_max_order = proto.Field(proto.INT64, number=46,) + decompose_time_series = proto.Field( + proto.MESSAGE, number=50, message=wrappers_pb2.BoolValue, + ) + clean_spikes_and_dips = proto.Field( + proto.MESSAGE, number=52, message=wrappers_pb2.BoolValue, + ) + adjust_step_changes = proto.Field( + proto.MESSAGE, number=53, message=wrappers_pb2.BoolValue, + ) class IterationResult(proto.Message): r"""Information about a single iteration of the training run. @@ -1218,10 +1274,29 @@ class ArimaModelInfo(proto.Message): Whether Arima model fitted with drift or not. It is always false when d is not 1. time_series_id (str): - The id to indicate different time series. + The time_series_id value for this time series. It will be + one of the unique values from the time_series_id_column + specified during ARIMA model training. Only present when + time_series_id_column training option was used. + time_series_ids (Sequence[str]): + The tuple of time_series_ids identifying this time series. + It will be one of the unique tuples of values present in the + time_series_id_columns specified during ARIMA model + training. Only present when time_series_id_columns training + option was used and the order of values here are same as the + order of time_series_id_columns. seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]): Seasonal periods. Repeated because multiple periods are supported for one time series. 
+ has_holiday_effect (google.protobuf.wrappers_pb2.BoolValue): + If true, holiday_effect is a part of time series + decomposition result. + has_spikes_and_dips (google.protobuf.wrappers_pb2.BoolValue): + If true, spikes_and_dips is a part of time series + decomposition result. + has_step_changes (google.protobuf.wrappers_pb2.BoolValue): + If true, step_changes is a part of time series decomposition + result. """ non_seasonal_order = proto.Field( @@ -1237,11 +1312,21 @@ class ArimaModelInfo(proto.Message): ) has_drift = proto.Field(proto.BOOL, number=4,) time_series_id = proto.Field(proto.STRING, number=5,) + time_series_ids = proto.RepeatedField(proto.STRING, number=10,) seasonal_periods = proto.RepeatedField( proto.ENUM, number=6, enum="Model.SeasonalPeriod.SeasonalPeriodType", ) + has_holiday_effect = proto.Field( + proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue, + ) + has_spikes_and_dips = proto.Field( + proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue, + ) + has_step_changes = proto.Field( + proto.MESSAGE, number=9, message=wrappers_pb2.BoolValue, + ) arima_model_info = proto.RepeatedField( proto.MESSAGE, @@ -1319,6 +1404,7 @@ class ArimaModelInfo(proto.Message): label_columns = proto.RepeatedField( proto.MESSAGE, number=11, message=standard_sql.StandardSqlField, ) + best_trial_id = proto.Field(proto.INT64, number=19,) class GetModelRequest(proto.Message): diff --git a/google/cloud/bigquery_v2/types/table_reference.py b/google/cloud/bigquery_v2/types/table_reference.py index a0a8ee4c9..d56e5b09f 100644 --- a/google/cloud/bigquery_v2/types/table_reference.py +++ b/google/cloud/bigquery_v2/types/table_reference.py @@ -36,11 +36,23 @@ class TableReference(proto.Message): maximum length is 1,024 characters. Certain operations allow suffixing of the table ID with a partition decorator, such as ``sample_table$20190123``. 
+ project_id_alternative (Sequence[str]): + The alternative field that will be used when ESF is not able + to translate the received data to the project_id field. + dataset_id_alternative (Sequence[str]): + The alternative field that will be used when ESF is not able + to translate the received data to the project_id field. + table_id_alternative (Sequence[str]): + The alternative field that will be used when ESF is not able + to translate the received data to the project_id field. """ project_id = proto.Field(proto.STRING, number=1,) dataset_id = proto.Field(proto.STRING, number=2,) table_id = proto.Field(proto.STRING, number=3,) + project_id_alternative = proto.RepeatedField(proto.STRING, number=4,) + dataset_id_alternative = proto.RepeatedField(proto.STRING, number=5,) + table_id_alternative = proto.RepeatedField(proto.STRING, number=6,) __all__ = tuple(sorted(__protobuf__.manifest)) From 02bbdaebb40be771124d397cb45545f1bf697548 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 27 Jul 2021 13:50:04 -0500 Subject: [PATCH 185/230] chore: release 2.23.0 (#819) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 18 ++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7dbc5d4da..966a8744a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,24 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.23.0](https://www.github.com/googleapis/python-bigquery/compare/v2.22.1...v2.23.0) (2021-07-27) + + +### Features + +* Update proto definitions for bigquery/v2 to support new proto fields for BQML. 
([#817](https://www.github.com/googleapis/python-bigquery/issues/817)) ([fe7a902](https://www.github.com/googleapis/python-bigquery/commit/fe7a902e8b3e723ace335c9b499aea6d180a025b)) + + +### Bug Fixes + +* no longer raise a warning in `to_dataframe` if `max_results` set ([#815](https://www.github.com/googleapis/python-bigquery/issues/815)) ([3c1be14](https://www.github.com/googleapis/python-bigquery/commit/3c1be149e76b1d1d8879fdcf0924ddb1c1839e94)) +* retry ChunkedEncodingError by default ([#802](https://www.github.com/googleapis/python-bigquery/issues/802)) ([419d36d](https://www.github.com/googleapis/python-bigquery/commit/419d36d6b1887041e5795dbc8fc808890e91ab11)) + + +### Documentation + +* correct docs for `LoadJobConfig.destination_table_description` ([#810](https://www.github.com/googleapis/python-bigquery/issues/810)) ([da87fd9](https://www.github.com/googleapis/python-bigquery/commit/da87fd921cc8067b187d7985c978aac8eb58d107)) + ### [2.22.1](https://www.github.com/googleapis/python-bigquery/compare/v2.22.0...v2.22.1) (2021-07-22) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index dbc524478..416bf20ed 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.22.1" +__version__ = "2.23.0" From 42b66d34b979c87cc98b8984a8abe74edda753ac Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 28 Jul 2021 16:30:44 +0200 Subject: [PATCH 186/230] chore(deps): update dependency google-cloud-bigquery to v2.23.0 (#820) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 3a83eda64..0f9c3a2e3 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.22.1 +google-cloud-bigquery==2.23.0 google-cloud-bigquery-storage==2.6.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index ffa689a9e..81ef4df2f 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.22.1 +google-cloud-bigquery==2.23.0 google-cloud-bigquery-storage==2.6.0 google-auth-oauthlib==0.4.4 grpcio==1.39.0 From d9378af13add879118a1d004529b811f72c325d6 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 28 Jul 2021 17:18:18 +0200 Subject: [PATCH 187/230] fix: `insert_rows()` accepts float column values as strings again (#824) --- google/cloud/bigquery/_helpers.py | 12 +++++++----- tests/unit/test__helpers.py | 24 ++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index bf0f80e22..0a1f71444 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -19,6 +19,7 @@ import decimal import math import re +from typing import Union from google.cloud._helpers import UTC from google.cloud._helpers import _date_from_iso8601_date @@ -338,14 +339,15 @@ def _int_to_json(value): return value -def _float_to_json(value): +def _float_to_json(value) -> 
Union[None, str, float]: """Coerce 'value' to an JSON-compatible representation.""" if value is None: return None - elif math.isnan(value) or math.isinf(value): - return str(value) - else: - return float(value) + + if isinstance(value, str): + value = float(value) + + return str(value) if (math.isnan(value) or math.isinf(value)) else float(value) def _decimal_to_json(value): diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index af026ccbe..f8d00e67d 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -690,21 +690,45 @@ def _call_fut(self, value): def test_w_none(self): self.assertEqual(self._call_fut(None), None) + def test_w_non_numeric(self): + with self.assertRaises(TypeError): + self._call_fut(object()) + + def test_w_integer(self): + result = self._call_fut(123) + self.assertIsInstance(result, float) + self.assertEqual(result, 123.0) + def test_w_float(self): self.assertEqual(self._call_fut(1.23), 1.23) + def test_w_float_as_string(self): + self.assertEqual(self._call_fut("1.23"), 1.23) + def test_w_nan(self): result = self._call_fut(float("nan")) self.assertEqual(result.lower(), "nan") + def test_w_nan_as_string(self): + result = self._call_fut("NaN") + self.assertEqual(result.lower(), "nan") + def test_w_infinity(self): result = self._call_fut(float("inf")) self.assertEqual(result.lower(), "inf") + def test_w_infinity_as_string(self): + result = self._call_fut("inf") + self.assertEqual(result.lower(), "inf") + def test_w_negative_infinity(self): result = self._call_fut(float("-inf")) self.assertEqual(result.lower(), "-inf") + def test_w_negative_infinity_as_string(self): + result = self._call_fut("-inf") + self.assertEqual(result.lower(), "-inf") + class Test_decimal_to_json(unittest.TestCase): def _call_fut(self, value): From a505440e810d377dbb97e33412580089d67db9ba Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 28 Jul 2021 10:45:37 -0500 
Subject: [PATCH 188/230] chore: release 2.23.1 (#825) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 966a8744a..be4eab769 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.23.1](https://www.github.com/googleapis/python-bigquery/compare/v2.23.0...v2.23.1) (2021-07-28) + + +### Bug Fixes + +* `insert_rows()` accepts float column values as strings again ([#824](https://www.github.com/googleapis/python-bigquery/issues/824)) ([d9378af](https://www.github.com/googleapis/python-bigquery/commit/d9378af13add879118a1d004529b811f72c325d6)) + ## [2.23.0](https://www.github.com/googleapis/python-bigquery/compare/v2.22.1...v2.23.0) (2021-07-27) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 416bf20ed..0195d572c 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.23.0" +__version__ = "2.23.1" From c541c69355cd4c3f37576b4f22955a1f8ebc82f0 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 28 Jul 2021 13:03:13 -0500 Subject: [PATCH 189/230] chore: add second protection rule for v3 branch (#828) --- .github/sync-repo-settings.yaml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index 2697f214c..cc69b2551 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -3,7 +3,16 @@ branchProtectionRules: # Identifies the protection rule pattern. Name of the branch to be protected. 
# Defaults to `master` -- pattern: '{master,v3}' +- pattern: master + requiredStatusCheckContexts: + - 'Kokoro' + - 'Kokoro snippets-3.8' + - 'cla/google' + - 'Samples - Lint' + - 'Samples - Python 3.6' + - 'Samples - Python 3.7' + - 'Samples - Python 3.8' +- pattern: v3 requiredStatusCheckContexts: - 'Kokoro' - 'Kokoro snippets-3.8' From 48e8a3535a13abe97ccc76e1fa42ca3a179ba496 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 28 Jul 2021 21:43:43 +0200 Subject: [PATCH 190/230] chore(deps): update dependency google-cloud-bigquery to v2.23.1 (#827) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 0f9c3a2e3..6f6e670ab 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.23.0 +google-cloud-bigquery==2.23.1 google-cloud-bigquery-storage==2.6.0 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 81ef4df2f..dd36b5fe4 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.23.0 +google-cloud-bigquery==2.23.1 google-cloud-bigquery-storage==2.6.0 google-auth-oauthlib==0.4.4 grpcio==1.39.0 From d8c25ac139d53d0e689ee77ba46560dc63b4d9fa Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Thu, 29 Jul 2021 03:59:03 -0500 Subject: [PATCH 191/230] test: retry getting rows after streaming them in `test_insert_rows_from_dataframe` (#832) --- tests/system/test_pandas.py | 40 ++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index ddf5eaf43..821b375e1 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -21,6 +21,7 @@ import io import operator +import 
google.api_core.retry import pkg_resources import pytest import pytz @@ -41,6 +42,10 @@ PANDAS_INT64_VERSION = pkg_resources.parse_version("1.0.0") +class MissingDataError(Exception): + pass + + def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_id): """Test that a DataFrame with dtypes that map well to BigQuery types can be uploaded without specifying a schema. @@ -666,19 +671,6 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id): ) for errors in chunk_errors: assert not errors - - # Use query to fetch rows instead of listing directly from the table so - # that we get values from the streaming buffer. - rows = list( - bigquery_client.query( - "SELECT * FROM `{}.{}.{}`".format( - table.project, table.dataset_id, table.table_id - ) - ) - ) - - sorted_rows = sorted(rows, key=operator.attrgetter("int_col")) - row_tuples = [r.values() for r in sorted_rows] expected = [ # Pandas often represents NULL values as NaN. Convert to None for # easier comparison. @@ -686,7 +678,27 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id): for data_row in dataframe.itertuples(index=False) ] - assert len(row_tuples) == len(expected) + # Use query to fetch rows instead of listing directly from the table so + # that we get values from the streaming buffer "within a few seconds". 
+ # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability + @google.api_core.retry.Retry( + predicate=google.api_core.retry.if_exception_type(MissingDataError) + ) + def get_rows(): + rows = list( + bigquery_client.query( + "SELECT * FROM `{}.{}.{}`".format( + table.project, table.dataset_id, table.table_id + ) + ) + ) + if len(rows) != len(expected): + raise MissingDataError() + return rows + + rows = get_rows() + sorted_rows = sorted(rows, key=operator.attrgetter("int_col")) + row_tuples = [r.values() for r in sorted_rows] for row, expected_row in zip(row_tuples, expected): assert ( From 8149d9e3116e6f5340b9a15eb2c46deaaa24920b Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 29 Jul 2021 13:00:02 +0200 Subject: [PATCH 192/230] chore(deps): update dependency pyarrow to v5 (#834) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index dd36b5fe4..73badd1f3 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -8,5 +8,5 @@ matplotlib==3.3.4; python_version < '3.7' matplotlib==3.4.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' pandas==1.2.0; python_version >= '3.7' -pyarrow==4.0.1 +pyarrow==5.0.0 pytz==2021.1 From b9349adb2b54e26a45dbb69c10a948f5fc015a3c Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 29 Jul 2021 13:00:38 +0200 Subject: [PATCH 193/230] chore(deps): update dependency google-cloud-bigquery-storage to v2.6.2 (#795) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 6f6e670ab..eca0275a5 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 google-cloud-bigquery==2.23.1 
-google-cloud-bigquery-storage==2.6.0 +google-cloud-bigquery-storage==2.6.2 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 73badd1f3..8f4ea0406 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.23.1 -google-cloud-bigquery-storage==2.6.0 +google-cloud-bigquery-storage==2.6.2 google-auth-oauthlib==0.4.4 grpcio==1.39.0 ipython==7.16.1; python_version < '3.7' From 80e3a61c60419fb19b70b664c6415cd01ba82f5b Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Thu, 29 Jul 2021 16:42:35 +0200 Subject: [PATCH 194/230] deps: expand pyarrow pins to support 5.x releases (#833) --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 0ca19b576..e9deaf117 100644 --- a/setup.py +++ b/setup.py @@ -54,10 +54,10 @@ # grpc.Channel.close() method isn't added until 1.32.0. # https://github.com/grpc/grpc/pull/15254 "grpcio >= 1.38.1, < 2.0dev", - "pyarrow >= 1.0.0, < 5.0dev", + "pyarrow >= 1.0.0, < 6.0dev", ], - "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 5.0dev"], - "bignumeric_type": ["pyarrow >= 3.0.0, < 5.0dev"], + "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 6.0dev"], + "bignumeric_type": ["pyarrow >= 3.0.0, < 6.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ "opentelemetry-api >= 0.11b0", From 40ef77f376db0db9be23de1a3657be9571f5b48f Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 29 Jul 2021 10:04:05 -0500 Subject: [PATCH 195/230] chore: release 2.23.2 (#835) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index be4eab769..0c08e7910 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: 
https://pypi.org/project/google-cloud-bigquery/#history +### [2.23.2](https://www.github.com/googleapis/python-bigquery/compare/v2.23.1...v2.23.2) (2021-07-29) + + +### Dependencies + +* expand pyarrow pins to support 5.x releases ([#833](https://www.github.com/googleapis/python-bigquery/issues/833)) ([80e3a61](https://www.github.com/googleapis/python-bigquery/commit/80e3a61c60419fb19b70b664c6415cd01ba82f5b)) + ### [2.23.1](https://www.github.com/googleapis/python-bigquery/compare/v2.23.0...v2.23.1) (2021-07-28) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 0195d572c..0460e7bb9 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.23.1" +__version__ = "2.23.2" From 55687b89cc5ab04d1ff5ffeb31e6a4bf3b9eff79 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 29 Jul 2021 19:57:59 +0200 Subject: [PATCH 196/230] chore(deps): update dependency google-auth-oauthlib to v0.4.5 (#839) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 8f4ea0406..d7a99a8bd 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,6 +1,6 @@ google-cloud-bigquery==2.23.1 google-cloud-bigquery-storage==2.6.2 -google-auth-oauthlib==0.4.4 +google-auth-oauthlib==0.4.5 grpcio==1.39.0 ipython==7.16.1; python_version < '3.7' ipython==7.17.0; python_version >= '3.7' From 85ce81cfd2e7199fa9016065c7329acb6079528c Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 29 Jul 2021 21:36:10 +0200 Subject: [PATCH 197/230] chore(deps): update dependency google-cloud-bigquery to v2.23.2 (#838) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 
deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index eca0275a5..5aa967b24 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.23.1 +google-cloud-bigquery==2.23.2 google-cloud-bigquery-storage==2.6.2 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index d7a99a8bd..4f2eaf90b 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.23.1 +google-cloud-bigquery==2.23.2 google-cloud-bigquery-storage==2.6.2 google-auth-oauthlib==0.4.5 grpcio==1.39.0 From 20df24b70e8934196200d0335c7f5afbdd08ea37 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 3 Aug 2021 03:14:34 +0200 Subject: [PATCH 198/230] chore(deps): update dependency google-cloud-testutils to v1 (#845) --- samples/snippets/requirements-test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 9e9d4e40f..b8dee50d0 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==0.3.0 +google-cloud-testutils==1.0.0 pytest==6.2.4 mock==4.0.3 From 7016f69b6064be101a359bc093ea74fc2a305ac7 Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Mon, 2 Aug 2021 19:20:21 -0600 Subject: [PATCH 199/230] chore: require CODEOWNER review and up to date branches (#846) These two lines bring the rules on this repo in line with the defaults: https://github.com/googleapis/repo-automation-bots/blob/63c858e539e1f4d9bb8ea66e12f9c0a0de5fef55/packages/sync-repo-settings/src/required-checks.json#L40-L50 --- .github/sync-repo-settings.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/sync-repo-settings.yaml 
b/.github/sync-repo-settings.yaml index cc69b2551..8634a3043 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -4,6 +4,8 @@ branchProtectionRules: # Identifies the protection rule pattern. Name of the branch to be protected. # Defaults to `master` - pattern: master + requiresCodeOwnerReviews: true + requiresStrictStatusChecks: true requiredStatusCheckContexts: - 'Kokoro' - 'Kokoro snippets-3.8' @@ -13,6 +15,8 @@ branchProtectionRules: - 'Samples - Python 3.7' - 'Samples - Python 3.8' - pattern: v3 + requiresCodeOwnerReviews: true + requiresStrictStatusChecks: true requiredStatusCheckContexts: - 'Kokoro' - 'Kokoro snippets-3.8' From cf0b0d862e01e9309407b2ac1a48f0bfe23d520d Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Thu, 5 Aug 2021 08:59:15 -0600 Subject: [PATCH 200/230] chore: add api-bigquery as a samples owner (#852) --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index ae570eb01..76112476b 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -8,4 +8,4 @@ * @googleapis/api-bigquery @googleapis/yoshi-python # The python-samples-reviewers team is the default owner for samples changes -/samples/ @googleapis/python-samples-owners +/samples/ @googleapis/api-bigquery @googleapis/python-samples-owners From 30770fd0575fbd5aaa70c14196a4cc54627aecd2 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 6 Aug 2021 12:14:24 -0500 Subject: [PATCH 201/230] fix: increase default retry deadline to 10 minutes (#859) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The backend API has a timeout of 4 minutes, so the default of 2 minutes was not allowing for any retries to happen in some cases. Thank you for opening a Pull Request! 
Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #853 🦕 --- google/cloud/bigquery/retry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index 2df4de08b..bab28aacb 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -47,7 +47,7 @@ def _should_retry(exc): return reason in _RETRYABLE_REASONS -DEFAULT_RETRY = retry.Retry(predicate=_should_retry) +DEFAULT_RETRY = retry.Retry(predicate=_should_retry, deadline=600.0) """The default retry object. Any method with a ``retry`` parameter will be retried automatically, From e2cbcaa75a5da2bcd520d9116ead90b02d7326fd Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Fri, 6 Aug 2021 22:34:42 +0200 Subject: [PATCH 202/230] process: add yoshi-python to samples CODEOWNERS (#858) Closes #857. 
--- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 76112476b..6763f258c 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -8,4 +8,4 @@ * @googleapis/api-bigquery @googleapis/yoshi-python # The python-samples-reviewers team is the default owner for samples changes -/samples/ @googleapis/api-bigquery @googleapis/python-samples-owners +/samples/ @googleapis/api-bigquery @googleapis/python-samples-owners @googleapis/yoshi-python From 9694a4dd1544e06209d091d9a36d086ea794b3b0 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 9 Aug 2021 12:24:20 -0500 Subject: [PATCH 203/230] chore: release 2.23.3 (#860) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Tim Swast --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c08e7910..856f1ecd1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.23.3](https://www.github.com/googleapis/python-bigquery/compare/v2.23.2...v2.23.3) (2021-08-06) + + +### Bug Fixes + +* increase default retry deadline to 10 minutes ([#859](https://www.github.com/googleapis/python-bigquery/issues/859)) ([30770fd](https://www.github.com/googleapis/python-bigquery/commit/30770fd0575fbd5aaa70c14196a4cc54627aecd2)) + ### [2.23.2](https://www.github.com/googleapis/python-bigquery/compare/v2.23.1...v2.23.2) (2021-07-29) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 0460e7bb9..df992a051 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.23.2" +__version__ = "2.23.3" From 9c6614f939604d3ac99b2945c802df277b629d1b Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Mon, 9 Aug 2021 20:10:11 +0200 Subject: [PATCH 204/230] chore(deps): update dependency google-cloud-bigquery to v2.23.3 (#866) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | `==2.23.2` -> `==2.23.3` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.23.3/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.23.3/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.23.3/compatibility-slim/2.23.2)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.23.3/confidence-slim/2.23.2)](https://docs.renovatebot.com/merge-confidence/) | *** ### Release Notes
googleapis/python-bigquery ### [`v2.23.3`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​2233-httpswwwgithubcomgoogleapispython-bigquerycomparev2232v2233-2021-08-06) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.23.2...v2.23.3)
*** ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. *** * \[ ] If you want to rebase/retry this PR, check this box. *** This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 5aa967b24..d55d0f254 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.23.2 +google-cloud-bigquery==2.23.3 google-cloud-bigquery-storage==2.6.2 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 4f2eaf90b..69f537de4 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.23.2 +google-cloud-bigquery==2.23.3 google-cloud-bigquery-storage==2.6.2 google-auth-oauthlib==0.4.5 grpcio==1.39.0 From 7f7b1a808d50558772a0deb534ca654da65d629e Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Tue, 10 Aug 2021 19:21:41 +0200 Subject: [PATCH 205/230] feat: add support for transaction statistics (#849) * feat: add support for transaction statistics * Hoist transaction_info into base job class * Add versionadded directive to new property and class * Include new class in docs reference --- docs/reference.rst | 1 + google/cloud/bigquery/__init__.py | 2 ++ google/cloud/bigquery/job/__init__.py | 2 ++ google/cloud/bigquery/job/base.py | 29 +++++++++++++++++++++++ 
tests/system/test_client.py | 34 +++++++++++++++++++++++++++ tests/unit/job/helpers.py | 1 + tests/unit/job/test_base.py | 14 +++++++++++ tests/unit/job/test_query.py | 29 +++++++++++++++++++++++ 8 files changed, 112 insertions(+) diff --git a/docs/reference.rst b/docs/reference.rst index 8a5bff9a4..5ac596370 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -68,6 +68,7 @@ Job-Related Types job.SourceFormat job.WriteDisposition job.SchemaUpdateOption + job.TransactionInfo Dataset diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 222aadcc9..a7a0da3dd 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -70,6 +70,7 @@ from google.cloud.bigquery.job import ScriptOptions from google.cloud.bigquery.job import SourceFormat from google.cloud.bigquery.job import UnknownJob +from google.cloud.bigquery.job import TransactionInfo from google.cloud.bigquery.job import WriteDisposition from google.cloud.bigquery.model import Model from google.cloud.bigquery.model import ModelReference @@ -149,6 +150,7 @@ "GoogleSheetsOptions", "ParquetOptions", "ScriptOptions", + "TransactionInfo", "DEFAULT_RETRY", # Enum Constants "enums", diff --git a/google/cloud/bigquery/job/__init__.py b/google/cloud/bigquery/job/__init__.py index 4c16d0e20..f51311b0b 100644 --- a/google/cloud/bigquery/job/__init__.py +++ b/google/cloud/bigquery/job/__init__.py @@ -22,6 +22,7 @@ from google.cloud.bigquery.job.base import ReservationUsage from google.cloud.bigquery.job.base import ScriptStatistics from google.cloud.bigquery.job.base import ScriptStackFrame +from google.cloud.bigquery.job.base import TransactionInfo from google.cloud.bigquery.job.base import UnknownJob from google.cloud.bigquery.job.copy_ import CopyJob from google.cloud.bigquery.job.copy_ import CopyJobConfig @@ -81,5 +82,6 @@ "QueryPriority", "SchemaUpdateOption", "SourceFormat", + "TransactionInfo", "WriteDisposition", ] diff --git 
a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index 20ad81c0b..e5fc592a6 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -19,6 +19,7 @@ import http import threading import typing +from typing import Dict, Optional from google.api_core import exceptions import google.api_core.future.polling @@ -88,6 +89,22 @@ def _error_result_to_exception(error_result): ) +class TransactionInfo(typing.NamedTuple): + """[Alpha] Information of a multi-statement transaction. + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#TransactionInfo + + .. versionadded:: 2.24.0 + """ + + transaction_id: str + """Output only. ID of the transaction.""" + + @classmethod + def from_api_repr(cls, transaction_info: Dict[str, str]) -> "TransactionInfo": + return cls(transaction_info["transactionId"]) + + class _JobReference(object): """A reference to a job. @@ -336,6 +353,18 @@ def reservation_usage(self): for usage in usage_stats_raw ] + @property + def transaction_info(self) -> Optional[TransactionInfo]: + """Information of the multi-statement transaction if this job is part of one. + + .. versionadded:: 2.24.0 + """ + info = self._properties.get("statistics", {}).get("transactionInfo") + if info is None: + return None + else: + return TransactionInfo.from_api_repr(info) + @property def error_result(self): """Error information about the job as a whole. 
diff --git a/tests/system/test_client.py b/tests/system/test_client.py index baa2b6ad8..f540611a6 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -1557,6 +1557,40 @@ def test_dml_statistics(self): assert query_job.dml_stats.updated_row_count == 0 assert query_job.dml_stats.deleted_row_count == 3 + def test_transaction_info(self): + table_schema = ( + bigquery.SchemaField("foo", "STRING"), + bigquery.SchemaField("bar", "INTEGER"), + ) + + dataset_id = _make_dataset_id("bq_system_test") + self.temp_dataset(dataset_id) + table_id = f"{Config.CLIENT.project}.{dataset_id}.test_dml_statistics" + + # Create the table before loading so that the column order is deterministic. + table = helpers.retry_403(Config.CLIENT.create_table)( + Table(table_id, schema=table_schema) + ) + self.to_delete.insert(0, table) + + # Insert a few rows and check the stats. + sql = f""" + BEGIN TRANSACTION; + INSERT INTO `{table_id}` + VALUES ("one", 1), ("two", 2), ("three", 3), ("four", 4); + + UPDATE `{table_id}` + SET bar = bar + 1 + WHERE bar > 2; + COMMIT TRANSACTION; + """ + query_job = Config.CLIENT.query(sql) + query_job.result() + + # Transaction ID set by the server should be accessible + assert query_job.transaction_info is not None + assert query_job.transaction_info.transaction_id != "" + def test_dbapi_w_standard_sql_types(self): for sql, expected in helpers.STANDARD_SQL_EXAMPLES: Config.CURSOR.execute(sql) diff --git a/tests/unit/job/helpers.py b/tests/unit/job/helpers.py index ea071c5ac..c792214e7 100644 --- a/tests/unit/job/helpers.py +++ b/tests/unit/job/helpers.py @@ -162,6 +162,7 @@ def _verifyInitialReadonlyProperties(self, job): self.assertIsNone(job.created) self.assertIsNone(job.started) self.assertIsNone(job.ended) + self.assertIsNone(job.transaction_info) # derived from resource['status'] self.assertIsNone(job.error_result) diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index 405ad6ee5..0ac1d05b5 100644 --- 
a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -227,6 +227,20 @@ def test_script_statistics(self): self.assertEqual(stack_frame.end_column, 14) self.assertEqual(stack_frame.text, "QUERY TEXT") + def test_transaction_info(self): + from google.cloud.bigquery.job.base import TransactionInfo + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + assert job.transaction_info is None + + statistics = job._properties["statistics"] = {} + assert job.transaction_info is None + + statistics["transactionInfo"] = {"transactionId": "123-abc-xyz"} + assert isinstance(job.transaction_info, TransactionInfo) + assert job.transaction_info.transaction_id == "123-abc-xyz" + def test_num_child_jobs(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 482f7f3af..d41370520 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -128,6 +128,18 @@ def _verify_dml_stats_resource_properties(self, job, resource): else: assert job.dml_stats is None + def _verify_transaction_info_resource_properties(self, job, resource): + resource_stats = resource.get("statistics", {}) + + if "transactionInfo" in resource_stats: + resource_transaction_info = resource_stats["transactionInfo"] + job_transaction_info = job.transaction_info + assert job_transaction_info.transaction_id == resource_transaction_info.get( + "transactionId" + ) + else: + assert job.transaction_info is None + def _verify_configuration_properties(self, job, configuration): if "dryRun" in configuration: self.assertEqual(job.dry_run, configuration["dryRun"]) @@ -137,6 +149,7 @@ def _verify_configuration_properties(self, job, configuration): def _verifyResourceProperties(self, job, resource): self._verifyReadonlyResourceProperties(job, resource) self._verify_dml_stats_resource_properties(job, resource) + 
self._verify_transaction_info_resource_properties(job, resource) configuration = resource.get("configuration", {}) self._verify_configuration_properties(job, configuration) @@ -325,6 +338,22 @@ def test_from_api_repr_with_dml_stats(self): self.assertIs(job._client, client) self._verifyResourceProperties(job, RESOURCE) + def test_from_api_repr_with_transaction_info(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = { + "id": self.JOB_ID, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "configuration": {"query": {"query": self.QUERY}}, + "statistics": {"transactionInfo": {"transactionId": "1a2b-3c4d"}}, + } + klass = self._get_target_class() + + job = klass.from_api_repr(RESOURCE, client=client) + + self.assertIs(job._client, client) + self._verifyResourceProperties(job, RESOURCE) + def test_from_api_repr_w_properties(self): from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import SchemaUpdateOption From 443b8ab28c19bdd0bd3cad39db33cb7bc8ad8741 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 10 Aug 2021 20:02:10 +0200 Subject: [PATCH 206/230] chore(deps): update dependency google-cloud-bigquery-storage to v2.6.3 (#863) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery-storage](https://togithub.com/googleapis/python-bigquery-storage) | `==2.6.2` -> `==2.6.3` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.6.3/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.6.3/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | 
[![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.6.3/compatibility-slim/2.6.2)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery-storage/2.6.3/confidence-slim/2.6.2)](https://docs.renovatebot.com/merge-confidence/) | *** ### Release Notes
googleapis/python-bigquery-storage ### [`v2.6.3`](https://togithub.com/googleapis/python-bigquery-storage/blob/master/CHANGELOG.md#​263-httpswwwgithubcomgoogleapispython-bigquery-storagecomparev262v263-2021-08-06) [Compare Source](https://togithub.com/googleapis/python-bigquery-storage/compare/v2.6.2...v2.6.3)
*** ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. *** * \[x] If you want to rebase/retry this PR, check this box. *** This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index d55d0f254..d3e599101 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 google-cloud-bigquery==2.23.3 -google-cloud-bigquery-storage==2.6.2 +google-cloud-bigquery-storage==2.6.3 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 69f537de4..1545ed96e 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,5 +1,5 @@ google-cloud-bigquery==2.23.3 -google-cloud-bigquery-storage==2.6.2 +google-cloud-bigquery-storage==2.6.3 google-auth-oauthlib==0.4.5 grpcio==1.39.0 ipython==7.16.1; python_version < '3.7' From aee814c6a48758325609b6fdfc35e2378461786e Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 11 Aug 2021 12:29:39 +0200 Subject: [PATCH 207/230] chore: fix INSTALL_LIBRARY_FROM_SOURCE in noxfile.py (#869) Source-Link: https://github.com/googleapis/synthtool/commit/6252f2cd074c38f37b44abe5e96d128733eb1b61 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:50e35228649c47b6ca82aa0be3ff9eb2afce51c82b66c4a03fe4afeb5ff6c0fc 
Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 2 +- samples/geography/noxfile.py | 5 ++++- samples/snippets/noxfile.py | 5 ++++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 9ee60f7e4..649877dc4 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:aea14a583128771ae8aefa364e1652f3c56070168ef31beb203534222d842b8b + digest: sha256:50e35228649c47b6ca82aa0be3ff9eb2afce51c82b66c4a03fe4afeb5ff6c0fc diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index 9fc7f1782..7dbea0914 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -94,7 +94,10 @@ def get_pytest_env_vars() -> Dict[str, str]: TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS]) -INSTALL_LIBRARY_FROM_SOURCE = bool(os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False)) +INSTALL_LIBRARY_FROM_SOURCE = os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False) in ( + "True", + "true", +) # # Style Checks # diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index 9fc7f1782..7dbea0914 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -94,7 +94,10 @@ def get_pytest_env_vars() -> Dict[str, str]: TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS]) -INSTALL_LIBRARY_FROM_SOURCE = bool(os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False)) +INSTALL_LIBRARY_FROM_SOURCE = os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False) in ( + "True", + "true", +) # # Style Checks # From c1a3d4435739a21d25aa154145e36d3a7c42eeb6 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 11 Aug 2021 16:28:43 +0200 Subject: [PATCH 208/230] feat: make the same `Table*` instances equal to each other (#867) * feat: make the same Table instances equal to each other * Table equality should ignore metadata 
differences * Compare instances through tableReference property * Make Table instances hashable * Make Table* classes interchangeable If these classes reference the same table, they are now considered equal. --- google/cloud/bigquery/table.py | 42 +++++- tests/unit/test_table.py | 225 ++++++++++++++++++++++++++++++--- 2 files changed, 244 insertions(+), 23 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index daade1ac6..d23885ebf 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -255,9 +255,16 @@ def _key(self): return (self._project, self._dataset_id, self._table_id) def __eq__(self, other): - if not isinstance(other, TableReference): + if isinstance(other, (Table, TableListItem)): + return ( + self.project == other.project + and self.dataset_id == other.dataset_id + and self.table_id == other.table_id + ) + elif isinstance(other, TableReference): + return self._key() == other._key() + else: return NotImplemented - return self._key() == other._key() def __ne__(self, other): return not self == other @@ -1011,6 +1018,24 @@ def _build_resource(self, filter_fields): """Generate a resource for ``update``.""" return _helpers._build_resource_from_properties(self, filter_fields) + def __eq__(self, other): + if isinstance(other, Table): + return ( + self._properties["tableReference"] + == other._properties["tableReference"] + ) + elif isinstance(other, (TableReference, TableListItem)): + return ( + self.project == other.project + and self.dataset_id == other.dataset_id + and self.table_id == other.table_id + ) + else: + return NotImplemented + + def __hash__(self): + return hash((self.project, self.dataset_id, self.table_id)) + def __repr__(self): return "Table({})".format(repr(self.reference)) @@ -1229,6 +1254,19 @@ def to_api_repr(self) -> dict: """ return copy.deepcopy(self._properties) + def __eq__(self, other): + if isinstance(other, (Table, TableReference, TableListItem)): + return ( + 
self.project == other.project + and self.dataset_id == other.dataset_id + and self.table_id == other.table_id + ) + else: + return NotImplemented + + def __hash__(self): + return hash((self.project, self.dataset_id, self.table_id)) + def _row_from_mapping(mapping, schema): """Convert a mapping to a row tuple using the schema. diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 4b1fd833b..a5badc66c 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -115,8 +115,6 @@ def _make_one(self, *args, **kw): return self._get_target_class()(*args, **kw) def test_ctor_defaults(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset_ref = DatasetReference("project_1", "dataset_1") table_ref = self._make_one(dataset_ref, "table_1") @@ -124,8 +122,6 @@ def test_ctor_defaults(self): self.assertEqual(table_ref.table_id, "table_1") def test_to_api_repr(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset_ref = DatasetReference("project_1", "dataset_1") table_ref = self._make_one(dataset_ref, "table_1") @@ -137,7 +133,6 @@ def test_to_api_repr(self): ) def test_from_api_repr(self): - from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.table import TableReference dataset_ref = DatasetReference("project_1", "dataset_1") @@ -204,8 +199,6 @@ def test_from_string_ignores_default_project(self): self.assertEqual(got.table_id, "string_table") def test___eq___wrong_type(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset_ref = DatasetReference("project_1", "dataset_1") table = self._make_one(dataset_ref, "table_1") other = object() @@ -213,8 +206,6 @@ def test___eq___wrong_type(self): self.assertEqual(table, mock.ANY) def test___eq___project_mismatch(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") other_dataset = DatasetReference("project_2", "dataset_1") table = 
self._make_one(dataset, "table_1") @@ -222,8 +213,6 @@ def test___eq___project_mismatch(self): self.assertNotEqual(table, other) def test___eq___dataset_mismatch(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") other_dataset = DatasetReference("project_1", "dataset_2") table = self._make_one(dataset, "table_1") @@ -231,24 +220,18 @@ def test___eq___dataset_mismatch(self): self.assertNotEqual(table, other) def test___eq___table_mismatch(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") table = self._make_one(dataset, "table_1") other = self._make_one(dataset, "table_2") self.assertNotEqual(table, other) def test___eq___equality(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") table = self._make_one(dataset, "table_1") other = self._make_one(dataset, "table_1") self.assertEqual(table, other) def test___hash__set_equality(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") table1 = self._make_one(dataset, "table1") table2 = self._make_one(dataset, "table2") @@ -257,8 +240,6 @@ def test___hash__set_equality(self): self.assertEqual(set_one, set_two) def test___hash__not_equals(self): - from google.cloud.bigquery.dataset import DatasetReference - dataset = DatasetReference("project_1", "dataset_1") table1 = self._make_one(dataset, "table1") table2 = self._make_one(dataset, "table2") @@ -294,8 +275,6 @@ def _get_target_class(): return Table def _make_one(self, *args, **kw): - from google.cloud.bigquery.dataset import DatasetReference - if len(args) == 0: dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -581,6 +560,68 @@ def test_num_rows_getter(self): with self.assertRaises(ValueError): getattr(table, "num_rows") + def 
test__eq__wrong_type(self): + table = self._make_one("project_foo.dataset_bar.table_baz") + + class TableWannabe: + pass + + not_a_table = TableWannabe() + not_a_table._properties = table._properties + + assert table != not_a_table # Can't fake it. + + def test__eq__same_table_basic(self): + table_1 = self._make_one("project_foo.dataset_bar.table_baz") + table_2 = self._make_one("project_foo.dataset_bar.table_baz") + assert table_1 == table_2 + + def test__eq__same_table_multiple_properties(self): + from google.cloud.bigquery import SchemaField + + table_1 = self._make_one("project_foo.dataset_bar.table_baz") + table_1.require_partition_filter = True + table_1.labels = {"first": "one", "second": "two"} + + table_1.schema = [ + SchemaField("name", "STRING", "REQUIRED"), + SchemaField("age", "INTEGER", "NULLABLE"), + ] + + table_2 = self._make_one("project_foo.dataset_bar.table_baz") + table_2.require_partition_filter = True + table_2.labels = {"first": "one", "second": "two"} + table_2.schema = [ + SchemaField("name", "STRING", "REQUIRED"), + SchemaField("age", "INTEGER", "NULLABLE"), + ] + + assert table_1 == table_2 + + def test__eq__same_table_property_different(self): + table_1 = self._make_one("project_foo.dataset_bar.table_baz") + table_1.description = "This is table baz" + + table_2 = self._make_one("project_foo.dataset_bar.table_baz") + table_2.description = "This is also table baz" + + assert table_1 == table_2 # Still equal, only table reference is important. 
+ + def test__eq__different_table(self): + table_1 = self._make_one("project_foo.dataset_bar.table_baz") + table_2 = self._make_one("project_foo.dataset_bar.table_baz_2") + + assert table_1 != table_2 + + def test_hashable(self): + table_1 = self._make_one("project_foo.dataset_bar.table_baz") + table_1.description = "This is a table" + + table_1b = self._make_one("project_foo.dataset_bar.table_baz") + table_1b.description = "Metadata is irrelevant for hashes" + + assert hash(table_1) == hash(table_1b) + def test_schema_setter_non_sequence(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -1543,6 +1584,148 @@ def test_to_api_repr(self): table = self._make_one(resource) self.assertEqual(table.to_api_repr(), resource) + def test__eq__wrong_type(self): + resource = { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + table = self._make_one(resource) + + class FakeTableListItem: + project = "project_foo" + dataset_id = "dataset_bar" + table_id = "table_baz" + + not_a_table = FakeTableListItem() + + assert table != not_a_table # Can't fake it. + + def test__eq__same_table(self): + resource = { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + table_1 = self._make_one(resource) + table_2 = self._make_one(resource) + + assert table_1 == table_2 + + def test__eq__same_table_property_different(self): + table_ref_resource = { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + + resource_1 = {"tableReference": table_ref_resource, "friendlyName": "Table One"} + table_1 = self._make_one(resource_1) + + resource_2 = {"tableReference": table_ref_resource, "friendlyName": "Table Two"} + table_2 = self._make_one(resource_2) + + assert table_1 == table_2 # Still equal, only table reference is important. 
+ + def test__eq__different_table(self): + resource_1 = { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + table_1 = self._make_one(resource_1) + + resource_2 = { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_quux", + } + } + table_2 = self._make_one(resource_2) + + assert table_1 != table_2 + + def test_hashable(self): + resource = { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + table_item = self._make_one(resource) + table_item_2 = self._make_one(resource) + + assert hash(table_item) == hash(table_item_2) + + +class TestTableClassesInterchangeability: + @staticmethod + def _make_table(*args, **kwargs): + from google.cloud.bigquery.table import Table + + return Table(*args, **kwargs) + + @staticmethod + def _make_table_ref(*args, **kwargs): + from google.cloud.bigquery.table import TableReference + + return TableReference(*args, **kwargs) + + @staticmethod + def _make_table_list_item(*args, **kwargs): + from google.cloud.bigquery.table import TableListItem + + return TableListItem(*args, **kwargs) + + def test_table_eq_table_ref(self): + + table = self._make_table("project_foo.dataset_bar.table_baz") + dataset_ref = DatasetReference("project_foo", "dataset_bar") + table_ref = self._make_table_ref(dataset_ref, "table_baz") + + assert table == table_ref + assert table_ref == table + + def test_table_eq_table_list_item(self): + table = self._make_table("project_foo.dataset_bar.table_baz") + table_list_item = self._make_table_list_item( + { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + ) + + assert table == table_list_item + assert table_list_item == table + + def test_table_ref_eq_table_list_item(self): + + dataset_ref = DatasetReference("project_foo", "dataset_bar") + table_ref = 
self._make_table_ref(dataset_ref, "table_baz") + table_list_item = self._make_table_list_item( + { + "tableReference": { + "projectId": "project_foo", + "datasetId": "dataset_bar", + "tableId": "table_baz", + } + } + ) + + assert table_ref == table_list_item + assert table_list_item == table_ref + class TestSnapshotDefinition: @staticmethod From 93d15e2e5405c2cc6d158c4e5737361344193dbc Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 11 Aug 2021 10:12:23 -0500 Subject: [PATCH 209/230] feat: support `ScalarQueryParameterType` for `type_` argument in `ScalarQueryParameter` constructor (#850) Follow-up to https://github.com/googleapis/python-bigquery/pull/840/files#r679880582 Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) --- docs/conf.py | 1 + docs/reference.rst | 1 + google/cloud/bigquery/enums.py | 24 +++++++++---------- google/cloud/bigquery/query.py | 42 ++++++++++++++++++++++++---------- tests/unit/test_query.py | 13 +++++++++++ 5 files changed, 57 insertions(+), 24 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index cb347160d..09f7ea414 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -110,6 +110,7 @@ # directories to ignore when looking for source files. 
exclude_patterns = [ "_build", + "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "samples/snippets/README.rst", diff --git a/docs/reference.rst b/docs/reference.rst index 5ac596370..d8738e67b 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -138,6 +138,7 @@ Query query.ArrayQueryParameter query.ScalarQueryParameter + query.ScalarQueryParameterType query.StructQueryParameter query.UDFResource diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 0da01d665..d67cebd4c 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -259,23 +259,23 @@ class SqlTypeNames(str, enum.Enum): class SqlParameterScalarTypes: """Supported scalar SQL query parameter types as type objects.""" - STRING = ScalarQueryParameterType("STRING") + BOOL = ScalarQueryParameterType("BOOL") + BOOLEAN = ScalarQueryParameterType("BOOL") + BIGDECIMAL = ScalarQueryParameterType("BIGNUMERIC") + BIGNUMERIC = ScalarQueryParameterType("BIGNUMERIC") BYTES = ScalarQueryParameterType("BYTES") - INTEGER = ScalarQueryParameterType("INT64") - INT64 = ScalarQueryParameterType("INT64") + DATE = ScalarQueryParameterType("DATE") + DATETIME = ScalarQueryParameterType("DATETIME") + DECIMAL = ScalarQueryParameterType("NUMERIC") FLOAT = ScalarQueryParameterType("FLOAT64") FLOAT64 = ScalarQueryParameterType("FLOAT64") - NUMERIC = ScalarQueryParameterType("NUMERIC") - BIGNUMERIC = ScalarQueryParameterType("BIGNUMERIC") - DECIMAL = ScalarQueryParameterType("NUMERIC") - BIGDECIMAL = ScalarQueryParameterType("BIGNUMERIC") - BOOLEAN = ScalarQueryParameterType("BOOL") - BOOL = ScalarQueryParameterType("BOOL") GEOGRAPHY = ScalarQueryParameterType("GEOGRAPHY") - TIMESTAMP = ScalarQueryParameterType("TIMESTAMP") - DATE = ScalarQueryParameterType("DATE") + INT64 = ScalarQueryParameterType("INT64") + INTEGER = ScalarQueryParameterType("INT64") + NUMERIC = ScalarQueryParameterType("NUMERIC") + STRING = ScalarQueryParameterType("STRING") TIME = 
ScalarQueryParameterType("TIME") - DATETIME = ScalarQueryParameterType("DATETIME") + TIMESTAMP = ScalarQueryParameterType("TIMESTAMP") class WriteDisposition(object): diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index d1e9a45a5..1f449f189 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -16,7 +16,9 @@ from collections import OrderedDict import copy -from typing import Union +import datetime +import decimal +from typing import Optional, Union from google.cloud.bigquery.table import _parse_schema_resource from google.cloud.bigquery._helpers import _rows_from_json @@ -24,6 +26,11 @@ from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_PARAM +_SCALAR_VALUE_TYPE = Optional[ + Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date] +] + + class UDFResource(object): """Describe a single user-defined function (UDF) resource. @@ -325,35 +332,46 @@ class ScalarQueryParameter(_AbstractQueryParameter): """Named / positional query parameters for scalar values. Args: - name (Optional[str]): + name: Parameter name, used via ``@foo`` syntax. If None, the parameter can only be addressed via position (``?``). - type_ (str): - Name of parameter type. One of 'STRING', 'INT64', - 'FLOAT64', 'NUMERIC', 'BIGNUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or - 'DATE'. + type_: + Name of parameter type. See + :class:`google.cloud.bigquery.enums.SqlTypeNames` and + :class:`google.cloud.bigquery.enums.SqlParameterScalarTypes` for + supported types. - value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]): + value: The scalar parameter value. 
""" - def __init__(self, name, type_, value): + def __init__( + self, + name: Optional[str], + type_: Optional[Union[str, ScalarQueryParameterType]], + value: _SCALAR_VALUE_TYPE, + ): self.name = name - self.type_ = type_ + if isinstance(type_, ScalarQueryParameterType): + self.type_ = type_._type + else: + self.type_ = type_ self.value = value @classmethod - def positional(cls, type_: str, value) -> "ScalarQueryParameter": + def positional( + cls, type_: Union[str, ScalarQueryParameterType], value: _SCALAR_VALUE_TYPE + ) -> "ScalarQueryParameter": """Factory for positional paramater. Args: - type_ (str): + type_: Name of parameter type. One of 'STRING', 'INT64', 'FLOAT64', 'NUMERIC', 'BIGNUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or 'DATE'. - value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]): + value: The scalar parameter value. Returns: diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 9483fe8dd..69a6772e5 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -13,6 +13,7 @@ # limitations under the License. 
import datetime +import decimal import unittest import mock @@ -430,6 +431,18 @@ def test_positional(self): self.assertEqual(param.type_, "INT64") self.assertEqual(param.value, 123) + def test_ctor_w_scalar_query_parameter_type(self): + from google.cloud.bigquery import enums + + param = self._make_one( + name="foo", + type_=enums.SqlParameterScalarTypes.BIGNUMERIC, + value=decimal.Decimal("123.456"), + ) + self.assertEqual(param.name, "foo") + self.assertEqual(param.type_, "BIGNUMERIC") + self.assertEqual(param.value, decimal.Decimal("123.456")) + def test_from_api_repr_w_name(self): RESOURCE = { "name": "foo", From 519d99c20e7d1101f76981f3de036fdf3c7a4ecc Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Wed, 11 Aug 2021 14:24:28 -0400 Subject: [PATCH 210/230] feat: retry failed query jobs in `result()` (#837) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #539 🦕 Previously, we only retried failed API requests. Now, we retry failed jobs (according to the predicate of the `Retry` object passed to `job.result()`). 
--- google/cloud/bigquery/client.py | 110 +++++++++---- google/cloud/bigquery/job/query.py | 84 ++++++++-- google/cloud/bigquery/retry.py | 20 +++ tests/system/test_job_retry.py | 72 +++++++++ tests/unit/test_job_retry.py | 247 +++++++++++++++++++++++++++++ tests/unit/test_retry.py | 24 +++ 6 files changed, 518 insertions(+), 39 deletions(-) create mode 100644 tests/system/test_job_retry.py create mode 100644 tests/unit/test_job_retry.py diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 742ecac2e..8142c59cd 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -86,7 +86,7 @@ from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.model import _model_arg_to_model_ref from google.cloud.bigquery.query import _QueryResults -from google.cloud.bigquery.retry import DEFAULT_RETRY +from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.schema import SchemaField @@ -3163,6 +3163,7 @@ def query( project: str = None, retry: retries.Retry = DEFAULT_RETRY, timeout: float = None, + job_retry: retries.Retry = DEFAULT_JOB_RETRY, ) -> job.QueryJob: """Run a SQL query. @@ -3192,21 +3193,52 @@ def query( Project ID of the project of where to run the job. Defaults to the client's project. retry (Optional[google.api_core.retry.Retry]): - How to retry the RPC. + How to retry the RPC. This only applies to making RPC + calls. It isn't used to retry failed jobs. This has + a reasonable default that should only be overridden + with care. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + job_retry (Optional[google.api_core.retry.Retry]): + How to retry failed jobs. The default retries + rate-limit-exceeded errors. Passing ``None`` disables + job retry. 
+ + Not all jobs can be retried. If ``job_id`` is + provided, then the job returned by the query will not + be retryable, and an exception will be raised if a + non-``None`` (and non-default) value for ``job_retry`` + is also provided. + + Note that errors aren't detected until ``result()`` is + called on the job returned. The ``job_retry`` + specified here becomes the default ``job_retry`` for + ``result()``, where it can also be specified. Returns: google.cloud.bigquery.job.QueryJob: A new query job instance. Raises: TypeError: - If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.QueryJobConfig` - class. + If ``job_config`` is not an instance of + :class:`~google.cloud.bigquery.job.QueryJobConfig` + class, or if both ``job_id`` and non-``None`` non-default + ``job_retry`` are provided. """ job_id_given = job_id is not None - job_id = _make_job_id(job_id, job_id_prefix) + if ( + job_id_given + and job_retry is not None + and job_retry is not DEFAULT_JOB_RETRY + ): + raise TypeError( + "`job_retry` was provided, but the returned job is" + " not retryable, because a custom `job_id` was" + " provided." 
+ ) + + job_id_save = job_id if project is None: project = self.project @@ -3214,8 +3246,6 @@ def query( if location is None: location = self.location - job_config = copy.deepcopy(job_config) - if self._default_query_job_config: if job_config: _verify_job_config_type( @@ -3225,6 +3255,8 @@ def query( # that is in the default, # should be filled in with the default # the incoming therefore has precedence + # + # Note that _fill_from_default doesn't mutate the receiver job_config = job_config._fill_from_default( self._default_query_job_config ) @@ -3233,34 +3265,54 @@ def query( self._default_query_job_config, google.cloud.bigquery.job.QueryJobConfig, ) - job_config = copy.deepcopy(self._default_query_job_config) + job_config = self._default_query_job_config - job_ref = job._JobReference(job_id, project=project, location=location) - query_job = job.QueryJob(job_ref, query, client=self, job_config=job_config) + # Note that we haven't modified the original job_config (or + # _default_query_job_config) up to this point. + job_config_save = job_config - try: - query_job._begin(retry=retry, timeout=timeout) - except core_exceptions.Conflict as create_exc: - # The thought is if someone is providing their own job IDs and they get - # their job ID generation wrong, this could end up returning results for - # the wrong query. We thus only try to recover if job ID was not given. 
- if job_id_given: - raise create_exc + def do_query(): + # Make a copy now, so that original doesn't get changed by the process + # below and to facilitate retry + job_config = copy.deepcopy(job_config_save) + + job_id = _make_job_id(job_id_save, job_id_prefix) + job_ref = job._JobReference(job_id, project=project, location=location) + query_job = job.QueryJob(job_ref, query, client=self, job_config=job_config) try: - query_job = self.get_job( - job_id, - project=project, - location=location, - retry=retry, - timeout=timeout, - ) - except core_exceptions.GoogleAPIError: # (includes RetryError) - raise create_exc + query_job._begin(retry=retry, timeout=timeout) + except core_exceptions.Conflict as create_exc: + # The thought is if someone is providing their own job IDs and they get + # their job ID generation wrong, this could end up returning results for + # the wrong query. We thus only try to recover if job ID was not given. + if job_id_given: + raise create_exc + + try: + query_job = self.get_job( + job_id, + project=project, + location=location, + retry=retry, + timeout=timeout, + ) + except core_exceptions.GoogleAPIError: # (includes RetryError) + raise create_exc + else: + return query_job else: return query_job - else: - return query_job + + future = do_query() + # The future might be in a failed state now, but if it's + # unrecoverable, we'll find out when we ask for it's result, at which + # point, we may retry. 
+ if not job_id_given: + future._retry_do_query = do_query # in case we have to retry later + future._job_retry = job_retry + + return future def insert_rows( self, diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 2cb7ee28e..3ab47b0f9 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -36,7 +36,7 @@ from google.cloud.bigquery.query import ScalarQueryParameter from google.cloud.bigquery.query import StructQueryParameter from google.cloud.bigquery.query import UDFResource -from google.cloud.bigquery.retry import DEFAULT_RETRY +from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.table import _EmptyRowIterator from google.cloud.bigquery.table import RangePartitioning @@ -1260,6 +1260,7 @@ def result( retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None, start_index: int = None, + job_retry: "retries.Retry" = DEFAULT_JOB_RETRY, ) -> Union["RowIterator", _EmptyRowIterator]: """Start the job and wait for it to complete and get the result. @@ -1270,9 +1271,13 @@ def result( max_results (Optional[int]): The maximum total number of rows from this request. retry (Optional[google.api_core.retry.Retry]): - How to retry the call that retrieves rows. If the job state is - ``DONE``, retrying is aborted early even if the results are not - available, as this will not change anymore. + How to retry the call that retrieves rows. This only + applies to making RPC calls. It isn't used to retry + failed jobs. This has a reasonable default that + should only be overridden with care. If the job state + is ``DONE``, retrying is aborted early even if the + results are not available, as this will not change + anymore. timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. 
@@ -1280,6 +1285,16 @@ def result( applies to each individual request. start_index (Optional[int]): The zero-based index of the starting row to read. + job_retry (Optional[google.api_core.retry.Retry]): + How to retry failed jobs. The default retries + rate-limit-exceeded and backend errors. Passing ``None`` disables + job retry. + + Not all jobs can be retried. If ``job_id`` was + provided to the query that created this job, then the + job returned by the query will not be retryable, and + an exception will be raised if non-``None`` + non-default ``job_retry`` is also provided. Returns: google.cloud.bigquery.table.RowIterator: @@ -1295,17 +1310,66 @@ def result( Raises: google.cloud.exceptions.GoogleAPICallError: - If the job failed. + If the job failed and retries aren't successful. concurrent.futures.TimeoutError: If the job did not complete in the given timeout. + TypeError: + If non-``None`` and non-default ``job_retry`` is + provided and the job is not retryable. """ try: - super(QueryJob, self).result(retry=retry, timeout=timeout) + retry_do_query = getattr(self, "_retry_do_query", None) + if retry_do_query is not None: + if job_retry is DEFAULT_JOB_RETRY: + job_retry = self._job_retry + else: + if job_retry is not None and job_retry is not DEFAULT_JOB_RETRY: + raise TypeError( + "`job_retry` was provided, but this job is" + " not retryable, because a custom `job_id` was" + " provided to the query that created this job." + ) + + first = True + + def do_get_result(): + nonlocal first + + if first: + first = False + else: + # Note that we won't get here if retry_do_query is + # None, because we won't use a retry. + + # The original job has failed. Create a new one. 
+ job = retry_do_query() + + # If it's already failed, we might as well stop: + if job.done() and job.exception() is not None: + raise job.exception() + + # Become the new job: + self.__dict__.clear() + self.__dict__.update(job.__dict__) + + # This shouldn't be necessary, because once we have a good + # job, it should stay good,and we shouldn't have to retry. + # But let's be paranoid. :) + self._retry_do_query = retry_do_query + self._job_retry = job_retry + + super(QueryJob, self).result(retry=retry, timeout=timeout) + + # Since the job could already be "done" (e.g. got a finished job + # via client.get_job), the superclass call to done() might not + # set the self._query_results cache. + self._reload_query_results(retry=retry, timeout=timeout) + + if retry_do_query is not None and job_retry is not None: + do_get_result = job_retry(do_get_result) + + do_get_result() - # Since the job could already be "done" (e.g. got a finished job - # via client.get_job), the superclass call to done() might not - # set the self._query_results cache. - self._reload_query_results(retry=retry, timeout=timeout) except exceptions.GoogleAPICallError as exc: exc.message += self._format_for_exception(self.query, self.job_id) exc.query_job = self diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index bab28aacb..e9286055c 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -32,6 +32,8 @@ auth_exceptions.TransportError, ) +_DEFAULT_JOB_DEADLINE = 60.0 * 10.0 # seconds + def _should_retry(exc): """Predicate for determining when to retry. @@ -56,3 +58,21 @@ def _should_retry(exc): on ``DEFAULT_RETRY``. For example, to change the deadline to 30 seconds, pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. 
""" + +job_retry_reasons = "rateLimitExceeded", "backendError" + + +def _job_should_retry(exc): + if not hasattr(exc, "errors") or len(exc.errors) == 0: + return False + + reason = exc.errors[0]["reason"] + return reason in job_retry_reasons + + +DEFAULT_JOB_RETRY = retry.Retry( + predicate=_job_should_retry, deadline=_DEFAULT_JOB_DEADLINE +) +""" +The default job retry object. +""" diff --git a/tests/system/test_job_retry.py b/tests/system/test_job_retry.py new file mode 100644 index 000000000..520545493 --- /dev/null +++ b/tests/system/test_job_retry.py @@ -0,0 +1,72 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import contextlib +import threading +import time + +import google.api_core.exceptions +import google.cloud.bigquery +import pytest + + +def thread(func): + thread = threading.Thread(target=func, daemon=True) + thread.start() + return thread + + +@pytest.mark.parametrize("job_retry_on_query", [True, False]) +def test_query_retry_539(bigquery_client, dataset_id, job_retry_on_query): + """ + Test job_retry + + See: https://github.com/googleapis/python-bigquery/issues/539 + """ + from google.api_core import exceptions + from google.api_core.retry import if_exception_type, Retry + + table_name = f"{dataset_id}.t539" + + # Without a custom retry, we fail: + with pytest.raises(google.api_core.exceptions.NotFound): + bigquery_client.query(f"select count(*) from {table_name}").result() + + retry_notfound = Retry(predicate=if_exception_type(exceptions.NotFound)) + + job_retry = dict(job_retry=retry_notfound) if job_retry_on_query else {} + job = bigquery_client.query(f"select count(*) from {table_name}", **job_retry) + job_id = job.job_id + + # We can already know that the job failed, but we're not supposed + # to find out until we call result, which is where retry happens + assert job.done() + assert job.exception() is not None + + @thread + def create_table(): + time.sleep(1) # Give the first retry attempt time to fail. 
+ with contextlib.closing(google.cloud.bigquery.Client()) as client: + client.query(f"create table {table_name} (id int64)").result() + + job_retry = {} if job_retry_on_query else dict(job_retry=retry_notfound) + [[count]] = list(job.result(**job_retry)) + assert count == 0 + + # The job was retried, and thus got a new job id + assert job.job_id != job_id + + # Make sure we don't leave a thread behind: + create_table.join() + bigquery_client.query(f"drop table {table_name}").result() diff --git a/tests/unit/test_job_retry.py b/tests/unit/test_job_retry.py new file mode 100644 index 000000000..b2095d2f2 --- /dev/null +++ b/tests/unit/test_job_retry.py @@ -0,0 +1,247 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import re + +import mock +import pytest + +import google.api_core.exceptions +import google.api_core.retry + +from .helpers import make_connection + + +# With job_retry_on_query, we're testing 4 scenarios: +# - No `job_retry` passed, retry on default rateLimitExceeded. +# - Pass NotFound retry to `query`. +# - Pass NotFound retry to `result`. +# - Pass BadRequest retry to query, with the value passed to `result` overriding. +@pytest.mark.parametrize("job_retry_on_query", [None, "Query", "Result", "Both"]) +@mock.patch("time.sleep") +def test_retry_failed_jobs(sleep, client, job_retry_on_query): + """ + Test retry of job failures, as opposed to API-invocation failures. 
+ """ + + retry_notfound = google.api_core.retry.Retry( + predicate=google.api_core.retry.if_exception_type( + google.api_core.exceptions.NotFound + ) + ) + retry_badrequest = google.api_core.retry.Retry( + predicate=google.api_core.retry.if_exception_type( + google.api_core.exceptions.BadRequest + ) + ) + + if job_retry_on_query is None: + reason = "rateLimitExceeded" + else: + reason = "notFound" + + err = dict(reason=reason) + responses = [ + dict(status=dict(state="DONE", errors=[err], errorResult=err)), + dict(status=dict(state="DONE", errors=[err], errorResult=err)), + dict(status=dict(state="DONE", errors=[err], errorResult=err)), + dict(status=dict(state="DONE")), + dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"), + ] + + def api_request(method, path, query_params=None, data=None, **kw): + response = responses.pop(0) + if data: + response["jobReference"] = data["jobReference"] + else: + response["jobReference"] = dict( + jobId=path.split("/")[-1], projectId="PROJECT" + ) + return response + + conn = client._connection = make_connection() + conn.api_request.side_effect = api_request + + if job_retry_on_query == "Query": + job_retry = dict(job_retry=retry_notfound) + elif job_retry_on_query == "Both": + # This will be overridden in `result` + job_retry = dict(job_retry=retry_badrequest) + else: + job_retry = {} + job = client.query("select 1", **job_retry) + + orig_job_id = job.job_id + job_retry = ( + dict(job_retry=retry_notfound) + if job_retry_on_query in ("Result", "Both") + else {} + ) + result = job.result(**job_retry) + assert result.total_rows == 1 + assert not responses # We made all the calls we expected to. + + # The job adjusts it's job id based on the id of the last attempt. 
+ assert job.job_id != orig_job_id + assert job.job_id == conn.mock_calls[3][2]["data"]["jobReference"]["jobId"] + + # We had to sleep three times + assert len(sleep.mock_calls) == 3 + + # Sleeps are random, however they're more than 0 + assert min(c[1][0] for c in sleep.mock_calls) > 0 + + # They're at most 2 * (multiplier**(number of sleeps - 1)) * initial + # The default multiplier is 2 + assert max(c[1][0] for c in sleep.mock_calls) <= 8 + + # We can ask for the result again: + responses = [ + dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"), + ] + orig_job_id = job.job_id + result = job.result() + assert result.total_rows == 1 + assert not responses # We made all the calls we expected to. + + # We wouldn't (and didn't) fail, because we're dealing with a successful job. + # So the job id hasn't changed. + assert job.job_id == orig_job_id + + +# With job_retry_on_query, we're testing 2 scenarios: +# - Pass None retry to `query`. +# - Pass None retry to `result`. +@pytest.mark.parametrize("job_retry_on_query", ["Query", "Result"]) +@mock.patch("time.sleep") +def test_disable_retry_failed_jobs(sleep, client, job_retry_on_query): + """ + Test that passing ``job_retry=None`` disables retry of job failures. 
+ """ + err = dict(reason="rateLimitExceeded") + responses = [dict(status=dict(state="DONE", errors=[err], errorResult=err))] * 3 + + def api_request(method, path, query_params=None, data=None, **kw): + response = responses.pop(0) + response["jobReference"] = data["jobReference"] + return response + + conn = client._connection = make_connection() + conn.api_request.side_effect = api_request + + if job_retry_on_query == "Query": + job_retry = dict(job_retry=None) + else: + job_retry = {} + job = client.query("select 1", **job_retry) + + orig_job_id = job.job_id + job_retry = dict(job_retry=None) if job_retry_on_query == "Result" else {} + with pytest.raises(google.api_core.exceptions.Forbidden): + job.result(**job_retry) + + assert job.job_id == orig_job_id + assert len(sleep.mock_calls) == 0 + + +@mock.patch("google.api_core.retry.datetime_helpers") +@mock.patch("time.sleep") +def test_retry_failed_jobs_after_retry_failed(sleep, datetime_helpers, client): + """ + If at first you don't succeed, maybe you will later. :) + """ + conn = client._connection = make_connection() + + datetime_helpers.utcnow.return_value = datetime.datetime(2021, 7, 29, 10, 43, 2) + + err = dict(reason="rateLimitExceeded") + + def api_request(method, path, query_params=None, data=None, **kw): + calls = sleep.mock_calls + if calls: + datetime_helpers.utcnow.return_value += datetime.timedelta( + seconds=calls[-1][1][0] + ) + response = dict(status=dict(state="DONE", errors=[err], errorResult=err)) + response["jobReference"] = data["jobReference"] + return response + + conn.api_request.side_effect = api_request + + job = client.query("select 1") + orig_job_id = job.job_id + + with pytest.raises(google.api_core.exceptions.RetryError): + job.result() + + # We never got a successful job, so the job id never changed: + assert job.job_id == orig_job_id + + # We failed because we couldn't succeed within the default job-retry deadline (600 seconds). 
+ # But we can try again: + err2 = dict(reason="backendError") # We also retry on this + responses = [ + dict(status=dict(state="DONE", errors=[err2], errorResult=err2)), + dict(status=dict(state="DONE", errors=[err], errorResult=err)), + dict(status=dict(state="DONE", errors=[err2], errorResult=err2)), + dict(status=dict(state="DONE")), + dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"), + ] + + def api_request(method, path, query_params=None, data=None, **kw): + calls = sleep.mock_calls + datetime_helpers.utcnow.return_value += datetime.timedelta( + seconds=calls[-1][1][0] + ) + response = responses.pop(0) + if data: + response["jobReference"] = data["jobReference"] + else: + response["jobReference"] = dict( + jobId=path.split("/")[-1], projectId="PROJECT" + ) + return response + + conn.api_request.side_effect = api_request + result = job.result() + assert result.total_rows == 1 + assert not responses # We made all the calls we expected to. + assert job.job_id != orig_job_id + + +def test_raises_on_job_retry_on_query_with_non_retryable_jobs(client): + with pytest.raises( + TypeError, + match=re.escape( + "`job_retry` was provided, but the returned job is" + " not retryable, because a custom `job_id` was" + " provided." + ), + ): + client.query("select 42", job_id=42, job_retry=google.api_core.retry.Retry()) + + +def test_raises_on_job_retry_on_result_with_non_retryable_jobs(client): + client._connection = make_connection({}) + job = client.query("select 42", job_id=42) + with pytest.raises( + TypeError, + match=re.escape( + "`job_retry` was provided, but this job is" + " not retryable, because a custom `job_id` was" + " provided to the query that created this job." 
+ ), + ): + job.result(job_retry=google.api_core.retry.Retry()) diff --git a/tests/unit/test_retry.py b/tests/unit/test_retry.py index 6fb7f93fd..c7c25e036 100644 --- a/tests/unit/test_retry.py +++ b/tests/unit/test_retry.py @@ -86,3 +86,27 @@ def test_w_unstructured_bad_gateway(self): exc = BadGateway("testing") self.assertTrue(self._call_fut(exc)) + + +def test_DEFAULT_JOB_RETRY_predicate(): + from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY + from google.api_core.exceptions import ClientError + + assert not DEFAULT_JOB_RETRY._predicate(TypeError()) + assert not DEFAULT_JOB_RETRY._predicate(ClientError("fail")) + assert not DEFAULT_JOB_RETRY._predicate( + ClientError("fail", errors=[dict(reason="idk")]) + ) + + assert DEFAULT_JOB_RETRY._predicate( + ClientError("fail", errors=[dict(reason="rateLimitExceeded")]) + ) + assert DEFAULT_JOB_RETRY._predicate( + ClientError("fail", errors=[dict(reason="backendError")]) + ) + + +def test_DEFAULT_JOB_RETRY_deadline(): + from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY + + assert DEFAULT_JOB_RETRY._deadline == 600 From ad9c8026f0e667f13dd754279f9dc40d06f4fa78 Mon Sep 17 00:00:00 2001 From: Grimmer Date: Thu, 12 Aug 2021 03:23:48 +0800 Subject: [PATCH 211/230] fix: make unicode characters working well in load_table_from_json (#865) Co-authored-by: Tim Swast Co-authored-by: Tres Seaver --- google/cloud/bigquery/client.py | 2 +- tests/unit/test_client.py | 36 +++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 8142c59cd..cbac82548 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -2762,7 +2762,7 @@ def load_table_from_json( destination = _table_arg_to_table_ref(destination, default_project=self.project) - data_str = "\n".join(json.dumps(item) for item in json_rows) + data_str = "\n".join(json.dumps(item, ensure_ascii=False) for item in json_rows) encoded_str 
= data_str.encode() data_file = io.BytesIO(encoded_str) return self.load_table_from_file( diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 535685511..671dd8da1 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -7775,6 +7775,42 @@ def test_load_table_from_json_w_invalid_job_config(self): err_msg = str(exc.value) assert "Expected an instance of LoadJobConfig" in err_msg + def test_load_table_from_json_unicode_emoji_data_case(self): + from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES + + client = self._make_client() + + emoji = "\U0001F3E6" + json_row = {"emoji": emoji} + json_rows = [json_row] + + load_patch = mock.patch( + "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True + ) + + with load_patch as load_table_from_file: + client.load_table_from_json(json_rows, self.TABLE_REF) + + load_table_from_file.assert_called_once_with( + client, + mock.ANY, + self.TABLE_REF, + size=mock.ANY, + num_retries=_DEFAULT_NUM_RETRIES, + job_id=mock.ANY, + job_id_prefix=None, + location=client.location, + project=client.project, + job_config=mock.ANY, + timeout=None, + ) + + sent_data_file = load_table_from_file.mock_calls[0][1][1] + + # make sure json_row's unicode characters are only encoded one time + expected_bytes = b'{"emoji": "' + emoji.encode("utf8") + b'"}' + assert sent_data_file.getvalue() == expected_bytes + # Low-level tests @classmethod From cf6f0e923d385817c9aff447255ecfa4b9b4c72d Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 11 Aug 2021 19:46:11 +0000 Subject: [PATCH 212/230] chore: release 2.24.0 (#868) :robot: I have created a release \*beep\* \*boop\* --- ## [2.24.0](https://www.github.com/googleapis/python-bigquery/compare/v2.23.3...v2.24.0) (2021-08-11) ### Features * add support for transaction statistics ([#849](https://www.github.com/googleapis/python-bigquery/issues/849)) 
([7f7b1a8](https://www.github.com/googleapis/python-bigquery/commit/7f7b1a808d50558772a0deb534ca654da65d629e)) * make the same `Table*` instances equal to each other ([#867](https://www.github.com/googleapis/python-bigquery/issues/867)) ([c1a3d44](https://www.github.com/googleapis/python-bigquery/commit/c1a3d4435739a21d25aa154145e36d3a7c42eeb6)) * retry failed query jobs in `result()` ([#837](https://www.github.com/googleapis/python-bigquery/issues/837)) ([519d99c](https://www.github.com/googleapis/python-bigquery/commit/519d99c20e7d1101f76981f3de036fdf3c7a4ecc)) * support `ScalarQueryParameterType` for `type_` argument in `ScalarQueryParameter` constructor ([#850](https://www.github.com/googleapis/python-bigquery/issues/850)) ([93d15e2](https://www.github.com/googleapis/python-bigquery/commit/93d15e2e5405c2cc6d158c4e5737361344193dbc)) ### Bug Fixes * make unicode characters working well in load_table_from_json ([#865](https://www.github.com/googleapis/python-bigquery/issues/865)) ([ad9c802](https://www.github.com/googleapis/python-bigquery/commit/ad9c8026f0e667f13dd754279f9dc40d06f4fa78)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). 
--- CHANGELOG.md | 15 +++++++++++++++ docs/conf.py | 1 - google/cloud/bigquery/version.py | 2 +- 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 856f1ecd1..83b409015 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,21 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [2.24.0](https://www.github.com/googleapis/python-bigquery/compare/v2.23.3...v2.24.0) (2021-08-11) + + +### Features + +* add support for transaction statistics ([#849](https://www.github.com/googleapis/python-bigquery/issues/849)) ([7f7b1a8](https://www.github.com/googleapis/python-bigquery/commit/7f7b1a808d50558772a0deb534ca654da65d629e)) +* make the same `Table*` instances equal to each other ([#867](https://www.github.com/googleapis/python-bigquery/issues/867)) ([c1a3d44](https://www.github.com/googleapis/python-bigquery/commit/c1a3d4435739a21d25aa154145e36d3a7c42eeb6)) +* retry failed query jobs in `result()` ([#837](https://www.github.com/googleapis/python-bigquery/issues/837)) ([519d99c](https://www.github.com/googleapis/python-bigquery/commit/519d99c20e7d1101f76981f3de036fdf3c7a4ecc)) +* support `ScalarQueryParameterType` for `type_` argument in `ScalarQueryParameter` constructor ([#850](https://www.github.com/googleapis/python-bigquery/issues/850)) ([93d15e2](https://www.github.com/googleapis/python-bigquery/commit/93d15e2e5405c2cc6d158c4e5737361344193dbc)) + + +### Bug Fixes + +* make unicode characters working well in load_table_from_json ([#865](https://www.github.com/googleapis/python-bigquery/issues/865)) ([ad9c802](https://www.github.com/googleapis/python-bigquery/commit/ad9c8026f0e667f13dd754279f9dc40d06f4fa78)) + ### [2.23.3](https://www.github.com/googleapis/python-bigquery/compare/v2.23.2...v2.23.3) (2021-08-06) diff --git a/docs/conf.py b/docs/conf.py index 09f7ea414..cb347160d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -110,7 +110,6 @@ # directories to ignore when looking for source files. 
exclude_patterns = [ "_build", - "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "samples/snippets/README.rst", diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index df992a051..84f6b4643 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.23.3" +__version__ = "2.24.0" From c44d45bc0481aeef2e39ba3392666125bdd2715d Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 12 Aug 2021 12:15:45 +0200 Subject: [PATCH 213/230] chore(deps): update dependency google-cloud-bigquery to v2.24.0 (#873) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index d3e599101..dfee339d4 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.23.3 +google-cloud-bigquery==2.24.0 google-cloud-bigquery-storage==2.6.3 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 1545ed96e..264899dff 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.23.3 +google-cloud-bigquery==2.24.0 google-cloud-bigquery-storage==2.6.3 google-auth-oauthlib==0.4.5 grpcio==1.39.0 From e3704c3494b90112cb30b091bcacb443bf148383 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Fri, 13 Aug 2021 02:00:19 -0500 Subject: [PATCH 214/230] test: refactor `list_rows` tests and add test for scalars (#829) * test: refactor `list_rows` tests and add test for scalars * fix JSON formatting * add TODO for INTERVAL Arrow support * format tests --- tests/data/scalars.jsonl | 4 +- tests/data/scalars_extreme.jsonl | 10 +-- 
tests/data/scalars_schema.json | 54 +++++++++------ tests/system/test_arrow.py | 36 ++++++++-- tests/system/test_client.py | 48 ------------- tests/system/test_list_rows.py | 112 +++++++++++++++++++++++++++++++ 6 files changed, 181 insertions(+), 83 deletions(-) create mode 100644 tests/system/test_list_rows.py diff --git a/tests/data/scalars.jsonl b/tests/data/scalars.jsonl index 4419a6e9a..e06139e5c 100644 --- a/tests/data/scalars.jsonl +++ b/tests/data/scalars.jsonl @@ -1,2 +1,2 @@ -{"bool_col": true, "bytes_col": "abcd", "date_col": "2021-07-21", "datetime_col": "2021-07-21 11:39:45", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "123456789", "numeric_col": "1.23456789", "bignumeric_col": "10.111213141516171819", "float64_col": "1.25", "string_col": "Hello, World", "time_col": "11:41:43.07616", "timestamp_col": "2021-07-21T17:43:43.945289Z"} -{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, "timestamp_col": null} +{"bool_col": true, "bytes_col": "SGVsbG8sIFdvcmxkIQ==", "date_col": "2021-07-21", "datetime_col": "2021-07-21 11:39:45", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "123456789", "interval_col": "P7Y11M9DT4H15M37.123456S", "numeric_col": "1.23456789", "bignumeric_col": "10.111213141516171819", "float64_col": "1.25", "rowindex": 0, "string_col": "Hello, World!", "time_col": "11:41:43.07616", "timestamp_col": "2021-07-21T17:43:43.945289Z"} +{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "interval_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "rowindex": 1, "string_col": null, "time_col": null, "timestamp_col": null} diff --git a/tests/data/scalars_extreme.jsonl b/tests/data/scalars_extreme.jsonl index ceccd8dbc..d0a33fdba 100644 --- 
a/tests/data/scalars_extreme.jsonl +++ b/tests/data/scalars_extreme.jsonl @@ -1,5 +1,5 @@ -{"bool_col": true, "bytes_col": "DQo=\n", "date_col": "9999-12-31", "datetime_col": "9999-12-31 23:59:59.999999", "geography_col": "POINT(-135.0000 90.0000)", "int64_col": "9223372036854775807", "numeric_col": "9.9999999999999999999999999999999999999E+28", "bignumeric_col": "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "+inf", "string_col": "Hello, World", "time_col": "23:59:59.99999", "timestamp_col": "9999-12-31T23:59:59.999999Z"} -{"bool_col": false, "bytes_col": "8J+Zgw==\n", "date_col": "0001-01-01", "datetime_col": "0001-01-01 00:00:00", "geography_col": "POINT(45.0000 -90.0000)", "int64_col": "-9223372036854775808", "numeric_col": "-9.9999999999999999999999999999999999999E+28", "bignumeric_col": "-9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "-inf", "string_col": "Hello, World", "time_col": "00:00:00", "timestamp_col": "0001-01-01T00:00:00.000000Z"} -{"bool_col": true, "bytes_col": "AA==\n", "date_col": "1900-01-01", "datetime_col": "1900-01-01 00:00:00", "geography_col": "POINT(-180.0000 0.0000)", "int64_col": "-1", "numeric_col": "0.000000001", "bignumeric_col": "-0.00000000000000000000000000000000000001", "float64_col": "nan", "string_col": "こんにちは", "time_col": "00:00:00.000001", "timestamp_col": "1900-01-01T00:00:00.000000Z"} -{"bool_col": false, "bytes_col": "", "date_col": "1970-01-01", "datetime_col": "1970-01-01 00:00:00", "geography_col": "POINT(0 0)", "int64_col": "0", "numeric_col": "0.0", "bignumeric_col": "0.0", "float64_col": 0.0, "string_col": "", "time_col": "12:00:00", "timestamp_col": "1970-01-01T00:00:00.000000Z"} -{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "string_col": null, "time_col": null, 
"timestamp_col": null} +{"bool_col": true, "bytes_col": "DQo=\n", "date_col": "9999-12-31", "datetime_col": "9999-12-31 23:59:59.999999", "geography_col": "POINT(-135.0000 90.0000)", "int64_col": "9223372036854775807", "interval_col": "P-10000Y0M-3660000DT-87840000H0M0S", "numeric_col": "9.9999999999999999999999999999999999999E+28", "bignumeric_col": "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "+inf", "rowindex": 0, "string_col": "Hello, World", "time_col": "23:59:59.999999", "timestamp_col": "9999-12-31T23:59:59.999999Z"} +{"bool_col": false, "bytes_col": "8J+Zgw==\n", "date_col": "0001-01-01", "datetime_col": "0001-01-01 00:00:00", "geography_col": "POINT(45.0000 -90.0000)", "int64_col": "-9223372036854775808", "interval_col": "P10000Y0M3660000DT87840000H0M0S", "numeric_col": "-9.9999999999999999999999999999999999999E+28", "bignumeric_col": "-9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "-inf", "rowindex": 1, "string_col": "Hello, World", "time_col": "00:00:00", "timestamp_col": "0001-01-01T00:00:00.000000Z"} +{"bool_col": true, "bytes_col": "AA==\n", "date_col": "1900-01-01", "datetime_col": "1900-01-01 00:00:00", "geography_col": "POINT(-180.0000 0.0000)", "int64_col": "-1", "interval_col": "P0Y0M0DT0H0M0.000001S", "numeric_col": "0.000000001", "bignumeric_col": "-0.00000000000000000000000000000000000001", "float64_col": "nan", "rowindex": 2, "string_col": "こんにちは", "time_col": "00:00:00.000001", "timestamp_col": "1900-01-01T00:00:00.000000Z"} +{"bool_col": false, "bytes_col": "", "date_col": "1970-01-01", "datetime_col": "1970-01-01 00:00:00", "geography_col": "POINT(0 0)", "int64_col": "0", "interval_col": "P0Y0M0DT0H0M0S", "numeric_col": "0.0", "bignumeric_col": "0.0", "float64_col": 0.0, "rowindex": 3, "string_col": "", "time_col": "12:00:00", "timestamp_col": "1970-01-01T00:00:00.000000Z"} +{"bool_col": null, "bytes_col": null, "date_col": null, 
"datetime_col": null, "geography_col": null, "int64_col": null, "interval_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "rowindex": 4, "string_col": null, "time_col": null, "timestamp_col": null} diff --git a/tests/data/scalars_schema.json b/tests/data/scalars_schema.json index 00bd150fd..676d37d56 100644 --- a/tests/data/scalars_schema.json +++ b/tests/data/scalars_schema.json @@ -1,33 +1,33 @@ [ { "mode": "NULLABLE", - "name": "timestamp_col", - "type": "TIMESTAMP" + "name": "bool_col", + "type": "BOOLEAN" }, { "mode": "NULLABLE", - "name": "time_col", - "type": "TIME" + "name": "bignumeric_col", + "type": "BIGNUMERIC" }, { "mode": "NULLABLE", - "name": "float64_col", - "type": "FLOAT" + "name": "bytes_col", + "type": "BYTES" }, { "mode": "NULLABLE", - "name": "datetime_col", - "type": "DATETIME" + "name": "date_col", + "type": "DATE" }, { "mode": "NULLABLE", - "name": "bignumeric_col", - "type": "BIGNUMERIC" + "name": "datetime_col", + "type": "DATETIME" }, { "mode": "NULLABLE", - "name": "numeric_col", - "type": "NUMERIC" + "name": "float64_col", + "type": "FLOAT" }, { "mode": "NULLABLE", @@ -36,27 +36,37 @@ }, { "mode": "NULLABLE", - "name": "date_col", - "type": "DATE" + "name": "int64_col", + "type": "INTEGER" }, { "mode": "NULLABLE", - "name": "string_col", - "type": "STRING" + "name": "interval_col", + "type": "INTERVAL" }, { "mode": "NULLABLE", - "name": "bool_col", - "type": "BOOLEAN" + "name": "numeric_col", + "type": "NUMERIC" + }, + { + "mode": "REQUIRED", + "name": "rowindex", + "type": "INTEGER" }, { "mode": "NULLABLE", - "name": "bytes_col", - "type": "BYTES" + "name": "string_col", + "type": "STRING" }, { "mode": "NULLABLE", - "name": "int64_col", - "type": "INTEGER" + "name": "time_col", + "type": "TIME" + }, + { + "mode": "NULLABLE", + "name": "timestamp_col", + "type": "TIMESTAMP" } ] diff --git a/tests/system/test_arrow.py b/tests/system/test_arrow.py index f97488e39..12f7af9cb 100644 --- 
a/tests/system/test_arrow.py +++ b/tests/system/test_arrow.py @@ -14,8 +14,14 @@ """System tests for Arrow connector.""" +from typing import Optional + import pytest +from google.cloud import bigquery +from google.cloud.bigquery import enums + + pyarrow = pytest.importorskip( "pyarrow", minversion="3.0.0" ) # Needs decimal256 for BIGNUMERIC columns. @@ -31,17 +37,35 @@ ), ) def test_list_rows_nullable_scalars_dtypes( - bigquery_client, - scalars_table, - scalars_extreme_table, - max_results, - scalars_table_name, + bigquery_client: bigquery.Client, + scalars_table: str, + scalars_extreme_table: str, + max_results: Optional[int], + scalars_table_name: str, ): table_id = scalars_table if scalars_table_name == "scalars_extreme_table": table_id = scalars_extreme_table + + # TODO(GH#836): Avoid INTERVAL columns until they are supported by the + # BigQuery Storage API and pyarrow. + schema = [ + bigquery.SchemaField("bool_col", enums.SqlTypeNames.BOOLEAN), + bigquery.SchemaField("bignumeric_col", enums.SqlTypeNames.BIGNUMERIC), + bigquery.SchemaField("bytes_col", enums.SqlTypeNames.BYTES), + bigquery.SchemaField("date_col", enums.SqlTypeNames.DATE), + bigquery.SchemaField("datetime_col", enums.SqlTypeNames.DATETIME), + bigquery.SchemaField("float64_col", enums.SqlTypeNames.FLOAT64), + bigquery.SchemaField("geography_col", enums.SqlTypeNames.GEOGRAPHY), + bigquery.SchemaField("int64_col", enums.SqlTypeNames.INT64), + bigquery.SchemaField("numeric_col", enums.SqlTypeNames.NUMERIC), + bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING), + bigquery.SchemaField("time_col", enums.SqlTypeNames.TIME), + bigquery.SchemaField("timestamp_col", enums.SqlTypeNames.TIMESTAMP), + ] + arrow_table = bigquery_client.list_rows( - table_id, max_results=max_results, + table_id, max_results=max_results, selected_fields=schema, ).to_arrow() schema = arrow_table.schema diff --git a/tests/system/test_client.py b/tests/system/test_client.py index f540611a6..06ef40126 100644 --- 
a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2428,54 +2428,6 @@ def test_nested_table_to_arrow(self): self.assertTrue(pyarrow.types.is_list(record_col[1].type)) self.assertTrue(pyarrow.types.is_int64(record_col[1].type.value_type)) - def test_list_rows_empty_table(self): - from google.cloud.bigquery.table import RowIterator - - dataset_id = _make_dataset_id("empty_table") - dataset = self.temp_dataset(dataset_id) - table_ref = dataset.table("empty_table") - table = Config.CLIENT.create_table(bigquery.Table(table_ref)) - - # It's a bit silly to list rows for an empty table, but this does - # happen as the result of a DDL query from an IPython magic command. - rows = Config.CLIENT.list_rows(table) - self.assertIsInstance(rows, RowIterator) - self.assertEqual(tuple(rows), ()) - - def test_list_rows_page_size(self): - from google.cloud.bigquery.job import SourceFormat - from google.cloud.bigquery.job import WriteDisposition - - num_items = 7 - page_size = 3 - num_pages, num_last_page = divmod(num_items, page_size) - - SF = bigquery.SchemaField - schema = [SF("string_col", "STRING", mode="NULLABLE")] - to_insert = [{"string_col": "item%d" % i} for i in range(num_items)] - rows = [json.dumps(row) for row in to_insert] - body = io.BytesIO("{}\n".format("\n".join(rows)).encode("ascii")) - - table_id = "test_table" - dataset = self.temp_dataset(_make_dataset_id("nested_df")) - table = dataset.table(table_id) - self.to_delete.insert(0, table) - job_config = bigquery.LoadJobConfig() - job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE - job_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON - job_config.schema = schema - # Load a table using a local JSON file from memory. 
- Config.CLIENT.load_table_from_file(body, table, job_config=job_config).result() - - df = Config.CLIENT.list_rows(table, selected_fields=schema, page_size=page_size) - pages = df.pages - - for i in range(num_pages): - page = next(pages) - self.assertEqual(page.num_items, page_size) - page = next(pages) - self.assertEqual(page.num_items, num_last_page) - def temp_dataset(self, dataset_id, location=None): project = Config.CLIENT.project dataset_ref = bigquery.DatasetReference(project, dataset_id) diff --git a/tests/system/test_list_rows.py b/tests/system/test_list_rows.py new file mode 100644 index 000000000..70388059e --- /dev/null +++ b/tests/system/test_list_rows.py @@ -0,0 +1,112 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import decimal + +from google.cloud import bigquery +from google.cloud.bigquery import enums + + +def test_list_rows_empty_table(bigquery_client: bigquery.Client, table_id: str): + from google.cloud.bigquery.table import RowIterator + + table = bigquery_client.create_table(table_id) + + # It's a bit silly to list rows for an empty table, but this does + # happen as the result of a DDL query from an IPython magic command. 
+ rows = bigquery_client.list_rows(table) + assert isinstance(rows, RowIterator) + assert tuple(rows) == () + + +def test_list_rows_page_size(bigquery_client: bigquery.Client, table_id: str): + num_items = 7 + page_size = 3 + num_pages, num_last_page = divmod(num_items, page_size) + + to_insert = [{"string_col": "item%d" % i, "rowindex": i} for i in range(num_items)] + bigquery_client.load_table_from_json(to_insert, table_id).result() + + df = bigquery_client.list_rows( + table_id, + selected_fields=[bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING)], + page_size=page_size, + ) + pages = df.pages + + for i in range(num_pages): + page = next(pages) + assert page.num_items == page_size + page = next(pages) + assert page.num_items == num_last_page + + +def test_list_rows_scalars(bigquery_client: bigquery.Client, scalars_table: str): + rows = sorted( + bigquery_client.list_rows(scalars_table), key=lambda row: row["rowindex"] + ) + row = rows[0] + assert row["bool_col"] # True + assert row["bytes_col"] == b"Hello, World!" + assert row["date_col"] == datetime.date(2021, 7, 21) + assert row["datetime_col"] == datetime.datetime(2021, 7, 21, 11, 39, 45) + assert row["geography_col"] == "POINT(-122.0838511 37.3860517)" + assert row["int64_col"] == 123456789 + assert row["numeric_col"] == decimal.Decimal("1.23456789") + assert row["bignumeric_col"] == decimal.Decimal("10.111213141516171819") + assert row["float64_col"] == 1.25 + assert row["string_col"] == "Hello, World!" 
+ assert row["time_col"] == datetime.time(11, 41, 43, 76160) + assert row["timestamp_col"] == datetime.datetime( + 2021, 7, 21, 17, 43, 43, 945289, tzinfo=datetime.timezone.utc + ) + + nullrow = rows[1] + for column, value in nullrow.items(): + if column == "rowindex": + assert value == 1 + else: + assert value is None + + +def test_list_rows_scalars_extreme( + bigquery_client: bigquery.Client, scalars_extreme_table: str +): + rows = sorted( + bigquery_client.list_rows(scalars_extreme_table), + key=lambda row: row["rowindex"], + ) + row = rows[0] + assert row["bool_col"] # True + assert row["bytes_col"] == b"\r\n" + assert row["date_col"] == datetime.date(9999, 12, 31) + assert row["datetime_col"] == datetime.datetime(9999, 12, 31, 23, 59, 59, 999999) + assert row["geography_col"] == "POINT(-135 90)" + assert row["int64_col"] == 9223372036854775807 + assert row["numeric_col"] == decimal.Decimal(f"9.{'9' * 37}E+28") + assert row["bignumeric_col"] == decimal.Decimal(f"9.{'9' * 75}E+37") + assert row["float64_col"] == float("Inf") + assert row["string_col"] == "Hello, World" + assert row["time_col"] == datetime.time(23, 59, 59, 999999) + assert row["timestamp_col"] == datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc + ) + + nullrow = rows[4] + for column, value in nullrow.items(): + if column == "rowindex": + assert value == 4 + else: + assert value is None From cd21df1716e9ab163c779a716d94a850a6b2d253 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 13 Aug 2021 11:17:31 -0400 Subject: [PATCH 215/230] chore: drop mention of Python 2.7 from templates (#877) Source-Link: https://github.com/googleapis/synthtool/commit/facee4cc1ea096cd8bcc008bb85929daa7c414c0 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:9743664022bd63a8084be67f144898314c7ca12f0a03e422ac17c733c129d803 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 2 +- docs/conf.py | 1 + 
samples/geography/noxfile.py | 6 +++--- samples/snippets/noxfile.py | 6 +++--- scripts/readme-gen/templates/install_deps.tmpl.rst | 2 +- 5 files changed, 9 insertions(+), 8 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 649877dc4..a9fcd07cc 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:50e35228649c47b6ca82aa0be3ff9eb2afce51c82b66c4a03fe4afeb5ff6c0fc + digest: sha256:9743664022bd63a8084be67f144898314c7ca12f0a03e422ac17c733c129d803 diff --git a/docs/conf.py b/docs/conf.py index cb347160d..09f7ea414 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -110,6 +110,7 @@ # directories to ignore when looking for source files. exclude_patterns = [ "_build", + "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "samples/snippets/README.rst", diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index 7dbea0914..b008613f0 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -39,7 +39,7 @@ TEST_CONFIG = { # You can opt out from the test for specific Python versions. - "ignored_versions": ["2.7"], + "ignored_versions": [], # Old samples are opted out of enforcing Python type hints # All new samples should feature them "enforce_type_hints": False, @@ -86,8 +86,8 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. -# All versions used to tested samples. -ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9"] +# All versions used to test samples. +ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9"] # Any default versions that should be ignored. 
IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index 7dbea0914..b008613f0 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -39,7 +39,7 @@ TEST_CONFIG = { # You can opt out from the test for specific Python versions. - "ignored_versions": ["2.7"], + "ignored_versions": [], # Old samples are opted out of enforcing Python type hints # All new samples should feature them "enforce_type_hints": False, @@ -86,8 +86,8 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. -# All versions used to tested samples. -ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9"] +# All versions used to test samples. +ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/scripts/readme-gen/templates/install_deps.tmpl.rst b/scripts/readme-gen/templates/install_deps.tmpl.rst index a0406dba8..275d64989 100644 --- a/scripts/readme-gen/templates/install_deps.tmpl.rst +++ b/scripts/readme-gen/templates/install_deps.tmpl.rst @@ -12,7 +12,7 @@ Install Dependencies .. _Python Development Environment Setup Guide: https://cloud.google.com/python/setup -#. Create a virtualenv. Samples are compatible with Python 2.7 and 3.4+. +#. Create a virtualenv. Samples are compatible with Python 3.6+. .. 
code-block:: bash From 2cb3563ee863edef7eaf5d04d739bcfe7bc6438e Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Fri, 13 Aug 2021 12:54:09 -0600 Subject: [PATCH 216/230] fix: remove pytz dependency and require pyarrow>=3.0.0 (#875) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: remove pytz dependency * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * fix(deps): require pyarrow>=3.0.0 * remove version check for pyarrow * require pyarrow 3.0 in pandas extra * remove _BIGNUMERIC_SUPPORT references from tests Co-authored-by: Owl Bot Co-authored-by: Dina Graves Portman Co-authored-by: Tim Swast --- docs/snippets.py | 5 +- google/cloud/bigquery/_pandas_helpers.py | 17 ++---- google/cloud/bigquery/table.py | 3 +- samples/client_query_w_timestamp_params.py | 3 +- setup.py | 4 +- testing/constraints-3.6.txt | 2 +- tests/system/test_client.py | 14 ++--- tests/system/test_pandas.py | 42 ++++++------- tests/unit/job/test_base.py | 4 +- tests/unit/test__pandas_helpers.py | 69 ++++++++-------------- tests/unit/test_client.py | 21 ++++--- tests/unit/test_table.py | 17 ++---- 12 files changed, 78 insertions(+), 123 deletions(-) diff --git a/docs/snippets.py b/docs/snippets.py index 3f9b9a88c..c62001fc0 100644 --- a/docs/snippets.py +++ b/docs/snippets.py @@ -363,7 +363,6 @@ def test_update_table_expiration(client, to_delete): # [START bigquery_update_table_expiration] import datetime - import pytz # from google.cloud import bigquery # client = bigquery.Client() @@ -375,7 +374,9 @@ def test_update_table_expiration(client, to_delete): assert table.expires is None # set table to expire 5 days from now - expiration = datetime.datetime.now(pytz.utc) + datetime.timedelta(days=5) + expiration = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta( + days=5 + ) table.expires = expiration table = 
client.update_table(table, ["expires"]) # API request diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index b381fa5f7..f49980645 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -20,8 +20,6 @@ import queue import warnings -from packaging import version - try: import pandas except ImportError: # pragma: NO COVER @@ -110,6 +108,7 @@ def pyarrow_timestamp(): # This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py # When modifying it be sure to update it there as well. BQ_TO_ARROW_SCALARS = { + "BIGNUMERIC": pyarrow_bignumeric, "BOOL": pyarrow.bool_, "BOOLEAN": pyarrow.bool_, "BYTES": pyarrow.binary, @@ -146,23 +145,15 @@ def pyarrow_timestamp(): pyarrow.date64().id: "DATETIME", # because millisecond resolution pyarrow.binary().id: "BYTES", pyarrow.string().id: "STRING", # also alias for pyarrow.utf8() - # The exact scale and precision don't matter, see below. - pyarrow.decimal128(38, scale=9).id: "NUMERIC", - } - - if version.parse(pyarrow.__version__) >= version.parse("3.0.0"): - BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric # The exact decimal's scale and precision are not important, as only # the type ID matters, and it's the same for all decimal256 instances. 
- ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" - _BIGNUMERIC_SUPPORT = True - else: - _BIGNUMERIC_SUPPORT = False + pyarrow.decimal128(38, scale=9).id: "NUMERIC", + pyarrow.decimal256(76, scale=38).id: "BIGNUMERIC", + } else: # pragma: NO COVER BQ_TO_ARROW_SCALARS = {} # pragma: NO COVER ARROW_SCALAR_IDS_TO_BQ = {} # pragma: NO_COVER - _BIGNUMERIC_SUPPORT = False # pragma: NO COVER def bq_to_arrow_struct_data_type(field): diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index d23885ebf..62f888001 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -20,7 +20,6 @@ import datetime import functools import operator -import pytz import typing from typing import Any, Dict, Iterable, Iterator, Optional, Tuple import warnings @@ -1969,7 +1968,7 @@ def to_dataframe( # Pandas, we set the timestamp_as_object parameter to True, if necessary. types_to_check = { pyarrow.timestamp("us"), - pyarrow.timestamp("us", tz=pytz.UTC), + pyarrow.timestamp("us", tz=datetime.timezone.utc), } for column in record_batch: diff --git a/samples/client_query_w_timestamp_params.py b/samples/client_query_w_timestamp_params.py index ca8eec0b5..41a27770e 100644 --- a/samples/client_query_w_timestamp_params.py +++ b/samples/client_query_w_timestamp_params.py @@ -18,7 +18,6 @@ def client_query_w_timestamp_params(): # [START bigquery_query_params_timestamps] import datetime - import pytz from google.cloud import bigquery # Construct a BigQuery client object. @@ -30,7 +29,7 @@ def client_query_w_timestamp_params(): bigquery.ScalarQueryParameter( "ts_value", "TIMESTAMP", - datetime.datetime(2016, 12, 7, 8, 0, tzinfo=pytz.UTC), + datetime.datetime(2016, 12, 7, 8, 0, tzinfo=datetime.timezone.utc), ) ] ) diff --git a/setup.py b/setup.py index e9deaf117..a1b3b61a0 100644 --- a/setup.py +++ b/setup.py @@ -54,9 +54,9 @@ # grpc.Channel.close() method isn't added until 1.32.0. 
# https://github.com/grpc/grpc/pull/15254 "grpcio >= 1.38.1, < 2.0dev", - "pyarrow >= 1.0.0, < 6.0dev", + "pyarrow >= 3.0.0, < 6.0dev", ], - "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 6.0dev"], + "pandas": ["pandas>=0.23.0", "pyarrow >= 3.0.0, < 6.0dev"], "bignumeric_type": ["pyarrow >= 3.0.0, < 6.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], "opentelemetry": [ diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index af6e82efd..ce012f0d7 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -16,7 +16,7 @@ opentelemetry-sdk==0.11b0 pandas==0.23.0 proto-plus==1.10.0 protobuf==3.12.0 -pyarrow==1.0.0 +pyarrow==3.0.0 requests==2.18.0 six==1.13.0 tqdm==4.7.4 diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 06ef40126..4250111b4 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -30,7 +30,6 @@ import psutil import pytest -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from . import helpers try: @@ -1972,15 +1971,12 @@ def test_query_w_query_params(self): "expected": {"friends": [phred_name, bharney_name]}, "query_parameters": [with_friends_param], }, + { + "sql": "SELECT @bignum_param", + "expected": bignum, + "query_parameters": [bignum_param], + }, ] - if _BIGNUMERIC_SUPPORT: - examples.append( - { - "sql": "SELECT @bignum_param", - "expected": bignum, - "query_parameters": [bignum_param], - } - ) for example in examples: jconfig = QueryJobConfig() diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 821b375e1..371dcea71 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -24,10 +24,8 @@ import google.api_core.retry import pkg_resources import pytest -import pytz from google.cloud import bigquery -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT from . 
import helpers @@ -64,7 +62,7 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i datetime.datetime(2012, 3, 14, 15, 16), ], dtype="datetime64[ns]", - ).dt.tz_localize(pytz.utc), + ).dt.tz_localize(datetime.timezone.utc), ), ( "dt_col", @@ -189,12 +187,11 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), ) - if _BIGNUMERIC_SUPPORT: - scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) table_schema = scalars_schema + ( # TODO: Array columns can't be read due to NULLABLE versus REPEATED @@ -216,12 +213,11 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id): ("geo_col", nulls), ("int_col", nulls), ("num_col", nulls), + ("bignum_col", nulls), ("str_col", nulls), ("time_col", nulls), ("ts_col", nulls), ] - if _BIGNUMERIC_SUPPORT: - df_data.append(("bignum_col", nulls)) df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, columns=df_data.keys()) @@ -297,12 +293,11 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), bigquery.SchemaField("num_col", "NUMERIC"), + bigquery.SchemaField("bignum_col", "BIGNUMERIC"), bigquery.SchemaField("str_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("ts_col", "TIMESTAMP"), ) - if _BIGNUMERIC_SUPPORT: - scalars_schema += (bigquery.SchemaField("bignum_col", "BIGNUMERIC"),) table_schema = scalars_schema + ( # TODO: Array columns can't be read due to NULLABLE versus REPEATED @@ -340,6 +335,14 @@ def 
test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id decimal.Decimal("99999999999999999999999999999.999999999"), ], ), + ( + "bignum_col", + [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + None, + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], + ), ("str_col", ["abc", None, "def"]), ( "time_col", @@ -348,23 +351,14 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id ( "ts_col", [ - datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc), None, - datetime.datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc), + datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc + ), ], ), ] - if _BIGNUMERIC_SUPPORT: - df_data.append( - ( - "bignum_col", - [ - decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), - None, - decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), - ], - ) - ) df_data = collections.OrderedDict(df_data) dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys()) @@ -484,10 +478,10 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv( ( "ts_col", [ - datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc), None, datetime.datetime( - 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc ), ], ), diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index 0ac1d05b5..c3f7854e3 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -295,11 +295,11 @@ def test_user_email(self): @staticmethod def _datetime_and_millis(): import datetime - import pytz from google.cloud._helpers import _millis now = datetime.datetime.utcnow().replace( - microsecond=123000, tzinfo=pytz.UTC # stats timestamps have ms precision + microsecond=123000, + tzinfo=datetime.timezone.utc, # stats timestamps have 
ms precision ) return now, _millis(now) diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 0ba671cd9..b9cb56572 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -37,12 +37,10 @@ # used in test parameterization. pyarrow = mock.Mock() import pytest -import pytz from google import api_core from google.cloud.bigquery import _helpers from google.cloud.bigquery import schema -from google.cloud.bigquery._pandas_helpers import _BIGNUMERIC_SUPPORT try: from google.cloud import bigquery_storage @@ -60,11 +58,6 @@ PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0") -skip_if_no_bignumeric = pytest.mark.skipif( - not _BIGNUMERIC_SUPPORT, reason="BIGNUMERIC support requires pyarrow>=3.0.0", -) - - @pytest.fixture def module_under_test(): from google.cloud.bigquery import _pandas_helpers @@ -153,9 +146,7 @@ def test_all_(): ("FLOAT", "NULLABLE", pyarrow.types.is_float64), ("FLOAT64", "NULLABLE", pyarrow.types.is_float64), ("NUMERIC", "NULLABLE", is_numeric), - pytest.param( - "BIGNUMERIC", "NULLABLE", is_bignumeric, marks=skip_if_no_bignumeric, - ), + ("BIGNUMERIC", "NULLABLE", is_bignumeric), ("BOOLEAN", "NULLABLE", pyarrow.types.is_boolean), ("BOOL", "NULLABLE", pyarrow.types.is_boolean), ("TIMESTAMP", "NULLABLE", is_timestamp), @@ -234,11 +225,10 @@ def test_all_(): "REPEATED", all_(pyarrow.types.is_list, lambda type_: is_numeric(type_.value_type)), ), - pytest.param( + ( "BIGNUMERIC", "REPEATED", all_(pyarrow.types.is_list, lambda type_: is_bignumeric(type_.value_type)), - marks=skip_if_no_bignumeric, ), ( "BOOLEAN", @@ -312,6 +302,7 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): schema.SchemaField("field05", "FLOAT"), schema.SchemaField("field06", "FLOAT64"), schema.SchemaField("field07", "NUMERIC"), + schema.SchemaField("field08", "BIGNUMERIC"), schema.SchemaField("field09", "BOOLEAN"), schema.SchemaField("field10", "BOOL"), schema.SchemaField("field11", 
"TIMESTAMP"), @@ -321,9 +312,6 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): schema.SchemaField("field15", "GEOGRAPHY"), ) - if _BIGNUMERIC_SUPPORT: - fields += (schema.SchemaField("field08", "BIGNUMERIC"),) - field = schema.SchemaField("ignored_name", bq_type, mode="NULLABLE", fields=fields) actual = module_under_test.bq_to_arrow_data_type(field) @@ -335,6 +323,7 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): pyarrow.field("field05", pyarrow.float64()), pyarrow.field("field06", pyarrow.float64()), pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field08", module_under_test.pyarrow_bignumeric()), pyarrow.field("field09", pyarrow.bool_()), pyarrow.field("field10", pyarrow.bool_()), pyarrow.field("field11", module_under_test.pyarrow_timestamp()), @@ -343,8 +332,6 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type): pyarrow.field("field14", module_under_test.pyarrow_datetime()), pyarrow.field("field15", pyarrow.string()), ) - if _BIGNUMERIC_SUPPORT: - expected += (pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),) expected = pyarrow.struct(expected) assert pyarrow.types.is_struct(actual) @@ -363,6 +350,7 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): schema.SchemaField("field05", "FLOAT"), schema.SchemaField("field06", "FLOAT64"), schema.SchemaField("field07", "NUMERIC"), + schema.SchemaField("field08", "BIGNUMERIC"), schema.SchemaField("field09", "BOOLEAN"), schema.SchemaField("field10", "BOOL"), schema.SchemaField("field11", "TIMESTAMP"), @@ -372,9 +360,6 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): schema.SchemaField("field15", "GEOGRAPHY"), ) - if _BIGNUMERIC_SUPPORT: - fields += (schema.SchemaField("field08", "BIGNUMERIC"),) - field = schema.SchemaField("ignored_name", bq_type, mode="REPEATED", fields=fields) actual = module_under_test.bq_to_arrow_data_type(field) @@ -386,6 +371,7 @@ 
def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): pyarrow.field("field05", pyarrow.float64()), pyarrow.field("field06", pyarrow.float64()), pyarrow.field("field07", module_under_test.pyarrow_numeric()), + pyarrow.field("field08", module_under_test.pyarrow_bignumeric()), pyarrow.field("field09", pyarrow.bool_()), pyarrow.field("field10", pyarrow.bool_()), pyarrow.field("field11", module_under_test.pyarrow_timestamp()), @@ -394,8 +380,6 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type): pyarrow.field("field14", module_under_test.pyarrow_datetime()), pyarrow.field("field15", pyarrow.string()), ) - if _BIGNUMERIC_SUPPORT: - expected += (pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),) expected_value_type = pyarrow.struct(expected) assert pyarrow.types.is_list(actual) @@ -441,7 +425,7 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): decimal.Decimal("999.123456789"), ], ), - pytest.param( + ( "BIGNUMERIC", [ decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), @@ -449,17 +433,18 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test): decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), decimal.Decimal("3.141592653589793238462643383279"), ], - marks=skip_if_no_bignumeric, ), ("BOOLEAN", [True, None, False, None]), ("BOOL", [False, None, True, None]), ( "TIMESTAMP", [ - datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc), None, - datetime.datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc), - datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), + datetime.datetime( + 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc + ), + datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc), ], ), ( @@ -938,6 +923,7 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): schema.SchemaField("field05", "FLOAT", mode="REQUIRED"), 
schema.SchemaField("field06", "FLOAT64", mode="REQUIRED"), schema.SchemaField("field07", "NUMERIC", mode="REQUIRED"), + schema.SchemaField("field08", "BIGNUMERIC", mode="REQUIRED"), schema.SchemaField("field09", "BOOLEAN", mode="REQUIRED"), schema.SchemaField("field10", "BOOL", mode="REQUIRED"), schema.SchemaField("field11", "TIMESTAMP", mode="REQUIRED"), @@ -946,8 +932,6 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): schema.SchemaField("field14", "DATETIME", mode="REQUIRED"), schema.SchemaField("field15", "GEOGRAPHY", mode="REQUIRED"), ) - if _BIGNUMERIC_SUPPORT: - bq_schema += (schema.SchemaField("field08", "BIGNUMERIC", mode="REQUIRED"),) data = { "field01": ["hello", "world"], @@ -957,11 +941,15 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): "field05": [1.25, 9.75], "field06": [-1.75, -3.5], "field07": [decimal.Decimal("1.2345"), decimal.Decimal("6.7891")], + "field08": [ + decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), + decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), + ], "field09": [True, False], "field10": [False, True], "field11": [ - datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc), - datetime.datetime(2012, 12, 21, 9, 7, 42, tzinfo=pytz.utc), + datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc), + datetime.datetime(2012, 12, 21, 9, 7, 42, tzinfo=datetime.timezone.utc), ], "field12": [datetime.date(9999, 12, 31), datetime.date(1970, 1, 1)], "field13": [datetime.time(23, 59, 59, 999999), datetime.time(12, 0, 0)], @@ -971,11 +959,6 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test): ], "field15": ["POINT(30 10)", "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))"], } - if _BIGNUMERIC_SUPPORT: - data["field08"] = [ - decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)), - decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)), - ] dataframe = pandas.DataFrame(data) arrow_table = module_under_test.dataframe_to_arrow(dataframe, bq_schema) @@ -1210,11 +1193,8 @@ 
def test_augment_schema_type_detection_succeeds(module_under_test): schema.SchemaField("bytes_field", field_type=None, mode="NULLABLE"), schema.SchemaField("string_field", field_type=None, mode="NULLABLE"), schema.SchemaField("numeric_field", field_type=None, mode="NULLABLE"), + schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"), ) - if _BIGNUMERIC_SUPPORT: - current_schema += ( - schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"), - ) with warnings.catch_warnings(record=True) as warned: augmented_schema = module_under_test.augment_schema(dataframe, current_schema) @@ -1236,13 +1216,10 @@ def test_augment_schema_type_detection_succeeds(module_under_test): schema.SchemaField("bytes_field", field_type="BYTES", mode="NULLABLE"), schema.SchemaField("string_field", field_type="STRING", mode="NULLABLE"), schema.SchemaField("numeric_field", field_type="NUMERIC", mode="NULLABLE"), + schema.SchemaField( + "bignumeric_field", field_type="BIGNUMERIC", mode="NULLABLE" + ), ) - if _BIGNUMERIC_SUPPORT: - expected_schema += ( - schema.SchemaField( - "bignumeric_field", field_type="BIGNUMERIC", mode="NULLABLE" - ), - ) by_name = operator.attrgetter("name") assert sorted(augmented_schema, key=by_name) == sorted(expected_schema, key=by_name) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 671dd8da1..ca0dca975 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -30,7 +30,6 @@ import packaging import requests import pytest -import pytz import pkg_resources try: @@ -5018,16 +5017,24 @@ def test_insert_rows_w_repeated_fields(self): ( 12, [ - datetime.datetime(2018, 12, 1, 12, 0, 0, tzinfo=pytz.utc), - datetime.datetime(2018, 12, 1, 13, 0, 0, tzinfo=pytz.utc), + datetime.datetime( + 2018, 12, 1, 12, 0, 0, tzinfo=datetime.timezone.utc + ), + datetime.datetime( + 2018, 12, 1, 13, 0, 0, tzinfo=datetime.timezone.utc + ), ], [1.25, 2.5], ), { "score": 13, "times": [ - datetime.datetime(2018, 12, 2, 12, 
0, 0, tzinfo=pytz.utc), - datetime.datetime(2018, 12, 2, 13, 0, 0, tzinfo=pytz.utc), + datetime.datetime( + 2018, 12, 2, 12, 0, 0, tzinfo=datetime.timezone.utc + ), + datetime.datetime( + 2018, 12, 2, 13, 0, 0, tzinfo=datetime.timezone.utc + ), ], "distances": [-1.25, -2.5], }, @@ -6974,7 +6981,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self): datetime.datetime(2012, 3, 14, 15, 16), ], dtype="datetime64[ns]", - ).dt.tz_localize(pytz.utc), + ).dt.tz_localize(datetime.timezone.utc), ), ] ) @@ -7306,7 +7313,7 @@ def test_load_table_from_dataframe_w_partial_schema(self): datetime.datetime(2012, 3, 14, 15, 16), ], dtype="datetime64[ns]", - ).dt.tz_localize(pytz.utc), + ).dt.tz_localize(datetime.timezone.utc), ), ("string_col", ["abc", None, "def"]), ("bytes_col", [b"abc", b"def", None]), diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index a5badc66c..50d573345 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -20,9 +20,7 @@ import warnings import mock -import pkg_resources import pytest -import pytz import google.api_core.exceptions from test_utils.imports import maybe_fail_import @@ -44,11 +42,8 @@ try: import pyarrow import pyarrow.types - - PYARROW_VERSION = pkg_resources.parse_version(pyarrow.__version__) except ImportError: # pragma: NO COVER pyarrow = None - PYARROW_VERSION = pkg_resources.parse_version("0.0.1") try: from tqdm import tqdm @@ -58,9 +53,6 @@ from google.cloud.bigquery.dataset import DatasetReference -PYARROW_TIMESTAMP_VERSION = pkg_resources.parse_version("2.0.0") - - def _mock_client(): from google.cloud.bigquery import client @@ -914,7 +906,9 @@ def test_mview_last_refresh_time(self): } self.assertEqual( table.mview_last_refresh_time, - datetime.datetime(2020, 11, 30, 15, 57, 22, 496000, tzinfo=pytz.utc), + datetime.datetime( + 2020, 11, 30, 15, 57, 22, 496000, tzinfo=datetime.timezone.utc + ), ) def test_mview_enable_refresh(self): @@ -2878,10 +2872,7 @@ def 
test_to_dataframe_timestamp_out_of_pyarrow_bounds(self): df = row_iterator.to_dataframe(create_bqstorage_client=False) - tzinfo = None - if PYARROW_VERSION >= PYARROW_TIMESTAMP_VERSION: - tzinfo = datetime.timezone.utc - + tzinfo = datetime.timezone.utc self.assertIsInstance(df, pandas.DataFrame) self.assertEqual(len(df), 2) # verify the number of rows self.assertEqual(list(df.columns), ["some_timestamp"]) From 936660bdf48eb65844b39bc567146968895225d7 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 19 Aug 2021 10:01:53 -0400 Subject: [PATCH 217/230] chore: release 2.24.1 (#879) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 83b409015..5a3e74fd0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.24.1](https://www.github.com/googleapis/python-bigquery/compare/v2.24.0...v2.24.1) (2021-08-13) + + +### Bug Fixes + +* remove pytz dependency and require pyarrow>=3.0.0 ([#875](https://www.github.com/googleapis/python-bigquery/issues/875)) ([2cb3563](https://www.github.com/googleapis/python-bigquery/commit/2cb3563ee863edef7eaf5d04d739bcfe7bc6438e)) + ## [2.24.0](https://www.github.com/googleapis/python-bigquery/compare/v2.23.3...v2.24.0) (2021-08-11) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 84f6b4643..96f84438a 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "2.24.0" +__version__ = "2.24.1" From 5c5b4b852e8818f885014bca3769c4b7c13183cd Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Thu, 19 Aug 2021 17:29:00 +0200 Subject: [PATCH 218/230] chore(deps): update dependency google-cloud-bigquery to v2.24.1 (#887) --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index dfee339d4..ac804c81c 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,4 @@ geojson==2.5.0 -google-cloud-bigquery==2.24.0 +google-cloud-bigquery==2.24.1 google-cloud-bigquery-storage==2.6.3 Shapely==1.7.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 264899dff..484e10516 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.24.0 +google-cloud-bigquery==2.24.1 google-cloud-bigquery-storage==2.6.3 google-auth-oauthlib==0.4.5 grpcio==1.39.0 From 16f65e6ae15979217ceea6c6d398c9057a363a13 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 24 Aug 2021 10:29:04 -0400 Subject: [PATCH 219/230] feat: Support using GeoPandas for GEOGRAPHY columns (#848) --- docs/conf.py | 2 + docs/usage/pandas.rst | 15 ++ google/cloud/bigquery/_pandas_helpers.py | 71 ++++++- google/cloud/bigquery/job/query.py | 119 ++++++++++- google/cloud/bigquery/table.py | 196 +++++++++++++++++- owlbot.py | 4 + samples/geography/requirements.txt | 44 ++++ samples/geography/to_geodataframe.py | 32 +++ samples/geography/to_geodataframe_test.py | 25 +++ setup.py | 1 + testing/constraints-3.6.txt | 4 +- tests/system/test_client.py | 3 - tests/system/test_pandas.py | 143 +++++++++++++ tests/unit/job/test_query_pandas.py | 130 ++++++++++-- tests/unit/test__pandas_helpers.py | 100 +++++++++ tests/unit/test_table.py | 242 ++++++++++++++++++++++ 16 
files changed, 1102 insertions(+), 29 deletions(-) create mode 100644 samples/geography/to_geodataframe.py create mode 100644 samples/geography/to_geodataframe_test.py diff --git a/docs/conf.py b/docs/conf.py index 09f7ea414..59a2d8fb3 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -366,6 +366,8 @@ "grpc": ("https://grpc.github.io/grpc/python/", None), "proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None), "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None), + "pandas": ("http://pandas.pydata.org/pandas-docs/dev", None), + "geopandas": ("https://geopandas.org/", None), } diff --git a/docs/usage/pandas.rst b/docs/usage/pandas.rst index 9db98dfbb..92eee67cf 100644 --- a/docs/usage/pandas.rst +++ b/docs/usage/pandas.rst @@ -37,6 +37,21 @@ To retrieve table rows as a :class:`pandas.DataFrame`: :start-after: [START bigquery_list_rows_dataframe] :end-before: [END bigquery_list_rows_dataframe] + +Retrieve BigQuery GEOGRAPHY data as a GeoPandas GeoDataFrame +------------------------------------------------------------ + +`GeoPandas `_ adds geospatial analytics +capabilities to Pandas. To retrieve query results containing +GEOGRAPHY data as a :class:`geopandas.GeoDataFrame`: + +.. 
literalinclude:: ../samples/geography/to_geodataframe.py + :language: python + :dedent: 4 + :start-after: [START bigquery_query_results_geodataframe] + :end-before: [END bigquery_query_results_geodataframe] + + Load a Pandas DataFrame to a BigQuery Table ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index f49980645..ab58b1729 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -24,6 +24,36 @@ import pandas except ImportError: # pragma: NO COVER pandas = None +else: + import numpy + +try: + # _BaseGeometry is used to detect shapely objevys in `bq_to_arrow_array` + from shapely.geometry.base import BaseGeometry as _BaseGeometry +except ImportError: # pragma: NO COVER + # No shapely, use NoneType for _BaseGeometry as a placeholder. + _BaseGeometry = type(None) +else: + if pandas is not None: # pragma: NO COVER + + def _to_wkb(): + # Create a closure that: + # - Adds a not-null check. This allows the returned function to + # be used directly with apply, unlike `shapely.wkb.dumps`. + # - Avoid extra work done by `shapely.wkb.dumps` that we don't need. + # - Caches the WKBWriter (and write method lookup :) ) + # - Avoids adding WKBWriter, lgeos, and notnull to the module namespace. + from shapely.geos import WKBWriter, lgeos + + write = WKBWriter(lgeos).write + notnull = pandas.notnull + + def _to_wkb(v): + return write(v) if notnull(v) else v + + return _to_wkb + + _to_wkb = _to_wkb() try: import pyarrow @@ -69,6 +99,7 @@ "uint8": "INTEGER", "uint16": "INTEGER", "uint32": "INTEGER", + "geometry": "GEOGRAPHY", } @@ -193,14 +224,16 @@ def bq_to_arrow_data_type(field): return data_type_constructor() -def bq_to_arrow_field(bq_field): +def bq_to_arrow_field(bq_field, array_type=None): """Return the Arrow field, corresponding to a given BigQuery column. Returns: None: if the Arrow type cannot be determined. 
""" arrow_type = bq_to_arrow_data_type(bq_field) - if arrow_type: + if arrow_type is not None: + if array_type is not None: + arrow_type = array_type # For GEOGRAPHY, at least initially is_nullable = bq_field.mode.upper() == "NULLABLE" return pyarrow.field(bq_field.name, arrow_type, nullable=is_nullable) @@ -225,7 +258,24 @@ def bq_to_arrow_schema(bq_schema): def bq_to_arrow_array(series, bq_field): - arrow_type = bq_to_arrow_data_type(bq_field) + if bq_field.field_type.upper() == "GEOGRAPHY": + arrow_type = None + first = _first_valid(series) + if first is not None: + if series.dtype.name == "geometry" or isinstance(first, _BaseGeometry): + arrow_type = pyarrow.binary() + # Convert shapey geometry to WKB binary format: + series = series.apply(_to_wkb) + elif isinstance(first, bytes): + arrow_type = pyarrow.binary() + elif series.dtype.name == "geometry": + # We have a GeoSeries containing all nulls, convert it to a pandas series + series = pandas.Series(numpy.array(series)) + + if arrow_type is None: + arrow_type = bq_to_arrow_data_type(bq_field) + else: + arrow_type = bq_to_arrow_data_type(bq_field) field_type_upper = bq_field.field_type.upper() if bq_field.field_type else "" @@ -279,6 +329,12 @@ def list_columns_and_indexes(dataframe): return columns_and_indexes +def _first_valid(series): + first_valid_index = series.first_valid_index() + if first_valid_index is not None: + return series.at[first_valid_index] + + def dataframe_to_bq_schema(dataframe, bq_schema): """Convert a pandas DataFrame schema to a BigQuery schema. @@ -319,6 +375,13 @@ def dataframe_to_bq_schema(dataframe, bq_schema): # Otherwise, try to automatically determine the type based on the # pandas dtype. 
bq_type = _PANDAS_DTYPE_TO_BQ.get(dtype.name) + if bq_type is None: + sample_data = _first_valid(dataframe[column]) + if ( + isinstance(sample_data, _BaseGeometry) + and sample_data is not None # Paranoia + ): + bq_type = "GEOGRAPHY" bq_field = schema.SchemaField(column, bq_type) bq_schema_out.append(bq_field) @@ -450,11 +513,11 @@ def dataframe_to_arrow(dataframe, bq_schema): arrow_names = [] arrow_fields = [] for bq_field in bq_schema: - arrow_fields.append(bq_to_arrow_field(bq_field)) arrow_names.append(bq_field.name) arrow_arrays.append( bq_to_arrow_array(get_column_or_index(dataframe, bq_field.name), bq_field) ) + arrow_fields.append(bq_to_arrow_field(bq_field, arrow_arrays[-1].type)) if all((field is not None for field in arrow_fields)): return pyarrow.Table.from_arrays( diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 3ab47b0f9..0cb4798be 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -53,6 +53,7 @@ # Assumption: type checks are only used by library developers and CI environments # that have all optional dependencies installed, thus no conditional imports. import pandas + import geopandas import pyarrow from google.api_core import retry as retries from google.cloud import bigquery_storage @@ -1487,6 +1488,7 @@ def to_dataframe( create_bqstorage_client: bool = True, date_as_object: bool = True, max_results: Optional[int] = None, + geography_as_object: bool = False, ) -> "pandas.DataFrame": """Return a pandas DataFrame from a QueryJob @@ -1538,13 +1540,27 @@ def to_dataframe( .. versionadded:: 2.21.0 + geography_as_object (Optional[bool]): + If ``True``, convert GEOGRAPHY data to :mod:`shapely` + geometry objects. If ``False`` (default), don't cast + geography data to :mod:`shapely` geometry objects. + + .. versionadded:: 2.24.0 + Returns: - A :class:`~pandas.DataFrame` populated with row data and column - headers from the query results. 
The column headers are derived - from the destination table's schema. + pandas.DataFrame: + A :class:`~pandas.DataFrame` populated with row data + and column headers from the query results. The column + headers are derived from the destination table's + schema. Raises: - ValueError: If the `pandas` library cannot be imported. + ValueError: + If the :mod:`pandas` library cannot be imported, or + the :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. Also if + `geography_as_object` is `True`, but the + :mod:`shapely` library cannot be imported. """ query_result = wait_for_query(self, progress_bar_type, max_results=max_results) return query_result.to_dataframe( @@ -1553,6 +1569,101 @@ def to_dataframe( progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, date_as_object=date_as_object, + geography_as_object=geography_as_object, + ) + + # If changing the signature of this method, make sure to apply the same + # changes to table.RowIterator.to_dataframe(), except for the max_results parameter + # that should only exist here in the QueryJob method. + def to_geodataframe( + self, + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + dtypes: Dict[str, Any] = None, + progress_bar_type: str = None, + create_bqstorage_client: bool = True, + date_as_object: bool = True, + max_results: Optional[int] = None, + geography_column: Optional[str] = None, + ) -> "geopandas.GeoDataFrame": + """Return a GeoPandas GeoDataFrame from a QueryJob + + Args: + bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): + A BigQuery Storage API client. If supplied, use the faster + BigQuery Storage API to fetch rows from BigQuery. This + API is a billable API. + + This method requires the ``fastavro`` and + ``google-cloud-bigquery-storage`` libraries. + + Reading from a specific partition or snapshot is not + currently supported by this method. 
+ + dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): + A dictionary of column names pandas ``dtype``s. The provided + ``dtype`` is used when constructing the series for the column + specified. Otherwise, the default pandas behavior is used. + + progress_bar_type (Optional[str]): + If set, use the `tqdm `_ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + See + :func:`~google.cloud.bigquery.table.RowIterator.to_dataframe` + for details. + + .. versionadded:: 1.11.0 + create_bqstorage_client (Optional[bool]): + If ``True`` (default), create a BigQuery Storage API client + using the default API settings. The BigQuery Storage API + is a faster way to fetch rows from BigQuery. See the + ``bqstorage_client`` parameter for more information. + + This argument does nothing if ``bqstorage_client`` is supplied. + + .. versionadded:: 1.24.0 + + date_as_object (Optional[bool]): + If ``True`` (default), cast dates to objects. If ``False``, convert + to datetime64[ns] dtype. + + .. versionadded:: 1.26.0 + + max_results (Optional[int]): + Maximum number of rows to include in the result. No limit by default. + + .. versionadded:: 2.21.0 + + geography_column (Optional[str]): + If there are more than one GEOGRAPHY column, + identifies which one to use to construct a GeoPandas + GeoDataFrame. This option can be ommitted if there's + only one GEOGRAPHY column. + + Returns: + geopandas.GeoDataFrame: + A :class:`geopandas.GeoDataFrame` populated with row + data and column headers from the query results. The + column headers are derived from the destination + table's schema. + + Raises: + ValueError: + If the :mod:`geopandas` library cannot be imported, or the + :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. + + .. 
versionadded:: 2.24.0 + """ + query_result = wait_for_query(self, progress_bar_type, max_results=max_results) + return query_result.to_geodataframe( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + geography_column=geography_column, ) def __iter__(self): diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 62f888001..609c0b57e 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -29,6 +29,20 @@ except ImportError: # pragma: NO COVER pandas = None +try: + import geopandas +except ImportError: + geopandas = None +else: + _COORDINATE_REFERENCE_SYSTEM = "EPSG:4326" + +try: + import shapely.geos +except ImportError: + shapely = None +else: + _read_wkt = shapely.geos.WKTReader(shapely.geos.lgeos).read + try: import pyarrow except ImportError: # pragma: NO COVER @@ -52,6 +66,7 @@ # Unconditionally import optional dependencies again to tell pytype that # they are not None, avoiding false "no attribute" errors. import pandas + import geopandas import pyarrow from google.cloud import bigquery_storage @@ -60,6 +75,14 @@ "The pandas library is not installed, please install " "pandas to use the to_dataframe() function." ) +_NO_GEOPANDAS_ERROR = ( + "The geopandas library is not installed, please install " + "geopandas to use the to_geodataframe() function." +) +_NO_SHAPELY_ERROR = ( + "The shapely library is not installed, please install " + "shapely to use the geography_as_object option." +) _NO_PYARROW_ERROR = ( "The pyarrow library is not installed, please install " "pyarrow to use the to_arrow() function." @@ -1878,6 +1901,7 @@ def to_dataframe( progress_bar_type: str = None, create_bqstorage_client: bool = True, date_as_object: bool = True, + geography_as_object: bool = False, ) -> "pandas.DataFrame": """Create a pandas DataFrame by loading all pages of a query. 
@@ -1933,6 +1957,13 @@ def to_dataframe( .. versionadded:: 1.26.0 + geography_as_object (Optional[bool]): + If ``True``, convert GEOGRAPHY data to :mod:`shapely` + geometry objects. If ``False`` (default), don't cast + geography data to :mod:`shapely` geometry objects. + + .. versionadded:: 2.24.0 + Returns: pandas.DataFrame: A :class:`~pandas.DataFrame` populated with row data and column @@ -1941,13 +1972,18 @@ def to_dataframe( Raises: ValueError: - If the :mod:`pandas` library cannot be imported, or the - :mod:`google.cloud.bigquery_storage_v1` module is - required but cannot be imported. + If the :mod:`pandas` library cannot be imported, or + the :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. Also if + `geography_as_object` is `True`, but the + :mod:`shapely` library cannot be imported. """ if pandas is None: raise ValueError(_NO_PANDAS_ERROR) + if geography_as_object and shapely is None: + raise ValueError(_NO_SHAPELY_ERROR) + if dtypes is None: dtypes = {} @@ -1988,8 +2024,136 @@ def to_dataframe( for column in dtypes: df[column] = pandas.Series(df[column], dtype=dtypes[column]) + if geography_as_object: + for field in self.schema: + if field.field_type.upper() == "GEOGRAPHY": + df[field.name] = df[field.name].dropna().apply(_read_wkt) + return df + # If changing the signature of this method, make sure to apply the same + # changes to job.QueryJob.to_geodataframe() + def to_geodataframe( + self, + bqstorage_client: "bigquery_storage.BigQueryReadClient" = None, + dtypes: Dict[str, Any] = None, + progress_bar_type: str = None, + create_bqstorage_client: bool = True, + date_as_object: bool = True, + geography_column: Optional[str] = None, + ) -> "geopandas.GeoDataFrame": + """Create a GeoPandas GeoDataFrame by loading all pages of a query. + + Args: + bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): + A BigQuery Storage API client. 
If supplied, use the faster + BigQuery Storage API to fetch rows from BigQuery. + + This method requires the ``pyarrow`` and + ``google-cloud-bigquery-storage`` libraries. + + This method only exposes a subset of the capabilities of the + BigQuery Storage API. For full access to all features + (projections, filters, snapshots) use the Storage API directly. + + dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): + A dictionary of column names pandas ``dtype``s. The provided + ``dtype`` is used when constructing the series for the column + specified. Otherwise, the default pandas behavior is used. + progress_bar_type (Optional[str]): + If set, use the `tqdm `_ library to + display a progress bar while the data downloads. Install the + ``tqdm`` package to use this feature. + + Possible values of ``progress_bar_type`` include: + + ``None`` + No progress bar. + ``'tqdm'`` + Use the :func:`tqdm.tqdm` function to print a progress bar + to :data:`sys.stderr`. + ``'tqdm_notebook'`` + Use the :func:`tqdm.tqdm_notebook` function to display a + progress bar as a Jupyter notebook widget. + ``'tqdm_gui'`` + Use the :func:`tqdm.tqdm_gui` function to display a + progress bar as a graphical dialog box. + + create_bqstorage_client (Optional[bool]): + If ``True`` (default), create a BigQuery Storage API client + using the default API settings. The BigQuery Storage API + is a faster way to fetch rows from BigQuery. See the + ``bqstorage_client`` parameter for more information. + + This argument does nothing if ``bqstorage_client`` is supplied. + + date_as_object (Optional[bool]): + If ``True`` (default), cast dates to objects. If ``False``, convert + to datetime64[ns] dtype. + + geography_column (Optional[str]): + If there are more than one GEOGRAPHY column, + identifies which one to use to construct a geopandas + GeoDataFrame. This option can be ommitted if there's + only one GEOGRAPHY column. 
+ + Returns: + geopandas.GeoDataFrame: + A :class:`geopandas.GeoDataFrame` populated with row + data and column headers from the query results. The + column headers are derived from the destination + table's schema. + + Raises: + ValueError: + If the :mod:`geopandas` library cannot be imported, or the + :mod:`google.cloud.bigquery_storage_v1` module is + required but cannot be imported. + + .. versionadded:: 2.24.0 + """ + if geopandas is None: + raise ValueError(_NO_GEOPANDAS_ERROR) + + geography_columns = set( + field.name + for field in self.schema + if field.field_type.upper() == "GEOGRAPHY" + ) + if not geography_columns: + raise TypeError( + "There must be at least one GEOGRAPHY column" + " to create a GeoDataFrame" + ) + + if geography_column: + if geography_column not in geography_columns: + raise ValueError( + f"The given geography column, {geography_column}, doesn't name" + f" a GEOGRAPHY column in the result." + ) + elif len(geography_columns) == 1: + [geography_column] = geography_columns + else: + raise ValueError( + "There is more than one GEOGRAPHY column in the result. " + "The geography_column argument must be used to specify which " + "one to use to create a GeoDataFrame" + ) + + df = self.to_dataframe( + bqstorage_client, + dtypes, + progress_bar_type, + create_bqstorage_client, + date_as_object, + geography_as_object=True, + ) + + return geopandas.GeoDataFrame( + df, crs=_COORDINATE_REFERENCE_SYSTEM, geometry=geography_column + ) + class _EmptyRowIterator(RowIterator): """An empty row iterator. @@ -2042,6 +2206,7 @@ def to_dataframe( progress_bar_type=None, create_bqstorage_client=True, date_as_object=True, + geography_as_object=False, ) -> "pandas.DataFrame": """Create an empty dataframe. 
@@ -2059,6 +2224,31 @@ def to_dataframe( raise ValueError(_NO_PANDAS_ERROR) return pandas.DataFrame() + def to_geodataframe( + self, + bqstorage_client=None, + dtypes=None, + progress_bar_type=None, + create_bqstorage_client=True, + date_as_object=True, + geography_column: Optional[str] = None, + ) -> "pandas.DataFrame": + """Create an empty dataframe. + + Args: + bqstorage_client (Any): Ignored. Added for compatibility with RowIterator. + dtypes (Any): Ignored. Added for compatibility with RowIterator. + progress_bar_type (Any): Ignored. Added for compatibility with RowIterator. + create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator. + date_as_object (bool): Ignored. Added for compatibility with RowIterator. + + Returns: + pandas.DataFrame: An empty :class:`~pandas.DataFrame`. + """ + if geopandas is None: + raise ValueError(_NO_GEOPANDAS_ERROR) + return geopandas.GeoDataFrame(crs=_COORDINATE_REFERENCE_SYSTEM) + def to_dataframe_iterable( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, diff --git a/owlbot.py b/owlbot.py index 09845480a..ea9904cdb 100644 --- a/owlbot.py +++ b/owlbot.py @@ -97,6 +97,10 @@ samples=True, microgenerator=True, split_system_tests=True, + intersphinx_dependencies={ + "pandas": 'http://pandas.pydata.org/pandas-docs/dev', + "geopandas": "https://geopandas.org/", + } ) # BigQuery has a custom multiprocessing note diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index ac804c81c..7a76b4033 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,4 +1,48 @@ +attrs==21.2.0 +cachetools==4.2.2 +certifi==2021.5.30 +cffi==1.14.6 +charset-normalizer==2.0.4 +click==8.0.1 +click-plugins==1.1.1 +cligj==0.7.2 +dataclasses==0.6; python_version < '3.7' +Fiona==1.8.20 geojson==2.5.0 +geopandas==0.9.0 +google-api-core==1.31.2 +google-auth==1.35.0 google-cloud-bigquery==2.24.1 google-cloud-bigquery-storage==2.6.3 
+google-cloud-core==1.7.2 +google-crc32c==1.1.2 +google-resumable-media==1.3.3 +googleapis-common-protos==1.53.0 +grpcio==1.39.0 +idna==3.2 +importlib-metadata==4.6.4 +libcst==0.3.20 +munch==2.5.0 +mypy-extensions==0.4.3 +numpy==1.19.5 +packaging==21.0 +pandas==1.1.5 +proto-plus==1.19.0 +protobuf==3.17.3 +pyarrow==5.0.0 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pycparser==2.20 +pyparsing==2.4.7 +pyproj==3.0.1 +python-dateutil==2.8.2 +pytz==2021.1 +PyYAML==5.4.1 +requests==2.26.0 +rsa==4.7.2 Shapely==1.7.1 +six==1.16.0 +typing-extensions==3.10.0.0 +typing-inspect==0.7.1 +urllib3==1.26.6 +zipp==3.5.0 diff --git a/samples/geography/to_geodataframe.py b/samples/geography/to_geodataframe.py new file mode 100644 index 000000000..fa8073fef --- /dev/null +++ b/samples/geography/to_geodataframe.py @@ -0,0 +1,32 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from google.cloud import bigquery + +client = bigquery.Client() + + +def get_austin_service_requests_as_geography(): + # [START bigquery_query_results_geodataframe] + + sql = """ + SELECT created_date, complaint_description, + ST_GEOGPOINT(longitude, latitude) as location + FROM bigquery-public-data.austin_311.311_service_requests + LIMIT 10 + """ + + df = client.query(sql).to_geodataframe() + # [END bigquery_query_results_geodataframe] + return df diff --git a/samples/geography/to_geodataframe_test.py b/samples/geography/to_geodataframe_test.py new file mode 100644 index 000000000..7a2ba6937 --- /dev/null +++ b/samples/geography/to_geodataframe_test.py @@ -0,0 +1,25 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +from .to_geodataframe import get_austin_service_requests_as_geography + + +def test_get_austin_service_requests_as_geography(): + geopandas = pytest.importorskip("geopandas") + df = get_austin_service_requests_as_geography() + assert isinstance(df, geopandas.GeoDataFrame) + assert len(list(df)) == 3 # verify the number of columns + assert len(df) == 10 # verify the number of rows diff --git a/setup.py b/setup.py index a1b3b61a0..e7515493d 100644 --- a/setup.py +++ b/setup.py @@ -56,6 +56,7 @@ "grpcio >= 1.38.1, < 2.0dev", "pyarrow >= 3.0.0, < 6.0dev", ], + "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.6.0, <2.0dev"], "pandas": ["pandas>=0.23.0", "pyarrow >= 3.0.0, < 6.0dev"], "bignumeric_type": ["pyarrow >= 3.0.0, < 6.0dev"], "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt index ce012f0d7..be1a992fa 100644 --- a/testing/constraints-3.6.txt +++ b/testing/constraints-3.6.txt @@ -5,6 +5,7 @@ # # e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", # Then this file should have foo==1.14.0 +geopandas==0.9.0 google-api-core==1.29.0 google-cloud-bigquery-storage==2.0.0 google-cloud-core==1.4.1 @@ -13,10 +14,11 @@ grpcio==1.38.1 opentelemetry-api==0.11b0 opentelemetry-instrumentation==0.11b0 opentelemetry-sdk==0.11b0 -pandas==0.23.0 +pandas==0.24.2 proto-plus==1.10.0 protobuf==3.12.0 pyarrow==3.0.0 requests==2.18.0 +shapely==1.6.0 six==1.13.0 tqdm==4.7.4 diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 4250111b4..9da45ee6e 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -2360,9 +2360,6 @@ def test_create_table_rows_fetch_nested_schema(self): self.assertEqual(found[7], e_favtime) self.assertEqual(found[8], decimal.Decimal(expected["FavoriteNumber"])) - def _fetch_dataframe(self, query): - return Config.CLIENT.query(query).result().to_dataframe() - @unittest.skipIf(pyarrow is None, "Requires `pyarrow`") @unittest.skipIf( 
bigquery_storage is None, "Requires `google-cloud-bigquery-storage`" diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 371dcea71..836f93210 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -798,3 +798,146 @@ def test_list_rows_max_results_w_bqstorage(bigquery_client): dataframe = row_iterator.to_dataframe(bqstorage_client=bqstorage_client) assert len(dataframe.index) == 100 + + +def test_to_dataframe_geography_as_objects(bigquery_client, dataset_id): + wkt = pytest.importorskip("shapely.wkt") + bigquery_client.query( + f"create table {dataset_id}.lake (name string, geog geography)" + ).result() + bigquery_client.query( + f""" + insert into {dataset_id}.lake (name, geog) values + ('foo', st_geogfromtext('point(0 0)')), + ('bar', st_geogfromtext('point(0 1)')), + ('baz', null) + """ + ).result() + df = bigquery_client.query( + f"select * from {dataset_id}.lake order by name" + ).to_dataframe(geography_as_object=True) + assert list(df["name"]) == ["bar", "baz", "foo"] + assert df["geog"][0] == wkt.loads("point(0 1)") + assert pandas.isna(df["geog"][1]) + assert df["geog"][2] == wkt.loads("point(0 0)") + + +def test_to_geodataframe(bigquery_client, dataset_id): + geopandas = pytest.importorskip("geopandas") + from shapely import wkt + + bigquery_client.query( + f"create table {dataset_id}.geolake (name string, geog geography)" + ).result() + bigquery_client.query( + f""" + insert into {dataset_id}.geolake (name, geog) values + ('foo', st_geogfromtext('point(0 0)')), + ('bar', st_geogfromtext('polygon((0 0, 1 0, 1 1, 0 0))')), + ('baz', null) + """ + ).result() + df = bigquery_client.query( + f"select * from {dataset_id}.geolake order by name" + ).to_geodataframe() + assert df["geog"][0] == wkt.loads("polygon((0 0, 1 0, 1 1, 0 0))") + assert pandas.isna(df["geog"][1]) + assert df["geog"][2] == wkt.loads("point(0 0)") + assert isinstance(df, geopandas.GeoDataFrame) + assert isinstance(df["geog"], 
geopandas.GeoSeries) + assert df.area[0] == 0.5 + assert pandas.isna(df.area[1]) + assert df.area[2] == 0.0 + assert df.crs.srs == "EPSG:4326" + assert df.crs.name == "WGS 84" + assert df.geog.crs.srs == "EPSG:4326" + assert df.geog.crs.name == "WGS 84" + + +def test_load_geodataframe(bigquery_client, dataset_id): + geopandas = pytest.importorskip("geopandas") + import pandas + from shapely import wkt + from google.cloud.bigquery.schema import SchemaField + + df = geopandas.GeoDataFrame( + pandas.DataFrame( + dict( + name=["foo", "bar"], + geo1=[None, None], + geo2=[None, wkt.loads("Point(1 1)")], + ) + ), + geometry="geo1", + ) + + table_id = f"{dataset_id}.lake_from_gp" + bigquery_client.load_table_from_dataframe(df, table_id).result() + + table = bigquery_client.get_table(table_id) + assert table.schema == [ + SchemaField("name", "STRING", "NULLABLE"), + SchemaField("geo1", "GEOGRAPHY", "NULLABLE"), + SchemaField("geo2", "GEOGRAPHY", "NULLABLE"), + ] + assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ + ["bar", None, "POINT(1 1)"], + ["foo", None, None], + ] + + +def test_load_dataframe_w_shapely(bigquery_client, dataset_id): + wkt = pytest.importorskip("shapely.wkt") + from google.cloud.bigquery.schema import SchemaField + + df = pandas.DataFrame( + dict(name=["foo", "bar"], geo=[None, wkt.loads("Point(1 1)")]) + ) + + table_id = f"{dataset_id}.lake_from_shapes" + bigquery_client.load_table_from_dataframe(df, table_id).result() + + table = bigquery_client.get_table(table_id) + assert table.schema == [ + SchemaField("name", "STRING", "NULLABLE"), + SchemaField("geo", "GEOGRAPHY", "NULLABLE"), + ] + assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ + ["bar", "POINT(1 1)"], + ["foo", None], + ] + + bigquery_client.load_table_from_dataframe(df, table_id).result() + assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ + ["bar", "POINT(1 1)"], + ["bar", "POINT(1 1)"], + ["foo", None], + ["foo", None], + ] + + +def 
test_load_dataframe_w_wkb(bigquery_client, dataset_id): + wkt = pytest.importorskip("shapely.wkt") + from shapely import wkb + from google.cloud.bigquery.schema import SchemaField + + df = pandas.DataFrame( + dict(name=["foo", "bar"], geo=[None, wkb.dumps(wkt.loads("Point(1 1)"))]) + ) + + table_id = f"{dataset_id}.lake_from_wkb" + # We create the table first, to inform the interpretation of the wkb data + bigquery_client.query( + f"create table {table_id} (name string, geo GEOGRAPHY)" + ).result() + bigquery_client.load_table_from_dataframe(df, table_id).result() + + table = bigquery_client.get_table(table_id) + assert table.schema == [ + SchemaField("name", "STRING", "NULLABLE"), + SchemaField("geo", "GEOGRAPHY", "NULLABLE"), + ] + assert sorted(map(list, bigquery_client.list_rows(table_id))) == [ + ["bar", "POINT(1 1)"], + ["foo", None], + ] diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index c537802f4..b5af90c0b 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -23,6 +23,14 @@ import pandas except (ImportError, AttributeError): # pragma: NO COVER pandas = None +try: + import shapely +except (ImportError, AttributeError): # pragma: NO COVER + shapely = None +try: + import geopandas +except (ImportError, AttributeError): # pragma: NO COVER + geopandas = None try: import pyarrow except (ImportError, AttributeError): # pragma: NO COVER @@ -425,38 +433,41 @@ def test_to_arrow_w_tqdm_wo_query_plan(): result_patch_tqdm.assert_called() -@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_to_dataframe(): +def _make_job(schema=(), rows=()): from google.cloud.bigquery.job import QueryJob as target_class begun_resource = _make_job_resource(job_type="query") query_resource = { "jobComplete": True, "jobReference": begun_resource["jobReference"], - "totalRows": "4", + "totalRows": str(len(rows)), "schema": { "fields": [ - {"name": "name", "type": "STRING", "mode": 
"NULLABLE"}, - {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + dict(name=field[0], type=field[1], mode=field[2]) for field in schema ] }, } - tabledata_resource = { - "rows": [ - {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, - {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, - {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]}, - {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]}, - ] - } + tabledata_resource = {"rows": [{"f": [{"v": v} for v in row]} for row in rows]} done_resource = copy.deepcopy(begun_resource) done_resource["status"] = {"state": "DONE"} connection = _make_connection( begun_resource, query_resource, done_resource, tabledata_resource ) client = _make_client(connection=connection) - job = target_class.from_api_repr(begun_resource, client) + return target_class.from_api_repr(begun_resource, client) + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_to_dataframe(): + job = _make_job( + (("name", "STRING", "NULLABLE"), ("age", "INTEGER", "NULLABLE")), + ( + ("Phred Phlyntstone", "32"), + ("Bharney Rhubble", "33"), + ("Wylma Phlyntstone", "29"), + ("Bhettye Rhubble", "27"), + ), + ) df = job.to_dataframe(create_bqstorage_client=False) assert isinstance(df, pandas.DataFrame) @@ -868,3 +879,94 @@ def test_to_dataframe_w_tqdm_max_results(): result_patch_tqdm.assert_called_with( timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=3 ) + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(shapely is None, reason="Requires `shapely`") +def test_to_dataframe_geography_as_object(): + job = _make_job( + (("name", "STRING", "NULLABLE"), ("geog", "GEOGRAPHY", "NULLABLE")), + ( + ("Phred Phlyntstone", "Point(0 0)"), + ("Bharney Rhubble", "Point(0 1)"), + ("Wylma Phlyntstone", None), + ), + ) + df = job.to_dataframe(create_bqstorage_client=False, geography_as_object=True) + + assert isinstance(df, pandas.DataFrame) + assert len(df) == 3 # verify the number of rows + assert list(df) == ["name", 
"geog"] # verify the column names + assert [v.__class__.__name__ for v in df.geog] == [ + "Point", + "Point", + "float", + ] # float because nan + + +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +def test_to_geodataframe(): + job = _make_job( + (("name", "STRING", "NULLABLE"), ("geog", "GEOGRAPHY", "NULLABLE")), + ( + ("Phred Phlyntstone", "Point(0 0)"), + ("Bharney Rhubble", "Point(0 1)"), + ("Wylma Phlyntstone", None), + ), + ) + df = job.to_geodataframe(create_bqstorage_client=False) + + assert isinstance(df, geopandas.GeoDataFrame) + assert len(df) == 3 # verify the number of rows + assert list(df) == ["name", "geog"] # verify the column names + assert [v.__class__.__name__ for v in df.geog] == [ + "Point", + "Point", + "NoneType", + ] # float because nan + assert isinstance(df.geog, geopandas.GeoSeries) + + +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +@mock.patch("google.cloud.bigquery.job.query.wait_for_query") +def test_query_job_to_geodataframe_delegation(wait_for_query): + """ + QueryJob.to_geodataframe just delegates to RowIterator.to_geodataframe. + + This test just demonstrates that. We don't need to test all the + variations, which are tested for RowIterator. 
+ """ + import numpy + + job = _make_job() + bqstorage_client = object() + dtypes = dict(xxx=numpy.dtype("int64")) + progress_bar_type = "normal" + create_bqstorage_client = False + date_as_object = False + max_results = 42 + geography_column = "g" + + df = job.to_geodataframe( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + max_results=max_results, + geography_column=geography_column, + ) + + wait_for_query.assert_called_once_with( + job, progress_bar_type, max_results=max_results + ) + row_iterator = wait_for_query.return_value + row_iterator.to_geodataframe.assert_called_once_with( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + geography_column=geography_column, + ) + assert df is row_iterator.to_geodataframe.return_value diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index b9cb56572..a9b0ae21f 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -36,6 +36,11 @@ # Mock out pyarrow when missing, because methods from pyarrow.types are # used in test parameterization. 
pyarrow = mock.Mock() +try: + import geopandas +except ImportError: # pragma: NO COVER + geopandas = None + import pytest from google import api_core @@ -584,6 +589,60 @@ def test_bq_to_arrow_array_w_special_floats(module_under_test): assert roundtrip[3] is None +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test_bq_to_arrow_array_w_geography_dtype(module_under_test): + from shapely import wkb, wkt + + bq_field = schema.SchemaField("field_name", "GEOGRAPHY") + + series = geopandas.GeoSeries([None, wkt.loads("point(0 0)")]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + # The result is binary, because we use wkb format + assert array.type == pyarrow.binary() + assert array.to_pylist() == [None, wkb.dumps(series[1])] + + # All na: + series = geopandas.GeoSeries([None, None]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + assert array.type == pyarrow.string() + assert array.to_pylist() == list(series) + + +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test_bq_to_arrow_array_w_geography_type_shapely_data(module_under_test): + from shapely import wkb, wkt + + bq_field = schema.SchemaField("field_name", "GEOGRAPHY") + + series = pandas.Series([None, wkt.loads("point(0 0)")]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + # The result is binary, because we use wkb format + assert array.type == pyarrow.binary() + assert array.to_pylist() == [None, wkb.dumps(series[1])] + + # All na: + series = pandas.Series([None, None]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + assert array.type == pyarrow.string() + assert array.to_pylist() == list(series) + + +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), 
reason="Requires `pyarrow`") +def test_bq_to_arrow_array_w_geography_type_wkb_data(module_under_test): + from shapely import wkb, wkt + + bq_field = schema.SchemaField("field_name", "GEOGRAPHY") + + series = pandas.Series([None, wkb.dumps(wkt.loads("point(0 0)"))]) + array = module_under_test.bq_to_arrow_array(series, bq_field) + # The result is binary, because we use wkb format + assert array.type == pyarrow.binary() + assert array.to_pylist() == list(series) + + @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_bq_to_arrow_schema_w_unknown_type(module_under_test): fields = ( @@ -1158,6 +1217,28 @@ def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): assert "struct_field" in str(expected_warnings[0]) +@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") +def test_dataframe_to_bq_schema_geography(module_under_test): + from shapely import wkt + + df = geopandas.GeoDataFrame( + pandas.DataFrame( + dict( + name=["foo", "bar"], + geo1=[None, None], + geo2=[None, wkt.loads("Point(1 1)")], + ) + ), + geometry="geo1", + ) + bq_schema = module_under_test.dataframe_to_bq_schema(df, []) + assert bq_schema == ( + schema.SchemaField("name", "STRING"), + schema.SchemaField("geo1", "GEOGRAPHY"), + schema.SchemaField("geo2", "GEOGRAPHY"), + ) + + @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_augment_schema_type_detection_succeeds(module_under_test): @@ -1554,3 +1635,22 @@ def test_download_dataframe_row_iterator_dict_sequence_schema(module_under_test) def test_table_data_listpage_to_dataframe_skips_stop_iteration(module_under_test): dataframe = module_under_test._row_iterator_page_to_dataframe([], [], {}) assert isinstance(dataframe, pandas.DataFrame) + + +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test_bq_to_arrow_field_type_override(module_under_test): 
+ # When loading pandas data, we may need to override the type + # decision based on data contents, because GEOGRAPHY data can be + # stored as either text or binary. + + assert ( + module_under_test.bq_to_arrow_field(schema.SchemaField("g", "GEOGRAPHY")).type + == pyarrow.string() + ) + + assert ( + module_under_test.bq_to_arrow_field( + schema.SchemaField("g", "GEOGRAPHY"), pyarrow.binary(), + ).type + == pyarrow.binary() + ) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 50d573345..1ce930ee4 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -14,6 +14,7 @@ import datetime import logging +import re import time import types import unittest @@ -39,6 +40,11 @@ except (ImportError, AttributeError): # pragma: NO COVER pandas = None +try: + import geopandas +except (ImportError, AttributeError): # pragma: NO COVER + geopandas = None + try: import pyarrow import pyarrow.types @@ -1842,6 +1848,27 @@ def test_to_dataframe_iterable(self): self.assertEqual(len(df), 0) # Verify the number of rows. self.assertEqual(len(df.columns), 0) + @mock.patch("google.cloud.bigquery.table.geopandas", new=None) + def test_to_geodataframe_if_geopandas_is_none(self): + row_iterator = self._make_one() + with self.assertRaisesRegex( + ValueError, + re.escape( + "The geopandas library is not installed, please install " + "geopandas to use the to_geodataframe() function." 
+ ), + ): + row_iterator.to_geodataframe(create_bqstorage_client=False) + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe(self): + row_iterator = self._make_one() + df = row_iterator.to_geodataframe(create_bqstorage_client=False) + self.assertIsInstance(df, geopandas.GeoDataFrame) + self.assertEqual(len(df), 0) # verify the number of rows + self.assertEqual(df.crs.srs, "EPSG:4326") + self.assertEqual(df.crs.name, "WGS 84") + class TestRowIterator(unittest.TestCase): def _class_under_test(self): @@ -1879,6 +1906,16 @@ def _make_one( client, api_request, path, schema, table=table, **kwargs ) + def _make_one_from_data(self, schema=(), rows=()): + from google.cloud.bigquery.schema import SchemaField + + schema = [SchemaField(*a) for a in schema] + rows = [{"f": [{"v": v} for v in row]} for row in rows] + + path = "/foo" + api_request = mock.Mock(return_value={"rows": rows}) + return self._make_one(_mock_client(), api_request, path, schema) + def test_constructor(self): from google.cloud.bigquery.table import _item_to_row from google.cloud.bigquery.table import _rows_page_start @@ -3170,6 +3207,18 @@ def test_to_dataframe_error_if_pandas_is_none(self): with self.assertRaises(ValueError): row_iterator.to_dataframe() + @unittest.skipIf(pandas is None, "Requires `pandas`") + @mock.patch("google.cloud.bigquery.table.shapely", new=None) + def test_to_dataframe_error_if_shapely_is_none(self): + with self.assertRaisesRegex( + ValueError, + re.escape( + "The shapely library is not installed, please install " + "shapely to use the geography_as_object option." + ), + ): + self._make_one_from_data().to_dataframe(geography_as_object=True) + @unittest.skipIf(pandas is None, "Requires `pandas`") def test_to_dataframe_max_results_w_bqstorage_warning(self): from google.cloud.bigquery.schema import SchemaField @@ -3927,6 +3976,199 @@ def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): # Don't close the client if it was passed in. 
bqstorage_client._transport.grpc_channel.close.assert_not_called() + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_dataframe_geography_as_object(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY")), + ( + ("foo", "Point(0 0)"), + ("bar", None), + ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))"), + ), + ) + df = row_iterator.to_dataframe( + create_bqstorage_client=False, geography_as_object=True, + ) + self.assertIsInstance(df, pandas.DataFrame) + self.assertEqual(len(df), 3) # verify the number of rows + self.assertEqual(list(df), ["name", "geog"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.geog.dtype.name, "object") + self.assertIsInstance(df.geog, pandas.Series) + self.assertEqual( + [v.__class__.__name__ for v in df.geog], ["Point", "float", "Polygon"] + ) + + @mock.patch("google.cloud.bigquery.table.geopandas", new=None) + def test_to_geodataframe_error_if_geopandas_is_none(self): + with self.assertRaisesRegex( + ValueError, + re.escape( + "The geopandas library is not installed, please install " + "geopandas to use the to_geodataframe() function." 
+ ), + ): + self._make_one_from_data().to_geodataframe() + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY")), + ( + ("foo", "Point(0 0)"), + ("bar", None), + ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))"), + ), + ) + df = row_iterator.to_geodataframe(create_bqstorage_client=False) + self.assertIsInstance(df, geopandas.GeoDataFrame) + self.assertEqual(len(df), 3) # verify the number of rows + self.assertEqual(list(df), ["name", "geog"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.geog.dtype.name, "geometry") + self.assertIsInstance(df.geog, geopandas.GeoSeries) + self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(df.crs.srs, "EPSG:4326") + self.assertEqual(df.crs.name, "WGS 84") + self.assertEqual(df.geog.crs.srs, "EPSG:4326") + self.assertEqual(df.geog.crs.name, "WGS 84") + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe_ambiguous_geog(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), () + ) + with self.assertRaisesRegex( + ValueError, + re.escape( + "There is more than one GEOGRAPHY column in the result. " + "The geography_column argument must be used to specify which " + "one to use to create a GeoDataFrame" + ), + ): + row_iterator.to_geodataframe(create_bqstorage_client=False) + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe_bad_geography_column(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), () + ) + with self.assertRaisesRegex( + ValueError, + re.escape( + "The given geography column, xxx, doesn't name" + " a GEOGRAPHY column in the result." 
+ ), + ): + row_iterator.to_geodataframe( + create_bqstorage_client=False, geography_column="xxx" + ) + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe_no_geog(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "STRING")), () + ) + with self.assertRaisesRegex( + TypeError, + re.escape( + "There must be at least one GEOGRAPHY column" + " to create a GeoDataFrame" + ), + ): + row_iterator.to_geodataframe(create_bqstorage_client=False) + + @unittest.skipIf(geopandas is None, "Requires `geopandas`") + def test_to_geodataframe_w_geography_column(self): + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), + ( + ("foo", "Point(0 0)", "Point(1 1)"), + ("bar", None, "Point(2 2)"), + ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))", "Point(3 3)"), + ), + ) + df = row_iterator.to_geodataframe( + create_bqstorage_client=False, geography_column="geog" + ) + self.assertIsInstance(df, geopandas.GeoDataFrame) + self.assertEqual(len(df), 3) # verify the number of rows + self.assertEqual(list(df), ["name", "geog", "geog2"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.geog.dtype.name, "geometry") + self.assertEqual(df.geog2.dtype.name, "object") + self.assertIsInstance(df.geog, geopandas.GeoSeries) + self.assertEqual(list(map(str, df.area)), ["0.0", "nan", "0.5"]) + self.assertEqual(list(map(str, df.geog.area)), ["0.0", "nan", "0.5"]) + self.assertEqual( + [v.__class__.__name__ for v in df.geog], ["Point", "NoneType", "Polygon"] + ) + + # Geog2 isn't a GeoSeries, but it contains geomentries: + self.assertIsInstance(df.geog2, pandas.Series) + self.assertEqual( + [v.__class__.__name__ for v in df.geog2], ["Point", "Point", "Point"] + ) + # and can easily be converted to a GeoSeries + self.assertEqual( + list(map(str, geopandas.GeoSeries(df.geog2).area)), ["0.0", "0.0", "0.0"] + ) + + @unittest.skipIf(geopandas 
is None, "Requires `geopandas`") + @mock.patch("google.cloud.bigquery.table.RowIterator.to_dataframe") + def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): + """ + RowIterator.to_geodataframe just delegates to RowIterator.to_dataframe. + + This test just demonstrates that. We don't need to test all the + variations, which are tested for to_dataframe. + """ + import numpy + from shapely import wkt + + row_iterator = self._make_one_from_data( + (("name", "STRING"), ("g", "GEOGRAPHY")) + ) + bqstorage_client = object() + dtypes = dict(xxx=numpy.dtype("int64")) + progress_bar_type = "normal" + create_bqstorage_client = False + date_as_object = False + geography_column = "g" + + to_dataframe.return_value = pandas.DataFrame( + dict(name=["foo"], g=[wkt.loads("point(0 0)")],) + ) + + df = row_iterator.to_geodataframe( + bqstorage_client=bqstorage_client, + dtypes=dtypes, + progress_bar_type=progress_bar_type, + create_bqstorage_client=create_bqstorage_client, + date_as_object=date_as_object, + geography_column=geography_column, + ) + + to_dataframe.assert_called_once_with( + bqstorage_client, + dtypes, + progress_bar_type, + create_bqstorage_client, + date_as_object, + geography_as_object=True, + ) + + self.assertIsInstance(df, geopandas.GeoDataFrame) + self.assertEqual(len(df), 1) # verify the number of rows + self.assertEqual(list(df), ["name", "g"]) # verify the column names + self.assertEqual(df.name.dtype.name, "object") + self.assertEqual(df.g.dtype.name, "geometry") + self.assertIsInstance(df.g, geopandas.GeoSeries) + self.assertEqual(list(map(str, df.area)), ["0.0"]) + self.assertEqual(list(map(str, df.g.area)), ["0.0"]) + self.assertEqual([v.__class__.__name__ for v in df.g], ["Point"]) + class TestPartitionRange(unittest.TestCase): def _get_target_class(self): From aa4876e226aa54a43d3e20d401675403a41d71f8 Mon Sep 17 00:00:00 2001 From: Jim Fulton Date: Tue, 24 Aug 2021 09:33:04 -0600 Subject: [PATCH 220/230] test: Add test of datetime and time 
pandas load (#895) --- tests/system/test_pandas.py | 64 ++++++++++++++++++++++++++++++------- 1 file changed, 53 insertions(+), 11 deletions(-) diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 836f93210..93ce23481 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -279,8 +279,6 @@ def test_load_table_from_dataframe_w_required(bigquery_client, dataset_id): def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id): # Schema with all scalar types. - # TODO: Uploading DATETIME columns currently fails, thus that field type - # is temporarily removed from the test. # See: # https://github.com/googleapis/python-bigquery/issues/61 # https://issuetracker.google.com/issues/151765076 @@ -288,7 +286,7 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id bigquery.SchemaField("bool_col", "BOOLEAN"), bigquery.SchemaField("bytes_col", "BYTES"), bigquery.SchemaField("date_col", "DATE"), - # bigquery.SchemaField("dt_col", "DATETIME"), + bigquery.SchemaField("dt_col", "DATETIME"), bigquery.SchemaField("float_col", "FLOAT"), bigquery.SchemaField("geo_col", "GEOGRAPHY"), bigquery.SchemaField("int_col", "INTEGER"), @@ -313,14 +311,14 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id ("bool_col", [True, None, False]), ("bytes_col", [b"abc", None, b"def"]), ("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)]), - # ( - # "dt_col", - # [ - # datetime.datetime(1, 1, 1, 0, 0, 0), - # None, - # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), - # ], - # ), + ( + "dt_col", + [ + datetime.datetime(1, 1, 1, 0, 0, 0), + None, + datetime.datetime(9999, 12, 31, 23, 59, 59, 999999), + ], + ), ("float_col", [float("-inf"), float("nan"), float("inf")]), ( "geo_col", @@ -800,6 +798,50 @@ def test_list_rows_max_results_w_bqstorage(bigquery_client): assert len(dataframe.index) == 100 +def test_upload_time_and_datetime_56(bigquery_client, 
dataset_id): + df = pandas.DataFrame( + dict( + dt=[ + datetime.datetime(2020, 1, 8, 8, 0, 0), + datetime.datetime( + 2020, + 1, + 8, + 8, + 0, + 0, + tzinfo=datetime.timezone(datetime.timedelta(hours=-7)), + ), + ], + t=[datetime.time(0, 0, 10, 100001), None], + ) + ) + table = f"{dataset_id}.test_upload_time_and_datetime" + bigquery_client.load_table_from_dataframe(df, table).result() + data = list(map(list, bigquery_client.list_rows(table))) + assert data == [ + [ + datetime.datetime(2020, 1, 8, 8, 0, tzinfo=datetime.timezone.utc), + datetime.time(0, 0, 10, 100001), + ], + [datetime.datetime(2020, 1, 8, 15, 0, tzinfo=datetime.timezone.utc), None], + ] + + from google.cloud.bigquery import job, schema + + table = f"{dataset_id}.test_upload_time_and_datetime_dt" + config = job.LoadJobConfig( + schema=[schema.SchemaField("dt", "DATETIME"), schema.SchemaField("t", "TIME")] + ) + + bigquery_client.load_table_from_dataframe(df, table, job_config=config).result() + data = list(map(list, bigquery_client.list_rows(table))) + assert data == [ + [datetime.datetime(2020, 1, 8, 8, 0), datetime.time(0, 0, 10, 100001)], + [datetime.datetime(2020, 1, 8, 15, 0), None], + ] + + def test_to_dataframe_geography_as_objects(bigquery_client, dataset_id): wkt = pytest.importorskip("shapely.wkt") bigquery_client.query( From f319d2596e7146ef355053a2a178d2e6a921e651 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 24 Aug 2021 15:36:00 -0600 Subject: [PATCH 221/230] chore: release 2.25.0 (#898) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Jim Fulton --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a3e74fd0..7a5727ee7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## 
[2.25.0](https://www.github.com/googleapis/python-bigquery/compare/v2.24.1...v2.25.0) (2021-08-24) + + +### Features + +* Support using GeoPandas for GEOGRAPHY columns ([#848](https://www.github.com/googleapis/python-bigquery/issues/848)) ([16f65e6](https://www.github.com/googleapis/python-bigquery/commit/16f65e6ae15979217ceea6c6d398c9057a363a13)) + ### [2.24.1](https://www.github.com/googleapis/python-bigquery/compare/v2.24.0...v2.24.1) (2021-08-13) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 96f84438a..f882cac3a 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.24.1" +__version__ = "2.25.0" From fbbf72cd8d9629594b32ae981f7b6f4815fc3647 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 25 Aug 2021 15:44:47 +0200 Subject: [PATCH 222/230] chore(deps): update dependency numpy to v1.21.2 (#899) * chore(deps): update dependency numpy to v1.21.2 * Update samples/geography/requirements.txt Co-authored-by: Leah E. 
Cole <6719667+leahecole@users.noreply.github.com> --- samples/geography/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 7a76b4033..82a45e3e8 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -24,7 +24,8 @@ importlib-metadata==4.6.4 libcst==0.3.20 munch==2.5.0 mypy-extensions==0.4.3 -numpy==1.19.5 +numpy==1.19.5; python_version < "3.7" +numpy==1.21.2; python_version > "3.6" packaging==21.0 pandas==1.1.5 proto-plus==1.19.0 From 72a52f0253125a45e3162c5a32c0dbfe9e127466 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 25 Aug 2021 16:28:49 +0200 Subject: [PATCH 223/230] chore(deps): update dependency google-cloud-core to v2 (#904) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 82a45e3e8..853306d71 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -14,7 +14,7 @@ google-api-core==1.31.2 google-auth==1.35.0 google-cloud-bigquery==2.24.1 google-cloud-bigquery-storage==2.6.3 -google-cloud-core==1.7.2 +google-cloud-core==2.0.0 google-crc32c==1.1.2 google-resumable-media==1.3.3 googleapis-common-protos==1.53.0 From 1cb3e55253e824e3a1da5201f6ec09065fb6b627 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 25 Aug 2021 16:52:09 +0200 Subject: [PATCH 224/230] fix: use REST API in cell magic when requested (#892) Fixes #876. The `--use_rest_api` option did not work as expected and this commit fixes it. **PR checklist:** - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) --- google/cloud/bigquery/magics/magics.py | 12 +++++++++--- tests/unit/test_magics.py | 21 ++++++++++++++++----- 2 files changed, 25 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index 2b8c2928e..d368bbeaa 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -671,7 +671,9 @@ def _cell_magic(line, query): _handle_error(ex, args.destination_var) return - result = rows.to_dataframe(bqstorage_client=bqstorage_client) + result = rows.to_dataframe( + bqstorage_client=bqstorage_client, create_bqstorage_client=False, + ) if args.destination_var: IPython.get_ipython().push({args.destination_var: result}) return @@ -728,11 +730,15 @@ def _cell_magic(line, query): if max_results: result = query_job.result(max_results=max_results).to_dataframe( - bqstorage_client=bqstorage_client, progress_bar_type=progress_bar + bqstorage_client=None, + create_bqstorage_client=False, + progress_bar_type=progress_bar, ) else: result = query_job.to_dataframe( - bqstorage_client=bqstorage_client, progress_bar_type=progress_bar + bqstorage_client=bqstorage_client, + create_bqstorage_client=False, + progress_bar_type=progress_bar, ) if args.destination_var: diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index d030482cc..88c92a070 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -660,7 +660,9 @@ def warning_match(warning): assert client_info.user_agent == "ipython-" + IPython.__version__ query_job_mock.to_dataframe.assert_called_once_with( - bqstorage_client=bqstorage_instance_mock, progress_bar_type="tqdm" + bqstorage_client=bqstorage_instance_mock, + create_bqstorage_client=mock.ANY, + 
progress_bar_type="tqdm", ) assert isinstance(return_value, pandas.DataFrame) @@ -703,7 +705,9 @@ def test_bigquery_magic_with_rest_client_requested(monkeypatch): bqstorage_mock.assert_not_called() query_job_mock.to_dataframe.assert_called_once_with( - bqstorage_client=None, progress_bar_type="tqdm" + bqstorage_client=None, + create_bqstorage_client=False, + progress_bar_type="tqdm", ) assert isinstance(return_value, pandas.DataFrame) @@ -757,7 +761,12 @@ def test_bigquery_magic_w_max_results_valid_calls_queryjob_result(): client_query_mock.return_value = query_job_mock ip.run_cell_magic("bigquery", "--max_results=5", sql) - query_job_mock.result.assert_called_with(max_results=5) + query_job_mock.result.assert_called_with(max_results=5) + query_job_mock.result.return_value.to_dataframe.assert_called_once_with( + bqstorage_client=None, + create_bqstorage_client=False, + progress_bar_type=mock.ANY, + ) @pytest.mark.usefixtures("ipython_interactive") @@ -929,7 +938,7 @@ def test_bigquery_magic_w_table_id_and_bqstorage_client(): ip.run_cell_magic("bigquery", "--max_results=5", table_id) row_iterator_mock.to_dataframe.assert_called_once_with( - bqstorage_client=bqstorage_instance_mock + bqstorage_client=bqstorage_instance_mock, create_bqstorage_client=mock.ANY, ) @@ -1246,7 +1255,9 @@ def test_bigquery_magic_w_progress_bar_type_w_context_setter(monkeypatch): bqstorage_mock.assert_not_called() query_job_mock.to_dataframe.assert_called_once_with( - bqstorage_client=None, progress_bar_type=magics.context.progress_bar_type + bqstorage_client=None, + create_bqstorage_client=False, + progress_bar_type=magics.context.progress_bar_type, ) assert isinstance(return_value, pandas.DataFrame) From b508809c0f887575274309a463e763c56ddd017d Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 25 Aug 2021 10:12:25 -0500 Subject: [PATCH 225/230] fix: populate default `timeout` and retry after client-side timeout (#896) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit This addresses internal issue 195337762 where sometimes query job creation can take longer than expected and retrying the API call can be faster than waiting for the first query job request to fail. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #889 Towards https://github.com/googleapis/python-bigquery/issues/779 🦕 --- google/cloud/bigquery/client.py | 123 +++++++-------- google/cloud/bigquery/retry.py | 8 + noxfile.py | 4 - tests/unit/test_client.py | 242 +++++++++++++++++------------- tests/unit/test_create_dataset.py | 19 +-- tests/unit/test_delete_dataset.py | 7 +- tests/unit/test_list_datasets.py | 11 +- tests/unit/test_list_jobs.py | 19 +-- tests/unit/test_list_models.py | 12 +- tests/unit/test_list_projects.py | 11 +- tests/unit/test_list_routines.py | 12 +- tests/unit/test_list_tables.py | 16 +- tests/unit/test_magics.py | 5 +- tests/unit/test_retry.py | 12 ++ 14 files changed, 282 insertions(+), 219 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index cbac82548..023346ffa 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -76,17 +76,24 @@ from google.cloud.bigquery.opentelemetry_tracing import create_span from google.cloud.bigquery import job from google.cloud.bigquery.job import ( + CopyJob, + CopyJobConfig, + ExtractJob, + ExtractJobConfig, + LoadJob, LoadJobConfig, QueryJob, QueryJobConfig, - CopyJobConfig, - ExtractJobConfig, ) from google.cloud.bigquery.model import Model 
from google.cloud.bigquery.model import ModelReference from google.cloud.bigquery.model import _model_arg_to_model_ref from google.cloud.bigquery.query import _QueryResults -from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY +from google.cloud.bigquery.retry import ( + DEFAULT_JOB_RETRY, + DEFAULT_RETRY, + DEFAULT_TIMEOUT, +) from google.cloud.bigquery.routine import Routine from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.schema import SchemaField @@ -245,7 +252,7 @@ def get_service_account_email( self, project: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> str: """Get the email address of the project's BigQuery service account @@ -292,7 +299,7 @@ def list_projects( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """List projects for the project associated with this client. @@ -358,7 +365,7 @@ def list_datasets( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """List datasets for the project associated with this client. @@ -549,7 +556,7 @@ def create_dataset( dataset: Union[str, Dataset, DatasetReference], exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Dataset: """API call: create the dataset via a POST request. @@ -624,7 +631,7 @@ def create_routine( routine: Routine, exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Routine: """[Beta] Create a routine via a POST request. 
@@ -679,7 +686,7 @@ def create_table( table: Union[str, Table, TableReference], exists_ok: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Table: """API call: create a table via a PUT request @@ -751,7 +758,7 @@ def get_dataset( self, dataset_ref: Union[DatasetReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Dataset: """Fetch the dataset referenced by ``dataset_ref`` @@ -795,7 +802,7 @@ def get_iam_policy( table: Union[Table, TableReference], requested_policy_version: int = 1, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Policy: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -825,7 +832,7 @@ def set_iam_policy( policy: Policy, updateMask: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Policy: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -858,7 +865,7 @@ def test_iam_permissions( table: Union[Table, TableReference], permissions: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Dict[str, Any]: if not isinstance(table, (Table, TableReference)): raise TypeError("table must be a Table or TableReference") @@ -883,7 +890,7 @@ def get_model( self, model_ref: Union[ModelReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Model: """[Beta] Fetch the model referenced by ``model_ref``. @@ -926,7 +933,7 @@ def get_routine( self, routine_ref: Union[Routine, RoutineReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Routine: """[Beta] Get the routine referenced by ``routine_ref``. 
@@ -970,7 +977,7 @@ def get_table( self, table: Union[Table, TableReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Table: """Fetch the table referenced by ``table``. @@ -1012,7 +1019,7 @@ def update_dataset( dataset: Dataset, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Dataset: """Change some fields of a dataset. @@ -1082,7 +1089,7 @@ def update_model( model: Model, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Model: """[Beta] Change some fields of a model. @@ -1146,7 +1153,7 @@ def update_routine( routine: Routine, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Routine: """[Beta] Change some fields of a routine. @@ -1220,7 +1227,7 @@ def update_table( table: Table, fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Table: """Change some fields of a table. @@ -1286,7 +1293,7 @@ def list_models( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """[Beta] List models in the dataset. @@ -1363,7 +1370,7 @@ def list_routines( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """[Beta] List routines in the dataset. @@ -1440,7 +1447,7 @@ def list_tables( max_results: int = None, page_token: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, page_size: int = None, ) -> page_iterator.Iterator: """List tables in the dataset. 
@@ -1515,7 +1522,7 @@ def delete_dataset( dataset: Union[Dataset, DatasetReference, str], delete_contents: bool = False, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """Delete a dataset. @@ -1574,7 +1581,7 @@ def delete_model( self, model: Union[Model, ModelReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """[Beta] Delete a model @@ -1624,12 +1631,12 @@ def delete_model( def delete_job_metadata( self, - job_id, - project=None, - location=None, - retry=DEFAULT_RETRY, - timeout=None, - not_found_ok=False, + job_id: Union[str, LoadJob, CopyJob, ExtractJob, QueryJob], + project: Optional[str] = None, + location: Optional[str] = None, + retry: retries.Retry = DEFAULT_RETRY, + timeout: float = DEFAULT_TIMEOUT, + not_found_ok: bool = False, ): """[Beta] Delete job metadata from job history. @@ -1637,26 +1644,20 @@ def delete_job_metadata( :func:`~google.cloud.bigquery.client.Client.cancel_job` instead. Args: - job_id (Union[ \ - str, \ - google.cloud.bigquery.job.LoadJob, \ - google.cloud.bigquery.job.CopyJob, \ - google.cloud.bigquery.job.ExtractJob, \ - google.cloud.bigquery.job.QueryJob \ - ]): Job identifier. + job_id: Job or job identifier. Keyword Arguments: - project (Optional[str]): + project: ID of the project which owns the job (defaults to the client's project). - location (Optional[str]): + location: Location where the job was run. Ignored if ``job_id`` is a job object. - retry (Optional[google.api_core.retry.Retry]): + retry: How to retry the RPC. - timeout (Optional[float]): + timeout: The number of seconds to wait for the underlying HTTP transport before using ``retry``. - not_found_ok (Optional[bool]): + not_found_ok: Defaults to ``False``. If ``True``, ignore "not found" errors when deleting the job. 
""" @@ -1697,7 +1698,7 @@ def delete_routine( self, routine: Union[Routine, RoutineReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """[Beta] Delete a routine. @@ -1751,7 +1752,7 @@ def delete_table( self, table: Union[Table, TableReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, not_found_ok: bool = False, ) -> None: """Delete a table @@ -1804,7 +1805,7 @@ def _get_query_results( project: str = None, timeout_ms: int = None, location: str = None, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> _QueryResults: """Get the query results object for a query job. @@ -1893,7 +1894,7 @@ def create_job( self, job_config: dict, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Create a new job. Args: @@ -1990,7 +1991,7 @@ def get_job( project: str = None, location: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Fetch a job for the project associated with this client. @@ -2064,7 +2065,7 @@ def cancel_job( project: str = None, location: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: """Attempt to cancel a job from a job ID. 
@@ -2141,7 +2142,7 @@ def list_jobs( all_users: bool = None, state_filter: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, min_creation_time: datetime.datetime = None, max_creation_time: datetime.datetime = None, page_size: int = None, @@ -2256,7 +2257,7 @@ def load_table_from_uri( project: str = None, job_config: LoadJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Starts a job for loading data into a table from CloudStorage. @@ -2340,7 +2341,7 @@ def load_table_from_file( location: str = None, project: str = None, job_config: LoadJobConfig = None, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of this table from a file-like object. @@ -2443,7 +2444,7 @@ def load_table_from_dataframe( project: str = None, job_config: LoadJobConfig = None, parquet_compression: str = "snappy", - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of a table from a pandas DataFrame. @@ -2678,7 +2679,7 @@ def load_table_from_json( location: str = None, project: str = None, job_config: LoadJobConfig = None, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.LoadJob: """Upload the contents of a table from a JSON string or dict. @@ -2961,7 +2962,7 @@ def copy_table( project: str = None, job_config: CopyJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> job.CopyJob: """Copy one or more tables to another table. @@ -3064,7 +3065,7 @@ def extract_table( project: str = None, job_config: ExtractJobConfig = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, source_type: str = "Table", ) -> job.ExtractJob: """Start a job to extract a table into Cloud Storage files. 
@@ -3162,7 +3163,7 @@ def query( location: str = None, project: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, job_retry: retries.Retry = DEFAULT_JOB_RETRY, ) -> job.QueryJob: """Run a SQL query. @@ -3444,7 +3445,7 @@ def insert_rows_json( ignore_unknown_values: bool = None, template_suffix: str = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Sequence[dict]: """Insert rows into a table without applying local type conversions. @@ -3579,7 +3580,7 @@ def list_partitions( self, table: Union[Table, TableReference, str], retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> Sequence[str]: """List the partitions in a table. @@ -3629,7 +3630,7 @@ def list_rows( start_index: int = None, page_size: int = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> RowIterator: """List the rows of the table. @@ -3741,7 +3742,7 @@ def _list_rows_from_query_results( start_index: int = None, page_size: int = None, retry: retries.Retry = DEFAULT_RETRY, - timeout: float = None, + timeout: float = DEFAULT_TIMEOUT, ) -> RowIterator: """List the rows of a completed query. See diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index e9286055c..830582322 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -29,6 +29,7 @@ exceptions.BadGateway, requests.exceptions.ChunkedEncodingError, requests.exceptions.ConnectionError, + requests.exceptions.Timeout, auth_exceptions.TransportError, ) @@ -59,6 +60,13 @@ def _should_retry(exc): pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. """ +DEFAULT_TIMEOUT = 5.0 * 60.0 +"""The default API timeout. + +This is the time to wait per request. To adjust the total wait time, set a +deadline on the retry object. 
+""" + job_retry_reasons = "rateLimitExceeded", "backendError" diff --git a/noxfile.py b/noxfile.py index 0dfe7bf93..9077924e9 100644 --- a/noxfile.py +++ b/noxfile.py @@ -160,10 +160,6 @@ def snippets(session): if os.environ.get("RUN_SNIPPETS_TESTS", "true") == "false": session.skip("RUN_SNIPPETS_TESTS is set to false, skipping") - # Sanity check: Only run snippets tests if the environment variable is set. - if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""): - session.skip("Credentials must be set via environment variable.") - constraints_path = str( CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" ) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index ca0dca975..e9204f1de 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -56,6 +56,7 @@ import google.cloud._helpers from google.cloud import bigquery_v2 from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT try: from google.cloud import bigquery_storage @@ -367,7 +368,7 @@ def test__get_query_results_miss_w_client_location(self): method="GET", path="/projects/PROJECT/queries/nothere", query_params={"maxResults": 0, "location": self.LOCATION}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test__get_query_results_hit(self): @@ -428,7 +429,9 @@ def test_get_service_account_email_w_alternate_project(self): service_account_email = client.get_service_account_email(project=project) final_attributes.assert_called_once_with({"path": path}, client, None) - conn.api_request.assert_called_once_with(method="GET", path=path, timeout=None) + conn.api_request.assert_called_once_with( + method="GET", path=path, timeout=DEFAULT_TIMEOUT + ) self.assertEqual(service_account_email, email) def test_get_service_account_email_w_custom_retry(self): @@ -771,7 +774,7 @@ def test_create_routine_w_conflict(self): } } conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, 
timeout=None, + method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT, ) @unittest.skipIf(opentelemetry is None, "Requires `opentelemetry`") @@ -807,7 +810,7 @@ def test_span_status_is_set(self): } } conn.api_request.assert_called_once_with( - method="POST", path=path, data=resource, timeout=None, + method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT, ) def test_create_routine_w_conflict_exists_ok(self): @@ -843,11 +846,13 @@ def test_create_routine_w_conflict_exists_ok(self): self.assertEqual(actual_routine.routine_id, "minimal_routine") conn.api_request.assert_has_calls( [ - mock.call(method="POST", path=path, data=resource, timeout=None,), + mock.call( + method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT, + ), mock.call( method="GET", path="/projects/test-routine-project/datasets/test_routines/routines/minimal_routine", - timeout=None, + timeout=DEFAULT_TIMEOUT, ), ] ) @@ -923,7 +928,7 @@ def test_create_table_w_custom_property(self): "newAlphaProperty": "unreleased property", "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got._properties["newAlphaProperty"], "unreleased property") self.assertEqual(got.table_id, self.TABLE_ID) @@ -964,7 +969,7 @@ def test_create_table_w_encryption_configuration(self): "labels": {}, "encryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -1000,7 +1005,7 @@ def test_create_table_w_day_partition_and_expire(self): "timePartitioning": {"type": "DAY", "expirationMs": "100"}, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(table.time_partitioning.type_, "DAY") self.assertEqual(table.time_partitioning.expiration_ms, 100) @@ -1081,7 +1086,7 @@ def test_create_table_w_schema_and_query(self): "view": {"query": query, "useLegacySql": False}, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, 
self.TABLE_ID) self.assertEqual(got.project, self.PROJECT) @@ -1136,7 +1141,7 @@ def test_create_table_w_external(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) self.assertEqual(got.project, self.PROJECT) @@ -1175,7 +1180,7 @@ def test_create_table_w_reference(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -1209,7 +1214,7 @@ def test_create_table_w_fully_qualified_string(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -1241,7 +1246,7 @@ def test_create_table_w_string(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(got.table_id, self.TABLE_ID) @@ -1276,7 +1281,7 @@ def test_create_table_alreadyexists_w_exists_ok_false(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_create_table_alreadyexists_w_exists_ok_true(self): @@ -1319,9 +1324,9 @@ def test_create_table_alreadyexists_w_exists_ok_true(self): }, "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), - mock.call(method="GET", path=get_path, timeout=None), + mock.call(method="GET", path=get_path, timeout=DEFAULT_TIMEOUT), ] ) @@ -1394,7 +1399,7 @@ def test_get_model_w_string(self): final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) conn.api_request.assert_called_once_with( - method="GET", path="/%s" % path, timeout=None + method="GET", path="/%s" % path, timeout=DEFAULT_TIMEOUT ) self.assertEqual(got.model_id, self.MODEL_ID) @@ -1503,7 +1508,7 @@ def test_get_table_sets_user_agent(self): "User-Agent": expected_user_agent, }, data=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertIn("my-application/1.2.3", expected_user_agent) @@ -1846,7 +1851,7 @@ def test_update_dataset_w_custom_property(self): data={"newAlphaProperty": "unreleased property"}, path=path, headers=None, - 
timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(dataset.dataset_id, self.DS_ID) @@ -2136,7 +2141,7 @@ def test_update_table_w_custom_property(self): path="/%s" % path, data={"newAlphaProperty": "unreleased property"}, headers=None, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual( updated_table._properties["newAlphaProperty"], "unreleased property" @@ -2171,7 +2176,7 @@ def test_update_table_only_use_legacy_sql(self): path="/%s" % path, data={"view": {"useLegacySql": True}}, headers=None, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql) @@ -2269,7 +2274,7 @@ def test_update_table_w_query(self): "schema": schema_resource, }, headers=None, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_update_table_w_schema_None(self): @@ -2398,7 +2403,7 @@ def test_delete_job_metadata_not_found(self): method="DELETE", path="/projects/client-proj/jobs/my-job/delete", query_params={"location": "client-loc"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_delete_job_metadata_with_id(self): @@ -2412,7 +2417,7 @@ def test_delete_job_metadata_with_id(self): method="DELETE", path="/projects/param-proj/jobs/my-job/delete", query_params={"location": "param-loc"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_delete_job_metadata_with_resource(self): @@ -2437,7 +2442,7 @@ def test_delete_job_metadata_with_resource(self): method="DELETE", path="/projects/job-based-proj/jobs/query_job/delete", query_params={"location": "us-east1"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_delete_model(self): @@ -2492,7 +2497,9 @@ def test_delete_model_w_not_found_ok_false(self): with self.assertRaises(google.api_core.exceptions.NotFound): client.delete_model("{}.{}".format(self.DS_ID, self.MODEL_ID)) - conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) + conn.api_request.assert_called_with( + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT + ) def 
test_delete_model_w_not_found_ok_true(self): path = "/projects/{}/datasets/{}/models/{}".format( @@ -2513,7 +2520,9 @@ def test_delete_model_w_not_found_ok_true(self): final_attributes.assert_called_once_with({"path": path}, client, None) - conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) + conn.api_request.assert_called_with( + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT + ) def test_delete_routine(self): from google.cloud.bigquery.routine import Routine @@ -2567,7 +2576,7 @@ def test_delete_routine_w_not_found_ok_false(self): final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_with( - method="DELETE", path=path, timeout=None, + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT, ) def test_delete_routine_w_not_found_ok_true(self): @@ -2589,7 +2598,7 @@ def test_delete_routine_w_not_found_ok_true(self): final_attributes.assert_called_once_with({"path": path}, client, None) conn.api_request.assert_called_with( - method="DELETE", path=path, timeout=None, + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT, ) def test_delete_table(self): @@ -2653,7 +2662,9 @@ def test_delete_table_w_not_found_ok_false(self): final_attributes.assert_called_once_with({"path": path}, client, None) - conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) + conn.api_request.assert_called_with( + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT + ) def test_delete_table_w_not_found_ok_true(self): path = "/projects/{}/datasets/{}/tables/{}".format( @@ -2675,7 +2686,9 @@ def test_delete_table_w_not_found_ok_true(self): final_attributes.assert_called_once_with({"path": path}, client, None) - conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None) + conn.api_request.assert_called_with( + method="DELETE", path=path, timeout=DEFAULT_TIMEOUT + ) def _create_job_helper(self, job_config): from google.cloud.bigquery import _helpers @@ -2697,7 +2710,7 @@ def 
_create_job_helper(self, job_config): method="POST", path="/projects/%s/jobs" % self.PROJECT, data=RESOURCE, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_create_job_load_config(self): @@ -2846,7 +2859,7 @@ def test_create_job_query_config_w_rateLimitExceeded_error(self): method="POST", path="/projects/PROJECT/jobs", data=data_without_destination, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), ) @@ -2886,7 +2899,7 @@ def test_get_job_miss_w_explict_project(self): method="GET", path="/projects/OTHER_PROJECT/jobs/NONESUCH", query_params={"projection": "full"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_get_job_miss_w_client_location(self): @@ -2904,7 +2917,7 @@ def test_get_job_miss_w_client_location(self): method="GET", path="/projects/client-proj/jobs/NONESUCH", query_params={"projection": "full", "location": "client-loc"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_get_job_hit_w_timeout(self): @@ -2973,7 +2986,7 @@ def test_cancel_job_miss_w_explict_project(self): method="POST", path="/projects/OTHER_PROJECT/jobs/NONESUCH/cancel", query_params={"projection": "full", "location": self.LOCATION}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_cancel_job_miss_w_client_location(self): @@ -2992,7 +3005,7 @@ def test_cancel_job_miss_w_client_location(self): method="POST", path="/projects/OTHER_PROJECT/jobs/NONESUCH/cancel", query_params={"projection": "full", "location": self.LOCATION}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_cancel_job_hit(self): @@ -3028,7 +3041,7 @@ def test_cancel_job_hit(self): method="POST", path="/projects/job-based-proj/jobs/query_job/cancel", query_params={"projection": "full", "location": "asia-northeast1"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_cancel_job_w_timeout(self): @@ -3154,7 +3167,7 @@ def test_load_table_from_uri_w_explicit_project(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def 
test_load_table_from_uri_w_client_location(self): @@ -3198,7 +3211,7 @@ def test_load_table_from_uri_w_client_location(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_load_table_from_uri_w_invalid_job_config(self): @@ -3486,7 +3499,7 @@ def test_copy_table_w_multiple_sources(self): method="POST", path="/projects/%s/jobs" % self.PROJECT, data=expected_resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertIsInstance(job, CopyJob) self.assertIs(job._client, client) @@ -3548,7 +3561,7 @@ def test_copy_table_w_explicit_project(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_copy_table_w_client_location(self): @@ -3598,7 +3611,7 @@ def test_copy_table_w_client_location(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_copy_table_w_source_strings(self): @@ -3691,7 +3704,7 @@ def test_copy_table_w_valid_job_config(self): method="POST", path="/projects/%s/jobs" % self.PROJECT, data=RESOURCE, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertIsInstance(job._configuration, CopyJobConfig) @@ -3797,7 +3810,7 @@ def test_extract_table_w_explicit_project(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_extract_table_w_client_location(self): @@ -3841,7 +3854,7 @@ def test_extract_table_w_client_location(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_extract_table_generated_job_id(self): @@ -3884,7 +3897,7 @@ def test_extract_table_generated_job_id(self): self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") self.assertIsInstance(req["data"]["jobReference"]["jobId"], str) - self.assertIsNone(req["timeout"]) + 
self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) # Check the job resource. self.assertIsInstance(job, ExtractJob) @@ -3929,7 +3942,7 @@ def test_extract_table_w_destination_uris(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) # Check the job resource. self.assertIsInstance(job, ExtractJob) @@ -4099,7 +4112,7 @@ def test_query_defaults(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) sent = req["data"] self.assertIsInstance(sent["jobReference"]["jobId"], str) sent_config = sent["configuration"]["query"] @@ -4152,7 +4165,7 @@ def test_query_w_explicit_project(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_query_w_explicit_job_config(self): @@ -4208,7 +4221,10 @@ def test_query_w_explicit_job_config(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) # the original config object should not have been modified @@ -4252,7 +4268,10 @@ def test_query_preserving_explicit_job_config(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) # the original config object should not have been modified @@ -4304,7 +4323,10 @@ def test_query_preserving_explicit_default_job_config(self): # Check that query actually starts the job. 
conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) # the original default config object should not have been modified @@ -4389,7 +4411,10 @@ def test_query_w_explicit_job_config_override(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) def test_query_w_client_default_config_no_incoming(self): @@ -4430,7 +4455,10 @@ def test_query_w_client_default_config_no_incoming(self): # Check that query actually starts the job. conn.api_request.assert_called_once_with( - method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None + method="POST", + path="/projects/PROJECT/jobs", + data=resource, + timeout=DEFAULT_TIMEOUT, ) def test_query_w_invalid_default_job_config(self): @@ -4475,7 +4503,7 @@ def test_query_w_client_location(self): method="POST", path="/projects/other-project/jobs", data=resource, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_query_detect_location(self): @@ -4546,7 +4574,7 @@ def test_query_w_udf_resources(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) sent = req["data"] self.assertIsInstance(sent["jobReference"]["jobId"], str) sent_config = sent["configuration"]["query"] @@ -4602,7 +4630,7 @@ def test_query_w_query_parameters(self): _, req = conn.api_request.call_args self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/projects/PROJECT/jobs") - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) sent = req["data"] 
self.assertEqual(sent["jobReference"]["jobId"], JOB) sent_config = sent["configuration"]["query"] @@ -4794,7 +4822,7 @@ def _row_data(row): self.assertEqual(req["method"], "POST") self.assertEqual(req["path"], "/%s" % PATH) self.assertEqual(req["data"], SENT) - self.assertIsNone(req["timeout"]) + self.assertEqual(req["timeout"], DEFAULT_TIMEOUT) def test_insert_rows_w_list_of_dictionaries(self): import datetime @@ -4862,7 +4890,7 @@ def _row_data(row): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT ) def test_insert_rows_w_list_of_Rows(self): @@ -4907,7 +4935,7 @@ def _row_data(row): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT ) def test_insert_rows_w_skip_invalid_and_ignore_unknown(self): @@ -4984,7 +5012,7 @@ def _row_data(row): errors[0]["errors"][0], RESPONSE["insertErrors"][0]["errors"][0] ) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT ) def test_insert_rows_w_repeated_fields(self): @@ -5085,7 +5113,7 @@ def test_insert_rows_w_repeated_fields(self): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None, + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_w_record_schema(self): @@ -5151,7 +5179,7 @@ def test_insert_rows_w_record_schema(self): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/%s" % PATH, data=SENT, timeout=None + method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT ) def 
test_insert_rows_w_explicit_none_insert_ids(self): @@ -5185,7 +5213,7 @@ def _row_data(row): self.assertEqual(len(errors), 0) conn.api_request.assert_called_once_with( - method="POST", path="/{}".format(PATH), data=SENT, timeout=None, + method="POST", path="/{}".format(PATH), data=SENT, timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_errors(self): @@ -5269,7 +5297,7 @@ def test_insert_rows_w_numeric(self): project, ds_id, table_id ), data=sent, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @unittest.skipIf(pandas is None, "Requires `pandas`") @@ -5461,7 +5489,10 @@ def test_insert_rows_from_dataframe_many_columns(self): ] } expected_call = mock.call( - method="POST", path=API_PATH, data=EXPECTED_SENT_DATA, timeout=None + method="POST", + path=API_PATH, + data=EXPECTED_SENT_DATA, + timeout=DEFAULT_TIMEOUT, ) actual_calls = conn.api_request.call_args_list @@ -5514,7 +5545,10 @@ def test_insert_rows_from_dataframe_w_explicit_none_insert_ids(self): actual_calls = conn.api_request.call_args_list assert len(actual_calls) == 1 assert actual_calls[0] == mock.call( - method="POST", path=API_PATH, data=EXPECTED_SENT_DATA, timeout=None + method="POST", + path=API_PATH, + data=EXPECTED_SENT_DATA, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_default_behavior(self): @@ -5594,7 +5628,7 @@ def test_insert_rows_json_w_explicitly_requested_autogenerated_insert_ids(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected_row_data, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_w_explicitly_disabled_insert_ids(self): @@ -5624,7 +5658,7 @@ def test_insert_rows_json_w_explicitly_disabled_insert_ids(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected_row_data, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_with_iterator_row_ids(self): @@ -5651,7 +5685,7 @@ def test_insert_rows_json_with_iterator_row_ids(self): method="POST", 
path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected_row_data, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_with_non_iterable_row_ids(self): @@ -5704,7 +5738,7 @@ def test_insert_rows_json_w_explicit_none_insert_ids(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_json_w_none_insert_ids_sequence(self): @@ -5743,7 +5777,7 @@ def test_insert_rows_json_w_none_insert_ids_sequence(self): method="POST", path="/projects/proj/datasets/dset/tables/tbl/insertAll", data=expected_row_data, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_insert_rows_w_wrong_arg(self): @@ -5938,7 +5972,7 @@ def test_list_rows_w_start_index_w_page_size(self): "maxResults": 2, "formatOptions.useInt64Timestamp": True, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), mock.call( method="GET", @@ -5948,7 +5982,7 @@ def test_list_rows_w_start_index_w_page_size(self): "maxResults": 2, "formatOptions.useInt64Timestamp": True, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), ] ) @@ -6099,7 +6133,7 @@ def test_list_rows_repeated_fields(self): "selectedFields": "color,struct", "formatOptions.useInt64Timestamp": True, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_list_rows_w_record_schema(self): @@ -6169,7 +6203,7 @@ def test_list_rows_w_record_schema(self): method="GET", path="/%s" % PATH, query_params={"formatOptions.useInt64Timestamp": True}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) def test_list_rows_with_missing_schema(self): @@ -6224,7 +6258,7 @@ def test_list_rows_with_missing_schema(self): row_iter = client.list_rows(table) conn.api_request.assert_called_once_with( - method="GET", path=table_path, timeout=None + method="GET", path=table_path, timeout=DEFAULT_TIMEOUT ) conn.api_request.reset_mock() self.assertEqual(row_iter.total_rows, 2, msg=repr(table)) @@ -6234,7 +6268,7 @@ def test_list_rows_with_missing_schema(self): 
method="GET", path=tabledata_path, query_params={"formatOptions.useInt64Timestamp": True}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) self.assertEqual(row_iter.total_rows, 3, msg=repr(table)) self.assertEqual(rows[0].name, "Phred Phlyntstone", msg=repr(table)) @@ -6407,7 +6441,7 @@ def test_load_table_from_file_resumable(self): file_obj, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) @@ -6440,7 +6474,7 @@ def test_load_table_from_file_w_explicit_project(self): file_obj, expected_resource, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project="other-project", ) @@ -6474,7 +6508,7 @@ def test_load_table_from_file_w_client_location(self): file_obj, expected_resource, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project="other-project", ) @@ -6536,7 +6570,7 @@ def test_load_table_from_file_resumable_metadata(self): file_obj, expected_config, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) @@ -6565,7 +6599,7 @@ def test_load_table_from_file_multipart(self): self.EXPECTED_CONFIGURATION, file_obj_size, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project=self.PROJECT, ) @@ -6590,7 +6624,7 @@ def test_load_table_from_file_with_retries(self): file_obj, self.EXPECTED_CONFIGURATION, num_retries, - None, + DEFAULT_TIMEOUT, project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) @@ -6627,7 +6661,7 @@ def test_load_table_from_file_with_readable_gzip(self): gzip_file, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES, - None, + DEFAULT_TIMEOUT, project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"], ) @@ -6750,7 +6784,7 @@ def test_load_table_from_dataframe(self): location=None, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_file = load_table_from_file.mock_calls[0][1][1] @@ -6808,7 +6842,7 @@ def 
test_load_table_from_dataframe_w_client_location(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_file = load_table_from_file.mock_calls[0][1][1] @@ -6862,7 +6896,7 @@ def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(sel location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -6918,7 +6952,7 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7012,7 +7046,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7073,7 +7107,7 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7120,7 +7154,7 @@ def test_load_table_from_dataframe_unknown_table(self): location=None, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @unittest.skipIf( @@ -7162,7 +7196,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7210,7 +7244,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ 
-7272,7 +7306,7 @@ def test_load_table_from_dataframe_struct_fields(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7347,7 +7381,7 @@ def test_load_table_from_dataframe_w_partial_schema(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7442,7 +7476,7 @@ def test_load_table_from_dataframe_w_partial_schema_missing_types(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) assert warned # there should be at least one warning @@ -7592,7 +7626,7 @@ def test_load_table_from_dataframe_w_nulls(self): location=self.LOCATION, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7658,7 +7692,7 @@ def test_load_table_from_dataframe_with_csv_source_format(self): location=None, project=None, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_file = load_table_from_file.mock_calls[0][1][1] @@ -7696,7 +7730,7 @@ def test_load_table_from_json_basic_use(self): location=client.location, project=client.project, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7749,7 +7783,7 @@ def test_load_table_from_json_non_default_args(self): location="EU", project="project-x", job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_config = load_table_from_file.mock_calls[0][2]["job_config"] @@ -7809,7 +7843,7 @@ def test_load_table_from_json_unicode_emoji_data_case(self): location=client.location, project=client.project, job_config=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) sent_data_file = load_table_from_file.mock_calls[0][1][1] diff --git 
a/tests/unit/test_create_dataset.py b/tests/unit/test_create_dataset.py index d07aaed4f..67b21225d 100644 --- a/tests/unit/test_create_dataset.py +++ b/tests/unit/test_create_dataset.py @@ -15,6 +15,7 @@ from google.cloud.bigquery.dataset import Dataset, DatasetReference from .helpers import make_connection, dataset_polymorphic, make_client import google.cloud.bigquery.dataset +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT import mock import pytest @@ -111,7 +112,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], "labels": LABELS, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -143,7 +144,7 @@ def test_create_dataset_w_custom_property(client, PROJECT, DS_ID): "newAlphaProperty": "unreleased property", "labels": {}, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -176,7 +177,7 @@ def test_create_dataset_w_client_location_wo_dataset_location(PROJECT, DS_ID, LO "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -211,7 +212,7 @@ def test_create_dataset_w_client_location_w_dataset_location(PROJECT, DS_ID, LOC "labels": {}, "location": OTHER_LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -241,7 +242,7 @@ def test_create_dataset_w_reference(PROJECT, DS_ID, LOCATION): "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -271,7 +272,7 @@ def test_create_dataset_w_fully_qualified_string(PROJECT, DS_ID, LOCATION): "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -306,7 +307,7 @@ def test_create_dataset_w_string(PROJECT, DS_ID, LOCATION): "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -356,8 +357,8 @@ def test_create_dataset_alreadyexists_w_exists_ok_true(PROJECT, DS_ID, LOCATION) "labels": {}, "location": LOCATION, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ), - mock.call(method="GET", path=get_path, 
timeout=None), + mock.call(method="GET", path=get_path, timeout=DEFAULT_TIMEOUT), ] ) diff --git a/tests/unit/test_delete_dataset.py b/tests/unit/test_delete_dataset.py index 3a65e031c..b48beb147 100644 --- a/tests/unit/test_delete_dataset.py +++ b/tests/unit/test_delete_dataset.py @@ -14,6 +14,7 @@ from .helpers import make_connection, make_client, dataset_polymorphic import google.api_core.exceptions +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT import pytest @@ -40,7 +41,7 @@ def test_delete_dataset_delete_contents( method="DELETE", path="/%s" % PATH, query_params={"deleteContents": "true"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -61,7 +62,7 @@ def test_delete_dataset_w_not_found_ok_false(PROJECT, DS_ID): client.delete_dataset(DS_ID) conn.api_request.assert_called_with( - method="DELETE", path=path, query_params={}, timeout=None + method="DELETE", path=path, query_params={}, timeout=DEFAULT_TIMEOUT ) @@ -74,5 +75,5 @@ def test_delete_dataset_w_not_found_ok_true(PROJECT, DS_ID): ) client.delete_dataset(DS_ID, not_found_ok=True) conn.api_request.assert_called_with( - method="DELETE", path=path, query_params={}, timeout=None + method="DELETE", path=path, query_params={}, timeout=DEFAULT_TIMEOUT ) diff --git a/tests/unit/test_list_datasets.py b/tests/unit/test_list_datasets.py index 7793a7ba6..6f0b55c5e 100644 --- a/tests/unit/test_list_datasets.py +++ b/tests/unit/test_list_datasets.py @@ -1,11 +1,11 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -15,6 +15,7 @@ import mock import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from .helpers import make_connection @@ -65,7 +66,7 @@ def test_list_datasets_defaults(client, PROJECT, extra, query): assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params=query, timeout=None + method="GET", path="/%s" % PATH, query_params=query, timeout=DEFAULT_TIMEOUT ) @@ -120,5 +121,5 @@ def test_list_datasets_explicit_response_missing_datasets_key(client, PROJECT): "maxResults": 3, "pageToken": TOKEN, }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) diff --git a/tests/unit/test_list_jobs.py b/tests/unit/test_list_jobs.py index f348be724..1fb40d446 100644 --- a/tests/unit/test_list_jobs.py +++ b/tests/unit/test_list_jobs.py @@ -1,11 +1,11 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -17,6 +17,7 @@ import mock import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from .helpers import make_connection @@ -136,7 +137,7 @@ def test_list_jobs_defaults(client, PROJECT, DS_ID, extra, query): method="GET", path="/%s" % PATH, query_params=dict({"projection": "full"}, **query), - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -185,7 +186,7 @@ def test_list_jobs_load_job_wo_sourceUris(client, PROJECT, DS_ID): method="GET", path="/%s" % PATH, query_params={"projection": "full"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -220,7 +221,7 @@ def test_list_jobs_explicit_missing(client, PROJECT): "allUsers": True, "stateFilter": "done", }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -233,7 +234,7 @@ def test_list_jobs_w_project(client, PROJECT): method="GET", path="/projects/other-project/jobs", query_params={"projection": "full"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -269,7 +270,7 @@ def test_list_jobs_w_time_filter(client, PROJECT): "minCreationTime": "1", "maxCreationTime": str(end_time_millis), }, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) @@ -286,6 +287,6 @@ def test_list_jobs_w_parent_job_filter(client, PROJECT): method="GET", path="/projects/%s/jobs" % PROJECT, query_params={"projection": "full", "parentJobId": "parent-job-123"}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) conn.api_request.reset_mock() diff --git a/tests/unit/test_list_models.py b/tests/unit/test_list_models.py index 4ede9a7dd..b14852338 100644 --- a/tests/unit/test_list_models.py +++ b/tests/unit/test_list_models.py @@ -1,20 +1,22 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from .helpers import make_connection, dataset_polymorphic import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT +from .helpers import make_connection, dataset_polymorphic + def test_list_models_empty_w_timeout(client, PROJECT, DS_ID): path = "/projects/{}/datasets/{}/models".format(PROJECT, DS_ID) @@ -82,7 +84,7 @@ def test_list_models_defaults( assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params=query, timeout=None + method="GET", path="/%s" % PATH, query_params=query, timeout=DEFAULT_TIMEOUT ) diff --git a/tests/unit/test_list_projects.py b/tests/unit/test_list_projects.py index a88540dd5..190612b44 100644 --- a/tests/unit/test_list_projects.py +++ b/tests/unit/test_list_projects.py @@ -1,11 +1,11 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
@@ -15,6 +15,7 @@ import mock import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from .helpers import make_connection @@ -66,7 +67,7 @@ def test_list_projects_defaults(client, PROJECT, extra, query): assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/projects", query_params=query, timeout=None + method="GET", path="/projects", query_params=query, timeout=DEFAULT_TIMEOUT ) @@ -115,5 +116,5 @@ def test_list_projects_explicit_response_missing_projects_key(client): method="GET", path="/projects", query_params={"maxResults": 3, "pageToken": TOKEN}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) diff --git a/tests/unit/test_list_routines.py b/tests/unit/test_list_routines.py index 069966542..80e62d6bd 100644 --- a/tests/unit/test_list_routines.py +++ b/tests/unit/test_list_routines.py @@ -1,20 +1,22 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from .helpers import make_connection, dataset_polymorphic import pytest +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT +from .helpers import make_connection, dataset_polymorphic + def test_list_routines_empty_w_timeout(client): conn = client._connection = make_connection({}) @@ -85,7 +87,7 @@ def test_list_routines_defaults( assert actual_token == token conn.api_request.assert_called_once_with( - method="GET", path=path, query_params=query, timeout=None + method="GET", path=path, query_params=query, timeout=DEFAULT_TIMEOUT ) diff --git a/tests/unit/test_list_tables.py b/tests/unit/test_list_tables.py index 45d15bed3..8360f6605 100644 --- a/tests/unit/test_list_tables.py +++ b/tests/unit/test_list_tables.py @@ -1,21 +1,23 @@ # Copyright 2021 Google LLC - +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at - +# # https://www.apache.org/licenses/LICENSE-2.0 - +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from .helpers import make_connection, dataset_polymorphic -import google.cloud.bigquery.dataset import pytest +import google.cloud.bigquery.dataset +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT +from .helpers import make_connection, dataset_polymorphic + @dataset_polymorphic def test_list_tables_empty_w_timeout( @@ -89,7 +91,7 @@ def test_list_tables_defaults(make_dataset, get_reference, client, PROJECT, DS_I assert token == TOKEN conn.api_request.assert_called_once_with( - method="GET", path="/%s" % PATH, query_params={}, timeout=None + method="GET", path="/%s" % PATH, query_params={}, timeout=DEFAULT_TIMEOUT ) @@ -150,7 +152,7 @@ def test_list_tables_explicit(client, PROJECT, DS_ID): method="GET", path="/%s" % PATH, query_params={"maxResults": 3, "pageToken": TOKEN}, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index 88c92a070..36cbf4993 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -32,6 +32,7 @@ from google.cloud.bigquery import job from google.cloud.bigquery import table from google.cloud.bigquery.magics import magics +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT from tests.unit.helpers import make_connection from test_utils.imports import maybe_fail_import @@ -185,7 +186,7 @@ def test_context_with_default_connection(): method="POST", path="/projects/project-from-env/jobs", data=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) query_results_call = mock.call( method="GET", @@ -249,7 +250,7 @@ def test_context_with_custom_connection(): method="POST", path="/projects/project-from-env/jobs", data=mock.ANY, - timeout=None, + timeout=DEFAULT_TIMEOUT, ) query_results_call = mock.call( method="GET", diff --git a/tests/unit/test_retry.py b/tests/unit/test_retry.py index c7c25e036..e0a992f78 100644 --- a/tests/unit/test_retry.py +++ b/tests/unit/test_retry.py @@ -55,6 +55,18 @@ def test_w_unstructured_requests_chunked_encoding_error(self): exc = 
requests.exceptions.ChunkedEncodingError() self.assertTrue(self._call_fut(exc)) + def test_w_unstructured_requests_connecttimeout(self): + exc = requests.exceptions.ConnectTimeout() + self.assertTrue(self._call_fut(exc)) + + def test_w_unstructured_requests_readtimeout(self): + exc = requests.exceptions.ReadTimeout() + self.assertTrue(self._call_fut(exc)) + + def test_w_unstructured_requests_timeout(self): + exc = requests.exceptions.Timeout() + self.assertTrue(self._call_fut(exc)) + def test_w_auth_transporterror(self): from google.auth.exceptions import TransportError From 4fc7c693283e94b44d388f8c7991a1ad78fcde45 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 25 Aug 2021 17:34:24 +0200 Subject: [PATCH 226/230] chore(deps): update dependency google-cloud-bigquery to v2.25.0 (#907) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | Adoption | Passing | Confidence | |---|---|---|---|---|---| | [google-cloud-bigquery](https://togithub.com/googleapis/python-bigquery) | `==2.24.1` -> `==2.25.0` | [![age](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.0/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.0/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.0/compatibility-slim/2.24.1)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/google-cloud-bigquery/2.25.0/confidence-slim/2.24.1)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery ### [`v2.25.0`](https://togithub.com/googleapis/python-bigquery/blob/master/CHANGELOG.md#​2250-httpswwwgithubcomgoogleapispython-bigquerycomparev2241v2250-2021-08-24) [Compare Source](https://togithub.com/googleapis/python-bigquery/compare/v2.24.1...v2.25.0) ##### Features - Support using GeoPandas for GEOGRAPHY columns ([#​848](https://www.togithub.com/googleapis/python-bigquery/issues/848)) ([16f65e6](https://www.github.com/googleapis/python-bigquery/commit/16f65e6ae15979217ceea6c6d398c9057a363a13)) ##### [2.24.1](https://www.github.com/googleapis/python-bigquery/compare/v2.24.0...v2.24.1) (2021-08-13) ##### Bug Fixes - remove pytz dependency and require pyarrow>=3.0.0 ([#​875](https://www.togithub.com/googleapis/python-bigquery/issues/875)) ([2cb3563](https://www.github.com/googleapis/python-bigquery/commit/2cb3563ee863edef7eaf5d04d739bcfe7bc6438e))
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/geography/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 853306d71..d810e1241 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -12,7 +12,7 @@ geojson==2.5.0 geopandas==0.9.0 google-api-core==1.31.2 google-auth==1.35.0 -google-cloud-bigquery==2.24.1 +google-cloud-bigquery==2.25.0 google-cloud-bigquery-storage==2.6.3 google-cloud-core==2.0.0 google-crc32c==1.1.2 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 484e10516..07760b666 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,4 +1,4 @@ -google-cloud-bigquery==2.24.1 +google-cloud-bigquery==2.25.0 google-cloud-bigquery-storage==2.6.3 google-auth-oauthlib==0.4.5 grpcio==1.39.0 From 121c2c2005225fae8a89ed231026e7ac64625532 Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Wed, 25 Aug 2021 17:56:26 +0200 Subject: [PATCH 227/230] chore(deps): update dependency pandas to v1.3.2 (#900) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [![WhiteSource Renovate](https://app.renovatebot.com/images/banner.svg)](https://renovatebot.com) This PR contains the following updates: | Package | Change | Age | 
Adoption | Passing | Confidence | |---|---|---|---|---|---| | [pandas](https://pandas.pydata.org) ([source](https://togithub.com/pandas-dev/pandas)) | `==1.1.5` -> `==1.3.2` | [![age](https://badges.renovateapi.com/packages/pypi/pandas/1.3.2/age-slim)](https://docs.renovatebot.com/merge-confidence/) | [![adoption](https://badges.renovateapi.com/packages/pypi/pandas/1.3.2/adoption-slim)](https://docs.renovatebot.com/merge-confidence/) | [![passing](https://badges.renovateapi.com/packages/pypi/pandas/1.3.2/compatibility-slim/1.1.5)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://badges.renovateapi.com/packages/pypi/pandas/1.3.2/confidence-slim/1.1.5)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
pandas-dev/pandas ### [`v1.3.2`](https://togithub.com/pandas-dev/pandas/releases/v1.3.2) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.3.1...v1.3.2) This is a patch release in the 1.3.x series and includes some regression fixes and bug fixes. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.3.2/whatsnew/v1.3.2.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.3.1`](https://togithub.com/pandas-dev/pandas/releases/v1.3.1) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.3.0...v1.3.1) This is the first patch release in the 1.3.x series and includes some regression fixes and bug fixes. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.3.1/whatsnew/v1.3.1.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.3.0`](https://togithub.com/pandas-dev/pandas/releases/v1.3.0) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.5...v1.3.0) This release includes some new features, bug fixes, and performance improvements. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.3.0/whatsnew/v1.3.0.html) for a list of all the changes. 
The release will be available on the defaults and conda-forge channels: conda install -c conda-forge pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.2.5`](https://togithub.com/pandas-dev/pandas/releases/v1.2.5) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.4...v1.2.5) This is a patch release in the 1.2.x series and includes some regression fixes. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.5/whatsnew/v1.2.5.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.2.4`](https://togithub.com/pandas-dev/pandas/releases/v1.2.4) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.3...v1.2.4) This is a patch release in the 1.2.x series and includes some regression fixes. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.4/whatsnew/v1.2.4.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.2.3`](https://togithub.com/pandas-dev/pandas/releases/v1.2.3) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.2...v1.2.3) This is a patch release in the 1.2.x series and includes some regression fixes. We recommend that all users upgrade to this version. 
See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.3/whatsnew/v1.2.3.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.2.2`](https://togithub.com/pandas-dev/pandas/releases/v1.2.2) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.1...v1.2.2) This is a patch release in the 1.2.x series and includes some regression fixes and bug fixes. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.2/whatsnew/v1.2.2.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). ### [`v1.2.1`](https://togithub.com/pandas-dev/pandas/releases/v1.2.1) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.2.0...v1.2.1) This is the first patch release in the 1.2.x series and includes some regression fixes and bug fixes. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.1/whatsnew/v1.2.1.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues). 
### [`v1.2.0`](https://togithub.com/pandas-dev/pandas/releases/v1.2.0) [Compare Source](https://togithub.com/pandas-dev/pandas/compare/v1.1.5...v1.2.0) This release includes some new features, bug fixes, and performance improvements. We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/1.2.0/whatsnew/v1.2.0.html) for a list of all the changes. The release will be available on the defaults and conda-forge channels: conda install -c conda-forge pandas Or via PyPI: python3 -m pip install --upgrade pandas Please report any issues with the release on the [pandas issue tracker](https://togithub.com/pandas-dev/pandas/issues).
--- ### Configuration 📅 **Schedule**: At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Renovate will not automatically rebase this PR, because other commits have been found. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box. --- This PR has been generated by [WhiteSource Renovate](https://renovate.whitesourcesoftware.com). View repository job log [here](https://app.renovatebot.com/dashboard#github/googleapis/python-bigquery). --- samples/geography/requirements.txt | 3 ++- samples/snippets/requirements.txt | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index d810e1241..b5fe247cb 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -27,7 +27,8 @@ mypy-extensions==0.4.3 numpy==1.19.5; python_version < "3.7" numpy==1.21.2; python_version > "3.6" packaging==21.0 -pandas==1.1.5 +pandas==1.1.5; python_version < '3.7' +pandas==1.3.2; python_version >= '3.7' proto-plus==1.19.0 protobuf==3.17.3 pyarrow==5.0.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 07760b666..d75c747fb 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -7,6 +7,6 @@ ipython==7.17.0; python_version >= '3.7' matplotlib==3.3.4; python_version < '3.7' matplotlib==3.4.1; python_version >= '3.7' pandas==1.1.5; python_version < '3.7' -pandas==1.2.0; python_version >= '3.7' +pandas==1.3.2; python_version >= '3.7' pyarrow==5.0.0 pytz==2021.1 From a3a85dac90211599b2260da0d514d19647085575 Mon Sep 17 00:00:00 2001 From: Tim Swast Date: Wed, 25 Aug 2021 11:38:24 -0500 Subject: [PATCH 228/230] chore: group all renovate PRs together (#911) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
This excludes `renovate.json` from templated updates. If this works well, we can update the core templates (perhaps with a configuration option to `py_library`). Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --- owlbot.py | 28 +++++++++++++++++----------- renovate.json | 2 +- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/owlbot.py b/owlbot.py index ea9904cdb..8664b658a 100644 --- a/owlbot.py +++ b/owlbot.py @@ -63,7 +63,7 @@ s.replace( library / f"google/cloud/bigquery_{library.name}/types/standard_sql.py", r"type_ ", - "type " + "type ", ) s.move( @@ -78,8 +78,8 @@ "noxfile.py", "setup.py", f"scripts/fixup_bigquery_{library.name}_keywords.py", - f"google/cloud/bigquery/__init__.py", - f"google/cloud/bigquery/py.typed", + "google/cloud/bigquery/__init__.py", + "google/cloud/bigquery/py.typed", # There are no public API endpoints for the generated ModelServiceClient, # thus there's no point in generating it and its tests. f"google/cloud/bigquery_{library.name}/services/**", @@ -98,9 +98,9 @@ microgenerator=True, split_system_tests=True, intersphinx_dependencies={ - "pandas": 'http://pandas.pydata.org/pandas-docs/dev', + "pandas": "http://pandas.pydata.org/pandas-docs/dev", "geopandas": "https://geopandas.org/", - } + }, ) # BigQuery has a custom multiprocessing note @@ -113,7 +113,11 @@ # Include custom SNIPPETS_TESTS job for performance. # https://github.com/googleapis/python-bigquery/issues/191 ".kokoro/presubmit/presubmit.cfg", - ] + # Group all renovate PRs together. 
If this works well, remove this and + # update the shared templates (possibly with configuration option to + # py_library.) + "renovate.json", + ], ) # ---------------------------------------------------------------------------- @@ -125,14 +129,14 @@ s.replace( "docs/conf.py", r'\{"members": True\}', - '{"members": True, "inherited-members": True}' + '{"members": True, "inherited-members": True}', ) # Tell Sphinx to ingore autogenerated docs files. s.replace( "docs/conf.py", r'"samples/snippets/README\.rst",', - '\g<0>\n "bigquery_v2/services.rst", # generated by the code generator', + '\\g<0>\n "bigquery_v2/services.rst", # generated by the code generator', ) # ---------------------------------------------------------------------------- @@ -140,13 +144,14 @@ # ---------------------------------------------------------------------------- # Add .pytype to .gitignore -s.replace(".gitignore", r"\.pytest_cache", "\g<0>\n.pytype") +s.replace(".gitignore", r"\.pytest_cache", "\\g<0>\n.pytype") # Add pytype config to setup.cfg s.replace( "setup.cfg", r"universal = 1", - textwrap.dedent(""" \g<0> + textwrap.dedent( + """ \\g<0> [pytype] python_version = 3.8 @@ -160,7 +165,8 @@ # There's some issue with finding some pyi files, thus disabling. # The issue https://github.com/google/pytype/issues/150 is closed, but the # error still occurs for some reason. 
- pyi-error""") + pyi-error""" + ), ) s.shell.run(["nox", "-s", "blacken"], hide_output=False) diff --git a/renovate.json b/renovate.json index c04895563..713c60bb4 100644 --- a/renovate.json +++ b/renovate.json @@ -1,6 +1,6 @@ { "extends": [ - "config:base", ":preserveSemverRanges" + "config:base", "group:all", ":preserveSemverRanges" ], "ignorePaths": [".pre-commit-config.yaml"], "pip_requirements": { From 109a5365d7c1e388a49809e653a51c1d77ddb0a2 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 25 Aug 2021 17:34:14 +0000 Subject: [PATCH 229/230] chore: release 2.25.1 (#912) :robot: I have created a release \*beep\* \*boop\* --- ### [2.25.1](https://www.github.com/googleapis/python-bigquery/compare/v2.25.0...v2.25.1) (2021-08-25) ### Bug Fixes * populate default `timeout` and retry after client-side timeout ([#896](https://www.github.com/googleapis/python-bigquery/issues/896)) ([b508809](https://www.github.com/googleapis/python-bigquery/commit/b508809c0f887575274309a463e763c56ddd017d)) * use REST API in cell magic when requested ([#892](https://www.github.com/googleapis/python-bigquery/issues/892)) ([1cb3e55](https://www.github.com/googleapis/python-bigquery/commit/1cb3e55253e824e3a1da5201f6ec09065fb6b627)) --- This PR was generated with [Release Please](https://github.com/googleapis/release-please). See [documentation](https://github.com/googleapis/release-please#release-please). 
--- CHANGELOG.md | 8 ++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7a5727ee7..8a21df6fe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,14 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +### [2.25.1](https://www.github.com/googleapis/python-bigquery/compare/v2.25.0...v2.25.1) (2021-08-25) + + +### Bug Fixes + +* populate default `timeout` and retry after client-side timeout ([#896](https://www.github.com/googleapis/python-bigquery/issues/896)) ([b508809](https://www.github.com/googleapis/python-bigquery/commit/b508809c0f887575274309a463e763c56ddd017d)) +* use REST API in cell magic when requested ([#892](https://www.github.com/googleapis/python-bigquery/issues/892)) ([1cb3e55](https://www.github.com/googleapis/python-bigquery/commit/1cb3e55253e824e3a1da5201f6ec09065fb6b627)) + ## [2.25.0](https://www.github.com/googleapis/python-bigquery/compare/v2.24.1...v2.25.0) (2021-08-24) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index f882cac3a..21cbec9fe 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "2.25.0" +__version__ = "2.25.1" From f55864ec3d6381f2b31598428a64822fdc73cb56 Mon Sep 17 00:00:00 2001 From: pallabiwrites <87546424+pallabiwrites@users.noreply.github.com> Date: Thu, 26 Aug 2021 08:40:47 +0530 Subject: [PATCH 230/230] docs: update docstring for bigquery_create_routine sample (#883) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixed language issues. Thank you for opening a Pull Request! 
Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --- samples/create_routine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/create_routine.py b/samples/create_routine.py index 012c7927a..1cb4a80b4 100644 --- a/samples/create_routine.py +++ b/samples/create_routine.py @@ -22,7 +22,7 @@ def create_routine(routine_id): # Construct a BigQuery client object. client = bigquery.Client() - # TODO(developer): Choose a fully-qualified ID for the routine. + # TODO(developer): Choose a fully qualified ID for the routine. # routine_id = "my-project.my_dataset.my_routine" routine = bigquery.Routine(