diff --git a/.coveragerc b/.coveragerc
index 0d8e6297d..23861a8eb 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -1,38 +1,18 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright 2020 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Generated by synthtool. DO NOT EDIT!
[run]
branch = True
-omit =
- google/cloud/__init__.py
[report]
fail_under = 100
show_missing = True
+omit =
+ google/cloud/bigquery/__init__.py
exclude_lines =
# Re-enable the standard pragma
pragma: NO COVER
# Ignore debug-only repr
def __repr__
- # Ignore abstract methods
- raise NotImplementedError
-omit =
- */gapic/*.py
- */proto/*.py
- */core/*.py
- */site-packages/*.py
- google/cloud/__init__.py
+ # Ignore pkg_resources exceptions.
+ # This is added at the module level as a safeguard in case someone
+ # generates the code and tries to run it without pip installing. This
+ # makes it virtually impossible to test properly.
+ except pkg_resources.DistributionNotFound
diff --git a/.flake8 b/.flake8
index ed9316381..29227d4cf 100644
--- a/.flake8
+++ b/.flake8
@@ -26,6 +26,7 @@ exclude =
*_pb2.py
# Standard linting exemptions.
+ **/.nox/**
__pycache__,
.git,
*.pyc,
diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml
new file mode 100644
index 000000000..a9fcd07cc
--- /dev/null
+++ b/.github/.OwlBot.lock.yaml
@@ -0,0 +1,3 @@
+docker:
+ image: gcr.io/repo-automation-bots/owlbot-python:latest
+ digest: sha256:9743664022bd63a8084be67f144898314c7ca12f0a03e422ac17c733c129d803
diff --git a/.github/.OwlBot.yaml b/.github/.OwlBot.yaml
new file mode 100644
index 000000000..2b6451c19
--- /dev/null
+++ b/.github/.OwlBot.yaml
@@ -0,0 +1,26 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+docker:
+ image: gcr.io/repo-automation-bots/owlbot-python:latest
+
+deep-remove-regex:
+ - /owl-bot-staging
+
+deep-copy-regex:
+ - source: /google/cloud/bigquery/(v.*)/.*-py/(.*)
+ dest: /owl-bot-staging/$1/$2
+
+begin-after-commit-hash: f2de93abafa306b2ebadf1d10d947db8bcf2bf15
+
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 10f4ee7c0..6763f258c 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -5,8 +5,7 @@
# https://help.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners#codeowners-syntax
# The @googleapis/api-bigquery is the default owner for changes in this repo
-* @googleapis/api-bigquery
+* @googleapis/api-bigquery @googleapis/yoshi-python
# The python-samples-reviewers team is the default owner for samples changes
-/samples/ @googleapis/python-samples-owners
-
+/samples/ @googleapis/api-bigquery @googleapis/python-samples-owners @googleapis/yoshi-python
diff --git a/.github/header-checker-lint.yml b/.github/header-checker-lint.yml
new file mode 100644
index 000000000..6fe78aa79
--- /dev/null
+++ b/.github/header-checker-lint.yml
@@ -0,0 +1,15 @@
+{"allowedCopyrightHolders": ["Google LLC"],
+ "allowedLicenses": ["Apache-2.0", "MIT", "BSD-3"],
+ "ignoreFiles": ["**/requirements.txt", "**/requirements-test.txt", "**/__init__.py", "samples/**/constraints.txt", "samples/**/constraints-test.txt"],
+ "sourceFileExtensions": [
+ "ts",
+ "js",
+ "java",
+ "sh",
+ "Dockerfile",
+ "yaml",
+ "py",
+ "html",
+ "txt"
+ ]
+}
\ No newline at end of file
diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml
index b18fb9c29..8634a3043 100644
--- a/.github/sync-repo-settings.yaml
+++ b/.github/sync-repo-settings.yaml
@@ -4,6 +4,19 @@ branchProtectionRules:
# Identifies the protection rule pattern. Name of the branch to be protected.
# Defaults to `master`
- pattern: master
+ requiresCodeOwnerReviews: true
+ requiresStrictStatusChecks: true
+ requiredStatusCheckContexts:
+ - 'Kokoro'
+ - 'Kokoro snippets-3.8'
+ - 'cla/google'
+ - 'Samples - Lint'
+ - 'Samples - Python 3.6'
+ - 'Samples - Python 3.7'
+ - 'Samples - Python 3.8'
+- pattern: v3
+ requiresCodeOwnerReviews: true
+ requiresStrictStatusChecks: true
requiredStatusCheckContexts:
- 'Kokoro'
- 'Kokoro snippets-3.8'
diff --git a/.gitignore b/.gitignore
index b9daa52f1..99c3a1444 100644
--- a/.gitignore
+++ b/.gitignore
@@ -29,6 +29,7 @@ pip-log.txt
.nox
.cache
.pytest_cache
+.pytype
# Mac
@@ -50,8 +51,10 @@ docs.metadata
# Virtual environment
env/
+
+# Test logs
coverage.xml
-sponge_log.xml
+*sponge_log.xml
# System test environment variables.
system_tests/local_test_setup
diff --git a/.kokoro/build.sh b/.kokoro/build.sh
index 058f363e1..302cc1e1a 100755
--- a/.kokoro/build.sh
+++ b/.kokoro/build.sh
@@ -40,6 +40,16 @@ python3 -m pip uninstall --yes --quiet nox-automation
python3 -m pip install --upgrade --quiet nox
python3 -m nox --version
+# If this is a continuous build, send the test log to the FlakyBot.
+# See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot.
+if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"continuous"* ]]; then
+ cleanup() {
+ chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot
+ $KOKORO_GFILE_DIR/linux_amd64/flakybot
+ }
+ trap cleanup EXIT HUP
+fi
+
# If NOX_SESSION is set, it only runs the specified session,
# otherwise run all the sessions.
if [[ -n "${NOX_SESSION:-}" ]]; then
diff --git a/.kokoro/docker/docs/Dockerfile b/.kokoro/docker/docs/Dockerfile
index 412b0b56a..4e1b1fb8b 100644
--- a/.kokoro/docker/docs/Dockerfile
+++ b/.kokoro/docker/docs/Dockerfile
@@ -40,6 +40,7 @@ RUN apt-get update \
libssl-dev \
libsqlite3-dev \
portaudio19-dev \
+ python3-distutils \
redis-server \
software-properties-common \
ssh \
@@ -59,40 +60,8 @@ RUN apt-get update \
&& rm -rf /var/lib/apt/lists/* \
&& rm -f /var/cache/apt/archives/*.deb
-
-COPY fetch_gpg_keys.sh /tmp
-# Install the desired versions of Python.
-RUN set -ex \
- && export GNUPGHOME="$(mktemp -d)" \
- && echo "disable-ipv6" >> "${GNUPGHOME}/dirmngr.conf" \
- && /tmp/fetch_gpg_keys.sh \
- && for PYTHON_VERSION in 3.7.8 3.8.5; do \
- wget --no-check-certificate -O python-${PYTHON_VERSION}.tar.xz "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz" \
- && wget --no-check-certificate -O python-${PYTHON_VERSION}.tar.xz.asc "https://www.python.org/ftp/python/${PYTHON_VERSION%%[a-z]*}/Python-$PYTHON_VERSION.tar.xz.asc" \
- && gpg --batch --verify python-${PYTHON_VERSION}.tar.xz.asc python-${PYTHON_VERSION}.tar.xz \
- && rm -r python-${PYTHON_VERSION}.tar.xz.asc \
- && mkdir -p /usr/src/python-${PYTHON_VERSION} \
- && tar -xJC /usr/src/python-${PYTHON_VERSION} --strip-components=1 -f python-${PYTHON_VERSION}.tar.xz \
- && rm python-${PYTHON_VERSION}.tar.xz \
- && cd /usr/src/python-${PYTHON_VERSION} \
- && ./configure \
- --enable-shared \
- # This works only on Python 2.7 and throws a warning on every other
- # version, but seems otherwise harmless.
- --enable-unicode=ucs4 \
- --with-system-ffi \
- --without-ensurepip \
- && make -j$(nproc) \
- && make install \
- && ldconfig \
- ; done \
- && rm -rf "${GNUPGHOME}" \
- && rm -rf /usr/src/python* \
- && rm -rf ~/.cache/
-
RUN wget -O /tmp/get-pip.py 'https://bootstrap.pypa.io/get-pip.py' \
- && python3.7 /tmp/get-pip.py \
&& python3.8 /tmp/get-pip.py \
&& rm /tmp/get-pip.py
-CMD ["python3.7"]
+CMD ["python3.8"]
diff --git a/.kokoro/docs/docs-presubmit.cfg b/.kokoro/docs/docs-presubmit.cfg
index 5c216b4bc..08adb2e28 100644
--- a/.kokoro/docs/docs-presubmit.cfg
+++ b/.kokoro/docs/docs-presubmit.cfg
@@ -25,4 +25,4 @@ env_vars: {
env_vars: {
key: "NOX_SESSION"
value: "docs docfx"
-}
\ No newline at end of file
+}
diff --git a/.kokoro/release.sh b/.kokoro/release.sh
index 0e58f0640..3abba6e06 100755
--- a/.kokoro/release.sh
+++ b/.kokoro/release.sh
@@ -26,7 +26,7 @@ python3 -m pip install --upgrade twine wheel setuptools
export PYTHONUNBUFFERED=1
# Move into the package, build the distribution and upload.
-TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google_cloud_pypi_password")
+TWINE_PASSWORD=$(cat "${KOKORO_GFILE_DIR}/secret_manager/google-cloud-pypi-token")
cd github/python-bigquery
python3 setup.py sdist bdist_wheel
-twine upload --username gcloudpypi --password "${TWINE_PASSWORD}" dist/*
+twine upload --username __token__ --password "${TWINE_PASSWORD}" dist/*
diff --git a/.kokoro/release/common.cfg b/.kokoro/release/common.cfg
index 18b417709..922d7fe50 100644
--- a/.kokoro/release/common.cfg
+++ b/.kokoro/release/common.cfg
@@ -23,18 +23,8 @@ env_vars: {
value: "github/python-bigquery/.kokoro/release.sh"
}
-# Fetch PyPI password
-before_action {
- fetch_keystore {
- keystore_resource {
- keystore_config_id: 73713
- keyname: "google_cloud_pypi_password"
- }
- }
-}
-
# Tokens needed to report release status back to GitHub
env_vars: {
key: "SECRET_MANAGER_KEYS"
- value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem"
-}
\ No newline at end of file
+ value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem,google-cloud-pypi-token"
+}
diff --git a/.kokoro/samples/python3.6/periodic-head.cfg b/.kokoro/samples/python3.6/periodic-head.cfg
new file mode 100644
index 000000000..5aa01bab5
--- /dev/null
+++ b/.kokoro/samples/python3.6/periodic-head.cfg
@@ -0,0 +1,11 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+env_vars: {
+ key: "INSTALL_LIBRARY_FROM_SOURCE"
+ value: "True"
+}
+
+env_vars: {
+ key: "TRAMPOLINE_BUILD_FILE"
+ value: "github/python-bigquery/.kokoro/test-samples-against-head.sh"
+}
diff --git a/.kokoro/samples/python3.7/periodic-head.cfg b/.kokoro/samples/python3.7/periodic-head.cfg
new file mode 100644
index 000000000..5aa01bab5
--- /dev/null
+++ b/.kokoro/samples/python3.7/periodic-head.cfg
@@ -0,0 +1,11 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+env_vars: {
+ key: "INSTALL_LIBRARY_FROM_SOURCE"
+ value: "True"
+}
+
+env_vars: {
+ key: "TRAMPOLINE_BUILD_FILE"
+ value: "github/python-bigquery/.kokoro/test-samples-against-head.sh"
+}
diff --git a/.kokoro/samples/python3.8/periodic-head.cfg b/.kokoro/samples/python3.8/periodic-head.cfg
new file mode 100644
index 000000000..5aa01bab5
--- /dev/null
+++ b/.kokoro/samples/python3.8/periodic-head.cfg
@@ -0,0 +1,11 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+env_vars: {
+ key: "INSTALL_LIBRARY_FROM_SOURCE"
+ value: "True"
+}
+
+env_vars: {
+ key: "TRAMPOLINE_BUILD_FILE"
+ value: "github/python-bigquery/.kokoro/test-samples-against-head.sh"
+}
diff --git a/.kokoro/samples/python3.9/common.cfg b/.kokoro/samples/python3.9/common.cfg
new file mode 100644
index 000000000..f179577a5
--- /dev/null
+++ b/.kokoro/samples/python3.9/common.cfg
@@ -0,0 +1,40 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+# Build logs will be here
+action {
+ define_artifacts {
+ regex: "**/*sponge_log.xml"
+ }
+}
+
+# Specify which tests to run
+env_vars: {
+ key: "RUN_TESTS_SESSION"
+ value: "py-3.9"
+}
+
+# Declare build specific Cloud project.
+env_vars: {
+ key: "BUILD_SPECIFIC_GCLOUD_PROJECT"
+ value: "python-docs-samples-tests-py39"
+}
+
+env_vars: {
+ key: "TRAMPOLINE_BUILD_FILE"
+ value: "github/python-bigquery/.kokoro/test-samples.sh"
+}
+
+# Configure the docker image for kokoro-trampoline.
+env_vars: {
+ key: "TRAMPOLINE_IMAGE"
+ value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker"
+}
+
+# Download secrets for samples
+gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples"
+
+# Download trampoline resources.
+gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline"
+
+# Use the trampoline script to run in docker.
+build_file: "python-bigquery/.kokoro/trampoline.sh"
\ No newline at end of file
diff --git a/.kokoro/samples/python3.9/continuous.cfg b/.kokoro/samples/python3.9/continuous.cfg
new file mode 100644
index 000000000..a1c8d9759
--- /dev/null
+++ b/.kokoro/samples/python3.9/continuous.cfg
@@ -0,0 +1,6 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+env_vars: {
+ key: "INSTALL_LIBRARY_FROM_SOURCE"
+ value: "True"
+}
\ No newline at end of file
diff --git a/.kokoro/samples/python3.9/periodic-head.cfg b/.kokoro/samples/python3.9/periodic-head.cfg
new file mode 100644
index 000000000..5aa01bab5
--- /dev/null
+++ b/.kokoro/samples/python3.9/periodic-head.cfg
@@ -0,0 +1,11 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+env_vars: {
+ key: "INSTALL_LIBRARY_FROM_SOURCE"
+ value: "True"
+}
+
+env_vars: {
+ key: "TRAMPOLINE_BUILD_FILE"
+ value: "github/python-bigquery/.kokoro/test-samples-against-head.sh"
+}
diff --git a/.kokoro/samples/python3.9/periodic.cfg b/.kokoro/samples/python3.9/periodic.cfg
new file mode 100644
index 000000000..50fec9649
--- /dev/null
+++ b/.kokoro/samples/python3.9/periodic.cfg
@@ -0,0 +1,6 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+env_vars: {
+ key: "INSTALL_LIBRARY_FROM_SOURCE"
+ value: "False"
+}
\ No newline at end of file
diff --git a/.kokoro/samples/python3.9/presubmit.cfg b/.kokoro/samples/python3.9/presubmit.cfg
new file mode 100644
index 000000000..a1c8d9759
--- /dev/null
+++ b/.kokoro/samples/python3.9/presubmit.cfg
@@ -0,0 +1,6 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+env_vars: {
+ key: "INSTALL_LIBRARY_FROM_SOURCE"
+ value: "True"
+}
\ No newline at end of file
diff --git a/.kokoro/test-samples-against-head.sh b/.kokoro/test-samples-against-head.sh
new file mode 100755
index 000000000..689948a23
--- /dev/null
+++ b/.kokoro/test-samples-against-head.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# A customized test runner for samples.
+#
+# For periodic builds, you can specify this file for testing against head.
+
+# `-e` enables the script to automatically fail when a command fails
+# `-o pipefail` sets the exit code to that of the rightmost command to exit with a non-zero status
+set -eo pipefail
+# Enables `**` to include files nested inside sub-folders
+shopt -s globstar
+
+cd github/python-bigquery
+
+exec .kokoro/test-samples-impl.sh
diff --git a/.kokoro/test-samples-impl.sh b/.kokoro/test-samples-impl.sh
new file mode 100755
index 000000000..311a8d54b
--- /dev/null
+++ b/.kokoro/test-samples-impl.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# `-e` enables the script to automatically fail when a command fails
+# `-o pipefail` sets the exit code to that of the rightmost command to exit with a non-zero status
+set -eo pipefail
+# Enables `**` to include files nested inside sub-folders
+shopt -s globstar
+
+# Exit early if samples don't exist
+if ! find samples -name 'requirements.txt' | grep -q .; then
+ echo "No tests run. './samples/**/requirements.txt' not found"
+ exit 0
+fi
+
+# Disable buffering, so that the logs stream through.
+export PYTHONUNBUFFERED=1
+
+# Debug: show build environment
+env | grep KOKORO
+
+# Install nox
+python3.6 -m pip install --upgrade --quiet nox
+
+# Use secrets accessor service account to get secrets
+if [[ -f "${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" ]]; then
+ gcloud auth activate-service-account \
+ --key-file="${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" \
+ --project="cloud-devrel-kokoro-resources"
+fi
+
+# This script will create 3 files:
+# - testing/test-env.sh
+# - testing/service-account.json
+# - testing/client-secrets.json
+./scripts/decrypt-secrets.sh
+
+source ./testing/test-env.sh
+export GOOGLE_APPLICATION_CREDENTIALS=$(pwd)/testing/service-account.json
+
+# For cloud-run session, we activate the service account for gcloud sdk.
+gcloud auth activate-service-account \
+ --key-file "${GOOGLE_APPLICATION_CREDENTIALS}"
+
+export GOOGLE_CLIENT_SECRETS=$(pwd)/testing/client-secrets.json
+
+echo -e "\n******************** TESTING PROJECTS ********************"
+
+# Switch to 'fail at end' to allow all tests to complete before exiting.
+set +e
+# Use RTN to return a non-zero value if the test fails.
+RTN=0
+ROOT=$(pwd)
+# Find all requirements.txt in the samples directory (may break on whitespace).
+for file in samples/**/requirements.txt; do
+ cd "$ROOT"
+ # Navigate to the project folder.
+ file=$(dirname "$file")
+ cd "$file"
+
+ echo "------------------------------------------------------------"
+ echo "- testing $file"
+ echo "------------------------------------------------------------"
+
+ # Use nox to execute the tests for the project.
+ python3.6 -m nox -s "$RUN_TESTS_SESSION"
+ EXIT=$?
+
+ # If this is a periodic build, send the test log to the FlakyBot.
+ # See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot.
+ if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then
+ chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot
+ $KOKORO_GFILE_DIR/linux_amd64/flakybot
+ fi
+
+ if [[ $EXIT -ne 0 ]]; then
+ RTN=1
+ echo -e "\n Testing failed: Nox returned a non-zero exit code. \n"
+ else
+ echo -e "\n Testing completed.\n"
+ fi
+
+done
+cd "$ROOT"
+
+# Workaround for Kokoro permissions issue: delete secrets
+rm testing/{test-env.sh,client-secrets.json,service-account.json}
+
+exit "$RTN"
diff --git a/.kokoro/test-samples.sh b/.kokoro/test-samples.sh
index c5653a81d..62ef534cd 100755
--- a/.kokoro/test-samples.sh
+++ b/.kokoro/test-samples.sh
@@ -13,6 +13,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+# The default test runner for samples.
+#
+# For periodic builds, we rewind the repo to the latest release, and
+# run test-samples-impl.sh.
# `-e` enables the script to automatically fail when a command fails
# `-o pipefail` sets the exit code to the rightmost comment to exit with a non-zero
@@ -24,87 +28,19 @@ cd github/python-bigquery
# Run periodic samples tests at latest release
if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then
+ # preserving the test runner implementation.
+ cp .kokoro/test-samples-impl.sh "${TMPDIR}/test-samples-impl.sh"
+ echo "--- IMPORTANT IMPORTANT IMPORTANT ---"
+ echo "Now we rewind the repo back to the latest release..."
LATEST_RELEASE=$(git describe --abbrev=0 --tags)
git checkout $LATEST_RELEASE
-fi
-
-# Exit early if samples directory doesn't exist
-if [ ! -d "./samples" ]; then
- echo "No tests run. `./samples` not found"
- exit 0
-fi
-
-# Disable buffering, so that the logs stream through.
-export PYTHONUNBUFFERED=1
-
-# Debug: show build environment
-env | grep KOKORO
-
-# Install nox
-python3.6 -m pip install --upgrade --quiet nox
-
-# Use secrets acessor service account to get secrets
-if [[ -f "${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" ]]; then
- gcloud auth activate-service-account \
- --key-file="${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" \
- --project="cloud-devrel-kokoro-resources"
-fi
-
-# This script will create 3 files:
-# - testing/test-env.sh
-# - testing/service-account.json
-# - testing/client-secrets.json
-./scripts/decrypt-secrets.sh
-
-source ./testing/test-env.sh
-export GOOGLE_APPLICATION_CREDENTIALS=$(pwd)/testing/service-account.json
-
-# For cloud-run session, we activate the service account for gcloud sdk.
-gcloud auth activate-service-account \
- --key-file "${GOOGLE_APPLICATION_CREDENTIALS}"
-
-export GOOGLE_CLIENT_SECRETS=$(pwd)/testing/client-secrets.json
-
-echo -e "\n******************** TESTING PROJECTS ********************"
-
-# Switch to 'fail at end' to allow all tests to complete before exiting.
-set +e
-# Use RTN to return a non-zero value if the test fails.
-RTN=0
-ROOT=$(pwd)
-# Find all requirements.txt in the samples directory (may break on whitespace).
-for file in samples/**/requirements.txt; do
- cd "$ROOT"
- # Navigate to the project folder.
- file=$(dirname "$file")
- cd "$file"
-
- echo "------------------------------------------------------------"
- echo "- testing $file"
- echo "------------------------------------------------------------"
-
- # Use nox to execute the tests for the project.
- python3.6 -m nox -s "$RUN_TESTS_SESSION"
- EXIT=$?
-
- # If this is a periodic build, send the test log to the Build Cop Bot.
- # See https://github.com/googleapis/repo-automation-bots/tree/master/packages/buildcop.
- if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then
- chmod +x $KOKORO_GFILE_DIR/linux_amd64/buildcop
- $KOKORO_GFILE_DIR/linux_amd64/buildcop
+ echo "The current head is: "
+ echo $(git rev-parse --verify HEAD)
+ echo "--- IMPORTANT IMPORTANT IMPORTANT ---"
+ # move back the test runner implementation if there's no file.
+ if [ ! -f .kokoro/test-samples-impl.sh ]; then
+ cp "${TMPDIR}/test-samples-impl.sh" .kokoro/test-samples-impl.sh
fi
+fi
- if [[ $EXIT -ne 0 ]]; then
- RTN=1
- echo -e "\n Testing failed: Nox returned a non-zero exit code. \n"
- else
- echo -e "\n Testing completed.\n"
- fi
-
-done
-cd "$ROOT"
-
-# Workaround for Kokoro permissions issue: delete secrets
-rm testing/{test-env.sh,client-secrets.json,service-account.json}
-
-exit "$RTN"
+exec .kokoro/test-samples-impl.sh
diff --git a/.kokoro/trampoline_v2.sh b/.kokoro/trampoline_v2.sh
index 719bcd5ba..4af6cdc26 100755
--- a/.kokoro/trampoline_v2.sh
+++ b/.kokoro/trampoline_v2.sh
@@ -159,7 +159,7 @@ if [[ -n "${KOKORO_BUILD_ID:-}" ]]; then
"KOKORO_GITHUB_COMMIT"
"KOKORO_GITHUB_PULL_REQUEST_NUMBER"
"KOKORO_GITHUB_PULL_REQUEST_COMMIT"
- # For Build Cop Bot
+ # For FlakyBot
"KOKORO_GITHUB_COMMIT_URL"
"KOKORO_GITHUB_PULL_REQUEST_URL"
)
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 000000000..62eb5a77d
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,31 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# See https://pre-commit.com for more information
+# See https://pre-commit.com/hooks.html for more hooks
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v4.0.1
+ hooks:
+ - id: trailing-whitespace
+ - id: end-of-file-fixer
+ - id: check-yaml
+- repo: https://github.com/psf/black
+ rev: 19.10b0
+ hooks:
+ - id: black
+- repo: https://gitlab.com/pycqa/flake8
+ rev: 3.9.2
+ hooks:
+ - id: flake8
diff --git a/.repo-metadata.json b/.repo-metadata.json
index f50dbbeb2..f132056d5 100644
--- a/.repo-metadata.json
+++ b/.repo-metadata.json
@@ -6,6 +6,7 @@
"issue_tracker": "https://issuetracker.google.com/savedsearches/559654",
"release_level": "ga",
"language": "python",
+ "library_type": "GAPIC_COMBO",
"repo": "googleapis/python-bigquery",
"distribution_name": "google-cloud-bigquery",
"api_id": "bigquery.googleapis.com",
diff --git a/.trampolinerc b/.trampolinerc
index c7d663ae9..383b6ec89 100644
--- a/.trampolinerc
+++ b/.trampolinerc
@@ -18,7 +18,6 @@
required_envvars+=(
"STAGING_BUCKET"
"V2_STAGING_BUCKET"
- "NOX_SESSION"
)
# Add env vars which are passed down into the container here.
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a58510c66..8a21df6fe 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,323 @@
[1]: https://pypi.org/project/google-cloud-bigquery/#history
+
+### [2.25.1](https://www.github.com/googleapis/python-bigquery/compare/v2.25.0...v2.25.1) (2021-08-25)
+
+
+### Bug Fixes
+
+* populate default `timeout` and retry after client-side timeout ([#896](https://www.github.com/googleapis/python-bigquery/issues/896)) ([b508809](https://www.github.com/googleapis/python-bigquery/commit/b508809c0f887575274309a463e763c56ddd017d))
+* use REST API in cell magic when requested ([#892](https://www.github.com/googleapis/python-bigquery/issues/892)) ([1cb3e55](https://www.github.com/googleapis/python-bigquery/commit/1cb3e55253e824e3a1da5201f6ec09065fb6b627))
+
+## [2.25.0](https://www.github.com/googleapis/python-bigquery/compare/v2.24.1...v2.25.0) (2021-08-24)
+
+
+### Features
+
+* Support using GeoPandas for GEOGRAPHY columns ([#848](https://www.github.com/googleapis/python-bigquery/issues/848)) ([16f65e6](https://www.github.com/googleapis/python-bigquery/commit/16f65e6ae15979217ceea6c6d398c9057a363a13))
+
+### [2.24.1](https://www.github.com/googleapis/python-bigquery/compare/v2.24.0...v2.24.1) (2021-08-13)
+
+
+### Bug Fixes
+
+* remove pytz dependency and require pyarrow>=3.0.0 ([#875](https://www.github.com/googleapis/python-bigquery/issues/875)) ([2cb3563](https://www.github.com/googleapis/python-bigquery/commit/2cb3563ee863edef7eaf5d04d739bcfe7bc6438e))
+
+## [2.24.0](https://www.github.com/googleapis/python-bigquery/compare/v2.23.3...v2.24.0) (2021-08-11)
+
+
+### Features
+
+* add support for transaction statistics ([#849](https://www.github.com/googleapis/python-bigquery/issues/849)) ([7f7b1a8](https://www.github.com/googleapis/python-bigquery/commit/7f7b1a808d50558772a0deb534ca654da65d629e))
+* make the same `Table*` instances equal to each other ([#867](https://www.github.com/googleapis/python-bigquery/issues/867)) ([c1a3d44](https://www.github.com/googleapis/python-bigquery/commit/c1a3d4435739a21d25aa154145e36d3a7c42eeb6))
+* retry failed query jobs in `result()` ([#837](https://www.github.com/googleapis/python-bigquery/issues/837)) ([519d99c](https://www.github.com/googleapis/python-bigquery/commit/519d99c20e7d1101f76981f3de036fdf3c7a4ecc))
+* support `ScalarQueryParameterType` for `type_` argument in `ScalarQueryParameter` constructor ([#850](https://www.github.com/googleapis/python-bigquery/issues/850)) ([93d15e2](https://www.github.com/googleapis/python-bigquery/commit/93d15e2e5405c2cc6d158c4e5737361344193dbc))
+
+
+### Bug Fixes
+
+* make unicode characters working well in load_table_from_json ([#865](https://www.github.com/googleapis/python-bigquery/issues/865)) ([ad9c802](https://www.github.com/googleapis/python-bigquery/commit/ad9c8026f0e667f13dd754279f9dc40d06f4fa78))
+
+### [2.23.3](https://www.github.com/googleapis/python-bigquery/compare/v2.23.2...v2.23.3) (2021-08-06)
+
+
+### Bug Fixes
+
+* increase default retry deadline to 10 minutes ([#859](https://www.github.com/googleapis/python-bigquery/issues/859)) ([30770fd](https://www.github.com/googleapis/python-bigquery/commit/30770fd0575fbd5aaa70c14196a4cc54627aecd2))
+
+### [2.23.2](https://www.github.com/googleapis/python-bigquery/compare/v2.23.1...v2.23.2) (2021-07-29)
+
+
+### Dependencies
+
+* expand pyarrow pins to support 5.x releases ([#833](https://www.github.com/googleapis/python-bigquery/issues/833)) ([80e3a61](https://www.github.com/googleapis/python-bigquery/commit/80e3a61c60419fb19b70b664c6415cd01ba82f5b))
+
+### [2.23.1](https://www.github.com/googleapis/python-bigquery/compare/v2.23.0...v2.23.1) (2021-07-28)
+
+
+### Bug Fixes
+
+* `insert_rows()` accepts float column values as strings again ([#824](https://www.github.com/googleapis/python-bigquery/issues/824)) ([d9378af](https://www.github.com/googleapis/python-bigquery/commit/d9378af13add879118a1d004529b811f72c325d6))
+
+## [2.23.0](https://www.github.com/googleapis/python-bigquery/compare/v2.22.1...v2.23.0) (2021-07-27)
+
+
+### Features
+
+* Update proto definitions for bigquery/v2 to support new proto fields for BQML. ([#817](https://www.github.com/googleapis/python-bigquery/issues/817)) ([fe7a902](https://www.github.com/googleapis/python-bigquery/commit/fe7a902e8b3e723ace335c9b499aea6d180a025b))
+
+
+### Bug Fixes
+
+* no longer raise a warning in `to_dataframe` if `max_results` set ([#815](https://www.github.com/googleapis/python-bigquery/issues/815)) ([3c1be14](https://www.github.com/googleapis/python-bigquery/commit/3c1be149e76b1d1d8879fdcf0924ddb1c1839e94))
+* retry ChunkedEncodingError by default ([#802](https://www.github.com/googleapis/python-bigquery/issues/802)) ([419d36d](https://www.github.com/googleapis/python-bigquery/commit/419d36d6b1887041e5795dbc8fc808890e91ab11))
+
+
+### Documentation
+
+* correct docs for `LoadJobConfig.destination_table_description` ([#810](https://www.github.com/googleapis/python-bigquery/issues/810)) ([da87fd9](https://www.github.com/googleapis/python-bigquery/commit/da87fd921cc8067b187d7985c978aac8eb58d107))
+
+### [2.22.1](https://www.github.com/googleapis/python-bigquery/compare/v2.22.0...v2.22.1) (2021-07-22)
+
+
+### Bug Fixes
+
+* issue a warning if buggy pyarrow is detected ([#787](https://www.github.com/googleapis/python-bigquery/issues/787)) ([e403721](https://www.github.com/googleapis/python-bigquery/commit/e403721af1373eb1f1a1c7be5b2182e3819ed1f9))
+* use a larger chunk size when loading data ([#799](https://www.github.com/googleapis/python-bigquery/issues/799)) ([b804373](https://www.github.com/googleapis/python-bigquery/commit/b804373277c1c1baa3370ebfb4783503b7ff360f))
+
+
+### Documentation
+
+* add Samples section to CONTRIBUTING.rst ([#785](https://www.github.com/googleapis/python-bigquery/issues/785)) ([e587029](https://www.github.com/googleapis/python-bigquery/commit/e58702967d572e83b4c774278818302594a511b7))
+* add sample to delete job metadata ([#798](https://www.github.com/googleapis/python-bigquery/issues/798)) ([be9b242](https://www.github.com/googleapis/python-bigquery/commit/be9b242f2180f5b795dfb3a168a97af1682999fd))
+
+## [2.22.0](https://www.github.com/googleapis/python-bigquery/compare/v2.21.0...v2.22.0) (2021-07-19)
+
+
+### Features
+
+* add `LoadJobConfig.projection_fields` to select DATASTORE_BACKUP fields ([#736](https://www.github.com/googleapis/python-bigquery/issues/736)) ([c45a738](https://www.github.com/googleapis/python-bigquery/commit/c45a7380871af3dfbd3c45524cb606c60e1a01d1))
+* add standard sql table type, update scalar type enums ([#777](https://www.github.com/googleapis/python-bigquery/issues/777)) ([b8b5433](https://www.github.com/googleapis/python-bigquery/commit/b8b5433898ec881f8da1303614780a660d94733a))
+* add support for more detailed DML stats ([#758](https://www.github.com/googleapis/python-bigquery/issues/758)) ([36fe86f](https://www.github.com/googleapis/python-bigquery/commit/36fe86f41c1a8f46167284f752a6d6bbf886a04b))
+* add support for user defined Table View Functions ([#724](https://www.github.com/googleapis/python-bigquery/issues/724)) ([8c7b839](https://www.github.com/googleapis/python-bigquery/commit/8c7b839a6ac1491c1c3b6b0e8755f4b70ed72ee3))
+
+
+### Bug Fixes
+
+* avoid possible job already exists error ([#751](https://www.github.com/googleapis/python-bigquery/issues/751)) ([45b9308](https://www.github.com/googleapis/python-bigquery/commit/45b93089f5398740413104285cc8acfd5ebc9c08))
+
+
+### Dependencies
+
+* allow 2.x versions of `google-api-core`, `google-cloud-core`, `google-resumable-media` ([#770](https://www.github.com/googleapis/python-bigquery/issues/770)) ([87a09fa](https://www.github.com/googleapis/python-bigquery/commit/87a09fa3f2a9ab35728a1ac925f9d5f2e6616c65))
+
+
+### Documentation
+
+* add loading data from Firestore backup sample ([#737](https://www.github.com/googleapis/python-bigquery/issues/737)) ([22fd848](https://www.github.com/googleapis/python-bigquery/commit/22fd848cae4af1148040e1faa31dd15a4d674687))
+
+## [2.21.0](https://www.github.com/googleapis/python-bigquery/compare/v2.20.0...v2.21.0) (2021-07-12)
+
+
+### Features
+
+* Add max_results parameter to some of the `QueryJob` methods. ([#698](https://www.github.com/googleapis/python-bigquery/issues/698)) ([2a9618f](https://www.github.com/googleapis/python-bigquery/commit/2a9618f4daaa4a014161e1a2f7376844eec9e8da))
+* Add support for decimal target types. ([#735](https://www.github.com/googleapis/python-bigquery/issues/735)) ([7d2d3e9](https://www.github.com/googleapis/python-bigquery/commit/7d2d3e906a9eb161911a198fb925ad79de5df934))
+* Add support for table snapshots. ([#740](https://www.github.com/googleapis/python-bigquery/issues/740)) ([ba86b2a](https://www.github.com/googleapis/python-bigquery/commit/ba86b2a6300ae5a9f3c803beeb42bda4c522e34c))
+* Enable unsetting policy tags on schema fields. ([#703](https://www.github.com/googleapis/python-bigquery/issues/703)) ([18bb443](https://www.github.com/googleapis/python-bigquery/commit/18bb443c7acd0a75dcb57d9aebe38b2d734ff8c7))
+* Make it easier to disable best-effort deduplication with streaming inserts. ([#734](https://www.github.com/googleapis/python-bigquery/issues/734)) ([1246da8](https://www.github.com/googleapis/python-bigquery/commit/1246da86b78b03ca1aa2c45ec71649e294cfb2f1))
+* Support passing struct data to the DB API. ([#718](https://www.github.com/googleapis/python-bigquery/issues/718)) ([38b3ef9](https://www.github.com/googleapis/python-bigquery/commit/38b3ef96c3dedc139b84f0ff06885141ae7ce78c))
+
+
+### Bug Fixes
+
+* Inserting non-finite floats with `insert_rows()`. ([#728](https://www.github.com/googleapis/python-bigquery/issues/728)) ([d047419](https://www.github.com/googleapis/python-bigquery/commit/d047419879e807e123296da2eee89a5253050166))
+* Use `pandas` function to check for `NaN`. ([#750](https://www.github.com/googleapis/python-bigquery/issues/750)) ([67bc5fb](https://www.github.com/googleapis/python-bigquery/commit/67bc5fbd306be7cdffd216f3791d4024acfa95b3))
+
+
+### Documentation
+
+* Add docs for all enums in module. ([#745](https://www.github.com/googleapis/python-bigquery/issues/745)) ([145944f](https://www.github.com/googleapis/python-bigquery/commit/145944f24fedc4d739687399a8309f9d51d43dfd))
+* Omit mention of Python 2.7 in `CONTRIBUTING.rst`. ([#706](https://www.github.com/googleapis/python-bigquery/issues/706)) ([27d6839](https://www.github.com/googleapis/python-bigquery/commit/27d6839ee8a40909e4199cfa0da8b6b64705b2e9))
+
+## [2.20.0](https://www.github.com/googleapis/python-bigquery/compare/v2.19.0...v2.20.0) (2021-06-07)
+
+
+### Features
+
+* support script options in query job config ([#690](https://www.github.com/googleapis/python-bigquery/issues/690)) ([1259e16](https://www.github.com/googleapis/python-bigquery/commit/1259e16394784315368e8be959c1ac097782b62e))
+
+## [2.19.0](https://www.github.com/googleapis/python-bigquery/compare/v2.18.0...v2.19.0) (2021-06-06)
+
+
+### Features
+
+* list_tables, list_projects, list_datasets, list_models, list_routines, and list_jobs now accept a page_size parameter to control page size ([#686](https://www.github.com/googleapis/python-bigquery/issues/686)) ([1f1c4b7](https://www.github.com/googleapis/python-bigquery/commit/1f1c4b7ba4390fc4c5c8186bc22b83b45304ca06))
+
+## [2.18.0](https://www.github.com/googleapis/python-bigquery/compare/v2.17.0...v2.18.0) (2021-06-02)
+
+
+### Features
+
+* add support for Parquet options ([#679](https://www.github.com/googleapis/python-bigquery/issues/679)) ([d792ce0](https://www.github.com/googleapis/python-bigquery/commit/d792ce09388a6ee3706777915dd2818d4c854f79))
+
+## [2.17.0](https://www.github.com/googleapis/python-bigquery/compare/v2.16.1...v2.17.0) (2021-05-21)
+
+
+### Features
+
+* detect obsolete BQ Storage extra at runtime ([#666](https://www.github.com/googleapis/python-bigquery/issues/666)) ([bd7dbda](https://www.github.com/googleapis/python-bigquery/commit/bd7dbdae5c972b16bafc53c67911eeaa3255a880))
+* Support parameterized NUMERIC, BIGNUMERIC, STRING, and BYTES types ([#673](https://www.github.com/googleapis/python-bigquery/issues/673)) ([45421e7](https://www.github.com/googleapis/python-bigquery/commit/45421e73bfcddb244822e6a5cd43be6bd1ca2256))
+
+
+### Bug Fixes
+
+* **tests:** invalid path to strptime() ([#672](https://www.github.com/googleapis/python-bigquery/issues/672)) ([591cdd8](https://www.github.com/googleapis/python-bigquery/commit/591cdd851bb1321b048a05a378a0ef48d3ade462))
+
+### [2.16.1](https://www.github.com/googleapis/python-bigquery/compare/v2.16.0...v2.16.1) (2021-05-12)
+
+
+### Bug Fixes
+
+* executemany rowcount only reflected the last execution ([#660](https://www.github.com/googleapis/python-bigquery/issues/660)) ([aeadc8c](https://www.github.com/googleapis/python-bigquery/commit/aeadc8c2d614bb9f0883ec901fca48930f3aaf19))
+
+## [2.16.0](https://www.github.com/googleapis/python-bigquery/compare/v2.15.0...v2.16.0) (2021-05-05)
+
+
+### Features
+
+* add with_name() to ScalarQueryParameterType ([#644](https://www.github.com/googleapis/python-bigquery/issues/644)) ([6cc6876](https://www.github.com/googleapis/python-bigquery/commit/6cc6876eb0e5bf49fdc047256a945dcf1b289576))
+
+
+### Dependencies
+
+* expand supported pyarrow versions to v4 ([#643](https://www.github.com/googleapis/python-bigquery/issues/643)) ([9e1d386](https://www.github.com/googleapis/python-bigquery/commit/9e1d3869c2024fe7a8af57ff59838d904ca5db03))
+
+## [2.15.0](https://www.github.com/googleapis/python-bigquery/compare/v2.14.0...v2.15.0) (2021-04-29)
+
+
+### Features
+
+* Extended DB API parameter syntax to optionally provide parameter types ([#626](https://www.github.com/googleapis/python-bigquery/issues/626)) ([8bcf397](https://www.github.com/googleapis/python-bigquery/commit/8bcf397fbe2527e06317741875a059b109cfcd9c))
+
+
+### Bug Fixes
+
+* add DECIMAL and BIGDECIMAL as aliases for NUMERIC and BIGNUMERIC ([#638](https://www.github.com/googleapis/python-bigquery/issues/638)) ([aa59023](https://www.github.com/googleapis/python-bigquery/commit/aa59023317b1c63720fb717b3544f755652da58d))
+* The DB API Binary function accepts bytes data ([#630](https://www.github.com/googleapis/python-bigquery/issues/630)) ([4396e70](https://www.github.com/googleapis/python-bigquery/commit/4396e70771af6889d3242c37c5ff2e80241023a2))
+
+## [2.14.0](https://www.github.com/googleapis/python-bigquery/compare/v2.13.1...v2.14.0) (2021-04-26)
+
+
+### Features
+
+* accept DatasetListItem where DatasetReference is accepted ([#597](https://www.github.com/googleapis/python-bigquery/issues/597)) ([c8b5581](https://www.github.com/googleapis/python-bigquery/commit/c8b5581ea3c94005d69755c4a3b5a0d8900f3fe2))
+* accept job object as argument to `get_job` and `cancel_job` ([#617](https://www.github.com/googleapis/python-bigquery/issues/617)) ([f75dcdf](https://www.github.com/googleapis/python-bigquery/commit/f75dcdf3943b87daba60011c9a3b42e34ff81910))
+* add `Client.delete_job_metadata` method to remove job metadata ([#610](https://www.github.com/googleapis/python-bigquery/issues/610)) ([0abb566](https://www.github.com/googleapis/python-bigquery/commit/0abb56669c097c59fbffce007c702e7a55f2d9c1))
+* add `max_queue_size` argument to `RowIterator.to_dataframe_iterable` ([#575](https://www.github.com/googleapis/python-bigquery/issues/575)) ([f95f415](https://www.github.com/googleapis/python-bigquery/commit/f95f415d3441b3928f6cc705cb8a75603d790fd6))
+* add type hints for public methods ([#613](https://www.github.com/googleapis/python-bigquery/issues/613)) ([f8d4aaa](https://www.github.com/googleapis/python-bigquery/commit/f8d4aaa335a0eef915e73596fc9b43b11d11be9f))
+* DB API cursors are now iterable ([#618](https://www.github.com/googleapis/python-bigquery/issues/618)) ([e0b373d](https://www.github.com/googleapis/python-bigquery/commit/e0b373d0e721a70656ed8faceb7f5c70f642d144))
+* retry google.auth TransportError by default ([#624](https://www.github.com/googleapis/python-bigquery/issues/624)) ([34ecc3f](https://www.github.com/googleapis/python-bigquery/commit/34ecc3f1ca0ff073330c0c605673d89b43af7ed9))
+* use pyarrow stream compression, if available ([#593](https://www.github.com/googleapis/python-bigquery/issues/593)) ([dde9dc5](https://www.github.com/googleapis/python-bigquery/commit/dde9dc5114c2311fb76fafc5b222fff561e8abf1))
+
+
+### Bug Fixes
+
+* consistent percents handling in DB API query ([#619](https://www.github.com/googleapis/python-bigquery/issues/619)) ([6502a60](https://www.github.com/googleapis/python-bigquery/commit/6502a602337ae562652a20b20270949f2c9d5073))
+* missing license headers in new test files ([#604](https://www.github.com/googleapis/python-bigquery/issues/604)) ([df48cc5](https://www.github.com/googleapis/python-bigquery/commit/df48cc5a0be99ad39d5835652d1b7422209afc5d))
+* unsetting clustering fields on Table is now possible ([#622](https://www.github.com/googleapis/python-bigquery/issues/622)) ([33a871f](https://www.github.com/googleapis/python-bigquery/commit/33a871f06329f9bf5a6a92fab9ead65bf2bee75d))
+
+
+### Documentation
+
+* add sample to run DML query ([#591](https://www.github.com/googleapis/python-bigquery/issues/591)) ([ff2ec3a](https://www.github.com/googleapis/python-bigquery/commit/ff2ec3abe418a443cd07751c08e654f94e8b3155))
+* update the description of the return value of `_QueryResults.rows()` ([#594](https://www.github.com/googleapis/python-bigquery/issues/594)) ([8f4c0b8](https://www.github.com/googleapis/python-bigquery/commit/8f4c0b84dac3840532d7865247b8ad94b625b897))
+
+### [2.13.1](https://www.github.com/googleapis/python-bigquery/compare/v2.13.0...v2.13.1) (2021-03-23)
+
+
+### Bug Fixes
+
+* add ConnectionError to default retry ([#571](https://www.github.com/googleapis/python-bigquery/issues/571)) ([a3edb8b](https://www.github.com/googleapis/python-bigquery/commit/a3edb8b921e029e2c03d33302d408ad5d4e9d4ad))
+
+## [2.13.0](https://www.github.com/googleapis/python-bigquery/compare/v2.12.0...v2.13.0) (2021-03-22)
+
+
+### Features
+
+* add `ExternalConfig.connection_id` property to connect to external sources ([#560](https://www.github.com/googleapis/python-bigquery/issues/560)) ([d93986e](https://www.github.com/googleapis/python-bigquery/commit/d93986e0259952257f2571f60719b52099c29c0c))
+
+
+### Bug Fixes
+
+* avoid overly strict dependency on pyarrow 3.x ([#564](https://www.github.com/googleapis/python-bigquery/issues/564)) ([97ee6ec](https://www.github.com/googleapis/python-bigquery/commit/97ee6ec6cd4bc9f833cd506dc6d244d103654cfd))
+* avoid policy tags 403 error in `load_table_from_dataframe` ([#557](https://www.github.com/googleapis/python-bigquery/issues/557)) ([84e646e](https://www.github.com/googleapis/python-bigquery/commit/84e646e6b7087a1626e56ad51eeb130f4ddfa2fb))
+
+## [2.12.0](https://www.github.com/googleapis/python-bigquery/compare/v2.11.0...v2.12.0) (2021-03-16)
+
+
+### Features
+
+* make QueryJob.done() method more performant ([#544](https://www.github.com/googleapis/python-bigquery/issues/544)) ([a3ab9ef](https://www.github.com/googleapis/python-bigquery/commit/a3ab9efdd0758829845cfcb6ca0ac1f03ab44f64))
+
+
+### Bug Fixes
+
+* remove DB-API dependency on pyarrow with decimal query parameters ([#551](https://www.github.com/googleapis/python-bigquery/issues/551)) ([1b946ba](https://www.github.com/googleapis/python-bigquery/commit/1b946ba23ee7df86114c6acb338ec34e6c92af6d))
+
+## [2.11.0](https://www.github.com/googleapis/python-bigquery/compare/v2.10.0...v2.11.0) (2021-03-09)
+
+
+### Features
+
+* add context manager support to client ([#540](https://www.github.com/googleapis/python-bigquery/issues/540)) ([d5c7e11](https://www.github.com/googleapis/python-bigquery/commit/d5c7e11a1dc2a149d74294bfadbae62d70573e69))
+
+## [2.10.0](https://www.github.com/googleapis/python-bigquery/compare/v2.9.0...v2.10.0) (2021-02-25)
+
+
+### Features
+
+* add BIGNUMERIC support ([#527](https://www.github.com/googleapis/python-bigquery/issues/527)) ([cc3394f](https://www.github.com/googleapis/python-bigquery/commit/cc3394f80934419eb00c2029bb81c92a696e7d88))
+
+
+### Bug Fixes
+
+* error using empty array of structs parameter ([#474](https://www.github.com/googleapis/python-bigquery/issues/474)) ([c1d15f4](https://www.github.com/googleapis/python-bigquery/commit/c1d15f4e5da4b7e10c00afffd59a5c7f3ded027a))
+* QueryJob.exception() *returns* the errors, not raises them ([#467](https://www.github.com/googleapis/python-bigquery/issues/467)) ([d763279](https://www.github.com/googleapis/python-bigquery/commit/d7632799769248b09a8558ba18f5025ebdd9675a))
+
+
+### Documentation
+
+* **bigquery:** Add alternative approach to setting credentials ([#517](https://www.github.com/googleapis/python-bigquery/issues/517)) ([60fbf28](https://www.github.com/googleapis/python-bigquery/commit/60fbf287b0d34d5db2e61cce7a5b42735ed43d0e))
+* explain retry behavior for DONE jobs ([#532](https://www.github.com/googleapis/python-bigquery/issues/532)) ([696c443](https://www.github.com/googleapis/python-bigquery/commit/696c443f0a6740be0767e12b706a7771bc1460c3))
+
+## [2.9.0](https://www.github.com/googleapis/python-bigquery/compare/v2.8.0...v2.9.0) (2021-02-18)
+
+
+### Features
+
+* add determinism level for javascript UDFs ([#522](https://www.github.com/googleapis/python-bigquery/issues/522)) ([edd3328](https://www.github.com/googleapis/python-bigquery/commit/edd3328fffa3040b2cd3a3c668c90a0e43e4c94c))
+* expose reservation usage stats on jobs ([#524](https://www.github.com/googleapis/python-bigquery/issues/524)) ([4ffb4e0](https://www.github.com/googleapis/python-bigquery/commit/4ffb4e067abdaa54dad6eff49a7fbdb0fa358637))
+
+
+### Documentation
+
+* clarify `%%bigquery` magics and fix broken link ([#508](https://www.github.com/googleapis/python-bigquery/issues/508)) ([eedf93b](https://www.github.com/googleapis/python-bigquery/commit/eedf93b6636c5ff1bd810c6038cfeaea8ccb64d8))
+* update python contributing guide ([#514](https://www.github.com/googleapis/python-bigquery/issues/514)) ([01e851d](https://www.github.com/googleapis/python-bigquery/commit/01e851d00fc17a780375580776753d78f6d74174))
+
+## [2.8.0](https://www.github.com/googleapis/python-bigquery/compare/v2.7.0...v2.8.0) (2021-02-08)
+
+
+### Features
+
+* Add mTLS support to client. ([#492](https://www.github.com/googleapis/python-bigquery/issues/492)) ([1823cad](https://www.github.com/googleapis/python-bigquery/commit/1823cadee3acf95c516d0479400e4175349ea199))
+
+
+### Bug Fixes
+
+* Don't try to close closed cursors. ([#498](https://www.github.com/googleapis/python-bigquery/issues/498)) ([bf44e7b](https://www.github.com/googleapis/python-bigquery/commit/bf44e7b67d2de41c13053a4550484b9ea049db3e))
+
## [2.7.0](https://www.github.com/googleapis/python-bigquery/compare/v2.6.2...v2.7.0) (2021-01-27)
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index b3b802b49..2faf5aed3 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -21,8 +21,8 @@ In order to add a feature:
- The feature must be documented in both the API and narrative
documentation.
-- The feature must work fully on the following CPython versions: 2.7,
- 3.5, 3.6, 3.7 and 3.8 on both UNIX and Windows.
+- The feature must work fully on the following CPython versions:
+ 3.6, 3.7, 3.8 and 3.9 on both UNIX and Windows.
- The feature must not add unnecessary dependencies (where
"unnecessary" is of course subjective, but new dependencies should
@@ -68,10 +68,12 @@ Using ``nox``
We use `nox <https://nox.readthedocs.io/en/latest/>`__ to instrument our tests.
- To test your changes, run unit tests with ``nox``::
+ $ nox -s unit
+
+- To run a single unit test::
+
+ $ nox -s unit-3.9 -- -k <name of test>
- $ nox -s unit-2.7
- $ nox -s unit-3.7
- $ ...
.. note::
@@ -93,8 +95,12 @@ On Debian/Ubuntu::
************
Coding Style
************
+- We use the automatic code formatter ``black``. You can run it using
+ the nox session ``blacken``. This will eliminate many lint errors. Run via::
+
+ $ nox -s blacken
-- PEP8 compliance, with exceptions defined in the linter configuration.
+- PEP8 compliance is required, with exceptions defined in the linter configuration.
If you have ``nox`` installed, you can test that you have not introduced
any non-compliant code via::
@@ -111,6 +117,16 @@ Coding Style
should point to the official ``googleapis`` checkout and the
branch should be the main branch on that remote (``master``).
+- This repository contains configuration for the
+ `pre-commit <https://pre-commit.com/>`__ tool, which automates checking
+ our linters during a commit. If you have it installed on your ``$PATH``,
+ you can enable enforcing those checks via:
+
+.. code-block:: bash
+
+ $ pre-commit install
+ pre-commit installed at .git/hooks/pre-commit
+
Exceptions to PEP8:
- Many unit tests use a helper method, ``_call_fut`` ("FUT" is short for
@@ -123,34 +139,23 @@ Running System Tests
- To run system tests, you can execute::
- $ nox -s system-3.7
- $ nox -s system-2.7
+ # Run all system tests
+ $ nox -s system
+
+ # Run a single system test
+ $ nox -s system-3.8 -- -k <name of test>
+
.. note::
- System tests are only configured to run under Python 2.7 and
- Python 3.7. For expediency, we do not run them in older versions
- of Python 3.
+ System tests are only configured to run under Python 3.8.
+ For expediency, we do not run them in older versions of Python 3.
This alone will not run the tests. You'll need to change some local
auth settings and change some configuration in your project to
run all the tests.
-- System tests will be run against an actual project and
- so you'll need to provide some environment variables to facilitate
- authentication to your project:
-
- - ``GOOGLE_APPLICATION_CREDENTIALS``: The path to a JSON key file;
- Such a file can be downloaded directly from the developer's console by clicking
- "Generate new JSON key". See private key
- `docs <https://cloud.google.com/storage/docs/authentication#generating-a-private-key>`__
- for more details.
-
-- Once you have downloaded your json keys, set the environment variable
- ``GOOGLE_APPLICATION_CREDENTIALS`` to the absolute path of the json file::
-
- $ export GOOGLE_APPLICATION_CREDENTIALS="/Users/<your_username>/path/to/app_credentials.json"
-
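+- System tests will be run against an actual project. You should use local credentials from gcloud when possible. See `Best practices for application authentication <https://cloud.google.com/docs/authentication/best-practices-applications#local_development_and_testing_with_the>`__. Some tests require a service account. For those tests see `Authenticating as a service account <https://cloud.google.com/docs/authentication/production>`__.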
*************
Test Coverage
@@ -172,6 +177,30 @@ Build the docs via:
$ nox -s docs
+*************************
+Samples and code snippets
+*************************
+
+Code samples and snippets live in the `samples/` catalogue. Feel free to
+provide more examples, but make sure to write tests for those examples.
+Each folder containing example code requires its own `noxfile.py` script
+which automates testing. If you decide to create a new folder, you can
+base it on the `samples/snippets` folder (providing `noxfile.py` and
+the requirements files).
+
+The tests will run against a real Google Cloud Project, so you should
+configure them just like the System Tests.
+
+- To run sample tests, you can execute::
+
+ # Run all tests in a folder
+ $ cd samples/snippets
+ $ nox -s py-3.8
+
+ # Run a single sample test
+ $ cd samples/snippets
+ $ nox -s py-3.8 -- -k <name of test>
+
********************************************
Note About ``README`` as it pertains to PyPI
********************************************
@@ -192,25 +221,24 @@ Supported Python Versions
We support:
-- `Python 3.5`_
- `Python 3.6`_
- `Python 3.7`_
- `Python 3.8`_
+- `Python 3.9`_
-.. _Python 3.5: https://docs.python.org/3.5/
.. _Python 3.6: https://docs.python.org/3.6/
.. _Python 3.7: https://docs.python.org/3.7/
.. _Python 3.8: https://docs.python.org/3.8/
+.. _Python 3.9: https://docs.python.org/3.9/
Supported versions can be found in our ``noxfile.py`` `config`_.
.. _config: https://github.com/googleapis/python-bigquery/blob/master/noxfile.py
-Python 2.7 support is deprecated. All code changes should maintain Python 2.7 compatibility until January 1, 2020.
-We also explicitly decided to support Python 3 beginning with version
-3.5. Reasons for this include:
+We also explicitly decided to support Python 3 beginning with version 3.6.
+Reasons for this include:
- Encouraging use of newest versions of Python 3
- Taking the lead of `prominent`_ open-source `projects`_
diff --git a/LICENSE b/LICENSE
index a8ee855de..d64569567 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,7 @@
- Apache License
+
+ Apache License
Version 2.0, January 2004
- https://www.apache.org/licenses/
+ http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
@@ -192,7 +193,7 @@
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
- https://www.apache.org/licenses/LICENSE-2.0
+ http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
diff --git a/MANIFEST.in b/MANIFEST.in
index e9e29d120..e783f4c62 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -16,10 +16,10 @@
# Generated by synthtool. DO NOT EDIT!
include README.rst LICENSE
-recursive-include google *.json *.proto
+recursive-include google *.json *.proto py.typed
recursive-include tests *
global-exclude *.py[co]
global-exclude __pycache__
# Exclude scripts for samples readmegen
-prune scripts/readme-gen
\ No newline at end of file
+prune scripts/readme-gen
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 000000000..8b58ae9c0
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,7 @@
+# Security Policy
+
+To report a security issue, please use [g.co/vulnz](https://g.co/vulnz).
+
+The Google Security Team will respond within 5 working days of your report on g.co/vulnz.
+
+We use g.co/vulnz for our intake, and do coordination and disclosure here using GitHub Security Advisory to privately discuss and fix the issue.
diff --git a/docs/_static/custom.css b/docs/_static/custom.css
index 0abaf229f..b0a295464 100644
--- a/docs/_static/custom.css
+++ b/docs/_static/custom.css
@@ -1,4 +1,20 @@
div#python2-eol {
border-color: red;
border-width: medium;
-}
\ No newline at end of file
+}
+
+/* Ensure minimum width for 'Parameters' / 'Returns' column */
+dl.field-list > dt {
+ min-width: 100px
+}
+
+/* Insert space between methods for readability */
+dl.method {
+ padding-top: 10px;
+ padding-bottom: 10px
+}
+
+/* Insert empty space between classes */
+dl.class {
+ padding-bottom: 50px
+}
diff --git a/docs/bigquery_v2/services.rst b/docs/bigquery_v2/services.rst
deleted file mode 100644
index 65fbb438c..000000000
--- a/docs/bigquery_v2/services.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-Services for Google Cloud Bigquery v2 API
-=========================================
-
-.. automodule:: google.cloud.bigquery_v2.services.model_service
- :members:
- :inherited-members:
diff --git a/docs/bigquery_v2/types.rst b/docs/bigquery_v2/types.rst
index 41b906514..c36a83e0b 100644
--- a/docs/bigquery_v2/types.rst
+++ b/docs/bigquery_v2/types.rst
@@ -3,4 +3,5 @@ Types for Google Cloud Bigquery v2 API
.. automodule:: google.cloud.bigquery_v2.types
:members:
+ :undoc-members:
:show-inheritance:
diff --git a/docs/conf.py b/docs/conf.py
index 37e0c46af..59a2d8fb3 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -1,4 +1,17 @@
# -*- coding: utf-8 -*-
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
#
# google-cloud-bigquery documentation build configuration file
#
@@ -67,9 +80,9 @@
master_doc = "index"
# General information about the project.
-project = u"google-cloud-bigquery"
-copyright = u"2019, Google"
-author = u"Google APIs"
+project = "google-cloud-bigquery"
+copyright = "2019, Google"
+author = "Google APIs"
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
@@ -97,6 +110,7 @@
# directories to ignore when looking for source files.
exclude_patterns = [
"_build",
+ "**/.nox/**/*",
"samples/AUTHORING_GUIDE.md",
"samples/CONTRIBUTING.md",
"samples/snippets/README.rst",
@@ -269,7 +283,7 @@
(
master_doc,
"google-cloud-bigquery.tex",
- u"google-cloud-bigquery Documentation",
+ "google-cloud-bigquery Documentation",
author,
"manual",
)
@@ -304,7 +318,7 @@
(
master_doc,
"google-cloud-bigquery",
- u"google-cloud-bigquery Documentation",
+ "google-cloud-bigquery Documentation",
[author],
1,
)
@@ -323,7 +337,7 @@
(
master_doc,
"google-cloud-bigquery",
- u"google-cloud-bigquery Documentation",
+ "google-cloud-bigquery Documentation",
author,
"google-cloud-bigquery",
"google-cloud-bigquery Library",
@@ -351,6 +365,9 @@
"google.api_core": ("https://googleapis.dev/python/google-api-core/latest/", None,),
"grpc": ("https://grpc.github.io/grpc/python/", None),
"proto-plus": ("https://proto-plus-python.readthedocs.io/en/latest/", None),
+ "protobuf": ("https://googleapis.dev/python/protobuf/latest/", None),
+ "pandas": ("http://pandas.pydata.org/pandas-docs/dev", None),
+ "geopandas": ("https://geopandas.org/", None),
}
diff --git a/docs/dbapi.rst b/docs/dbapi.rst
index ca0256d3c..81f000bc7 100644
--- a/docs/dbapi.rst
+++ b/docs/dbapi.rst
@@ -4,3 +4,47 @@ DB-API Reference
.. automodule:: google.cloud.bigquery.dbapi
:members:
:show-inheritance:
+
+
+DB-API Query-Parameter Syntax
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The BigQuery DB-API uses the `qmark` `parameter style
+<https://www.python.org/dev/peps/pep-0249/#paramstyle>`_ for
+unnamed/positional parameters and the `pyformat` parameter style for
+named parameters.
+
+An example of a query using unnamed parameters::
+
+ insert into people (name, income) values (?, ?)
+
+and using named parameters::
+
+ insert into people (name, income) values (%(name)s, %(income)s)
+
+Providing explicit type information
+-----------------------------------
+
+BigQuery requires type information for parameters. The BigQuery
+DB-API can usually determine parameter types for parameters based on
+provided values. Sometimes, however, types can't be determined (for
+example when `None` is passed) or are determined incorrectly (for
+example when passing a floating-point value to a numeric column).
+
+The BigQuery DB-API provides an extended parameter syntax. For named
+parameters, a BigQuery type is provided after the name separated by a
+colon, as in::
+
+ insert into people (name, income) values (%(name:string)s, %(income:numeric)s)
+
+For unnamed parameters, use the named syntax with a type, but no
+name, as in::
+
+ insert into people (name, income) values (%(:string)s, %(:numeric)s)
+
+Providing type information is the *only* way to pass `struct` data::
+
+ cursor.execute(
+ "insert into points (point) values (%(:struct<x float64, y float64>)s)",
+ [{"x": 10, "y": 20}],
+ )
diff --git a/docs/enums.rst b/docs/enums.rst
new file mode 100644
index 000000000..57608968a
--- /dev/null
+++ b/docs/enums.rst
@@ -0,0 +1,6 @@
+BigQuery Enums
+==============
+
+.. automodule:: google.cloud.bigquery.enums
+ :members:
+ :undoc-members:
diff --git a/docs/reference.rst b/docs/reference.rst
index 3643831cb..d8738e67b 100644
--- a/docs/reference.rst
+++ b/docs/reference.rst
@@ -58,13 +58,17 @@ Job-Related Types
job.Compression
job.CreateDisposition
job.DestinationFormat
+ job.DmlStats
job.Encoding
+ job.OperationType
job.QueryPlanEntry
job.QueryPlanEntryStep
job.QueryPriority
+ job.ReservationUsage
job.SourceFormat
job.WriteDisposition
job.SchemaUpdateOption
+ job.TransactionInfo
Dataset
@@ -89,6 +93,7 @@ Table
table.RangePartitioning
table.Row
table.RowIterator
+ table.SnapshotDefinition
table.Table
table.TableListItem
table.TableReference
@@ -110,9 +115,11 @@ Routine
.. autosummary::
:toctree: generated
+ routine.DeterminismLevel
routine.Routine
routine.RoutineArgument
routine.RoutineReference
+ routine.RoutineType
Schema
======
@@ -131,6 +138,7 @@ Query
query.ArrayQueryParameter
query.ScalarQueryParameter
+ query.ScalarQueryParameterType
query.StructQueryParameter
query.UDFResource
@@ -171,10 +179,11 @@ Magics
Enums
=====
-.. autosummary::
- :toctree: generated
+.. toctree::
+ :maxdepth: 2
+
+ enums
- enums.StandardSqlDataTypes
Encryption Configuration
========================
diff --git a/docs/snippets.py b/docs/snippets.py
index 8c106e63d..c62001fc0 100644
--- a/docs/snippets.py
+++ b/docs/snippets.py
@@ -363,7 +363,6 @@ def test_update_table_expiration(client, to_delete):
# [START bigquery_update_table_expiration]
import datetime
- import pytz
# from google.cloud import bigquery
# client = bigquery.Client()
@@ -375,7 +374,9 @@ def test_update_table_expiration(client, to_delete):
assert table.expires is None
# set table to expire 5 days from now
- expiration = datetime.datetime.now(pytz.utc) + datetime.timedelta(days=5)
+ expiration = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(
+ days=5
+ )
table.expires = expiration
table = client.update_table(table, ["expires"]) # API request
@@ -478,132 +479,6 @@ def test_update_table_cmek(client, to_delete):
# [END bigquery_update_table_cmek]
-@pytest.mark.skip(
- reason=(
- "update_table() is flaky "
- "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589"
- )
-)
-def test_manage_views(client, to_delete):
- project = client.project
- source_dataset_id = "source_dataset_{}".format(_millis())
- source_dataset_ref = bigquery.DatasetReference(project, source_dataset_id)
- source_dataset = bigquery.Dataset(source_dataset_ref)
- source_dataset = client.create_dataset(source_dataset)
- to_delete.append(source_dataset)
-
- job_config = bigquery.LoadJobConfig()
- job_config.schema = [
- bigquery.SchemaField("name", "STRING"),
- bigquery.SchemaField("post_abbr", "STRING"),
- ]
- job_config.skip_leading_rows = 1
- uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv"
- source_table_id = "us_states"
- load_job = client.load_table_from_uri(
- uri, source_dataset.table(source_table_id), job_config=job_config
- )
- load_job.result()
-
- shared_dataset_id = "shared_dataset_{}".format(_millis())
- shared_dataset_ref = bigquery.DatasetReference(project, shared_dataset_id)
- shared_dataset = bigquery.Dataset(shared_dataset_ref)
- shared_dataset = client.create_dataset(shared_dataset)
- to_delete.append(shared_dataset)
-
- # [START bigquery_create_view]
- # from google.cloud import bigquery
- # client = bigquery.Client()
- # project = 'my-project'
- # source_dataset_id = 'my_source_dataset'
- # source_table_id = 'us_states'
- # shared_dataset_ref = bigquery.DatasetReference(project, 'my_shared_dataset')
-
- # This example shows how to create a shared view of a source table of
- # US States. The source table contains all 50 states, while the view will
- # contain only states with names starting with 'W'.
- view_ref = shared_dataset_ref.table("my_shared_view")
- view = bigquery.Table(view_ref)
- sql_template = 'SELECT name, post_abbr FROM `{}.{}.{}` WHERE name LIKE "W%"'
- view.view_query = sql_template.format(project, source_dataset_id, source_table_id)
- view = client.create_table(view) # API request
-
- print("Successfully created view at {}".format(view.full_table_id))
- # [END bigquery_create_view]
-
- # [START bigquery_update_view_query]
- # from google.cloud import bigquery
- # client = bigquery.Client()
- # project = 'my-project'
- # source_dataset_id = 'my_source_dataset'
- # source_table_id = 'us_states'
- # shared_dataset_ref = bigquery.DatasetReference(project, 'my_shared_dataset')
-
- # This example shows how to update a shared view of a source table of
- # US States. The view's query will be updated to contain only states with
- # names starting with 'M'.
- view_ref = shared_dataset_ref.table("my_shared_view")
- view = bigquery.Table(view_ref)
- sql_template = 'SELECT name, post_abbr FROM `{}.{}.{}` WHERE name LIKE "M%"'
- view.view_query = sql_template.format(project, source_dataset_id, source_table_id)
- view = client.update_table(view, ["view_query"]) # API request
- # [END bigquery_update_view_query]
-
- # [START bigquery_get_view]
- # from google.cloud import bigquery
- # client = bigquery.Client()
- # shared_dataset_id = 'my_shared_dataset'
- project = client.project
- shared_dataset_ref = bigquery.DatasetReference(project, shared_dataset_id)
- view_ref = shared_dataset_ref.table("my_shared_view")
- view = client.get_table(view_ref) # API Request
-
- # Display view properties
- print("View at {}".format(view.full_table_id))
- print("View Query:\n{}".format(view.view_query))
- # [END bigquery_get_view]
- assert view.view_query is not None
-
- analyst_group_email = "example-analyst-group@google.com"
- # [START bigquery_grant_view_access]
- # from google.cloud import bigquery
- # client = bigquery.Client()
-
- # Assign access controls to the dataset containing the view
- # shared_dataset_id = 'my_shared_dataset'
- # analyst_group_email = 'data_analysts@example.com'
- project = client.project
- shared_dataset_ref = bigquery.DatasetReference(project, shared_dataset_id)
- shared_dataset = client.get_dataset(shared_dataset_ref) # API request
- access_entries = shared_dataset.access_entries
- access_entries.append(
- bigquery.AccessEntry("READER", "groupByEmail", analyst_group_email)
- )
- shared_dataset.access_entries = access_entries
- shared_dataset = client.update_dataset(
- shared_dataset, ["access_entries"]
- ) # API request
-
- # Authorize the view to access the source dataset
- # project = 'my-project'
- # source_dataset_id = 'my_source_dataset'
- project = client.project
- source_dataset_ref = bigquery.DatasetReference(project, source_dataset_id)
- source_dataset = client.get_dataset(source_dataset_ref) # API request
- view_reference = {
- "projectId": project,
- "datasetId": shared_dataset_id,
- "tableId": "my_shared_view",
- }
- access_entries = source_dataset.access_entries
- access_entries.append(bigquery.AccessEntry(None, "view", view_reference))
- source_dataset.access_entries = access_entries
- source_dataset = client.update_dataset(
- source_dataset, ["access_entries"]
- ) # API request
- # [END bigquery_grant_view_access]
-
-
def test_load_table_add_column(client, to_delete):
dataset_id = "load_table_add_column_{}".format(_millis())
project = client.project
diff --git a/docs/usage/index.rst b/docs/usage/index.rst
index ff4c9d7f1..1d3cc9f64 100644
--- a/docs/usage/index.rst
+++ b/docs/usage/index.rst
@@ -29,7 +29,7 @@ Integrations with Other Libraries
pandas
-See also, the :mod:`google.cloud.bigquery.magics` module for integrations
-with Jupyter.
+See also, the :mod:`google.cloud.bigquery.magics.magics` module for
+integrations with Jupyter.
diff --git a/docs/usage/pandas.rst b/docs/usage/pandas.rst
index 9db98dfbb..92eee67cf 100644
--- a/docs/usage/pandas.rst
+++ b/docs/usage/pandas.rst
@@ -37,6 +37,21 @@ To retrieve table rows as a :class:`pandas.DataFrame`:
:start-after: [START bigquery_list_rows_dataframe]
:end-before: [END bigquery_list_rows_dataframe]
+
+Retrieve BigQuery GEOGRAPHY data as a GeoPandas GeoDataFrame
+------------------------------------------------------------
+
+`GeoPandas <https://geopandas.org/>`_ adds geospatial analytics
+capabilities to Pandas. To retrieve query results containing
+GEOGRAPHY data as a :class:`geopandas.GeoDataFrame`:
+
+.. literalinclude:: ../samples/geography/to_geodataframe.py
+ :language: python
+ :dedent: 4
+ :start-after: [START bigquery_query_results_geodataframe]
+ :end-before: [END bigquery_query_results_geodataframe]
+
+
Load a Pandas DataFrame to a BigQuery Table
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py
index 41f987228..a7a0da3dd 100644
--- a/google/cloud/bigquery/__init__.py
+++ b/google/cloud/bigquery/__init__.py
@@ -37,8 +37,12 @@
from google.cloud.bigquery.dataset import Dataset
from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery import enums
+from google.cloud.bigquery.enums import AutoRowIDs
+from google.cloud.bigquery.enums import DecimalTargetType
+from google.cloud.bigquery.enums import KeyResultStatementKind
from google.cloud.bigquery.enums import SqlTypeNames
from google.cloud.bigquery.enums import StandardSqlDataTypes
+from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError
from google.cloud.bigquery.external_config import ExternalConfig
from google.cloud.bigquery.external_config import BigtableOptions
from google.cloud.bigquery.external_config import BigtableColumnFamily
@@ -46,37 +50,48 @@
from google.cloud.bigquery.external_config import CSVOptions
from google.cloud.bigquery.external_config import GoogleSheetsOptions
from google.cloud.bigquery.external_config import ExternalSourceFormat
+from google.cloud.bigquery.format_options import ParquetOptions
from google.cloud.bigquery.job import Compression
from google.cloud.bigquery.job import CopyJob
from google.cloud.bigquery.job import CopyJobConfig
from google.cloud.bigquery.job import CreateDisposition
from google.cloud.bigquery.job import DestinationFormat
+from google.cloud.bigquery.job import DmlStats
from google.cloud.bigquery.job import Encoding
from google.cloud.bigquery.job import ExtractJob
from google.cloud.bigquery.job import ExtractJobConfig
from google.cloud.bigquery.job import LoadJob
from google.cloud.bigquery.job import LoadJobConfig
+from google.cloud.bigquery.job import OperationType
from google.cloud.bigquery.job import QueryJob
from google.cloud.bigquery.job import QueryJobConfig
from google.cloud.bigquery.job import QueryPriority
from google.cloud.bigquery.job import SchemaUpdateOption
+from google.cloud.bigquery.job import ScriptOptions
from google.cloud.bigquery.job import SourceFormat
from google.cloud.bigquery.job import UnknownJob
+from google.cloud.bigquery.job import TransactionInfo
from google.cloud.bigquery.job import WriteDisposition
from google.cloud.bigquery.model import Model
from google.cloud.bigquery.model import ModelReference
from google.cloud.bigquery.query import ArrayQueryParameter
+from google.cloud.bigquery.query import ArrayQueryParameterType
from google.cloud.bigquery.query import ScalarQueryParameter
+from google.cloud.bigquery.query import ScalarQueryParameterType
from google.cloud.bigquery.query import StructQueryParameter
+from google.cloud.bigquery.query import StructQueryParameterType
from google.cloud.bigquery.query import UDFResource
from google.cloud.bigquery.retry import DEFAULT_RETRY
+from google.cloud.bigquery.routine import DeterminismLevel
from google.cloud.bigquery.routine import Routine
from google.cloud.bigquery.routine import RoutineArgument
from google.cloud.bigquery.routine import RoutineReference
+from google.cloud.bigquery.routine import RoutineType
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.table import PartitionRange
from google.cloud.bigquery.table import RangePartitioning
from google.cloud.bigquery.table import Row
+from google.cloud.bigquery.table import SnapshotDefinition
from google.cloud.bigquery.table import Table
from google.cloud.bigquery.table import TableReference
from google.cloud.bigquery.table import TimePartitioningType
@@ -92,6 +107,9 @@
"ArrayQueryParameter",
"ScalarQueryParameter",
"StructQueryParameter",
+ "ArrayQueryParameterType",
+ "ScalarQueryParameterType",
+ "StructQueryParameterType",
# Datasets
"Dataset",
"DatasetReference",
@@ -102,6 +120,7 @@
"PartitionRange",
"RangePartitioning",
"Row",
+ "SnapshotDefinition",
"TimePartitioning",
"TimePartitioningType",
# Jobs
@@ -126,17 +145,27 @@
"BigtableOptions",
"BigtableColumnFamily",
"BigtableColumn",
+ "DmlStats",
"CSVOptions",
"GoogleSheetsOptions",
+ "ParquetOptions",
+ "ScriptOptions",
+ "TransactionInfo",
"DEFAULT_RETRY",
# Enum Constants
"enums",
+ "AutoRowIDs",
"Compression",
"CreateDisposition",
+ "DecimalTargetType",
"DestinationFormat",
+ "DeterminismLevel",
"ExternalSourceFormat",
"Encoding",
+ "KeyResultStatementKind",
+ "OperationType",
"QueryPriority",
+ "RoutineType",
"SchemaUpdateOption",
"SourceFormat",
"SqlTypeNames",
@@ -144,6 +173,8 @@
"WriteDisposition",
# EncryptionConfiguration
"EncryptionConfiguration",
+ # Custom exceptions
+ "LegacyBigQueryStorageError",
]
diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py
index daa14b92a..0a1f71444 100644
--- a/google/cloud/bigquery/_helpers.py
+++ b/google/cloud/bigquery/_helpers.py
@@ -17,7 +17,9 @@
import base64
import datetime
import decimal
+import math
import re
+from typing import Union
from google.cloud._helpers import UTC
from google.cloud._helpers import _date_from_iso8601_date
@@ -25,6 +27,10 @@
from google.cloud._helpers import _RFC3339_MICROS
from google.cloud._helpers import _RFC3339_NO_FRACTION
from google.cloud._helpers import _to_bytes
+import packaging.version
+
+from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError
+
_RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f"
_TIMEONLY_WO_MICROS = "%H:%M:%S"
@@ -36,6 +42,66 @@
re.VERBOSE,
)
+_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0")
+_BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0")
+
+
+class BQStorageVersions:
+ """Version comparisons for google-cloud-bigquery-storage package."""
+
+ def __init__(self):
+ self._installed_version = None
+
+ @property
+ def installed_version(self) -> packaging.version.Version:
+ """Return the parsed version of google-cloud-bigquery-storage."""
+ if self._installed_version is None:
+ from google.cloud import bigquery_storage
+
+ self._installed_version = packaging.version.parse(
+ # Use 0.0.0, since it is earlier than any released version.
+ # Legacy versions also have the same property, but
+ # creating a LegacyVersion has been deprecated.
+ # https://github.com/pypa/packaging/issues/321
+ getattr(bigquery_storage, "__version__", "0.0.0")
+ )
+
+ return self._installed_version
+
+ @property
+ def is_read_session_optional(self) -> bool:
+ """True if read_session is optional to rows().
+
+ See: https://github.com/googleapis/python-bigquery-storage/pull/228
+ """
+ return self.installed_version >= _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION
+
+ def verify_version(self):
+ """Verify that a recent enough version of BigQuery Storage extra is
+ installed.
+
+ The function assumes that google-cloud-bigquery-storage extra is
+ installed, and should thus be used in places where this assumption
+ holds.
+
+ Because `pip` can install an outdated version of this extra despite the
+ constraints in `setup.py`, the calling code can use this helper to
+ verify the version compatibility at runtime.
+
+ Raises:
+ LegacyBigQueryStorageError:
+ If the google-cloud-bigquery-storage package is outdated.
+ """
+ if self.installed_version < _MIN_BQ_STORAGE_VERSION:
+ msg = (
+ "Dependency google-cloud-bigquery-storage is outdated, please upgrade "
+ f"it to version >= 2.0.0 (version found: {self.installed_version})."
+ )
+ raise LegacyBigQueryStorageError(msg)
+
+
+BQ_STORAGE_VERSIONS = BQStorageVersions()
+
def _not_null(value, field):
"""Check whether 'value' should be coerced to 'field' type."""
@@ -273,9 +339,15 @@ def _int_to_json(value):
return value
-def _float_to_json(value):
+def _float_to_json(value) -> Union[None, str, float]:
"""Coerce 'value' to an JSON-compatible representation."""
- return value
+ if value is None:
+ return None
+
+ if isinstance(value, str):
+ value = float(value)
+
+ return str(value) if (math.isnan(value) or math.isinf(value)) else float(value)
def _decimal_to_json(value):
@@ -363,6 +435,11 @@ def _time_to_json(value):
"DATETIME": _datetime_to_json,
"DATE": _date_to_json,
"TIME": _time_to_json,
+ # Make sure DECIMAL and BIGDECIMAL are handled, even though
+ # requests for them should be converted to NUMERIC. Better safe
+ # than sorry.
+ "DECIMAL": _decimal_to_json,
+ "BIGDECIMAL": _decimal_to_json,
}
diff --git a/google/cloud/bigquery/_http.py b/google/cloud/bigquery/_http.py
index 8ee633e64..81e7922e6 100644
--- a/google/cloud/bigquery/_http.py
+++ b/google/cloud/bigquery/_http.py
@@ -14,11 +14,22 @@
"""Create / interact with Google BigQuery connections."""
-from google.cloud import _http
+import os
+import pkg_resources
+from google.cloud import _http # pytype: disable=import-error
from google.cloud.bigquery import __version__
+# TODO: Increase the minimum version of google-cloud-core to 1.6.0
+# and remove this logic. See:
+# https://github.com/googleapis/python-bigquery/issues/509
+if os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE") == "true": # pragma: NO COVER
+ release = pkg_resources.get_distribution("google-cloud-core").parsed_version
+ if release < pkg_resources.parse_version("1.6.0"):
+ raise ImportError("google-cloud-core >= 1.6.0 is required to use mTLS feature")
+
+
class Connection(_http.JSONConnection):
"""A connection to Google BigQuery via the JSON REST API.
@@ -26,13 +37,18 @@ class Connection(_http.JSONConnection):
client (google.cloud.bigquery.client.Client): The client that owns the current connection.
client_info (Optional[google.api_core.client_info.ClientInfo]): Instance used to generate user agent.
+
+ api_endpoint (str): The api_endpoint to use. If None, the library will decide what endpoint to use.
"""
DEFAULT_API_ENDPOINT = "https://bigquery.googleapis.com"
+ DEFAULT_API_MTLS_ENDPOINT = "https://bigquery.mtls.googleapis.com"
- def __init__(self, client, client_info=None, api_endpoint=DEFAULT_API_ENDPOINT):
+ def __init__(self, client, client_info=None, api_endpoint=None):
super(Connection, self).__init__(client, client_info)
- self.API_BASE_URL = api_endpoint
+ self.API_BASE_URL = api_endpoint or self.DEFAULT_API_ENDPOINT
+ self.API_BASE_MTLS_URL = self.DEFAULT_API_MTLS_ENDPOINT
+ self.ALLOW_AUTO_SWITCH_TO_MTLS_URL = api_endpoint is None
self._client_info.gapic_version = __version__
self._client_info.client_library_version = __version__
diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py
index 162c58b4b..ab58b1729 100644
--- a/google/cloud/bigquery/_pandas_helpers.py
+++ b/google/cloud/bigquery/_pandas_helpers.py
@@ -20,11 +20,40 @@
import queue
import warnings
-
try:
import pandas
except ImportError: # pragma: NO COVER
pandas = None
+else:
+ import numpy
+
+try:
+ # _BaseGeometry is used to detect shapely objects in `bq_to_arrow_array`
+ from shapely.geometry.base import BaseGeometry as _BaseGeometry
+except ImportError: # pragma: NO COVER
+ # No shapely, use NoneType for _BaseGeometry as a placeholder.
+ _BaseGeometry = type(None)
+else:
+ if pandas is not None: # pragma: NO COVER
+
+ def _to_wkb():
+ # Create a closure that:
+ # - Adds a not-null check. This allows the returned function to
+ # be used directly with apply, unlike `shapely.wkb.dumps`.
+ # - Avoids extra work done by `shapely.wkb.dumps` that we don't need.
+ # - Caches the WKBWriter (and write method lookup :) )
+ # - Avoids adding WKBWriter, lgeos, and notnull to the module namespace.
+ from shapely.geos import WKBWriter, lgeos
+
+ write = WKBWriter(lgeos).write
+ notnull = pandas.notnull
+
+ def _to_wkb(v):
+ return write(v) if notnull(v) else v
+
+ return _to_wkb
+
+ _to_wkb = _to_wkb()
try:
import pyarrow
@@ -32,6 +61,15 @@
except ImportError: # pragma: NO COVER
pyarrow = None
+try:
+ from google.cloud.bigquery_storage import ArrowSerializationOptions
+except ImportError:
+ _ARROW_COMPRESSION_SUPPORT = False
+else:
+ # Having BQ Storage available implies that pyarrow >=1.0.0 is available, too.
+ _ARROW_COMPRESSION_SUPPORT = True
+
+from google.cloud.bigquery import _helpers
from google.cloud.bigquery import schema
@@ -44,6 +82,8 @@
_PROGRESS_INTERVAL = 0.2 # Maximum time between download status checks, in seconds.
+_MAX_QUEUE_SIZE_DEFAULT = object() # max queue size sentinel for BQ Storage downloads
+
_PANDAS_DTYPE_TO_BQ = {
"bool": "BOOLEAN",
"datetime64[ns, UTC]": "TIMESTAMP",
@@ -59,6 +99,7 @@
"uint8": "INTEGER",
"uint16": "INTEGER",
"uint32": "INTEGER",
+ "geometry": "GEOGRAPHY",
}
@@ -80,6 +121,12 @@ def pyarrow_numeric():
return pyarrow.decimal128(38, 9)
+def pyarrow_bignumeric():
+ # 77th digit is partial.
+ # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types
+ return pyarrow.decimal256(76, 38)
+
+
def pyarrow_time():
return pyarrow.time64("us")
@@ -92,6 +139,7 @@ def pyarrow_timestamp():
# This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py
# When modifying it be sure to update it there as well.
BQ_TO_ARROW_SCALARS = {
+ "BIGNUMERIC": pyarrow_bignumeric,
"BOOL": pyarrow.bool_,
"BOOLEAN": pyarrow.bool_,
"BYTES": pyarrow.binary,
@@ -128,9 +176,10 @@ def pyarrow_timestamp():
pyarrow.date64().id: "DATETIME", # because millisecond resolution
pyarrow.binary().id: "BYTES",
pyarrow.string().id: "STRING", # also alias for pyarrow.utf8()
- pyarrow.decimal128(38, scale=9).id: "NUMERIC",
# The exact decimal's scale and precision are not important, as only
- # the type ID matters, and it's the same for all decimal128 instances.
+ # the type ID matters, and it's the same for all decimal128 (or decimal256) instances.
+ pyarrow.decimal128(38, scale=9).id: "NUMERIC",
+ pyarrow.decimal256(76, scale=38).id: "BIGNUMERIC",
}
else: # pragma: NO COVER
@@ -175,14 +224,16 @@ def bq_to_arrow_data_type(field):
return data_type_constructor()
-def bq_to_arrow_field(bq_field):
+def bq_to_arrow_field(bq_field, array_type=None):
"""Return the Arrow field, corresponding to a given BigQuery column.
Returns:
None: if the Arrow type cannot be determined.
"""
arrow_type = bq_to_arrow_data_type(bq_field)
- if arrow_type:
+ if arrow_type is not None:
+ if array_type is not None:
+ arrow_type = array_type # For GEOGRAPHY, at least initially
is_nullable = bq_field.mode.upper() == "NULLABLE"
return pyarrow.field(bq_field.name, arrow_type, nullable=is_nullable)
@@ -207,7 +258,24 @@ def bq_to_arrow_schema(bq_schema):
def bq_to_arrow_array(series, bq_field):
- arrow_type = bq_to_arrow_data_type(bq_field)
+ if bq_field.field_type.upper() == "GEOGRAPHY":
+ arrow_type = None
+ first = _first_valid(series)
+ if first is not None:
+ if series.dtype.name == "geometry" or isinstance(first, _BaseGeometry):
+ arrow_type = pyarrow.binary()
+ # Convert shapely geometry to WKB binary format:
+ series = series.apply(_to_wkb)
+ elif isinstance(first, bytes):
+ arrow_type = pyarrow.binary()
+ elif series.dtype.name == "geometry":
+ # We have a GeoSeries containing all nulls, convert it to a pandas series
+ series = pandas.Series(numpy.array(series))
+
+ if arrow_type is None:
+ arrow_type = bq_to_arrow_data_type(bq_field)
+ else:
+ arrow_type = bq_to_arrow_data_type(bq_field)
field_type_upper = bq_field.field_type.upper() if bq_field.field_type else ""
@@ -261,6 +329,12 @@ def list_columns_and_indexes(dataframe):
return columns_and_indexes
+def _first_valid(series):
+ first_valid_index = series.first_valid_index()
+ if first_valid_index is not None:
+ return series.at[first_valid_index]
+
+
def dataframe_to_bq_schema(dataframe, bq_schema):
"""Convert a pandas DataFrame schema to a BigQuery schema.
@@ -301,6 +375,13 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
# Otherwise, try to automatically determine the type based on the
# pandas dtype.
bq_type = _PANDAS_DTYPE_TO_BQ.get(dtype.name)
+ if bq_type is None:
+ sample_data = _first_valid(dataframe[column])
+ if (
+ isinstance(sample_data, _BaseGeometry)
+ and sample_data is not None # Paranoia
+ ):
+ bq_type = "GEOGRAPHY"
bq_field = schema.SchemaField(column, bq_type)
bq_schema_out.append(bq_field)
@@ -349,6 +430,7 @@ def augment_schema(dataframe, current_bq_schema):
Returns:
Optional[Sequence[google.cloud.bigquery.schema.SchemaField]]
"""
+ # pytype: disable=attribute-error
augmented_schema = []
unknown_type_fields = []
@@ -382,6 +464,7 @@ def augment_schema(dataframe, current_bq_schema):
return None
return augmented_schema
+ # pytype: enable=attribute-error
def dataframe_to_arrow(dataframe, bq_schema):
@@ -430,11 +513,11 @@ def dataframe_to_arrow(dataframe, bq_schema):
arrow_names = []
arrow_fields = []
for bq_field in bq_schema:
- arrow_fields.append(bq_to_arrow_field(bq_field))
arrow_names.append(bq_field.name)
arrow_arrays.append(
bq_to_arrow_array(get_column_or_index(dataframe, bq_field.name), bq_field)
)
+ arrow_fields.append(bq_to_arrow_field(bq_field, arrow_arrays[-1].type))
if all((field is not None for field in arrow_fields)):
return pyarrow.Table.from_arrays(
@@ -564,7 +647,14 @@ def _bqstorage_page_to_dataframe(column_names, dtypes, page):
def _download_table_bqstorage_stream(
download_state, bqstorage_client, session, stream, worker_queue, page_to_item
):
- rowstream = bqstorage_client.read_rows(stream.name).rows(session)
+ reader = bqstorage_client.read_rows(stream.name)
+
+ # Avoid deprecation warnings for passing in unnecessary read session.
+ # https://github.com/googleapis/python-bigquery-storage/issues/229
+ if _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional:
+ rowstream = reader.rows()
+ else:
+ rowstream = reader.rows(session)
for page in rowstream.pages:
if download_state.done:
@@ -594,6 +684,7 @@ def _download_table_bqstorage(
preserve_order=False,
selected_fields=None,
page_to_item=None,
+ max_queue_size=_MAX_QUEUE_SIZE_DEFAULT,
):
"""Use (faster, but billable) BQ Storage API to construct DataFrame."""
@@ -617,6 +708,11 @@ def _download_table_bqstorage(
for field in selected_fields:
requested_session.read_options.selected_fields.append(field.name)
+ if _ARROW_COMPRESSION_SUPPORT:
+ requested_session.read_options.arrow_serialization_options.buffer_compression = (
+ ArrowSerializationOptions.CompressionCodec.LZ4_FRAME
+ )
+
session = bqstorage_client.create_read_session(
parent="projects/{}".format(project_id),
read_session=requested_session,
@@ -640,7 +736,17 @@ def _download_table_bqstorage(
download_state = _DownloadState()
# Create a queue to collect frames as they are created in each thread.
- worker_queue = queue.Queue()
+ #
+ # The queue needs to be bounded by default, because if the user code processes the
+ # fetched result pages too slowly, while at the same time new pages are rapidly being
+ # fetched from the server, the queue can grow to the point where the process runs
+ # out of memory.
+ if max_queue_size is _MAX_QUEUE_SIZE_DEFAULT:
+ max_queue_size = total_streams
+ elif max_queue_size is None:
+ max_queue_size = 0 # unbounded
+
+ worker_queue = queue.Queue(maxsize=max_queue_size)
with concurrent.futures.ThreadPoolExecutor(max_workers=total_streams) as pool:
try:
@@ -681,15 +787,12 @@ def _download_table_bqstorage(
continue
# Return any remaining values after the workers finished.
- while not worker_queue.empty(): # pragma: NO COVER
+ while True: # pragma: NO COVER
try:
- # Include a timeout because even though the queue is
- # non-empty, it doesn't guarantee that a subsequent call to
- # get() will not block.
- frame = worker_queue.get(timeout=_PROGRESS_INTERVAL)
+ frame = worker_queue.get_nowait()
yield frame
except queue.Empty: # pragma: NO COVER
- continue
+ break
finally:
# No need for a lock because reading/replacing a variable is
# defined to be an atomic operation in the Python language
@@ -702,7 +805,7 @@ def _download_table_bqstorage(
def download_arrow_bqstorage(
- project_id, table, bqstorage_client, preserve_order=False, selected_fields=None
+ project_id, table, bqstorage_client, preserve_order=False, selected_fields=None,
):
return _download_table_bqstorage(
project_id,
@@ -722,6 +825,7 @@ def download_dataframe_bqstorage(
dtypes,
preserve_order=False,
selected_fields=None,
+ max_queue_size=_MAX_QUEUE_SIZE_DEFAULT,
):
page_to_item = functools.partial(_bqstorage_page_to_dataframe, column_names, dtypes)
return _download_table_bqstorage(
@@ -731,6 +835,7 @@ def download_dataframe_bqstorage(
preserve_order=preserve_order,
selected_fields=selected_fields,
page_to_item=page_to_item,
+ max_queue_size=max_queue_size,
)
@@ -739,7 +844,7 @@ def dataframe_to_json_generator(dataframe):
output = {}
for column, value in zip(dataframe.columns, row):
# Omit NaN values.
- if value != value:
+ if pandas.isna(value):
continue
output[column] = value
yield output
diff --git a/google/cloud/bigquery/_tqdm_helpers.py b/google/cloud/bigquery/_tqdm_helpers.py
index 2fcf2a981..99e720e2b 100644
--- a/google/cloud/bigquery/_tqdm_helpers.py
+++ b/google/cloud/bigquery/_tqdm_helpers.py
@@ -16,6 +16,8 @@
import concurrent.futures
import time
+import typing
+from typing import Optional
import warnings
try:
@@ -23,6 +25,10 @@
except ImportError: # pragma: NO COVER
tqdm = None
+if typing.TYPE_CHECKING: # pragma: NO COVER
+ from google.cloud.bigquery import QueryJob
+ from google.cloud.bigquery.table import RowIterator
+
_NO_TQDM_ERROR = (
"A progress bar was requested, but there was an error loading the tqdm "
"library. Please install tqdm to use the progress bar functionality."
@@ -32,7 +38,7 @@
def get_progress_bar(progress_bar_type, description, total, unit):
- """Construct a tqdm progress bar object, if tqdm is ."""
+ """Construct a tqdm progress bar object, if tqdm is installed."""
if tqdm is None:
if progress_bar_type is not None:
warnings.warn(_NO_TQDM_ERROR, UserWarning, stacklevel=3)
@@ -53,16 +59,34 @@ def get_progress_bar(progress_bar_type, description, total, unit):
return None
-def wait_for_query(query_job, progress_bar_type=None):
- """Return query result and display a progress bar while the query running, if tqdm is installed."""
+def wait_for_query(
+ query_job: "QueryJob",
+ progress_bar_type: Optional[str] = None,
+ max_results: Optional[int] = None,
+) -> "RowIterator":
+ """Return query result and display a progress bar while the query is running, if tqdm is installed.
+
+ Args:
+ query_job:
+ The job representing the execution of the query on the server.
+ progress_bar_type:
+ The type of progress bar to use to show query progress.
+ max_results:
+ The maximum number of rows the row iterator should return.
+
+ Returns:
+ A row iterator over the query results.
+ """
default_total = 1
current_stage = None
start_time = time.time()
+
progress_bar = get_progress_bar(
progress_bar_type, "Query is running", default_total, "query"
)
if progress_bar is None:
- return query_job.result()
+ return query_job.result(max_results=max_results)
+
i = 0
while True:
if query_job.query_plan:
@@ -75,7 +99,9 @@ def wait_for_query(query_job, progress_bar_type=None):
),
)
try:
- query_result = query_job.result(timeout=_PROGRESS_BAR_UPDATE_INTERVAL)
+ query_result = query_job.result(
+ timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=max_results
+ )
progress_bar.update(default_total)
progress_bar.set_description(
"Query complete after {:0.2f}s".format(time.time() - start_time),
@@ -89,5 +115,6 @@ def wait_for_query(query_job, progress_bar_type=None):
progress_bar.update(i + 1)
i += 1
continue
+
progress_bar.close()
return query_result
diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py
index b270075a9..023346ffa 100644
--- a/google/cloud/bigquery/client.py
+++ b/google/cloud/bigquery/client.py
@@ -19,6 +19,7 @@
from collections import abc as collections_abc
import copy
+import datetime
import functools
import gzip
import io
@@ -26,44 +27,73 @@
import json
import math
import os
+import packaging.version
import tempfile
+from typing import Any, BinaryIO, Dict, Iterable, Optional, Sequence, Tuple, Union
import uuid
import warnings
try:
import pyarrow
+
+ _PYARROW_VERSION = packaging.version.parse(pyarrow.__version__)
except ImportError: # pragma: NO COVER
pyarrow = None
-from google import resumable_media
+from google import resumable_media # type: ignore
from google.resumable_media.requests import MultipartUpload
from google.resumable_media.requests import ResumableUpload
import google.api_core.client_options
-import google.api_core.exceptions
+import google.api_core.exceptions as core_exceptions
from google.api_core.iam import Policy
from google.api_core import page_iterator
+from google.api_core import retry as retries
import google.cloud._helpers
-from google.cloud import exceptions
-from google.cloud.client import ClientWithProject
+from google.cloud import exceptions # pytype: disable=import-error
+from google.cloud.client import ClientWithProject # pytype: disable=import-error
+
+try:
+ from google.cloud.bigquery_storage_v1.services.big_query_read.client import (
+ DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO,
+ )
+except ImportError:
+ DEFAULT_BQSTORAGE_CLIENT_INFO = None
from google.cloud.bigquery._helpers import _del_sub_prop
from google.cloud.bigquery._helpers import _get_sub_prop
from google.cloud.bigquery._helpers import _record_field_to_json
from google.cloud.bigquery._helpers import _str_or_none
+from google.cloud.bigquery._helpers import BQ_STORAGE_VERSIONS
from google.cloud.bigquery._helpers import _verify_job_config_type
from google.cloud.bigquery._http import Connection
from google.cloud.bigquery import _pandas_helpers
from google.cloud.bigquery.dataset import Dataset
from google.cloud.bigquery.dataset import DatasetListItem
from google.cloud.bigquery.dataset import DatasetReference
+from google.cloud.bigquery.enums import AutoRowIDs
+from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError
from google.cloud.bigquery.opentelemetry_tracing import create_span
from google.cloud.bigquery import job
+from google.cloud.bigquery.job import (
+ CopyJob,
+ CopyJobConfig,
+ ExtractJob,
+ ExtractJobConfig,
+ LoadJob,
+ LoadJobConfig,
+ QueryJob,
+ QueryJobConfig,
+)
from google.cloud.bigquery.model import Model
from google.cloud.bigquery.model import ModelReference
from google.cloud.bigquery.model import _model_arg_to_model_ref
from google.cloud.bigquery.query import _QueryResults
-from google.cloud.bigquery.retry import DEFAULT_RETRY
+from google.cloud.bigquery.retry import (
+ DEFAULT_JOB_RETRY,
+ DEFAULT_RETRY,
+ DEFAULT_TIMEOUT,
+)
from google.cloud.bigquery.routine import Routine
from google.cloud.bigquery.routine import RoutineReference
from google.cloud.bigquery.schema import SchemaField
@@ -75,13 +105,10 @@
from google.cloud.bigquery.table import RowIterator
-_DEFAULT_CHUNKSIZE = 1048576 # 1024 * 1024 B = 1 MB
+_DEFAULT_CHUNKSIZE = 100 * 1024 * 1024 # 100 MB
_MAX_MULTIPART_SIZE = 5 * 1024 * 1024
_DEFAULT_NUM_RETRIES = 6
-_BASE_UPLOAD_TEMPLATE = (
- "https://bigquery.googleapis.com/upload/bigquery/v2/projects/"
- "{project}/jobs?uploadType="
-)
+_BASE_UPLOAD_TEMPLATE = "{host}/upload/bigquery/v2/projects/{project}/jobs?uploadType="
_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "multipart"
_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "resumable"
_GENERIC_CONTENT_TYPE = "*/*"
@@ -101,6 +128,9 @@
# https://github.com/googleapis/python-bigquery/issues/438
_MIN_GET_QUERY_RESULTS_TIMEOUT = 120
+# https://github.com/googleapis/python-bigquery/issues/781#issuecomment-883497414
+_PYARROW_BAD_VERSIONS = frozenset([packaging.version.Version("2.0.0")])
+
class Project(object):
"""Wrapper for resource describing a BigQuery project.
@@ -219,8 +249,11 @@ def close(self):
self._http.close()
def get_service_account_email(
- self, project=None, retry=DEFAULT_RETRY, timeout=None
- ):
+ self,
+ project: str = None,
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> str:
"""Get the email address of the project's BigQuery service account
Note:
@@ -262,8 +295,13 @@ def get_service_account_email(
return api_response["email"]
def list_projects(
- self, max_results=None, page_token=None, retry=DEFAULT_RETRY, timeout=None
- ):
+ self,
+ max_results: int = None,
+ page_token: str = None,
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ page_size: int = None,
+ ) -> page_iterator.Iterator:
"""List projects for the project associated with this client.
See
@@ -271,8 +309,8 @@ def list_projects(
Args:
max_results (Optional[int]):
- Maximum number of projects to return, If not passed,
- defaults to a value set by the API.
+ Maximum number of projects to return.
+ Defaults to a value set by the API.
page_token (Optional[str]):
Token representing a cursor into the projects. If not passed,
@@ -287,6 +325,10 @@ def list_projects(
The number of seconds to wait for the underlying HTTP transport
before using ``retry``.
+ page_size (Optional[int]):
+ Maximum number of projects to return in each page.
+ Defaults to a value set by the API.
+
Returns:
google.api_core.page_iterator.Iterator:
Iterator of :class:`~google.cloud.bigquery.client.Project`
@@ -312,18 +354,20 @@ def api_request(*args, **kwargs):
items_key="projects",
page_token=page_token,
max_results=max_results,
+ page_size=page_size,
)
def list_datasets(
self,
- project=None,
- include_all=False,
- filter=None,
- max_results=None,
- page_token=None,
- retry=DEFAULT_RETRY,
- timeout=None,
- ):
+ project: str = None,
+ include_all: bool = False,
+ filter: str = None,
+ max_results: int = None,
+ page_token: str = None,
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ page_size: int = None,
+ ) -> page_iterator.Iterator:
"""List datasets for the project associated with this client.
See
@@ -352,6 +396,8 @@ def list_datasets(
timeout (Optional[float]):
The number of seconds to wait for the underlying HTTP transport
before using ``retry``.
+ page_size (Optional[int]):
+ Maximum number of datasets to return per page.
Returns:
google.api_core.page_iterator.Iterator:
@@ -391,9 +437,10 @@ def api_request(*args, **kwargs):
page_token=page_token,
max_results=max_results,
extra_params=extra_params,
+ page_size=page_size,
)
- def dataset(self, dataset_id, project=None):
+ def dataset(self, dataset_id: str, project: str = None) -> DatasetReference:
"""Deprecated: Construct a reference to a dataset.
.. deprecated:: 1.24.0
@@ -431,15 +478,38 @@ def dataset(self, dataset_id, project=None):
)
return DatasetReference(project, dataset_id)
- def _create_bqstorage_client(self):
+ def _ensure_bqstorage_client(
+ self,
+ bqstorage_client: Optional[
+ "google.cloud.bigquery_storage.BigQueryReadClient"
+ ] = None,
+ client_options: Optional[google.api_core.client_options.ClientOptions] = None,
+ client_info: Optional[
+ "google.api_core.gapic_v1.client_info.ClientInfo"
+ ] = DEFAULT_BQSTORAGE_CLIENT_INFO,
+ ) -> Optional["google.cloud.bigquery_storage.BigQueryReadClient"]:
"""Create a BigQuery Storage API client using this client's credentials.
- If a client cannot be created due to missing dependencies, raise a
- warning and return ``None``.
+ If a client cannot be created due to a missing or outdated dependency
+ `google-cloud-bigquery-storage`, emit a warning and return ``None``.
+
+ If the `bqstorage_client` argument is not ``None``, still perform the version
+ check and return the argument back to the caller if the check passes. If it
+ fails, emit a warning and return ``None``.
+
+ Args:
+ bqstorage_client:
+ An existing BigQuery Storage client instance to check for version
+ compatibility. If ``None``, a new instance is created and returned.
+ client_options:
+ Custom options used with a new BigQuery Storage client instance if one
+ is created.
+ client_info:
+ The client info used with a new BigQuery Storage client instance if one
+ is created.
Returns:
- Optional[google.cloud.bigquery_storage.BigQueryReadClient]:
- A BigQuery Storage API client.
+ A BigQuery Storage API client.
"""
try:
from google.cloud import bigquery_storage
@@ -450,11 +520,44 @@ def _create_bqstorage_client(self):
)
return None
- return bigquery_storage.BigQueryReadClient(credentials=self._credentials)
+ try:
+ BQ_STORAGE_VERSIONS.verify_version()
+ except LegacyBigQueryStorageError as exc:
+ warnings.warn(str(exc))
+ return None
+
+ if bqstorage_client is None:
+ bqstorage_client = bigquery_storage.BigQueryReadClient(
+ credentials=self._credentials,
+ client_options=client_options,
+ client_info=client_info,
+ )
+
+ return bqstorage_client
+
+ def _dataset_from_arg(self, dataset):
+ if isinstance(dataset, str):
+ dataset = DatasetReference.from_string(
+ dataset, default_project=self.project
+ )
+
+ if not isinstance(dataset, (Dataset, DatasetReference)):
+ if isinstance(dataset, DatasetListItem):
+ dataset = dataset.reference
+ else:
+ raise TypeError(
+ "dataset must be a Dataset, DatasetReference, DatasetListItem,"
+ " or string"
+ )
+ return dataset
def create_dataset(
- self, dataset, exists_ok=False, retry=DEFAULT_RETRY, timeout=None
- ):
+ self,
+ dataset: Union[str, Dataset, DatasetReference],
+ exists_ok: bool = False,
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> Dataset:
"""API call: create the dataset via a POST request.
See
@@ -464,6 +567,7 @@ def create_dataset(
dataset (Union[ \
google.cloud.bigquery.dataset.Dataset, \
google.cloud.bigquery.dataset.DatasetReference, \
+ google.cloud.bigquery.dataset.DatasetListItem, \
str, \
]):
A :class:`~google.cloud.bigquery.dataset.Dataset` to create.
@@ -494,10 +598,7 @@ def create_dataset(
>>> dataset = client.create_dataset(dataset)
"""
- if isinstance(dataset, str):
- dataset = DatasetReference.from_string(
- dataset, default_project=self.project
- )
+ dataset = self._dataset_from_arg(dataset)
if isinstance(dataset, DatasetReference):
dataset = Dataset(dataset)
@@ -520,14 +621,18 @@ def create_dataset(
timeout=timeout,
)
return Dataset.from_api_repr(api_response)
- except google.api_core.exceptions.Conflict:
+ except core_exceptions.Conflict:
if not exists_ok:
raise
return self.get_dataset(dataset.reference, retry=retry)
def create_routine(
- self, routine, exists_ok=False, retry=DEFAULT_RETRY, timeout=None
- ):
+ self,
+ routine: Routine,
+ exists_ok: bool = False,
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> Routine:
"""[Beta] Create a routine via a POST request.
See
@@ -571,12 +676,18 @@ def create_routine(
timeout=timeout,
)
return Routine.from_api_repr(api_response)
- except google.api_core.exceptions.Conflict:
+ except core_exceptions.Conflict:
if not exists_ok:
raise
return self.get_routine(routine.reference, retry=retry)
- def create_table(self, table, exists_ok=False, retry=DEFAULT_RETRY, timeout=None):
+ def create_table(
+ self,
+ table: Union[str, Table, TableReference],
+ exists_ok: bool = False,
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> Table:
"""API call: create a table via a PUT request
See
@@ -625,7 +736,7 @@ def create_table(self, table, exists_ok=False, retry=DEFAULT_RETRY, timeout=None
timeout=timeout,
)
return Table.from_api_repr(api_response)
- except google.api_core.exceptions.Conflict:
+ except core_exceptions.Conflict:
if not exists_ok:
raise
return self.get_table(table.reference, retry=retry)
@@ -643,7 +754,12 @@ def _call_api(
return call()
return call()
- def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY, timeout=None):
+ def get_dataset(
+ self,
+ dataset_ref: Union[DatasetReference, str],
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> Dataset:
"""Fetch the dataset referenced by ``dataset_ref``
Args:
@@ -682,8 +798,12 @@ def get_dataset(self, dataset_ref, retry=DEFAULT_RETRY, timeout=None):
return Dataset.from_api_repr(api_response)
def get_iam_policy(
- self, table, requested_policy_version=1, retry=DEFAULT_RETRY, timeout=None,
- ):
+ self,
+ table: Union[Table, TableReference],
+ requested_policy_version: int = 1,
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> Policy:
if not isinstance(table, (Table, TableReference)):
raise TypeError("table must be a Table or TableReference")
@@ -707,8 +827,13 @@ def get_iam_policy(
return Policy.from_api_repr(response)
def set_iam_policy(
- self, table, policy, updateMask=None, retry=DEFAULT_RETRY, timeout=None,
- ):
+ self,
+ table: Union[Table, TableReference],
+ policy: Policy,
+ updateMask: str = None,
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> Policy:
if not isinstance(table, (Table, TableReference)):
raise TypeError("table must be a Table or TableReference")
@@ -736,8 +861,12 @@ def set_iam_policy(
return Policy.from_api_repr(response)
def test_iam_permissions(
- self, table, permissions, retry=DEFAULT_RETRY, timeout=None,
- ):
+ self,
+ table: Union[Table, TableReference],
+ permissions: Sequence[str],
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> Dict[str, Any]:
if not isinstance(table, (Table, TableReference)):
raise TypeError("table must be a Table or TableReference")
@@ -757,7 +886,12 @@ def test_iam_permissions(
return response
- def get_model(self, model_ref, retry=DEFAULT_RETRY, timeout=None):
+ def get_model(
+ self,
+ model_ref: Union[ModelReference, str],
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> Model:
"""[Beta] Fetch the model referenced by ``model_ref``.
Args:
@@ -795,7 +929,12 @@ def get_model(self, model_ref, retry=DEFAULT_RETRY, timeout=None):
)
return Model.from_api_repr(api_response)
- def get_routine(self, routine_ref, retry=DEFAULT_RETRY, timeout=None):
+ def get_routine(
+ self,
+ routine_ref: Union[Routine, RoutineReference, str],
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> Routine:
"""[Beta] Get the routine referenced by ``routine_ref``.
Args:
@@ -834,7 +973,12 @@ def get_routine(self, routine_ref, retry=DEFAULT_RETRY, timeout=None):
)
return Routine.from_api_repr(api_response)
- def get_table(self, table, retry=DEFAULT_RETRY, timeout=None):
+ def get_table(
+ self,
+ table: Union[Table, TableReference, str],
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> Table:
"""Fetch the table referenced by ``table``.
Args:
@@ -870,7 +1014,13 @@ def get_table(self, table, retry=DEFAULT_RETRY, timeout=None):
)
return Table.from_api_repr(api_response)
- def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY, timeout=None):
+ def update_dataset(
+ self,
+ dataset: Dataset,
+ fields: Sequence[str],
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> Dataset:
"""Change some fields of a dataset.
Use ``fields`` to specify which fields to update. At least one field
@@ -934,7 +1084,13 @@ def update_dataset(self, dataset, fields, retry=DEFAULT_RETRY, timeout=None):
)
return Dataset.from_api_repr(api_response)
- def update_model(self, model, fields, retry=DEFAULT_RETRY, timeout=None):
+ def update_model(
+ self,
+ model: Model,
+ fields: Sequence[str],
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> Model:
"""[Beta] Change some fields of a model.
Use ``fields`` to specify which fields to update. At least one field
@@ -992,7 +1148,13 @@ def update_model(self, model, fields, retry=DEFAULT_RETRY, timeout=None):
)
return Model.from_api_repr(api_response)
- def update_routine(self, routine, fields, retry=DEFAULT_RETRY, timeout=None):
+ def update_routine(
+ self,
+ routine: Routine,
+ fields: Sequence[str],
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> Routine:
"""[Beta] Change some fields of a routine.
Use ``fields`` to specify which fields to update. At least one field
@@ -1060,7 +1222,13 @@ def update_routine(self, routine, fields, retry=DEFAULT_RETRY, timeout=None):
)
return Routine.from_api_repr(api_response)
- def update_table(self, table, fields, retry=DEFAULT_RETRY, timeout=None):
+ def update_table(
+ self,
+ table: Table,
+ fields: Sequence[str],
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> Table:
"""Change some fields of a table.
Use ``fields`` to specify which fields to update. At least one field
@@ -1121,12 +1289,13 @@ def update_table(self, table, fields, retry=DEFAULT_RETRY, timeout=None):
def list_models(
self,
- dataset,
- max_results=None,
- page_token=None,
- retry=DEFAULT_RETRY,
- timeout=None,
- ):
+ dataset: Union[Dataset, DatasetReference, str],
+ max_results: int = None,
+ page_token: str = None,
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ page_size: int = None,
+ ) -> page_iterator.Iterator:
"""[Beta] List models in the dataset.
See
@@ -1136,6 +1305,7 @@ def list_models(
dataset (Union[ \
google.cloud.bigquery.dataset.Dataset, \
google.cloud.bigquery.dataset.DatasetReference, \
+ google.cloud.bigquery.dataset.DatasetListItem, \
str, \
]):
A reference to the dataset whose models to list from the
@@ -1143,7 +1313,7 @@ def list_models(
to create a dataset reference from a string using
:func:`google.cloud.bigquery.dataset.DatasetReference.from_string`.
max_results (Optional[int]):
- Maximum number of models to return. If not passed, defaults to a
+ Maximum number of models to return. Defaults to a
value set by the API.
page_token (Optional[str]):
Token representing a cursor into the models. If not passed,
@@ -1156,6 +1326,9 @@ def list_models(
timeout (Optional[float]):
The number of seconds to wait for the underlying HTTP transport
before using ``retry``.
+ page_size (Optional[int]):
+ Maximum number of models to return per page.
+ Defaults to a value set by the API.
Returns:
google.api_core.page_iterator.Iterator:
@@ -1163,13 +1336,7 @@ def list_models(
:class:`~google.cloud.bigquery.model.Model` contained
within the requested dataset.
"""
- if isinstance(dataset, str):
- dataset = DatasetReference.from_string(
- dataset, default_project=self.project
- )
-
- if not isinstance(dataset, (Dataset, DatasetReference)):
- raise TypeError("dataset must be a Dataset, DatasetReference, or string")
+ dataset = self._dataset_from_arg(dataset)
path = "%s/models" % dataset.path
span_attributes = {"path": path}
@@ -1192,18 +1359,20 @@ def api_request(*args, **kwargs):
items_key="models",
page_token=page_token,
max_results=max_results,
+ page_size=page_size,
)
result.dataset = dataset
return result
def list_routines(
self,
- dataset,
- max_results=None,
- page_token=None,
- retry=DEFAULT_RETRY,
- timeout=None,
- ):
+ dataset: Union[Dataset, DatasetReference, str],
+ max_results: int = None,
+ page_token: str = None,
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ page_size: int = None,
+ ) -> page_iterator.Iterator:
"""[Beta] List routines in the dataset.
See
@@ -1213,6 +1382,7 @@ def list_routines(
dataset (Union[ \
google.cloud.bigquery.dataset.Dataset, \
google.cloud.bigquery.dataset.DatasetReference, \
+ google.cloud.bigquery.dataset.DatasetListItem, \
str, \
]):
A reference to the dataset whose routines to list from the
@@ -1220,7 +1390,7 @@ def list_routines(
to create a dataset reference from a string using
:func:`google.cloud.bigquery.dataset.DatasetReference.from_string`.
max_results (Optional[int]):
- Maximum number of routines to return. If not passed, defaults
+ Maximum number of routines to return. Defaults
to a value set by the API.
page_token (Optional[str]):
Token representing a cursor into the routines. If not passed,
@@ -1233,6 +1403,9 @@ def list_routines(
timeout (Optional[float]):
The number of seconds to wait for the underlying HTTP transport
before using ``retry``.
+ page_size (Optional[int]):
+ Maximum number of routines to return per page.
+ Defaults to a value set by the API.
Returns:
google.api_core.page_iterator.Iterator:
@@ -1240,14 +1413,7 @@ def list_routines(
:class:`~google.cloud.bigquery.routine.Routine`s contained
within the requested dataset, limited by ``max_results``.
"""
- if isinstance(dataset, str):
- dataset = DatasetReference.from_string(
- dataset, default_project=self.project
- )
-
- if not isinstance(dataset, (Dataset, DatasetReference)):
- raise TypeError("dataset must be a Dataset, DatasetReference, or string")
-
+ dataset = self._dataset_from_arg(dataset)
path = "{}/routines".format(dataset.path)
span_attributes = {"path": path}
@@ -1270,18 +1436,20 @@ def api_request(*args, **kwargs):
items_key="routines",
page_token=page_token,
max_results=max_results,
+ page_size=page_size,
)
result.dataset = dataset
return result
def list_tables(
self,
- dataset,
- max_results=None,
- page_token=None,
- retry=DEFAULT_RETRY,
- timeout=None,
- ):
+ dataset: Union[Dataset, DatasetReference, str],
+ max_results: int = None,
+ page_token: str = None,
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ page_size: int = None,
+ ) -> page_iterator.Iterator:
"""List tables in the dataset.
See
@@ -1291,6 +1459,7 @@ def list_tables(
dataset (Union[ \
google.cloud.bigquery.dataset.Dataset, \
google.cloud.bigquery.dataset.DatasetReference, \
+ google.cloud.bigquery.dataset.DatasetListItem, \
str, \
]):
A reference to the dataset whose tables to list from the
@@ -1298,7 +1467,7 @@ def list_tables(
to create a dataset reference from a string using
:func:`google.cloud.bigquery.dataset.DatasetReference.from_string`.
max_results (Optional[int]):
- Maximum number of tables to return. If not passed, defaults
+ Maximum number of tables to return. Defaults
to a value set by the API.
page_token (Optional[str]):
Token representing a cursor into the tables. If not passed,
@@ -1311,6 +1480,9 @@ def list_tables(
timeout (Optional[float]):
The number of seconds to wait for the underlying HTTP transport
before using ``retry``.
+ page_size (Optional[int]):
+ Maximum number of tables to return per page.
+ Defaults to a value set by the API.
Returns:
google.api_core.page_iterator.Iterator:
@@ -1318,14 +1490,7 @@ def list_tables(
:class:`~google.cloud.bigquery.table.TableListItem` contained
within the requested dataset.
"""
- if isinstance(dataset, str):
- dataset = DatasetReference.from_string(
- dataset, default_project=self.project
- )
-
- if not isinstance(dataset, (Dataset, DatasetReference)):
- raise TypeError("dataset must be a Dataset, DatasetReference, or string")
-
+ dataset = self._dataset_from_arg(dataset)
path = "%s/tables" % dataset.path
span_attributes = {"path": path}
@@ -1347,18 +1512,19 @@ def api_request(*args, **kwargs):
items_key="tables",
page_token=page_token,
max_results=max_results,
+ page_size=page_size,
)
result.dataset = dataset
return result
def delete_dataset(
self,
- dataset,
- delete_contents=False,
- retry=DEFAULT_RETRY,
- timeout=None,
- not_found_ok=False,
- ):
+ dataset: Union[Dataset, DatasetReference, str],
+ delete_contents: bool = False,
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ not_found_ok: bool = False,
+ ) -> None:
"""Delete a dataset.
See
@@ -1368,6 +1534,7 @@ def delete_dataset(
dataset (Union[ \
google.cloud.bigquery.dataset.Dataset, \
google.cloud.bigquery.dataset.DatasetReference, \
+ google.cloud.bigquery.dataset.DatasetListItem, \
str, \
]):
A reference to the dataset to delete. If a string is passed
@@ -1387,14 +1554,7 @@ def delete_dataset(
Defaults to ``False``. If ``True``, ignore "not found" errors
when deleting the dataset.
"""
- if isinstance(dataset, str):
- dataset = DatasetReference.from_string(
- dataset, default_project=self.project
- )
-
- if not isinstance(dataset, (Dataset, DatasetReference)):
- raise TypeError("dataset must be a Dataset or a DatasetReference")
-
+ dataset = self._dataset_from_arg(dataset)
params = {}
path = dataset.path
if delete_contents:
@@ -1413,13 +1573,17 @@ def delete_dataset(
query_params=params,
timeout=timeout,
)
- except google.api_core.exceptions.NotFound:
+ except core_exceptions.NotFound:
if not not_found_ok:
raise
def delete_model(
- self, model, retry=DEFAULT_RETRY, timeout=None, not_found_ok=False
- ):
+ self,
+ model: Union[Model, ModelReference, str],
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ not_found_ok: bool = False,
+ ) -> None:
"""[Beta] Delete a model
See
@@ -1461,13 +1625,82 @@ def delete_model(
path=path,
timeout=timeout,
)
+ except core_exceptions.NotFound:
+ if not not_found_ok:
+ raise
+
+ def delete_job_metadata(
+ self,
+ job_id: Union[str, LoadJob, CopyJob, ExtractJob, QueryJob],
+ project: Optional[str] = None,
+ location: Optional[str] = None,
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ not_found_ok: bool = False,
+ ):
+ """[Beta] Delete job metadata from job history.
+
+ Note: This does not stop a running job. Use
+ :func:`~google.cloud.bigquery.client.Client.cancel_job` instead.
+
+ Args:
+ job_id: Job or job identifier.
+
+ Keyword Arguments:
+ project:
+ ID of the project which owns the job (defaults to the client's project).
+ location:
+ Location where the job was run. Ignored if ``job_id`` is a job
+ object.
+ retry:
+ How to retry the RPC.
+ timeout:
+ The number of seconds to wait for the underlying HTTP transport
+ before using ``retry``.
+ not_found_ok:
+ Defaults to ``False``. If ``True``, ignore "not found" errors
+ when deleting the job.
+ """
+ extra_params = {}
+
+ project, location, job_id = _extract_job_reference(
+ job_id, project=project, location=location
+ )
+
+ if project is None:
+ project = self.project
+
+ if location is None:
+ location = self.location
+
+ # Location is always required for jobs.delete()
+ extra_params["location"] = location
+
+ path = f"/projects/{project}/jobs/{job_id}/delete"
+
+ span_attributes = {"path": path, "job_id": job_id, "location": location}
+
+ try:
+ self._call_api(
+ retry,
+ span_name="BigQuery.deleteJob",
+ span_attributes=span_attributes,
+ method="DELETE",
+ path=path,
+ query_params=extra_params,
+ timeout=timeout,
+ )
except google.api_core.exceptions.NotFound:
if not not_found_ok:
raise
def delete_routine(
- self, routine, retry=DEFAULT_RETRY, timeout=None, not_found_ok=False
- ):
+ self,
+ routine: Union[Routine, RoutineReference, str],
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ not_found_ok: bool = False,
+ ) -> None:
"""[Beta] Delete a routine.
See
@@ -1511,13 +1744,17 @@ def delete_routine(
path=path,
timeout=timeout,
)
- except google.api_core.exceptions.NotFound:
+ except core_exceptions.NotFound:
if not not_found_ok:
raise
def delete_table(
- self, table, retry=DEFAULT_RETRY, timeout=None, not_found_ok=False
- ):
+ self,
+ table: Union[Table, TableReference, str],
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ not_found_ok: bool = False,
+ ) -> None:
"""Delete a table
See
@@ -1557,13 +1794,19 @@ def delete_table(
path=path,
timeout=timeout,
)
- except google.api_core.exceptions.NotFound:
+ except core_exceptions.NotFound:
if not not_found_ok:
raise
def _get_query_results(
- self, job_id, retry, project=None, timeout_ms=None, location=None, timeout=None,
- ):
+ self,
+ job_id: str,
+ retry: retries.Retry,
+ project: str = None,
+ timeout_ms: int = None,
+ location: str = None,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> _QueryResults:
"""Get the query results object for a query job.
Args:
@@ -1621,7 +1864,7 @@ def _get_query_results(
)
return _QueryResults.from_api_repr(resource)
- def job_from_resource(self, resource):
+ def job_from_resource(self, resource: dict) -> job.UnknownJob:
"""Detect correct job type from resource and instantiate.
Args:
@@ -1647,7 +1890,12 @@ def job_from_resource(self, resource):
return job.QueryJob.from_api_repr(resource, self)
return job.UnknownJob.from_api_repr(resource, self)
- def create_job(self, job_config, retry=DEFAULT_RETRY, timeout=None):
+ def create_job(
+ self,
+ job_config: dict,
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]:
"""Create a new job.
Args:
job_config (dict): configuration job representation returned from the API.
@@ -1738,20 +1986,33 @@ def create_job(self, job_config, retry=DEFAULT_RETRY, timeout=None):
raise TypeError("Invalid job configuration received.")
def get_job(
- self, job_id, project=None, location=None, retry=DEFAULT_RETRY, timeout=None
- ):
+ self,
+ job_id: str,
+ project: str = None,
+ location: str = None,
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]:
"""Fetch a job for the project associated with this client.
See
https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get
Args:
- job_id (str): Unique job identifier.
+ job_id (Union[ \
+ str, \
+ google.cloud.bigquery.job.LoadJob, \
+ google.cloud.bigquery.job.CopyJob, \
+ google.cloud.bigquery.job.ExtractJob, \
+ google.cloud.bigquery.job.QueryJob \
+ ]): Job identifier.
Keyword Arguments:
project (Optional[str]):
ID of the project which owns the job (defaults to the client's project).
- location (Optional[str]): Location where the job was run.
+ location (Optional[str]):
+ Location where the job was run. Ignored if ``job_id`` is a job
+ object.
retry (Optional[google.api_core.retry.Retry]):
How to retry the RPC.
timeout (Optional[float]):
@@ -1769,6 +2030,10 @@ def get_job(
"""
extra_params = {"projection": "full"}
+ project, location, job_id = _extract_job_reference(
+ job_id, project=project, location=location
+ )
+
if project is None:
project = self.project
@@ -1795,20 +2060,33 @@ def get_job(
return self.job_from_resource(resource)
def cancel_job(
- self, job_id, project=None, location=None, retry=DEFAULT_RETRY, timeout=None
- ):
+ self,
+ job_id: str,
+ project: str = None,
+ location: str = None,
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]:
"""Attempt to cancel a job from a job ID.
See
https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/cancel
Args:
- job_id (str): Unique job identifier.
+ job_id (Union[ \
+ str, \
+ google.cloud.bigquery.job.LoadJob, \
+ google.cloud.bigquery.job.CopyJob, \
+ google.cloud.bigquery.job.ExtractJob, \
+ google.cloud.bigquery.job.QueryJob \
+ ]): Job identifier.
Keyword Arguments:
project (Optional[str]):
ID of the project which owns the job (defaults to the client's project).
- location (Optional[str]): Location where the job was run.
+ location (Optional[str]):
+ Location where the job was run. Ignored if ``job_id`` is a job
+ object.
retry (Optional[google.api_core.retry.Retry]):
How to retry the RPC.
timeout (Optional[float]):
@@ -1826,6 +2104,10 @@ def cancel_job(
"""
extra_params = {"projection": "full"}
+ project, location, job_id = _extract_job_reference(
+ job_id, project=project, location=location
+ )
+
if project is None:
project = self.project
@@ -1853,17 +2135,18 @@ def cancel_job(
def list_jobs(
self,
- project=None,
- parent_job=None,
- max_results=None,
- page_token=None,
- all_users=None,
- state_filter=None,
- retry=DEFAULT_RETRY,
- timeout=None,
- min_creation_time=None,
- max_creation_time=None,
- ):
+ project: str = None,
+ parent_job: Optional[Union[QueryJob, str]] = None,
+ max_results: int = None,
+ page_token: str = None,
+ all_users: bool = None,
+ state_filter: str = None,
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ min_creation_time: datetime.datetime = None,
+ max_creation_time: datetime.datetime = None,
+ page_size: int = None,
+ ) -> page_iterator.Iterator:
"""List jobs for the project associated with this client.
See
@@ -1908,13 +2191,15 @@ def list_jobs(
Max value for job creation time. If set, only jobs created
before or at this timestamp are returned. If the datetime has
no time zone assumes UTC time.
+ page_size (Optional[int]):
+ Maximum number of jobs to return per page.
Returns:
google.api_core.page_iterator.Iterator:
Iterable of job instances.
"""
if isinstance(parent_job, job._AsyncJob):
- parent_job = parent_job.job_id
+ parent_job = parent_job.job_id # pytype: disable=attribute-error
extra_params = {
"allUsers": all_users,
@@ -1959,20 +2244,21 @@ def api_request(*args, **kwargs):
page_token=page_token,
max_results=max_results,
extra_params=extra_params,
+ page_size=page_size,
)
def load_table_from_uri(
self,
- source_uris,
- destination,
- job_id=None,
- job_id_prefix=None,
- location=None,
- project=None,
- job_config=None,
- retry=DEFAULT_RETRY,
- timeout=None,
- ):
+ source_uris: Union[str, Sequence[str]],
+ destination: Union[Table, TableReference, str],
+ job_id: str = None,
+ job_id_prefix: str = None,
+ location: str = None,
+ project: str = None,
+ job_config: LoadJobConfig = None,
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> job.LoadJob:
"""Starts a job for loading data into a table from CloudStorage.
See
@@ -2045,18 +2331,18 @@ def load_table_from_uri(
def load_table_from_file(
self,
- file_obj,
- destination,
- rewind=False,
- size=None,
- num_retries=_DEFAULT_NUM_RETRIES,
- job_id=None,
- job_id_prefix=None,
- location=None,
- project=None,
- job_config=None,
- timeout=None,
- ):
+ file_obj: BinaryIO,
+ destination: Union[Table, TableReference, str],
+ rewind: bool = False,
+ size: int = None,
+ num_retries: int = _DEFAULT_NUM_RETRIES,
+ job_id: str = None,
+ job_id_prefix: str = None,
+ location: str = None,
+ project: str = None,
+ job_config: LoadJobConfig = None,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> job.LoadJob:
"""Upload the contents of this table from a file-like object.
Similar to :meth:`load_table_from_uri`, this method creates, starts and
@@ -2150,16 +2436,16 @@ def load_table_from_file(
def load_table_from_dataframe(
self,
dataframe,
- destination,
- num_retries=_DEFAULT_NUM_RETRIES,
- job_id=None,
- job_id_prefix=None,
- location=None,
- project=None,
- job_config=None,
- parquet_compression="snappy",
- timeout=None,
- ):
+ destination: Union[Table, TableReference, str],
+ num_retries: int = _DEFAULT_NUM_RETRIES,
+ job_id: str = None,
+ job_id_prefix: str = None,
+ location: str = None,
+ project: str = None,
+ job_config: LoadJobConfig = None,
+ parquet_compression: str = "snappy",
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> job.LoadJob:
"""Upload the contents of a table from a pandas DataFrame.
Similar to :meth:`load_table_from_uri`, this method creates, starts and
@@ -2287,16 +2573,25 @@ def load_table_from_dataframe(
):
try:
table = self.get_table(destination)
- except google.api_core.exceptions.NotFound:
+ except core_exceptions.NotFound:
table = None
else:
columns_and_indexes = frozenset(
name
for name, _ in _pandas_helpers.list_columns_and_indexes(dataframe)
)
- # schema fields not present in the dataframe are not needed
job_config.schema = [
- field for field in table.schema if field.name in columns_and_indexes
+ # Field description and policy tags are not needed to
+ # serialize a data frame.
+ SchemaField(
+ field.name,
+ field.field_type,
+ mode=field.mode,
+ fields=field.fields,
+ )
+ # schema fields not present in the dataframe are not needed
+ for field in table.schema
+ if field.name in columns_and_indexes
]
job_config.schema = _pandas_helpers.dataframe_to_bq_schema(
@@ -2321,6 +2616,15 @@ def load_table_from_dataframe(
try:
if job_config.source_format == job.SourceFormat.PARQUET:
+ if _PYARROW_VERSION in _PYARROW_BAD_VERSIONS:
+ msg = (
+ "Loading dataframe data in PARQUET format with pyarrow "
+ f"{_PYARROW_VERSION} can result in data corruption. It is "
+ "therefore *strongly* advised to use a different pyarrow "
+ "version or a different source format. "
+ "See: https://github.com/googleapis/python-bigquery/issues/781"
+ )
+ warnings.warn(msg, category=RuntimeWarning)
if job_config.schema:
if parquet_compression == "snappy": # adjust the default value
@@ -2367,16 +2671,16 @@ def load_table_from_dataframe(
def load_table_from_json(
self,
- json_rows,
- destination,
- num_retries=_DEFAULT_NUM_RETRIES,
- job_id=None,
- job_id_prefix=None,
- location=None,
- project=None,
- job_config=None,
- timeout=None,
- ):
+ json_rows: Iterable[Dict[str, Any]],
+ destination: Union[Table, TableReference, str],
+ num_retries: int = _DEFAULT_NUM_RETRIES,
+ job_id: str = None,
+ job_id_prefix: str = None,
+ location: str = None,
+ project: str = None,
+ job_config: LoadJobConfig = None,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> job.LoadJob:
"""Upload the contents of a table from a JSON string or dict.
Args:
@@ -2459,7 +2763,7 @@ def load_table_from_json(
destination = _table_arg_to_table_ref(destination, default_project=self.project)
- data_str = "\n".join(json.dumps(item) for item in json_rows)
+ data_str = "\n".join(json.dumps(item, ensure_ascii=False) for item in json_rows)
encoded_str = data_str.encode()
data_file = io.BytesIO(encoded_str)
return self.load_table_from_file(
@@ -2547,7 +2851,15 @@ def _initiate_resumable_upload(
if project is None:
project = self.project
- upload_url = _RESUMABLE_URL_TEMPLATE.format(project=project)
+ # TODO: Increase the minimum version of google-cloud-core to 1.6.0
+ # and remove this logic. See:
+ # https://github.com/googleapis/python-bigquery/issues/509
+ hostname = (
+ self._connection.API_BASE_URL
+ if not hasattr(self._connection, "get_api_base_url_for_mtls")
+ else self._connection.get_api_base_url_for_mtls()
+ )
+ upload_url = _RESUMABLE_URL_TEMPLATE.format(host=hostname, project=project)
# TODO: modify ResumableUpload to take a retry.Retry object
# that it can use for the initial RPC.
@@ -2616,7 +2928,15 @@ def _do_multipart_upload(
if project is None:
project = self.project
- upload_url = _MULTIPART_URL_TEMPLATE.format(project=project)
+ # TODO: Increase the minimum version of google-cloud-core to 1.6.0
+ # and remove this logic. See:
+ # https://github.com/googleapis/python-bigquery/issues/509
+ hostname = (
+ self._connection.API_BASE_URL
+ if not hasattr(self._connection, "get_api_base_url_for_mtls")
+ else self._connection.get_api_base_url_for_mtls()
+ )
+ upload_url = _MULTIPART_URL_TEMPLATE.format(host=hostname, project=project)
upload = MultipartUpload(upload_url, headers=headers)
if num_retries is not None:
@@ -2632,16 +2952,18 @@ def _do_multipart_upload(
def copy_table(
self,
- sources,
- destination,
- job_id=None,
- job_id_prefix=None,
- location=None,
- project=None,
- job_config=None,
- retry=DEFAULT_RETRY,
- timeout=None,
- ):
+ sources: Union[
+ Table, TableReference, str, Sequence[Union[Table, TableReference, str]]
+ ],
+ destination: Union[Table, TableReference, str],
+ job_id: str = None,
+ job_id_prefix: str = None,
+ location: str = None,
+ project: str = None,
+ job_config: CopyJobConfig = None,
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> job.CopyJob:
"""Copy one or more tables to another table.
See
@@ -2735,17 +3057,17 @@ def copy_table(
def extract_table(
self,
- source,
- destination_uris,
- job_id=None,
- job_id_prefix=None,
- location=None,
- project=None,
- job_config=None,
- retry=DEFAULT_RETRY,
- timeout=None,
- source_type="Table",
- ):
+ source: Union[Table, TableReference, Model, ModelReference, str],
+ destination_uris: Union[str, Sequence[str]],
+ job_id: str = None,
+ job_id_prefix: str = None,
+ location: str = None,
+ project: str = None,
+ job_config: ExtractJobConfig = None,
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ source_type: str = "Table",
+ ) -> job.ExtractJob:
"""Start a job to extract a table into Cloud Storage files.
See
@@ -2834,15 +3156,16 @@ def extract_table(
def query(
self,
- query,
- job_config=None,
- job_id=None,
- job_id_prefix=None,
- location=None,
- project=None,
- retry=DEFAULT_RETRY,
- timeout=None,
- ):
+ query: str,
+ job_config: QueryJobConfig = None,
+ job_id: str = None,
+ job_id_prefix: str = None,
+ location: str = None,
+ project: str = None,
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ job_retry: retries.Retry = DEFAULT_JOB_RETRY,
+ ) -> job.QueryJob:
"""Run a SQL query.
See
@@ -2871,20 +3194,52 @@ def query(
Project ID of the project of where to run the job. Defaults
to the client's project.
retry (Optional[google.api_core.retry.Retry]):
- How to retry the RPC.
+ How to retry the RPC. This only applies to making RPC
+ calls. It isn't used to retry failed jobs. This has
+ a reasonable default that should only be overridden
+ with care.
timeout (Optional[float]):
The number of seconds to wait for the underlying HTTP transport
before using ``retry``.
+ job_retry (Optional[google.api_core.retry.Retry]):
+ How to retry failed jobs. The default retries
+ rate-limit-exceeded errors. Passing ``None`` disables
+ job retry.
+
+ Not all jobs can be retried. If ``job_id`` is
+ provided, then the job returned by the query will not
+ be retryable, and an exception will be raised if a
+ non-``None`` (and non-default) value for ``job_retry``
+ is also provided.
+
+ Note that errors aren't detected until ``result()`` is
+ called on the job returned. The ``job_retry``
+ specified here becomes the default ``job_retry`` for
+ ``result()``, where it can also be specified.
Returns:
google.cloud.bigquery.job.QueryJob: A new query job instance.
Raises:
TypeError:
- If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.QueryJobConfig`
- class.
+ If ``job_config`` is not an instance of
+ :class:`~google.cloud.bigquery.job.QueryJobConfig`
+ class, or if both ``job_id`` and non-``None`` non-default
+ ``job_retry`` are provided.
"""
- job_id = _make_job_id(job_id, job_id_prefix)
+ job_id_given = job_id is not None
+ if (
+ job_id_given
+ and job_retry is not None
+ and job_retry is not DEFAULT_JOB_RETRY
+ ):
+ raise TypeError(
+ "`job_retry` was provided, but the returned job is"
+ " not retryable, because a custom `job_id` was"
+ " provided."
+ )
+
+ job_id_save = job_id
if project is None:
project = self.project
@@ -2892,8 +3247,6 @@ def query(
if location is None:
location = self.location
- job_config = copy.deepcopy(job_config)
-
if self._default_query_job_config:
if job_config:
_verify_job_config_type(
@@ -2903,6 +3256,8 @@ def query(
# that is in the default,
# should be filled in with the default
# the incoming therefore has precedence
+ #
+ # Note that _fill_from_default doesn't mutate the receiver
job_config = job_config._fill_from_default(
self._default_query_job_config
)
@@ -2911,15 +3266,62 @@ def query(
self._default_query_job_config,
google.cloud.bigquery.job.QueryJobConfig,
)
- job_config = copy.deepcopy(self._default_query_job_config)
+ job_config = self._default_query_job_config
- job_ref = job._JobReference(job_id, project=project, location=location)
- query_job = job.QueryJob(job_ref, query, client=self, job_config=job_config)
- query_job._begin(retry=retry, timeout=timeout)
+ # Note that we haven't modified the original job_config (or
+ # _default_query_job_config) up to this point.
+ job_config_save = job_config
+
+ def do_query():
+ # Make a copy now, so that original doesn't get changed by the process
+ # below and to facilitate retry
+ job_config = copy.deepcopy(job_config_save)
+
+ job_id = _make_job_id(job_id_save, job_id_prefix)
+ job_ref = job._JobReference(job_id, project=project, location=location)
+ query_job = job.QueryJob(job_ref, query, client=self, job_config=job_config)
- return query_job
+ try:
+ query_job._begin(retry=retry, timeout=timeout)
+ except core_exceptions.Conflict as create_exc:
+ # The thought is if someone is providing their own job IDs and they get
+ # their job ID generation wrong, this could end up returning results for
+ # the wrong query. We thus only try to recover if job ID was not given.
+ if job_id_given:
+ raise create_exc
+
+ try:
+ query_job = self.get_job(
+ job_id,
+ project=project,
+ location=location,
+ retry=retry,
+ timeout=timeout,
+ )
+ except core_exceptions.GoogleAPIError: # (includes RetryError)
+ raise create_exc
+ else:
+ return query_job
+ else:
+ return query_job
- def insert_rows(self, table, rows, selected_fields=None, **kwargs):
+ future = do_query()
+ # The future might be in a failed state now, but if it's
+ # unrecoverable, we'll find out when we ask for its result, at which
+ # point, we may retry.
+ if not job_id_given:
+ future._retry_do_query = do_query # in case we have to retry later
+ future._job_retry = job_retry
+
+ return future
+
+ def insert_rows(
+ self,
+ table: Union[Table, TableReference, str],
+ rows: Union[Iterable[Tuple], Iterable[Dict]],
+ selected_fields: Sequence[SchemaField] = None,
+ **kwargs: dict,
+ ) -> Sequence[dict]:
"""Insert rows into a table via the streaming API.
See
@@ -2942,7 +3344,7 @@ def insert_rows(self, table, rows, selected_fields=None, **kwargs):
selected_fields (Sequence[google.cloud.bigquery.schema.SchemaField]):
The fields to return. Required if ``table`` is a
:class:`~google.cloud.bigquery.table.TableReference`.
- kwargs (Dict):
+ kwargs (dict):
Keyword arguments to
:meth:`~google.cloud.bigquery.client.Client.insert_rows_json`.
@@ -2982,8 +3384,13 @@ def insert_rows(self, table, rows, selected_fields=None, **kwargs):
return self.insert_rows_json(table, json_rows, **kwargs)
def insert_rows_from_dataframe(
- self, table, dataframe, selected_fields=None, chunk_size=500, **kwargs
- ):
+ self,
+ table: Union[Table, TableReference, str],
+ dataframe,
+ selected_fields: Sequence[SchemaField] = None,
+ chunk_size: int = 500,
+ **kwargs: Dict,
+ ) -> Sequence[Sequence[dict]]:
"""Insert rows into a table from a dataframe via the streaming API.
Args:
@@ -3031,15 +3438,15 @@ def insert_rows_from_dataframe(
def insert_rows_json(
self,
- table,
- json_rows,
- row_ids=None,
- skip_invalid_rows=None,
- ignore_unknown_values=None,
- template_suffix=None,
- retry=DEFAULT_RETRY,
- timeout=None,
- ):
+ table: Union[Table, TableReference, str],
+ json_rows: Sequence[Dict],
+ row_ids: Union[Iterable[str], AutoRowIDs, None] = AutoRowIDs.GENERATE_UUID,
+ skip_invalid_rows: bool = None,
+ ignore_unknown_values: bool = None,
+ template_suffix: str = None,
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> Sequence[dict]:
"""Insert rows into a table without applying local type conversions.
See
@@ -3055,11 +3462,20 @@ def insert_rows_json(
json_rows (Sequence[Dict]):
Row data to be inserted. Keys must match the table schema fields
and values must be JSON-compatible representations.
- row_ids (Optional[Sequence[Optional[str]]]):
+ row_ids (Union[Iterable[str], AutoRowIDs, None]):
Unique IDs, one per row being inserted. An ID can also be
``None``, indicating that an explicit insert ID should **not**
be used for that row. If the argument is omitted altogether,
unique IDs are created automatically.
+
+ .. versionchanged:: 2.21.0
+ Can also be an iterable, not just a sequence, or an
+ :class:`AutoRowIDs` enum member.
+
+ .. deprecated:: 2.21.0
+ Passing ``None`` to explicitly request autogenerating insert IDs is
+ deprecated, use :attr:`AutoRowIDs.GENERATE_UUID` instead.
+
skip_invalid_rows (Optional[bool]):
Insert all valid rows of a request, even if invalid rows exist.
The default value is ``False``, which causes the entire request
@@ -3099,12 +3515,37 @@ def insert_rows_json(
rows_info = []
data = {"rows": rows_info}
- for index, row in enumerate(json_rows):
+ if row_ids is None:
+ warnings.warn(
+ "Passing None for row_ids is deprecated. To explicitly request "
+ "autogenerated insert IDs, use AutoRowIDs.GENERATE_UUID instead",
+ category=DeprecationWarning,
+ )
+ row_ids = AutoRowIDs.GENERATE_UUID
+
+ if not isinstance(row_ids, AutoRowIDs):
+ try:
+ row_ids_iter = iter(row_ids)
+ except TypeError:
+ msg = "row_ids is neither an iterable nor an AutoRowIDs enum member"
+ raise TypeError(msg)
+
+ for i, row in enumerate(json_rows):
info = {"json": row}
- if row_ids is not None:
- info["insertId"] = row_ids[index]
- else:
+
+ if row_ids is AutoRowIDs.GENERATE_UUID:
info["insertId"] = str(uuid.uuid4())
+ elif row_ids is AutoRowIDs.DISABLED:
+ info["insertId"] = None
+ else:
+ try:
+ insert_id = next(row_ids_iter)
+ except StopIteration:
+ msg = f"row_ids did not generate enough IDs, error at index {i}"
+ raise ValueError(msg)
+ else:
+ info["insertId"] = insert_id
+
rows_info.append(info)
if skip_invalid_rows is not None:
@@ -3135,7 +3576,12 @@ def insert_rows_json(
return errors
- def list_partitions(self, table, retry=DEFAULT_RETRY, timeout=None):
+ def list_partitions(
+ self,
+ table: Union[Table, TableReference, str],
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> Sequence[str]:
"""List the partitions in a table.
Args:
@@ -3177,15 +3623,15 @@ def list_partitions(self, table, retry=DEFAULT_RETRY, timeout=None):
def list_rows(
self,
- table,
- selected_fields=None,
- max_results=None,
- page_token=None,
- start_index=None,
- page_size=None,
- retry=DEFAULT_RETRY,
- timeout=None,
- ):
+ table: Union[Table, TableListItem, TableReference, str],
+ selected_fields: Sequence[SchemaField] = None,
+ max_results: int = None,
+ page_token: str = None,
+ start_index: int = None,
+ page_size: int = None,
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> RowIterator:
"""List the rows of the table.
See
@@ -3286,18 +3732,18 @@ def list_rows(
def _list_rows_from_query_results(
self,
- job_id,
- location,
- project,
- schema,
- total_rows=None,
- destination=None,
- max_results=None,
- start_index=None,
- page_size=None,
- retry=DEFAULT_RETRY,
- timeout=None,
- ):
+ job_id: str,
+ location: str,
+ project: str,
+ schema: SchemaField,
+ total_rows: int = None,
+ destination: Union[Table, TableReference, TableListItem, str] = None,
+ max_results: int = None,
+ start_index: int = None,
+ page_size: int = None,
+ retry: retries.Retry = DEFAULT_RETRY,
+ timeout: float = DEFAULT_TIMEOUT,
+ ) -> RowIterator:
"""List the rows of a completed query.
See
https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults
@@ -3382,7 +3828,7 @@ def _schema_to_json_file_object(self, schema_list, file_obj):
"""
json.dump(schema_list, file_obj, indent=2, sort_keys=True)
- def schema_from_json(self, file_or_path):
+ def schema_from_json(self, file_or_path: Union[str, BinaryIO]):
"""Takes a file object or file path that contains json that describes
a table schema.
@@ -3395,7 +3841,9 @@ def schema_from_json(self, file_or_path):
with open(file_or_path) as file_obj:
return self._schema_from_json_file_object(file_obj)
- def schema_to_json(self, schema_list, destination):
+ def schema_to_json(
+ self, schema_list: Sequence[SchemaField], destination: Union[str, BinaryIO]
+ ):
"""Takes a list of schema field objects.
Serializes the list of schema field objects as json to a file.
@@ -3410,6 +3858,12 @@ def schema_to_json(self, schema_list, destination):
with open(destination, mode="w") as file_obj:
return self._schema_to_json_file_object(json_schema_list, file_obj)
+ def __enter__(self):  # entered via "with": yields this same instance
+ return self
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ self.close()  # runs on every exit; returns None, so exceptions propagate
+
# pylint: disable=unused-argument
def _item_to_project(iterator, resource):
@@ -3499,6 +3953,37 @@ def _item_to_table(iterator, resource):
return TableListItem(resource)
+def _extract_job_reference(job, project=None, location=None):
+ """Extract fully-qualified job reference from a job-like object.
+
+ Args:
+ job (Union[ \
+ str, \
+ google.cloud.bigquery.job.LoadJob, \
+ google.cloud.bigquery.job.CopyJob, \
+ google.cloud.bigquery.job.ExtractJob, \
+ google.cloud.bigquery.job.QueryJob \
+ ]): Job identifier, or any object exposing a ``job_id`` attribute.
+ project (Optional[str]):
+ Project where the job was run. Ignored if ``job`` is a job
+ object.
+ location (Optional[str]):
+ Location where the job was run. Ignored if ``job`` is a job
+ object.
+
+ Returns:
+ Tuple[str, str, str]: ``(project, location, job_id)``
+ """
+ if hasattr(job, "job_id"):  # duck-typed: a job object carries its own reference
+ project = job.project
+ job_id = job.job_id
+ location = job.location
+ else:
+ job_id = job  # plain identifier: keep the caller's project/location
+
+ return (project, location, job_id)
+
+
def _make_job_id(job_id, prefix=None):
"""Construct an ID for a new job.
@@ -3532,7 +4017,7 @@ def _check_mode(stream):
mode = getattr(stream, "mode", None)
if isinstance(stream, gzip.GzipFile):
- if mode != gzip.READ:
+ if mode != gzip.READ: # pytype: disable=module-attr
raise ValueError(
"Cannot upload gzip files opened in write mode: use "
"gzip.GzipFile(filename, mode='rb')"
diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py
index 2d3a4755f..21e56f305 100644
--- a/google/cloud/bigquery/dataset.py
+++ b/google/cloud/bigquery/dataset.py
@@ -220,7 +220,7 @@ def to_api_repr(self):
return resource
@classmethod
- def from_api_repr(cls, resource):
+ def from_api_repr(cls, resource: dict) -> "AccessEntry":
"""Factory: construct an access entry given its API representation
Args:
@@ -288,7 +288,7 @@ def path(self):
routine = _get_routine_reference
@classmethod
- def from_api_repr(cls, resource):
+ def from_api_repr(cls, resource: dict) -> "DatasetReference":
"""Factory: construct a dataset reference given its API representation
Args:
@@ -304,7 +304,9 @@ def from_api_repr(cls, resource):
return cls(project, dataset_id)
@classmethod
- def from_string(cls, dataset_id, default_project=None):
+ def from_string(
+ cls, dataset_id: str, default_project: str = None
+ ) -> "DatasetReference":
"""Construct a dataset reference from dataset ID string.
Args:
@@ -350,7 +352,7 @@ def from_string(cls, dataset_id, default_project=None):
return cls(output_project_id, output_dataset_id)
- def to_api_repr(self):
+ def to_api_repr(self) -> dict:
"""Construct the API resource representation of this dataset reference
Returns:
@@ -640,7 +642,7 @@ def default_encryption_configuration(self, value):
self._properties["defaultEncryptionConfiguration"] = api_repr
@classmethod
- def from_string(cls, full_dataset_id):
+ def from_string(cls, full_dataset_id: str) -> "Dataset":
"""Construct a dataset from fully-qualified dataset ID.
Args:
@@ -664,7 +666,7 @@ def from_string(cls, full_dataset_id):
return cls(DatasetReference.from_string(full_dataset_id))
@classmethod
- def from_api_repr(cls, resource):
+ def from_api_repr(cls, resource: dict) -> "Dataset":
"""Factory: construct a dataset given its API representation
Args:
@@ -689,7 +691,7 @@ def from_api_repr(cls, resource):
dataset._properties = copy.deepcopy(resource)
return dataset
- def to_api_repr(self):
+ def to_api_repr(self) -> dict:
"""Construct the API resource representation of this dataset
Returns:
diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py
index 95b5869e5..9c134b47c 100644
--- a/google/cloud/bigquery/dbapi/_helpers.py
+++ b/google/cloud/bigquery/dbapi/_helpers.py
@@ -18,13 +18,54 @@
import decimal
import functools
import numbers
+import re
+import typing
from google.cloud import bigquery
-from google.cloud.bigquery import table
+from google.cloud.bigquery import table, enums, query
from google.cloud.bigquery.dbapi import exceptions
-def scalar_to_query_parameter(value, name=None):
+_NUMERIC_SERVER_MIN = decimal.Decimal("-9.9999999999999999999999999999999999999E+28")
+_NUMERIC_SERVER_MAX = decimal.Decimal("9.9999999999999999999999999999999999999E+28")
+
+type_parameters_re = re.compile(
+ r"""
+ \(
+ \s*[0-9]+\s*
+ (,
+ \s*[0-9]+\s*
+ )*
+ \)
+ """,
+ re.VERBOSE,
+)  # matches a parenthesized integer list such as "(38)" or "(38, 9)"
+
+
+def _parameter_type(name, value, query_parameter_type=None, value_doc=""):
+ if query_parameter_type:
+ # Strip type parameters, e.g. "NUMERIC(38, 9)" -> "NUMERIC"
+ query_parameter_type = type_parameters_re.sub("", query_parameter_type)
+ try:  # an unknown type name surfaces as AttributeError from getattr
+ parameter_type = getattr(
+ enums.SqlParameterScalarTypes, query_parameter_type.upper()
+ )._type
+ except AttributeError:
+ raise exceptions.ProgrammingError(
+ f"The given parameter type, {query_parameter_type},"
+ f" for {name} is not a valid BigQuery scalar type."
+ )
+ else:  # no explicit type given: infer it from the Python value
+ parameter_type = bigquery_scalar_type(value)
+ if parameter_type is None:  # None is treated as "could not infer a type"
+ raise exceptions.ProgrammingError(
+ f"Encountered parameter {name} with "
+ f"{value_doc} value {value} of unexpected type."
+ )
+ return parameter_type
+
+
+def scalar_to_query_parameter(value, name=None, query_parameter_type=None):
"""Convert a scalar value into a query parameter.
Args:
@@ -33,6 +74,7 @@ def scalar_to_query_parameter(value, name=None):
name (str):
(Optional) Name of the query parameter.
+ query_parameter_type (Optional[str]): Given type for the parameter.
Returns:
google.cloud.bigquery.ScalarQueryParameter:
@@ -43,24 +85,19 @@ def scalar_to_query_parameter(value, name=None):
google.cloud.bigquery.dbapi.exceptions.ProgrammingError:
if the type cannot be determined.
"""
- parameter_type = bigquery_scalar_type(value)
-
- if parameter_type is None:
- raise exceptions.ProgrammingError(
- "encountered parameter {} with value {} of unexpected type".format(
- name, value
- )
- )
- return bigquery.ScalarQueryParameter(name, parameter_type, value)
+ return bigquery.ScalarQueryParameter(
+ name, _parameter_type(name, value, query_parameter_type), value
+ )
-def array_to_query_parameter(value, name=None):
+def array_to_query_parameter(value, name=None, query_parameter_type=None):
"""Convert an array-like value into a query parameter.
Args:
value (Sequence[Any]): The elements of the array (should not be a
string-like Sequence).
name (Optional[str]): Name of the query parameter.
+ query_parameter_type (Optional[str]): Given type for the parameter.
Returns:
A query parameter corresponding with the type and value of the plain
@@ -76,81 +113,267 @@ def array_to_query_parameter(value, name=None):
"not string-like.".format(name)
)
- if not value:
+ if query_parameter_type or value:
+ array_type = _parameter_type(
+ name,
+ value[0] if value else None,
+ query_parameter_type,
+ value_doc="array element ",
+ )
+ else:
raise exceptions.ProgrammingError(
"Encountered an empty array-like value of parameter {}, cannot "
"determine array elements type.".format(name)
)
- # Assume that all elements are of the same type, and let the backend handle
- # any type incompatibilities among the array elements
- array_type = bigquery_scalar_type(value[0])
- if array_type is None:
- raise exceptions.ProgrammingError(
- "Encountered unexpected first array element of parameter {}, "
- "cannot determine array elements type.".format(name)
+ return bigquery.ArrayQueryParameter(name, array_type, value)
+
+
+def _parse_struct_fields(
+ fields,
+ base,
+ parse_struct_field=re.compile(
+ r"""
+ (?:(\w+)\s+) # field name
+ ([A-Z0-9<> ,()]+) # Field type
+ $""",
+ re.VERBOSE | re.IGNORECASE,
+ ).match,
+):
+ # Split a string of struct fields. They're defined by commas, but
+ # we have to avoid splitting on commas internal to fields. For
+ # example:
+ # name string, children array<struct<name string, bdate date>>
+ #
+ # only has 2 top-level fields.
+ fields = fields.split(",")
+ fields = list(reversed(fields)) # in the off chance that there are very many
+ while fields:
+ field = fields.pop()
+ while fields and field.count("<") != field.count(">"):
+ field += "," + fields.pop()
+
+ m = parse_struct_field(field.strip())
+ if not m:
+ raise exceptions.ProgrammingError(
+ f"Invalid struct field, {field}, in {base}"
+ )
+ yield m.group(1, 2)
+
+
+SCALAR, ARRAY, STRUCT = "sar"
+
+
+def _parse_type(
+ type_,
+ name,
+ base,
+ complex_query_parameter_parse=re.compile(
+ r"""
+ \s*
+ (ARRAY|STRUCT|RECORD) # Type
+ \s*
+ <([A-Z0-9<> ,()]+)> # Subtype(s)
+ \s*$
+ """,
+ re.IGNORECASE | re.VERBOSE,
+ ).match,
+):
+ if "<" not in type_:
+ # Scalar
+
+ # Strip type parameters
+ type_ = type_parameters_re.sub("", type_).strip()
+ try:
+ type_ = getattr(enums.SqlParameterScalarTypes, type_.upper())
+ except AttributeError:
+ raise exceptions.ProgrammingError(
+ f"The given parameter type, {type_},"
+ f"{' for ' + name if name else ''}"
+ f" is not a valid BigQuery scalar type, in {base}."
+ )
+ if name:
+ type_ = type_.with_name(name)
+ return SCALAR, type_
+
+ m = complex_query_parameter_parse(type_)
+ if not m:
+ raise exceptions.ProgrammingError(f"Invalid parameter type, {type_}")
+ tname, sub = m.group(1, 2)
+ if tname.upper() == "ARRAY":
+ sub_type = complex_query_parameter_type(None, sub, base)
+ if isinstance(sub_type, query.ArrayQueryParameterType):
+ raise exceptions.ProgrammingError(f"Array can't contain an array in {base}")
+ sub_type._complex__src = sub
+ return ARRAY, sub_type
+ else:
+ return STRUCT, _parse_struct_fields(sub, base)
+
+
+def complex_query_parameter_type(name: typing.Optional[str], type_: str, base: str):
+ """Construct a parameter type (`StructQueryParameterType`) for a complex type
+
+ or a non-complex type that's part of a complex type.
+
+ Examples:
+
+ array<struct<x float64, y float64>>
+
+ struct<name string, children array<struct<name string, bdate date>>>
+
+ This is used for computing array types.
+ """
+
+ type_type, sub_type = _parse_type(type_, name, base)
+ if type_type == SCALAR:
+ type_ = sub_type
+ elif type_type == ARRAY:
+ type_ = query.ArrayQueryParameterType(sub_type, name=name)
+ elif type_type == STRUCT:
+ fields = [
+ complex_query_parameter_type(field_name, field_type, base)
+ for field_name, field_type in sub_type
+ ]
+ type_ = query.StructQueryParameterType(*fields, name=name)
+ else: # pragma: NO COVER
+ raise AssertionError("Bad type_type", type_type) # Can't happen :)
+
+ return type_
+
+
+def complex_query_parameter(
+ name: typing.Optional[str], value, type_: str, base: typing.Optional[str] = None
+):
+ """
+ Construct a query parameter for a complex type (array or struct record)
+
+ or for a subtype, which may not be complex
+
+ Examples:
+
+ array<struct<x float64, y float64>>
+
+ struct<name string, children array<struct<name string, bdate date>>>
+
+ """
+ base = base or type_
+
+ type_type, sub_type = _parse_type(type_, name, base)
+
+ if type_type == SCALAR:
+ param = query.ScalarQueryParameter(name, sub_type._type, value)
+ elif type_type == ARRAY:
+ if not array_like(value):
+ raise exceptions.ProgrammingError(
+ f"Array type with non-array-like value"
+ f" with type {type(value).__name__}"
+ )
+ param = query.ArrayQueryParameter(
+ name,
+ sub_type,
+ value
+ if isinstance(sub_type, query.ScalarQueryParameterType)
+ else [
+ complex_query_parameter(None, v, sub_type._complex__src, base)
+ for v in value
+ ],
)
+ elif type_type == STRUCT:
+ if not isinstance(value, collections_abc.Mapping):
+ raise exceptions.ProgrammingError(f"Non-mapping value for type {type_}")
+ value_keys = set(value)
+ fields = []
+ for field_name, field_type in sub_type:
+ if field_name not in value:
+ raise exceptions.ProgrammingError(
+ f"No field value for {field_name} in {type_}"
+ )
+ value_keys.remove(field_name)
+ fields.append(
+ complex_query_parameter(field_name, value[field_name], field_type, base)
+ )
+ if value_keys:
+ raise exceptions.ProgrammingError(f"Extra data keys for {type_}")
- return bigquery.ArrayQueryParameter(name, array_type, value)
+ param = query.StructQueryParameter(name, *fields)
+ else: # pragma: NO COVER
+ raise AssertionError("Bad type_type", type_type) # Can't happen :)
+ return param
+
+
+def _dispatch_parameter(type_, value, name=None):
+ if type_ is not None and "<" in type_:
+ param = complex_query_parameter(name, value, type_)
+ elif isinstance(value, collections_abc.Mapping):
+ raise NotImplementedError(
+ f"STRUCT-like parameter values are not supported"
+ f"{' (parameter ' + name + ')' if name else ''},"
+ f" unless an explicit type is give in the parameter placeholder"
+ f" (e.g. '%({name if name else ''}:struct<...>)s')."
+ )
+ elif array_like(value):
+ param = array_to_query_parameter(value, name, type_)
+ else:
+ param = scalar_to_query_parameter(value, name, type_)
-def to_query_parameters_list(parameters):
+ return param
+
+
+def to_query_parameters_list(parameters, parameter_types):
"""Converts a sequence of parameter values into query parameters.
Args:
parameters (Sequence[Any]): Sequence of query parameter values.
+ parameter_types:
+ A list of parameter types, one for each parameter.
+ Unknown types are provided as None.
Returns:
List[google.cloud.bigquery.query._AbstractQueryParameter]:
A list of query parameters.
"""
- result = []
-
- for value in parameters:
- if isinstance(value, collections_abc.Mapping):
- raise NotImplementedError("STRUCT-like parameter values are not supported.")
- elif array_like(value):
- param = array_to_query_parameter(value)
- else:
- param = scalar_to_query_parameter(value)
- result.append(param)
+ return [
+ _dispatch_parameter(type_, value)
+ for value, type_ in zip(parameters, parameter_types)
+ ]
- return result
-
-def to_query_parameters_dict(parameters):
+def to_query_parameters_dict(parameters, query_parameter_types):
"""Converts a dictionary of parameter values into query parameters.
Args:
parameters (Mapping[str, Any]): Dictionary of query parameter values.
+ query_parameter_types:
+ A dictionary of parameter types. It needn't have a key for each
+ parameter.
Returns:
List[google.cloud.bigquery.query._AbstractQueryParameter]:
A list of named query parameters.
"""
- result = []
-
- for name, value in parameters.items():
- if isinstance(value, collections_abc.Mapping):
- raise NotImplementedError(
- "STRUCT-like parameter values are not supported "
- "(parameter {}).".format(name)
- )
- elif array_like(value):
- param = array_to_query_parameter(value, name=name)
- else:
- param = scalar_to_query_parameter(value, name=name)
- result.append(param)
-
- return result
+ return [
+ _dispatch_parameter(query_parameter_types.get(name), value, name)
+ for name, value in parameters.items()
+ ]
-def to_query_parameters(parameters):
+def to_query_parameters(parameters, parameter_types):
"""Converts DB-API parameter values into query parameters.
Args:
parameters (Union[Mapping[str, Any], Sequence[Any]]):
A dictionary or sequence of query parameter values.
+ parameter_types (Union[Mapping[str, str], Sequence[str]]):
+ A dictionary or list of parameter types.
+
+ If parameters is a mapping, then this must be a dictionary
+ of parameter types. It needn't have a key for each
+ parameter.
+
+ If parameters is a sequence, then this must be a list of
+ parameter types, one for each parameter. Unknown types
+ are provided as None.
Returns:
List[google.cloud.bigquery.query._AbstractQueryParameter]:
@@ -160,9 +383,9 @@ def to_query_parameters(parameters):
return []
if isinstance(parameters, collections_abc.Mapping):
- return to_query_parameters_dict(parameters)
-
- return to_query_parameters_list(parameters)
+ return to_query_parameters_dict(parameters, parameter_types)
+ else:
+ return to_query_parameters_list(parameters, parameter_types)
def bigquery_scalar_type(value):
@@ -184,7 +407,20 @@ def bigquery_scalar_type(value):
elif isinstance(value, numbers.Real):
return "FLOAT64"
elif isinstance(value, decimal.Decimal):
- return "NUMERIC"
+ vtuple = value.as_tuple()
+ # NUMERIC values have precision of 38 (number of digits) and scale of 9 (number
+ # of fractional digits), and their max absolute value must be strictly smaller
+ # than 1.0E+29.
+ # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types
+ if (
+ len(vtuple.digits) <= 38 # max precision: 38
+ and vtuple.exponent >= -9 # max scale: 9
+ and _NUMERIC_SERVER_MIN <= value <= _NUMERIC_SERVER_MAX
+ ):
+ return "NUMERIC"
+ else:
+ return "BIGNUMERIC"
+
elif isinstance(value, str):
return "STRING"
elif isinstance(value, bytes):
@@ -259,7 +495,7 @@ def decorate_public_methods(klass):
"""Apply ``_raise_on_closed()`` decorator to public instance methods.
"""
for name in dir(klass):
- if name.startswith("_"):
+ if name.startswith("_") and name != "__iter__":
continue
member = getattr(klass, name)
diff --git a/google/cloud/bigquery/dbapi/connection.py b/google/cloud/bigquery/dbapi/connection.py
index 300c77dc9..66dee7dfb 100644
--- a/google/cloud/bigquery/dbapi/connection.py
+++ b/google/cloud/bigquery/dbapi/connection.py
@@ -47,12 +47,14 @@ def __init__(self, client=None, bqstorage_client=None):
else:
self._owns_client = False
+ # A warning is already raised by the BQ Storage client factory if
+ # instantiation fails, or if the given BQ Storage client instance is outdated.
if bqstorage_client is None:
- # A warning is already raised by the factory if instantiation fails.
- bqstorage_client = client._create_bqstorage_client()
+ bqstorage_client = client._ensure_bqstorage_client()
self._owns_bqstorage_client = bqstorage_client is not None
else:
self._owns_bqstorage_client = False
+ bqstorage_client = client._ensure_bqstorage_client(bqstorage_client)
self._client = client
self._bqstorage_client = bqstorage_client
@@ -76,7 +78,8 @@ def close(self):
self._bqstorage_client._transport.grpc_channel.close()
for cursor_ in self._cursors_created:
- cursor_.close()
+ if not cursor_._closed:
+ cursor_.close()
def commit(self):
"""No-op, but for consistency raise an error if connection is closed."""
diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py
index e90bcc2c0..587598d5f 100644
--- a/google/cloud/bigquery/dbapi/cursor.py
+++ b/google/cloud/bigquery/dbapi/cursor.py
@@ -18,6 +18,15 @@
from collections import abc as collections_abc
import copy
import logging
+import re
+
+try:
+ from google.cloud.bigquery_storage import ArrowSerializationOptions
+except ImportError:
+ _ARROW_COMPRESSION_SUPPORT = False
+else:
+ # Having BQ Storage available implies that pyarrow >=1.0.0 is available, too.
+ _ARROW_COMPRESSION_SUPPORT = True
from google.cloud.bigquery import job
from google.cloud.bigquery.dbapi import _helpers
@@ -153,6 +162,14 @@ def execute(self, operation, parameters=None, job_id=None, job_config=None):
job_config (google.cloud.bigquery.job.QueryJobConfig):
(Optional) Extra configuration options for the query job.
"""
+ formatted_operation, parameter_types = _format_operation(operation, parameters)
+ self._execute(
+ formatted_operation, parameters, job_id, job_config, parameter_types
+ )
+
+ def _execute(
+ self, formatted_operation, parameters, job_id, job_config, parameter_types
+ ):
self._query_data = None
self._query_job = None
client = self.connection._client
@@ -161,8 +178,7 @@ def execute(self, operation, parameters=None, job_id=None, job_config=None):
# query parameters was not one of the standard options. Convert both
# the query and the parameters to the format expected by the client
# libraries.
- formatted_operation = _format_operation(operation, parameters=parameters)
- query_parameters = _helpers.to_query_parameters(parameters)
+ query_parameters = _helpers.to_query_parameters(parameters, parameter_types)
if client._default_query_job_config:
if job_config:
@@ -201,8 +217,23 @@ def executemany(self, operation, seq_of_parameters):
seq_of_parameters (Union[Sequence[Mapping[str, Any], Sequence[Any]]]):
Sequence of many sets of parameter values.
"""
- for parameters in seq_of_parameters:
- self.execute(operation, parameters)
+ if seq_of_parameters:
+ rowcount = 0
+ # There's no reason to format the line more than once, as
+ # the operation only barely depends on the parameters. So
+ # we just use the first set of parameters. If there are
+ # different numbers or types of parameters, we'll error
+ # anyway.
+ formatted_operation, parameter_types = _format_operation(
+ operation, seq_of_parameters[0]
+ )
+ for parameters in seq_of_parameters:
+ self._execute(
+ formatted_operation, parameters, None, None, parameter_types
+ )
+ rowcount += self.rowcount
+
+ self.rowcount = rowcount
def _try_fetch(self, size=None):
"""Try to start fetching data, if not yet started.
@@ -255,6 +286,12 @@ def _bqstorage_fetch(self, bqstorage_client):
table=table_reference.to_bqstorage(),
data_format=bigquery_storage.types.DataFormat.ARROW,
)
+
+ if _ARROW_COMPRESSION_SUPPORT:
+ requested_session.read_options.arrow_serialization_options.buffer_compression = (
+ ArrowSerializationOptions.CompressionCodec.LZ4_FRAME
+ )
+
read_session = bqstorage_client.create_read_session(
parent="projects/{}".format(table_reference.project),
read_session=requested_session,
@@ -351,6 +388,10 @@ def setinputsizes(self, sizes):
def setoutputsize(self, size, column=None):
"""No-op, but for consistency raise an error if cursor is closed."""
+ def __iter__(self):
+ self._try_fetch()
+ return iter(self._query_data)
+
def _format_operation_list(operation, parameters):
"""Formats parameters in operation in the way BigQuery expects.
@@ -375,7 +416,7 @@ def _format_operation_list(operation, parameters):
try:
return operation % tuple(formatted_params)
- except TypeError as exc:
+ except (TypeError, ValueError) as exc:
raise exceptions.ProgrammingError(exc)
@@ -405,11 +446,11 @@ def _format_operation_dict(operation, parameters):
try:
return operation % formatted_params
- except KeyError as exc:
+ except (KeyError, ValueError, TypeError) as exc:
raise exceptions.ProgrammingError(exc)
-def _format_operation(operation, parameters=None):
+def _format_operation(operation, parameters):
"""Formats parameters in operation in way BigQuery expects.
Args:
@@ -427,9 +468,93 @@ def _format_operation(operation, parameters=None):
``parameters`` argument.
"""
if parameters is None or len(parameters) == 0:
- return operation
+ return operation.replace("%%", "%"), None # Still do percent de-escaping.
+
+ operation, parameter_types = _extract_types(operation)
+ if parameter_types is None:
+ raise exceptions.ProgrammingError(
+ f"Parameters were provided, but {repr(operation)} has no placeholders."
+ )
if isinstance(parameters, collections_abc.Mapping):
- return _format_operation_dict(operation, parameters)
+ return _format_operation_dict(operation, parameters), parameter_types
+
+ return _format_operation_list(operation, parameters), parameter_types
+
+
+def _extract_types(
+ operation,
+ extra_type_sub=re.compile(
+ r"""
+ (%*) # Extra %s. We'll deal with these in the replacement code
+
+ % # Beginning of replacement, %s, %(...)s
+
+ (?:\( # Begin of optional name and/or type
+ ([^:)]*) # name
+ (?:: # ':' introduces type
+ ( # start of type group
+ [a-zA-Z0-9<>, ]+ # First part, no parens
+
+ (?: # start sets of parens + non-paren text
+ \([0-9 ,]+\) # comma-separated groups of digits in parens
+ # (e.g. string(10))
+ (?=[, >)]) # Must be followed by ,>) or space
+ [a-zA-Z0-9<>, ]* # Optional non-paren chars
+ )* # Can be zero or more of parens and following text
+ ) # end of type group
+ )? # close type clause ":type"
+ \))? # End of optional name and/or type
+
+ s # End of replacement
+ """,
+ re.VERBOSE,
+ ).sub,
+):
+ """Remove type information from parameter placeholders.
+
+ For every parameter of the form %(name:type)s, replace with %(name)s and add the
+ item name->type to dict that's returned.
+
+ Returns operation without type information and a dictionary of names and types.
+ """
+ parameter_types = None
+
+ def repl(m):
+ nonlocal parameter_types
+ prefix, name, type_ = m.groups()
+ if len(prefix) % 2:
+ # The prefix has an odd number of %s, the last of which
+ # escapes the % we're looking for, so we don't want to
+ # change anything.
+ return m.group(0)
+
+ try:
+ if name:
+ if not parameter_types:
+ parameter_types = {}
+ if type_:
+ if name in parameter_types:
+ if type_ != parameter_types[name]:
+ raise exceptions.ProgrammingError(
+ f"Conflicting types for {name}: "
+ f"{parameter_types[name]} and {type_}."
+ )
+ else:
+ parameter_types[name] = type_
+ else:
+ if not isinstance(parameter_types, dict):
+ raise TypeError()
+
+ return f"{prefix}%({name})s"
+ else:
+ if parameter_types is None:
+ parameter_types = []
+ parameter_types.append(type_)
+ return f"{prefix}%s"
+ except (AttributeError, TypeError):
+ raise exceptions.ProgrammingError(
+ f"{repr(operation)} mixes named and unamed parameters."
+ )
- return _format_operation_list(operation, parameters)
+ return extra_type_sub(repl, operation), parameter_types
diff --git a/google/cloud/bigquery/dbapi/types.py b/google/cloud/bigquery/dbapi/types.py
index 14917820c..717593ae1 100644
--- a/google/cloud/bigquery/dbapi/types.py
+++ b/google/cloud/bigquery/dbapi/types.py
@@ -30,16 +30,28 @@
TimestampFromTicks = datetime.datetime.fromtimestamp
-def Binary(string):
+def Binary(data):
"""Contruct a DB-API binary value.
Args:
- string (str): A string to encode as a binary value.
+ data (bytes-like): An object containing binary data and that
+ can be converted to bytes with the `bytes` builtin.
Returns:
- bytes: The UTF-8 encoded bytes representing the string.
+ bytes: The binary data as a bytes object.
"""
- return string.encode("utf-8")
+ if isinstance(data, int):
+ # This is not the conversion we're looking for, because it
+ # will simply create a bytes object of the given size.
+ raise TypeError("cannot convert `int` object to binary")
+
+ try:
+ return bytes(data)
+ except TypeError:
+ if isinstance(data, str):
+ return data.encode("utf-8")
+ else:
+ raise
def TimeFromTicks(ticks, tz=None):
@@ -78,7 +90,7 @@ def __eq__(self, other):
STRING = "STRING"
BINARY = _DBAPITypeObject("BYTES", "RECORD", "STRUCT")
NUMBER = _DBAPITypeObject(
- "INTEGER", "INT64", "FLOAT", "FLOAT64", "NUMERIC", "BOOLEAN", "BOOL"
+ "INTEGER", "INT64", "FLOAT", "FLOAT64", "NUMERIC", "BIGNUMERIC", "BOOLEAN", "BOOL"
)
DATETIME = _DBAPITypeObject("TIMESTAMP", "DATE", "TIME", "DATETIME")
ROWID = "ROWID"
diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py
index 2268808fd..d67cebd4c 100644
--- a/google/cloud/bigquery/enums.py
+++ b/google/cloud/bigquery/enums.py
@@ -18,6 +18,14 @@
import itertools
from google.cloud.bigquery_v2 import types as gapic_types
+from google.cloud.bigquery.query import ScalarQueryParameterType
+
+
+class AutoRowIDs(enum.Enum):
+ """How to handle automatic insert IDs when inserting rows as a stream."""
+
+ DISABLED = enum.auto()
+ GENERATE_UUID = enum.auto()
class Compression(object):
@@ -41,6 +49,24 @@ class Compression(object):
"""Specifies no compression."""
+class DecimalTargetType:
+ """The data types that could be used as a target type when converting decimal values.
+
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#DecimalTargetType
+
+ .. versionadded:: 2.21.0
+ """
+
+ NUMERIC = "NUMERIC"
+ """Decimal values could be converted to NUMERIC type."""
+
+ BIGNUMERIC = "BIGNUMERIC"
+ """Decimal values could be converted to BIGNUMERIC type."""
+
+ STRING = "STRING"
+ """Decimal values could be converted to STRING type."""
+
+
class CreateDisposition(object):
"""Specifies whether the job is allowed to create new tables. The default
value is :attr:`CREATE_IF_NEEDED`.
@@ -72,6 +98,9 @@ class DestinationFormat(object):
AVRO = "AVRO"
"""Specifies Avro format."""
+ PARQUET = "PARQUET"
+ """Specifies Parquet format."""
+
class Encoding(object):
"""The character encoding of the data. The default is :attr:`UTF_8`.
@@ -138,6 +167,19 @@ class SourceFormat(object):
"""Specifies Orc format."""
+class KeyResultStatementKind:
+ """Determines which statement in the script represents the "key result".
+
+ The "key result" is used to populate the schema and query results of the script job.
+
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#keyresultstatementkind
+ """
+
+ KEY_RESULT_STATEMENT_KIND_UNSPECIFIED = "KEY_RESULT_STATEMENT_KIND_UNSPECIFIED"
+ LAST = "LAST"
+ FIRST_SELECT = "FIRST_SELECT"
+
+
_SQL_SCALAR_TYPES = frozenset(
(
"INT64",
@@ -149,9 +191,11 @@ class SourceFormat(object):
"DATE",
"TIME",
"DATETIME",
+ "INTERVAL",
"GEOGRAPHY",
"NUMERIC",
"BIGNUMERIC",
+ "JSON",
)
)
@@ -199,8 +243,8 @@ class SqlTypeNames(str, enum.Enum):
INT64 = "INTEGER"
FLOAT = "FLOAT"
FLOAT64 = "FLOAT"
- NUMERIC = "NUMERIC"
- BIGNUMERIC = "BIGNUMERIC"
+ DECIMAL = NUMERIC = "NUMERIC"
+ BIGDECIMAL = BIGNUMERIC = "BIGNUMERIC"
BOOLEAN = "BOOLEAN"
BOOL = "BOOLEAN"
GEOGRAPHY = "GEOGRAPHY" # NOTE: not available in legacy types
@@ -212,6 +256,28 @@ class SqlTypeNames(str, enum.Enum):
DATETIME = "DATETIME"
+class SqlParameterScalarTypes:
+ """Supported scalar SQL query parameter types as type objects."""
+
+ BOOL = ScalarQueryParameterType("BOOL")
+ BOOLEAN = ScalarQueryParameterType("BOOL")
+ BIGDECIMAL = ScalarQueryParameterType("BIGNUMERIC")
+ BIGNUMERIC = ScalarQueryParameterType("BIGNUMERIC")
+ BYTES = ScalarQueryParameterType("BYTES")
+ DATE = ScalarQueryParameterType("DATE")
+ DATETIME = ScalarQueryParameterType("DATETIME")
+ DECIMAL = ScalarQueryParameterType("NUMERIC")
+ FLOAT = ScalarQueryParameterType("FLOAT64")
+ FLOAT64 = ScalarQueryParameterType("FLOAT64")
+ GEOGRAPHY = ScalarQueryParameterType("GEOGRAPHY")
+ INT64 = ScalarQueryParameterType("INT64")
+ INTEGER = ScalarQueryParameterType("INT64")
+ NUMERIC = ScalarQueryParameterType("NUMERIC")
+ STRING = ScalarQueryParameterType("STRING")
+ TIME = ScalarQueryParameterType("TIME")
+ TIMESTAMP = ScalarQueryParameterType("TIMESTAMP")
+
+
class WriteDisposition(object):
"""Specifies the action that occurs if destination table already exists.
@@ -231,3 +297,20 @@ class WriteDisposition(object):
WRITE_EMPTY = "WRITE_EMPTY"
"""If the table already exists and contains data, a 'duplicate' error is
returned in the job result."""
+
+
+class DeterminismLevel:
+ """Specifies determinism level for JavaScript user-defined functions (UDFs).
+
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#DeterminismLevel
+ """
+
+ DETERMINISM_LEVEL_UNSPECIFIED = "DETERMINISM_LEVEL_UNSPECIFIED"
+ """The determinism of the UDF is unspecified."""
+
+ DETERMINISTIC = "DETERMINISTIC"
+ """The UDF is deterministic, meaning that 2 function calls with the same inputs
+ always produce the same result, even across 2 query runs."""
+
+ NOT_DETERMINISTIC = "NOT_DETERMINISTIC"
+ """The UDF is not deterministic."""
diff --git a/google/cloud/bigquery/exceptions.py b/google/cloud/bigquery/exceptions.py
new file mode 100644
index 000000000..6e5c27eb1
--- /dev/null
+++ b/google/cloud/bigquery/exceptions.py
@@ -0,0 +1,21 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+class BigQueryError(Exception):
+ """Base class for all custom exceptions defined by the BigQuery client."""
+
+
+class LegacyBigQueryStorageError(BigQueryError):
+ """Raised when too old a version of BigQuery Storage extra is detected at runtime."""
diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py
index 112dfdba4..f1692ba50 100644
--- a/google/cloud/bigquery/external_config.py
+++ b/google/cloud/bigquery/external_config.py
@@ -22,11 +22,13 @@
import base64
import copy
+from typing import FrozenSet, Iterable, Optional
from google.cloud.bigquery._helpers import _to_bytes
from google.cloud.bigquery._helpers import _bytes_to_json
from google.cloud.bigquery._helpers import _int_or_none
from google.cloud.bigquery._helpers import _str_or_none
+from google.cloud.bigquery.format_options import ParquetOptions
from google.cloud.bigquery.schema import SchemaField
@@ -53,6 +55,12 @@ class ExternalSourceFormat(object):
DATASTORE_BACKUP = "DATASTORE_BACKUP"
"""Specifies datastore backup format"""
+ ORC = "ORC"
+ """Specifies ORC format."""
+
+ PARQUET = "PARQUET"
+ """Specifies Parquet format."""
+
BIGTABLE = "BIGTABLE"
"""Specifies Bigtable format."""
@@ -149,7 +157,7 @@ def type_(self):
def type_(self, value):
self._properties["type"] = value
- def to_api_repr(self):
+ def to_api_repr(self) -> dict:
"""Build an API representation of this object.
Returns:
@@ -159,7 +167,7 @@ def to_api_repr(self):
return copy.deepcopy(self._properties)
@classmethod
- def from_api_repr(cls, resource):
+ def from_api_repr(cls, resource: dict) -> "BigtableColumn":
"""Factory: construct a :class:`~.external_config.BigtableColumn`
instance given its API representation.
@@ -251,7 +259,7 @@ def columns(self):
def columns(self, value):
self._properties["columns"] = [col.to_api_repr() for col in value]
- def to_api_repr(self):
+ def to_api_repr(self) -> dict:
"""Build an API representation of this object.
Returns:
@@ -261,7 +269,7 @@ def to_api_repr(self):
return copy.deepcopy(self._properties)
@classmethod
- def from_api_repr(cls, resource):
+ def from_api_repr(cls, resource: dict) -> "BigtableColumnFamily":
"""Factory: construct a :class:`~.external_config.BigtableColumnFamily`
instance given its API representation.
@@ -333,7 +341,7 @@ def column_families(self):
def column_families(self, value):
self._properties["columnFamilies"] = [cf.to_api_repr() for cf in value]
- def to_api_repr(self):
+ def to_api_repr(self) -> dict:
"""Build an API representation of this object.
Returns:
@@ -343,7 +351,7 @@ def to_api_repr(self):
return copy.deepcopy(self._properties)
@classmethod
- def from_api_repr(cls, resource):
+ def from_api_repr(cls, resource: dict) -> "BigtableOptions":
"""Factory: construct a :class:`~.external_config.BigtableOptions`
instance given its API representation.
@@ -450,7 +458,7 @@ def skip_leading_rows(self):
def skip_leading_rows(self, value):
self._properties["skipLeadingRows"] = str(value)
- def to_api_repr(self):
+ def to_api_repr(self) -> dict:
"""Build an API representation of this object.
Returns:
@@ -459,7 +467,7 @@ def to_api_repr(self):
return copy.deepcopy(self._properties)
@classmethod
- def from_api_repr(cls, resource):
+ def from_api_repr(cls, resource: dict) -> "CSVOptions":
"""Factory: construct a :class:`~.external_config.CSVOptions` instance
given its API representation.
@@ -513,7 +521,7 @@ def range(self):
def range(self, value):
self._properties["range"] = value
- def to_api_repr(self):
+ def to_api_repr(self) -> dict:
"""Build an API representation of this object.
Returns:
@@ -522,7 +530,7 @@ def to_api_repr(self):
return copy.deepcopy(self._properties)
@classmethod
- def from_api_repr(cls, resource):
+ def from_api_repr(cls, resource: dict) -> "GoogleSheetsOptions":
"""Factory: construct a :class:`~.external_config.GoogleSheetsOptions`
instance given its API representation.
@@ -540,7 +548,7 @@ def from_api_repr(cls, resource):
return config
-_OPTION_CLASSES = (BigtableOptions, CSVOptions, GoogleSheetsOptions)
+_OPTION_CLASSES = (BigtableOptions, CSVOptions, GoogleSheetsOptions, ParquetOptions)
class HivePartitioningOptions(object):
@@ -601,7 +609,7 @@ def require_partition_filter(self):
def require_partition_filter(self, value):
self._properties["requirePartitionFilter"] = value
- def to_api_repr(self):
+ def to_api_repr(self) -> dict:
"""Build an API representation of this object.
Returns:
@@ -610,7 +618,7 @@ def to_api_repr(self):
return copy.deepcopy(self._properties)
@classmethod
- def from_api_repr(cls, resource):
+ def from_api_repr(cls, resource: dict) -> "HivePartitioningOptions":
"""Factory: construct a :class:`~.external_config.HivePartitioningOptions`
instance given its API representation.
@@ -686,6 +694,28 @@ def compression(self):
def compression(self, value):
self._properties["compression"] = value
+ @property
+ def decimal_target_types(self) -> Optional[FrozenSet[str]]:
+ """Possible SQL data types to which the source decimal values are converted.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.decimal_target_types
+
+ .. versionadded:: 2.21.0
+ """
+ prop = self._properties.get("decimalTargetTypes")
+ if prop is not None:
+ prop = frozenset(prop)
+ return prop
+
+ @decimal_target_types.setter
+ def decimal_target_types(self, value: Optional[Iterable[str]]):
+ if value is not None:
+ self._properties["decimalTargetTypes"] = list(value)
+ else:
+ if "decimalTargetTypes" in self._properties:
+ del self._properties["decimalTargetTypes"]
+
@property
def hive_partitioning(self):
"""Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \
@@ -760,6 +790,23 @@ def schema(self):
prop = self._properties.get("schema", {})
return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])]
+ @property
+ def connection_id(self):
+ """Optional[str]: [Experimental] ID of a BigQuery Connection API
+ resource.
+
+ .. WARNING::
+
+ This feature is experimental. Pre-GA features may have limited
+ support, and changes to pre-GA features may not be compatible with
+ other pre-GA versions.
+ """
+ return self._properties.get("connectionId")
+
+ @connection_id.setter
+ def connection_id(self, value):
+ self._properties["connectionId"] = value
+
@schema.setter
def schema(self, value):
prop = value
@@ -767,7 +814,26 @@ def schema(self, value):
prop = {"fields": [field.to_api_repr() for field in value]}
self._properties["schema"] = prop
- def to_api_repr(self):
+ @property
+ def parquet_options(self):
+ """Optional[google.cloud.bigquery.format_options.ParquetOptions]: Additional
+ properties to set if ``sourceFormat`` is set to PARQUET.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.parquet_options
+ """
+ if self.source_format != ExternalSourceFormat.PARQUET:
+ return None
+ return self._options
+
+ @parquet_options.setter
+ def parquet_options(self, value):
+ if self.source_format != ExternalSourceFormat.PARQUET:
+ msg = f"Cannot set Parquet options, source format is {self.source_format}"
+ raise TypeError(msg)
+ self._options = value
+
+ def to_api_repr(self) -> dict:
"""Build an API representation of this object.
Returns:
@@ -782,7 +848,7 @@ def to_api_repr(self):
return config
@classmethod
- def from_api_repr(cls, resource):
+ def from_api_repr(cls, resource: dict) -> "ExternalConfig":
"""Factory: construct an :class:`~.external_config.ExternalConfig`
instance given its API representation.
diff --git a/google/cloud/bigquery/format_options.py b/google/cloud/bigquery/format_options.py
new file mode 100644
index 000000000..2c9a2ce20
--- /dev/null
+++ b/google/cloud/bigquery/format_options.py
@@ -0,0 +1,80 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import copy
+from typing import Dict
+
+
+class ParquetOptions:
+    """Additional options if the PARQUET source format is used.
+
+    Instances wrap a plain dictionary whose keys are the API field names
+    (``enumAsString``, ``enableListInference``).
+    """
+
+    _SOURCE_FORMAT = "PARQUET"
+    _RESOURCE_NAME = "parquetOptions"
+
+    def __init__(self):
+        # Raw API resource backing the properties below.
+        self._properties = {}
+
+    @property
+    def enum_as_string(self) -> bool:
+        """Indicates whether to infer Parquet ENUM logical type as STRING instead of
+        BYTES by default.
+
+        Read from the ``enumAsString`` key; ``None`` when the key is absent.
+
+        See
+        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ParquetOptions.FIELDS.enum_as_string
+        """
+        resource = self._properties
+        return resource.get("enumAsString")
+
+    @enum_as_string.setter
+    def enum_as_string(self, value: bool) -> None:
+        self._properties["enumAsString"] = value
+
+    @property
+    def enable_list_inference(self) -> bool:
+        """Indicates whether to use schema inference specifically for Parquet LIST
+        logical type.
+
+        Read from the ``enableListInference`` key; ``None`` when the key is
+        absent.
+
+        See
+        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ParquetOptions.FIELDS.enable_list_inference
+        """
+        resource = self._properties
+        return resource.get("enableListInference")
+
+    @enable_list_inference.setter
+    def enable_list_inference(self, value: bool) -> None:
+        self._properties["enableListInference"] = value
+
+    @classmethod
+    def from_api_repr(cls, resource: Dict[str, bool]) -> "ParquetOptions":
+        """Factory: construct an instance from a resource dict.
+
+        The resource is deep-copied, so later changes to ``resource`` do not
+        affect the returned instance.
+
+        Args:
+            resource (Dict[str, bool]):
+                Definition of a :class:`~.format_options.ParquetOptions` instance in
+                the same representation as is returned from the API.
+
+        Returns:
+            :class:`~.format_options.ParquetOptions`:
+                Configuration parsed from ``resource``.
+        """
+        instance = cls()
+        instance._properties = copy.deepcopy(resource)
+        return instance
+
+    def to_api_repr(self) -> dict:
+        """Build an API representation of this object.
+
+        Returns:
+            Dict[str, bool]:
+                A deep copy of the underlying properties, in the format used by
+                the BigQuery API.
+        """
+        snapshot = copy.deepcopy(self._properties)
+        return snapshot
diff --git a/google/cloud/bigquery/job/__init__.py b/google/cloud/bigquery/job/__init__.py
index 26ecf8d3c..f51311b0b 100644
--- a/google/cloud/bigquery/job/__init__.py
+++ b/google/cloud/bigquery/job/__init__.py
@@ -19,20 +19,25 @@
from google.cloud.bigquery.job.base import _DONE_STATE
from google.cloud.bigquery.job.base import _JobConfig
from google.cloud.bigquery.job.base import _JobReference
+from google.cloud.bigquery.job.base import ReservationUsage
from google.cloud.bigquery.job.base import ScriptStatistics
from google.cloud.bigquery.job.base import ScriptStackFrame
+from google.cloud.bigquery.job.base import TransactionInfo
from google.cloud.bigquery.job.base import UnknownJob
from google.cloud.bigquery.job.copy_ import CopyJob
from google.cloud.bigquery.job.copy_ import CopyJobConfig
+from google.cloud.bigquery.job.copy_ import OperationType
from google.cloud.bigquery.job.extract import ExtractJob
from google.cloud.bigquery.job.extract import ExtractJobConfig
from google.cloud.bigquery.job.load import LoadJob
from google.cloud.bigquery.job.load import LoadJobConfig
from google.cloud.bigquery.job.query import _contains_order_by
+from google.cloud.bigquery.job.query import DmlStats
from google.cloud.bigquery.job.query import QueryJob
from google.cloud.bigquery.job.query import QueryJobConfig
from google.cloud.bigquery.job.query import QueryPlanEntry
from google.cloud.bigquery.job.query import QueryPlanEntryStep
+from google.cloud.bigquery.job.query import ScriptOptions
from google.cloud.bigquery.job.query import TimelineEntry
from google.cloud.bigquery.enums import Compression
from google.cloud.bigquery.enums import CreateDisposition
@@ -51,20 +56,24 @@
"_DONE_STATE",
"_JobConfig",
"_JobReference",
+ "ReservationUsage",
"ScriptStatistics",
"ScriptStackFrame",
"UnknownJob",
"CopyJob",
"CopyJobConfig",
+ "OperationType",
"ExtractJob",
"ExtractJobConfig",
"LoadJob",
"LoadJobConfig",
"_contains_order_by",
+ "DmlStats",
"QueryJob",
"QueryJobConfig",
"QueryPlanEntry",
"QueryPlanEntryStep",
+ "ScriptOptions",
"TimelineEntry",
"Compression",
"CreateDisposition",
@@ -73,5 +82,6 @@
"QueryPriority",
"SchemaUpdateOption",
"SourceFormat",
+ "TransactionInfo",
"WriteDisposition",
]
diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py
index 5ba01aa67..e5fc592a6 100644
--- a/google/cloud/bigquery/job/base.py
+++ b/google/cloud/bigquery/job/base.py
@@ -14,9 +14,12 @@
"""Base classes and helpers for job classes."""
+from collections import namedtuple
import copy
import http
import threading
+import typing
+from typing import Dict, Optional
from google.api_core import exceptions
import google.api_core.future.polling
@@ -24,6 +27,9 @@
from google.cloud.bigquery import _helpers
from google.cloud.bigquery.retry import DEFAULT_RETRY
+if typing.TYPE_CHECKING: # pragma: NO COVER
+ from google.api_core import retry as retries
+
_DONE_STATE = "DONE"
_STOPPED_REASON = "stopped"
@@ -73,6 +79,32 @@ def _error_result_to_exception(error_result):
)
+# Lightweight record type; the docstrings are attached after creation because
+# ``namedtuple()`` itself offers no way to pass them.
+ReservationUsage = namedtuple("ReservationUsage", "name slot_ms")
+ReservationUsage.__doc__ = "Job resource usage for a reservation."
+ReservationUsage.slot_ms.__doc__ = (
+    "Total slot milliseconds used by the reservation for a particular job."
+)
+ReservationUsage.name.__doc__ = (
+    'Reservation name or "unreserved" for on-demand resources usage.'
+)
+
+
+class TransactionInfo(typing.NamedTuple):
+    """[Alpha] Information of a multi-statement transaction.
+
+    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#TransactionInfo
+
+    .. versionadded:: 2.24.0
+    """
+
+    transaction_id: str
+    """Output only. ID of the transaction."""
+
+    @classmethod
+    def from_api_repr(cls, transaction_info: Dict[str, str]) -> "TransactionInfo":
+        """Build an instance from the ``transactionInfo`` API resource.
+
+        Args:
+            transaction_info (Dict[str, str]):
+                Mapping that must contain the ``transactionId`` key.
+
+        Returns:
+            TransactionInfo: Tuple wrapping the transaction ID.
+
+        Raises:
+            KeyError: If ``transactionId`` is missing from ``transaction_info``.
+        """
+        transaction_id = transaction_info["transactionId"]
+        return cls(transaction_id)
+
+
class _JobReference(object):
"""A reference to a job.
@@ -305,6 +337,34 @@ def _job_statistics(self):
statistics = self._properties.get("statistics", {})
return statistics.get(self._JOB_TYPE, {})
+    @property
+    def reservation_usage(self):
+        """Job resource usage breakdown by reservation.
+
+        Each raw ``reservationUsage`` entry of the job statistics is converted
+        into a ``ReservationUsage`` tuple, with ``slotMs`` cast to ``int``.
+
+        Returns:
+            List[google.cloud.bigquery.job.ReservationUsage]:
+                Reservation usage stats. Can be empty if not set from the server.
+        """
+        raw_entries = _helpers._get_sub_prop(
+            self._properties, ["statistics", "reservationUsage"], default=()
+        )
+        breakdown = []
+        for entry in raw_entries:
+            breakdown.append(
+                ReservationUsage(name=entry["name"], slot_ms=int(entry["slotMs"]))
+            )
+        return breakdown
+
+    @property
+    def transaction_info(self) -> Optional[TransactionInfo]:
+        """Information of the multi-statement transaction if this job is part of one.
+
+        ``None`` when the job statistics carry no ``transactionInfo`` entry.
+
+        .. versionadded:: 2.24.0
+        """
+        statistics = self._properties.get("statistics", {})
+        raw_info = statistics.get("transactionInfo")
+        if raw_info is not None:
+            return TransactionInfo.from_api_repr(raw_info)
+        return None
+
@property
def error_result(self):
"""Error information about the job as a whole.
@@ -439,7 +499,9 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None):
)
self._set_properties(api_response)
- def exists(self, client=None, retry=DEFAULT_RETRY, timeout=None):
+ def exists(
+ self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None
+ ) -> bool:
"""API call: test for the existence of the job via a GET request
See
@@ -482,7 +544,9 @@ def exists(self, client=None, retry=DEFAULT_RETRY, timeout=None):
else:
return True
- def reload(self, client=None, retry=DEFAULT_RETRY, timeout=None):
+ def reload(
+ self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None
+ ):
"""API call: refresh job properties via a GET request.
See
@@ -517,7 +581,9 @@ def reload(self, client=None, retry=DEFAULT_RETRY, timeout=None):
)
self._set_properties(api_response)
- def cancel(self, client=None, retry=DEFAULT_RETRY, timeout=None):
+ def cancel(
+ self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None
+ ) -> bool:
"""API call: cancel job via a POST request
See
@@ -583,11 +649,18 @@ def _set_future_result(self):
else:
self.set_result(self)
- def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True):
+ def done(
+ self,
+ retry: "retries.Retry" = DEFAULT_RETRY,
+ timeout: float = None,
+ reload: bool = True,
+ ) -> bool:
"""Checks if the job is complete.
Args:
- retry (Optional[google.api_core.retry.Retry]): How to retry the RPC.
+ retry (Optional[google.api_core.retry.Retry]):
+ How to retry the RPC. If the job state is ``DONE``, retrying is aborted
+ early, as the job will not change anymore.
timeout (Optional[float]):
The number of seconds to wait for the underlying HTTP transport
before using ``retry``.
@@ -604,11 +677,15 @@ def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True):
self.reload(retry=retry, timeout=timeout)
return self.state == _DONE_STATE
- def result(self, retry=DEFAULT_RETRY, timeout=None):
+ def result(
+ self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None
+ ) -> "_AsyncJob":
"""Start the job and wait for it to complete and get the result.
Args:
- retry (Optional[google.api_core.retry.Retry]): How to retry the RPC.
+ retry (Optional[google.api_core.retry.Retry]):
+ How to retry the RPC. If the job state is ``DONE``, retrying is aborted
+ early, as the job will not change anymore.
timeout (Optional[float]):
The number of seconds to wait for the underlying HTTP transport
before using ``retry``.
@@ -757,7 +834,7 @@ def _del_sub_prop(self, key):
"""
_helpers._del_sub_prop(self._properties, [self._job_type, key])
- def to_api_repr(self):
+ def to_api_repr(self) -> dict:
"""Build an API representation of the job config.
Returns:
@@ -787,7 +864,10 @@ def _fill_from_default(self, default_job_config):
+ repr(default_job_config._job_type)
)
- new_job_config = self.__class__()
+ # cls is one of the job config subclasses that provides the job_type argument to
+ # this base class on instantiation, thus missing-parameter warning is a false
+ # positive here.
+ new_job_config = self.__class__() # pytype: disable=missing-parameter
default_job_properties = copy.deepcopy(default_job_config._properties)
for key in self._properties:
@@ -800,7 +880,7 @@ def _fill_from_default(self, default_job_config):
return new_job_config
@classmethod
- def from_api_repr(cls, resource):
+ def from_api_repr(cls, resource: dict) -> "_JobConfig":
"""Factory: construct a job configuration given its API representation
Args:
@@ -811,7 +891,10 @@ def from_api_repr(cls, resource):
Returns:
google.cloud.bigquery.job._JobConfig: Configuration parsed from ``resource``.
"""
- job_config = cls()
+ # cls is one of the job config subclasses that provides the job_type argument to
+ # this base class on instantiation, thus missing-parameter warning is a false
+ # positive here.
+ job_config = cls() # pytype: disable=missing-parameter
job_config._properties = resource
return job_config
@@ -898,7 +981,7 @@ class UnknownJob(_AsyncJob):
"""A job whose type cannot be determined."""
@classmethod
- def from_api_repr(cls, resource, client):
+ def from_api_repr(cls, resource: dict, client) -> "UnknownJob":
"""Construct an UnknownJob from the JSON representation.
Args:
diff --git a/google/cloud/bigquery/job/copy_.py b/google/cloud/bigquery/job/copy_.py
index 95f4b613b..c6ee98944 100644
--- a/google/cloud/bigquery/job/copy_.py
+++ b/google/cloud/bigquery/job/copy_.py
@@ -14,6 +14,8 @@
"""Classes for copy jobs."""
+from typing import Optional
+
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
from google.cloud.bigquery import _helpers
from google.cloud.bigquery.table import TableReference
@@ -23,6 +25,25 @@
from google.cloud.bigquery.job.base import _JobReference
+class OperationType:
+    """String constants naming the operation types of a table copy job.
+
+    ``OPERATION_TYPE_UNSPECIFIED`` is the value ``CopyJobConfig.operation_type``
+    falls back to.
+
+    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#operationtype
+    """
+
+    OPERATION_TYPE_UNSPECIFIED = "OPERATION_TYPE_UNSPECIFIED"
+    """No operation type has been specified."""
+
+    COPY = "COPY"
+    """Source and destination tables are of the same table type."""
+
+    SNAPSHOT = "SNAPSHOT"
+    """Source table type is TABLE and destination table type is SNAPSHOT."""
+
+    RESTORE = "RESTORE"
+    """Source table type is SNAPSHOT and destination table type is TABLE."""
+
+
class CopyJobConfig(_JobConfig):
"""Configuration options for copy jobs.
@@ -85,6 +106,23 @@ def destination_encryption_configuration(self, value):
api_repr = value.to_api_repr()
self._set_sub_prop("destinationEncryptionConfiguration", api_repr)
+    @property
+    def operation_type(self) -> str:
+        """The operation to perform with this copy job.
+
+        ``OperationType.OPERATION_TYPE_UNSPECIFIED`` is passed as the default
+        for the ``operationType`` lookup.
+
+        See
+        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationTableCopy.FIELDS.operation_type
+        """
+        fallback = OperationType.OPERATION_TYPE_UNSPECIFIED
+        return self._get_sub_prop("operationType", fallback)
+
+    @operation_type.setter
+    def operation_type(self, value: Optional[str]):
+        # ``None`` is normalized to the explicit "unspecified" constant.
+        effective = (
+            OperationType.OPERATION_TYPE_UNSPECIFIED if value is None else value
+        )
+        self._set_sub_prop("operationType", effective)
+
class CopyJob(_AsyncJob):
"""Asynchronous job: copy data into a table from other tables.
diff --git a/google/cloud/bigquery/job/extract.py b/google/cloud/bigquery/job/extract.py
index a6e262a32..3373bcdef 100644
--- a/google/cloud/bigquery/job/extract.py
+++ b/google/cloud/bigquery/job/extract.py
@@ -241,7 +241,7 @@ def to_api_repr(self):
}
@classmethod
- def from_api_repr(cls, resource, client):
+ def from_api_repr(cls, resource: dict, client) -> "ExtractJob":
"""Factory: construct a job given its API representation
.. note:
diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py
index e784af0a6..aee055c1c 100644
--- a/google/cloud/bigquery/job/load.py
+++ b/google/cloud/bigquery/job/load.py
@@ -14,15 +14,17 @@
"""Classes for load jobs."""
+from typing import FrozenSet, List, Iterable, Optional
+
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
from google.cloud.bigquery.external_config import HivePartitioningOptions
+from google.cloud.bigquery.format_options import ParquetOptions
from google.cloud.bigquery import _helpers
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.schema import _to_schema_fields
from google.cloud.bigquery.table import RangePartitioning
from google.cloud.bigquery.table import TableReference
from google.cloud.bigquery.table import TimePartitioning
-
from google.cloud.bigquery.job.base import _AsyncJob
from google.cloud.bigquery.job.base import _JobConfig
from google.cloud.bigquery.job.base import _JobReference
@@ -120,6 +122,27 @@ def create_disposition(self):
def create_disposition(self, value):
self._set_sub_prop("createDisposition", value)
+    @property
+    def decimal_target_types(self) -> Optional[FrozenSet[str]]:
+        """Possible SQL data types to which the source decimal values are converted.
+
+        The stored ``decimalTargetTypes`` value is exposed as a ``frozenset``;
+        ``None`` is returned when the lookup yields ``None``.
+
+        See:
+            https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.decimal_target_types
+
+        .. versionadded:: 2.21.0
+        """
+        raw_types = self._get_sub_prop("decimalTargetTypes")
+        return None if raw_types is None else frozenset(raw_types)
+
+    @decimal_target_types.setter
+    def decimal_target_types(self, value: Optional[Iterable[str]]):
+        # ``None`` removes the option; anything else is materialized as a list.
+        if value is None:
+            self._del_sub_prop("decimalTargetTypes")
+        else:
+            self._set_sub_prop("decimalTargetTypes", list(value))
+
@property
def destination_encryption_configuration(self):
"""Optional[google.cloud.bigquery.encryption_configuration.EncryptionConfiguration]: Custom
@@ -147,7 +170,7 @@ def destination_encryption_configuration(self, value):
@property
def destination_table_description(self):
- """Optional[str]: Name given to destination table.
+ """Optional[str]: Description of the destination table.
See:
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.description
@@ -276,6 +299,27 @@ def null_marker(self):
def null_marker(self, value):
self._set_sub_prop("nullMarker", value)
+    @property
+    def projection_fields(self) -> Optional[List[str]]:
+        """Optional[List[str]]: If
+        :attr:`google.cloud.bigquery.job.LoadJobConfig.source_format` is set to
+        "DATASTORE_BACKUP", indicates which entity properties to load into
+        BigQuery from a Cloud Datastore backup.
+
+        Property names are case sensitive and must be top-level properties. If
+        no properties are specified, BigQuery loads all properties. If any
+        named property isn't found in the Cloud Datastore backup, an invalid
+        error is returned in the job result.
+
+        See:
+            https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.projection_fields
+        """
+        fields = self._get_sub_prop("projectionFields")
+        return fields
+
+    @projection_fields.setter
+    def projection_fields(self, value: Optional[List[str]]):
+        # The value (including ``None``) is stored as-is under the API field name.
+        self._set_sub_prop("projectionFields", value)
+
@property
def quote_character(self):
"""Optional[str]: Character used to quote data sections (CSV only).
@@ -439,6 +483,26 @@ def write_disposition(self):
def write_disposition(self, value):
self._set_sub_prop("writeDisposition", value)
+    @property
+    def parquet_options(self):
+        """Optional[google.cloud.bigquery.format_options.ParquetOptions]: Additional
+        properties to set if ``sourceFormat`` is set to PARQUET.
+
+        A new ``ParquetOptions`` wrapper is built from the stored resource on
+        every access.
+
+        See:
+            https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.parquet_options
+        """
+        resource = self._get_sub_prop("parquetOptions")
+        if resource is None:
+            return None
+        return ParquetOptions.from_api_repr(resource)
+
+    @parquet_options.setter
+    def parquet_options(self, value):
+        # ``None`` removes the option; otherwise the API representation of the
+        # given options object is stored.
+        if value is None:
+            self._del_sub_prop("parquetOptions")
+            return
+        self._set_sub_prop("parquetOptions", value.to_api_repr())
+
class LoadJob(_AsyncJob):
"""Asynchronous job for loading data into a table.
@@ -733,7 +797,7 @@ def to_api_repr(self):
}
@classmethod
- def from_api_repr(cls, resource, client):
+ def from_api_repr(cls, resource: dict, client) -> "LoadJob":
"""Factory: construct a job given its API representation
.. note:
diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py
index d87f87f52..0cb4798be 100644
--- a/google/cloud/bigquery/job/query.py
+++ b/google/cloud/bigquery/job/query.py
@@ -17,14 +17,18 @@
import concurrent.futures
import copy
import re
+import typing
+from typing import Any, Dict, Optional, Union
from google.api_core import exceptions
+from google.api_core.future import polling as polling_future
import requests
from google.cloud.bigquery.dataset import Dataset
from google.cloud.bigquery.dataset import DatasetListItem
from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
+from google.cloud.bigquery.enums import KeyResultStatementKind
from google.cloud.bigquery.external_config import ExternalConfig
from google.cloud.bigquery import _helpers
from google.cloud.bigquery.query import _query_param_from_api_repr
@@ -32,7 +36,7 @@
from google.cloud.bigquery.query import ScalarQueryParameter
from google.cloud.bigquery.query import StructQueryParameter
from google.cloud.bigquery.query import UDFResource
-from google.cloud.bigquery.retry import DEFAULT_RETRY
+from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY
from google.cloud.bigquery.routine import RoutineReference
from google.cloud.bigquery.table import _EmptyRowIterator
from google.cloud.bigquery.table import RangePartitioning
@@ -42,10 +46,19 @@
from google.cloud.bigquery._tqdm_helpers import wait_for_query
from google.cloud.bigquery.job.base import _AsyncJob
-from google.cloud.bigquery.job.base import _DONE_STATE
from google.cloud.bigquery.job.base import _JobConfig
from google.cloud.bigquery.job.base import _JobReference
+if typing.TYPE_CHECKING: # pragma: NO COVER
+ # Assumption: type checks are only used by library developers and CI environments
+ # that have all optional dependencies installed, thus no conditional imports.
+ import pandas
+ import geopandas
+ import pyarrow
+ from google.api_core import retry as retries
+ from google.cloud import bigquery_storage
+ from google.cloud.bigquery.table import RowIterator
+
_CONTAINS_ORDER_BY = re.compile(r"ORDER\s+BY", re.IGNORECASE)
_TIMEOUT_BUFFER_SECS = 0.1
@@ -102,6 +115,111 @@ def _to_api_repr_table_defs(value):
return {k: ExternalConfig.to_api_repr(v) for k, v in value.items()}
+class DmlStats(typing.NamedTuple):
+    """Detailed statistics for DML statements.
+
+    https://cloud.google.com/bigquery/docs/reference/rest/v2/DmlStats
+    """
+
+    inserted_row_count: int = 0
+    """Number of inserted rows. Populated by DML INSERT and MERGE statements."""
+
+    deleted_row_count: int = 0
+    """Number of deleted rows. Populated by DML DELETE, MERGE and TRUNCATE statements.
+    """
+
+    updated_row_count: int = 0
+    """Number of updated rows. Populated by DML UPDATE and MERGE statements."""
+
+    @classmethod
+    def from_api_repr(cls, stats: Dict[str, str]) -> "DmlStats":
+        """Build an instance from the ``dmlStats`` API resource.
+
+        Args:
+            stats (Dict[str, str]):
+                Mapping keyed by the API field names. Each present value is
+                converted with ``int()``; a missing key falls back to the
+                default declared for the matching field on this class.
+
+        Returns:
+            DmlStats: Statistics parsed from ``stats``.
+        """
+        # NOTE: The field order here must match the order of fields set at the
+        # class level.
+        api_fields = ("insertedRowCount", "deletedRowCount", "updatedRowCount")
+        defaults = cls.__new__.__defaults__
+
+        counts = [
+            int(stats.get(api_field, default_val))
+            for api_field, default_val in zip(api_fields, defaults)
+        ]
+        return cls(*counts)
+
+
+class ScriptOptions:
+    """Options controlling the execution of scripts.
+
+    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#ScriptOptions
+    """
+
+    def __init__(
+        self,
+        statement_timeout_ms: Optional[int] = None,
+        statement_byte_budget: Optional[int] = None,
+        key_result_statement: Optional[KeyResultStatementKind] = None,
+    ):
+        # Raw API resource; it is populated through the property setters so
+        # that the integer options are stored in their string form.
+        self._properties = {}
+        self.statement_timeout_ms = statement_timeout_ms
+        self.statement_byte_budget = statement_byte_budget
+        self.key_result_statement = key_result_statement
+
+    @classmethod
+    def from_api_repr(cls, resource: Dict[str, Any]) -> "ScriptOptions":
+        """Factory: construct instance from the JSON repr.
+
+        The resource is deep-copied, so later changes to ``resource`` do not
+        affect the returned instance.
+
+        Args:
+            resource(Dict[str, Any]):
+                ScriptOptions representation returned from API.
+
+        Returns:
+            google.cloud.bigquery.ScriptOptions:
+                ScriptOptions instance parsed from ``resource``.
+        """
+        options = cls()
+        options._properties = copy.deepcopy(resource)
+        return options
+
+    def to_api_repr(self) -> Dict[str, Any]:
+        """Construct the API resource representation (a deep copy)."""
+        snapshot = copy.deepcopy(self._properties)
+        return snapshot
+
+    @property
+    def statement_timeout_ms(self) -> Union[int, None]:
+        """Timeout period for each statement in a script."""
+        raw = self._properties.get("statementTimeoutMs")
+        return _helpers._int_or_none(raw)
+
+    @statement_timeout_ms.setter
+    def statement_timeout_ms(self, value: Union[int, None]):
+        # Non-``None`` values are kept in their string form.
+        self._properties["statementTimeoutMs"] = None if value is None else str(value)
+
+    @property
+    def statement_byte_budget(self) -> Union[int, None]:
+        """Limit on the number of bytes billed per statement.
+
+        Exceeding this budget results in an error.
+        """
+        raw = self._properties.get("statementByteBudget")
+        return _helpers._int_or_none(raw)
+
+    @statement_byte_budget.setter
+    def statement_byte_budget(self, value: Union[int, None]):
+        # Non-``None`` values are kept in their string form.
+        self._properties["statementByteBudget"] = None if value is None else str(value)
+
+    @property
+    def key_result_statement(self) -> Union[KeyResultStatementKind, None]:
+        """Determines which statement in the script represents the "key result".
+
+        This is used to populate the schema and query results of the script job.
+        Default is ``KeyResultStatementKind.LAST``.
+        """
+        resource = self._properties
+        return resource.get("keyResultStatement")
+
+    @key_result_statement.setter
+    def key_result_statement(self, value: Union[KeyResultStatementKind, None]):
+        self._properties["keyResultStatement"] = value
+
+
class QueryJobConfig(_JobConfig):
"""Configuration options for query jobs.
@@ -491,7 +609,24 @@ def schema_update_options(self):
def schema_update_options(self, values):
self._set_sub_prop("schemaUpdateOptions", values)
- def to_api_repr(self):
+    @property
+    def script_options(self) -> Optional[ScriptOptions]:
+        """Optional[google.cloud.bigquery.job.ScriptOptions]: Options controlling
+        the execution of scripts.
+
+        A new ``ScriptOptions`` wrapper is built from the stored resource on
+        every access; ``None`` is returned when the ``scriptOptions`` lookup
+        yields ``None``.
+
+        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#scriptoptions
+        """
+        prop = self._get_sub_prop("scriptOptions")
+        if prop is not None:
+            prop = ScriptOptions.from_api_repr(prop)
+        return prop
+
+    @script_options.setter
+    def script_options(self, value: Union[ScriptOptions, None]):
+        # Persist the plain API mapping rather than the wrapper object; ``None``
+        # is stored as-is.
+        if value is not None:
+            value = value.to_api_repr()
+        self._set_sub_prop("scriptOptions", value)
+
+ def to_api_repr(self) -> dict:
"""Build an API representation of the query job config.
Returns:
@@ -718,7 +853,7 @@ def to_api_repr(self):
}
@classmethod
- def from_api_repr(cls, resource, client):
+ def from_api_repr(cls, resource: dict, client) -> "QueryJob":
"""Factory: construct a job given its API representation
Args:
@@ -974,41 +1109,13 @@ def estimated_bytes_processed(self):
result = int(result)
return result
- def done(self, retry=DEFAULT_RETRY, timeout=None, reload=True):
- """Refresh the job and checks if it is complete.
-
- Args:
- retry (Optional[google.api_core.retry.Retry]):
- How to retry the call that retrieves query results.
- timeout (Optional[float]):
- The number of seconds to wait for the underlying HTTP transport
- before using ``retry``.
- reload (Optional[bool]):
- If ``True``, make an API call to refresh the job state of
- unfinished jobs before checking. Default ``True``.
-
- Returns:
- bool: True if the job is complete, False otherwise.
- """
- # Do not refresh if the state is already done, as the job will not
- # change once complete.
- is_done = self.state == _DONE_STATE
- if not reload or is_done:
- return is_done
-
- self._reload_query_results(retry=retry, timeout=timeout)
-
- # If an explicit timeout is not given, fall back to the transport timeout
- # stored in _blocking_poll() in the process of polling for job completion.
- transport_timeout = timeout if timeout is not None else self._transport_timeout
-
- # Only reload the job once we know the query is complete.
- # This will ensure that fields such as the destination table are
- # correctly populated.
- if self._query_results.complete:
- self.reload(retry=retry, timeout=transport_timeout)
-
- return self.state == _DONE_STATE
+    @property
+    def dml_stats(self) -> Optional[DmlStats]:
+        """Optional[DmlStats]: Detailed DML statistics reported for this job.
+
+        ``None`` when the job statistics contain no ``dmlStats`` entry.
+        """
+        raw_stats = self._job_statistics().get("dmlStats")
+        if raw_stats is not None:
+            return DmlStats.from_api_repr(raw_stats)
+        return None
def _blocking_poll(self, timeout=None, **kwargs):
self._done_timeout = timeout
@@ -1072,7 +1179,9 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None):
exc.query_job = self
raise
- def _reload_query_results(self, retry=DEFAULT_RETRY, timeout=None):
+ def _reload_query_results(
+ self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None
+ ):
"""Refresh the cached query results.
Args:
@@ -1111,14 +1220,49 @@ def _reload_query_results(self, retry=DEFAULT_RETRY, timeout=None):
timeout=transport_timeout,
)
+    def _done_or_raise(self, retry=DEFAULT_RETRY, timeout=None):
+        """Check if the query has finished running and raise if it's not.
+
+        If the query has finished, also reload the job itself.
+
+        Args:
+            retry (Optional[google.api_core.retry.Retry]):
+                How to retry the RPCs that refresh the query results and the job.
+            timeout (Optional[float]):
+                Transport timeout for those RPCs. When ``None``, the value
+                stored in ``self._transport_timeout`` is used instead.
+
+        Raises:
+            google.api_core.future.polling._OperationNotComplete:
+                If the refreshed query results are not yet complete.
+        """
+        # If an explicit timeout is not given, fall back to the transport timeout
+        # stored in _blocking_poll() in the process of polling for job completion.
+        transport_timeout = timeout if timeout is not None else self._transport_timeout
+
+        try:
+            self._reload_query_results(retry=retry, timeout=transport_timeout)
+        except exceptions.GoogleAPIError as exc:
+            # Reloading also updates error details on self, thus no need for an
+            # explicit self.set_exception() call if reloading succeeds.
+            try:
+                self.reload(retry=retry, timeout=transport_timeout)
+            except exceptions.GoogleAPIError:
+                # Use the query results reload exception, as it generally contains
+                # much more useful error information.
+                self.set_exception(exc)
+            # Return normally (without raising _OperationNotComplete) in both
+            # cases above. A plain ``return`` is used rather than
+            # ``finally: return``, which would also silently discard unrelated
+            # exceptions (including KeyboardInterrupt and SystemExit) raised
+            # while reloading.
+            return
+
+        # Only reload the job once we know the query is complete.
+        # This will ensure that fields such as the destination table are
+        # correctly populated.
+        if not self._query_results.complete:
+            raise polling_future._OperationNotComplete()
+
+        try:
+            self.reload(retry=retry, timeout=transport_timeout)
+        except exceptions.GoogleAPIError as exc:
+            self.set_exception(exc)
+
def result(
self,
- page_size=None,
- max_results=None,
- retry=DEFAULT_RETRY,
- timeout=None,
- start_index=None,
- ):
+ page_size: int = None,
+ max_results: int = None,
+ retry: "retries.Retry" = DEFAULT_RETRY,
+ timeout: float = None,
+ start_index: int = None,
+ job_retry: "retries.Retry" = DEFAULT_JOB_RETRY,
+ ) -> Union["RowIterator", _EmptyRowIterator]:
"""Start the job and wait for it to complete and get the result.
Args:
@@ -1128,7 +1272,13 @@ def result(
max_results (Optional[int]):
The maximum total number of rows from this request.
retry (Optional[google.api_core.retry.Retry]):
- How to retry the call that retrieves rows.
+ How to retry the call that retrieves rows. This only
+ applies to making RPC calls. It isn't used to retry
+ failed jobs. This has a reasonable default that
+ should only be overridden with care. If the job state
+ is ``DONE``, retrying is aborted early even if the
+ results are not available, as this will not change
+ anymore.
timeout (Optional[float]):
The number of seconds to wait for the underlying HTTP transport
before using ``retry``.
@@ -1136,6 +1286,16 @@ def result(
applies to each individual request.
start_index (Optional[int]):
The zero-based index of the starting row to read.
+ job_retry (Optional[google.api_core.retry.Retry]):
+ How to retry failed jobs. The default retries
+ rate-limit-exceeded errors. Passing ``None`` disables
+ job retry.
+
+ Not all jobs can be retried. If ``job_id`` was
+ provided to the query that created this job, then the
+ job returned by the query will not be retryable, and
+ an exception will be raised if non-``None``
+ non-default ``job_retry`` is also provided.
Returns:
google.cloud.bigquery.table.RowIterator:
@@ -1151,17 +1311,66 @@ def result(
Raises:
google.cloud.exceptions.GoogleAPICallError:
- If the job failed.
+ If the job failed and retries aren't successful.
concurrent.futures.TimeoutError:
If the job did not complete in the given timeout.
+ TypeError:
+ If Non-``None`` and non-default ``job_retry`` is
+ provided and the job is not retryable.
"""
try:
- super(QueryJob, self).result(retry=retry, timeout=timeout)
+ retry_do_query = getattr(self, "_retry_do_query", None)
+ if retry_do_query is not None:
+ if job_retry is DEFAULT_JOB_RETRY:
+ job_retry = self._job_retry
+ else:
+ if job_retry is not None and job_retry is not DEFAULT_JOB_RETRY:
+ raise TypeError(
+ "`job_retry` was provided, but this job is"
+ " not retryable, because a custom `job_id` was"
+ " provided to the query that created this job."
+ )
+
+ first = True
+
+ def do_get_result():
+ nonlocal first
+
+ if first:
+ first = False
+ else:
+ # Note that we won't get here if retry_do_query is
+ # None, because we won't use a retry.
+
+                    # The original job has failed. Create a new one.
+ job = retry_do_query()
+
+ # If it's already failed, we might as well stop:
+ if job.done() and job.exception() is not None:
+ raise job.exception()
+
+ # Become the new job:
+ self.__dict__.clear()
+ self.__dict__.update(job.__dict__)
+
+ # This shouldn't be necessary, because once we have a good
+                    # job, it should stay good, and we shouldn't have to retry.
+ # But let's be paranoid. :)
+ self._retry_do_query = retry_do_query
+ self._job_retry = job_retry
+
+ super(QueryJob, self).result(retry=retry, timeout=timeout)
+
+ # Since the job could already be "done" (e.g. got a finished job
+ # via client.get_job), the superclass call to done() might not
+ # set the self._query_results cache.
+ self._reload_query_results(retry=retry, timeout=timeout)
+
+ if retry_do_query is not None and job_retry is not None:
+ do_get_result = job_retry(do_get_result)
+
+ do_get_result()
- # Since the job could already be "done" (e.g. got a finished job
- # via client.get_job), the superclass call to done() might not
- # set the self._query_results cache.
- self._reload_query_results(retry=retry, timeout=timeout)
except exceptions.GoogleAPICallError as exc:
exc.message += self._format_for_exception(self.query, self.job_id)
exc.query_job = self
@@ -1193,13 +1402,15 @@ def result(
return rows
# If changing the signature of this method, make sure to apply the same
- # changes to table.RowIterator.to_arrow()
+ # changes to table.RowIterator.to_arrow(), except for the max_results parameter
+ # that should only exist here in the QueryJob method.
def to_arrow(
self,
- progress_bar_type=None,
- bqstorage_client=None,
- create_bqstorage_client=True,
- ):
+ progress_bar_type: str = None,
+ bqstorage_client: "bigquery_storage.BigQueryReadClient" = None,
+ create_bqstorage_client: bool = True,
+ max_results: Optional[int] = None,
+ ) -> "pyarrow.Table":
"""[Beta] Create a class:`pyarrow.Table` by loading all pages of a
table or query.
@@ -1240,7 +1451,12 @@ def to_arrow(
This argument does nothing if ``bqstorage_client`` is supplied.
- ..versionadded:: 1.24.0
+ .. versionadded:: 1.24.0
+
+ max_results (Optional[int]):
+ Maximum number of rows to include in the result. No limit by default.
+
+ .. versionadded:: 2.21.0
Returns:
pyarrow.Table
@@ -1252,9 +1468,9 @@ def to_arrow(
ValueError:
If the :mod:`pyarrow` library cannot be imported.
- ..versionadded:: 1.17.0
+ .. versionadded:: 1.17.0
"""
- query_result = wait_for_query(self, progress_bar_type)
+ query_result = wait_for_query(self, progress_bar_type, max_results=max_results)
return query_result.to_arrow(
progress_bar_type=progress_bar_type,
bqstorage_client=bqstorage_client,
@@ -1262,15 +1478,18 @@ def to_arrow(
)
# If changing the signature of this method, make sure to apply the same
- # changes to table.RowIterator.to_dataframe()
+ # changes to table.RowIterator.to_dataframe(), except for the max_results parameter
+ # that should only exist here in the QueryJob method.
def to_dataframe(
self,
- bqstorage_client=None,
- dtypes=None,
- progress_bar_type=None,
- create_bqstorage_client=True,
- date_as_object=True,
- ):
+ bqstorage_client: "bigquery_storage.BigQueryReadClient" = None,
+ dtypes: Dict[str, Any] = None,
+ progress_bar_type: str = None,
+ create_bqstorage_client: bool = True,
+ date_as_object: bool = True,
+ max_results: Optional[int] = None,
+ geography_as_object: bool = False,
+ ) -> "pandas.DataFrame":
"""Return a pandas DataFrame from a QueryJob
Args:
@@ -1299,7 +1518,7 @@ def to_dataframe(
:func:`~google.cloud.bigquery.table.RowIterator.to_dataframe`
for details.
- ..versionadded:: 1.11.0
+ .. versionadded:: 1.11.0
create_bqstorage_client (Optional[bool]):
If ``True`` (default), create a BigQuery Storage API client
using the default API settings. The BigQuery Storage API
@@ -1308,29 +1527,143 @@ def to_dataframe(
This argument does nothing if ``bqstorage_client`` is supplied.
- ..versionadded:: 1.24.0
+ .. versionadded:: 1.24.0
date_as_object (Optional[bool]):
If ``True`` (default), cast dates to objects. If ``False``, convert
to datetime64[ns] dtype.
- ..versionadded:: 1.26.0
+ .. versionadded:: 1.26.0
+
+ max_results (Optional[int]):
+ Maximum number of rows to include in the result. No limit by default.
+
+ .. versionadded:: 2.21.0
+
+ geography_as_object (Optional[bool]):
+ If ``True``, convert GEOGRAPHY data to :mod:`shapely`
+ geometry objects. If ``False`` (default), don't cast
+ geography data to :mod:`shapely` geometry objects.
+
+ .. versionadded:: 2.24.0
Returns:
- A :class:`~pandas.DataFrame` populated with row data and column
- headers from the query results. The column headers are derived
- from the destination table's schema.
+ pandas.DataFrame:
+ A :class:`~pandas.DataFrame` populated with row data
+ and column headers from the query results. The column
+ headers are derived from the destination table's
+ schema.
Raises:
- ValueError: If the `pandas` library cannot be imported.
+ ValueError:
+ If the :mod:`pandas` library cannot be imported, or
+ the :mod:`google.cloud.bigquery_storage_v1` module is
+ required but cannot be imported. Also if
+ `geography_as_object` is `True`, but the
+ :mod:`shapely` library cannot be imported.
"""
- query_result = wait_for_query(self, progress_bar_type)
+ query_result = wait_for_query(self, progress_bar_type, max_results=max_results)
return query_result.to_dataframe(
bqstorage_client=bqstorage_client,
dtypes=dtypes,
progress_bar_type=progress_bar_type,
create_bqstorage_client=create_bqstorage_client,
date_as_object=date_as_object,
+ geography_as_object=geography_as_object,
+ )
+
+ # If changing the signature of this method, make sure to apply the same
+ # changes to table.RowIterator.to_geodataframe(), except for the max_results parameter
+ # that should only exist here in the QueryJob method.
+ def to_geodataframe(
+ self,
+ bqstorage_client: "bigquery_storage.BigQueryReadClient" = None,
+ dtypes: Dict[str, Any] = None,
+ progress_bar_type: str = None,
+ create_bqstorage_client: bool = True,
+ date_as_object: bool = True,
+ max_results: Optional[int] = None,
+ geography_column: Optional[str] = None,
+ ) -> "geopandas.GeoDataFrame":
+ """Return a GeoPandas GeoDataFrame from a QueryJob
+
+ Args:
+ bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):
+ A BigQuery Storage API client. If supplied, use the faster
+ BigQuery Storage API to fetch rows from BigQuery. This
+ API is a billable API.
+
+ This method requires the ``fastavro`` and
+ ``google-cloud-bigquery-storage`` libraries.
+
+ Reading from a specific partition or snapshot is not
+ currently supported by this method.
+
+ dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]):
+ A dictionary mapping column names to pandas ``dtype``s. The provided
+ ``dtype`` is used when constructing the series for the column
+ specified. Otherwise, the default pandas behavior is used.
+
+ progress_bar_type (Optional[str]):
+ If set, use the `tqdm <https://tqdm.github.io/>`_ library to
+ display a progress bar while the data downloads. Install the
+ ``tqdm`` package to use this feature.
+
+ See
+ :func:`~google.cloud.bigquery.table.RowIterator.to_dataframe`
+ for details.
+
+ .. versionadded:: 1.11.0
+ create_bqstorage_client (Optional[bool]):
+ If ``True`` (default), create a BigQuery Storage API client
+ using the default API settings. The BigQuery Storage API
+ is a faster way to fetch rows from BigQuery. See the
+ ``bqstorage_client`` parameter for more information.
+
+ This argument does nothing if ``bqstorage_client`` is supplied.
+
+ .. versionadded:: 1.24.0
+
+ date_as_object (Optional[bool]):
+ If ``True`` (default), cast dates to objects. If ``False``, convert
+ to datetime64[ns] dtype.
+
+ .. versionadded:: 1.26.0
+
+ max_results (Optional[int]):
+ Maximum number of rows to include in the result. No limit by default.
+
+ .. versionadded:: 2.21.0
+
+ geography_column (Optional[str]):
+ If there is more than one GEOGRAPHY column,
+ identifies which one to use to construct a GeoPandas
+ GeoDataFrame. This option can be omitted if there's
+ only one GEOGRAPHY column.
+
+ Returns:
+ geopandas.GeoDataFrame:
+ A :class:`geopandas.GeoDataFrame` populated with row
+ data and column headers from the query results. The
+ column headers are derived from the destination
+ table's schema.
+
+ Raises:
+ ValueError:
+ If the :mod:`geopandas` library cannot be imported, or the
+ :mod:`google.cloud.bigquery_storage_v1` module is
+ required but cannot be imported.
+
+ .. versionadded:: 2.24.0
+ """
+ query_result = wait_for_query(self, progress_bar_type, max_results=max_results)
+ return query_result.to_geodataframe(
+ bqstorage_client=bqstorage_client,
+ dtypes=dtypes,
+ progress_bar_type=progress_bar_type,
+ create_bqstorage_client=create_bqstorage_client,
+ date_as_object=date_as_object,
+ geography_column=geography_column,
)
def __iter__(self):
@@ -1350,7 +1683,7 @@ def __init__(self, kind, substeps):
self.substeps = list(substeps)
@classmethod
- def from_api_repr(cls, resource):
+ def from_api_repr(cls, resource: dict) -> "QueryPlanEntryStep":
"""Factory: construct instance from the JSON repr.
Args:
@@ -1380,7 +1713,7 @@ def __init__(self):
self._properties = {}
@classmethod
- def from_api_repr(cls, resource):
+ def from_api_repr(cls, resource: dict) -> "QueryPlanEntry":
"""Factory: construct instance from the JSON repr.
Args:
diff --git a/google/cloud/bigquery/magics/line_arg_parser/lexer.py b/google/cloud/bigquery/magics/line_arg_parser/lexer.py
index 0cb63292c..cd809c389 100644
--- a/google/cloud/bigquery/magics/line_arg_parser/lexer.py
+++ b/google/cloud/bigquery/magics/line_arg_parser/lexer.py
@@ -49,90 +49,45 @@
# the value of an option other than "--params", we do not really care about its
# structure, and thus do not want to use any of the "Python tokens" for pattern matching.
#
-# Since token definition order is important, an OrderedDict is needed with tightly
-# controlled member definitions (i.e. passed as a sequence, and *not* via kwargs).
+# Token definition order is important, thus an OrderedDict is used. In addition, PEP 468
+# guarantees us that the order of kwargs is preserved in Python 3.6+.
token_types = OrderedDict(
- [
- (
- "state_parse_pos_args",
- OrderedDict(
- [
- (
- "GOTO_PARSE_NON_PARAMS_OPTIONS",
- r"(?P(?=--))", # double dash - starting the options list
- ),
- (
- "DEST_VAR",
- r"(?P[^\d\W]\w*)", # essentially a Python ID
- ),
- ]
- ),
- ),
- (
- "state_parse_non_params_options",
- OrderedDict(
- [
- (
- "GOTO_PARSE_PARAMS_OPTION",
- r"(?P(?=--params(?:\s|=|--|$)))", # the --params option
- ),
- ("OPTION_SPEC", r"(?P--\w+)"),
- ("OPTION_EQ", r"(?P=)"),
- ("OPT_VAL", r"(?P\S+?(?=\s|--|$))"),
- ]
- ),
- ),
- (
- "state_parse_params_option",
- OrderedDict(
- [
- (
- "PY_STRING",
- r"(?P(?:{})|(?:{}))".format(
- r"'(?:[^'\\]|\.)*'",
- r'"(?:[^"\\]|\.)*"', # single and double quoted strings
- ),
- ),
- ("PARAMS_OPT_SPEC", r"(?P--params(?=\s|=|--|$))"),
- ("PARAMS_OPT_EQ", r"(?P=)"),
- (
- "GOTO_PARSE_NON_PARAMS_OPTIONS",
- r"(?P(?=--\w+))", # found another option spec
- ),
- ("PY_BOOL", r"(?PTrue|False)"),
- ("DOLLAR_PY_ID", r"(?P\$[^\d\W]\w*)"),
- (
- "PY_NUMBER",
- r"(?P-?[1-9]\d*(?:\.\d+)?(:?[e|E][+-]?\d+)?)",
- ),
- ("SQUOTE", r"(?P')"),
- ("DQUOTE", r'(?P")'),
- ("COLON", r"(?P:)"),
- ("COMMA", r"(?P,)"),
- ("LCURL", r"(?P\{)"),
- ("RCURL", r"(?P})"),
- ("LSQUARE", r"(?P\[)"),
- ("RSQUARE", r"(?P])"),
- ("LPAREN", r"(?P\()"),
- ("RPAREN", r"(?P\))"),
- ]
- ),
- ),
- (
- "common",
- OrderedDict(
- [
- ("WS", r"(?P\s+)"),
- ("EOL", r"(?P$)"),
- (
- # anything not a whitespace or matched by something else
- "UNKNOWN",
- r"(?P\S+)",
- ),
- ]
- ),
+ state_parse_pos_args=OrderedDict(
+ GOTO_PARSE_NON_PARAMS_OPTIONS=r"(?P(?=--))", # double dash - starting the options list
+ DEST_VAR=r"(?P[^\d\W]\w*)", # essentially a Python ID
+ ),
+ state_parse_non_params_options=OrderedDict(
+ GOTO_PARSE_PARAMS_OPTION=r"(?P(?=--params(?:\s|=|--|$)))", # the --params option
+ OPTION_SPEC=r"(?P--\w+)",
+ OPTION_EQ=r"(?P=)",
+ OPT_VAL=r"(?P\S+?(?=\s|--|$))",
+ ),
+ state_parse_params_option=OrderedDict(
+ PY_STRING=r"(?P(?:{})|(?:{}))".format( # single and double quoted strings
+ r"'(?:[^'\\]|\.)*'", r'"(?:[^"\\]|\.)*"'
),
- ]
+ PARAMS_OPT_SPEC=r"(?P--params(?=\s|=|--|$))",
+ PARAMS_OPT_EQ=r"(?P=)",
+ GOTO_PARSE_NON_PARAMS_OPTIONS=r"(?P(?=--\w+))", # found another option spec
+ PY_BOOL=r"(?PTrue|False)",
+ DOLLAR_PY_ID=r"(?P\$[^\d\W]\w*)",
+ PY_NUMBER=r"(?P-?[1-9]\d*(?:\.\d+)?(:?[e|E][+-]?\d+)?)",
+ SQUOTE=r"(?P')",
+ DQUOTE=r'(?P")',
+ COLON=r"(?P:)",
+ COMMA=r"(?P,)",
+ LCURL=r"(?P\{)",
+ RCURL=r"(?P})",
+ LSQUARE=r"(?P\[)",
+ RSQUARE=r"(?P])",
+ LPAREN=r"(?P\()",
+ RPAREN=r"(?P\))",
+ ),
+ common=OrderedDict(
+ WS=r"(?P\s+)",
+ EOL=r"(?P$)",
+ UNKNOWN=r"(?P\S+)", # anything not a whitespace or matched by something else
+ ),
)
@@ -143,7 +98,7 @@ def _generate_next_value_(name, start, count, last_values):
return name
-TokenType = AutoStrEnum(
+TokenType = AutoStrEnum( # pytype: disable=wrong-arg-types
"TokenType",
[
(name, enum.auto())
diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py
index 8f343ddcc..d368bbeaa 100644
--- a/google/cloud/bigquery/magics/magics.py
+++ b/google/cloud/bigquery/magics/magics.py
@@ -14,6 +14,15 @@
"""IPython Magics
+To use these magics, you must first register them. Run the ``%load_ext`` magic
+in a Jupyter notebook cell.
+
+.. code::
+
+ %load_ext google.cloud.bigquery
+
+This makes the ``%%bigquery`` magic available.
+
.. function:: %%bigquery
IPython cell magic to run a query and display the result as a DataFrame
@@ -606,7 +615,7 @@ def _cell_magic(line, query):
)
raise NameError(msg)
- params = _helpers.to_query_parameters(ast.literal_eval(params_option_value))
+ params = _helpers.to_query_parameters(ast.literal_eval(params_option_value), {})
project = args.project or context.project
@@ -635,7 +644,7 @@ def _cell_magic(line, query):
bqstorage_client_options.api_endpoint = args.bqstorage_api_endpoint
bqstorage_client = _make_bqstorage_client(
- use_bqstorage_api, context.credentials, bqstorage_client_options,
+ client, use_bqstorage_api, bqstorage_client_options,
)
close_transports = functools.partial(_close_transports, client, bqstorage_client)
@@ -662,7 +671,9 @@ def _cell_magic(line, query):
_handle_error(ex, args.destination_var)
return
- result = rows.to_dataframe(bqstorage_client=bqstorage_client)
+ result = rows.to_dataframe(
+ bqstorage_client=bqstorage_client, create_bqstorage_client=False,
+ )
if args.destination_var:
IPython.get_ipython().push({args.destination_var: result})
return
@@ -719,11 +730,15 @@ def _cell_magic(line, query):
if max_results:
result = query_job.result(max_results=max_results).to_dataframe(
- bqstorage_client=bqstorage_client, progress_bar_type=progress_bar
+ bqstorage_client=None,
+ create_bqstorage_client=False,
+ progress_bar_type=progress_bar,
)
else:
result = query_job.to_dataframe(
- bqstorage_client=bqstorage_client, progress_bar_type=progress_bar
+ bqstorage_client=bqstorage_client,
+ create_bqstorage_client=False,
+ progress_bar_type=progress_bar,
)
if args.destination_var:
@@ -753,12 +768,12 @@ def _split_args_line(line):
return params_option_value, rest_of_args
-def _make_bqstorage_client(use_bqstorage_api, credentials, client_options):
+def _make_bqstorage_client(client, use_bqstorage_api, client_options):
if not use_bqstorage_api:
return None
try:
- from google.cloud import bigquery_storage
+ from google.cloud import bigquery_storage # noqa: F401
except ImportError as err:
customized_error = ImportError(
"The default BigQuery Storage API client cannot be used, install "
@@ -776,10 +791,9 @@ def _make_bqstorage_client(use_bqstorage_api, credentials, client_options):
)
raise customized_error from err
- return bigquery_storage.BigQueryReadClient(
- credentials=credentials,
- client_info=gapic_client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT),
+ return client._ensure_bqstorage_client(
client_options=client_options,
+ client_info=gapic_client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT),
)
diff --git a/google/cloud/bigquery/model.py b/google/cloud/bigquery/model.py
index 55846bd1a..2d3f6660f 100644
--- a/google/cloud/bigquery/model.py
+++ b/google/cloud/bigquery/model.py
@@ -279,7 +279,7 @@ def encryption_configuration(self, value):
self._properties["encryptionConfiguration"] = api_repr
@classmethod
- def from_api_repr(cls, resource):
+ def from_api_repr(cls, resource: dict) -> "Model":
"""Factory: construct a model resource given its API representation
Args:
@@ -322,7 +322,7 @@ def _build_resource(self, filter_fields):
def __repr__(self):
return "Model(reference={})".format(repr(self.reference))
- def to_api_repr(self):
+ def to_api_repr(self) -> dict:
"""Construct the API resource representation of this model.
Returns:
@@ -389,7 +389,9 @@ def from_api_repr(cls, resource):
return ref
@classmethod
- def from_string(cls, model_id, default_project=None):
+ def from_string(
+ cls, model_id: str, default_project: str = None
+ ) -> "ModelReference":
"""Construct a model reference from model ID string.
Args:
@@ -417,7 +419,7 @@ def from_string(cls, model_id, default_project=None):
{"projectId": proj, "datasetId": dset, "modelId": model}
)
- def to_api_repr(self):
+ def to_api_repr(self) -> dict:
"""Construct the API resource representation of this model reference.
Returns:
diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py
index f2ed6337e..1f449f189 100644
--- a/google/cloud/bigquery/query.py
+++ b/google/cloud/bigquery/query.py
@@ -16,6 +16,9 @@
from collections import OrderedDict
import copy
+import datetime
+import decimal
+from typing import Optional, Union
from google.cloud.bigquery.table import _parse_schema_resource
from google.cloud.bigquery._helpers import _rows_from_json
@@ -23,6 +26,11 @@
from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_PARAM
+_SCALAR_VALUE_TYPE = Optional[
+ Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]
+]
+
+
class UDFResource(object):
"""Describe a single user-defined function (UDF) resource.
@@ -48,12 +56,259 @@ def __ne__(self, other):
return not self == other
+class _AbstractQueryParameterType:
+ """Base class for representing query parameter types.
+
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/QueryParameter#queryparametertype
+ """
+
+ @classmethod
+ def from_api_repr(cls, resource):
+ """Factory: construct parameter type from JSON resource.
+
+ Args:
+ resource (Dict): JSON mapping of parameter
+
+ Returns:
+ google.cloud.bigquery.query.QueryParameterType: Instance
+ """
+ raise NotImplementedError
+
+ def to_api_repr(self):
+ """Construct JSON API representation for the parameter type.
+
+ Returns:
+ Dict: JSON mapping
+ """
+ raise NotImplementedError
+
+
+class ScalarQueryParameterType(_AbstractQueryParameterType):
+ """Type representation for scalar query parameters.
+
+ Args:
+ type_ (str):
+ One of 'STRING', 'INT64', 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP',
+ 'DATETIME', or 'DATE'.
+ name (Optional[str]):
+ The name of the query parameter. Primarily used if the type is
+ one of the subfields in ``StructQueryParameterType`` instance.
+ description (Optional[str]):
+ The query parameter description. Primarily used if the type is
+ one of the subfields in ``StructQueryParameterType`` instance.
+ """
+
+ def __init__(self, type_, *, name=None, description=None):
+ self._type = type_
+ self.name = name
+ self.description = description
+
+ @classmethod
+ def from_api_repr(cls, resource):
+ """Factory: construct parameter type from JSON resource.
+
+ Args:
+ resource (Dict): JSON mapping of parameter
+
+ Returns:
+ google.cloud.bigquery.query.ScalarQueryParameterType: Instance
+ """
+ type_ = resource["type"]
+ return cls(type_)
+
+ def to_api_repr(self):
+ """Construct JSON API representation for the parameter type.
+
+ Returns:
+ Dict: JSON mapping
+ """
+ # Name and description are only used if the type is a field inside a struct
+ # type, but it's StructQueryParameterType's responsibility to use these two
+ # attributes in the API representation when needed. Here we omit them.
+ return {"type": self._type}
+
+ def with_name(self, new_name: Union[str, None]):
+ """Return a copy of the instance with ``name`` set to ``new_name``.
+
+ Args:
+ new_name (Union[str, None]):
+ The new name of the query parameter type. If ``None``, the existing
+ name is cleared.
+
+ Returns:
+ google.cloud.bigquery.query.ScalarQueryParameterType:
+ A new instance with updated name.
+ """
+ return type(self)(self._type, name=new_name, description=self.description)
+
+ def __repr__(self):
+ name = f", name={self.name!r}" if self.name is not None else ""
+ description = (
+ f", description={self.description!r}"
+ if self.description is not None
+ else ""
+ )
+ return f"{self.__class__.__name__}({self._type!r}{name}{description})"
+
+
+class ArrayQueryParameterType(_AbstractQueryParameterType):
+ """Type representation for array query parameters.
+
+ Args:
+ array_type (Union[ScalarQueryParameterType, StructQueryParameterType]):
+ The type of array elements.
+ name (Optional[str]):
+ The name of the query parameter. Primarily used if the type is
+ one of the subfields in ``StructQueryParameterType`` instance.
+ description (Optional[str]):
+ The query parameter description. Primarily used if the type is
+ one of the subfields in ``StructQueryParameterType`` instance.
+ """
+
+ def __init__(self, array_type, *, name=None, description=None):
+ self._array_type = array_type
+ self.name = name
+ self.description = description
+
+ @classmethod
+ def from_api_repr(cls, resource):
+ """Factory: construct parameter type from JSON resource.
+
+ Args:
+ resource (Dict): JSON mapping of parameter
+
+ Returns:
+ google.cloud.bigquery.query.ArrayQueryParameterType: Instance
+ """
+ array_item_type = resource["arrayType"]["type"]
+
+ if array_item_type in {"STRUCT", "RECORD"}:
+ klass = StructQueryParameterType
+ else:
+ klass = ScalarQueryParameterType
+
+ item_type_instance = klass.from_api_repr(resource["arrayType"])
+ return cls(item_type_instance)
+
+ def to_api_repr(self):
+ """Construct JSON API representation for the parameter type.
+
+ Returns:
+ Dict: JSON mapping
+ """
+ # Name and description are only used if the type is a field inside a struct
+ # type, but it's StructQueryParameterType's responsibility to use these two
+ # attributes in the API representation when needed. Here we omit them.
+ return {
+ "type": "ARRAY",
+ "arrayType": self._array_type.to_api_repr(),
+ }
+
+ def __repr__(self):
+ name = f", name={self.name!r}" if self.name is not None else ""
+ description = (
+ f", description={self.description!r}"
+ if self.description is not None
+ else ""
+ )
+ return f"{self.__class__.__name__}({self._array_type!r}{name}{description})"
+
+
+class StructQueryParameterType(_AbstractQueryParameterType):
+ """Type representation for struct query parameters.
+
+ Args:
+ fields (Iterable[Union[ \
+ ArrayQueryParameterType, ScalarQueryParameterType, StructQueryParameterType \
+ ]]):
+ A non-empty iterable describing the struct's field types.
+ name (Optional[str]):
+ The name of the query parameter. Primarily used if the type is
+ one of the subfields in ``StructQueryParameterType`` instance.
+ description (Optional[str]):
+ The query parameter description. Primarily used if the type is
+ one of the subfields in ``StructQueryParameterType`` instance.
+ """
+
+ def __init__(self, *fields, name=None, description=None):
+ if not fields:
+ raise ValueError("Struct type must have at least one field defined.")
+
+ self._fields = fields # fields is a tuple (immutable), no shallow copy needed
+ self.name = name
+ self.description = description
+
+ @property
+ def fields(self):
+ return self._fields # no copy needed, self._fields is an immutable sequence
+
+ @classmethod
+ def from_api_repr(cls, resource):
+ """Factory: construct parameter type from JSON resource.
+
+ Args:
+ resource (Dict): JSON mapping of parameter
+
+ Returns:
+ google.cloud.bigquery.query.StructQueryParameterType: Instance
+ """
+ fields = []
+
+ for struct_field in resource["structTypes"]:
+ type_repr = struct_field["type"]
+ if type_repr["type"] in {"STRUCT", "RECORD"}:
+ klass = StructQueryParameterType
+ elif type_repr["type"] == "ARRAY":
+ klass = ArrayQueryParameterType
+ else:
+ klass = ScalarQueryParameterType
+
+ type_instance = klass.from_api_repr(type_repr)
+ type_instance.name = struct_field.get("name")
+ type_instance.description = struct_field.get("description")
+ fields.append(type_instance)
+
+ return cls(*fields)
+
+ def to_api_repr(self):
+ """Construct JSON API representation for the parameter type.
+
+ Returns:
+ Dict: JSON mapping
+ """
+ fields = []
+
+ for field in self._fields:
+ item = {"type": field.to_api_repr()}
+ if field.name is not None:
+ item["name"] = field.name
+ if field.description is not None:
+ item["description"] = field.description
+
+ fields.append(item)
+
+ return {
+ "type": "STRUCT",
+ "structTypes": fields,
+ }
+
+ def __repr__(self):
+ name = f", name={self.name!r}" if self.name is not None else ""
+ description = (
+ f", description={self.description!r}"
+ if self.description is not None
+ else ""
+ )
+ items = ", ".join(repr(field) for field in self._fields)
+ return f"{self.__class__.__name__}({items}{name}{description})"
+
+
class _AbstractQueryParameter(object):
"""Base class for named / positional query parameters.
"""
@classmethod
- def from_api_repr(cls, resource):
+ def from_api_repr(cls, resource: dict) -> "ScalarQueryParameter":
"""Factory: construct parameter from JSON resource.
Args:
@@ -64,7 +319,7 @@ def from_api_repr(cls, resource):
"""
raise NotImplementedError
- def to_api_repr(self):
+ def to_api_repr(self) -> dict:
"""Construct JSON API representation for the parameter.
Returns:
@@ -77,35 +332,46 @@ class ScalarQueryParameter(_AbstractQueryParameter):
"""Named / positional query parameters for scalar values.
Args:
- name (Optional[str]):
+ name:
Parameter name, used via ``@foo`` syntax. If None, the
parameter can only be addressed via position (``?``).
- type_ (str):
- Name of parameter type. One of 'STRING', 'INT64',
- 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or
- 'DATE'.
+ type_:
+ Name of parameter type. See
+ :class:`google.cloud.bigquery.enums.SqlTypeNames` and
+ :class:`google.cloud.bigquery.enums.SqlParameterScalarTypes` for
+ supported types.
- value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]):
+ value:
The scalar parameter value.
"""
- def __init__(self, name, type_, value):
+ def __init__(
+ self,
+ name: Optional[str],
+ type_: Optional[Union[str, ScalarQueryParameterType]],
+ value: _SCALAR_VALUE_TYPE,
+ ):
self.name = name
- self.type_ = type_
+ if isinstance(type_, ScalarQueryParameterType):
+ self.type_ = type_._type
+ else:
+ self.type_ = type_
self.value = value
@classmethod
- def positional(cls, type_, value):
+ def positional(
+ cls, type_: Union[str, ScalarQueryParameterType], value: _SCALAR_VALUE_TYPE
+ ) -> "ScalarQueryParameter":
"""Factory for positional paramater.
Args:
- type_ (str):
+ type_:
Name of parameter type. One of 'STRING', 'INT64',
- 'FLOAT64', 'NUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or
+ 'FLOAT64', 'NUMERIC', 'BIGNUMERIC', 'BOOL', 'TIMESTAMP', 'DATETIME', or
'DATE'.
- value (Union[str, int, float, decimal.Decimal, bool, datetime.datetime, datetime.date]):
+ value:
The scalar parameter value.
Returns:
@@ -114,7 +380,7 @@ def positional(cls, type_, value):
return cls(None, type_, value)
@classmethod
- def from_api_repr(cls, resource):
+ def from_api_repr(cls, resource: dict) -> "ScalarQueryParameter":
"""Factory: construct parameter from JSON resource.
Args:
@@ -136,7 +402,7 @@ def from_api_repr(cls, resource):
return cls(name, type_, converted)
- def to_api_repr(self):
+ def to_api_repr(self) -> dict:
"""Construct JSON API representation for the parameter.
Returns:
@@ -184,28 +450,43 @@ class ArrayQueryParameter(_AbstractQueryParameter):
Parameter name, used via ``@foo`` syntax. If None, the
parameter can only be addressed via position (``?``).
- array_type (str):
- Name of type of array elements. One of `'STRING'`, `'INT64'`,
- `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`.
+ array_type (Union[str, ScalarQueryParameterType, StructQueryParameterType]):
+ The type of array elements. If given as a string, it must be one of
+ `'STRING'`, `'INT64'`, `'FLOAT64'`, `'NUMERIC'`, `'BIGNUMERIC'`, `'BOOL'`,
+ `'TIMESTAMP'`, `'DATE'`, or `'STRUCT'`/`'RECORD'`.
+ If the type is ``'STRUCT'``/``'RECORD'`` and ``values`` is empty,
+ the exact item type cannot be deduced, thus a ``StructQueryParameterType``
+ instance needs to be passed in.
- values (List[appropriate scalar type]): The parameter array values.
+ values (List[appropriate type]): The parameter array values.
"""
def __init__(self, name, array_type, values):
self.name = name
- self.array_type = array_type
self.values = values
+ if isinstance(array_type, str):
+ if not values and array_type in {"RECORD", "STRUCT"}:
+ raise ValueError(
+ "Missing detailed struct item type info for an empty array, "
+ "please provide a StructQueryParameterType instance."
+ )
+ self.array_type = array_type
+
@classmethod
- def positional(cls, array_type, values):
+ def positional(cls, array_type: str, values: list) -> "ArrayQueryParameter":
"""Factory for positional parameters.
Args:
- array_type (str):
- Name of type of array elements. One of `'STRING'`, `'INT64'`,
- `'FLOAT64'`, `'NUMERIC'`, `'BOOL'`, `'TIMESTAMP'`, or `'DATE'`.
+ array_type (Union[str, ScalarQueryParameterType, StructQueryParameterType]):
+ The type of array elements. If given as a string, it must be one of
+ `'STRING'`, `'INT64'`, `'FLOAT64'`, `'NUMERIC'`, `'BIGNUMERIC'`,
+ `'BOOL'`, `'TIMESTAMP'`, `'DATE'`, or `'STRUCT'`/`'RECORD'`.
+ If the type is ``'STRUCT'``/``'RECORD'`` and ``values`` is empty,
+ the exact item type cannot be deduced, thus a ``StructQueryParameterType``
+ instance needs to be passed in.
- values (List[appropriate scalar type]): The parameter array values.
+ values (List[appropriate type]): The parameter array values.
Returns:
google.cloud.bigquery.query.ArrayQueryParameter: Instance without name
@@ -242,7 +523,7 @@ def _from_api_repr_scalar(cls, resource):
return cls(name, array_type, converted)
@classmethod
- def from_api_repr(cls, resource):
+ def from_api_repr(cls, resource: dict) -> "ArrayQueryParameter":
"""Factory: construct parameter from JSON resource.
Args:
@@ -256,29 +537,47 @@ def from_api_repr(cls, resource):
return cls._from_api_repr_struct(resource)
return cls._from_api_repr_scalar(resource)
- def to_api_repr(self):
+ def to_api_repr(self) -> dict:
"""Construct JSON API representation for the parameter.
Returns:
Dict: JSON mapping
"""
values = self.values
- if self.array_type == "RECORD" or self.array_type == "STRUCT":
+
+ if self.array_type in {"RECORD", "STRUCT"} or isinstance(
+ self.array_type, StructQueryParameterType
+ ):
reprs = [value.to_api_repr() for value in values]
- a_type = reprs[0]["parameterType"]
a_values = [repr_["parameterValue"] for repr_ in reprs]
+
+ if reprs:
+ a_type = reprs[0]["parameterType"]
+ else:
+ # This assertion always evaluates to True because the
+ # constructor disallows STRUCT/RECORD type defined as a
+ # string with empty values.
+ assert isinstance(self.array_type, StructQueryParameterType)
+ a_type = self.array_type.to_api_repr()
else:
- a_type = {"type": self.array_type}
- converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.array_type)
+ # Scalar array item type.
+ if isinstance(self.array_type, str):
+ a_type = {"type": self.array_type}
+ else:
+ a_type = self.array_type.to_api_repr()
+
+ converter = _SCALAR_VALUE_TO_JSON_PARAM.get(a_type["type"])
if converter is not None:
values = [converter(value) for value in values]
a_values = [{"value": value} for value in values]
+
resource = {
"parameterType": {"type": "ARRAY", "arrayType": a_type},
"parameterValue": {"arrayValues": a_values},
}
if self.name is not None:
resource["name"] = self.name
+
return resource
def _key(self):
@@ -289,7 +588,14 @@ def _key(self):
Returns:
Tuple: The contents of this :class:`~google.cloud.bigquery.query.ArrayQueryParameter`.
"""
- return (self.name, self.array_type.upper(), self.values)
+ if isinstance(self.array_type, str):
+ item_type = self.array_type
+ elif isinstance(self.array_type, ScalarQueryParameterType):
+ item_type = self.array_type._type
+ else:
+ item_type = "STRUCT"
+
+ return (self.name, item_type.upper(), self.values)
def __eq__(self, other):
if not isinstance(other, ArrayQueryParameter):
@@ -350,7 +656,7 @@ def positional(cls, *sub_params):
return cls(None, *sub_params)
@classmethod
- def from_api_repr(cls, resource):
+ def from_api_repr(cls, resource: dict) -> "StructQueryParameter":
"""Factory: construct parameter from JSON resource.
Args:
@@ -390,7 +696,7 @@ def from_api_repr(cls, resource):
instance.struct_values[key] = converted
return instance
- def to_api_repr(self):
+ def to_api_repr(self) -> dict:
"""Construct JSON API representation for the parameter.
Returns:
@@ -542,7 +848,7 @@ def total_rows(self):
https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.total_rows
Returns:
- Optional[int}: Count generated on the server (None until set by the server).
+ Optional[int]: Count generated on the server (None until set by the server).
"""
total_rows = self._properties.get("totalRows")
if total_rows is not None:
@@ -585,7 +891,7 @@ def rows(self):
Returns:
Optional[List[google.cloud.bigquery.table.Row]]:
- Fields describing the schema (None until set by the server).
+ Rows containing the results of the query.
"""
return _rows_from_json(self._properties.get("rows", ()), self.schema)
diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py
index 4bc4b757f..830582322 100644
--- a/google/cloud/bigquery/retry.py
+++ b/google/cloud/bigquery/retry.py
@@ -14,6 +14,8 @@
from google.api_core import exceptions
from google.api_core import retry
+from google.auth import exceptions as auth_exceptions
+import requests.exceptions
_RETRYABLE_REASONS = frozenset(
@@ -21,11 +23,18 @@
)
_UNSTRUCTURED_RETRYABLE_TYPES = (
+ ConnectionError,
exceptions.TooManyRequests,
exceptions.InternalServerError,
exceptions.BadGateway,
+ requests.exceptions.ChunkedEncodingError,
+ requests.exceptions.ConnectionError,
+ requests.exceptions.Timeout,
+ auth_exceptions.TransportError,
)
+_DEFAULT_JOB_DEADLINE = 60.0 * 10.0 # seconds
+
def _should_retry(exc):
"""Predicate for determining when to retry.
@@ -33,10 +42,7 @@ def _should_retry(exc):
We retry if and only if the 'reason' is 'backendError'
or 'rateLimitExceeded'.
"""
- if not hasattr(exc, "errors"):
- return False
-
- if len(exc.errors) == 0:
+ if not hasattr(exc, "errors") or len(exc.errors) == 0:
# Check for unstructured error returns, e.g. from GFE
return isinstance(exc, _UNSTRUCTURED_RETRYABLE_TYPES)
@@ -44,7 +50,7 @@ def _should_retry(exc):
return reason in _RETRYABLE_REASONS
-DEFAULT_RETRY = retry.Retry(predicate=_should_retry)
+DEFAULT_RETRY = retry.Retry(predicate=_should_retry, deadline=600.0)
"""The default retry object.
Any method with a ``retry`` parameter will be retried automatically,
@@ -53,3 +59,28 @@ def _should_retry(exc):
on ``DEFAULT_RETRY``. For example, to change the deadline to 30 seconds,
pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``.
"""
+
+DEFAULT_TIMEOUT = 5.0 * 60.0
+"""The default API timeout.
+
+This is the time to wait per request. To adjust the total wait time, set a
+deadline on the retry object.
+"""
+
+job_retry_reasons = "rateLimitExceeded", "backendError"
+
+
+def _job_should_retry(exc):
+ if not hasattr(exc, "errors") or len(exc.errors) == 0:
+ return False
+
+ reason = exc.errors[0]["reason"]
+ return reason in job_retry_reasons
+
+
+DEFAULT_JOB_RETRY = retry.Retry(
+ predicate=_job_should_retry, deadline=_DEFAULT_JOB_DEADLINE
+)
+"""
+The default job retry object.
+"""
diff --git a/google/cloud/bigquery/routine/__init__.py b/google/cloud/bigquery/routine/__init__.py
new file mode 100644
index 000000000..7353073c8
--- /dev/null
+++ b/google/cloud/bigquery/routine/__init__.py
@@ -0,0 +1,31 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""User-Defined Routines."""
+
+
+from google.cloud.bigquery.enums import DeterminismLevel
+from google.cloud.bigquery.routine.routine import Routine
+from google.cloud.bigquery.routine.routine import RoutineArgument
+from google.cloud.bigquery.routine.routine import RoutineReference
+from google.cloud.bigquery.routine.routine import RoutineType
+
+
+__all__ = (
+ "DeterminismLevel",
+ "Routine",
+ "RoutineArgument",
+ "RoutineReference",
+ "RoutineType",
+)
diff --git a/google/cloud/bigquery/routine.py b/google/cloud/bigquery/routine/routine.py
similarity index 86%
rename from google/cloud/bigquery/routine.py
rename to google/cloud/bigquery/routine/routine.py
index f26f20886..a776212c3 100644
--- a/google/cloud/bigquery/routine.py
+++ b/google/cloud/bigquery/routine/routine.py
@@ -21,6 +21,21 @@
import google.cloud._helpers
from google.cloud.bigquery import _helpers
import google.cloud.bigquery_v2.types
+from google.cloud.bigquery_v2.types import StandardSqlTableType
+
+
+class RoutineType:
+ """The fine-grained type of the routine.
+
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/routines#routinetype
+
+ .. versionadded:: 2.22.0
+ """
+
+ ROUTINE_TYPE_UNSPECIFIED = "ROUTINE_TYPE_UNSPECIFIED"
+ SCALAR_FUNCTION = "SCALAR_FUNCTION"
+ PROCEDURE = "PROCEDURE"
+ TABLE_VALUED_FUNCTION = "TABLE_VALUED_FUNCTION"
class Routine(object):
@@ -48,8 +63,10 @@ class Routine(object):
"modified": "lastModifiedTime",
"reference": "routineReference",
"return_type": "returnType",
+ "return_table_type": "returnTableType",
"type_": "routineType",
"description": "description",
+ "determinism_level": "determinismLevel",
}
def __init__(self, routine_ref, **kwargs):
@@ -203,6 +220,35 @@ def return_type(self, value):
resource = None
self._properties[self._PROPERTY_TO_API_FIELD["return_type"]] = resource
+ @property
+ def return_table_type(self) -> StandardSqlTableType:
+ """The return type of a Table Valued Function (TVF) routine.
+
+ .. versionadded:: 2.22.0
+ """
+ resource = self._properties.get(
+ self._PROPERTY_TO_API_FIELD["return_table_type"]
+ )
+ if not resource:
+ return resource
+
+ output = google.cloud.bigquery_v2.types.StandardSqlTableType()
+ raw_protobuf = json_format.ParseDict(
+ resource, output._pb, ignore_unknown_fields=True
+ )
+ return type(output).wrap(raw_protobuf)
+
+ @return_table_type.setter
+ def return_table_type(self, value):
+ if not value:
+ resource = None
+ else:
+ resource = {
+ "columns": [json_format.MessageToDict(col._pb) for col in value.columns]
+ }
+
+ self._properties[self._PROPERTY_TO_API_FIELD["return_table_type"]] = resource
+
@property
def imported_libraries(self):
"""List[str]: The path of the imported JavaScript libraries.
@@ -253,8 +299,19 @@ def description(self):
def description(self, value):
self._properties[self._PROPERTY_TO_API_FIELD["description"]] = value
+ @property
+ def determinism_level(self):
+ """Optional[str]: (experimental) The determinism level of the JavaScript UDF
+ if defined.
+ """
+ return self._properties.get(self._PROPERTY_TO_API_FIELD["determinism_level"])
+
+ @determinism_level.setter
+ def determinism_level(self, value):
+ self._properties[self._PROPERTY_TO_API_FIELD["determinism_level"]] = value
+
@classmethod
- def from_api_repr(cls, resource):
+ def from_api_repr(cls, resource: dict) -> "Routine":
"""Factory: construct a routine given its API representation.
Args:
@@ -269,7 +326,7 @@ def from_api_repr(cls, resource):
ref._properties = resource
return ref
- def to_api_repr(self):
+ def to_api_repr(self) -> dict:
"""Construct the API resource representation of this routine.
Returns:
@@ -375,7 +432,7 @@ def data_type(self, value):
self._properties[self._PROPERTY_TO_API_FIELD["data_type"]] = resource
@classmethod
- def from_api_repr(cls, resource):
+ def from_api_repr(cls, resource: dict) -> "RoutineArgument":
"""Factory: construct a routine argument given its API representation.
Args:
@@ -389,7 +446,7 @@ def from_api_repr(cls, resource):
ref._properties = resource
return ref
- def to_api_repr(self):
+ def to_api_repr(self) -> dict:
"""Construct the API resource representation of this routine argument.
Returns:
@@ -426,17 +483,17 @@ def __init__(self):
@property
def project(self):
"""str: ID of the project containing the routine."""
- return self._properties["projectId"]
+ return self._properties["projectId"] # pytype: disable=key-error
@property
def dataset_id(self):
"""str: ID of dataset containing the routine."""
- return self._properties["datasetId"]
+ return self._properties["datasetId"] # pytype: disable=key-error
@property
def routine_id(self):
"""str: The routine ID."""
- return self._properties["routineId"]
+ return self._properties["routineId"] # pytype: disable=key-error
@property
def path(self):
@@ -448,7 +505,7 @@ def path(self):
)
@classmethod
- def from_api_repr(cls, resource):
+ def from_api_repr(cls, resource: dict) -> "RoutineReference":
"""Factory: construct a routine reference given its API representation.
Args:
@@ -464,7 +521,9 @@ def from_api_repr(cls, resource):
return ref
@classmethod
- def from_string(cls, routine_id, default_project=None):
+ def from_string(
+ cls, routine_id: str, default_project: str = None
+ ) -> "RoutineReference":
"""Factory: construct a routine reference from routine ID string.
Args:
@@ -492,7 +551,7 @@ def from_string(cls, routine_id, default_project=None):
{"projectId": proj, "datasetId": dset, "routineId": routine}
)
- def to_api_repr(self):
+ def to_api_repr(self) -> dict:
"""Construct the API resource representation of this routine reference.
Returns:
diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py
index c76aded02..157db7ce6 100644
--- a/google/cloud/bigquery/schema.py
+++ b/google/cloud/bigquery/schema.py
@@ -15,10 +15,12 @@
"""Schemas for BigQuery tables / queries."""
import collections
+from typing import Optional
from google.cloud.bigquery_v2 import types
+_DEFAULT_VALUE = object()
_STRUCT_TYPES = ("RECORD", "STRUCT")
# SQL types reference:
@@ -32,6 +34,7 @@
"FLOAT": types.StandardSqlDataType.TypeKind.FLOAT64,
"FLOAT64": types.StandardSqlDataType.TypeKind.FLOAT64,
"NUMERIC": types.StandardSqlDataType.TypeKind.NUMERIC,
+ "BIGNUMERIC": types.StandardSqlDataType.TypeKind.BIGNUMERIC,
"BOOLEAN": types.StandardSqlDataType.TypeKind.BOOL,
"BOOL": types.StandardSqlDataType.TypeKind.BOOL,
"GEOGRAPHY": types.StandardSqlDataType.TypeKind.GEOGRAPHY,
@@ -65,6 +68,15 @@ class SchemaField(object):
policy_tags (Optional[PolicyTagList]): The policy tag list for the field.
+ precision (Optional[int]):
+ Precision (number of digits) of fields with NUMERIC or BIGNUMERIC type.
+
+ scale (Optional[int]):
+ Scale (digits after decimal) of fields with NUMERIC or BIGNUMERIC type.
+
+ max_length (Optional[int]):
+ Maximum length of fields with STRING or BYTES type.
+
"""
def __init__(
@@ -72,19 +84,58 @@ def __init__(
name,
field_type,
mode="NULLABLE",
- description=None,
+ description=_DEFAULT_VALUE,
fields=(),
policy_tags=None,
+ precision=_DEFAULT_VALUE,
+ scale=_DEFAULT_VALUE,
+ max_length=_DEFAULT_VALUE,
):
- self._name = name
- self._field_type = field_type
- self._mode = mode
- self._description = description
+ self._properties = {
+ "name": name,
+ "type": field_type,
+ }
+ if mode is not None:
+ self._properties["mode"] = mode.upper()
+ if description is not _DEFAULT_VALUE:
+ self._properties["description"] = description
+ if precision is not _DEFAULT_VALUE:
+ self._properties["precision"] = precision
+ if scale is not _DEFAULT_VALUE:
+ self._properties["scale"] = scale
+ if max_length is not _DEFAULT_VALUE:
+ self._properties["maxLength"] = max_length
self._fields = tuple(fields)
- self._policy_tags = policy_tags
+
+ self._policy_tags = self._determine_policy_tags(field_type, policy_tags)
+
+ @staticmethod
+ def _determine_policy_tags(
+ field_type: str, given_policy_tags: Optional["PolicyTagList"]
+ ) -> Optional["PolicyTagList"]:
+ """Return the given policy tags, or their suitable representation if `None`.
+
+ Args:
+ field_type: The type of the schema field.
+ given_policy_tags: The policy tags to maybe adjust.
+ """
+ if given_policy_tags is not None:
+ return given_policy_tags
+
+ if field_type is not None and field_type.upper() in _STRUCT_TYPES:
+ return None
+
+ return PolicyTagList()
+
+ @staticmethod
+ def __get_int(api_repr, name):
+ v = api_repr.get(name, _DEFAULT_VALUE)
+ if v is not _DEFAULT_VALUE:
+ v = int(v)
+ return v
@classmethod
- def from_api_repr(cls, api_repr):
+ def from_api_repr(cls, api_repr: dict) -> "SchemaField":
"""Return a ``SchemaField`` object deserialized from a dictionary.
Args:
@@ -95,24 +146,33 @@ def from_api_repr(cls, api_repr):
Returns:
google.cloud.biquery.schema.SchemaField: The ``SchemaField`` object.
"""
+ field_type = api_repr["type"].upper()
+
# Handle optional properties with default values
mode = api_repr.get("mode", "NULLABLE")
- description = api_repr.get("description")
+ description = api_repr.get("description", _DEFAULT_VALUE)
fields = api_repr.get("fields", ())
+ policy_tags = cls._determine_policy_tags(
+ field_type, PolicyTagList.from_api_repr(api_repr.get("policyTags"))
+ )
+
return cls(
- field_type=api_repr["type"].upper(),
+ field_type=field_type,
fields=[cls.from_api_repr(f) for f in fields],
mode=mode.upper(),
description=description,
name=api_repr["name"],
- policy_tags=PolicyTagList.from_api_repr(api_repr.get("policyTags")),
+ policy_tags=policy_tags,
+ precision=cls.__get_int(api_repr, "precision"),
+ scale=cls.__get_int(api_repr, "scale"),
+ max_length=cls.__get_int(api_repr, "maxLength"),
)
@property
def name(self):
"""str: The name of the field."""
- return self._name
+ return self._properties["name"]
@property
def field_type(self):
@@ -121,7 +181,7 @@ def field_type(self):
See:
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type
"""
- return self._field_type
+ return self._properties["type"]
@property
def mode(self):
@@ -130,17 +190,32 @@ def mode(self):
See:
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode
"""
- return self._mode
+ return self._properties.get("mode")
@property
def is_nullable(self):
"""bool: whether 'mode' is 'nullable'."""
- return self._mode == "NULLABLE"
+ return self.mode == "NULLABLE"
@property
def description(self):
"""Optional[str]: description for the field."""
- return self._description
+ return self._properties.get("description")
+
+ @property
+ def precision(self):
+ """Optional[int]: Precision (number of digits) for NUMERIC/BIGNUMERIC fields."""
+ return self._properties.get("precision")
+
+ @property
+ def scale(self):
+ """Optional[int]: Scale (digits after decimal) for NUMERIC/BIGNUMERIC fields."""
+ return self._properties.get("scale")
+
+ @property
+ def max_length(self):
+ """Optional[int]: Maximum length for the STRING or BYTES field."""
+ return self._properties.get("maxLength")
@property
def fields(self):
@@ -157,27 +232,21 @@ def policy_tags(self):
"""
return self._policy_tags
- def to_api_repr(self):
+ def to_api_repr(self) -> dict:
"""Return a dictionary representing this schema field.
Returns:
Dict: A dictionary representing the SchemaField in a serialized form.
"""
- # Put together the basic representation. See http://bit.ly/2hOAT5u.
- answer = {
- "mode": self.mode.upper(),
- "name": self.name,
- "type": self.field_type.upper(),
- "description": self.description,
- }
+ answer = self._properties.copy()
# If this is a RECORD type, then sub-fields are also included,
# add this to the serialized representation.
if self.field_type.upper() in _STRUCT_TYPES:
answer["fields"] = [f.to_api_repr() for f in self.fields]
-
- # If this contains a policy tag definition, include that as well:
- if self.policy_tags is not None:
+ else:
+ # Explicitly include policy tag definition (we must not do it for RECORD
+ # fields, because those are not leaf fields).
answer["policyTags"] = self.policy_tags.to_api_repr()
# Done; return the serialized dictionary.
@@ -191,16 +260,32 @@ def _key(self):
Returns:
Tuple: The contents of this :class:`~google.cloud.bigquery.schema.SchemaField`.
"""
+ field_type = self.field_type.upper()
+ if field_type == "STRING" or field_type == "BYTES":
+ if self.max_length is not None:
+ field_type = f"{field_type}({self.max_length})"
+ elif field_type.endswith("NUMERIC"):
+ if self.precision is not None:
+ if self.scale is not None:
+ field_type = f"{field_type}({self.precision}, {self.scale})"
+ else:
+ field_type = f"{field_type}({self.precision})"
+
+ policy_tags = (
+ () if self._policy_tags is None else tuple(sorted(self._policy_tags.names))
+ )
+
return (
- self._name,
- self._field_type.upper(),
- self._mode.upper(),
- self._description,
+ self.name,
+ field_type,
+ # Mode is always str, if not given it defaults to a str value
+ self.mode.upper(), # pytype: disable=attribute-error
+ self.description,
self._fields,
- self._policy_tags,
+ policy_tags,
)
- def to_standard_sql(self):
+ def to_standard_sql(self) -> types.StandardSqlField:
"""Return the field as the standard SQL field representation object.
Returns:
@@ -268,21 +353,7 @@ def _parse_schema_resource(info):
Optional[Sequence[google.cloud.bigquery.schema.SchemaField`]:
A list of parsed fields, or ``None`` if no "fields" key found.
"""
- if "fields" not in info:
- return ()
-
- schema = []
- for r_field in info["fields"]:
- name = r_field["name"]
- field_type = r_field["type"]
- mode = r_field.get("mode", "NULLABLE")
- description = r_field.get("description")
- sub_fields = _parse_schema_resource(r_field)
- policy_tags = PolicyTagList.from_api_repr(r_field.get("policyTags"))
- schema.append(
- SchemaField(name, field_type, mode, description, sub_fields, policy_tags)
- )
- return schema
+ return [SchemaField.from_api_repr(f) for f in info.get("fields", ())]
def _build_schema_resource(fields):
@@ -375,7 +446,7 @@ def __repr__(self):
return "PolicyTagList{}".format(self._key())
@classmethod
- def from_api_repr(cls, api_repr):
+ def from_api_repr(cls, api_repr: dict) -> "PolicyTagList":
"""Return a :class:`PolicyTagList` object deserialized from a dict.
This method creates a new ``PolicyTagList`` instance that points to
@@ -398,7 +469,7 @@ def from_api_repr(cls, api_repr):
names = api_repr.get("names", ())
return cls(names=names)
- def to_api_repr(self):
+ def to_api_repr(self) -> dict:
"""Return a dictionary representing this object.
This method returns the properties dict of the ``PolicyTagList``
diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py
index a2366b806..609c0b57e 100644
--- a/google/cloud/bigquery/table.py
+++ b/google/cloud/bigquery/table.py
@@ -19,9 +19,9 @@
import copy
import datetime
import functools
-import logging
import operator
-import pytz
+import typing
+from typing import Any, Dict, Iterable, Iterator, Optional, Tuple
import warnings
try:
@@ -29,6 +29,20 @@
except ImportError: # pragma: NO COVER
pandas = None
+try:
+ import geopandas
+except ImportError:
+ geopandas = None
+else:
+ _COORDINATE_REFERENCE_SYSTEM = "EPSG:4326"
+
+try:
+ import shapely.geos
+except ImportError:
+ shapely = None
+else:
+ _read_wkt = shapely.geos.WKTReader(shapely.geos.lgeos).read
+
try:
import pyarrow
except ImportError: # pragma: NO COVER
@@ -40,6 +54,7 @@
import google.cloud._helpers
from google.cloud.bigquery import _helpers
from google.cloud.bigquery import _pandas_helpers
+from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError
from google.cloud.bigquery.schema import _build_schema_resource
from google.cloud.bigquery.schema import _parse_schema_resource
from google.cloud.bigquery.schema import _to_schema_fields
@@ -47,17 +62,27 @@
from google.cloud.bigquery.external_config import ExternalConfig
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
+if typing.TYPE_CHECKING: # pragma: NO COVER
+ # Unconditionally import optional dependencies again to tell pytype that
+ # they are not None, avoiding false "no attribute" errors.
+ import pandas
+ import geopandas
+ import pyarrow
+ from google.cloud import bigquery_storage
-_LOGGER = logging.getLogger(__name__)
-_NO_BQSTORAGE_ERROR = (
- "The google-cloud-bigquery-storage library is not installed, "
- "please install google-cloud-bigquery-storage to use bqstorage features."
-)
_NO_PANDAS_ERROR = (
"The pandas library is not installed, please install "
"pandas to use the to_dataframe() function."
)
+_NO_GEOPANDAS_ERROR = (
+ "The geopandas library is not installed, please install "
+ "geopandas to use the to_geodataframe() function."
+)
+_NO_SHAPELY_ERROR = (
+ "The shapely library is not installed, please install "
+ "shapely to use the geography_as_object option."
+)
_NO_PYARROW_ERROR = (
"The pyarrow library is not installed, please install "
"pyarrow to use the to_arrow() function."
@@ -143,7 +168,9 @@ def path(self):
)
@classmethod
- def from_string(cls, table_id, default_project=None):
+ def from_string(
+ cls, table_id: str, default_project: str = None
+ ) -> "TableReference":
"""Construct a table reference from table ID string.
Args:
@@ -182,7 +209,7 @@ def from_string(cls, table_id, default_project=None):
)
@classmethod
- def from_api_repr(cls, resource):
+ def from_api_repr(cls, resource: dict) -> "TableReference":
"""Factory: construct a table reference given its API representation
Args:
@@ -200,7 +227,7 @@ def from_api_repr(cls, resource):
table_id = resource["tableId"]
return cls(DatasetReference(project, dataset_id), table_id)
- def to_api_repr(self):
+ def to_api_repr(self) -> dict:
"""Construct the API resource representation of this table reference.
Returns:
@@ -212,7 +239,7 @@ def to_api_repr(self):
"tableId": self._table_id,
}
- def to_bqstorage(self):
+ def to_bqstorage(self) -> str:
"""Construct a BigQuery Storage API representation of this table.
Install the ``google-cloud-bigquery-storage`` package to use this
@@ -250,9 +277,16 @@ def _key(self):
return (self._project, self._dataset_id, self._table_id)
def __eq__(self, other):
- if not isinstance(other, TableReference):
+ if isinstance(other, (Table, TableListItem)):
+ return (
+ self.project == other.project
+ and self.dataset_id == other.dataset_id
+ and self.table_id == other.table_id
+ )
+ elif isinstance(other, TableReference):
+ return self._key() == other._key()
+ else:
return NotImplemented
- return self._key() == other._key()
def __ne__(self, other):
return not self == other
@@ -291,16 +325,37 @@ class Table(object):
"""
_PROPERTY_TO_API_FIELD = {
+ "clustering_fields": "clustering",
+ "created": "creationTime",
+ "dataset_id": ["tableReference", "datasetId"],
+ "description": "description",
"encryption_configuration": "encryptionConfiguration",
+ "etag": "etag",
"expires": "expirationTime",
"external_data_configuration": "externalDataConfiguration",
"friendly_name": "friendlyName",
+ "full_table_id": "id",
+ "labels": "labels",
+ "location": "location",
+ "modified": "lastModifiedTime",
"mview_enable_refresh": "materializedView",
+ "mview_last_refresh_time": ["materializedView", "lastRefreshTime"],
"mview_query": "materializedView",
"mview_refresh_interval": "materializedView",
+ "num_bytes": "numBytes",
+ "num_rows": "numRows",
"partition_expiration": "timePartitioning",
"partitioning_type": "timePartitioning",
+ "project": ["tableReference", "projectId"],
+ "range_partitioning": "rangePartitioning",
"time_partitioning": "timePartitioning",
+ "schema": "schema",
+ "snapshot_definition": "snapshotDefinition",
+ "streaming_buffer": "streamingBuffer",
+ "self_link": "selfLink",
+ "table_id": ["tableReference", "tableId"],
+ "time_partitioning": "timePartitioning",
+ "type": "type",
"view_use_legacy_sql": "view",
"view_query": "view",
"require_partition_filter": "requirePartitionFilter",
@@ -316,17 +371,23 @@ def __init__(self, table_ref, schema=None):
@property
def project(self):
"""str: Project bound to the table."""
- return self._properties["tableReference"]["projectId"]
+ return _helpers._get_sub_prop(
+ self._properties, self._PROPERTY_TO_API_FIELD["project"]
+ )
@property
def dataset_id(self):
"""str: ID of dataset containing the table."""
- return self._properties["tableReference"]["datasetId"]
+ return _helpers._get_sub_prop(
+ self._properties, self._PROPERTY_TO_API_FIELD["dataset_id"]
+ )
@property
def table_id(self):
"""str: ID of the table."""
- return self._properties["tableReference"]["tableId"]
+ return _helpers._get_sub_prop(
+ self._properties, self._PROPERTY_TO_API_FIELD["table_id"]
+ )
reference = property(_reference_getter)
@@ -345,11 +406,15 @@ def require_partition_filter(self):
partition filter that can be used for partition elimination to be
specified.
"""
- return self._properties.get("requirePartitionFilter")
+ return self._properties.get(
+ self._PROPERTY_TO_API_FIELD["require_partition_filter"]
+ )
@require_partition_filter.setter
def require_partition_filter(self, value):
- self._properties["requirePartitionFilter"] = value
+ self._properties[
+ self._PROPERTY_TO_API_FIELD["require_partition_filter"]
+ ] = value
@property
def schema(self):
@@ -365,7 +430,7 @@ def schema(self):
is not a :class:`~google.cloud.bigquery.schema.SchemaField`
instance or a compatible mapping representation of the field.
"""
- prop = self._properties.get("schema")
+ prop = self._properties.get(self._PROPERTY_TO_API_FIELD["schema"])
if not prop:
return []
else:
@@ -373,11 +438,13 @@ def schema(self):
@schema.setter
def schema(self, value):
+ api_field = self._PROPERTY_TO_API_FIELD["schema"]
+
if value is None:
- self._properties["schema"] = None
+ self._properties[api_field] = None
else:
value = _to_schema_fields(value)
- self._properties["schema"] = {"fields": _build_schema_resource(value)}
+ self._properties[api_field] = {"fields": _build_schema_resource(value)}
@property
def labels(self):
@@ -390,13 +457,13 @@ def labels(self):
Raises:
ValueError: If ``value`` type is invalid.
"""
- return self._properties.setdefault("labels", {})
+ return self._properties.setdefault(self._PROPERTY_TO_API_FIELD["labels"], {})
@labels.setter
def labels(self, value):
if not isinstance(value, dict):
raise ValueError("Pass a dict")
- self._properties["labels"] = value
+ self._properties[self._PROPERTY_TO_API_FIELD["labels"]] = value
@property
def encryption_configuration(self):
@@ -410,7 +477,9 @@ def encryption_configuration(self):
`_
in the BigQuery documentation.
"""
- prop = self._properties.get("encryptionConfiguration")
+ prop = self._properties.get(
+ self._PROPERTY_TO_API_FIELD["encryption_configuration"]
+ )
if prop is not None:
prop = EncryptionConfiguration.from_api_repr(prop)
return prop
@@ -420,14 +489,16 @@ def encryption_configuration(self, value):
api_repr = value
if value is not None:
api_repr = value.to_api_repr()
- self._properties["encryptionConfiguration"] = api_repr
+ self._properties[
+ self._PROPERTY_TO_API_FIELD["encryption_configuration"]
+ ] = api_repr
@property
def created(self):
"""Union[datetime.datetime, None]: Datetime at which the table was
created (:data:`None` until set from the server).
"""
- creation_time = self._properties.get("creationTime")
+ creation_time = self._properties.get(self._PROPERTY_TO_API_FIELD["created"])
if creation_time is not None:
# creation_time will be in milliseconds.
return google.cloud._helpers._datetime_from_microseconds(
@@ -439,14 +510,14 @@ def etag(self):
"""Union[str, None]: ETag for the table resource (:data:`None` until
set from the server).
"""
- return self._properties.get("etag")
+ return self._properties.get(self._PROPERTY_TO_API_FIELD["etag"])
@property
def modified(self):
"""Union[datetime.datetime, None]: Datetime at which the table was last
modified (:data:`None` until set from the server).
"""
- modified_time = self._properties.get("lastModifiedTime")
+ modified_time = self._properties.get(self._PROPERTY_TO_API_FIELD["modified"])
if modified_time is not None:
# modified_time will be in milliseconds.
return google.cloud._helpers._datetime_from_microseconds(
@@ -458,21 +529,25 @@ def num_bytes(self):
"""Union[int, None]: The size of the table in bytes (:data:`None` until
set from the server).
"""
- return _helpers._int_or_none(self._properties.get("numBytes"))
+ return _helpers._int_or_none(
+ self._properties.get(self._PROPERTY_TO_API_FIELD["num_bytes"])
+ )
@property
def num_rows(self):
"""Union[int, None]: The number of rows in the table (:data:`None`
until set from the server).
"""
- return _helpers._int_or_none(self._properties.get("numRows"))
+ return _helpers._int_or_none(
+ self._properties.get(self._PROPERTY_TO_API_FIELD["num_rows"])
+ )
@property
def self_link(self):
"""Union[str, None]: URL for the table resource (:data:`None` until set
from the server).
"""
- return self._properties.get("selfLink")
+ return self._properties.get(self._PROPERTY_TO_API_FIELD["self_link"])
@property
def full_table_id(self):
@@ -481,7 +556,7 @@ def full_table_id(self):
In the format ``project-id:dataset_id.table_id``.
"""
- return self._properties.get("id")
+ return self._properties.get(self._PROPERTY_TO_API_FIELD["full_table_id"])
@property
def table_type(self):
@@ -491,7 +566,7 @@ def table_type(self):
Possible values are ``'TABLE'``, ``'VIEW'``, ``'MATERIALIZED_VIEW'`` or
``'EXTERNAL'``.
"""
- return self._properties.get("type")
+ return self._properties.get(self._PROPERTY_TO_API_FIELD["type"])
@property
def range_partitioning(self):
@@ -512,7 +587,9 @@ def range_partitioning(self):
:class:`~google.cloud.bigquery.table.RangePartitioning` or
:data:`None`.
"""
- resource = self._properties.get("rangePartitioning")
+ resource = self._properties.get(
+ self._PROPERTY_TO_API_FIELD["range_partitioning"]
+ )
if resource is not None:
return RangePartitioning(_properties=resource)
@@ -525,7 +602,7 @@ def range_partitioning(self, value):
raise ValueError(
"Expected value to be RangePartitioning or None, got {}.".format(value)
)
- self._properties["rangePartitioning"] = resource
+ self._properties[self._PROPERTY_TO_API_FIELD["range_partitioning"]] = resource
@property
def time_partitioning(self):
@@ -542,7 +619,7 @@ def time_partitioning(self):
:class:`~google.cloud.bigquery.table.TimePartitioning` or
:data:`None`.
"""
- prop = self._properties.get("timePartitioning")
+ prop = self._properties.get(self._PROPERTY_TO_API_FIELD["time_partitioning"])
if prop is not None:
return TimePartitioning.from_api_repr(prop)
@@ -555,7 +632,7 @@ def time_partitioning(self, value):
raise ValueError(
"value must be google.cloud.bigquery.table.TimePartitioning " "or None"
)
- self._properties["timePartitioning"] = api_repr
+ self._properties[self._PROPERTY_TO_API_FIELD["time_partitioning"]] = api_repr
@property
def partitioning_type(self):
@@ -580,9 +657,10 @@ def partitioning_type(self, value):
PendingDeprecationWarning,
stacklevel=2,
)
+ api_field = self._PROPERTY_TO_API_FIELD["partitioning_type"]
if self.time_partitioning is None:
- self._properties["timePartitioning"] = {}
- self._properties["timePartitioning"]["type"] = value
+ self._properties[api_field] = {}
+ self._properties[api_field]["type"] = value
@property
def partition_expiration(self):
@@ -609,9 +687,11 @@ def partition_expiration(self, value):
PendingDeprecationWarning,
stacklevel=2,
)
+ api_field = self._PROPERTY_TO_API_FIELD["partition_expiration"]
+
if self.time_partitioning is None:
- self._properties["timePartitioning"] = {"type": TimePartitioningType.DAY}
- self._properties["timePartitioning"]["expirationMs"] = str(value)
+ self._properties[api_field] = {"type": TimePartitioningType.DAY}
+ self._properties[api_field]["expirationMs"] = str(value)
@property
def clustering_fields(self):
@@ -626,7 +706,7 @@ def clustering_fields(self):
BigQuery supports clustering for both partitioned and
non-partitioned tables.
"""
- prop = self._properties.get("clustering")
+ prop = self._properties.get(self._PROPERTY_TO_API_FIELD["clustering_fields"])
if prop is not None:
return list(prop.get("fields", ()))
@@ -636,12 +716,15 @@ def clustering_fields(self, value):
(Defaults to :data:`None`).
"""
+ api_field = self._PROPERTY_TO_API_FIELD["clustering_fields"]
+
if value is not None:
- prop = self._properties.setdefault("clustering", {})
+ prop = self._properties.setdefault(api_field, {})
prop["fields"] = value
else:
- if "clustering" in self._properties:
- del self._properties["clustering"]
+ # In order to allow unsetting clustering fields completely, we explicitly
+ # set this property to None (as opposed to merely removing the key).
+ self._properties[api_field] = None
@property
def description(self):
@@ -651,13 +734,13 @@ def description(self):
Raises:
ValueError: For invalid value types.
"""
- return self._properties.get("description")
+ return self._properties.get(self._PROPERTY_TO_API_FIELD["description"])
@description.setter
def description(self, value):
if not isinstance(value, str) and value is not None:
raise ValueError("Pass a string, or None")
- self._properties["description"] = value
+ self._properties[self._PROPERTY_TO_API_FIELD["description"]] = value
@property
def expires(self):
@@ -667,7 +750,7 @@ def expires(self):
Raises:
ValueError: For invalid value types.
"""
- expiration_time = self._properties.get("expirationTime")
+ expiration_time = self._properties.get(self._PROPERTY_TO_API_FIELD["expires"])
if expiration_time is not None:
# expiration_time will be in milliseconds.
return google.cloud._helpers._datetime_from_microseconds(
@@ -679,7 +762,9 @@ def expires(self, value):
if not isinstance(value, datetime.datetime) and value is not None:
raise ValueError("Pass a datetime, or None")
value_ms = google.cloud._helpers._millis_from_datetime(value)
- self._properties["expirationTime"] = _helpers._str_or_none(value_ms)
+ self._properties[
+ self._PROPERTY_TO_API_FIELD["expires"]
+ ] = _helpers._str_or_none(value_ms)
@property
def friendly_name(self):
@@ -688,13 +773,13 @@ def friendly_name(self):
Raises:
ValueError: For invalid value types.
"""
- return self._properties.get("friendlyName")
+ return self._properties.get(self._PROPERTY_TO_API_FIELD["friendly_name"])
@friendly_name.setter
def friendly_name(self, value):
if not isinstance(value, str) and value is not None:
raise ValueError("Pass a string, or None")
- self._properties["friendlyName"] = value
+ self._properties[self._PROPERTY_TO_API_FIELD["friendly_name"]] = value
@property
def location(self):
@@ -702,7 +787,7 @@ def location(self):
Defaults to :data:`None`.
"""
- return self._properties.get("location")
+ return self._properties.get(self._PROPERTY_TO_API_FIELD["location"])
@property
def view_query(self):
@@ -715,14 +800,17 @@ def view_query(self):
Raises:
ValueError: For invalid value types.
"""
- return _helpers._get_sub_prop(self._properties, ["view", "query"])
+ api_field = self._PROPERTY_TO_API_FIELD["view_query"]
+ return _helpers._get_sub_prop(self._properties, [api_field, "query"])
@view_query.setter
def view_query(self, value):
if not isinstance(value, str):
raise ValueError("Pass a string")
- _helpers._set_sub_prop(self._properties, ["view", "query"], value)
- view = self._properties["view"]
+
+ api_field = self._PROPERTY_TO_API_FIELD["view_query"]
+ _helpers._set_sub_prop(self._properties, [api_field, "query"], value)
+ view = self._properties[api_field]
# The service defaults useLegacySql to True, but this
# client uses Standard SQL by default.
if view.get("useLegacySql") is None:
@@ -731,7 +819,7 @@ def view_query(self, value):
@view_query.deleter
def view_query(self):
"""Delete SQL query defining the table as a view."""
- self._properties.pop("view", None)
+ self._properties.pop(self._PROPERTY_TO_API_FIELD["view_query"], None)
view_use_legacy_sql = property(_view_use_legacy_sql_getter)
@@ -739,27 +827,29 @@ def view_query(self):
def view_use_legacy_sql(self, value):
if not isinstance(value, bool):
raise ValueError("Pass a boolean")
- if self._properties.get("view") is None:
- self._properties["view"] = {}
- self._properties["view"]["useLegacySql"] = value
+
+ api_field = self._PROPERTY_TO_API_FIELD["view_query"]
+ if self._properties.get(api_field) is None:
+ self._properties[api_field] = {}
+ self._properties[api_field]["useLegacySql"] = value
@property
def mview_query(self):
"""Optional[str]: SQL query defining the table as a materialized
view (defaults to :data:`None`).
"""
- return _helpers._get_sub_prop(self._properties, ["materializedView", "query"])
+ api_field = self._PROPERTY_TO_API_FIELD["mview_query"]
+ return _helpers._get_sub_prop(self._properties, [api_field, "query"])
@mview_query.setter
def mview_query(self, value):
- _helpers._set_sub_prop(
- self._properties, ["materializedView", "query"], str(value)
- )
+ api_field = self._PROPERTY_TO_API_FIELD["mview_query"]
+ _helpers._set_sub_prop(self._properties, [api_field, "query"], str(value))
@mview_query.deleter
def mview_query(self):
"""Delete SQL query defining the table as a materialized view."""
- self._properties.pop("materializedView", None)
+ self._properties.pop(self._PROPERTY_TO_API_FIELD["mview_query"], None)
@property
def mview_last_refresh_time(self):
@@ -767,7 +857,7 @@ def mview_last_refresh_time(self):
refreshed (:data:`None` until set from the server).
"""
refresh_time = _helpers._get_sub_prop(
- self._properties, ["materializedView", "lastRefreshTime"]
+ self._properties, self._PROPERTY_TO_API_FIELD["mview_last_refresh_time"]
)
if refresh_time is not None:
# refresh_time will be in milliseconds.
@@ -780,14 +870,14 @@ def mview_enable_refresh(self):
"""Optional[bool]: Enable automatic refresh of the materialized view
when the base table is updated. The default value is :data:`True`.
"""
- return _helpers._get_sub_prop(
- self._properties, ["materializedView", "enableRefresh"]
- )
+ api_field = self._PROPERTY_TO_API_FIELD["mview_enable_refresh"]
+ return _helpers._get_sub_prop(self._properties, [api_field, "enableRefresh"])
@mview_enable_refresh.setter
def mview_enable_refresh(self, value):
+ api_field = self._PROPERTY_TO_API_FIELD["mview_enable_refresh"]
return _helpers._set_sub_prop(
- self._properties, ["materializedView", "enableRefresh"], value
+ self._properties, [api_field, "enableRefresh"], value
)
@property
@@ -796,8 +886,9 @@ def mview_refresh_interval(self):
materialized view will be refreshed. The default value is 1800000
milliseconds (30 minutes).
"""
+ api_field = self._PROPERTY_TO_API_FIELD["mview_refresh_interval"]
refresh_interval = _helpers._get_sub_prop(
- self._properties, ["materializedView", "refreshIntervalMs"]
+ self._properties, [api_field, "refreshIntervalMs"]
)
if refresh_interval is not None:
return datetime.timedelta(milliseconds=int(refresh_interval))
@@ -809,10 +900,9 @@ def mview_refresh_interval(self, value):
else:
refresh_interval_ms = str(value // datetime.timedelta(milliseconds=1))
+ api_field = self._PROPERTY_TO_API_FIELD["mview_refresh_interval"]
_helpers._set_sub_prop(
- self._properties,
- ["materializedView", "refreshIntervalMs"],
- refresh_interval_ms,
+ self._properties, [api_field, "refreshIntervalMs"], refresh_interval_ms,
)
@property
@@ -820,7 +910,7 @@ def streaming_buffer(self):
"""google.cloud.bigquery.StreamingBuffer: Information about a table's
streaming buffer.
"""
- sb = self._properties.get("streamingBuffer")
+ sb = self._properties.get(self._PROPERTY_TO_API_FIELD["streaming_buffer"])
if sb is not None:
return StreamingBuffer(sb)
@@ -832,7 +922,9 @@ def external_data_configuration(self):
Raises:
ValueError: For invalid value types.
"""
- prop = self._properties.get("externalDataConfiguration")
+ prop = self._properties.get(
+ self._PROPERTY_TO_API_FIELD["external_data_configuration"]
+ )
if prop is not None:
prop = ExternalConfig.from_api_repr(prop)
return prop
@@ -844,10 +936,25 @@ def external_data_configuration(self, value):
api_repr = value
if value is not None:
api_repr = value.to_api_repr()
- self._properties["externalDataConfiguration"] = api_repr
+ self._properties[
+ self._PROPERTY_TO_API_FIELD["external_data_configuration"]
+ ] = api_repr
+
+ @property
+ def snapshot_definition(self) -> Optional["SnapshotDefinition"]:
+ """Information about the snapshot. This value is set via snapshot creation.
+
+ See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table.FIELDS.snapshot_definition
+ """
+ snapshot_info = self._properties.get(
+ self._PROPERTY_TO_API_FIELD["snapshot_definition"]
+ )
+ if snapshot_info is not None:
+ snapshot_info = SnapshotDefinition(snapshot_info)
+ return snapshot_info
@classmethod
- def from_string(cls, full_table_id):
+ def from_string(cls, full_table_id: str) -> "Table":
"""Construct a table from fully-qualified table ID.
Args:
@@ -871,7 +978,7 @@ def from_string(cls, full_table_id):
return cls(TableReference.from_string(full_table_id))
@classmethod
- def from_api_repr(cls, resource):
+ def from_api_repr(cls, resource: dict) -> "Table":
"""Factory: construct a table given its API representation
Args:
@@ -897,9 +1004,15 @@ def from_api_repr(cls, resource):
"Resource lacks required identity information:"
'["tableReference"]["tableId"]'
)
- project_id = resource["tableReference"]["projectId"]
- table_id = resource["tableReference"]["tableId"]
- dataset_id = resource["tableReference"]["datasetId"]
+ project_id = _helpers._get_sub_prop(
+ resource, cls._PROPERTY_TO_API_FIELD["project"]
+ )
+ table_id = _helpers._get_sub_prop(
+ resource, cls._PROPERTY_TO_API_FIELD["table_id"]
+ )
+ dataset_id = _helpers._get_sub_prop(
+ resource, cls._PROPERTY_TO_API_FIELD["dataset_id"]
+ )
dataset_ref = dataset.DatasetReference(project_id, dataset_id)
table = cls(dataset_ref.table(table_id))
@@ -907,7 +1020,7 @@ def from_api_repr(cls, resource):
return table
- def to_api_repr(self):
+ def to_api_repr(self) -> dict:
"""Constructs the API resource of this table
Returns:
@@ -915,7 +1028,7 @@ def to_api_repr(self):
"""
return copy.deepcopy(self._properties)
- def to_bqstorage(self):
+ def to_bqstorage(self) -> str:
"""Construct a BigQuery Storage API representation of this table.
Returns:
@@ -927,6 +1040,24 @@ def _build_resource(self, filter_fields):
"""Generate a resource for ``update``."""
return _helpers._build_resource_from_properties(self, filter_fields)
+ def __eq__(self, other):
+ if isinstance(other, Table):
+ return (
+ self._properties["tableReference"]
+ == other._properties["tableReference"]
+ )
+ elif isinstance(other, (TableReference, TableListItem)):
+ return (
+ self.project == other.project
+ and self.dataset_id == other.dataset_id
+ and self.table_id == other.table_id
+ )
+ else:
+ return NotImplemented
+
+ def __hash__(self):
+ return hash((self.project, self.dataset_id, self.table_id))
+
def __repr__(self):
return "Table({})".format(repr(self.reference))
@@ -1104,7 +1235,7 @@ def clustering_fields(self):
return list(prop.get("fields", ()))
@classmethod
- def from_string(cls, full_table_id):
+ def from_string(cls, full_table_id: str) -> "TableListItem":
"""Construct a table from fully-qualified table ID.
Args:
@@ -1129,7 +1260,7 @@ def from_string(cls, full_table_id):
{"tableReference": TableReference.from_string(full_table_id).to_api_repr()}
)
- def to_bqstorage(self):
+ def to_bqstorage(self) -> str:
"""Construct a BigQuery Storage API representation of this table.
Returns:
@@ -1137,7 +1268,7 @@ def to_bqstorage(self):
"""
return self.reference.to_bqstorage()
- def to_api_repr(self):
+ def to_api_repr(self) -> dict:
"""Constructs the API resource of this table
Returns:
@@ -1145,6 +1276,19 @@ def to_api_repr(self):
"""
return copy.deepcopy(self._properties)
+ def __eq__(self, other):
+ if isinstance(other, (Table, TableReference, TableListItem)):
+ return (
+ self.project == other.project
+ and self.dataset_id == other.dataset_id
+ and self.table_id == other.table_id
+ )
+ else:
+ return NotImplemented
+
+ def __hash__(self):
+ return hash((self.project, self.dataset_id, self.table_id))
+
def _row_from_mapping(mapping, schema):
"""Convert a mapping to a row tuple using the schema.
@@ -1204,6 +1348,29 @@ def __init__(self, resource):
)
+class SnapshotDefinition:
+ """Information about base table and snapshot time of the snapshot.
+
+ See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#snapshotdefinition
+
+ Args:
+ resource: Snapshot definition representation returned from the API.
+ """
+
+ def __init__(self, resource: Dict[str, Any]):
+ self.base_table_reference = None
+ if "baseTableReference" in resource:
+ self.base_table_reference = TableReference.from_api_repr(
+ resource["baseTableReference"]
+ )
+
+ self.snapshot_time = None
+ if "snapshotTime" in resource:
+ self.snapshot_time = google.cloud._helpers._rfc3339_to_datetime(
+ resource["snapshotTime"]
+ )
+
+
class Row(object):
"""A BigQuery row.
@@ -1231,7 +1398,7 @@ def values(self):
"""
return copy.deepcopy(self._xxx_values)
- def keys(self):
+ def keys(self) -> Iterable[str]:
"""Return the keys for using a row as a dict.
Returns:
@@ -1244,7 +1411,7 @@ def keys(self):
"""
return self._xxx_field_to_index.keys()
- def items(self):
+ def items(self) -> Iterable[Tuple[str, Any]]:
"""Return items as ``(key, value)`` pairs.
Returns:
@@ -1259,7 +1426,7 @@ def items(self):
for key, index in self._xxx_field_to_index.items():
yield (key, copy.deepcopy(self._xxx_values[index]))
- def get(self, key, default=None):
+ def get(self, key: str, default: Any = None) -> Any:
"""Return a value for key, with a default value if it does not exist.
Args:
@@ -1345,7 +1512,9 @@ class RowIterator(HTTPIterator):
"""A class for iterating through HTTP/JSON API row list responses.
Args:
- client (google.cloud.bigquery.Client): The API client.
+ client (Optional[google.cloud.bigquery.Client]):
+ The API client instance. This should always be non-`None`, except for
+ subclasses that do not use it, namely the ``_EmptyRowIterator``.
api_request (Callable[google.cloud._http.JSONConnection.api_request]):
The function to use to make API requests.
path (str): The method path to query for the list of items.
@@ -1410,7 +1579,7 @@ def __init__(
self._field_to_index = _helpers._field_to_index_mapping(schema)
self._page_size = page_size
self._preserve_order = False
- self._project = client.project
+ self._project = client.project if client is not None else None
self._schema = schema
self._selected_fields = selected_fields
self._table = table
@@ -1443,11 +1612,17 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client):
return False
if self.max_results is not None:
- warnings.warn(
- "Cannot use bqstorage_client if max_results is set, "
- "reverting to fetching data with the REST endpoint.",
- stacklevel=2,
- )
+ return False
+
+ try:
+ from google.cloud import bigquery_storage # noqa: F401
+ except ImportError:
+ return False
+
+ try:
+ _helpers.BQ_STORAGE_VERSIONS.verify_version()
+ except LegacyBigQueryStorageError as exc:
+ warnings.warn(str(exc))
return False
return True
@@ -1484,19 +1659,37 @@ def total_rows(self):
"""int: The total number of rows in the table."""
return self._total_rows
+ def _maybe_warn_max_results(
+ self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"],
+ ):
+ """Issue a warning if BQ Storage client is not ``None`` with ``max_results`` set.
+
+ This helper method should be used directly in the relevant top-level public
+ methods, so that the warning is issued for the correct line in user code.
+
+ Args:
+ bqstorage_client:
+ The BigQuery Storage client intended to use for downloading result rows.
+ """
+ if bqstorage_client is not None and self.max_results is not None:
+ warnings.warn(
+ "Cannot use bqstorage_client if max_results is set, "
+ "reverting to fetching data with the REST endpoint.",
+ stacklevel=3,
+ )
+
def _to_page_iterable(
self, bqstorage_download, tabledata_list_download, bqstorage_client=None
):
if not self._validate_bqstorage(bqstorage_client, False):
bqstorage_client = None
- if bqstorage_client is not None:
- for item in bqstorage_download():
- yield item
- return
-
- for item in tabledata_list_download():
- yield item
+ result_pages = (
+ bqstorage_download()
+ if bqstorage_client is not None
+ else tabledata_list_download()
+ )
+ yield from result_pages
def _to_arrow_iterable(self, bqstorage_client=None):
"""Create an iterable of arrow RecordBatches, to process the table as a stream."""
@@ -1521,10 +1714,10 @@ def _to_arrow_iterable(self, bqstorage_client=None):
# changes to job.QueryJob.to_arrow()
def to_arrow(
self,
- progress_bar_type=None,
- bqstorage_client=None,
- create_bqstorage_client=True,
- ):
+ progress_bar_type: str = None,
+ bqstorage_client: "bigquery_storage.BigQueryReadClient" = None,
+ create_bqstorage_client: bool = True,
+ ) -> "pyarrow.Table":
"""[Beta] Create a class:`pyarrow.Table` by loading all pages of a
table or query.
@@ -1565,7 +1758,7 @@ def to_arrow(
This argument does nothing if ``bqstorage_client`` is supplied.
- ..versionadded:: 1.24.0
+ .. versionadded:: 1.24.0
Returns:
pyarrow.Table
@@ -1576,18 +1769,20 @@ def to_arrow(
Raises:
ValueError: If the :mod:`pyarrow` library cannot be imported.
- ..versionadded:: 1.17.0
+ .. versionadded:: 1.17.0
"""
if pyarrow is None:
raise ValueError(_NO_PYARROW_ERROR)
+ self._maybe_warn_max_results(bqstorage_client)
+
if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client):
create_bqstorage_client = False
bqstorage_client = None
owns_bqstorage_client = False
if not bqstorage_client and create_bqstorage_client:
- bqstorage_client = self.client._create_bqstorage_client()
+ bqstorage_client = self.client._ensure_bqstorage_client()
owns_bqstorage_client = bqstorage_client is not None
try:
@@ -1622,7 +1817,12 @@ def to_arrow(
arrow_schema = _pandas_helpers.bq_to_arrow_schema(self._schema)
return pyarrow.Table.from_batches(record_batches, schema=arrow_schema)
- def to_dataframe_iterable(self, bqstorage_client=None, dtypes=None):
+ def to_dataframe_iterable(
+ self,
+ bqstorage_client: "bigquery_storage.BigQueryReadClient" = None,
+ dtypes: Dict[str, Any] = None,
+ max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT,
+ ) -> "pandas.DataFrame":
"""Create an iterable of pandas DataFrames, to process the table as a stream.
Args:
@@ -1642,6 +1842,17 @@ def to_dataframe_iterable(self, bqstorage_client=None, dtypes=None):
``dtype`` is used when constructing the series for the column
specified. Otherwise, the default pandas behavior is used.
+ max_queue_size (Optional[int]):
+ The maximum number of result pages to hold in the internal queue when
+ streaming query results over the BigQuery Storage API. Ignored if
+ Storage API is not used.
+
+ By default, the max queue size is set to the number of BQ Storage streams
+ created by the server. If ``max_queue_size`` is :data:`None`, the queue
+ size is infinite.
+
+ .. versionadded:: 2.14.0
+
Returns:
pandas.DataFrame:
A generator of :class:`~pandas.DataFrame`.
@@ -1655,6 +1866,8 @@ def to_dataframe_iterable(self, bqstorage_client=None, dtypes=None):
if dtypes is None:
dtypes = {}
+ self._maybe_warn_max_results(bqstorage_client)
+
column_names = [field.name for field in self._schema]
bqstorage_download = functools.partial(
_pandas_helpers.download_dataframe_bqstorage,
@@ -1665,6 +1878,7 @@ def to_dataframe_iterable(self, bqstorage_client=None, dtypes=None):
dtypes,
preserve_order=self._preserve_order,
selected_fields=self._selected_fields,
+ max_queue_size=max_queue_size,
)
tabledata_list_download = functools.partial(
_pandas_helpers.download_dataframe_row_iterator,
@@ -1682,12 +1896,13 @@ def to_dataframe_iterable(self, bqstorage_client=None, dtypes=None):
# changes to job.QueryJob.to_dataframe()
def to_dataframe(
self,
- bqstorage_client=None,
- dtypes=None,
- progress_bar_type=None,
- create_bqstorage_client=True,
- date_as_object=True,
- ):
+ bqstorage_client: "bigquery_storage.BigQueryReadClient" = None,
+ dtypes: Dict[str, Any] = None,
+ progress_bar_type: str = None,
+ create_bqstorage_client: bool = True,
+ date_as_object: bool = True,
+ geography_as_object: bool = False,
+ ) -> "pandas.DataFrame":
"""Create a pandas DataFrame by loading all pages of a query.
Args:
@@ -1725,7 +1940,7 @@ def to_dataframe(
Use the :func:`tqdm.tqdm_gui` function to display a
progress bar as a graphical dialog box.
- ..versionadded:: 1.11.0
+ .. versionadded:: 1.11.0
create_bqstorage_client (Optional[bool]):
If ``True`` (default), create a BigQuery Storage API client
using the default API settings. The BigQuery Storage API
@@ -1734,13 +1949,20 @@ def to_dataframe(
This argument does nothing if ``bqstorage_client`` is supplied.
- ..versionadded:: 1.24.0
+ .. versionadded:: 1.24.0
date_as_object (Optional[bool]):
If ``True`` (default), cast dates to objects. If ``False``, convert
to datetime64[ns] dtype.
- ..versionadded:: 1.26.0
+ .. versionadded:: 1.26.0
+
+ geography_as_object (Optional[bool]):
+ If ``True``, convert GEOGRAPHY data to :mod:`shapely`
+ geometry objects. If ``False`` (default), don't cast
+ geography data to :mod:`shapely` geometry objects.
+
+ .. versionadded:: 2.24.0
Returns:
pandas.DataFrame:
@@ -1750,16 +1972,23 @@ def to_dataframe(
Raises:
ValueError:
- If the :mod:`pandas` library cannot be imported, or the
- :mod:`google.cloud.bigquery_storage_v1` module is
- required but cannot be imported.
+ If the :mod:`pandas` library cannot be imported, or
+ the :mod:`google.cloud.bigquery_storage_v1` module is
+ required but cannot be imported. Also if
+ `geography_as_object` is `True`, but the
+ :mod:`shapely` library cannot be imported.
"""
if pandas is None:
raise ValueError(_NO_PANDAS_ERROR)
+ if geography_as_object and shapely is None:
+ raise ValueError(_NO_SHAPELY_ERROR)
+
if dtypes is None:
dtypes = {}
+ self._maybe_warn_max_results(bqstorage_client)
+
if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client):
create_bqstorage_client = False
bqstorage_client = None
@@ -1775,7 +2004,7 @@ def to_dataframe(
# Pandas, we set the timestamp_as_object parameter to True, if necessary.
types_to_check = {
pyarrow.timestamp("us"),
- pyarrow.timestamp("us", tz=pytz.UTC),
+ pyarrow.timestamp("us", tz=datetime.timezone.utc),
}
for column in record_batch:
@@ -1795,10 +2024,138 @@ def to_dataframe(
for column in dtypes:
df[column] = pandas.Series(df[column], dtype=dtypes[column])
+ if geography_as_object:
+ for field in self.schema:
+ if field.field_type.upper() == "GEOGRAPHY":
+ df[field.name] = df[field.name].dropna().apply(_read_wkt)
+
return df
+ # If changing the signature of this method, make sure to apply the same
+ # changes to job.QueryJob.to_geodataframe()
+ def to_geodataframe(
+ self,
+ bqstorage_client: "bigquery_storage.BigQueryReadClient" = None,
+ dtypes: Dict[str, Any] = None,
+ progress_bar_type: str = None,
+ create_bqstorage_client: bool = True,
+ date_as_object: bool = True,
+ geography_column: Optional[str] = None,
+ ) -> "geopandas.GeoDataFrame":
+ """Create a GeoPandas GeoDataFrame by loading all pages of a query.
+
+ Args:
+ bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):
+ A BigQuery Storage API client. If supplied, use the faster
+ BigQuery Storage API to fetch rows from BigQuery.
+
+ This method requires the ``pyarrow`` and
+ ``google-cloud-bigquery-storage`` libraries.
+
+ This method only exposes a subset of the capabilities of the
+ BigQuery Storage API. For full access to all features
+ (projections, filters, snapshots) use the Storage API directly.
+
+ dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]):
+ A dictionary mapping column names to pandas ``dtype``s. The provided
+ ``dtype`` is used when constructing the series for the column
+ specified. Otherwise, the default pandas behavior is used.
+ progress_bar_type (Optional[str]):
+ If set, use the `tqdm <https://tqdm.github.io/>`_ library to
+ display a progress bar while the data downloads. Install the
+ ``tqdm`` package to use this feature.
+
+ Possible values of ``progress_bar_type`` include:
+
+ ``None``
+ No progress bar.
+ ``'tqdm'``
+ Use the :func:`tqdm.tqdm` function to print a progress bar
+ to :data:`sys.stderr`.
+ ``'tqdm_notebook'``
+ Use the :func:`tqdm.tqdm_notebook` function to display a
+ progress bar as a Jupyter notebook widget.
+ ``'tqdm_gui'``
+ Use the :func:`tqdm.tqdm_gui` function to display a
+ progress bar as a graphical dialog box.
+
+ create_bqstorage_client (Optional[bool]):
+ If ``True`` (default), create a BigQuery Storage API client
+ using the default API settings. The BigQuery Storage API
+ is a faster way to fetch rows from BigQuery. See the
+ ``bqstorage_client`` parameter for more information.
+
+ This argument does nothing if ``bqstorage_client`` is supplied.
+
+ date_as_object (Optional[bool]):
+ If ``True`` (default), cast dates to objects. If ``False``, convert
+ to datetime64[ns] dtype.
+
+ geography_column (Optional[str]):
+ If there is more than one GEOGRAPHY column,
+ identifies which one to use to construct a geopandas
+ GeoDataFrame. This option can be omitted if there's
+ only one GEOGRAPHY column.
+
+ Returns:
+ geopandas.GeoDataFrame:
+ A :class:`geopandas.GeoDataFrame` populated with row
+ data and column headers from the query results. The
+ column headers are derived from the destination
+ table's schema.
+
+ Raises:
+ ValueError:
+ If the :mod:`geopandas` library cannot be imported, or the
+ :mod:`google.cloud.bigquery_storage_v1` module is
+ required but cannot be imported.
+
+ .. versionadded:: 2.24.0
+ """
+ if geopandas is None:
+ raise ValueError(_NO_GEOPANDAS_ERROR)
+
+ geography_columns = set(
+ field.name
+ for field in self.schema
+ if field.field_type.upper() == "GEOGRAPHY"
+ )
+ if not geography_columns:
+ raise TypeError(
+ "There must be at least one GEOGRAPHY column"
+ " to create a GeoDataFrame"
+ )
+
+ if geography_column:
+ if geography_column not in geography_columns:
+ raise ValueError(
+ f"The given geography column, {geography_column}, doesn't name"
+ f" a GEOGRAPHY column in the result."
+ )
+ elif len(geography_columns) == 1:
+ [geography_column] = geography_columns
+ else:
+ raise ValueError(
+ "There is more than one GEOGRAPHY column in the result. "
+ "The geography_column argument must be used to specify which "
+ "one to use to create a GeoDataFrame"
+ )
+
+ df = self.to_dataframe(
+ bqstorage_client,
+ dtypes,
+ progress_bar_type,
+ create_bqstorage_client,
+ date_as_object,
+ geography_as_object=True,
+ )
+
+ return geopandas.GeoDataFrame(
+ df, crs=_COORDINATE_REFERENCE_SYSTEM, geometry=geography_column
+ )
+
-class _EmptyRowIterator(object):
+class _EmptyRowIterator(RowIterator):
"""An empty row iterator.
This class prevents API requests when there are no rows to fetch or rows
@@ -1810,12 +2167,24 @@ class _EmptyRowIterator(object):
pages = ()
total_rows = 0
+ def __init__(
+ self, client=None, api_request=None, path=None, schema=(), *args, **kwargs
+ ):
+ super().__init__(
+ client=client,
+ api_request=api_request,
+ path=path,
+ schema=schema,
+ *args,
+ **kwargs,
+ )
+
def to_arrow(
self,
progress_bar_type=None,
bqstorage_client=None,
create_bqstorage_client=True,
- ):
+ ) -> "pyarrow.Table":
"""[Beta] Create an empty class:`pyarrow.Table`.
Args:
@@ -1837,7 +2206,8 @@ def to_dataframe(
progress_bar_type=None,
create_bqstorage_client=True,
date_as_object=True,
- ):
+ geography_as_object=False,
+ ) -> "pandas.DataFrame":
"""Create an empty dataframe.
Args:
@@ -1854,6 +2224,62 @@ def to_dataframe(
raise ValueError(_NO_PANDAS_ERROR)
return pandas.DataFrame()
+ def to_geodataframe(
+ self,
+ bqstorage_client=None,
+ dtypes=None,
+ progress_bar_type=None,
+ create_bqstorage_client=True,
+ date_as_object=True,
+ geography_column: Optional[str] = None,
+ ) -> "pandas.DataFrame":
+ """Create an empty GeoDataFrame.
+
+ Args:
+ bqstorage_client (Any): Ignored. Added for compatibility with RowIterator.
+ dtypes (Any): Ignored. Added for compatibility with RowIterator.
+ progress_bar_type (Any): Ignored. Added for compatibility with RowIterator.
+ create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator.
+ date_as_object (bool): Ignored. Added for compatibility with RowIterator.
+
+ Returns:
+ geopandas.GeoDataFrame: An empty :class:`~geopandas.GeoDataFrame`.
+ """
+ if geopandas is None:
+ raise ValueError(_NO_GEOPANDAS_ERROR)
+ return geopandas.GeoDataFrame(crs=_COORDINATE_REFERENCE_SYSTEM)
+
+ def to_dataframe_iterable(
+ self,
+ bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
+ dtypes: Optional[Dict[str, Any]] = None,
+ max_queue_size: Optional[int] = None,
+ ) -> Iterator["pandas.DataFrame"]:
+ """Create an iterable of pandas DataFrames, to process the table as a stream.
+
+ .. versionadded:: 2.21.0
+
+ Args:
+ bqstorage_client:
+ Ignored. Added for compatibility with RowIterator.
+
+ dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]):
+ Ignored. Added for compatibility with RowIterator.
+
+ max_queue_size:
+ Ignored. Added for compatibility with RowIterator.
+
+ Returns:
+ An iterator yielding a single empty :class:`~pandas.DataFrame`.
+
+ Raises:
+ ValueError:
+ If the :mod:`pandas` library cannot be imported.
+ """
+ if pandas is None:
+ raise ValueError(_NO_PANDAS_ERROR)
+ return iter((pandas.DataFrame(),))
+
def __iter__(self):
return iter(())
@@ -2148,7 +2574,7 @@ def require_partition_filter(self, value):
self._properties["requirePartitionFilter"] = value
@classmethod
- def from_api_repr(cls, api_repr):
+ def from_api_repr(cls, api_repr: dict) -> "TimePartitioning":
"""Return a :class:`TimePartitioning` object deserialized from a dict.
This method creates a new ``TimePartitioning`` instance that points to
@@ -2176,7 +2602,7 @@ def from_api_repr(cls, api_repr):
instance._properties = api_repr
return instance
- def to_api_repr(self):
+ def to_api_repr(self) -> dict:
"""Return a dictionary representing this object.
This method returns the properties dict of the ``TimePartitioning``
diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py
index d962613e0..21cbec9fe 100644
--- a/google/cloud/bigquery/version.py
+++ b/google/cloud/bigquery/version.py
@@ -12,4 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-__version__ = "2.7.0"
+__version__ = "2.25.1"
diff --git a/google/cloud/bigquery_v2/__init__.py b/google/cloud/bigquery_v2/__init__.py
index ebcc26bef..f9957efa9 100644
--- a/google/cloud/bigquery_v2/__init__.py
+++ b/google/cloud/bigquery_v2/__init__.py
@@ -1,5 +1,4 @@
# -*- coding: utf-8 -*-
-
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -27,9 +26,9 @@
from .types.standard_sql import StandardSqlDataType
from .types.standard_sql import StandardSqlField
from .types.standard_sql import StandardSqlStructType
+from .types.standard_sql import StandardSqlTableType
from .types.table_reference import TableReference
-
__all__ = (
"DeleteModelRequest",
"EncryptionConfiguration",
@@ -42,5 +41,6 @@
"StandardSqlDataType",
"StandardSqlField",
"StandardSqlStructType",
+ "StandardSqlTableType",
"TableReference",
)
diff --git a/google/cloud/bigquery_v2/gapic_metadata.json b/google/cloud/bigquery_v2/gapic_metadata.json
new file mode 100644
index 000000000..3251a2630
--- /dev/null
+++ b/google/cloud/bigquery_v2/gapic_metadata.json
@@ -0,0 +1,63 @@
+ {
+ "comment": "This file maps proto services/RPCs to the corresponding library clients/methods",
+ "language": "python",
+ "libraryPackage": "google.cloud.bigquery_v2",
+ "protoPackage": "google.cloud.bigquery.v2",
+ "schema": "1.0",
+ "services": {
+ "ModelService": {
+ "clients": {
+ "grpc": {
+ "libraryClient": "ModelServiceClient",
+ "rpcs": {
+ "DeleteModel": {
+ "methods": [
+ "delete_model"
+ ]
+ },
+ "GetModel": {
+ "methods": [
+ "get_model"
+ ]
+ },
+ "ListModels": {
+ "methods": [
+ "list_models"
+ ]
+ },
+ "PatchModel": {
+ "methods": [
+ "patch_model"
+ ]
+ }
+ }
+ },
+ "grpc-async": {
+ "libraryClient": "ModelServiceAsyncClient",
+ "rpcs": {
+ "DeleteModel": {
+ "methods": [
+ "delete_model"
+ ]
+ },
+ "GetModel": {
+ "methods": [
+ "get_model"
+ ]
+ },
+ "ListModels": {
+ "methods": [
+ "list_models"
+ ]
+ },
+ "PatchModel": {
+ "methods": [
+ "patch_model"
+ ]
+ }
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/google/cloud/bigquery_v2/proto/encryption_config.proto b/google/cloud/bigquery_v2/proto/encryption_config.proto
deleted file mode 100644
index 1c0512a17..000000000
--- a/google/cloud/bigquery_v2/proto/encryption_config.proto
+++ /dev/null
@@ -1,32 +0,0 @@
-// Copyright 2020 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-syntax = "proto3";
-
-package google.cloud.bigquery.v2;
-
-import "google/api/field_behavior.proto";
-import "google/protobuf/wrappers.proto";
-import "google/api/annotations.proto";
-
-option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery";
-option java_outer_classname = "EncryptionConfigProto";
-option java_package = "com.google.cloud.bigquery.v2";
-
-message EncryptionConfiguration {
- // Optional. Describes the Cloud KMS encryption key that will be used to
- // protect destination BigQuery table. The BigQuery Service Account associated
- // with your project requires access to this encryption key.
- google.protobuf.StringValue kms_key_name = 1 [(google.api.field_behavior) = OPTIONAL];
-}
diff --git a/google/cloud/bigquery_v2/proto/encryption_config_pb2.py b/google/cloud/bigquery_v2/proto/encryption_config_pb2.py
deleted file mode 100644
index 5ae21ea6f..000000000
--- a/google/cloud/bigquery_v2/proto/encryption_config_pb2.py
+++ /dev/null
@@ -1,104 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: google/cloud/bigquery_v2/proto/encryption_config.proto
-"""Generated protocol buffer code."""
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2
-from google.protobuf import wrappers_pb2 as google_dot_protobuf_dot_wrappers__pb2
-from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name="google/cloud/bigquery_v2/proto/encryption_config.proto",
- package="google.cloud.bigquery.v2",
- syntax="proto3",
- serialized_options=b"\n\034com.google.cloud.bigquery.v2B\025EncryptionConfigProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery",
- create_key=_descriptor._internal_create_key,
- serialized_pb=b'\n6google/cloud/bigquery_v2/proto/encryption_config.proto\x12\x18google.cloud.bigquery.v2\x1a\x1fgoogle/api/field_behavior.proto\x1a\x1egoogle/protobuf/wrappers.proto\x1a\x1cgoogle/api/annotations.proto"R\n\x17\x45ncryptionConfiguration\x12\x37\n\x0ckms_key_name\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.StringValueB\x03\xe0\x41\x01\x42w\n\x1c\x63om.google.cloud.bigquery.v2B\x15\x45ncryptionConfigProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3',
- dependencies=[
- google_dot_api_dot_field__behavior__pb2.DESCRIPTOR,
- google_dot_protobuf_dot_wrappers__pb2.DESCRIPTOR,
- google_dot_api_dot_annotations__pb2.DESCRIPTOR,
- ],
-)
-
-
-_ENCRYPTIONCONFIGURATION = _descriptor.Descriptor(
- name="EncryptionConfiguration",
- full_name="google.cloud.bigquery.v2.EncryptionConfiguration",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="kms_key_name",
- full_name="google.cloud.bigquery.v2.EncryptionConfiguration.kms_key_name",
- index=0,
- number=1,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\001",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=179,
- serialized_end=261,
-)
-
-_ENCRYPTIONCONFIGURATION.fields_by_name[
- "kms_key_name"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._STRINGVALUE
-DESCRIPTOR.message_types_by_name["EncryptionConfiguration"] = _ENCRYPTIONCONFIGURATION
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-EncryptionConfiguration = _reflection.GeneratedProtocolMessageType(
- "EncryptionConfiguration",
- (_message.Message,),
- {
- "DESCRIPTOR": _ENCRYPTIONCONFIGURATION,
- "__module__": "google.cloud.bigquery_v2.proto.encryption_config_pb2",
- "__doc__": """Encryption configuration.
-
- Attributes:
- kms_key_name:
- Optional. Describes the Cloud KMS encryption key that will be
- used to protect destination BigQuery table. The BigQuery
- Service Account associated with your project requires access
- to this encryption key.
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.EncryptionConfiguration)
- },
-)
-_sym_db.RegisterMessage(EncryptionConfiguration)
-
-
-DESCRIPTOR._options = None
-_ENCRYPTIONCONFIGURATION.fields_by_name["kms_key_name"]._options = None
-# @@protoc_insertion_point(module_scope)
diff --git a/google/cloud/bigquery_v2/proto/location_metadata.proto b/google/cloud/bigquery_v2/proto/location_metadata.proto
deleted file mode 100644
index 95a3133c5..000000000
--- a/google/cloud/bigquery_v2/proto/location_metadata.proto
+++ /dev/null
@@ -1,34 +0,0 @@
-// Copyright 2019 Google LLC.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-syntax = "proto3";
-
-package google.cloud.bigquery.v2;
-
-import "google/api/annotations.proto";
-
-option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery";
-option java_outer_classname = "LocationMetadataProto";
-option java_package = "com.google.cloud.bigquery.v2";
-
-
-// BigQuery-specific metadata about a location. This will be set on
-// google.cloud.location.Location.metadata in Cloud Location API
-// responses.
-message LocationMetadata {
- // The legacy BigQuery location ID, e.g. “EU” for the “europe” location.
- // This is for any API consumers that need the legacy “US” and “EU” locations.
- string legacy_location_id = 1;
-}
diff --git a/google/cloud/bigquery_v2/proto/model.proto b/google/cloud/bigquery_v2/proto/model.proto
deleted file mode 100644
index 2d400dddd..000000000
--- a/google/cloud/bigquery_v2/proto/model.proto
+++ /dev/null
@@ -1,1208 +0,0 @@
-// Copyright 2020 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-syntax = "proto3";
-
-package google.cloud.bigquery.v2;
-
-import "google/api/client.proto";
-import "google/api/field_behavior.proto";
-import "google/cloud/bigquery/v2/encryption_config.proto";
-import "google/cloud/bigquery/v2/model_reference.proto";
-import "google/cloud/bigquery/v2/standard_sql.proto";
-import "google/cloud/bigquery/v2/table_reference.proto";
-import "google/protobuf/empty.proto";
-import "google/protobuf/timestamp.proto";
-import "google/protobuf/wrappers.proto";
-import "google/api/annotations.proto";
-
-option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery";
-option java_outer_classname = "ModelProto";
-option java_package = "com.google.cloud.bigquery.v2";
-
-service ModelService {
- option (google.api.default_host) = "bigquery.googleapis.com";
- option (google.api.oauth_scopes) =
- "https://www.googleapis.com/auth/bigquery,"
- "https://www.googleapis.com/auth/bigquery.readonly,"
- "https://www.googleapis.com/auth/cloud-platform,"
- "https://www.googleapis.com/auth/cloud-platform.read-only";
-
- // Gets the specified model resource by model ID.
- rpc GetModel(GetModelRequest) returns (Model) {
- option (google.api.method_signature) = "project_id,dataset_id,model_id";
- }
-
- // Lists all models in the specified dataset. Requires the READER dataset
- // role.
- rpc ListModels(ListModelsRequest) returns (ListModelsResponse) {
- option (google.api.method_signature) = "project_id,dataset_id,max_results";
- }
-
- // Patch specific fields in the specified model.
- rpc PatchModel(PatchModelRequest) returns (Model) {
- option (google.api.method_signature) = "project_id,dataset_id,model_id,model";
- }
-
- // Deletes the model specified by modelId from the dataset.
- rpc DeleteModel(DeleteModelRequest) returns (google.protobuf.Empty) {
- option (google.api.method_signature) = "project_id,dataset_id,model_id";
- }
-}
-
-message Model {
- message SeasonalPeriod {
- enum SeasonalPeriodType {
- SEASONAL_PERIOD_TYPE_UNSPECIFIED = 0;
-
- // No seasonality
- NO_SEASONALITY = 1;
-
- // Daily period, 24 hours.
- DAILY = 2;
-
- // Weekly period, 7 days.
- WEEKLY = 3;
-
- // Monthly period, 30 days or irregular.
- MONTHLY = 4;
-
- // Quarterly period, 90 days or irregular.
- QUARTERLY = 5;
-
- // Yearly period, 365 days or irregular.
- YEARLY = 6;
- }
-
-
- }
-
- message KmeansEnums {
- // Indicates the method used to initialize the centroids for KMeans
- // clustering algorithm.
- enum KmeansInitializationMethod {
- KMEANS_INITIALIZATION_METHOD_UNSPECIFIED = 0;
-
- // Initializes the centroids randomly.
- RANDOM = 1;
-
- // Initializes the centroids using data specified in
- // kmeans_initialization_column.
- CUSTOM = 2;
-
- // Initializes with kmeans++.
- KMEANS_PLUS_PLUS = 3;
- }
-
-
- }
-
- // Evaluation metrics for regression and explicit feedback type matrix
- // factorization models.
- message RegressionMetrics {
- // Mean absolute error.
- google.protobuf.DoubleValue mean_absolute_error = 1;
-
- // Mean squared error.
- google.protobuf.DoubleValue mean_squared_error = 2;
-
- // Mean squared log error.
- google.protobuf.DoubleValue mean_squared_log_error = 3;
-
- // Median absolute error.
- google.protobuf.DoubleValue median_absolute_error = 4;
-
- // R^2 score.
- google.protobuf.DoubleValue r_squared = 5;
- }
-
- // Aggregate metrics for classification/classifier models. For multi-class
- // models, the metrics are either macro-averaged or micro-averaged. When
- // macro-averaged, the metrics are calculated for each label and then an
- // unweighted average is taken of those values. When micro-averaged, the
- // metric is calculated globally by counting the total number of correctly
- // predicted rows.
- message AggregateClassificationMetrics {
- // Precision is the fraction of actual positive predictions that had
- // positive actual labels. For multiclass this is a macro-averaged
- // metric treating each class as a binary classifier.
- google.protobuf.DoubleValue precision = 1;
-
- // Recall is the fraction of actual positive labels that were given a
- // positive prediction. For multiclass this is a macro-averaged metric.
- google.protobuf.DoubleValue recall = 2;
-
- // Accuracy is the fraction of predictions given the correct label. For
- // multiclass this is a micro-averaged metric.
- google.protobuf.DoubleValue accuracy = 3;
-
- // Threshold at which the metrics are computed. For binary
- // classification models this is the positive class threshold.
- // For multi-class classfication models this is the confidence
- // threshold.
- google.protobuf.DoubleValue threshold = 4;
-
- // The F1 score is an average of recall and precision. For multiclass
- // this is a macro-averaged metric.
- google.protobuf.DoubleValue f1_score = 5;
-
- // Logarithmic Loss. For multiclass this is a macro-averaged metric.
- google.protobuf.DoubleValue log_loss = 6;
-
- // Area Under a ROC Curve. For multiclass this is a macro-averaged
- // metric.
- google.protobuf.DoubleValue roc_auc = 7;
- }
-
- // Evaluation metrics for binary classification/classifier models.
- message BinaryClassificationMetrics {
- // Confusion matrix for binary classification models.
- message BinaryConfusionMatrix {
- // Threshold value used when computing each of the following metric.
- google.protobuf.DoubleValue positive_class_threshold = 1;
-
- // Number of true samples predicted as true.
- google.protobuf.Int64Value true_positives = 2;
-
- // Number of false samples predicted as true.
- google.protobuf.Int64Value false_positives = 3;
-
- // Number of true samples predicted as false.
- google.protobuf.Int64Value true_negatives = 4;
-
- // Number of false samples predicted as false.
- google.protobuf.Int64Value false_negatives = 5;
-
- // The fraction of actual positive predictions that had positive actual
- // labels.
- google.protobuf.DoubleValue precision = 6;
-
- // The fraction of actual positive labels that were given a positive
- // prediction.
- google.protobuf.DoubleValue recall = 7;
-
- // The equally weighted average of recall and precision.
- google.protobuf.DoubleValue f1_score = 8;
-
- // The fraction of predictions given the correct label.
- google.protobuf.DoubleValue accuracy = 9;
- }
-
- // Aggregate classification metrics.
- AggregateClassificationMetrics aggregate_classification_metrics = 1;
-
- // Binary confusion matrix at multiple thresholds.
- repeated BinaryConfusionMatrix binary_confusion_matrix_list = 2;
-
- // Label representing the positive class.
- string positive_label = 3;
-
- // Label representing the negative class.
- string negative_label = 4;
- }
-
- // Evaluation metrics for multi-class classification/classifier models.
- message MultiClassClassificationMetrics {
- // Confusion matrix for multi-class classification models.
- message ConfusionMatrix {
- // A single entry in the confusion matrix.
- message Entry {
- // The predicted label. For confidence_threshold > 0, we will
- // also add an entry indicating the number of items under the
- // confidence threshold.
- string predicted_label = 1;
-
- // Number of items being predicted as this label.
- google.protobuf.Int64Value item_count = 2;
- }
-
- // A single row in the confusion matrix.
- message Row {
- // The original label of this row.
- string actual_label = 1;
-
- // Info describing predicted label distribution.
- repeated Entry entries = 2;
- }
-
- // Confidence threshold used when computing the entries of the
- // confusion matrix.
- google.protobuf.DoubleValue confidence_threshold = 1;
-
- // One row per actual label.
- repeated Row rows = 2;
- }
-
- // Aggregate classification metrics.
- AggregateClassificationMetrics aggregate_classification_metrics = 1;
-
- // Confusion matrix at different thresholds.
- repeated ConfusionMatrix confusion_matrix_list = 2;
- }
-
- // Evaluation metrics for clustering models.
- message ClusteringMetrics {
- // Message containing the information about one cluster.
- message Cluster {
- // Representative value of a single feature within the cluster.
- message FeatureValue {
- // Representative value of a categorical feature.
- message CategoricalValue {
- // Represents the count of a single category within the cluster.
- message CategoryCount {
- // The name of category.
- string category = 1;
-
- // The count of training samples matching the category within the
- // cluster.
- google.protobuf.Int64Value count = 2;
- }
-
- // Counts of all categories for the categorical feature. If there are
- // more than ten categories, we return top ten (by count) and return
- // one more CategoryCount with category "_OTHER_" and count as
- // aggregate counts of remaining categories.
- repeated CategoryCount category_counts = 1;
- }
-
- // The feature column name.
- string feature_column = 1;
-
- oneof value {
- // The numerical feature value. This is the centroid value for this
- // feature.
- google.protobuf.DoubleValue numerical_value = 2;
-
- // The categorical feature value.
- CategoricalValue categorical_value = 3;
- }
- }
-
- // Centroid id.
- int64 centroid_id = 1;
-
- // Values of highly variant features for this cluster.
- repeated FeatureValue feature_values = 2;
-
- // Count of training data rows that were assigned to this cluster.
- google.protobuf.Int64Value count = 3;
- }
-
- // Davies-Bouldin index.
- google.protobuf.DoubleValue davies_bouldin_index = 1;
-
- // Mean of squared distances between each sample to its cluster centroid.
- google.protobuf.DoubleValue mean_squared_distance = 2;
-
- // [Beta] Information for all clusters.
- repeated Cluster clusters = 3;
- }
-
- // Evaluation metrics used by weighted-ALS models specified by
- // feedback_type=implicit.
- message RankingMetrics {
- // Calculates a precision per user for all the items by ranking them and
- // then averages all the precisions across all the users.
- google.protobuf.DoubleValue mean_average_precision = 1;
-
- // Similar to the mean squared error computed in regression and explicit
- // recommendation models except instead of computing the rating directly,
- // the output from evaluate is computed against a preference which is 1 or 0
- // depending on if the rating exists or not.
- google.protobuf.DoubleValue mean_squared_error = 2;
-
- // A metric to determine the goodness of a ranking calculated from the
- // predicted confidence by comparing it to an ideal rank measured by the
- // original ratings.
- google.protobuf.DoubleValue normalized_discounted_cumulative_gain = 3;
-
- // Determines the goodness of a ranking by computing the percentile rank
- // from the predicted confidence and dividing it by the original rank.
- google.protobuf.DoubleValue average_rank = 4;
- }
-
- // Model evaluation metrics for ARIMA forecasting models.
- message ArimaForecastingMetrics {
- // Model evaluation metrics for a single ARIMA forecasting model.
- message ArimaSingleModelForecastingMetrics {
- // Non-seasonal order.
- ArimaOrder non_seasonal_order = 1;
-
- // Arima fitting metrics.
- ArimaFittingMetrics arima_fitting_metrics = 2;
-
- // Is arima model fitted with drift or not. It is always false when d
- // is not 1.
- bool has_drift = 3;
-
- // The id to indicate different time series.
- string time_series_id = 4;
-
- // Seasonal periods. Repeated because multiple periods are supported
- // for one time series.
- repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 5;
- }
-
- // Non-seasonal order.
- repeated ArimaOrder non_seasonal_order = 1;
-
- // Arima model fitting metrics.
- repeated ArimaFittingMetrics arima_fitting_metrics = 2;
-
- // Seasonal periods. Repeated because multiple periods are supported for one
- // time series.
- repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 3;
-
- // Whether Arima model fitted with drift or not. It is always false when d
- // is not 1.
- repeated bool has_drift = 4;
-
- // Id to differentiate different time series for the large-scale case.
- repeated string time_series_id = 5;
-
- // Repeated as there can be many metric sets (one for each model) in
- // auto-arima and the large-scale case.
- repeated ArimaSingleModelForecastingMetrics arima_single_model_forecasting_metrics = 6;
- }
-
- // Evaluation metrics of a model. These are either computed on all training
- // data or just the eval data based on whether eval data was used during
- // training. These are not present for imported models.
- message EvaluationMetrics {
- oneof metrics {
- // Populated for regression models and explicit feedback type matrix
- // factorization models.
- RegressionMetrics regression_metrics = 1;
-
- // Populated for binary classification/classifier models.
- BinaryClassificationMetrics binary_classification_metrics = 2;
-
- // Populated for multi-class classification/classifier models.
- MultiClassClassificationMetrics multi_class_classification_metrics = 3;
-
- // Populated for clustering models.
- ClusteringMetrics clustering_metrics = 4;
-
- // Populated for implicit feedback type matrix factorization models.
- RankingMetrics ranking_metrics = 5;
-
- // Populated for ARIMA models.
- ArimaForecastingMetrics arima_forecasting_metrics = 6;
- }
- }
-
- // Data split result. This contains references to the training and evaluation
- // data tables that were used to train the model.
- message DataSplitResult {
- // Table reference of the training data after split.
- TableReference training_table = 1;
-
- // Table reference of the evaluation data after split.
- TableReference evaluation_table = 2;
- }
-
- // Arima order, can be used for both non-seasonal and seasonal parts.
- message ArimaOrder {
- // Order of the autoregressive part.
- int64 p = 1;
-
- // Order of the differencing part.
- int64 d = 2;
-
- // Order of the moving-average part.
- int64 q = 3;
- }
-
- // ARIMA model fitting metrics.
- message ArimaFittingMetrics {
- // Log-likelihood.
- double log_likelihood = 1;
-
- // AIC.
- double aic = 2;
-
- // Variance.
- double variance = 3;
- }
-
- // Global explanations containing the top most important features
- // after training.
- message GlobalExplanation {
- // Explanation for a single feature.
- message Explanation {
- // Full name of the feature. For non-numerical features, will be
- // formatted like <column_name>.<encoded_feature_name>. Overall size of
- // feature name will always be truncated to first 120 characters.
- string feature_name = 1;
-
- // Attribution of feature.
- google.protobuf.DoubleValue attribution = 2;
- }
-
- // A list of the top global explanations. Sorted by absolute value of
- // attribution in descending order.
- repeated Explanation explanations = 1;
-
- // Class label for this set of global explanations. Will be empty/null for
- // binary logistic and linear regression models. Sorted alphabetically in
- // descending order.
- string class_label = 2;
- }
-
- // Information about a single training query run for the model.
- message TrainingRun {
- message TrainingOptions {
- // The maximum number of iterations in training. Used only for iterative
- // training algorithms.
- int64 max_iterations = 1;
-
- // Type of loss function used during training run.
- LossType loss_type = 2;
-
- // Learning rate in training. Used only for iterative training algorithms.
- double learn_rate = 3;
-
- // L1 regularization coefficient.
- google.protobuf.DoubleValue l1_regularization = 4;
-
- // L2 regularization coefficient.
- google.protobuf.DoubleValue l2_regularization = 5;
-
- // When early_stop is true, stops training when accuracy improvement is
- // less than 'min_relative_progress'. Used only for iterative training
- // algorithms.
- google.protobuf.DoubleValue min_relative_progress = 6;
-
- // Whether to train a model from the last checkpoint.
- google.protobuf.BoolValue warm_start = 7;
-
- // Whether to stop early when the loss doesn't improve significantly
- // any more (compared to min_relative_progress). Used only for iterative
- // training algorithms.
- google.protobuf.BoolValue early_stop = 8;
-
- // Name of input label columns in training data.
- repeated string input_label_columns = 9;
-
- // The data split type for training and evaluation, e.g. RANDOM.
- DataSplitMethod data_split_method = 10;
-
- // The fraction of evaluation data over the whole input data. The rest
- // of data will be used as training data. The format should be double.
- // Accurate to two decimal places.
- // Default value is 0.2.
- double data_split_eval_fraction = 11;
-
- // The column to split data with. This column won't be used as a
- // feature.
- // 1. When data_split_method is CUSTOM, the corresponding column should
- // be boolean. The rows with true value tag are eval data, and the false
- // are training data.
- // 2. When data_split_method is SEQ, the first DATA_SPLIT_EVAL_FRACTION
- // rows (from smallest to largest) in the corresponding column are used
- // as training data, and the rest are eval data. It respects the order
- // in Orderable data types:
- // https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties
- string data_split_column = 12;
-
- // The strategy to determine learn rate for the current iteration.
- LearnRateStrategy learn_rate_strategy = 13;
-
- // Specifies the initial learning rate for the line search learn rate
- // strategy.
- double initial_learn_rate = 16;
-
- // Weights associated with each label class, for rebalancing the
- // training data. Only applicable for classification models.
- map<string, double> label_class_weights = 17;
-
- // User column specified for matrix factorization models.
- string user_column = 18;
-
- // Item column specified for matrix factorization models.
- string item_column = 19;
-
- // Distance type for clustering models.
- DistanceType distance_type = 20;
-
- // Number of clusters for clustering models.
- int64 num_clusters = 21;
-
- // [Beta] Google Cloud Storage URI from which the model was imported. Only
- // applicable for imported models.
- string model_uri = 22;
-
- // Optimization strategy for training linear regression models.
- OptimizationStrategy optimization_strategy = 23;
-
- // Hidden units for dnn models.
- repeated int64 hidden_units = 24;
-
- // Batch size for dnn models.
- int64 batch_size = 25;
-
- // Dropout probability for dnn models.
- google.protobuf.DoubleValue dropout = 26;
-
- // Maximum depth of a tree for boosted tree models.
- int64 max_tree_depth = 27;
-
- // Subsample fraction of the training data to grow tree to prevent
- // overfitting for boosted tree models.
- double subsample = 28;
-
- // Minimum split loss for boosted tree models.
- google.protobuf.DoubleValue min_split_loss = 29;
-
- // Num factors specified for matrix factorization models.
- int64 num_factors = 30;
-
- // Feedback type that specifies which algorithm to run for matrix
- // factorization.
- FeedbackType feedback_type = 31;
-
- // Hyperparameter for matrix factoration when implicit feedback type is
- // specified.
- google.protobuf.DoubleValue wals_alpha = 32;
-
- // The method used to initialize the centroids for kmeans algorithm.
- KmeansEnums.KmeansInitializationMethod kmeans_initialization_method = 33;
-
- // The column used to provide the initial centroids for kmeans algorithm
- // when kmeans_initialization_method is CUSTOM.
- string kmeans_initialization_column = 34;
-
- // Column to be designated as time series timestamp for ARIMA model.
- string time_series_timestamp_column = 35;
-
- // Column to be designated as time series data for ARIMA model.
- string time_series_data_column = 36;
-
- // Whether to enable auto ARIMA or not.
- bool auto_arima = 37;
-
- // A specification of the non-seasonal part of the ARIMA model: the three
- // components (p, d, q) are the AR order, the degree of differencing, and
- // the MA order.
- ArimaOrder non_seasonal_order = 38;
-
- // The data frequency of a time series.
- DataFrequency data_frequency = 39;
-
- // Include drift when fitting an ARIMA model.
- bool include_drift = 41;
-
- // The geographical region based on which the holidays are considered in
- // time series modeling. If a valid value is specified, then holiday
- // effects modeling is enabled.
- HolidayRegion holiday_region = 42;
-
- // The id column that will be used to indicate different time series to
- // forecast in parallel.
- string time_series_id_column = 43;
-
- // The number of periods ahead that need to be forecasted.
- int64 horizon = 44;
-
- // Whether to preserve the input structs in output feature names.
- // Suppose there is a struct A with field b.
- // When false (default), the output feature name is A_b.
- // When true, the output feature name is A.b.
- bool preserve_input_structs = 45;
-
- // The max value of non-seasonal p and q.
- int64 auto_arima_max_order = 46;
- }
-
- // Information about a single iteration of the training run.
- message IterationResult {
- // Information about a single cluster for clustering model.
- message ClusterInfo {
- // Centroid id.
- int64 centroid_id = 1;
-
- // Cluster radius, the average distance from centroid
- // to each point assigned to the cluster.
- google.protobuf.DoubleValue cluster_radius = 2;
-
- // Cluster size, the total number of points assigned to the cluster.
- google.protobuf.Int64Value cluster_size = 3;
- }
-
- // (Auto-)arima fitting result. Wrap everything in ArimaResult for easier
- // refactoring if we want to use model-specific iteration results.
- message ArimaResult {
- // Arima coefficients.
- message ArimaCoefficients {
- // Auto-regressive coefficients, an array of double.
- repeated double auto_regressive_coefficients = 1;
-
- // Moving-average coefficients, an array of double.
- repeated double moving_average_coefficients = 2;
-
- // Intercept coefficient, just a double not an array.
- double intercept_coefficient = 3;
- }
-
- // Arima model information.
- message ArimaModelInfo {
- // Non-seasonal order.
- ArimaOrder non_seasonal_order = 1;
-
- // Arima coefficients.
- ArimaCoefficients arima_coefficients = 2;
-
- // Arima fitting metrics.
- ArimaFittingMetrics arima_fitting_metrics = 3;
-
- // Whether Arima model fitted with drift or not. It is always false
- // when d is not 1.
- bool has_drift = 4;
-
- // The id to indicate different time series.
- string time_series_id = 5;
-
- // Seasonal periods. Repeated because multiple periods are supported
- // for one time series.
- repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 6;
- }
-
- // This message is repeated because there are multiple arima models
- // fitted in auto-arima. For non-auto-arima model, its size is one.
- repeated ArimaModelInfo arima_model_info = 1;
-
- // Seasonal periods. Repeated because multiple periods are supported for
- // one time series.
- repeated SeasonalPeriod.SeasonalPeriodType seasonal_periods = 2;
- }
-
- // Index of the iteration, 0 based.
- google.protobuf.Int32Value index = 1;
-
- // Time taken to run the iteration in milliseconds.
- google.protobuf.Int64Value duration_ms = 4;
-
- // Loss computed on the training data at the end of iteration.
- google.protobuf.DoubleValue training_loss = 5;
-
- // Loss computed on the eval data at the end of iteration.
- google.protobuf.DoubleValue eval_loss = 6;
-
- // Learn rate used for this iteration.
- double learn_rate = 7;
-
- // Information about top clusters for clustering models.
- repeated ClusterInfo cluster_infos = 8;
-
- ArimaResult arima_result = 9;
- }
-
- // Options that were used for this training run, includes
- // user specified and default options that were used.
- TrainingOptions training_options = 1;
-
- // The start time of this training run.
- google.protobuf.Timestamp start_time = 8;
-
- // Output of each iteration run, results.size() <= max_iterations.
- repeated IterationResult results = 6;
-
- // The evaluation metrics over training/eval data that were computed at the
- // end of training.
- EvaluationMetrics evaluation_metrics = 7;
-
- // Data split result of the training run. Only set when the input data is
- // actually split.
- DataSplitResult data_split_result = 9;
-
- // Global explanations for important features of the model. For multi-class
- // models, there is one entry for each label class. For other models, there
- // is only one entry in the list.
- repeated GlobalExplanation global_explanations = 10;
- }
-
- // Indicates the type of the Model.
- enum ModelType {
- MODEL_TYPE_UNSPECIFIED = 0;
-
- // Linear regression model.
- LINEAR_REGRESSION = 1;
-
- // Logistic regression based classification model.
- LOGISTIC_REGRESSION = 2;
-
- // K-means clustering model.
- KMEANS = 3;
-
- // Matrix factorization model.
- MATRIX_FACTORIZATION = 4;
-
- // [Beta] DNN classifier model.
- DNN_CLASSIFIER = 5;
-
- // [Beta] An imported TensorFlow model.
- TENSORFLOW = 6;
-
- // [Beta] DNN regressor model.
- DNN_REGRESSOR = 7;
-
- // [Beta] Boosted tree regressor model.
- BOOSTED_TREE_REGRESSOR = 9;
-
- // [Beta] Boosted tree classifier model.
- BOOSTED_TREE_CLASSIFIER = 10;
-
- // [Beta] ARIMA model.
- ARIMA = 11;
-
- // [Beta] AutoML Tables regression model.
- AUTOML_REGRESSOR = 12;
-
- // [Beta] AutoML Tables classification model.
- AUTOML_CLASSIFIER = 13;
- }
-
- // Loss metric to evaluate model training performance.
- enum LossType {
- LOSS_TYPE_UNSPECIFIED = 0;
-
- // Mean squared loss, used for linear regression.
- MEAN_SQUARED_LOSS = 1;
-
- // Mean log loss, used for logistic regression.
- MEAN_LOG_LOSS = 2;
- }
-
- // Distance metric used to compute the distance between two points.
- enum DistanceType {
- DISTANCE_TYPE_UNSPECIFIED = 0;
-
- // Eculidean distance.
- EUCLIDEAN = 1;
-
- // Cosine distance.
- COSINE = 2;
- }
-
- // Indicates the method to split input data into multiple tables.
- enum DataSplitMethod {
- DATA_SPLIT_METHOD_UNSPECIFIED = 0;
-
- // Splits data randomly.
- RANDOM = 1;
-
- // Splits data with the user provided tags.
- CUSTOM = 2;
-
- // Splits data sequentially.
- SEQUENTIAL = 3;
-
- // Data split will be skipped.
- NO_SPLIT = 4;
-
- // Splits data automatically: Uses NO_SPLIT if the data size is small.
- // Otherwise uses RANDOM.
- AUTO_SPLIT = 5;
- }
-
- // Type of supported data frequency for time series forecasting models.
- enum DataFrequency {
- DATA_FREQUENCY_UNSPECIFIED = 0;
-
- // Automatically inferred from timestamps.
- AUTO_FREQUENCY = 1;
-
- // Yearly data.
- YEARLY = 2;
-
- // Quarterly data.
- QUARTERLY = 3;
-
- // Monthly data.
- MONTHLY = 4;
-
- // Weekly data.
- WEEKLY = 5;
-
- // Daily data.
- DAILY = 6;
-
- // Hourly data.
- HOURLY = 7;
- }
-
- // Type of supported holiday regions for time series forecasting models.
- enum HolidayRegion {
- // Holiday region unspecified.
- HOLIDAY_REGION_UNSPECIFIED = 0;
-
- // Global.
- GLOBAL = 1;
-
- // North America.
- NA = 2;
-
- // Japan and Asia Pacific: Korea, Greater China, India, Australia, and New
- // Zealand.
- JAPAC = 3;
-
- // Europe, the Middle East and Africa.
- EMEA = 4;
-
- // Latin America and the Caribbean.
- LAC = 5;
-
- // United Arab Emirates
- AE = 6;
-
- // Argentina
- AR = 7;
-
- // Austria
- AT = 8;
-
- // Australia
- AU = 9;
-
- // Belgium
- BE = 10;
-
- // Brazil
- BR = 11;
-
- // Canada
- CA = 12;
-
- // Switzerland
- CH = 13;
-
- // Chile
- CL = 14;
-
- // China
- CN = 15;
-
- // Colombia
- CO = 16;
-
- // Czechoslovakia
- CS = 17;
-
- // Czech Republic
- CZ = 18;
-
- // Germany
- DE = 19;
-
- // Denmark
- DK = 20;
-
- // Algeria
- DZ = 21;
-
- // Ecuador
- EC = 22;
-
- // Estonia
- EE = 23;
-
- // Egypt
- EG = 24;
-
- // Spain
- ES = 25;
-
- // Finland
- FI = 26;
-
- // France
- FR = 27;
-
- // Great Britain (United Kingdom)
- GB = 28;
-
- // Greece
- GR = 29;
-
- // Hong Kong
- HK = 30;
-
- // Hungary
- HU = 31;
-
- // Indonesia
- ID = 32;
-
- // Ireland
- IE = 33;
-
- // Israel
- IL = 34;
-
- // India
- IN = 35;
-
- // Iran
- IR = 36;
-
- // Italy
- IT = 37;
-
- // Japan
- JP = 38;
-
- // Korea (South)
- KR = 39;
-
- // Latvia
- LV = 40;
-
- // Morocco
- MA = 41;
-
- // Mexico
- MX = 42;
-
- // Malaysia
- MY = 43;
-
- // Nigeria
- NG = 44;
-
- // Netherlands
- NL = 45;
-
- // Norway
- NO = 46;
-
- // New Zealand
- NZ = 47;
-
- // Peru
- PE = 48;
-
- // Philippines
- PH = 49;
-
- // Pakistan
- PK = 50;
-
- // Poland
- PL = 51;
-
- // Portugal
- PT = 52;
-
- // Romania
- RO = 53;
-
- // Serbia
- RS = 54;
-
- // Russian Federation
- RU = 55;
-
- // Saudi Arabia
- SA = 56;
-
- // Sweden
- SE = 57;
-
- // Singapore
- SG = 58;
-
- // Slovenia
- SI = 59;
-
- // Slovakia
- SK = 60;
-
- // Thailand
- TH = 61;
-
- // Turkey
- TR = 62;
-
- // Taiwan
- TW = 63;
-
- // Ukraine
- UA = 64;
-
- // United States
- US = 65;
-
- // Venezuela
- VE = 66;
-
- // Viet Nam
- VN = 67;
-
- // South Africa
- ZA = 68;
- }
-
- // Indicates the learning rate optimization strategy to use.
- enum LearnRateStrategy {
- LEARN_RATE_STRATEGY_UNSPECIFIED = 0;
-
- // Use line search to determine learning rate.
- LINE_SEARCH = 1;
-
- // Use a constant learning rate.
- CONSTANT = 2;
- }
-
- // Indicates the optimization strategy used for training.
- enum OptimizationStrategy {
- OPTIMIZATION_STRATEGY_UNSPECIFIED = 0;
-
- // Uses an iterative batch gradient descent algorithm.
- BATCH_GRADIENT_DESCENT = 1;
-
- // Uses a normal equation to solve linear regression problem.
- NORMAL_EQUATION = 2;
- }
-
- // Indicates the training algorithm to use for matrix factorization models.
- enum FeedbackType {
- FEEDBACK_TYPE_UNSPECIFIED = 0;
-
- // Use weighted-als for implicit feedback problems.
- IMPLICIT = 1;
-
- // Use nonweighted-als for explicit feedback problems.
- EXPLICIT = 2;
- }
-
- // Output only. A hash of this resource.
- string etag = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
-
- // Required. Unique identifier for this model.
- ModelReference model_reference = 2 [(google.api.field_behavior) = REQUIRED];
-
- // Output only. The time when this model was created, in millisecs since the epoch.
- int64 creation_time = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
-
- // Output only. The time when this model was last modified, in millisecs since the epoch.
- int64 last_modified_time = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
-
- // Optional. A user-friendly description of this model.
- string description = 12 [(google.api.field_behavior) = OPTIONAL];
-
- // Optional. A descriptive name for this model.
- string friendly_name = 14 [(google.api.field_behavior) = OPTIONAL];
-
- // The labels associated with this model. You can use these to organize
- // and group your models. Label keys and values can be no longer
- // than 63 characters, can only contain lowercase letters, numeric
- // characters, underscores and dashes. International characters are allowed.
- // Label values are optional. Label keys must start with a letter and each
- // label in the list must have a different key.
- map labels = 15;
-
- // Optional. The time when this model expires, in milliseconds since the epoch.
- // If not present, the model will persist indefinitely. Expired models
- // will be deleted and their storage reclaimed. The defaultTableExpirationMs
- // property of the encapsulating dataset can be used to set a default
- // expirationTime on newly created models.
- int64 expiration_time = 16 [(google.api.field_behavior) = OPTIONAL];
-
- // Output only. The geographic location where the model resides. This value
- // is inherited from the dataset.
- string location = 13 [(google.api.field_behavior) = OUTPUT_ONLY];
-
- // Custom encryption configuration (e.g., Cloud KMS keys). This shows the
- // encryption configuration of the model data while stored in BigQuery
- // storage. This field can be used with PatchModel to update encryption key
- // for an already encrypted model.
- EncryptionConfiguration encryption_configuration = 17;
-
- // Output only. Type of the model resource.
- ModelType model_type = 7 [(google.api.field_behavior) = OUTPUT_ONLY];
-
- // Output only. Information for all training runs in increasing order of start_time.
- repeated TrainingRun training_runs = 9 [(google.api.field_behavior) = OUTPUT_ONLY];
-
- // Output only. Input feature columns that were used to train this model.
- repeated StandardSqlField feature_columns = 10 [(google.api.field_behavior) = OUTPUT_ONLY];
-
- // Output only. Label columns that were used to train this model.
- // The output of the model will have a "predicted_" prefix to these columns.
- repeated StandardSqlField label_columns = 11 [(google.api.field_behavior) = OUTPUT_ONLY];
-}
-
-message GetModelRequest {
- // Required. Project ID of the requested model.
- string project_id = 1 [(google.api.field_behavior) = REQUIRED];
-
- // Required. Dataset ID of the requested model.
- string dataset_id = 2 [(google.api.field_behavior) = REQUIRED];
-
- // Required. Model ID of the requested model.
- string model_id = 3 [(google.api.field_behavior) = REQUIRED];
-}
-
-message PatchModelRequest {
- // Required. Project ID of the model to patch.
- string project_id = 1 [(google.api.field_behavior) = REQUIRED];
-
- // Required. Dataset ID of the model to patch.
- string dataset_id = 2 [(google.api.field_behavior) = REQUIRED];
-
- // Required. Model ID of the model to patch.
- string model_id = 3 [(google.api.field_behavior) = REQUIRED];
-
- // Required. Patched model.
- // Follows RFC5789 patch semantics. Missing fields are not updated.
- // To clear a field, explicitly set to default value.
- Model model = 4 [(google.api.field_behavior) = REQUIRED];
-}
-
-message DeleteModelRequest {
- // Required. Project ID of the model to delete.
- string project_id = 1 [(google.api.field_behavior) = REQUIRED];
-
- // Required. Dataset ID of the model to delete.
- string dataset_id = 2 [(google.api.field_behavior) = REQUIRED];
-
- // Required. Model ID of the model to delete.
- string model_id = 3 [(google.api.field_behavior) = REQUIRED];
-}
-
-message ListModelsRequest {
- // Required. Project ID of the models to list.
- string project_id = 1 [(google.api.field_behavior) = REQUIRED];
-
- // Required. Dataset ID of the models to list.
- string dataset_id = 2 [(google.api.field_behavior) = REQUIRED];
-
- // The maximum number of results to return in a single response page.
- // Leverage the page tokens to iterate through the entire collection.
- google.protobuf.UInt32Value max_results = 3;
-
- // Page token, returned by a previous call to request the next page of
- // results
- string page_token = 4;
-}
-
-message ListModelsResponse {
- // Models in the requested dataset. Only the following fields are populated:
- // model_reference, model_type, creation_time, last_modified_time and
- // labels.
- repeated Model models = 1;
-
- // A token to request the next page of results.
- string next_page_token = 2;
-}
diff --git a/google/cloud/bigquery_v2/proto/model_pb2.py b/google/cloud/bigquery_v2/proto/model_pb2.py
deleted file mode 100644
index 7b66be8f7..000000000
--- a/google/cloud/bigquery_v2/proto/model_pb2.py
+++ /dev/null
@@ -1,4298 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: google/cloud/bigquery_v2/proto/model.proto
-"""Generated protocol buffer code."""
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-from google.api import client_pb2 as google_dot_api_dot_client__pb2
-from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2
-from google.cloud.bigquery_v2.proto import (
- encryption_config_pb2 as google_dot_cloud_dot_bigquery__v2_dot_proto_dot_encryption__config__pb2,
-)
-from google.cloud.bigquery_v2.proto import (
- model_reference_pb2 as google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__reference__pb2,
-)
-from google.cloud.bigquery_v2.proto import (
- standard_sql_pb2 as google_dot_cloud_dot_bigquery__v2_dot_proto_dot_standard__sql__pb2,
-)
-from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2
-from google.protobuf import timestamp_pb2 as google_dot_protobuf_dot_timestamp__pb2
-from google.protobuf import wrappers_pb2 as google_dot_protobuf_dot_wrappers__pb2
-from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name="google/cloud/bigquery_v2/proto/model.proto",
- package="google.cloud.bigquery.v2",
- syntax="proto3",
- serialized_options=b"\n\034com.google.cloud.bigquery.v2B\nModelProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery",
- create_key=_descriptor._internal_create_key,
- serialized_pb=b'\n*google/cloud/bigquery_v2/proto/model.proto\x12\x18google.cloud.bigquery.v2\x1a\x17google/api/client.proto\x1a\x1fgoogle/api/field_behavior.proto\x1a\x36google/cloud/bigquery_v2/proto/encryption_config.proto\x1a\x34google/cloud/bigquery_v2/proto/model_reference.proto\x1a\x31google/cloud/bigquery_v2/proto/standard_sql.proto\x1a\x1bgoogle/protobuf/empty.proto\x1a\x1fgoogle/protobuf/timestamp.proto\x1a\x1egoogle/protobuf/wrappers.proto\x1a\x1cgoogle/api/annotations.proto"\x9b\x35\n\x05Model\x12\x11\n\x04\x65tag\x18\x01 \x01(\tB\x03\xe0\x41\x03\x12\x46\n\x0fmodel_reference\x18\x02 \x01(\x0b\x32(.google.cloud.bigquery.v2.ModelReferenceB\x03\xe0\x41\x02\x12\x1a\n\rcreation_time\x18\x05 \x01(\x03\x42\x03\xe0\x41\x03\x12\x1f\n\x12last_modified_time\x18\x06 \x01(\x03\x42\x03\xe0\x41\x03\x12\x18\n\x0b\x64\x65scription\x18\x0c \x01(\tB\x03\xe0\x41\x01\x12\x1a\n\rfriendly_name\x18\x0e \x01(\tB\x03\xe0\x41\x01\x12;\n\x06labels\x18\x0f \x03(\x0b\x32+.google.cloud.bigquery.v2.Model.LabelsEntry\x12\x1c\n\x0f\x65xpiration_time\x18\x10 \x01(\x03\x42\x03\xe0\x41\x01\x12\x15\n\x08location\x18\r \x01(\tB\x03\xe0\x41\x03\x12S\n\x18\x65ncryption_configuration\x18\x11 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.EncryptionConfiguration\x12\x42\n\nmodel_type\x18\x07 \x01(\x0e\x32).google.cloud.bigquery.v2.Model.ModelTypeB\x03\xe0\x41\x03\x12G\n\rtraining_runs\x18\t \x03(\x0b\x32+.google.cloud.bigquery.v2.Model.TrainingRunB\x03\xe0\x41\x03\x12H\n\x0f\x66\x65\x61ture_columns\x18\n \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlFieldB\x03\xe0\x41\x03\x12\x46\n\rlabel_columns\x18\x0b \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlFieldB\x03\xe0\x41\x03\x1aq\n\x0bKmeansEnums"b\n\x1aKmeansInitializationMethod\x12,\n(KMEANS_INITIALIZATION_METHOD_UNSPECIFIED\x10\x00\x12\n\n\x06RANDOM\x10\x01\x12\n\n\x06\x43USTOM\x10\x02\x1a\xb4\x02\n\x11RegressionMetrics\x12\x39\n\x13mean_absolute_error\x18\x01 
\x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x38\n\x12mean_squared_error\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12<\n\x16mean_squared_log_error\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15median_absolute_error\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\tr_squared\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\xef\x02\n\x1e\x41ggregateClassificationMetrics\x12/\n\tprecision\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12,\n\x06recall\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x61\x63\x63uracy\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\tthreshold\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x66\x31_score\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08log_loss\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12-\n\x07roc_auc\x18\x07 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x9f\x06\n\x1b\x42inaryClassificationMetrics\x12h\n aggregate_classification_metrics\x18\x01 \x01(\x0b\x32>.google.cloud.bigquery.v2.Model.AggregateClassificationMetrics\x12w\n\x1c\x62inary_confusion_matrix_list\x18\x02 \x03(\x0b\x32Q.google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix\x12\x16\n\x0epositive_label\x18\x03 \x01(\t\x12\x16\n\x0enegative_label\x18\x04 \x01(\t\x1a\xec\x03\n\x15\x42inaryConfusionMatrix\x12>\n\x18positive_class_threshold\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x33\n\x0etrue_positives\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x34\n\x0f\x66\x61lse_positives\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x33\n\x0etrue_negatives\x18\x04 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x34\n\x0f\x66\x61lse_negatives\x18\x05 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12/\n\tprecision\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12,\n\x06recall\x18\x07 
\x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x66\x31_score\x18\x08 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\x08\x61\x63\x63uracy\x18\t \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x1a\x87\x05\n\x1fMultiClassClassificationMetrics\x12h\n aggregate_classification_metrics\x18\x01 \x01(\x0b\x32>.google.cloud.bigquery.v2.Model.AggregateClassificationMetrics\x12n\n\x15\x63onfusion_matrix_list\x18\x02 \x03(\x0b\x32O.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix\x1a\x89\x03\n\x0f\x43onfusionMatrix\x12:\n\x14\x63onfidence_threshold\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x61\n\x04rows\x18\x02 \x03(\x0b\x32S.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row\x1aQ\n\x05\x45ntry\x12\x17\n\x0fpredicted_label\x18\x01 \x01(\t\x12/\n\nitem_count\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a\x83\x01\n\x03Row\x12\x14\n\x0c\x61\x63tual_label\x18\x01 \x01(\t\x12\x66\n\x07\x65ntries\x18\x02 \x03(\x0b\x32U.google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry\x1a\xcb\x06\n\x11\x43lusteringMetrics\x12:\n\x14\x64\x61vies_bouldin_index\x18\x01 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15mean_squared_distance\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12K\n\x08\x63lusters\x18\x03 \x03(\x0b\x32\x39.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster\x1a\xef\x04\n\x07\x43luster\x12\x13\n\x0b\x63\x65ntroid_id\x18\x01 \x01(\x03\x12^\n\x0e\x66\x65\x61ture_values\x18\x02 \x03(\x0b\x32\x46.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue\x12*\n\x05\x63ount\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a\xc2\x03\n\x0c\x46\x65\x61tureValue\x12\x16\n\x0e\x66\x65\x61ture_column\x18\x01 \x01(\t\x12\x37\n\x0fnumerical_value\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValueH\x00\x12t\n\x11\x63\x61tegorical_value\x18\x03 
\x01(\x0b\x32W.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValueH\x00\x1a\xe1\x01\n\x10\x43\x61tegoricalValue\x12~\n\x0f\x63\x61tegory_counts\x18\x01 \x03(\x0b\x32\x65.google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount\x1aM\n\rCategoryCount\x12\x10\n\x08\x63\x61tegory\x18\x01 \x01(\t\x12*\n\x05\x63ount\x18\x02 \x01(\x0b\x32\x1b.google.protobuf.Int64ValueB\x07\n\x05value\x1a\x95\x03\n\x11\x45valuationMetrics\x12O\n\x12regression_metrics\x18\x01 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.RegressionMetricsH\x00\x12\x64\n\x1d\x62inary_classification_metrics\x18\x02 \x01(\x0b\x32;.google.cloud.bigquery.v2.Model.BinaryClassificationMetricsH\x00\x12m\n"multi_class_classification_metrics\x18\x03 \x01(\x0b\x32?.google.cloud.bigquery.v2.Model.MultiClassClassificationMetricsH\x00\x12O\n\x12\x63lustering_metrics\x18\x04 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.ClusteringMetricsH\x00\x42\t\n\x07metrics\x1a\xab\x0f\n\x0bTrainingRun\x12U\n\x10training_options\x18\x01 \x01(\x0b\x32;.google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions\x12.\n\nstart_time\x18\x08 \x01(\x0b\x32\x1a.google.protobuf.Timestamp\x12L\n\x07results\x18\x06 \x03(\x0b\x32;.google.cloud.bigquery.v2.Model.TrainingRun.IterationResult\x12M\n\x12\x65valuation_metrics\x18\x07 \x01(\x0b\x32\x31.google.cloud.bigquery.v2.Model.EvaluationMetrics\x1a\x9d\t\n\x0fTrainingOptions\x12\x16\n\x0emax_iterations\x18\x01 \x01(\x03\x12;\n\tloss_type\x18\x02 \x01(\x0e\x32(.google.cloud.bigquery.v2.Model.LossType\x12\x12\n\nlearn_rate\x18\x03 \x01(\x01\x12\x37\n\x11l1_regularization\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x37\n\x11l2_regularization\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12;\n\x15min_relative_progress\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12.\n\nwarm_start\x18\x07 \x01(\x0b\x32\x1a.google.protobuf.BoolValue\x12.\n\nearly_stop\x18\x08 
\x01(\x0b\x32\x1a.google.protobuf.BoolValue\x12\x1b\n\x13input_label_columns\x18\t \x03(\t\x12J\n\x11\x64\x61ta_split_method\x18\n \x01(\x0e\x32/.google.cloud.bigquery.v2.Model.DataSplitMethod\x12 \n\x18\x64\x61ta_split_eval_fraction\x18\x0b \x01(\x01\x12\x19\n\x11\x64\x61ta_split_column\x18\x0c \x01(\t\x12N\n\x13learn_rate_strategy\x18\r \x01(\x0e\x32\x31.google.cloud.bigquery.v2.Model.LearnRateStrategy\x12\x1a\n\x12initial_learn_rate\x18\x10 \x01(\x01\x12o\n\x13label_class_weights\x18\x11 \x03(\x0b\x32R.google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry\x12\x43\n\rdistance_type\x18\x14 \x01(\x0e\x32,.google.cloud.bigquery.v2.Model.DistanceType\x12\x14\n\x0cnum_clusters\x18\x15 \x01(\x03\x12\x11\n\tmodel_uri\x18\x16 \x01(\t\x12S\n\x15optimization_strategy\x18\x17 \x01(\x0e\x32\x34.google.cloud.bigquery.v2.Model.OptimizationStrategy\x12l\n\x1ckmeans_initialization_method\x18! \x01(\x0e\x32\x46.google.cloud.bigquery.v2.Model.KmeansEnums.KmeansInitializationMethod\x12$\n\x1ckmeans_initialization_column\x18" \x01(\t\x1a\x38\n\x16LabelClassWeightsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x01:\x02\x38\x01\x1a\xd7\x03\n\x0fIterationResult\x12*\n\x05index\x18\x01 \x01(\x0b\x32\x1b.google.protobuf.Int32Value\x12\x30\n\x0b\x64uration_ms\x18\x04 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x12\x33\n\rtraining_loss\x18\x05 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\teval_loss\x18\x06 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x12\n\nlearn_rate\x18\x07 \x01(\x01\x12^\n\rcluster_infos\x18\x08 \x03(\x0b\x32G.google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo\x1a\x8b\x01\n\x0b\x43lusterInfo\x12\x13\n\x0b\x63\x65ntroid_id\x18\x01 \x01(\x03\x12\x34\n\x0e\x63luster_radius\x18\x02 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12\x31\n\x0c\x63luster_size\x18\x03 \x01(\x0b\x32\x1b.google.protobuf.Int64Value\x1a-\n\x0bLabelsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 
\x01(\t:\x02\x38\x01"s\n\tModelType\x12\x1a\n\x16MODEL_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11LINEAR_REGRESSION\x10\x01\x12\x17\n\x13LOGISTIC_REGRESSION\x10\x02\x12\n\n\x06KMEANS\x10\x03\x12\x0e\n\nTENSORFLOW\x10\x06"O\n\x08LossType\x12\x19\n\x15LOSS_TYPE_UNSPECIFIED\x10\x00\x12\x15\n\x11MEAN_SQUARED_LOSS\x10\x01\x12\x11\n\rMEAN_LOG_LOSS\x10\x02"H\n\x0c\x44istanceType\x12\x1d\n\x19\x44ISTANCE_TYPE_UNSPECIFIED\x10\x00\x12\r\n\tEUCLIDEAN\x10\x01\x12\n\n\x06\x43OSINE\x10\x02"z\n\x0f\x44\x61taSplitMethod\x12!\n\x1d\x44\x41TA_SPLIT_METHOD_UNSPECIFIED\x10\x00\x12\n\n\x06RANDOM\x10\x01\x12\n\n\x06\x43USTOM\x10\x02\x12\x0e\n\nSEQUENTIAL\x10\x03\x12\x0c\n\x08NO_SPLIT\x10\x04\x12\x0e\n\nAUTO_SPLIT\x10\x05"W\n\x11LearnRateStrategy\x12#\n\x1fLEARN_RATE_STRATEGY_UNSPECIFIED\x10\x00\x12\x0f\n\x0bLINE_SEARCH\x10\x01\x12\x0c\n\x08\x43ONSTANT\x10\x02"n\n\x14OptimizationStrategy\x12%\n!OPTIMIZATION_STRATEGY_UNSPECIFIED\x10\x00\x12\x1a\n\x16\x42\x41TCH_GRADIENT_DESCENT\x10\x01\x12\x13\n\x0fNORMAL_EQUATION\x10\x02"Z\n\x0fGetModelRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02"\x91\x01\n\x11PatchModelRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02\x12\x33\n\x05model\x18\x04 \x01(\x0b\x32\x1f.google.cloud.bigquery.v2.ModelB\x03\xe0\x41\x02"]\n\x12\x44\x65leteModelRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02"\x8c\x01\n\x11ListModelsRequest\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x31\n\x0bmax_results\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.UInt32Value\x12\x12\n\npage_token\x18\x04 \x01(\t"^\n\x12ListModelsResponse\x12/\n\x06models\x18\x01 
\x03(\x0b\x32\x1f.google.cloud.bigquery.v2.Model\x12\x17\n\x0fnext_page_token\x18\x02 \x01(\t2\xfa\x05\n\x0cModelService\x12y\n\x08GetModel\x12).google.cloud.bigquery.v2.GetModelRequest\x1a\x1f.google.cloud.bigquery.v2.Model"!\xda\x41\x1eproject_id,dataset_id,model_id\x12\x8d\x01\n\nListModels\x12+.google.cloud.bigquery.v2.ListModelsRequest\x1a,.google.cloud.bigquery.v2.ListModelsResponse"$\xda\x41!project_id,dataset_id,max_results\x12\x83\x01\n\nPatchModel\x12+.google.cloud.bigquery.v2.PatchModelRequest\x1a\x1f.google.cloud.bigquery.v2.Model"\'\xda\x41$project_id,dataset_id,model_id,model\x12v\n\x0b\x44\x65leteModel\x12,.google.cloud.bigquery.v2.DeleteModelRequest\x1a\x16.google.protobuf.Empty"!\xda\x41\x1eproject_id,dataset_id,model_id\x1a\xe0\x01\xca\x41\x17\x62igquery.googleapis.com\xd2\x41\xc2\x01https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/bigquery.readonly,https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/cloud-platform.read-onlyBl\n\x1c\x63om.google.cloud.bigquery.v2B\nModelProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3',
- dependencies=[
- google_dot_api_dot_client__pb2.DESCRIPTOR,
- google_dot_api_dot_field__behavior__pb2.DESCRIPTOR,
- google_dot_cloud_dot_bigquery__v2_dot_proto_dot_encryption__config__pb2.DESCRIPTOR,
- google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__reference__pb2.DESCRIPTOR,
- google_dot_cloud_dot_bigquery__v2_dot_proto_dot_standard__sql__pb2.DESCRIPTOR,
- google_dot_protobuf_dot_empty__pb2.DESCRIPTOR,
- google_dot_protobuf_dot_timestamp__pb2.DESCRIPTOR,
- google_dot_protobuf_dot_wrappers__pb2.DESCRIPTOR,
- google_dot_api_dot_annotations__pb2.DESCRIPTOR,
- ],
-)
-
-
-_MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD = _descriptor.EnumDescriptor(
- name="KmeansInitializationMethod",
- full_name="google.cloud.bigquery.v2.Model.KmeansEnums.KmeansInitializationMethod",
- filename=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- values=[
- _descriptor.EnumValueDescriptor(
- name="KMEANS_INITIALIZATION_METHOD_UNSPECIFIED",
- index=0,
- number=0,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="RANDOM",
- index=1,
- number=1,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="CUSTOM",
- index=2,
- number=2,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- containing_type=None,
- serialized_options=None,
- serialized_start=1132,
- serialized_end=1230,
-)
-_sym_db.RegisterEnumDescriptor(_MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD)
-
-_MODEL_MODELTYPE = _descriptor.EnumDescriptor(
- name="ModelType",
- full_name="google.cloud.bigquery.v2.Model.ModelType",
- filename=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- values=[
- _descriptor.EnumValueDescriptor(
- name="MODEL_TYPE_UNSPECIFIED",
- index=0,
- number=0,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="LINEAR_REGRESSION",
- index=1,
- number=1,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="LOGISTIC_REGRESSION",
- index=2,
- number=2,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="KMEANS",
- index=3,
- number=3,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="TENSORFLOW",
- index=4,
- number=6,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- containing_type=None,
- serialized_options=None,
- serialized_start=6632,
- serialized_end=6747,
-)
-_sym_db.RegisterEnumDescriptor(_MODEL_MODELTYPE)
-
-_MODEL_LOSSTYPE = _descriptor.EnumDescriptor(
- name="LossType",
- full_name="google.cloud.bigquery.v2.Model.LossType",
- filename=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- values=[
- _descriptor.EnumValueDescriptor(
- name="LOSS_TYPE_UNSPECIFIED",
- index=0,
- number=0,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="MEAN_SQUARED_LOSS",
- index=1,
- number=1,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="MEAN_LOG_LOSS",
- index=2,
- number=2,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- containing_type=None,
- serialized_options=None,
- serialized_start=6749,
- serialized_end=6828,
-)
-_sym_db.RegisterEnumDescriptor(_MODEL_LOSSTYPE)
-
-_MODEL_DISTANCETYPE = _descriptor.EnumDescriptor(
- name="DistanceType",
- full_name="google.cloud.bigquery.v2.Model.DistanceType",
- filename=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- values=[
- _descriptor.EnumValueDescriptor(
- name="DISTANCE_TYPE_UNSPECIFIED",
- index=0,
- number=0,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="EUCLIDEAN",
- index=1,
- number=1,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="COSINE",
- index=2,
- number=2,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- containing_type=None,
- serialized_options=None,
- serialized_start=6830,
- serialized_end=6902,
-)
-_sym_db.RegisterEnumDescriptor(_MODEL_DISTANCETYPE)
-
-_MODEL_DATASPLITMETHOD = _descriptor.EnumDescriptor(
- name="DataSplitMethod",
- full_name="google.cloud.bigquery.v2.Model.DataSplitMethod",
- filename=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- values=[
- _descriptor.EnumValueDescriptor(
- name="DATA_SPLIT_METHOD_UNSPECIFIED",
- index=0,
- number=0,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="RANDOM",
- index=1,
- number=1,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="CUSTOM",
- index=2,
- number=2,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="SEQUENTIAL",
- index=3,
- number=3,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="NO_SPLIT",
- index=4,
- number=4,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="AUTO_SPLIT",
- index=5,
- number=5,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- containing_type=None,
- serialized_options=None,
- serialized_start=6904,
- serialized_end=7026,
-)
-_sym_db.RegisterEnumDescriptor(_MODEL_DATASPLITMETHOD)
-
-_MODEL_LEARNRATESTRATEGY = _descriptor.EnumDescriptor(
- name="LearnRateStrategy",
- full_name="google.cloud.bigquery.v2.Model.LearnRateStrategy",
- filename=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- values=[
- _descriptor.EnumValueDescriptor(
- name="LEARN_RATE_STRATEGY_UNSPECIFIED",
- index=0,
- number=0,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="LINE_SEARCH",
- index=1,
- number=1,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="CONSTANT",
- index=2,
- number=2,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- containing_type=None,
- serialized_options=None,
- serialized_start=7028,
- serialized_end=7115,
-)
-_sym_db.RegisterEnumDescriptor(_MODEL_LEARNRATESTRATEGY)
-
-_MODEL_OPTIMIZATIONSTRATEGY = _descriptor.EnumDescriptor(
- name="OptimizationStrategy",
- full_name="google.cloud.bigquery.v2.Model.OptimizationStrategy",
- filename=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- values=[
- _descriptor.EnumValueDescriptor(
- name="OPTIMIZATION_STRATEGY_UNSPECIFIED",
- index=0,
- number=0,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="BATCH_GRADIENT_DESCENT",
- index=1,
- number=1,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="NORMAL_EQUATION",
- index=2,
- number=2,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- containing_type=None,
- serialized_options=None,
- serialized_start=7117,
- serialized_end=7227,
-)
-_sym_db.RegisterEnumDescriptor(_MODEL_OPTIMIZATIONSTRATEGY)
-
-
-_MODEL_KMEANSENUMS = _descriptor.Descriptor(
- name="KmeansEnums",
- full_name="google.cloud.bigquery.v2.Model.KmeansEnums",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[],
- extensions=[],
- nested_types=[],
- enum_types=[_MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD,],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=1117,
- serialized_end=1230,
-)
-
-_MODEL_REGRESSIONMETRICS = _descriptor.Descriptor(
- name="RegressionMetrics",
- full_name="google.cloud.bigquery.v2.Model.RegressionMetrics",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="mean_absolute_error",
- full_name="google.cloud.bigquery.v2.Model.RegressionMetrics.mean_absolute_error",
- index=0,
- number=1,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="mean_squared_error",
- full_name="google.cloud.bigquery.v2.Model.RegressionMetrics.mean_squared_error",
- index=1,
- number=2,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="mean_squared_log_error",
- full_name="google.cloud.bigquery.v2.Model.RegressionMetrics.mean_squared_log_error",
- index=2,
- number=3,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="median_absolute_error",
- full_name="google.cloud.bigquery.v2.Model.RegressionMetrics.median_absolute_error",
- index=3,
- number=4,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="r_squared",
- full_name="google.cloud.bigquery.v2.Model.RegressionMetrics.r_squared",
- index=4,
- number=5,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=1233,
- serialized_end=1541,
-)
-
-_MODEL_AGGREGATECLASSIFICATIONMETRICS = _descriptor.Descriptor(
- name="AggregateClassificationMetrics",
- full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="precision",
- full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.precision",
- index=0,
- number=1,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="recall",
- full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.recall",
- index=1,
- number=2,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="accuracy",
- full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.accuracy",
- index=2,
- number=3,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="threshold",
- full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.threshold",
- index=3,
- number=4,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="f1_score",
- full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.f1_score",
- index=4,
- number=5,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="log_loss",
- full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.log_loss",
- index=5,
- number=6,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="roc_auc",
- full_name="google.cloud.bigquery.v2.Model.AggregateClassificationMetrics.roc_auc",
- index=6,
- number=7,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=1544,
- serialized_end=1911,
-)
-
-_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX = _descriptor.Descriptor(
- name="BinaryConfusionMatrix",
- full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="positive_class_threshold",
- full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.positive_class_threshold",
- index=0,
- number=1,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="true_positives",
- full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.true_positives",
- index=1,
- number=2,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="false_positives",
- full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.false_positives",
- index=2,
- number=3,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="true_negatives",
- full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.true_negatives",
- index=3,
- number=4,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="false_negatives",
- full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.false_negatives",
- index=4,
- number=5,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="precision",
- full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.precision",
- index=5,
- number=6,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="recall",
- full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.recall",
- index=6,
- number=7,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="f1_score",
- full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.f1_score",
- index=7,
- number=8,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="accuracy",
- full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix.accuracy",
- index=8,
- number=9,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=2221,
- serialized_end=2713,
-)
-
-_MODEL_BINARYCLASSIFICATIONMETRICS = _descriptor.Descriptor(
- name="BinaryClassificationMetrics",
- full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="aggregate_classification_metrics",
- full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.aggregate_classification_metrics",
- index=0,
- number=1,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="binary_confusion_matrix_list",
- full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.binary_confusion_matrix_list",
- index=1,
- number=2,
- type=11,
- cpp_type=10,
- label=3,
- has_default_value=False,
- default_value=[],
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="positive_label",
- full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.positive_label",
- index=2,
- number=3,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="negative_label",
- full_name="google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.negative_label",
- index=3,
- number=4,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX,],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=1914,
- serialized_end=2713,
-)
-
-_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY = _descriptor.Descriptor(
- name="Entry",
- full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="predicted_label",
- full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry.predicted_label",
- index=0,
- number=1,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="item_count",
- full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry.item_count",
- index=1,
- number=2,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=3148,
- serialized_end=3229,
-)
-
-_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW = _descriptor.Descriptor(
- name="Row",
- full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="actual_label",
- full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row.actual_label",
- index=0,
- number=1,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="entries",
- full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row.entries",
- index=1,
- number=2,
- type=11,
- cpp_type=10,
- label=3,
- has_default_value=False,
- default_value=[],
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=3232,
- serialized_end=3363,
-)
-
-_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX = _descriptor.Descriptor(
- name="ConfusionMatrix",
- full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="confidence_threshold",
- full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.confidence_threshold",
- index=0,
- number=1,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="rows",
- full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.rows",
- index=1,
- number=2,
- type=11,
- cpp_type=10,
- label=3,
- has_default_value=False,
- default_value=[],
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[
- _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY,
- _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW,
- ],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=2970,
- serialized_end=3363,
-)
-
-_MODEL_MULTICLASSCLASSIFICATIONMETRICS = _descriptor.Descriptor(
- name="MultiClassClassificationMetrics",
- full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="aggregate_classification_metrics",
- full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.aggregate_classification_metrics",
- index=0,
- number=1,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="confusion_matrix_list",
- full_name="google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.confusion_matrix_list",
- index=1,
- number=2,
- type=11,
- cpp_type=10,
- label=3,
- has_default_value=False,
- default_value=[],
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX,],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=2716,
- serialized_end=3363,
-)
-
-_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT = _descriptor.Descriptor(
- name="CategoryCount",
- full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="category",
- full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount.category",
- index=0,
- number=1,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="count",
- full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount.count",
- index=1,
- number=2,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=4123,
- serialized_end=4200,
-)
-
-_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE = _descriptor.Descriptor(
- name="CategoricalValue",
- full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="category_counts",
- full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.category_counts",
- index=0,
- number=1,
- type=11,
- cpp_type=10,
- label=3,
- has_default_value=False,
- default_value=[],
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[
- _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT,
- ],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=3975,
- serialized_end=4200,
-)
-
-_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE = _descriptor.Descriptor(
- name="FeatureValue",
- full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="feature_column",
- full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.feature_column",
- index=0,
- number=1,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="numerical_value",
- full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.numerical_value",
- index=1,
- number=2,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="categorical_value",
- full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.categorical_value",
- index=2,
- number=3,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE,],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[
- _descriptor.OneofDescriptor(
- name="value",
- full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.value",
- index=0,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[],
- ),
- ],
- serialized_start=3759,
- serialized_end=4209,
-)
-
-_MODEL_CLUSTERINGMETRICS_CLUSTER = _descriptor.Descriptor(
- name="Cluster",
- full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="centroid_id",
- full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.centroid_id",
- index=0,
- number=1,
- type=3,
- cpp_type=2,
- label=1,
- has_default_value=False,
- default_value=0,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="feature_values",
- full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.feature_values",
- index=1,
- number=2,
- type=11,
- cpp_type=10,
- label=3,
- has_default_value=False,
- default_value=[],
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="count",
- full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.count",
- index=2,
- number=3,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE,],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=3586,
- serialized_end=4209,
-)
-
-_MODEL_CLUSTERINGMETRICS = _descriptor.Descriptor(
- name="ClusteringMetrics",
- full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="davies_bouldin_index",
- full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.davies_bouldin_index",
- index=0,
- number=1,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="mean_squared_distance",
- full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.mean_squared_distance",
- index=1,
- number=2,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="clusters",
- full_name="google.cloud.bigquery.v2.Model.ClusteringMetrics.clusters",
- index=2,
- number=3,
- type=11,
- cpp_type=10,
- label=3,
- has_default_value=False,
- default_value=[],
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[_MODEL_CLUSTERINGMETRICS_CLUSTER,],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=3366,
- serialized_end=4209,
-)
-
-_MODEL_EVALUATIONMETRICS = _descriptor.Descriptor(
- name="EvaluationMetrics",
- full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="regression_metrics",
- full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics.regression_metrics",
- index=0,
- number=1,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="binary_classification_metrics",
- full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics.binary_classification_metrics",
- index=1,
- number=2,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="multi_class_classification_metrics",
- full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics.multi_class_classification_metrics",
- index=2,
- number=3,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="clustering_metrics",
- full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics.clustering_metrics",
- index=3,
- number=4,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[
- _descriptor.OneofDescriptor(
- name="metrics",
- full_name="google.cloud.bigquery.v2.Model.EvaluationMetrics.metrics",
- index=0,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[],
- ),
- ],
- serialized_start=4212,
- serialized_end=4617,
-)
-
-_MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY = _descriptor.Descriptor(
- name="LabelClassWeightsEntry",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="key",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry.key",
- index=0,
- number=1,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="value",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry.value",
- index=1,
- number=2,
- type=1,
- cpp_type=5,
- label=1,
- has_default_value=False,
- default_value=float(0),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[],
- enum_types=[],
- serialized_options=b"8\001",
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=6053,
- serialized_end=6109,
-)
-
-_MODEL_TRAININGRUN_TRAININGOPTIONS = _descriptor.Descriptor(
- name="TrainingOptions",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="max_iterations",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.max_iterations",
- index=0,
- number=1,
- type=3,
- cpp_type=2,
- label=1,
- has_default_value=False,
- default_value=0,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="loss_type",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.loss_type",
- index=1,
- number=2,
- type=14,
- cpp_type=8,
- label=1,
- has_default_value=False,
- default_value=0,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="learn_rate",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.learn_rate",
- index=2,
- number=3,
- type=1,
- cpp_type=5,
- label=1,
- has_default_value=False,
- default_value=float(0),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="l1_regularization",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.l1_regularization",
- index=3,
- number=4,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="l2_regularization",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.l2_regularization",
- index=4,
- number=5,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="min_relative_progress",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.min_relative_progress",
- index=5,
- number=6,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="warm_start",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.warm_start",
- index=6,
- number=7,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="early_stop",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.early_stop",
- index=7,
- number=8,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="input_label_columns",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.input_label_columns",
- index=8,
- number=9,
- type=9,
- cpp_type=9,
- label=3,
- has_default_value=False,
- default_value=[],
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="data_split_method",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.data_split_method",
- index=9,
- number=10,
- type=14,
- cpp_type=8,
- label=1,
- has_default_value=False,
- default_value=0,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="data_split_eval_fraction",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.data_split_eval_fraction",
- index=10,
- number=11,
- type=1,
- cpp_type=5,
- label=1,
- has_default_value=False,
- default_value=float(0),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="data_split_column",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.data_split_column",
- index=11,
- number=12,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="learn_rate_strategy",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.learn_rate_strategy",
- index=12,
- number=13,
- type=14,
- cpp_type=8,
- label=1,
- has_default_value=False,
- default_value=0,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="initial_learn_rate",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.initial_learn_rate",
- index=13,
- number=16,
- type=1,
- cpp_type=5,
- label=1,
- has_default_value=False,
- default_value=float(0),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="label_class_weights",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.label_class_weights",
- index=14,
- number=17,
- type=11,
- cpp_type=10,
- label=3,
- has_default_value=False,
- default_value=[],
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="distance_type",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.distance_type",
- index=15,
- number=20,
- type=14,
- cpp_type=8,
- label=1,
- has_default_value=False,
- default_value=0,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="num_clusters",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.num_clusters",
- index=16,
- number=21,
- type=3,
- cpp_type=2,
- label=1,
- has_default_value=False,
- default_value=0,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="model_uri",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.model_uri",
- index=17,
- number=22,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="optimization_strategy",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.optimization_strategy",
- index=18,
- number=23,
- type=14,
- cpp_type=8,
- label=1,
- has_default_value=False,
- default_value=0,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="kmeans_initialization_method",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.kmeans_initialization_method",
- index=19,
- number=33,
- type=14,
- cpp_type=8,
- label=1,
- has_default_value=False,
- default_value=0,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="kmeans_initialization_column",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.kmeans_initialization_column",
- index=20,
- number=34,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[_MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY,],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=4928,
- serialized_end=6109,
-)
-
-_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO = _descriptor.Descriptor(
- name="ClusterInfo",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="centroid_id",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo.centroid_id",
- index=0,
- number=1,
- type=3,
- cpp_type=2,
- label=1,
- has_default_value=False,
- default_value=0,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="cluster_radius",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo.cluster_radius",
- index=1,
- number=2,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="cluster_size",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo.cluster_size",
- index=2,
- number=3,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=6444,
- serialized_end=6583,
-)
-
-_MODEL_TRAININGRUN_ITERATIONRESULT = _descriptor.Descriptor(
- name="IterationResult",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="index",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.index",
- index=0,
- number=1,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="duration_ms",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.duration_ms",
- index=1,
- number=4,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="training_loss",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.training_loss",
- index=2,
- number=5,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="eval_loss",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.eval_loss",
- index=3,
- number=6,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="learn_rate",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.learn_rate",
- index=4,
- number=7,
- type=1,
- cpp_type=5,
- label=1,
- has_default_value=False,
- default_value=float(0),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="cluster_infos",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.cluster_infos",
- index=5,
- number=8,
- type=11,
- cpp_type=10,
- label=3,
- has_default_value=False,
- default_value=[],
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO,],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=6112,
- serialized_end=6583,
-)
-
-_MODEL_TRAININGRUN = _descriptor.Descriptor(
- name="TrainingRun",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="training_options",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.training_options",
- index=0,
- number=1,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="start_time",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.start_time",
- index=1,
- number=8,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="results",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.results",
- index=2,
- number=6,
- type=11,
- cpp_type=10,
- label=3,
- has_default_value=False,
- default_value=[],
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="evaluation_metrics",
- full_name="google.cloud.bigquery.v2.Model.TrainingRun.evaluation_metrics",
- index=3,
- number=7,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[
- _MODEL_TRAININGRUN_TRAININGOPTIONS,
- _MODEL_TRAININGRUN_ITERATIONRESULT,
- ],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=4620,
- serialized_end=6583,
-)
-
-_MODEL_LABELSENTRY = _descriptor.Descriptor(
- name="LabelsEntry",
- full_name="google.cloud.bigquery.v2.Model.LabelsEntry",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="key",
- full_name="google.cloud.bigquery.v2.Model.LabelsEntry.key",
- index=0,
- number=1,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="value",
- full_name="google.cloud.bigquery.v2.Model.LabelsEntry.value",
- index=1,
- number=2,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[],
- enum_types=[],
- serialized_options=b"8\001",
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=6585,
- serialized_end=6630,
-)
-
-_MODEL = _descriptor.Descriptor(
- name="Model",
- full_name="google.cloud.bigquery.v2.Model",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="etag",
- full_name="google.cloud.bigquery.v2.Model.etag",
- index=0,
- number=1,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\003",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="model_reference",
- full_name="google.cloud.bigquery.v2.Model.model_reference",
- index=1,
- number=2,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\002",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="creation_time",
- full_name="google.cloud.bigquery.v2.Model.creation_time",
- index=2,
- number=5,
- type=3,
- cpp_type=2,
- label=1,
- has_default_value=False,
- default_value=0,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\003",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="last_modified_time",
- full_name="google.cloud.bigquery.v2.Model.last_modified_time",
- index=3,
- number=6,
- type=3,
- cpp_type=2,
- label=1,
- has_default_value=False,
- default_value=0,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\003",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="description",
- full_name="google.cloud.bigquery.v2.Model.description",
- index=4,
- number=12,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\001",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="friendly_name",
- full_name="google.cloud.bigquery.v2.Model.friendly_name",
- index=5,
- number=14,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\001",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="labels",
- full_name="google.cloud.bigquery.v2.Model.labels",
- index=6,
- number=15,
- type=11,
- cpp_type=10,
- label=3,
- has_default_value=False,
- default_value=[],
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="expiration_time",
- full_name="google.cloud.bigquery.v2.Model.expiration_time",
- index=7,
- number=16,
- type=3,
- cpp_type=2,
- label=1,
- has_default_value=False,
- default_value=0,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\001",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="location",
- full_name="google.cloud.bigquery.v2.Model.location",
- index=8,
- number=13,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\003",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="encryption_configuration",
- full_name="google.cloud.bigquery.v2.Model.encryption_configuration",
- index=9,
- number=17,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="model_type",
- full_name="google.cloud.bigquery.v2.Model.model_type",
- index=10,
- number=7,
- type=14,
- cpp_type=8,
- label=1,
- has_default_value=False,
- default_value=0,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\003",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="training_runs",
- full_name="google.cloud.bigquery.v2.Model.training_runs",
- index=11,
- number=9,
- type=11,
- cpp_type=10,
- label=3,
- has_default_value=False,
- default_value=[],
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\003",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="feature_columns",
- full_name="google.cloud.bigquery.v2.Model.feature_columns",
- index=12,
- number=10,
- type=11,
- cpp_type=10,
- label=3,
- has_default_value=False,
- default_value=[],
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\003",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="label_columns",
- full_name="google.cloud.bigquery.v2.Model.label_columns",
- index=13,
- number=11,
- type=11,
- cpp_type=10,
- label=3,
- has_default_value=False,
- default_value=[],
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\003",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[
- _MODEL_KMEANSENUMS,
- _MODEL_REGRESSIONMETRICS,
- _MODEL_AGGREGATECLASSIFICATIONMETRICS,
- _MODEL_BINARYCLASSIFICATIONMETRICS,
- _MODEL_MULTICLASSCLASSIFICATIONMETRICS,
- _MODEL_CLUSTERINGMETRICS,
- _MODEL_EVALUATIONMETRICS,
- _MODEL_TRAININGRUN,
- _MODEL_LABELSENTRY,
- ],
- enum_types=[
- _MODEL_MODELTYPE,
- _MODEL_LOSSTYPE,
- _MODEL_DISTANCETYPE,
- _MODEL_DATASPLITMETHOD,
- _MODEL_LEARNRATESTRATEGY,
- _MODEL_OPTIMIZATIONSTRATEGY,
- ],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=416,
- serialized_end=7227,
-)
-
-
-_GETMODELREQUEST = _descriptor.Descriptor(
- name="GetModelRequest",
- full_name="google.cloud.bigquery.v2.GetModelRequest",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="project_id",
- full_name="google.cloud.bigquery.v2.GetModelRequest.project_id",
- index=0,
- number=1,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\002",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="dataset_id",
- full_name="google.cloud.bigquery.v2.GetModelRequest.dataset_id",
- index=1,
- number=2,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\002",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="model_id",
- full_name="google.cloud.bigquery.v2.GetModelRequest.model_id",
- index=2,
- number=3,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\002",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=7229,
- serialized_end=7319,
-)
-
-
-_PATCHMODELREQUEST = _descriptor.Descriptor(
- name="PatchModelRequest",
- full_name="google.cloud.bigquery.v2.PatchModelRequest",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="project_id",
- full_name="google.cloud.bigquery.v2.PatchModelRequest.project_id",
- index=0,
- number=1,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\002",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="dataset_id",
- full_name="google.cloud.bigquery.v2.PatchModelRequest.dataset_id",
- index=1,
- number=2,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\002",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="model_id",
- full_name="google.cloud.bigquery.v2.PatchModelRequest.model_id",
- index=2,
- number=3,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\002",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="model",
- full_name="google.cloud.bigquery.v2.PatchModelRequest.model",
- index=3,
- number=4,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\002",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=7322,
- serialized_end=7467,
-)
-
-
-_DELETEMODELREQUEST = _descriptor.Descriptor(
- name="DeleteModelRequest",
- full_name="google.cloud.bigquery.v2.DeleteModelRequest",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="project_id",
- full_name="google.cloud.bigquery.v2.DeleteModelRequest.project_id",
- index=0,
- number=1,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\002",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="dataset_id",
- full_name="google.cloud.bigquery.v2.DeleteModelRequest.dataset_id",
- index=1,
- number=2,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\002",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="model_id",
- full_name="google.cloud.bigquery.v2.DeleteModelRequest.model_id",
- index=2,
- number=3,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\002",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=7469,
- serialized_end=7562,
-)
-
-
-_LISTMODELSREQUEST = _descriptor.Descriptor(
- name="ListModelsRequest",
- full_name="google.cloud.bigquery.v2.ListModelsRequest",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="project_id",
- full_name="google.cloud.bigquery.v2.ListModelsRequest.project_id",
- index=0,
- number=1,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\002",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="dataset_id",
- full_name="google.cloud.bigquery.v2.ListModelsRequest.dataset_id",
- index=1,
- number=2,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\002",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="max_results",
- full_name="google.cloud.bigquery.v2.ListModelsRequest.max_results",
- index=2,
- number=3,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="page_token",
- full_name="google.cloud.bigquery.v2.ListModelsRequest.page_token",
- index=3,
- number=4,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=7565,
- serialized_end=7705,
-)
-
-
-_LISTMODELSRESPONSE = _descriptor.Descriptor(
- name="ListModelsResponse",
- full_name="google.cloud.bigquery.v2.ListModelsResponse",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="models",
- full_name="google.cloud.bigquery.v2.ListModelsResponse.models",
- index=0,
- number=1,
- type=11,
- cpp_type=10,
- label=3,
- has_default_value=False,
- default_value=[],
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="next_page_token",
- full_name="google.cloud.bigquery.v2.ListModelsResponse.next_page_token",
- index=1,
- number=2,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=7707,
- serialized_end=7801,
-)
-
-_MODEL_KMEANSENUMS.containing_type = _MODEL
-_MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD.containing_type = _MODEL_KMEANSENUMS
-_MODEL_REGRESSIONMETRICS.fields_by_name[
- "mean_absolute_error"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE
-_MODEL_REGRESSIONMETRICS.fields_by_name[
- "mean_squared_error"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE
-_MODEL_REGRESSIONMETRICS.fields_by_name[
- "mean_squared_log_error"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE
-_MODEL_REGRESSIONMETRICS.fields_by_name[
- "median_absolute_error"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE
-_MODEL_REGRESSIONMETRICS.fields_by_name[
- "r_squared"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE
-_MODEL_REGRESSIONMETRICS.containing_type = _MODEL
-_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[
- "precision"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE
-_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[
- "recall"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE
-_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[
- "accuracy"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE
-_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[
- "threshold"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE
-_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[
- "f1_score"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE
-_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[
- "log_loss"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE
-_MODEL_AGGREGATECLASSIFICATIONMETRICS.fields_by_name[
- "roc_auc"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE
-_MODEL_AGGREGATECLASSIFICATIONMETRICS.containing_type = _MODEL
-_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[
- "positive_class_threshold"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE
-_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[
- "true_positives"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE
-_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[
- "false_positives"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE
-_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[
- "true_negatives"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE
-_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[
- "false_negatives"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE
-_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[
- "precision"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE
-_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[
- "recall"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE
-_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[
- "f1_score"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE
-_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.fields_by_name[
- "accuracy"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE
-_MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX.containing_type = (
- _MODEL_BINARYCLASSIFICATIONMETRICS
-)
-_MODEL_BINARYCLASSIFICATIONMETRICS.fields_by_name[
- "aggregate_classification_metrics"
-].message_type = _MODEL_AGGREGATECLASSIFICATIONMETRICS
-_MODEL_BINARYCLASSIFICATIONMETRICS.fields_by_name[
- "binary_confusion_matrix_list"
-].message_type = _MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX
-_MODEL_BINARYCLASSIFICATIONMETRICS.containing_type = _MODEL
-_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY.fields_by_name[
- "item_count"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE
-_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY.containing_type = (
- _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX
-)
-_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW.fields_by_name[
- "entries"
-].message_type = _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY
-_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW.containing_type = (
- _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX
-)
-_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX.fields_by_name[
- "confidence_threshold"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE
-_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX.fields_by_name[
- "rows"
-].message_type = _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW
-_MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX.containing_type = (
- _MODEL_MULTICLASSCLASSIFICATIONMETRICS
-)
-_MODEL_MULTICLASSCLASSIFICATIONMETRICS.fields_by_name[
- "aggregate_classification_metrics"
-].message_type = _MODEL_AGGREGATECLASSIFICATIONMETRICS
-_MODEL_MULTICLASSCLASSIFICATIONMETRICS.fields_by_name[
- "confusion_matrix_list"
-].message_type = _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX
-_MODEL_MULTICLASSCLASSIFICATIONMETRICS.containing_type = _MODEL
-_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT.fields_by_name[
- "count"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE
-_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT.containing_type = (
- _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE
-)
-_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE.fields_by_name[
- "category_counts"
-].message_type = (
- _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT
-)
-_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE.containing_type = (
- _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE
-)
-_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name[
- "numerical_value"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE
-_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name[
- "categorical_value"
-].message_type = _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE
-_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.containing_type = (
- _MODEL_CLUSTERINGMETRICS_CLUSTER
-)
-_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.oneofs_by_name["value"].fields.append(
- _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name["numerical_value"]
-)
-_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name[
- "numerical_value"
-].containing_oneof = _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.oneofs_by_name[
- "value"
-]
-_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.oneofs_by_name["value"].fields.append(
- _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name["categorical_value"]
-)
-_MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.fields_by_name[
- "categorical_value"
-].containing_oneof = _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE.oneofs_by_name[
- "value"
-]
-_MODEL_CLUSTERINGMETRICS_CLUSTER.fields_by_name[
- "feature_values"
-].message_type = _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE
-_MODEL_CLUSTERINGMETRICS_CLUSTER.fields_by_name[
- "count"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE
-_MODEL_CLUSTERINGMETRICS_CLUSTER.containing_type = _MODEL_CLUSTERINGMETRICS
-_MODEL_CLUSTERINGMETRICS.fields_by_name[
- "davies_bouldin_index"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE
-_MODEL_CLUSTERINGMETRICS.fields_by_name[
- "mean_squared_distance"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE
-_MODEL_CLUSTERINGMETRICS.fields_by_name[
- "clusters"
-].message_type = _MODEL_CLUSTERINGMETRICS_CLUSTER
-_MODEL_CLUSTERINGMETRICS.containing_type = _MODEL
-_MODEL_EVALUATIONMETRICS.fields_by_name[
- "regression_metrics"
-].message_type = _MODEL_REGRESSIONMETRICS
-_MODEL_EVALUATIONMETRICS.fields_by_name[
- "binary_classification_metrics"
-].message_type = _MODEL_BINARYCLASSIFICATIONMETRICS
-_MODEL_EVALUATIONMETRICS.fields_by_name[
- "multi_class_classification_metrics"
-].message_type = _MODEL_MULTICLASSCLASSIFICATIONMETRICS
-_MODEL_EVALUATIONMETRICS.fields_by_name[
- "clustering_metrics"
-].message_type = _MODEL_CLUSTERINGMETRICS
-_MODEL_EVALUATIONMETRICS.containing_type = _MODEL
-_MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"].fields.append(
- _MODEL_EVALUATIONMETRICS.fields_by_name["regression_metrics"]
-)
-_MODEL_EVALUATIONMETRICS.fields_by_name[
- "regression_metrics"
-].containing_oneof = _MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"]
-_MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"].fields.append(
- _MODEL_EVALUATIONMETRICS.fields_by_name["binary_classification_metrics"]
-)
-_MODEL_EVALUATIONMETRICS.fields_by_name[
- "binary_classification_metrics"
-].containing_oneof = _MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"]
-_MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"].fields.append(
- _MODEL_EVALUATIONMETRICS.fields_by_name["multi_class_classification_metrics"]
-)
-_MODEL_EVALUATIONMETRICS.fields_by_name[
- "multi_class_classification_metrics"
-].containing_oneof = _MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"]
-_MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"].fields.append(
- _MODEL_EVALUATIONMETRICS.fields_by_name["clustering_metrics"]
-)
-_MODEL_EVALUATIONMETRICS.fields_by_name[
- "clustering_metrics"
-].containing_oneof = _MODEL_EVALUATIONMETRICS.oneofs_by_name["metrics"]
-_MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY.containing_type = (
- _MODEL_TRAININGRUN_TRAININGOPTIONS
-)
-_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[
- "loss_type"
-].enum_type = _MODEL_LOSSTYPE
-_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[
- "l1_regularization"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE
-_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[
- "l2_regularization"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE
-_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[
- "min_relative_progress"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE
-_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[
- "warm_start"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._BOOLVALUE
-_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[
- "early_stop"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._BOOLVALUE
-_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[
- "data_split_method"
-].enum_type = _MODEL_DATASPLITMETHOD
-_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[
- "learn_rate_strategy"
-].enum_type = _MODEL_LEARNRATESTRATEGY
-_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[
- "label_class_weights"
-].message_type = _MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY
-_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[
- "distance_type"
-].enum_type = _MODEL_DISTANCETYPE
-_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[
- "optimization_strategy"
-].enum_type = _MODEL_OPTIMIZATIONSTRATEGY
-_MODEL_TRAININGRUN_TRAININGOPTIONS.fields_by_name[
- "kmeans_initialization_method"
-].enum_type = _MODEL_KMEANSENUMS_KMEANSINITIALIZATIONMETHOD
-_MODEL_TRAININGRUN_TRAININGOPTIONS.containing_type = _MODEL_TRAININGRUN
-_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO.fields_by_name[
- "cluster_radius"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE
-_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO.fields_by_name[
- "cluster_size"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE
-_MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO.containing_type = (
- _MODEL_TRAININGRUN_ITERATIONRESULT
-)
-_MODEL_TRAININGRUN_ITERATIONRESULT.fields_by_name[
- "index"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._INT32VALUE
-_MODEL_TRAININGRUN_ITERATIONRESULT.fields_by_name[
- "duration_ms"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._INT64VALUE
-_MODEL_TRAININGRUN_ITERATIONRESULT.fields_by_name[
- "training_loss"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE
-_MODEL_TRAININGRUN_ITERATIONRESULT.fields_by_name[
- "eval_loss"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._DOUBLEVALUE
-_MODEL_TRAININGRUN_ITERATIONRESULT.fields_by_name[
- "cluster_infos"
-].message_type = _MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO
-_MODEL_TRAININGRUN_ITERATIONRESULT.containing_type = _MODEL_TRAININGRUN
-_MODEL_TRAININGRUN.fields_by_name[
- "training_options"
-].message_type = _MODEL_TRAININGRUN_TRAININGOPTIONS
-_MODEL_TRAININGRUN.fields_by_name[
- "start_time"
-].message_type = google_dot_protobuf_dot_timestamp__pb2._TIMESTAMP
-_MODEL_TRAININGRUN.fields_by_name[
- "results"
-].message_type = _MODEL_TRAININGRUN_ITERATIONRESULT
-_MODEL_TRAININGRUN.fields_by_name[
- "evaluation_metrics"
-].message_type = _MODEL_EVALUATIONMETRICS
-_MODEL_TRAININGRUN.containing_type = _MODEL
-_MODEL_LABELSENTRY.containing_type = _MODEL
-_MODEL.fields_by_name[
- "model_reference"
-].message_type = (
- google_dot_cloud_dot_bigquery__v2_dot_proto_dot_model__reference__pb2._MODELREFERENCE
-)
-_MODEL.fields_by_name["labels"].message_type = _MODEL_LABELSENTRY
-_MODEL.fields_by_name[
- "encryption_configuration"
-].message_type = (
- google_dot_cloud_dot_bigquery__v2_dot_proto_dot_encryption__config__pb2._ENCRYPTIONCONFIGURATION
-)
-_MODEL.fields_by_name["model_type"].enum_type = _MODEL_MODELTYPE
-_MODEL.fields_by_name["training_runs"].message_type = _MODEL_TRAININGRUN
-_MODEL.fields_by_name[
- "feature_columns"
-].message_type = (
- google_dot_cloud_dot_bigquery__v2_dot_proto_dot_standard__sql__pb2._STANDARDSQLFIELD
-)
-_MODEL.fields_by_name[
- "label_columns"
-].message_type = (
- google_dot_cloud_dot_bigquery__v2_dot_proto_dot_standard__sql__pb2._STANDARDSQLFIELD
-)
-_MODEL_MODELTYPE.containing_type = _MODEL
-_MODEL_LOSSTYPE.containing_type = _MODEL
-_MODEL_DISTANCETYPE.containing_type = _MODEL
-_MODEL_DATASPLITMETHOD.containing_type = _MODEL
-_MODEL_LEARNRATESTRATEGY.containing_type = _MODEL
-_MODEL_OPTIMIZATIONSTRATEGY.containing_type = _MODEL
-_PATCHMODELREQUEST.fields_by_name["model"].message_type = _MODEL
-_LISTMODELSREQUEST.fields_by_name[
- "max_results"
-].message_type = google_dot_protobuf_dot_wrappers__pb2._UINT32VALUE
-_LISTMODELSRESPONSE.fields_by_name["models"].message_type = _MODEL
-DESCRIPTOR.message_types_by_name["Model"] = _MODEL
-DESCRIPTOR.message_types_by_name["GetModelRequest"] = _GETMODELREQUEST
-DESCRIPTOR.message_types_by_name["PatchModelRequest"] = _PATCHMODELREQUEST
-DESCRIPTOR.message_types_by_name["DeleteModelRequest"] = _DELETEMODELREQUEST
-DESCRIPTOR.message_types_by_name["ListModelsRequest"] = _LISTMODELSREQUEST
-DESCRIPTOR.message_types_by_name["ListModelsResponse"] = _LISTMODELSRESPONSE
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-Model = _reflection.GeneratedProtocolMessageType(
- "Model",
- (_message.Message,),
- {
- "KmeansEnums": _reflection.GeneratedProtocolMessageType(
- "KmeansEnums",
- (_message.Message,),
- {
- "DESCRIPTOR": _MODEL_KMEANSENUMS,
- "__module__": "google.cloud.bigquery_v2.proto.model_pb2"
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.KmeansEnums)
- },
- ),
- "RegressionMetrics": _reflection.GeneratedProtocolMessageType(
- "RegressionMetrics",
- (_message.Message,),
- {
- "DESCRIPTOR": _MODEL_REGRESSIONMETRICS,
- "__module__": "google.cloud.bigquery_v2.proto.model_pb2",
- "__doc__": """Evaluation metrics for regression and explicit feedback type matrix
- factorization models.
-
- Attributes:
- mean_absolute_error:
- Mean absolute error.
- mean_squared_error:
- Mean squared error.
- mean_squared_log_error:
- Mean squared log error.
- median_absolute_error:
- Median absolute error.
- r_squared:
- R^2 score.
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.RegressionMetrics)
- },
- ),
- "AggregateClassificationMetrics": _reflection.GeneratedProtocolMessageType(
- "AggregateClassificationMetrics",
- (_message.Message,),
- {
- "DESCRIPTOR": _MODEL_AGGREGATECLASSIFICATIONMETRICS,
- "__module__": "google.cloud.bigquery_v2.proto.model_pb2",
- "__doc__": """Aggregate metrics for classification/classifier models. For multi-
- class models, the metrics are either macro-averaged or micro-averaged.
- When macro-averaged, the metrics are calculated for each label and
- then an unweighted average is taken of those values. When micro-
- averaged, the metric is calculated globally by counting the total
- number of correctly predicted rows.
-
- Attributes:
- precision:
- Precision is the fraction of actual positive predictions that
- had positive actual labels. For multiclass this is a macro-
- averaged metric treating each class as a binary classifier.
- recall:
- Recall is the fraction of actual positive labels that were
- given a positive prediction. For multiclass this is a macro-
- averaged metric.
- accuracy:
- Accuracy is the fraction of predictions given the correct
- label. For multiclass this is a micro-averaged metric.
- threshold:
- Threshold at which the metrics are computed. For binary
- classification models this is the positive class threshold.
- For multi-class classfication models this is the confidence
- threshold.
- f1_score:
- The F1 score is an average of recall and precision. For
- multiclass this is a macro-averaged metric.
- log_loss:
- Logarithmic Loss. For multiclass this is a macro-averaged
- metric.
- roc_auc:
- Area Under a ROC Curve. For multiclass this is a macro-
- averaged metric.
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.AggregateClassificationMetrics)
- },
- ),
- "BinaryClassificationMetrics": _reflection.GeneratedProtocolMessageType(
- "BinaryClassificationMetrics",
- (_message.Message,),
- {
- "BinaryConfusionMatrix": _reflection.GeneratedProtocolMessageType(
- "BinaryConfusionMatrix",
- (_message.Message,),
- {
- "DESCRIPTOR": _MODEL_BINARYCLASSIFICATIONMETRICS_BINARYCONFUSIONMATRIX,
- "__module__": "google.cloud.bigquery_v2.proto.model_pb2",
- "__doc__": """Confusion matrix for binary classification models.
-
- Attributes:
- positive_class_threshold:
- Threshold value used when computing each of the following
- metric.
- true_positives:
- Number of true samples predicted as true.
- false_positives:
- Number of false samples predicted as true.
- true_negatives:
- Number of true samples predicted as false.
- false_negatives:
- Number of false samples predicted as false.
- precision:
- The fraction of actual positive predictions that had positive
- actual labels.
- recall:
- The fraction of actual positive labels that were given a
- positive prediction.
- f1_score:
- The equally weighted average of recall and precision.
- accuracy:
- The fraction of predictions given the correct label.
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.BinaryClassificationMetrics.BinaryConfusionMatrix)
- },
- ),
- "DESCRIPTOR": _MODEL_BINARYCLASSIFICATIONMETRICS,
- "__module__": "google.cloud.bigquery_v2.proto.model_pb2",
- "__doc__": """Evaluation metrics for binary classification/classifier models.
-
- Attributes:
- aggregate_classification_metrics:
- Aggregate classification metrics.
- binary_confusion_matrix_list:
- Binary confusion matrix at multiple thresholds.
- positive_label:
- Label representing the positive class.
- negative_label:
- Label representing the negative class.
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.BinaryClassificationMetrics)
- },
- ),
- "MultiClassClassificationMetrics": _reflection.GeneratedProtocolMessageType(
- "MultiClassClassificationMetrics",
- (_message.Message,),
- {
- "ConfusionMatrix": _reflection.GeneratedProtocolMessageType(
- "ConfusionMatrix",
- (_message.Message,),
- {
- "Entry": _reflection.GeneratedProtocolMessageType(
- "Entry",
- (_message.Message,),
- {
- "DESCRIPTOR": _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ENTRY,
- "__module__": "google.cloud.bigquery_v2.proto.model_pb2",
- "__doc__": """A single entry in the confusion matrix.
-
- Attributes:
- predicted_label:
- The predicted label. For confidence_threshold > 0, we will
- also add an entry indicating the number of items under the
- confidence threshold.
- item_count:
- Number of items being predicted as this label.
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry)
- },
- ),
- "Row": _reflection.GeneratedProtocolMessageType(
- "Row",
- (_message.Message,),
- {
- "DESCRIPTOR": _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX_ROW,
- "__module__": "google.cloud.bigquery_v2.proto.model_pb2",
- "__doc__": """A single row in the confusion matrix.
-
- Attributes:
- actual_label:
- The original label of this row.
- entries:
- Info describing predicted label distribution.
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row)
- },
- ),
- "DESCRIPTOR": _MODEL_MULTICLASSCLASSIFICATIONMETRICS_CONFUSIONMATRIX,
- "__module__": "google.cloud.bigquery_v2.proto.model_pb2",
- "__doc__": """Confusion matrix for multi-class classification models.
-
- Attributes:
- confidence_threshold:
- Confidence threshold used when computing the entries of the
- confusion matrix.
- rows:
- One row per actual label.
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics.ConfusionMatrix)
- },
- ),
- "DESCRIPTOR": _MODEL_MULTICLASSCLASSIFICATIONMETRICS,
- "__module__": "google.cloud.bigquery_v2.proto.model_pb2",
- "__doc__": """Evaluation metrics for multi-class classification/classifier models.
-
- Attributes:
- aggregate_classification_metrics:
- Aggregate classification metrics.
- confusion_matrix_list:
- Confusion matrix at different thresholds.
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.MultiClassClassificationMetrics)
- },
- ),
- "ClusteringMetrics": _reflection.GeneratedProtocolMessageType(
- "ClusteringMetrics",
- (_message.Message,),
- {
- "Cluster": _reflection.GeneratedProtocolMessageType(
- "Cluster",
- (_message.Message,),
- {
- "FeatureValue": _reflection.GeneratedProtocolMessageType(
- "FeatureValue",
- (_message.Message,),
- {
- "CategoricalValue": _reflection.GeneratedProtocolMessageType(
- "CategoricalValue",
- (_message.Message,),
- {
- "CategoryCount": _reflection.GeneratedProtocolMessageType(
- "CategoryCount",
- (_message.Message,),
- {
- "DESCRIPTOR": _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE_CATEGORYCOUNT,
- "__module__": "google.cloud.bigquery_v2.proto.model_pb2",
- "__doc__": """Represents the count of a single category within the cluster.
-
- Attributes:
- category:
- The name of category.
- count:
- The count of training samples matching the category within the
- cluster.
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount)
- },
- ),
- "DESCRIPTOR": _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE_CATEGORICALVALUE,
- "__module__": "google.cloud.bigquery_v2.proto.model_pb2",
- "__doc__": """Representative value of a categorical feature.
-
- Attributes:
- category_counts:
- Counts of all categories for the categorical feature. If there
- are more than ten categories, we return top ten (by count) and
- return one more CategoryCount with category ``*OTHER*`` and
- count as aggregate counts of remaining categories.
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue)
- },
- ),
- "DESCRIPTOR": _MODEL_CLUSTERINGMETRICS_CLUSTER_FEATUREVALUE,
- "__module__": "google.cloud.bigquery_v2.proto.model_pb2",
- "__doc__": """Representative value of a single feature within the cluster.
-
- Attributes:
- feature_column:
- The feature column name.
- numerical_value:
- The numerical feature value. This is the centroid value for
- this feature.
- categorical_value:
- The categorical feature value.
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster.FeatureValue)
- },
- ),
- "DESCRIPTOR": _MODEL_CLUSTERINGMETRICS_CLUSTER,
- "__module__": "google.cloud.bigquery_v2.proto.model_pb2",
- "__doc__": """Message containing the information about one cluster.
-
- Attributes:
- centroid_id:
- Centroid id.
- feature_values:
- Values of highly variant features for this cluster.
- count:
- Count of training data rows that were assigned to this
- cluster.
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics.Cluster)
- },
- ),
- "DESCRIPTOR": _MODEL_CLUSTERINGMETRICS,
- "__module__": "google.cloud.bigquery_v2.proto.model_pb2",
- "__doc__": """Evaluation metrics for clustering models.
-
- Attributes:
- davies_bouldin_index:
- Davies-Bouldin index.
- mean_squared_distance:
- Mean of squared distances between each sample to its cluster
- centroid.
- clusters:
- [Beta] Information for all clusters.
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.ClusteringMetrics)
- },
- ),
- "EvaluationMetrics": _reflection.GeneratedProtocolMessageType(
- "EvaluationMetrics",
- (_message.Message,),
- {
- "DESCRIPTOR": _MODEL_EVALUATIONMETRICS,
- "__module__": "google.cloud.bigquery_v2.proto.model_pb2",
- "__doc__": """Evaluation metrics of a model. These are either computed on all
- training data or just the eval data based on whether eval data was
- used during training. These are not present for imported models.
-
- Attributes:
- regression_metrics:
- Populated for regression models and explicit feedback type
- matrix factorization models.
- binary_classification_metrics:
- Populated for binary classification/classifier models.
- multi_class_classification_metrics:
- Populated for multi-class classification/classifier models.
- clustering_metrics:
- Populated for clustering models.
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.EvaluationMetrics)
- },
- ),
- "TrainingRun": _reflection.GeneratedProtocolMessageType(
- "TrainingRun",
- (_message.Message,),
- {
- "TrainingOptions": _reflection.GeneratedProtocolMessageType(
- "TrainingOptions",
- (_message.Message,),
- {
- "LabelClassWeightsEntry": _reflection.GeneratedProtocolMessageType(
- "LabelClassWeightsEntry",
- (_message.Message,),
- {
- "DESCRIPTOR": _MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY,
- "__module__": "google.cloud.bigquery_v2.proto.model_pb2"
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry)
- },
- ),
- "DESCRIPTOR": _MODEL_TRAININGRUN_TRAININGOPTIONS,
- "__module__": "google.cloud.bigquery_v2.proto.model_pb2",
- "__doc__": """Protocol buffer.
-
- Attributes:
- max_iterations:
- The maximum number of iterations in training. Used only for
- iterative training algorithms.
- loss_type:
- Type of loss function used during training run.
- learn_rate:
- Learning rate in training. Used only for iterative training
- algorithms.
- l1_regularization:
- L1 regularization coefficient.
- l2_regularization:
- L2 regularization coefficient.
- min_relative_progress:
- When early_stop is true, stops training when accuracy
- improvement is less than ‘min_relative_progress’. Used only
- for iterative training algorithms.
- warm_start:
- Whether to train a model from the last checkpoint.
- early_stop:
- Whether to stop early when the loss doesn’t improve
- significantly any more (compared to min_relative_progress).
- Used only for iterative training algorithms.
- input_label_columns:
- Name of input label columns in training data.
- data_split_method:
- The data split type for training and evaluation, e.g. RANDOM.
- data_split_eval_fraction:
- The fraction of evaluation data over the whole input data. The
- rest of data will be used as training data. The format should
- be double. Accurate to two decimal places. Default value is
- 0.2.
- data_split_column:
- The column to split data with. This column won’t be used as a
- feature. 1. When data_split_method is CUSTOM, the
- corresponding column should be boolean. The rows with true
- value tag are eval data, and the false are training data. 2.
- When data_split_method is SEQ, the first
- DATA_SPLIT_EVAL_FRACTION rows (from smallest to largest) in
- the corresponding column are used as training data, and the
- rest are eval data. It respects the order in Orderable data
- types:
- https://cloud.google.com/bigquery/docs/reference/standard-
- sql/data-types#data-type-properties
- learn_rate_strategy:
- The strategy to determine learn rate for the current
- iteration.
- initial_learn_rate:
- Specifies the initial learning rate for the line search learn
- rate strategy.
- label_class_weights:
- Weights associated with each label class, for rebalancing the
- training data. Only applicable for classification models.
- distance_type:
- Distance type for clustering models.
- num_clusters:
- Number of clusters for clustering models.
- model_uri:
- [Beta] Google Cloud Storage URI from which the model was
- imported. Only applicable for imported models.
- optimization_strategy:
- Optimization strategy for training linear regression models.
- kmeans_initialization_method:
- The method used to initialize the centroids for kmeans
- algorithm.
- kmeans_initialization_column:
- The column used to provide the initial centroids for kmeans
- algorithm when kmeans_initialization_method is CUSTOM.
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun.TrainingOptions)
- },
- ),
- "IterationResult": _reflection.GeneratedProtocolMessageType(
- "IterationResult",
- (_message.Message,),
- {
- "ClusterInfo": _reflection.GeneratedProtocolMessageType(
- "ClusterInfo",
- (_message.Message,),
- {
- "DESCRIPTOR": _MODEL_TRAININGRUN_ITERATIONRESULT_CLUSTERINFO,
- "__module__": "google.cloud.bigquery_v2.proto.model_pb2",
- "__doc__": """Information about a single cluster for clustering model.
-
- Attributes:
- centroid_id:
- Centroid id.
- cluster_radius:
- Cluster radius, the average distance from centroid to each
- point assigned to the cluster.
- cluster_size:
- Cluster size, the total number of points assigned to the
- cluster.
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun.IterationResult.ClusterInfo)
- },
- ),
- "DESCRIPTOR": _MODEL_TRAININGRUN_ITERATIONRESULT,
- "__module__": "google.cloud.bigquery_v2.proto.model_pb2",
- "__doc__": """Information about a single iteration of the training run.
-
- Attributes:
- index:
- Index of the iteration, 0 based.
- duration_ms:
- Time taken to run the iteration in milliseconds.
- training_loss:
- Loss computed on the training data at the end of iteration.
- eval_loss:
- Loss computed on the eval data at the end of iteration.
- learn_rate:
- Learn rate used for this iteration.
- cluster_infos:
- Information about top clusters for clustering models.
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun.IterationResult)
- },
- ),
- "DESCRIPTOR": _MODEL_TRAININGRUN,
- "__module__": "google.cloud.bigquery_v2.proto.model_pb2",
- "__doc__": """Information about a single training query run for the model.
-
- Attributes:
- training_options:
- Options that were used for this training run, includes user
- specified and default options that were used.
- start_time:
- The start time of this training run.
- results:
- Output of each iteration run, results.size() <=
- max_iterations.
- evaluation_metrics:
- The evaluation metrics over training/eval data that were
- computed at the end of training.
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.TrainingRun)
- },
- ),
- "LabelsEntry": _reflection.GeneratedProtocolMessageType(
- "LabelsEntry",
- (_message.Message,),
- {
- "DESCRIPTOR": _MODEL_LABELSENTRY,
- "__module__": "google.cloud.bigquery_v2.proto.model_pb2"
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model.LabelsEntry)
- },
- ),
- "DESCRIPTOR": _MODEL,
- "__module__": "google.cloud.bigquery_v2.proto.model_pb2",
- "__doc__": """Protocol buffer.
-
- Attributes:
- etag:
- Output only. A hash of this resource.
- model_reference:
- Required. Unique identifier for this model.
- creation_time:
- Output only. The time when this model was created, in
- millisecs since the epoch.
- last_modified_time:
- Output only. The time when this model was last modified, in
- millisecs since the epoch.
- description:
- Optional. A user-friendly description of this model.
- friendly_name:
- Optional. A descriptive name for this model.
- labels:
- The labels associated with this model. You can use these to
- organize and group your models. Label keys and values can be
- no longer than 63 characters, can only contain lowercase
- letters, numeric characters, underscores and dashes.
- International characters are allowed. Label values are
- optional. Label keys must start with a letter and each label
- in the list must have a different key.
- expiration_time:
- Optional. The time when this model expires, in milliseconds
- since the epoch. If not present, the model will persist
- indefinitely. Expired models will be deleted and their storage
- reclaimed. The defaultTableExpirationMs property of the
- encapsulating dataset can be used to set a default
- expirationTime on newly created models.
- location:
- Output only. The geographic location where the model resides.
- This value is inherited from the dataset.
- encryption_configuration:
- Custom encryption configuration (e.g., Cloud KMS keys). This
- shows the encryption configuration of the model data while
- stored in BigQuery storage.
- model_type:
- Output only. Type of the model resource.
- training_runs:
- Output only. Information for all training runs in increasing
- order of start_time.
- feature_columns:
- Output only. Input feature columns that were used to train
- this model.
- label_columns:
- Output only. Label columns that were used to train this model.
- The output of the model will have a ``predicted\_`` prefix to
- these columns.
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.Model)
- },
-)
-_sym_db.RegisterMessage(Model)
-_sym_db.RegisterMessage(Model.KmeansEnums)
-_sym_db.RegisterMessage(Model.RegressionMetrics)
-_sym_db.RegisterMessage(Model.AggregateClassificationMetrics)
-_sym_db.RegisterMessage(Model.BinaryClassificationMetrics)
-_sym_db.RegisterMessage(Model.BinaryClassificationMetrics.BinaryConfusionMatrix)
-_sym_db.RegisterMessage(Model.MultiClassClassificationMetrics)
-_sym_db.RegisterMessage(Model.MultiClassClassificationMetrics.ConfusionMatrix)
-_sym_db.RegisterMessage(Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry)
-_sym_db.RegisterMessage(Model.MultiClassClassificationMetrics.ConfusionMatrix.Row)
-_sym_db.RegisterMessage(Model.ClusteringMetrics)
-_sym_db.RegisterMessage(Model.ClusteringMetrics.Cluster)
-_sym_db.RegisterMessage(Model.ClusteringMetrics.Cluster.FeatureValue)
-_sym_db.RegisterMessage(Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue)
-_sym_db.RegisterMessage(
- Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount
-)
-_sym_db.RegisterMessage(Model.EvaluationMetrics)
-_sym_db.RegisterMessage(Model.TrainingRun)
-_sym_db.RegisterMessage(Model.TrainingRun.TrainingOptions)
-_sym_db.RegisterMessage(Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry)
-_sym_db.RegisterMessage(Model.TrainingRun.IterationResult)
-_sym_db.RegisterMessage(Model.TrainingRun.IterationResult.ClusterInfo)
-_sym_db.RegisterMessage(Model.LabelsEntry)
-
-GetModelRequest = _reflection.GeneratedProtocolMessageType(
- "GetModelRequest",
- (_message.Message,),
- {
- "DESCRIPTOR": _GETMODELREQUEST,
- "__module__": "google.cloud.bigquery_v2.proto.model_pb2",
- "__doc__": """Protocol buffer.
-
- Attributes:
- project_id:
- Required. Project ID of the requested model.
- dataset_id:
- Required. Dataset ID of the requested model.
- model_id:
- Required. Model ID of the requested model.
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.GetModelRequest)
- },
-)
-_sym_db.RegisterMessage(GetModelRequest)
-
-PatchModelRequest = _reflection.GeneratedProtocolMessageType(
- "PatchModelRequest",
- (_message.Message,),
- {
- "DESCRIPTOR": _PATCHMODELREQUEST,
- "__module__": "google.cloud.bigquery_v2.proto.model_pb2",
- "__doc__": """Protocol buffer.
-
- Attributes:
- project_id:
- Required. Project ID of the model to patch.
- dataset_id:
- Required. Dataset ID of the model to patch.
- model_id:
- Required. Model ID of the model to patch.
- model:
- Required. Patched model. Follows RFC5789 patch semantics.
- Missing fields are not updated. To clear a field, explicitly
- set to default value.
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.PatchModelRequest)
- },
-)
-_sym_db.RegisterMessage(PatchModelRequest)
-
-DeleteModelRequest = _reflection.GeneratedProtocolMessageType(
- "DeleteModelRequest",
- (_message.Message,),
- {
- "DESCRIPTOR": _DELETEMODELREQUEST,
- "__module__": "google.cloud.bigquery_v2.proto.model_pb2",
- "__doc__": """Protocol buffer.
-
- Attributes:
- project_id:
- Required. Project ID of the model to delete.
- dataset_id:
- Required. Dataset ID of the model to delete.
- model_id:
- Required. Model ID of the model to delete.
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.DeleteModelRequest)
- },
-)
-_sym_db.RegisterMessage(DeleteModelRequest)
-
-ListModelsRequest = _reflection.GeneratedProtocolMessageType(
- "ListModelsRequest",
- (_message.Message,),
- {
- "DESCRIPTOR": _LISTMODELSREQUEST,
- "__module__": "google.cloud.bigquery_v2.proto.model_pb2",
- "__doc__": """Protocol buffer.
-
- Attributes:
- project_id:
- Required. Project ID of the models to list.
- dataset_id:
- Required. Dataset ID of the models to list.
- max_results:
- The maximum number of results to return in a single response
- page. Leverage the page tokens to iterate through the entire
- collection.
- page_token:
- Page token, returned by a previous call to request the next
- page of results
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.ListModelsRequest)
- },
-)
-_sym_db.RegisterMessage(ListModelsRequest)
-
-ListModelsResponse = _reflection.GeneratedProtocolMessageType(
- "ListModelsResponse",
- (_message.Message,),
- {
- "DESCRIPTOR": _LISTMODELSRESPONSE,
- "__module__": "google.cloud.bigquery_v2.proto.model_pb2",
- "__doc__": """Protocol buffer.
-
- Attributes:
- models:
- Models in the requested dataset. Only the following fields are
- populated: model_reference, model_type, creation_time,
- last_modified_time and labels.
- next_page_token:
- A token to request the next page of results.
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.ListModelsResponse)
- },
-)
-_sym_db.RegisterMessage(ListModelsResponse)
-
-
-DESCRIPTOR._options = None
-_MODEL_TRAININGRUN_TRAININGOPTIONS_LABELCLASSWEIGHTSENTRY._options = None
-_MODEL_LABELSENTRY._options = None
-_MODEL.fields_by_name["etag"]._options = None
-_MODEL.fields_by_name["model_reference"]._options = None
-_MODEL.fields_by_name["creation_time"]._options = None
-_MODEL.fields_by_name["last_modified_time"]._options = None
-_MODEL.fields_by_name["description"]._options = None
-_MODEL.fields_by_name["friendly_name"]._options = None
-_MODEL.fields_by_name["expiration_time"]._options = None
-_MODEL.fields_by_name["location"]._options = None
-_MODEL.fields_by_name["model_type"]._options = None
-_MODEL.fields_by_name["training_runs"]._options = None
-_MODEL.fields_by_name["feature_columns"]._options = None
-_MODEL.fields_by_name["label_columns"]._options = None
-_GETMODELREQUEST.fields_by_name["project_id"]._options = None
-_GETMODELREQUEST.fields_by_name["dataset_id"]._options = None
-_GETMODELREQUEST.fields_by_name["model_id"]._options = None
-_PATCHMODELREQUEST.fields_by_name["project_id"]._options = None
-_PATCHMODELREQUEST.fields_by_name["dataset_id"]._options = None
-_PATCHMODELREQUEST.fields_by_name["model_id"]._options = None
-_PATCHMODELREQUEST.fields_by_name["model"]._options = None
-_DELETEMODELREQUEST.fields_by_name["project_id"]._options = None
-_DELETEMODELREQUEST.fields_by_name["dataset_id"]._options = None
-_DELETEMODELREQUEST.fields_by_name["model_id"]._options = None
-_LISTMODELSREQUEST.fields_by_name["project_id"]._options = None
-_LISTMODELSREQUEST.fields_by_name["dataset_id"]._options = None
-
-_MODELSERVICE = _descriptor.ServiceDescriptor(
- name="ModelService",
- full_name="google.cloud.bigquery.v2.ModelService",
- file=DESCRIPTOR,
- index=0,
- serialized_options=b"\312A\027bigquery.googleapis.com\322A\302\001https://www.googleapis.com/auth/bigquery,https://www.googleapis.com/auth/bigquery.readonly,https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/cloud-platform.read-only",
- create_key=_descriptor._internal_create_key,
- serialized_start=7804,
- serialized_end=8566,
- methods=[
- _descriptor.MethodDescriptor(
- name="GetModel",
- full_name="google.cloud.bigquery.v2.ModelService.GetModel",
- index=0,
- containing_service=None,
- input_type=_GETMODELREQUEST,
- output_type=_MODEL,
- serialized_options=b"\332A\036project_id,dataset_id,model_id",
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.MethodDescriptor(
- name="ListModels",
- full_name="google.cloud.bigquery.v2.ModelService.ListModels",
- index=1,
- containing_service=None,
- input_type=_LISTMODELSREQUEST,
- output_type=_LISTMODELSRESPONSE,
- serialized_options=b"\332A!project_id,dataset_id,max_results",
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.MethodDescriptor(
- name="PatchModel",
- full_name="google.cloud.bigquery.v2.ModelService.PatchModel",
- index=2,
- containing_service=None,
- input_type=_PATCHMODELREQUEST,
- output_type=_MODEL,
- serialized_options=b"\332A$project_id,dataset_id,model_id,model",
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.MethodDescriptor(
- name="DeleteModel",
- full_name="google.cloud.bigquery.v2.ModelService.DeleteModel",
- index=3,
- containing_service=None,
- input_type=_DELETEMODELREQUEST,
- output_type=google_dot_protobuf_dot_empty__pb2._EMPTY,
- serialized_options=b"\332A\036project_id,dataset_id,model_id",
- create_key=_descriptor._internal_create_key,
- ),
- ],
-)
-_sym_db.RegisterServiceDescriptor(_MODELSERVICE)
-
-DESCRIPTOR.services_by_name["ModelService"] = _MODELSERVICE
-
-# @@protoc_insertion_point(module_scope)
diff --git a/google/cloud/bigquery_v2/proto/model_reference.proto b/google/cloud/bigquery_v2/proto/model_reference.proto
deleted file mode 100644
index c3d1a49a8..000000000
--- a/google/cloud/bigquery_v2/proto/model_reference.proto
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright 2020 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-syntax = "proto3";
-
-package google.cloud.bigquery.v2;
-
-import "google/api/field_behavior.proto";
-import "google/api/annotations.proto";
-
-option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery";
-option java_outer_classname = "ModelReferenceProto";
-option java_package = "com.google.cloud.bigquery.v2";
-
-// Id path of a model.
-message ModelReference {
- // Required. The ID of the project containing this model.
- string project_id = 1 [(google.api.field_behavior) = REQUIRED];
-
- // Required. The ID of the dataset containing this model.
- string dataset_id = 2 [(google.api.field_behavior) = REQUIRED];
-
- // Required. The ID of the model. The ID must contain only
- // letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum
- // length is 1,024 characters.
- string model_id = 3 [(google.api.field_behavior) = REQUIRED];
-}
diff --git a/google/cloud/bigquery_v2/proto/model_reference_pb2.py b/google/cloud/bigquery_v2/proto/model_reference_pb2.py
deleted file mode 100644
index 2411c4863..000000000
--- a/google/cloud/bigquery_v2/proto/model_reference_pb2.py
+++ /dev/null
@@ -1,142 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: google/cloud/bigquery_v2/proto/model_reference.proto
-"""Generated protocol buffer code."""
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2
-from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name="google/cloud/bigquery_v2/proto/model_reference.proto",
- package="google.cloud.bigquery.v2",
- syntax="proto3",
- serialized_options=b"\n\034com.google.cloud.bigquery.v2B\023ModelReferenceProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery",
- create_key=_descriptor._internal_create_key,
- serialized_pb=b'\n4google/cloud/bigquery_v2/proto/model_reference.proto\x12\x18google.cloud.bigquery.v2\x1a\x1fgoogle/api/field_behavior.proto\x1a\x1cgoogle/api/annotations.proto"Y\n\x0eModelReference\x12\x17\n\nproject_id\x18\x01 \x01(\tB\x03\xe0\x41\x02\x12\x17\n\ndataset_id\x18\x02 \x01(\tB\x03\xe0\x41\x02\x12\x15\n\x08model_id\x18\x03 \x01(\tB\x03\xe0\x41\x02\x42u\n\x1c\x63om.google.cloud.bigquery.v2B\x13ModelReferenceProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3',
- dependencies=[
- google_dot_api_dot_field__behavior__pb2.DESCRIPTOR,
- google_dot_api_dot_annotations__pb2.DESCRIPTOR,
- ],
-)
-
-
-_MODELREFERENCE = _descriptor.Descriptor(
- name="ModelReference",
- full_name="google.cloud.bigquery.v2.ModelReference",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="project_id",
- full_name="google.cloud.bigquery.v2.ModelReference.project_id",
- index=0,
- number=1,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\002",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="dataset_id",
- full_name="google.cloud.bigquery.v2.ModelReference.dataset_id",
- index=1,
- number=2,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\002",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="model_id",
- full_name="google.cloud.bigquery.v2.ModelReference.model_id",
- index=2,
- number=3,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\002",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=145,
- serialized_end=234,
-)
-
-DESCRIPTOR.message_types_by_name["ModelReference"] = _MODELREFERENCE
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-ModelReference = _reflection.GeneratedProtocolMessageType(
- "ModelReference",
- (_message.Message,),
- {
- "DESCRIPTOR": _MODELREFERENCE,
- "__module__": "google.cloud.bigquery_v2.proto.model_reference_pb2",
- "__doc__": """Id path of a model.
-
- Attributes:
- project_id:
- Required. The ID of the project containing this model.
- dataset_id:
- Required. The ID of the dataset containing this model.
- model_id:
- Required. The ID of the model. The ID must contain only
- letters (a-z, A-Z), numbers (0-9), or underscores (_). The
- maximum length is 1,024 characters.
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.ModelReference)
- },
-)
-_sym_db.RegisterMessage(ModelReference)
-
-
-DESCRIPTOR._options = None
-_MODELREFERENCE.fields_by_name["project_id"]._options = None
-_MODELREFERENCE.fields_by_name["dataset_id"]._options = None
-_MODELREFERENCE.fields_by_name["model_id"]._options = None
-# @@protoc_insertion_point(module_scope)
diff --git a/google/cloud/bigquery_v2/proto/standard_sql.proto b/google/cloud/bigquery_v2/proto/standard_sql.proto
deleted file mode 100644
index 1514eccbb..000000000
--- a/google/cloud/bigquery_v2/proto/standard_sql.proto
+++ /dev/null
@@ -1,112 +0,0 @@
-// Copyright 2020 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-syntax = "proto3";
-
-package google.cloud.bigquery.v2;
-
-import "google/api/field_behavior.proto";
-import "google/api/annotations.proto";
-
-option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery";
-option java_outer_classname = "StandardSqlProto";
-option java_package = "com.google.cloud.bigquery.v2";
-
-// The type of a variable, e.g., a function argument.
-// Examples:
-// INT64: {type_kind="INT64"}
-// ARRAY: {type_kind="ARRAY", array_element_type="STRING"}
-// STRUCT>:
-// {type_kind="STRUCT",
-// struct_type={fields=[
-// {name="x", type={type_kind="STRING"}},
-// {name="y", type={type_kind="ARRAY", array_element_type="DATE"}}
-// ]}}
-message StandardSqlDataType {
- enum TypeKind {
- // Invalid type.
- TYPE_KIND_UNSPECIFIED = 0;
-
- // Encoded as a string in decimal format.
- INT64 = 2;
-
- // Encoded as a boolean "false" or "true".
- BOOL = 5;
-
- // Encoded as a number, or string "NaN", "Infinity" or "-Infinity".
- FLOAT64 = 7;
-
- // Encoded as a string value.
- STRING = 8;
-
- // Encoded as a base64 string per RFC 4648, section 4.
- BYTES = 9;
-
- // Encoded as an RFC 3339 timestamp with mandatory "Z" time zone string:
- // 1985-04-12T23:20:50.52Z
- TIMESTAMP = 19;
-
- // Encoded as RFC 3339 full-date format string: 1985-04-12
- DATE = 10;
-
- // Encoded as RFC 3339 partial-time format string: 23:20:50.52
- TIME = 20;
-
- // Encoded as RFC 3339 full-date "T" partial-time: 1985-04-12T23:20:50.52
- DATETIME = 21;
-
- // Encoded as WKT
- GEOGRAPHY = 22;
-
- // Encoded as a decimal string.
- NUMERIC = 23;
-
- // Encoded as a decimal string.
- BIGNUMERIC = 24;
-
- // Encoded as a list with types matching Type.array_type.
- ARRAY = 16;
-
- // Encoded as a list with fields of type Type.struct_type[i]. List is used
- // because a JSON object cannot have duplicate field names.
- STRUCT = 17;
- }
-
- // Required. The top level type of this field.
- // Can be any standard SQL data type (e.g., "INT64", "DATE", "ARRAY").
- TypeKind type_kind = 1 [(google.api.field_behavior) = REQUIRED];
-
- oneof sub_type {
- // The type of the array's elements, if type_kind = "ARRAY".
- StandardSqlDataType array_element_type = 2;
-
- // The fields of this struct, in order, if type_kind = "STRUCT".
- StandardSqlStructType struct_type = 3;
- }
-}
-
-// A field or a column.
-message StandardSqlField {
- // Optional. The name of this field. Can be absent for struct fields.
- string name = 1 [(google.api.field_behavior) = OPTIONAL];
-
- // Optional. The type of this parameter. Absent if not explicitly
- // specified (e.g., CREATE FUNCTION statement can omit the return type;
- // in this case the output parameter does not have this "type" field).
- StandardSqlDataType type = 2 [(google.api.field_behavior) = OPTIONAL];
-}
-
-message StandardSqlStructType {
- repeated StandardSqlField fields = 1;
-}
diff --git a/google/cloud/bigquery_v2/proto/standard_sql_pb2.py b/google/cloud/bigquery_v2/proto/standard_sql_pb2.py
deleted file mode 100644
index bfe77f934..000000000
--- a/google/cloud/bigquery_v2/proto/standard_sql_pb2.py
+++ /dev/null
@@ -1,442 +0,0 @@
-# -*- coding: utf-8 -*-
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: google/cloud/bigquery_v2/proto/standard_sql.proto
-"""Generated protocol buffer code."""
-from google.protobuf import descriptor as _descriptor
-from google.protobuf import message as _message
-from google.protobuf import reflection as _reflection
-from google.protobuf import symbol_database as _symbol_database
-
-# @@protoc_insertion_point(imports)
-
-_sym_db = _symbol_database.Default()
-
-
-from google.api import field_behavior_pb2 as google_dot_api_dot_field__behavior__pb2
-from google.api import annotations_pb2 as google_dot_api_dot_annotations__pb2
-
-
-DESCRIPTOR = _descriptor.FileDescriptor(
- name="google/cloud/bigquery_v2/proto/standard_sql.proto",
- package="google.cloud.bigquery.v2",
- syntax="proto3",
- serialized_options=b"\n\034com.google.cloud.bigquery.v2B\020StandardSqlProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery",
- create_key=_descriptor._internal_create_key,
- serialized_pb=b'\n1google/cloud/bigquery_v2/proto/standard_sql.proto\x12\x18google.cloud.bigquery.v2\x1a\x1fgoogle/api/field_behavior.proto\x1a\x1cgoogle/api/annotations.proto"\xcb\x03\n\x13StandardSqlDataType\x12N\n\ttype_kind\x18\x01 \x01(\x0e\x32\x36.google.cloud.bigquery.v2.StandardSqlDataType.TypeKindB\x03\xe0\x41\x02\x12K\n\x12\x61rray_element_type\x18\x02 \x01(\x0b\x32-.google.cloud.bigquery.v2.StandardSqlDataTypeH\x00\x12\x46\n\x0bstruct_type\x18\x03 \x01(\x0b\x32/.google.cloud.bigquery.v2.StandardSqlStructTypeH\x00"\xc2\x01\n\x08TypeKind\x12\x19\n\x15TYPE_KIND_UNSPECIFIED\x10\x00\x12\t\n\x05INT64\x10\x02\x12\x08\n\x04\x42OOL\x10\x05\x12\x0b\n\x07\x46LOAT64\x10\x07\x12\n\n\x06STRING\x10\x08\x12\t\n\x05\x42YTES\x10\t\x12\r\n\tTIMESTAMP\x10\x13\x12\x08\n\x04\x44\x41TE\x10\n\x12\x08\n\x04TIME\x10\x14\x12\x0c\n\x08\x44\x41TETIME\x10\x15\x12\r\n\tGEOGRAPHY\x10\x16\x12\x0b\n\x07NUMERIC\x10\x17\x12\t\n\x05\x41RRAY\x10\x10\x12\n\n\x06STRUCT\x10\x11\x42\n\n\x08sub_type"g\n\x10StandardSqlField\x12\x11\n\x04name\x18\x01 \x01(\tB\x03\xe0\x41\x01\x12@\n\x04type\x18\x02 \x01(\x0b\x32-.google.cloud.bigquery.v2.StandardSqlDataTypeB\x03\xe0\x41\x01"S\n\x15StandardSqlStructType\x12:\n\x06\x66ields\x18\x01 \x03(\x0b\x32*.google.cloud.bigquery.v2.StandardSqlFieldBr\n\x1c\x63om.google.cloud.bigquery.v2B\x10StandardSqlProtoZ@google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigqueryb\x06proto3',
- dependencies=[
- google_dot_api_dot_field__behavior__pb2.DESCRIPTOR,
- google_dot_api_dot_annotations__pb2.DESCRIPTOR,
- ],
-)
-
-
-_STANDARDSQLDATATYPE_TYPEKIND = _descriptor.EnumDescriptor(
- name="TypeKind",
- full_name="google.cloud.bigquery.v2.StandardSqlDataType.TypeKind",
- filename=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- values=[
- _descriptor.EnumValueDescriptor(
- name="TYPE_KIND_UNSPECIFIED",
- index=0,
- number=0,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="INT64",
- index=1,
- number=2,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="BOOL",
- index=2,
- number=5,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="FLOAT64",
- index=3,
- number=7,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="STRING",
- index=4,
- number=8,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="BYTES",
- index=5,
- number=9,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="TIMESTAMP",
- index=6,
- number=19,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="DATE",
- index=7,
- number=10,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="TIME",
- index=8,
- number=20,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="DATETIME",
- index=9,
- number=21,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="GEOGRAPHY",
- index=10,
- number=22,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="NUMERIC",
- index=11,
- number=23,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="ARRAY",
- index=12,
- number=16,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.EnumValueDescriptor(
- name="STRUCT",
- index=13,
- number=17,
- serialized_options=None,
- type=None,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- containing_type=None,
- serialized_options=None,
- serialized_start=396,
- serialized_end=590,
-)
-_sym_db.RegisterEnumDescriptor(_STANDARDSQLDATATYPE_TYPEKIND)
-
-
-_STANDARDSQLDATATYPE = _descriptor.Descriptor(
- name="StandardSqlDataType",
- full_name="google.cloud.bigquery.v2.StandardSqlDataType",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="type_kind",
- full_name="google.cloud.bigquery.v2.StandardSqlDataType.type_kind",
- index=0,
- number=1,
- type=14,
- cpp_type=8,
- label=1,
- has_default_value=False,
- default_value=0,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\002",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="array_element_type",
- full_name="google.cloud.bigquery.v2.StandardSqlDataType.array_element_type",
- index=1,
- number=2,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="struct_type",
- full_name="google.cloud.bigquery.v2.StandardSqlDataType.struct_type",
- index=2,
- number=3,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[],
- enum_types=[_STANDARDSQLDATATYPE_TYPEKIND,],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[
- _descriptor.OneofDescriptor(
- name="sub_type",
- full_name="google.cloud.bigquery.v2.StandardSqlDataType.sub_type",
- index=0,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[],
- ),
- ],
- serialized_start=143,
- serialized_end=602,
-)
-
-
-_STANDARDSQLFIELD = _descriptor.Descriptor(
- name="StandardSqlField",
- full_name="google.cloud.bigquery.v2.StandardSqlField",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="name",
- full_name="google.cloud.bigquery.v2.StandardSqlField.name",
- index=0,
- number=1,
- type=9,
- cpp_type=9,
- label=1,
- has_default_value=False,
- default_value=b"".decode("utf-8"),
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\001",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- _descriptor.FieldDescriptor(
- name="type",
- full_name="google.cloud.bigquery.v2.StandardSqlField.type",
- index=1,
- number=2,
- type=11,
- cpp_type=10,
- label=1,
- has_default_value=False,
- default_value=None,
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=b"\340A\001",
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=604,
- serialized_end=707,
-)
-
-
-_STANDARDSQLSTRUCTTYPE = _descriptor.Descriptor(
- name="StandardSqlStructType",
- full_name="google.cloud.bigquery.v2.StandardSqlStructType",
- filename=None,
- file=DESCRIPTOR,
- containing_type=None,
- create_key=_descriptor._internal_create_key,
- fields=[
- _descriptor.FieldDescriptor(
- name="fields",
- full_name="google.cloud.bigquery.v2.StandardSqlStructType.fields",
- index=0,
- number=1,
- type=11,
- cpp_type=10,
- label=3,
- has_default_value=False,
- default_value=[],
- message_type=None,
- enum_type=None,
- containing_type=None,
- is_extension=False,
- extension_scope=None,
- serialized_options=None,
- file=DESCRIPTOR,
- create_key=_descriptor._internal_create_key,
- ),
- ],
- extensions=[],
- nested_types=[],
- enum_types=[],
- serialized_options=None,
- is_extendable=False,
- syntax="proto3",
- extension_ranges=[],
- oneofs=[],
- serialized_start=709,
- serialized_end=792,
-)
-
-_STANDARDSQLDATATYPE.fields_by_name[
- "type_kind"
-].enum_type = _STANDARDSQLDATATYPE_TYPEKIND
-_STANDARDSQLDATATYPE.fields_by_name[
- "array_element_type"
-].message_type = _STANDARDSQLDATATYPE
-_STANDARDSQLDATATYPE.fields_by_name["struct_type"].message_type = _STANDARDSQLSTRUCTTYPE
-_STANDARDSQLDATATYPE_TYPEKIND.containing_type = _STANDARDSQLDATATYPE
-_STANDARDSQLDATATYPE.oneofs_by_name["sub_type"].fields.append(
- _STANDARDSQLDATATYPE.fields_by_name["array_element_type"]
-)
-_STANDARDSQLDATATYPE.fields_by_name[
- "array_element_type"
-].containing_oneof = _STANDARDSQLDATATYPE.oneofs_by_name["sub_type"]
-_STANDARDSQLDATATYPE.oneofs_by_name["sub_type"].fields.append(
- _STANDARDSQLDATATYPE.fields_by_name["struct_type"]
-)
-_STANDARDSQLDATATYPE.fields_by_name[
- "struct_type"
-].containing_oneof = _STANDARDSQLDATATYPE.oneofs_by_name["sub_type"]
-_STANDARDSQLFIELD.fields_by_name["type"].message_type = _STANDARDSQLDATATYPE
-_STANDARDSQLSTRUCTTYPE.fields_by_name["fields"].message_type = _STANDARDSQLFIELD
-DESCRIPTOR.message_types_by_name["StandardSqlDataType"] = _STANDARDSQLDATATYPE
-DESCRIPTOR.message_types_by_name["StandardSqlField"] = _STANDARDSQLFIELD
-DESCRIPTOR.message_types_by_name["StandardSqlStructType"] = _STANDARDSQLSTRUCTTYPE
-_sym_db.RegisterFileDescriptor(DESCRIPTOR)
-
-StandardSqlDataType = _reflection.GeneratedProtocolMessageType(
- "StandardSqlDataType",
- (_message.Message,),
- {
- "DESCRIPTOR": _STANDARDSQLDATATYPE,
- "__module__": "google.cloud.bigquery_v2.proto.standard_sql_pb2",
- "__doc__": """The type of a variable, e.g., a function argument. Examples: INT64:
- {type_kind=``INT64``} ARRAY: {type_kind=``ARRAY``,
- array_element_type=``STRING``} STRUCT:
- {type_kind=``STRUCT``, struct_type={fields=[ {name=``x``,
- type={type_kind=``STRING``}}, {name=``y``, type={type_kind=``ARRAY``,
- array_element_type=``DATE``}} ]}}
-
- Attributes:
- type_kind:
- Required. The top level type of this field. Can be any
- standard SQL data type (e.g., ``INT64``, ``DATE``, ``ARRAY``).
- array_element_type:
- The type of the array’s elements, if type_kind = ``ARRAY``.
- struct_type:
- The fields of this struct, in order, if type_kind = ``STRUCT``.
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.StandardSqlDataType)
- },
-)
-_sym_db.RegisterMessage(StandardSqlDataType)
-
-StandardSqlField = _reflection.GeneratedProtocolMessageType(
- "StandardSqlField",
- (_message.Message,),
- {
- "DESCRIPTOR": _STANDARDSQLFIELD,
- "__module__": "google.cloud.bigquery_v2.proto.standard_sql_pb2",
- "__doc__": """A field or a column.
-
- Attributes:
- name:
- Optional. The name of this field. Can be absent for struct
- fields.
- type:
- Optional. The type of this parameter. Absent if not explicitly
- specified (e.g., CREATE FUNCTION statement can omit the return
- type; in this case the output parameter does not have this
- ``type`` field).
- """,
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.StandardSqlField)
- },
-)
-_sym_db.RegisterMessage(StandardSqlField)
-
-StandardSqlStructType = _reflection.GeneratedProtocolMessageType(
- "StandardSqlStructType",
- (_message.Message,),
- {
- "DESCRIPTOR": _STANDARDSQLSTRUCTTYPE,
- "__module__": "google.cloud.bigquery_v2.proto.standard_sql_pb2"
- # @@protoc_insertion_point(class_scope:google.cloud.bigquery.v2.StandardSqlStructType)
- },
-)
-_sym_db.RegisterMessage(StandardSqlStructType)
-
-
-DESCRIPTOR._options = None
-_STANDARDSQLDATATYPE.fields_by_name["type_kind"]._options = None
-_STANDARDSQLFIELD.fields_by_name["name"]._options = None
-_STANDARDSQLFIELD.fields_by_name["type"]._options = None
-# @@protoc_insertion_point(module_scope)
diff --git a/google/cloud/bigquery_v2/proto/table_reference.proto b/google/cloud/bigquery_v2/proto/table_reference.proto
deleted file mode 100644
index ba02f80c4..000000000
--- a/google/cloud/bigquery_v2/proto/table_reference.proto
+++ /dev/null
@@ -1,39 +0,0 @@
-// Copyright 2020 Google LLC
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-syntax = "proto3";
-
-package google.cloud.bigquery.v2;
-
-import "google/api/field_behavior.proto";
-import "google/api/annotations.proto";
-
-option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/v2;bigquery";
-option java_outer_classname = "TableReferenceProto";
-option java_package = "com.google.cloud.bigquery.v2";
-
-message TableReference {
- // Required. The ID of the project containing this table.
- string project_id = 1 [(google.api.field_behavior) = REQUIRED];
-
- // Required. The ID of the dataset containing this table.
- string dataset_id = 2 [(google.api.field_behavior) = REQUIRED];
-
- // Required. The ID of the table. The ID must contain only
- // letters (a-z, A-Z), numbers (0-9), or underscores (_). The maximum
- // length is 1,024 characters. Certain operations allow
- // suffixing of the table ID with a partition decorator, such as
- // `sample_table$20190123`.
- string table_id = 3 [(google.api.field_behavior) = REQUIRED];
-}
diff --git a/google/cloud/bigquery_v2/types/__init__.py b/google/cloud/bigquery_v2/types/__init__.py
index 1e354641a..83bbb3a54 100644
--- a/google/cloud/bigquery_v2/types/__init__.py
+++ b/google/cloud/bigquery_v2/types/__init__.py
@@ -1,5 +1,4 @@
# -*- coding: utf-8 -*-
-
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,36 +13,36 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-
from .encryption_config import EncryptionConfiguration
+from .model import (
+ DeleteModelRequest,
+ GetModelRequest,
+ ListModelsRequest,
+ ListModelsResponse,
+ Model,
+ PatchModelRequest,
+)
from .model_reference import ModelReference
from .standard_sql import (
StandardSqlDataType,
StandardSqlField,
StandardSqlStructType,
+ StandardSqlTableType,
)
from .table_reference import TableReference
-from .model import (
- Model,
- GetModelRequest,
- PatchModelRequest,
- DeleteModelRequest,
- ListModelsRequest,
- ListModelsResponse,
-)
-
__all__ = (
"EncryptionConfiguration",
+ "DeleteModelRequest",
+ "GetModelRequest",
+ "ListModelsRequest",
+ "ListModelsResponse",
+ "Model",
+ "PatchModelRequest",
"ModelReference",
"StandardSqlDataType",
"StandardSqlField",
"StandardSqlStructType",
+ "StandardSqlTableType",
"TableReference",
- "Model",
- "GetModelRequest",
- "PatchModelRequest",
- "DeleteModelRequest",
- "ListModelsRequest",
- "ListModelsResponse",
)
diff --git a/google/cloud/bigquery_v2/types/encryption_config.py b/google/cloud/bigquery_v2/types/encryption_config.py
index 6fb90f340..4b9139733 100644
--- a/google/cloud/bigquery_v2/types/encryption_config.py
+++ b/google/cloud/bigquery_v2/types/encryption_config.py
@@ -1,5 +1,4 @@
# -*- coding: utf-8 -*-
-
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,11 +13,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-
import proto # type: ignore
-
-from google.protobuf import wrappers_pb2 as wrappers # type: ignore
+from google.protobuf import wrappers_pb2 # type: ignore
__protobuf__ = proto.module(
@@ -28,9 +25,8 @@
class EncryptionConfiguration(proto.Message):
r"""
-
Attributes:
- kms_key_name (~.wrappers.StringValue):
+ kms_key_name (google.protobuf.wrappers_pb2.StringValue):
Optional. Describes the Cloud KMS encryption
key that will be used to protect destination
BigQuery table. The BigQuery Service Account
@@ -38,7 +34,9 @@ class EncryptionConfiguration(proto.Message):
this encryption key.
"""
- kms_key_name = proto.Field(proto.MESSAGE, number=1, message=wrappers.StringValue,)
+ kms_key_name = proto.Field(
+ proto.MESSAGE, number=1, message=wrappers_pb2.StringValue,
+ )
__all__ = tuple(sorted(__protobuf__.manifest))
diff --git a/google/cloud/bigquery_v2/types/model.py b/google/cloud/bigquery_v2/types/model.py
index c3530dec2..706418401 100644
--- a/google/cloud/bigquery_v2/types/model.py
+++ b/google/cloud/bigquery_v2/types/model.py
@@ -1,5 +1,4 @@
# -*- coding: utf-8 -*-
-
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,16 +13,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-
import proto # type: ignore
-
from google.cloud.bigquery_v2.types import encryption_config
from google.cloud.bigquery_v2.types import model_reference as gcb_model_reference
from google.cloud.bigquery_v2.types import standard_sql
from google.cloud.bigquery_v2.types import table_reference
-from google.protobuf import timestamp_pb2 as timestamp # type: ignore
-from google.protobuf import wrappers_pb2 as wrappers # type: ignore
+from google.protobuf import timestamp_pb2 # type: ignore
+from google.protobuf import wrappers_pb2 # type: ignore
__protobuf__ = proto.module(
@@ -41,11 +38,10 @@
class Model(proto.Message):
r"""
-
Attributes:
etag (str):
Output only. A hash of this resource.
- model_reference (~.gcb_model_reference.ModelReference):
+ model_reference (google.cloud.bigquery_v2.types.ModelReference):
Required. Unique identifier for this model.
creation_time (int):
Output only. The time when this model was
@@ -58,7 +54,7 @@ class Model(proto.Message):
model.
friendly_name (str):
Optional. A descriptive name for this model.
- labels (Sequence[~.gcb_model.Model.LabelsEntry]):
+ labels (Sequence[google.cloud.bigquery_v2.types.Model.LabelsEntry]):
The labels associated with this model. You
can use these to organize and group your models.
Label keys and values can be no longer than 63
@@ -81,25 +77,27 @@ class Model(proto.Message):
Output only. The geographic location where
the model resides. This value is inherited from
the dataset.
- encryption_configuration (~.encryption_config.EncryptionConfiguration):
+ encryption_configuration (google.cloud.bigquery_v2.types.EncryptionConfiguration):
Custom encryption configuration (e.g., Cloud
KMS keys). This shows the encryption
configuration of the model data while stored in
BigQuery storage. This field can be used with
PatchModel to update encryption key for an
already encrypted model.
- model_type (~.gcb_model.Model.ModelType):
+ model_type (google.cloud.bigquery_v2.types.Model.ModelType):
Output only. Type of the model resource.
- training_runs (Sequence[~.gcb_model.Model.TrainingRun]):
+ training_runs (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun]):
Output only. Information for all training runs in increasing
order of start_time.
- feature_columns (Sequence[~.standard_sql.StandardSqlField]):
+ feature_columns (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]):
Output only. Input feature columns that were
used to train this model.
- label_columns (Sequence[~.standard_sql.StandardSqlField]):
+ label_columns (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]):
Output only. Label columns that were used to train this
model. The output of the model will have a `predicted_`
prefix to these columns.
+ best_trial_id (int):
+ The best trial_id across all training runs.
"""
class ModelType(proto.Enum):
@@ -117,6 +115,7 @@ class ModelType(proto.Enum):
ARIMA = 11
AUTOML_REGRESSOR = 12
AUTOML_CLASSIFIER = 13
+ ARIMA_PLUS = 19
class LossType(proto.Enum):
r"""Loss metric to evaluate model training performance."""
@@ -155,6 +154,7 @@ class DataFrequency(proto.Enum):
WEEKLY = 5
DAILY = 6
HOURLY = 7
+ PER_MINUTE = 8
class HolidayRegion(proto.Enum):
r"""Type of supported holiday regions for time series forecasting
@@ -251,7 +251,7 @@ class FeedbackType(proto.Enum):
EXPLICIT = 2
class SeasonalPeriod(proto.Message):
- r""""""
+ r""" """
class SeasonalPeriodType(proto.Enum):
r""""""
@@ -264,7 +264,7 @@ class SeasonalPeriodType(proto.Enum):
YEARLY = 6
class KmeansEnums(proto.Message):
- r""""""
+ r""" """
class KmeansInitializationMethod(proto.Enum):
r"""Indicates the method used to initialize the centroids for
@@ -280,35 +280,33 @@ class RegressionMetrics(proto.Message):
matrix factorization models.
Attributes:
- mean_absolute_error (~.wrappers.DoubleValue):
+ mean_absolute_error (google.protobuf.wrappers_pb2.DoubleValue):
Mean absolute error.
- mean_squared_error (~.wrappers.DoubleValue):
+ mean_squared_error (google.protobuf.wrappers_pb2.DoubleValue):
Mean squared error.
- mean_squared_log_error (~.wrappers.DoubleValue):
+ mean_squared_log_error (google.protobuf.wrappers_pb2.DoubleValue):
Mean squared log error.
- median_absolute_error (~.wrappers.DoubleValue):
+ median_absolute_error (google.protobuf.wrappers_pb2.DoubleValue):
Median absolute error.
- r_squared (~.wrappers.DoubleValue):
- R^2 score.
+ r_squared (google.protobuf.wrappers_pb2.DoubleValue):
+ R^2 score. This corresponds to r2_score in ML.EVALUATE.
"""
mean_absolute_error = proto.Field(
- proto.MESSAGE, number=1, message=wrappers.DoubleValue,
+ proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue,
)
-
mean_squared_error = proto.Field(
- proto.MESSAGE, number=2, message=wrappers.DoubleValue,
+ proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue,
)
-
mean_squared_log_error = proto.Field(
- proto.MESSAGE, number=3, message=wrappers.DoubleValue,
+ proto.MESSAGE, number=3, message=wrappers_pb2.DoubleValue,
)
-
median_absolute_error = proto.Field(
- proto.MESSAGE, number=4, message=wrappers.DoubleValue,
+ proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue,
+ )
+ r_squared = proto.Field(
+ proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue,
)
-
- r_squared = proto.Field(proto.MESSAGE, number=5, message=wrappers.DoubleValue,)
class AggregateClassificationMetrics(proto.Message):
r"""Aggregate metrics for classification/classifier models. For
@@ -319,59 +317,65 @@ class AggregateClassificationMetrics(proto.Message):
by counting the total number of correctly predicted rows.
Attributes:
- precision (~.wrappers.DoubleValue):
+ precision (google.protobuf.wrappers_pb2.DoubleValue):
Precision is the fraction of actual positive
predictions that had positive actual labels. For
multiclass this is a macro-averaged metric
treating each class as a binary classifier.
- recall (~.wrappers.DoubleValue):
+ recall (google.protobuf.wrappers_pb2.DoubleValue):
Recall is the fraction of actual positive
labels that were given a positive prediction.
For multiclass this is a macro-averaged metric.
- accuracy (~.wrappers.DoubleValue):
+ accuracy (google.protobuf.wrappers_pb2.DoubleValue):
Accuracy is the fraction of predictions given
the correct label. For multiclass this is a
micro-averaged metric.
- threshold (~.wrappers.DoubleValue):
+ threshold (google.protobuf.wrappers_pb2.DoubleValue):
Threshold at which the metrics are computed.
For binary classification models this is the
positive class threshold. For multi-class
classfication models this is the confidence
threshold.
- f1_score (~.wrappers.DoubleValue):
+ f1_score (google.protobuf.wrappers_pb2.DoubleValue):
The F1 score is an average of recall and
precision. For multiclass this is a macro-
averaged metric.
- log_loss (~.wrappers.DoubleValue):
+ log_loss (google.protobuf.wrappers_pb2.DoubleValue):
Logarithmic Loss. For multiclass this is a
macro-averaged metric.
- roc_auc (~.wrappers.DoubleValue):
+ roc_auc (google.protobuf.wrappers_pb2.DoubleValue):
Area Under a ROC Curve. For multiclass this
is a macro-averaged metric.
"""
- precision = proto.Field(proto.MESSAGE, number=1, message=wrappers.DoubleValue,)
-
- recall = proto.Field(proto.MESSAGE, number=2, message=wrappers.DoubleValue,)
-
- accuracy = proto.Field(proto.MESSAGE, number=3, message=wrappers.DoubleValue,)
-
- threshold = proto.Field(proto.MESSAGE, number=4, message=wrappers.DoubleValue,)
-
- f1_score = proto.Field(proto.MESSAGE, number=5, message=wrappers.DoubleValue,)
-
- log_loss = proto.Field(proto.MESSAGE, number=6, message=wrappers.DoubleValue,)
-
- roc_auc = proto.Field(proto.MESSAGE, number=7, message=wrappers.DoubleValue,)
+ precision = proto.Field(
+ proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue,
+ )
+ recall = proto.Field(proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue,)
+ accuracy = proto.Field(
+ proto.MESSAGE, number=3, message=wrappers_pb2.DoubleValue,
+ )
+ threshold = proto.Field(
+ proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue,
+ )
+ f1_score = proto.Field(
+ proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue,
+ )
+ log_loss = proto.Field(
+ proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue,
+ )
+ roc_auc = proto.Field(
+ proto.MESSAGE, number=7, message=wrappers_pb2.DoubleValue,
+ )
class BinaryClassificationMetrics(proto.Message):
r"""Evaluation metrics for binary classification/classifier
models.
Attributes:
- aggregate_classification_metrics (~.gcb_model.Model.AggregateClassificationMetrics):
+ aggregate_classification_metrics (google.cloud.bigquery_v2.types.Model.AggregateClassificationMetrics):
Aggregate classification metrics.
- binary_confusion_matrix_list (Sequence[~.gcb_model.Model.BinaryClassificationMetrics.BinaryConfusionMatrix]):
+ binary_confusion_matrix_list (Sequence[google.cloud.bigquery_v2.types.Model.BinaryClassificationMetrics.BinaryConfusionMatrix]):
Binary confusion matrix at multiple
thresholds.
positive_label (str):
@@ -382,134 +386,119 @@ class BinaryClassificationMetrics(proto.Message):
class BinaryConfusionMatrix(proto.Message):
r"""Confusion matrix for binary classification models.
-
Attributes:
- positive_class_threshold (~.wrappers.DoubleValue):
+ positive_class_threshold (google.protobuf.wrappers_pb2.DoubleValue):
Threshold value used when computing each of
the following metric.
- true_positives (~.wrappers.Int64Value):
+ true_positives (google.protobuf.wrappers_pb2.Int64Value):
Number of true samples predicted as true.
- false_positives (~.wrappers.Int64Value):
+ false_positives (google.protobuf.wrappers_pb2.Int64Value):
Number of false samples predicted as true.
- true_negatives (~.wrappers.Int64Value):
+ true_negatives (google.protobuf.wrappers_pb2.Int64Value):
Number of true samples predicted as false.
- false_negatives (~.wrappers.Int64Value):
+ false_negatives (google.protobuf.wrappers_pb2.Int64Value):
Number of false samples predicted as false.
- precision (~.wrappers.DoubleValue):
+ precision (google.protobuf.wrappers_pb2.DoubleValue):
The fraction of actual positive predictions
that had positive actual labels.
- recall (~.wrappers.DoubleValue):
+ recall (google.protobuf.wrappers_pb2.DoubleValue):
The fraction of actual positive labels that
were given a positive prediction.
- f1_score (~.wrappers.DoubleValue):
+ f1_score (google.protobuf.wrappers_pb2.DoubleValue):
The equally weighted average of recall and
precision.
- accuracy (~.wrappers.DoubleValue):
+ accuracy (google.protobuf.wrappers_pb2.DoubleValue):
The fraction of predictions given the correct
label.
"""
positive_class_threshold = proto.Field(
- proto.MESSAGE, number=1, message=wrappers.DoubleValue,
+ proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue,
)
-
true_positives = proto.Field(
- proto.MESSAGE, number=2, message=wrappers.Int64Value,
+ proto.MESSAGE, number=2, message=wrappers_pb2.Int64Value,
)
-
false_positives = proto.Field(
- proto.MESSAGE, number=3, message=wrappers.Int64Value,
+ proto.MESSAGE, number=3, message=wrappers_pb2.Int64Value,
)
-
true_negatives = proto.Field(
- proto.MESSAGE, number=4, message=wrappers.Int64Value,
+ proto.MESSAGE, number=4, message=wrappers_pb2.Int64Value,
)
-
false_negatives = proto.Field(
- proto.MESSAGE, number=5, message=wrappers.Int64Value,
+ proto.MESSAGE, number=5, message=wrappers_pb2.Int64Value,
)
-
precision = proto.Field(
- proto.MESSAGE, number=6, message=wrappers.DoubleValue,
+ proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue,
+ )
+ recall = proto.Field(
+ proto.MESSAGE, number=7, message=wrappers_pb2.DoubleValue,
)
-
- recall = proto.Field(proto.MESSAGE, number=7, message=wrappers.DoubleValue,)
-
f1_score = proto.Field(
- proto.MESSAGE, number=8, message=wrappers.DoubleValue,
+ proto.MESSAGE, number=8, message=wrappers_pb2.DoubleValue,
)
-
accuracy = proto.Field(
- proto.MESSAGE, number=9, message=wrappers.DoubleValue,
+ proto.MESSAGE, number=9, message=wrappers_pb2.DoubleValue,
)
aggregate_classification_metrics = proto.Field(
proto.MESSAGE, number=1, message="Model.AggregateClassificationMetrics",
)
-
binary_confusion_matrix_list = proto.RepeatedField(
proto.MESSAGE,
number=2,
message="Model.BinaryClassificationMetrics.BinaryConfusionMatrix",
)
-
- positive_label = proto.Field(proto.STRING, number=3)
-
- negative_label = proto.Field(proto.STRING, number=4)
+ positive_label = proto.Field(proto.STRING, number=3,)
+ negative_label = proto.Field(proto.STRING, number=4,)
class MultiClassClassificationMetrics(proto.Message):
r"""Evaluation metrics for multi-class classification/classifier
models.
Attributes:
- aggregate_classification_metrics (~.gcb_model.Model.AggregateClassificationMetrics):
+ aggregate_classification_metrics (google.cloud.bigquery_v2.types.Model.AggregateClassificationMetrics):
Aggregate classification metrics.
- confusion_matrix_list (Sequence[~.gcb_model.Model.MultiClassClassificationMetrics.ConfusionMatrix]):
+ confusion_matrix_list (Sequence[google.cloud.bigquery_v2.types.Model.MultiClassClassificationMetrics.ConfusionMatrix]):
Confusion matrix at different thresholds.
"""
class ConfusionMatrix(proto.Message):
r"""Confusion matrix for multi-class classification models.
-
Attributes:
- confidence_threshold (~.wrappers.DoubleValue):
+ confidence_threshold (google.protobuf.wrappers_pb2.DoubleValue):
Confidence threshold used when computing the
entries of the confusion matrix.
- rows (Sequence[~.gcb_model.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row]):
+ rows (Sequence[google.cloud.bigquery_v2.types.Model.MultiClassClassificationMetrics.ConfusionMatrix.Row]):
One row per actual label.
"""
class Entry(proto.Message):
r"""A single entry in the confusion matrix.
-
Attributes:
predicted_label (str):
The predicted label. For confidence_threshold > 0, we will
also add an entry indicating the number of items under the
confidence threshold.
- item_count (~.wrappers.Int64Value):
+ item_count (google.protobuf.wrappers_pb2.Int64Value):
Number of items being predicted as this
label.
"""
- predicted_label = proto.Field(proto.STRING, number=1)
-
+ predicted_label = proto.Field(proto.STRING, number=1,)
item_count = proto.Field(
- proto.MESSAGE, number=2, message=wrappers.Int64Value,
+ proto.MESSAGE, number=2, message=wrappers_pb2.Int64Value,
)
class Row(proto.Message):
r"""A single row in the confusion matrix.
-
Attributes:
actual_label (str):
The original label of this row.
- entries (Sequence[~.gcb_model.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry]):
+ entries (Sequence[google.cloud.bigquery_v2.types.Model.MultiClassClassificationMetrics.ConfusionMatrix.Entry]):
Info describing predicted label distribution.
"""
- actual_label = proto.Field(proto.STRING, number=1)
-
+ actual_label = proto.Field(proto.STRING, number=1,)
entries = proto.RepeatedField(
proto.MESSAGE,
number=2,
@@ -517,9 +506,8 @@ class Row(proto.Message):
)
confidence_threshold = proto.Field(
- proto.MESSAGE, number=1, message=wrappers.DoubleValue,
+ proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue,
)
-
rows = proto.RepeatedField(
proto.MESSAGE,
number=2,
@@ -529,7 +517,6 @@ class Row(proto.Message):
aggregate_classification_metrics = proto.Field(
proto.MESSAGE, number=1, message="Model.AggregateClassificationMetrics",
)
-
confusion_matrix_list = proto.RepeatedField(
proto.MESSAGE,
number=2,
@@ -538,49 +525,45 @@ class Row(proto.Message):
class ClusteringMetrics(proto.Message):
r"""Evaluation metrics for clustering models.
-
Attributes:
- davies_bouldin_index (~.wrappers.DoubleValue):
+ davies_bouldin_index (google.protobuf.wrappers_pb2.DoubleValue):
Davies-Bouldin index.
- mean_squared_distance (~.wrappers.DoubleValue):
+ mean_squared_distance (google.protobuf.wrappers_pb2.DoubleValue):
Mean of squared distances between each sample
to its cluster centroid.
- clusters (Sequence[~.gcb_model.Model.ClusteringMetrics.Cluster]):
- [Beta] Information for all clusters.
+ clusters (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster]):
+ Information for all clusters.
"""
class Cluster(proto.Message):
r"""Message containing the information about one cluster.
-
Attributes:
centroid_id (int):
Centroid id.
- feature_values (Sequence[~.gcb_model.Model.ClusteringMetrics.Cluster.FeatureValue]):
+ feature_values (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster.FeatureValue]):
Values of highly variant features for this
cluster.
- count (~.wrappers.Int64Value):
+ count (google.protobuf.wrappers_pb2.Int64Value):
Count of training data rows that were
assigned to this cluster.
"""
class FeatureValue(proto.Message):
r"""Representative value of a single feature within the cluster.
-
Attributes:
feature_column (str):
The feature column name.
- numerical_value (~.wrappers.DoubleValue):
+ numerical_value (google.protobuf.wrappers_pb2.DoubleValue):
The numerical feature value. This is the
centroid value for this feature.
- categorical_value (~.gcb_model.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue):
+ categorical_value (google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue):
The categorical feature value.
"""
class CategoricalValue(proto.Message):
r"""Representative value of a categorical feature.
-
Attributes:
- category_counts (Sequence[~.gcb_model.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount]):
+ category_counts (Sequence[google.cloud.bigquery_v2.types.Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount]):
Counts of all categories for the categorical feature. If
there are more than ten categories, we return top ten (by
count) and return one more CategoryCount with category
@@ -590,19 +573,17 @@ class CategoricalValue(proto.Message):
class CategoryCount(proto.Message):
r"""Represents the count of a single category within the cluster.
-
Attributes:
category (str):
The name of category.
- count (~.wrappers.Int64Value):
+ count (google.protobuf.wrappers_pb2.Int64Value):
The count of training samples matching the
category within the cluster.
"""
- category = proto.Field(proto.STRING, number=1)
-
+ category = proto.Field(proto.STRING, number=1,)
count = proto.Field(
- proto.MESSAGE, number=2, message=wrappers.Int64Value,
+ proto.MESSAGE, number=2, message=wrappers_pb2.Int64Value,
)
category_counts = proto.RepeatedField(
@@ -611,15 +592,13 @@ class CategoryCount(proto.Message):
message="Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue.CategoryCount",
)
- feature_column = proto.Field(proto.STRING, number=1)
-
+ feature_column = proto.Field(proto.STRING, number=1,)
numerical_value = proto.Field(
proto.MESSAGE,
number=2,
oneof="value",
- message=wrappers.DoubleValue,
+ message=wrappers_pb2.DoubleValue,
)
-
categorical_value = proto.Field(
proto.MESSAGE,
number=3,
@@ -627,24 +606,22 @@ class CategoryCount(proto.Message):
message="Model.ClusteringMetrics.Cluster.FeatureValue.CategoricalValue",
)
- centroid_id = proto.Field(proto.INT64, number=1)
-
+ centroid_id = proto.Field(proto.INT64, number=1,)
feature_values = proto.RepeatedField(
proto.MESSAGE,
number=2,
message="Model.ClusteringMetrics.Cluster.FeatureValue",
)
-
- count = proto.Field(proto.MESSAGE, number=3, message=wrappers.Int64Value,)
+ count = proto.Field(
+ proto.MESSAGE, number=3, message=wrappers_pb2.Int64Value,
+ )
davies_bouldin_index = proto.Field(
- proto.MESSAGE, number=1, message=wrappers.DoubleValue,
+ proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue,
)
-
mean_squared_distance = proto.Field(
- proto.MESSAGE, number=2, message=wrappers.DoubleValue,
+ proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue,
)
-
clusters = proto.RepeatedField(
proto.MESSAGE, number=3, message="Model.ClusteringMetrics.Cluster",
)
@@ -654,53 +631,49 @@ class RankingMetrics(proto.Message):
feedback_type=implicit.
Attributes:
- mean_average_precision (~.wrappers.DoubleValue):
+ mean_average_precision (google.protobuf.wrappers_pb2.DoubleValue):
Calculates a precision per user for all the
items by ranking them and then averages all the
precisions across all the users.
- mean_squared_error (~.wrappers.DoubleValue):
+ mean_squared_error (google.protobuf.wrappers_pb2.DoubleValue):
Similar to the mean squared error computed in
regression and explicit recommendation models
except instead of computing the rating directly,
the output from evaluate is computed against a
preference which is 1 or 0 depending on if the
rating exists or not.
- normalized_discounted_cumulative_gain (~.wrappers.DoubleValue):
+ normalized_discounted_cumulative_gain (google.protobuf.wrappers_pb2.DoubleValue):
A metric to determine the goodness of a
ranking calculated from the predicted confidence
by comparing it to an ideal rank measured by the
original ratings.
- average_rank (~.wrappers.DoubleValue):
+ average_rank (google.protobuf.wrappers_pb2.DoubleValue):
Determines the goodness of a ranking by
computing the percentile rank from the predicted
confidence and dividing it by the original rank.
"""
mean_average_precision = proto.Field(
- proto.MESSAGE, number=1, message=wrappers.DoubleValue,
+ proto.MESSAGE, number=1, message=wrappers_pb2.DoubleValue,
)
-
mean_squared_error = proto.Field(
- proto.MESSAGE, number=2, message=wrappers.DoubleValue,
+ proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue,
)
-
normalized_discounted_cumulative_gain = proto.Field(
- proto.MESSAGE, number=3, message=wrappers.DoubleValue,
+ proto.MESSAGE, number=3, message=wrappers_pb2.DoubleValue,
)
-
average_rank = proto.Field(
- proto.MESSAGE, number=4, message=wrappers.DoubleValue,
+ proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue,
)
class ArimaForecastingMetrics(proto.Message):
r"""Model evaluation metrics for ARIMA forecasting models.
-
Attributes:
- non_seasonal_order (Sequence[~.gcb_model.Model.ArimaOrder]):
+ non_seasonal_order (Sequence[google.cloud.bigquery_v2.types.Model.ArimaOrder]):
Non-seasonal order.
- arima_fitting_metrics (Sequence[~.gcb_model.Model.ArimaFittingMetrics]):
+ arima_fitting_metrics (Sequence[google.cloud.bigquery_v2.types.Model.ArimaFittingMetrics]):
Arima model fitting metrics.
- seasonal_periods (Sequence[~.gcb_model.Model.SeasonalPeriod.SeasonalPeriodType]):
+ seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]):
Seasonal periods. Repeated because multiple
periods are supported for one time series.
has_drift (Sequence[bool]):
@@ -709,7 +682,7 @@ class ArimaForecastingMetrics(proto.Message):
time_series_id (Sequence[str]):
Id to differentiate different time series for
the large-scale case.
- arima_single_model_forecasting_metrics (Sequence[~.gcb_model.Model.ArimaForecastingMetrics.ArimaSingleModelForecastingMetrics]):
+ arima_single_model_forecasting_metrics (Sequence[google.cloud.bigquery_v2.types.Model.ArimaForecastingMetrics.ArimaSingleModelForecastingMetrics]):
Repeated as there can be many metric sets
(one for each model) in auto-arima and the
large-scale case.
@@ -720,52 +693,72 @@ class ArimaSingleModelForecastingMetrics(proto.Message):
model.
Attributes:
- non_seasonal_order (~.gcb_model.Model.ArimaOrder):
+ non_seasonal_order (google.cloud.bigquery_v2.types.Model.ArimaOrder):
Non-seasonal order.
- arima_fitting_metrics (~.gcb_model.Model.ArimaFittingMetrics):
+ arima_fitting_metrics (google.cloud.bigquery_v2.types.Model.ArimaFittingMetrics):
Arima fitting metrics.
has_drift (bool):
Is arima model fitted with drift or not. It
is always false when d is not 1.
time_series_id (str):
- The id to indicate different time series.
- seasonal_periods (Sequence[~.gcb_model.Model.SeasonalPeriod.SeasonalPeriodType]):
+ The time_series_id value for this time series. It will be
+ one of the unique values from the time_series_id_column
+ specified during ARIMA model training. Only present when
+ time_series_id_column training option was used.
+ time_series_ids (Sequence[str]):
+ The tuple of time_series_ids identifying this time series.
+ It will be one of the unique tuples of values present in the
+ time_series_id_columns specified during ARIMA model
+ training. Only present when time_series_id_columns training
+ option was used. The order of values here is the same as the
+ order of time_series_id_columns.
+ seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]):
Seasonal periods. Repeated because multiple
periods are supported for one time series.
+ has_holiday_effect (google.protobuf.wrappers_pb2.BoolValue):
+ If true, holiday_effect is a part of time series
+ decomposition result.
+ has_spikes_and_dips (google.protobuf.wrappers_pb2.BoolValue):
+ If true, spikes_and_dips is a part of time series
+ decomposition result.
+ has_step_changes (google.protobuf.wrappers_pb2.BoolValue):
+ If true, step_changes is a part of time series decomposition
+ result.
"""
non_seasonal_order = proto.Field(
proto.MESSAGE, number=1, message="Model.ArimaOrder",
)
-
arima_fitting_metrics = proto.Field(
proto.MESSAGE, number=2, message="Model.ArimaFittingMetrics",
)
-
- has_drift = proto.Field(proto.BOOL, number=3)
-
- time_series_id = proto.Field(proto.STRING, number=4)
-
+ has_drift = proto.Field(proto.BOOL, number=3,)
+ time_series_id = proto.Field(proto.STRING, number=4,)
+ time_series_ids = proto.RepeatedField(proto.STRING, number=9,)
seasonal_periods = proto.RepeatedField(
proto.ENUM, number=5, enum="Model.SeasonalPeriod.SeasonalPeriodType",
)
+ has_holiday_effect = proto.Field(
+ proto.MESSAGE, number=6, message=wrappers_pb2.BoolValue,
+ )
+ has_spikes_and_dips = proto.Field(
+ proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue,
+ )
+ has_step_changes = proto.Field(
+ proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue,
+ )
non_seasonal_order = proto.RepeatedField(
proto.MESSAGE, number=1, message="Model.ArimaOrder",
)
-
arima_fitting_metrics = proto.RepeatedField(
proto.MESSAGE, number=2, message="Model.ArimaFittingMetrics",
)
-
seasonal_periods = proto.RepeatedField(
proto.ENUM, number=3, enum="Model.SeasonalPeriod.SeasonalPeriodType",
)
-
- has_drift = proto.RepeatedField(proto.BOOL, number=4)
-
- time_series_id = proto.RepeatedField(proto.STRING, number=5)
-
+ has_drift = proto.RepeatedField(proto.BOOL, number=4,)
+ time_series_id = proto.RepeatedField(proto.STRING, number=5,)
arima_single_model_forecasting_metrics = proto.RepeatedField(
proto.MESSAGE,
number=6,
@@ -779,50 +772,45 @@ class EvaluationMetrics(proto.Message):
imported models.
Attributes:
- regression_metrics (~.gcb_model.Model.RegressionMetrics):
+ regression_metrics (google.cloud.bigquery_v2.types.Model.RegressionMetrics):
Populated for regression models and explicit
feedback type matrix factorization models.
- binary_classification_metrics (~.gcb_model.Model.BinaryClassificationMetrics):
+ binary_classification_metrics (google.cloud.bigquery_v2.types.Model.BinaryClassificationMetrics):
Populated for binary
classification/classifier models.
- multi_class_classification_metrics (~.gcb_model.Model.MultiClassClassificationMetrics):
+ multi_class_classification_metrics (google.cloud.bigquery_v2.types.Model.MultiClassClassificationMetrics):
Populated for multi-class
classification/classifier models.
- clustering_metrics (~.gcb_model.Model.ClusteringMetrics):
+ clustering_metrics (google.cloud.bigquery_v2.types.Model.ClusteringMetrics):
Populated for clustering models.
- ranking_metrics (~.gcb_model.Model.RankingMetrics):
+ ranking_metrics (google.cloud.bigquery_v2.types.Model.RankingMetrics):
Populated for implicit feedback type matrix
factorization models.
- arima_forecasting_metrics (~.gcb_model.Model.ArimaForecastingMetrics):
+ arima_forecasting_metrics (google.cloud.bigquery_v2.types.Model.ArimaForecastingMetrics):
Populated for ARIMA models.
"""
regression_metrics = proto.Field(
proto.MESSAGE, number=1, oneof="metrics", message="Model.RegressionMetrics",
)
-
binary_classification_metrics = proto.Field(
proto.MESSAGE,
number=2,
oneof="metrics",
message="Model.BinaryClassificationMetrics",
)
-
multi_class_classification_metrics = proto.Field(
proto.MESSAGE,
number=3,
oneof="metrics",
message="Model.MultiClassClassificationMetrics",
)
-
clustering_metrics = proto.Field(
proto.MESSAGE, number=4, oneof="metrics", message="Model.ClusteringMetrics",
)
-
ranking_metrics = proto.Field(
proto.MESSAGE, number=5, oneof="metrics", message="Model.RankingMetrics",
)
-
arima_forecasting_metrics = proto.Field(
proto.MESSAGE,
number=6,
@@ -835,10 +823,10 @@ class DataSplitResult(proto.Message):
and evaluation data tables that were used to train the model.
Attributes:
- training_table (~.table_reference.TableReference):
+ training_table (google.cloud.bigquery_v2.types.TableReference):
Table reference of the training data after
split.
- evaluation_table (~.table_reference.TableReference):
+ evaluation_table (google.cloud.bigquery_v2.types.TableReference):
Table reference of the evaluation data after
split.
"""
@@ -846,7 +834,6 @@ class DataSplitResult(proto.Message):
training_table = proto.Field(
proto.MESSAGE, number=1, message=table_reference.TableReference,
)
-
evaluation_table = proto.Field(
proto.MESSAGE, number=2, message=table_reference.TableReference,
)
@@ -864,15 +851,12 @@ class ArimaOrder(proto.Message):
Order of the moving-average part.
"""
- p = proto.Field(proto.INT64, number=1)
-
- d = proto.Field(proto.INT64, number=2)
-
- q = proto.Field(proto.INT64, number=3)
+ p = proto.Field(proto.INT64, number=1,)
+ d = proto.Field(proto.INT64, number=2,)
+ q = proto.Field(proto.INT64, number=3,)
class ArimaFittingMetrics(proto.Message):
r"""ARIMA model fitting metrics.
-
Attributes:
log_likelihood (float):
Log-likelihood.
@@ -882,18 +866,16 @@ class ArimaFittingMetrics(proto.Message):
Variance.
"""
- log_likelihood = proto.Field(proto.DOUBLE, number=1)
-
- aic = proto.Field(proto.DOUBLE, number=2)
-
- variance = proto.Field(proto.DOUBLE, number=3)
+ log_likelihood = proto.Field(proto.DOUBLE, number=1,)
+ aic = proto.Field(proto.DOUBLE, number=2,)
+ variance = proto.Field(proto.DOUBLE, number=3,)
class GlobalExplanation(proto.Message):
r"""Global explanations containing the top most important
features after training.
Attributes:
- explanations (Sequence[~.gcb_model.Model.GlobalExplanation.Explanation]):
+ explanations (Sequence[google.cloud.bigquery_v2.types.Model.GlobalExplanation.Explanation]):
A list of the top global explanations. Sorted
by absolute value of attribution in descending
order.
@@ -906,49 +888,45 @@ class GlobalExplanation(proto.Message):
class Explanation(proto.Message):
r"""Explanation for a single feature.
-
Attributes:
feature_name (str):
Full name of the feature. For non-numerical features, will
be formatted like ..
Overall size of feature name will always be truncated to
first 120 characters.
- attribution (~.wrappers.DoubleValue):
+ attribution (google.protobuf.wrappers_pb2.DoubleValue):
Attribution of feature.
"""
- feature_name = proto.Field(proto.STRING, number=1)
-
+ feature_name = proto.Field(proto.STRING, number=1,)
attribution = proto.Field(
- proto.MESSAGE, number=2, message=wrappers.DoubleValue,
+ proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue,
)
explanations = proto.RepeatedField(
proto.MESSAGE, number=1, message="Model.GlobalExplanation.Explanation",
)
-
- class_label = proto.Field(proto.STRING, number=2)
+ class_label = proto.Field(proto.STRING, number=2,)
class TrainingRun(proto.Message):
r"""Information about a single training query run for the model.
-
Attributes:
- training_options (~.gcb_model.Model.TrainingRun.TrainingOptions):
+ training_options (google.cloud.bigquery_v2.types.Model.TrainingRun.TrainingOptions):
Options that were used for this training run,
includes user specified and default options that
were used.
- start_time (~.timestamp.Timestamp):
+ start_time (google.protobuf.timestamp_pb2.Timestamp):
The start time of this training run.
- results (Sequence[~.gcb_model.Model.TrainingRun.IterationResult]):
+ results (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult]):
Output of each iteration run, results.size() <=
max_iterations.
- evaluation_metrics (~.gcb_model.Model.EvaluationMetrics):
+ evaluation_metrics (google.cloud.bigquery_v2.types.Model.EvaluationMetrics):
The evaluation metrics over training/eval
data that were computed at the end of training.
- data_split_result (~.gcb_model.Model.DataSplitResult):
+ data_split_result (google.cloud.bigquery_v2.types.Model.DataSplitResult):
Data split result of the training run. Only
set when the input data is actually split.
- global_explanations (Sequence[~.gcb_model.Model.GlobalExplanation]):
+ global_explanations (Sequence[google.cloud.bigquery_v2.types.Model.GlobalExplanation]):
Global explanations for important features of
the model. For multi-class models, there is one
entry for each label class. For other models,
@@ -956,36 +934,35 @@ class TrainingRun(proto.Message):
"""
class TrainingOptions(proto.Message):
- r"""
-
+ r"""Options used in model training.
Attributes:
max_iterations (int):
The maximum number of iterations in training.
Used only for iterative training algorithms.
- loss_type (~.gcb_model.Model.LossType):
+ loss_type (google.cloud.bigquery_v2.types.Model.LossType):
Type of loss function used during training
run.
learn_rate (float):
Learning rate in training. Used only for
iterative training algorithms.
- l1_regularization (~.wrappers.DoubleValue):
+ l1_regularization (google.protobuf.wrappers_pb2.DoubleValue):
L1 regularization coefficient.
- l2_regularization (~.wrappers.DoubleValue):
+ l2_regularization (google.protobuf.wrappers_pb2.DoubleValue):
L2 regularization coefficient.
- min_relative_progress (~.wrappers.DoubleValue):
+ min_relative_progress (google.protobuf.wrappers_pb2.DoubleValue):
When early_stop is true, stops training when accuracy
improvement is less than 'min_relative_progress'. Used only
for iterative training algorithms.
- warm_start (~.wrappers.BoolValue):
+ warm_start (google.protobuf.wrappers_pb2.BoolValue):
Whether to train a model from the last
checkpoint.
- early_stop (~.wrappers.BoolValue):
+ early_stop (google.protobuf.wrappers_pb2.BoolValue):
Whether to stop early when the loss doesn't improve
significantly any more (compared to min_relative_progress).
Used only for iterative training algorithms.
input_label_columns (Sequence[str]):
Name of input label columns in training data.
- data_split_method (~.gcb_model.Model.DataSplitMethod):
+ data_split_method (google.cloud.bigquery_v2.types.Model.DataSplitMethod):
The data split type for training and
evaluation, e.g. RANDOM.
data_split_eval_fraction (float):
@@ -1007,13 +984,13 @@ class TrainingOptions(proto.Message):
and the rest are eval data. It respects the order in
Orderable data types:
https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#data-type-properties
- learn_rate_strategy (~.gcb_model.Model.LearnRateStrategy):
+ learn_rate_strategy (google.cloud.bigquery_v2.types.Model.LearnRateStrategy):
The strategy to determine learn rate for the
current iteration.
initial_learn_rate (float):
Specifies the initial learning rate for the
line search learn rate strategy.
- label_class_weights (Sequence[~.gcb_model.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry]):
+ label_class_weights (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun.TrainingOptions.LabelClassWeightsEntry]):
Weights associated with each label class, for
rebalancing the training data. Only applicable
for classification models.
@@ -1023,21 +1000,22 @@ class TrainingOptions(proto.Message):
item_column (str):
Item column specified for matrix
factorization models.
- distance_type (~.gcb_model.Model.DistanceType):
+ distance_type (google.cloud.bigquery_v2.types.Model.DistanceType):
Distance type for clustering models.
num_clusters (int):
Number of clusters for clustering models.
model_uri (str):
- [Beta] Google Cloud Storage URI from which the model was
- imported. Only applicable for imported models.
- optimization_strategy (~.gcb_model.Model.OptimizationStrategy):
+ Google Cloud Storage URI from which the model
+ was imported. Only applicable for imported
+ models.
+ optimization_strategy (google.cloud.bigquery_v2.types.Model.OptimizationStrategy):
Optimization strategy for training linear
regression models.
hidden_units (Sequence[int]):
Hidden units for dnn models.
batch_size (int):
Batch size for dnn models.
- dropout (~.wrappers.DoubleValue):
+ dropout (google.protobuf.wrappers_pb2.DoubleValue):
Dropout probability for dnn models.
max_tree_depth (int):
Maximum depth of a tree for boosted tree
@@ -1046,18 +1024,18 @@ class TrainingOptions(proto.Message):
Subsample fraction of the training data to
grow tree to prevent overfitting for boosted
tree models.
- min_split_loss (~.wrappers.DoubleValue):
+ min_split_loss (google.protobuf.wrappers_pb2.DoubleValue):
Minimum split loss for boosted tree models.
num_factors (int):
Num factors specified for matrix
factorization models.
- feedback_type (~.gcb_model.Model.FeedbackType):
+ feedback_type (google.cloud.bigquery_v2.types.Model.FeedbackType):
Feedback type that specifies which algorithm
to run for matrix factorization.
- wals_alpha (~.wrappers.DoubleValue):
+ wals_alpha (google.protobuf.wrappers_pb2.DoubleValue):
Hyperparameter for matrix factoration when
implicit feedback type is specified.
- kmeans_initialization_method (~.gcb_model.Model.KmeansEnums.KmeansInitializationMethod):
+ kmeans_initialization_method (google.cloud.bigquery_v2.types.Model.KmeansEnums.KmeansInitializationMethod):
The method used to initialize the centroids
for kmeans algorithm.
kmeans_initialization_column (str):
@@ -1071,23 +1049,26 @@ class TrainingOptions(proto.Message):
for ARIMA model.
auto_arima (bool):
Whether to enable auto ARIMA or not.
- non_seasonal_order (~.gcb_model.Model.ArimaOrder):
+ non_seasonal_order (google.cloud.bigquery_v2.types.Model.ArimaOrder):
A specification of the non-seasonal part of
the ARIMA model: the three components (p, d, q)
are the AR order, the degree of differencing,
and the MA order.
- data_frequency (~.gcb_model.Model.DataFrequency):
+ data_frequency (google.cloud.bigquery_v2.types.Model.DataFrequency):
The data frequency of a time series.
include_drift (bool):
Include drift when fitting an ARIMA model.
- holiday_region (~.gcb_model.Model.HolidayRegion):
+ holiday_region (google.cloud.bigquery_v2.types.Model.HolidayRegion):
The geographical region based on which the
holidays are considered in time series modeling.
If a valid value is specified, then holiday
effects modeling is enabled.
time_series_id_column (str):
- The id column that will be used to indicate
- different time series to forecast in parallel.
+ The time series id column that was used
+ during ARIMA model training.
+ time_series_id_columns (Sequence[str]):
+ The time series id columns that were used
+ during ARIMA model training.
horizon (int):
The number of periods ahead that need to be
forecasted.
@@ -1098,176 +1079,149 @@ class TrainingOptions(proto.Message):
output feature name is A.b.
auto_arima_max_order (int):
The max value of non-seasonal p and q.
+ decompose_time_series (google.protobuf.wrappers_pb2.BoolValue):
+ If true, perform time series decomposition and
+ save the results.
+ clean_spikes_and_dips (google.protobuf.wrappers_pb2.BoolValue):
+ If true, clean spikes and dips in the input
+ time series.
+ adjust_step_changes (google.protobuf.wrappers_pb2.BoolValue):
+ If true, detect step changes and make data
+ adjustment in the input time series.
"""
- max_iterations = proto.Field(proto.INT64, number=1)
-
+ max_iterations = proto.Field(proto.INT64, number=1,)
loss_type = proto.Field(proto.ENUM, number=2, enum="Model.LossType",)
-
- learn_rate = proto.Field(proto.DOUBLE, number=3)
-
+ learn_rate = proto.Field(proto.DOUBLE, number=3,)
l1_regularization = proto.Field(
- proto.MESSAGE, number=4, message=wrappers.DoubleValue,
+ proto.MESSAGE, number=4, message=wrappers_pb2.DoubleValue,
)
-
l2_regularization = proto.Field(
- proto.MESSAGE, number=5, message=wrappers.DoubleValue,
+ proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue,
)
-
min_relative_progress = proto.Field(
- proto.MESSAGE, number=6, message=wrappers.DoubleValue,
+ proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue,
)
-
warm_start = proto.Field(
- proto.MESSAGE, number=7, message=wrappers.BoolValue,
+ proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue,
)
-
early_stop = proto.Field(
- proto.MESSAGE, number=8, message=wrappers.BoolValue,
+ proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue,
)
-
- input_label_columns = proto.RepeatedField(proto.STRING, number=9)
-
+ input_label_columns = proto.RepeatedField(proto.STRING, number=9,)
data_split_method = proto.Field(
proto.ENUM, number=10, enum="Model.DataSplitMethod",
)
-
- data_split_eval_fraction = proto.Field(proto.DOUBLE, number=11)
-
- data_split_column = proto.Field(proto.STRING, number=12)
-
+ data_split_eval_fraction = proto.Field(proto.DOUBLE, number=11,)
+ data_split_column = proto.Field(proto.STRING, number=12,)
learn_rate_strategy = proto.Field(
proto.ENUM, number=13, enum="Model.LearnRateStrategy",
)
-
- initial_learn_rate = proto.Field(proto.DOUBLE, number=16)
-
- label_class_weights = proto.MapField(proto.STRING, proto.DOUBLE, number=17)
-
- user_column = proto.Field(proto.STRING, number=18)
-
- item_column = proto.Field(proto.STRING, number=19)
-
+ initial_learn_rate = proto.Field(proto.DOUBLE, number=16,)
+ label_class_weights = proto.MapField(proto.STRING, proto.DOUBLE, number=17,)
+ user_column = proto.Field(proto.STRING, number=18,)
+ item_column = proto.Field(proto.STRING, number=19,)
distance_type = proto.Field(
proto.ENUM, number=20, enum="Model.DistanceType",
)
-
- num_clusters = proto.Field(proto.INT64, number=21)
-
- model_uri = proto.Field(proto.STRING, number=22)
-
+ num_clusters = proto.Field(proto.INT64, number=21,)
+ model_uri = proto.Field(proto.STRING, number=22,)
optimization_strategy = proto.Field(
proto.ENUM, number=23, enum="Model.OptimizationStrategy",
)
-
- hidden_units = proto.RepeatedField(proto.INT64, number=24)
-
- batch_size = proto.Field(proto.INT64, number=25)
-
+ hidden_units = proto.RepeatedField(proto.INT64, number=24,)
+ batch_size = proto.Field(proto.INT64, number=25,)
dropout = proto.Field(
- proto.MESSAGE, number=26, message=wrappers.DoubleValue,
+ proto.MESSAGE, number=26, message=wrappers_pb2.DoubleValue,
)
-
- max_tree_depth = proto.Field(proto.INT64, number=27)
-
- subsample = proto.Field(proto.DOUBLE, number=28)
-
+ max_tree_depth = proto.Field(proto.INT64, number=27,)
+ subsample = proto.Field(proto.DOUBLE, number=28,)
min_split_loss = proto.Field(
- proto.MESSAGE, number=29, message=wrappers.DoubleValue,
+ proto.MESSAGE, number=29, message=wrappers_pb2.DoubleValue,
)
-
- num_factors = proto.Field(proto.INT64, number=30)
-
+ num_factors = proto.Field(proto.INT64, number=30,)
feedback_type = proto.Field(
proto.ENUM, number=31, enum="Model.FeedbackType",
)
-
wals_alpha = proto.Field(
- proto.MESSAGE, number=32, message=wrappers.DoubleValue,
+ proto.MESSAGE, number=32, message=wrappers_pb2.DoubleValue,
)
-
kmeans_initialization_method = proto.Field(
proto.ENUM,
number=33,
enum="Model.KmeansEnums.KmeansInitializationMethod",
)
-
- kmeans_initialization_column = proto.Field(proto.STRING, number=34)
-
- time_series_timestamp_column = proto.Field(proto.STRING, number=35)
-
- time_series_data_column = proto.Field(proto.STRING, number=36)
-
- auto_arima = proto.Field(proto.BOOL, number=37)
-
+ kmeans_initialization_column = proto.Field(proto.STRING, number=34,)
+ time_series_timestamp_column = proto.Field(proto.STRING, number=35,)
+ time_series_data_column = proto.Field(proto.STRING, number=36,)
+ auto_arima = proto.Field(proto.BOOL, number=37,)
non_seasonal_order = proto.Field(
proto.MESSAGE, number=38, message="Model.ArimaOrder",
)
-
data_frequency = proto.Field(
proto.ENUM, number=39, enum="Model.DataFrequency",
)
-
- include_drift = proto.Field(proto.BOOL, number=41)
-
+ include_drift = proto.Field(proto.BOOL, number=41,)
holiday_region = proto.Field(
proto.ENUM, number=42, enum="Model.HolidayRegion",
)
-
- time_series_id_column = proto.Field(proto.STRING, number=43)
-
- horizon = proto.Field(proto.INT64, number=44)
-
- preserve_input_structs = proto.Field(proto.BOOL, number=45)
-
- auto_arima_max_order = proto.Field(proto.INT64, number=46)
+ time_series_id_column = proto.Field(proto.STRING, number=43,)
+ time_series_id_columns = proto.RepeatedField(proto.STRING, number=51,)
+ horizon = proto.Field(proto.INT64, number=44,)
+ preserve_input_structs = proto.Field(proto.BOOL, number=45,)
+ auto_arima_max_order = proto.Field(proto.INT64, number=46,)
+ decompose_time_series = proto.Field(
+ proto.MESSAGE, number=50, message=wrappers_pb2.BoolValue,
+ )
+ clean_spikes_and_dips = proto.Field(
+ proto.MESSAGE, number=52, message=wrappers_pb2.BoolValue,
+ )
+ adjust_step_changes = proto.Field(
+ proto.MESSAGE, number=53, message=wrappers_pb2.BoolValue,
+ )
class IterationResult(proto.Message):
r"""Information about a single iteration of the training run.
-
Attributes:
- index (~.wrappers.Int32Value):
+ index (google.protobuf.wrappers_pb2.Int32Value):
Index of the iteration, 0 based.
- duration_ms (~.wrappers.Int64Value):
+ duration_ms (google.protobuf.wrappers_pb2.Int64Value):
Time taken to run the iteration in
milliseconds.
- training_loss (~.wrappers.DoubleValue):
+ training_loss (google.protobuf.wrappers_pb2.DoubleValue):
Loss computed on the training data at the end
of iteration.
- eval_loss (~.wrappers.DoubleValue):
+ eval_loss (google.protobuf.wrappers_pb2.DoubleValue):
Loss computed on the eval data at the end of
iteration.
learn_rate (float):
Learn rate used for this iteration.
- cluster_infos (Sequence[~.gcb_model.Model.TrainingRun.IterationResult.ClusterInfo]):
+ cluster_infos (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult.ClusterInfo]):
Information about top clusters for clustering
models.
- arima_result (~.gcb_model.Model.TrainingRun.IterationResult.ArimaResult):
+ arima_result (google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult.ArimaResult):
"""
class ClusterInfo(proto.Message):
r"""Information about a single cluster for clustering model.
-
Attributes:
centroid_id (int):
Centroid id.
- cluster_radius (~.wrappers.DoubleValue):
+ cluster_radius (google.protobuf.wrappers_pb2.DoubleValue):
Cluster radius, the average distance from
centroid to each point assigned to the cluster.
- cluster_size (~.wrappers.Int64Value):
+ cluster_size (google.protobuf.wrappers_pb2.Int64Value):
Cluster size, the total number of points
assigned to the cluster.
"""
- centroid_id = proto.Field(proto.INT64, number=1)
-
+ centroid_id = proto.Field(proto.INT64, number=1,)
cluster_radius = proto.Field(
- proto.MESSAGE, number=2, message=wrappers.DoubleValue,
+ proto.MESSAGE, number=2, message=wrappers_pb2.DoubleValue,
)
-
cluster_size = proto.Field(
- proto.MESSAGE, number=3, message=wrappers.Int64Value,
+ proto.MESSAGE, number=3, message=wrappers_pb2.Int64Value,
)
class ArimaResult(proto.Message):
@@ -1276,18 +1230,17 @@ class ArimaResult(proto.Message):
iteration results.
Attributes:
- arima_model_info (Sequence[~.gcb_model.Model.TrainingRun.IterationResult.ArimaResult.ArimaModelInfo]):
+ arima_model_info (Sequence[google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult.ArimaResult.ArimaModelInfo]):
This message is repeated because there are
multiple arima models fitted in auto-arima. For
non-auto-arima model, its size is one.
- seasonal_periods (Sequence[~.gcb_model.Model.SeasonalPeriod.SeasonalPeriodType]):
+ seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]):
Seasonal periods. Repeated because multiple
periods are supported for one time series.
"""
class ArimaCoefficients(proto.Message):
r"""Arima coefficients.
-
Attributes:
auto_regressive_coefficients (Sequence[float]):
Auto-regressive coefficients, an array of
@@ -1301,93 +1254,109 @@ class ArimaCoefficients(proto.Message):
"""
auto_regressive_coefficients = proto.RepeatedField(
- proto.DOUBLE, number=1
+ proto.DOUBLE, number=1,
)
-
moving_average_coefficients = proto.RepeatedField(
- proto.DOUBLE, number=2
+ proto.DOUBLE, number=2,
)
-
- intercept_coefficient = proto.Field(proto.DOUBLE, number=3)
+ intercept_coefficient = proto.Field(proto.DOUBLE, number=3,)
class ArimaModelInfo(proto.Message):
r"""Arima model information.
-
Attributes:
- non_seasonal_order (~.gcb_model.Model.ArimaOrder):
+ non_seasonal_order (google.cloud.bigquery_v2.types.Model.ArimaOrder):
Non-seasonal order.
- arima_coefficients (~.gcb_model.Model.TrainingRun.IterationResult.ArimaResult.ArimaCoefficients):
+ arima_coefficients (google.cloud.bigquery_v2.types.Model.TrainingRun.IterationResult.ArimaResult.ArimaCoefficients):
Arima coefficients.
- arima_fitting_metrics (~.gcb_model.Model.ArimaFittingMetrics):
+ arima_fitting_metrics (google.cloud.bigquery_v2.types.Model.ArimaFittingMetrics):
Arima fitting metrics.
has_drift (bool):
Whether Arima model fitted with drift or not.
It is always false when d is not 1.
time_series_id (str):
- The id to indicate different time series.
- seasonal_periods (Sequence[~.gcb_model.Model.SeasonalPeriod.SeasonalPeriodType]):
+ The time_series_id value for this time series. It will be
+ one of the unique values from the time_series_id_column
+ specified during ARIMA model training. Only present when
+ time_series_id_column training option was used.
+ time_series_ids (Sequence[str]):
+ The tuple of time_series_ids identifying this time series.
+ It will be one of the unique tuples of values present in the
+ time_series_id_columns specified during ARIMA model
+ training. Only present when time_series_id_columns training
+ option was used and the order of values here are same as the
+ order of time_series_id_columns.
+ seasonal_periods (Sequence[google.cloud.bigquery_v2.types.Model.SeasonalPeriod.SeasonalPeriodType]):
Seasonal periods. Repeated because multiple
periods are supported for one time series.
+ has_holiday_effect (google.protobuf.wrappers_pb2.BoolValue):
+ If true, holiday_effect is a part of time series
+ decomposition result.
+ has_spikes_and_dips (google.protobuf.wrappers_pb2.BoolValue):
+ If true, spikes_and_dips is a part of time series
+ decomposition result.
+ has_step_changes (google.protobuf.wrappers_pb2.BoolValue):
+ If true, step_changes is a part of time series decomposition
+ result.
"""
non_seasonal_order = proto.Field(
proto.MESSAGE, number=1, message="Model.ArimaOrder",
)
-
arima_coefficients = proto.Field(
proto.MESSAGE,
number=2,
message="Model.TrainingRun.IterationResult.ArimaResult.ArimaCoefficients",
)
-
arima_fitting_metrics = proto.Field(
proto.MESSAGE, number=3, message="Model.ArimaFittingMetrics",
)
-
- has_drift = proto.Field(proto.BOOL, number=4)
-
- time_series_id = proto.Field(proto.STRING, number=5)
-
+ has_drift = proto.Field(proto.BOOL, number=4,)
+ time_series_id = proto.Field(proto.STRING, number=5,)
+ time_series_ids = proto.RepeatedField(proto.STRING, number=10,)
seasonal_periods = proto.RepeatedField(
proto.ENUM,
number=6,
enum="Model.SeasonalPeriod.SeasonalPeriodType",
)
+ has_holiday_effect = proto.Field(
+ proto.MESSAGE, number=7, message=wrappers_pb2.BoolValue,
+ )
+ has_spikes_and_dips = proto.Field(
+ proto.MESSAGE, number=8, message=wrappers_pb2.BoolValue,
+ )
+ has_step_changes = proto.Field(
+ proto.MESSAGE, number=9, message=wrappers_pb2.BoolValue,
+ )
arima_model_info = proto.RepeatedField(
proto.MESSAGE,
number=1,
message="Model.TrainingRun.IterationResult.ArimaResult.ArimaModelInfo",
)
-
seasonal_periods = proto.RepeatedField(
proto.ENUM,
number=2,
enum="Model.SeasonalPeriod.SeasonalPeriodType",
)
- index = proto.Field(proto.MESSAGE, number=1, message=wrappers.Int32Value,)
-
+ index = proto.Field(
+ proto.MESSAGE, number=1, message=wrappers_pb2.Int32Value,
+ )
duration_ms = proto.Field(
- proto.MESSAGE, number=4, message=wrappers.Int64Value,
+ proto.MESSAGE, number=4, message=wrappers_pb2.Int64Value,
)
-
training_loss = proto.Field(
- proto.MESSAGE, number=5, message=wrappers.DoubleValue,
+ proto.MESSAGE, number=5, message=wrappers_pb2.DoubleValue,
)
-
eval_loss = proto.Field(
- proto.MESSAGE, number=6, message=wrappers.DoubleValue,
+ proto.MESSAGE, number=6, message=wrappers_pb2.DoubleValue,
)
-
- learn_rate = proto.Field(proto.DOUBLE, number=7)
-
+ learn_rate = proto.Field(proto.DOUBLE, number=7,)
cluster_infos = proto.RepeatedField(
proto.MESSAGE,
number=8,
message="Model.TrainingRun.IterationResult.ClusterInfo",
)
-
arima_result = proto.Field(
proto.MESSAGE,
number=9,
@@ -1397,65 +1366,49 @@ class ArimaModelInfo(proto.Message):
training_options = proto.Field(
proto.MESSAGE, number=1, message="Model.TrainingRun.TrainingOptions",
)
-
- start_time = proto.Field(proto.MESSAGE, number=8, message=timestamp.Timestamp,)
-
+ start_time = proto.Field(
+ proto.MESSAGE, number=8, message=timestamp_pb2.Timestamp,
+ )
results = proto.RepeatedField(
proto.MESSAGE, number=6, message="Model.TrainingRun.IterationResult",
)
-
evaluation_metrics = proto.Field(
proto.MESSAGE, number=7, message="Model.EvaluationMetrics",
)
-
data_split_result = proto.Field(
proto.MESSAGE, number=9, message="Model.DataSplitResult",
)
-
global_explanations = proto.RepeatedField(
proto.MESSAGE, number=10, message="Model.GlobalExplanation",
)
- etag = proto.Field(proto.STRING, number=1)
-
+ etag = proto.Field(proto.STRING, number=1,)
model_reference = proto.Field(
proto.MESSAGE, number=2, message=gcb_model_reference.ModelReference,
)
-
- creation_time = proto.Field(proto.INT64, number=5)
-
- last_modified_time = proto.Field(proto.INT64, number=6)
-
- description = proto.Field(proto.STRING, number=12)
-
- friendly_name = proto.Field(proto.STRING, number=14)
-
- labels = proto.MapField(proto.STRING, proto.STRING, number=15)
-
- expiration_time = proto.Field(proto.INT64, number=16)
-
- location = proto.Field(proto.STRING, number=13)
-
+ creation_time = proto.Field(proto.INT64, number=5,)
+ last_modified_time = proto.Field(proto.INT64, number=6,)
+ description = proto.Field(proto.STRING, number=12,)
+ friendly_name = proto.Field(proto.STRING, number=14,)
+ labels = proto.MapField(proto.STRING, proto.STRING, number=15,)
+ expiration_time = proto.Field(proto.INT64, number=16,)
+ location = proto.Field(proto.STRING, number=13,)
encryption_configuration = proto.Field(
proto.MESSAGE, number=17, message=encryption_config.EncryptionConfiguration,
)
-
model_type = proto.Field(proto.ENUM, number=7, enum=ModelType,)
-
training_runs = proto.RepeatedField(proto.MESSAGE, number=9, message=TrainingRun,)
-
feature_columns = proto.RepeatedField(
proto.MESSAGE, number=10, message=standard_sql.StandardSqlField,
)
-
label_columns = proto.RepeatedField(
proto.MESSAGE, number=11, message=standard_sql.StandardSqlField,
)
+ best_trial_id = proto.Field(proto.INT64, number=19,)
class GetModelRequest(proto.Message):
r"""
-
Attributes:
project_id (str):
Required. Project ID of the requested model.
@@ -1465,16 +1418,13 @@ class GetModelRequest(proto.Message):
Required. Model ID of the requested model.
"""
- project_id = proto.Field(proto.STRING, number=1)
-
- dataset_id = proto.Field(proto.STRING, number=2)
-
- model_id = proto.Field(proto.STRING, number=3)
+ project_id = proto.Field(proto.STRING, number=1,)
+ dataset_id = proto.Field(proto.STRING, number=2,)
+ model_id = proto.Field(proto.STRING, number=3,)
class PatchModelRequest(proto.Message):
r"""
-
Attributes:
project_id (str):
Required. Project ID of the model to patch.
@@ -1482,25 +1432,21 @@ class PatchModelRequest(proto.Message):
Required. Dataset ID of the model to patch.
model_id (str):
Required. Model ID of the model to patch.
- model (~.gcb_model.Model):
+ model (google.cloud.bigquery_v2.types.Model):
Required. Patched model.
Follows RFC5789 patch semantics. Missing fields
are not updated. To clear a field, explicitly
set to default value.
"""
- project_id = proto.Field(proto.STRING, number=1)
-
- dataset_id = proto.Field(proto.STRING, number=2)
-
- model_id = proto.Field(proto.STRING, number=3)
-
+ project_id = proto.Field(proto.STRING, number=1,)
+ dataset_id = proto.Field(proto.STRING, number=2,)
+ model_id = proto.Field(proto.STRING, number=3,)
model = proto.Field(proto.MESSAGE, number=4, message="Model",)
class DeleteModelRequest(proto.Message):
r"""
-
Attributes:
project_id (str):
Required. Project ID of the model to delete.
@@ -1510,22 +1456,19 @@ class DeleteModelRequest(proto.Message):
Required. Model ID of the model to delete.
"""
- project_id = proto.Field(proto.STRING, number=1)
-
- dataset_id = proto.Field(proto.STRING, number=2)
-
- model_id = proto.Field(proto.STRING, number=3)
+ project_id = proto.Field(proto.STRING, number=1,)
+ dataset_id = proto.Field(proto.STRING, number=2,)
+ model_id = proto.Field(proto.STRING, number=3,)
class ListModelsRequest(proto.Message):
r"""
-
Attributes:
project_id (str):
Required. Project ID of the models to list.
dataset_id (str):
Required. Dataset ID of the models to list.
- max_results (~.wrappers.UInt32Value):
+ max_results (google.protobuf.wrappers_pb2.UInt32Value):
The maximum number of results to return in a
single response page. Leverage the page tokens
to iterate through the entire collection.
@@ -1534,20 +1477,18 @@ class ListModelsRequest(proto.Message):
request the next page of results
"""
- project_id = proto.Field(proto.STRING, number=1)
-
- dataset_id = proto.Field(proto.STRING, number=2)
-
- max_results = proto.Field(proto.MESSAGE, number=3, message=wrappers.UInt32Value,)
-
- page_token = proto.Field(proto.STRING, number=4)
+ project_id = proto.Field(proto.STRING, number=1,)
+ dataset_id = proto.Field(proto.STRING, number=2,)
+ max_results = proto.Field(
+ proto.MESSAGE, number=3, message=wrappers_pb2.UInt32Value,
+ )
+ page_token = proto.Field(proto.STRING, number=4,)
class ListModelsResponse(proto.Message):
r"""
-
Attributes:
- models (Sequence[~.gcb_model.Model]):
+ models (Sequence[google.cloud.bigquery_v2.types.Model]):
Models in the requested dataset. Only the following fields
are populated: model_reference, model_type, creation_time,
last_modified_time and labels.
@@ -1560,8 +1501,7 @@ def raw_page(self):
return self
models = proto.RepeatedField(proto.MESSAGE, number=1, message="Model",)
-
- next_page_token = proto.Field(proto.STRING, number=2)
+ next_page_token = proto.Field(proto.STRING, number=2,)
__all__ = tuple(sorted(__protobuf__.manifest))
diff --git a/google/cloud/bigquery_v2/types/model_reference.py b/google/cloud/bigquery_v2/types/model_reference.py
index e3891d6c1..a9ebad613 100644
--- a/google/cloud/bigquery_v2/types/model_reference.py
+++ b/google/cloud/bigquery_v2/types/model_reference.py
@@ -1,5 +1,4 @@
# -*- coding: utf-8 -*-
-
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-
import proto # type: ignore
@@ -25,7 +23,6 @@
class ModelReference(proto.Message):
r"""Id path of a model.
-
Attributes:
project_id (str):
Required. The ID of the project containing
@@ -39,11 +36,9 @@ class ModelReference(proto.Message):
maximum length is 1,024 characters.
"""
- project_id = proto.Field(proto.STRING, number=1)
-
- dataset_id = proto.Field(proto.STRING, number=2)
-
- model_id = proto.Field(proto.STRING, number=3)
+ project_id = proto.Field(proto.STRING, number=1,)
+ dataset_id = proto.Field(proto.STRING, number=2,)
+ model_id = proto.Field(proto.STRING, number=3,)
__all__ = tuple(sorted(__protobuf__.manifest))
diff --git a/google/cloud/bigquery_v2/types/standard_sql.py b/google/cloud/bigquery_v2/types/standard_sql.py
index 80e4632f7..7a845fc48 100644
--- a/google/cloud/bigquery_v2/types/standard_sql.py
+++ b/google/cloud/bigquery_v2/types/standard_sql.py
@@ -1,5 +1,4 @@
# -*- coding: utf-8 -*-
-
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,13 +13,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-
import proto # type: ignore
__protobuf__ = proto.module(
package="google.cloud.bigquery.v2",
- manifest={"StandardSqlDataType", "StandardSqlField", "StandardSqlStructType",},
+ manifest={
+ "StandardSqlDataType",
+ "StandardSqlField",
+ "StandardSqlStructType",
+ "StandardSqlTableType",
+ },
)
@@ -33,13 +36,13 @@ class StandardSqlDataType(proto.Message):
array_element_type="DATE"}} ]}}
Attributes:
- type_kind (~.standard_sql.StandardSqlDataType.TypeKind):
+ type_kind (google.cloud.bigquery_v2.types.StandardSqlDataType.TypeKind):
Required. The top level type of this field.
Can be any standard SQL data type (e.g.,
"INT64", "DATE", "ARRAY").
- array_element_type (~.standard_sql.StandardSqlDataType):
+ array_element_type (google.cloud.bigquery_v2.types.StandardSqlDataType):
The type of the array's elements, if type_kind = "ARRAY".
- struct_type (~.standard_sql.StandardSqlStructType):
+ struct_type (google.cloud.bigquery_v2.types.StandardSqlStructType):
The fields of this struct, in order, if type_kind =
"STRUCT".
"""
@@ -56,18 +59,18 @@ class TypeKind(proto.Enum):
DATE = 10
TIME = 20
DATETIME = 21
+ INTERVAL = 26
GEOGRAPHY = 22
NUMERIC = 23
BIGNUMERIC = 24
+ JSON = 25
ARRAY = 16
STRUCT = 17
type_kind = proto.Field(proto.ENUM, number=1, enum=TypeKind,)
-
array_element_type = proto.Field(
proto.MESSAGE, number=2, oneof="sub_type", message="StandardSqlDataType",
)
-
struct_type = proto.Field(
proto.MESSAGE, number=3, oneof="sub_type", message="StandardSqlStructType",
)
@@ -75,12 +78,11 @@ class TypeKind(proto.Enum):
class StandardSqlField(proto.Message):
r"""A field or a column.
-
Attributes:
name (str):
Optional. The name of this field. Can be
absent for struct fields.
- type (~.standard_sql.StandardSqlDataType):
+ type (google.cloud.bigquery_v2.types.StandardSqlDataType):
Optional. The type of this parameter. Absent
if not explicitly specified (e.g., CREATE
FUNCTION statement can omit the return type; in
@@ -88,20 +90,28 @@ class StandardSqlField(proto.Message):
this "type" field).
"""
- name = proto.Field(proto.STRING, number=1)
-
+ name = proto.Field(proto.STRING, number=1,)
type = proto.Field(proto.MESSAGE, number=2, message="StandardSqlDataType",)
class StandardSqlStructType(proto.Message):
r"""
-
Attributes:
- fields (Sequence[~.standard_sql.StandardSqlField]):
+ fields (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]):
"""
fields = proto.RepeatedField(proto.MESSAGE, number=1, message="StandardSqlField",)
+class StandardSqlTableType(proto.Message):
+ r"""A table type
+ Attributes:
+ columns (Sequence[google.cloud.bigquery_v2.types.StandardSqlField]):
+ The columns in this table type
+ """
+
+ columns = proto.RepeatedField(proto.MESSAGE, number=1, message="StandardSqlField",)
+
+
__all__ = tuple(sorted(__protobuf__.manifest))
diff --git a/google/cloud/bigquery_v2/types/table_reference.py b/google/cloud/bigquery_v2/types/table_reference.py
index d213e8bb6..d56e5b09f 100644
--- a/google/cloud/bigquery_v2/types/table_reference.py
+++ b/google/cloud/bigquery_v2/types/table_reference.py
@@ -1,5 +1,4 @@
# -*- coding: utf-8 -*-
-
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,7 +13,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-
import proto # type: ignore
@@ -25,7 +23,6 @@
class TableReference(proto.Message):
r"""
-
Attributes:
project_id (str):
Required. The ID of the project containing
@@ -39,13 +36,23 @@ class TableReference(proto.Message):
maximum length is 1,024 characters. Certain operations allow
suffixing of the table ID with a partition decorator, such
as ``sample_table$20190123``.
+ project_id_alternative (Sequence[str]):
+ The alternative field that will be used when ESF is not able
+ to translate the received data to the project_id field.
+ dataset_id_alternative (Sequence[str]):
+ The alternative field that will be used when ESF is not able
+ to translate the received data to the dataset_id field.
+ table_id_alternative (Sequence[str]):
+ The alternative field that will be used when ESF is not able
+ to translate the received data to the table_id field.
"""
- project_id = proto.Field(proto.STRING, number=1)
-
- dataset_id = proto.Field(proto.STRING, number=2)
-
- table_id = proto.Field(proto.STRING, number=3)
+ project_id = proto.Field(proto.STRING, number=1,)
+ dataset_id = proto.Field(proto.STRING, number=2,)
+ table_id = proto.Field(proto.STRING, number=3,)
+ project_id_alternative = proto.RepeatedField(proto.STRING, number=4,)
+ dataset_id_alternative = proto.RepeatedField(proto.STRING, number=5,)
+ table_id_alternative = proto.RepeatedField(proto.STRING, number=6,)
__all__ = tuple(sorted(__protobuf__.manifest))
diff --git a/noxfile.py b/noxfile.py
index 942525ca9..9077924e9 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -21,6 +21,7 @@
import nox
+PYTYPE_VERSION = "pytype==2021.4.9"
BLACK_VERSION = "black==19.10b0"
BLACK_PATHS = ("docs", "google", "samples", "tests", "noxfile.py", "setup.py")
@@ -31,6 +32,7 @@
# 'docfx' is excluded since it only needs to run in 'docs-presubmit'
nox.options.sessions = [
+ "unit_noextras",
"unit",
"system",
"snippets",
@@ -38,11 +40,12 @@
"lint",
"lint_setup_py",
"blacken",
+ "pytype",
"docs",
]
-def default(session):
+def default(session, install_extras=True):
"""Default unit test session.
This is intended to be run **without** an interpreter set, so
@@ -65,7 +68,8 @@ def default(session):
constraints_path,
)
- session.install("-e", ".[all]", "-c", constraints_path)
+ install_target = ".[all]" if install_extras else "."
+ session.install("-e", install_target, "-c", constraints_path)
session.install("ipython", "-c", constraints_path)
@@ -73,8 +77,8 @@ def default(session):
session.run(
"py.test",
"--quiet",
- "--cov=google.cloud.bigquery",
- "--cov=tests.unit",
+ "--cov=google/cloud/bigquery",
+ "--cov=tests/unit",
"--cov-append",
"--cov-config=.coveragerc",
"--cov-report=",
@@ -90,6 +94,25 @@ def unit(session):
default(session)
+@nox.session(python=UNIT_TEST_PYTHON_VERSIONS[-1])
+def unit_noextras(session):
+ """Run the unit test suite without the optional extras installed."""
+ default(session, install_extras=False)
+
+
+@nox.session(python=DEFAULT_PYTHON_VERSION)
+def pytype(session):
+ """Run type checks."""
+ # An indirect dependency attrs==21.1.0 breaks the check, and installing a less
+ # recent version avoids the error until a possibly better fix is found.
+ # https://github.com/googleapis/python-bigquery/issues/655
+ session.install("attrs==20.3.0")
+ session.install("-e", ".[all]")
+ session.install("ipython")
+ session.install(PYTYPE_VERSION)
+ session.run("pytype")
+
+
@nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS)
def system(session):
"""Run the system test suite."""
@@ -113,7 +136,14 @@ def system(session):
session.install(
"mock", "pytest", "psutil", "google-cloud-testutils", "-c", constraints_path
)
- session.install("google-cloud-storage", "-c", constraints_path)
+ if os.environ.get("GOOGLE_API_USE_CLIENT_CERTIFICATE", "") == "true":
+ # mTLS test requires pyopenssl and latest google-cloud-storage
+ session.install("google-cloud-storage", "pyopenssl")
+ else:
+ session.install("google-cloud-storage", "-c", constraints_path)
+
+ # Data Catalog needed for the column ACL test with a real Policy Tag.
+ session.install("google-cloud-datacatalog", "-c", constraints_path)
session.install("-e", ".[all]", "-c", constraints_path)
session.install("ipython", "-c", constraints_path)
@@ -130,10 +160,6 @@ def snippets(session):
if os.environ.get("RUN_SNIPPETS_TESTS", "true") == "false":
session.skip("RUN_SNIPPETS_TESTS is set to false, skipping")
- # Sanity check: Only run snippets tests if the environment variable is set.
- if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""):
- session.skip("Credentials must be set via environment variable.")
-
constraints_path = str(
CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt"
)
@@ -184,6 +210,7 @@ def prerelease_deps(session):
session.install("--pre", "grpcio", "pandas")
session.install(
"freezegun",
+ "google-cloud-datacatalog",
"google-cloud-storage",
"google-cloud-testutils",
"IPython",
@@ -230,15 +257,12 @@ def lint_setup_py(session):
session.run("python", "setup.py", "check", "--restructuredtext", "--strict")
-@nox.session(python="3.6")
+@nox.session(python=DEFAULT_PYTHON_VERSION)
def blacken(session):
"""Run black.
Format code to uniform standard.
-
- This currently uses Python 3.6 due to the automated Kokoro run of synthtool.
- That run uses an image that doesn't have 3.6 installed. Before updating this
- check the state of the `gcp_ubuntu_config` we use for that Kokoro run.
"""
+
session.install(BLACK_VERSION)
session.run("black", *BLACK_PATHS)
@@ -247,7 +271,7 @@ def blacken(session):
def docs(session):
"""Build the docs."""
- session.install("ipython", "recommonmark", "sphinx", "sphinx_rtd_theme")
+ session.install("ipython", "recommonmark", "sphinx==4.0.1", "sphinx_rtd_theme")
session.install("google-cloud-storage")
session.install("-e", ".[all]")
@@ -271,7 +295,9 @@ def docfx(session):
"""Build the docfx yaml files for this library."""
session.install("-e", ".")
- session.install("sphinx", "alabaster", "recommonmark", "sphinx-docfx-yaml")
+ session.install(
+ "sphinx==4.0.1", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml"
+ )
shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True)
session.run(
diff --git a/owlbot.py b/owlbot.py
new file mode 100644
index 000000000..8664b658a
--- /dev/null
+++ b/owlbot.py
@@ -0,0 +1,172 @@
+# Copyright 2018 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""This script is used to synthesize generated parts of this library."""
+import textwrap
+
+import synthtool as s
+from synthtool import gcp
+from synthtool.languages import python
+
+common = gcp.CommonTemplates()
+
+default_version = "v2"
+
+for library in s.get_staging_dirs(default_version):
+ # Do not expose ModelServiceClient and ModelServiceAsyncClient, as there
+ # is no public API endpoint for the models service.
+ s.replace(
+ library / f"google/cloud/bigquery_{library.name}/__init__.py",
+ r"from \.services\.model_service import ModelServiceClient",
+ "",
+ )
+
+ s.replace(
+ library / f"google/cloud/bigquery_{library.name}/__init__.py",
+ r"from \.services\.model_service import ModelServiceAsyncClient",
+ "",
+ )
+
+ s.replace(
+ library / f"google/cloud/bigquery_{library.name}/__init__.py",
+ r"""["']ModelServiceClient["'],""",
+ "",
+ )
+
+ s.replace(
+ library / f"google/cloud/bigquery_{library.name}/__init__.py",
+ r"""["']ModelServiceAsyncClient["'],""",
+ "",
+ )
+
+ # Adjust Model docstring so that Sphinx does not think that "predicted_" is
+ # a reference to something, issuing a false warning.
+ s.replace(
+ library / f"google/cloud/bigquery_{library.name}/types/model.py",
+ r'will have a "predicted_"',
+ "will have a `predicted_`",
+ )
+
+ # Avoid breaking change due to change in field renames.
+ # https://github.com/googleapis/python-bigquery/issues/319
+ s.replace(
+ library / f"google/cloud/bigquery_{library.name}/types/standard_sql.py",
+ r"type_ ",
+ "type ",
+ )
+
+ s.move(
+ library,
+ excludes=[
+ "*.tar.gz",
+ ".coveragerc",
+ "docs/index.rst",
+ f"docs/bigquery_{library.name}/*_service.rst",
+ f"docs/bigquery_{library.name}/services.rst",
+ "README.rst",
+ "noxfile.py",
+ "setup.py",
+ f"scripts/fixup_bigquery_{library.name}_keywords.py",
+ "google/cloud/bigquery/__init__.py",
+ "google/cloud/bigquery/py.typed",
+ # There are no public API endpoints for the generated ModelServiceClient,
+ # thus there's no point in generating it and its tests.
+ f"google/cloud/bigquery_{library.name}/services/**",
+ f"tests/unit/gapic/bigquery_{library.name}/**",
+ ],
+ )
+
+s.remove_staging_dirs()
+
+# ----------------------------------------------------------------------------
+# Add templated files
+# ----------------------------------------------------------------------------
+templated_files = common.py_library(
+ cov_level=100,
+ samples=True,
+ microgenerator=True,
+ split_system_tests=True,
+ intersphinx_dependencies={
+ "pandas": "http://pandas.pydata.org/pandas-docs/dev",
+ "geopandas": "https://geopandas.org/",
+ },
+)
+
+# BigQuery has a custom multiprocessing note
+s.move(
+ templated_files,
+ excludes=[
+ "noxfile.py",
+ "docs/multiprocessing.rst",
+ ".coveragerc",
+ # Include custom SNIPPETS_TESTS job for performance.
+ # https://github.com/googleapis/python-bigquery/issues/191
+ ".kokoro/presubmit/presubmit.cfg",
+ # Group all renovate PRs together. If this works well, remove this and
+ # update the shared templates (possibly with configuration option to
+ # py_library.)
+ "renovate.json",
+ ],
+)
+
+# ----------------------------------------------------------------------------
+# Samples templates
+# ----------------------------------------------------------------------------
+
+python.py_samples()
+
+s.replace(
+ "docs/conf.py",
+ r'\{"members": True\}',
+ '{"members": True, "inherited-members": True}',
+)
+
+# Tell Sphinx to ignore autogenerated docs files.
+s.replace(
+ "docs/conf.py",
+ r'"samples/snippets/README\.rst",',
+ '\\g<0>\n "bigquery_v2/services.rst", # generated by the code generator',
+)
+
+# ----------------------------------------------------------------------------
+# pytype-related changes
+# ----------------------------------------------------------------------------
+
+# Add .pytype to .gitignore
+s.replace(".gitignore", r"\.pytest_cache", "\\g<0>\n.pytype")
+
+# Add pytype config to setup.cfg
+s.replace(
+ "setup.cfg",
+ r"universal = 1",
+ textwrap.dedent(
+ """ \\g<0>
+
+ [pytype]
+ python_version = 3.8
+ inputs =
+ google/cloud/
+ exclude =
+ tests/
+ google/cloud/bigquery_v2/
+ output = .pytype/
+ disable =
+ # There's some issue with finding some pyi files, thus disabling.
+ # The issue https://github.com/google/pytype/issues/150 is closed, but the
+ # error still occurs for some reason.
+ pyi-error"""
+ ),
+)
+
+s.shell.run(["nox", "-s", "blacken"], hide_output=False)
diff --git a/renovate.json b/renovate.json
index 4fa949311..713c60bb4 100644
--- a/renovate.json
+++ b/renovate.json
@@ -1,5 +1,9 @@
{
"extends": [
- "config:base", ":preserveSemverRanges"
- ]
+ "config:base", "group:all", ":preserveSemverRanges"
+ ],
+ "ignorePaths": [".pre-commit-config.yaml"],
+ "pip_requirements": {
+ "fileMatch": ["requirements-test.txt", "samples/[\\S/]*constraints.txt", "samples/[\\S/]*constraints-test.txt"]
+ }
}
diff --git a/samples/client_query_w_timestamp_params.py b/samples/client_query_w_timestamp_params.py
index ca8eec0b5..41a27770e 100644
--- a/samples/client_query_w_timestamp_params.py
+++ b/samples/client_query_w_timestamp_params.py
@@ -18,7 +18,6 @@ def client_query_w_timestamp_params():
# [START bigquery_query_params_timestamps]
import datetime
- import pytz
from google.cloud import bigquery
# Construct a BigQuery client object.
@@ -30,7 +29,7 @@ def client_query_w_timestamp_params():
bigquery.ScalarQueryParameter(
"ts_value",
"TIMESTAMP",
- datetime.datetime(2016, 12, 7, 8, 0, tzinfo=pytz.UTC),
+ datetime.datetime(2016, 12, 7, 8, 0, tzinfo=datetime.timezone.utc),
)
]
)
diff --git a/samples/create_routine.py b/samples/create_routine.py
index 012c7927a..1cb4a80b4 100644
--- a/samples/create_routine.py
+++ b/samples/create_routine.py
@@ -22,7 +22,7 @@ def create_routine(routine_id):
# Construct a BigQuery client object.
client = bigquery.Client()
- # TODO(developer): Choose a fully-qualified ID for the routine.
+ # TODO(developer): Choose a fully qualified ID for the routine.
# routine_id = "my-project.my_dataset.my_routine"
routine = bigquery.Routine(
diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py
index ab2c49227..b008613f0 100644
--- a/samples/geography/noxfile.py
+++ b/samples/geography/noxfile.py
@@ -17,6 +17,7 @@
import os
from pathlib import Path
import sys
+from typing import Callable, Dict, List, Optional
import nox
@@ -27,8 +28,9 @@
# WARNING - WARNING - WARNING - WARNING - WARNING
# WARNING - WARNING - WARNING - WARNING - WARNING
-# Copy `noxfile_config.py` to your directory and modify it instead.
+BLACK_VERSION = "black==19.10b0"
+# Copy `noxfile_config.py` to your directory and modify it instead.
# `TEST_CONFIG` dict is a configuration hook that allows users to
# modify the test configurations. The values here should be in sync
@@ -37,7 +39,7 @@
TEST_CONFIG = {
# You can opt out from the test for specific Python versions.
- "ignored_versions": ["2.7"],
+ "ignored_versions": [],
# Old samples are opted out of enforcing Python type hints
# All new samples should feature them
"enforce_type_hints": False,
@@ -47,6 +49,10 @@
# to use your own Cloud project.
"gcloud_project_env": "GOOGLE_CLOUD_PROJECT",
# 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT',
+ # If you need to use a specific version of pip,
+ # change pip_version_override to the string representation
+ # of the version number, for example, "20.2.4"
+ "pip_version_override": None,
# A dictionary you want to inject into your test. Don't put any
# secrets here. These values will override predefined values.
"envs": {},
@@ -65,7 +71,7 @@
TEST_CONFIG.update(TEST_CONFIG_OVERRIDE)
-def get_pytest_env_vars():
+def get_pytest_env_vars() -> Dict[str, str]:
"""Returns a dict for pytest invocation."""
ret = {}
@@ -80,21 +86,24 @@ def get_pytest_env_vars():
# DO NOT EDIT - automatically generated.
-# All versions used to tested samples.
-ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8"]
+# All versions used to test samples.
+ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9"]
# Any default versions that should be ignored.
IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"]
TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS])
-INSTALL_LIBRARY_FROM_SOURCE = bool(os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False))
+INSTALL_LIBRARY_FROM_SOURCE = os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False) in (
+ "True",
+ "true",
+)
#
# Style Checks
#
-def _determine_local_import_names(start_dir):
+def _determine_local_import_names(start_dir: str) -> List[str]:
"""Determines all import names that should be considered "local".
This is used when running the linter to insure that import order is
@@ -132,7 +141,7 @@ def _determine_local_import_names(start_dir):
@nox.session
-def lint(session):
+def lint(session: nox.sessions.Session) -> None:
if not TEST_CONFIG["enforce_type_hints"]:
session.install("flake8", "flake8-import-order")
else:
@@ -153,8 +162,8 @@ def lint(session):
@nox.session
-def blacken(session):
- session.install("black")
+def blacken(session: nox.sessions.Session) -> None:
+ session.install(BLACK_VERSION)
python_files = [path for path in os.listdir(".") if path.endswith(".py")]
session.run("black", *python_files)
@@ -168,13 +177,24 @@ def blacken(session):
PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"]
-def _session_tests(session, post_install=None):
+def _session_tests(
+ session: nox.sessions.Session, post_install: Callable = None
+) -> None:
+ if TEST_CONFIG["pip_version_override"]:
+ pip_version = TEST_CONFIG["pip_version_override"]
+ session.install(f"pip=={pip_version}")
"""Runs py.test for a particular project."""
if os.path.exists("requirements.txt"):
- session.install("-r", "requirements.txt")
+ if os.path.exists("constraints.txt"):
+ session.install("-r", "requirements.txt", "-c", "constraints.txt")
+ else:
+ session.install("-r", "requirements.txt")
if os.path.exists("requirements-test.txt"):
- session.install("-r", "requirements-test.txt")
+ if os.path.exists("constraints-test.txt"):
+ session.install("-r", "requirements-test.txt", "-c", "constraints-test.txt")
+ else:
+ session.install("-r", "requirements-test.txt")
if INSTALL_LIBRARY_FROM_SOURCE:
session.install("-e", _get_repo_root())
@@ -189,12 +209,12 @@ def _session_tests(session, post_install=None):
# on travis where slow and flaky tests are excluded.
# See http://doc.pytest.org/en/latest/_modules/_pytest/main.html
success_codes=[0, 5],
- env=get_pytest_env_vars()
+ env=get_pytest_env_vars(),
)
@nox.session(python=ALL_VERSIONS)
-def py(session):
+def py(session: nox.sessions.Session) -> None:
"""Runs py.test for a sample using the specified version of Python."""
if session.python in TESTED_VERSIONS:
_session_tests(session)
@@ -209,7 +229,7 @@ def py(session):
#
-def _get_repo_root():
+def _get_repo_root() -> Optional[str]:
""" Returns the root folder of the project. """
# Get root of this repository. Assume we don't have directories nested deeper than 10 items.
p = Path(os.getcwd())
@@ -232,7 +252,7 @@ def _get_repo_root():
@nox.session
@nox.parametrize("path", GENERATED_READMES)
-def readmegen(session, path):
+def readmegen(session: nox.sessions.Session, path: str) -> None:
"""(Re-)generates the readme for a sample."""
session.install("jinja2", "pyyaml")
dir_ = os.path.dirname(path)
diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt
index 676ff949e..b0cf76724 100644
--- a/samples/geography/requirements-test.txt
+++ b/samples/geography/requirements-test.txt
@@ -1,2 +1,2 @@
-pytest==5.4.3
-mock==4.0.2
+pytest==6.2.4
+mock==4.0.3
diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt
index 338cf2e89..b5fe247cb 100644
--- a/samples/geography/requirements.txt
+++ b/samples/geography/requirements.txt
@@ -1,3 +1,50 @@
+attrs==21.2.0
+cachetools==4.2.2
+certifi==2021.5.30
+cffi==1.14.6
+charset-normalizer==2.0.4
+click==8.0.1
+click-plugins==1.1.1
+cligj==0.7.2
+dataclasses==0.6; python_version < '3.7'
+Fiona==1.8.20
geojson==2.5.0
-google-cloud-bigquery==2.6.2
+geopandas==0.9.0
+google-api-core==1.31.2
+google-auth==1.35.0
+google-cloud-bigquery==2.25.0
+google-cloud-bigquery-storage==2.6.3
+google-cloud-core==2.0.0
+google-crc32c==1.1.2
+google-resumable-media==1.3.3
+googleapis-common-protos==1.53.0
+grpcio==1.39.0
+idna==3.2
+importlib-metadata==4.6.4
+libcst==0.3.20
+munch==2.5.0
+mypy-extensions==0.4.3
+numpy==1.19.5; python_version < "3.7"
+numpy==1.21.2; python_version > "3.6"
+packaging==21.0
+pandas==1.1.5; python_version < '3.7'
+pandas==1.3.2; python_version >= '3.7'
+proto-plus==1.19.0
+protobuf==3.17.3
+pyarrow==5.0.0
+pyasn1==0.4.8
+pyasn1-modules==0.2.8
+pycparser==2.20
+pyparsing==2.4.7
+pyproj==3.0.1
+python-dateutil==2.8.2
+pytz==2021.1
+PyYAML==5.4.1
+requests==2.26.0
+rsa==4.7.2
Shapely==1.7.1
+six==1.16.0
+typing-extensions==3.10.0.0
+typing-inspect==0.7.1
+urllib3==1.26.6
+zipp==3.5.0
diff --git a/samples/geography/to_geodataframe.py b/samples/geography/to_geodataframe.py
new file mode 100644
index 000000000..fa8073fef
--- /dev/null
+++ b/samples/geography/to_geodataframe.py
@@ -0,0 +1,32 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from google.cloud import bigquery
+
+client = bigquery.Client()
+
+
+def get_austin_service_requests_as_geography():
+    # [START bigquery_query_results_geodataframe]
+    # Combine each request's longitude and latitude into one GEOGRAPHY point.
+    sql = """
+        SELECT created_date, complaint_description,
+               ST_GEOGPOINT(longitude, latitude) as location
+        FROM bigquery-public-data.austin_311.311_service_requests
+        LIMIT 10
+    """
+    # Run the query and fetch its rows with the job's to_geodataframe() method.
+    df = client.query(sql).to_geodataframe()
+    # [END bigquery_query_results_geodataframe]
+    return df
diff --git a/samples/geography/to_geodataframe_test.py b/samples/geography/to_geodataframe_test.py
new file mode 100644
index 000000000..7a2ba6937
--- /dev/null
+++ b/samples/geography/to_geodataframe_test.py
@@ -0,0 +1,25 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+from .to_geodataframe import get_austin_service_requests_as_geography
+
+
+def test_get_austin_service_requests_as_geography():
+ geopandas = pytest.importorskip("geopandas")
+ df = get_austin_service_requests_as_geography()
+ assert isinstance(df, geopandas.GeoDataFrame)
+ assert len(list(df)) == 3 # verify the number of columns
+ assert len(df) == 10 # verify the number of rows
diff --git a/samples/snippets/authenticate_service_account.py b/samples/snippets/authenticate_service_account.py
index 58cd2b542..c07848bee 100644
--- a/samples/snippets/authenticate_service_account.py
+++ b/samples/snippets/authenticate_service_account.py
@@ -30,6 +30,11 @@ def main():
key_path, scopes=["https://www.googleapis.com/auth/cloud-platform"],
)
+ # Alternatively, use service_account.Credentials.from_service_account_info()
+    # to set credentials directly from a JSON object rather than from a file path.
+ # TODO(developer): Set key_json to the content of the service account key file.
+ # credentials = service_account.Credentials.from_service_account_info(key_json)
+
client = bigquery.Client(credentials=credentials, project=credentials.project_id,)
# [END bigquery_client_json_credentials]
return client
diff --git a/samples/snippets/conftest.py b/samples/snippets/conftest.py
index d22a33318..74984f902 100644
--- a/samples/snippets/conftest.py
+++ b/samples/snippets/conftest.py
@@ -14,6 +14,19 @@
from google.cloud import bigquery
import pytest
+import test_utils.prefixer
+
+
+prefixer = test_utils.prefixer.Prefixer("python-bigquery", "samples/snippets")
+
+
+@pytest.fixture(scope="session", autouse=True)
+def cleanup_datasets(bigquery_client: bigquery.Client):
+    """At session start, delete datasets that prefixer.should_cleanup() selects."""
+    for dataset in bigquery_client.list_datasets():
+        if not prefixer.should_cleanup(dataset.dataset_id):
+            continue
+        bigquery_client.delete_dataset(dataset, delete_contents=True, not_found_ok=True)
@pytest.fixture(scope="session")
@@ -25,3 +38,54 @@ def bigquery_client():
@pytest.fixture(scope="session")
def project_id(bigquery_client):
return bigquery_client.project
+
+
+@pytest.fixture(scope="session")
+def dataset_id(bigquery_client: bigquery.Client, project_id: str):
+    """Yield the ID of a new session-scoped dataset; delete it at teardown."""
+    name = prefixer.create_prefix()
+    dataset = bigquery.Dataset(f"{project_id}.{name}")
+    bigquery_client.create_dataset(dataset)
+    yield name
+    bigquery_client.delete_dataset(dataset, delete_contents=True, not_found_ok=True)
+
+
+@pytest.fixture(scope="session")
+def dataset_id_us_east1(bigquery_client: bigquery.Client, project_id: str):
+    """Yield the ID of a session-scoped us-east1 dataset; delete it at teardown."""
+    name = prefixer.create_prefix()
+    dataset = bigquery.Dataset(f"{project_id}.{name}")
+    dataset.location = "us-east1"
+    bigquery_client.create_dataset(dataset)
+    yield name
+    bigquery_client.delete_dataset(dataset, delete_contents=True, not_found_ok=True)
+
+
+@pytest.fixture(scope="session")
+def table_id_us_east1(
+    bigquery_client: bigquery.Client, project_id: str, dataset_id_us_east1: str
+):
+    """Yield the full ID of a one-column table in the us-east1 dataset fixture."""
+    qualified_id = f"{project_id}.{dataset_id_us_east1}.{prefixer.create_prefix()}"
+    columns = [bigquery.SchemaField("string_col", "STRING")]
+    table = bigquery.Table(qualified_id, schema=columns)
+    bigquery_client.create_table(table)
+    yield qualified_id
+    # Teardown: drop the table; a missing table is not treated as an error.
+    bigquery_client.delete_table(table, not_found_ok=True)
+
+
+@pytest.fixture
+def random_table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str):
+    """Yield a newly prefixed, fully qualified table ID for each requesting test.
+
+    No table is created here, so the ID can serve as the target of a load job.
+    """
+    table_path = f"{project_id}.{dataset_id}.{prefixer.create_prefix()}"
+    yield table_path
+    bigquery_client.delete_table(table_path, not_found_ok=True)
+
+
+@pytest.fixture
+def bigquery_client_patch(monkeypatch, bigquery_client):
+ monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client)
diff --git a/samples/snippets/delete_job.py b/samples/snippets/delete_job.py
new file mode 100644
index 000000000..abed0c90d
--- /dev/null
+++ b/samples/snippets/delete_job.py
@@ -0,0 +1,44 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def delete_job_metadata(job_id: str, location: str):
+    orig_job_id = job_id
+    orig_location = location
+    # [START bigquery_delete_job]
+    from google.cloud import bigquery
+    from google.api_core import exceptions
+
+    # TODO(developer): Set the job ID to the ID of the job whose metadata you
+    #                  wish to delete.
+    job_id = "abcd-efgh-ijkl-mnop"
+
+    # TODO(developer): Set the location to the region or multi-region
+    #                  containing the job.
+    location = "us-east1"
+
+    # [END bigquery_delete_job]
+    job_id = orig_job_id
+    location = orig_location
+
+    # [START bigquery_delete_job]
+    client = bigquery.Client()
+
+    client.delete_job_metadata(job_id, location=location)
+    # A NotFound error from get_job() confirms the job metadata no longer exists.
+    try:
+        client.get_job(job_id, location=location)
+    except exceptions.NotFound:
+        print(f"Job metadata for job {location}:{job_id} was deleted.")
+    # [END bigquery_delete_job]
diff --git a/samples/snippets/delete_job_test.py b/samples/snippets/delete_job_test.py
new file mode 100644
index 000000000..c9baa817d
--- /dev/null
+++ b/samples/snippets/delete_job_test.py
@@ -0,0 +1,33 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from google.cloud import bigquery
+
+import delete_job
+
+
+def test_delete_job_metadata(
+ capsys, bigquery_client: bigquery.Client, table_id_us_east1: str
+):
+ query_job: bigquery.QueryJob = bigquery_client.query(
+ f"SELECT COUNT(*) FROM `{table_id_us_east1}`", location="us-east1",
+ )
+ query_job.result()
+ assert query_job.job_id is not None
+
+ delete_job.delete_job_metadata(query_job.job_id, "us-east1")
+
+ out, _ = capsys.readouterr()
+ assert "deleted" in out
+ assert f"us-east1:{query_job.job_id}" in out
diff --git a/samples/snippets/load_table_uri_firestore.py b/samples/snippets/load_table_uri_firestore.py
new file mode 100644
index 000000000..bf9d01349
--- /dev/null
+++ b/samples/snippets/load_table_uri_firestore.py
@@ -0,0 +1,55 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def load_table_uri_firestore(table_id):
+ orig_table_id = table_id
+ # [START bigquery_load_table_gcs_firestore]
+ # TODO(developer): Set table_id to the ID of the table to create.
+ table_id = "your-project.your_dataset.your_table_name"
+
+ # TODO(developer): Set uri to the path of the kind export metadata
+ uri = (
+ "gs://cloud-samples-data/bigquery/us-states"
+ "/2021-07-02T16:04:48_70344/all_namespaces/kind_us-states"
+ "/all_namespaces_kind_us-states.export_metadata"
+ )
+
+ # TODO(developer): Set projection_fields to a list of document properties
+ # to import. Leave unset or set to `None` for all fields.
+ projection_fields = ["name", "post_abbr"]
+
+ # [END bigquery_load_table_gcs_firestore]
+ table_id = orig_table_id
+
+ # [START bigquery_load_table_gcs_firestore]
+ from google.cloud import bigquery
+
+ # Construct a BigQuery client object.
+ client = bigquery.Client()
+
+ job_config = bigquery.LoadJobConfig(
+ source_format=bigquery.SourceFormat.DATASTORE_BACKUP,
+ projection_fields=projection_fields,
+ )
+
+ load_job = client.load_table_from_uri(
+ uri, table_id, job_config=job_config
+ ) # Make an API request.
+
+ load_job.result() # Waits for the job to complete.
+
+ destination_table = client.get_table(table_id)
+ print("Loaded {} rows.".format(destination_table.num_rows))
+ # [END bigquery_load_table_gcs_firestore]
diff --git a/samples/snippets/load_table_uri_firestore_test.py b/samples/snippets/load_table_uri_firestore_test.py
new file mode 100644
index 000000000..ffa02cdf9
--- /dev/null
+++ b/samples/snippets/load_table_uri_firestore_test.py
@@ -0,0 +1,21 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import load_table_uri_firestore
+
+
+def test_load_table_uri_firestore(capsys, random_table_id):
+ load_table_uri_firestore.load_table_uri_firestore(random_table_id)
+ out, _ = capsys.readouterr()
+ assert "Loaded 50 rows." in out
diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py
index ab2c49227..b008613f0 100644
--- a/samples/snippets/noxfile.py
+++ b/samples/snippets/noxfile.py
@@ -17,6 +17,7 @@
import os
from pathlib import Path
import sys
+from typing import Callable, Dict, List, Optional
import nox
@@ -27,8 +28,9 @@
# WARNING - WARNING - WARNING - WARNING - WARNING
# WARNING - WARNING - WARNING - WARNING - WARNING
-# Copy `noxfile_config.py` to your directory and modify it instead.
+BLACK_VERSION = "black==19.10b0"
+# Copy `noxfile_config.py` to your directory and modify it instead.
# `TEST_CONFIG` dict is a configuration hook that allows users to
# modify the test configurations. The values here should be in sync
@@ -37,7 +39,7 @@
TEST_CONFIG = {
# You can opt out from the test for specific Python versions.
- "ignored_versions": ["2.7"],
+ "ignored_versions": [],
# Old samples are opted out of enforcing Python type hints
# All new samples should feature them
"enforce_type_hints": False,
@@ -47,6 +49,10 @@
# to use your own Cloud project.
"gcloud_project_env": "GOOGLE_CLOUD_PROJECT",
# 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT',
+ # If you need to use a specific version of pip,
+ # change pip_version_override to the string representation
+ # of the version number, for example, "20.2.4"
+ "pip_version_override": None,
# A dictionary you want to inject into your test. Don't put any
# secrets here. These values will override predefined values.
"envs": {},
@@ -65,7 +71,7 @@
TEST_CONFIG.update(TEST_CONFIG_OVERRIDE)
-def get_pytest_env_vars():
+def get_pytest_env_vars() -> Dict[str, str]:
"""Returns a dict for pytest invocation."""
ret = {}
@@ -80,21 +86,24 @@ def get_pytest_env_vars():
# DO NOT EDIT - automatically generated.
-# All versions used to tested samples.
-ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8"]
+# All versions used to test samples.
+ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9"]
# Any default versions that should be ignored.
IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"]
TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS])
-INSTALL_LIBRARY_FROM_SOURCE = bool(os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False))
+INSTALL_LIBRARY_FROM_SOURCE = os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False) in (
+ "True",
+ "true",
+)
#
# Style Checks
#
-def _determine_local_import_names(start_dir):
+def _determine_local_import_names(start_dir: str) -> List[str]:
"""Determines all import names that should be considered "local".
This is used when running the linter to insure that import order is
@@ -132,7 +141,7 @@ def _determine_local_import_names(start_dir):
@nox.session
-def lint(session):
+def lint(session: nox.sessions.Session) -> None:
if not TEST_CONFIG["enforce_type_hints"]:
session.install("flake8", "flake8-import-order")
else:
@@ -153,8 +162,8 @@ def lint(session):
@nox.session
-def blacken(session):
- session.install("black")
+def blacken(session: nox.sessions.Session) -> None:
+ session.install(BLACK_VERSION)
python_files = [path for path in os.listdir(".") if path.endswith(".py")]
session.run("black", *python_files)
@@ -168,13 +177,24 @@ def blacken(session):
PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"]
-def _session_tests(session, post_install=None):
+def _session_tests(
+ session: nox.sessions.Session, post_install: Callable = None
+) -> None:
+ if TEST_CONFIG["pip_version_override"]:
+ pip_version = TEST_CONFIG["pip_version_override"]
+ session.install(f"pip=={pip_version}")
"""Runs py.test for a particular project."""
if os.path.exists("requirements.txt"):
- session.install("-r", "requirements.txt")
+ if os.path.exists("constraints.txt"):
+ session.install("-r", "requirements.txt", "-c", "constraints.txt")
+ else:
+ session.install("-r", "requirements.txt")
if os.path.exists("requirements-test.txt"):
- session.install("-r", "requirements-test.txt")
+ if os.path.exists("constraints-test.txt"):
+ session.install("-r", "requirements-test.txt", "-c", "constraints-test.txt")
+ else:
+ session.install("-r", "requirements-test.txt")
if INSTALL_LIBRARY_FROM_SOURCE:
session.install("-e", _get_repo_root())
@@ -189,12 +209,12 @@ def _session_tests(session, post_install=None):
# on travis where slow and flaky tests are excluded.
# See http://doc.pytest.org/en/latest/_modules/_pytest/main.html
success_codes=[0, 5],
- env=get_pytest_env_vars()
+ env=get_pytest_env_vars(),
)
@nox.session(python=ALL_VERSIONS)
-def py(session):
+def py(session: nox.sessions.Session) -> None:
"""Runs py.test for a sample using the specified version of Python."""
if session.python in TESTED_VERSIONS:
_session_tests(session)
@@ -209,7 +229,7 @@ def py(session):
#
-def _get_repo_root():
+def _get_repo_root() -> Optional[str]:
""" Returns the root folder of the project. """
# Get root of this repository. Assume we don't have directories nested deeper than 10 items.
p = Path(os.getcwd())
@@ -232,7 +252,7 @@ def _get_repo_root():
@nox.session
@nox.parametrize("path", GENERATED_READMES)
-def readmegen(session, path):
+def readmegen(session: nox.sessions.Session, path: str) -> None:
"""(Re-)generates the readme for a sample."""
session.install("jinja2", "pyyaml")
dir_ = os.path.dirname(path)
diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt
index 676ff949e..b8dee50d0 100644
--- a/samples/snippets/requirements-test.txt
+++ b/samples/snippets/requirements-test.txt
@@ -1,2 +1,3 @@
-pytest==5.4.3
-mock==4.0.2
+google-cloud-testutils==1.0.0
+pytest==6.2.4
+mock==4.0.3
diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt
index 599b6d52f..d75c747fb 100644
--- a/samples/snippets/requirements.txt
+++ b/samples/snippets/requirements.txt
@@ -1,11 +1,12 @@
-google-cloud-bigquery==2.6.2
-google-cloud-bigquery-storage==2.2.1
-google-auth-oauthlib==0.4.2
-grpcio==1.35.0
+google-cloud-bigquery==2.25.0
+google-cloud-bigquery-storage==2.6.3
+google-auth-oauthlib==0.4.5
+grpcio==1.39.0
ipython==7.16.1; python_version < '3.7'
ipython==7.17.0; python_version >= '3.7'
-matplotlib==3.3.3
+matplotlib==3.3.4; python_version < '3.7'
+matplotlib==3.4.1; python_version >= '3.7'
pandas==1.1.5; python_version < '3.7'
-pandas==1.2.0; python_version >= '3.7'
-pyarrow==2.0.0
-pytz==2020.5
+pandas==1.3.2; python_version >= '3.7'
+pyarrow==5.0.0
+pytz==2021.1
diff --git a/samples/snippets/test_update_with_dml.py b/samples/snippets/test_update_with_dml.py
new file mode 100644
index 000000000..912fd76e2
--- /dev/null
+++ b/samples/snippets/test_update_with_dml.py
@@ -0,0 +1,36 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from google.cloud import bigquery
+import pytest
+
+from conftest import prefixer
+import update_with_dml
+
+
+@pytest.fixture
+def table_id(bigquery_client: bigquery.Client, project_id: str, dataset_id: str):
+    """Yield a freshly prefixed short table name; delete that table on teardown."""
+    name = f"{prefixer.create_prefix()}_update_with_dml"
+    yield name
+    bigquery_client.delete_table(f"{project_id}.{dataset_id}.{name}", not_found_ok=True)
+
+
+def test_update_with_dml(bigquery_client_patch, dataset_id, table_id):
+    """The sample should report that its UPDATE touched at least one row."""
+    # Redirect the sample from its default names to the fixture-provided ones.
+    overrides = {"dataset_id": dataset_id, "table_id": table_id}
+    # run_sample() hands back the value returned by its update_with_dml() call.
+    modified_rows = update_with_dml.run_sample(override_values=overrides)
+    assert modified_rows > 0
diff --git a/samples/snippets/update_with_dml.py b/samples/snippets/update_with_dml.py
new file mode 100644
index 000000000..7fd09dd80
--- /dev/null
+++ b/samples/snippets/update_with_dml.py
@@ -0,0 +1,82 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# [START bigquery_update_with_dml]
+import pathlib
+
+from google.cloud import bigquery
+from google.cloud.bigquery import enums
+
+
+def load_from_newline_delimited_json(
+    client: bigquery.Client,
+    filepath: pathlib.Path,
+    project_id: str,
+    dataset_id: str,
+    table_id: str,
+):
+    """Load a newline-delimited JSON file into the given table and wait for the job."""
+    destination = f"{project_id}.{dataset_id}.{table_id}"
+    job_config = bigquery.LoadJobConfig(
+        source_format=enums.SourceFormat.NEWLINE_DELIMITED_JSON,
+        schema=[
+            bigquery.SchemaField("id", enums.SqlTypeNames.STRING),
+            bigquery.SchemaField("user_id", enums.SqlTypeNames.INTEGER),
+            bigquery.SchemaField("login_time", enums.SqlTypeNames.TIMESTAMP),
+            bigquery.SchemaField("logout_time", enums.SqlTypeNames.TIMESTAMP),
+            bigquery.SchemaField("ip_address", enums.SqlTypeNames.STRING),
+        ],
+    )
+
+    with open(filepath, "rb") as source_file:
+        load_job = client.load_table_from_file(
+            source_file, destination, job_config=job_config
+        )
+    load_job.result()  # Wait for load job to finish.
+
+
+def update_with_dml(
+    client: bigquery.Client, project_id: str, dataset_id: str, table_id: str
+):
+    """Replace the trailing .N of every ip_address with .0; return the row count."""
+    query_text = f"""
+    UPDATE `{project_id}.{dataset_id}.{table_id}`
+    SET ip_address = REGEXP_REPLACE(ip_address, r"(\\.[0-9]+)$", ".0")
+    WHERE TRUE
+    """
+    query_job = client.query(query_text)
+    query_job.result()  # Wait for query job to finish.
+
+    # Report the affected-row count that the finished job exposes.
+    print(f"DML query modified {query_job.num_dml_affected_rows} rows.")
+    return query_job.num_dml_affected_rows
+
+
+def run_sample(override_values=None):
+    """Load the sample data, run the DML update, and return the rows it modified."""
+    client = bigquery.Client()
+    filepath = pathlib.Path(__file__).parent / "user_sessions_data.json"
+    project_id = client.project
+    dataset_id = "sample_db"
+    table_id = "UserSessions"
+    # [END bigquery_update_with_dml]
+    # To facilitate testing, the testing harness may override these values.
+    dataset_id = (override_values or {}).get("dataset_id", dataset_id)
+    table_id = (override_values or {}).get("table_id", table_id)
+    # [START bigquery_update_with_dml]
+    load_from_newline_delimited_json(client, filepath, project_id, dataset_id, table_id)
+    return update_with_dml(client, project_id, dataset_id, table_id)
+
+
+# [END bigquery_update_with_dml]
diff --git a/samples/snippets/user_sessions_data.json b/samples/snippets/user_sessions_data.json
new file mode 100644
index 000000000..7ea3715ad
--- /dev/null
+++ b/samples/snippets/user_sessions_data.json
@@ -0,0 +1,10 @@
+{"id":"2ad525d6-c832-4c3d-b7fe-59d104885519","user_id":"38","login_time":"1.47766087E9","logout_time":"1.477661109E9","ip_address":"192.0.2.12"}
+{"id":"53d65e20-6ea9-4650-98d9-a2111fbd1122","user_id":"88","login_time":"1.47707544E9","logout_time":"1.477075519E9","ip_address":"192.0.2.88"}
+{"id":"5e6c3021-d5e7-4ccd-84b2-adfa9176d13d","user_id":"39","login_time":"1.474022869E9","logout_time":"1.474022961E9","ip_address":"203.0.113.52"}
+{"id":"6196eefa-1498-4567-8ef0-498845b888d9","user_id":"52","login_time":"1.478604612E9","logout_time":"1.478604691E9","ip_address":"203.0.113.169"}
+{"id":"70656dc5-7e0f-49cf-9e00-f06ed93c1f5b","user_id":"46","login_time":"1.474089924E9","logout_time":"1.474090227E9","ip_address":"192.0.2.10"}
+{"id":"aafa5eef-ad49-49a7-9a0f-fbc7fd639bd3","user_id":"40","login_time":"1.478031161E9","logout_time":"1.478031388E9","ip_address":"203.0.113.18"}
+{"id":"d2792fc2-24dd-4260-9456-3fbe6cdfdd90","user_id":"5","login_time":"1.481259081E9","logout_time":"1.481259247E9","ip_address":"192.0.2.140"}
+{"id":"d835dc49-32f9-4790-b4eb-dddee62e0dcc","user_id":"62","login_time":"1.478892977E9","logout_time":"1.478893219E9","ip_address":"203.0.113.83"}
+{"id":"f4a0d3c7-351f-471c-8e11-e093e7a6ce75","user_id":"89","login_time":"1.459031555E9","logout_time":"1.459031831E9","ip_address":"203.0.113.233"}
+{"id":"f6e9f526-5b22-4679-9c3e-56a636e815bb","user_id":"97","login_time":"1.482426034E9","logout_time":"1.482426415E9","ip_address":"203.0.113.167"}
diff --git a/scripts/readme-gen/templates/install_deps.tmpl.rst b/scripts/readme-gen/templates/install_deps.tmpl.rst
index a0406dba8..275d64989 100644
--- a/scripts/readme-gen/templates/install_deps.tmpl.rst
+++ b/scripts/readme-gen/templates/install_deps.tmpl.rst
@@ -12,7 +12,7 @@ Install Dependencies
.. _Python Development Environment Setup Guide:
https://cloud.google.com/python/setup
-#. Create a virtualenv. Samples are compatible with Python 2.7 and 3.4+.
+#. Create a virtualenv. Samples are compatible with Python 3.6+.
.. code-block:: bash
diff --git a/setup.cfg b/setup.cfg
index c3a2b39f6..8eefc4435 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -17,3 +17,17 @@
# Generated by synthtool. DO NOT EDIT!
[bdist_wheel]
universal = 1
+
+[pytype]
+python_version = 3.8
+inputs =
+ google/cloud/
+exclude =
+ tests/
+ google/cloud/bigquery_v2/
+output = .pytype/
+disable =
+ # There's some issue with finding some pyi files, thus disabling.
+ # The issue https://github.com/google/pytype/issues/150 is closed, but the
+ # error still occurs for some reason.
+ pyi-error
diff --git a/setup.py b/setup.py
index ea2df4843..e7515493d 100644
--- a/setup.py
+++ b/setup.py
@@ -29,11 +29,20 @@
# 'Development Status :: 5 - Production/Stable'
release_status = "Development Status :: 5 - Production/Stable"
dependencies = [
- "google-api-core[grpc] >= 1.23.0, < 2.0.0dev",
+ "grpcio >= 1.38.1, < 2.0dev", # https://github.com/googleapis/python-bigquery/issues/695
+ # NOTE: Maintainers, please do not require google-api-core>=2.x.x
+ # Until this issue is closed
+ # https://github.com/googleapis/google-cloud-python/issues/10566
+ "google-api-core[grpc] >= 1.29.0, <3.0.0dev",
"proto-plus >= 1.10.0",
- "google-cloud-core >= 1.4.1, < 2.0dev",
- "google-resumable-media >= 0.6.0, < 2.0dev",
+ # NOTE: Maintainers, please do not require google-cloud-core>=2.x.x
+ # Until this issue is closed
+ # https://github.com/googleapis/google-cloud-python/issues/10566
+ "google-cloud-core >= 1.4.1, <3.0.0dev",
+ "google-resumable-media >= 0.6.0, < 3.0dev",
+ "packaging >= 14.3",
"protobuf >= 3.12.0",
+ "requests >= 2.18.0, < 3.0.0dev",
]
extras = {
"bqstorage": [
@@ -44,21 +53,28 @@
# See: https://github.com/googleapis/python-bigquery/issues/83 The
# grpc.Channel.close() method isn't added until 1.32.0.
# https://github.com/grpc/grpc/pull/15254
- "grpcio >= 1.32.0, < 2.0dev",
- "pyarrow >= 1.0.0, < 4.0dev",
+ "grpcio >= 1.38.1, < 2.0dev",
+ "pyarrow >= 3.0.0, < 6.0dev",
],
- "pandas": ["pandas>=0.23.0", "pyarrow >= 1.0.0, < 4.0dev",],
+ "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.6.0, <2.0dev"],
+ "pandas": ["pandas>=0.23.0", "pyarrow >= 3.0.0, < 6.0dev"],
+ "bignumeric_type": ["pyarrow >= 3.0.0, < 6.0dev"],
"tqdm": ["tqdm >= 4.7.4, <5.0.0dev"],
"opentelemetry": [
- "opentelemetry-api==0.11b0",
- "opentelemetry-sdk==0.11b0",
- "opentelemetry-instrumentation==0.11b0",
+ "opentelemetry-api >= 0.11b0",
+ "opentelemetry-sdk >= 0.11b0",
+ "opentelemetry-instrumentation >= 0.11b0",
],
}
all_extras = []
for extra in extras:
+ # Exclude this extra from "all" to avoid overly strict dependencies on core
+ # libraries such as pyarrow.
+ # https://github.com/googleapis/python-bigquery/issues/563
+ if extra in {"bignumeric_type"}:
+ continue
all_extras.extend(extras[extra])
extras["all"] = all_extras
diff --git a/synth.metadata b/synth.metadata
deleted file mode 100644
index 6b7854860..000000000
--- a/synth.metadata
+++ /dev/null
@@ -1,44 +0,0 @@
-{
- "sources": [
- {
- "git": {
- "name": ".",
- "remote": "git@github.com:tswast/python-bigquery.git",
- "sha": "5a422eb20c57dae66c5716fd319b66432d3edce6"
- }
- },
- {
- "git": {
- "name": "googleapis",
- "remote": "https://github.com/googleapis/googleapis.git",
- "sha": "df4fd38d040c5c8a0869936205bca13fb64b2cff",
- "internalRef": "344443035"
- }
- },
- {
- "git": {
- "name": "synthtool",
- "remote": "https://github.com/googleapis/synthtool.git",
- "sha": "c2de32114ec484aa708d32012d1fa8d75232daf5"
- }
- },
- {
- "git": {
- "name": "synthtool",
- "remote": "https://github.com/googleapis/synthtool.git",
- "sha": "c2de32114ec484aa708d32012d1fa8d75232daf5"
- }
- }
- ],
- "destinations": [
- {
- "client": {
- "source": "googleapis",
- "apiName": "bigquery",
- "apiVersion": "v2",
- "language": "python",
- "generator": "bazel"
- }
- }
- ]
-}
\ No newline at end of file
diff --git a/synth.py b/synth.py
deleted file mode 100644
index 341c5832f..000000000
--- a/synth.py
+++ /dev/null
@@ -1,120 +0,0 @@
-# Copyright 2018 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""This script is used to synthesize generated parts of this library."""
-
-import synthtool as s
-from synthtool import gcp
-from synthtool.languages import python
-
-gapic = gcp.GAPICBazel()
-common = gcp.CommonTemplates()
-version = "v2"
-
-library = gapic.py_library(
- service="bigquery",
- version=version,
- bazel_target=f"//google/cloud/bigquery/{version}:bigquery-{version}-py",
- include_protos=True,
-)
-
-s.move(
- library,
- excludes=[
- "docs/index.rst",
- "README.rst",
- "noxfile.py",
- "setup.py",
- "scripts/fixup_bigquery_v2_keywords.py",
- library / f"google/cloud/bigquery/__init__.py",
- library / f"google/cloud/bigquery/py.typed",
- # There are no public API endpoints for the generated ModelServiceClient,
- # thus there's no point in generating it and its tests.
- library / f"google/cloud/bigquery_{version}/services/**",
- library / f"tests/unit/gapic/bigquery_{version}/**",
- ],
-)
-
-# ----------------------------------------------------------------------------
-# Add templated files
-# ----------------------------------------------------------------------------
-templated_files = common.py_library(
- cov_level=100,
- samples=True,
- microgenerator=True,
- split_system_tests=True,
-)
-
-# BigQuery has a custom multiprocessing note
-s.move(
- templated_files,
- excludes=[
- "noxfile.py",
- "docs/multiprocessing.rst",
- ".coveragerc",
- # Include custom SNIPPETS_TESTS job for performance.
- # https://github.com/googleapis/python-bigquery/issues/191
- ".kokoro/presubmit/presubmit.cfg",
- ]
-)
-
-# ----------------------------------------------------------------------------
-# Samples templates
-# ----------------------------------------------------------------------------
-
-python.py_samples()
-
-# Do not expose ModelServiceClient, as there is no public API endpoint for the
-# models service.
-s.replace(
- "google/cloud/bigquery_v2/__init__.py",
- r"from \.services\.model_service import ModelServiceClient",
- "",
-)
-s.replace(
- "google/cloud/bigquery_v2/__init__.py",
- r"""["']ModelServiceClient["'],""",
- "",
-)
-
-# Adjust Model docstring so that Sphinx does not think that "predicted_" is
-# a reference to something, issuing a false warning.
-s.replace(
- "google/cloud/bigquery_v2/types/model.py",
- r'will have a "predicted_"',
- "will have a `predicted_`",
-)
-
-s.replace(
- "docs/conf.py",
- r'\{"members": True\}',
- '{"members": True, "inherited-members": True}'
-)
-
-# Avoid breaking change due to change in field renames.
-# https://github.com/googleapis/python-bigquery/issues/319
-s.replace(
- "google/cloud/bigquery_v2/types/standard_sql.py",
- r"type_ ",
- "type "
-)
-
-# Tell Sphinx to ingore autogenerated docs files.
-s.replace(
- "docs/conf.py",
- r'"samples/snippets/README\.rst",',
- '\g<0>\n "bigquery_v2/services.rst", # generated by the code generator',
-)
-
-s.shell.run(["nox", "-s", "blacken"], hide_output=False)
diff --git a/testing/constraints-3.6.txt b/testing/constraints-3.6.txt
index c4a5c51be..be1a992fa 100644
--- a/testing/constraints-3.6.txt
+++ b/testing/constraints-3.6.txt
@@ -5,17 +5,20 @@
#
# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev",
# Then this file should have foo==1.14.0
-google-api-core==1.23.0
+geopandas==0.9.0
+google-api-core==1.29.0
google-cloud-bigquery-storage==2.0.0
google-cloud-core==1.4.1
google-resumable-media==0.6.0
-grpcio==1.32.0
+grpcio==1.38.1
opentelemetry-api==0.11b0
opentelemetry-instrumentation==0.11b0
opentelemetry-sdk==0.11b0
-pandas==0.23.0
+pandas==0.24.2
proto-plus==1.10.0
protobuf==3.12.0
-pyarrow==1.0.0
+pyarrow==3.0.0
+requests==2.18.0
+shapely==1.6.0
six==1.13.0
tqdm==4.7.4
diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt
index e69de29bb..39dc6250e 100644
--- a/testing/constraints-3.9.txt
+++ b/testing/constraints-3.9.txt
@@ -0,0 +1,7 @@
+# This constraints file is used to make sure that the latest dependency versions
+# we claim to support in setup.py are indeed installed in test sessions in the most
+# recent Python version supported (3.9 at the time of writing - 2021-05-05).
+#
+# NOTE: Not comprehensive yet; will eventually be maintained semi-automatically by
+# the renovate bot.
+pyarrow>=4.0.0
diff --git a/tests/__init__.py b/tests/__init__.py
index e69de29bb..4de65971c 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -0,0 +1,15 @@
+# -*- coding: utf-8 -*-
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/tests/data/numeric_38_12.parquet b/tests/data/numeric_38_12.parquet
new file mode 100644
index 000000000..ef4db91ea
Binary files /dev/null and b/tests/data/numeric_38_12.parquet differ
diff --git a/tests/data/scalars.jsonl b/tests/data/scalars.jsonl
new file mode 100644
index 000000000..e06139e5c
--- /dev/null
+++ b/tests/data/scalars.jsonl
@@ -0,0 +1,2 @@
+{"bool_col": true, "bytes_col": "SGVsbG8sIFdvcmxkIQ==", "date_col": "2021-07-21", "datetime_col": "2021-07-21 11:39:45", "geography_col": "POINT(-122.0838511 37.3860517)", "int64_col": "123456789", "interval_col": "P7Y11M9DT4H15M37.123456S", "numeric_col": "1.23456789", "bignumeric_col": "10.111213141516171819", "float64_col": "1.25", "rowindex": 0, "string_col": "Hello, World!", "time_col": "11:41:43.07616", "timestamp_col": "2021-07-21T17:43:43.945289Z"}
+{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "interval_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "rowindex": 1, "string_col": null, "time_col": null, "timestamp_col": null}
diff --git a/tests/data/scalars_extreme.jsonl b/tests/data/scalars_extreme.jsonl
new file mode 100644
index 000000000..d0a33fdba
--- /dev/null
+++ b/tests/data/scalars_extreme.jsonl
@@ -0,0 +1,5 @@
+{"bool_col": true, "bytes_col": "DQo=\n", "date_col": "9999-12-31", "datetime_col": "9999-12-31 23:59:59.999999", "geography_col": "POINT(-135.0000 90.0000)", "int64_col": "9223372036854775807", "interval_col": "P-10000Y0M-3660000DT-87840000H0M0S", "numeric_col": "9.9999999999999999999999999999999999999E+28", "bignumeric_col": "9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "+inf", "rowindex": 0, "string_col": "Hello, World", "time_col": "23:59:59.999999", "timestamp_col": "9999-12-31T23:59:59.999999Z"}
+{"bool_col": false, "bytes_col": "8J+Zgw==\n", "date_col": "0001-01-01", "datetime_col": "0001-01-01 00:00:00", "geography_col": "POINT(45.0000 -90.0000)", "int64_col": "-9223372036854775808", "interval_col": "P10000Y0M3660000DT87840000H0M0S", "numeric_col": "-9.9999999999999999999999999999999999999E+28", "bignumeric_col": "-9.999999999999999999999999999999999999999999999999999999999999999999999999999E+37", "float64_col": "-inf", "rowindex": 1, "string_col": "Hello, World", "time_col": "00:00:00", "timestamp_col": "0001-01-01T00:00:00.000000Z"}
+{"bool_col": true, "bytes_col": "AA==\n", "date_col": "1900-01-01", "datetime_col": "1900-01-01 00:00:00", "geography_col": "POINT(-180.0000 0.0000)", "int64_col": "-1", "interval_col": "P0Y0M0DT0H0M0.000001S", "numeric_col": "0.000000001", "bignumeric_col": "-0.00000000000000000000000000000000000001", "float64_col": "nan", "rowindex": 2, "string_col": "こんにちは", "time_col": "00:00:00.000001", "timestamp_col": "1900-01-01T00:00:00.000000Z"}
+{"bool_col": false, "bytes_col": "", "date_col": "1970-01-01", "datetime_col": "1970-01-01 00:00:00", "geography_col": "POINT(0 0)", "int64_col": "0", "interval_col": "P0Y0M0DT0H0M0S", "numeric_col": "0.0", "bignumeric_col": "0.0", "float64_col": 0.0, "rowindex": 3, "string_col": "", "time_col": "12:00:00", "timestamp_col": "1970-01-01T00:00:00.000000Z"}
+{"bool_col": null, "bytes_col": null, "date_col": null, "datetime_col": null, "geography_col": null, "int64_col": null, "interval_col": null, "numeric_col": null, "bignumeric_col": null, "float64_col": null, "rowindex": 4, "string_col": null, "time_col": null, "timestamp_col": null}
diff --git a/tests/data/scalars_schema.json b/tests/data/scalars_schema.json
new file mode 100644
index 000000000..676d37d56
--- /dev/null
+++ b/tests/data/scalars_schema.json
@@ -0,0 +1,72 @@
+[
+ {
+ "mode": "NULLABLE",
+ "name": "bool_col",
+ "type": "BOOLEAN"
+ },
+ {
+ "mode": "NULLABLE",
+ "name": "bignumeric_col",
+ "type": "BIGNUMERIC"
+ },
+ {
+ "mode": "NULLABLE",
+ "name": "bytes_col",
+ "type": "BYTES"
+ },
+ {
+ "mode": "NULLABLE",
+ "name": "date_col",
+ "type": "DATE"
+ },
+ {
+ "mode": "NULLABLE",
+ "name": "datetime_col",
+ "type": "DATETIME"
+ },
+ {
+ "mode": "NULLABLE",
+ "name": "float64_col",
+ "type": "FLOAT"
+ },
+ {
+ "mode": "NULLABLE",
+ "name": "geography_col",
+ "type": "GEOGRAPHY"
+ },
+ {
+ "mode": "NULLABLE",
+ "name": "int64_col",
+ "type": "INTEGER"
+ },
+ {
+ "mode": "NULLABLE",
+ "name": "interval_col",
+ "type": "INTERVAL"
+ },
+ {
+ "mode": "NULLABLE",
+ "name": "numeric_col",
+ "type": "NUMERIC"
+ },
+ {
+ "mode": "REQUIRED",
+ "name": "rowindex",
+ "type": "INTEGER"
+ },
+ {
+ "mode": "NULLABLE",
+ "name": "string_col",
+ "type": "STRING"
+ },
+ {
+ "mode": "NULLABLE",
+ "name": "time_col",
+ "type": "TIME"
+ },
+ {
+ "mode": "NULLABLE",
+ "name": "timestamp_col",
+ "type": "TIMESTAMP"
+ }
+]
diff --git a/tests/system/conftest.py b/tests/system/conftest.py
new file mode 100644
index 000000000..cc2c2a4dc
--- /dev/null
+++ b/tests/system/conftest.py
@@ -0,0 +1,100 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pathlib
+
+import pytest
+import test_utils.prefixer
+
+from google.cloud import bigquery
+from google.cloud.bigquery import enums
+from . import helpers
+
+
+prefixer = test_utils.prefixer.Prefixer("python-bigquery", "tests/system")
+
+DATA_DIR = pathlib.Path(__file__).parent.parent / "data"
+
+
+@pytest.fixture(scope="session", autouse=True)
+def cleanup_datasets(bigquery_client: bigquery.Client):
+ for dataset in bigquery_client.list_datasets():
+ if prefixer.should_cleanup(dataset.dataset_id):
+ bigquery_client.delete_dataset(
+ dataset, delete_contents=True, not_found_ok=True
+ )
+
+
+@pytest.fixture(scope="session")
+def bigquery_client():
+ return bigquery.Client()
+
+
+@pytest.fixture(scope="session")
+def project_id(bigquery_client: bigquery.Client):
+ return bigquery_client.project
+
+
+@pytest.fixture(scope="session")
+def bqstorage_client(bigquery_client):
+ from google.cloud import bigquery_storage
+
+ return bigquery_storage.BigQueryReadClient(credentials=bigquery_client._credentials)
+
+
+@pytest.fixture(scope="session")
+def dataset_id(bigquery_client):
+ dataset_id = prefixer.create_prefix()
+ bigquery_client.create_dataset(dataset_id)
+ yield dataset_id
+ bigquery_client.delete_dataset(dataset_id, delete_contents=True, not_found_ok=True)
+
+
+@pytest.fixture
+def table_id(dataset_id):
+ return f"{dataset_id}.table_{helpers.temp_suffix()}"
+
+
+@pytest.fixture(scope="session")
+def scalars_table(bigquery_client: bigquery.Client, project_id: str, dataset_id: str):
+ schema = bigquery_client.schema_from_json(DATA_DIR / "scalars_schema.json")
+ job_config = bigquery.LoadJobConfig()
+ job_config.schema = schema
+ job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON
+ full_table_id = f"{project_id}.{dataset_id}.scalars"
+ with open(DATA_DIR / "scalars.jsonl", "rb") as data_file:
+ job = bigquery_client.load_table_from_file(
+ data_file, full_table_id, job_config=job_config
+ )
+ job.result()
+ yield full_table_id
+ bigquery_client.delete_table(full_table_id)
+
+
+@pytest.fixture(scope="session")
+def scalars_extreme_table(
+ bigquery_client: bigquery.Client, project_id: str, dataset_id: str
+):
+ schema = bigquery_client.schema_from_json(DATA_DIR / "scalars_schema.json")
+ job_config = bigquery.LoadJobConfig()
+ job_config.schema = schema
+ job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON
+ full_table_id = f"{project_id}.{dataset_id}.scalars_extreme"
+ with open(DATA_DIR / "scalars_extreme.jsonl", "rb") as data_file:
+ job = bigquery_client.load_table_from_file(
+ data_file, full_table_id, job_config=job_config
+ )
+ job.result()
+ yield full_table_id
+ bigquery_client.delete_table(full_table_id)
diff --git a/tests/system/helpers.py b/tests/system/helpers.py
new file mode 100644
index 000000000..76e609345
--- /dev/null
+++ b/tests/system/helpers.py
@@ -0,0 +1,94 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime
+import decimal
+import uuid
+
+import google.api_core.exceptions
+import test_utils.retry
+
+from google.cloud._helpers import UTC
+
+
+_naive = datetime.datetime(2016, 12, 5, 12, 41, 9)
+_naive_microseconds = datetime.datetime(2016, 12, 5, 12, 41, 9, 250000)
+_stamp = "%s %s" % (_naive.date().isoformat(), _naive.time().isoformat())
+_stamp_microseconds = _stamp + ".250000"
+_zoned = _naive.replace(tzinfo=UTC)
+_zoned_microseconds = _naive_microseconds.replace(tzinfo=UTC)
+_numeric = decimal.Decimal("123456789.123456789")
+
+
+# Examples of most data types to test with query() and DB-API.
+STANDARD_SQL_EXAMPLES = [
+ ("SELECT 1", 1),
+ ("SELECT 1.3", 1.3),
+ ("SELECT TRUE", True),
+ ('SELECT "ABC"', "ABC"),
+ ('SELECT CAST("foo" AS BYTES)', b"foo"),
+ ('SELECT TIMESTAMP "%s"' % (_stamp,), _zoned),
+ ('SELECT TIMESTAMP "%s"' % (_stamp_microseconds,), _zoned_microseconds,),
+ ('SELECT DATETIME(TIMESTAMP "%s")' % (_stamp,), _naive),
+ ('SELECT DATETIME(TIMESTAMP "%s")' % (_stamp_microseconds,), _naive_microseconds,),
+ ('SELECT DATE(TIMESTAMP "%s")' % (_stamp,), _naive.date()),
+ ('SELECT TIME(TIMESTAMP "%s")' % (_stamp,), _naive.time()),
+ ('SELECT NUMERIC "%s"' % (_numeric,), _numeric),
+ ("SELECT (1, 2)", {"_field_1": 1, "_field_2": 2}),
+ (
+ "SELECT ((1, 2), (3, 4), 5)",
+ {
+ "_field_1": {"_field_1": 1, "_field_2": 2},
+ "_field_2": {"_field_1": 3, "_field_2": 4},
+ "_field_3": 5,
+ },
+ ),
+ ("SELECT [1, 2, 3]", [1, 2, 3]),
+ (
+ "SELECT ([1, 2], 3, [4, 5])",
+ {"_field_1": [1, 2], "_field_2": 3, "_field_3": [4, 5]},
+ ),
+ (
+ "SELECT [(1, 2, 3), (4, 5, 6)]",
+ [
+ {"_field_1": 1, "_field_2": 2, "_field_3": 3},
+ {"_field_1": 4, "_field_2": 5, "_field_3": 6},
+ ],
+ ),
+ (
+ "SELECT [([1, 2, 3], 4), ([5, 6], 7)]",
+ [{"_field_1": [1, 2, 3], "_field_2": 4}, {"_field_1": [5, 6], "_field_2": 7}],
+ ),
+ ("SELECT ARRAY(SELECT STRUCT([1, 2]))", [{"_field_1": [1, 2]}]),
+ ("SELECT ST_GeogPoint(1, 2)", "POINT(1 2)"),
+]
+
+
+def temp_suffix():
+ now = datetime.datetime.now()
+ return f"{now.strftime('%Y%m%d%H%M%S')}_{uuid.uuid4().hex[:8]}"
+
+
+def _rate_limit_exceeded(forbidden):
+ """Predicate: pass only exceptions with 'rateLimitExceeded' as reason."""
+ return any(error["reason"] == "rateLimitExceeded" for error in forbidden._errors)
+
+
+# We need to wait to stay within the rate limits.
+# The alternative outcome is a 403 Forbidden response from upstream, which
+# they return instead of the more appropriate 429.
+# See https://cloud.google.com/bigquery/quota-policy
+retry_403 = test_utils.retry.RetryErrors(
+ google.api_core.exceptions.Forbidden, error_predicate=_rate_limit_exceeded,
+)
diff --git a/tests/system/test_arrow.py b/tests/system/test_arrow.py
new file mode 100644
index 000000000..12f7af9cb
--- /dev/null
+++ b/tests/system/test_arrow.py
@@ -0,0 +1,112 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""System tests for Arrow connector."""
+
+from typing import Optional
+
+import pytest
+
+from google.cloud import bigquery
+from google.cloud.bigquery import enums
+
+
+pyarrow = pytest.importorskip(
+ "pyarrow", minversion="3.0.0"
+) # Needs decimal256 for BIGNUMERIC columns.
+
+
+@pytest.mark.parametrize(
+ ("max_results", "scalars_table_name"),
+ (
+ (None, "scalars_table"), # Use BQ Storage API.
+ (10, "scalars_table"), # Use REST API.
+ (None, "scalars_extreme_table"), # Use BQ Storage API.
+ (10, "scalars_extreme_table"), # Use REST API.
+ ),
+)
+def test_list_rows_nullable_scalars_dtypes(
+ bigquery_client: bigquery.Client,
+ scalars_table: str,
+ scalars_extreme_table: str,
+ max_results: Optional[int],
+ scalars_table_name: str,
+):
+ table_id = scalars_table
+ if scalars_table_name == "scalars_extreme_table":
+ table_id = scalars_extreme_table
+
+ # TODO(GH#836): Avoid INTERVAL columns until they are supported by the
+ # BigQuery Storage API and pyarrow.
+ schema = [
+ bigquery.SchemaField("bool_col", enums.SqlTypeNames.BOOLEAN),
+ bigquery.SchemaField("bignumeric_col", enums.SqlTypeNames.BIGNUMERIC),
+ bigquery.SchemaField("bytes_col", enums.SqlTypeNames.BYTES),
+ bigquery.SchemaField("date_col", enums.SqlTypeNames.DATE),
+ bigquery.SchemaField("datetime_col", enums.SqlTypeNames.DATETIME),
+ bigquery.SchemaField("float64_col", enums.SqlTypeNames.FLOAT64),
+ bigquery.SchemaField("geography_col", enums.SqlTypeNames.GEOGRAPHY),
+ bigquery.SchemaField("int64_col", enums.SqlTypeNames.INT64),
+ bigquery.SchemaField("numeric_col", enums.SqlTypeNames.NUMERIC),
+ bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING),
+ bigquery.SchemaField("time_col", enums.SqlTypeNames.TIME),
+ bigquery.SchemaField("timestamp_col", enums.SqlTypeNames.TIMESTAMP),
+ ]
+
+ arrow_table = bigquery_client.list_rows(
+ table_id, max_results=max_results, selected_fields=schema,
+ ).to_arrow()
+
+ schema = arrow_table.schema
+ bignumeric_type = schema.field("bignumeric_col").type
+ # BIGNUMERIC's 77th digit is partial, so either precision is acceptable.
+ # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types
+ assert bignumeric_type.precision in {76, 77}
+ assert bignumeric_type.scale == 38
+
+ bool_type = schema.field("bool_col").type
+ assert bool_type.equals(pyarrow.bool_())
+
+ bytes_type = schema.field("bytes_col").type
+ assert bytes_type.equals(pyarrow.binary())
+
+ date_type = schema.field("date_col").type
+ assert date_type.equals(pyarrow.date32())
+
+ datetime_type = schema.field("datetime_col").type
+ assert datetime_type.unit == "us"
+ assert datetime_type.tz is None
+
+ float64_type = schema.field("float64_col").type
+ assert float64_type.equals(pyarrow.float64())
+
+ geography_type = schema.field("geography_col").type
+ assert geography_type.equals(pyarrow.string())
+
+ int64_type = schema.field("int64_col").type
+ assert int64_type.equals(pyarrow.int64())
+
+ numeric_type = schema.field("numeric_col").type
+ assert numeric_type.precision == 38
+ assert numeric_type.scale == 9
+
+ string_type = schema.field("string_col").type
+ assert string_type.equals(pyarrow.string())
+
+ time_type = schema.field("time_col").type
+ assert time_type.equals(pyarrow.time64("us"))
+
+ timestamp_type = schema.field("timestamp_col").type
+ assert timestamp_type.unit == "us"
+ assert timestamp_type.tz is not None
diff --git a/tests/system/test_client.py b/tests/system/test_client.py
index aa1a03160..9da45ee6e 100644
--- a/tests/system/test_client.py
+++ b/tests/system/test_client.py
@@ -13,7 +13,6 @@
# limitations under the License.
import base64
-import collections
import concurrent.futures
import csv
import datetime
@@ -26,10 +25,12 @@
import time
import unittest
import uuid
+from typing import Optional
import psutil
-import pytz
-import pkg_resources
+import pytest
+
+from . import helpers
try:
from google.cloud import bigquery_storage
@@ -41,10 +42,6 @@
except ImportError: # pragma: NO COVER
fastavro = None
-try:
- import pandas
-except ImportError: # pragma: NO COVER
- pandas = None
try:
import pyarrow
import pyarrow.types
@@ -55,7 +52,6 @@
from google.api_core.exceptions import BadRequest
from google.api_core.exceptions import ClientError
from google.api_core.exceptions import Conflict
-from google.api_core.exceptions import Forbidden
from google.api_core.exceptions import GoogleAPICallError
from google.api_core.exceptions import NotFound
from google.api_core.exceptions import InternalServerError
@@ -70,6 +66,8 @@
from google.cloud._helpers import UTC
from google.cloud.bigquery import dbapi, enums
from google.cloud import storage
+from google.cloud.datacatalog_v1 import types as datacatalog_types
+from google.cloud.datacatalog_v1 import PolicyTagManagerClient
from test_utils.retry import RetryErrors
from test_utils.retry import RetryInstanceState
@@ -92,6 +90,12 @@
bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
]
+CLUSTERING_SCHEMA = [
+ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
+ bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
+ bigquery.SchemaField("body_height_cm", "INTEGER", mode="REQUIRED"),
+ bigquery.SchemaField("date_of_birth", "DATE", mode="REQUIRED"),
+]
TIME_PARTITIONING_CLUSTERING_FIELDS_SCHEMA = [
bigquery.SchemaField("transaction_time", "TIMESTAMP", mode="REQUIRED"),
bigquery.SchemaField("transaction_id", "INTEGER", mode="REQUIRED"),
@@ -119,18 +123,7 @@
(TooManyRequests, InternalServerError, ServiceUnavailable)
)
-PANDAS_MINIMUM_VERSION = pkg_resources.parse_version("1.0.0")
-PYARROW_MINIMUM_VERSION = pkg_resources.parse_version("0.17.0")
-
-if pandas:
- PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version
-else:
- PANDAS_INSTALLED_VERSION = None
-
-if pyarrow:
- PYARROW_INSTALLED_VERSION = pkg_resources.get_distribution("pyarrow").parsed_version
-else:
- PYARROW_INSTALLED_VERSION = None
+MTLS_TESTING = os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE") == "true"
def _has_rows(result):
@@ -138,7 +131,7 @@ def _has_rows(result):
def _make_dataset_id(prefix):
- return "%s%s" % (prefix, unique_resource_id())
+ return f"python_bigquery_tests_system_{prefix}{unique_resource_id()}"
def _load_json_schema(filename="schema.json"):
@@ -150,18 +143,6 @@ def _load_json_schema(filename="schema.json"):
return _parse_schema_resource(json.load(schema_file))
-def _rate_limit_exceeded(forbidden):
- """Predicate: pass only exceptions with 'rateLimitExceeded' as reason."""
- return any(error["reason"] == "rateLimitExceeded" for error in forbidden._errors)
-
-
-# We need to wait to stay within the rate limits.
-# The alternative outcome is a 403 Forbidden response from upstream, which
-# they return instead of the more appropriate 429.
-# See https://cloud.google.com/bigquery/quota-policy
-retry_403 = RetryErrors(Forbidden, error_predicate=_rate_limit_exceeded)
-
-
class Config(object):
"""Run-time configuration to be modified at set-up.
@@ -169,9 +150,8 @@ class Config(object):
global state.
"""
- CLIENT = None
+ CLIENT: Optional[bigquery.Client] = None
CURSOR = None
- DATASET = None
def setUpModule():
@@ -181,11 +161,11 @@ def setUpModule():
class TestBigQuery(unittest.TestCase):
def setUp(self):
- Config.DATASET = _make_dataset_id("bq_system_tests")
- dataset = Config.CLIENT.create_dataset(Config.DATASET)
- self.to_delete = [dataset]
+ self.to_delete = []
def tearDown(self):
+ policy_tag_client = PolicyTagManagerClient()
+
def _still_in_use(bad_request):
return any(
error["reason"] == "resourceInUse" for error in bad_request._errors
@@ -202,6 +182,8 @@ def _still_in_use(bad_request):
retry_in_use(Config.CLIENT.delete_dataset)(doomed, delete_contents=True)
elif isinstance(doomed, (Table, bigquery.TableReference)):
retry_in_use(Config.CLIENT.delete_table)(doomed)
+ elif isinstance(doomed, datacatalog_types.Taxonomy):
+ policy_tag_client.delete_taxonomy(name=doomed.name)
else:
doomed.delete()
@@ -216,7 +198,9 @@ def test_get_service_account_email(self):
def _create_bucket(self, bucket_name, location=None):
storage_client = storage.Client()
bucket = storage_client.bucket(bucket_name)
- retry_storage_errors(bucket.create)(location=location)
+ retry_storage_errors(storage_client.create_bucket)(
+ bucket_name, location=location
+ )
self.to_delete.append(bucket)
return bucket
@@ -258,7 +242,7 @@ def test_get_dataset(self):
dataset_arg = Dataset(dataset_ref)
dataset_arg.friendly_name = "Friendly"
dataset_arg.description = "Description"
- dataset = retry_403(client.create_dataset)(dataset_arg)
+ dataset = helpers.retry_403(client.create_dataset)(dataset_arg)
self.to_delete.append(dataset)
dataset_ref = bigquery.DatasetReference(project, dataset_id)
@@ -341,7 +325,7 @@ def test_create_table(self):
table_arg = Table(dataset.table(table_id), schema=SCHEMA)
self.assertFalse(_table_exists(table_arg))
- table = retry_403(Config.CLIENT.create_table)(table_arg)
+ table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
self.to_delete.insert(0, table)
self.assertTrue(_table_exists(table))
@@ -376,7 +360,7 @@ def test_create_table_with_policy(self):
table_arg = Table(dataset.table(table_id), schema=schema)
self.assertFalse(_table_exists(table_arg))
- table = retry_403(Config.CLIENT.create_table)(table_arg)
+ table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
self.to_delete.insert(0, table)
self.assertTrue(_table_exists(table))
@@ -398,6 +382,68 @@ def test_create_table_with_policy(self):
table2 = Config.CLIENT.update_table(table, ["schema"])
self.assertEqual(policy_2, table2.schema[1].policy_tags)
+ def test_create_table_with_real_custom_policy(self):
+ from google.cloud.bigquery.schema import PolicyTagList
+
+ policy_tag_client = PolicyTagManagerClient()
+ taxonomy_parent = f"projects/{Config.CLIENT.project}/locations/us"
+
+ new_taxonomy = datacatalog_types.Taxonomy(
+ display_name="Custom test taxonomy" + unique_resource_id(),
+ description="This taxonomy is ony used for a test.",
+ activated_policy_types=[
+ datacatalog_types.Taxonomy.PolicyType.FINE_GRAINED_ACCESS_CONTROL
+ ],
+ )
+
+ taxonomy = policy_tag_client.create_taxonomy(
+ parent=taxonomy_parent, taxonomy=new_taxonomy
+ )
+ self.to_delete.insert(0, taxonomy)
+
+ parent_policy_tag = policy_tag_client.create_policy_tag(
+ parent=taxonomy.name,
+ policy_tag=datacatalog_types.PolicyTag(
+ display_name="Parent policy tag", parent_policy_tag=None
+ ),
+ )
+ child_policy_tag = policy_tag_client.create_policy_tag(
+ parent=taxonomy.name,
+ policy_tag=datacatalog_types.PolicyTag(
+ display_name="Child policy tag",
+ parent_policy_tag=parent_policy_tag.name,
+ ),
+ )
+
+ dataset = self.temp_dataset(
+ _make_dataset_id("create_table_with_real_custom_policy")
+ )
+ table_id = "test_table"
+ policy_1 = PolicyTagList(names=[parent_policy_tag.name])
+ policy_2 = PolicyTagList(names=[child_policy_tag.name])
+
+ schema = [
+ bigquery.SchemaField(
+ "first_name", "STRING", mode="REQUIRED", policy_tags=policy_1
+ ),
+ bigquery.SchemaField(
+ "age", "INTEGER", mode="REQUIRED", policy_tags=policy_2
+ ),
+ ]
+ table_arg = Table(dataset.table(table_id), schema=schema)
+ self.assertFalse(_table_exists(table_arg))
+
+ table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
+ self.to_delete.insert(0, table)
+
+ self.assertTrue(_table_exists(table))
+ self.assertCountEqual(
+ list(table.schema[0].policy_tags.names), [parent_policy_tag.name]
+ )
+ self.assertCountEqual(
+ list(table.schema[1].policy_tags.names), [child_policy_tag.name]
+ )
+
def test_create_table_w_time_partitioning_w_clustering_fields(self):
from google.cloud.bigquery.table import TimePartitioning
from google.cloud.bigquery.table import TimePartitioningType
@@ -412,7 +458,7 @@ def test_create_table_w_time_partitioning_w_clustering_fields(self):
table_arg.time_partitioning = TimePartitioning(field="transaction_time")
table_arg.clustering_fields = ["user_email", "store_code"]
- table = retry_403(Config.CLIENT.create_table)(table_arg)
+ table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
self.to_delete.insert(0, table)
self.assertTrue(_table_exists(table))
@@ -426,7 +472,7 @@ def test_delete_dataset_with_string(self):
dataset_id = _make_dataset_id("delete_table_true_with_string")
project = Config.CLIENT.project
dataset_ref = bigquery.DatasetReference(project, dataset_id)
- retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_ref))
+ helpers.retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_ref))
self.assertTrue(_dataset_exists(dataset_ref))
Config.CLIENT.delete_dataset(dataset_id)
self.assertFalse(_dataset_exists(dataset_ref))
@@ -435,11 +481,11 @@ def test_delete_dataset_delete_contents_true(self):
dataset_id = _make_dataset_id("delete_table_true_with_content")
project = Config.CLIENT.project
dataset_ref = bigquery.DatasetReference(project, dataset_id)
- dataset = retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_ref))
+ dataset = helpers.retry_403(Config.CLIENT.create_dataset)(Dataset(dataset_ref))
table_id = "test_table"
table_arg = Table(dataset.table(table_id), schema=SCHEMA)
- table = retry_403(Config.CLIENT.create_table)(table_arg)
+ table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
Config.CLIENT.delete_dataset(dataset, delete_contents=True)
self.assertFalse(_table_exists(table))
@@ -451,7 +497,7 @@ def test_delete_dataset_delete_contents_false(self):
table_id = "test_table"
table_arg = Table(dataset.table(table_id), schema=SCHEMA)
- retry_403(Config.CLIENT.create_table)(table_arg)
+ helpers.retry_403(Config.CLIENT.create_table)(table_arg)
with self.assertRaises(exceptions.BadRequest):
Config.CLIENT.delete_dataset(dataset)
@@ -500,7 +546,7 @@ def test_list_tables(self):
]
for table_name in tables_to_create:
table = Table(dataset.table(table_name), schema=SCHEMA)
- created_table = retry_403(Config.CLIENT.create_table)(table)
+ created_table = helpers.retry_403(Config.CLIENT.create_table)(table)
self.to_delete.insert(0, created_table)
# Retrieve the tables.
@@ -530,7 +576,7 @@ def test_update_table(self):
TABLE_NAME = "test_table"
table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA)
self.assertFalse(_table_exists(table_arg))
- table = retry_403(Config.CLIENT.create_table)(table_arg)
+ table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
self.to_delete.insert(0, table)
self.assertTrue(_table_exists(table))
self.assertIsNone(table.friendly_name)
@@ -570,7 +616,7 @@ def test_update_table_schema(self):
TABLE_NAME = "test_table"
table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA)
self.assertFalse(_table_exists(table_arg))
- table = retry_403(Config.CLIENT.create_table)(table_arg)
+ table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
self.to_delete.insert(0, table)
self.assertTrue(_table_exists(table))
voter = bigquery.SchemaField("voter", "BOOLEAN", mode="NULLABLE")
@@ -586,6 +632,75 @@ def test_update_table_schema(self):
self.assertEqual(found.field_type, expected.field_type)
self.assertEqual(found.mode, expected.mode)
+    def test_unset_table_schema_attributes(self):
+        """Check that a field's description and policy tags can be unset.
+
+        Creates a table whose second column carries both attributes, then
+        replaces that column with one where both are ``None`` and updates
+        the table's schema.
+        """
+        from google.cloud.bigquery.schema import PolicyTagList
+
+        dataset = self.temp_dataset(_make_dataset_id("unset_policy_tags"))
+        tag_name = "projects/{}/locations/us/taxonomies/1/policyTags/2".format(
+            Config.CLIENT.project
+        )
+        policy_tags = PolicyTagList(names=[tag_name])
+        name_field = bigquery.SchemaField("full_name", "STRING", mode="REQUIRED")
+        secret_field = bigquery.SchemaField(
+            name="secret_int",
+            field_type="INTEGER",
+            mode="REQUIRED",
+            description="This field is numeric",
+            policy_tags=policy_tags,
+        )
+        schema = [name_field, secret_field]
+        table_arg = Table(dataset.table("test_table"), schema=schema)
+        self.assertFalse(_table_exists(table_arg))
+
+        table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
+        self.to_delete.insert(0, table)
+        self.assertTrue(_table_exists(table))
+        self.assertEqual(policy_tags, table.schema[1].policy_tags)
+
+        # Rebuild the second column with description and policy tags cleared,
+        # carrying over its remaining attributes unchanged.
+        original_field = table.schema[1]
+        amended_schema = table.schema[:]
+        amended_schema[1] = bigquery.SchemaField(
+            name=original_field.name,
+            field_type=original_field.field_type,
+            mode=original_field.mode,
+            description=None,
+            fields=original_field.fields,
+            policy_tags=None,
+        )
+        table.schema = amended_schema
+        updated_table = Config.CLIENT.update_table(table, ["schema"])
+
+        updated_field = updated_table.schema[1]
+        self.assertFalse(updated_field.description)  # Empty string or None.
+        self.assertEqual(updated_field.policy_tags.names, ())
+
+    def test_update_table_clustering_configuration(self):
+        """Check that clustering fields can be set, then cleared, by update."""
+        dataset = self.temp_dataset(_make_dataset_id("update_table"))
+        TABLE_NAME = "test_table"
+        table_arg = Table(dataset.table(TABLE_NAME), schema=CLUSTERING_SCHEMA)
+        self.assertFalse(_table_exists(table_arg))
+
+        table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
+        self.to_delete.insert(0, table)
+        self.assertTrue(_table_exists(table))
+        # Apply a clustering configuration to the freshly created table.
+        table.clustering_fields = ["full_name", "date_of_birth"]
+        table2 = Config.CLIENT.update_table(table, ["clustering_fields"])
+        self.assertEqual(table2.clustering_fields, ["full_name", "date_of_birth"])
+        # Setting the property to None and updating should remove clustering.
+        table2.clustering_fields = None
+        table3 = Config.CLIENT.update_table(table2, ["clustering_fields"])
+        self.assertIsNone(table3.clustering_fields)
+
@staticmethod
def _fetch_single_page(table, selected_fields=None):
iterator = Config.CLIENT.list_rows(table, selected_fields=selected_fields)
@@ -670,7 +785,7 @@ def test_insert_rows_then_dump_table(self):
]
table_arg = Table(dataset.table(TABLE_ID), schema=schema)
self.assertFalse(_table_exists(table_arg))
- table = retry_403(Config.CLIENT.create_table)(table_arg)
+ table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
self.to_delete.insert(0, table)
self.assertTrue(_table_exists(table))
@@ -728,397 +843,59 @@ def test_load_table_from_local_avro_file_then_dump_table(self):
sorted(row_tuples, key=by_wavelength), sorted(ROWS, key=by_wavelength)
)
- @unittest.skipIf(pandas is None, "Requires `pandas`")
- @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
- def test_load_table_from_dataframe_w_automatic_schema(self):
- """Test that a DataFrame with dtypes that map well to BigQuery types
- can be uploaded without specifying a schema.
-
- https://github.com/googleapis/google-cloud-python/issues/9044
- """
- df_data = collections.OrderedDict(
- [
- ("bool_col", pandas.Series([True, False, True], dtype="bool")),
- (
- "ts_col",
- pandas.Series(
- [
- datetime.datetime(2010, 1, 2, 3, 44, 50),
- datetime.datetime(2011, 2, 3, 14, 50, 59),
- datetime.datetime(2012, 3, 14, 15, 16),
- ],
- dtype="datetime64[ns]",
- ).dt.tz_localize(pytz.utc),
- ),
- (
- "dt_col",
- pandas.Series(
- [
- datetime.datetime(2010, 1, 2, 3, 44, 50),
- datetime.datetime(2011, 2, 3, 14, 50, 59),
- datetime.datetime(2012, 3, 14, 15, 16),
- ],
- dtype="datetime64[ns]",
- ),
- ),
- ("float32_col", pandas.Series([1.0, 2.0, 3.0], dtype="float32")),
- ("float64_col", pandas.Series([4.0, 5.0, 6.0], dtype="float64")),
- ("int8_col", pandas.Series([-12, -11, -10], dtype="int8")),
- ("int16_col", pandas.Series([-9, -8, -7], dtype="int16")),
- ("int32_col", pandas.Series([-6, -5, -4], dtype="int32")),
- ("int64_col", pandas.Series([-3, -2, -1], dtype="int64")),
- ("uint8_col", pandas.Series([0, 1, 2], dtype="uint8")),
- ("uint16_col", pandas.Series([3, 4, 5], dtype="uint16")),
- ("uint32_col", pandas.Series([6, 7, 8], dtype="uint32")),
- ]
- )
- dataframe = pandas.DataFrame(df_data, columns=df_data.keys())
-
- dataset_id = _make_dataset_id("bq_load_test")
- self.temp_dataset(dataset_id)
- table_id = "{}.{}.load_table_from_dataframe_w_automatic_schema".format(
- Config.CLIENT.project, dataset_id
- )
-
- load_job = Config.CLIENT.load_table_from_dataframe(dataframe, table_id)
- load_job.result()
-
- table = Config.CLIENT.get_table(table_id)
- self.assertEqual(
- tuple(table.schema),
- (
- bigquery.SchemaField("bool_col", "BOOLEAN"),
- bigquery.SchemaField("ts_col", "TIMESTAMP"),
- # BigQuery does not support uploading DATETIME values from
- # Parquet files. See:
- # https://github.com/googleapis/google-cloud-python/issues/9996
- bigquery.SchemaField("dt_col", "TIMESTAMP"),
- bigquery.SchemaField("float32_col", "FLOAT"),
- bigquery.SchemaField("float64_col", "FLOAT"),
- bigquery.SchemaField("int8_col", "INTEGER"),
- bigquery.SchemaField("int16_col", "INTEGER"),
- bigquery.SchemaField("int32_col", "INTEGER"),
- bigquery.SchemaField("int64_col", "INTEGER"),
- bigquery.SchemaField("uint8_col", "INTEGER"),
- bigquery.SchemaField("uint16_col", "INTEGER"),
- bigquery.SchemaField("uint32_col", "INTEGER"),
- ),
- )
- self.assertEqual(table.num_rows, 3)
-
- @unittest.skipIf(
- pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIMUM_VERSION,
- "Only `pandas version >=1.0.0` is supported",
- )
- @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
- def test_load_table_from_dataframe_w_nullable_int64_datatype(self):
- """Test that a DataFrame containing column with None-type values and int64 datatype
- can be uploaded if a BigQuery schema is specified.
-
- https://github.com/googleapis/python-bigquery/issues/22
- """
-
- dataset_id = _make_dataset_id("bq_load_test")
- self.temp_dataset(dataset_id)
- table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format(
- Config.CLIENT.project, dataset_id
- )
- table_schema = (bigquery.SchemaField("x", "INTEGER", mode="NULLABLE"),)
- table = retry_403(Config.CLIENT.create_table)(
- Table(table_id, schema=table_schema)
- )
- self.to_delete.insert(0, table)
-
- df_data = collections.OrderedDict(
- [("x", pandas.Series([1, 2, None, 4], dtype="Int64"))]
- )
- dataframe = pandas.DataFrame(df_data, columns=df_data.keys())
- load_job = Config.CLIENT.load_table_from_dataframe(dataframe, table_id)
- load_job.result()
- table = Config.CLIENT.get_table(table_id)
- self.assertEqual(tuple(table.schema), (bigquery.SchemaField("x", "INTEGER"),))
- self.assertEqual(table.num_rows, 4)
-
- @unittest.skipIf(
- pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIMUM_VERSION,
- "Only `pandas version >=1.0.0` is supported",
- )
- @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
- def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(self):
- """Test that a DataFrame containing column with None-type values and int64 datatype
- can be uploaded without specifying a schema.
-
- https://github.com/googleapis/python-bigquery/issues/22
- """
-
- dataset_id = _make_dataset_id("bq_load_test")
- self.temp_dataset(dataset_id)
- table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format(
- Config.CLIENT.project, dataset_id
- )
- df_data = collections.OrderedDict(
- [("x", pandas.Series([1, 2, None, 4], dtype="Int64"))]
- )
- dataframe = pandas.DataFrame(df_data, columns=df_data.keys())
- load_job = Config.CLIENT.load_table_from_dataframe(dataframe, table_id)
- load_job.result()
- table = Config.CLIENT.get_table(table_id)
- self.assertEqual(tuple(table.schema), (bigquery.SchemaField("x", "INTEGER"),))
- self.assertEqual(table.num_rows, 4)
-
- @unittest.skipIf(pandas is None, "Requires `pandas`")
- @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
- def test_load_table_from_dataframe_w_nulls(self):
- """Test that a DataFrame with null columns can be uploaded if a
- BigQuery schema is specified.
+ def test_load_table_from_local_parquet_file_decimal_types(self):
+ from google.cloud.bigquery.enums import DecimalTargetType
+ from google.cloud.bigquery.job import SourceFormat
+ from google.cloud.bigquery.job import WriteDisposition
- See: https://github.com/googleapis/google-cloud-python/issues/7370
- """
- # Schema with all scalar types.
- scalars_schema = (
- bigquery.SchemaField("bool_col", "BOOLEAN"),
- bigquery.SchemaField("bytes_col", "BYTES"),
- bigquery.SchemaField("date_col", "DATE"),
- bigquery.SchemaField("dt_col", "DATETIME"),
- bigquery.SchemaField("float_col", "FLOAT"),
- bigquery.SchemaField("geo_col", "GEOGRAPHY"),
- bigquery.SchemaField("int_col", "INTEGER"),
- bigquery.SchemaField("num_col", "NUMERIC"),
- bigquery.SchemaField("str_col", "STRING"),
- bigquery.SchemaField("time_col", "TIME"),
- bigquery.SchemaField("ts_col", "TIMESTAMP"),
- )
- table_schema = scalars_schema + (
- # TODO: Array columns can't be read due to NULLABLE versus REPEATED
- # mode mismatch. See:
- # https://issuetracker.google.com/133415569#comment3
- # bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"),
- # TODO: Support writing StructArrays to Parquet. See:
- # https://jira.apache.org/jira/browse/ARROW-2587
- # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema),
- )
- num_rows = 100
- nulls = [None] * num_rows
- df_data = collections.OrderedDict(
- [
- ("bool_col", nulls),
- ("bytes_col", nulls),
- ("date_col", nulls),
- ("dt_col", nulls),
- ("float_col", nulls),
- ("geo_col", nulls),
- ("int_col", nulls),
- ("num_col", nulls),
- ("str_col", nulls),
- ("time_col", nulls),
- ("ts_col", nulls),
- ]
- )
- dataframe = pandas.DataFrame(df_data, columns=df_data.keys())
+ TABLE_NAME = "test_table_parquet"
- dataset_id = _make_dataset_id("bq_load_test")
- self.temp_dataset(dataset_id)
- table_id = "{}.{}.load_table_from_dataframe_w_nulls".format(
- Config.CLIENT.project, dataset_id
- )
+ expected_rows = [
+ (decimal.Decimal("123.999999999999"),),
+ (decimal.Decimal("99999999999999999999999999.999999999999"),),
+ ]
- # Create the table before loading so that schema mismatch errors are
- # identified.
- table = retry_403(Config.CLIENT.create_table)(
- Table(table_id, schema=table_schema)
- )
+ dataset = self.temp_dataset(_make_dataset_id("load_local_parquet_then_dump"))
+ table_ref = dataset.table(TABLE_NAME)
+ table = Table(table_ref)
self.to_delete.insert(0, table)
- job_config = bigquery.LoadJobConfig(schema=table_schema)
- load_job = Config.CLIENT.load_table_from_dataframe(
- dataframe, table_id, job_config=job_config
- )
- load_job.result()
+ job_config = bigquery.LoadJobConfig()
+ job_config.source_format = SourceFormat.PARQUET
+ job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE
+ job_config.decimal_target_types = [
+ DecimalTargetType.NUMERIC,
+ DecimalTargetType.BIGNUMERIC,
+ DecimalTargetType.STRING,
+ ]
- table = Config.CLIENT.get_table(table)
- self.assertEqual(tuple(table.schema), table_schema)
- self.assertEqual(table.num_rows, num_rows)
+ with open(DATA_PATH / "numeric_38_12.parquet", "rb") as parquet_file:
+ job = Config.CLIENT.load_table_from_file(
+ parquet_file, table_ref, job_config=job_config
+ )
- @unittest.skipIf(pandas is None, "Requires `pandas`")
- @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
- def test_load_table_from_dataframe_w_required(self):
- """Test that a DataFrame with required columns can be uploaded if a
- BigQuery schema is specified.
+ job.result(timeout=JOB_TIMEOUT) # Retry until done.
- See: https://github.com/googleapis/google-cloud-python/issues/8093
- """
- table_schema = (
- bigquery.SchemaField("name", "STRING", mode="REQUIRED"),
- bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
- )
-
- records = [{"name": "Chip", "age": 2}, {"name": "Dale", "age": 3}]
- dataframe = pandas.DataFrame(records, columns=["name", "age"])
- job_config = bigquery.LoadJobConfig(schema=table_schema)
- dataset_id = _make_dataset_id("bq_load_test")
- self.temp_dataset(dataset_id)
- table_id = "{}.{}.load_table_from_dataframe_w_required".format(
- Config.CLIENT.project, dataset_id
- )
-
- # Create the table before loading so that schema mismatch errors are
- # identified.
- table = retry_403(Config.CLIENT.create_table)(
- Table(table_id, schema=table_schema)
- )
- self.to_delete.insert(0, table)
-
- job_config = bigquery.LoadJobConfig(schema=table_schema)
- load_job = Config.CLIENT.load_table_from_dataframe(
- dataframe, table_id, job_config=job_config
- )
- load_job.result()
+ self.assertEqual(job.output_rows, len(expected_rows))
table = Config.CLIENT.get_table(table)
- self.assertEqual(tuple(table.schema), table_schema)
- self.assertEqual(table.num_rows, 2)
-
- @unittest.skipIf(pandas is None, "Requires `pandas`")
- @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
- def test_load_table_from_dataframe_w_explicit_schema(self):
- # Schema with all scalar types.
- # TODO: Uploading DATETIME columns currently fails, thus that field type
- # is temporarily removed from the test.
- # See:
- # https://github.com/googleapis/python-bigquery/issues/61
- # https://issuetracker.google.com/issues/151765076
- scalars_schema = (
- bigquery.SchemaField("bool_col", "BOOLEAN"),
- bigquery.SchemaField("bytes_col", "BYTES"),
- bigquery.SchemaField("date_col", "DATE"),
- # bigquery.SchemaField("dt_col", "DATETIME"),
- bigquery.SchemaField("float_col", "FLOAT"),
- bigquery.SchemaField("geo_col", "GEOGRAPHY"),
- bigquery.SchemaField("int_col", "INTEGER"),
- bigquery.SchemaField("num_col", "NUMERIC"),
- bigquery.SchemaField("str_col", "STRING"),
- bigquery.SchemaField("time_col", "TIME"),
- bigquery.SchemaField("ts_col", "TIMESTAMP"),
- )
- table_schema = scalars_schema + (
- # TODO: Array columns can't be read due to NULLABLE versus REPEATED
- # mode mismatch. See:
- # https://issuetracker.google.com/133415569#comment3
- # bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"),
- # TODO: Support writing StructArrays to Parquet. See:
- # https://jira.apache.org/jira/browse/ARROW-2587
- # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema),
- )
- df_data = collections.OrderedDict(
- [
- ("bool_col", [True, None, False]),
- ("bytes_col", [b"abc", None, b"def"]),
- (
- "date_col",
- [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)],
- ),
- # (
- # "dt_col",
- # [
- # datetime.datetime(1, 1, 1, 0, 0, 0),
- # None,
- # datetime.datetime(9999, 12, 31, 23, 59, 59, 999999),
- # ],
- # ),
- ("float_col", [float("-inf"), float("nan"), float("inf")]),
- (
- "geo_col",
- [
- "POINT(30 10)",
- None,
- "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))",
- ],
- ),
- ("int_col", [-9223372036854775808, None, 9223372036854775807]),
- (
- "num_col",
- [
- decimal.Decimal("-99999999999999999999999999999.999999999"),
- None,
- decimal.Decimal("99999999999999999999999999999.999999999"),
- ],
- ),
- ("str_col", [u"abc", None, u"def"]),
- (
- "time_col",
- [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)],
- ),
- (
- "ts_col",
- [
- datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
- None,
- datetime.datetime(
- 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc
- ),
- ],
- ),
- ]
- )
- dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys())
-
- dataset_id = _make_dataset_id("bq_load_test")
- self.temp_dataset(dataset_id)
- table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema".format(
- Config.CLIENT.project, dataset_id
- )
-
- job_config = bigquery.LoadJobConfig(schema=table_schema)
- load_job = Config.CLIENT.load_table_from_dataframe(
- dataframe, table_id, job_config=job_config
- )
- load_job.result()
-
- table = Config.CLIENT.get_table(table_id)
- self.assertEqual(tuple(table.schema), table_schema)
- self.assertEqual(table.num_rows, 3)
-
- @unittest.skipIf(
- pyarrow is None or PYARROW_INSTALLED_VERSION < PYARROW_MINIMUM_VERSION,
- "Only `pyarrow version >=0.17.0` is supported",
- )
- @unittest.skipIf(pandas is None, "Requires `pandas`")
- def test_load_table_from_dataframe_w_struct_datatype(self):
- """Test that a DataFrame with struct datatype can be uploaded if a
- BigQuery schema is specified.
+ rows = self._fetch_single_page(table)
+ row_tuples = [r.values() for r in rows]
+ self.assertEqual(sorted(row_tuples), sorted(expected_rows))
- https://github.com/googleapis/python-bigquery/issues/21
- """
- dataset_id = _make_dataset_id("bq_load_test")
- self.temp_dataset(dataset_id)
- table_id = "{}.{}.load_table_from_dataframe_w_struct_datatype".format(
- Config.CLIENT.project, dataset_id
- )
- table_schema = [
- bigquery.SchemaField(
- "bar",
- "RECORD",
- fields=[
- bigquery.SchemaField("id", "INTEGER", mode="REQUIRED"),
- bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
- ],
- mode="REQUIRED",
- ),
- ]
- table = retry_403(Config.CLIENT.create_table)(
- Table(table_id, schema=table_schema)
- )
- self.to_delete.insert(0, table)
+ # Forcing the NUMERIC type, however, should result in an error.
+ job_config.decimal_target_types = [DecimalTargetType.NUMERIC]
- df_data = [{"id": 1, "age": 21}, {"id": 2, "age": 22}, {"id": 2, "age": 23}]
- dataframe = pandas.DataFrame(data={"bar": df_data}, columns=["bar"])
+ with open(DATA_PATH / "numeric_38_12.parquet", "rb") as parquet_file:
+ job = Config.CLIENT.load_table_from_file(
+ parquet_file, table_ref, job_config=job_config
+ )
- load_job = Config.CLIENT.load_table_from_dataframe(dataframe, table_id)
- load_job.result()
+ with self.assertRaises(BadRequest) as exc_info:
+ job.result(timeout=JOB_TIMEOUT)
- table = Config.CLIENT.get_table(table_id)
- self.assertEqual(table.schema, table_schema)
- self.assertEqual(table.num_rows, 3)
+ exc_msg = str(exc_info.exception)
+ self.assertIn("out of valid NUMERIC range", exc_msg)
def test_load_table_from_json_basic_use(self):
table_schema = (
@@ -1141,7 +918,7 @@ def test_load_table_from_json_basic_use(self):
# Create the table before loading so that schema mismatch errors are
# identified.
- table = retry_403(Config.CLIENT.create_table)(
+ table = helpers.retry_403(Config.CLIENT.create_table)(
Table(table_id, schema=table_schema)
)
self.to_delete.insert(0, table)
@@ -1156,140 +933,6 @@ def test_load_table_from_json_basic_use(self):
self.assertEqual(tuple(table.schema), table_schema)
self.assertEqual(table.num_rows, 2)
- @unittest.skipIf(pandas is None, "Requires `pandas`")
- def test_load_table_from_dataframe_w_explicit_schema_source_format_csv(self):
- from google.cloud.bigquery.job import SourceFormat
-
- table_schema = (
- bigquery.SchemaField("bool_col", "BOOLEAN"),
- bigquery.SchemaField("bytes_col", "BYTES"),
- bigquery.SchemaField("date_col", "DATE"),
- bigquery.SchemaField("dt_col", "DATETIME"),
- bigquery.SchemaField("float_col", "FLOAT"),
- bigquery.SchemaField("geo_col", "GEOGRAPHY"),
- bigquery.SchemaField("int_col", "INTEGER"),
- bigquery.SchemaField("num_col", "NUMERIC"),
- bigquery.SchemaField("str_col", "STRING"),
- bigquery.SchemaField("time_col", "TIME"),
- bigquery.SchemaField("ts_col", "TIMESTAMP"),
- )
- df_data = collections.OrderedDict(
- [
- ("bool_col", [True, None, False]),
- ("bytes_col", ["abc", None, "def"]),
- (
- "date_col",
- [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)],
- ),
- (
- "dt_col",
- [
- datetime.datetime(1, 1, 1, 0, 0, 0),
- None,
- datetime.datetime(9999, 12, 31, 23, 59, 59, 999999),
- ],
- ),
- ("float_col", [float("-inf"), float("nan"), float("inf")]),
- (
- "geo_col",
- [
- "POINT(30 10)",
- None,
- "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))",
- ],
- ),
- ("int_col", [-9223372036854775808, None, 9223372036854775807]),
- (
- "num_col",
- [
- decimal.Decimal("-99999999999999999999999999999.999999999"),
- None,
- decimal.Decimal("99999999999999999999999999999.999999999"),
- ],
- ),
- ("str_col", [u"abc", None, u"def"]),
- (
- "time_col",
- [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)],
- ),
- (
- "ts_col",
- [
- datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
- None,
- datetime.datetime(
- 9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc
- ),
- ],
- ),
- ]
- )
- dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys())
-
- dataset_id = _make_dataset_id("bq_load_test")
- self.temp_dataset(dataset_id)
- table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema_csv".format(
- Config.CLIENT.project, dataset_id
- )
-
- job_config = bigquery.LoadJobConfig(
- schema=table_schema, source_format=SourceFormat.CSV
- )
- load_job = Config.CLIENT.load_table_from_dataframe(
- dataframe, table_id, job_config=job_config
- )
- load_job.result()
-
- table = Config.CLIENT.get_table(table_id)
- self.assertEqual(tuple(table.schema), table_schema)
- self.assertEqual(table.num_rows, 3)
-
- @unittest.skipIf(pandas is None, "Requires `pandas`")
- def test_load_table_from_dataframe_w_explicit_schema_source_format_csv_floats(self):
- from google.cloud.bigquery.job import SourceFormat
-
- table_schema = (bigquery.SchemaField("float_col", "FLOAT"),)
- df_data = collections.OrderedDict(
- [
- (
- "float_col",
- [
- 0.14285714285714285,
- 0.51428571485748,
- 0.87128748,
- 1.807960649,
- 2.0679610649,
- 2.4406779661016949,
- 3.7148514257,
- 3.8571428571428572,
- 1.51251252e40,
- ],
- ),
- ]
- )
- dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys())
-
- dataset_id = _make_dataset_id("bq_load_test")
- self.temp_dataset(dataset_id)
- table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema_csv".format(
- Config.CLIENT.project, dataset_id
- )
-
- job_config = bigquery.LoadJobConfig(
- schema=table_schema, source_format=SourceFormat.CSV
- )
- load_job = Config.CLIENT.load_table_from_dataframe(
- dataframe, table_id, job_config=job_config
- )
- load_job.result()
-
- table = Config.CLIENT.get_table(table_id)
- rows = self._fetch_single_page(table)
- floats = [r.values()[0] for r in rows]
- self.assertEqual(tuple(table.schema), table_schema)
- self.assertEqual(table.num_rows, 9)
- self.assertEqual(floats, df_data["float_col"])
-
def test_load_table_from_json_schema_autodetect(self):
json_rows = [
{"name": "John", "age": 18, "birthday": "2001-10-15", "is_awesome": False},
@@ -1311,7 +954,7 @@ def test_load_table_from_json_schema_autodetect(self):
bigquery.SchemaField("is_awesome", "BOOLEAN", mode="NULLABLE"),
)
# create the table before loading so that the column order is predictable
- table = retry_403(Config.CLIENT.create_table)(
+ table = helpers.retry_403(Config.CLIENT.create_table)(
Table(table_id, schema=table_schema)
)
self.to_delete.insert(0, table)
@@ -1346,7 +989,7 @@ def test_load_avro_from_uri_then_dump_table(self):
dataset = self.temp_dataset(_make_dataset_id("bq_load_test"))
table_arg = dataset.table(table_name)
- table = retry_403(Config.CLIENT.create_table)(Table(table_arg))
+ table = helpers.retry_403(Config.CLIENT.create_table)(Table(table_arg))
self.to_delete.insert(0, table)
config = bigquery.LoadJobConfig()
@@ -1377,7 +1020,7 @@ def test_load_table_from_uri_then_dump_table(self):
dataset = self.temp_dataset(_make_dataset_id("load_gcs_then_dump"))
table_arg = Table(dataset.table(TABLE_ID), schema=SCHEMA)
- table = retry_403(Config.CLIENT.create_table)(table_arg)
+ table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
self.to_delete.insert(0, table)
config = bigquery.LoadJobConfig()
@@ -1425,7 +1068,7 @@ def test_load_table_from_file_w_explicit_location(self):
job_id = load_job.job_id
# Can get the job from the EU.
- load_job = client.get_job(job_id, location="EU")
+ load_job = client.get_job(load_job)
self.assertEqual(job_id, load_job.job_id)
self.assertEqual("EU", load_job.location)
self.assertTrue(load_job.exists())
@@ -1442,7 +1085,7 @@ def test_load_table_from_file_w_explicit_location(self):
# Can cancel the job from the EU.
self.assertTrue(load_job.cancel())
- load_job = client.cancel_job(job_id, location="EU")
+ load_job = client.cancel_job(load_job)
self.assertEqual(job_id, load_job.job_id)
self.assertEqual("EU", load_job.location)
@@ -1563,7 +1206,7 @@ def test_extract_table(self):
job.result(timeout=100)
self.to_delete.insert(0, destination)
- got_bytes = retry_storage_errors(destination.download_as_string)()
+ got_bytes = retry_storage_errors(destination.download_as_bytes)()
got = got_bytes.decode("utf-8")
self.assertIn("Bharney Rhubble", got)
@@ -1595,7 +1238,7 @@ def test_get_set_iam_policy(self):
table_ref = Table(dataset.table(table_id))
self.assertFalse(_table_exists(table_ref))
- table = retry_403(Config.CLIENT.create_table)(table_ref)
+ table = helpers.retry_403(Config.CLIENT.create_table)(table_ref)
self.to_delete.insert(0, table)
self.assertTrue(_table_exists(table))
@@ -1620,7 +1263,7 @@ def test_test_iam_permissions(self):
table_ref = Table(dataset.table(table_id))
self.assertFalse(_table_exists(table_ref))
- table = retry_403(Config.CLIENT.create_table)(table_ref)
+ table = helpers.retry_403(Config.CLIENT.create_table)(table_ref)
self.to_delete.insert(0, table)
self.assertTrue(_table_exists(table))
@@ -1644,7 +1287,7 @@ def test_job_cancel(self):
dataset = self.temp_dataset(DATASET_ID)
table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA)
- table = retry_403(Config.CLIENT.create_table)(table_arg)
+ table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
self.to_delete.insert(0, table)
job = Config.CLIENT.query(QUERY, job_id_prefix=JOB_ID_PREFIX)
@@ -1698,93 +1341,30 @@ def test_get_failed_job(self):
def test_query_w_legacy_sql_types(self):
naive = datetime.datetime(2016, 12, 5, 12, 41, 9)
stamp = "%s %s" % (naive.date().isoformat(), naive.time().isoformat())
- zoned = naive.replace(tzinfo=UTC)
- examples = [
- {"sql": "SELECT 1", "expected": 1},
- {"sql": "SELECT 1.3", "expected": 1.3},
- {"sql": "SELECT TRUE", "expected": True},
- {"sql": 'SELECT "ABC"', "expected": "ABC"},
- {"sql": 'SELECT CAST("foo" AS BYTES)', "expected": b"foo"},
- {"sql": 'SELECT CAST("%s" AS TIMESTAMP)' % (stamp,), "expected": zoned},
- ]
- for example in examples:
- job_config = bigquery.QueryJobConfig()
- job_config.use_legacy_sql = True
- rows = list(Config.CLIENT.query(example["sql"], job_config=job_config))
- self.assertEqual(len(rows), 1)
- self.assertEqual(len(rows[0]), 1)
- self.assertEqual(rows[0][0], example["expected"])
-
- def _generate_standard_sql_types_examples(self):
- naive = datetime.datetime(2016, 12, 5, 12, 41, 9)
- naive_microseconds = datetime.datetime(2016, 12, 5, 12, 41, 9, 250000)
- stamp = "%s %s" % (naive.date().isoformat(), naive.time().isoformat())
- stamp_microseconds = stamp + ".250000"
- zoned = naive.replace(tzinfo=UTC)
- zoned_microseconds = naive_microseconds.replace(tzinfo=UTC)
- numeric = decimal.Decimal("123456789.123456789")
- return [
- {"sql": "SELECT 1", "expected": 1},
- {"sql": "SELECT 1.3", "expected": 1.3},
- {"sql": "SELECT TRUE", "expected": True},
- {"sql": 'SELECT "ABC"', "expected": "ABC"},
- {"sql": 'SELECT CAST("foo" AS BYTES)', "expected": b"foo"},
- {"sql": 'SELECT TIMESTAMP "%s"' % (stamp,), "expected": zoned},
- {
- "sql": 'SELECT TIMESTAMP "%s"' % (stamp_microseconds,),
- "expected": zoned_microseconds,
- },
- {"sql": 'SELECT DATETIME(TIMESTAMP "%s")' % (stamp,), "expected": naive},
- {
- "sql": 'SELECT DATETIME(TIMESTAMP "%s")' % (stamp_microseconds,),
- "expected": naive_microseconds,
- },
- {"sql": 'SELECT DATE(TIMESTAMP "%s")' % (stamp,), "expected": naive.date()},
- {"sql": 'SELECT TIME(TIMESTAMP "%s")' % (stamp,), "expected": naive.time()},
- {"sql": 'SELECT NUMERIC "%s"' % (numeric,), "expected": numeric},
- {"sql": "SELECT (1, 2)", "expected": {"_field_1": 1, "_field_2": 2}},
- {
- "sql": "SELECT ((1, 2), (3, 4), 5)",
- "expected": {
- "_field_1": {"_field_1": 1, "_field_2": 2},
- "_field_2": {"_field_1": 3, "_field_2": 4},
- "_field_3": 5,
- },
- },
- {"sql": "SELECT [1, 2, 3]", "expected": [1, 2, 3]},
- {
- "sql": "SELECT ([1, 2], 3, [4, 5])",
- "expected": {"_field_1": [1, 2], "_field_2": 3, "_field_3": [4, 5]},
- },
- {
- "sql": "SELECT [(1, 2, 3), (4, 5, 6)]",
- "expected": [
- {"_field_1": 1, "_field_2": 2, "_field_3": 3},
- {"_field_1": 4, "_field_2": 5, "_field_3": 6},
- ],
- },
- {
- "sql": "SELECT [([1, 2, 3], 4), ([5, 6], 7)]",
- "expected": [
- {u"_field_1": [1, 2, 3], u"_field_2": 4},
- {u"_field_1": [5, 6], u"_field_2": 7},
- ],
- },
- {
- "sql": "SELECT ARRAY(SELECT STRUCT([1, 2]))",
- "expected": [{u"_field_1": [1, 2]}],
- },
- {"sql": "SELECT ST_GeogPoint(1, 2)", "expected": "POINT(1 2)"},
+ zoned = naive.replace(tzinfo=UTC)
+ examples = [
+ {"sql": "SELECT 1", "expected": 1},
+ {"sql": "SELECT 1.3", "expected": 1.3},
+ {"sql": "SELECT TRUE", "expected": True},
+ {"sql": 'SELECT "ABC"', "expected": "ABC"},
+ {"sql": 'SELECT CAST("foo" AS BYTES)', "expected": b"foo"},
+ {"sql": 'SELECT CAST("%s" AS TIMESTAMP)' % (stamp,), "expected": zoned},
]
-
- def test_query_w_standard_sql_types(self):
- examples = self._generate_standard_sql_types_examples()
for example in examples:
- rows = list(Config.CLIENT.query(example["sql"]))
+ job_config = bigquery.QueryJobConfig()
+ job_config.use_legacy_sql = True
+ rows = list(Config.CLIENT.query(example["sql"], job_config=job_config))
self.assertEqual(len(rows), 1)
self.assertEqual(len(rows[0]), 1)
self.assertEqual(rows[0][0], example["expected"])
+    def test_query_w_standard_sql_types(self):
+        """Run each shared standard SQL example and check its single result."""
+        for sql, expected in helpers.STANDARD_SQL_EXAMPLES:
+            rows = list(Config.CLIENT.query(sql))
+            self.assertEqual([len(row) for row in rows], [1])  # One row, one column.
+            self.assertEqual(rows[0][0], expected)
+
def test_query_w_failed_query(self):
from google.api_core.exceptions import BadRequest
@@ -1820,8 +1400,7 @@ def test_query_w_timeout(self):
# Even though the query takes >1 second, the call to getQueryResults
# should succeed.
self.assertFalse(query_job.done(timeout=1))
-
- Config.CLIENT.cancel_job(query_job.job_id, location=query_job.location)
+ self.assertIsNotNone(Config.CLIENT.cancel_job(query_job))
def test_query_w_page_size(self):
page_size = 45
@@ -1921,14 +1500,103 @@ def test_query_statistics(self):
self.assertGreater(stages_with_inputs, 0)
self.assertGreater(len(plan), stages_with_inputs)
+ def test_dml_statistics(self):
+ table_schema = (
+ bigquery.SchemaField("foo", "STRING"),
+ bigquery.SchemaField("bar", "INTEGER"),
+ )
+
+ dataset_id = _make_dataset_id("bq_system_test")
+ self.temp_dataset(dataset_id)
+ table_id = "{}.{}.test_dml_statistics".format(Config.CLIENT.project, dataset_id)
+
+ # Create the table first so the column order for INSERT is deterministic.
+ table = helpers.retry_403(Config.CLIENT.create_table)(
+ Table(table_id, schema=table_schema)
+ )
+ self.to_delete.insert(0, table)
+
+ # Insert a few rows and check the stats.
+ sql = f"""
+ INSERT INTO `{table_id}`
+ VALUES ("one", 1), ("two", 2), ("three", 3), ("four", 4);
+ """
+ query_job = Config.CLIENT.query(sql)
+ query_job.result()
+
+ assert query_job.dml_stats is not None
+ assert query_job.dml_stats.inserted_row_count == 4
+ assert query_job.dml_stats.updated_row_count == 0
+ assert query_job.dml_stats.deleted_row_count == 0
+
+ # Update some of the rows.
+ sql = f"""
+ UPDATE `{table_id}`
+ SET bar = bar + 1
+ WHERE bar > 2;
+ """
+ query_job = Config.CLIENT.query(sql)
+ query_job.result()
+
+ assert query_job.dml_stats is not None
+ assert query_job.dml_stats.inserted_row_count == 0
+ assert query_job.dml_stats.updated_row_count == 2
+ assert query_job.dml_stats.deleted_row_count == 0
+
+ # Now delete a few rows and check the stats.
+ sql = f"""
+ DELETE FROM `{table_id}`
+ WHERE foo != "two";
+ """
+ query_job = Config.CLIENT.query(sql)
+ query_job.result()
+
+ assert query_job.dml_stats is not None
+ assert query_job.dml_stats.inserted_row_count == 0
+ assert query_job.dml_stats.updated_row_count == 0
+ assert query_job.dml_stats.deleted_row_count == 3
+
+ def test_transaction_info(self):
+ table_schema = (
+ bigquery.SchemaField("foo", "STRING"),
+ bigquery.SchemaField("bar", "INTEGER"),
+ )
+
+ dataset_id = _make_dataset_id("bq_system_test")
+ self.temp_dataset(dataset_id)
+ table_id = f"{Config.CLIENT.project}.{dataset_id}.test_dml_statistics"
+
+ # Create the table first so the column order for INSERT is deterministic.
+ table = helpers.retry_403(Config.CLIENT.create_table)(
+ Table(table_id, schema=table_schema)
+ )
+ self.to_delete.insert(0, table)
+
+ # Insert and update a few rows inside a single transaction.
+ sql = f"""
+ BEGIN TRANSACTION;
+ INSERT INTO `{table_id}`
+ VALUES ("one", 1), ("two", 2), ("three", 3), ("four", 4);
+
+ UPDATE `{table_id}`
+ SET bar = bar + 1
+ WHERE bar > 2;
+ COMMIT TRANSACTION;
+ """
+ query_job = Config.CLIENT.query(sql)
+ query_job.result()
+
+ # Transaction ID set by the server should be accessible
+ assert query_job.transaction_info is not None
+ assert query_job.transaction_info.transaction_id != ""
+
def test_dbapi_w_standard_sql_types(self):
- examples = self._generate_standard_sql_types_examples()
- for example in examples:
- Config.CURSOR.execute(example["sql"])
+ for sql, expected in helpers.STANDARD_SQL_EXAMPLES:
+ Config.CURSOR.execute(sql)
self.assertEqual(Config.CURSOR.rowcount, 1)
row = Config.CURSOR.fetchone()
self.assertEqual(len(row), 1)
- self.assertEqual(row[0], example["expected"])
+ self.assertEqual(row[0], expected)
row = Config.CURSOR.fetchone()
self.assertIsNone(row)
@@ -1967,20 +1635,6 @@ def test_dbapi_fetchall_from_script(self):
row_tuples = [r.values() for r in rows]
self.assertEqual(row_tuples, [(5, "foo"), (6, "bar"), (7, "baz")])
- def test_dbapi_create_view(self):
-
- query = """
- CREATE VIEW {}.dbapi_create_view
- AS SELECT name, SUM(number) AS total
- FROM `bigquery-public-data.usa_names.usa_1910_2013`
- GROUP BY name;
- """.format(
- Config.DATASET
- )
-
- Config.CURSOR.execute(query)
- self.assertEqual(Config.CURSOR.rowcount, 0, "expected 0 rows")
-
@unittest.skipIf(
bigquery_storage is None, "Requires `google-cloud-bigquery-storage`"
)
@@ -2079,7 +1733,7 @@ def _load_table_for_dml(self, rows, dataset_id, table_id):
greeting = bigquery.SchemaField("greeting", "STRING", mode="NULLABLE")
table_ref = dataset.table(table_id)
table_arg = Table(table_ref, schema=[greeting])
- table = retry_403(Config.CLIENT.create_table)(table_arg)
+ table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
self.to_delete.insert(0, table)
with _NamedTemporaryFile() as temp:
@@ -2140,7 +1794,9 @@ def test_query_w_query_params(self):
from google.cloud.bigquery.job import QueryJobConfig
from google.cloud.bigquery.query import ArrayQueryParameter
from google.cloud.bigquery.query import ScalarQueryParameter
+ from google.cloud.bigquery.query import ScalarQueryParameterType
from google.cloud.bigquery.query import StructQueryParameter
+ from google.cloud.bigquery.query import StructQueryParameterType
question = "What is the answer to life, the universe, and everything?"
question_param = ScalarQueryParameter(
@@ -2154,6 +1810,10 @@ def test_query_w_query_params(self):
pi_numeric_param = ScalarQueryParameter(
name="pi_numeric_param", type_="NUMERIC", value=pi_numeric
)
+ bignum = decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38))
+ bignum_param = ScalarQueryParameter(
+ name="bignum_param", type_="BIGNUMERIC", value=bignum
+ )
truthy = True
truthy_param = ScalarQueryParameter(name="truthy", type_="BOOL", value=truthy)
beef = b"DEADBEEF"
@@ -2195,6 +1855,14 @@ def test_query_w_query_params(self):
characters_param = ArrayQueryParameter(
name=None, array_type="RECORD", values=[phred_param, bharney_param]
)
+ empty_struct_array_param = ArrayQueryParameter(
+ name="empty_array_param",
+ values=[],
+ array_type=StructQueryParameterType(
+ ScalarQueryParameterType(name="foo", type_="INT64"),
+ ScalarQueryParameterType(name="bar", type_="STRING"),
+ ),
+ )
hero_param = StructQueryParameter("hero", phred_name_param, phred_age_param)
sidekick_param = StructQueryParameter(
"sidekick", bharney_name_param, bharney_age_param
@@ -2285,6 +1953,11 @@ def test_query_w_query_params(self):
],
"query_parameters": [characters_param],
},
+ {
+ "sql": "SELECT @empty_array_param",
+ "expected": [],
+ "query_parameters": [empty_struct_array_param],
+ },
{
"sql": "SELECT @roles",
"expected": {
@@ -2298,7 +1971,13 @@ def test_query_w_query_params(self):
"expected": {"friends": [phred_name, bharney_name]},
"query_parameters": [with_friends_param],
},
+ {
+ "sql": "SELECT @bignum_param",
+ "expected": bignum,
+ "query_parameters": [bignum_param],
+ },
]
+
for example in examples:
jconfig = QueryJobConfig()
jconfig.query_parameters = example["query_parameters"]
@@ -2424,152 +2103,6 @@ def test_query_iter(self):
row_tuples = [r.values() for r in query_job]
self.assertEqual(row_tuples, [(1,)])
- @unittest.skipIf(pandas is None, "Requires `pandas`")
- def test_query_results_to_dataframe(self):
- QUERY = """
- SELECT id, author, time_ts, dead
- FROM `bigquery-public-data.hacker_news.comments`
- LIMIT 10
- """
-
- df = Config.CLIENT.query(QUERY).result().to_dataframe()
-
- self.assertIsInstance(df, pandas.DataFrame)
- self.assertEqual(len(df), 10) # verify the number of rows
- column_names = ["id", "author", "time_ts", "dead"]
- self.assertEqual(list(df), column_names) # verify the column names
- exp_datatypes = {
- "id": int,
- "author": str,
- "time_ts": pandas.Timestamp,
- "dead": bool,
- }
- for index, row in df.iterrows():
- for col in column_names:
- # all the schema fields are nullable, so None is acceptable
- if not row[col] is None:
- self.assertIsInstance(row[col], exp_datatypes[col])
-
- @unittest.skipIf(pandas is None, "Requires `pandas`")
- @unittest.skipIf(
- bigquery_storage is None, "Requires `google-cloud-bigquery-storage`"
- )
- def test_query_results_to_dataframe_w_bqstorage(self):
- query = """
- SELECT id, author, time_ts, dead
- FROM `bigquery-public-data.hacker_news.comments`
- LIMIT 10
- """
-
- bqstorage_client = bigquery_storage.BigQueryReadClient(
- credentials=Config.CLIENT._credentials
- )
-
- df = Config.CLIENT.query(query).result().to_dataframe(bqstorage_client)
-
- self.assertIsInstance(df, pandas.DataFrame)
- self.assertEqual(len(df), 10) # verify the number of rows
- column_names = ["id", "author", "time_ts", "dead"]
- self.assertEqual(list(df), column_names)
- exp_datatypes = {
- "id": int,
- "author": str,
- "time_ts": pandas.Timestamp,
- "dead": bool,
- }
- for index, row in df.iterrows():
- for col in column_names:
- # all the schema fields are nullable, so None is acceptable
- if not row[col] is None:
- self.assertIsInstance(row[col], exp_datatypes[col])
-
- @unittest.skipIf(pandas is None, "Requires `pandas`")
- def test_insert_rows_from_dataframe(self):
- SF = bigquery.SchemaField
- schema = [
- SF("float_col", "FLOAT", mode="REQUIRED"),
- SF("int_col", "INTEGER", mode="REQUIRED"),
- SF("bool_col", "BOOLEAN", mode="REQUIRED"),
- SF("string_col", "STRING", mode="NULLABLE"),
- ]
-
- dataframe = pandas.DataFrame(
- [
- {
- "float_col": 1.11,
- "bool_col": True,
- "string_col": "my string",
- "int_col": 10,
- },
- {
- "float_col": 2.22,
- "bool_col": False,
- "string_col": "another string",
- "int_col": 20,
- },
- {
- "float_col": 3.33,
- "bool_col": False,
- "string_col": "another string",
- "int_col": 30,
- },
- {
- "float_col": 4.44,
- "bool_col": True,
- "string_col": "another string",
- "int_col": 40,
- },
- {
- "float_col": 5.55,
- "bool_col": False,
- "string_col": "another string",
- "int_col": 50,
- },
- {
- "float_col": 6.66,
- "bool_col": True,
- # Include a NaN value, because pandas often uses NaN as a
- # NULL value indicator.
- "string_col": float("NaN"),
- "int_col": 60,
- },
- ]
- )
-
- table_id = "test_table"
- dataset = self.temp_dataset(_make_dataset_id("issue_7553"))
- table_arg = Table(dataset.table(table_id), schema=schema)
- table = retry_403(Config.CLIENT.create_table)(table_arg)
- self.to_delete.insert(0, table)
-
- chunk_errors = Config.CLIENT.insert_rows_from_dataframe(
- table, dataframe, chunk_size=3
- )
- for errors in chunk_errors:
- assert not errors
-
- # Use query to fetch rows instead of listing directly from the table so
- # that we get values from the streaming buffer.
- rows = list(
- Config.CLIENT.query(
- "SELECT * FROM `{}.{}.{}`".format(
- table.project, table.dataset_id, table.table_id
- )
- )
- )
-
- sorted_rows = sorted(rows, key=operator.attrgetter("int_col"))
- row_tuples = [r.values() for r in sorted_rows]
- expected = [
- tuple(None if col != col else col for col in data_row)
- for data_row in dataframe.itertuples(index=False)
- ]
-
- assert len(row_tuples) == len(expected)
-
- for row, expected_row in zip(row_tuples, expected):
- self.assertCountEqual(row, expected_row) # column order does not matter
-
def test_insert_rows_nested_nested(self):
# See #2951
SF = bigquery.SchemaField
@@ -2600,7 +2133,7 @@ def test_insert_rows_nested_nested(self):
table_id = "test_table"
dataset = self.temp_dataset(_make_dataset_id("issue_2951"))
table_arg = Table(dataset.table(table_id), schema=schema)
- table = retry_403(Config.CLIENT.create_table)(table_arg)
+ table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
self.to_delete.insert(0, table)
Config.CLIENT.insert_rows(table, to_insert)
@@ -2640,7 +2173,7 @@ def test_insert_rows_nested_nested_dictionary(self):
table_id = "test_table"
dataset = self.temp_dataset(_make_dataset_id("issue_2951"))
table_arg = Table(dataset.table(table_id), schema=schema)
- table = retry_403(Config.CLIENT.create_table)(table_arg)
+ table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
self.to_delete.insert(0, table)
Config.CLIENT.insert_rows(table, to_insert)
@@ -2651,6 +2184,9 @@ def test_insert_rows_nested_nested_dictionary(self):
expected_rows = [("Some value", record)]
self.assertEqual(row_tuples, expected_rows)
+ @pytest.mark.skipif(
+ MTLS_TESTING, reason="mTLS testing has no permission to the max-value.js file"
+ )
def test_create_routine(self):
routine_name = "test_routine"
dataset = self.temp_dataset(_make_dataset_id("create_routine"))
@@ -2676,23 +2212,103 @@ def test_create_routine(self):
)
]
routine.body = "return maxValue(arr)"
+ routine.determinism_level = bigquery.DeterminismLevel.DETERMINISTIC
query_string = "SELECT `{}`([-100.0, 3.14, 100.0, 42.0]) as max_value;".format(
str(routine.reference)
)
- routine = retry_403(Config.CLIENT.create_routine)(routine)
- query_job = retry_403(Config.CLIENT.query)(query_string)
+ routine = helpers.retry_403(Config.CLIENT.create_routine)(routine)
+ query_job = helpers.retry_403(Config.CLIENT.query)(query_string)
rows = list(query_job.result())
assert len(rows) == 1
assert rows[0].max_value == 100.0
+ def test_create_tvf_routine(self):
+ from google.cloud.bigquery import Routine, RoutineArgument, RoutineType
+
+ StandardSqlDataType = bigquery_v2.types.StandardSqlDataType
+ StandardSqlField = bigquery_v2.types.StandardSqlField
+ StandardSqlTableType = bigquery_v2.types.StandardSqlTableType
+
+ INT64 = StandardSqlDataType.TypeKind.INT64
+ STRING = StandardSqlDataType.TypeKind.STRING
+
+ client = Config.CLIENT
+
+ dataset = self.temp_dataset(_make_dataset_id("create_tvf_routine"))
+ routine_ref = dataset.routine("test_tvf_routine")
+
+ routine_body = """
+ SELECT int_col, str_col
+ FROM (
+ UNNEST([1, 2, 3]) int_col
+ JOIN
+ (SELECT str_col FROM UNNEST(["one", "two", "three"]) str_col)
+ ON TRUE
+ )
+ WHERE int_col > threshold
+ """
+
+ return_table_type = StandardSqlTableType(
+ columns=[
+ StandardSqlField(
+ name="int_col", type=StandardSqlDataType(type_kind=INT64),
+ ),
+ StandardSqlField(
+ name="str_col", type=StandardSqlDataType(type_kind=STRING),
+ ),
+ ]
+ )
+
+ routine_args = [
+ RoutineArgument(
+ name="threshold", data_type=StandardSqlDataType(type_kind=INT64),
+ )
+ ]
+
+ routine_def = Routine(
+ routine_ref,
+ type_=RoutineType.TABLE_VALUED_FUNCTION,
+ arguments=routine_args,
+ return_table_type=return_table_type,
+ body=routine_body,
+ )
+
+ # Create TVF routine.
+ client.delete_routine(routine_ref, not_found_ok=True)
+ routine = client.create_routine(routine_def)
+
+ assert routine.body == routine_body
+ assert routine.return_table_type == return_table_type
+ assert routine.arguments == routine_args
+
+ # Execute the routine to see if it's working as expected.
+ query_job = client.query(
+ f"""
+ SELECT int_col, str_col
+ FROM `{routine.reference}`(1)
+ ORDER BY int_col, str_col ASC
+ """
+ )
+
+ result_rows = [tuple(row) for row in query_job.result()]
+ expected = [
+ (2, "one"),
+ (2, "three"),
+ (2, "two"),
+ (3, "one"),
+ (3, "three"),
+ (3, "two"),
+ ]
+ assert result_rows == expected
+
def test_create_table_rows_fetch_nested_schema(self):
table_name = "test_table"
dataset = self.temp_dataset(_make_dataset_id("create_table_nested_schema"))
schema = _load_json_schema()
table_arg = Table(dataset.table(table_name), schema=schema)
- table = retry_403(Config.CLIENT.create_table)(table_arg)
+ table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
self.to_delete.insert(0, table)
self.assertTrue(_table_exists(table))
self.assertEqual(table.table_id, table_name)
@@ -2744,9 +2360,6 @@ def test_create_table_rows_fetch_nested_schema(self):
self.assertEqual(found[7], e_favtime)
self.assertEqual(found[8], decimal.Decimal(expected["FavoriteNumber"]))
- def _fetch_dataframe(self, query):
- return Config.CLIENT.query(query).result().to_dataframe()
-
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
@unittest.skipIf(
bigquery_storage is None, "Requires `google-cloud-bigquery-storage`"
@@ -2797,176 +2410,24 @@ def test_nested_table_to_arrow(self):
self.assertEqual(tbl.num_rows, 1)
self.assertEqual(tbl.num_columns, 3)
# Columns may not appear in the requested order.
- self.assertTrue(
- pyarrow.types.is_float64(tbl.schema.field_by_name("float_col").type)
- )
- self.assertTrue(
- pyarrow.types.is_string(tbl.schema.field_by_name("string_col").type)
- )
- record_col = tbl.schema.field_by_name("record_col").type
+ self.assertTrue(pyarrow.types.is_float64(tbl.schema.field("float_col").type))
+ self.assertTrue(pyarrow.types.is_string(tbl.schema.field("string_col").type))
+ record_col = tbl.schema.field("record_col").type
self.assertTrue(pyarrow.types.is_struct(record_col))
- self.assertEqual(record_col.num_children, 2)
+ self.assertEqual(record_col.num_fields, 2)
self.assertEqual(record_col[0].name, "nested_string")
self.assertTrue(pyarrow.types.is_string(record_col[0].type))
self.assertEqual(record_col[1].name, "nested_repeated")
self.assertTrue(pyarrow.types.is_list(record_col[1].type))
self.assertTrue(pyarrow.types.is_int64(record_col[1].type.value_type))
- @unittest.skipIf(pandas is None, "Requires `pandas`")
- def test_nested_table_to_dataframe(self):
- from google.cloud.bigquery.job import SourceFormat
- from google.cloud.bigquery.job import WriteDisposition
-
- SF = bigquery.SchemaField
- schema = [
- SF("string_col", "STRING", mode="NULLABLE"),
- SF(
- "record_col",
- "RECORD",
- mode="NULLABLE",
- fields=[
- SF("nested_string", "STRING", mode="NULLABLE"),
- SF("nested_repeated", "INTEGER", mode="REPEATED"),
- SF(
- "nested_record",
- "RECORD",
- mode="NULLABLE",
- fields=[SF("nested_nested_string", "STRING", mode="NULLABLE")],
- ),
- ],
- ),
- SF("bigfloat_col", "FLOAT", mode="NULLABLE"),
- SF("smallfloat_col", "FLOAT", mode="NULLABLE"),
- ]
- record = {
- "nested_string": "another string value",
- "nested_repeated": [0, 1, 2],
- "nested_record": {"nested_nested_string": "some deep insight"},
- }
- to_insert = [
- {
- "string_col": "Some value",
- "record_col": record,
- "bigfloat_col": 3.14,
- "smallfloat_col": 2.72,
- }
- ]
- rows = [json.dumps(row) for row in to_insert]
- body = io.BytesIO("{}\n".format("\n".join(rows)).encode("ascii"))
- table_id = "test_table"
- dataset = self.temp_dataset(_make_dataset_id("nested_df"))
- table = dataset.table(table_id)
- self.to_delete.insert(0, table)
- job_config = bigquery.LoadJobConfig()
- job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE
- job_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON
- job_config.schema = schema
- # Load a table using a local JSON file from memory.
- Config.CLIENT.load_table_from_file(body, table, job_config=job_config).result()
-
- df = Config.CLIENT.list_rows(table, selected_fields=schema).to_dataframe(
- dtypes={"smallfloat_col": "float16"}
- )
-
- self.assertIsInstance(df, pandas.DataFrame)
- self.assertEqual(len(df), 1) # verify the number of rows
- exp_columns = ["string_col", "record_col", "bigfloat_col", "smallfloat_col"]
- self.assertEqual(list(df), exp_columns) # verify the column names
- row = df.iloc[0]
- # verify the row content
- self.assertEqual(row["string_col"], "Some value")
- expected_keys = tuple(sorted(record.keys()))
- row_keys = tuple(sorted(row["record_col"].keys()))
- self.assertEqual(row_keys, expected_keys)
- # Can't compare numpy arrays, which pyarrow encodes the embedded
- # repeated column to, so convert to list.
- self.assertEqual(list(row["record_col"]["nested_repeated"]), [0, 1, 2])
- # verify that nested data can be accessed with indices/keys
- self.assertEqual(row["record_col"]["nested_repeated"][0], 0)
- self.assertEqual(
- row["record_col"]["nested_record"]["nested_nested_string"],
- "some deep insight",
- )
- # verify dtypes
- self.assertEqual(df.dtypes["bigfloat_col"].name, "float64")
- self.assertEqual(df.dtypes["smallfloat_col"].name, "float16")
-
- def test_list_rows_empty_table(self):
- from google.cloud.bigquery.table import RowIterator
-
- dataset_id = _make_dataset_id("empty_table")
- dataset = self.temp_dataset(dataset_id)
- table_ref = dataset.table("empty_table")
- table = Config.CLIENT.create_table(bigquery.Table(table_ref))
-
- # It's a bit silly to list rows for an empty table, but this does
- # happen as the result of a DDL query from an IPython magic command.
- rows = Config.CLIENT.list_rows(table)
- self.assertIsInstance(rows, RowIterator)
- self.assertEqual(tuple(rows), ())
-
- def test_list_rows_page_size(self):
- from google.cloud.bigquery.job import SourceFormat
- from google.cloud.bigquery.job import WriteDisposition
-
- num_items = 7
- page_size = 3
- num_pages, num_last_page = divmod(num_items, page_size)
-
- SF = bigquery.SchemaField
- schema = [SF("string_col", "STRING", mode="NULLABLE")]
- to_insert = [{"string_col": "item%d" % i} for i in range(num_items)]
- rows = [json.dumps(row) for row in to_insert]
- body = io.BytesIO("{}\n".format("\n".join(rows)).encode("ascii"))
-
- table_id = "test_table"
- dataset = self.temp_dataset(_make_dataset_id("nested_df"))
- table = dataset.table(table_id)
- self.to_delete.insert(0, table)
- job_config = bigquery.LoadJobConfig()
- job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE
- job_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON
- job_config.schema = schema
- # Load a table using a local JSON file from memory.
- Config.CLIENT.load_table_from_file(body, table, job_config=job_config).result()
-
- df = Config.CLIENT.list_rows(table, selected_fields=schema, page_size=page_size)
- pages = df.pages
-
- for i in range(num_pages):
- page = next(pages)
- self.assertEqual(page.num_items, page_size)
- page = next(pages)
- self.assertEqual(page.num_items, num_last_page)
-
- @unittest.skipIf(pandas is None, "Requires `pandas`")
- @unittest.skipIf(
- bigquery_storage is None, "Requires `google-cloud-bigquery-storage`"
- )
- def test_list_rows_max_results_w_bqstorage(self):
- table_ref = DatasetReference("bigquery-public-data", "utility_us").table(
- "country_code_iso"
- )
- bqstorage_client = bigquery_storage.BigQueryReadClient(
- credentials=Config.CLIENT._credentials
- )
-
- row_iterator = Config.CLIENT.list_rows(
- table_ref,
- selected_fields=[bigquery.SchemaField("country_name", "STRING")],
- max_results=100,
- )
- dataframe = row_iterator.to_dataframe(bqstorage_client=bqstorage_client)
-
- self.assertEqual(len(dataframe.index), 100)
-
def temp_dataset(self, dataset_id, location=None):
project = Config.CLIENT.project
dataset_ref = bigquery.DatasetReference(project, dataset_id)
dataset = Dataset(dataset_ref)
if location:
dataset.location = location
- dataset = retry_403(Config.CLIENT.create_dataset)(dataset)
+ dataset = helpers.retry_403(Config.CLIENT.create_dataset)(dataset)
self.to_delete.append(dataset)
return dataset
@@ -2990,3 +2451,108 @@ def _table_exists(t):
return True
except NotFound:
return False
+
+
+def test_dbapi_create_view(dataset_id):
+
+ query = f"""
+ CREATE VIEW {dataset_id}.dbapi_create_view
+ AS SELECT name, SUM(number) AS total
+ FROM `bigquery-public-data.usa_names.usa_1910_2013`
+ GROUP BY name;
+ """
+
+ Config.CURSOR.execute(query)
+ assert Config.CURSOR.rowcount == 0, "expected 0 rows"
+
+
+def test_parameterized_types_round_trip(dataset_id):
+ client = Config.CLIENT
+ table_id = f"{dataset_id}.test_parameterized_types_round_trip"
+ fields = (
+ ("n", "NUMERIC"),
+ ("n9", "NUMERIC(9)"),
+ ("n92", "NUMERIC(9, 2)"),
+ ("bn", "BIGNUMERIC"),
+ ("bn9", "BIGNUMERIC(38)"),
+ ("bn92", "BIGNUMERIC(38, 22)"),
+ ("s", "STRING"),
+ ("s9", "STRING(9)"),
+ ("b", "BYTES"),
+ ("b9", "BYTES(9)"),
+ )
+ client.query(
+ "create table {} ({})".format(table_id, ", ".join(" ".join(f) for f in fields))
+ ).result()
+ table = client.get_table(table_id)
+ table_id2 = table_id + "2"
+ client.create_table(Table(f"{client.project}.{table_id2}", table.schema))
+ table2 = client.get_table(table_id2)
+
+ assert tuple(s._key()[:2] for s in table2.schema) == fields
+
+
+def test_table_snapshots(dataset_id):
+ from google.cloud.bigquery import CopyJobConfig
+ from google.cloud.bigquery import OperationType
+
+ client = Config.CLIENT
+
+ source_table_path = f"{client.project}.{dataset_id}.test_table"
+ snapshot_table_path = f"{source_table_path}_snapshot"
+
+ # Create the table before loading so that the column order is predictable.
+ schema = [
+ bigquery.SchemaField("foo", "INTEGER"),
+ bigquery.SchemaField("bar", "STRING"),
+ ]
+ source_table = helpers.retry_403(Config.CLIENT.create_table)(
+ Table(source_table_path, schema=schema)
+ )
+
+ # Populate the table with initial data.
+ rows = [{"foo": 1, "bar": "one"}, {"foo": 2, "bar": "two"}]
+ load_job = Config.CLIENT.load_table_from_json(rows, source_table)
+ load_job.result()
+
+ # Now create a snapshot before modifying the original table data.
+ copy_config = CopyJobConfig()
+ copy_config.operation_type = OperationType.SNAPSHOT
+
+ copy_job = client.copy_table(
+ sources=source_table_path,
+ destination=snapshot_table_path,
+ job_config=copy_config,
+ )
+ copy_job.result()
+
+ # Modify data in original table.
+ sql = f'INSERT INTO `{source_table_path}`(foo, bar) VALUES (3, "three")'
+ query_job = client.query(sql)
+ query_job.result()
+
+ # List rows from the source table and compare them to rows from the snapshot.
+ rows_iter = client.list_rows(source_table_path)
+ rows = sorted(row.values() for row in rows_iter)
+ assert rows == [(1, "one"), (2, "two"), (3, "three")]
+
+ rows_iter = client.list_rows(snapshot_table_path)
+ rows = sorted(row.values() for row in rows_iter)
+ assert rows == [(1, "one"), (2, "two")]
+
+ # Now restore the table from the snapshot and it should again contain the old
+ # set of rows.
+ copy_config = CopyJobConfig()
+ copy_config.operation_type = OperationType.RESTORE
+ copy_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
+
+ copy_job = client.copy_table(
+ sources=snapshot_table_path,
+ destination=source_table_path,
+ job_config=copy_config,
+ )
+ copy_job.result()
+
+ rows_iter = client.list_rows(source_table_path)
+ rows = sorted(row.values() for row in rows_iter)
+ assert rows == [(1, "one"), (2, "two")]
diff --git a/tests/system/test_job_retry.py b/tests/system/test_job_retry.py
new file mode 100644
index 000000000..520545493
--- /dev/null
+++ b/tests/system/test_job_retry.py
@@ -0,0 +1,72 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import contextlib
+import threading
+import time
+
+import google.api_core.exceptions
+import google.cloud.bigquery
+import pytest
+
+
+def thread(func):
+ thread = threading.Thread(target=func, daemon=True)
+ thread.start()
+ return thread
+
+
+@pytest.mark.parametrize("job_retry_on_query", [True, False])
+def test_query_retry_539(bigquery_client, dataset_id, job_retry_on_query):
+ """
+ Test job_retry
+
+ See: https://github.com/googleapis/python-bigquery/issues/539
+ """
+ from google.api_core import exceptions
+ from google.api_core.retry import if_exception_type, Retry
+
+ table_name = f"{dataset_id}.t539"
+
+ # Without a custom retry, we fail:
+ with pytest.raises(google.api_core.exceptions.NotFound):
+ bigquery_client.query(f"select count(*) from {table_name}").result()
+
+ retry_notfound = Retry(predicate=if_exception_type(exceptions.NotFound))
+
+ job_retry = dict(job_retry=retry_notfound) if job_retry_on_query else {}
+ job = bigquery_client.query(f"select count(*) from {table_name}", **job_retry)
+ job_id = job.job_id
+
+ # We can already know that the job failed, but we're not supposed
+ # to find out until we call result, which is where retry happens
+ assert job.done()
+ assert job.exception() is not None
+
+ @thread
+ def create_table():
+ time.sleep(1) # Give the first retry attempt time to fail.
+ with contextlib.closing(google.cloud.bigquery.Client()) as client:
+ client.query(f"create table {table_name} (id int64)").result()
+
+ job_retry = {} if job_retry_on_query else dict(job_retry=retry_notfound)
+ [[count]] = list(job.result(**job_retry))
+ assert count == 0
+
+ # The job was retried, and thus got a new job id
+ assert job.job_id != job_id
+
+ # Make sure we don't leave a thread behind:
+ create_table.join()
+ bigquery_client.query(f"drop table {table_name}").result()
diff --git a/tests/system/test_list_rows.py b/tests/system/test_list_rows.py
new file mode 100644
index 000000000..70388059e
--- /dev/null
+++ b/tests/system/test_list_rows.py
@@ -0,0 +1,112 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime
+import decimal
+
+from google.cloud import bigquery
+from google.cloud.bigquery import enums
+
+
+def test_list_rows_empty_table(bigquery_client: bigquery.Client, table_id: str):
+ from google.cloud.bigquery.table import RowIterator
+
+ table = bigquery_client.create_table(table_id)
+
+ # It's a bit silly to list rows for an empty table, but this does
+ # happen as the result of a DDL query from an IPython magic command.
+ rows = bigquery_client.list_rows(table)
+ assert isinstance(rows, RowIterator)
+ assert tuple(rows) == ()
+
+
+def test_list_rows_page_size(bigquery_client: bigquery.Client, table_id: str):
+ num_items = 7
+ page_size = 3
+ num_pages, num_last_page = divmod(num_items, page_size)
+
+ to_insert = [{"string_col": "item%d" % i, "rowindex": i} for i in range(num_items)]
+ bigquery_client.load_table_from_json(to_insert, table_id).result()
+
+ df = bigquery_client.list_rows(
+ table_id,
+ selected_fields=[bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING)],
+ page_size=page_size,
+ )
+ pages = df.pages
+
+ for i in range(num_pages):
+ page = next(pages)
+ assert page.num_items == page_size
+ page = next(pages)
+ assert page.num_items == num_last_page
+
+
+def test_list_rows_scalars(bigquery_client: bigquery.Client, scalars_table: str):
+ rows = sorted(
+ bigquery_client.list_rows(scalars_table), key=lambda row: row["rowindex"]
+ )
+ row = rows[0]
+ assert row["bool_col"] # True
+ assert row["bytes_col"] == b"Hello, World!"
+ assert row["date_col"] == datetime.date(2021, 7, 21)
+ assert row["datetime_col"] == datetime.datetime(2021, 7, 21, 11, 39, 45)
+ assert row["geography_col"] == "POINT(-122.0838511 37.3860517)"
+ assert row["int64_col"] == 123456789
+ assert row["numeric_col"] == decimal.Decimal("1.23456789")
+ assert row["bignumeric_col"] == decimal.Decimal("10.111213141516171819")
+ assert row["float64_col"] == 1.25
+ assert row["string_col"] == "Hello, World!"
+ assert row["time_col"] == datetime.time(11, 41, 43, 76160)
+ assert row["timestamp_col"] == datetime.datetime(
+ 2021, 7, 21, 17, 43, 43, 945289, tzinfo=datetime.timezone.utc
+ )
+
+ nullrow = rows[1]
+ for column, value in nullrow.items():
+ if column == "rowindex":
+ assert value == 1
+ else:
+ assert value is None
+
+
+def test_list_rows_scalars_extreme(
+ bigquery_client: bigquery.Client, scalars_extreme_table: str
+):
+ rows = sorted(
+ bigquery_client.list_rows(scalars_extreme_table),
+ key=lambda row: row["rowindex"],
+ )
+ row = rows[0]
+ assert row["bool_col"] # True
+ assert row["bytes_col"] == b"\r\n"
+ assert row["date_col"] == datetime.date(9999, 12, 31)
+ assert row["datetime_col"] == datetime.datetime(9999, 12, 31, 23, 59, 59, 999999)
+ assert row["geography_col"] == "POINT(-135 90)"
+ assert row["int64_col"] == 9223372036854775807
+ assert row["numeric_col"] == decimal.Decimal(f"9.{'9' * 37}E+28")
+ assert row["bignumeric_col"] == decimal.Decimal(f"9.{'9' * 75}E+37")
+ assert row["float64_col"] == float("Inf")
+ assert row["string_col"] == "Hello, World"
+ assert row["time_col"] == datetime.time(23, 59, 59, 999999)
+ assert row["timestamp_col"] == datetime.datetime(
+ 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc
+ )
+
+ nullrow = rows[4]
+ for column, value in nullrow.items():
+ if column == "rowindex":
+ assert value == 4
+ else:
+ assert value is None
diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py
new file mode 100644
index 000000000..93ce23481
--- /dev/null
+++ b/tests/system/test_pandas.py
@@ -0,0 +1,985 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""System tests for pandas connector."""
+
+import collections
+import datetime
+import decimal
+import json
+import io
+import operator
+
+import google.api_core.retry
+import pkg_resources
+import pytest
+
+from google.cloud import bigquery
+from . import helpers
+
+
+bigquery_storage = pytest.importorskip(
+ "google.cloud.bigquery_storage", minversion="2.0.0"
+)
+pandas = pytest.importorskip("pandas", minversion="0.23.0")
+pyarrow = pytest.importorskip("pyarrow", minversion="1.0.0")
+
+
+PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version
+PANDAS_INT64_VERSION = pkg_resources.parse_version("1.0.0")
+
+
+class MissingDataError(Exception):
+ pass
+
+
+def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_id):
+ """Test that a DataFrame with dtypes that map well to BigQuery types
+ can be uploaded without specifying a schema.
+
+ https://github.com/googleapis/google-cloud-python/issues/9044
+ """
+ df_data = collections.OrderedDict(
+ [
+ ("bool_col", pandas.Series([True, False, True], dtype="bool")),
+ (
+ "ts_col",
+ pandas.Series(
+ [
+ datetime.datetime(2010, 1, 2, 3, 44, 50),
+ datetime.datetime(2011, 2, 3, 14, 50, 59),
+ datetime.datetime(2012, 3, 14, 15, 16),
+ ],
+ dtype="datetime64[ns]",
+ ).dt.tz_localize(datetime.timezone.utc),
+ ),
+ (
+ "dt_col",
+ pandas.Series(
+ [
+ datetime.datetime(2010, 1, 2, 3, 44, 50),
+ datetime.datetime(2011, 2, 3, 14, 50, 59),
+ datetime.datetime(2012, 3, 14, 15, 16),
+ ],
+ dtype="datetime64[ns]",
+ ),
+ ),
+ ("float32_col", pandas.Series([1.0, 2.0, 3.0], dtype="float32")),
+ ("float64_col", pandas.Series([4.0, 5.0, 6.0], dtype="float64")),
+ ("int8_col", pandas.Series([-12, -11, -10], dtype="int8")),
+ ("int16_col", pandas.Series([-9, -8, -7], dtype="int16")),
+ ("int32_col", pandas.Series([-6, -5, -4], dtype="int32")),
+ ("int64_col", pandas.Series([-3, -2, -1], dtype="int64")),
+ ("uint8_col", pandas.Series([0, 1, 2], dtype="uint8")),
+ ("uint16_col", pandas.Series([3, 4, 5], dtype="uint16")),
+ ("uint32_col", pandas.Series([6, 7, 8], dtype="uint32")),
+ ]
+ )
+ dataframe = pandas.DataFrame(df_data, columns=df_data.keys())
+
+ table_id = "{}.{}.load_table_from_dataframe_w_automatic_schema".format(
+ bigquery_client.project, dataset_id
+ )
+
+ load_job = bigquery_client.load_table_from_dataframe(dataframe, table_id)
+ load_job.result()
+
+ table = bigquery_client.get_table(table_id)
+ assert tuple(table.schema) == (
+ bigquery.SchemaField("bool_col", "BOOLEAN"),
+ bigquery.SchemaField("ts_col", "TIMESTAMP"),
+ # BigQuery does not support uploading DATETIME values from
+ # Parquet files. See:
+ # https://github.com/googleapis/google-cloud-python/issues/9996
+ bigquery.SchemaField("dt_col", "TIMESTAMP"),
+ bigquery.SchemaField("float32_col", "FLOAT"),
+ bigquery.SchemaField("float64_col", "FLOAT"),
+ bigquery.SchemaField("int8_col", "INTEGER"),
+ bigquery.SchemaField("int16_col", "INTEGER"),
+ bigquery.SchemaField("int32_col", "INTEGER"),
+ bigquery.SchemaField("int64_col", "INTEGER"),
+ bigquery.SchemaField("uint8_col", "INTEGER"),
+ bigquery.SchemaField("uint16_col", "INTEGER"),
+ bigquery.SchemaField("uint32_col", "INTEGER"),
+ )
+ assert table.num_rows == 3
+
+
+@pytest.mark.skipif(
+    PANDAS_INSTALLED_VERSION < PANDAS_INT64_VERSION,
+    reason="Only `pandas version >=1.0.0` is supported",
+)
+def test_load_table_from_dataframe_w_nullable_int64_datatype(
+    bigquery_client, dataset_id
+):
+    """Test that a DataFrame containing a column with None values and the nullable
+    ``Int64`` dtype can be uploaded when the table is created with a schema first.
+
+    https://github.com/googleapis/python-bigquery/issues/22
+    """
+    table_id = "{}.{}.load_table_from_dataframe_w_nullable_int64_datatype".format(
+        bigquery_client.project, dataset_id
+    )
+    table_schema = (bigquery.SchemaField("x", "INTEGER", mode="NULLABLE"),)
+    table = helpers.retry_403(bigquery_client.create_table)(
+        bigquery.Table(table_id, schema=table_schema)
+    )
+
+    df_data = collections.OrderedDict(
+        [("x", pandas.Series([1, 2, None, 4], dtype="Int64"))]
+    )
+    dataframe = pandas.DataFrame(df_data, columns=df_data.keys())
+    load_job = bigquery_client.load_table_from_dataframe(dataframe, table_id)
+    load_job.result()
+    table = bigquery_client.get_table(table_id)
+    assert tuple(table.schema) == (bigquery.SchemaField("x", "INTEGER"),)
+    assert table.num_rows == 4
+
+
+@pytest.mark.skipif(
+    PANDAS_INSTALLED_VERSION < PANDAS_INT64_VERSION,
+    reason="Only `pandas version >=1.0.0` is supported",
+)
+def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(
+    bigquery_client, dataset_id, table_id
+):
+    """Test that a DataFrame containing a column with None values and the nullable
+    ``Int64`` dtype can be uploaded without specifying a schema.
+
+    https://github.com/googleapis/python-bigquery/issues/22
+    """
+
+    df_data = collections.OrderedDict(
+        [("x", pandas.Series([1, 2, None, 4], dtype="Int64"))]
+    )
+    dataframe = pandas.DataFrame(df_data, columns=df_data.keys())
+    load_job = bigquery_client.load_table_from_dataframe(dataframe, table_id)
+    load_job.result()
+    table = bigquery_client.get_table(table_id)
+    assert tuple(table.schema) == (bigquery.SchemaField("x", "INTEGER"),)
+    assert table.num_rows == 4
+
+
+def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id):
+ """Test that a DataFrame with null columns can be uploaded if a
+ BigQuery schema is specified.
+
+ See: https://github.com/googleapis/google-cloud-python/issues/7370
+ """
+ # Schema with all scalar types.
+ scalars_schema = (
+ bigquery.SchemaField("bool_col", "BOOLEAN"),
+ bigquery.SchemaField("bytes_col", "BYTES"),
+ bigquery.SchemaField("date_col", "DATE"),
+ bigquery.SchemaField("dt_col", "DATETIME"),
+ bigquery.SchemaField("float_col", "FLOAT"),
+ bigquery.SchemaField("geo_col", "GEOGRAPHY"),
+ bigquery.SchemaField("int_col", "INTEGER"),
+ bigquery.SchemaField("num_col", "NUMERIC"),
+ bigquery.SchemaField("bignum_col", "BIGNUMERIC"),
+ bigquery.SchemaField("str_col", "STRING"),
+ bigquery.SchemaField("time_col", "TIME"),
+ bigquery.SchemaField("ts_col", "TIMESTAMP"),
+ )
+
+ table_schema = scalars_schema + (
+ # TODO: Array columns can't be read due to NULLABLE versus REPEATED
+ # mode mismatch. See:
+ # https://issuetracker.google.com/133415569#comment3
+ # bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"),
+ # TODO: Support writing StructArrays to Parquet. See:
+ # https://jira.apache.org/jira/browse/ARROW-2587
+ # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema),
+ )
+ num_rows = 100
+ nulls = [None] * num_rows
+ df_data = [
+ ("bool_col", nulls),
+ ("bytes_col", nulls),
+ ("date_col", nulls),
+ ("dt_col", nulls),
+ ("float_col", nulls),
+ ("geo_col", nulls),
+ ("int_col", nulls),
+ ("num_col", nulls),
+ ("bignum_col", nulls),
+ ("str_col", nulls),
+ ("time_col", nulls),
+ ("ts_col", nulls),
+ ]
+ df_data = collections.OrderedDict(df_data)
+ dataframe = pandas.DataFrame(df_data, columns=df_data.keys())
+
+ table_id = "{}.{}.load_table_from_dataframe_w_nulls".format(
+ bigquery_client.project, dataset_id
+ )
+
+ # Create the table before loading so that schema mismatch errors are
+ # identified.
+ table = helpers.retry_403(bigquery_client.create_table)(
+ bigquery.Table(table_id, schema=table_schema)
+ )
+
+ job_config = bigquery.LoadJobConfig(schema=table_schema)
+ load_job = bigquery_client.load_table_from_dataframe(
+ dataframe, table_id, job_config=job_config
+ )
+ load_job.result()
+
+ table = bigquery_client.get_table(table)
+ assert tuple(table.schema) == table_schema
+ assert table.num_rows == num_rows
+
+
+def test_load_table_from_dataframe_w_required(bigquery_client, dataset_id):
+    """Test that a DataFrame with required columns can be uploaded if a
+    BigQuery schema is specified.
+
+    See: https://github.com/googleapis/google-cloud-python/issues/8093
+    """
+    table_schema = (
+        bigquery.SchemaField("name", "STRING", mode="REQUIRED"),
+        bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
+    )
+
+    records = [{"name": "Chip", "age": 2}, {"name": "Dale", "age": 3}]
+    dataframe = pandas.DataFrame(records, columns=["name", "age"])
+    table_id = "{}.{}.load_table_from_dataframe_w_required".format(
+        bigquery_client.project, dataset_id
+    )
+
+    # Create the table before loading so that schema mismatch errors are
+    # identified.
+    table = helpers.retry_403(bigquery_client.create_table)(
+        bigquery.Table(table_id, schema=table_schema)
+    )
+
+    # The load job carries the same schema, including the REQUIRED modes.
+    job_config = bigquery.LoadJobConfig(schema=table_schema)
+    load_job = bigquery_client.load_table_from_dataframe(
+        dataframe, table_id, job_config=job_config
+    )
+    load_job.result()
+
+    table = bigquery_client.get_table(table)
+    assert tuple(table.schema) == table_schema
+    assert table.num_rows == len(records)
+
+
+def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id):
+ # Schema with all scalar types.
+ # See:
+ # https://github.com/googleapis/python-bigquery/issues/61
+ # https://issuetracker.google.com/issues/151765076
+ scalars_schema = (
+ bigquery.SchemaField("bool_col", "BOOLEAN"),
+ bigquery.SchemaField("bytes_col", "BYTES"),
+ bigquery.SchemaField("date_col", "DATE"),
+ bigquery.SchemaField("dt_col", "DATETIME"),
+ bigquery.SchemaField("float_col", "FLOAT"),
+ bigquery.SchemaField("geo_col", "GEOGRAPHY"),
+ bigquery.SchemaField("int_col", "INTEGER"),
+ bigquery.SchemaField("num_col", "NUMERIC"),
+ bigquery.SchemaField("bignum_col", "BIGNUMERIC"),
+ bigquery.SchemaField("str_col", "STRING"),
+ bigquery.SchemaField("time_col", "TIME"),
+ bigquery.SchemaField("ts_col", "TIMESTAMP"),
+ )
+
+ table_schema = scalars_schema + (
+ # TODO: Array columns can't be read due to NULLABLE versus REPEATED
+ # mode mismatch. See:
+ # https://issuetracker.google.com/133415569#comment3
+ # bigquery.SchemaField("array_col", "INTEGER", mode="REPEATED"),
+ # TODO: Support writing StructArrays to Parquet. See:
+ # https://jira.apache.org/jira/browse/ARROW-2587
+ # bigquery.SchemaField("struct_col", "RECORD", fields=scalars_schema),
+ )
+
+ df_data = [
+ ("bool_col", [True, None, False]),
+ ("bytes_col", [b"abc", None, b"def"]),
+ ("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)]),
+ (
+ "dt_col",
+ [
+ datetime.datetime(1, 1, 1, 0, 0, 0),
+ None,
+ datetime.datetime(9999, 12, 31, 23, 59, 59, 999999),
+ ],
+ ),
+ ("float_col", [float("-inf"), float("nan"), float("inf")]),
+ (
+ "geo_col",
+ ["POINT(30 10)", None, "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))"],
+ ),
+ ("int_col", [-9223372036854775808, None, 9223372036854775807]),
+ (
+ "num_col",
+ [
+ decimal.Decimal("-99999999999999999999999999999.999999999"),
+ None,
+ decimal.Decimal("99999999999999999999999999999.999999999"),
+ ],
+ ),
+ (
+ "bignum_col",
+ [
+ decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)),
+ None,
+ decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)),
+ ],
+ ),
+ ("str_col", ["abc", None, "def"]),
+ (
+ "time_col",
+ [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)],
+ ),
+ (
+ "ts_col",
+ [
+ datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc),
+ None,
+ datetime.datetime(
+ 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc
+ ),
+ ],
+ ),
+ ]
+ df_data = collections.OrderedDict(df_data)
+ dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys())
+
+ table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema".format(
+ bigquery_client.project, dataset_id
+ )
+
+ job_config = bigquery.LoadJobConfig(schema=table_schema)
+ load_job = bigquery_client.load_table_from_dataframe(
+ dataframe, table_id, job_config=job_config
+ )
+ load_job.result()
+
+ table = bigquery_client.get_table(table_id)
+ assert tuple(table.schema) == table_schema
+ assert table.num_rows == 3
+
+
+def test_load_table_from_dataframe_w_struct_datatype(bigquery_client, dataset_id):
+ """Test that a DataFrame with struct datatype can be uploaded if a
+ BigQuery schema is specified.
+
+ https://github.com/googleapis/python-bigquery/issues/21
+ """
+ table_id = "{}.{}.load_table_from_dataframe_w_struct_datatype".format(
+ bigquery_client.project, dataset_id
+ )
+ table_schema = [
+ bigquery.SchemaField(
+ "bar",
+ "RECORD",
+ fields=[
+ bigquery.SchemaField("id", "INTEGER", mode="REQUIRED"),
+ bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
+ ],
+ mode="REQUIRED",
+ ),
+ ]
+ table = helpers.retry_403(bigquery_client.create_table)(
+ bigquery.Table(table_id, schema=table_schema)
+ )
+
+ df_data = [{"id": 1, "age": 21}, {"id": 2, "age": 22}, {"id": 2, "age": 23}]
+ dataframe = pandas.DataFrame(data={"bar": df_data}, columns=["bar"])
+
+ load_job = bigquery_client.load_table_from_dataframe(dataframe, table_id)
+ load_job.result()
+
+ table = bigquery_client.get_table(table_id)
+ assert table.schema == table_schema
+ assert table.num_rows == 3
+
+
+def test_load_table_from_dataframe_w_explicit_schema_source_format_csv(
+ bigquery_client, dataset_id
+):
+ from google.cloud.bigquery.job import SourceFormat
+
+ table_schema = (
+ bigquery.SchemaField("bool_col", "BOOLEAN"),
+ bigquery.SchemaField("bytes_col", "BYTES"),
+ bigquery.SchemaField("date_col", "DATE"),
+ bigquery.SchemaField("dt_col", "DATETIME"),
+ bigquery.SchemaField("float_col", "FLOAT"),
+ bigquery.SchemaField("geo_col", "GEOGRAPHY"),
+ bigquery.SchemaField("int_col", "INTEGER"),
+ bigquery.SchemaField("num_col", "NUMERIC"),
+ bigquery.SchemaField("bignum_col", "BIGNUMERIC"),
+ bigquery.SchemaField("str_col", "STRING"),
+ bigquery.SchemaField("time_col", "TIME"),
+ bigquery.SchemaField("ts_col", "TIMESTAMP"),
+ )
+ df_data = collections.OrderedDict(
+ [
+ ("bool_col", [True, None, False]),
+ ("bytes_col", ["abc", None, "def"]),
+ ("date_col", [datetime.date(1, 1, 1), None, datetime.date(9999, 12, 31)],),
+ (
+ "dt_col",
+ [
+ datetime.datetime(1, 1, 1, 0, 0, 0),
+ None,
+ datetime.datetime(9999, 12, 31, 23, 59, 59, 999999),
+ ],
+ ),
+ ("float_col", [float("-inf"), float("nan"), float("inf")]),
+ (
+ "geo_col",
+ [
+ "POINT(30 10)",
+ None,
+ "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))",
+ ],
+ ),
+ ("int_col", [-9223372036854775808, None, 9223372036854775807]),
+ (
+ "num_col",
+ [
+ decimal.Decimal("-99999999999999999999999999999.999999999"),
+ None,
+ decimal.Decimal("99999999999999999999999999999.999999999"),
+ ],
+ ),
+ (
+ "bignum_col",
+ [
+ decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)),
+ None,
+ decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)),
+ ],
+ ),
+ ("str_col", ["abc", None, "def"]),
+ (
+ "time_col",
+ [datetime.time(0, 0, 0), None, datetime.time(23, 59, 59, 999999)],
+ ),
+ (
+ "ts_col",
+ [
+ datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc),
+ None,
+ datetime.datetime(
+ 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc
+ ),
+ ],
+ ),
+ ]
+ )
+ dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys())
+
+ table_id = "{}.{}.load_table_from_dataframe_w_explicit_schema_csv".format(
+ bigquery_client.project, dataset_id
+ )
+
+ job_config = bigquery.LoadJobConfig(
+ schema=table_schema, source_format=SourceFormat.CSV
+ )
+ load_job = bigquery_client.load_table_from_dataframe(
+ dataframe, table_id, job_config=job_config
+ )
+ load_job.result()
+
+ table = bigquery_client.get_table(table_id)
+ assert tuple(table.schema) == table_schema
+ assert table.num_rows == 3
+
+
+def test_load_table_from_dataframe_w_explicit_schema_source_format_csv_floats(
+ bigquery_client, dataset_id, table_id
+):
+ from google.cloud.bigquery.job import SourceFormat
+
+ table_schema = (bigquery.SchemaField("float_col", "FLOAT"),)
+ df_data = collections.OrderedDict(
+ [
+ (
+ "float_col",
+ [
+ 0.14285714285714285,
+ 0.51428571485748,
+ 0.87128748,
+ 1.807960649,
+ 2.0679610649,
+ 2.4406779661016949,
+ 3.7148514257,
+ 3.8571428571428572,
+ 1.51251252e40,
+ ],
+ ),
+ ]
+ )
+ dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys())
+
+ job_config = bigquery.LoadJobConfig(
+ schema=table_schema, source_format=SourceFormat.CSV
+ )
+ load_job = bigquery_client.load_table_from_dataframe(
+ dataframe, table_id, job_config=job_config
+ )
+ load_job.result()
+
+ table = bigquery_client.get_table(table_id)
+ rows = bigquery_client.list_rows(table_id)
+ floats = [r.values()[0] for r in rows]
+ assert tuple(table.schema) == table_schema
+ assert table.num_rows == 9
+ assert floats == df_data["float_col"]
+
+
+def test_query_results_to_dataframe(bigquery_client):
+    """Check row count, column order and value types of a query's DataFrame."""
+    QUERY = """
+        SELECT id, author, time_ts, dead
+        FROM `bigquery-public-data.hacker_news.comments`
+        LIMIT 10
+    """
+
+    df = bigquery_client.query(QUERY).result().to_dataframe()
+
+    assert isinstance(df, pandas.DataFrame)
+    assert len(df) == 10  # verify the number of rows
+    exp_datatypes = {
+        "id": int,
+        "author": str,
+        "time_ts": pandas.Timestamp,
+        "dead": bool,
+    }
+    assert list(df) == list(exp_datatypes)  # verify the column names and order
+    for _, row in df.iterrows():
+        for col, exp_type in exp_datatypes.items():
+            # all the schema fields are nullable, so None is acceptable
+            if row[col] is not None:
+                assert isinstance(row[col], exp_type)
+
+
+def test_query_results_to_dataframe_w_bqstorage(bigquery_client):
+    """Same checks as the plain query test, downloading via a BigQueryReadClient."""
+    query = """
+        SELECT id, author, time_ts, dead
+        FROM `bigquery-public-data.hacker_news.comments`
+        LIMIT 10
+    """
+
+    bqstorage_client = bigquery_storage.BigQueryReadClient(
+        credentials=bigquery_client._credentials
+    )
+
+    df = bigquery_client.query(query).result().to_dataframe(bqstorage_client)
+
+    assert isinstance(df, pandas.DataFrame)
+    assert len(df) == 10  # verify the number of rows
+    exp_datatypes = {
+        "id": int,
+        "author": str,
+        "time_ts": pandas.Timestamp,
+        "dead": bool,
+    }
+    assert list(df) == list(exp_datatypes)  # verify the column names and order
+    for _, row in df.iterrows():
+        for col, exp_type in exp_datatypes.items():
+            # all the schema fields are nullable, so None is acceptable
+            if row[col] is not None:
+                assert isinstance(row[col], exp_type)
+
+
+def test_insert_rows_from_dataframe(bigquery_client, dataset_id):
+ SF = bigquery.SchemaField
+ schema = [
+ SF("float_col", "FLOAT", mode="REQUIRED"),
+ SF("int_col", "INTEGER", mode="REQUIRED"),
+ SF("bool_col", "BOOLEAN", mode="REQUIRED"),
+ SF("string_col", "STRING", mode="NULLABLE"),
+ ]
+
+ dataframe = pandas.DataFrame(
+ [
+ {
+ "float_col": 1.11,
+ "bool_col": True,
+ "string_col": "my string",
+ "int_col": 10,
+ },
+ {
+ "float_col": 2.22,
+ "bool_col": False,
+ "string_col": "another string",
+ "int_col": 20,
+ },
+ {
+ "float_col": 3.33,
+ "bool_col": False,
+ "string_col": "another string",
+ "int_col": 30,
+ },
+ {
+ "float_col": 4.44,
+ "bool_col": True,
+ "string_col": "another string",
+ "int_col": 40,
+ },
+ {
+ "float_col": 5.55,
+ "bool_col": False,
+ "string_col": "another string",
+ "int_col": 50,
+ },
+ {
+ "float_col": 6.66,
+ "bool_col": True,
+ # Include a NaN value, because pandas often uses NaN as a
+ # NULL value indicator.
+ "string_col": float("NaN"),
+ "int_col": 60,
+ },
+ ]
+ )
+
+ table_id = f"{bigquery_client.project}.{dataset_id}.test_insert_rows_from_dataframe"
+ table_arg = bigquery.Table(table_id, schema=schema)
+ table = helpers.retry_403(bigquery_client.create_table)(table_arg)
+
+ chunk_errors = bigquery_client.insert_rows_from_dataframe(
+ table, dataframe, chunk_size=3
+ )
+ for errors in chunk_errors:
+ assert not errors
+ expected = [
+ # Pandas often represents NULL values as NaN. Convert to None for
+ # easier comparison.
+ tuple(None if col != col else col for col in data_row)
+ for data_row in dataframe.itertuples(index=False)
+ ]
+
+ # Use query to fetch rows instead of listing directly from the table so
+ # that we get values from the streaming buffer "within a few seconds".
+ # https://cloud.google.com/bigquery/streaming-data-into-bigquery#dataavailability
+ @google.api_core.retry.Retry(
+ predicate=google.api_core.retry.if_exception_type(MissingDataError)
+ )
+ def get_rows():
+ rows = list(
+ bigquery_client.query(
+ "SELECT * FROM `{}.{}.{}`".format(
+ table.project, table.dataset_id, table.table_id
+ )
+ )
+ )
+ if len(rows) != len(expected):
+ raise MissingDataError()
+ return rows
+
+ rows = get_rows()
+ sorted_rows = sorted(rows, key=operator.attrgetter("int_col"))
+ row_tuples = [r.values() for r in sorted_rows]
+
+ for row, expected_row in zip(row_tuples, expected):
+ assert (
+ # Use Counter to verify the same number of values in each, because
+ # column order does not matter.
+ collections.Counter(row)
+ == collections.Counter(expected_row)
+ )
+
+
+def test_nested_table_to_dataframe(bigquery_client, dataset_id):
+ from google.cloud.bigquery.job import SourceFormat
+ from google.cloud.bigquery.job import WriteDisposition
+
+ SF = bigquery.SchemaField
+ schema = [
+ SF("string_col", "STRING", mode="NULLABLE"),
+ SF(
+ "record_col",
+ "RECORD",
+ mode="NULLABLE",
+ fields=[
+ SF("nested_string", "STRING", mode="NULLABLE"),
+ SF("nested_repeated", "INTEGER", mode="REPEATED"),
+ SF(
+ "nested_record",
+ "RECORD",
+ mode="NULLABLE",
+ fields=[SF("nested_nested_string", "STRING", mode="NULLABLE")],
+ ),
+ ],
+ ),
+ SF("bigfloat_col", "FLOAT", mode="NULLABLE"),
+ SF("smallfloat_col", "FLOAT", mode="NULLABLE"),
+ ]
+ record = {
+ "nested_string": "another string value",
+ "nested_repeated": [0, 1, 2],
+ "nested_record": {"nested_nested_string": "some deep insight"},
+ }
+ to_insert = [
+ {
+ "string_col": "Some value",
+ "record_col": record,
+ "bigfloat_col": 3.14,
+ "smallfloat_col": 2.72,
+ }
+ ]
+ rows = [json.dumps(row) for row in to_insert]
+ body = io.BytesIO("{}\n".format("\n".join(rows)).encode("ascii"))
+ table_id = f"{bigquery_client.project}.{dataset_id}.test_nested_table_to_dataframe"
+ job_config = bigquery.LoadJobConfig()
+ job_config.write_disposition = WriteDisposition.WRITE_TRUNCATE
+ job_config.source_format = SourceFormat.NEWLINE_DELIMITED_JSON
+ job_config.schema = schema
+ # Load a table using a local JSON file from memory.
+ bigquery_client.load_table_from_file(body, table_id, job_config=job_config).result()
+
+ df = bigquery_client.list_rows(table_id, selected_fields=schema).to_dataframe(
+ dtypes={"smallfloat_col": "float16"}
+ )
+
+ assert isinstance(df, pandas.DataFrame)
+ assert len(df) == 1 # verify the number of rows
+ exp_columns = ["string_col", "record_col", "bigfloat_col", "smallfloat_col"]
+ assert list(df) == exp_columns # verify the column names
+ row = df.iloc[0]
+ # verify the row content
+ assert row["string_col"] == "Some value"
+ expected_keys = tuple(sorted(record.keys()))
+ row_keys = tuple(sorted(row["record_col"].keys()))
+ assert row_keys == expected_keys
+ # Can't compare numpy arrays, which pyarrow encodes the embedded
+ # repeated column to, so convert to list.
+ assert list(row["record_col"]["nested_repeated"]) == [0, 1, 2]
+ # verify that nested data can be accessed with indices/keys
+ assert row["record_col"]["nested_repeated"][0] == 0
+ assert (
+ row["record_col"]["nested_record"]["nested_nested_string"]
+ == "some deep insight"
+ )
+ # verify dtypes
+ assert df.dtypes["bigfloat_col"].name == "float64"
+ assert df.dtypes["smallfloat_col"].name == "float16"
+
+
+def test_list_rows_max_results_w_bqstorage(bigquery_client):
+ table_ref = bigquery.DatasetReference("bigquery-public-data", "utility_us").table(
+ "country_code_iso"
+ )
+ bqstorage_client = bigquery_storage.BigQueryReadClient(
+ credentials=bigquery_client._credentials
+ )
+
+ row_iterator = bigquery_client.list_rows(
+ table_ref,
+ selected_fields=[bigquery.SchemaField("country_name", "STRING")],
+ max_results=100,
+ )
+ with pytest.warns(
+ UserWarning, match="Cannot use bqstorage_client if max_results is set"
+ ):
+ dataframe = row_iterator.to_dataframe(bqstorage_client=bqstorage_client)
+
+ assert len(dataframe.index) == 100
+
+
+def test_upload_time_and_datetime_56(bigquery_client, dataset_id):
+ df = pandas.DataFrame(
+ dict(
+ dt=[
+ datetime.datetime(2020, 1, 8, 8, 0, 0),
+ datetime.datetime(
+ 2020,
+ 1,
+ 8,
+ 8,
+ 0,
+ 0,
+ tzinfo=datetime.timezone(datetime.timedelta(hours=-7)),
+ ),
+ ],
+ t=[datetime.time(0, 0, 10, 100001), None],
+ )
+ )
+ table = f"{dataset_id}.test_upload_time_and_datetime"
+ bigquery_client.load_table_from_dataframe(df, table).result()
+ data = list(map(list, bigquery_client.list_rows(table)))
+ assert data == [
+ [
+ datetime.datetime(2020, 1, 8, 8, 0, tzinfo=datetime.timezone.utc),
+ datetime.time(0, 0, 10, 100001),
+ ],
+ [datetime.datetime(2020, 1, 8, 15, 0, tzinfo=datetime.timezone.utc), None],
+ ]
+
+ from google.cloud.bigquery import job, schema
+
+ table = f"{dataset_id}.test_upload_time_and_datetime_dt"
+ config = job.LoadJobConfig(
+ schema=[schema.SchemaField("dt", "DATETIME"), schema.SchemaField("t", "TIME")]
+ )
+
+ bigquery_client.load_table_from_dataframe(df, table, job_config=config).result()
+ data = list(map(list, bigquery_client.list_rows(table)))
+ assert data == [
+ [datetime.datetime(2020, 1, 8, 8, 0), datetime.time(0, 0, 10, 100001)],
+ [datetime.datetime(2020, 1, 8, 15, 0), None],
+ ]
+
+
+def test_to_dataframe_geography_as_objects(bigquery_client, dataset_id):
+ wkt = pytest.importorskip("shapely.wkt")
+ bigquery_client.query(
+ f"create table {dataset_id}.lake (name string, geog geography)"
+ ).result()
+ bigquery_client.query(
+ f"""
+ insert into {dataset_id}.lake (name, geog) values
+ ('foo', st_geogfromtext('point(0 0)')),
+ ('bar', st_geogfromtext('point(0 1)')),
+ ('baz', null)
+ """
+ ).result()
+ df = bigquery_client.query(
+ f"select * from {dataset_id}.lake order by name"
+ ).to_dataframe(geography_as_object=True)
+ assert list(df["name"]) == ["bar", "baz", "foo"]
+ assert df["geog"][0] == wkt.loads("point(0 1)")
+ assert pandas.isna(df["geog"][1])
+ assert df["geog"][2] == wkt.loads("point(0 0)")
+
+
+def test_to_geodataframe(bigquery_client, dataset_id):
+ geopandas = pytest.importorskip("geopandas")
+ from shapely import wkt
+
+ bigquery_client.query(
+ f"create table {dataset_id}.geolake (name string, geog geography)"
+ ).result()
+ bigquery_client.query(
+ f"""
+ insert into {dataset_id}.geolake (name, geog) values
+ ('foo', st_geogfromtext('point(0 0)')),
+ ('bar', st_geogfromtext('polygon((0 0, 1 0, 1 1, 0 0))')),
+ ('baz', null)
+ """
+ ).result()
+ df = bigquery_client.query(
+ f"select * from {dataset_id}.geolake order by name"
+ ).to_geodataframe()
+ assert df["geog"][0] == wkt.loads("polygon((0 0, 1 0, 1 1, 0 0))")
+ assert pandas.isna(df["geog"][1])
+ assert df["geog"][2] == wkt.loads("point(0 0)")
+ assert isinstance(df, geopandas.GeoDataFrame)
+ assert isinstance(df["geog"], geopandas.GeoSeries)
+ assert df.area[0] == 0.5
+ assert pandas.isna(df.area[1])
+ assert df.area[2] == 0.0
+ assert df.crs.srs == "EPSG:4326"
+ assert df.crs.name == "WGS 84"
+ assert df.geog.crs.srs == "EPSG:4326"
+ assert df.geog.crs.name == "WGS 84"
+
+
+def test_load_geodataframe(bigquery_client, dataset_id):
+    """Load a GeoDataFrame and verify both geo columns become GEOGRAPHY fields."""
+    geopandas = pytest.importorskip("geopandas")
+    from shapely import wkt
+    from google.cloud.bigquery.schema import SchemaField
+
+    df = geopandas.GeoDataFrame(
+        pandas.DataFrame(
+            dict(
+                name=["foo", "bar"],
+                geo1=[None, None],
+                geo2=[None, wkt.loads("Point(1 1)")],
+            )
+        ),
+        geometry="geo1",
+    )
+
+    table_id = f"{dataset_id}.lake_from_gp"
+    bigquery_client.load_table_from_dataframe(df, table_id).result()
+
+    table = bigquery_client.get_table(table_id)
+    assert table.schema == [
+        SchemaField("name", "STRING", "NULLABLE"),
+        SchemaField("geo1", "GEOGRAPHY", "NULLABLE"),
+        SchemaField("geo2", "GEOGRAPHY", "NULLABLE"),
+    ]
+    assert sorted(map(list, bigquery_client.list_rows(table_id))) == [
+        ["bar", None, "POINT(1 1)"],
+        ["foo", None, None],
+    ]
+
+
+def test_load_dataframe_w_shapely(bigquery_client, dataset_id):
+ wkt = pytest.importorskip("shapely.wkt")
+ from google.cloud.bigquery.schema import SchemaField
+
+ df = pandas.DataFrame(
+ dict(name=["foo", "bar"], geo=[None, wkt.loads("Point(1 1)")])
+ )
+
+ table_id = f"{dataset_id}.lake_from_shapes"
+ bigquery_client.load_table_from_dataframe(df, table_id).result()
+
+ table = bigquery_client.get_table(table_id)
+ assert table.schema == [
+ SchemaField("name", "STRING", "NULLABLE"),
+ SchemaField("geo", "GEOGRAPHY", "NULLABLE"),
+ ]
+ assert sorted(map(list, bigquery_client.list_rows(table_id))) == [
+ ["bar", "POINT(1 1)"],
+ ["foo", None],
+ ]
+
+ bigquery_client.load_table_from_dataframe(df, table_id).result()
+ assert sorted(map(list, bigquery_client.list_rows(table_id))) == [
+ ["bar", "POINT(1 1)"],
+ ["bar", "POINT(1 1)"],
+ ["foo", None],
+ ["foo", None],
+ ]
+
+
+def test_load_dataframe_w_wkb(bigquery_client, dataset_id):
+ wkt = pytest.importorskip("shapely.wkt")
+ from shapely import wkb
+ from google.cloud.bigquery.schema import SchemaField
+
+ df = pandas.DataFrame(
+ dict(name=["foo", "bar"], geo=[None, wkb.dumps(wkt.loads("Point(1 1)"))])
+ )
+
+ table_id = f"{dataset_id}.lake_from_wkb"
+ # We create the table first, to inform the interpretation of the wkb data
+ bigquery_client.query(
+ f"create table {table_id} (name string, geo GEOGRAPHY)"
+ ).result()
+ bigquery_client.load_table_from_dataframe(df, table_id).result()
+
+ table = bigquery_client.get_table(table_id)
+ assert table.schema == [
+ SchemaField("name", "STRING", "NULLABLE"),
+ SchemaField("geo", "GEOGRAPHY", "NULLABLE"),
+ ]
+ assert sorted(map(list, bigquery_client.list_rows(table_id))) == [
+ ["bar", "POINT(1 1)"],
+ ["foo", None],
+ ]
diff --git a/tests/system/test_structs.py b/tests/system/test_structs.py
new file mode 100644
index 000000000..20740f614
--- /dev/null
+++ b/tests/system/test_structs.py
@@ -0,0 +1,31 @@
+import datetime
+
+import pytest
+
+from google.cloud.bigquery.dbapi import connect
+
+person_type = "struct<name string, children array<struct<name string, bdate date>>>"
+person_type_sized = (
+    "struct<name string(22), children array<struct<name string(22), bdate date>>>"
+)
+
+
+@pytest.mark.parametrize("person_type_decl", [person_type, person_type_sized])
+def test_structs(bigquery_client, dataset_id, person_type_decl, table_id):
+ conn = connect(bigquery_client)
+ cursor = conn.cursor()
+ cursor.execute(f"create table {table_id} (person {person_type_decl})")
+ data = dict(
+ name="par",
+ children=[
+ dict(name="ch1", bdate=datetime.date(2021, 1, 1)),
+ dict(name="ch2", bdate=datetime.date(2021, 1, 2)),
+ ],
+ )
+ cursor.execute(
+ f"insert into {table_id} (person) values (%(v:{person_type})s)", dict(v=data),
+ )
+
+ cursor.execute(f"select * from {table_id}")
+ [[result]] = list(cursor)
+ assert result == data
diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py
index df379f1e9..4de65971c 100644
--- a/tests/unit/__init__.py
+++ b/tests/unit/__init__.py
@@ -1,4 +1,5 @@
-# Copyright 2016 Google LLC
+# -*- coding: utf-8 -*-
+# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -11,3 +12,4 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+#
diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py
new file mode 100644
index 000000000..7a67ea6b5
--- /dev/null
+++ b/tests/unit/conftest.py
@@ -0,0 +1,37 @@
+# Copyright 2021 Google LLC
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# https://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+from .helpers import make_client
+
+
+@pytest.fixture
+def client():
+ """Provide a client built by ``make_client`` with its default arguments."""
+ yield make_client()
+
+
+@pytest.fixture
+def PROJECT():
+ """Provide the fixed project ID string ``"PROJECT"``."""
+ yield "PROJECT"
+
+
+@pytest.fixture
+def DS_ID():
+ """Provide the fixed dataset ID string ``"DATASET_ID"``."""
+ yield "DATASET_ID"
+
+
+@pytest.fixture
+def LOCATION():
+ """Provide the fixed location string ``"us-central"``."""
+ yield "us-central"
diff --git a/tests/unit/gapic/__init__.py b/tests/unit/gapic/__init__.py
new file mode 100644
index 000000000..4de65971c
--- /dev/null
+++ b/tests/unit/gapic/__init__.py
@@ -0,0 +1,15 @@
+# -*- coding: utf-8 -*-
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
diff --git a/tests/unit/helpers.py b/tests/unit/helpers.py
index eea345e89..67aeaca35 100644
--- a/tests/unit/helpers.py
+++ b/tests/unit/helpers.py
@@ -12,6 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import google.cloud.bigquery.client
+import google.cloud.bigquery.dataset
+import mock
+import pytest
+
def make_connection(*responses):
import google.cloud.bigquery._http
@@ -21,6 +26,8 @@ def make_connection(*responses):
mock_conn = mock.create_autospec(google.cloud.bigquery._http.Connection)
mock_conn.user_agent = "testing 1.2.3"
mock_conn.api_request.side_effect = list(responses) + [NotFound("miss")]
+ mock_conn.API_BASE_URL = "https://bigquery.googleapis.com"
+ mock_conn.get_api_base_url_for_mtls = mock.Mock(return_value=mock_conn.API_BASE_URL)
return mock_conn
@@ -29,3 +36,47 @@ def _to_pyarrow(value):
import pyarrow
return pyarrow.array([value])[0]
+
+
+def make_client(project="PROJECT", **kw):
+    """Build a BigQuery ``Client`` whose credentials are a spec'd mock.
+
+    Args:
+        project: Project ID handed to the client.
+        **kw: Extra keyword arguments forwarded to the ``Client`` constructor.
+
+    Returns:
+        google.cloud.bigquery.client.Client: The constructed client.
+    """
+    # Import the credentials class explicitly. This module only imports
+    # ``google.cloud.bigquery.*``, so ``google.auth.credentials`` would
+    # otherwise resolve only if another module had already imported it.
+    from google.auth.credentials import Credentials
+
+    credentials = mock.Mock(spec=Credentials)
+    return google.cloud.bigquery.client.Client(project, credentials, **kw)
+
+
+def make_dataset_reference_string(project, ds_id):
+    """Return the dotted ``project.ds_id`` form of a dataset reference."""
+    return "{}.{}".format(project, ds_id)
+
+
+def make_dataset(project, ds_id):
+    """Return a ``Dataset`` wrapping a reference to ``project``/``ds_id``."""
+    dataset_module = google.cloud.bigquery.dataset
+    reference = dataset_module.DatasetReference(project, ds_id)
+    return dataset_module.Dataset(reference)
+
+
+def make_dataset_list_item(project, ds_id):
+    """Return a ``DatasetListItem`` built from a minimal API resource."""
+    resource = {"datasetReference": {"projectId": project, "datasetId": ds_id}}
+    return google.cloud.bigquery.dataset.DatasetListItem(resource)
+
+
+def identity(x):
+ """Return ``x`` unchanged."""
+ return x
+
+
+def get_reference(x):
+ """Return the ``reference`` attribute of ``x``."""
+ return x.reference
+
+
+# (factory, converter) pairs covering several ways of designating a dataset:
+# a ``DatasetReference``, a ``Dataset``, a ``DatasetListItem`` and a
+# ``"project.dataset"`` string.
+dataset_like = [
+ (google.cloud.bigquery.dataset.DatasetReference, identity),
+ (make_dataset, identity),
+ (make_dataset_list_item, get_reference),
+ (
+ make_dataset_reference_string,
+ google.cloud.bigquery.dataset.DatasetReference.from_string,
+ ),
+]
+
+# Parametrize marker that feeds each ``dataset_like`` pair to a test as its
+# ``make_dataset`` and ``get_reference`` arguments.
+dataset_polymorphic = pytest.mark.parametrize(
+ "make_dataset,get_reference", dataset_like
+)
diff --git a/tests/unit/job/helpers.py b/tests/unit/job/helpers.py
index ea071c5ac..c792214e7 100644
--- a/tests/unit/job/helpers.py
+++ b/tests/unit/job/helpers.py
@@ -162,6 +162,7 @@ def _verifyInitialReadonlyProperties(self, job):
self.assertIsNone(job.created)
self.assertIsNone(job.started)
self.assertIsNone(job.ended)
+ self.assertIsNone(job.transaction_info)
# derived from resource['status']
self.assertIsNone(job.error_result)
diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py
index 44bbc2c77..c3f7854e3 100644
--- a/tests/unit/job/test_base.py
+++ b/tests/unit/job/test_base.py
@@ -227,6 +227,20 @@ def test_script_statistics(self):
self.assertEqual(stack_frame.end_column, 14)
self.assertEqual(stack_frame.text, "QUERY TEXT")
+ def test_transaction_info(self):
+ # ``transaction_info`` stays ``None`` until the job statistics carry a
+ # ``transactionInfo`` entry, which is then exposed as ``TransactionInfo``.
+ from google.cloud.bigquery.job.base import TransactionInfo
+
+ client = _make_client(project=self.PROJECT)
+ job = self._make_one(self.JOB_ID, client)
+ assert job.transaction_info is None
+
+ # An empty statistics mapping is not enough either.
+ statistics = job._properties["statistics"] = {}
+ assert job.transaction_info is None
+
+ statistics["transactionInfo"] = {"transactionId": "123-abc-xyz"}
+ assert isinstance(job.transaction_info, TransactionInfo)
+ assert job.transaction_info.transaction_id == "123-abc-xyz"
+
def test_num_child_jobs(self):
client = _make_client(project=self.PROJECT)
job = self._make_one(self.JOB_ID, client)
@@ -281,11 +295,11 @@ def test_user_email(self):
@staticmethod
def _datetime_and_millis():
import datetime
- import pytz
from google.cloud._helpers import _millis
now = datetime.datetime.utcnow().replace(
- microsecond=123000, tzinfo=pytz.UTC # stats timestamps have ms precision
+ microsecond=123000,
+ tzinfo=datetime.timezone.utc, # stats timestamps have ms precision
)
return now, _millis(now)
@@ -319,6 +333,30 @@ def test_ended(self):
stats["endTime"] = millis
self.assertEqual(job.ended, now)
+ def test_reservation_usage_no_stats(self):
+ # With empty statistics, ``reservation_usage`` is an empty list.
+ client = _make_client(project=self.PROJECT)
+ job = self._make_one(self.JOB_ID, client)
+ job._properties["statistics"] = {}
+ self.assertEqual(job.reservation_usage, [])
+
+ def test_reservation_usage_stats_exist(self):
+ # Each ``reservationUsage`` entry is expected as a ``ReservationUsage``,
+ # in resource order.
+ from google.cloud.bigquery.job import ReservationUsage
+
+ client = _make_client(project=self.PROJECT)
+ job = self._make_one(self.JOB_ID, client)
+ job._properties["statistics"] = {
+ "reservationUsage": [
+ {"name": "slot_foo", "slotMs": "42"},
+ {"name": "slot_bar", "slotMs": "123"},
+ ],
+ }
+
+ # ``slotMs`` is a string in the resource; the expected ``slot_ms`` is an int.
+ expected = [
+ ReservationUsage(name="slot_foo", slot_ms=42),
+ ReservationUsage(name="slot_bar", slot_ms=123),
+ ]
+ self.assertEqual(job.reservation_usage, expected)
+
def test__job_statistics(self):
statistics = {"foo": "bar"}
client = _make_client(project=self.PROJECT)
@@ -943,7 +981,7 @@ def test_result_w_retry_wo_state(self):
custom_predicate = mock.Mock()
custom_predicate.return_value = True
custom_retry = google.api_core.retry.Retry(
- predicate=custom_predicate, initial=0.001, maximum=0.001, deadline=0.001,
+ predicate=custom_predicate, initial=0.001, maximum=0.001, deadline=0.1,
)
self.assertIs(job.result(retry=custom_retry), job)
diff --git a/tests/unit/job/test_copy.py b/tests/unit/job/test_copy.py
index fb0c87391..992efcf6b 100644
--- a/tests/unit/job/test_copy.py
+++ b/tests/unit/job/test_copy.py
@@ -28,18 +28,34 @@ def _get_target_class():
return CopyJobConfig
+ def test_ctor_defaults(self):
+ # A config built without arguments has no dispositions or encryption
+ # configuration, and an unspecified operation type.
+ from google.cloud.bigquery.job import OperationType
+
+ config = self._make_one()
+
+ assert config.create_disposition is None
+ assert config.write_disposition is None
+ assert config.destination_encryption_configuration is None
+ assert config.operation_type == OperationType.OPERATION_TYPE_UNSPECIFIED
+
def test_ctor_w_properties(self):
from google.cloud.bigquery.job import CreateDisposition
+ from google.cloud.bigquery.job import OperationType
from google.cloud.bigquery.job import WriteDisposition
create_disposition = CreateDisposition.CREATE_NEVER
write_disposition = WriteDisposition.WRITE_TRUNCATE
+ snapshot_operation = OperationType.SNAPSHOT
+
config = self._get_target_class()(
- create_disposition=create_disposition, write_disposition=write_disposition
+ create_disposition=create_disposition,
+ write_disposition=write_disposition,
+ operation_type=snapshot_operation,
)
self.assertEqual(config.create_disposition, create_disposition)
self.assertEqual(config.write_disposition, write_disposition)
+ self.assertEqual(config.operation_type, snapshot_operation)
def test_to_api_repr_with_encryption(self):
from google.cloud.bigquery.encryption_configuration import (
@@ -70,6 +86,22 @@ def test_to_api_repr_with_encryption_none(self):
resource, {"copy": {"destinationEncryptionConfiguration": None}}
)
+ def test_operation_type_setting_none(self):
+ # Start from a non-default value so the reset below is observable.
+ from google.cloud.bigquery.job import OperationType
+
+ config = self._make_one(operation_type=OperationType.SNAPSHOT)
+
+ # Setting it to None is the same as setting it to OPERATION_TYPE_UNSPECIFIED.
+ config.operation_type = None
+ assert config.operation_type == OperationType.OPERATION_TYPE_UNSPECIFIED
+
+ def test_operation_type_setting_non_none(self):
+ # A config created with ``operation_type=None`` accepts a concrete value
+ # afterwards and reads it back unchanged.
+ from google.cloud.bigquery.job import OperationType
+
+ config = self._make_one(operation_type=None)
+ config.operation_type = OperationType.RESTORE
+ assert config.operation_type == OperationType.RESTORE
+
class TestCopyJob(_Base):
JOB_TYPE = "copy"
diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py
index c18f51bff..cbe087dac 100644
--- a/tests/unit/job/test_load_config.py
+++ b/tests/unit/job/test_load_config.py
@@ -122,6 +122,45 @@ def test_create_disposition_setter(self):
config.create_disposition = disposition
self.assertEqual(config._properties["load"]["createDisposition"], disposition)
+ def test_decimal_target_types_miss(self):
+ # An untouched config reports no decimal target types.
+ config = self._get_target_class()()
+ self.assertIsNone(config.decimal_target_types)
+
+ def test_decimal_target_types_hit(self):
+ # The value is written straight into the resource, bypassing the setter.
+ from google.cloud.bigquery.enums import DecimalTargetType
+
+ config = self._get_target_class()()
+ decimal_target_types = [DecimalTargetType.NUMERIC, DecimalTargetType.STRING]
+ config._properties["load"]["decimalTargetTypes"] = decimal_target_types
+
+ # The list stored in the resource is expected back as a frozenset.
+ expected = frozenset(decimal_target_types)
+ self.assertEqual(config.decimal_target_types, expected)
+
+ def test_decimal_target_types_setter(self):
+ # A tuple assigned through the setter is expected in the resource as a list.
+ from google.cloud.bigquery.enums import DecimalTargetType
+
+ decimal_target_types = (DecimalTargetType.NUMERIC, DecimalTargetType.BIGNUMERIC)
+ config = self._get_target_class()()
+ config.decimal_target_types = decimal_target_types
+ self.assertEqual(
+ config._properties["load"]["decimalTargetTypes"],
+ list(decimal_target_types),
+ )
+
+ def test_decimal_target_types_setter_w_none(self):
+ # Assigning ``None`` must remove the key from the resource entirely.
+ from google.cloud.bigquery.enums import DecimalTargetType
+
+ config = self._get_target_class()()
+ decimal_target_types = [DecimalTargetType.BIGNUMERIC]
+ config._properties["load"]["decimalTargetTypes"] = decimal_target_types
+
+ config.decimal_target_types = None
+
+ self.assertIsNone(config.decimal_target_types)
+ self.assertNotIn("decimalTargetTypes", config._properties["load"])
+
+ config.decimal_target_types = None # No error if unsetting an unset property.
+
def test_destination_encryption_configuration_missing(self):
config = self._get_target_class()()
self.assertIsNone(config.destination_encryption_configuration)
@@ -385,6 +424,17 @@ def test_null_marker_setter(self):
config.null_marker = null_marker
self.assertEqual(config._properties["load"]["nullMarker"], null_marker)
+ def test_projection_fields_miss(self):
+ # An untouched config reports no projection fields.
+ config = self._get_target_class()()
+ self.assertIsNone(config.projection_fields)
+
+ def test_projection_fields_hit(self):
+ # The assigned list must appear both in the resource and via the getter.
+ config = self._get_target_class()()
+ fields = ["email", "postal_code"]
+ config.projection_fields = fields
+ self.assertEqual(config._properties["load"]["projectionFields"], fields)
+ self.assertEqual(config.projection_fields, fields)
+
def test_quote_character_missing(self):
config = self._get_target_class()()
self.assertIsNone(config.quote_character)
@@ -434,13 +484,13 @@ def test_schema_setter_fields(self):
"name": "full_name",
"type": "STRING",
"mode": "REQUIRED",
- "description": None,
+ "policyTags": {"names": []},
}
age_repr = {
"name": "age",
"type": "INTEGER",
"mode": "REQUIRED",
- "description": None,
+ "policyTags": {"names": []},
}
self.assertEqual(
config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]}
@@ -449,24 +499,20 @@ def test_schema_setter_fields(self):
def test_schema_setter_valid_mappings_list(self):
config = self._get_target_class()()
- schema = [
- {"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
- {"name": "age", "type": "INTEGER", "mode": "REQUIRED"},
- ]
- config.schema = schema
-
full_name_repr = {
"name": "full_name",
"type": "STRING",
"mode": "REQUIRED",
- "description": None,
+ "policyTags": {"names": []},
}
age_repr = {
"name": "age",
"type": "INTEGER",
"mode": "REQUIRED",
- "description": None,
+ "policyTags": {"names": []},
}
+ schema = [full_name_repr, age_repr]
+ config.schema = schema
self.assertEqual(
config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]}
)
@@ -708,3 +754,38 @@ def test_write_disposition_setter(self):
self.assertEqual(
config._properties["load"]["writeDisposition"], write_disposition
)
+
+ def test_parquet_options_missing(self):
+ # An untouched config reports no Parquet options.
+ config = self._get_target_class()()
+ self.assertIsNone(config.parquet_options)
+
+ def test_parquet_options_hit(self):
+ # Options written directly into the resource are read back through the
+ # ``enum_as_string`` / ``enable_list_inference`` attributes.
+ config = self._get_target_class()()
+ config._properties["load"]["parquetOptions"] = dict(
+ enumAsString=True, enableListInference=False
+ )
+ self.assertTrue(config.parquet_options.enum_as_string)
+ self.assertFalse(config.parquet_options.enable_list_inference)
+
+ def test_parquet_options_setter(self):
+ # Assigning a ``ParquetOptions`` object must store its API representation
+ # under ``parquetOptions`` in the resource.
+ from google.cloud.bigquery.format_options import ParquetOptions
+
+ parquet_options = ParquetOptions.from_api_repr(
+ dict(enumAsString=False, enableListInference=True)
+ )
+ config = self._get_target_class()()
+
+ config.parquet_options = parquet_options
+ self.assertEqual(
+ config._properties["load"]["parquetOptions"],
+ {"enumAsString": False, "enableListInference": True},
+ )
+
+ def test_parquet_options_setter_clearing(self):
+ # Assigning ``None`` must remove ``parquetOptions`` from the resource.
+ config = self._get_target_class()()
+ config._properties["load"]["parquetOptions"] = dict(
+ enumAsString=False, enableListInference=True
+ )
+
+ config.parquet_options = None
+ self.assertNotIn("parquetOptions", config._properties["load"])
diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py
index a4ab11ab6..d41370520 100644
--- a/tests/unit/job/test_query.py
+++ b/tests/unit/job/test_query.py
@@ -16,6 +16,7 @@
import copy
import http
import textwrap
+import types
import freezegun
from google.api_core import exceptions
@@ -109,6 +110,36 @@ def _verify_table_definitions(self, job, config):
self.assertIsNotNone(expected_ec)
self.assertEqual(found_ec.to_api_repr(), expected_ec)
+ def _verify_dml_stats_resource_properties(self, job, resource):
+ # Check ``job.dml_stats`` against ``statistics.query.dmlStats`` in
+ # ``resource``; when that entry is absent the property must be ``None``.
+ query_stats = resource.get("statistics", {}).get("query", {})
+
+ if "dmlStats" in query_stats:
+ resource_dml_stats = query_stats["dmlStats"]
+ job_dml_stats = job.dml_stats
+ # Counts are compared as strings; a counter missing from the resource
+ # is expected to read back as 0.
+ assert str(job_dml_stats.inserted_row_count) == resource_dml_stats.get(
+ "insertedRowCount", "0"
+ )
+ assert str(job_dml_stats.updated_row_count) == resource_dml_stats.get(
+ "updatedRowCount", "0"
+ )
+ assert str(job_dml_stats.deleted_row_count) == resource_dml_stats.get(
+ "deletedRowCount", "0"
+ )
+ else:
+ assert job.dml_stats is None
+
+ def _verify_transaction_info_resource_properties(self, job, resource):
+ # Check ``job.transaction_info`` against ``statistics.transactionInfo`` in
+ # ``resource``; when that entry is absent the property must be ``None``.
+ resource_stats = resource.get("statistics", {})
+
+ if "transactionInfo" in resource_stats:
+ resource_transaction_info = resource_stats["transactionInfo"]
+ job_transaction_info = job.transaction_info
+ assert job_transaction_info.transaction_id == resource_transaction_info.get(
+ "transactionId"
+ )
+ else:
+ assert job.transaction_info is None
+
def _verify_configuration_properties(self, job, configuration):
if "dryRun" in configuration:
self.assertEqual(job.dry_run, configuration["dryRun"])
@@ -117,6 +148,8 @@ def _verify_configuration_properties(self, job, configuration):
def _verifyResourceProperties(self, job, resource):
self._verifyReadonlyResourceProperties(job, resource)
+ self._verify_dml_stats_resource_properties(job, resource)
+ self._verify_transaction_info_resource_properties(job, resource)
configuration = resource.get("configuration", {})
self._verify_configuration_properties(job, configuration)
@@ -129,16 +162,19 @@ def _verifyResourceProperties(self, job, resource):
self._verify_table_definitions(job, query_config)
self.assertEqual(job.query, query_config["query"])
+
if "createDisposition" in query_config:
self.assertEqual(job.create_disposition, query_config["createDisposition"])
else:
self.assertIsNone(job.create_disposition)
+
if "defaultDataset" in query_config:
ds_ref = job.default_dataset
ds_ref = {"projectId": ds_ref.project, "datasetId": ds_ref.dataset_id}
self.assertEqual(ds_ref, query_config["defaultDataset"])
else:
self.assertIsNone(job.default_dataset)
+
if "destinationTable" in query_config:
table = job.destination
tb_ref = {
@@ -149,14 +185,17 @@ def _verifyResourceProperties(self, job, resource):
self.assertEqual(tb_ref, query_config["destinationTable"])
else:
self.assertIsNone(job.destination)
+
if "priority" in query_config:
self.assertEqual(job.priority, query_config["priority"])
else:
self.assertIsNone(job.priority)
+
if "writeDisposition" in query_config:
self.assertEqual(job.write_disposition, query_config["writeDisposition"])
else:
self.assertIsNone(job.write_disposition)
+
if "destinationEncryptionConfiguration" in query_config:
self.assertIsNotNone(job.destination_encryption_configuration)
self.assertEqual(
@@ -165,6 +204,7 @@ def _verifyResourceProperties(self, job, resource):
)
else:
self.assertIsNone(job.destination_encryption_configuration)
+
if "schemaUpdateOptions" in query_config:
self.assertEqual(
job.schema_update_options, query_config["schemaUpdateOptions"]
@@ -189,6 +229,7 @@ def test_ctor_defaults(self):
self.assertIsNone(job.create_disposition)
self.assertIsNone(job.default_dataset)
self.assertIsNone(job.destination)
+ self.assertIsNone(job.dml_stats)
self.assertIsNone(job.flatten_results)
self.assertIsNone(job.priority)
self.assertIsNone(job.use_query_cache)
@@ -277,6 +318,42 @@ def test_from_api_repr_with_encryption(self):
self.assertIs(job._client, client)
self._verifyResourceProperties(job, RESOURCE)
+ def test_from_api_repr_with_dml_stats(self):
+ # Build a job from a resource carrying ``dmlStats`` and verify it with
+ # ``_verifyResourceProperties``.
+ self._setUpConstants()
+ client = _make_client(project=self.PROJECT)
+ # Only two of the three DML counters are present in the resource.
+ RESOURCE = {
+ "id": self.JOB_ID,
+ "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
+ "configuration": {"query": {"query": self.QUERY}},
+ "statistics": {
+ "query": {
+ "dmlStats": {"insertedRowCount": "15", "updatedRowCount": "2"},
+ },
+ },
+ }
+ klass = self._get_target_class()
+
+ job = klass.from_api_repr(RESOURCE, client=client)
+
+ self.assertIs(job._client, client)
+ self._verifyResourceProperties(job, RESOURCE)
+
+ def test_from_api_repr_with_transaction_info(self):
+ # Build a job from a resource carrying ``transactionInfo`` and verify it
+ # with ``_verifyResourceProperties``.
+ self._setUpConstants()
+ client = _make_client(project=self.PROJECT)
+ RESOURCE = {
+ "id": self.JOB_ID,
+ "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
+ "configuration": {"query": {"query": self.QUERY}},
+ "statistics": {"transactionInfo": {"transactionId": "1a2b-3c4d"}},
+ }
+ klass = self._get_target_class()
+
+ job = klass.from_api_repr(RESOURCE, client=client)
+
+ self.assertIs(job._client, client)
+ self._verifyResourceProperties(job, RESOURCE)
+
def test_from_api_repr_w_properties(self):
from google.cloud.bigquery.job import CreateDisposition
from google.cloud.bigquery.job import SchemaUpdateOption
@@ -308,16 +385,7 @@ def test_cancelled(self):
self.assertTrue(job.cancelled())
- def test_done(self):
- client = _make_client(project=self.PROJECT)
- resource = self._make_resource(ended=True)
- job = self._get_target_class().from_api_repr(resource, client)
- job._query_results = google.cloud.bigquery.query._QueryResults.from_api_repr(
- {"jobComplete": True, "jobReference": resource["jobReference"]}
- )
- self.assertTrue(job.done())
-
- def test_done_w_timeout(self):
+ def test__done_or_raise_w_timeout(self):
client = _make_client(project=self.PROJECT)
resource = self._make_resource(ended=False)
job = self._get_target_class().from_api_repr(resource, client)
@@ -325,7 +393,7 @@ def test_done_w_timeout(self):
with mock.patch.object(
client, "_get_query_results"
) as fake_get_results, mock.patch.object(job, "reload") as fake_reload:
- job.done(timeout=42)
+ job._done_or_raise(timeout=42)
fake_get_results.assert_called_once()
call_args = fake_get_results.call_args
@@ -334,7 +402,7 @@ def test_done_w_timeout(self):
call_args = fake_reload.call_args
self.assertEqual(call_args.kwargs.get("timeout"), 42)
- def test_done_w_timeout_and_longer_internal_api_timeout(self):
+ def test__done_or_raise_w_timeout_and_longer_internal_api_timeout(self):
client = _make_client(project=self.PROJECT)
resource = self._make_resource(ended=False)
job = self._get_target_class().from_api_repr(resource, client)
@@ -343,7 +411,7 @@ def test_done_w_timeout_and_longer_internal_api_timeout(self):
with mock.patch.object(
client, "_get_query_results"
) as fake_get_results, mock.patch.object(job, "reload") as fake_reload:
- job.done(timeout=5.5)
+ job._done_or_raise(timeout=5.5)
# The expected timeout used is simply the given timeout, as the latter
# is shorter than the job's internal done timeout.
@@ -356,6 +424,61 @@ def test_done_w_timeout_and_longer_internal_api_timeout(self):
call_args = fake_reload.call_args
self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout)
+ def test__done_or_raise_w_query_results_error_reload_ok(self):
+ # Fetching query results raises ``BadRequest`` while the reload succeeds;
+ # the job must end up with a ``BadRequest`` stored as its exception.
+ client = _make_client(project=self.PROJECT)
+ bad_request_error = exceptions.BadRequest("Error in query")
+ client._get_query_results = mock.Mock(side_effect=bad_request_error)
+
+ resource = self._make_resource(ended=False)
+ job = self._get_target_class().from_api_repr(resource, client)
+ job._exception = None
+
+ # Stand-in for ``reload``: marks the job DONE and records a copy of the
+ # error on the job.
+ def fake_reload(self, *args, **kwargs):
+ self._properties["status"]["state"] = "DONE"
+ self.set_exception(copy.copy(bad_request_error))
+
+ # Bind the function to ``job`` so that it receives the job as ``self``.
+ fake_reload_method = types.MethodType(fake_reload, job)
+
+ with mock.patch.object(job, "reload", new=fake_reload_method):
+ job._done_or_raise()
+
+ assert isinstance(job._exception, exceptions.BadRequest)
+
+ def test__done_or_raise_w_query_results_error_reload_error(self):
+ # Both fetching query results and reloading fail; the exception kept on
+ # the job must be the query-results error object itself.
+ client = _make_client(project=self.PROJECT)
+ bad_request_error = exceptions.BadRequest("Error in query")
+ client._get_query_results = mock.Mock(side_effect=bad_request_error)
+
+ resource = self._make_resource(ended=False)
+ job = self._get_target_class().from_api_repr(resource, client)
+ reload_error = exceptions.DataLoss("Oops, sorry!")
+ job.reload = mock.Mock(side_effect=reload_error)
+ job._exception = None
+
+ job._done_or_raise()
+
+ assert job._exception is bad_request_error
+
+ def test__done_or_raise_w_job_query_results_ok_reload_error(self):
+ # Query results come back complete but the reload raises ``RetryError``;
+ # that error object must be stored as the job's exception.
+ client = _make_client(project=self.PROJECT)
+ query_results = google.cloud.bigquery.query._QueryResults(
+ properties={
+ "jobComplete": True,
+ "jobReference": {"projectId": self.PROJECT, "jobId": "12345"},
+ }
+ )
+ client._get_query_results = mock.Mock(return_value=query_results)
+
+ resource = self._make_resource(ended=False)
+ job = self._get_target_class().from_api_repr(resource, client)
+ retry_error = exceptions.RetryError("Too many retries", cause=TimeoutError)
+ job.reload = mock.Mock(side_effect=retry_error)
+ job._exception = None
+
+ job._done_or_raise()
+
+ assert job._exception is retry_error
+
def test_query_plan(self):
from google.cloud._helpers import _RFC3339_MICROS
from google.cloud.bigquery.job import QueryPlanEntry
@@ -768,6 +891,23 @@ def test_estimated_bytes_processed(self):
query_stats["estimatedBytesProcessed"] = str(est_bytes)
self.assertEqual(job.estimated_bytes_processed, est_bytes)
+ def test_dml_stats(self):
+ # ``dml_stats`` stays ``None`` while the statistics are built up level by
+ # level, until ``statistics.query.dmlStats`` is actually present.
+ from google.cloud.bigquery.job.query import DmlStats
+
+ client = _make_client(project=self.PROJECT)
+ job = self._make_one(self.JOB_ID, self.QUERY, client)
+ assert job.dml_stats is None
+
+ statistics = job._properties["statistics"] = {}
+ assert job.dml_stats is None
+
+ query_stats = statistics["query"] = {}
+ assert job.dml_stats is None
+
+ # The string count in the resource is expected back as an int.
+ query_stats["dmlStats"] = {"insertedRowCount": "35"}
+ assert isinstance(job.dml_stats, DmlStats)
+ assert job.dml_stats.inserted_row_count == 35
+
def test_result(self):
from google.cloud.bigquery.table import RowIterator
@@ -973,7 +1113,7 @@ def test_result_w_retry(self):
initial=0.001,
maximum=0.001,
multiplier=1.0,
- deadline=0.001,
+ deadline=0.1,
predicate=custom_predicate,
)
@@ -1826,8 +1966,6 @@ def test_reload_w_timeout(self):
)
def test_iter(self):
- import types
-
begun_resource = self._make_resource()
query_resource = {
"jobComplete": True,
diff --git a/tests/unit/job/test_query_config.py b/tests/unit/job/test_query_config.py
index db03d6a3b..109cf7e44 100644
--- a/tests/unit/job/test_query_config.py
+++ b/tests/unit/job/test_query_config.py
@@ -253,3 +253,59 @@ def test_from_api_repr_with_encryption(self):
self.assertEqual(
config.destination_encryption_configuration.kms_key_name, self.KMS_KEY_NAME
)
+
+ def test_to_api_repr_with_script_options_none(self):
+ # Assigning ``None`` keeps the ``scriptOptions`` key in the API
+ # representation, with a ``None`` value.
+ config = self._make_one()
+ config.script_options = None
+
+ resource = config.to_api_repr()
+
+ self.assertEqual(resource, {"query": {"scriptOptions": None}})
+ self.assertIsNone(config.script_options)
+
+ def test_to_api_repr_with_script_options(self):
+ # A ``ScriptOptions`` assigned to the config must be serialized under
+ # ``query.scriptOptions``.
+ from google.cloud.bigquery import KeyResultStatementKind
+ from google.cloud.bigquery import ScriptOptions
+
+ config = self._make_one()
+ config.script_options = ScriptOptions(
+ statement_timeout_ms=60,
+ statement_byte_budget=999,
+ key_result_statement=KeyResultStatementKind.FIRST_SELECT,
+ )
+
+ resource = config.to_api_repr()
+
+ # The integer options are expected as strings in the API representation.
+ expected_script_options_repr = {
+ "statementTimeoutMs": "60",
+ "statementByteBudget": "999",
+ "keyResultStatement": KeyResultStatementKind.FIRST_SELECT,
+ }
+ self.assertEqual(
+ resource, {"query": {"scriptOptions": expected_script_options_repr}}
+ )
+
+ def test_from_api_repr_with_script_options(self):
+ # A resource carrying ``scriptOptions`` must yield a config whose
+ # ``script_options`` is a ``ScriptOptions`` instance.
+ from google.cloud.bigquery import KeyResultStatementKind
+ from google.cloud.bigquery import ScriptOptions
+
+ resource = {
+ "query": {
+ "scriptOptions": {
+ "statementTimeoutMs": "42",
+ "statementByteBudget": "123",
+ "keyResultStatement": KeyResultStatementKind.LAST,
+ },
+ },
+ }
+ klass = self._get_target_class()
+
+ config = klass.from_api_repr(resource)
+
+ script_options = config.script_options
+ self.assertIsInstance(script_options, ScriptOptions)
+ # The string-encoded numbers are expected back as ints.
+ self.assertEqual(script_options.statement_timeout_ms, 42)
+ self.assertEqual(script_options.statement_byte_budget, 123)
+ self.assertEqual(
+ script_options.key_result_statement, KeyResultStatementKind.LAST
+ )
diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py
index d1600ad43..b5af90c0b 100644
--- a/tests/unit/job/test_query_pandas.py
+++ b/tests/unit/job/test_query_pandas.py
@@ -23,6 +23,14 @@
import pandas
except (ImportError, AttributeError): # pragma: NO COVER
pandas = None
+try:
+ import shapely
+except (ImportError, AttributeError): # pragma: NO COVER
+ shapely = None
+try:
+ import geopandas
+except (ImportError, AttributeError): # pragma: NO COVER
+ geopandas = None
try:
import pyarrow
except (ImportError, AttributeError): # pragma: NO COVER
@@ -41,6 +49,22 @@
from .helpers import _make_job_resource
+@pytest.fixture
+def table_read_options_kwarg():
+ # Create a BigQuery Storage table read options object with pyarrow compression
+ # enabled if a recent-enough version of google-cloud-bigquery-storage dependency is
+ # installed to support the compression.
+ #
+ # Returns a kwargs dict: empty when ``ArrowSerializationOptions`` is not
+ # available, otherwise a single ``read_options`` entry requesting LZ4_FRAME
+ # buffer compression.
+ if not hasattr(bigquery_storage, "ArrowSerializationOptions"):
+ return {}
+
+ read_options = bigquery_storage.ReadSession.TableReadOptions(
+ arrow_serialization_options=bigquery_storage.ArrowSerializationOptions(
+ buffer_compression=bigquery_storage.ArrowSerializationOptions.CompressionCodec.LZ4_FRAME
+ )
+ )
+ return {"read_options": read_options}
+
+
@pytest.mark.parametrize(
"query,expected",
(
@@ -82,7 +106,7 @@ def test__contains_order_by(query, expected):
"SelecT name, age froM table OrdeR \n\t BY other_column;",
),
)
-def test_to_dataframe_bqstorage_preserve_order(query):
+def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg):
from google.cloud.bigquery.job import QueryJob as target_class
job_resource = _make_job_resource(
@@ -123,8 +147,10 @@ def test_to_dataframe_bqstorage_preserve_order(query):
destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format(
**job_resource["configuration"]["query"]["destinationTable"]
)
- expected_session = bigquery_storage.types.ReadSession(
- table=destination_table, data_format=bigquery_storage.types.DataFormat.ARROW,
+ expected_session = bigquery_storage.ReadSession(
+ table=destination_table,
+ data_format=bigquery_storage.DataFormat.ARROW,
+ **table_read_options_kwarg,
)
bqstorage_client.create_read_session.assert_called_once_with(
parent="projects/test-project",
@@ -220,6 +246,41 @@ def test_to_arrow():
]
+@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`")
+def test_to_arrow_max_results_no_progress_bar():
+ # ``to_arrow(max_results=...)`` must call ``QueryJob.result`` exactly once,
+ # passing ``max_results`` as the only argument.
+ from google.cloud.bigquery import table
+ from google.cloud.bigquery.job import QueryJob as target_class
+ from google.cloud.bigquery.schema import SchemaField
+
+ connection = _make_connection({})
+ client = _make_client(connection=connection)
+ begun_resource = _make_job_resource(job_type="query")
+ job = target_class.from_api_repr(begun_resource, client)
+
+ schema = [
+ SchemaField("name", "STRING", mode="REQUIRED"),
+ SchemaField("age", "INTEGER", mode="REQUIRED"),
+ ]
+ rows = [
+ {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
+ {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]},
+ ]
+ path = "/foo"
+ # The row iterator reads its rows from this mocked API request.
+ api_request = mock.Mock(return_value={"rows": rows})
+ row_iterator = table.RowIterator(client, api_request, path, schema)
+
+ # ``QueryJob.result`` is patched to hand back the prepared row iterator.
+ result_patch = mock.patch(
+ "google.cloud.bigquery.job.QueryJob.result", return_value=row_iterator,
+ )
+ with result_patch as result_patch_tqdm:
+ tbl = job.to_arrow(create_bqstorage_client=False, max_results=123)
+
+ result_patch_tqdm.assert_called_once_with(max_results=123)
+
+ assert isinstance(tbl, pyarrow.Table)
+ assert tbl.num_rows == 2
+
+
@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`")
@pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`")
def test_to_arrow_w_tqdm_w_query_plan():
@@ -272,7 +333,9 @@ def test_to_arrow_w_tqdm_w_query_plan():
assert result_patch_tqdm.call_count == 3
assert isinstance(tbl, pyarrow.Table)
assert tbl.num_rows == 2
- result_patch_tqdm.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL)
+ result_patch_tqdm.assert_called_with(
+ timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None
+ )
@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`")
@@ -323,7 +386,9 @@ def test_to_arrow_w_tqdm_w_pending_status():
assert result_patch_tqdm.call_count == 2
assert isinstance(tbl, pyarrow.Table)
assert tbl.num_rows == 2
- result_patch_tqdm.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL)
+ result_patch_tqdm.assert_called_with(
+ timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None
+ )
@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`")
@@ -368,38 +433,41 @@ def test_to_arrow_w_tqdm_wo_query_plan():
result_patch_tqdm.assert_called()
-@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_to_dataframe():
+def _make_job(schema=(), rows=()):
from google.cloud.bigquery.job import QueryJob as target_class
begun_resource = _make_job_resource(job_type="query")
query_resource = {
"jobComplete": True,
"jobReference": begun_resource["jobReference"],
- "totalRows": "4",
+ "totalRows": str(len(rows)),
"schema": {
"fields": [
- {"name": "name", "type": "STRING", "mode": "NULLABLE"},
- {"name": "age", "type": "INTEGER", "mode": "NULLABLE"},
+ dict(name=field[0], type=field[1], mode=field[2]) for field in schema
]
},
}
- tabledata_resource = {
- "rows": [
- {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
- {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
- {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]},
- {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]},
- ]
- }
+ tabledata_resource = {"rows": [{"f": [{"v": v} for v in row]} for row in rows]}
done_resource = copy.deepcopy(begun_resource)
done_resource["status"] = {"state": "DONE"}
connection = _make_connection(
begun_resource, query_resource, done_resource, tabledata_resource
)
client = _make_client(connection=connection)
- job = target_class.from_api_repr(begun_resource, client)
+ return target_class.from_api_repr(begun_resource, client)
+
+@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+def test_to_dataframe():
+ job = _make_job(
+ (("name", "STRING", "NULLABLE"), ("age", "INTEGER", "NULLABLE")),
+ (
+ ("Phred Phlyntstone", "32"),
+ ("Bharney Rhubble", "33"),
+ ("Wylma Phlyntstone", "29"),
+ ("Bhettye Rhubble", "27"),
+ ),
+ )
df = job.to_dataframe(create_bqstorage_client=False)
assert isinstance(df, pandas.DataFrame)
@@ -431,7 +499,7 @@ def test_to_dataframe_ddl_query():
@pytest.mark.skipif(
bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
)
-def test_to_dataframe_bqstorage():
+def test_to_dataframe_bqstorage(table_read_options_kwarg):
from google.cloud.bigquery.job import QueryJob as target_class
resource = _make_job_resource(job_type="query", ended=True)
@@ -468,8 +536,10 @@ def test_to_dataframe_bqstorage():
destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format(
**resource["configuration"]["query"]["destinationTable"]
)
- expected_session = bigquery_storage.types.ReadSession(
- table=destination_table, data_format=bigquery_storage.types.DataFormat.ARROW,
+ expected_session = bigquery_storage.ReadSession(
+ table=destination_table,
+ data_format=bigquery_storage.DataFormat.ARROW,
+ **table_read_options_kwarg,
)
bqstorage_client.create_read_session.assert_called_once_with(
parent=f"projects/{client.project}",
@@ -478,6 +548,52 @@ def test_to_dataframe_bqstorage():
)
+@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+@pytest.mark.skipif(
+ bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
+)
+def test_to_dataframe_bqstorage_no_pyarrow_compression():
+ from google.cloud.bigquery.job import QueryJob as target_class
+
+ resource = _make_job_resource(job_type="query", ended=True)
+ query_resource = {
+ "jobComplete": True,
+ "jobReference": resource["jobReference"],
+ "totalRows": "4",
+ "schema": {"fields": [{"name": "name", "type": "STRING", "mode": "NULLABLE"}]},
+ }
+ connection = _make_connection(query_resource)
+ client = _make_client(connection=connection)
+ job = target_class.from_api_repr(resource, client)
+ bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
+ session = bigquery_storage.types.ReadSession()
+ session.avro_schema.schema = json.dumps(
+ {
+ "type": "record",
+ "name": "__root__",
+ "fields": [{"name": "name", "type": ["null", "string"]}],
+ }
+ )
+ bqstorage_client.create_read_session.return_value = session
+
+ with mock.patch(
+ "google.cloud.bigquery._pandas_helpers._ARROW_COMPRESSION_SUPPORT", new=False
+ ):
+ job.to_dataframe(bqstorage_client=bqstorage_client)
+
+ destination_table = "projects/{projectId}/datasets/{datasetId}/tables/{tableId}".format(
+ **resource["configuration"]["query"]["destinationTable"]
+ )
+ expected_session = bigquery_storage.ReadSession(
+ table=destination_table, data_format=bigquery_storage.DataFormat.ARROW,
+ )
+ bqstorage_client.create_read_session.assert_called_once_with(
+ parent=f"projects/{client.project}",
+ read_session=expected_session,
+ max_stream_count=0,
+ )
+
+
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
def test_to_dataframe_column_dtypes():
from google.cloud.bigquery.job import QueryJob as target_class
@@ -650,7 +766,9 @@ def test_to_dataframe_w_tqdm_pending():
assert isinstance(df, pandas.DataFrame)
assert len(df) == 4 # verify the number of rows
assert list(df) == ["name", "age"] # verify the column names
- result_patch_tqdm.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL)
+ result_patch_tqdm.assert_called_with(
+ timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None
+ )
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
@@ -708,4 +826,147 @@ def test_to_dataframe_w_tqdm():
assert isinstance(df, pandas.DataFrame)
assert len(df) == 4 # verify the number of rows
assert list(df), ["name", "age"] # verify the column names
- result_patch_tqdm.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL)
+ result_patch_tqdm.assert_called_with(
+ timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None
+ )
+
+
+@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+@pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`")
+def test_to_dataframe_w_tqdm_max_results():
+ from google.cloud.bigquery import table
+ from google.cloud.bigquery.job import QueryJob as target_class
+ from google.cloud.bigquery.schema import SchemaField
+ from google.cloud.bigquery._tqdm_helpers import _PROGRESS_BAR_UPDATE_INTERVAL
+
+ begun_resource = _make_job_resource(job_type="query")
+ schema = [
+ SchemaField("name", "STRING", mode="NULLABLE"),
+ SchemaField("age", "INTEGER", mode="NULLABLE"),
+ ]
+ rows = [{"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}]
+
+ connection = _make_connection({})
+ client = _make_client(connection=connection)
+ job = target_class.from_api_repr(begun_resource, client)
+
+ path = "/foo"
+ api_request = mock.Mock(return_value={"rows": rows})
+ row_iterator = table.RowIterator(client, api_request, path, schema)
+
+ job._properties["statistics"] = {
+ "query": {
+ "queryPlan": [
+ {"name": "S00: Input", "id": "0", "status": "COMPLETE"},
+ {"name": "S01: Output", "id": "1", "status": "COMPLETE"},
+ ]
+ },
+ }
+ reload_patch = mock.patch(
+ "google.cloud.bigquery.job._AsyncJob.reload", autospec=True
+ )
+ result_patch = mock.patch(
+ "google.cloud.bigquery.job.QueryJob.result",
+ side_effect=[concurrent.futures.TimeoutError, row_iterator],
+ )
+
+ with result_patch as result_patch_tqdm, reload_patch:
+ job.to_dataframe(
+ progress_bar_type="tqdm", create_bqstorage_client=False, max_results=3
+ )
+
+ assert result_patch_tqdm.call_count == 2
+ result_patch_tqdm.assert_called_with(
+ timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=3
+ )
+
+
+@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+@pytest.mark.skipif(shapely is None, reason="Requires `shapely`")
+def test_to_dataframe_geography_as_object():
+ job = _make_job(
+ (("name", "STRING", "NULLABLE"), ("geog", "GEOGRAPHY", "NULLABLE")),
+ (
+ ("Phred Phlyntstone", "Point(0 0)"),
+ ("Bharney Rhubble", "Point(0 1)"),
+ ("Wylma Phlyntstone", None),
+ ),
+ )
+ df = job.to_dataframe(create_bqstorage_client=False, geography_as_object=True)
+
+ assert isinstance(df, pandas.DataFrame)
+ assert len(df) == 3 # verify the number of rows
+ assert list(df) == ["name", "geog"] # verify the column names
+ assert [v.__class__.__name__ for v in df.geog] == [
+ "Point",
+ "Point",
+ "float",
+ ] # float because nan
+
+
+@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`")
+def test_to_geodataframe():
+ job = _make_job(
+ (("name", "STRING", "NULLABLE"), ("geog", "GEOGRAPHY", "NULLABLE")),
+ (
+ ("Phred Phlyntstone", "Point(0 0)"),
+ ("Bharney Rhubble", "Point(0 1)"),
+ ("Wylma Phlyntstone", None),
+ ),
+ )
+ df = job.to_geodataframe(create_bqstorage_client=False)
+
+ assert isinstance(df, geopandas.GeoDataFrame)
+ assert len(df) == 3 # verify the number of rows
+ assert list(df) == ["name", "geog"] # verify the column names
+ assert [v.__class__.__name__ for v in df.geog] == [
+ "Point",
+ "Point",
+ "NoneType",
+ ] # NoneType because the null geography value stays None
+ assert isinstance(df.geog, geopandas.GeoSeries)
+
+
+@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`")
+@mock.patch("google.cloud.bigquery.job.query.wait_for_query")
+def test_query_job_to_geodataframe_delegation(wait_for_query):
+ """
+ QueryJob.to_geodataframe just delegates to RowIterator.to_geodataframe.
+
+ This test just demonstrates that. We don't need to test all the
+ variations, which are tested for RowIterator.
+ """
+ import numpy
+
+ job = _make_job()
+ bqstorage_client = object()
+ dtypes = dict(xxx=numpy.dtype("int64"))
+ progress_bar_type = "normal"
+ create_bqstorage_client = False
+ date_as_object = False
+ max_results = 42
+ geography_column = "g"
+
+ df = job.to_geodataframe(
+ bqstorage_client=bqstorage_client,
+ dtypes=dtypes,
+ progress_bar_type=progress_bar_type,
+ create_bqstorage_client=create_bqstorage_client,
+ date_as_object=date_as_object,
+ max_results=max_results,
+ geography_column=geography_column,
+ )
+
+ wait_for_query.assert_called_once_with(
+ job, progress_bar_type, max_results=max_results
+ )
+ row_iterator = wait_for_query.return_value
+ row_iterator.to_geodataframe.assert_called_once_with(
+ bqstorage_client=bqstorage_client,
+ dtypes=dtypes,
+ progress_bar_type=progress_bar_type,
+ create_bqstorage_client=create_bqstorage_client,
+ date_as_object=date_as_object,
+ geography_column=geography_column,
+ )
+ assert df is row_iterator.to_geodataframe.return_value
diff --git a/tests/unit/job/test_query_stats.py b/tests/unit/job/test_query_stats.py
index 09a0efc45..e70eb097c 100644
--- a/tests/unit/job/test_query_stats.py
+++ b/tests/unit/job/test_query_stats.py
@@ -15,6 +15,43 @@
from .helpers import _Base
+class TestDmlStats:
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.job import DmlStats
+
+ return DmlStats
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ def test_ctor_defaults(self):
+ dml_stats = self._make_one()
+ assert dml_stats.inserted_row_count == 0
+ assert dml_stats.deleted_row_count == 0
+ assert dml_stats.updated_row_count == 0
+
+ def test_from_api_repr_partial_stats(self):
+ klass = self._get_target_class()
+ result = klass.from_api_repr({"deletedRowCount": "12"})
+
+ assert isinstance(result, klass)
+ assert result.inserted_row_count == 0
+ assert result.deleted_row_count == 12
+ assert result.updated_row_count == 0
+
+ def test_from_api_repr_full_stats(self):
+ klass = self._get_target_class()
+ result = klass.from_api_repr(
+ {"updatedRowCount": "4", "insertedRowCount": "7", "deletedRowCount": "25"}
+ )
+
+ assert isinstance(result, klass)
+ assert result.inserted_row_count == 7
+ assert result.deleted_row_count == 25
+ assert result.updated_row_count == 4
+
+
class TestQueryPlanEntryStep(_Base):
KIND = "KIND"
SUBSTEPS = ("SUB1", "SUB2")
diff --git a/tests/unit/routine/test_routine.py b/tests/unit/routine/test_routine.py
index b02ace1db..fdaf13324 100644
--- a/tests/unit/routine/test_routine.py
+++ b/tests/unit/routine/test_routine.py
@@ -18,6 +18,7 @@
import pytest
import google.cloud._helpers
+from google.cloud import bigquery
from google.cloud import bigquery_v2
@@ -73,6 +74,7 @@ def test_ctor_w_properties(target_class):
)
type_ = "SCALAR_FUNCTION"
description = "A routine description."
+ determinism_level = bigquery.DeterminismLevel.NOT_DETERMINISTIC
actual_routine = target_class(
routine_id,
@@ -82,6 +84,7 @@ def test_ctor_w_properties(target_class):
return_type=return_type,
type_=type_,
description=description,
+ determinism_level=determinism_level,
)
ref = RoutineReference.from_string(routine_id)
@@ -92,6 +95,9 @@ def test_ctor_w_properties(target_class):
assert actual_routine.return_type == return_type
assert actual_routine.type_ == type_
assert actual_routine.description == description
+ assert (
+ actual_routine.determinism_level == bigquery.DeterminismLevel.NOT_DETERMINISTIC
+ )
def test_from_api_repr(target_class):
@@ -120,6 +126,7 @@ def test_from_api_repr(target_class):
"routineType": "SCALAR_FUNCTION",
"someNewField": "someValue",
"description": "A routine description.",
+ "determinismLevel": bigquery.DeterminismLevel.DETERMINISTIC,
}
actual_routine = target_class.from_api_repr(resource)
@@ -149,9 +156,84 @@ def test_from_api_repr(target_class):
assert actual_routine.return_type == bigquery_v2.types.StandardSqlDataType(
type_kind=bigquery_v2.types.StandardSqlDataType.TypeKind.INT64
)
+ assert actual_routine.return_table_type is None
assert actual_routine.type_ == "SCALAR_FUNCTION"
assert actual_routine._properties["someNewField"] == "someValue"
assert actual_routine.description == "A routine description."
+ assert actual_routine.determinism_level == "DETERMINISTIC"
+
+
+def test_from_api_repr_tvf_function(target_class):
+ from google.cloud.bigquery.routine import RoutineArgument
+ from google.cloud.bigquery.routine import RoutineReference
+ from google.cloud.bigquery.routine import RoutineType
+
+ StandardSqlDataType = bigquery_v2.types.StandardSqlDataType
+ StandardSqlField = bigquery_v2.types.StandardSqlField
+ StandardSqlTableType = bigquery_v2.types.StandardSqlTableType
+
+ creation_time = datetime.datetime(
+ 2010, 5, 19, 16, 0, 0, tzinfo=google.cloud._helpers.UTC
+ )
+ modified_time = datetime.datetime(
+ 2011, 10, 1, 16, 0, 0, tzinfo=google.cloud._helpers.UTC
+ )
+ resource = {
+ "routineReference": {
+ "projectId": "my-project",
+ "datasetId": "my_dataset",
+ "routineId": "my_routine",
+ },
+ "etag": "abcdefg",
+ "creationTime": str(google.cloud._helpers._millis(creation_time)),
+ "lastModifiedTime": str(google.cloud._helpers._millis(modified_time)),
+ "definitionBody": "SELECT x FROM UNNEST([1,2,3]) x WHERE x > a",
+ "arguments": [{"name": "a", "dataType": {"typeKind": "INT64"}}],
+ "language": "SQL",
+ "returnTableType": {
+ "columns": [{"name": "int_col", "type": {"typeKind": "INT64"}}]
+ },
+ "routineType": "TABLE_VALUED_FUNCTION",
+ "someNewField": "someValue",
+ "description": "A routine description.",
+ "determinismLevel": bigquery.DeterminismLevel.DETERMINISTIC,
+ }
+ actual_routine = target_class.from_api_repr(resource)
+
+ assert actual_routine.project == "my-project"
+ assert actual_routine.dataset_id == "my_dataset"
+ assert actual_routine.routine_id == "my_routine"
+ assert (
+ actual_routine.path
+ == "/projects/my-project/datasets/my_dataset/routines/my_routine"
+ )
+ assert actual_routine.reference == RoutineReference.from_string(
+ "my-project.my_dataset.my_routine"
+ )
+ assert actual_routine.etag == "abcdefg"
+ assert actual_routine.created == creation_time
+ assert actual_routine.modified == modified_time
+ assert actual_routine.arguments == [
+ RoutineArgument(
+ name="a",
+ data_type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.INT64),
+ )
+ ]
+ assert actual_routine.body == "SELECT x FROM UNNEST([1,2,3]) x WHERE x > a"
+ assert actual_routine.language == "SQL"
+ assert actual_routine.return_type is None
+ assert actual_routine.return_table_type == StandardSqlTableType(
+ columns=[
+ StandardSqlField(
+ name="int_col",
+ type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.INT64),
+ )
+ ]
+ )
+ assert actual_routine.type_ == RoutineType.TABLE_VALUED_FUNCTION
+ assert actual_routine._properties["someNewField"] == "someValue"
+ assert actual_routine.description == "A routine description."
+ assert actual_routine.determinism_level == "DETERMINISTIC"
def test_from_api_repr_w_minimal_resource(target_class):
@@ -177,6 +259,7 @@ def test_from_api_repr_w_minimal_resource(target_class):
assert actual_routine.return_type is None
assert actual_routine.type_ is None
assert actual_routine.description is None
+ assert actual_routine.determinism_level is None
def test_from_api_repr_w_unknown_fields(target_class):
@@ -208,6 +291,7 @@ def test_from_api_repr_w_unknown_fields(target_class):
"returnType": {"typeKind": "INT64"},
"routineType": "SCALAR_FUNCTION",
"description": "A routine description.",
+ "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED,
},
["arguments"],
{"arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}]},
@@ -220,6 +304,7 @@ def test_from_api_repr_w_unknown_fields(target_class):
"returnType": {"typeKind": "INT64"},
"routineType": "SCALAR_FUNCTION",
"description": "A routine description.",
+ "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED,
},
["body"],
{"definitionBody": "x * 3"},
@@ -232,6 +317,7 @@ def test_from_api_repr_w_unknown_fields(target_class):
"returnType": {"typeKind": "INT64"},
"routineType": "SCALAR_FUNCTION",
"description": "A routine description.",
+ "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED,
},
["language"],
{"language": "SQL"},
@@ -244,10 +330,29 @@ def test_from_api_repr_w_unknown_fields(target_class):
"returnType": {"typeKind": "INT64"},
"routineType": "SCALAR_FUNCTION",
"description": "A routine description.",
+ "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED,
},
["return_type"],
{"returnType": {"typeKind": "INT64"}},
),
+ (
+ {
+ "definitionBody": "SELECT x FROM UNNEST([1,2,3]) x WHERE x > 1",
+ "language": "SQL",
+ "returnTableType": {
+ "columns": [{"name": "int_col", "type": {"typeKind": "INT64"}}]
+ },
+ "routineType": "TABLE_VALUED_FUNCTION",
+ "description": "A routine description.",
+ "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED,
+ },
+ ["return_table_type"],
+ {
+ "returnTableType": {
+ "columns": [{"name": "int_col", "type": {"typeKind": "INT64"}}]
+ }
+ },
+ ),
(
{
"arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}],
@@ -256,6 +361,7 @@ def test_from_api_repr_w_unknown_fields(target_class):
"returnType": {"typeKind": "INT64"},
"routineType": "SCALAR_FUNCTION",
"description": "A routine description.",
+ "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED,
},
["type_"],
{"routineType": "SCALAR_FUNCTION"},
@@ -268,13 +374,37 @@ def test_from_api_repr_w_unknown_fields(target_class):
"returnType": {"typeKind": "INT64"},
"routineType": "SCALAR_FUNCTION",
"description": "A routine description.",
+ "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED,
},
["description"],
{"description": "A routine description."},
),
+ (
+ {
+ "arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}],
+ "definitionBody": "x * 3",
+ "language": "SQL",
+ "returnType": {"typeKind": "INT64"},
+ "routineType": "SCALAR_FUNCTION",
+ "description": "A routine description.",
+ "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED,
+ },
+ ["determinism_level"],
+ {
+ "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED
+ },
+ ),
(
{},
- ["arguments", "language", "body", "type_", "return_type", "description"],
+ [
+ "arguments",
+ "language",
+ "body",
+ "type_",
+ "return_type",
+ "description",
+ "determinism_level",
+ ],
{
"arguments": None,
"definitionBody": None,
@@ -282,6 +412,7 @@ def test_from_api_repr_w_unknown_fields(target_class):
"returnType": None,
"routineType": None,
"description": None,
+ "determinismLevel": None,
},
),
(
@@ -322,6 +453,41 @@ def test_set_return_type_w_none(object_under_test):
assert object_under_test._properties["returnType"] is None
+def test_set_return_table_type_w_none(object_under_test):
+ object_under_test.return_table_type = None
+ assert object_under_test.return_table_type is None
+ assert object_under_test._properties["returnTableType"] is None
+
+
+def test_set_return_table_type_w_not_none(object_under_test):
+ StandardSqlDataType = bigquery_v2.types.StandardSqlDataType
+ StandardSqlField = bigquery_v2.types.StandardSqlField
+ StandardSqlTableType = bigquery_v2.types.StandardSqlTableType
+
+ table_type = StandardSqlTableType(
+ columns=[
+ StandardSqlField(
+ name="int_col",
+ type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.INT64),
+ ),
+ StandardSqlField(
+ name="str_col",
+ type=StandardSqlDataType(type_kind=StandardSqlDataType.TypeKind.STRING),
+ ),
+ ]
+ )
+
+ object_under_test.return_table_type = table_type
+
+ assert object_under_test.return_table_type == table_type
+ assert object_under_test._properties["returnTableType"] == {
+ "columns": [
+ {"name": "int_col", "type": {"typeKind": "INT64"}},
+ {"name": "str_col", "type": {"typeKind": "STRING"}},
+ ]
+ }
+
+
def test_set_description_w_none(object_under_test):
object_under_test.description = None
assert object_under_test.description is None
diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py
index 0fdf1142f..f8d00e67d 100644
--- a/tests/unit/test__helpers.py
+++ b/tests/unit/test__helpers.py
@@ -19,6 +19,75 @@
import mock
+try:
+ from google.cloud import bigquery_storage
+except ImportError: # pragma: NO COVER
+ bigquery_storage = None
+
+
+@unittest.skipIf(bigquery_storage is None, "Requires `google-cloud-bigquery-storage`")
+class TestBQStorageVersions(unittest.TestCase):
+ def _object_under_test(self):
+ from google.cloud.bigquery import _helpers
+
+ return _helpers.BQStorageVersions()
+
+ def _call_fut(self):
+ from google.cloud.bigquery import _helpers
+
+ _helpers.BQ_STORAGE_VERSIONS._installed_version = None
+ return _helpers.BQ_STORAGE_VERSIONS.verify_version()
+
+ def test_raises_no_error_w_recent_bqstorage(self):
+ from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError
+
+ with mock.patch("google.cloud.bigquery_storage.__version__", new="2.0.0"):
+ try:
+ self._call_fut()
+ except LegacyBigQueryStorageError: # pragma: NO COVER
+ self.fail("Legacy error raised with a non-legacy dependency version.")
+
+ def test_raises_error_w_legacy_bqstorage(self):
+ from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError
+
+ with mock.patch("google.cloud.bigquery_storage.__version__", new="1.9.9"):
+ with self.assertRaises(LegacyBigQueryStorageError):
+ self._call_fut()
+
+ def test_raises_error_w_unknown_bqstorage_version(self):
+ from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError
+
+ with mock.patch("google.cloud.bigquery_storage", autospec=True) as fake_module:
+ del fake_module.__version__
+ error_pattern = r"version found: 0.0.0"
+ with self.assertRaisesRegex(LegacyBigQueryStorageError, error_pattern):
+ self._call_fut()
+
+ def test_installed_version_returns_cached(self):
+ versions = self._object_under_test()
+ versions._installed_version = object()
+ assert versions.installed_version is versions._installed_version
+
+ def test_installed_version_returns_parsed_version(self):
+ versions = self._object_under_test()
+
+ with mock.patch("google.cloud.bigquery_storage.__version__", new="1.2.3"):
+ version = versions.installed_version
+
+ assert version.major == 1
+ assert version.minor == 2
+ assert version.micro == 3
+
+ def test_is_read_session_optional_true(self):
+ versions = self._object_under_test()
+ with mock.patch("google.cloud.bigquery_storage.__version__", new="2.6.0"):
+ assert versions.is_read_session_optional
+
+ def test_is_read_session_optional_false(self):
+ versions = self._object_under_test()
+ with mock.patch("google.cloud.bigquery_storage.__version__", new="2.5.0"):
+ assert not versions.is_read_session_optional
+
class Test_not_null(unittest.TestCase):
def _call_fut(self, value, field):
@@ -618,9 +687,48 @@ def _call_fut(self, value):
return _float_to_json(value)
+ def test_w_none(self):
+ self.assertEqual(self._call_fut(None), None)
+
+ def test_w_non_numeric(self):
+ with self.assertRaises(TypeError):
+ self._call_fut(object())
+
+ def test_w_integer(self):
+ result = self._call_fut(123)
+ self.assertIsInstance(result, float)
+ self.assertEqual(result, 123.0)
+
def test_w_float(self):
self.assertEqual(self._call_fut(1.23), 1.23)
+ def test_w_float_as_string(self):
+ self.assertEqual(self._call_fut("1.23"), 1.23)
+
+ def test_w_nan(self):
+ result = self._call_fut(float("nan"))
+ self.assertEqual(result.lower(), "nan")
+
+ def test_w_nan_as_string(self):
+ result = self._call_fut("NaN")
+ self.assertEqual(result.lower(), "nan")
+
+ def test_w_infinity(self):
+ result = self._call_fut(float("inf"))
+ self.assertEqual(result.lower(), "inf")
+
+ def test_w_infinity_as_string(self):
+ result = self._call_fut("inf")
+ self.assertEqual(result.lower(), "inf")
+
+ def test_w_negative_infinity(self):
+ result = self._call_fut(float("-inf"))
+ self.assertEqual(result.lower(), "-inf")
+
+ def test_w_negative_infinity_as_string(self):
+ result = self._call_fut("-inf")
+ self.assertEqual(result.lower(), "-inf")
+
class Test_decimal_to_json(unittest.TestCase):
def _call_fut(self, value):
@@ -1159,3 +1267,18 @@ def fake_isinstance(instance, target_class):
"google.cloud.bigquery.schema.isinstance", side_effect=fake_isinstance
)
return patcher
+
+
+def test_decimal_as_float_api_repr():
+ """Make sure decimals get converted to float."""
+ import google.cloud.bigquery.query
+ from decimal import Decimal
+
+ param = google.cloud.bigquery.query.ScalarQueryParameter(
+ "x", "FLOAT64", Decimal(42)
+ )
+ assert param.to_api_repr() == {
+ "parameterType": {"type": "FLOAT64"},
+ "parameterValue": {"value": 42.0},
+ "name": "x",
+ }
diff --git a/tests/unit/test__http.py b/tests/unit/test__http.py
index 78e59cb30..09f6d29d7 100644
--- a/tests/unit/test__http.py
+++ b/tests/unit/test__http.py
@@ -32,6 +32,9 @@ def _get_target_class():
return Connection
def _make_one(self, *args, **kw):
+ if "api_endpoint" not in kw:
+ kw["api_endpoint"] = "https://bigquery.googleapis.com"
+
return self._get_target_class()(*args, **kw)
def test_build_api_url_no_extra_query_params(self):
@@ -138,3 +141,14 @@ def test_extra_headers_replace(self):
url=expected_uri,
timeout=self._get_default_timeout(),
)
+
+ def test_ctor_mtls(self):
+ conn = self._make_one(object(), api_endpoint=None)
+ self.assertEqual(conn.ALLOW_AUTO_SWITCH_TO_MTLS_URL, True)
+ self.assertEqual(conn.API_BASE_URL, "https://bigquery.googleapis.com")
+ self.assertEqual(conn.API_BASE_MTLS_URL, "https://bigquery.mtls.googleapis.com")
+
+ conn = self._make_one(object(), api_endpoint="http://foo")
+ self.assertEqual(conn.ALLOW_AUTO_SWITCH_TO_MTLS_URL, False)
+ self.assertEqual(conn.API_BASE_URL, "http://foo")
+ self.assertEqual(conn.API_BASE_MTLS_URL, "https://bigquery.mtls.googleapis.com")
diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py
index ef0c40e1a..a9b0ae21f 100644
--- a/tests/unit/test__pandas_helpers.py
+++ b/tests/unit/test__pandas_helpers.py
@@ -17,7 +17,9 @@
import decimal
import functools
import operator
+import queue
import warnings
+import pkg_resources
import mock
@@ -34,12 +36,32 @@
# Mock out pyarrow when missing, because methods from pyarrow.types are
# used in test parameterization.
pyarrow = mock.Mock()
+try:
+ import geopandas
+except ImportError: # pragma: NO COVER
+ geopandas = None
+
import pytest
-import pytz
from google import api_core
+from google.cloud.bigquery import _helpers
from google.cloud.bigquery import schema
+try:
+ from google.cloud import bigquery_storage
+
+ _helpers.BQ_STORAGE_VERSIONS.verify_version()
+except ImportError: # pragma: NO COVER
+ bigquery_storage = None
+
+PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0")
+
+if pandas is not None:
+ PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version
+else:
+ # Set to less than MIN version.
+ PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0")
+
@pytest.fixture
def module_under_test():
@@ -70,6 +92,15 @@ def is_numeric(type_):
)(type_)
+def is_bignumeric(type_):
+ # See: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#numeric-type
+ return all_(
+ pyarrow.types.is_decimal,
+ lambda type_: type_.precision == 76,
+ lambda type_: type_.scale == 38,
+ )(type_)
+
+
def is_timestamp(type_):
# See: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp-type
return all_(
@@ -120,6 +151,7 @@ def test_all_():
("FLOAT", "NULLABLE", pyarrow.types.is_float64),
("FLOAT64", "NULLABLE", pyarrow.types.is_float64),
("NUMERIC", "NULLABLE", is_numeric),
+ ("BIGNUMERIC", "NULLABLE", is_bignumeric),
("BOOLEAN", "NULLABLE", pyarrow.types.is_boolean),
("BOOL", "NULLABLE", pyarrow.types.is_boolean),
("TIMESTAMP", "NULLABLE", is_timestamp),
@@ -198,6 +230,11 @@ def test_all_():
"REPEATED",
all_(pyarrow.types.is_list, lambda type_: is_numeric(type_.value_type)),
),
+ (
+ "BIGNUMERIC",
+ "REPEATED",
+ all_(pyarrow.types.is_list, lambda type_: is_bignumeric(type_.value_type)),
+ ),
(
"BOOLEAN",
"REPEATED",
@@ -270,34 +307,38 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type):
schema.SchemaField("field05", "FLOAT"),
schema.SchemaField("field06", "FLOAT64"),
schema.SchemaField("field07", "NUMERIC"),
- schema.SchemaField("field08", "BOOLEAN"),
- schema.SchemaField("field09", "BOOL"),
- schema.SchemaField("field10", "TIMESTAMP"),
- schema.SchemaField("field11", "DATE"),
- schema.SchemaField("field12", "TIME"),
- schema.SchemaField("field13", "DATETIME"),
- schema.SchemaField("field14", "GEOGRAPHY"),
+ schema.SchemaField("field08", "BIGNUMERIC"),
+ schema.SchemaField("field09", "BOOLEAN"),
+ schema.SchemaField("field10", "BOOL"),
+ schema.SchemaField("field11", "TIMESTAMP"),
+ schema.SchemaField("field12", "DATE"),
+ schema.SchemaField("field13", "TIME"),
+ schema.SchemaField("field14", "DATETIME"),
+ schema.SchemaField("field15", "GEOGRAPHY"),
)
+
field = schema.SchemaField("ignored_name", bq_type, mode="NULLABLE", fields=fields)
actual = module_under_test.bq_to_arrow_data_type(field)
- expected = pyarrow.struct(
- (
- pyarrow.field("field01", pyarrow.string()),
- pyarrow.field("field02", pyarrow.binary()),
- pyarrow.field("field03", pyarrow.int64()),
- pyarrow.field("field04", pyarrow.int64()),
- pyarrow.field("field05", pyarrow.float64()),
- pyarrow.field("field06", pyarrow.float64()),
- pyarrow.field("field07", module_under_test.pyarrow_numeric()),
- pyarrow.field("field08", pyarrow.bool_()),
- pyarrow.field("field09", pyarrow.bool_()),
- pyarrow.field("field10", module_under_test.pyarrow_timestamp()),
- pyarrow.field("field11", pyarrow.date32()),
- pyarrow.field("field12", module_under_test.pyarrow_time()),
- pyarrow.field("field13", module_under_test.pyarrow_datetime()),
- pyarrow.field("field14", pyarrow.string()),
- )
+
+ expected = (
+ pyarrow.field("field01", pyarrow.string()),
+ pyarrow.field("field02", pyarrow.binary()),
+ pyarrow.field("field03", pyarrow.int64()),
+ pyarrow.field("field04", pyarrow.int64()),
+ pyarrow.field("field05", pyarrow.float64()),
+ pyarrow.field("field06", pyarrow.float64()),
+ pyarrow.field("field07", module_under_test.pyarrow_numeric()),
+ pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),
+ pyarrow.field("field09", pyarrow.bool_()),
+ pyarrow.field("field10", pyarrow.bool_()),
+ pyarrow.field("field11", module_under_test.pyarrow_timestamp()),
+ pyarrow.field("field12", pyarrow.date32()),
+ pyarrow.field("field13", module_under_test.pyarrow_time()),
+ pyarrow.field("field14", module_under_test.pyarrow_datetime()),
+ pyarrow.field("field15", pyarrow.string()),
)
+ expected = pyarrow.struct(expected)
+
assert pyarrow.types.is_struct(actual)
assert actual.num_fields == len(fields)
assert actual.equals(expected)
@@ -314,34 +355,38 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type):
schema.SchemaField("field05", "FLOAT"),
schema.SchemaField("field06", "FLOAT64"),
schema.SchemaField("field07", "NUMERIC"),
- schema.SchemaField("field08", "BOOLEAN"),
- schema.SchemaField("field09", "BOOL"),
- schema.SchemaField("field10", "TIMESTAMP"),
- schema.SchemaField("field11", "DATE"),
- schema.SchemaField("field12", "TIME"),
- schema.SchemaField("field13", "DATETIME"),
- schema.SchemaField("field14", "GEOGRAPHY"),
+ schema.SchemaField("field08", "BIGNUMERIC"),
+ schema.SchemaField("field09", "BOOLEAN"),
+ schema.SchemaField("field10", "BOOL"),
+ schema.SchemaField("field11", "TIMESTAMP"),
+ schema.SchemaField("field12", "DATE"),
+ schema.SchemaField("field13", "TIME"),
+ schema.SchemaField("field14", "DATETIME"),
+ schema.SchemaField("field15", "GEOGRAPHY"),
)
+
field = schema.SchemaField("ignored_name", bq_type, mode="REPEATED", fields=fields)
actual = module_under_test.bq_to_arrow_data_type(field)
- expected_value_type = pyarrow.struct(
- (
- pyarrow.field("field01", pyarrow.string()),
- pyarrow.field("field02", pyarrow.binary()),
- pyarrow.field("field03", pyarrow.int64()),
- pyarrow.field("field04", pyarrow.int64()),
- pyarrow.field("field05", pyarrow.float64()),
- pyarrow.field("field06", pyarrow.float64()),
- pyarrow.field("field07", module_under_test.pyarrow_numeric()),
- pyarrow.field("field08", pyarrow.bool_()),
- pyarrow.field("field09", pyarrow.bool_()),
- pyarrow.field("field10", module_under_test.pyarrow_timestamp()),
- pyarrow.field("field11", pyarrow.date32()),
- pyarrow.field("field12", module_under_test.pyarrow_time()),
- pyarrow.field("field13", module_under_test.pyarrow_datetime()),
- pyarrow.field("field14", pyarrow.string()),
- )
+
+ expected = (
+ pyarrow.field("field01", pyarrow.string()),
+ pyarrow.field("field02", pyarrow.binary()),
+ pyarrow.field("field03", pyarrow.int64()),
+ pyarrow.field("field04", pyarrow.int64()),
+ pyarrow.field("field05", pyarrow.float64()),
+ pyarrow.field("field06", pyarrow.float64()),
+ pyarrow.field("field07", module_under_test.pyarrow_numeric()),
+ pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),
+ pyarrow.field("field09", pyarrow.bool_()),
+ pyarrow.field("field10", pyarrow.bool_()),
+ pyarrow.field("field11", module_under_test.pyarrow_timestamp()),
+ pyarrow.field("field12", pyarrow.date32()),
+ pyarrow.field("field13", module_under_test.pyarrow_time()),
+ pyarrow.field("field14", module_under_test.pyarrow_datetime()),
+ pyarrow.field("field15", pyarrow.string()),
)
+ expected_value_type = pyarrow.struct(expected)
+
assert pyarrow.types.is_list(actual)
assert pyarrow.types.is_struct(actual.value_type)
assert actual.value_type.num_fields == len(fields)
@@ -385,15 +430,26 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test):
decimal.Decimal("999.123456789"),
],
),
+ (
+ "BIGNUMERIC",
+ [
+ decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)),
+ None,
+ decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)),
+ decimal.Decimal("3.141592653589793238462643383279"),
+ ],
+ ),
("BOOLEAN", [True, None, False, None]),
("BOOL", [False, None, True, None]),
(
"TIMESTAMP",
[
- datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
+ datetime.datetime(1, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc),
None,
- datetime.datetime(9999, 12, 31, 23, 59, 59, 999999, tzinfo=pytz.utc),
- datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
+ datetime.datetime(
+ 9999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc
+ ),
+ datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc),
],
),
(
@@ -533,6 +589,60 @@ def test_bq_to_arrow_array_w_special_floats(module_under_test):
assert roundtrip[3] is None
+@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`")
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
+def test_bq_to_arrow_array_w_geography_dtype(module_under_test):
+ from shapely import wkb, wkt
+
+ bq_field = schema.SchemaField("field_name", "GEOGRAPHY")
+
+ series = geopandas.GeoSeries([None, wkt.loads("point(0 0)")])
+ array = module_under_test.bq_to_arrow_array(series, bq_field)
+ # The result is binary, because we use wkb format
+ assert array.type == pyarrow.binary()
+ assert array.to_pylist() == [None, wkb.dumps(series[1])]
+
+ # All values are NA: the array type is string rather than binary.
+ series = geopandas.GeoSeries([None, None])
+ array = module_under_test.bq_to_arrow_array(series, bq_field)
+ assert array.type == pyarrow.string()
+ assert array.to_pylist() == list(series)
+
+
+@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`")
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
+def test_bq_to_arrow_array_w_geography_type_shapely_data(module_under_test):
+ from shapely import wkb, wkt
+
+ bq_field = schema.SchemaField("field_name", "GEOGRAPHY")
+
+ series = pandas.Series([None, wkt.loads("point(0 0)")])
+ array = module_under_test.bq_to_arrow_array(series, bq_field)
+ # The result is binary, because we use wkb format
+ assert array.type == pyarrow.binary()
+ assert array.to_pylist() == [None, wkb.dumps(series[1])]
+
+ # All values are NA: the array type is string rather than binary.
+ series = pandas.Series([None, None])
+ array = module_under_test.bq_to_arrow_array(series, bq_field)
+ assert array.type == pyarrow.string()
+ assert array.to_pylist() == list(series)
+
+
+@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`")
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
+def test_bq_to_arrow_array_w_geography_type_wkb_data(module_under_test):
+ from shapely import wkb, wkt
+
+ bq_field = schema.SchemaField("field_name", "GEOGRAPHY")
+
+ series = pandas.Series([None, wkb.dumps(wkt.loads("point(0 0)"))])
+ array = module_under_test.bq_to_arrow_array(series, bq_field)
+ # The result is binary, because we use wkb format
+ assert array.type == pyarrow.binary()
+ assert array.to_pylist() == list(series)
+
+
@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_bq_to_arrow_schema_w_unknown_type(module_under_test):
fields = (
@@ -680,6 +790,37 @@ def test_list_columns_and_indexes_with_named_index_same_as_column_name(
assert columns_and_indexes == expected
+@pytest.mark.skipif(
+ pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION,
+ reason="Requires `pandas version >= 1.0.0` which introduces pandas.NA",
+)
+def test_dataframe_to_json_generator(module_under_test):
+ utcnow = datetime.datetime.utcnow()
+ df_data = collections.OrderedDict(
+ [
+ ("a_series", [pandas.NA, 2, 3, 4]),
+ ("b_series", [0.1, float("NaN"), 0.3, 0.4]),
+ ("c_series", ["a", "b", pandas.NA, "d"]),
+ ("d_series", [utcnow, utcnow, utcnow, pandas.NaT]),
+ ("e_series", [True, False, True, None]),
+ ]
+ )
+ dataframe = pandas.DataFrame(
+ df_data, index=pandas.Index([4, 5, 6, 7], name="a_index")
+ )
+
+ dataframe = dataframe.astype({"a_series": pandas.Int64Dtype()})
+
+ rows = module_under_test.dataframe_to_json_generator(dataframe)
+ expected = [
+ {"b_series": 0.1, "c_series": "a", "d_series": utcnow, "e_series": True},
+ {"a_series": 2, "c_series": "b", "d_series": utcnow, "e_series": False},
+ {"a_series": 3, "b_series": 0.3, "d_series": utcnow, "e_series": True},
+ {"a_series": 4, "b_series": 0.4, "c_series": "d"},
+ ]
+ assert list(rows) == expected
+
+
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
def test_list_columns_and_indexes_with_named_index(module_under_test):
df_data = collections.OrderedDict(
@@ -841,42 +982,44 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test):
schema.SchemaField("field05", "FLOAT", mode="REQUIRED"),
schema.SchemaField("field06", "FLOAT64", mode="REQUIRED"),
schema.SchemaField("field07", "NUMERIC", mode="REQUIRED"),
- schema.SchemaField("field08", "BOOLEAN", mode="REQUIRED"),
- schema.SchemaField("field09", "BOOL", mode="REQUIRED"),
- schema.SchemaField("field10", "TIMESTAMP", mode="REQUIRED"),
- schema.SchemaField("field11", "DATE", mode="REQUIRED"),
- schema.SchemaField("field12", "TIME", mode="REQUIRED"),
- schema.SchemaField("field13", "DATETIME", mode="REQUIRED"),
- schema.SchemaField("field14", "GEOGRAPHY", mode="REQUIRED"),
- )
- dataframe = pandas.DataFrame(
- {
- "field01": ["hello", "world"],
- "field02": [b"abd", b"efg"],
- "field03": [1, 2],
- "field04": [3, 4],
- "field05": [1.25, 9.75],
- "field06": [-1.75, -3.5],
- "field07": [decimal.Decimal("1.2345"), decimal.Decimal("6.7891")],
- "field08": [True, False],
- "field09": [False, True],
- "field10": [
- datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=pytz.utc),
- datetime.datetime(2012, 12, 21, 9, 7, 42, tzinfo=pytz.utc),
- ],
- "field11": [datetime.date(9999, 12, 31), datetime.date(1970, 1, 1)],
- "field12": [datetime.time(23, 59, 59, 999999), datetime.time(12, 0, 0)],
- "field13": [
- datetime.datetime(1970, 1, 1, 0, 0, 0),
- datetime.datetime(2012, 12, 21, 9, 7, 42),
- ],
- "field14": [
- "POINT(30 10)",
- "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))",
- ],
- }
+ schema.SchemaField("field08", "BIGNUMERIC", mode="REQUIRED"),
+ schema.SchemaField("field09", "BOOLEAN", mode="REQUIRED"),
+ schema.SchemaField("field10", "BOOL", mode="REQUIRED"),
+ schema.SchemaField("field11", "TIMESTAMP", mode="REQUIRED"),
+ schema.SchemaField("field12", "DATE", mode="REQUIRED"),
+ schema.SchemaField("field13", "TIME", mode="REQUIRED"),
+ schema.SchemaField("field14", "DATETIME", mode="REQUIRED"),
+ schema.SchemaField("field15", "GEOGRAPHY", mode="REQUIRED"),
)
+ data = {
+ "field01": ["hello", "world"],
+ "field02": [b"abd", b"efg"],
+ "field03": [1, 2],
+ "field04": [3, 4],
+ "field05": [1.25, 9.75],
+ "field06": [-1.75, -3.5],
+ "field07": [decimal.Decimal("1.2345"), decimal.Decimal("6.7891")],
+ "field08": [
+ decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)),
+ decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)),
+ ],
+ "field09": [True, False],
+ "field10": [False, True],
+ "field11": [
+ datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc),
+ datetime.datetime(2012, 12, 21, 9, 7, 42, tzinfo=datetime.timezone.utc),
+ ],
+ "field12": [datetime.date(9999, 12, 31), datetime.date(1970, 1, 1)],
+ "field13": [datetime.time(23, 59, 59, 999999), datetime.time(12, 0, 0)],
+ "field14": [
+ datetime.datetime(1970, 1, 1, 0, 0, 0),
+ datetime.datetime(2012, 12, 21, 9, 7, 42),
+ ],
+ "field15": ["POINT(30 10)", "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))"],
+ }
+ dataframe = pandas.DataFrame(data)
+
arrow_table = module_under_test.dataframe_to_arrow(dataframe, bq_schema)
arrow_schema = arrow_table.schema
@@ -1074,6 +1217,28 @@ def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test):
assert "struct_field" in str(expected_warnings[0])
+@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`")
+def test_dataframe_to_bq_schema_geography(module_under_test):
+ from shapely import wkt
+
+ df = geopandas.GeoDataFrame(
+ pandas.DataFrame(
+ dict(
+ name=["foo", "bar"],
+ geo1=[None, None],
+ geo2=[None, wkt.loads("Point(1 1)")],
+ )
+ ),
+ geometry="geo1",
+ )
+ bq_schema = module_under_test.dataframe_to_bq_schema(df, [])
+ assert bq_schema == (
+ schema.SchemaField("name", "STRING"),
+ schema.SchemaField("geo1", "GEOGRAPHY"),
+ schema.SchemaField("geo2", "GEOGRAPHY"),
+ )
+
+
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_augment_schema_type_detection_succeeds(module_under_test):
@@ -1089,6 +1254,7 @@ def test_augment_schema_type_detection_succeeds(module_under_test):
"bytes_field": b"some bytes",
"string_field": u"some characters",
"numeric_field": decimal.Decimal("123.456"),
+ "bignumeric_field": decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)),
}
]
)
@@ -1108,6 +1274,7 @@ def test_augment_schema_type_detection_succeeds(module_under_test):
schema.SchemaField("bytes_field", field_type=None, mode="NULLABLE"),
schema.SchemaField("string_field", field_type=None, mode="NULLABLE"),
schema.SchemaField("numeric_field", field_type=None, mode="NULLABLE"),
+ schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"),
)
with warnings.catch_warnings(record=True) as warned:
@@ -1130,7 +1297,11 @@ def test_augment_schema_type_detection_succeeds(module_under_test):
schema.SchemaField("bytes_field", field_type="BYTES", mode="NULLABLE"),
schema.SchemaField("string_field", field_type="STRING", mode="NULLABLE"),
schema.SchemaField("numeric_field", field_type="NUMERIC", mode="NULLABLE"),
+ schema.SchemaField(
+ "bignumeric_field", field_type="BIGNUMERIC", mode="NULLABLE"
+ ),
)
+
by_name = operator.attrgetter("name")
assert sorted(augmented_schema, key=by_name) == sorted(expected_schema, key=by_name)
@@ -1201,6 +1372,132 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test):
assert schema_arg == expected_schema_arg
+@pytest.mark.skipif(
+ bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
+)
+def test__download_table_bqstorage_stream_includes_read_session(
+ monkeypatch, module_under_test
+):
+ import google.cloud.bigquery_storage_v1.reader
+ import google.cloud.bigquery_storage_v1.types
+
+ monkeypatch.setattr(_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None)
+ monkeypatch.setattr(bigquery_storage, "__version__", "2.5.0")
+ bqstorage_client = mock.create_autospec(
+ bigquery_storage.BigQueryReadClient, instance=True
+ )
+ reader = mock.create_autospec(
+ google.cloud.bigquery_storage_v1.reader.ReadRowsStream, instance=True
+ )
+ bqstorage_client.read_rows.return_value = reader
+ session = google.cloud.bigquery_storage_v1.types.ReadSession()
+
+ module_under_test._download_table_bqstorage_stream(
+ module_under_test._DownloadState(),
+ bqstorage_client,
+ session,
+ google.cloud.bigquery_storage_v1.types.ReadStream(name="test"),
+ queue.Queue(),
+ mock.Mock(),
+ )
+
+ reader.rows.assert_called_once_with(session)
+
+
+@pytest.mark.skipif(
+ bigquery_storage is None
+ or not _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional,
+ reason="Requires `google-cloud-bigquery-storage` >= 2.6.0",
+)
+def test__download_table_bqstorage_stream_omits_read_session(
+ monkeypatch, module_under_test
+):
+ import google.cloud.bigquery_storage_v1.reader
+ import google.cloud.bigquery_storage_v1.types
+
+ monkeypatch.setattr(_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None)
+ monkeypatch.setattr(bigquery_storage, "__version__", "2.6.0")
+ bqstorage_client = mock.create_autospec(
+ bigquery_storage.BigQueryReadClient, instance=True
+ )
+ reader = mock.create_autospec(
+ google.cloud.bigquery_storage_v1.reader.ReadRowsStream, instance=True
+ )
+ bqstorage_client.read_rows.return_value = reader
+ session = google.cloud.bigquery_storage_v1.types.ReadSession()
+
+ module_under_test._download_table_bqstorage_stream(
+ module_under_test._DownloadState(),
+ bqstorage_client,
+ session,
+ google.cloud.bigquery_storage_v1.types.ReadStream(name="test"),
+ queue.Queue(),
+ mock.Mock(),
+ )
+
+ reader.rows.assert_called_once_with()
+
+
+@pytest.mark.parametrize(
+    "stream_count,maxsize_kwarg,expected_call_count,expected_maxsize",
+    [
+        (3, {"max_queue_size": 2}, 3, 2),  # custom queue size
+        (4, {}, 4, 4),  # default queue size
+        (7, {"max_queue_size": None}, 7, 0),  # infinite queue size
+    ],
+)
+@pytest.mark.skipif(
+    bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
+)
+def test__download_table_bqstorage(
+    module_under_test,
+    stream_count,
+    maxsize_kwarg,
+    expected_call_count,
+    expected_maxsize,
+):
+    """Start one download per stream using a queue of the expected maxsize."""
+    from google.cloud.bigquery import dataset, table
+
+    queue_used = None  # Set by the fake worker to the queue it was handed.
+
+    bqstorage_client = mock.create_autospec(
+        bigquery_storage.BigQueryReadClient, instance=True
+    )
+    fake_session = mock.Mock(
+        streams=["stream/s{}".format(i) for i in range(stream_count)]
+    )
+    bqstorage_client.create_read_session.return_value = fake_session
+    table_ref = table.TableReference(
+        dataset.DatasetReference("project-x", "dataset-y"), "table-z",
+    )
+
+    def fake_download_stream(
+        download_state, bqstorage_client, session, stream, worker_queue, page_to_item
+    ):
+        nonlocal queue_used
+        queue_used = worker_queue
+        try:
+            worker_queue.put_nowait("result_page")
+        except queue.Full:  # pragma: NO COVER
+            pass
+
+    download_stream = mock.Mock(side_effect=fake_download_stream)
+
+    with mock.patch.object(
+        module_under_test, "_download_table_bqstorage_stream", new=download_stream
+    ):
+        result_gen = module_under_test._download_table_bqstorage(
+            "some-project", table_ref, bqstorage_client, **maxsize_kwarg
+        )
+        list(result_gen)
+
+    # Timing-safe, as the method under test should block until the pool shutdown is
+    # complete, at which point all download stream workers have already been submitted
+    # to the thread pool.
+    assert download_stream.call_count == expected_call_count  # once for each stream
+    assert queue_used.maxsize == expected_maxsize
+
+
@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_download_arrow_row_iterator_unknown_field_type(module_under_test):
fake_page = api_core.page_iterator.Page(
@@ -1334,6 +1631,26 @@ def test_download_dataframe_row_iterator_dict_sequence_schema(module_under_test)
result = next(results_gen)
+@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
def test_table_data_listpage_to_dataframe_skips_stop_iteration(module_under_test):
dataframe = module_under_test._row_iterator_page_to_dataframe([], [], {})
assert isinstance(dataframe, pandas.DataFrame)
+
+
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
+def test_bq_to_arrow_field_type_override(module_under_test):
+ # When loading pandas data, we may need to override the type
+ # decision based on data contents, because GEOGRAPHY data can be
+ # stored as either text or binary.
+
+ assert (
+ module_under_test.bq_to_arrow_field(schema.SchemaField("g", "GEOGRAPHY")).type
+ == pyarrow.string()
+ )
+
+ assert (
+ module_under_test.bq_to_arrow_field(
+ schema.SchemaField("g", "GEOGRAPHY"), pyarrow.binary(),
+ ).type
+ == pyarrow.binary()
+ )
diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py
index 625256e6e..e9204f1de 100644
--- a/tests/unit/test_client.py
+++ b/tests/unit/test_client.py
@@ -27,9 +27,9 @@
import warnings
import mock
+import packaging
import requests
import pytest
-import pytz
import pkg_resources
try:
@@ -56,6 +56,7 @@
import google.cloud._helpers
from google.cloud import bigquery_v2
from google.cloud.bigquery.dataset import DatasetReference
+from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
try:
from google.cloud import bigquery_storage
@@ -65,7 +66,12 @@
from tests.unit.helpers import make_connection
PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0")
-PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version
+
+if pandas is not None:
+ PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version
+else:
+ # Set to less than MIN version.
+ PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0")
def _make_credentials():
@@ -362,7 +368,7 @@ def test__get_query_results_miss_w_client_location(self):
method="GET",
path="/projects/PROJECT/queries/nothere",
query_params={"maxResults": 0, "location": self.LOCATION},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test__get_query_results_hit(self):
@@ -423,7 +429,9 @@ def test_get_service_account_email_w_alternate_project(self):
service_account_email = client.get_service_account_email(project=project)
final_attributes.assert_called_once_with({"path": path}, client, None)
- conn.api_request.assert_called_once_with(method="GET", path=path, timeout=None)
+ conn.api_request.assert_called_once_with(
+ method="GET", path=path, timeout=DEFAULT_TIMEOUT
+ )
self.assertEqual(service_account_email, email)
def test_get_service_account_email_w_custom_retry(self):
@@ -466,221 +474,6 @@ def test_get_service_account_email_w_custom_retry(self):
],
)
- def test_list_projects_defaults(self):
- from google.cloud.bigquery.client import Project
-
- PROJECT_1 = "PROJECT_ONE"
- PROJECT_2 = "PROJECT_TWO"
- TOKEN = "TOKEN"
- DATA = {
- "nextPageToken": TOKEN,
- "projects": [
- {
- "kind": "bigquery#project",
- "id": PROJECT_1,
- "numericId": 1,
- "projectReference": {"projectId": PROJECT_1},
- "friendlyName": "One",
- },
- {
- "kind": "bigquery#project",
- "id": PROJECT_2,
- "numericId": 2,
- "projectReference": {"projectId": PROJECT_2},
- "friendlyName": "Two",
- },
- ],
- }
- creds = _make_credentials()
- client = self._make_one(PROJECT_1, creds)
- conn = client._connection = make_connection(DATA)
- iterator = client.list_projects()
-
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- page = next(iterator.pages)
-
- final_attributes.assert_called_once_with({"path": "/projects"}, client, None)
- projects = list(page)
- token = iterator.next_page_token
-
- self.assertEqual(len(projects), len(DATA["projects"]))
- for found, expected in zip(projects, DATA["projects"]):
- self.assertIsInstance(found, Project)
- self.assertEqual(found.project_id, expected["id"])
- self.assertEqual(found.numeric_id, expected["numericId"])
- self.assertEqual(found.friendly_name, expected["friendlyName"])
- self.assertEqual(token, TOKEN)
-
- conn.api_request.assert_called_once_with(
- method="GET", path="/projects", query_params={}, timeout=None
- )
-
- def test_list_projects_w_timeout(self):
- PROJECT_1 = "PROJECT_ONE"
- TOKEN = "TOKEN"
- DATA = {
- "nextPageToken": TOKEN,
- "projects": [],
- }
- creds = _make_credentials()
- client = self._make_one(PROJECT_1, creds)
- conn = client._connection = make_connection(DATA)
-
- iterator = client.list_projects(timeout=7.5)
-
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- next(iterator.pages)
-
- final_attributes.assert_called_once_with({"path": "/projects"}, client, None)
-
- conn.api_request.assert_called_once_with(
- method="GET", path="/projects", query_params={}, timeout=7.5
- )
-
- def test_list_projects_explicit_response_missing_projects_key(self):
- TOKEN = "TOKEN"
- DATA = {}
- creds = _make_credentials()
- client = self._make_one(self.PROJECT, creds)
- conn = client._connection = make_connection(DATA)
-
- iterator = client.list_projects(max_results=3, page_token=TOKEN)
-
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- page = next(iterator.pages)
-
- final_attributes.assert_called_once_with({"path": "/projects"}, client, None)
- projects = list(page)
- token = iterator.next_page_token
-
- self.assertEqual(len(projects), 0)
- self.assertIsNone(token)
-
- conn.api_request.assert_called_once_with(
- method="GET",
- path="/projects",
- query_params={"maxResults": 3, "pageToken": TOKEN},
- timeout=None,
- )
-
- def test_list_datasets_defaults(self):
- from google.cloud.bigquery.dataset import DatasetListItem
-
- DATASET_1 = "dataset_one"
- DATASET_2 = "dataset_two"
- PATH = "projects/%s/datasets" % self.PROJECT
- TOKEN = "TOKEN"
- DATA = {
- "nextPageToken": TOKEN,
- "datasets": [
- {
- "kind": "bigquery#dataset",
- "id": "%s:%s" % (self.PROJECT, DATASET_1),
- "datasetReference": {
- "datasetId": DATASET_1,
- "projectId": self.PROJECT,
- },
- "friendlyName": None,
- },
- {
- "kind": "bigquery#dataset",
- "id": "%s:%s" % (self.PROJECT, DATASET_2),
- "datasetReference": {
- "datasetId": DATASET_2,
- "projectId": self.PROJECT,
- },
- "friendlyName": "Two",
- },
- ],
- }
- creds = _make_credentials()
- client = self._make_one(self.PROJECT, creds)
- conn = client._connection = make_connection(DATA)
-
- iterator = client.list_datasets()
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- page = next(iterator.pages)
-
- final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None)
- datasets = list(page)
- token = iterator.next_page_token
-
- self.assertEqual(len(datasets), len(DATA["datasets"]))
- for found, expected in zip(datasets, DATA["datasets"]):
- self.assertIsInstance(found, DatasetListItem)
- self.assertEqual(found.full_dataset_id, expected["id"])
- self.assertEqual(found.friendly_name, expected["friendlyName"])
- self.assertEqual(token, TOKEN)
-
- conn.api_request.assert_called_once_with(
- method="GET", path="/%s" % PATH, query_params={}, timeout=None
- )
-
- def test_list_datasets_w_project_and_timeout(self):
- creds = _make_credentials()
- client = self._make_one(self.PROJECT, creds)
- conn = client._connection = make_connection({})
-
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- list(client.list_datasets(project="other-project", timeout=7.5))
-
- final_attributes.assert_called_once_with(
- {"path": "/projects/other-project/datasets"}, client, None
- )
-
- conn.api_request.assert_called_once_with(
- method="GET",
- path="/projects/other-project/datasets",
- query_params={},
- timeout=7.5,
- )
-
- def test_list_datasets_explicit_response_missing_datasets_key(self):
- PATH = "projects/%s/datasets" % self.PROJECT
- TOKEN = "TOKEN"
- FILTER = "FILTER"
- DATA = {}
- creds = _make_credentials()
- client = self._make_one(self.PROJECT, creds)
- conn = client._connection = make_connection(DATA)
-
- iterator = client.list_datasets(
- include_all=True, filter=FILTER, max_results=3, page_token=TOKEN
- )
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- page = next(iterator.pages)
-
- final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None)
- datasets = list(page)
- token = iterator.next_page_token
-
- self.assertEqual(len(datasets), 0)
- self.assertIsNone(token)
-
- conn.api_request.assert_called_once_with(
- method="GET",
- path="/%s" % PATH,
- query_params={
- "all": True,
- "filter": FILTER,
- "maxResults": 3,
- "pageToken": TOKEN,
- },
- timeout=None,
- )
-
def test_dataset_with_specified_project(self):
from google.cloud.bigquery.dataset import DatasetReference
@@ -817,7 +610,7 @@ def test_get_dataset(self):
@unittest.skipIf(
bigquery_storage is None, "Requires `google-cloud-bigquery-storage`"
)
- def test_create_bqstorage_client(self):
+ def test_ensure_bqstorage_client_creating_new_instance(self):
mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
mock_client_instance = object()
mock_client.return_value = mock_client_instance
@@ -827,12 +620,19 @@ def test_create_bqstorage_client(self):
with mock.patch(
"google.cloud.bigquery_storage.BigQueryReadClient", mock_client
):
- bqstorage_client = client._create_bqstorage_client()
+ bqstorage_client = client._ensure_bqstorage_client(
+ client_options=mock.sentinel.client_options,
+ client_info=mock.sentinel.client_info,
+ )
self.assertIs(bqstorage_client, mock_client_instance)
- mock_client.assert_called_once_with(credentials=creds)
+ mock_client.assert_called_once_with(
+ credentials=creds,
+ client_options=mock.sentinel.client_options,
+ client_info=mock.sentinel.client_info,
+ )
- def test_create_bqstorage_client_missing_dependency(self):
+ def test_ensure_bqstorage_client_missing_dependency(self):
creds = _make_credentials()
client = self._make_one(project=self.PROJECT, credentials=creds)
@@ -845,7 +645,7 @@ def fail_bqstorage_import(name, globals, locals, fromlist, level):
no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import)
with no_bqstorage, warnings.catch_warnings(record=True) as warned:
- bqstorage_client = client._create_bqstorage_client()
+ bqstorage_client = client._ensure_bqstorage_client()
self.assertIsNone(bqstorage_client)
matching_warnings = [
@@ -856,489 +656,115 @@ def fail_bqstorage_import(name, globals, locals, fromlist, level):
]
assert matching_warnings, "Missing dependency warning not raised."
- def test_create_dataset_minimal(self):
- from google.cloud.bigquery.dataset import Dataset
+ @unittest.skipIf(
+ bigquery_storage is None, "Requires `google-cloud-bigquery-storage`"
+ )
+ def test_ensure_bqstorage_client_obsolete_dependency(self):
+ from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError
- PATH = "projects/%s/datasets" % self.PROJECT
- RESOURCE = {
- "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID},
- "etag": "etag",
- "id": "%s:%s" % (self.PROJECT, self.DS_ID),
- }
creds = _make_credentials()
client = self._make_one(project=self.PROJECT, credentials=creds)
- conn = client._connection = make_connection(RESOURCE)
-
- ds_ref = DatasetReference(self.PROJECT, self.DS_ID)
- before = Dataset(ds_ref)
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- after = client.create_dataset(before, timeout=7.5)
-
- final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None)
-
- self.assertEqual(after.dataset_id, self.DS_ID)
- self.assertEqual(after.project, self.PROJECT)
- self.assertEqual(after.etag, RESOURCE["etag"])
- self.assertEqual(after.full_dataset_id, RESOURCE["id"])
- conn.api_request.assert_called_once_with(
- method="POST",
- path="/%s" % PATH,
- data={
- "datasetReference": {
- "projectId": self.PROJECT,
- "datasetId": self.DS_ID,
- },
- "labels": {},
- },
- timeout=7.5,
+ patcher = mock.patch(
+ "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version",
+ side_effect=LegacyBigQueryStorageError("BQ Storage too old"),
)
+ with patcher, warnings.catch_warnings(record=True) as warned:
+ bqstorage_client = client._ensure_bqstorage_client()
- def test_create_dataset_w_attrs(self):
- from google.cloud.bigquery.dataset import Dataset, AccessEntry
+ self.assertIsNone(bqstorage_client)
+ matching_warnings = [
+ warning for warning in warned if "BQ Storage too old" in str(warning)
+ ]
+ assert matching_warnings, "Obsolete dependency warning not raised."
- PATH = "projects/%s/datasets" % self.PROJECT
- DESCRIPTION = "DESC"
- FRIENDLY_NAME = "FN"
- LOCATION = "US"
- USER_EMAIL = "phred@example.com"
- LABELS = {"color": "red"}
- VIEW = {
- "projectId": "my-proj",
- "datasetId": "starry-skies",
- "tableId": "northern-hemisphere",
- }
- RESOURCE = {
- "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID},
- "etag": "etag",
- "id": "%s:%s" % (self.PROJECT, self.DS_ID),
- "description": DESCRIPTION,
- "friendlyName": FRIENDLY_NAME,
- "location": LOCATION,
- "defaultTableExpirationMs": "3600",
- "labels": LABELS,
- "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}],
- }
+ @unittest.skipIf(
+ bigquery_storage is None, "Requires `google-cloud-bigquery-storage`"
+ )
+ def test_ensure_bqstorage_client_existing_client_check_passes(self):
creds = _make_credentials()
client = self._make_one(project=self.PROJECT, credentials=creds)
- conn = client._connection = make_connection(RESOURCE)
- entries = [
- AccessEntry("OWNER", "userByEmail", USER_EMAIL),
- AccessEntry(None, "view", VIEW),
- ]
+ mock_storage_client = mock.sentinel.mock_storage_client
- ds_ref = DatasetReference(self.PROJECT, self.DS_ID)
- before = Dataset(ds_ref)
- before.access_entries = entries
- before.description = DESCRIPTION
- before.friendly_name = FRIENDLY_NAME
- before.default_table_expiration_ms = 3600
- before.location = LOCATION
- before.labels = LABELS
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- after = client.create_dataset(before)
+ bqstorage_client = client._ensure_bqstorage_client(
+ bqstorage_client=mock_storage_client
+ )
- final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None)
+ self.assertIs(bqstorage_client, mock_storage_client)
- self.assertEqual(after.dataset_id, self.DS_ID)
- self.assertEqual(after.project, self.PROJECT)
- self.assertEqual(after.etag, RESOURCE["etag"])
- self.assertEqual(after.full_dataset_id, RESOURCE["id"])
- self.assertEqual(after.description, DESCRIPTION)
- self.assertEqual(after.friendly_name, FRIENDLY_NAME)
- self.assertEqual(after.location, LOCATION)
- self.assertEqual(after.default_table_expiration_ms, 3600)
- self.assertEqual(after.labels, LABELS)
+ @unittest.skipIf(
+ bigquery_storage is None, "Requires `google-cloud-bigquery-storage`"
+ )
+ def test_ensure_bqstorage_client_existing_client_check_fails(self):
+ from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError
- conn.api_request.assert_called_once_with(
- method="POST",
- path="/%s" % PATH,
- data={
- "datasetReference": {
- "projectId": self.PROJECT,
- "datasetId": self.DS_ID,
- },
- "description": DESCRIPTION,
- "friendlyName": FRIENDLY_NAME,
- "location": LOCATION,
- "defaultTableExpirationMs": "3600",
- "access": [
- {"role": "OWNER", "userByEmail": USER_EMAIL},
- {"view": VIEW},
- ],
- "labels": LABELS,
- },
- timeout=None,
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+ mock_storage_client = mock.sentinel.mock_storage_client
+
+ patcher = mock.patch(
+ "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version",
+ side_effect=LegacyBigQueryStorageError("BQ Storage too old"),
)
+ with patcher, warnings.catch_warnings(record=True) as warned:
+ bqstorage_client = client._ensure_bqstorage_client(mock_storage_client)
- def test_create_dataset_w_custom_property(self):
- # The library should handle sending properties to the API that are not
- # yet part of the library
- from google.cloud.bigquery.dataset import Dataset
+ self.assertIsNone(bqstorage_client)
+ matching_warnings = [
+ warning for warning in warned if "BQ Storage too old" in str(warning)
+ ]
+ assert matching_warnings, "Obsolete dependency warning not raised."
+
+ def test_create_routine_w_minimal_resource(self):
+ from google.cloud.bigquery.routine import Routine
+ from google.cloud.bigquery.routine import RoutineReference
- path = "/projects/%s/datasets" % self.PROJECT
+ creds = _make_credentials()
+ path = "/projects/test-routine-project/datasets/test_routines/routines"
resource = {
- "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID},
- "newAlphaProperty": "unreleased property",
+ "routineReference": {
+ "projectId": "test-routine-project",
+ "datasetId": "test_routines",
+ "routineId": "minimal_routine",
+ }
}
- creds = _make_credentials()
client = self._make_one(project=self.PROJECT, credentials=creds)
conn = client._connection = make_connection(resource)
-
- ds_ref = DatasetReference(self.PROJECT, self.DS_ID)
- before = Dataset(ds_ref)
- before._properties["newAlphaProperty"] = "unreleased property"
+ full_routine_id = "test-routine-project.test_routines.minimal_routine"
+ routine = Routine(full_routine_id)
with mock.patch(
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
) as final_attributes:
- after = client.create_dataset(before)
+ actual_routine = client.create_routine(routine, timeout=7.5)
final_attributes.assert_called_once_with({"path": path}, client, None)
- self.assertEqual(after.dataset_id, self.DS_ID)
- self.assertEqual(after.project, self.PROJECT)
- self.assertEqual(after._properties["newAlphaProperty"], "unreleased property")
-
conn.api_request.assert_called_once_with(
- method="POST",
- path=path,
- data={
- "datasetReference": {
- "projectId": self.PROJECT,
- "datasetId": self.DS_ID,
- },
- "newAlphaProperty": "unreleased property",
- "labels": {},
- },
- timeout=None,
+ method="POST", path=path, data=resource, timeout=7.5,
+ )
+ self.assertEqual(
+ actual_routine.reference, RoutineReference.from_string(full_routine_id)
)
- def test_create_dataset_w_client_location_wo_dataset_location(self):
- from google.cloud.bigquery.dataset import Dataset
+ def test_create_routine_w_conflict(self):
+ from google.cloud.bigquery.routine import Routine
- PATH = "projects/%s/datasets" % self.PROJECT
- RESOURCE = {
- "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID},
- "etag": "etag",
- "id": "%s:%s" % (self.PROJECT, self.DS_ID),
- "location": self.LOCATION,
- }
creds = _make_credentials()
- client = self._make_one(
- project=self.PROJECT, credentials=creds, location=self.LOCATION
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+ conn = client._connection = make_connection(
+ google.api_core.exceptions.AlreadyExists("routine already exists")
)
- conn = client._connection = make_connection(RESOURCE)
-
- ds_ref = DatasetReference(self.PROJECT, self.DS_ID)
- before = Dataset(ds_ref)
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- after = client.create_dataset(before)
+ path = "/projects/test-routine-project/datasets/test_routines/routines"
+ full_routine_id = "test-routine-project.test_routines.minimal_routine"
+ routine = Routine(full_routine_id)
- final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None)
+ with pytest.raises(google.api_core.exceptions.AlreadyExists):
+ with mock.patch(
+ "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
+ ) as final_attributes:
+ client.create_routine(routine)
- self.assertEqual(after.dataset_id, self.DS_ID)
- self.assertEqual(after.project, self.PROJECT)
- self.assertEqual(after.etag, RESOURCE["etag"])
- self.assertEqual(after.full_dataset_id, RESOURCE["id"])
- self.assertEqual(after.location, self.LOCATION)
-
- conn.api_request.assert_called_once_with(
- method="POST",
- path="/%s" % PATH,
- data={
- "datasetReference": {
- "projectId": self.PROJECT,
- "datasetId": self.DS_ID,
- },
- "labels": {},
- "location": self.LOCATION,
- },
- timeout=None,
- )
-
- def test_create_dataset_w_client_location_w_dataset_location(self):
- from google.cloud.bigquery.dataset import Dataset
-
- PATH = "projects/%s/datasets" % self.PROJECT
- OTHER_LOCATION = "EU"
- RESOURCE = {
- "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID},
- "etag": "etag",
- "id": "%s:%s" % (self.PROJECT, self.DS_ID),
- "location": OTHER_LOCATION,
- }
- creds = _make_credentials()
- client = self._make_one(
- project=self.PROJECT, credentials=creds, location=self.LOCATION
- )
- conn = client._connection = make_connection(RESOURCE)
-
- ds_ref = DatasetReference(self.PROJECT, self.DS_ID)
- before = Dataset(ds_ref)
- before.location = OTHER_LOCATION
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- after = client.create_dataset(before)
-
- final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None)
-
- self.assertEqual(after.dataset_id, self.DS_ID)
- self.assertEqual(after.project, self.PROJECT)
- self.assertEqual(after.etag, RESOURCE["etag"])
- self.assertEqual(after.full_dataset_id, RESOURCE["id"])
- self.assertEqual(after.location, OTHER_LOCATION)
-
- conn.api_request.assert_called_once_with(
- method="POST",
- path="/%s" % PATH,
- data={
- "datasetReference": {
- "projectId": self.PROJECT,
- "datasetId": self.DS_ID,
- },
- "labels": {},
- "location": OTHER_LOCATION,
- },
- timeout=None,
- )
-
- def test_create_dataset_w_reference(self):
- path = "/projects/%s/datasets" % self.PROJECT
- resource = {
- "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID},
- "etag": "etag",
- "id": "%s:%s" % (self.PROJECT, self.DS_ID),
- "location": self.LOCATION,
- }
- creds = _make_credentials()
- client = self._make_one(
- project=self.PROJECT, credentials=creds, location=self.LOCATION
- )
- conn = client._connection = make_connection(resource)
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- dataset = client.create_dataset(DatasetReference(self.PROJECT, self.DS_ID))
-
- final_attributes.assert_called_once_with({"path": path}, client, None)
-
- self.assertEqual(dataset.dataset_id, self.DS_ID)
- self.assertEqual(dataset.project, self.PROJECT)
- self.assertEqual(dataset.etag, resource["etag"])
- self.assertEqual(dataset.full_dataset_id, resource["id"])
- self.assertEqual(dataset.location, self.LOCATION)
-
- conn.api_request.assert_called_once_with(
- method="POST",
- path=path,
- data={
- "datasetReference": {
- "projectId": self.PROJECT,
- "datasetId": self.DS_ID,
- },
- "labels": {},
- "location": self.LOCATION,
- },
- timeout=None,
- )
-
- def test_create_dataset_w_fully_qualified_string(self):
- path = "/projects/%s/datasets" % self.PROJECT
- resource = {
- "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID},
- "etag": "etag",
- "id": "%s:%s" % (self.PROJECT, self.DS_ID),
- "location": self.LOCATION,
- }
- creds = _make_credentials()
- client = self._make_one(
- project=self.PROJECT, credentials=creds, location=self.LOCATION
- )
- conn = client._connection = make_connection(resource)
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- dataset = client.create_dataset("{}.{}".format(self.PROJECT, self.DS_ID))
-
- final_attributes.assert_called_once_with({"path": path}, client, None)
-
- self.assertEqual(dataset.dataset_id, self.DS_ID)
- self.assertEqual(dataset.project, self.PROJECT)
- self.assertEqual(dataset.etag, resource["etag"])
- self.assertEqual(dataset.full_dataset_id, resource["id"])
- self.assertEqual(dataset.location, self.LOCATION)
-
- conn.api_request.assert_called_once_with(
- method="POST",
- path=path,
- data={
- "datasetReference": {
- "projectId": self.PROJECT,
- "datasetId": self.DS_ID,
- },
- "labels": {},
- "location": self.LOCATION,
- },
- timeout=None,
- )
-
- def test_create_dataset_w_string(self):
- path = "/projects/%s/datasets" % self.PROJECT
- resource = {
- "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID},
- "etag": "etag",
- "id": "%s:%s" % (self.PROJECT, self.DS_ID),
- "location": self.LOCATION,
- }
- creds = _make_credentials()
- client = self._make_one(
- project=self.PROJECT, credentials=creds, location=self.LOCATION
- )
- conn = client._connection = make_connection(resource)
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- dataset = client.create_dataset(self.DS_ID)
-
- final_attributes.assert_called_once_with({"path": path}, client, None)
-
- self.assertEqual(dataset.dataset_id, self.DS_ID)
- self.assertEqual(dataset.project, self.PROJECT)
- self.assertEqual(dataset.etag, resource["etag"])
- self.assertEqual(dataset.full_dataset_id, resource["id"])
- self.assertEqual(dataset.location, self.LOCATION)
-
- conn.api_request.assert_called_once_with(
- method="POST",
- path=path,
- data={
- "datasetReference": {
- "projectId": self.PROJECT,
- "datasetId": self.DS_ID,
- },
- "labels": {},
- "location": self.LOCATION,
- },
- timeout=None,
- )
-
- def test_create_dataset_alreadyexists_w_exists_ok_false(self):
- creds = _make_credentials()
- client = self._make_one(
- project=self.PROJECT, credentials=creds, location=self.LOCATION
- )
- client._connection = make_connection(
- google.api_core.exceptions.AlreadyExists("dataset already exists")
- )
-
- with pytest.raises(google.api_core.exceptions.AlreadyExists):
- client.create_dataset(self.DS_ID)
-
- def test_create_dataset_alreadyexists_w_exists_ok_true(self):
- post_path = "/projects/{}/datasets".format(self.PROJECT)
- get_path = "/projects/{}/datasets/{}".format(self.PROJECT, self.DS_ID)
- resource = {
- "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID},
- "etag": "etag",
- "id": "{}:{}".format(self.PROJECT, self.DS_ID),
- "location": self.LOCATION,
- }
- creds = _make_credentials()
- client = self._make_one(
- project=self.PROJECT, credentials=creds, location=self.LOCATION
- )
- conn = client._connection = make_connection(
- google.api_core.exceptions.AlreadyExists("dataset already exists"), resource
- )
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- dataset = client.create_dataset(self.DS_ID, exists_ok=True)
-
- final_attributes.assert_called_with({"path": get_path}, client, None)
-
- self.assertEqual(dataset.dataset_id, self.DS_ID)
- self.assertEqual(dataset.project, self.PROJECT)
- self.assertEqual(dataset.etag, resource["etag"])
- self.assertEqual(dataset.full_dataset_id, resource["id"])
- self.assertEqual(dataset.location, self.LOCATION)
-
- conn.api_request.assert_has_calls(
- [
- mock.call(
- method="POST",
- path=post_path,
- data={
- "datasetReference": {
- "projectId": self.PROJECT,
- "datasetId": self.DS_ID,
- },
- "labels": {},
- "location": self.LOCATION,
- },
- timeout=None,
- ),
- mock.call(method="GET", path=get_path, timeout=None),
- ]
- )
-
- def test_create_routine_w_minimal_resource(self):
- from google.cloud.bigquery.routine import Routine
- from google.cloud.bigquery.routine import RoutineReference
-
- creds = _make_credentials()
- path = "/projects/test-routine-project/datasets/test_routines/routines"
- resource = {
- "routineReference": {
- "projectId": "test-routine-project",
- "datasetId": "test_routines",
- "routineId": "minimal_routine",
- }
- }
- client = self._make_one(project=self.PROJECT, credentials=creds)
- conn = client._connection = make_connection(resource)
- full_routine_id = "test-routine-project.test_routines.minimal_routine"
- routine = Routine(full_routine_id)
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- actual_routine = client.create_routine(routine, timeout=7.5)
-
- final_attributes.assert_called_once_with({"path": path}, client, None)
-
- conn.api_request.assert_called_once_with(
- method="POST", path=path, data=resource, timeout=7.5,
- )
- self.assertEqual(
- actual_routine.reference, RoutineReference.from_string(full_routine_id)
- )
-
- def test_create_routine_w_conflict(self):
- from google.cloud.bigquery.routine import Routine
-
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- conn = client._connection = make_connection(
- google.api_core.exceptions.AlreadyExists("routine already exists")
- )
- path = "/projects/test-routine-project/datasets/test_routines/routines"
- full_routine_id = "test-routine-project.test_routines.minimal_routine"
- routine = Routine(full_routine_id)
-
- with pytest.raises(google.api_core.exceptions.AlreadyExists):
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- client.create_routine(routine)
-
- final_attributes.assert_called_once_with({"path": path}, client, None)
+ final_attributes.assert_called_once_with({"path": path}, client, None)
resource = {
"routineReference": {
@@ -1348,7 +774,7 @@ def test_create_routine_w_conflict(self):
}
}
conn.api_request.assert_called_once_with(
- method="POST", path=path, data=resource, timeout=None,
+ method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT,
)
@unittest.skipIf(opentelemetry is None, "Requires `opentelemetry`")
@@ -1384,7 +810,7 @@ def test_span_status_is_set(self):
}
}
conn.api_request.assert_called_once_with(
- method="POST", path=path, data=resource, timeout=None,
+ method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT,
)
def test_create_routine_w_conflict_exists_ok(self):
@@ -1420,11 +846,13 @@ def test_create_routine_w_conflict_exists_ok(self):
self.assertEqual(actual_routine.routine_id, "minimal_routine")
conn.api_request.assert_has_calls(
[
- mock.call(method="POST", path=path, data=resource, timeout=None,),
+ mock.call(
+ method="POST", path=path, data=resource, timeout=DEFAULT_TIMEOUT,
+ ),
mock.call(
method="GET",
path="/projects/test-routine-project/datasets/test_routines/routines/minimal_routine",
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
),
]
)
@@ -1500,7 +928,7 @@ def test_create_table_w_custom_property(self):
"newAlphaProperty": "unreleased property",
"labels": {},
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertEqual(got._properties["newAlphaProperty"], "unreleased property")
self.assertEqual(got.table_id, self.TABLE_ID)
@@ -1541,7 +969,7 @@ def test_create_table_w_encryption_configuration(self):
"labels": {},
"encryptionConfiguration": {"kmsKeyName": self.KMS_KEY_NAME},
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertEqual(got.table_id, self.TABLE_ID)
@@ -1577,7 +1005,7 @@ def test_create_table_w_day_partition_and_expire(self):
"timePartitioning": {"type": "DAY", "expirationMs": "100"},
"labels": {},
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertEqual(table.time_partitioning.type_, "DAY")
self.assertEqual(table.time_partitioning.expiration_ms, 100)
@@ -1600,13 +1028,13 @@ def test_create_table_w_schema_and_query(self):
"name": "full_name",
"type": "STRING",
"mode": "REQUIRED",
- "description": None,
+ "policyTags": {"names": []},
},
{
"name": "age",
"type": "INTEGER",
"mode": "REQUIRED",
- "description": None,
+ "policyTags": {"names": []},
},
]
},
@@ -1645,20 +1073,20 @@ def test_create_table_w_schema_and_query(self):
"name": "full_name",
"type": "STRING",
"mode": "REQUIRED",
- "description": None,
+ "policyTags": {"names": []},
},
{
"name": "age",
"type": "INTEGER",
"mode": "REQUIRED",
- "description": None,
+ "policyTags": {"names": []},
},
]
},
"view": {"query": query, "useLegacySql": False},
"labels": {},
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertEqual(got.table_id, self.TABLE_ID)
self.assertEqual(got.project, self.PROJECT)
@@ -1713,7 +1141,7 @@ def test_create_table_w_external(self):
},
"labels": {},
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertEqual(got.table_id, self.TABLE_ID)
self.assertEqual(got.project, self.PROJECT)
@@ -1752,7 +1180,7 @@ def test_create_table_w_reference(self):
},
"labels": {},
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertEqual(got.table_id, self.TABLE_ID)
@@ -1786,7 +1214,7 @@ def test_create_table_w_fully_qualified_string(self):
},
"labels": {},
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertEqual(got.table_id, self.TABLE_ID)
@@ -1818,7 +1246,7 @@ def test_create_table_w_string(self):
},
"labels": {},
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertEqual(got.table_id, self.TABLE_ID)
@@ -1853,7 +1281,7 @@ def test_create_table_alreadyexists_w_exists_ok_false(self):
},
"labels": {},
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_create_table_alreadyexists_w_exists_ok_true(self):
@@ -1896,9 +1324,9 @@ def test_create_table_alreadyexists_w_exists_ok_true(self):
},
"labels": {},
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
),
- mock.call(method="GET", path=get_path, timeout=None),
+ mock.call(method="GET", path=get_path, timeout=DEFAULT_TIMEOUT),
]
)
@@ -1971,7 +1399,7 @@ def test_get_model_w_string(self):
final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None)
conn.api_request.assert_called_once_with(
- method="GET", path="/%s" % path, timeout=None
+ method="GET", path="/%s" % path, timeout=DEFAULT_TIMEOUT
)
self.assertEqual(got.model_id, self.MODEL_ID)
@@ -2057,6 +1485,7 @@ def test_get_table_sets_user_agent(self):
url=mock.ANY, method=mock.ANY, headers=mock.ANY, data=mock.ANY
)
http.reset_mock()
+ http.is_mtls = False
mock_response.status_code = 200
mock_response.json.return_value = self._make_table_resource()
user_agent_override = client_info.ClientInfo(user_agent="my-application/1.2.3")
@@ -2079,7 +1508,7 @@ def test_get_table_sets_user_agent(self):
"User-Agent": expected_user_agent,
},
data=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertIn("my-application/1.2.3", expected_user_agent)
@@ -2422,7 +1851,7 @@ def test_update_dataset_w_custom_property(self):
data={"newAlphaProperty": "unreleased property"},
path=path,
headers=None,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertEqual(dataset.dataset_id, self.DS_ID)
@@ -2596,12 +2025,14 @@ def test_update_table(self):
"type": "STRING",
"mode": "REQUIRED",
"description": None,
+ "policyTags": {"names": []},
},
{
"name": "age",
"type": "INTEGER",
"mode": "REQUIRED",
- "description": None,
+ "description": "New field description",
+ "policyTags": {"names": []},
},
]
},
@@ -2612,8 +2043,10 @@ def test_update_table(self):
}
)
schema = [
- SchemaField("full_name", "STRING", mode="REQUIRED"),
- SchemaField("age", "INTEGER", mode="REQUIRED"),
+ SchemaField("full_name", "STRING", mode="REQUIRED", description=None),
+ SchemaField(
+ "age", "INTEGER", mode="REQUIRED", description="New field description"
+ ),
]
creds = _make_credentials()
client = self._make_one(project=self.PROJECT, credentials=creds)
@@ -2641,12 +2074,14 @@ def test_update_table(self):
"type": "STRING",
"mode": "REQUIRED",
"description": None,
+ "policyTags": {"names": []},
},
{
"name": "age",
"type": "INTEGER",
"mode": "REQUIRED",
- "description": None,
+ "description": "New field description",
+ "policyTags": {"names": []},
},
]
},
@@ -2706,7 +2141,7 @@ def test_update_table_w_custom_property(self):
path="/%s" % path,
data={"newAlphaProperty": "unreleased property"},
headers=None,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertEqual(
updated_table._properties["newAlphaProperty"], "unreleased property"
@@ -2741,7 +2176,7 @@ def test_update_table_only_use_legacy_sql(self):
path="/%s" % path,
data={"view": {"useLegacySql": True}},
headers=None,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql)
@@ -2767,18 +2202,36 @@ def test_update_table_w_query(self):
"type": "STRING",
"mode": "REQUIRED",
"description": None,
+ "policyTags": {"names": []},
},
{
"name": "age",
"type": "INTEGER",
"mode": "REQUIRED",
- "description": None,
+ "description": "this is a column",
+ "policyTags": {"names": []},
+ },
+ {
+ "name": "country",
+ "type": "STRING",
+ "mode": "NULLABLE",
+ "policyTags": {"names": []},
},
]
}
schema = [
- SchemaField("full_name", "STRING", mode="REQUIRED"),
- SchemaField("age", "INTEGER", mode="REQUIRED"),
+ SchemaField(
+ "full_name",
+ "STRING",
+ mode="REQUIRED",
+ # Explicitly unset the description.
+ description=None,
+ ),
+ SchemaField(
+ "age", "INTEGER", mode="REQUIRED", description="this is a column"
+ ),
+ # Omit the description to not make updates to it.
+ SchemaField("country", "STRING"),
]
resource = self._make_table_resource()
resource.update(
@@ -2821,7 +2274,7 @@ def test_update_table_w_query(self):
"schema": schema_resource,
},
headers=None,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_update_table_w_schema_None(self):
@@ -2932,433 +2385,64 @@ def test_update_table_delete_property(self):
self.assertEqual(req[1]["data"], sent)
self.assertIsNone(table3.description)
- def test_list_tables_empty_w_timeout(self):
- path = "/projects/{}/datasets/{}/tables".format(self.PROJECT, self.DS_ID)
+ def test_delete_job_metadata_not_found(self):
creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- conn = client._connection = make_connection({})
+ client = self._make_one("client-proj", creds, location="client-loc")
+ conn = client._connection = make_connection(
+ google.api_core.exceptions.NotFound("job not found"),
+ google.api_core.exceptions.NotFound("job not found"),
+ )
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
- iterator = client.list_tables(dataset, timeout=7.5)
- self.assertIs(iterator.dataset, dataset)
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- page = next(iterator.pages)
-
- final_attributes.assert_called_once_with({"path": path}, client, None)
- tables = list(page)
- token = iterator.next_page_token
-
- self.assertEqual(tables, [])
- self.assertIsNone(token)
- conn.api_request.assert_called_once_with(
- method="GET", path=path, query_params={}, timeout=7.5
- )
-
- def test_list_models_empty_w_timeout(self):
- path = "/projects/{}/datasets/{}/models".format(self.PROJECT, self.DS_ID)
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- conn = client._connection = make_connection({})
-
- dataset_id = "{}.{}".format(self.PROJECT, self.DS_ID)
- iterator = client.list_models(dataset_id, timeout=7.5)
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- page = next(iterator.pages)
-
- final_attributes.assert_called_once_with({"path": path}, client, None)
- models = list(page)
- token = iterator.next_page_token
-
- self.assertEqual(models, [])
- self.assertIsNone(token)
- conn.api_request.assert_called_once_with(
- method="GET", path=path, query_params={}, timeout=7.5
- )
-
- def test_list_models_defaults(self):
- from google.cloud.bigquery.model import Model
-
- MODEL_1 = "model_one"
- MODEL_2 = "model_two"
- PATH = "projects/%s/datasets/%s/models" % (self.PROJECT, self.DS_ID)
- TOKEN = "TOKEN"
- DATA = {
- "nextPageToken": TOKEN,
- "models": [
- {
- "modelReference": {
- "modelId": MODEL_1,
- "datasetId": self.DS_ID,
- "projectId": self.PROJECT,
- }
- },
- {
- "modelReference": {
- "modelId": MODEL_2,
- "datasetId": self.DS_ID,
- "projectId": self.PROJECT,
- }
- },
- ],
- }
-
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- conn = client._connection = make_connection(DATA)
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
-
- iterator = client.list_models(dataset)
- self.assertIs(iterator.dataset, dataset)
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- page = next(iterator.pages)
-
- final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None)
- models = list(page)
- token = iterator.next_page_token
+ with self.assertRaises(google.api_core.exceptions.NotFound):
+ client.delete_job_metadata("my-job")
- self.assertEqual(len(models), len(DATA["models"]))
- for found, expected in zip(models, DATA["models"]):
- self.assertIsInstance(found, Model)
- self.assertEqual(found.model_id, expected["modelReference"]["modelId"])
- self.assertEqual(token, TOKEN)
+ conn.api_request.reset_mock()
+ client.delete_job_metadata("my-job", not_found_ok=True)
conn.api_request.assert_called_once_with(
- method="GET", path="/%s" % PATH, query_params={}, timeout=None
+ method="DELETE",
+ path="/projects/client-proj/jobs/my-job/delete",
+ query_params={"location": "client-loc"},
+ timeout=DEFAULT_TIMEOUT,
)
- def test_list_models_wrong_type(self):
+ def test_delete_job_metadata_with_id(self):
creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- with self.assertRaises(TypeError):
- client.list_models(DatasetReference(self.PROJECT, self.DS_ID).model("foo"))
-
- def test_list_routines_empty_w_timeout(self):
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
+ client = self._make_one(self.PROJECT, creds)
conn = client._connection = make_connection({})
- iterator = client.list_routines("test-routines.test_routines", timeout=7.5)
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- page = next(iterator.pages)
-
- final_attributes.assert_called_once_with(
- {"path": "/projects/test-routines/datasets/test_routines/routines"},
- client,
- None,
- )
- routines = list(page)
- token = iterator.next_page_token
-
- self.assertEqual(routines, [])
- self.assertIsNone(token)
- conn.api_request.assert_called_once_with(
- method="GET",
- path="/projects/test-routines/datasets/test_routines/routines",
- query_params={},
- timeout=7.5,
- )
-
- def test_list_routines_defaults(self):
- from google.cloud.bigquery.routine import Routine
-
- project_id = "test-routines"
- dataset_id = "test_routines"
- path = "/projects/test-routines/datasets/test_routines/routines"
- routine_1 = "routine_one"
- routine_2 = "routine_two"
- token = "TOKEN"
- resource = {
- "nextPageToken": token,
- "routines": [
- {
- "routineReference": {
- "routineId": routine_1,
- "datasetId": dataset_id,
- "projectId": project_id,
- }
- },
- {
- "routineReference": {
- "routineId": routine_2,
- "datasetId": dataset_id,
- "projectId": project_id,
- }
- },
- ],
- }
-
- creds = _make_credentials()
- client = self._make_one(project=project_id, credentials=creds)
- conn = client._connection = make_connection(resource)
- dataset = DatasetReference(client.project, dataset_id)
-
- iterator = client.list_routines(dataset)
- self.assertIs(iterator.dataset, dataset)
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- page = next(iterator.pages)
-
- final_attributes.assert_called_once_with({"path": path}, client, None)
- routines = list(page)
- actual_token = iterator.next_page_token
-
- self.assertEqual(len(routines), len(resource["routines"]))
- for found, expected in zip(routines, resource["routines"]):
- self.assertIsInstance(found, Routine)
- self.assertEqual(
- found.routine_id, expected["routineReference"]["routineId"]
- )
- self.assertEqual(actual_token, token)
-
- conn.api_request.assert_called_once_with(
- method="GET", path=path, query_params={}, timeout=None
- )
-
- def test_list_routines_wrong_type(self):
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- with self.assertRaises(TypeError):
- client.list_routines(
- DatasetReference(self.PROJECT, self.DS_ID).table("foo")
- )
-
- def test_list_tables_defaults(self):
- from google.cloud.bigquery.table import TableListItem
-
- TABLE_1 = "table_one"
- TABLE_2 = "table_two"
- PATH = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID)
- TOKEN = "TOKEN"
- DATA = {
- "nextPageToken": TOKEN,
- "tables": [
- {
- "kind": "bigquery#table",
- "id": "%s:%s.%s" % (self.PROJECT, self.DS_ID, TABLE_1),
- "tableReference": {
- "tableId": TABLE_1,
- "datasetId": self.DS_ID,
- "projectId": self.PROJECT,
- },
- "type": "TABLE",
- },
- {
- "kind": "bigquery#table",
- "id": "%s:%s.%s" % (self.PROJECT, self.DS_ID, TABLE_2),
- "tableReference": {
- "tableId": TABLE_2,
- "datasetId": self.DS_ID,
- "projectId": self.PROJECT,
- },
- "type": "TABLE",
- },
- ],
- }
-
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- conn = client._connection = make_connection(DATA)
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
-
- iterator = client.list_tables(dataset)
- self.assertIs(iterator.dataset, dataset)
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- page = next(iterator.pages)
-
- final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None)
- tables = list(page)
- token = iterator.next_page_token
-
- self.assertEqual(len(tables), len(DATA["tables"]))
- for found, expected in zip(tables, DATA["tables"]):
- self.assertIsInstance(found, TableListItem)
- self.assertEqual(found.full_table_id, expected["id"])
- self.assertEqual(found.table_type, expected["type"])
- self.assertEqual(token, TOKEN)
+ client.delete_job_metadata("my-job", project="param-proj", location="param-loc")
conn.api_request.assert_called_once_with(
- method="GET", path="/%s" % PATH, query_params={}, timeout=None
+ method="DELETE",
+ path="/projects/param-proj/jobs/my-job/delete",
+ query_params={"location": "param-loc"},
+ timeout=DEFAULT_TIMEOUT,
)
- def test_list_tables_explicit(self):
- from google.cloud.bigquery.table import TableListItem
+ def test_delete_job_metadata_with_resource(self):
+ from google.cloud.bigquery.job import QueryJob
- TABLE_1 = "table_one"
- TABLE_2 = "table_two"
- PATH = "projects/%s/datasets/%s/tables" % (self.PROJECT, self.DS_ID)
- TOKEN = "TOKEN"
- DATA = {
- "tables": [
- {
- "kind": "bigquery#dataset",
- "id": "%s:%s.%s" % (self.PROJECT, self.DS_ID, TABLE_1),
- "tableReference": {
- "tableId": TABLE_1,
- "datasetId": self.DS_ID,
- "projectId": self.PROJECT,
- },
- "type": "TABLE",
- },
- {
- "kind": "bigquery#dataset",
- "id": "%s:%s.%s" % (self.PROJECT, self.DS_ID, TABLE_2),
- "tableReference": {
- "tableId": TABLE_2,
- "datasetId": self.DS_ID,
- "projectId": self.PROJECT,
- },
- "type": "TABLE",
- },
- ]
+ query_resource = {
+ "jobReference": {
+ "projectId": "job-based-proj",
+ "jobId": "query_job",
+ "location": "us-east1",
+ },
+ "configuration": {"query": {}},
}
-
creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- conn = client._connection = make_connection(DATA)
- dataset = DatasetReference(self.PROJECT, self.DS_ID)
-
- iterator = client.list_tables(
- # Test with string for dataset ID.
- self.DS_ID,
- max_results=3,
- page_token=TOKEN,
- )
- self.assertEqual(iterator.dataset, dataset)
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- page = next(iterator.pages)
-
- final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None)
- tables = list(page)
- token = iterator.next_page_token
+ client = self._make_one(self.PROJECT, creds)
+ conn = client._connection = make_connection(query_resource)
+ job_from_resource = QueryJob.from_api_repr(query_resource, client)
- self.assertEqual(len(tables), len(DATA["tables"]))
- for found, expected in zip(tables, DATA["tables"]):
- self.assertIsInstance(found, TableListItem)
- self.assertEqual(found.full_table_id, expected["id"])
- self.assertEqual(found.table_type, expected["type"])
- self.assertIsNone(token)
+ client.delete_job_metadata(job_from_resource)
conn.api_request.assert_called_once_with(
- method="GET",
- path="/%s" % PATH,
- query_params={"maxResults": 3, "pageToken": TOKEN},
- timeout=None,
- )
-
- def test_list_tables_wrong_type(self):
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- with self.assertRaises(TypeError):
- client.list_tables(DatasetReference(self.PROJECT, self.DS_ID).table("foo"))
-
- def test_delete_dataset(self):
- from google.cloud.bigquery.dataset import Dataset
- from google.cloud.bigquery.dataset import DatasetReference
-
- ds_ref = DatasetReference(self.PROJECT, self.DS_ID)
- datasets = (ds_ref, Dataset(ds_ref), "{}.{}".format(self.PROJECT, self.DS_ID))
- PATH = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID)
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- conn = client._connection = make_connection(*([{}] * len(datasets)))
- for arg in datasets:
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- client.delete_dataset(arg, timeout=7.5)
-
- final_attributes.assert_called_once_with(
- {"path": "/%s" % PATH}, client, None
- )
-
- conn.api_request.assert_called_with(
- method="DELETE", path="/%s" % PATH, query_params={}, timeout=7.5
- )
-
- def test_delete_dataset_delete_contents(self):
- from google.cloud.bigquery.dataset import Dataset
-
- PATH = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID)
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- conn = client._connection = make_connection({}, {})
- ds_ref = DatasetReference(self.PROJECT, self.DS_ID)
- for arg in (ds_ref, Dataset(ds_ref)):
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- client.delete_dataset(arg, delete_contents=True)
-
- final_attributes.assert_called_once_with(
- {"path": "/%s" % PATH, "deleteContents": True}, client, None
- )
- conn.api_request.assert_called_with(
- method="DELETE",
- path="/%s" % PATH,
- query_params={"deleteContents": "true"},
- timeout=None,
- )
-
- def test_delete_dataset_wrong_type(self):
- creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
- with self.assertRaises(TypeError):
- client.delete_dataset(
- DatasetReference(self.PROJECT, self.DS_ID).table("foo")
- )
-
- def test_delete_dataset_w_not_found_ok_false(self):
- path = "/projects/{}/datasets/{}".format(self.PROJECT, self.DS_ID)
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
- conn = client._connection = make_connection(
- google.api_core.exceptions.NotFound("dataset not found")
- )
-
- with self.assertRaises(google.api_core.exceptions.NotFound):
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- client.delete_dataset(self.DS_ID)
-
- final_attributes.assert_called_once_with({"path": path}, client, None)
-
- conn.api_request.assert_called_with(
- method="DELETE", path=path, query_params={}, timeout=None
- )
-
- def test_delete_dataset_w_not_found_ok_true(self):
- path = "/projects/{}/datasets/{}".format(self.PROJECT, self.DS_ID)
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
- conn = client._connection = make_connection(
- google.api_core.exceptions.NotFound("dataset not found")
- )
-
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- client.delete_dataset(self.DS_ID, not_found_ok=True)
-
- final_attributes.assert_called_once_with({"path": path}, client, None)
-
- conn.api_request.assert_called_with(
- method="DELETE", path=path, query_params={}, timeout=None
+ method="DELETE",
+ path="/projects/job-based-proj/jobs/query_job/delete",
+ query_params={"location": "us-east1"},
+ timeout=DEFAULT_TIMEOUT,
)
def test_delete_model(self):
@@ -3413,7 +2497,9 @@ def test_delete_model_w_not_found_ok_false(self):
with self.assertRaises(google.api_core.exceptions.NotFound):
client.delete_model("{}.{}".format(self.DS_ID, self.MODEL_ID))
- conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None)
+ conn.api_request.assert_called_with(
+ method="DELETE", path=path, timeout=DEFAULT_TIMEOUT
+ )
def test_delete_model_w_not_found_ok_true(self):
path = "/projects/{}/datasets/{}/models/{}".format(
@@ -3434,7 +2520,9 @@ def test_delete_model_w_not_found_ok_true(self):
final_attributes.assert_called_once_with({"path": path}, client, None)
- conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None)
+ conn.api_request.assert_called_with(
+ method="DELETE", path=path, timeout=DEFAULT_TIMEOUT
+ )
def test_delete_routine(self):
from google.cloud.bigquery.routine import Routine
@@ -3488,7 +2576,7 @@ def test_delete_routine_w_not_found_ok_false(self):
final_attributes.assert_called_once_with({"path": path}, client, None)
conn.api_request.assert_called_with(
- method="DELETE", path=path, timeout=None,
+ method="DELETE", path=path, timeout=DEFAULT_TIMEOUT,
)
def test_delete_routine_w_not_found_ok_true(self):
@@ -3510,7 +2598,7 @@ def test_delete_routine_w_not_found_ok_true(self):
final_attributes.assert_called_once_with({"path": path}, client, None)
conn.api_request.assert_called_with(
- method="DELETE", path=path, timeout=None,
+ method="DELETE", path=path, timeout=DEFAULT_TIMEOUT,
)
def test_delete_table(self):
@@ -3574,7 +2662,9 @@ def test_delete_table_w_not_found_ok_false(self):
final_attributes.assert_called_once_with({"path": path}, client, None)
- conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None)
+ conn.api_request.assert_called_with(
+ method="DELETE", path=path, timeout=DEFAULT_TIMEOUT
+ )
def test_delete_table_w_not_found_ok_true(self):
path = "/projects/{}/datasets/{}/tables/{}".format(
@@ -3596,7 +2686,9 @@ def test_delete_table_w_not_found_ok_true(self):
final_attributes.assert_called_once_with({"path": path}, client, None)
- conn.api_request.assert_called_with(method="DELETE", path=path, timeout=None)
+ conn.api_request.assert_called_with(
+ method="DELETE", path=path, timeout=DEFAULT_TIMEOUT
+ )
def _create_job_helper(self, job_config):
from google.cloud.bigquery import _helpers
@@ -3618,7 +2710,7 @@ def _create_job_helper(self, job_config):
method="POST",
path="/projects/%s/jobs" % self.PROJECT,
data=RESOURCE,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_create_job_load_config(self):
@@ -3767,7 +2859,7 @@ def test_create_job_query_config_w_rateLimitExceeded_error(self):
method="POST",
path="/projects/PROJECT/jobs",
data=data_without_destination,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
),
)
@@ -3801,32 +2893,31 @@ def test_get_job_miss_w_explict_project(self):
conn = client._connection = make_connection()
with self.assertRaises(NotFound):
- client.get_job(JOB_ID, project=OTHER_PROJECT, location=self.LOCATION)
+ client.get_job(JOB_ID, project=OTHER_PROJECT)
conn.api_request.assert_called_once_with(
method="GET",
path="/projects/OTHER_PROJECT/jobs/NONESUCH",
- query_params={"projection": "full", "location": self.LOCATION},
- timeout=None,
+ query_params={"projection": "full"},
+ timeout=DEFAULT_TIMEOUT,
)
def test_get_job_miss_w_client_location(self):
from google.cloud.exceptions import NotFound
- OTHER_PROJECT = "OTHER_PROJECT"
JOB_ID = "NONESUCH"
creds = _make_credentials()
- client = self._make_one(self.PROJECT, creds, location=self.LOCATION)
+ client = self._make_one("client-proj", creds, location="client-loc")
conn = client._connection = make_connection()
with self.assertRaises(NotFound):
- client.get_job(JOB_ID, project=OTHER_PROJECT)
+ client.get_job(JOB_ID)
conn.api_request.assert_called_once_with(
method="GET",
- path="/projects/OTHER_PROJECT/jobs/NONESUCH",
- query_params={"projection": "full", "location": self.LOCATION},
- timeout=None,
+ path="/projects/client-proj/jobs/NONESUCH",
+ query_params={"projection": "full", "location": "client-loc"},
+ timeout=DEFAULT_TIMEOUT,
)
def test_get_job_hit_w_timeout(self):
@@ -3839,7 +2930,11 @@ def test_get_job_hit_w_timeout(self):
QUERY = "SELECT * from test_dataset:test_table"
ASYNC_QUERY_DATA = {
"id": "{}:{}".format(self.PROJECT, JOB_ID),
- "jobReference": {"projectId": self.PROJECT, "jobId": "query_job"},
+ "jobReference": {
+ "projectId": "resource-proj",
+ "jobId": "query_job",
+ "location": "us-east1",
+ },
"state": "DONE",
"configuration": {
"query": {
@@ -3857,18 +2952,21 @@ def test_get_job_hit_w_timeout(self):
creds = _make_credentials()
client = self._make_one(self.PROJECT, creds)
conn = client._connection = make_connection(ASYNC_QUERY_DATA)
+ job_from_resource = QueryJob.from_api_repr(ASYNC_QUERY_DATA, client)
- job = client.get_job(JOB_ID, timeout=7.5)
+ job = client.get_job(job_from_resource, timeout=7.5)
self.assertIsInstance(job, QueryJob)
self.assertEqual(job.job_id, JOB_ID)
+ self.assertEqual(job.project, "resource-proj")
+ self.assertEqual(job.location, "us-east1")
self.assertEqual(job.create_disposition, CreateDisposition.CREATE_IF_NEEDED)
self.assertEqual(job.write_disposition, WriteDisposition.WRITE_TRUNCATE)
conn.api_request.assert_called_once_with(
method="GET",
- path="/projects/PROJECT/jobs/query_job",
- query_params={"projection": "full"},
+ path="/projects/resource-proj/jobs/query_job",
+ query_params={"projection": "full", "location": "us-east1"},
timeout=7.5,
)
@@ -3888,7 +2986,7 @@ def test_cancel_job_miss_w_explict_project(self):
method="POST",
path="/projects/OTHER_PROJECT/jobs/NONESUCH/cancel",
query_params={"projection": "full", "location": self.LOCATION},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_cancel_job_miss_w_client_location(self):
@@ -3907,7 +3005,7 @@ def test_cancel_job_miss_w_client_location(self):
method="POST",
path="/projects/OTHER_PROJECT/jobs/NONESUCH/cancel",
query_params={"projection": "full", "location": self.LOCATION},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_cancel_job_hit(self):
@@ -3917,7 +3015,11 @@ def test_cancel_job_hit(self):
QUERY = "SELECT * from test_dataset:test_table"
QUERY_JOB_RESOURCE = {
"id": "{}:{}".format(self.PROJECT, JOB_ID),
- "jobReference": {"projectId": self.PROJECT, "jobId": "query_job"},
+ "jobReference": {
+ "projectId": "job-based-proj",
+ "jobId": "query_job",
+ "location": "asia-northeast1",
+ },
"state": "RUNNING",
"configuration": {"query": {"query": QUERY}},
}
@@ -3925,322 +3027,46 @@ def test_cancel_job_hit(self):
creds = _make_credentials()
client = self._make_one(self.PROJECT, creds)
conn = client._connection = make_connection(RESOURCE)
+ job_from_resource = QueryJob.from_api_repr(QUERY_JOB_RESOURCE, client)
- job = client.cancel_job(JOB_ID)
+ job = client.cancel_job(job_from_resource)
self.assertIsInstance(job, QueryJob)
self.assertEqual(job.job_id, JOB_ID)
+ self.assertEqual(job.project, "job-based-proj")
+ self.assertEqual(job.location, "asia-northeast1")
self.assertEqual(job.query, QUERY)
conn.api_request.assert_called_once_with(
method="POST",
- path="/projects/PROJECT/jobs/query_job/cancel",
- query_params={"projection": "full"},
- timeout=None,
- )
-
- def test_cancel_job_w_timeout(self):
- JOB_ID = "query_job"
- QUERY = "SELECT * from test_dataset:test_table"
- QUERY_JOB_RESOURCE = {
- "id": "{}:{}".format(self.PROJECT, JOB_ID),
- "jobReference": {"projectId": self.PROJECT, "jobId": "query_job"},
- "state": "RUNNING",
- "configuration": {"query": {"query": QUERY}},
- }
- RESOURCE = {"job": QUERY_JOB_RESOURCE}
-
- creds = _make_credentials()
- client = self._make_one(self.PROJECT, creds)
- conn = client._connection = make_connection(RESOURCE)
-
- client.cancel_job(JOB_ID, timeout=7.5)
-
- conn.api_request.assert_called_once_with(
- method="POST",
- path="/projects/{}/jobs/query_job/cancel".format(self.PROJECT),
- query_params={"projection": "full"},
- timeout=7.5,
- )
-
- def test_list_jobs_defaults(self):
- from google.cloud.bigquery.job import CopyJob
- from google.cloud.bigquery.job import CreateDisposition
- from google.cloud.bigquery.job import ExtractJob
- from google.cloud.bigquery.job import LoadJob
- from google.cloud.bigquery.job import QueryJob
- from google.cloud.bigquery.job import WriteDisposition
-
- SOURCE_TABLE = "source_table"
- DESTINATION_TABLE = "destination_table"
- QUERY_DESTINATION_TABLE = "query_destination_table"
- SOURCE_URI = "gs://test_bucket/src_object*"
- DESTINATION_URI = "gs://test_bucket/dst_object*"
- JOB_TYPES = {
- "load_job": LoadJob,
- "copy_job": CopyJob,
- "extract_job": ExtractJob,
- "query_job": QueryJob,
- }
- PATH = "projects/%s/jobs" % self.PROJECT
- TOKEN = "TOKEN"
- QUERY = "SELECT * from test_dataset:test_table"
- ASYNC_QUERY_DATA = {
- "id": "%s:%s" % (self.PROJECT, "query_job"),
- "jobReference": {"projectId": self.PROJECT, "jobId": "query_job"},
- "state": "DONE",
- "configuration": {
- "query": {
- "query": QUERY,
- "destinationTable": {
- "projectId": self.PROJECT,
- "datasetId": self.DS_ID,
- "tableId": QUERY_DESTINATION_TABLE,
- },
- "createDisposition": CreateDisposition.CREATE_IF_NEEDED,
- "writeDisposition": WriteDisposition.WRITE_TRUNCATE,
- }
- },
- }
- EXTRACT_DATA = {
- "id": "%s:%s" % (self.PROJECT, "extract_job"),
- "jobReference": {"projectId": self.PROJECT, "jobId": "extract_job"},
- "state": "DONE",
- "configuration": {
- "extract": {
- "sourceTable": {
- "projectId": self.PROJECT,
- "datasetId": self.DS_ID,
- "tableId": SOURCE_TABLE,
- },
- "destinationUris": [DESTINATION_URI],
- }
- },
- }
- COPY_DATA = {
- "id": "%s:%s" % (self.PROJECT, "copy_job"),
- "jobReference": {"projectId": self.PROJECT, "jobId": "copy_job"},
- "state": "DONE",
- "configuration": {
- "copy": {
- "sourceTables": [
- {
- "projectId": self.PROJECT,
- "datasetId": self.DS_ID,
- "tableId": SOURCE_TABLE,
- }
- ],
- "destinationTable": {
- "projectId": self.PROJECT,
- "datasetId": self.DS_ID,
- "tableId": DESTINATION_TABLE,
- },
- }
- },
- }
- LOAD_DATA = {
- "id": "%s:%s" % (self.PROJECT, "load_job"),
- "jobReference": {"projectId": self.PROJECT, "jobId": "load_job"},
- "state": "DONE",
- "configuration": {
- "load": {
- "destinationTable": {
- "projectId": self.PROJECT,
- "datasetId": self.DS_ID,
- "tableId": SOURCE_TABLE,
- },
- "sourceUris": [SOURCE_URI],
- }
- },
- }
- DATA = {
- "nextPageToken": TOKEN,
- "jobs": [ASYNC_QUERY_DATA, EXTRACT_DATA, COPY_DATA, LOAD_DATA],
- }
- creds = _make_credentials()
- client = self._make_one(self.PROJECT, creds)
- conn = client._connection = make_connection(DATA)
-
- iterator = client.list_jobs()
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- page = next(iterator.pages)
-
- final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None)
- jobs = list(page)
- token = iterator.next_page_token
-
- self.assertEqual(len(jobs), len(DATA["jobs"]))
- for found, expected in zip(jobs, DATA["jobs"]):
- name = expected["jobReference"]["jobId"]
- self.assertIsInstance(found, JOB_TYPES[name])
- self.assertEqual(found.job_id, name)
- self.assertEqual(token, TOKEN)
-
- conn.api_request.assert_called_once_with(
- method="GET",
- path="/%s" % PATH,
- query_params={"projection": "full"},
- timeout=None,
- )
-
- def test_list_jobs_load_job_wo_sourceUris(self):
- from google.cloud.bigquery.job import LoadJob
-
- SOURCE_TABLE = "source_table"
- JOB_TYPES = {"load_job": LoadJob}
- PATH = "projects/%s/jobs" % self.PROJECT
- TOKEN = "TOKEN"
- LOAD_DATA = {
- "id": "%s:%s" % (self.PROJECT, "load_job"),
- "jobReference": {"projectId": self.PROJECT, "jobId": "load_job"},
- "state": "DONE",
- "configuration": {
- "load": {
- "destinationTable": {
- "projectId": self.PROJECT,
- "datasetId": self.DS_ID,
- "tableId": SOURCE_TABLE,
- }
- }
- },
- }
- DATA = {"nextPageToken": TOKEN, "jobs": [LOAD_DATA]}
- creds = _make_credentials()
- client = self._make_one(self.PROJECT, creds)
- conn = client._connection = make_connection(DATA)
-
- iterator = client.list_jobs()
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- page = next(iterator.pages)
-
- final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None)
- jobs = list(page)
- token = iterator.next_page_token
-
- self.assertEqual(len(jobs), len(DATA["jobs"]))
- for found, expected in zip(jobs, DATA["jobs"]):
- name = expected["jobReference"]["jobId"]
- self.assertIsInstance(found, JOB_TYPES[name])
- self.assertEqual(found.job_id, name)
- self.assertEqual(token, TOKEN)
-
- conn.api_request.assert_called_once_with(
- method="GET",
- path="/%s" % PATH,
- query_params={"projection": "full"},
- timeout=None,
- )
-
- def test_list_jobs_explicit_missing(self):
- PATH = "projects/%s/jobs" % self.PROJECT
- DATA = {}
- TOKEN = "TOKEN"
- creds = _make_credentials()
- client = self._make_one(self.PROJECT, creds)
- conn = client._connection = make_connection(DATA)
-
- iterator = client.list_jobs(
- max_results=1000, page_token=TOKEN, all_users=True, state_filter="done"
- )
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- page = next(iterator.pages)
-
- final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None)
- jobs = list(page)
- token = iterator.next_page_token
-
- self.assertEqual(len(jobs), 0)
- self.assertIsNone(token)
-
- conn.api_request.assert_called_once_with(
- method="GET",
- path="/%s" % PATH,
- query_params={
- "projection": "full",
- "maxResults": 1000,
- "pageToken": TOKEN,
- "allUsers": True,
- "stateFilter": "done",
- },
- timeout=None,
- )
-
- def test_list_jobs_w_project(self):
- creds = _make_credentials()
- client = self._make_one(self.PROJECT, creds)
- conn = client._connection = make_connection({})
-
- list(client.list_jobs(project="other-project"))
-
- conn.api_request.assert_called_once_with(
- method="GET",
- path="/projects/other-project/jobs",
- query_params={"projection": "full"},
- timeout=None,
- )
-
- def test_list_jobs_w_timeout(self):
- creds = _make_credentials()
- client = self._make_one(self.PROJECT, creds)
- conn = client._connection = make_connection({})
-
- list(client.list_jobs(timeout=7.5))
-
- conn.api_request.assert_called_once_with(
- method="GET",
- path="/projects/{}/jobs".format(self.PROJECT),
- query_params={"projection": "full"},
- timeout=7.5,
- )
-
- def test_list_jobs_w_time_filter(self):
- creds = _make_credentials()
- client = self._make_one(self.PROJECT, creds)
- conn = client._connection = make_connection({})
-
- # One millisecond after the unix epoch.
- start_time = datetime.datetime(1970, 1, 1, 0, 0, 0, 1000)
- # One millisecond after the the 2038 31-bit signed int rollover
- end_time = datetime.datetime(2038, 1, 19, 3, 14, 7, 1000)
- end_time_millis = (((2 ** 31) - 1) * 1000) + 1
-
- list(client.list_jobs(min_creation_time=start_time, max_creation_time=end_time))
-
- conn.api_request.assert_called_once_with(
- method="GET",
- path="/projects/%s/jobs" % self.PROJECT,
- query_params={
- "projection": "full",
- "minCreationTime": "1",
- "maxCreationTime": str(end_time_millis),
- },
- timeout=None,
+ path="/projects/job-based-proj/jobs/query_job/cancel",
+ query_params={"projection": "full", "location": "asia-northeast1"},
+ timeout=DEFAULT_TIMEOUT,
)
- def test_list_jobs_w_parent_job_filter(self):
- from google.cloud.bigquery import job
+ def test_cancel_job_w_timeout(self):
+ JOB_ID = "query_job"
+ QUERY = "SELECT * from test_dataset:test_table"
+ QUERY_JOB_RESOURCE = {
+ "id": "{}:{}".format(self.PROJECT, JOB_ID),
+ "jobReference": {"projectId": self.PROJECT, "jobId": "query_job"},
+ "state": "RUNNING",
+ "configuration": {"query": {"query": QUERY}},
+ }
+ RESOURCE = {"job": QUERY_JOB_RESOURCE}
creds = _make_credentials()
client = self._make_one(self.PROJECT, creds)
- conn = client._connection = make_connection({}, {})
+ conn = client._connection = make_connection(RESOURCE)
- parent_job_args = ["parent-job-123", job._AsyncJob("parent-job-123", client)]
+ client.cancel_job(JOB_ID, timeout=7.5)
- for parent_job in parent_job_args:
- list(client.list_jobs(parent_job=parent_job))
- conn.api_request.assert_called_once_with(
- method="GET",
- path="/projects/%s/jobs" % self.PROJECT,
- query_params={"projection": "full", "parentJobId": "parent-job-123"},
- timeout=None,
- )
- conn.api_request.reset_mock()
+ conn.api_request.assert_called_once_with(
+ method="POST",
+ path="/projects/{}/jobs/query_job/cancel".format(self.PROJECT),
+ query_params={"projection": "full"},
+ timeout=7.5,
+ )
def test_load_table_from_uri(self):
from google.cloud.bigquery.job import LoadJob, LoadJobConfig
@@ -4341,7 +3167,7 @@ def test_load_table_from_uri_w_explicit_project(self):
method="POST",
path="/projects/other-project/jobs",
data=resource,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_load_table_from_uri_w_client_location(self):
@@ -4385,7 +3211,7 @@ def test_load_table_from_uri_w_client_location(self):
method="POST",
path="/projects/other-project/jobs",
data=resource,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_load_table_from_uri_w_invalid_job_config(self):
@@ -4425,7 +3251,7 @@ def _mock_transport(self, status_code, headers, content=b""):
fake_transport.request.return_value = fake_response
return fake_transport
- def _initiate_resumable_upload_helper(self, num_retries=None):
+ def _initiate_resumable_upload_helper(self, num_retries=None, mtls=False):
from google.resumable_media.requests import ResumableUpload
from google.cloud.bigquery.client import _DEFAULT_CHUNKSIZE
from google.cloud.bigquery.client import _GENERIC_CONTENT_TYPE
@@ -4440,6 +3266,8 @@ def _initiate_resumable_upload_helper(self, num_retries=None):
fake_transport = self._mock_transport(http.client.OK, response_headers)
client = self._make_one(project=self.PROJECT, _http=fake_transport)
conn = client._connection = make_connection()
+ if mtls:
+ conn.get_api_base_url_for_mtls = mock.Mock(return_value="https://foo.mtls")
# Create some mock arguments and call the method under test.
data = b"goodbye gudbi gootbee"
@@ -4454,8 +3282,10 @@ def _initiate_resumable_upload_helper(self, num_retries=None):
# Check the returned values.
self.assertIsInstance(upload, ResumableUpload)
+
+ host_name = "https://foo.mtls" if mtls else "https://bigquery.googleapis.com"
upload_url = (
- f"https://bigquery.googleapis.com/upload/bigquery/v2/projects/{self.PROJECT}"
+ f"{host_name}/upload/bigquery/v2/projects/{self.PROJECT}"
"/jobs?uploadType=resumable"
)
self.assertEqual(upload.upload_url, upload_url)
@@ -4494,11 +3324,14 @@ def _initiate_resumable_upload_helper(self, num_retries=None):
def test__initiate_resumable_upload(self):
self._initiate_resumable_upload_helper()
+ def test__initiate_resumable_upload_mtls(self):
+ self._initiate_resumable_upload_helper(mtls=True)
+
def test__initiate_resumable_upload_with_retry(self):
self._initiate_resumable_upload_helper(num_retries=11)
def _do_multipart_upload_success_helper(
- self, get_boundary, num_retries=None, project=None
+ self, get_boundary, num_retries=None, project=None, mtls=False
):
from google.cloud.bigquery.client import _get_upload_headers
from google.cloud.bigquery.job import LoadJob
@@ -4508,6 +3341,8 @@ def _do_multipart_upload_success_helper(
fake_transport = self._mock_transport(http.client.OK, {})
client = self._make_one(project=self.PROJECT, _http=fake_transport)
conn = client._connection = make_connection()
+ if mtls:
+ conn.get_api_base_url_for_mtls = mock.Mock(return_value="https://foo.mtls")
if project is None:
project = self.PROJECT
@@ -4530,8 +3365,9 @@ def _do_multipart_upload_success_helper(
self.assertEqual(stream.tell(), size)
get_boundary.assert_called_once_with()
+ host_name = "https://foo.mtls" if mtls else "https://bigquery.googleapis.com"
upload_url = (
- f"https://bigquery.googleapis.com/upload/bigquery/v2/projects/{project}"
+ f"{host_name}/upload/bigquery/v2/projects/{project}"
"/jobs?uploadType=multipart"
)
payload = (
@@ -4556,6 +3392,10 @@ def _do_multipart_upload_success_helper(
def test__do_multipart_upload(self, get_boundary):
self._do_multipart_upload_success_helper(get_boundary)
+ @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==")
+ def test__do_multipart_upload_mtls(self, get_boundary):
+ self._do_multipart_upload_success_helper(get_boundary, mtls=True)
+
@mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==")
def test__do_multipart_upload_with_retry(self, get_boundary):
self._do_multipart_upload_success_helper(get_boundary, num_retries=8)
@@ -4659,7 +3499,7 @@ def test_copy_table_w_multiple_sources(self):
method="POST",
path="/projects/%s/jobs" % self.PROJECT,
data=expected_resource,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertIsInstance(job, CopyJob)
self.assertIs(job._client, client)
@@ -4721,7 +3561,7 @@ def test_copy_table_w_explicit_project(self):
method="POST",
path="/projects/other-project/jobs",
data=resource,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_copy_table_w_client_location(self):
@@ -4771,7 +3611,7 @@ def test_copy_table_w_client_location(self):
method="POST",
path="/projects/other-project/jobs",
data=resource,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_copy_table_w_source_strings(self):
@@ -4864,7 +3704,7 @@ def test_copy_table_w_valid_job_config(self):
method="POST",
path="/projects/%s/jobs" % self.PROJECT,
data=RESOURCE,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertIsInstance(job._configuration, CopyJobConfig)
@@ -4970,7 +3810,7 @@ def test_extract_table_w_explicit_project(self):
method="POST",
path="/projects/other-project/jobs",
data=resource,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_extract_table_w_client_location(self):
@@ -5014,7 +3854,7 @@ def test_extract_table_w_client_location(self):
method="POST",
path="/projects/other-project/jobs",
data=resource,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_extract_table_generated_job_id(self):
@@ -5057,7 +3897,7 @@ def test_extract_table_generated_job_id(self):
self.assertEqual(req["method"], "POST")
self.assertEqual(req["path"], "/projects/PROJECT/jobs")
self.assertIsInstance(req["data"]["jobReference"]["jobId"], str)
- self.assertIsNone(req["timeout"])
+ self.assertEqual(req["timeout"], DEFAULT_TIMEOUT)
# Check the job resource.
self.assertIsInstance(job, ExtractJob)
@@ -5102,7 +3942,7 @@ def test_extract_table_w_destination_uris(self):
_, req = conn.api_request.call_args
self.assertEqual(req["method"], "POST")
self.assertEqual(req["path"], "/projects/PROJECT/jobs")
- self.assertIsNone(req["timeout"])
+ self.assertEqual(req["timeout"], DEFAULT_TIMEOUT)
# Check the job resource.
self.assertIsInstance(job, ExtractJob)
@@ -5272,7 +4112,7 @@ def test_query_defaults(self):
_, req = conn.api_request.call_args
self.assertEqual(req["method"], "POST")
self.assertEqual(req["path"], "/projects/PROJECT/jobs")
- self.assertIsNone(req["timeout"])
+ self.assertEqual(req["timeout"], DEFAULT_TIMEOUT)
sent = req["data"]
self.assertIsInstance(sent["jobReference"]["jobId"], str)
sent_config = sent["configuration"]["query"]
@@ -5325,7 +4165,7 @@ def test_query_w_explicit_project(self):
method="POST",
path="/projects/other-project/jobs",
data=resource,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_query_w_explicit_job_config(self):
@@ -5381,7 +4221,10 @@ def test_query_w_explicit_job_config(self):
# Check that query actually starts the job.
conn.api_request.assert_called_once_with(
- method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None
+ method="POST",
+ path="/projects/PROJECT/jobs",
+ data=resource,
+ timeout=DEFAULT_TIMEOUT,
)
# the original config object should not have been modified
@@ -5425,7 +4268,10 @@ def test_query_preserving_explicit_job_config(self):
# Check that query actually starts the job.
conn.api_request.assert_called_once_with(
- method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None
+ method="POST",
+ path="/projects/PROJECT/jobs",
+ data=resource,
+ timeout=DEFAULT_TIMEOUT,
)
# the original config object should not have been modified
@@ -5477,7 +4323,10 @@ def test_query_preserving_explicit_default_job_config(self):
# Check that query actually starts the job.
conn.api_request.assert_called_once_with(
- method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None
+ method="POST",
+ path="/projects/PROJECT/jobs",
+ data=resource,
+ timeout=DEFAULT_TIMEOUT,
)
# the original default config object should not have been modified
@@ -5562,7 +4411,10 @@ def test_query_w_explicit_job_config_override(self):
# Check that query actually starts the job.
conn.api_request.assert_called_once_with(
- method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None
+ method="POST",
+ path="/projects/PROJECT/jobs",
+ data=resource,
+ timeout=DEFAULT_TIMEOUT,
)
def test_query_w_client_default_config_no_incoming(self):
@@ -5603,7 +4455,10 @@ def test_query_w_client_default_config_no_incoming(self):
# Check that query actually starts the job.
conn.api_request.assert_called_once_with(
- method="POST", path="/projects/PROJECT/jobs", data=resource, timeout=None
+ method="POST",
+ path="/projects/PROJECT/jobs",
+ data=resource,
+ timeout=DEFAULT_TIMEOUT,
)
def test_query_w_invalid_default_job_config(self):
@@ -5648,7 +4503,7 @@ def test_query_w_client_location(self):
method="POST",
path="/projects/other-project/jobs",
data=resource,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_query_detect_location(self):
@@ -5719,7 +4574,7 @@ def test_query_w_udf_resources(self):
_, req = conn.api_request.call_args
self.assertEqual(req["method"], "POST")
self.assertEqual(req["path"], "/projects/PROJECT/jobs")
- self.assertIsNone(req["timeout"])
+ self.assertEqual(req["timeout"], DEFAULT_TIMEOUT)
sent = req["data"]
self.assertIsInstance(sent["jobReference"]["jobId"], str)
sent_config = sent["configuration"]["query"]
@@ -5775,7 +4630,7 @@ def test_query_w_query_parameters(self):
_, req = conn.api_request.call_args
self.assertEqual(req["method"], "POST")
self.assertEqual(req["path"], "/projects/PROJECT/jobs")
- self.assertIsNone(req["timeout"])
+ self.assertEqual(req["timeout"], DEFAULT_TIMEOUT)
sent = req["data"]
self.assertEqual(sent["jobReference"]["jobId"], JOB)
sent_config = sent["configuration"]["query"]
@@ -5790,6 +4645,81 @@ def test_query_w_query_parameters(self):
},
)
+ def test_query_job_rpc_fail_w_random_error(self):
+ from google.api_core.exceptions import Unknown
+ from google.cloud.bigquery.job import QueryJob
+
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+
+ job_create_error = Unknown("Not sure what went wrong.")
+ job_begin_patcher = mock.patch.object(
+ QueryJob, "_begin", side_effect=job_create_error
+ )
+ with job_begin_patcher:
+ with pytest.raises(Unknown, match="Not sure what went wrong."):
+ client.query("SELECT 1;", job_id="123")
+
+ def test_query_job_rpc_fail_w_conflict_job_id_given(self):
+ from google.api_core.exceptions import Conflict
+ from google.cloud.bigquery.job import QueryJob
+
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+
+ job_create_error = Conflict("Job already exists.")
+ job_begin_patcher = mock.patch.object(
+ QueryJob, "_begin", side_effect=job_create_error
+ )
+ with job_begin_patcher:
+ with pytest.raises(Conflict, match="Job already exists."):
+ client.query("SELECT 1;", job_id="123")
+
+ def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails(self):
+ from google.api_core.exceptions import Conflict
+ from google.api_core.exceptions import DataLoss
+ from google.cloud.bigquery.job import QueryJob
+
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+
+ job_create_error = Conflict("Job already exists.")
+ job_begin_patcher = mock.patch.object(
+ QueryJob, "_begin", side_effect=job_create_error
+ )
+ get_job_patcher = mock.patch.object(
+ client, "get_job", side_effect=DataLoss("we lost yor job, sorry")
+ )
+
+ with job_begin_patcher, get_job_patcher:
+ # If the get_job request fails, the original exception should be raised.
+ with pytest.raises(Conflict, match="Job already exists."):
+ client.query("SELECT 1;", job_id=None)
+
+ def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_succeeds(self):
+ from google.api_core.exceptions import Conflict
+ from google.cloud.bigquery.job import QueryJob
+
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+
+ job_create_error = Conflict("Job already exists.")
+ job_begin_patcher = mock.patch.object(
+ QueryJob, "_begin", side_effect=job_create_error
+ )
+ get_job_patcher = mock.patch.object(
+ client, "get_job", return_value=mock.sentinel.query_job
+ )
+
+ with job_begin_patcher, get_job_patcher:
+ result = client.query("SELECT 1;", job_id=None)
+
+ assert result is mock.sentinel.query_job
+
def test_insert_rows_w_timeout(self):
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.table import Table
@@ -5892,7 +4822,7 @@ def _row_data(row):
self.assertEqual(req["method"], "POST")
self.assertEqual(req["path"], "/%s" % PATH)
self.assertEqual(req["data"], SENT)
- self.assertIsNone(req["timeout"])
+ self.assertEqual(req["timeout"], DEFAULT_TIMEOUT)
def test_insert_rows_w_list_of_dictionaries(self):
import datetime
@@ -5960,7 +4890,7 @@ def _row_data(row):
self.assertEqual(len(errors), 0)
conn.api_request.assert_called_once_with(
- method="POST", path="/%s" % PATH, data=SENT, timeout=None
+ method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT
)
def test_insert_rows_w_list_of_Rows(self):
@@ -6005,7 +4935,7 @@ def _row_data(row):
self.assertEqual(len(errors), 0)
conn.api_request.assert_called_once_with(
- method="POST", path="/%s" % PATH, data=SENT, timeout=None
+ method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT
)
def test_insert_rows_w_skip_invalid_and_ignore_unknown(self):
@@ -6082,7 +5012,7 @@ def _row_data(row):
errors[0]["errors"][0], RESPONSE["insertErrors"][0]["errors"][0]
)
conn.api_request.assert_called_once_with(
- method="POST", path="/%s" % PATH, data=SENT, timeout=None
+ method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT
)
def test_insert_rows_w_repeated_fields(self):
@@ -6115,16 +5045,24 @@ def test_insert_rows_w_repeated_fields(self):
(
12,
[
- datetime.datetime(2018, 12, 1, 12, 0, 0, tzinfo=pytz.utc),
- datetime.datetime(2018, 12, 1, 13, 0, 0, tzinfo=pytz.utc),
+ datetime.datetime(
+ 2018, 12, 1, 12, 0, 0, tzinfo=datetime.timezone.utc
+ ),
+ datetime.datetime(
+ 2018, 12, 1, 13, 0, 0, tzinfo=datetime.timezone.utc
+ ),
],
[1.25, 2.5],
),
{
"score": 13,
"times": [
- datetime.datetime(2018, 12, 2, 12, 0, 0, tzinfo=pytz.utc),
- datetime.datetime(2018, 12, 2, 13, 0, 0, tzinfo=pytz.utc),
+ datetime.datetime(
+ 2018, 12, 2, 12, 0, 0, tzinfo=datetime.timezone.utc
+ ),
+ datetime.datetime(
+ 2018, 12, 2, 13, 0, 0, tzinfo=datetime.timezone.utc
+ ),
],
"distances": [-1.25, -2.5],
},
@@ -6175,7 +5113,7 @@ def test_insert_rows_w_repeated_fields(self):
self.assertEqual(len(errors), 0)
conn.api_request.assert_called_once_with(
- method="POST", path="/%s" % PATH, data=SENT, timeout=None,
+ method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT,
)
def test_insert_rows_w_record_schema(self):
@@ -6241,7 +5179,7 @@ def test_insert_rows_w_record_schema(self):
self.assertEqual(len(errors), 0)
conn.api_request.assert_called_once_with(
- method="POST", path="/%s" % PATH, data=SENT, timeout=None
+ method="POST", path="/%s" % PATH, data=SENT, timeout=DEFAULT_TIMEOUT
)
def test_insert_rows_w_explicit_none_insert_ids(self):
@@ -6275,7 +5213,7 @@ def _row_data(row):
self.assertEqual(len(errors), 0)
conn.api_request.assert_called_once_with(
- method="POST", path="/{}".format(PATH), data=SENT, timeout=None,
+ method="POST", path="/{}".format(PATH), data=SENT, timeout=DEFAULT_TIMEOUT,
)
def test_insert_rows_errors(self):
@@ -6359,7 +5297,7 @@ def test_insert_rows_w_numeric(self):
project, ds_id, table_id
),
data=sent,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
@unittest.skipIf(pandas is None, "Requires `pandas`")
@@ -6551,7 +5489,10 @@ def test_insert_rows_from_dataframe_many_columns(self):
]
}
expected_call = mock.call(
- method="POST", path=API_PATH, data=EXPECTED_SENT_DATA, timeout=None
+ method="POST",
+ path=API_PATH,
+ data=EXPECTED_SENT_DATA,
+ timeout=DEFAULT_TIMEOUT,
)
actual_calls = conn.api_request.call_args_list
@@ -6604,10 +5545,13 @@ def test_insert_rows_from_dataframe_w_explicit_none_insert_ids(self):
actual_calls = conn.api_request.call_args_list
assert len(actual_calls) == 1
assert actual_calls[0] == mock.call(
- method="POST", path=API_PATH, data=EXPECTED_SENT_DATA, timeout=None
+ method="POST",
+ path=API_PATH,
+ data=EXPECTED_SENT_DATA,
+ timeout=DEFAULT_TIMEOUT,
)
- def test_insert_rows_json(self):
+ def test_insert_rows_json_default_behavior(self):
from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.table import Table
@@ -6654,8 +5598,10 @@ def test_insert_rows_json(self):
method="POST", path="/%s" % PATH, data=SENT, timeout=7.5,
)
- def test_insert_rows_json_with_string_id(self):
- rows = [{"col1": "val1"}]
+ def test_insert_rows_json_w_explicitly_requested_autogenerated_insert_ids(self):
+ from google.cloud.bigquery import AutoRowIDs
+
+ rows = [{"col1": "val1"}, {"col2": "val2"}]
creds = _make_credentials()
http = object()
client = self._make_one(
@@ -6663,19 +5609,115 @@ def test_insert_rows_json_with_string_id(self):
)
conn = client._connection = make_connection({})
- with mock.patch("uuid.uuid4", side_effect=map(str, range(len(rows)))):
- errors = client.insert_rows_json("proj.dset.tbl", rows)
+ uuid_patcher = mock.patch("uuid.uuid4", side_effect=map(str, range(len(rows))))
+ with uuid_patcher:
+ errors = client.insert_rows_json(
+ "proj.dset.tbl", rows, row_ids=AutoRowIDs.GENERATE_UUID
+ )
self.assertEqual(len(errors), 0)
- expected = {
- "rows": [{"json": row, "insertId": str(i)} for i, row in enumerate(rows)]
+
+ # Check row data sent to the backend.
+ expected_row_data = {
+ "rows": [
+ {"json": {"col1": "val1"}, "insertId": "0"},
+ {"json": {"col2": "val2"}, "insertId": "1"},
+ ]
}
conn.api_request.assert_called_once_with(
method="POST",
path="/projects/proj/datasets/dset/tables/tbl/insertAll",
- data=expected,
- timeout=None,
+ data=expected_row_data,
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+ def test_insert_rows_json_w_explicitly_disabled_insert_ids(self):
+ from google.cloud.bigquery import AutoRowIDs
+
+ rows = [{"col1": "val1"}, {"col2": "val2"}]
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(
+ project="default-project", credentials=creds, _http=http
+ )
+ conn = client._connection = make_connection({})
+
+ errors = client.insert_rows_json(
+ "proj.dset.tbl", rows, row_ids=AutoRowIDs.DISABLED,
+ )
+
+ self.assertEqual(len(errors), 0)
+
+ expected_row_data = {
+ "rows": [
+ {"json": {"col1": "val1"}, "insertId": None},
+ {"json": {"col2": "val2"}, "insertId": None},
+ ]
+ }
+ conn.api_request.assert_called_once_with(
+ method="POST",
+ path="/projects/proj/datasets/dset/tables/tbl/insertAll",
+ data=expected_row_data,
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+ def test_insert_rows_json_with_iterator_row_ids(self):
+ rows = [{"col1": "val1"}, {"col2": "val2"}, {"col3": "val3"}]
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(
+ project="default-project", credentials=creds, _http=http
+ )
+ conn = client._connection = make_connection({})
+
+ row_ids_iter = map(str, itertools.count(42))
+ errors = client.insert_rows_json("proj.dset.tbl", rows, row_ids=row_ids_iter)
+
+ self.assertEqual(len(errors), 0)
+ expected_row_data = {
+ "rows": [
+ {"json": {"col1": "val1"}, "insertId": "42"},
+ {"json": {"col2": "val2"}, "insertId": "43"},
+ {"json": {"col3": "val3"}, "insertId": "44"},
+ ]
+ }
+ conn.api_request.assert_called_once_with(
+ method="POST",
+ path="/projects/proj/datasets/dset/tables/tbl/insertAll",
+ data=expected_row_data,
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+ def test_insert_rows_json_with_non_iterable_row_ids(self):
+ rows = [{"col1": "val1"}]
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(
+ project="default-project", credentials=creds, _http=http
+ )
+ client._connection = make_connection({})
+
+ with self.assertRaises(TypeError) as exc:
+ client.insert_rows_json("proj.dset.tbl", rows, row_ids=object())
+
+ err_msg = str(exc.exception)
+ self.assertIn("row_ids", err_msg)
+ self.assertIn("iterable", err_msg)
+
+ def test_insert_rows_json_with_too_few_row_ids(self):
+ rows = [{"col1": "val1"}, {"col2": "val2"}, {"col3": "val3"}]
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(
+ project="default-project", credentials=creds, _http=http
)
+ client._connection = make_connection({})
+
+ insert_ids = ["10", "20"]
+
+ error_msg_pattern = "row_ids did not generate enough IDs.*index 2"
+ with self.assertRaisesRegex(ValueError, error_msg_pattern):
+ client.insert_rows_json("proj.dset.tbl", rows, row_ids=insert_ids)
def test_insert_rows_json_w_explicit_none_insert_ids(self):
rows = [{"col1": "val1"}, {"col2": "val2"}]
@@ -6696,7 +5738,46 @@ def test_insert_rows_json_w_explicit_none_insert_ids(self):
method="POST",
path="/projects/proj/datasets/dset/tables/tbl/insertAll",
data=expected,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+ def test_insert_rows_json_w_none_insert_ids_sequence(self):
+ rows = [{"col1": "val1"}, {"col2": "val2"}]
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(
+ project="default-project", credentials=creds, _http=http
+ )
+ conn = client._connection = make_connection({})
+
+ uuid_patcher = mock.patch("uuid.uuid4", side_effect=map(str, range(len(rows))))
+ with warnings.catch_warnings(record=True) as warned, uuid_patcher:
+ errors = client.insert_rows_json("proj.dset.tbl", rows, row_ids=None)
+
+ self.assertEqual(len(errors), 0)
+
+ # Passing row_ids=None should have resulted in a deprecation warning.
+ matches = [
+ warning
+ for warning in warned
+ if issubclass(warning.category, DeprecationWarning)
+ and "row_ids" in str(warning)
+ and "AutoRowIDs.GENERATE_UUID" in str(warning)
+ ]
+ assert matches, "The expected deprecation warning was not raised."
+
+ # Check row data sent to the backend.
+ expected_row_data = {
+ "rows": [
+ {"json": {"col1": "val1"}, "insertId": "0"},
+ {"json": {"col2": "val2"}, "insertId": "1"},
+ ]
+ }
+ conn.api_request.assert_called_once_with(
+ method="POST",
+ path="/projects/proj/datasets/dset/tables/tbl/insertAll",
+ data=expected_row_data,
+ timeout=DEFAULT_TIMEOUT,
)
def test_insert_rows_w_wrong_arg(self):
@@ -6891,7 +5972,7 @@ def test_list_rows_w_start_index_w_page_size(self):
"maxResults": 2,
"formatOptions.useInt64Timestamp": True,
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
),
mock.call(
method="GET",
@@ -6901,7 +5982,7 @@ def test_list_rows_w_start_index_w_page_size(self):
"maxResults": 2,
"formatOptions.useInt64Timestamp": True,
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
),
]
)
@@ -7052,7 +6133,7 @@ def test_list_rows_repeated_fields(self):
"selectedFields": "color,struct",
"formatOptions.useInt64Timestamp": True,
},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_list_rows_w_record_schema(self):
@@ -7122,7 +6203,7 @@ def test_list_rows_w_record_schema(self):
method="GET",
path="/%s" % PATH,
query_params={"formatOptions.useInt64Timestamp": True},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
def test_list_rows_with_missing_schema(self):
@@ -7177,7 +6258,7 @@ def test_list_rows_with_missing_schema(self):
row_iter = client.list_rows(table)
conn.api_request.assert_called_once_with(
- method="GET", path=table_path, timeout=None
+ method="GET", path=table_path, timeout=DEFAULT_TIMEOUT
)
conn.api_request.reset_mock()
self.assertEqual(row_iter.total_rows, 2, msg=repr(table))
@@ -7187,7 +6268,7 @@ def test_list_rows_with_missing_schema(self):
method="GET",
path=tabledata_path,
query_params={"formatOptions.useInt64Timestamp": True},
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
self.assertEqual(row_iter.total_rows, 3, msg=repr(table))
self.assertEqual(rows[0].name, "Phred Phlyntstone", msg=repr(table))
@@ -7203,6 +6284,28 @@ def test_list_rows_error(self):
with self.assertRaises(TypeError):
client.list_rows(1)
+ def test_context_manager_enter_returns_itself(self):
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+
+ with mock.patch.object(client, "close"), client as context_var:
+ pass
+
+ self.assertIs(client, context_var)
+
+ def test_context_manager_exit_closes_client(self):
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+
+ fake_close = mock.Mock()
+ with mock.patch.object(client, "close", fake_close):
+ with client:
+ pass
+
+ fake_close.assert_called_once()
+
class Test_make_job_id(unittest.TestCase):
def _call_fut(self, job_id, prefix=None):
@@ -7338,7 +6441,7 @@ def test_load_table_from_file_resumable(self):
file_obj,
self.EXPECTED_CONFIGURATION,
_DEFAULT_NUM_RETRIES,
- None,
+ DEFAULT_TIMEOUT,
project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"],
)
@@ -7371,7 +6474,7 @@ def test_load_table_from_file_w_explicit_project(self):
file_obj,
expected_resource,
_DEFAULT_NUM_RETRIES,
- None,
+ DEFAULT_TIMEOUT,
project="other-project",
)
@@ -7405,7 +6508,7 @@ def test_load_table_from_file_w_client_location(self):
file_obj,
expected_resource,
_DEFAULT_NUM_RETRIES,
- None,
+ DEFAULT_TIMEOUT,
project="other-project",
)
@@ -7467,7 +6570,7 @@ def test_load_table_from_file_resumable_metadata(self):
file_obj,
expected_config,
_DEFAULT_NUM_RETRIES,
- None,
+ DEFAULT_TIMEOUT,
project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"],
)
@@ -7496,7 +6599,7 @@ def test_load_table_from_file_multipart(self):
self.EXPECTED_CONFIGURATION,
file_obj_size,
_DEFAULT_NUM_RETRIES,
- None,
+ DEFAULT_TIMEOUT,
project=self.PROJECT,
)
@@ -7521,7 +6624,7 @@ def test_load_table_from_file_with_retries(self):
file_obj,
self.EXPECTED_CONFIGURATION,
num_retries,
- None,
+ DEFAULT_TIMEOUT,
project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"],
)
@@ -7558,7 +6661,7 @@ def test_load_table_from_file_with_readable_gzip(self):
gzip_file,
self.EXPECTED_CONFIGURATION,
_DEFAULT_NUM_RETRIES,
- None,
+ DEFAULT_TIMEOUT,
project=self.EXPECTED_CONFIGURATION["jobReference"]["projectId"],
)
@@ -7621,18 +6724,47 @@ def test_load_table_from_file_w_invalid_job_config(self):
def test_load_table_from_dataframe(self):
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
from google.cloud.bigquery import job
- from google.cloud.bigquery.schema import SchemaField
+ from google.cloud.bigquery.schema import PolicyTagList, SchemaField
client = self._make_client()
- records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}]
- dataframe = pandas.DataFrame(records)
+ records = [
+ {"id": 1, "age": 100, "accounts": [2, 3]},
+ {"id": 2, "age": 60, "accounts": [5]},
+ {"id": 3, "age": 40, "accounts": []},
+ ]
+ # Mix up the column order so that we can verify that the sent schema
+ # matches the serialized order, not the table column order.
+ column_order = ["age", "accounts", "id"]
+ dataframe = pandas.DataFrame(records, columns=column_order)
+ table_fields = {
+ "id": SchemaField(
+ "id",
+ "INTEGER",
+ mode="REQUIRED",
+ description="integer column",
+ policy_tags=PolicyTagList(names=("foo", "bar")),
+ ),
+ "age": SchemaField(
+ "age",
+ "INTEGER",
+ mode="NULLABLE",
+ description="age column",
+ policy_tags=PolicyTagList(names=("baz",)),
+ ),
+ "accounts": SchemaField(
+ "accounts", "INTEGER", mode="REPEATED", description="array column",
+ ),
+ }
+ get_table_schema = [
+ table_fields["id"],
+ table_fields["age"],
+ table_fields["accounts"],
+ ]
get_table_patch = mock.patch(
"google.cloud.bigquery.client.Client.get_table",
autospec=True,
- return_value=mock.Mock(
- schema=[SchemaField("id", "INTEGER"), SchemaField("age", "INTEGER")]
- ),
+ return_value=mock.Mock(schema=get_table_schema),
)
load_patch = mock.patch(
"google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
@@ -7652,14 +6784,27 @@ def test_load_table_from_dataframe(self):
location=None,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_file = load_table_from_file.mock_calls[0][1][1]
assert sent_file.closed
- sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
- assert sent_config.source_format == job.SourceFormat.PARQUET
+ sent_config = load_table_from_file.mock_calls[0][2]["job_config"].to_api_repr()[
+ "load"
+ ]
+ assert sent_config["sourceFormat"] == job.SourceFormat.PARQUET
+ for field_index, field in enumerate(sent_config["schema"]["fields"]):
+ assert field["name"] == column_order[field_index]
+ table_field = table_fields[field["name"]]
+ assert field["name"] == table_field.name
+ assert field["type"] == table_field.field_type
+ assert field["mode"] == table_field.mode
+ assert len(field.get("fields", [])) == len(table_field.fields)
+ assert field["policyTags"]["names"] == []
+ # Omit unnecessary fields when they come from getting the table
+ # (not passed in via job_config)
+ assert "description" not in field
@unittest.skipIf(pandas is None, "Requires `pandas`")
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
@@ -7697,7 +6842,7 @@ def test_load_table_from_dataframe_w_client_location(self):
location=self.LOCATION,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_file = load_table_from_file.mock_calls[0][1][1]
@@ -7751,7 +6896,7 @@ def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(sel
location=self.LOCATION,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
@@ -7807,7 +6952,7 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self):
location=self.LOCATION,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
@@ -7870,7 +7015,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self):
datetime.datetime(2012, 3, 14, 15, 16),
],
dtype="datetime64[ns]",
- ).dt.tz_localize(pytz.utc),
+ ).dt.tz_localize(datetime.timezone.utc),
),
]
)
@@ -7901,7 +7046,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self):
location=self.LOCATION,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
@@ -7962,7 +7107,7 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self):
location=self.LOCATION,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
@@ -8009,7 +7154,7 @@ def test_load_table_from_dataframe_unknown_table(self):
location=None,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
@unittest.skipIf(
@@ -8051,7 +7196,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self):
location=self.LOCATION,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
@@ -8099,7 +7244,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se
location=self.LOCATION,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
@@ -8161,7 +7306,7 @@ def test_load_table_from_dataframe_struct_fields(self):
location=self.LOCATION,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
@@ -8202,7 +7347,7 @@ def test_load_table_from_dataframe_w_partial_schema(self):
datetime.datetime(2012, 3, 14, 15, 16),
],
dtype="datetime64[ns]",
- ).dt.tz_localize(pytz.utc),
+ ).dt.tz_localize(datetime.timezone.utc),
),
("string_col", ["abc", None, "def"]),
("bytes_col", [b"abc", b"def", None]),
@@ -8236,7 +7381,7 @@ def test_load_table_from_dataframe_w_partial_schema(self):
location=self.LOCATION,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
@@ -8331,7 +7476,7 @@ def test_load_table_from_dataframe_w_partial_schema_missing_types(self):
location=self.LOCATION,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
assert warned # there should be at least one warning
@@ -8407,6 +7552,42 @@ def test_load_table_from_dataframe_wo_pyarrow_raises_error(self):
parquet_compression="gzip",
)
+ def test_load_table_from_dataframe_w_bad_pyarrow_issues_warning(self):
+ pytest.importorskip("pandas", reason="Requires `pandas`")
+ pytest.importorskip("pyarrow", reason="Requires `pyarrow`")
+
+ client = self._make_client()
+ records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}]
+ dataframe = pandas.DataFrame(records)
+
+ pyarrow_version_patch = mock.patch(
+ "google.cloud.bigquery.client._PYARROW_VERSION",
+ packaging.version.parse("2.0.0"), # A known bad version of pyarrow.
+ )
+ get_table_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.get_table",
+ autospec=True,
+ side_effect=google.api_core.exceptions.NotFound("Table not found"),
+ )
+ load_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
+ )
+
+ with load_patch, get_table_patch, pyarrow_version_patch:
+ with warnings.catch_warnings(record=True) as warned:
+ client.load_table_from_dataframe(
+ dataframe, self.TABLE_REF, location=self.LOCATION,
+ )
+
+ expected_warnings = [
+ warning for warning in warned if "pyarrow" in str(warning).lower()
+ ]
+ assert len(expected_warnings) == 1
+ assert issubclass(expected_warnings[0].category, RuntimeWarning)
+ msg = str(expected_warnings[0].message)
+ assert "pyarrow 2.0.0" in msg
+ assert "data corruption" in msg
+
@unittest.skipIf(pandas is None, "Requires `pandas`")
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
def test_load_table_from_dataframe_w_nulls(self):
@@ -8445,7 +7626,7 @@ def test_load_table_from_dataframe_w_nulls(self):
location=self.LOCATION,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
@@ -8511,7 +7692,7 @@ def test_load_table_from_dataframe_with_csv_source_format(self):
location=None,
project=None,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_file = load_table_from_file.mock_calls[0][1][1]
@@ -8549,7 +7730,7 @@ def test_load_table_from_json_basic_use(self):
location=client.location,
project=client.project,
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
@@ -8602,7 +7783,7 @@ def test_load_table_from_json_non_default_args(self):
location="EU",
project="project-x",
job_config=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
@@ -8635,6 +7816,42 @@ def test_load_table_from_json_w_invalid_job_config(self):
err_msg = str(exc.value)
assert "Expected an instance of LoadJobConfig" in err_msg
+ def test_load_table_from_json_unicode_emoji_data_case(self):
+ from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+
+ client = self._make_client()
+
+ emoji = "\U0001F3E6"
+ json_row = {"emoji": emoji}
+ json_rows = [json_row]
+
+ load_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
+ )
+
+ with load_patch as load_table_from_file:
+ client.load_table_from_json(json_rows, self.TABLE_REF)
+
+ load_table_from_file.assert_called_once_with(
+ client,
+ mock.ANY,
+ self.TABLE_REF,
+ size=mock.ANY,
+ num_retries=_DEFAULT_NUM_RETRIES,
+ job_id=mock.ANY,
+ job_id_prefix=None,
+ location=client.location,
+ project=client.project,
+ job_config=mock.ANY,
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+ sent_data_file = load_table_from_file.mock_calls[0][1][1]
+
+ # make sure json_row's unicode characters are only encoded one time
+ expected_bytes = b'{"emoji": "' + emoji.encode("utf8") + b'"}'
+ assert sent_data_file.getvalue() == expected_bytes
+
# Low-level tests
@classmethod
@@ -8858,18 +8075,21 @@ def test_schema_to_json_with_file_path(self):
"description": "quarter",
"mode": "REQUIRED",
"name": "qtr",
+ "policyTags": {"names": []},
"type": "STRING",
},
{
"description": "sales representative",
"mode": "NULLABLE",
"name": "rep",
+ "policyTags": {"names": []},
"type": "STRING",
},
{
"description": "total sales",
"mode": "NULLABLE",
"name": "sales",
+ "policyTags": {"names": []},
"type": "FLOAT",
},
]
@@ -8902,18 +8122,21 @@ def test_schema_to_json_with_file_object(self):
"description": "quarter",
"mode": "REQUIRED",
"name": "qtr",
+ "policyTags": {"names": []},
"type": "STRING",
},
{
"description": "sales representative",
"mode": "NULLABLE",
"name": "rep",
+ "policyTags": {"names": []},
"type": "STRING",
},
{
"description": "total sales",
"mode": "NULLABLE",
"name": "sales",
+ "policyTags": {"names": []},
"type": "FLOAT",
},
]
@@ -8930,3 +8153,23 @@ def test_schema_to_json_with_file_object(self):
client.schema_to_json(schema_list, fake_file)
assert file_content == json.loads(fake_file.getvalue())
+
+
+def test_upload_chunksize(client):
+ with mock.patch("google.cloud.bigquery.client.ResumableUpload") as RU:
+ upload = RU.return_value
+
+ upload.finished = False
+
+ def transmit_next_chunk(transport):
+ upload.finished = True
+ result = mock.MagicMock()
+ result.json.return_value = {}
+ return result
+
+ upload.transmit_next_chunk = transmit_next_chunk
+ f = io.BytesIO()
+ client.load_table_from_file(f, "foo.bar")
+
+ chunk_size = RU.call_args_list[0][0][1]
+ assert chunk_size == 100 * (1 << 20)
diff --git a/tests/unit/test_create_dataset.py b/tests/unit/test_create_dataset.py
new file mode 100644
index 000000000..67b21225d
--- /dev/null
+++ b/tests/unit/test_create_dataset.py
@@ -0,0 +1,364 @@
+# Copyright 2021 Google LLC
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# https://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from google.cloud.bigquery.dataset import Dataset, DatasetReference
+from .helpers import make_connection, dataset_polymorphic, make_client
+import google.cloud.bigquery.dataset
+from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
+import mock
+import pytest
+
+
+@dataset_polymorphic
+def test_create_dataset_minimal(make_dataset, get_reference, client, PROJECT, DS_ID):
+ PATH = "projects/%s/datasets" % PROJECT
+ RESOURCE = {
+ "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
+ "etag": "etag",
+ "id": "%s:%s" % (PROJECT, DS_ID),
+ }
+ conn = client._connection = make_connection(RESOURCE)
+
+ dataset = make_dataset(PROJECT, DS_ID)
+ after = client.create_dataset(dataset, timeout=7.5)
+
+ assert after.dataset_id == DS_ID
+ assert after.project == PROJECT
+ assert after.etag == RESOURCE["etag"]
+ assert after.full_dataset_id == RESOURCE["id"]
+
+ conn.api_request.assert_called_once_with(
+ method="POST",
+ path="/%s" % PATH,
+ data={
+ "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
+ "labels": {},
+ },
+ timeout=7.5,
+ )
+
+
+def test_create_dataset_w_attrs(client, PROJECT, DS_ID):
+ from google.cloud.bigquery.dataset import AccessEntry
+
+ PATH = "projects/%s/datasets" % PROJECT
+ DESCRIPTION = "DESC"
+ FRIENDLY_NAME = "FN"
+ LOCATION = "US"
+ USER_EMAIL = "phred@example.com"
+ LABELS = {"color": "red"}
+ VIEW = {
+ "projectId": "my-proj",
+ "datasetId": "starry-skies",
+ "tableId": "northern-hemisphere",
+ }
+ RESOURCE = {
+ "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
+ "etag": "etag",
+ "id": "%s:%s" % (PROJECT, DS_ID),
+ "description": DESCRIPTION,
+ "friendlyName": FRIENDLY_NAME,
+ "location": LOCATION,
+ "defaultTableExpirationMs": "3600",
+ "labels": LABELS,
+ "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}],
+ }
+ conn = client._connection = make_connection(RESOURCE)
+ entries = [
+ AccessEntry("OWNER", "userByEmail", USER_EMAIL),
+ AccessEntry(None, "view", VIEW),
+ ]
+
+ ds_ref = DatasetReference(PROJECT, DS_ID)
+ before = Dataset(ds_ref)
+ before.access_entries = entries
+ before.description = DESCRIPTION
+ before.friendly_name = FRIENDLY_NAME
+ before.default_table_expiration_ms = 3600
+ before.location = LOCATION
+ before.labels = LABELS
+ after = client.create_dataset(before)
+
+ assert after.dataset_id == DS_ID
+ assert after.project == PROJECT
+ assert after.etag == RESOURCE["etag"]
+ assert after.full_dataset_id == RESOURCE["id"]
+ assert after.description == DESCRIPTION
+ assert after.friendly_name == FRIENDLY_NAME
+ assert after.location == LOCATION
+ assert after.default_table_expiration_ms == 3600
+ assert after.labels == LABELS
+
+ conn.api_request.assert_called_once_with(
+ method="POST",
+ path="/%s" % PATH,
+ data={
+ "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
+ "description": DESCRIPTION,
+ "friendlyName": FRIENDLY_NAME,
+ "location": LOCATION,
+ "defaultTableExpirationMs": "3600",
+ "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}],
+ "labels": LABELS,
+ },
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+
+def test_create_dataset_w_custom_property(client, PROJECT, DS_ID):
+ # The library should handle sending properties to the API that are not
+ # yet part of the library
+
+ path = "/projects/%s/datasets" % PROJECT
+ resource = {
+ "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
+ "newAlphaProperty": "unreleased property",
+ }
+ conn = client._connection = make_connection(resource)
+
+ ds_ref = DatasetReference(PROJECT, DS_ID)
+ before = Dataset(ds_ref)
+ before._properties["newAlphaProperty"] = "unreleased property"
+ after = client.create_dataset(before)
+
+ assert after.dataset_id == DS_ID
+ assert after.project == PROJECT
+ assert after._properties["newAlphaProperty"] == "unreleased property"
+
+ conn.api_request.assert_called_once_with(
+ method="POST",
+ path=path,
+ data={
+ "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
+ "newAlphaProperty": "unreleased property",
+ "labels": {},
+ },
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+
+def test_create_dataset_w_client_location_wo_dataset_location(PROJECT, DS_ID, LOCATION):
+ PATH = "projects/%s/datasets" % PROJECT
+ RESOURCE = {
+ "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
+ "etag": "etag",
+ "id": "%s:%s" % (PROJECT, DS_ID),
+ "location": LOCATION,
+ }
+ client = make_client(location=LOCATION)
+ conn = client._connection = make_connection(RESOURCE)
+
+ ds_ref = DatasetReference(PROJECT, DS_ID)
+ before = Dataset(ds_ref)
+ after = client.create_dataset(before)
+
+ assert after.dataset_id == DS_ID
+ assert after.project == PROJECT
+ assert after.etag == RESOURCE["etag"]
+ assert after.full_dataset_id == RESOURCE["id"]
+ assert after.location == LOCATION
+
+ conn.api_request.assert_called_once_with(
+ method="POST",
+ path="/%s" % PATH,
+ data={
+ "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
+ "labels": {},
+ "location": LOCATION,
+ },
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+
+def test_create_dataset_w_client_location_w_dataset_location(PROJECT, DS_ID, LOCATION):
+ PATH = "projects/%s/datasets" % PROJECT
+ OTHER_LOCATION = "EU"
+ RESOURCE = {
+ "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
+ "etag": "etag",
+ "id": "%s:%s" % (PROJECT, DS_ID),
+ "location": OTHER_LOCATION,
+ }
+ client = make_client(location=LOCATION)
+ conn = client._connection = make_connection(RESOURCE)
+
+ ds_ref = DatasetReference(PROJECT, DS_ID)
+ before = Dataset(ds_ref)
+ before.location = OTHER_LOCATION
+ after = client.create_dataset(before)
+
+ assert after.dataset_id == DS_ID
+ assert after.project == PROJECT
+ assert after.etag == RESOURCE["etag"]
+ assert after.full_dataset_id == RESOURCE["id"]
+ assert after.location == OTHER_LOCATION
+
+ conn.api_request.assert_called_once_with(
+ method="POST",
+ path="/%s" % PATH,
+ data={
+ "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
+ "labels": {},
+ "location": OTHER_LOCATION,
+ },
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+
+def test_create_dataset_w_reference(PROJECT, DS_ID, LOCATION):
+ path = "/projects/%s/datasets" % PROJECT
+ resource = {
+ "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
+ "etag": "etag",
+ "id": "%s:%s" % (PROJECT, DS_ID),
+ "location": LOCATION,
+ }
+ client = make_client(location=LOCATION)
+ conn = client._connection = make_connection(resource)
+ dataset = client.create_dataset(DatasetReference(PROJECT, DS_ID))
+
+ assert dataset.dataset_id == DS_ID
+ assert dataset.project == PROJECT
+ assert dataset.etag == resource["etag"]
+ assert dataset.full_dataset_id == resource["id"]
+ assert dataset.location == LOCATION
+
+ conn.api_request.assert_called_once_with(
+ method="POST",
+ path=path,
+ data={
+ "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
+ "labels": {},
+ "location": LOCATION,
+ },
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+
+def test_create_dataset_w_fully_qualified_string(PROJECT, DS_ID, LOCATION):
+ path = "/projects/%s/datasets" % PROJECT
+ resource = {
+ "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
+ "etag": "etag",
+ "id": "%s:%s" % (PROJECT, DS_ID),
+ "location": LOCATION,
+ }
+ client = make_client(location=LOCATION)
+ conn = client._connection = make_connection(resource)
+ dataset = client.create_dataset("{}.{}".format(PROJECT, DS_ID))
+
+ assert dataset.dataset_id == DS_ID
+ assert dataset.project == PROJECT
+ assert dataset.etag == resource["etag"]
+ assert dataset.full_dataset_id == resource["id"]
+ assert dataset.location == LOCATION
+
+ conn.api_request.assert_called_once_with(
+ method="POST",
+ path=path,
+ data={
+ "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
+ "labels": {},
+ "location": LOCATION,
+ },
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+
+def test_create_dataset_w_string(PROJECT, DS_ID, LOCATION):
+ path = "/projects/%s/datasets" % PROJECT
+ resource = {
+ "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
+ "etag": "etag",
+ "id": "%s:%s" % (PROJECT, DS_ID),
+ "location": LOCATION,
+ }
+ client = make_client(location=LOCATION)
+ conn = client._connection = make_connection(resource)
+ with mock.patch(
+ "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
+ ) as final_attributes:
+ dataset = client.create_dataset(DS_ID)
+
+ final_attributes.assert_called_once_with({"path": path}, client, None)
+
+ assert dataset.dataset_id == DS_ID
+ assert dataset.project == PROJECT
+ assert dataset.etag == resource["etag"]
+ assert dataset.full_dataset_id == resource["id"]
+ assert dataset.location == LOCATION
+
+ conn.api_request.assert_called_once_with(
+ method="POST",
+ path=path,
+ data={
+ "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
+ "labels": {},
+ "location": LOCATION,
+ },
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+
+def test_create_dataset_alreadyexists_w_exists_ok_false(PROJECT, DS_ID, LOCATION):
+ client = make_client(location=LOCATION)
+ client._connection = make_connection(
+ google.api_core.exceptions.AlreadyExists("dataset already exists")
+ )
+
+ with pytest.raises(google.api_core.exceptions.AlreadyExists):
+ client.create_dataset(DS_ID)
+
+
+def test_create_dataset_alreadyexists_w_exists_ok_true(PROJECT, DS_ID, LOCATION):
+ post_path = "/projects/{}/datasets".format(PROJECT)
+ get_path = "/projects/{}/datasets/{}".format(PROJECT, DS_ID)
+ resource = {
+ "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
+ "etag": "etag",
+ "id": "{}:{}".format(PROJECT, DS_ID),
+ "location": LOCATION,
+ }
+ client = make_client(location=LOCATION)
+ conn = client._connection = make_connection(
+ google.api_core.exceptions.AlreadyExists("dataset already exists"), resource
+ )
+ with mock.patch(
+ "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
+ ) as final_attributes:
+ dataset = client.create_dataset(DS_ID, exists_ok=True)
+
+ final_attributes.assert_called_with({"path": get_path}, client, None)
+
+ assert dataset.dataset_id == DS_ID
+ assert dataset.project == PROJECT
+ assert dataset.etag == resource["etag"]
+ assert dataset.full_dataset_id == resource["id"]
+ assert dataset.location == LOCATION
+
+ conn.api_request.assert_has_calls(
+ [
+ mock.call(
+ method="POST",
+ path=post_path,
+ data={
+ "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
+ "labels": {},
+ "location": LOCATION,
+ },
+ timeout=DEFAULT_TIMEOUT,
+ ),
+ mock.call(method="GET", path=get_path, timeout=DEFAULT_TIMEOUT),
+ ]
+ )
diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py
index fffa46aa8..b33203354 100644
--- a/tests/unit/test_dbapi__helpers.py
+++ b/tests/unit/test_dbapi__helpers.py
@@ -16,15 +16,18 @@
import decimal
import math
import operator as op
+import re
import unittest
+import pytest
+
try:
import pyarrow
except ImportError: # pragma: NO COVER
pyarrow = None
import google.cloud._helpers
-from google.cloud.bigquery import table
+from google.cloud.bigquery import table, enums
from google.cloud.bigquery.dbapi import _helpers
from google.cloud.bigquery.dbapi import exceptions
from tests.unit.helpers import _to_pyarrow
@@ -38,9 +41,8 @@ def test_scalar_to_query_parameter(self):
(123, "INT64"),
(-123456789, "INT64"),
(1.25, "FLOAT64"),
- (decimal.Decimal("1.25"), "NUMERIC"),
(b"I am some bytes", "BYTES"),
- (u"I am a string", "STRING"),
+ ("I am a string", "STRING"),
(datetime.date(2017, 4, 1), "DATE"),
(datetime.time(12, 34, 56), "TIME"),
(datetime.datetime(2012, 3, 4, 5, 6, 7), "DATETIME"),
@@ -50,7 +52,18 @@ def test_scalar_to_query_parameter(self):
),
"TIMESTAMP",
),
+ (decimal.Decimal("1.25"), "NUMERIC"),
+ (decimal.Decimal("9.9999999999999999999999999999999999999E+28"), "NUMERIC"),
+ (decimal.Decimal("1.0E+29"), "BIGNUMERIC"), # more than max NUMERIC value
+ (decimal.Decimal("1.123456789"), "NUMERIC"),
+ (decimal.Decimal("1.1234567891"), "BIGNUMERIC"), # scale > 9
+ (decimal.Decimal("12345678901234567890123456789.012345678"), "NUMERIC"),
+ (
+ decimal.Decimal("12345678901234567890123456789012345678"),
+ "BIGNUMERIC", # larger than max NUMERIC value, despite precision <=38
+ ),
]
+
for value, expected_type in expected_types:
msg = "value: {} expected_type: {}".format(value, expected_type)
parameter = _helpers.scalar_to_query_parameter(value)
@@ -80,8 +93,9 @@ def test_array_to_query_parameter_valid_argument(self):
([123, -456, 0], "INT64"),
([1.25, 2.50], "FLOAT64"),
([decimal.Decimal("1.25")], "NUMERIC"),
+ ([decimal.Decimal("{d38}.{d38}".format(d38="9" * 38))], "BIGNUMERIC"),
([b"foo", b"bar"], "BYTES"),
- ([u"foo", u"bar"], "STRING"),
+ (["foo", "bar"], "STRING"),
([datetime.date(2017, 4, 1), datetime.date(2018, 4, 1)], "DATE"),
([datetime.time(12, 34, 56), datetime.time(10, 20, 30)], "TIME"),
(
@@ -120,7 +134,7 @@ def test_array_to_query_parameter_empty_argument(self):
_helpers.array_to_query_parameter([])
def test_array_to_query_parameter_unsupported_sequence(self):
- unsupported_iterables = [{10, 20, 30}, u"foo", b"bar", bytearray([65, 75, 85])]
+ unsupported_iterables = [{10, 20, 30}, "foo", b"bar", bytearray([65, 75, 85])]
for iterable in unsupported_iterables:
with self.assertRaises(exceptions.ProgrammingError):
_helpers.array_to_query_parameter(iterable)
@@ -130,8 +144,8 @@ def test_array_to_query_parameter_sequence_w_invalid_elements(self):
_helpers.array_to_query_parameter([object(), 2, 7])
def test_to_query_parameters_w_dict(self):
- parameters = {"somebool": True, "somestring": u"a-string-value"}
- query_parameters = _helpers.to_query_parameters(parameters)
+ parameters = {"somebool": True, "somestring": "a-string-value"}
+ query_parameters = _helpers.to_query_parameters(parameters, {})
query_parameter_tuples = []
for param in query_parameters:
query_parameter_tuples.append((param.name, param.type_, param.value))
@@ -140,14 +154,14 @@ def test_to_query_parameters_w_dict(self):
sorted(
[
("somebool", "BOOL", True),
- ("somestring", "STRING", u"a-string-value"),
+ ("somestring", "STRING", "a-string-value"),
]
),
)
def test_to_query_parameters_w_dict_array_param(self):
parameters = {"somelist": [10, 20]}
- query_parameters = _helpers.to_query_parameters(parameters)
+ query_parameters = _helpers.to_query_parameters(parameters, {})
self.assertEqual(len(query_parameters), 1)
param = query_parameters[0]
@@ -160,22 +174,22 @@ def test_to_query_parameters_w_dict_dict_param(self):
parameters = {"my_param": {"foo": "bar"}}
with self.assertRaises(NotImplementedError):
- _helpers.to_query_parameters(parameters)
+ _helpers.to_query_parameters(parameters, {})
def test_to_query_parameters_w_list(self):
- parameters = [True, u"a-string-value"]
- query_parameters = _helpers.to_query_parameters(parameters)
+ parameters = [True, "a-string-value"]
+ query_parameters = _helpers.to_query_parameters(parameters, [None, None])
query_parameter_tuples = []
for param in query_parameters:
query_parameter_tuples.append((param.name, param.type_, param.value))
self.assertSequenceEqual(
sorted(query_parameter_tuples),
- sorted([(None, "BOOL", True), (None, "STRING", u"a-string-value")]),
+ sorted([(None, "BOOL", True), (None, "STRING", "a-string-value")]),
)
def test_to_query_parameters_w_list_array_param(self):
parameters = [[10, 20]]
- query_parameters = _helpers.to_query_parameters(parameters)
+ query_parameters = _helpers.to_query_parameters(parameters, [None])
self.assertEqual(len(query_parameters), 1)
param = query_parameters[0]
@@ -188,10 +202,10 @@ def test_to_query_parameters_w_list_dict_param(self):
parameters = [{"foo": "bar"}]
with self.assertRaises(NotImplementedError):
- _helpers.to_query_parameters(parameters)
+ _helpers.to_query_parameters(parameters, [None])
def test_to_query_parameters_none_argument(self):
- query_parameters = _helpers.to_query_parameters(None)
+ query_parameters = _helpers.to_query_parameters(None, None)
self.assertEqual(query_parameters, [])
@@ -327,3 +341,356 @@ def test_custom_on_closed_error_type(self):
with self.assertRaisesRegex(RuntimeError, "I'm closed!"):
instance.instance_method()
+
+
+VALID_BQ_TYPES = [
+ (name, getattr(enums.SqlParameterScalarTypes, name)._type)
+ for name in dir(enums.SqlParameterScalarTypes)
+ if not name.startswith("_")
+]
+
+
+@pytest.mark.parametrize("alias, type_", VALID_BQ_TYPES)
+def test_scalar_to_query_parameter_honors_given_type(alias, type_):
+ from google.cloud import bigquery
+
+ assert _helpers.scalar_to_query_parameter(1.23, None, alias) == (
+ bigquery.ScalarQueryParameter(None, type_, 1.23)
+ )
+ assert _helpers.scalar_to_query_parameter(None, "foo", alias) == (
+ bigquery.ScalarQueryParameter("foo", type_, None)
+ )
+
+
+def test_scalar_to_query_parameter_honors_given_type_errors_on_invalid():
+ with pytest.raises(
+ google.cloud.bigquery.dbapi.exceptions.ProgrammingError,
+ match="The given parameter type, INT, for foo is not a valid BigQuery scalar type.",
+ ):
+ _helpers.scalar_to_query_parameter(None, "foo", "INT")
+
+
+@pytest.mark.parametrize("alias, type_", VALID_BQ_TYPES)
+def test_array_to_query_parameter_honors_given_type(alias, type_):
+ from google.cloud import bigquery
+
+ assert _helpers.array_to_query_parameter([1.23], None, alias) == (
+ bigquery.ArrayQueryParameter(None, type_, [1.23])
+ )
+ assert _helpers.array_to_query_parameter((), "foo", alias) == (
+ bigquery.ArrayQueryParameter("foo", type_, ())
+ )
+
+
+def test_array_to_query_parameter_honors_given_type_errors_on_invalid():
+ with pytest.raises(
+ google.cloud.bigquery.dbapi.exceptions.ProgrammingError,
+ match="The given parameter type, INT, for foo is not a valid BigQuery scalar type.",
+ ):
+ _helpers.array_to_query_parameter((), "foo", "INT")
+
+
+def test_to_query_parameters_dict_w_types():
+ from google.cloud import bigquery
+
+ assert sorted(
+ _helpers.to_query_parameters(
+ dict(i=1, x=1.2, y=None, q="hi", z=[]),
+ dict(x="numeric", y="string", q="string(9)", z="float64"),
+ ),
+ key=lambda p: p.name,
+ ) == [
+ bigquery.ScalarQueryParameter("i", "INT64", 1),
+ bigquery.ScalarQueryParameter("q", "STRING", "hi"),
+ bigquery.ScalarQueryParameter("x", "NUMERIC", 1.2),
+ bigquery.ScalarQueryParameter("y", "STRING", None),
+ bigquery.ArrayQueryParameter("z", "FLOAT64", []),
+ ]
+
+
+def test_to_query_parameters_list_w_types():
+ from google.cloud import bigquery
+
+ assert _helpers.to_query_parameters(
+ [1, 1.2, None, "hi", []], [None, "numeric", "string", "string(9)", "float64"]
+ ) == [
+ bigquery.ScalarQueryParameter(None, "INT64", 1),
+ bigquery.ScalarQueryParameter(None, "NUMERIC", 1.2),
+ bigquery.ScalarQueryParameter(None, "STRING", None),
+ bigquery.ScalarQueryParameter(None, "STRING", "hi"),
+ bigquery.ArrayQueryParameter(None, "FLOAT64", []),
+ ]
+
+
+@pytest.mark.parametrize(
+ "value,type_,expect",
+ [
+ (
+ [],
+ "ARRAY",
+ {
+ "parameterType": {"type": "ARRAY", "arrayType": {"type": "INT64"}},
+ "parameterValue": {"arrayValues": []},
+ },
+ ),
+ (
+ [1, 2],
+ "ARRAY",
+ {
+ "parameterType": {"type": "ARRAY", "arrayType": {"type": "INT64"}},
+ "parameterValue": {"arrayValues": [{"value": "1"}, {"value": "2"}]},
+ },
+ ),
+ (
+ dict(
+ name="par",
+ children=[
+ dict(name="ch1", bdate=datetime.date(2021, 1, 1)),
+ dict(name="ch2", bdate=datetime.date(2021, 1, 2)),
+ ],
+ ),
+ "struct>>",
+ {
+ "parameterType": {
+ "structTypes": [
+ {"name": "name", "type": {"type": "STRING"}},
+ {
+ "name": "children",
+ "type": {
+ "arrayType": {
+ "structTypes": [
+ {"name": "name", "type": {"type": "STRING"}},
+ {"name": "bdate", "type": {"type": "DATE"}},
+ ],
+ "type": "STRUCT",
+ },
+ "type": "ARRAY",
+ },
+ },
+ ],
+ "type": "STRUCT",
+ },
+ "parameterValue": {
+ "structValues": {
+ "children": {
+ "arrayValues": [
+ {
+ "structValues": {
+ "bdate": {"value": "2021-01-01"},
+ "name": {"value": "ch1"},
+ }
+ },
+ {
+ "structValues": {
+ "bdate": {"value": "2021-01-02"},
+ "name": {"value": "ch2"},
+ }
+ },
+ ]
+ },
+ "name": {"value": "par"},
+ }
+ },
+ },
+ ),
+ (
+ dict(
+ name="par",
+ children=[
+ dict(name="ch1", bdate=datetime.date(2021, 1, 1)),
+ dict(name="ch2", bdate=datetime.date(2021, 1, 2)),
+ ],
+ ),
+ "struct>>",
+ {
+ "parameterType": {
+ "structTypes": [
+ {"name": "name", "type": {"type": "STRING"}},
+ {
+ "name": "children",
+ "type": {
+ "arrayType": {
+ "structTypes": [
+ {"name": "name", "type": {"type": "STRING"}},
+ {"name": "bdate", "type": {"type": "DATE"}},
+ ],
+ "type": "STRUCT",
+ },
+ "type": "ARRAY",
+ },
+ },
+ ],
+ "type": "STRUCT",
+ },
+ "parameterValue": {
+ "structValues": {
+ "children": {
+ "arrayValues": [
+ {
+ "structValues": {
+ "bdate": {"value": "2021-01-01"},
+ "name": {"value": "ch1"},
+ }
+ },
+ {
+ "structValues": {
+ "bdate": {"value": "2021-01-02"},
+ "name": {"value": "ch2"},
+ }
+ },
+ ]
+ },
+ "name": {"value": "par"},
+ }
+ },
+ },
+ ),
+ (
+ ["1", "hi"],
+ "ARRAY",
+ {
+ "parameterType": {"type": "ARRAY", "arrayType": {"type": "STRING"}},
+ "parameterValue": {"arrayValues": [{"value": "1"}, {"value": "hi"}]},
+ },
+ ),
+ ],
+)
+def test_complex_query_parameter_type(type_, value, expect):
+ from google.cloud.bigquery.dbapi._helpers import complex_query_parameter
+
+ param = complex_query_parameter("test", value, type_).to_api_repr()
+ assert param.pop("name") == "test"
+ assert param == expect
+
+
+def _expected_error_match(expect):
+ return "^" + re.escape(expect) + "$"
+
+
+@pytest.mark.parametrize(
+ "value,type_,expect",
+ [
+ (
+ [],
+ "ARRAY",
+ "The given parameter type, INT,"
+ " is not a valid BigQuery scalar type, in ARRAY.",
+ ),
+ ([], "x", "Invalid parameter type, x"),
+ ({}, "struct", "Invalid struct field, int, in struct"),
+ (
+ {"x": 1},
+ "struct",
+ "The given parameter type, int,"
+ " for x is not a valid BigQuery scalar type, in struct.",
+ ),
+ ([], "x<", "Invalid parameter type, x<"),
+ (0, "ARRAY", "Array type with non-array-like value with type int"),
+ (
+ [],
+ "ARRAY>",
+ "Array can't contain an array in ARRAY>",
+ ),
+ ([], "struct", "Non-mapping value for type struct"),
+ ({}, "struct", "No field value for x in struct"),
+ ({"x": 1, "y": 1}, "struct", "Extra data keys for struct"),
+ ([], "array>", "Invalid struct field, xxx, in array>"),
+ ([], "array<<>>", "Invalid parameter type, <>"),
+ ],
+)
+def test_complex_query_parameter_type_errors(type_, value, expect):
+ from google.cloud.bigquery.dbapi._helpers import complex_query_parameter
+ from google.cloud.bigquery.dbapi import exceptions
+
+ with pytest.raises(
+ exceptions.ProgrammingError, match=_expected_error_match(expect),
+ ):
+ complex_query_parameter("test", value, type_)
+
+
+@pytest.mark.parametrize(
+ "parameters,parameter_types,expect",
+ [
+ (
+ [[], dict(name="ch1", bdate=datetime.date(2021, 1, 1))],
+ ["ARRAY", "struct"],
+ [
+ {
+ "parameterType": {"arrayType": {"type": "INT64"}, "type": "ARRAY"},
+ "parameterValue": {"arrayValues": []},
+ },
+ {
+ "parameterType": {
+ "structTypes": [
+ {"name": "name", "type": {"type": "STRING"}},
+ {"name": "bdate", "type": {"type": "DATE"}},
+ ],
+ "type": "STRUCT",
+ },
+ "parameterValue": {
+ "structValues": {
+ "bdate": {"value": "2021-01-01"},
+ "name": {"value": "ch1"},
+ }
+ },
+ },
+ ],
+ ),
+ (
+ dict(ids=[], child=dict(name="ch1", bdate=datetime.date(2021, 1, 1))),
+ dict(ids="ARRAY", child="struct"),
+ [
+ {
+ "name": "ids",
+ "parameterType": {"arrayType": {"type": "INT64"}, "type": "ARRAY"},
+ "parameterValue": {"arrayValues": []},
+ },
+ {
+ "name": "child",
+ "parameterType": {
+ "structTypes": [
+ {"name": "name", "type": {"type": "STRING"}},
+ {"name": "bdate", "type": {"type": "DATE"}},
+ ],
+ "type": "STRUCT",
+ },
+ "parameterValue": {
+ "structValues": {
+ "bdate": {"value": "2021-01-01"},
+ "name": {"value": "ch1"},
+ }
+ },
+ },
+ ],
+ ),
+ ],
+)
+def test_to_query_parameters_complex_types(parameters, parameter_types, expect):
+ from google.cloud.bigquery.dbapi._helpers import to_query_parameters
+
+ result = [p.to_api_repr() for p in to_query_parameters(parameters, parameter_types)]
+ assert result == expect
+
+
+def test_to_query_parameters_struct_error():
+ from google.cloud.bigquery.dbapi._helpers import to_query_parameters
+
+ with pytest.raises(
+ NotImplementedError,
+ match=_expected_error_match(
+ "STRUCT-like parameter values are not supported, "
+ "unless an explicit type is give in the parameter placeholder "
+ "(e.g. '%(:struct<...>)s')."
+ ),
+ ):
+ to_query_parameters([dict(x=1)], [None])
+
+ with pytest.raises(
+ NotImplementedError,
+ match=_expected_error_match(
+ "STRUCT-like parameter values are not supported (parameter foo), "
+ "unless an explicit type is give in the parameter placeholder "
+ "(e.g. '%(foo:struct<...>)s')."
+ ),
+ ):
+ to_query_parameters(dict(foo=dict(x=1)), {})
diff --git a/tests/unit/test_dbapi_connection.py b/tests/unit/test_dbapi_connection.py
index edec559b2..0576cad38 100644
--- a/tests/unit/test_dbapi_connection.py
+++ b/tests/unit/test_dbapi_connection.py
@@ -51,7 +51,7 @@ def test_ctor_wo_bqstorage_client(self):
from google.cloud.bigquery.dbapi import Connection
mock_client = self._mock_client()
- mock_client._create_bqstorage_client.return_value = None
+ mock_client._ensure_bqstorage_client.return_value = None
connection = self._make_one(client=mock_client)
self.assertIsInstance(connection, Connection)
@@ -66,9 +66,15 @@ def test_ctor_w_bqstorage_client(self):
mock_client = self._mock_client()
mock_bqstorage_client = self._mock_bqstorage_client()
+ mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client
+
connection = self._make_one(
client=mock_client, bqstorage_client=mock_bqstorage_client,
)
+
+ mock_client._ensure_bqstorage_client.assert_called_once_with(
+ mock_bqstorage_client
+ )
self.assertIsInstance(connection, Connection)
self.assertIs(connection._client, mock_client)
self.assertIs(connection._bqstorage_client, mock_bqstorage_client)
@@ -92,9 +98,11 @@ def test_connect_w_client(self):
mock_client = self._mock_client()
mock_bqstorage_client = self._mock_bqstorage_client()
- mock_client._create_bqstorage_client.return_value = mock_bqstorage_client
+ mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client
connection = connect(client=mock_client)
+
+ mock_client._ensure_bqstorage_client.assert_called_once_with()
self.assertIsInstance(connection, Connection)
self.assertIs(connection._client, mock_client)
self.assertIs(connection._bqstorage_client, mock_bqstorage_client)
@@ -108,9 +116,15 @@ def test_connect_w_both_clients(self):
mock_client = self._mock_client()
mock_bqstorage_client = self._mock_bqstorage_client()
+ mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client
+
connection = connect(
client=mock_client, bqstorage_client=mock_bqstorage_client,
)
+
+ mock_client._ensure_bqstorage_client.assert_called_once_with(
+ mock_bqstorage_client
+ )
self.assertIsInstance(connection, Connection)
self.assertIs(connection._client, mock_client)
self.assertIs(connection._bqstorage_client, mock_bqstorage_client)
@@ -140,7 +154,7 @@ def test_close_closes_all_created_bigquery_clients(self):
return_value=client,
)
bqstorage_client_patcher = mock.patch.object(
- client, "_create_bqstorage_client", return_value=bqstorage_client,
+ client, "_ensure_bqstorage_client", return_value=bqstorage_client,
)
with client_patcher, bqstorage_client_patcher:
@@ -176,6 +190,22 @@ def test_close_closes_all_created_cursors(self):
self.assertTrue(cursor_1._closed)
self.assertTrue(cursor_2._closed)
+ def test_close_closes_only_open_created_cursors(self):
+ connection = self._make_one(client=self._mock_client())
+ cursor_1 = connection.cursor()
+ cursor_2 = connection.cursor()
+ self.assertFalse(cursor_1._closed)
+ self.assertFalse(cursor_2._closed)
+
+ cursor_1.close()
+ self.assertTrue(cursor_1._closed)
+ cursor_1.close = mock.MagicMock()
+
+ connection.close()
+
+ self.assertFalse(cursor_1.close.called)
+ self.assertTrue(cursor_2._closed)
+
def test_does_not_keep_cursor_instances_alive(self):
from google.cloud.bigquery.dbapi import Cursor
diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py
index cbd6f6909..026810aaf 100644
--- a/tests/unit/test_dbapi_cursor.py
+++ b/tests/unit/test_dbapi_cursor.py
@@ -12,10 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import mock
import operator as op
import unittest
-import mock
+import pytest
+
try:
import pyarrow
@@ -70,7 +72,7 @@ def _mock_client(
mock_client._default_query_job_config = default_query_job_config
# Assure that the REST client gets used, not the BQ Storage client.
- mock_client._create_bqstorage_client.return_value = None
+ mock_client._ensure_bqstorage_client.return_value = None
return mock_client
@@ -123,6 +125,7 @@ def _mock_job(
schema=schema,
num_dml_affected_rows=num_dml_affected_rows,
)
+ mock_job.destination.project = "P"
mock_job.destination.to_bqstorage.return_value = (
"projects/P/datasets/DS/tables/T"
)
@@ -177,6 +180,7 @@ def test_raises_error_if_closed(self):
"fetchone",
"setinputsizes",
"setoutputsize",
+ "__iter__",
)
for method in method_names:
@@ -307,6 +311,7 @@ def test_fetchall_w_bqstorage_client_fetch_success(self):
mock_bqstorage_client = self._mock_bqstorage_client(
stream_count=1, rows=bqstorage_streamed_rows,
)
+ mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client
connection = dbapi.connect(
client=mock_client, bqstorage_client=mock_bqstorage_client,
@@ -337,6 +342,7 @@ def test_fetchall_w_bqstorage_client_fetch_no_rows(self):
mock_client = self._mock_client(rows=[])
mock_bqstorage_client = self._mock_bqstorage_client(stream_count=0)
+ mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client
connection = dbapi.connect(
client=mock_client, bqstorage_client=mock_bqstorage_client,
@@ -361,7 +367,11 @@ def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self):
row_data = [table.Row([1.1, 1.2], {"foo": 0, "bar": 1})]
+ def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs):
+ return bqstorage_client
+
mock_client = self._mock_client(rows=row_data)
+ mock_client._ensure_bqstorage_client.side_effect = fake_ensure_bqstorage_client
mock_bqstorage_client = self._mock_bqstorage_client(
stream_count=1, rows=row_data,
)
@@ -380,6 +390,56 @@ def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self):
# the default client was not used
mock_client.list_rows.assert_not_called()
+ @unittest.skipIf(
+ bigquery_storage is None, "Requires `google-cloud-bigquery-storage`"
+ )
+ @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
+ def test_fetchall_w_bqstorage_client_no_arrow_compression(self):
+ from google.cloud.bigquery import dbapi
+ from google.cloud.bigquery import table
+
+ # Use unordered data to also test any non-deterministic key order in dicts.
+ row_data = [table.Row([1.2, 1.1], {"bar": 1, "foo": 0})]
+ bqstorage_streamed_rows = [{"bar": _to_pyarrow(1.2), "foo": _to_pyarrow(1.1)}]
+
+ def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs):
+ return bqstorage_client
+
+ mock_client = self._mock_client(rows=row_data)
+ mock_client._ensure_bqstorage_client.side_effect = fake_ensure_bqstorage_client
+ mock_bqstorage_client = self._mock_bqstorage_client(
+ stream_count=1, rows=bqstorage_streamed_rows,
+ )
+
+ connection = dbapi.connect(
+ client=mock_client, bqstorage_client=mock_bqstorage_client,
+ )
+ cursor = connection.cursor()
+ cursor.execute("SELECT foo, bar FROM some_table")
+
+ with mock.patch(
+ "google.cloud.bigquery.dbapi.cursor._ARROW_COMPRESSION_SUPPORT", new=False
+ ):
+ rows = cursor.fetchall()
+
+ mock_client.list_rows.assert_not_called() # The default client was not used.
+
+ # Check the BQ Storage session config.
+ expected_session = bigquery_storage.ReadSession(
+ table="projects/P/datasets/DS/tables/T",
+ data_format=bigquery_storage.DataFormat.ARROW,
+ )
+ mock_bqstorage_client.create_read_session.assert_called_once_with(
+ parent="projects/P", read_session=expected_session, max_stream_count=1
+ )
+
+ # Check the data returned.
+ field_value = op.itemgetter(1)
+ sorted_row_data = [sorted(row.items(), key=field_value) for row in rows]
+ expected_row_data = [[("foo", 1.1), ("bar", 1.2)]]
+
+ self.assertEqual(sorted_row_data, expected_row_data)
+
def test_execute_custom_job_id(self):
from google.cloud.bigquery.dbapi import connect
@@ -562,18 +622,52 @@ def test_executemany_w_dml(self):
(("test",), ("anothertest",)),
)
self.assertIsNone(cursor.description)
- self.assertEqual(cursor.rowcount, 12)
+ self.assertEqual(cursor.rowcount, 24) # 24 = 2 * 12, because the count is cumulative.
+
+ def test_executemany_empty(self):
+ from google.cloud.bigquery.dbapi import connect
+
+ connection = connect(self._mock_client(rows=[], num_dml_affected_rows=12))
+ cursor = connection.cursor()
+ cursor.executemany((), ())
+ self.assertIsNone(cursor.description)
+ self.assertEqual(cursor.rowcount, -1)
+
+ def test_is_iterable(self):
+ from google.cloud.bigquery import dbapi
+
+ connection = dbapi.connect(
+ self._mock_client(rows=[("hello", "there", 7), ("good", "bye", -3)])
+ )
+ cursor = connection.cursor()
+ cursor.execute("SELECT foo, bar, baz FROM hello_world WHERE baz < 42;")
+
+ rows_iter = iter(cursor)
+
+ row = next(rows_iter)
+ self.assertEqual(row, ("hello", "there", 7))
+ row = next(rows_iter)
+ self.assertEqual(row, ("good", "bye", -3))
+ self.assertRaises(StopIteration, next, rows_iter)
+
+ self.assertEqual(
+ list(cursor),
+ [],
+ "Iterating again over the same results should produce no rows.",
+ )
def test__format_operation_w_dict(self):
from google.cloud.bigquery.dbapi import cursor
- formatted_operation = cursor._format_operation(
- "SELECT %(somevalue)s, %(a `weird` one)s;",
+ parameter_types = {}
+ formatted_operation, parameter_types = cursor._format_operation(
+ "SELECT %(somevalue)s, %(a `weird` one:STRING)s;",
{"somevalue": "hi", "a `weird` one": "world"},
)
self.assertEqual(
formatted_operation, "SELECT @`somevalue`, @`a \\`weird\\` one`;"
)
+ self.assertEqual(parameter_types, {"a `weird` one": "STRING"})
def test__format_operation_w_wrong_dict(self):
from google.cloud.bigquery import dbapi
@@ -586,10 +680,18 @@ def test__format_operation_w_wrong_dict(self):
{"somevalue-not-here": "hi", "othervalue": "world"},
)
+ def test__format_operation_w_redundant_dict_key(self):
+ from google.cloud.bigquery.dbapi import cursor
+
+ formatted_operation, _ = cursor._format_operation(
+ "SELECT %(somevalue)s;", {"somevalue": "foo", "value-not-used": "bar"}
+ )
+ self.assertEqual(formatted_operation, "SELECT @`somevalue`;")
+
def test__format_operation_w_sequence(self):
from google.cloud.bigquery.dbapi import cursor
- formatted_operation = cursor._format_operation(
+ formatted_operation, _ = cursor._format_operation(
"SELECT %s, %s;", ("hello", "world")
)
self.assertEqual(formatted_operation, "SELECT ?, ?;")
@@ -605,8 +707,156 @@ def test__format_operation_w_too_short_sequence(self):
("hello",),
)
+ def test__format_operation_w_too_long_sequence(self):
+ from google.cloud.bigquery import dbapi
+ from google.cloud.bigquery.dbapi import cursor
+
+ self.assertRaises(
+ dbapi.ProgrammingError,
+ cursor._format_operation,
+ "SELECT %s, %s;",
+ ("hello", "world", "everyone"),
+ )
+
def test__format_operation_w_empty_dict(self):
from google.cloud.bigquery.dbapi import cursor
- formatted_operation = cursor._format_operation("SELECT '%f'", {})
+ formatted_operation, _ = cursor._format_operation("SELECT '%f'", {})
self.assertEqual(formatted_operation, "SELECT '%f'")
+
+ def test__format_operation_wo_params_single_percent(self):
+ from google.cloud.bigquery.dbapi import cursor
+
+ formatted_operation, _ = cursor._format_operation("SELECT '%'", {})
+ self.assertEqual(formatted_operation, "SELECT '%'")
+
+ def test__format_operation_wo_params_double_percents(self):
+ from google.cloud.bigquery.dbapi import cursor
+
+ formatted_operation, _ = cursor._format_operation("SELECT '%%'", {})
+ self.assertEqual(formatted_operation, "SELECT '%'")
+
+ def test__format_operation_unescaped_percent_w_dict_param(self):
+ from google.cloud.bigquery import dbapi
+ from google.cloud.bigquery.dbapi import cursor
+
+ self.assertRaises(
+ dbapi.ProgrammingError,
+ cursor._format_operation,
+ "SELECT %(foo)s, '100 %';",
+ {"foo": "bar"},
+ )
+
+ def test__format_operation_unescaped_percent_w_list_param(self):
+ from google.cloud.bigquery import dbapi
+ from google.cloud.bigquery.dbapi import cursor
+
+ self.assertRaises(
+ dbapi.ProgrammingError,
+ cursor._format_operation,
+ "SELECT %s, %s, '100 %';",
+ ["foo", "bar"],
+ )
+
+ def test__format_operation_no_placeholders(self):
+ from google.cloud.bigquery import dbapi
+ from google.cloud.bigquery.dbapi import cursor
+
+ self.assertRaises(
+ dbapi.ProgrammingError,
+ cursor._format_operation,
+ "SELECT 42",
+ ["foo", "bar"],
+ )
+
+
+@pytest.mark.parametrize(
+ "inp,expect",
+ [
+ ("", ("", None)),
+ ("values(%(foo)s, %(bar)s)", ("values(%(foo)s, %(bar)s)", {})),
+ (
+ "values('%%(oof:INT64)s', %(foo)s, %(bar)s)",
+ ("values('%%(oof:INT64)s', %(foo)s, %(bar)s)", {}),
+ ),
+ (
+ "values(%(foo:INT64)s, %(bar)s)",
+ ("values(%(foo)s, %(bar)s)", dict(foo="INT64")),
+ ),
+ (
+ "values('%%(oof:INT64)s, %(foo:INT64)s, %(foo)s)",
+ ("values('%%(oof:INT64)s, %(foo)s, %(foo)s)", dict(foo="INT64")),
+ ),
+ (
+ "values(%(foo:INT64)s, %(foo:INT64)s)",
+ ("values(%(foo)s, %(foo)s)", dict(foo="INT64")),
+ ),
+ (
+ "values(%(foo:INT64)s, %(bar:NUMERIC)s) 100 %",
+ ("values(%(foo)s, %(bar)s) 100 %", dict(foo="INT64", bar="NUMERIC")),
+ ),
+ (" %s %()s %(:int64)s ", (" %s %s %s ", [None, None, "int64"])),
+ (" %%s %s %()s %(:int64)s ", (" %%s %s %s %s ", [None, None, "int64"])),
+ (
+ "values(%%%(foo:INT64)s, %(bar)s)",
+ ("values(%%%(foo)s, %(bar)s)", dict(foo="INT64")),
+ ),
+ (
+ "values(%%%%(foo:INT64)s, %(bar)s)",
+ ("values(%%%%(foo:INT64)s, %(bar)s)", dict()),
+ ),
+ (
+ "values(%%%%%(foo:INT64)s, %(bar)s)",
+ ("values(%%%%%(foo)s, %(bar)s)", dict(foo="INT64")),
+ ),
+ (
+ "values(%%%%%(foo:struct)s, %(bar)s)",
+ ("values(%%%%%(foo)s, %(bar)s)", dict(foo="struct")),
+ ),
+ (
+ "values(%%%%%(foo:struct)s, %(bar)s)",
+ ("values(%%%%%(foo)s, %(bar)s)", dict(foo="struct")),
+ ),
+ (
+ "values(%(foo:struct)s, %(bar)s)",
+ (
+ "values(%(foo)s, %(bar)s)",
+ dict(foo="struct"),
+ ),
+ ),
+ (
+ "values(%(foo:struct)s, %(bar)s)",
+ (
+ "values(%(foo)s, %(bar)s)",
+ dict(foo="struct"),
+ ),
+ ),
+ (
+ "values(%(foo:string(10))s, %(bar)s)",
+ ("values(%(foo)s, %(bar)s)", dict(foo="string(10)")),
+ ),
+ ],
+)
+def test__extract_types(inp, expect):
+ from google.cloud.bigquery.dbapi.cursor import _extract_types as et
+
+ assert et(inp) == expect
+
+
+@pytest.mark.parametrize(
+ "match,inp",
+ [
+ (
+ "Conflicting types for foo: numeric and int64.",
+ " %(foo:numeric)s %(foo:int64)s ",
+ ),
+ (r"' %s %\(foo\)s ' mixes named and unamed parameters.", " %s %(foo)s "),
+ (r"' %\(foo\)s %s ' mixes named and unamed parameters.", " %(foo)s %s "),
+ ],
+)
+def test__extract_types_fail(match, inp):
+ from google.cloud.bigquery.dbapi.cursor import _extract_types as et
+ from google.cloud.bigquery.dbapi import exceptions
+
+ with pytest.raises(exceptions.ProgrammingError, match=match):
+ et(inp)
diff --git a/tests/unit/test_dbapi_types.py b/tests/unit/test_dbapi_types.py
index e05660ffe..cf282c68b 100644
--- a/tests/unit/test_dbapi_types.py
+++ b/tests/unit/test_dbapi_types.py
@@ -15,6 +15,8 @@
import datetime
import unittest
+import pytest
+
import google.cloud._helpers
from google.cloud.bigquery.dbapi import types
@@ -26,10 +28,6 @@ def test_binary_type(self):
self.assertEqual("STRUCT", types.BINARY)
self.assertNotEqual("STRING", types.BINARY)
- def test_binary_constructor(self):
- self.assertEqual(types.Binary(u"hello"), b"hello")
- self.assertEqual(types.Binary(u"\u1f60"), u"\u1f60".encode("utf-8"))
-
def test_timefromticks(self):
somedatetime = datetime.datetime(
2017, 2, 18, 12, 47, 26, tzinfo=google.cloud._helpers.UTC
@@ -40,3 +38,29 @@ def test_timefromticks(self):
types.TimeFromTicks(ticks, google.cloud._helpers.UTC),
datetime.time(12, 47, 26, tzinfo=google.cloud._helpers.UTC),
)
+
+
+class CustomBinary:
+ def __bytes__(self):
+ return b"Google"
+
+
+@pytest.mark.parametrize(
+ "raw,expected",
+ [
+ (u"hello", b"hello"),
+ (u"\u1f60", u"\u1f60".encode("utf-8")),
+ (b"hello", b"hello"),
+ (bytearray(b"hello"), b"hello"),
+ (memoryview(b"hello"), b"hello"),
+ (CustomBinary(), b"Google"),
+ ],
+)
+def test_binary_constructor(raw, expected):
+ assert types.Binary(raw) == expected
+
+
+@pytest.mark.parametrize("bad", (42, 42.0, None))
+def test_invalid_binary_constructor(bad):
+ with pytest.raises(TypeError):
+ types.Binary(bad)
diff --git a/tests/unit/test_delete_dataset.py b/tests/unit/test_delete_dataset.py
new file mode 100644
index 000000000..b48beb147
--- /dev/null
+++ b/tests/unit/test_delete_dataset.py
@@ -0,0 +1,79 @@
+# Copyright 2021 Google LLC
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# https://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .helpers import make_connection, make_client, dataset_polymorphic
+import google.api_core.exceptions
+from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
+import pytest
+
+
+@dataset_polymorphic
+def test_delete_dataset(make_dataset, get_reference, client, PROJECT, DS_ID):
+ dataset = make_dataset(PROJECT, DS_ID)
+ PATH = "projects/%s/datasets/%s" % (PROJECT, DS_ID)
+ conn = client._connection = make_connection({})
+ client.delete_dataset(dataset, timeout=7.5)
+ conn.api_request.assert_called_with(
+ method="DELETE", path="/%s" % PATH, query_params={}, timeout=7.5
+ )
+
+
+@dataset_polymorphic
+def test_delete_dataset_delete_contents(
+ make_dataset, get_reference, client, PROJECT, DS_ID
+):
+ PATH = "projects/%s/datasets/%s" % (PROJECT, DS_ID)
+ conn = client._connection = make_connection({})
+ dataset = make_dataset(PROJECT, DS_ID)
+ client.delete_dataset(dataset, delete_contents=True)
+ conn.api_request.assert_called_with(
+ method="DELETE",
+ path="/%s" % PATH,
+ query_params={"deleteContents": "true"},
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+
+def test_delete_dataset_wrong_type(client):
+ with pytest.raises(TypeError):
+ client.delete_dataset(42)
+
+
+def test_delete_dataset_w_not_found_ok_false(PROJECT, DS_ID):
+ path = "/projects/{}/datasets/{}".format(PROJECT, DS_ID)
+ http = object()
+ client = make_client(_http=http)
+ conn = client._connection = make_connection(
+ google.api_core.exceptions.NotFound("dataset not found")
+ )
+
+ with pytest.raises(google.api_core.exceptions.NotFound):
+ client.delete_dataset(DS_ID)
+
+ conn.api_request.assert_called_with(
+ method="DELETE", path=path, query_params={}, timeout=DEFAULT_TIMEOUT
+ )
+
+
+def test_delete_dataset_w_not_found_ok_true(PROJECT, DS_ID):
+ path = "/projects/{}/datasets/{}".format(PROJECT, DS_ID)
+ http = object()
+ client = make_client(_http=http)
+ conn = client._connection = make_connection(
+ google.api_core.exceptions.NotFound("dataset not found")
+ )
+ client.delete_dataset(DS_ID, not_found_ok=True)
+ conn.api_request.assert_called_with(
+ method="DELETE", path=path, query_params={}, timeout=DEFAULT_TIMEOUT
+ )
diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py
index 4b6ef5118..1f49dba5d 100644
--- a/tests/unit/test_external_config.py
+++ b/tests/unit/test_external_config.py
@@ -74,6 +74,7 @@ def test_to_api_repr_base(self):
ec.autodetect = True
ec.ignore_unknown_values = False
ec.compression = "compression"
+ ec.connection_id = "path/to/connection"
ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")]
exp_schema = {
@@ -82,7 +83,7 @@ def test_to_api_repr_base(self):
"name": "full_name",
"type": "STRING",
"mode": "REQUIRED",
- "description": None,
+ "policyTags": {"names": []},
}
]
}
@@ -94,10 +95,17 @@ def test_to_api_repr_base(self):
"autodetect": True,
"ignoreUnknownValues": False,
"compression": "compression",
+ "connectionId": "path/to/connection",
"schema": exp_schema,
}
self.assertEqual(got_resource, exp_resource)
+ def test_connection_id(self):
+ ec = external_config.ExternalConfig("")
+ self.assertIsNone(ec.connection_id)
+ ec.connection_id = "path/to/connection"
+ self.assertEqual(ec.connection_id, "path/to/connection")
+
def test_schema_None(self):
ec = external_config.ExternalConfig("")
ec.schema = None
@@ -424,6 +432,164 @@ def test_to_api_repr_bigtable(self):
self.assertEqual(got_resource, exp_resource)
+ def test_parquet_options_getter(self):
+ from google.cloud.bigquery.format_options import ParquetOptions
+
+ parquet_options = ParquetOptions.from_api_repr(
+ {"enumAsString": True, "enableListInference": False}
+ )
+ ec = external_config.ExternalConfig(
+ external_config.ExternalSourceFormat.PARQUET
+ )
+
+ self.assertIsNone(ec.parquet_options.enum_as_string)
+ self.assertIsNone(ec.parquet_options.enable_list_inference)
+
+ ec._options = parquet_options
+
+ self.assertTrue(ec.parquet_options.enum_as_string)
+ self.assertFalse(ec.parquet_options.enable_list_inference)
+
+ self.assertIs(ec.parquet_options, ec.options)
+
+ def test_parquet_options_getter_non_parquet_format(self):
+ ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV)
+ self.assertIsNone(ec.parquet_options)
+
+ def test_parquet_options_setter(self):
+ from google.cloud.bigquery.format_options import ParquetOptions
+
+ parquet_options = ParquetOptions.from_api_repr(
+ {"enumAsString": False, "enableListInference": True}
+ )
+ ec = external_config.ExternalConfig(
+ external_config.ExternalSourceFormat.PARQUET
+ )
+
+ ec.parquet_options = parquet_options
+
+ # Setting Parquet options should be reflected in the generic options attribute.
+ self.assertFalse(ec.options.enum_as_string)
+ self.assertTrue(ec.options.enable_list_inference)
+
+ def test_parquet_options_setter_non_parquet_format(self):
+ from google.cloud.bigquery.format_options import ParquetOptions
+
+ parquet_options = ParquetOptions.from_api_repr(
+ {"enumAsString": False, "enableListInference": True}
+ )
+ ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV)
+
+ with self.assertRaisesRegex(TypeError, "Cannot set.*source format is CSV"):
+ ec.parquet_options = parquet_options
+
+ def test_from_api_repr_parquet(self):
+ from google.cloud.bigquery.format_options import ParquetOptions
+
+ resource = _copy_and_update(
+ self.BASE_RESOURCE,
+ {
+ "sourceFormat": "PARQUET",
+ "parquetOptions": {"enumAsString": True, "enableListInference": False},
+ },
+ )
+
+ ec = external_config.ExternalConfig.from_api_repr(resource)
+
+ self._verify_base(ec)
+ self.assertEqual(ec.source_format, external_config.ExternalSourceFormat.PARQUET)
+ self.assertIsInstance(ec.options, ParquetOptions)
+ self.assertTrue(ec.parquet_options.enum_as_string)
+ self.assertFalse(ec.parquet_options.enable_list_inference)
+
+ got_resource = ec.to_api_repr()
+
+ self.assertEqual(got_resource, resource)
+
+ del resource["parquetOptions"]["enableListInference"]
+ ec = external_config.ExternalConfig.from_api_repr(resource)
+ self.assertIsNone(ec.options.enable_list_inference)
+ got_resource = ec.to_api_repr()
+ self.assertEqual(got_resource, resource)
+
+ def test_to_api_repr_parquet(self):
+ from google.cloud.bigquery.format_options import ParquetOptions
+
+ ec = external_config.ExternalConfig(
+ external_config.ExternalSourceFormat.PARQUET
+ )
+ options = ParquetOptions.from_api_repr(
+ dict(enumAsString=False, enableListInference=True)
+ )
+ ec._options = options
+
+ exp_resource = {
+ "sourceFormat": external_config.ExternalSourceFormat.PARQUET,
+ "parquetOptions": {"enumAsString": False, "enableListInference": True},
+ }
+
+ got_resource = ec.to_api_repr()
+
+ self.assertEqual(got_resource, exp_resource)
+
+ def test_from_api_repr_decimal_target_types(self):
+ from google.cloud.bigquery.enums import DecimalTargetType
+
+ resource = _copy_and_update(
+ self.BASE_RESOURCE,
+ {
+ "sourceFormat": "FORMAT_FOO",
+ "decimalTargetTypes": [DecimalTargetType.NUMERIC],
+ },
+ )
+
+ ec = external_config.ExternalConfig.from_api_repr(resource)
+
+ self._verify_base(ec)
+ self.assertEqual(ec.source_format, "FORMAT_FOO")
+ self.assertEqual(
+ ec.decimal_target_types, frozenset([DecimalTargetType.NUMERIC])
+ )
+
+ # converting back to API representation should yield the same result
+ got_resource = ec.to_api_repr()
+ self.assertEqual(got_resource, resource)
+
+ del resource["decimalTargetTypes"]
+ ec = external_config.ExternalConfig.from_api_repr(resource)
+ self.assertIsNone(ec.decimal_target_types)
+
+ got_resource = ec.to_api_repr()
+ self.assertEqual(got_resource, resource)
+
+ def test_to_api_repr_decimal_target_types(self):
+ from google.cloud.bigquery.enums import DecimalTargetType
+
+ ec = external_config.ExternalConfig("FORMAT_FOO")
+ ec.decimal_target_types = [DecimalTargetType.NUMERIC, DecimalTargetType.STRING]
+
+ got_resource = ec.to_api_repr()
+
+ expected_resource = {
+ "sourceFormat": "FORMAT_FOO",
+ "decimalTargetTypes": [DecimalTargetType.NUMERIC, DecimalTargetType.STRING],
+ }
+ self.assertEqual(got_resource, expected_resource)
+
+ def test_to_api_repr_decimal_target_types_unset(self):
+ from google.cloud.bigquery.enums import DecimalTargetType
+
+ ec = external_config.ExternalConfig("FORMAT_FOO")
+ ec._properties["decimalTargetTypes"] = [DecimalTargetType.NUMERIC]
+ ec.decimal_target_types = None
+
+ got_resource = ec.to_api_repr()
+
+ expected_resource = {"sourceFormat": "FORMAT_FOO"}
+ self.assertEqual(got_resource, expected_resource)
+
+ ec.decimal_target_types = None # No error if unsetting when already unset.
+
def _copy_and_update(d, u):
d = copy.deepcopy(d)
diff --git a/tests/unit/test_format_options.py b/tests/unit/test_format_options.py
new file mode 100644
index 000000000..ab5f9e05c
--- /dev/null
+++ b/tests/unit/test_format_options.py
@@ -0,0 +1,41 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+class TestParquetOptions:
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.format_options import ParquetOptions
+
+ return ParquetOptions
+
+ def test_ctor(self):
+ config = self._get_target_class()()
+ assert config.enum_as_string is None
+ assert config.enable_list_inference is None
+
+ def test_from_api_repr(self):
+ config = self._get_target_class().from_api_repr(
+ {"enumAsString": False, "enableListInference": True}
+ )
+ assert not config.enum_as_string
+ assert config.enable_list_inference
+
+ def test_to_api_repr(self):
+ config = self._get_target_class()()
+ config.enum_as_string = True
+ config.enable_list_inference = False
+
+ result = config.to_api_repr()
+ assert result == {"enumAsString": True, "enableListInference": False}
diff --git a/tests/unit/test_job_retry.py b/tests/unit/test_job_retry.py
new file mode 100644
index 000000000..b2095d2f2
--- /dev/null
+++ b/tests/unit/test_job_retry.py
@@ -0,0 +1,247 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime
+import re
+
+import mock
+import pytest
+
+import google.api_core.exceptions
+import google.api_core.retry
+
+from .helpers import make_connection
+
+
+# With job_retry_on_query, we're testing 4 scenarios:
+# - No `job_retry` passed, retry on default rateLimitExceeded.
+# - Pass NotFound retry to `query`.
+# - Pass NotFound retry to `result`.
+# - Pass BadRequest retry to query, with the value passed to `result` overriding.
+@pytest.mark.parametrize("job_retry_on_query", [None, "Query", "Result", "Both"])
+@mock.patch("time.sleep")
+def test_retry_failed_jobs(sleep, client, job_retry_on_query):
+ """
+ Test retry of job failures, as opposed to API-invocation failures.
+ """
+
+ retry_notfound = google.api_core.retry.Retry(
+ predicate=google.api_core.retry.if_exception_type(
+ google.api_core.exceptions.NotFound
+ )
+ )
+ retry_badrequest = google.api_core.retry.Retry(
+ predicate=google.api_core.retry.if_exception_type(
+ google.api_core.exceptions.BadRequest
+ )
+ )
+
+ if job_retry_on_query is None:
+ reason = "rateLimitExceeded"
+ else:
+ reason = "notFound"
+
+ err = dict(reason=reason)
+ responses = [
+ dict(status=dict(state="DONE", errors=[err], errorResult=err)),
+ dict(status=dict(state="DONE", errors=[err], errorResult=err)),
+ dict(status=dict(state="DONE", errors=[err], errorResult=err)),
+ dict(status=dict(state="DONE")),
+ dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"),
+ ]
+
+ def api_request(method, path, query_params=None, data=None, **kw):
+ response = responses.pop(0)
+ if data:
+ response["jobReference"] = data["jobReference"]
+ else:
+ response["jobReference"] = dict(
+ jobId=path.split("/")[-1], projectId="PROJECT"
+ )
+ return response
+
+ conn = client._connection = make_connection()
+ conn.api_request.side_effect = api_request
+
+ if job_retry_on_query == "Query":
+ job_retry = dict(job_retry=retry_notfound)
+ elif job_retry_on_query == "Both":
+ # This will be overridden in `result`
+ job_retry = dict(job_retry=retry_badrequest)
+ else:
+ job_retry = {}
+ job = client.query("select 1", **job_retry)
+
+ orig_job_id = job.job_id
+ job_retry = (
+ dict(job_retry=retry_notfound)
+ if job_retry_on_query in ("Result", "Both")
+ else {}
+ )
+ result = job.result(**job_retry)
+ assert result.total_rows == 1
+ assert not responses # We made all the calls we expected to.
+
+ # The job adjusts its job id based on the id of the last attempt.
+ assert job.job_id != orig_job_id
+ assert job.job_id == conn.mock_calls[3][2]["data"]["jobReference"]["jobId"]
+
+ # We had to sleep three times
+ assert len(sleep.mock_calls) == 3
+
+ # Sleeps are random, however they're more than 0
+ assert min(c[1][0] for c in sleep.mock_calls) > 0
+
+ # They're at most 2 * (multiplier**(number of sleeps - 1)) * initial
+ # The default multiplier is 2
+ assert max(c[1][0] for c in sleep.mock_calls) <= 8
+
+ # We can ask for the result again:
+ responses = [
+ dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"),
+ ]
+ orig_job_id = job.job_id
+ result = job.result()
+ assert result.total_rows == 1
+ assert not responses # We made all the calls we expected to.
+
+ # We wouldn't (and didn't) fail, because we're dealing with a successful job.
+ # So the job id hasn't changed.
+ assert job.job_id == orig_job_id
+
+
+# With job_retry_on_query, we're testing 2 scenarios:
+# - Pass None retry to `query`.
+# - Pass None retry to `result`.
+@pytest.mark.parametrize("job_retry_on_query", ["Query", "Result"])
+@mock.patch("time.sleep")
+def test_disable_retry_failed_jobs(sleep, client, job_retry_on_query):
+ """
+ Test that passing ``job_retry=None`` disables retry of job failures.
+ """
+ err = dict(reason="rateLimitExceeded")
+ responses = [dict(status=dict(state="DONE", errors=[err], errorResult=err))] * 3
+
+ def api_request(method, path, query_params=None, data=None, **kw):
+ response = responses.pop(0)
+ response["jobReference"] = data["jobReference"]
+ return response
+
+ conn = client._connection = make_connection()
+ conn.api_request.side_effect = api_request
+
+ if job_retry_on_query == "Query":
+ job_retry = dict(job_retry=None)
+ else:
+ job_retry = {}
+ job = client.query("select 1", **job_retry)
+
+ orig_job_id = job.job_id
+ job_retry = dict(job_retry=None) if job_retry_on_query == "Result" else {}
+ with pytest.raises(google.api_core.exceptions.Forbidden):
+ job.result(**job_retry)
+
+ assert job.job_id == orig_job_id
+ assert len(sleep.mock_calls) == 0
+
+
+@mock.patch("google.api_core.retry.datetime_helpers")
+@mock.patch("time.sleep")
+def test_retry_failed_jobs_after_retry_failed(sleep, datetime_helpers, client):
+ """
+ If at first you don't succeed, maybe you will later. :)
+ """
+ conn = client._connection = make_connection()
+
+ datetime_helpers.utcnow.return_value = datetime.datetime(2021, 7, 29, 10, 43, 2)
+
+ err = dict(reason="rateLimitExceeded")
+
+ def api_request(method, path, query_params=None, data=None, **kw):
+ calls = sleep.mock_calls
+ if calls:
+ datetime_helpers.utcnow.return_value += datetime.timedelta(
+ seconds=calls[-1][1][0]
+ )
+ response = dict(status=dict(state="DONE", errors=[err], errorResult=err))
+ response["jobReference"] = data["jobReference"]
+ return response
+
+ conn.api_request.side_effect = api_request
+
+ job = client.query("select 1")
+ orig_job_id = job.job_id
+
+ with pytest.raises(google.api_core.exceptions.RetryError):
+ job.result()
+
+ # We never got a successful job, so the job id never changed:
+ assert job.job_id == orig_job_id
+
+ # We failed because we couldn't succeed after 120 seconds.
+ # But we can try again:
+ err2 = dict(reason="backendError") # We also retry on this
+ responses = [
+ dict(status=dict(state="DONE", errors=[err2], errorResult=err2)),
+ dict(status=dict(state="DONE", errors=[err], errorResult=err)),
+ dict(status=dict(state="DONE", errors=[err2], errorResult=err2)),
+ dict(status=dict(state="DONE")),
+ dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"),
+ ]
+
+ def api_request(method, path, query_params=None, data=None, **kw):
+ calls = sleep.mock_calls
+ datetime_helpers.utcnow.return_value += datetime.timedelta(
+ seconds=calls[-1][1][0]
+ )
+ response = responses.pop(0)
+ if data:
+ response["jobReference"] = data["jobReference"]
+ else:
+ response["jobReference"] = dict(
+ jobId=path.split("/")[-1], projectId="PROJECT"
+ )
+ return response
+
+ conn.api_request.side_effect = api_request
+ result = job.result()
+ assert result.total_rows == 1
+ assert not responses # We made all the calls we expected to.
+ assert job.job_id != orig_job_id
+
+
+def test_raises_on_job_retry_on_query_with_non_retryable_jobs(client):
+ with pytest.raises(
+ TypeError,
+ match=re.escape(
+ "`job_retry` was provided, but the returned job is"
+ " not retryable, because a custom `job_id` was"
+ " provided."
+ ),
+ ):
+ client.query("select 42", job_id=42, job_retry=google.api_core.retry.Retry())
+
+
+def test_raises_on_job_retry_on_result_with_non_retryable_jobs(client):
+ client._connection = make_connection({})
+ job = client.query("select 42", job_id=42)
+ with pytest.raises(
+ TypeError,
+ match=re.escape(
+ "`job_retry` was provided, but this job is"
+ " not retryable, because a custom `job_id` was"
+ " provided to the query that created this job."
+ ),
+ ):
+ job.result(job_retry=google.api_core.retry.Retry())
diff --git a/tests/unit/test_list_datasets.py b/tests/unit/test_list_datasets.py
new file mode 100644
index 000000000..6f0b55c5e
--- /dev/null
+++ b/tests/unit/test_list_datasets.py
@@ -0,0 +1,125 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import mock
+import pytest
+
+from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
+from .helpers import make_connection
+
+
+@pytest.mark.parametrize(
+ "extra,query", [({}, {}), (dict(page_size=42), dict(maxResults=42))]
+)
+def test_list_datasets_defaults(client, PROJECT, extra, query):
+ from google.cloud.bigquery.dataset import DatasetListItem
+
+ DATASET_1 = "dataset_one"
+ DATASET_2 = "dataset_two"
+ PATH = "projects/%s/datasets" % PROJECT
+ TOKEN = "TOKEN"
+ DATA = {
+ "nextPageToken": TOKEN,
+ "datasets": [
+ {
+ "kind": "bigquery#dataset",
+ "id": "%s:%s" % (PROJECT, DATASET_1),
+ "datasetReference": {"datasetId": DATASET_1, "projectId": PROJECT},
+ "friendlyName": None,
+ },
+ {
+ "kind": "bigquery#dataset",
+ "id": "%s:%s" % (PROJECT, DATASET_2),
+ "datasetReference": {"datasetId": DATASET_2, "projectId": PROJECT},
+ "friendlyName": "Two",
+ },
+ ],
+ }
+ conn = client._connection = make_connection(DATA)
+
+ iterator = client.list_datasets(**extra)
+ with mock.patch(
+ "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
+ ) as final_attributes:
+ page = next(iterator.pages)
+
+ final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None)
+ datasets = list(page)
+ token = iterator.next_page_token
+
+ assert len(datasets) == len(DATA["datasets"])
+ for found, expected in zip(datasets, DATA["datasets"]):
+ assert isinstance(found, DatasetListItem)
+ assert found.full_dataset_id == expected["id"]
+ assert found.friendly_name == expected["friendlyName"]
+ assert token == TOKEN
+
+ conn.api_request.assert_called_once_with(
+ method="GET", path="/%s" % PATH, query_params=query, timeout=DEFAULT_TIMEOUT
+ )
+
+
+def test_list_datasets_w_project_and_timeout(client, PROJECT):
+ conn = client._connection = make_connection({})
+
+ with mock.patch(
+ "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
+ ) as final_attributes:
+ list(client.list_datasets(project="other-project", timeout=7.5))
+
+ final_attributes.assert_called_once_with(
+ {"path": "/projects/other-project/datasets"}, client, None
+ )
+
+ conn.api_request.assert_called_once_with(
+ method="GET",
+ path="/projects/other-project/datasets",
+ query_params={},
+ timeout=7.5,
+ )
+
+
+def test_list_datasets_explicit_response_missing_datasets_key(client, PROJECT):
+ PATH = "projects/%s/datasets" % PROJECT
+ TOKEN = "TOKEN"
+ FILTER = "FILTER"
+ DATA = {}
+ conn = client._connection = make_connection(DATA)
+
+ iterator = client.list_datasets(
+ include_all=True, filter=FILTER, max_results=3, page_token=TOKEN
+ )
+ with mock.patch(
+ "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
+ ) as final_attributes:
+ page = next(iterator.pages)
+
+ final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None)
+ datasets = list(page)
+ token = iterator.next_page_token
+
+ assert len(datasets) == 0
+ assert token is None
+
+ conn.api_request.assert_called_once_with(
+ method="GET",
+ path="/%s" % PATH,
+ query_params={
+ "all": True,
+ "filter": FILTER,
+ "maxResults": 3,
+ "pageToken": TOKEN,
+ },
+ timeout=DEFAULT_TIMEOUT,
+ )
diff --git a/tests/unit/test_list_jobs.py b/tests/unit/test_list_jobs.py
new file mode 100644
index 000000000..1fb40d446
--- /dev/null
+++ b/tests/unit/test_list_jobs.py
@@ -0,0 +1,292 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime
+
+import mock
+import pytest
+
+from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
+from .helpers import make_connection
+
+
+@pytest.mark.parametrize(
+ "extra,query", [({}, {}), (dict(page_size=42), dict(maxResults=42))]
+)
+def test_list_jobs_defaults(client, PROJECT, DS_ID, extra, query):
+ from google.cloud.bigquery.job import CopyJob
+ from google.cloud.bigquery.job import CreateDisposition
+ from google.cloud.bigquery.job import ExtractJob
+ from google.cloud.bigquery.job import LoadJob
+ from google.cloud.bigquery.job import QueryJob
+ from google.cloud.bigquery.job import WriteDisposition
+
+ SOURCE_TABLE = "source_table"
+ DESTINATION_TABLE = "destination_table"
+ QUERY_DESTINATION_TABLE = "query_destination_table"
+ SOURCE_URI = "gs://test_bucket/src_object*"
+ DESTINATION_URI = "gs://test_bucket/dst_object*"
+ JOB_TYPES = {
+ "load_job": LoadJob,
+ "copy_job": CopyJob,
+ "extract_job": ExtractJob,
+ "query_job": QueryJob,
+ }
+ PATH = "projects/%s/jobs" % PROJECT
+ TOKEN = "TOKEN"
+ QUERY = "SELECT * from test_dataset:test_table"
+ ASYNC_QUERY_DATA = {
+ "id": "%s:%s" % (PROJECT, "query_job"),
+ "jobReference": {"projectId": PROJECT, "jobId": "query_job"},
+ "state": "DONE",
+ "configuration": {
+ "query": {
+ "query": QUERY,
+ "destinationTable": {
+ "projectId": PROJECT,
+ "datasetId": DS_ID,
+ "tableId": QUERY_DESTINATION_TABLE,
+ },
+ "createDisposition": CreateDisposition.CREATE_IF_NEEDED,
+ "writeDisposition": WriteDisposition.WRITE_TRUNCATE,
+ }
+ },
+ }
+ EXTRACT_DATA = {
+ "id": "%s:%s" % (PROJECT, "extract_job"),
+ "jobReference": {"projectId": PROJECT, "jobId": "extract_job"},
+ "state": "DONE",
+ "configuration": {
+ "extract": {
+ "sourceTable": {
+ "projectId": PROJECT,
+ "datasetId": DS_ID,
+ "tableId": SOURCE_TABLE,
+ },
+ "destinationUris": [DESTINATION_URI],
+ }
+ },
+ }
+ COPY_DATA = {
+ "id": "%s:%s" % (PROJECT, "copy_job"),
+ "jobReference": {"projectId": PROJECT, "jobId": "copy_job"},
+ "state": "DONE",
+ "configuration": {
+ "copy": {
+ "sourceTables": [
+ {"projectId": PROJECT, "datasetId": DS_ID, "tableId": SOURCE_TABLE}
+ ],
+ "destinationTable": {
+ "projectId": PROJECT,
+ "datasetId": DS_ID,
+ "tableId": DESTINATION_TABLE,
+ },
+ }
+ },
+ }
+ LOAD_DATA = {
+ "id": "%s:%s" % (PROJECT, "load_job"),
+ "jobReference": {"projectId": PROJECT, "jobId": "load_job"},
+ "state": "DONE",
+ "configuration": {
+ "load": {
+ "destinationTable": {
+ "projectId": PROJECT,
+ "datasetId": DS_ID,
+ "tableId": SOURCE_TABLE,
+ },
+ "sourceUris": [SOURCE_URI],
+ }
+ },
+ }
+ DATA = {
+ "nextPageToken": TOKEN,
+ "jobs": [ASYNC_QUERY_DATA, EXTRACT_DATA, COPY_DATA, LOAD_DATA],
+ }
+ conn = client._connection = make_connection(DATA)
+
+ iterator = client.list_jobs(**extra)
+ with mock.patch(
+ "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
+ ) as final_attributes:
+ page = next(iterator.pages)
+
+ final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None)
+ jobs = list(page)
+ token = iterator.next_page_token
+
+ assert len(jobs) == len(DATA["jobs"])
+ for found, expected in zip(jobs, DATA["jobs"]):
+ name = expected["jobReference"]["jobId"]
+ assert isinstance(found, JOB_TYPES[name])
+ assert found.job_id == name
+ assert token == TOKEN
+
+ conn.api_request.assert_called_once_with(
+ method="GET",
+ path="/%s" % PATH,
+ query_params=dict({"projection": "full"}, **query),
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+
+def test_list_jobs_load_job_wo_sourceUris(client, PROJECT, DS_ID):
+ from google.cloud.bigquery.job import LoadJob
+
+ SOURCE_TABLE = "source_table"
+ JOB_TYPES = {"load_job": LoadJob}
+ PATH = "projects/%s/jobs" % PROJECT
+ TOKEN = "TOKEN"
+ LOAD_DATA = {
+ "id": "%s:%s" % (PROJECT, "load_job"),
+ "jobReference": {"projectId": PROJECT, "jobId": "load_job"},
+ "state": "DONE",
+ "configuration": {
+ "load": {
+ "destinationTable": {
+ "projectId": PROJECT,
+ "datasetId": DS_ID,
+ "tableId": SOURCE_TABLE,
+ }
+ }
+ },
+ }
+ DATA = {"nextPageToken": TOKEN, "jobs": [LOAD_DATA]}
+ conn = client._connection = make_connection(DATA)
+
+ iterator = client.list_jobs()
+ with mock.patch(
+ "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
+ ) as final_attributes:
+ page = next(iterator.pages)
+
+ final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None)
+ jobs = list(page)
+ token = iterator.next_page_token
+
+ assert len(jobs) == len(DATA["jobs"])
+ for found, expected in zip(jobs, DATA["jobs"]):
+ name = expected["jobReference"]["jobId"]
+ assert isinstance(found, JOB_TYPES[name])
+ assert found.job_id == name
+ assert token == TOKEN
+
+ conn.api_request.assert_called_once_with(
+ method="GET",
+ path="/%s" % PATH,
+ query_params={"projection": "full"},
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+
+def test_list_jobs_explicit_missing(client, PROJECT):
+ PATH = "projects/%s/jobs" % PROJECT
+ DATA = {}
+ TOKEN = "TOKEN"
+ conn = client._connection = make_connection(DATA)
+
+ iterator = client.list_jobs(
+ max_results=1000, page_token=TOKEN, all_users=True, state_filter="done"
+ )
+ with mock.patch(
+ "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
+ ) as final_attributes:
+ page = next(iterator.pages)
+
+ final_attributes.assert_called_once_with({"path": "/%s" % PATH}, client, None)
+ jobs = list(page)
+ token = iterator.next_page_token
+
+ assert len(jobs) == 0
+ assert token is None
+
+ conn.api_request.assert_called_once_with(
+ method="GET",
+ path="/%s" % PATH,
+ query_params={
+ "projection": "full",
+ "maxResults": 1000,
+ "pageToken": TOKEN,
+ "allUsers": True,
+ "stateFilter": "done",
+ },
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+
+def test_list_jobs_w_project(client, PROJECT):
+ conn = client._connection = make_connection({})
+
+ list(client.list_jobs(project="other-project"))
+
+ conn.api_request.assert_called_once_with(
+ method="GET",
+ path="/projects/other-project/jobs",
+ query_params={"projection": "full"},
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+
+def test_list_jobs_w_timeout(client, PROJECT):
+ conn = client._connection = make_connection({})
+
+ list(client.list_jobs(timeout=7.5))
+
+ conn.api_request.assert_called_once_with(
+ method="GET",
+ path="/projects/{}/jobs".format(PROJECT),
+ query_params={"projection": "full"},
+ timeout=7.5,
+ )
+
+
+def test_list_jobs_w_time_filter(client, PROJECT):
+ conn = client._connection = make_connection({})
+
+ # One millisecond after the unix epoch.
+ start_time = datetime.datetime(1970, 1, 1, 0, 0, 0, 1000)
+    # One millisecond after the 2038 31-bit signed int rollover
+ end_time = datetime.datetime(2038, 1, 19, 3, 14, 7, 1000)
+ end_time_millis = (((2 ** 31) - 1) * 1000) + 1
+
+ list(client.list_jobs(min_creation_time=start_time, max_creation_time=end_time))
+
+ conn.api_request.assert_called_once_with(
+ method="GET",
+ path="/projects/%s/jobs" % PROJECT,
+ query_params={
+ "projection": "full",
+ "minCreationTime": "1",
+ "maxCreationTime": str(end_time_millis),
+ },
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+
+def test_list_jobs_w_parent_job_filter(client, PROJECT):
+ from google.cloud.bigquery import job
+
+ conn = client._connection = make_connection({}, {})
+
+ parent_job_args = ["parent-job-123", job._AsyncJob("parent-job-123", client)]
+
+ for parent_job in parent_job_args:
+ list(client.list_jobs(parent_job=parent_job))
+ conn.api_request.assert_called_once_with(
+ method="GET",
+ path="/projects/%s/jobs" % PROJECT,
+ query_params={"projection": "full", "parentJobId": "parent-job-123"},
+ timeout=DEFAULT_TIMEOUT,
+ )
+ conn.api_request.reset_mock()
diff --git a/tests/unit/test_list_models.py b/tests/unit/test_list_models.py
new file mode 100644
index 000000000..b14852338
--- /dev/null
+++ b/tests/unit/test_list_models.py
@@ -0,0 +1,93 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
+from .helpers import make_connection, dataset_polymorphic
+
+
+def test_list_models_empty_w_timeout(client, PROJECT, DS_ID):
+ path = "/projects/{}/datasets/{}/models".format(PROJECT, DS_ID)
+ conn = client._connection = make_connection({})
+
+ dataset_id = "{}.{}".format(PROJECT, DS_ID)
+ iterator = client.list_models(dataset_id, timeout=7.5)
+ page = next(iterator.pages)
+ models = list(page)
+ token = iterator.next_page_token
+
+ assert models == []
+ assert token is None
+ conn.api_request.assert_called_once_with(
+ method="GET", path=path, query_params={}, timeout=7.5
+ )
+
+
+@pytest.mark.parametrize(
+ "extra,query", [({}, {}), (dict(page_size=42), dict(maxResults=42))]
+)
+@dataset_polymorphic
+def test_list_models_defaults(
+ make_dataset, get_reference, client, PROJECT, DS_ID, extra, query,
+):
+ from google.cloud.bigquery.model import Model
+
+ MODEL_1 = "model_one"
+ MODEL_2 = "model_two"
+ PATH = "projects/%s/datasets/%s/models" % (PROJECT, DS_ID)
+ TOKEN = "TOKEN"
+ DATA = {
+ "nextPageToken": TOKEN,
+ "models": [
+ {
+ "modelReference": {
+ "modelId": MODEL_1,
+ "datasetId": DS_ID,
+ "projectId": PROJECT,
+ }
+ },
+ {
+ "modelReference": {
+ "modelId": MODEL_2,
+ "datasetId": DS_ID,
+ "projectId": PROJECT,
+ }
+ },
+ ],
+ }
+
+ conn = client._connection = make_connection(DATA)
+ dataset = make_dataset(PROJECT, DS_ID)
+
+ iterator = client.list_models(dataset, **extra)
+ assert iterator.dataset == get_reference(dataset)
+ page = next(iterator.pages)
+ models = list(page)
+ token = iterator.next_page_token
+
+ assert len(models) == len(DATA["models"])
+ for found, expected in zip(models, DATA["models"]):
+ assert isinstance(found, Model)
+ assert found.model_id == expected["modelReference"]["modelId"]
+ assert token == TOKEN
+
+ conn.api_request.assert_called_once_with(
+ method="GET", path="/%s" % PATH, query_params=query, timeout=DEFAULT_TIMEOUT
+ )
+
+
+def test_list_models_wrong_type(client):
+ with pytest.raises(TypeError):
+ client.list_models(42)
diff --git a/tests/unit/test_list_projects.py b/tests/unit/test_list_projects.py
new file mode 100644
index 000000000..190612b44
--- /dev/null
+++ b/tests/unit/test_list_projects.py
@@ -0,0 +1,120 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import mock
+import pytest
+
+from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
+from .helpers import make_connection
+
+
+@pytest.mark.parametrize(
+ "extra,query", [({}, {}), (dict(page_size=42), dict(maxResults=42))]
+)
+def test_list_projects_defaults(client, PROJECT, extra, query):
+ from google.cloud.bigquery.client import Project
+
+ PROJECT_2 = "PROJECT_TWO"
+ TOKEN = "TOKEN"
+ DATA = {
+ "nextPageToken": TOKEN,
+ "projects": [
+ {
+ "kind": "bigquery#project",
+ "id": PROJECT,
+ "numericId": 1,
+ "projectReference": {"projectId": PROJECT},
+ "friendlyName": "One",
+ },
+ {
+ "kind": "bigquery#project",
+ "id": PROJECT_2,
+ "numericId": 2,
+ "projectReference": {"projectId": PROJECT_2},
+ "friendlyName": "Two",
+ },
+ ],
+ }
+ conn = client._connection = make_connection(DATA)
+ iterator = client.list_projects(**extra)
+
+ with mock.patch(
+ "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
+ ) as final_attributes:
+ page = next(iterator.pages)
+
+ final_attributes.assert_called_once_with({"path": "/projects"}, client, None)
+ projects = list(page)
+ token = iterator.next_page_token
+
+ assert len(projects) == len(DATA["projects"])
+ for found, expected in zip(projects, DATA["projects"]):
+ assert isinstance(found, Project)
+ assert found.project_id == expected["id"]
+ assert found.numeric_id == expected["numericId"]
+ assert found.friendly_name == expected["friendlyName"]
+ assert token == TOKEN
+
+ conn.api_request.assert_called_once_with(
+ method="GET", path="/projects", query_params=query, timeout=DEFAULT_TIMEOUT
+ )
+
+
+def test_list_projects_w_timeout(client):
+ TOKEN = "TOKEN"
+ DATA = {
+ "nextPageToken": TOKEN,
+ "projects": [],
+ }
+ conn = client._connection = make_connection(DATA)
+
+ iterator = client.list_projects(timeout=7.5)
+
+ with mock.patch(
+ "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
+ ) as final_attributes:
+ next(iterator.pages)
+
+ final_attributes.assert_called_once_with({"path": "/projects"}, client, None)
+
+ conn.api_request.assert_called_once_with(
+ method="GET", path="/projects", query_params={}, timeout=7.5
+ )
+
+
+def test_list_projects_explicit_response_missing_projects_key(client):
+ TOKEN = "TOKEN"
+ DATA = {}
+ conn = client._connection = make_connection(DATA)
+
+ iterator = client.list_projects(max_results=3, page_token=TOKEN)
+
+ with mock.patch(
+ "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
+ ) as final_attributes:
+ page = next(iterator.pages)
+
+ final_attributes.assert_called_once_with({"path": "/projects"}, client, None)
+ projects = list(page)
+ token = iterator.next_page_token
+
+ assert len(projects) == 0
+ assert token is None
+
+ conn.api_request.assert_called_once_with(
+ method="GET",
+ path="/projects",
+ query_params={"maxResults": 3, "pageToken": TOKEN},
+ timeout=DEFAULT_TIMEOUT,
+ )
diff --git a/tests/unit/test_list_routines.py b/tests/unit/test_list_routines.py
new file mode 100644
index 000000000..80e62d6bd
--- /dev/null
+++ b/tests/unit/test_list_routines.py
@@ -0,0 +1,96 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
+from .helpers import make_connection, dataset_polymorphic
+
+
+def test_list_routines_empty_w_timeout(client):
+ conn = client._connection = make_connection({})
+
+ iterator = client.list_routines("test-routines.test_routines", timeout=7.5)
+ page = next(iterator.pages)
+ routines = list(page)
+ token = iterator.next_page_token
+
+ assert routines == []
+ assert token is None
+ conn.api_request.assert_called_once_with(
+ method="GET",
+ path="/projects/test-routines/datasets/test_routines/routines",
+ query_params={},
+ timeout=7.5,
+ )
+
+
+@pytest.mark.parametrize(
+ "extra,query", [({}, {}), (dict(page_size=42), dict(maxResults=42))]
+)
+@dataset_polymorphic
+def test_list_routines_defaults(
+ make_dataset, get_reference, client, PROJECT, extra, query
+):
+ from google.cloud.bigquery.routine import Routine
+
+ project_id = PROJECT
+ dataset_id = "test_routines"
+ path = f"/projects/{PROJECT}/datasets/test_routines/routines"
+ routine_1 = "routine_one"
+ routine_2 = "routine_two"
+ token = "TOKEN"
+ resource = {
+ "nextPageToken": token,
+ "routines": [
+ {
+ "routineReference": {
+ "routineId": routine_1,
+ "datasetId": dataset_id,
+ "projectId": project_id,
+ }
+ },
+ {
+ "routineReference": {
+ "routineId": routine_2,
+ "datasetId": dataset_id,
+ "projectId": project_id,
+ }
+ },
+ ],
+ }
+
+ conn = client._connection = make_connection(resource)
+ dataset = make_dataset(client.project, dataset_id)
+
+ iterator = client.list_routines(dataset, **extra)
+ assert iterator.dataset == get_reference(dataset)
+ page = next(iterator.pages)
+ routines = list(page)
+ actual_token = iterator.next_page_token
+
+ assert len(routines) == len(resource["routines"])
+ for found, expected in zip(routines, resource["routines"]):
+ assert isinstance(found, Routine)
+ assert found.routine_id == expected["routineReference"]["routineId"]
+ assert actual_token == token
+
+ conn.api_request.assert_called_once_with(
+ method="GET", path=path, query_params=query, timeout=DEFAULT_TIMEOUT
+ )
+
+
+def test_list_routines_wrong_type(client):
+ with pytest.raises(TypeError):
+ client.list_routines(42)
diff --git a/tests/unit/test_list_tables.py b/tests/unit/test_list_tables.py
new file mode 100644
index 000000000..8360f6605
--- /dev/null
+++ b/tests/unit/test_list_tables.py
@@ -0,0 +1,180 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+import google.cloud.bigquery.dataset
+from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
+from .helpers import make_connection, dataset_polymorphic
+
+
+@dataset_polymorphic
+def test_list_tables_empty_w_timeout(
+ make_dataset, get_reference, client, PROJECT, DS_ID
+):
+ path = "/projects/{}/datasets/{}/tables".format(PROJECT, DS_ID)
+ conn = client._connection = make_connection({})
+
+ dataset = make_dataset(PROJECT, DS_ID)
+ iterator = client.list_tables(dataset, timeout=7.5)
+ assert iterator.dataset == get_reference(dataset)
+ page = next(iterator.pages)
+ tables = list(page)
+ token = iterator.next_page_token
+
+ assert tables == []
+ assert token is None
+ conn.api_request.assert_called_once_with(
+ method="GET", path=path, query_params={}, timeout=7.5
+ )
+
+
+@dataset_polymorphic
+def test_list_tables_defaults(make_dataset, get_reference, client, PROJECT, DS_ID):
+ from google.cloud.bigquery.table import TableListItem
+
+ TABLE_1 = "table_one"
+ TABLE_2 = "table_two"
+ PATH = "projects/%s/datasets/%s/tables" % (PROJECT, DS_ID)
+ TOKEN = "TOKEN"
+ DATA = {
+ "nextPageToken": TOKEN,
+ "tables": [
+ {
+ "kind": "bigquery#table",
+ "id": "%s:%s.%s" % (PROJECT, DS_ID, TABLE_1),
+ "tableReference": {
+ "tableId": TABLE_1,
+ "datasetId": DS_ID,
+ "projectId": PROJECT,
+ },
+ "type": "TABLE",
+ },
+ {
+ "kind": "bigquery#table",
+ "id": "%s:%s.%s" % (PROJECT, DS_ID, TABLE_2),
+ "tableReference": {
+ "tableId": TABLE_2,
+ "datasetId": DS_ID,
+ "projectId": PROJECT,
+ },
+ "type": "TABLE",
+ },
+ ],
+ }
+
+ conn = client._connection = make_connection(DATA)
+ dataset = make_dataset(PROJECT, DS_ID)
+
+ iterator = client.list_tables(dataset)
+ assert iterator.dataset == get_reference(dataset)
+ page = next(iterator.pages)
+ tables = list(page)
+ token = iterator.next_page_token
+
+ assert len(tables) == len(DATA["tables"])
+ for found, expected in zip(tables, DATA["tables"]):
+ assert isinstance(found, TableListItem)
+ assert found.full_table_id == expected["id"]
+ assert found.table_type == expected["type"]
+ assert token == TOKEN
+
+ conn.api_request.assert_called_once_with(
+ method="GET", path="/%s" % PATH, query_params={}, timeout=DEFAULT_TIMEOUT
+ )
+
+
+def test_list_tables_explicit(client, PROJECT, DS_ID):
+ from google.cloud.bigquery.table import TableListItem
+
+ TABLE_1 = "table_one"
+ TABLE_2 = "table_two"
+ PATH = "projects/%s/datasets/%s/tables" % (PROJECT, DS_ID)
+ TOKEN = "TOKEN"
+ DATA = {
+ "tables": [
+ {
+ "kind": "bigquery#dataset",
+ "id": "%s:%s.%s" % (PROJECT, DS_ID, TABLE_1),
+ "tableReference": {
+ "tableId": TABLE_1,
+ "datasetId": DS_ID,
+ "projectId": PROJECT,
+ },
+ "type": "TABLE",
+ },
+ {
+ "kind": "bigquery#dataset",
+ "id": "%s:%s.%s" % (PROJECT, DS_ID, TABLE_2),
+ "tableReference": {
+ "tableId": TABLE_2,
+ "datasetId": DS_ID,
+ "projectId": PROJECT,
+ },
+ "type": "TABLE",
+ },
+ ]
+ }
+
+ conn = client._connection = make_connection(DATA)
+ dataset = google.cloud.bigquery.dataset.DatasetReference(PROJECT, DS_ID)
+
+ iterator = client.list_tables(
+ # Test with string for dataset ID.
+ DS_ID,
+ max_results=3,
+ page_token=TOKEN,
+ )
+ assert iterator.dataset == dataset
+ page = next(iterator.pages)
+ tables = list(page)
+ token = iterator.next_page_token
+
+ assert len(tables) == len(DATA["tables"])
+ for found, expected in zip(tables, DATA["tables"]):
+ assert isinstance(found, TableListItem)
+ assert found.full_table_id == expected["id"]
+ assert found.table_type == expected["type"]
+ assert token is None
+
+ conn.api_request.assert_called_once_with(
+ method="GET",
+ path="/%s" % PATH,
+ query_params={"maxResults": 3, "pageToken": TOKEN},
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+
+def test_list_tables_wrong_type(client):
+ with pytest.raises(TypeError):
+ client.list_tables(42)
+
+
+@dataset_polymorphic
+def test_list_tables_page_size(make_dataset, get_reference, client, PROJECT, DS_ID):
+ path = "/projects/{}/datasets/{}/tables".format(PROJECT, DS_ID)
+ conn = client._connection = make_connection({})
+
+ dataset = make_dataset(PROJECT, DS_ID)
+ iterator = client.list_tables(dataset, timeout=7.5, page_size=42)
+ assert iterator.dataset == get_reference(dataset)
+ page = next(iterator.pages)
+ tables = list(page)
+ token = iterator.next_page_token
+
+ assert tables == []
+ assert token is None
+ conn.api_request.assert_called_once_with(
+ method="GET", path=path, query_params=dict(maxResults=42), timeout=7.5
+ )
diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py
index ff41fe720..36cbf4993 100644
--- a/tests/unit/test_magics.py
+++ b/tests/unit/test_magics.py
@@ -32,6 +32,7 @@
from google.cloud.bigquery import job
from google.cloud.bigquery import table
from google.cloud.bigquery.magics import magics
+from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
from tests.unit.helpers import make_connection
from test_utils.imports import maybe_fail_import
@@ -185,7 +186,7 @@ def test_context_with_default_connection():
method="POST",
path="/projects/project-from-env/jobs",
data=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
query_results_call = mock.call(
method="GET",
@@ -249,7 +250,7 @@ def test_context_with_custom_connection():
method="POST",
path="/projects/project-from-env/jobs",
data=mock.ANY,
- timeout=None,
+ timeout=DEFAULT_TIMEOUT,
)
query_results_call = mock.call(
method="GET",
@@ -317,7 +318,10 @@ def test__make_bqstorage_client_false():
credentials_mock = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
- got = magics._make_bqstorage_client(False, credentials_mock, {})
+ test_client = bigquery.Client(
+ project="test_project", credentials=credentials_mock, location="test_location"
+ )
+ got = magics._make_bqstorage_client(test_client, False, {})
assert got is None
@@ -328,7 +332,10 @@ def test__make_bqstorage_client_true():
credentials_mock = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
- got = magics._make_bqstorage_client(True, credentials_mock, {})
+ test_client = bigquery.Client(
+ project="test_project", credentials=credentials_mock, location="test_location"
+ )
+ got = magics._make_bqstorage_client(test_client, True, {})
assert isinstance(got, bigquery_storage.BigQueryReadClient)
@@ -336,15 +343,46 @@ def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage):
credentials_mock = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
+ test_client = bigquery.Client(
+ project="test_project", credentials=credentials_mock, location="test_location"
+ )
with pytest.raises(ImportError) as exc_context, missing_bq_storage:
- magics._make_bqstorage_client(True, credentials_mock, {})
+ magics._make_bqstorage_client(test_client, True, {})
error_msg = str(exc_context.value)
assert "google-cloud-bigquery-storage" in error_msg
assert "pyarrow" in error_msg
+@pytest.mark.skipif(
+ bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
+)
+def test__make_bqstorage_client_true_obsolete_dependency():
+ from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError
+
+ credentials_mock = mock.create_autospec(
+ google.auth.credentials.Credentials, instance=True
+ )
+ test_client = bigquery.Client(
+ project="test_project", credentials=credentials_mock, location="test_location"
+ )
+
+ patcher = mock.patch(
+ "google.cloud.bigquery.client.BQ_STORAGE_VERSIONS.verify_version",
+ side_effect=LegacyBigQueryStorageError("BQ Storage too old"),
+ )
+ with patcher, warnings.catch_warnings(record=True) as warned:
+ got = magics._make_bqstorage_client(test_client, True, {})
+
+ assert got is None
+
+ matching_warnings = [
+ warning for warning in warned if "BQ Storage too old" in str(warning)
+ ]
+ assert matching_warnings, "Obsolete dependency warning not raised."
+
+
@pytest.mark.skipif(
bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
)
@@ -623,7 +661,9 @@ def warning_match(warning):
assert client_info.user_agent == "ipython-" + IPython.__version__
query_job_mock.to_dataframe.assert_called_once_with(
- bqstorage_client=bqstorage_instance_mock, progress_bar_type="tqdm"
+ bqstorage_client=bqstorage_instance_mock,
+ create_bqstorage_client=mock.ANY,
+ progress_bar_type="tqdm",
)
assert isinstance(return_value, pandas.DataFrame)
@@ -666,7 +706,9 @@ def test_bigquery_magic_with_rest_client_requested(monkeypatch):
bqstorage_mock.assert_not_called()
query_job_mock.to_dataframe.assert_called_once_with(
- bqstorage_client=None, progress_bar_type="tqdm"
+ bqstorage_client=None,
+ create_bqstorage_client=False,
+ progress_bar_type="tqdm",
)
assert isinstance(return_value, pandas.DataFrame)
@@ -720,7 +762,12 @@ def test_bigquery_magic_w_max_results_valid_calls_queryjob_result():
client_query_mock.return_value = query_job_mock
ip.run_cell_magic("bigquery", "--max_results=5", sql)
- query_job_mock.result.assert_called_with(max_results=5)
+ query_job_mock.result.assert_called_with(max_results=5)
+ query_job_mock.result.return_value.to_dataframe.assert_called_once_with(
+ bqstorage_client=None,
+ create_bqstorage_client=False,
+ progress_bar_type=mock.ANY,
+ )
@pytest.mark.usefixtures("ipython_interactive")
@@ -887,11 +934,12 @@ def test_bigquery_magic_w_table_id_and_bqstorage_client():
table_id = "bigquery-public-data.samples.shakespeare"
with default_patch, client_patch as client_mock, bqstorage_client_patch:
+ client_mock()._ensure_bqstorage_client.return_value = bqstorage_instance_mock
client_mock().list_rows.return_value = row_iterator_mock
ip.run_cell_magic("bigquery", "--max_results=5", table_id)
row_iterator_mock.to_dataframe.assert_called_once_with(
- bqstorage_client=bqstorage_instance_mock
+ bqstorage_client=bqstorage_instance_mock, create_bqstorage_client=mock.ANY,
)
@@ -1208,7 +1256,9 @@ def test_bigquery_magic_w_progress_bar_type_w_context_setter(monkeypatch):
bqstorage_mock.assert_not_called()
query_job_mock.to_dataframe.assert_called_once_with(
- bqstorage_client=None, progress_bar_type=magics.context.progress_bar_type
+ bqstorage_client=None,
+ create_bqstorage_client=False,
+ progress_bar_type=magics.context.progress_bar_type,
)
assert isinstance(return_value, pandas.DataFrame)
diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py
index cf268daf1..69a6772e5 100644
--- a/tests/unit/test_query.py
+++ b/tests/unit/test_query.py
@@ -13,6 +13,7 @@
# limitations under the License.
import datetime
+import decimal
import unittest
import mock
@@ -43,6 +44,338 @@ def test___eq__(self):
self.assertNotEqual(udf, wrong_type)
+class Test__AbstractQueryParameterType(unittest.TestCase):
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.query import _AbstractQueryParameterType
+
+ return _AbstractQueryParameterType
+
+ @classmethod
+ def _make_one(cls, *args, **kw):
+ return cls._get_target_class()(*args, **kw)
+
+ def test_from_api_virtual(self):
+ klass = self._get_target_class()
+ with self.assertRaises(NotImplementedError):
+ klass.from_api_repr({})
+
+ def test_to_api_virtual(self):
+ param_type = self._make_one()
+ with self.assertRaises(NotImplementedError):
+ param_type.to_api_repr()
+
+
+class Test_ScalarQueryParameterType(unittest.TestCase):
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.query import ScalarQueryParameterType
+
+ return ScalarQueryParameterType
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ def test_from_api_repr(self):
+ klass = self._get_target_class()
+ result = klass.from_api_repr({"type": "BOOLEAN"})
+ self.assertEqual(result._type, "BOOLEAN")
+ self.assertIsNone(result.name)
+ self.assertIsNone(result.description)
+
+ def test_to_api_repr(self):
+ param_type = self._make_one("BYTES", name="foo", description="bar")
+ result = param_type.to_api_repr()
+ self.assertEqual(result, {"type": "BYTES"})
+
+ def test_repr_no_optional_attrs(self):
+ param_type = self._make_one("BYTES")
+ self.assertEqual(repr(param_type), "ScalarQueryParameterType('BYTES')")
+
+ def test_repr_all_optional_attrs(self):
+ param_type = self._make_one("BYTES", name="foo", description="this is foo")
+ self.assertEqual(
+ repr(param_type),
+ "ScalarQueryParameterType('BYTES', name='foo', description='this is foo')",
+ )
+
+ def test_with_name_returns_copy_w_changed_name(self):
+ param_type = self._make_one("BOOLEAN", name=None, description="Some checkbox.")
+ modified_type = param_type.with_name("allow_emails")
+
+ self.assertIsNot(modified_type, param_type) # Result is a copy.
+ self.assertEqual(modified_type.name, "allow_emails")
+
+ # The rest of the fields should have been preserved.
+ self.assertEqual(modified_type._type, param_type._type)
+ self.assertEqual(modified_type.description, param_type.description)
+
+ def test_with_name_clearing_the_value(self):
+ param_type = self._make_one(
+ "BOOLEAN", name="allow_emails", description="Some checkbox."
+ )
+ modified_type = param_type.with_name(None)
+
+ self.assertIsNone(modified_type.name)
+ self.assertEqual(param_type.name, "allow_emails") # original unchanged
+
+
+class Test_ArrayQueryParameterType(unittest.TestCase):
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.query import ArrayQueryParameterType
+
+ return ArrayQueryParameterType
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ def test_from_api_repr(self):
+ from google.cloud.bigquery.query import StructQueryParameterType
+
+ api_resource = {
+ "type": "ARRAY",
+ "arrayType": {
+ "type": "STRUCT",
+ "structTypes": [
+ {
+ "name": "weight",
+ "type": {"type": "INTEGER"},
+ "description": "in kg",
+ },
+ {"name": "last_name", "type": {"type": "STRING"}},
+ ],
+ },
+ }
+
+ klass = self._get_target_class()
+ result = klass.from_api_repr(api_resource)
+
+ self.assertIsNone(result.name)
+ self.assertIsNone(result.description)
+ item_type = result._array_type
+ self.assertIsInstance(item_type, StructQueryParameterType)
+
+ self.assertIsNone(item_type.name)
+ self.assertIsNone(item_type.description)
+
+ field = item_type.fields[0]
+ self.assertEqual(field.name, "weight")
+ self.assertEqual(field.description, "in kg")
+ self.assertEqual(field._type, "INTEGER")
+
+ field = item_type.fields[1]
+ self.assertEqual(field.name, "last_name")
+ self.assertIsNone(field.description)
+ self.assertEqual(field._type, "STRING")
+
+ def test_to_api_repr(self):
+ from google.cloud.bigquery.query import ScalarQueryParameterType
+ from google.cloud.bigquery.query import StructQueryParameterType
+
+ array_item_type = StructQueryParameterType(
+ ScalarQueryParameterType("INTEGER", name="weight", description="in kg"),
+ ScalarQueryParameterType("STRING", name="last_name"),
+ )
+ param_type = self._make_one(array_item_type, name="foo", description="bar")
+
+ result = param_type.to_api_repr()
+
+ expected_result = {
+ "type": "ARRAY",
+ "arrayType": {
+ "type": "STRUCT",
+ "structTypes": [
+ {
+ "name": "weight",
+ "type": {"type": "INTEGER"},
+ "description": "in kg",
+ },
+ {"name": "last_name", "type": {"type": "STRING"}},
+ ],
+ },
+ }
+ self.assertEqual(result, expected_result)
+
+ def test_repr_no_optional_attrs(self):
+ param_type = self._make_one("BOOLEAN")
+ self.assertEqual(repr(param_type), "ArrayQueryParameterType('BOOLEAN')")
+
+ def test_repr_all_optional_attrs(self):
+ param_type = self._make_one("INT64", name="bar", description="this is bar")
+ self.assertEqual(
+ repr(param_type),
+ "ArrayQueryParameterType('INT64', name='bar', description='this is bar')",
+ )
+
+
+class Test_StructQueryParameterType(unittest.TestCase):
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.query import StructQueryParameterType
+
+ return StructQueryParameterType
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ def test_raises_error_without_any_fields(self):
+ with self.assertRaisesRegex(ValueError, ".*at least one field.*"):
+ self._make_one()
+
+ def test_from_api_repr(self):
+ from google.cloud.bigquery.query import ArrayQueryParameterType
+ from google.cloud.bigquery.query import ScalarQueryParameterType
+
+ api_resource = {
+ "type": "STRUCT",
+ "structTypes": [
+ {
+ "name": "age",
+ "type": {"type": "INTEGER"},
+ "description": "in years",
+ },
+ {
+ "name": "aliases",
+ "type": {"type": "ARRAY", "arrayType": {"type": "STRING"}},
+ },
+ {
+ "description": "a nested struct",
+ "type": {
+ "type": "STRUCT",
+ "structTypes": [
+ {"type": {"type": "DATE"}, "name": "nested_date"},
+ {
+ "type": {"type": "BOOLEAN"},
+ "description": "nested bool field",
+ },
+ ],
+ },
+ },
+ ],
+ }
+
+ klass = self._get_target_class()
+ result = klass.from_api_repr(api_resource)
+
+ self.assertIsNone(result.name)
+ self.assertIsNone(result.description)
+ self.assertEqual(len(result.fields), 3)
+
+ field = result.fields[0]
+ self.assertIsInstance(field, ScalarQueryParameterType)
+ self.assertEqual(field.name, "age")
+ self.assertEqual(field.description, "in years")
+
+ field = result.fields[1]
+ self.assertIsInstance(field, ArrayQueryParameterType)
+ self.assertEqual(field.name, "aliases")
+ self.assertIsNone(field.description)
+ self.assertIsInstance(field._array_type, ScalarQueryParameterType)
+ self.assertEqual(field._array_type._type, "STRING")
+
+ field = result.fields[2]
+ self.assertIsInstance(field, self._get_target_class())
+ self.assertIsNone(field.name)
+ self.assertEqual(field.description, "a nested struct")
+
+ date_field = field.fields[0]
+ self.assertEqual(date_field._type, "DATE")
+ self.assertEqual(date_field.name, "nested_date")
+ self.assertIsNone(date_field.description)
+
+ bool_field = field.fields[1]
+ self.assertEqual(bool_field._type, "BOOLEAN")
+ self.assertIsNone(bool_field.name)
+ self.assertEqual(bool_field.description, "nested bool field")
+
+ def test_to_api_repr(self):
+ from google.cloud.bigquery.query import ScalarQueryParameterType
+
+ int_type = ScalarQueryParameterType("INTEGER", description="in years")
+ date_type = ScalarQueryParameterType("DATE", name="day_of_birth")
+ param_type = self._make_one(int_type, date_type, name="foo", description="bar")
+
+ result = param_type.to_api_repr()
+
+ expected_result = {
+ "type": "STRUCT",
+ "structTypes": [
+ {"type": {"type": "INTEGER"}, "description": "in years"},
+ {"name": "day_of_birth", "type": {"type": "DATE"}},
+ ],
+ }
+ self.assertEqual(result, expected_result)
+
+ def test_to_api_repr_nested(self):
+ from google.cloud.bigquery.query import ScalarQueryParameterType
+
+ struct_class = self._get_target_class()
+
+ int_type = ScalarQueryParameterType("INTEGER", description="in years")
+ nested_struct_type = struct_class(
+ ScalarQueryParameterType("DATE", name="nested_date"),
+ ScalarQueryParameterType("BOOLEAN", description="nested bool field"),
+ name="nested",
+ )
+ param_type = self._make_one(
+ int_type, nested_struct_type, name="foo", description="bar"
+ )
+
+ result = param_type.to_api_repr()
+
+ expected_result = {
+ "type": "STRUCT",
+ "structTypes": [
+ {"type": {"type": "INTEGER"}, "description": "in years"},
+ {
+ "name": "nested",
+ "type": {
+ "type": "STRUCT",
+ "structTypes": [
+ {"type": {"type": "DATE"}, "name": "nested_date"},
+ {
+ "type": {"type": "BOOLEAN"},
+ "description": "nested bool field",
+ },
+ ],
+ },
+ },
+ ],
+ }
+ self.assertEqual(result, expected_result)
+
+ def test_repr_no_optional_attrs(self):
+ from google.cloud.bigquery.query import ScalarQueryParameterType
+
+ param_type = self._make_one(
+ ScalarQueryParameterType("BOOLEAN"), ScalarQueryParameterType("STRING")
+ )
+ expected = (
+ "StructQueryParameterType("
+ "ScalarQueryParameterType('BOOLEAN'), ScalarQueryParameterType('STRING')"
+ ")"
+ )
+ self.assertEqual(repr(param_type), expected)
+
+ def test_repr_all_optional_attrs(self):
+ from google.cloud.bigquery.query import ScalarQueryParameterType
+
+ param_type = self._make_one(
+ ScalarQueryParameterType("BOOLEAN"),
+ ScalarQueryParameterType("STRING"),
+ name="data_record",
+ description="this is it",
+ )
+ expected = (
+ "StructQueryParameterType("
+ "ScalarQueryParameterType('BOOLEAN'), ScalarQueryParameterType('STRING'), "
+ "name='data_record', description='this is it'"
+ ")"
+ )
+ self.assertEqual(repr(param_type), expected)
+
+
class Test__AbstractQueryParameter(unittest.TestCase):
@staticmethod
def _get_target_class():
@@ -98,6 +431,18 @@ def test_positional(self):
self.assertEqual(param.type_, "INT64")
self.assertEqual(param.value, 123)
+ def test_ctor_w_scalar_query_parameter_type(self):
+ from google.cloud.bigquery import enums
+
+ param = self._make_one(
+ name="foo",
+ type_=enums.SqlParameterScalarTypes.BIGNUMERIC,
+ value=decimal.Decimal("123.456"),
+ )
+ self.assertEqual(param.name, "foo")
+ self.assertEqual(param.type_, "BIGNUMERIC")
+ self.assertEqual(param.value, decimal.Decimal("123.456"))
+
def test_from_api_repr_w_name(self):
RESOURCE = {
"name": "foo",
@@ -166,6 +511,16 @@ def test_to_api_repr_w_numeric(self):
param = klass.positional(type_="NUMERIC", value="123456789.123456789")
self.assertEqual(param.to_api_repr(), EXPECTED)
+ def test_to_api_repr_w_bignumeric(self):
+ big_num_string = "{d38}.{d38}".format(d38="9" * 38)
+ EXPECTED = {
+ "parameterType": {"type": "BIGNUMERIC"},
+ "parameterValue": {"value": big_num_string},
+ }
+ klass = self._get_target_class()
+ param = klass.positional(type_="BIGNUMERIC", value=big_num_string)
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
def test_to_api_repr_w_bool(self):
EXPECTED = {
"parameterType": {"type": "BOOL"},
@@ -330,6 +685,10 @@ def test_ctor(self):
self.assertEqual(param.array_type, "INT64")
self.assertEqual(param.values, [1, 2])
+ def test_ctor_empty_struct_array_wo_type_info(self):
+ with self.assertRaisesRegex(ValueError, r"(?i)missing.*struct.*type info.*"):
+ self._make_one(name="foo", array_type="STRUCT", values=[])
+
def test___eq__(self):
param = self._make_one(name="foo", array_type="INT64", values=[123])
self.assertEqual(param, param)
@@ -457,6 +816,19 @@ def test_to_api_repr_wo_name(self):
param = klass.positional(array_type="INT64", values=[1, 2])
self.assertEqual(param.to_api_repr(), EXPECTED)
+ def test_to_api_repr_array_type_as_type_instance(self):
+ from google.cloud.bigquery.query import ScalarQueryParameterType
+
+ EXPECTED = {
+ "parameterType": {"type": "ARRAY", "arrayType": {"type": "BOOLEAN"}},
+ "parameterValue": {"arrayValues": [{"value": "true"}, {"value": "false"}]},
+ }
+ klass = self._get_target_class()
+ param = klass.positional(
+ array_type=ScalarQueryParameterType("BOOLEAN"), values=[True, False],
+ )
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
def test_to_api_repr_w_unknown_type(self):
EXPECTED = {
"parameterType": {"type": "ARRAY", "arrayType": {"type": "UNKNOWN"}},
@@ -493,6 +865,31 @@ def test_to_api_repr_w_record_type(self):
param = klass.positional(array_type="RECORD", values=[struct])
self.assertEqual(param.to_api_repr(), EXPECTED)
+ def test_to_api_repr_w_empty_array_of_records_type(self):
+ from google.cloud.bigquery.query import ScalarQueryParameterType
+ from google.cloud.bigquery.query import StructQueryParameterType
+
+ EXPECTED = {
+ "parameterType": {
+ "type": "ARRAY",
+ "arrayType": {
+ "type": "STRUCT",
+ "structTypes": [
+ {"name": "foo", "type": {"type": "STRING"}},
+ {"name": "bar", "type": {"type": "INT64"}},
+ ],
+ },
+ },
+ "parameterValue": {"arrayValues": []},
+ }
+ item_type = StructQueryParameterType(
+ ScalarQueryParameterType("STRING", name="foo"),
+ ScalarQueryParameterType("INT64", name="bar"),
+ )
+ klass = self._get_target_class()
+ param = klass.positional(array_type=item_type, values=[])
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
def test___eq___wrong_type(self):
field = self._make_one("test", "STRING", ["value"])
other = object()
@@ -537,11 +934,38 @@ def test___ne___different_values(self):
field2 = self._make_one("test", "INT64", [12])
self.assertNotEqual(field1, field2)
- def test___repr__(self):
+ def test___repr__array_type_str(self):
field1 = self._make_one("field1", "STRING", ["value"])
expected = "ArrayQueryParameter('field1', 'STRING', ['value'])"
self.assertEqual(repr(field1), expected)
+ def test___repr__array_type_scalar_type_instance(self):
+ from google.cloud.bigquery.query import ScalarQueryParameterType
+
+ int_items = self._make_one(
+ "int_items", ScalarQueryParameterType("INTEGER"), [64]
+ )
+ expected = "ArrayQueryParameter('int_items', 'INTEGER', [64])"
+ self.assertEqual(repr(int_items), expected)
+
+ def test___repr__array_type_struct_type_instance(self):
+ from google.cloud.bigquery.query import ScalarQueryParameterType
+ from google.cloud.bigquery.query import StructQueryParameterType
+
+ struct_items = self._make_one(
+ "struct_items",
+ StructQueryParameterType(
+ ScalarQueryParameterType("INTEGER", name="age"),
+ ScalarQueryParameterType("STRING", name="last_name"),
+ ),
+ [{"age": 18, "last_name": "Doe"}],
+ )
+ expected = (
+ "ArrayQueryParameter('struct_items', 'STRUCT', "
+ "[{'age': 18, 'last_name': 'Doe'}])"
+ )
+ self.assertEqual(repr(struct_items), expected)
+
class Test_StructQueryParameter(unittest.TestCase):
@staticmethod
@@ -891,7 +1315,7 @@ def _verifySchema(self, query, resource):
self.assertEqual(found.description, expected.get("description"))
self.assertEqual(found.fields, expected.get("fields", ()))
else:
- self.assertEqual(query.schema, ())
+ self.assertEqual(query.schema, [])
def test_ctor_defaults(self):
query = self._make_one(self._make_resource())
@@ -901,7 +1325,7 @@ def test_ctor_defaults(self):
self.assertIsNone(query.page_token)
self.assertEqual(query.project, self.PROJECT)
self.assertEqual(query.rows, [])
- self.assertEqual(query.schema, ())
+ self.assertEqual(query.schema, [])
self.assertIsNone(query.total_rows)
self.assertIsNone(query.total_bytes_processed)
diff --git a/tests/unit/test_retry.py b/tests/unit/test_retry.py
index d9f867cb3..e0a992f78 100644
--- a/tests/unit/test_retry.py
+++ b/tests/unit/test_retry.py
@@ -15,6 +15,7 @@
import unittest
import mock
+import requests.exceptions
class Test_should_retry(unittest.TestCase):
@@ -42,6 +43,36 @@ def test_w_rateLimitExceeded(self):
exc = mock.Mock(errors=[{"reason": "rateLimitExceeded"}], spec=["errors"])
self.assertTrue(self._call_fut(exc))
+ def test_w_unstructured_connectionerror(self):
+ exc = ConnectionError()
+ self.assertTrue(self._call_fut(exc))
+
+ def test_w_unstructured_requests_connectionerror(self):
+ exc = requests.exceptions.ConnectionError()
+ self.assertTrue(self._call_fut(exc))
+
+ def test_w_unstructured_requests_chunked_encoding_error(self):
+ exc = requests.exceptions.ChunkedEncodingError()
+ self.assertTrue(self._call_fut(exc))
+
+ def test_w_unstructured_requests_connecttimeout(self):
+ exc = requests.exceptions.ConnectTimeout()
+ self.assertTrue(self._call_fut(exc))
+
+ def test_w_unstructured_requests_readtimeout(self):
+ exc = requests.exceptions.ReadTimeout()
+ self.assertTrue(self._call_fut(exc))
+
+ def test_w_unstructured_requests_timeout(self):
+ exc = requests.exceptions.Timeout()
+ self.assertTrue(self._call_fut(exc))
+
+ def test_w_auth_transporterror(self):
+ from google.auth.exceptions import TransportError
+
+ exc = TransportError("testing")
+ self.assertTrue(self._call_fut(exc))
+
def test_w_unstructured_too_many_requests(self):
from google.api_core.exceptions import TooManyRequests
@@ -67,3 +98,27 @@ def test_w_unstructured_bad_gateway(self):
exc = BadGateway("testing")
self.assertTrue(self._call_fut(exc))
+
+
+def test_DEFAULT_JOB_RETRY_predicate():
+ from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY
+ from google.api_core.exceptions import ClientError
+
+ assert not DEFAULT_JOB_RETRY._predicate(TypeError())
+ assert not DEFAULT_JOB_RETRY._predicate(ClientError("fail"))
+ assert not DEFAULT_JOB_RETRY._predicate(
+ ClientError("fail", errors=[dict(reason="idk")])
+ )
+
+ assert DEFAULT_JOB_RETRY._predicate(
+ ClientError("fail", errors=[dict(reason="rateLimitExceeded")])
+ )
+ assert DEFAULT_JOB_RETRY._predicate(
+ ClientError("fail", errors=[dict(reason="backendError")])
+ )
+
+
+def test_DEFAULT_JOB_RETRY_deadline():
+ from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY
+
+ assert DEFAULT_JOB_RETRY._deadline == 600
diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py
index 71bf6b5ae..d0b5ca54c 100644
--- a/tests/unit/test_schema.py
+++ b/tests/unit/test_schema.py
@@ -12,9 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+from google.cloud.bigquery.schema import PolicyTagList
import unittest
import mock
+import pytest
class TestSchemaField(unittest.TestCase):
@@ -35,19 +37,20 @@ def _make_one(self, *args, **kw):
def test_constructor_defaults(self):
field = self._make_one("test", "STRING")
- self.assertEqual(field._name, "test")
- self.assertEqual(field._field_type, "STRING")
- self.assertEqual(field._mode, "NULLABLE")
- self.assertIsNone(field._description)
- self.assertEqual(field._fields, ())
+ self.assertEqual(field.name, "test")
+ self.assertEqual(field.field_type, "STRING")
+ self.assertEqual(field.mode, "NULLABLE")
+ self.assertIsNone(field.description)
+ self.assertEqual(field.fields, ())
+ self.assertEqual(field.policy_tags, PolicyTagList())
def test_constructor_explicit(self):
field = self._make_one("test", "STRING", mode="REQUIRED", description="Testing")
- self.assertEqual(field._name, "test")
- self.assertEqual(field._field_type, "STRING")
- self.assertEqual(field._mode, "REQUIRED")
- self.assertEqual(field._description, "Testing")
- self.assertEqual(field._fields, ())
+ self.assertEqual(field.name, "test")
+ self.assertEqual(field.field_type, "STRING")
+ self.assertEqual(field.mode, "REQUIRED")
+ self.assertEqual(field.description, "Testing")
+ self.assertEqual(field.fields, ())
def test_constructor_subfields(self):
sub_field1 = self._make_one("area_code", "STRING")
@@ -55,13 +58,13 @@ def test_constructor_subfields(self):
field = self._make_one(
"phone_number", "RECORD", fields=[sub_field1, sub_field2]
)
- self.assertEqual(field._name, "phone_number")
- self.assertEqual(field._field_type, "RECORD")
- self.assertEqual(field._mode, "NULLABLE")
- self.assertIsNone(field._description)
- self.assertEqual(len(field._fields), 2)
- self.assertIs(field._fields[0], sub_field1)
- self.assertIs(field._fields[1], sub_field2)
+ self.assertEqual(field.name, "phone_number")
+ self.assertEqual(field.field_type, "RECORD")
+ self.assertEqual(field.mode, "NULLABLE")
+ self.assertIsNone(field.description)
+ self.assertEqual(len(field.fields), 2)
+ self.assertEqual(field.fields[0], sub_field1)
+ self.assertEqual(field.fields[1], sub_field2)
def test_constructor_with_policy_tags(self):
from google.cloud.bigquery.schema import PolicyTagList
@@ -70,12 +73,12 @@ def test_constructor_with_policy_tags(self):
field = self._make_one(
"test", "STRING", mode="REQUIRED", description="Testing", policy_tags=policy
)
- self.assertEqual(field._name, "test")
- self.assertEqual(field._field_type, "STRING")
- self.assertEqual(field._mode, "REQUIRED")
- self.assertEqual(field._description, "Testing")
- self.assertEqual(field._fields, ())
- self.assertEqual(field._policy_tags, policy)
+ self.assertEqual(field.name, "test")
+ self.assertEqual(field.field_type, "STRING")
+ self.assertEqual(field.mode, "REQUIRED")
+ self.assertEqual(field.description, "Testing")
+ self.assertEqual(field.fields, ())
+ self.assertEqual(field.policy_tags, policy)
def test_to_api_repr(self):
from google.cloud.bigquery.schema import PolicyTagList
@@ -92,7 +95,6 @@ def test_to_api_repr(self):
"mode": "NULLABLE",
"name": "foo",
"type": "INTEGER",
- "description": None,
"policyTags": {"names": ["foo", "bar"]},
},
)
@@ -109,13 +111,12 @@ def test_to_api_repr_with_subfield(self):
"mode": "NULLABLE",
"name": "bar",
"type": "INTEGER",
- "description": None,
+ "policyTags": {"names": []},
}
],
"mode": "REQUIRED",
"name": "foo",
"type": record_type,
- "description": None,
},
)
@@ -168,17 +169,17 @@ def test_from_api_repr_defaults(self):
def test_name_property(self):
name = "lemon-ness"
schema_field = self._make_one(name, "INTEGER")
- self.assertIs(schema_field.name, name)
+ self.assertEqual(schema_field.name, name)
def test_field_type_property(self):
field_type = "BOOLEAN"
schema_field = self._make_one("whether", field_type)
- self.assertIs(schema_field.field_type, field_type)
+ self.assertEqual(schema_field.field_type, field_type)
def test_mode_property(self):
mode = "REPEATED"
schema_field = self._make_one("again", "FLOAT", mode=mode)
- self.assertIs(schema_field.mode, mode)
+ self.assertEqual(schema_field.mode, mode)
def test_is_nullable(self):
mode = "NULLABLE"
@@ -193,14 +194,14 @@ def test_is_not_nullable(self):
def test_description_property(self):
description = "It holds some data."
schema_field = self._make_one("do", "TIMESTAMP", description=description)
- self.assertIs(schema_field.description, description)
+ self.assertEqual(schema_field.description, description)
def test_fields_property(self):
sub_field1 = self._make_one("one", "STRING")
sub_field2 = self._make_one("fish", "INTEGER")
fields = (sub_field1, sub_field2)
schema_field = self._make_one("boat", "RECORD", fields=fields)
- self.assertIs(schema_field.fields, fields)
+ self.assertEqual(schema_field.fields, fields)
def test_to_standard_sql_simple_type(self):
sql_type = self._get_standard_sql_data_type_class()
@@ -412,6 +413,23 @@ def test___eq___hit_w_fields(self):
other = self._make_one("test", "RECORD", fields=[sub1, sub2])
self.assertEqual(field, other)
+ def test___eq___hit_w_policy_tags(self):
+ field = self._make_one(
+ "test",
+ "STRING",
+ mode="REQUIRED",
+ description="Testing",
+ policy_tags=PolicyTagList(names=["foo", "bar"]),
+ )
+ other = self._make_one(
+ "test",
+ "STRING",
+ mode="REQUIRED",
+ description="Testing",
+ policy_tags=PolicyTagList(names=["bar", "foo"]),
+ )
+ self.assertEqual(field, other) # Policy tags order does not matter.
+
def test___ne___wrong_type(self):
field = self._make_one("toast", "INTEGER")
other = object()
@@ -434,6 +452,23 @@ def test___ne___different_values(self):
)
self.assertNotEqual(field1, field2)
+ def test___ne___different_policy_tags(self):
+ field = self._make_one(
+ "test",
+ "STRING",
+ mode="REQUIRED",
+ description="Testing",
+ policy_tags=PolicyTagList(names=["foo", "bar"]),
+ )
+ other = self._make_one(
+ "test",
+ "STRING",
+ mode="REQUIRED",
+ description="Testing",
+ policy_tags=PolicyTagList(names=["foo", "baz"]),
+ )
+ self.assertNotEqual(field, other)
+
def test___hash__set_equality(self):
sub1 = self._make_one("sub1", "STRING")
sub2 = self._make_one("sub2", "STRING")
@@ -454,7 +489,7 @@ def test___hash__not_equals(self):
def test___repr__(self):
field1 = self._make_one("field1", "STRING")
- expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, (), None)"
+ expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, (), ())"
self.assertEqual(repr(field1), expected)
@@ -537,12 +572,17 @@ def test_defaults(self):
"name": "full_name",
"type": "STRING",
"mode": "REQUIRED",
- "description": None,
+ "policyTags": {"names": []},
},
)
self.assertEqual(
resource[1],
- {"name": "age", "type": "INTEGER", "mode": "REQUIRED", "description": None},
+ {
+ "name": "age",
+ "type": "INTEGER",
+ "mode": "REQUIRED",
+ "policyTags": {"names": []},
+ },
)
def test_w_description(self):
@@ -552,7 +592,13 @@ def test_w_description(self):
full_name = SchemaField(
"full_name", "STRING", mode="REQUIRED", description=DESCRIPTION
)
- age = SchemaField("age", "INTEGER", mode="REQUIRED")
+ age = SchemaField(
+ "age",
+ "INTEGER",
+ mode="REQUIRED",
+ # Explicitly unset description.
+ description=None,
+ )
resource = self._call_fut([full_name, age])
self.assertEqual(len(resource), 2)
self.assertEqual(
@@ -562,11 +608,18 @@ def test_w_description(self):
"type": "STRING",
"mode": "REQUIRED",
"description": DESCRIPTION,
+ "policyTags": {"names": []},
},
)
self.assertEqual(
resource[1],
- {"name": "age", "type": "INTEGER", "mode": "REQUIRED", "description": None},
+ {
+ "name": "age",
+ "type": "INTEGER",
+ "mode": "REQUIRED",
+ "description": None,
+ "policyTags": {"names": []},
+ },
)
def test_w_subfields(self):
@@ -586,7 +639,7 @@ def test_w_subfields(self):
"name": "full_name",
"type": "STRING",
"mode": "REQUIRED",
- "description": None,
+ "policyTags": {"names": []},
},
)
self.assertEqual(
@@ -595,19 +648,18 @@ def test_w_subfields(self):
"name": "phone",
"type": "RECORD",
"mode": "REPEATED",
- "description": None,
"fields": [
{
"name": "type",
"type": "STRING",
"mode": "REQUIRED",
- "description": None,
+ "policyTags": {"names": []},
},
{
"name": "number",
"type": "STRING",
"mode": "REQUIRED",
- "description": None,
+ "policyTags": {"names": []},
},
],
},
@@ -742,3 +794,165 @@ def test___hash__not_equals(self):
set_one = {policy1}
set_two = {policy2}
self.assertNotEqual(set_one, set_two)
+
+
+@pytest.mark.parametrize(
+ "api,expect,key2",
+ [
+ (
+ dict(name="n", type="NUMERIC"),
+ ("n", "NUMERIC", None, None, None),
+ ("n", "NUMERIC"),
+ ),
+ (
+ dict(name="n", type="NUMERIC", precision=9),
+ ("n", "NUMERIC", 9, None, None),
+ ("n", "NUMERIC(9)"),
+ ),
+ (
+ dict(name="n", type="NUMERIC", precision=9, scale=2),
+ ("n", "NUMERIC", 9, 2, None),
+ ("n", "NUMERIC(9, 2)"),
+ ),
+ (
+ dict(name="n", type="BIGNUMERIC"),
+ ("n", "BIGNUMERIC", None, None, None),
+ ("n", "BIGNUMERIC"),
+ ),
+ (
+ dict(name="n", type="BIGNUMERIC", precision=40),
+ ("n", "BIGNUMERIC", 40, None, None),
+ ("n", "BIGNUMERIC(40)"),
+ ),
+ (
+ dict(name="n", type="BIGNUMERIC", precision=40, scale=2),
+ ("n", "BIGNUMERIC", 40, 2, None),
+ ("n", "BIGNUMERIC(40, 2)"),
+ ),
+ (
+ dict(name="n", type="STRING"),
+ ("n", "STRING", None, None, None),
+ ("n", "STRING"),
+ ),
+ (
+ dict(name="n", type="STRING", maxLength=9),
+ ("n", "STRING", None, None, 9),
+ ("n", "STRING(9)"),
+ ),
+ (
+ dict(name="n", type="BYTES"),
+ ("n", "BYTES", None, None, None),
+ ("n", "BYTES"),
+ ),
+ (
+ dict(name="n", type="BYTES", maxLength=9),
+ ("n", "BYTES", None, None, 9),
+ ("n", "BYTES(9)"),
+ ),
+ ],
+)
+def test_from_api_repr_parameterized(api, expect, key2):
+ from google.cloud.bigquery.schema import SchemaField
+
+ field = SchemaField.from_api_repr(api)
+
+ assert (
+ field.name,
+ field.field_type,
+ field.precision,
+ field.scale,
+ field.max_length,
+ ) == expect
+
+ assert field._key()[:2] == key2
+
+
+@pytest.mark.parametrize(
+ "field,api",
+ [
+ (
+ dict(name="n", field_type="NUMERIC"),
+ dict(name="n", type="NUMERIC", mode="NULLABLE", policyTags={"names": []}),
+ ),
+ (
+ dict(name="n", field_type="NUMERIC", precision=9),
+ dict(
+ name="n",
+ type="NUMERIC",
+ mode="NULLABLE",
+ precision=9,
+ policyTags={"names": []},
+ ),
+ ),
+ (
+ dict(name="n", field_type="NUMERIC", precision=9, scale=2),
+ dict(
+ name="n",
+ type="NUMERIC",
+ mode="NULLABLE",
+ precision=9,
+ scale=2,
+ policyTags={"names": []},
+ ),
+ ),
+ (
+ dict(name="n", field_type="BIGNUMERIC"),
+ dict(
+ name="n", type="BIGNUMERIC", mode="NULLABLE", policyTags={"names": []}
+ ),
+ ),
+ (
+ dict(name="n", field_type="BIGNUMERIC", precision=40),
+ dict(
+ name="n",
+ type="BIGNUMERIC",
+ mode="NULLABLE",
+ precision=40,
+ policyTags={"names": []},
+ ),
+ ),
+ (
+ dict(name="n", field_type="BIGNUMERIC", precision=40, scale=2),
+ dict(
+ name="n",
+ type="BIGNUMERIC",
+ mode="NULLABLE",
+ precision=40,
+ scale=2,
+ policyTags={"names": []},
+ ),
+ ),
+ (
+ dict(name="n", field_type="STRING"),
+ dict(name="n", type="STRING", mode="NULLABLE", policyTags={"names": []}),
+ ),
+ (
+ dict(name="n", field_type="STRING", max_length=9),
+ dict(
+ name="n",
+ type="STRING",
+ mode="NULLABLE",
+ maxLength=9,
+ policyTags={"names": []},
+ ),
+ ),
+ (
+ dict(name="n", field_type="BYTES"),
+ dict(name="n", type="BYTES", mode="NULLABLE", policyTags={"names": []}),
+ ),
+ (
+ dict(name="n", field_type="BYTES", max_length=9),
+ dict(
+ name="n",
+ type="BYTES",
+ mode="NULLABLE",
+ maxLength=9,
+ policyTags={"names": []},
+ ),
+ ),
+ ],
+)
+def test_to_api_repr_parameterized(field, api):
+ from google.cloud.bigquery.schema import SchemaField
+
+ assert SchemaField(**field).to_api_repr() == api
diff --git a/tests/unit/test_signature_compatibility.py b/tests/unit/test_signature_compatibility.py
index 6002ae3e8..07b823e2c 100644
--- a/tests/unit/test_signature_compatibility.py
+++ b/tests/unit/test_signature_compatibility.py
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+from collections import OrderedDict
import inspect
import pytest
@@ -31,21 +32,31 @@ def row_iterator_class():
return RowIterator
-@pytest.mark.skipif(
- not hasattr(inspect, "signature"),
- reason="inspect.signature() is not availalbe in older Python versions",
-)
def test_to_arrow_method_signatures_match(query_job_class, row_iterator_class):
- sig = inspect.signature(query_job_class.to_arrow)
- sig2 = inspect.signature(row_iterator_class.to_arrow)
- assert sig == sig2
+ query_job_sig = inspect.signature(query_job_class.to_arrow)
+ iterator_sig = inspect.signature(row_iterator_class.to_arrow)
+
+ assert "max_results" in query_job_sig.parameters
+
+ # Compare the signatures while ignoring the max_results parameter, which is
+ # specific to the method on QueryJob.
+ params = OrderedDict(query_job_sig.parameters)
+ del params["max_results"]
+ query_job_sig = query_job_sig.replace(parameters=params.values())
+
+ assert query_job_sig == iterator_sig
-@pytest.mark.skipif(
- not hasattr(inspect, "signature"),
- reason="inspect.signature() is not availalbe in older Python versions",
-)
def test_to_dataframe_method_signatures_match(query_job_class, row_iterator_class):
- sig = inspect.signature(query_job_class.to_dataframe)
- sig2 = inspect.signature(row_iterator_class.to_dataframe)
- assert sig == sig2
+ """QueryJob.to_dataframe must match RowIterator.to_dataframe, apart from max_results."""
+ query_job_sig = inspect.signature(query_job_class.to_dataframe)
+ iterator_sig = inspect.signature(row_iterator_class.to_dataframe)
+
+ assert "max_results" in query_job_sig.parameters
+
+ # Compare the signatures while ignoring the max_results parameter, which is
+ # specific to the method on QueryJob.
+ params = OrderedDict(query_job_sig.parameters)
+ del params["max_results"]
+ query_job_sig = query_job_sig.replace(parameters=params.values())
+
+ assert query_job_sig == iterator_sig
diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py
index 3373528e0..1ce930ee4 100644
--- a/tests/unit/test_table.py
+++ b/tests/unit/test_table.py
@@ -14,16 +14,17 @@
import datetime
import logging
+import re
import time
+import types
import unittest
import warnings
import mock
-import pkg_resources
import pytest
-import pytz
import google.api_core.exceptions
+from test_utils.imports import maybe_fail_import
try:
from google.cloud import bigquery_storage
@@ -39,14 +40,16 @@
except (ImportError, AttributeError): # pragma: NO COVER
pandas = None
+try:
+ import geopandas
+except (ImportError, AttributeError): # pragma: NO COVER
+ geopandas = None
+
try:
import pyarrow
import pyarrow.types
-
- PYARROW_VERSION = pkg_resources.parse_version(pyarrow.__version__)
except ImportError: # pragma: NO COVER
pyarrow = None
- PYARROW_VERSION = pkg_resources.parse_version("0.0.1")
try:
from tqdm import tqdm
@@ -56,9 +59,6 @@
from google.cloud.bigquery.dataset import DatasetReference
-PYARROW_TIMESTAMP_VERSION = pkg_resources.parse_version("2.0.0")
-
-
def _mock_client():
from google.cloud.bigquery import client
@@ -113,8 +113,6 @@ def _make_one(self, *args, **kw):
return self._get_target_class()(*args, **kw)
def test_ctor_defaults(self):
- from google.cloud.bigquery.dataset import DatasetReference
-
dataset_ref = DatasetReference("project_1", "dataset_1")
table_ref = self._make_one(dataset_ref, "table_1")
@@ -122,8 +120,6 @@ def test_ctor_defaults(self):
self.assertEqual(table_ref.table_id, "table_1")
def test_to_api_repr(self):
- from google.cloud.bigquery.dataset import DatasetReference
-
dataset_ref = DatasetReference("project_1", "dataset_1")
table_ref = self._make_one(dataset_ref, "table_1")
@@ -135,7 +131,6 @@ def test_to_api_repr(self):
)
def test_from_api_repr(self):
- from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery.table import TableReference
dataset_ref = DatasetReference("project_1", "dataset_1")
@@ -202,8 +197,6 @@ def test_from_string_ignores_default_project(self):
self.assertEqual(got.table_id, "string_table")
def test___eq___wrong_type(self):
- from google.cloud.bigquery.dataset import DatasetReference
-
dataset_ref = DatasetReference("project_1", "dataset_1")
table = self._make_one(dataset_ref, "table_1")
other = object()
@@ -211,8 +204,6 @@ def test___eq___wrong_type(self):
self.assertEqual(table, mock.ANY)
def test___eq___project_mismatch(self):
- from google.cloud.bigquery.dataset import DatasetReference
-
dataset = DatasetReference("project_1", "dataset_1")
other_dataset = DatasetReference("project_2", "dataset_1")
table = self._make_one(dataset, "table_1")
@@ -220,8 +211,6 @@ def test___eq___project_mismatch(self):
self.assertNotEqual(table, other)
def test___eq___dataset_mismatch(self):
- from google.cloud.bigquery.dataset import DatasetReference
-
dataset = DatasetReference("project_1", "dataset_1")
other_dataset = DatasetReference("project_1", "dataset_2")
table = self._make_one(dataset, "table_1")
@@ -229,24 +218,18 @@ def test___eq___dataset_mismatch(self):
self.assertNotEqual(table, other)
def test___eq___table_mismatch(self):
- from google.cloud.bigquery.dataset import DatasetReference
-
dataset = DatasetReference("project_1", "dataset_1")
table = self._make_one(dataset, "table_1")
other = self._make_one(dataset, "table_2")
self.assertNotEqual(table, other)
def test___eq___equality(self):
- from google.cloud.bigquery.dataset import DatasetReference
-
dataset = DatasetReference("project_1", "dataset_1")
table = self._make_one(dataset, "table_1")
other = self._make_one(dataset, "table_1")
self.assertEqual(table, other)
def test___hash__set_equality(self):
- from google.cloud.bigquery.dataset import DatasetReference
-
dataset = DatasetReference("project_1", "dataset_1")
table1 = self._make_one(dataset, "table1")
table2 = self._make_one(dataset, "table2")
@@ -255,8 +238,6 @@ def test___hash__set_equality(self):
self.assertEqual(set_one, set_two)
def test___hash__not_equals(self):
- from google.cloud.bigquery.dataset import DatasetReference
-
dataset = DatasetReference("project_1", "dataset_1")
table1 = self._make_one(dataset, "table1")
table2 = self._make_one(dataset, "table2")
@@ -292,8 +273,6 @@ def _get_target_class():
return Table
def _make_one(self, *args, **kw):
- from google.cloud.bigquery.dataset import DatasetReference
-
if len(args) == 0:
dataset = DatasetReference(self.PROJECT, self.DS_ID)
table_ref = dataset.table(self.TABLE_NAME)
@@ -579,6 +558,68 @@ def test_num_rows_getter(self):
with self.assertRaises(ValueError):
getattr(table, "num_rows")
+    def test__eq__wrong_type(self):
+        """An object of another class that borrows ``_properties`` is not equal."""
+
+        class TableWannabe:
+            pass
+
+        real_table = self._make_one("project_foo.dataset_bar.table_baz")
+        impostor = TableWannabe()
+        impostor._properties = real_table._properties
+
+        assert real_table != impostor  # Can't fake it.
+
+    def test__eq__same_table_basic(self):
+        """Two tables built from the same table ID string compare equal."""
+        first = self._make_one("project_foo.dataset_bar.table_baz")
+        second = self._make_one("project_foo.dataset_bar.table_baz")
+
+        assert first == second
+
+    def test__eq__same_table_multiple_properties(self):
+        """Tables with the same ID and the same metadata compare equal."""
+        from google.cloud.bigquery import SchemaField
+
+        def build_table():
+            # Every call creates fresh SchemaField objects, so the two tables
+            # under comparison share no schema instances.
+            table = self._make_one("project_foo.dataset_bar.table_baz")
+            table.require_partition_filter = True
+            table.labels = {"first": "one", "second": "two"}
+            table.schema = [
+                SchemaField("name", "STRING", "REQUIRED"),
+                SchemaField("age", "INTEGER", "NULLABLE"),
+            ]
+            return table
+
+        table_1 = build_table()
+        table_2 = build_table()
+
+        assert table_1 == table_2
+
+    def test__eq__same_table_property_different(self):
+        """Tables with the same ID stay equal when their descriptions differ."""
+        tables = []
+        for description in ("This is table baz", "This is also table baz"):
+            table = self._make_one("project_foo.dataset_bar.table_baz")
+            table.description = description
+            tables.append(table)
+        first, second = tables
+
+        assert first == second  # Still equal, only table reference is important.
+
+    def test__eq__different_table(self):
+        """Tables whose table IDs differ compare unequal."""
+        baz = self._make_one("project_foo.dataset_bar.table_baz")
+        baz_2 = self._make_one("project_foo.dataset_bar.table_baz_2")
+
+        assert baz != baz_2
+
+    def test_hashable(self):
+        """Tables with the same ID hash identically despite differing descriptions."""
+        tables = []
+        for description in ("This is a table", "Metadata is irrelevant for hashes"):
+            table = self._make_one("project_foo.dataset_bar.table_baz")
+            table.description = description
+            tables.append(table)  # Keep both tables alive until they are hashed.
+
+        assert hash(tables[0]) == hash(tables[1])
+
def test_schema_setter_non_sequence(self):
dataset = DatasetReference(self.PROJECT, self.DS_ID)
table_ref = dataset.table(self.TABLE_NAME)
@@ -683,6 +724,40 @@ def test_props_set_by_server(self):
self.assertEqual(table.full_table_id, TABLE_FULL_ID)
self.assertEqual(table.table_type, "TABLE")
+    def test_snapshot_definition_not_set(self):
+        """A freshly constructed table reports no snapshot definition."""
+        table_ref = DatasetReference(self.PROJECT, self.DS_ID).table(self.TABLE_NAME)
+        table = self._make_one(table_ref)
+
+        assert table.snapshot_definition is None
+
+    def test_snapshot_definition_set(self):
+        """A ``snapshotDefinition`` resource is exposed as a SnapshotDefinition."""
+        from google.cloud._helpers import UTC
+        from google.cloud.bigquery.table import SnapshotDefinition
+
+        table_ref = DatasetReference(self.PROJECT, self.DS_ID).table(self.TABLE_NAME)
+        table = self._make_one(table_ref)
+
+        base_table_resource = {
+            "projectId": "project_x",
+            "datasetId": "dataset_y",
+            "tableId": "table_z",
+        }
+        table._properties["snapshotDefinition"] = {
+            "baseTableReference": base_table_resource,
+            "snapshotTime": "2010-09-28T10:20:30.123Z",
+        }
+        expected_path = "/projects/project_x/datasets/dataset_y/tables/table_z"
+        expected_time = datetime.datetime(2010, 9, 28, 10, 20, 30, 123000, tzinfo=UTC)
+
+        snapshot = table.snapshot_definition
+
+        assert isinstance(snapshot, SnapshotDefinition)
+        assert snapshot.base_table_reference.path == expected_path
+        assert snapshot.snapshot_time == expected_time
+
def test_description_setter_bad_value(self):
dataset = DatasetReference(self.PROJECT, self.DS_ID)
table_ref = dataset.table(self.TABLE_NAME)
@@ -837,7 +912,9 @@ def test_mview_last_refresh_time(self):
}
self.assertEqual(
table.mview_last_refresh_time,
- datetime.datetime(2020, 11, 30, 15, 57, 22, 496000, tzinfo=pytz.utc),
+ datetime.datetime(
+ 2020, 11, 30, 15, 57, 22, 496000, tzinfo=datetime.timezone.utc
+ ),
)
def test_mview_enable_refresh(self):
@@ -1210,8 +1287,8 @@ def test_clustering_fields_setter_w_none(self):
table._properties["clustering"] = {"fields": fields}
table.clustering_fields = None
- self.assertEqual(table.clustering_fields, None)
- self.assertFalse("clustering" in table._properties)
+ self.assertIsNone(table.clustering_fields)
+ self.assertTrue("clustering" in table._properties) # None stored explicitly
def test_clustering_fields_setter_w_none_noop(self):
dataset = DatasetReference(self.PROJECT, self.DS_ID)
@@ -1219,8 +1296,8 @@ def test_clustering_fields_setter_w_none_noop(self):
table = self._make_one(table_ref)
table.clustering_fields = None
- self.assertEqual(table.clustering_fields, None)
- self.assertFalse("clustering" in table._properties)
+ self.assertIsNone(table.clustering_fields)
+ self.assertTrue("clustering" in table._properties) # None stored explicitly
def test_encryption_configuration_setter(self):
# Previously, the EncryptionConfiguration class was in the table module, not the
@@ -1507,6 +1584,188 @@ def test_to_api_repr(self):
table = self._make_one(resource)
self.assertEqual(table.to_api_repr(), resource)
+    def test__eq__wrong_type(self):
+        """An unrelated class with matching ID attributes is still not equal."""
+
+        class FakeTableListItem:
+            project = "project_foo"
+            dataset_id = "dataset_bar"
+            table_id = "table_baz"
+
+        table_reference = {
+            "projectId": "project_foo",
+            "datasetId": "dataset_bar",
+            "tableId": "table_baz",
+        }
+        genuine = self._make_one({"tableReference": table_reference})
+        impostor = FakeTableListItem()
+
+        assert genuine != impostor  # Can't fake it.
+
+    def test__eq__same_table(self):
+        """Two list items built from one resource compare equal."""
+        table_reference = {
+            "projectId": "project_foo",
+            "datasetId": "dataset_bar",
+            "tableId": "table_baz",
+        }
+        resource = {"tableReference": table_reference}
+
+        first = self._make_one(resource)
+        second = self._make_one(resource)
+
+        assert first == second
+
+    def test__eq__same_table_property_different(self):
+        """A different ``friendlyName`` does not affect list item equality."""
+        shared_reference = {
+            "projectId": "project_foo",
+            "datasetId": "dataset_bar",
+            "tableId": "table_baz",
+        }
+        table_1, table_2 = [
+            self._make_one({"tableReference": shared_reference, "friendlyName": name})
+            for name in ("Table One", "Table Two")
+        ]
+
+        assert table_1 == table_2  # Still equal, only table reference is important.
+
+    def test__eq__different_table(self):
+        """List items referring to different table IDs compare unequal."""
+
+        def item_for(table_id):
+            reference = {
+                "projectId": "project_foo",
+                "datasetId": "dataset_bar",
+                "tableId": table_id,
+            }
+            return self._make_one({"tableReference": reference})
+
+        table_1 = item_for("table_baz")
+        table_2 = item_for("table_quux")
+
+        assert table_1 != table_2
+
+    def test_hashable(self):
+        """List items created from the same resource share a hash value."""
+        table_reference = {
+            "projectId": "project_foo",
+            "datasetId": "dataset_bar",
+            "tableId": "table_baz",
+        }
+        resource = {"tableReference": table_reference}
+        items = [self._make_one(resource) for _ in range(2)]
+
+        assert hash(items[0]) == hash(items[1])
+
+
+class TestTableClassesInterchangeability:
+    """Check equality across Table, TableReference and TableListItem.
+
+    Each test compares the two objects in both operand orders.
+    """
+
+    @staticmethod
+    def _make_table(*args, **kwargs):
+        from google.cloud.bigquery import table as table_module
+
+        return table_module.Table(*args, **kwargs)
+
+    @staticmethod
+    def _make_table_ref(*args, **kwargs):
+        from google.cloud.bigquery import table as table_module
+
+        return table_module.TableReference(*args, **kwargs)
+
+    @staticmethod
+    def _make_table_list_item(*args, **kwargs):
+        from google.cloud.bigquery import table as table_module
+
+        return table_module.TableListItem(*args, **kwargs)
+
+    def test_table_eq_table_ref(self):
+        table = self._make_table("project_foo.dataset_bar.table_baz")
+        table_ref = self._make_table_ref(
+            DatasetReference("project_foo", "dataset_bar"), "table_baz"
+        )
+
+        assert table == table_ref
+        assert table_ref == table
+
+    def test_table_eq_table_list_item(self):
+        table = self._make_table("project_foo.dataset_bar.table_baz")
+        table_reference = {
+            "projectId": "project_foo",
+            "datasetId": "dataset_bar",
+            "tableId": "table_baz",
+        }
+        table_list_item = self._make_table_list_item(
+            {"tableReference": table_reference}
+        )
+
+        assert table == table_list_item
+        assert table_list_item == table
+
+    def test_table_ref_eq_table_list_item(self):
+        table_ref = self._make_table_ref(
+            DatasetReference("project_foo", "dataset_bar"), "table_baz"
+        )
+        table_reference = {
+            "projectId": "project_foo",
+            "datasetId": "dataset_bar",
+            "tableId": "table_baz",
+        }
+        table_list_item = self._make_table_list_item(
+            {"tableReference": table_reference}
+        )
+
+        assert table_ref == table_list_item
+        assert table_list_item == table_ref
+
+
+class TestSnapshotDefinition:
+    """Tests for constructing ``SnapshotDefinition`` from an API resource."""
+
+    @staticmethod
+    def _get_target_class():
+        from google.cloud.bigquery import table as table_module
+
+        return table_module.SnapshotDefinition
+
+    @classmethod
+    def _make_one(cls, *args, **kwargs):
+        return cls._get_target_class()(*args, **kwargs)
+
+    def test_ctor_empty_resource(self):
+        snapshot = self._make_one(resource={})
+
+        assert snapshot.base_table_reference is None
+        assert snapshot.snapshot_time is None
+
+    def test_ctor_full_resource(self):
+        from google.cloud._helpers import UTC
+        from google.cloud.bigquery.table import TableReference
+
+        base_table_resource = {
+            "projectId": "my-project",
+            "datasetId": "your-dataset",
+            "tableId": "our-table",
+        }
+        snapshot = self._make_one(
+            {
+                "baseTableReference": base_table_resource,
+                "snapshotTime": "2005-06-07T19:35:02.123Z",
+            }
+        )
+
+        assert snapshot.base_table_reference == TableReference.from_string(
+            "my-project.your-dataset.our-table"
+        )
+        assert snapshot.snapshot_time == datetime.datetime(
+            2005, 6, 7, 19, 35, 2, 123000, tzinfo=UTC
+        )
+
class TestRow(unittest.TestCase):
def test_row(self):
@@ -1570,6 +1829,46 @@ def test_to_dataframe(self):
self.assertIsInstance(df, pandas.DataFrame)
self.assertEqual(len(df), 0) # verify the number of rows
+    @mock.patch("google.cloud.bigquery.table.pandas", new=None)
+    def test_to_dataframe_iterable_error_if_pandas_is_none(self):
+        """With pandas patched to None, ``to_dataframe_iterable()`` raises ValueError."""
+        empty_iterator = self._make_one()
+
+        self.assertRaises(ValueError, empty_iterator.to_dataframe_iterable)
+
+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    def test_to_dataframe_iterable(self):
+        """The empty iterator yields exactly one DataFrame with no rows or columns."""
+        frames = list(self._make_one().to_dataframe_iterable())
+
+        self.assertEqual(len(frames), 1)
+        df = frames[0]
+        self.assertIsInstance(df, pandas.DataFrame)
+        self.assertEqual(len(df), 0)  # Verify the number of rows.
+        self.assertEqual(len(df.columns), 0)
+
+    @mock.patch("google.cloud.bigquery.table.geopandas", new=None)
+    def test_to_geodataframe_if_geopandas_is_none(self):
+        """With geopandas patched to None, ``to_geodataframe()`` raises ValueError."""
+        row_iterator = self._make_one()
+        expected_message = re.escape(
+            "The geopandas library is not installed, please install "
+            "geopandas to use the to_geodataframe() function."
+        )
+
+        with self.assertRaisesRegex(ValueError, expected_message):
+            row_iterator.to_geodataframe(create_bqstorage_client=False)
+
+    @unittest.skipIf(geopandas is None, "Requires `geopandas`")
+    def test_to_geodataframe(self):
+        """The empty iterator converts to an empty GeoDataFrame with a WGS 84 CRS."""
+        df = self._make_one().to_geodataframe(create_bqstorage_client=False)
+
+        self.assertIsInstance(df, geopandas.GeoDataFrame)
+        self.assertEqual(len(df), 0)  # verify the number of rows
+        for attribute, expected in (("srs", "EPSG:4326"), ("name", "WGS 84")):
+            self.assertEqual(getattr(df.crs, attribute), expected)
+
class TestRowIterator(unittest.TestCase):
def _class_under_test(self):
@@ -1607,6 +1906,16 @@ def _make_one(
client, api_request, path, schema, table=table, **kwargs
)
+    def _make_one_from_data(self, schema=(), rows=()):
+        """Build a row iterator backed by a mocked API request returning ``rows``.
+
+        Args:
+            schema: Iterable of positional-argument tuples, one per
+                ``SchemaField`` to create.
+            rows: Iterable of rows, each an iterable of cell values.
+
+        Returns:
+            Whatever ``self._make_one`` returns for the mocked client and request.
+        """
+        from google.cloud.bigquery.schema import SchemaField
+
+        fields = [SchemaField(*field_args) for field_args in schema]
+        api_rows = []
+        for row in rows:
+            # Wrap each value in the {"f": [{"v": ...}]} layout used for API rows.
+            api_rows.append({"f": [{"v": cell} for cell in row]})
+
+        api_request = mock.Mock(return_value={"rows": api_rows})
+        return self._make_one(_mock_client(), api_request, "/foo", fields)
+
def test_constructor(self):
from google.cloud.bigquery.table import _item_to_row
from google.cloud.bigquery.table import _rows_page_start
@@ -1768,6 +2077,57 @@ def test__validate_bqstorage_returns_false_when_completely_cached(self):
)
)
+    def test__validate_bqstorage_returns_false_if_max_results_set(self):
+        """``_validate_bqstorage`` returns a falsy value when ``max_results`` is set."""
+        iterator = self._make_one(
+            first_page_response=None, max_results=10  # not cached
+        )
+
+        use_bqstorage = iterator._validate_bqstorage(
+            create_bqstorage_client=True, bqstorage_client=None
+        )
+
+        self.assertFalse(use_bqstorage)
+
+    def test__validate_bqstorage_returns_false_if_missing_dependency(self):
+        """``_validate_bqstorage`` is falsy when importing bigquery_storage fails."""
+
+        def fail_bqstorage_import(name, globals, locals, fromlist, level):
+            # NOTE: *very* simplified, assuming a straightforward absolute import
+            if "bigquery_storage" in name:
+                return True
+            return fromlist is not None and "bigquery_storage" in fromlist
+
+        iterator = self._make_one(first_page_response=None)  # not cached
+
+        with maybe_fail_import(predicate=fail_bqstorage_import):
+            use_bqstorage = iterator._validate_bqstorage(
+                bqstorage_client=None, create_bqstorage_client=True
+            )
+
+        self.assertFalse(use_bqstorage)
+
+    @unittest.skipIf(
+        bigquery_storage is None, "Requires `google-cloud-bigquery-storage`"
+    )
+    def test__validate_bqstorage_returns_false_w_warning_if_obsolete_version(self):
+        """A failed BQ Storage version check gives a warning and a falsy result."""
+        from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError
+
+        iterator = self._make_one(first_page_response=None)  # not cached
+
+        # Make the version check fail as if the installed package were outdated.
+        verify_version_patch = mock.patch(
+            "google.cloud.bigquery.table._helpers.BQ_STORAGE_VERSIONS.verify_version",
+            side_effect=LegacyBigQueryStorageError("BQ Storage too old"),
+        )
+        with verify_version_patch, warnings.catch_warnings(record=True) as warned:
+            use_bqstorage = iterator._validate_bqstorage(
+                bqstorage_client=None, create_bqstorage_client=True
+            )
+
+        self.assertFalse(use_bqstorage)
+
+        assert any(
+            "BQ Storage too old" in str(warning) for warning in warned
+        ), "Obsolete dependency warning not raised."
+
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
def test_to_arrow(self):
from google.cloud.bigquery.schema import SchemaField
@@ -1969,7 +2329,7 @@ def test_to_arrow_w_empty_table(self):
@unittest.skipIf(
bigquery_storage is None, "Requires `google-cloud-bigquery-storage`"
)
- def test_to_arrow_max_results_w_create_bqstorage_warning(self):
+ def test_to_arrow_max_results_w_explicit_bqstorage_client_warning(self):
from google.cloud.bigquery.schema import SchemaField
schema = [
@@ -1983,6 +2343,7 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self):
path = "/foo"
api_request = mock.Mock(return_value={"rows": rows})
mock_client = _mock_client()
+ mock_bqstorage_client = mock.sentinel.bq_storage_client
row_iterator = self._make_one(
client=mock_client,
@@ -1993,7 +2354,7 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self):
)
with warnings.catch_warnings(record=True) as warned:
- row_iterator.to_arrow(create_bqstorage_client=True)
+ row_iterator.to_arrow(bqstorage_client=mock_bqstorage_client)
matches = [
warning
@@ -2003,7 +2364,50 @@ def test_to_arrow_max_results_w_create_bqstorage_warning(self):
and "REST" in str(warning)
]
self.assertEqual(len(matches), 1, msg="User warning was not emitted.")
- mock_client._create_bqstorage_client.assert_not_called()
+ self.assertIn(
+ __file__, str(matches[0]), msg="Warning emitted with incorrect stacklevel"
+ )
+ mock_client._ensure_bqstorage_client.assert_not_called()
+
+    @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
+    @unittest.skipIf(
+        bigquery_storage is None, "Requires `google-cloud-bigquery-storage`"
+    )
+    def test_to_arrow_max_results_w_create_bqstorage_client_no_warning(self):
+        """``to_arrow(create_bqstorage_client=True)`` with max_results stays silent.
+
+        No "cannot use bqstorage_client" warning may be recorded, and the
+        client must not be asked for a BQ Storage client.
+        """
+        from google.cloud.bigquery.schema import SchemaField
+
+        mock_client = _mock_client()
+        api_response = {
+            "rows": [
+                {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
+                {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
+            ]
+        }
+        row_iterator = self._make_one(
+            client=mock_client,
+            api_request=mock.Mock(return_value=api_response),
+            path="/foo",
+            schema=[
+                SchemaField("name", "STRING", mode="REQUIRED"),
+                SchemaField("age", "INTEGER", mode="REQUIRED"),
+            ],
+            max_results=42,
+        )
+
+        with warnings.catch_warnings(record=True) as warned:
+            row_iterator.to_arrow(create_bqstorage_client=True)
+
+        def is_bqstorage_fallback_warning(warning):
+            return (
+                warning.category is UserWarning
+                and "cannot use bqstorage_client" in str(warning).lower()
+                and "REST" in str(warning)
+            )
+
+        matches = list(filter(is_bqstorage_fallback_warning, warned))
+        self.assertFalse(matches)
+        mock_client._ensure_bqstorage_client.assert_not_called()
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
@unittest.skipIf(
@@ -2099,7 +2503,7 @@ def test_to_arrow_w_bqstorage_creates_client(self):
bqstorage_client._transport = mock.create_autospec(
big_query_read_grpc_transport.BigQueryReadGrpcTransport
)
- mock_client._create_bqstorage_client.return_value = bqstorage_client
+ mock_client._ensure_bqstorage_client.return_value = bqstorage_client
session = bigquery_storage.types.ReadSession()
bqstorage_client.create_read_session.return_value = session
row_iterator = mut.RowIterator(
@@ -2114,11 +2518,11 @@ def test_to_arrow_w_bqstorage_creates_client(self):
table=mut.TableReference.from_string("proj.dset.tbl"),
)
row_iterator.to_arrow(create_bqstorage_client=True)
- mock_client._create_bqstorage_client.assert_called_once()
+ mock_client._ensure_bqstorage_client.assert_called_once()
bqstorage_client._transport.grpc_channel.close.assert_called_once()
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
- def test_to_arrow_create_bqstorage_client_wo_bqstorage(self):
+ def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self):
from google.cloud.bigquery.schema import SchemaField
schema = [
@@ -2133,14 +2537,14 @@ def test_to_arrow_create_bqstorage_client_wo_bqstorage(self):
api_request = mock.Mock(return_value={"rows": rows})
mock_client = _mock_client()
- mock_client._create_bqstorage_client.return_value = None
+ mock_client._ensure_bqstorage_client.return_value = None
row_iterator = self._make_one(mock_client, api_request, path, schema)
tbl = row_iterator.to_arrow(create_bqstorage_client=True)
# The client attempted to create a BQ Storage client, and even though
# that was not possible, results were still returned without errors.
- mock_client._create_bqstorage_client.assert_called_once()
+ mock_client._ensure_bqstorage_client.assert_called_once()
self.assertIsInstance(tbl, pyarrow.Table)
self.assertEqual(tbl.num_rows, 2)
@@ -2236,7 +2640,6 @@ def test_to_arrow_w_pyarrow_none(self):
@unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_iterable(self):
from google.cloud.bigquery.schema import SchemaField
- import types
schema = [
SchemaField("name", "STRING", mode="REQUIRED"),
@@ -2279,7 +2682,6 @@ def test_to_dataframe_iterable(self):
@unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_iterable_with_dtypes(self):
from google.cloud.bigquery.schema import SchemaField
- import types
schema = [
SchemaField("name", "STRING", mode="REQUIRED"),
@@ -2391,6 +2793,61 @@ def test_to_dataframe_iterable_w_bqstorage(self):
# Don't close the client if it was passed in.
bqstorage_client._transport.grpc_channel.close.assert_not_called()
+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    @unittest.skipIf(
+        bigquery_storage is None, "Requires `google-cloud-bigquery-storage`"
+    )
+    @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
+    def test_to_dataframe_iterable_w_bqstorage_max_results_warning(self):
+        """An explicit BQ Storage client plus ``max_results`` warns exactly once.
+
+        The iterator must still yield two DataFrames for the two mocked pages.
+        """
+        from google.cloud.bigquery import schema
+        from google.cloud.bigquery import table as mut
+
+        bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
+
+        iterator_schema = [
+            schema.SchemaField(column, column_type, mode="REQUIRED")
+            for column, column_type in (("name", "STRING"), ("age", "INTEGER"))
+        ]
+        first_page = {
+            "rows": [{"f": [{"v": "Bengt"}, {"v": "32"}]}],
+            "pageToken": "NEXTPAGE",
+        }
+        last_page = {"rows": [{"f": [{"v": "Sven"}, {"v": "33"}]}]}
+        api_request = mock.Mock(side_effect=[first_page, last_page])
+        row_iterator = mut.RowIterator(
+            _mock_client(),
+            api_request,
+            "/foo",
+            iterator_schema,
+            table=mut.TableReference.from_string("proj.dset.tbl"),
+            selected_fields=iterator_schema,
+            max_results=25,
+        )
+
+        with warnings.catch_warnings(record=True) as warned:
+            dfs = row_iterator.to_dataframe_iterable(bqstorage_client=bqstorage_client)
+
+        # Was a warning emitted?
+        matches = []
+        for warning in warned:
+            if warning.category is not UserWarning:
+                continue
+            message = str(warning)
+            if "cannot use bqstorage_client" in message.lower() and "REST" in message:
+                matches.append(warning)
+        assert len(matches) == 1, "User warning was not emitted."
+        assert __file__ in str(matches[0]), "Warning emitted with incorrect stacklevel"
+
+        # Basic check of what we got as a result.
+        dataframes = list(dfs)
+        assert len(dataframes) == 2
+        for dataframe in dataframes:
+            assert isinstance(dataframe, pandas.DataFrame)
+
@mock.patch("google.cloud.bigquery.table.pandas", new=None)
def test_to_dataframe_iterable_error_if_pandas_is_none(self):
from google.cloud.bigquery.schema import SchemaField
@@ -2452,10 +2909,7 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self):
df = row_iterator.to_dataframe(create_bqstorage_client=False)
- tzinfo = None
- if PYARROW_VERSION >= PYARROW_TIMESTAMP_VERSION:
- tzinfo = datetime.timezone.utc
-
+ tzinfo = datetime.timezone.utc
self.assertIsInstance(df, pandas.DataFrame)
self.assertEqual(len(df), 2) # verify the number of rows
self.assertEqual(list(df.columns), ["some_timestamp"])
@@ -2753,6 +3207,18 @@ def test_to_dataframe_error_if_pandas_is_none(self):
with self.assertRaises(ValueError):
row_iterator.to_dataframe()
+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    @mock.patch("google.cloud.bigquery.table.shapely", new=None)
+    def test_to_dataframe_error_if_shapely_is_none(self):
+        """``geography_as_object=True`` raises ValueError when shapely is None."""
+        expected_message = re.escape(
+            "The shapely library is not installed, please install "
+            "shapely to use the geography_as_object option."
+        )
+
+        with self.assertRaisesRegex(ValueError, expected_message):
+            row_iterator = self._make_one_from_data()
+            row_iterator.to_dataframe(geography_as_object=True)
+
@unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_max_results_w_bqstorage_warning(self):
from google.cloud.bigquery.schema import SchemaField
@@ -2790,7 +3256,7 @@ def test_to_dataframe_max_results_w_bqstorage_warning(self):
self.assertEqual(len(matches), 1, msg="User warning was not emitted.")
@unittest.skipIf(pandas is None, "Requires `pandas`")
- def test_to_dataframe_max_results_w_create_bqstorage_warning(self):
+ def test_to_dataframe_max_results_w_explicit_bqstorage_client_warning(self):
from google.cloud.bigquery.schema import SchemaField
schema = [
@@ -2804,6 +3270,7 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self):
path = "/foo"
api_request = mock.Mock(return_value={"rows": rows})
mock_client = _mock_client()
+ mock_bqstorage_client = mock.sentinel.bq_storage_client
row_iterator = self._make_one(
client=mock_client,
@@ -2814,7 +3281,7 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self):
)
with warnings.catch_warnings(record=True) as warned:
- row_iterator.to_dataframe(create_bqstorage_client=True)
+ row_iterator.to_dataframe(bqstorage_client=mock_bqstorage_client)
matches = [
warning
@@ -2824,7 +3291,47 @@ def test_to_dataframe_max_results_w_create_bqstorage_warning(self):
and "REST" in str(warning)
]
self.assertEqual(len(matches), 1, msg="User warning was not emitted.")
- mock_client._create_bqstorage_client.assert_not_called()
+ self.assertIn(
+ __file__, str(matches[0]), msg="Warning emitted with incorrect stacklevel"
+ )
+ mock_client._ensure_bqstorage_client.assert_not_called()
+
+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    def test_to_dataframe_max_results_w_create_bqstorage_client_no_warning(self):
+        """``to_dataframe(create_bqstorage_client=True)`` with max_results stays silent.
+
+        No "cannot use bqstorage_client" warning may be recorded, and the
+        client must not be asked for a BQ Storage client.
+        """
+        from google.cloud.bigquery.schema import SchemaField
+
+        mock_client = _mock_client()
+        api_response = {
+            "rows": [
+                {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
+                {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
+            ]
+        }
+        row_iterator = self._make_one(
+            client=mock_client,
+            api_request=mock.Mock(return_value=api_response),
+            path="/foo",
+            schema=[
+                SchemaField("name", "STRING", mode="REQUIRED"),
+                SchemaField("age", "INTEGER", mode="REQUIRED"),
+            ],
+            max_results=42,
+        )
+
+        with warnings.catch_warnings(record=True) as warned:
+            row_iterator.to_dataframe(create_bqstorage_client=True)
+
+        def is_bqstorage_fallback_warning(warning):
+            return (
+                warning.category is UserWarning
+                and "cannot use bqstorage_client" in str(warning).lower()
+                and "REST" in str(warning)
+            )
+
+        matches = list(filter(is_bqstorage_fallback_warning, warned))
+        self.assertFalse(matches)
+        mock_client._ensure_bqstorage_client.assert_not_called()
@unittest.skipIf(pandas is None, "Requires `pandas`")
@unittest.skipIf(
@@ -2839,7 +3346,7 @@ def test_to_dataframe_w_bqstorage_creates_client(self):
bqstorage_client._transport = mock.create_autospec(
big_query_read_grpc_transport.BigQueryReadGrpcTransport
)
- mock_client._create_bqstorage_client.return_value = bqstorage_client
+ mock_client._ensure_bqstorage_client.return_value = bqstorage_client
session = bigquery_storage.types.ReadSession()
bqstorage_client.create_read_session.return_value = session
row_iterator = mut.RowIterator(
@@ -2854,7 +3361,7 @@ def test_to_dataframe_w_bqstorage_creates_client(self):
table=mut.TableReference.from_string("proj.dset.tbl"),
)
row_iterator.to_dataframe(create_bqstorage_client=True)
- mock_client._create_bqstorage_client.assert_called_once()
+ mock_client._ensure_bqstorage_client.assert_called_once()
bqstorage_client._transport.grpc_channel.close.assert_called_once()
@unittest.skipIf(pandas is None, "Requires `pandas`")
@@ -3469,6 +3976,199 @@ def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self):
# Don't close the client if it was passed in.
bqstorage_client._transport.grpc_channel.close.assert_not_called()
+    @unittest.skipIf(geopandas is None, "Requires `geopandas`")
+    def test_to_dataframe_geography_as_object(self):
+        """``geography_as_object=True`` yields geometry objects in a DataFrame."""
+        schema = (("name", "STRING"), ("geog", "GEOGRAPHY"))
+        data = (
+            ("foo", "Point(0 0)"),
+            ("bar", None),
+            ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))"),
+        )
+        frame = self._make_one_from_data(schema, data).to_dataframe(
+            create_bqstorage_client=False, geography_as_object=True,
+        )
+
+        # Overall shape: a plain DataFrame with three rows and two columns.
+        self.assertIsInstance(frame, pandas.DataFrame)
+        self.assertEqual(len(frame), 3)
+        self.assertEqual(list(frame), ["name", "geog"])
+
+        # Both columns are generic object columns; geog is a plain Series.
+        self.assertEqual(frame.name.dtype.name, "object")
+        self.assertEqual(frame.geog.dtype.name, "object")
+        self.assertIsInstance(frame.geog, pandas.Series)
+
+        # The row whose geography is None is expected to come back as a float.
+        value_types = [v.__class__.__name__ for v in frame.geog]
+        self.assertEqual(value_types, ["Point", "float", "Polygon"])
+
+    @mock.patch("google.cloud.bigquery.table.geopandas", new=None)
+    def test_to_geodataframe_error_if_geopandas_is_none(self):
+        """to_geodataframe() raises ValueError when geopandas is patched to None."""
+        expected_message = (
+            "The geopandas library is not installed, please install "
+            "geopandas to use the to_geodataframe() function."
+        )
+        with self.assertRaisesRegex(ValueError, re.escape(expected_message)):
+            self._make_one_from_data().to_geodataframe()
+
+    @unittest.skipIf(geopandas is None, "Requires `geopandas`")
+    def test_to_geodataframe(self):
+        """to_geodataframe() returns a GeoDataFrame with a ``geog`` GeoSeries."""
+        schema = (("name", "STRING"), ("geog", "GEOGRAPHY"))
+        data = (
+            ("foo", "Point(0 0)"),
+            ("bar", None),
+            ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))"),
+        )
+        gdf = self._make_one_from_data(schema, data).to_geodataframe(
+            create_bqstorage_client=False
+        )
+
+        self.assertIsInstance(gdf, geopandas.GeoDataFrame)
+        self.assertEqual(len(gdf), 3)  # row count
+        self.assertEqual(list(gdf), ["name", "geog"])  # column names
+        self.assertEqual(gdf.name.dtype.name, "object")
+        self.assertEqual(gdf.geog.dtype.name, "geometry")
+        self.assertIsInstance(gdf.geog, geopandas.GeoSeries)
+
+        # Areas are compared as strings so the NaN of the null row can be matched.
+        expected_areas = ["0.0", "nan", "0.5"]
+        self.assertEqual([str(area) for area in gdf.area], expected_areas)
+        self.assertEqual([str(area) for area in gdf.geog.area], expected_areas)
+
+        # The frame and its geography column are checked for the same CRS.
+        for geo_obj in (gdf, gdf.geog):
+            self.assertEqual(geo_obj.crs.srs, "EPSG:4326")
+            self.assertEqual(geo_obj.crs.name, "WGS 84")
+
+    @unittest.skipIf(geopandas is None, "Requires `geopandas`")
+    def test_to_geodataframe_ambiguous_geog(self):
+        """Two GEOGRAPHY columns and no ``geography_column`` raise ValueError."""
+        schema = (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY"))
+        row_iterator = self._make_one_from_data(schema, ())
+        expected_message = (
+            "There is more than one GEOGRAPHY column in the result. "
+            "The geography_column argument must be used to specify which "
+            "one to use to create a GeoDataFrame"
+        )
+        with self.assertRaisesRegex(ValueError, re.escape(expected_message)):
+            row_iterator.to_geodataframe(create_bqstorage_client=False)
+
+    @unittest.skipIf(geopandas is None, "Requires `geopandas`")
+    def test_to_geodataframe_bad_geography_column(self):
+        """A ``geography_column`` that is not in the schema raises ValueError."""
+        schema = (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY"))
+        row_iterator = self._make_one_from_data(schema, ())
+        expected_message = (
+            "The given geography column, xxx, doesn't name"
+            " a GEOGRAPHY column in the result."
+        )
+        with self.assertRaisesRegex(ValueError, re.escape(expected_message)):
+            row_iterator.to_geodataframe(
+                create_bqstorage_client=False, geography_column="xxx"
+            )
+
+    @unittest.skipIf(geopandas is None, "Requires `geopandas`")
+    def test_to_geodataframe_no_geog(self):
+        """A schema without any GEOGRAPHY column raises TypeError."""
+        schema = (("name", "STRING"), ("geog", "STRING"))
+        row_iterator = self._make_one_from_data(schema, ())
+        expected_message = (
+            "There must be at least one GEOGRAPHY column"
+            " to create a GeoDataFrame"
+        )
+        with self.assertRaisesRegex(TypeError, re.escape(expected_message)):
+            row_iterator.to_geodataframe(create_bqstorage_client=False)
+
+    @unittest.skipIf(geopandas is None, "Requires `geopandas`")
+    def test_to_geodataframe_w_geography_column(self):
+        """``geography_column`` selects which GEOGRAPHY column is the GeoSeries."""
+        schema = (("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY"))
+        data = (
+            ("foo", "Point(0 0)", "Point(1 1)"),
+            ("bar", None, "Point(2 2)"),
+            ("baz", "Polygon((0 0, 0 1, 1 0, 0 0))", "Point(3 3)"),
+        )
+        gdf = self._make_one_from_data(schema, data).to_geodataframe(
+            create_bqstorage_client=False, geography_column="geog"
+        )
+
+        self.assertIsInstance(gdf, geopandas.GeoDataFrame)
+        self.assertEqual(len(gdf), 3)  # row count
+        self.assertEqual(list(gdf), ["name", "geog", "geog2"])  # column names
+        self.assertEqual(gdf.name.dtype.name, "object")
+        self.assertEqual(gdf.geog.dtype.name, "geometry")
+        self.assertEqual(gdf.geog2.dtype.name, "object")
+        self.assertIsInstance(gdf.geog, geopandas.GeoSeries)
+
+        expected_areas = ["0.0", "nan", "0.5"]
+        self.assertEqual([str(area) for area in gdf.area], expected_areas)
+        self.assertEqual([str(area) for area in gdf.geog.area], expected_areas)
+        self.assertEqual(
+            [v.__class__.__name__ for v in gdf.geog], ["Point", "NoneType", "Polygon"]
+        )
+
+        # geog2 isn't a GeoSeries, but it contains geometries:
+        other_geog = gdf.geog2
+        self.assertIsInstance(other_geog, pandas.Series)
+        self.assertEqual(
+            [v.__class__.__name__ for v in other_geog], ["Point", "Point", "Point"]
+        )
+        # and can easily be converted to a GeoSeries
+        converted_areas = [str(area) for area in geopandas.GeoSeries(other_geog).area]
+        self.assertEqual(converted_areas, ["0.0", "0.0", "0.0"])
+
+    @unittest.skipIf(geopandas is None, "Requires `geopandas`")
+    @mock.patch("google.cloud.bigquery.table.RowIterator.to_dataframe")
+    def test_rowiterator_to_geodataframe_delegation(self, to_dataframe):
+        """
+        RowIterator.to_geodataframe hands its arguments to
+        RowIterator.to_dataframe (patched here) with geography_as_object=True.
+
+        Only the hand-off is demonstrated; the individual option variations
+        are tested for to_dataframe.
+        """
+        import numpy
+        from shapely import wkt
+
+        row_iterator = self._make_one_from_data(
+            (("name", "STRING"), ("g", "GEOGRAPHY"))
+        )
+        to_dataframe.return_value = pandas.DataFrame(
+            {"name": ["foo"], "g": [wkt.loads("point(0 0)")]}
+        )
+
+        # Arguments expected to be passed through to to_dataframe unchanged.
+        forwarded = {
+            "bqstorage_client": object(),
+            "dtypes": {"xxx": numpy.dtype("int64")},
+            "progress_bar_type": "normal",
+            "create_bqstorage_client": False,
+            "date_as_object": False,
+        }
+
+        gdf = row_iterator.to_geodataframe(geography_column="g", **forwarded)
+
+        # The patched to_dataframe must have received them positionally.
+        to_dataframe.assert_called_once_with(
+            forwarded["bqstorage_client"],
+            forwarded["dtypes"],
+            forwarded["progress_bar_type"],
+            forwarded["create_bqstorage_client"],
+            forwarded["date_as_object"],
+            geography_as_object=True,
+        )
+
+        self.assertIsInstance(gdf, geopandas.GeoDataFrame)
+        self.assertEqual(len(gdf), 1)  # row count
+        self.assertEqual(list(gdf), ["name", "g"])  # column names
+        self.assertEqual(gdf.name.dtype.name, "object")
+        self.assertEqual(gdf.g.dtype.name, "geometry")
+        self.assertIsInstance(gdf.g, geopandas.GeoSeries)
+        self.assertEqual([str(area) for area in gdf.area], ["0.0"])
+        self.assertEqual([str(area) for area in gdf.g.area], ["0.0"])
+        self.assertEqual([v.__class__.__name__ for v in gdf.g], ["Point"])
+
class TestPartitionRange(unittest.TestCase):
def _get_target_class(self):