|
28 | 28 | from unittest import mock
|
29 | 29 | import warnings
|
30 | 30 |
|
31 |
| -import requests |
| 31 | +import freezegun |
32 | 32 | import packaging
|
33 | 33 | import pytest
|
| 34 | +import requests |
| 35 | + |
| 36 | +import google.api |
34 | 37 |
|
35 | 38 |
|
36 | 39 | try:
|
|
55 | 58 | import google.cloud._helpers
|
56 | 59 | from google.cloud import bigquery
|
57 | 60 |
|
| 61 | +from google.cloud.bigquery import job as bqjob |
| 62 | +import google.cloud.bigquery._job_helpers |
58 | 63 | from google.cloud.bigquery.dataset import DatasetReference
|
59 | 64 | from google.cloud.bigquery import exceptions
|
60 | 65 | from google.cloud.bigquery import ParquetOptions
|
@@ -5308,6 +5313,173 @@ def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails(self):
|
5308 | 5313 | with pytest.raises(DataLoss, match="we lost your job, sorry"):
|
5309 | 5314 | client.query("SELECT 1;", job_id=None)
|
5310 | 5315 |
|
| 5316 | + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails_no_retries(self): |
| 5317 | + from google.api_core.exceptions import Conflict |
| 5318 | + from google.api_core.exceptions import DataLoss |
| 5319 | + from google.cloud.bigquery.job import QueryJob |
| 5320 | + |
| 5321 | + creds = _make_credentials() |
| 5322 | + http = object() |
| 5323 | + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) |
| 5324 | + |
| 5325 | + job_create_error = Conflict("Job already exists.") |
| 5326 | + job_begin_patcher = mock.patch.object( |
| 5327 | + QueryJob, "_begin", side_effect=job_create_error |
| 5328 | + ) |
| 5329 | + get_job_patcher = mock.patch.object( |
| 5330 | + client, "get_job", side_effect=DataLoss("we lost your job, sorry") |
| 5331 | + ) |
| 5332 | + |
| 5333 | + with job_begin_patcher, get_job_patcher: |
| 5334 | + # If get job request fails but supposedly there does exist a job |
| 5335 | + # with this ID already, raise the exception explaining why we |
| 5336 | + # couldn't recover the job. |
| 5337 | + with pytest.raises(DataLoss, match="we lost your job, sorry"): |
| 5338 | + client.query( |
| 5339 | + "SELECT 1;", |
| 5340 | + job_id=None, |
| 5341 | + # Explicitly test with no retries to make sure those branches are covered. |
| 5342 | + retry=None, |
| 5343 | + job_retry=None, |
| 5344 | + ) |
| 5345 | + |
| 5346 | + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retries_404(self): |
| 5347 | + """Regression test for https://github.com/googleapis/python-bigquery/issues/2134 |
| 5348 | +
|
| 5349 | + Sometimes after a Conflict, the fetch fails with a 404, but we know |
| 5350 | + because of the conflict that really the job does exist. Retry until we |
| 5351 | + get the job status (or timeout). |
| 5352 | + """ |
| 5353 | + job_id = "abc123" |
| 5354 | + creds = _make_credentials() |
| 5355 | + http = object() |
| 5356 | + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) |
| 5357 | + conn = client._connection = make_connection( |
| 5358 | + # We're mocking QueryJob._begin, so this is only going to be |
| 5359 | + # jobs.get requests and responses. |
| 5360 | + google.api_core.exceptions.TooManyRequests("this is retriable by default"), |
| 5361 | + google.api_core.exceptions.NotFound("we lost your job"), |
| 5362 | + google.api_core.exceptions.NotFound("we lost your job again, sorry"), |
| 5363 | + { |
| 5364 | + "jobReference": { |
| 5365 | + "projectId": self.PROJECT, |
| 5366 | + "location": "TESTLOC", |
| 5367 | + "jobId": job_id, |
| 5368 | + } |
| 5369 | + }, |
| 5370 | + ) |
| 5371 | + |
| 5372 | + job_create_error = google.api_core.exceptions.Conflict("Job already exists.") |
| 5373 | + job_begin_patcher = mock.patch.object( |
| 5374 | + bqjob.QueryJob, "_begin", side_effect=job_create_error |
| 5375 | + ) |
| 5376 | + job_id_patcher = mock.patch.object( |
| 5377 | + google.cloud.bigquery._job_helpers, |
| 5378 | + "make_job_id", |
| 5379 | + return_value=job_id, |
| 5380 | + ) |
| 5381 | + |
| 5382 | + with job_begin_patcher, job_id_patcher: |
| 5383 | + # If the get job request fails with a 404 even though a job |
| 5384 | + # with this ID already exists, retry the 404 until we get it (or it |
| 5385 | + # fails for a non-retriable reason, see other tests). |
| 5386 | + result = client.query("SELECT 1;", job_id=None) |
| 5387 | + |
| 5388 | + jobs_get_path = mock.call( |
| 5389 | + method="GET", |
| 5390 | + path=f"/projects/{self.PROJECT}/jobs/{job_id}", |
| 5391 | + query_params={ |
| 5392 | + "projection": "full", |
| 5393 | + }, |
| 5394 | + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, |
| 5395 | + ) |
| 5396 | + conn.api_request.assert_has_calls( |
| 5397 | + # Double-check that it was jobs.get that was called for each of our |
| 5398 | + # mocked responses. |
| 5399 | + [jobs_get_path] |
| 5400 | + * 4, |
| 5401 | + ) |
| 5402 | + assert result.job_id == job_id |
| 5403 | + |
| 5404 | + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retries_404_and_query_job_insert( |
| 5405 | + self, |
| 5406 | + ): |
| 5407 | + """Regression test for https://github.com/googleapis/python-bigquery/issues/2134 |
| 5408 | +
|
| 5409 | + Sometimes after a Conflict, the fetch fails with a 404. If it keeps |
| 5410 | + failing with a 404, assume that the job actually doesn't exist. |
| 5411 | + """ |
| 5412 | + job_id_1 = "abc123" |
| 5413 | + job_id_2 = "xyz789" |
| 5414 | + creds = _make_credentials() |
| 5415 | + http = object() |
| 5416 | + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) |
| 5417 | + |
| 5418 | + # We're mocking QueryJob._begin, so the connection should only get |
| 5419 | + # jobs.get requests. |
| 5420 | + job_create_error = google.api_core.exceptions.Conflict("Job already exists.") |
| 5421 | + job_begin_patcher = mock.patch.object( |
| 5422 | + bqjob.QueryJob, "_begin", side_effect=job_create_error |
| 5423 | + ) |
| 5424 | + conn = client._connection = make_connection( |
| 5425 | + google.api_core.exceptions.NotFound("we lost your job again, sorry"), |
| 5426 | + { |
| 5427 | + "jobReference": { |
| 5428 | + "projectId": self.PROJECT, |
| 5429 | + "location": "TESTLOC", |
| 5430 | + "jobId": job_id_2, |
| 5431 | + } |
| 5432 | + }, |
| 5433 | + ) |
| 5434 | + |
| 5435 | + # Choose a small deadline so the 404 retries give up. |
| 5436 | + retry = ( |
| 5437 | + google.cloud.bigquery.retry._DEFAULT_GET_JOB_CONFLICT_RETRY.with_deadline(1) |
| 5438 | + ) |
| 5439 | + job_id_patcher = mock.patch.object( |
| 5440 | + google.cloud.bigquery._job_helpers, |
| 5441 | + "make_job_id", |
| 5442 | + side_effect=[job_id_1, job_id_2], |
| 5443 | + ) |
| 5444 | + retry_patcher = mock.patch.object( |
| 5445 | + google.cloud.bigquery.retry, |
| 5446 | + "_DEFAULT_GET_JOB_CONFLICT_RETRY", |
| 5447 | + retry, |
| 5448 | + ) |
| 5449 | + |
| 5450 | + with freezegun.freeze_time( |
| 5451 | + "2025-01-01 00:00:00", |
| 5452 | + # 10x the retry deadline to guarantee a timeout. |
| 5453 | + auto_tick_seconds=10, |
| 5454 | + ), job_begin_patcher, job_id_patcher, retry_patcher: |
| 5455 | + # The get job request for the first job ID fails with a 404 and the |
| 5456 | + # (shortened) retry deadline passes, so the job is assumed not to exist |
| 5457 | + # and a second job ID is used, whose get job request succeeds. |
| 5458 | + result = client.query("SELECT 1;", job_id=None) |
| 5459 | + |
| 5460 | + jobs_get_path_1 = mock.call( |
| 5461 | + method="GET", |
| 5462 | + path=f"/projects/{self.PROJECT}/jobs/{job_id_1}", |
| 5463 | + query_params={ |
| 5464 | + "projection": "full", |
| 5465 | + }, |
| 5466 | + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, |
| 5467 | + ) |
| 5468 | + jobs_get_path_2 = mock.call( |
| 5469 | + method="GET", |
| 5470 | + path=f"/projects/{self.PROJECT}/jobs/{job_id_2}", |
| 5471 | + query_params={ |
| 5472 | + "projection": "full", |
| 5473 | + }, |
| 5474 | + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, |
| 5475 | + ) |
| 5476 | + conn.api_request.assert_has_calls( |
| 5477 | + # Double-check that it was jobs.get that was called for each of our |
| 5478 | + # mocked responses. |
| 5479 | + [jobs_get_path_1, jobs_get_path_2], |
| 5480 | + ) |
| 5481 | + assert result.job_id == job_id_2 |
| 5482 | + |
5311 | 5483 | def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_succeeds(self):
|
5312 | 5484 | from google.api_core.exceptions import Conflict
|
5313 | 5485 | from google.cloud.bigquery.job import QueryJob
|
|
0 commit comments