Skip to content

Commit e97771e

Browse files
plamut authored and tswast committed
fix(bigquery): preserve job config passed to Client methods (#9735)
This commit ensures that the Client methods that accept a job config as an argument operate on deep copies and do not modify the original job config instances passed to them.
1 parent 96baa3d commit e97771e

File tree

2 files changed

+139
-8
lines changed

2 files changed

+139
-8
lines changed

bigquery/google/cloud/bigquery/client.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -187,7 +187,7 @@ def __init__(
187187

188188
self._connection = Connection(self, **kw_args)
189189
self._location = location
190-
self._default_query_job_config = default_query_job_config
190+
self._default_query_job_config = copy.deepcopy(default_query_job_config)
191191

192192
@property
193193
def location(self):
@@ -1381,6 +1381,7 @@ def load_table_from_uri(
13811381
destination = _table_arg_to_table_ref(destination, default_project=self.project)
13821382

13831383
if job_config:
1384+
job_config = copy.deepcopy(job_config)
13841385
_verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig)
13851386

13861387
load_job = job.LoadJob(job_ref, source_uris, destination, self, job_config)
@@ -1465,6 +1466,7 @@ def load_table_from_file(
14651466
destination = _table_arg_to_table_ref(destination, default_project=self.project)
14661467
job_ref = job._JobReference(job_id, project=project, location=location)
14671468
if job_config:
1469+
job_config = copy.deepcopy(job_config)
14681470
_verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig)
14691471
load_job = job.LoadJob(job_ref, None, destination, self, job_config)
14701472
job_resource = load_job.to_api_repr()
@@ -1969,6 +1971,8 @@ def copy_table(
19691971

19701972
if job_config:
19711973
_verify_job_config_type(job_config, google.cloud.bigquery.job.CopyJobConfig)
1974+
job_config = copy.deepcopy(job_config)
1975+
19721976
copy_job = job.CopyJob(
19731977
job_ref, sources, destination, client=self, job_config=job_config
19741978
)
@@ -2049,6 +2053,8 @@ def extract_table(
20492053
_verify_job_config_type(
20502054
job_config, google.cloud.bigquery.job.ExtractJobConfig
20512055
)
2056+
job_config = copy.deepcopy(job_config)
2057+
20522058
extract_job = job.ExtractJob(
20532059
job_ref, source, destination_uris, client=self, job_config=job_config
20542060
)
@@ -2112,6 +2118,8 @@ def query(
21122118
if location is None:
21132119
location = self.location
21142120

2121+
job_config = copy.deepcopy(job_config)
2122+
21152123
if self._default_query_job_config:
21162124
if job_config:
21172125
_verify_job_config_type(
@@ -2129,7 +2137,7 @@ def query(
21292137
self._default_query_job_config,
21302138
google.cloud.bigquery.job.QueryJobConfig,
21312139
)
2132-
job_config = self._default_query_job_config
2140+
job_config = copy.deepcopy(self._default_query_job_config)
21332141

21342142
job_ref = job._JobReference(job_id, project=project, location=location)
21352143
query_job = job.QueryJob(job_ref, query, client=self, job_config=job_config)

bigquery/tests/unit/test_client.py

Lines changed: 129 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2997,6 +2997,8 @@ def test_load_table_from_uri(self):
29972997
creds = _make_credentials()
29982998
http = object()
29992999
job_config = LoadJobConfig()
3000+
original_config_copy = copy.deepcopy(job_config)
3001+
30003002
client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
30013003
conn = client._connection = make_connection(RESOURCE)
30023004
destination = client.dataset(self.DS_ID).table(DESTINATION)
@@ -3010,6 +3012,9 @@ def test_load_table_from_uri(self):
30103012
method="POST", path="/projects/%s/jobs" % self.PROJECT, data=RESOURCE
30113013
)
30123014

3015+
# the original config object should not have been modified
3016+
self.assertEqual(job_config.to_api_repr(), original_config_copy.to_api_repr())
3017+
30133018
self.assertIsInstance(job, LoadJob)
30143019
self.assertIsInstance(job._configuration, LoadJobConfig)
30153020
self.assertIs(job._client, client)
@@ -3496,19 +3501,24 @@ def test_copy_table_w_valid_job_config(self):
34963501
creds = _make_credentials()
34973502
http = object()
34983503
client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
3499-
job_config = CopyJobConfig()
35003504
conn = client._connection = make_connection(RESOURCE)
35013505
dataset = client.dataset(self.DS_ID)
35023506
source = dataset.table(SOURCE)
35033507
destination = dataset.table(DESTINATION)
35043508

3509+
job_config = CopyJobConfig()
3510+
original_config_copy = copy.deepcopy(job_config)
35053511
job = client.copy_table(source, destination, job_id=JOB, job_config=job_config)
3512+
35063513
# Check that copy_table actually starts the job.
35073514
conn.api_request.assert_called_once_with(
35083515
method="POST", path="/projects/%s/jobs" % self.PROJECT, data=RESOURCE
35093516
)
35103517
self.assertIsInstance(job._configuration, CopyJobConfig)
35113518

3519+
# the original config object should not have been modified
3520+
assert job_config.to_api_repr() == original_config_copy.to_api_repr()
3521+
35123522
def test_extract_table(self):
35133523
from google.cloud.bigquery.job import ExtractJob
35143524

@@ -3679,6 +3689,7 @@ def test_extract_table_generated_job_id(self):
36793689
source = dataset.table(SOURCE)
36803690
job_config = ExtractJobConfig()
36813691
job_config.destination_format = DestinationFormat.NEWLINE_DELIMITED_JSON
3692+
original_config_copy = copy.deepcopy(job_config)
36823693

36833694
job = client.extract_table(source, DESTINATION, job_config=job_config)
36843695

@@ -3695,6 +3706,9 @@ def test_extract_table_generated_job_id(self):
36953706
self.assertEqual(job.source, source)
36963707
self.assertEqual(list(job.destination_uris), [DESTINATION])
36973708

3709+
# the original config object should not have been modified
3710+
assert job_config.to_api_repr() == original_config_copy.to_api_repr()
3711+
36983712
def test_extract_table_w_destination_uris(self):
36993713
from google.cloud.bigquery.job import ExtractJob
37003714

@@ -3840,6 +3854,7 @@ def test_query_w_explicit_job_config(self):
38403854
job_config = QueryJobConfig()
38413855
job_config.use_query_cache = True
38423856
job_config.maximum_bytes_billed = 2000
3857+
original_config_copy = copy.deepcopy(job_config)
38433858

38443859
client.query(
38453860
query, job_id=job_id, location=self.LOCATION, job_config=job_config
@@ -3850,6 +3865,105 @@ def test_query_w_explicit_job_config(self):
38503865
method="POST", path="/projects/PROJECT/jobs", data=resource
38513866
)
38523867

3868+
# the original config object should not have been modified
3869+
assert job_config.to_api_repr() == original_config_copy.to_api_repr()
3870+
3871+
def test_query_preserving_explicit_job_config(self):
3872+
job_id = "some-job-id"
3873+
query = "select count(*) from persons"
3874+
resource = {
3875+
"jobReference": {
3876+
"jobId": job_id,
3877+
"projectId": self.PROJECT,
3878+
"location": self.LOCATION,
3879+
},
3880+
"configuration": {
3881+
"query": {
3882+
"query": query,
3883+
"useLegacySql": False,
3884+
"useQueryCache": True,
3885+
"maximumBytesBilled": "2000",
3886+
}
3887+
},
3888+
}
3889+
3890+
creds = _make_credentials()
3891+
http = object()
3892+
3893+
from google.cloud.bigquery import QueryJobConfig
3894+
3895+
client = self._make_one(project=self.PROJECT, credentials=creds, _http=http,)
3896+
conn = client._connection = make_connection(resource)
3897+
3898+
job_config = QueryJobConfig()
3899+
job_config.use_query_cache = True
3900+
job_config.maximum_bytes_billed = 2000
3901+
original_config_copy = copy.deepcopy(job_config)
3902+
3903+
client.query(
3904+
query, job_id=job_id, location=self.LOCATION, job_config=job_config
3905+
)
3906+
3907+
# Check that query actually starts the job.
3908+
conn.api_request.assert_called_once_with(
3909+
method="POST", path="/projects/PROJECT/jobs", data=resource
3910+
)
3911+
3912+
# the original config object should not have been modified
3913+
assert job_config.to_api_repr() == original_config_copy.to_api_repr()
3914+
3915+
def test_query_preserving_explicit_default_job_config(self):
3916+
job_id = "some-job-id"
3917+
query = "select count(*) from persons"
3918+
resource = {
3919+
"jobReference": {
3920+
"jobId": job_id,
3921+
"projectId": self.PROJECT,
3922+
"location": self.LOCATION,
3923+
},
3924+
"configuration": {
3925+
"query": {
3926+
"query": query,
3927+
"defaultDataset": {
3928+
"projectId": self.PROJECT,
3929+
"datasetId": "some-dataset",
3930+
},
3931+
"useLegacySql": False,
3932+
"maximumBytesBilled": "1000",
3933+
}
3934+
},
3935+
}
3936+
3937+
creds = _make_credentials()
3938+
http = object()
3939+
3940+
from google.cloud.bigquery import QueryJobConfig, DatasetReference
3941+
3942+
default_job_config = QueryJobConfig()
3943+
default_job_config.default_dataset = DatasetReference(
3944+
self.PROJECT, "some-dataset"
3945+
)
3946+
default_job_config.maximum_bytes_billed = 1000
3947+
default_config_copy = copy.deepcopy(default_job_config)
3948+
3949+
client = self._make_one(
3950+
project=self.PROJECT,
3951+
credentials=creds,
3952+
_http=http,
3953+
default_query_job_config=default_job_config,
3954+
)
3955+
conn = client._connection = make_connection(resource)
3956+
3957+
client.query(query, job_id=job_id, location=self.LOCATION, job_config=None)
3958+
3959+
# Check that query actually starts the job.
3960+
conn.api_request.assert_called_once_with(
3961+
method="POST", path="/projects/PROJECT/jobs", data=resource
3962+
)
3963+
3964+
# the original default config object should not have been modified
3965+
assert default_job_config.to_api_repr() == default_config_copy.to_api_repr()
3966+
38533967
def test_query_w_invalid_job_config(self):
38543968
from google.cloud.bigquery import QueryJobConfig, DatasetReference
38553969
from google.cloud.bigquery import job
@@ -5429,22 +5543,24 @@ def test_load_table_from_file_resumable(self):
54295543

54305544
client = self._make_client()
54315545
file_obj = self._make_file_obj()
5546+
job_config = self._make_config()
5547+
original_config_copy = copy.deepcopy(job_config)
54325548

54335549
do_upload_patch = self._make_do_upload_patch(
54345550
client, "_do_resumable_upload", self.EXPECTED_CONFIGURATION
54355551
)
54365552
with do_upload_patch as do_upload:
54375553
client.load_table_from_file(
5438-
file_obj,
5439-
self.TABLE_REF,
5440-
job_id="job_id",
5441-
job_config=self._make_config(),
5554+
file_obj, self.TABLE_REF, job_id="job_id", job_config=job_config,
54425555
)
54435556

54445557
do_upload.assert_called_once_with(
54455558
file_obj, self.EXPECTED_CONFIGURATION, _DEFAULT_NUM_RETRIES
54465559
)
54475560

5561+
# the original config object should not have been modified
5562+
assert job_config.to_api_repr() == original_config_copy.to_api_repr()
5563+
54485564
def test_load_table_from_file_w_explicit_project(self):
54495565
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
54505566

@@ -5790,6 +5906,7 @@ def test_load_table_from_dataframe_w_custom_job_config(self):
57905906
job_config = job.LoadJobConfig(
57915907
write_disposition=job.WriteDisposition.WRITE_TRUNCATE
57925908
)
5909+
original_config_copy = copy.deepcopy(job_config)
57935910

57945911
get_table_patch = mock.patch(
57955912
"google.cloud.bigquery.client.Client.get_table",
@@ -5826,6 +5943,9 @@ def test_load_table_from_dataframe_w_custom_job_config(self):
58265943
assert sent_config.source_format == job.SourceFormat.PARQUET
58275944
assert sent_config.write_disposition == job.WriteDisposition.WRITE_TRUNCATE
58285945

5946+
# the original config object should not have been modified
5947+
assert job_config.to_api_repr() == original_config_copy.to_api_repr()
5948+
58295949
@unittest.skipIf(pandas is None, "Requires `pandas`")
58305950
@unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
58315951
def test_load_table_from_dataframe_w_automatic_schema(self):
@@ -6466,6 +6586,7 @@ def test_load_table_from_json_non_default_args(self):
64666586
]
64676587
job_config = job.LoadJobConfig(schema=schema)
64686588
job_config._properties["load"]["unknown_field"] = "foobar"
6589+
original_config_copy = copy.deepcopy(job_config)
64696590

64706591
load_patch = mock.patch(
64716592
"google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
@@ -6493,13 +6614,15 @@ def test_load_table_from_json_non_default_args(self):
64936614
)
64946615

64956616
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
6496-
assert job_config.source_format is None # the original was not modified
64976617
assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON
64986618
assert sent_config.schema == schema
64996619
assert not sent_config.autodetect
65006620
# all properties should have been cloned and sent to the backend
65016621
assert sent_config._properties.get("load", {}).get("unknown_field") == "foobar"
65026622

6623+
# the original config object should not have been modified
6624+
assert job_config.to_api_repr() == original_config_copy.to_api_repr()
6625+
65036626
def test_load_table_from_json_w_invalid_job_config(self):
65046627
from google.cloud.bigquery import job
65056628

0 commit comments

Comments
 (0)