Skip to content

Commit f27a546

Browse files
authored
fix(auth): fail-fast on invalid or non-workload certificate configs in agent identity discovery (#17116)
The `GOOGLE_API_CERTIFICATE_CONFIG` environment variable is shared between Managed Workload Identity (MWLID) token-binding and other flows like Enterprise Certificate Provider (ECP) configs (e.g., PKCS#11). When this env var is set, agent identity discovery is triggered. If the config file exists but lacks the `"workload"` section (as with ECP configurations),we should exit early and return `None` to avoid delaying non-workload flows. In addition, if the config file on disk had syntax errors or invalid JSON, the previous logic entered a 30-second blocking retry loop before failing with `RefreshError`. To resolve this, the lookup logic now assumes that if the config file exists on disk, it is in its final format. If the file exists but lacks a `"workload"` section, has syntax errors, or is unreadable, we return `None` immediately to fail-fast and avoid startup delays. b/512912028 fixes #17145
1 parent c375821 commit f27a546

3 files changed

Lines changed: 210 additions & 43 deletions

File tree

packages/google-auth/google/auth/_agent_identity_utils.py

Lines changed: 64 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -80,41 +80,86 @@ def get_agent_identity_certificate_path():
8080
import json
8181

8282
cert_config_path = os.environ.get(environment_vars.GOOGLE_API_CERTIFICATE_CONFIG)
83-
if not cert_config_path:
83+
84+
# Check if the well-known workload directory is mounted.
85+
well_known_dir = os.path.dirname(_WELL_KNOWN_CERT_PATH)
86+
has_well_known_dir = os.path.exists(well_known_dir)
87+
88+
# If we have neither a config path nor a well-known mount directory, exit immediately.
89+
if not cert_config_path and not has_well_known_dir:
8490
return None
8591

86-
has_logged_warning = False
92+
has_logged_config_warning = False
93+
has_logged_cert_warning = False
8794

8895
for interval in _POLLING_INTERVALS:
8996
try:
90-
with open(cert_config_path, "r") as f:
91-
cert_config = json.load(f)
92-
cert_path = (
93-
cert_config.get("cert_configs", {})
94-
.get("workload", {})
95-
.get("cert_path")
97+
# Path A: Config file is explicitly set
98+
if cert_config_path:
99+
with open(cert_config_path, "r") as f:
100+
cert_config = json.load(f)
101+
102+
cert_configs = (
103+
cert_config.get("cert_configs")
104+
if isinstance(cert_config, dict)
105+
else None
96106
)
107+
workload_config = (
108+
cert_configs.get("workload")
109+
if isinstance(cert_configs, dict)
110+
else None
111+
)
112+
113+
if (
114+
not isinstance(workload_config, dict)
115+
or "cert_path" not in workload_config
116+
):
117+
return None
118+
119+
cert_path = workload_config["cert_path"]
97120
if _is_certificate_file_ready(cert_path):
98121
return cert_path
99-
except (IOError, ValueError, KeyError):
100-
if not has_logged_warning:
122+
123+
# The config was parsed, but the cert file is not ready yet
124+
target_path = cert_path
125+
126+
# Path B: Config is NOT set, fallback to the well-known path
127+
else:
128+
if _is_certificate_file_ready(_WELL_KNOWN_CERT_PATH):
129+
return _WELL_KNOWN_CERT_PATH
130+
131+
# The well-known cert file is not ready yet
132+
target_path = _WELL_KNOWN_CERT_PATH
133+
134+
# Log a warning on the first failed attempt to load the certificate file
135+
if not has_logged_cert_warning:
101136
_LOGGER.warning(
102-
"Certificate config file not found at %s (from %s environment "
103-
"variable). Retrying for up to %s seconds.",
104-
cert_config_path,
137+
"Certificate file not ready at %s. Retrying until startup timeout (up to %s seconds total)...",
138+
target_path,
139+
_TOTAL_TIMEOUT,
140+
)
141+
has_logged_cert_warning = True
142+
143+
except (IOError, ValueError, KeyError) as e:
144+
if cert_config_path and os.path.exists(cert_config_path):
145+
# If the file exists but has invalid JSON or is unreadable,
146+
# we assume it is in its final format and fail-fast by returning None.
147+
return None
148+
149+
if not has_logged_config_warning and cert_config_path:
150+
_LOGGER.warning(
151+
"Certificate config file not found or incomplete: %s (from %s "
152+
"environment variable). Retrying until startup timeout (up to %s seconds total)...",
153+
e,
105154
environment_vars.GOOGLE_API_CERTIFICATE_CONFIG,
106155
_TOTAL_TIMEOUT,
107156
)
108-
has_logged_warning = True
157+
has_logged_config_warning = True
109158
pass
110159

111-
# As a fallback, check the well-known certificate path.
112-
if _is_certificate_file_ready(_WELL_KNOWN_CERT_PATH):
113-
return _WELL_KNOWN_CERT_PATH
114-
115160
# A sleep is required in two cases:
116161
# 1. The config file is not found (the except block).
117-
# 2. The config file is found, but the certificate is not yet available.
162+
# 2. The config file/well-known path is found, but the certificate is not yet available.
118163
# In both cases, we need to poll, so we sleep on every iteration
119164
# that doesn't return a certificate.
120165
time.sleep(interval)

packages/google-auth/noxfile.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,7 @@ def docs(session):
196196
"""Build the docs for this library."""
197197

198198
session.install("-e", ".[aiohttp]")
199+
session.install("requests==2.31.0")
199200
session.install("sphinx", "alabaster", "recommonmark", "sphinx-docstring-typing")
200201

201202
shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True)

packages/google-auth/tests/test_agent_identity_utils.py

Lines changed: 145 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ def test_get_agent_identity_certificate_path_retry(
172172
with pytest.raises(exceptions.RefreshError):
173173
_agent_identity_utils.get_agent_identity_certificate_path()
174174

175-
assert mock_sleep.call_count == 100
175+
assert mock_sleep.call_count == len(_agent_identity_utils._POLLING_INTERVALS)
176176

177177
@mock.patch("time.sleep")
178178
def test_get_agent_identity_certificate_path_failure(
@@ -191,7 +191,7 @@ def test_get_agent_identity_certificate_path_failure(
191191
environment_vars.GOOGLE_API_PREVENT_AGENT_TOKEN_SHARING_FOR_GCP_SERVICES
192192
in str(excinfo.value)
193193
)
194-
assert mock_sleep.call_count == 100
194+
assert mock_sleep.call_count == len(_agent_identity_utils._POLLING_INTERVALS)
195195

196196
@mock.patch("time.sleep")
197197
@mock.patch("os.path.exists")
@@ -215,7 +215,149 @@ def exists_side_effect(path):
215215
with pytest.raises(exceptions.RefreshError):
216216
_agent_identity_utils.get_agent_identity_certificate_path()
217217

218-
assert mock_sleep.call_count == 100
218+
assert mock_sleep.call_count == len(_agent_identity_utils._POLLING_INTERVALS)
219+
220+
@mock.patch("time.sleep")
221+
def test_get_agent_identity_certificate_path_non_workload_config(
222+
self, mock_sleep, tmpdir, monkeypatch
223+
):
224+
config_path = tmpdir.join("config.json")
225+
config_path.write(
226+
json.dumps({"cert_configs": {"pkcs11": {"module": "some_module"}}})
227+
)
228+
monkeypatch.setenv(
229+
environment_vars.GOOGLE_API_CERTIFICATE_CONFIG, str(config_path)
230+
)
231+
232+
result = _agent_identity_utils.get_agent_identity_certificate_path()
233+
234+
# Should return None immediately without polling
235+
assert result is None
236+
mock_sleep.assert_not_called()
237+
238+
@mock.patch("time.sleep")
239+
def test_get_agent_identity_certificate_path_invalid_json(
240+
self, mock_sleep, tmpdir, monkeypatch
241+
):
242+
config_path = tmpdir.join("config.json")
243+
config_path.write("{invalid_json: true}") # Invalid JSON missing quotes
244+
monkeypatch.setenv(
245+
environment_vars.GOOGLE_API_CERTIFICATE_CONFIG, str(config_path)
246+
)
247+
248+
result = _agent_identity_utils.get_agent_identity_certificate_path()
249+
250+
# Should return None immediately without polling/sleeping
251+
assert result is None
252+
mock_sleep.assert_not_called()
253+
254+
@mock.patch("time.sleep")
255+
def test_get_agent_identity_certificate_path_non_dict_json(
256+
self, mock_sleep, tmpdir, monkeypatch
257+
):
258+
config_path = tmpdir.join("config.json")
259+
config_path.write(json.dumps(["not", "a", "dict"])) # Valid JSON list
260+
monkeypatch.setenv(
261+
environment_vars.GOOGLE_API_CERTIFICATE_CONFIG, str(config_path)
262+
)
263+
264+
result = _agent_identity_utils.get_agent_identity_certificate_path()
265+
266+
# Should fail fast immediately and return None without polling
267+
assert result is None
268+
mock_sleep.assert_not_called()
269+
270+
@mock.patch("time.sleep")
271+
def test_get_agent_identity_certificate_path_workload_config_missing_cert_path(
272+
self, mock_sleep, tmpdir, monkeypatch
273+
):
274+
config_path = tmpdir.join("config.json")
275+
config_path.write(json.dumps({"cert_configs": {"workload": {}}}))
276+
monkeypatch.setenv(
277+
environment_vars.GOOGLE_API_CERTIFICATE_CONFIG, str(config_path)
278+
)
279+
280+
result = _agent_identity_utils.get_agent_identity_certificate_path()
281+
282+
# Should return None immediately without polling
283+
assert result is None
284+
mock_sleep.assert_not_called()
285+
286+
@mock.patch("time.sleep")
287+
@mock.patch("os.path.exists")
288+
@mock.patch("google.auth._agent_identity_utils._is_certificate_file_ready")
289+
def test_get_agent_identity_certificate_path_no_config_but_has_well_known_dir(
290+
self, mock_is_ready, mock_exists, mock_sleep, monkeypatch
291+
):
292+
monkeypatch.delenv(
293+
environment_vars.GOOGLE_API_CERTIFICATE_CONFIG, raising=False
294+
)
295+
296+
# Simulate that the well-known workload mount directory exists, and the cert is ready
297+
mock_exists.return_value = True
298+
mock_is_ready.return_value = True
299+
300+
result = _agent_identity_utils.get_agent_identity_certificate_path()
301+
302+
# Should return the well-known path immediately
303+
assert result == _agent_identity_utils._WELL_KNOWN_CERT_PATH
304+
mock_sleep.assert_not_called()
305+
306+
@mock.patch("time.sleep")
307+
@mock.patch("os.path.exists")
308+
def test_get_agent_identity_certificate_path_no_config_no_well_known_dir(
309+
self, mock_exists, mock_sleep, monkeypatch
310+
):
311+
monkeypatch.delenv(
312+
environment_vars.GOOGLE_API_CERTIFICATE_CONFIG, raising=False
313+
)
314+
315+
# Simulate that the well-known mount directory does NOT exist
316+
mock_exists.return_value = False
317+
318+
result = _agent_identity_utils.get_agent_identity_certificate_path()
319+
320+
# Should return None immediately without polling
321+
assert result is None
322+
mock_sleep.assert_not_called()
323+
324+
@mock.patch("time.sleep")
325+
@mock.patch("os.path.exists")
326+
@mock.patch("google.auth._agent_identity_utils._is_certificate_file_ready")
327+
def test_get_agent_identity_certificate_path_no_config_well_known_polling_success(
328+
self, mock_is_ready, mock_exists, mock_sleep, monkeypatch
329+
):
330+
monkeypatch.delenv(
331+
environment_vars.GOOGLE_API_CERTIFICATE_CONFIG, raising=False
332+
)
333+
334+
# Simulate that the directory exists, file appears on 2nd try
335+
mock_exists.return_value = True
336+
mock_is_ready.side_effect = [False, True]
337+
338+
result = _agent_identity_utils.get_agent_identity_certificate_path()
339+
340+
assert result == _agent_identity_utils._WELL_KNOWN_CERT_PATH
341+
assert mock_sleep.call_count == 1
342+
343+
@mock.patch("time.sleep")
344+
@mock.patch("os.path.exists")
345+
@mock.patch("google.auth._agent_identity_utils._is_certificate_file_ready")
346+
def test_get_agent_identity_certificate_path_no_config_well_known_polling_timeout(
347+
self, mock_is_ready, mock_exists, mock_sleep, monkeypatch
348+
):
349+
monkeypatch.delenv(
350+
environment_vars.GOOGLE_API_CERTIFICATE_CONFIG, raising=False
351+
)
352+
353+
# Simulate that the directory exists, but file never appears
354+
mock_exists.return_value = True
355+
mock_is_ready.return_value = False
356+
357+
with pytest.raises(exceptions.RefreshError):
358+
_agent_identity_utils.get_agent_identity_certificate_path()
359+
360+
assert mock_sleep.call_count == len(_agent_identity_utils._POLLING_INTERVALS)
219361

220362
@mock.patch("google.auth._agent_identity_utils.get_agent_identity_certificate_path")
221363
def test_get_and_parse_agent_identity_certificate_opted_out(
@@ -261,27 +403,6 @@ def test_get_and_parse_agent_identity_certificate_success(
261403
mock_parse_certificate.assert_called_once_with(b"cert_bytes")
262404
assert result == mock_parse_certificate.return_value
263405

264-
@mock.patch("time.sleep", return_value=None)
265-
@mock.patch("google.auth._agent_identity_utils._is_certificate_file_ready")
266-
def test_get_agent_identity_certificate_path_fallback_to_well_known_path(
267-
self, mock_is_ready, mock_sleep, monkeypatch
268-
):
269-
# Set a dummy config path that won't be found.
270-
monkeypatch.setenv(
271-
environment_vars.GOOGLE_API_CERTIFICATE_CONFIG, "/dummy/config.json"
272-
)
273-
274-
# First, the primary path from the (mocked) config is not ready.
275-
# Then, the fallback well-known path is ready.
276-
mock_is_ready.side_effect = [False, True]
277-
278-
result = _agent_identity_utils.get_agent_identity_certificate_path()
279-
280-
assert result == _agent_identity_utils._WELL_KNOWN_CERT_PATH
281-
# The sleep should have been called once before the fallback is checked.
282-
mock_sleep.assert_called_once()
283-
assert mock_is_ready.call_count == 2
284-
285406
def test_get_cached_cert_fingerprint_no_cert(self):
286407
with pytest.raises(ValueError, match="mTLS connection is not configured."):
287408
_agent_identity_utils.get_cached_cert_fingerprint(None)

0 commit comments

Comments
 (0)