Skip to content

Commit abab226

Browse files
authored
[ESM] Fix flaky SQS ReportBatchItemFailures test with proper visibility timeouts (#12323)
1 parent 4285fb1 commit abab226

File tree

3 files changed

+22
-9
lines changed

3 files changed

+22
-9
lines changed

tests/aws/services/lambda_/event_source_mapping/test_lambda_integration_sqs.py

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,6 @@ def receive_dlq():
389389
snapshot.match("messages", messages)
390390

391391

392-
# TODO: flaky against AWS
393392
@markers.aws.validated
394393
def test_report_batch_item_failures(
395394
create_lambda_function,
@@ -412,19 +411,27 @@ def test_report_batch_item_failures(
412411
"get_destination_queue_url", aws_client.sqs.get_queue_url(QueueName=destination_queue_name)
413412
)
414413

415-
# timeout in seconds, used for both the lambda and the queue visibility timeout.
416-
# increase to 10 if testing against AWS fails.
417-
retry_timeout = 8
414+
# If an SQS queue is not receiving a lot of traffic, Lambda can take up to 20s between invocations.
415+
# See AWS docs https://docs.aws.amazon.com/lambda/latest/dg/with-sqs.html.
416+
retry_timeout = 6
417+
visibility_timeout = 8
418418
retries = 2
419419

420+
# AWS recommends that a queue's visibility timeout be at least 6x the Lambda function's timeout. To ensure a short test
421+
# runtime, we just want to ensure messages are re-queued a couple of seconds after any potential timeouts.
422+
# See https://docs.aws.amazon.com/lambda/latest/dg/services-sqs-configure.html#events-sqs-queueconfig
423+
assert visibility_timeout > retry_timeout, (
424+
"A lambda needs to finish processing prior to re-queuing invisible messages"
425+
)
426+
420427
# set up lambda function
421428
function_name = f"failing-lambda-{short_uid()}"
422429
create_lambda_function(
423430
func_name=function_name,
424431
handler_file=LAMBDA_SQS_BATCH_ITEM_FAILURE_FILE,
425432
runtime=Runtime.python3_12,
426433
role=lambda_su_role,
427-
timeout=retry_timeout, # timeout needs to be <= than visibility timeout
434+
timeout=retry_timeout,
428435
envvars={"DESTINATION_QUEUE_URL": destination_url},
429436
)
430437

@@ -441,7 +448,7 @@ def test_report_batch_item_failures(
441448
Attributes={
442449
"FifoQueue": "true",
443450
# the visibility timeout is implicitly also the time between retries
444-
"VisibilityTimeout": str(retry_timeout),
451+
"VisibilityTimeout": str(visibility_timeout),
445452
"RedrivePolicy": json.dumps(
446453
{"deadLetterTargetArn": event_dlq_arn, "maxReceiveCount": retries}
447454
),
@@ -521,8 +528,14 @@ def test_report_batch_item_failures(
521528
assert "Messages" not in dlq_messages or dlq_messages["Messages"] == []
522529

523530
# now wait for the second invocation result which is expected to have processed message 2 and 3
531+
# Since we are re-queuing twice, with a visibility timeout of 8s, this should instead be waiting for 20s => 8s x 2 retries (+ 4s margin).
532+
# See AWS docs: https://docs.aws.amazon.com/AWSSimpleQueueService/latest/APIReference/API_ReceiveMessage.html#API_ReceiveMessage_RequestSyntax
533+
second_timeout_with_margin = (visibility_timeout * 2) + 4
534+
assert second_timeout_with_margin <= 20, (
535+
"An SQS ReceiveMessage operation cannot wait for more than 20s"
536+
)
524537
second_invocation = aws_client.sqs.receive_message(
525-
QueueUrl=destination_url, WaitTimeSeconds=retry_timeout + 2, MaxNumberOfMessages=1
538+
QueueUrl=destination_url, WaitTimeSeconds=second_timeout_with_margin, MaxNumberOfMessages=1
526539
)
527540
assert "Messages" in second_invocation
528541
# hack to make snapshot work

tests/aws/services/lambda_/event_source_mapping/test_lambda_integration_sqs.snapshot.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@
240240
}
241241
},
242242
"tests/aws/services/lambda_/event_source_mapping/test_lambda_integration_sqs.py::test_report_batch_item_failures": {
243-
"recorded-date": "12-10-2024, 13:34:15",
243+
"recorded-date": "03-03-2025, 11:31:17",
244244
"recorded-content": {
245245
"get_destination_queue_url": {
246246
"QueueUrl": "<queue-url:1>",

tests/aws/services/lambda_/event_source_mapping/test_lambda_integration_sqs.validation.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@
117117
"last_validated_date": "2024-10-12T13:33:27+00:00"
118118
},
119119
"tests/aws/services/lambda_/event_source_mapping/test_lambda_integration_sqs.py::test_report_batch_item_failures": {
120-
"last_validated_date": "2024-10-12T13:34:12+00:00"
120+
"last_validated_date": "2025-03-03T11:31:14+00:00"
121121
},
122122
"tests/aws/services/lambda_/event_source_mapping/test_lambda_integration_sqs.py::test_report_batch_item_failures_empty_json_batch_succeeds": {
123123
"last_validated_date": "2024-10-12T13:35:40+00:00"

0 commit comments

Comments
 (0)