Skip to content

S3: implement new checksum algorithm CRC64NVME and add ChecksumType for PutObject #12182

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jan 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions localstack-core/localstack/services/s3/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
ChecksumAlgorithm.SHA256,
ChecksumAlgorithm.CRC32,
ChecksumAlgorithm.CRC32C,
ChecksumAlgorithm.CRC64NVME,
]

# response header overrides the client may request
Expand Down
6 changes: 6 additions & 0 deletions localstack-core/localstack/services/s3/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
BucketRegion,
BucketVersioningStatus,
ChecksumAlgorithm,
ChecksumType,
CompletedPartList,
CORSConfiguration,
DefaultRetention,
Expand Down Expand Up @@ -259,6 +260,7 @@ class S3Object:
sse_key_hash: Optional[SSECustomerKeyMD5]
checksum_algorithm: ChecksumAlgorithm
checksum_value: str
checksum_type: ChecksumType
lock_mode: Optional[ObjectLockMode | ObjectLockRetentionMode]
lock_legal_status: Optional[ObjectLockLegalHoldStatus]
lock_until: Optional[datetime]
Expand All @@ -282,6 +284,7 @@ def __init__(
expiration: Optional[Expiration] = None,
checksum_algorithm: Optional[ChecksumAlgorithm] = None,
checksum_value: Optional[str] = None,
checksum_type: Optional[ChecksumType] = ChecksumType.FULL_OBJECT,
encryption: Optional[ServerSideEncryption] = None,
kms_key_id: Optional[SSEKMSKeyId] = None,
sse_key_hash: Optional[SSECustomerKeyMD5] = None,
Expand All @@ -305,6 +308,7 @@ def __init__(
self.expires = expires
self.checksum_algorithm = checksum_algorithm
self.checksum_value = checksum_value
self.checksum_type = checksum_type
self.encryption = encryption
self.kms_key_id = kms_key_id
self.bucket_key_enabled = bucket_key_enabled
Expand Down Expand Up @@ -461,6 +465,7 @@ def __init__(
expires=expires,
expiration=expiration,
checksum_algorithm=checksum_algorithm,
checksum_type=ChecksumType.COMPOSITE,
encryption=encryption,
kms_key_id=kms_key_id,
bucket_key_enabled=bucket_key_enabled,
Expand Down Expand Up @@ -540,6 +545,7 @@ def complete_multipart(self, parts: CompletedPartList):

multipart_etag = f"{object_etag.hexdigest()}-{len(parts)}"
self.object.etag = multipart_etag
# TODO: implement FULL_OBJECT checksum type
if has_checksum:
checksum_value = f"{base64.b64encode(checksum_hash.digest()).decode()}-{len(parts)}"
self.checksum_value = checksum_value
Expand Down
44 changes: 33 additions & 11 deletions localstack-core/localstack/services/s3/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,7 @@
validate_bucket_analytics_configuration,
validate_bucket_intelligent_tiering_configuration,
validate_canned_acl,
validate_checksum_value,
validate_cors_configuration,
validate_inventory_configuration,
validate_lifecycle_configuration,
Expand Down Expand Up @@ -782,14 +783,17 @@ def put_object(

s3_stored_object.write(body)

if (
s3_object.checksum_algorithm
and s3_object.checksum_value != s3_stored_object.checksum
):
self._storage_backend.remove(bucket_name, s3_object)
raise InvalidRequest(
f"Value for x-amz-checksum-{checksum_algorithm.lower()} header is invalid."
)
if s3_object.checksum_algorithm:
if not validate_checksum_value(s3_object.checksum_value, checksum_algorithm):
self._storage_backend.remove(bucket_name, s3_object)
raise InvalidRequest(
f"Value for x-amz-checksum-{s3_object.checksum_algorithm.lower()} header is invalid."
)
elif s3_object.checksum_value != s3_stored_object.checksum:
self._storage_backend.remove(bucket_name, s3_object)
raise BadDigest(
f"The {checksum_algorithm.upper()} you specified did not match the calculated checksum."
)

# TODO: handle ContentMD5 and ChecksumAlgorithm in a handler for all requests except requests with a
# streaming body. We can use the specs to verify which operations need to have the checksum validated
Expand Down Expand Up @@ -820,6 +824,7 @@ def put_object(

if s3_object.checksum_algorithm:
response[f"Checksum{s3_object.checksum_algorithm}"] = s3_object.checksum_value
response["ChecksumType"] = getattr(s3_object, "checksum_type", ChecksumType.FULL_OBJECT)

if s3_bucket.lifecycle_rules:
if expiration_header := self._get_expiration_header(
Expand Down Expand Up @@ -962,10 +967,16 @@ def get_object(
response["StatusCode"] = 206
if range_data.content_length == s3_object.size and checksum_value:
response[f"Checksum{checksum_algorithm.upper()}"] = checksum_value
response["ChecksumType"] = getattr(
s3_object, "checksum_type", ChecksumType.FULL_OBJECT
)
else:
response["Body"] = s3_stored_object
if checksum_value:
response[f"Checksum{checksum_algorithm.upper()}"] = checksum_value
response["ChecksumType"] = getattr(
s3_object, "checksum_type", ChecksumType.FULL_OBJECT
)

add_encryption_to_response(response, s3_object=s3_object)

Expand Down Expand Up @@ -1608,6 +1619,9 @@ def list_objects(

if s3_object.checksum_algorithm:
object_data["ChecksumAlgorithm"] = [s3_object.checksum_algorithm]
object_data["ChecksumType"] = getattr(
s3_object, "checksum_type", ChecksumType.FULL_OBJECT
)

s3_objects.append(object_data)

Expand Down Expand Up @@ -1742,6 +1756,9 @@ def list_objects_v2(

if s3_object.checksum_algorithm:
object_data["ChecksumAlgorithm"] = [s3_object.checksum_algorithm]
object_data["ChecksumType"] = getattr(
s3_object, "checksum_type", ChecksumType.FULL_OBJECT
)

s3_objects.append(object_data)

Expand Down Expand Up @@ -1884,6 +1901,9 @@ def list_object_versions(

if version.checksum_algorithm:
object_version["ChecksumAlgorithm"] = [version.checksum_algorithm]
object_version["ChecksumType"] = getattr(
version, "checksum_type", ChecksumType.FULL_OBJECT
)

object_versions.append(object_version)

Expand Down Expand Up @@ -1971,7 +1991,10 @@ def get_object_attributes(
checksum_value = s3_object.checksum_value.split("-")[0]
else:
checksum_value = s3_object.checksum_value
response["Checksum"] = {f"Checksum{checksum_algorithm.upper()}": checksum_value}
response["Checksum"] = {
f"Checksum{checksum_algorithm.upper()}": checksum_value,
"ChecksumType": getattr(s3_object, "checksum_type", ChecksumType.FULL_OBJECT),
}

response["LastModified"] = s3_object.last_modified

Expand Down Expand Up @@ -2071,9 +2094,7 @@ def create_multipart_upload(
if not system_metadata.get("ContentType"):
system_metadata["ContentType"] = "binary/octet-stream"

# TODO: validate the algorithm?
checksum_algorithm = request.get("ChecksumAlgorithm")
# ChecksumCRC64NVME
if checksum_algorithm and checksum_algorithm not in CHECKSUM_ALGORITHMS:
raise InvalidRequest(
"Checksum algorithm provided is unsupported. Please try again with any of the valid types: [CRC32, CRC32C, SHA1, SHA256]"
Expand Down Expand Up @@ -2254,6 +2275,7 @@ def upload_part(

if checksum_algorithm and s3_part.checksum_value != stored_s3_part.checksum:
stored_multipart.remove_part(s3_part)
# TODO: validate this to be BadDigest as well
raise InvalidRequest(
f"Value for x-amz-checksum-{checksum_algorithm.lower()} header is invalid."
)
Expand Down
5 changes: 5 additions & 0 deletions localstack-core/localstack/services/s3/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,11 @@ def get_s3_checksum(algorithm) -> ChecksumHash:

return CrtCrc32cChecksum()

case ChecksumAlgorithm.CRC64NVME:
from botocore.httpchecksum import CrtCrc64NvmeChecksum

return CrtCrc64NvmeChecksum()

case ChecksumAlgorithm.SHA1:
return hashlib.sha1(usedforsecurity=False)

Expand Down
22 changes: 22 additions & 0 deletions localstack-core/localstack/services/s3/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
BucketCannedACL,
BucketLifecycleConfiguration,
BucketName,
ChecksumAlgorithm,
CORSConfiguration,
Grant,
Grantee,
Expand Down Expand Up @@ -484,3 +485,24 @@ def validate_sse_c(
ArgumentName="x-amz-server-side-encryption",
ArgumentValue="null",
)


def validate_checksum_value(checksum_value: str, checksum_algorithm: ChecksumAlgorithm) -> bool:
try:
checksum = base64.b64decode(checksum_value)
except Exception:
return False

match checksum_algorithm:
case ChecksumAlgorithm.CRC32 | ChecksumAlgorithm.CRC32C:
valid_length = 4
case ChecksumAlgorithm.CRC64NVME:
valid_length = 8
case ChecksumAlgorithm.SHA1:
valid_length = 20
case ChecksumAlgorithm.SHA256:
valid_length = 32
case _:
valid_length = 0

return len(checksum) == valid_length
9 changes: 9 additions & 0 deletions localstack-core/localstack/utils/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,15 @@ def checksum_crc32c(string: Union[str, bytes]):
return base64.b64encode(checksum.digest()).decode()


def checksum_crc64nvme(string: Union[str, bytes]):
    """Compute the CRC64NVME checksum of the given input and return it base64-encoded as text."""
    # botocore is imported lazily, inside the function, so that the CLI does not depend on it
    from botocore.httpchecksum import CrtCrc64NvmeChecksum

    hasher = CrtCrc64NvmeChecksum()
    hasher.update(to_bytes(string))
    raw_digest = hasher.digest()
    encoded = base64.b64encode(raw_digest)
    return encoded.decode()


def hash_sha1(string: Union[str, bytes]) -> str:
    """Return the base64-encoded SHA-1 digest of the given string or bytes."""
    # NOTE(review): this looks like a plain content hash (it sits next to the CRC checksum
    # helpers), so it is flagged as not security-relevant; this keeps SHA-1 usable on
    # restricted (e.g. FIPS) builds and does not change the digest. Confirm no caller relies
    # on it for security purposes.
    digest = hashlib.sha1(to_bytes(string), usedforsecurity=False).digest()
    return base64.b64encode(digest).decode()
Expand Down
Loading
Loading