2021-10-18 15:54:04 +02:00
|
|
|
# ***** BEGIN GPL LICENSE BLOCK *****
|
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or
|
|
|
|
# modify it under the terms of the GNU General Public License
|
|
|
|
# as published by the Free Software Foundation; either version 2
|
|
|
|
# of the License, or (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with this program; if not, write to the Free Software Foundation,
|
|
|
|
# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
|
|
#
|
|
|
|
# ***** END GPL LICENCE BLOCK *****
|
|
|
|
#
|
|
|
|
# (c) 2018, Blender Foundation - Sybren A. Stüvel
|
|
|
|
"""Amazon S3-compatible uploader."""
|
|
|
|
import hashlib
|
|
|
|
import logging
|
|
|
|
import pathlib
|
|
|
|
import typing
|
|
|
|
import urllib.parse
|
|
|
|
|
|
|
|
from . import Packer, transfer
|
|
|
|
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
# TODO(Sybren): compute MD5 sums of queued files in a separate thread, so that
|
|
|
|
# we can upload a file to S3 and compute an MD5 of another file simultaneously.
|
|
|
|
|
2023-01-10 11:41:55 +01:00
|
|
|
|
2021-10-18 15:54:04 +02:00
|
|
|
def compute_md5(filepath: pathlib.Path) -> str:
    """Return the hexadecimal MD5 digest of the file at ``filepath``.

    The file is read in fixed-size chunks, so it is never loaded into
    memory in its entirety.
    """
    log.debug("Computing MD5sum of %s", filepath)
    digest = hashlib.md5()
    with filepath.open("rb") as stream:
        # iter() with a sentinel keeps calling read() until it returns b"",
        # which is what a binary file yields at end-of-file.
        for chunk in iter(lambda: stream.read(102400), b""):
            digest.update(chunk)
    hexdigest = digest.hexdigest()
    log.debug("MD5sum of %s is %s", filepath, hexdigest)
    return hexdigest
|
|
|
|
|
|
|
|
|
|
|
|
class S3Packer(Packer):
    """Packer that creates BAT Packs on S3-compatible storage."""

    def __init__(self, *args, endpoint, **kwargs) -> None:
        """Set up the boto3 session and S3 client for an endpoint.

        All other positional and keyword arguments are passed on to the
        superclass constructor.

        :param endpoint: URL of the S3 storage endpoint
        """
        super().__init__(*args, **kwargs)

        # boto3 is imported here instead of at module level.
        import boto3

        # The network location of the endpoint URL is used as the boto3
        # profile name, so that credentials can be read from the [endpoint]
        # section in ~/.aws/credentials.
        # See https://boto3.readthedocs.io/en/latest/guide/configuration.html#guide-configuration
        parsed = urllib.parse.urlparse(endpoint)
        profile = parsed.netloc
        endpoint_url = parsed.geturl()
        log.debug("Using Boto3 profile name %r for url %r", profile, endpoint_url)

        self.session = boto3.Session(profile_name=profile)
        self.client = self.session.client("s3", endpoint_url=endpoint_url)

    def set_credentials(
        self, endpoint: str, access_key_id: str, secret_access_key: str
    ):
        """Replace the S3 client with one that uses explicit credentials.

        :param endpoint: URL of the S3 storage endpoint
        :param access_key_id: access key ID to authenticate with
        :param secret_access_key: secret access key belonging to that ID
        """
        client_options = {
            "endpoint_url": endpoint,
            "aws_access_key_id": access_key_id,
            "aws_secret_access_key": secret_access_key,
        }
        self.client = self.session.client("s3", **client_options)

    def _create_file_transferer(self) -> transfer.FileTransferer:
        """Return a new S3Transferrer that uses this packer's S3 client."""
        uploader = S3Transferrer(self.client)
        return uploader
|
|
|
|
|
|
|
|
|
|
|
|
class S3Transferrer(transfer.FileTransferer):
    """Uploads the queued files to S3-compatible storage."""

    class AbortUpload(Exception):
        """Raised from the upload callback to abort an upload."""

    def __init__(self, botoclient) -> None:
        """Constructor

        :param botoclient: the boto3 S3 client used for all S3 requests.
        """
        super().__init__()
        self.client = botoclient

    def run(self) -> None:
        """Upload all queued files, deleting the source of MOVE actions.

        When an upload fails or is aborted, the item that was being handled
        is put back into the queue and this method returns immediately.
        """
        files_transferred = 0
        files_skipped = 0

        for src, dst, act in self.iter_queue():
            try:
                did_upload = self.upload_file(src, dst)

                if not did_upload and self._abort.is_set():
                    # upload_file() returns False both when the file already
                    # exists on the server and when the upload was aborted.
                    # After an abort the file may not be on the server, so it
                    # must not be counted as skipped and, for MOVE actions,
                    # its source must not be deleted. Put it back into the
                    # queue so that it is reported as not transferred.
                    self.queue.put((src, dst, act))
                    return

                files_transferred += did_upload
                files_skipped += not did_upload

                if act == transfer.Action.MOVE:
                    self.delete_file(src)
            except Exception:
                # We have to catch exceptions in a broad way, as this is running in
                # a separate thread, and exceptions won't otherwise be seen.
                log.exception("Error transferring %s to %s", src, dst)
                # Put the files to copy back into the queue, and abort. This allows
                # the main thread to inspect the queue and see which files were not
                # copied. The one we just failed (due to this exception) should also
                # be reported there.
                self.queue.put((src, dst, act))
                return

        if files_transferred:
            log.info("Transferred %d files", files_transferred)
        if files_skipped:
            log.info("Skipped %d files", files_skipped)

    def upload_file(self, src: pathlib.Path, dst: pathlib.PurePath) -> bool:
        """Upload a file to an S3 bucket.

        The first part of 'dst' is used as the bucket name, the remainder as
        the path inside the bucket.

        The upload is skipped when get_metadata() reports the same MD5 sum
        and the same size for the object as the local file has.

        :returns: True if the file was uploaded, False if it was skipped or
            the upload was aborted.
        """
        bucket = dst.parts[0]
        # Build the key with forward slashes regardless of the platform this
        # runs on; str(pathlib.Path(...)) would use backslashes on Windows.
        key = pathlib.PurePosixPath(*dst.parts[1:]).as_posix()
        md5 = compute_md5(src)

        existing_md5, existing_size = self.get_metadata(bucket, key)
        if md5 == existing_md5 and src.stat().st_size == existing_size:
            log.debug(
                "skipping %s, it already exists on the server with MD5 %s",
                src,
                existing_md5,
            )
            return False

        log.info("Uploading %s", src)
        try:
            self.client.upload_file(
                str(src),
                Bucket=bucket,
                Key=key,
                Callback=self.report_transferred,
                # Store the MD5 sum with the object, so that get_metadata()
                # can compare against it on a later run.
                ExtraArgs={"Metadata": {"md5": md5}},
            )
        except self.AbortUpload:
            return False
        return True

    def report_transferred(self, bytes_transferred: int) -> None:
        """Upload progress callback.

        :raises AbortUpload: when an abort has been requested, to interrupt
            the ongoing upload.
        """
        if self._abort.is_set():
            log.warning("Interrupting ongoing upload")
            raise self.AbortUpload("interrupting ongoing upload")
        super().report_transferred(bytes_transferred)

    def get_metadata(self, bucket: str, key: str) -> typing.Tuple[str, int]:
        """Get MD5 sum and size on S3.

        :returns: the MD5 hexadecimal hash and the file size in bytes.
            If the file does not exist or has no known MD5 sum,
            returns ('', -1)
        :raises ValueError: when the server responds with an error other
            than 404.
        """
        import botocore.exceptions

        log.debug("Getting metadata of %s/%s", bucket, key)
        try:
            info = self.client.head_object(Bucket=bucket, Key=key)
        except botocore.exceptions.ClientError as ex:
            # Default to an empty dict so a response without an "Error" entry
            # results in the "Unknown" code instead of an AttributeError.
            error_code = ex.response.get("Error", {}).get("Code", "Unknown")
            # error_code already is a string, but this makes the code forward
            # compatible with a time where they use integer codes.
            if str(error_code) == "404":
                return "", -1
            # The response is wrapped in a tuple so that the % operator
            # treats it as a single value to format, not as a mapping.
            raise ValueError("error response: %r" % (ex.response,)) from None

        try:
            return info["Metadata"]["md5"], info["ContentLength"]
        except KeyError:
            return "", -1
|