blender_asset_tracer/pack/s3.py

# ***** BEGIN GPL LICENSE BLOCK *****
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# ***** END GPL LICENSE BLOCK *****
#
# (c) 2018, Blender Foundation - Sybren A. Stüvel
"""Amazon S3-compatible uploader."""
import hashlib
import logging
import pathlib
import typing
import urllib.parse
from . import Packer, transfer
log = logging.getLogger(__name__)
# TODO(Sybren): compute MD5 sums of queued files in a separate thread, so that
# we can upload a file to S3 and compute an MD5 of another file simultaneously.
def compute_md5(filepath: pathlib.Path) -> str:
    log.debug('Computing MD5sum of %s', filepath)
    hasher = hashlib.md5()
    with filepath.open('rb') as infile:
        while True:
            block = infile.read(102400)
            if not block:
                break
            hasher.update(block)
    md5 = hasher.hexdigest()
    log.debug('MD5sum of %s is %s', filepath, md5)
    return md5
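
# A minimal usage sketch of compute_md5(); the file path below is purely
# illustrative and not part of this module:
#
#   digest = compute_md5(pathlib.Path('/tmp/scene.blend'))
#   # 'digest' is a 32-character hexadecimal string, suitable for comparison
#   # against the 'md5' metadata key stored on uploaded S3 objects.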


class S3Packer(Packer):
    """Creates BAT Packs on S3-compatible storage."""

    def __init__(self, *args, endpoint, **kwargs) -> None:
        """Constructor

        :param endpoint: URL of the S3 storage endpoint
        """
        super().__init__(*args, **kwargs)

        import boto3

        # Create a session so that credentials can be read from the [endpoint]
        # section in ~/.aws/credentials.
        # See https://boto3.readthedocs.io/en/latest/guide/configuration.html#guide-configuration
        components = urllib.parse.urlparse(endpoint)
        profile_name = components.netloc
        endpoint = urllib.parse.urlunparse(components)
        log.debug('Using Boto3 profile name %r for url %r', profile_name, endpoint)

        self.session = boto3.Session(profile_name=profile_name)
        self.client = self.session.client('s3', endpoint_url=endpoint)
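
    # Illustrative example (an assumption, not part of this module): with an
    # endpoint of 'https://my-profile.example.com/', the netloc 'my-profile' is
    # used as the Boto3 profile name, so ~/.aws/credentials would need a
    # matching section along these lines:
    #
    #   [my-profile]
    #   aws_access_key_id = ...
    #   aws_secret_access_key = ...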

    def set_credentials(self,
                        endpoint: str,
                        access_key_id: str,
                        secret_access_key: str):
        """Set S3 credentials."""
        self.client = self.session.client('s3',
                                          endpoint_url=endpoint,
                                          aws_access_key_id=access_key_id,
                                          aws_secret_access_key=secret_access_key)

    def _create_file_transferer(self) -> transfer.FileTransferer:
        return S3Transferrer(self.client)


class S3Transferrer(transfer.FileTransferer):
    """Copies or moves files in source directory order."""

    class AbortUpload(Exception):
        """Raised from the upload callback to abort an upload."""

    def __init__(self, botoclient) -> None:
        super().__init__()
        self.client = botoclient

    def run(self) -> None:
        files_transferred = 0
        files_skipped = 0

        for src, dst, act in self.iter_queue():
            try:
                did_upload = self.upload_file(src, dst)
                files_transferred += did_upload
                files_skipped += not did_upload

                if act == transfer.Action.MOVE:
                    self.delete_file(src)
            except Exception:
                # We have to catch exceptions in a broad way, as this is running in
                # a separate thread, and exceptions won't otherwise be seen.
                log.exception('Error transferring %s to %s', src, dst)
                # Put the files to copy back into the queue, and abort. This allows
                # the main thread to inspect the queue and see which files were not
                # copied. The one we just failed (due to this exception) should also
                # be reported there.
                self.queue.put((src, dst, act))
                return

        if files_transferred:
            log.info('Transferred %d files', files_transferred)
        if files_skipped:
            log.info('Skipped %d files', files_skipped)

    def upload_file(self, src: pathlib.Path, dst: pathlib.PurePath) -> bool:
        """Upload a file to an S3 bucket.

        The first part of 'dst' is used as the bucket name, the remainder as
        the path inside the bucket.

        :returns: True if the file was uploaded, False if it was skipped.
        """
        bucket = dst.parts[0]
        dst_path = pathlib.Path(*dst.parts[1:])

        md5 = compute_md5(src)
        key = str(dst_path)
        existing_md5, existing_size = self.get_metadata(bucket, key)
        if md5 == existing_md5 and src.stat().st_size == existing_size:
            log.debug('skipping %s, it already exists on the server with MD5 %s',
                      src, existing_md5)
            return False

        log.info('Uploading %s', src)
        try:
            self.client.upload_file(str(src),
                                    Bucket=bucket,
                                    Key=key,
                                    Callback=self.report_transferred,
                                    ExtraArgs={'Metadata': {'md5': md5}})
        except self.AbortUpload:
            return False
        return True

    def report_transferred(self, bytes_transferred: int):
        if self._abort.is_set():
            log.warning('Interrupting ongoing upload')
            raise self.AbortUpload('interrupting ongoing upload')
        super().report_transferred(bytes_transferred)

    def get_metadata(self, bucket: str, key: str) -> typing.Tuple[str, int]:
        """Get MD5 sum and size on S3.

        :returns: the MD5 hexadecimal hash and the file size in bytes.
            If the file does not exist or has no known MD5 sum,
            returns ('', -1)
        """
        import botocore.exceptions

        log.debug('Getting metadata of %s/%s', bucket, key)
        try:
            info = self.client.head_object(Bucket=bucket, Key=key)
        except botocore.exceptions.ClientError as ex:
            error_code = ex.response.get('Error', {}).get('Code', 'Unknown')
            # error_code is already a string, but converting it explicitly keeps
            # this code forward compatible with integer error codes.
            if str(error_code) == '404':
                return '', -1
            raise ValueError('error response: %r' % ex.response) from None

        try:
            return info['Metadata']['md5'], info['ContentLength']
        except KeyError:
            return '', -1
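

# A rough end-to-end sketch (assumptions only: the positional arguments and the
# strategise()/execute() calls follow the generic Packer interface used by BAT's
# CLI and may differ; the paths, bucket and endpoint below are hypothetical):
#
#   packer = S3Packer(pathlib.Path('/work/scene.blend'),   # blend file to pack
#                     pathlib.Path('/work'),               # project directory
#                     '/my-bucket/packs/scene',            # bucket + path inside it
#                     endpoint='https://my-profile.example.com/')
#   packer.strategise()
#   packer.execute()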