mirror of
https://chromium.googlesource.com/chromium/tools/depot_tools.git
synced 2026-01-11 18:51:29 +00:00
https://crrev.com/c/6796221 added a gsutil_urls output property that provides the URLs of uploads to Google Storage that were performed by the build. It attempted to accumulate the URLs by updating a dictionary on the result object's properties, but the properties on a step's result object are empty when created, so it was just overwriting the property each time with a single-element dict. This change keeps the dict on the API object and updates it when an upload is performed so that the final value includes all of the URLs produced by the build. Change-Id: I6681ca7c137969cbef58e09ce24d0605155d2c3e Recipe-Nontrivial-Roll: build Recipe-Nontrivial-Roll: build_internal Recipe-Nontrivial-Roll: chrome_release Recipe-Nontrivial-Roll: chromiumos Recipe-Nontrivial-Roll: infra Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/6880970 Commit-Queue: Robbie Iannucci <iannucci@google.com> Reviewed-by: Ben Pastene <bpastene@chromium.org> Reviewed-by: Robbie Iannucci <iannucci@google.com> Auto-Submit: Garrett Beaty <gbeaty@google.com>
301 lines
11 KiB
Python
301 lines
11 KiB
Python
# Copyright 2013 The Chromium Authors. All rights reserved.
|
|
# Use of this source code is governed by a BSD-style license that can be
|
|
# found in the LICENSE file.
|
|
|
|
import contextlib
|
|
import re
|
|
|
|
from recipe_engine import recipe_api
|
|
|
|
class GSUtilApi(recipe_api.RecipeApi):
  """Recipe module API for running gsutil commands as build steps."""

  def __init__(self, env_properties, *args, **kwargs):
    """Captures the Boto settings that configure_gsutil() later relies on.

    Args:
      * env_properties - Object exposing BOTO_CONFIG and BOTO_PATH attributes.
      * args, kwargs - Forwarded unchanged to recipe_api.RecipeApi.
    """
    super(GSUtilApi, self).__init__(*args, **kwargs)
    self._boto_config_path = env_properties.BOTO_CONFIG
    self._boto_path = env_properties.BOTO_PATH
    # Step name -> gs:// destination for every upload() recorded so far. The
    # dict lives on the API object (not on a step result) so that each upload
    # step can publish the whole mapping as its 'gsutil_urls' property.
    self._upload_urls = {}

  @property
  def gsutil_py_path(self):
    """The gsutil.py entry point, resolved via repo_resource()."""
    return self.repo_resource('gsutil.py')

  def __call__(self,
               cmd,
               name=None,
               use_retry_wrapper=True,
               version=None,
               parallel_upload=False,
               multithreaded=False,
               infra_step=True,
               dry_run=False,
               **kwargs):
    """A step to run arbitrary gsutil commands.

    On LUCI this should automatically use the ambient task account credentials.
    On Buildbot, this assumes that gsutil authentication environment variables
    (AWS_CREDENTIAL_FILE and BOTO_CONFIG) are already set, though if you want to
    set them to something else you can always do so using the env={} kwarg.

    Note also that gsutil does its own wildcard processing, so wildcards are
    valid in file-like portions of the cmd. See 'gsutil help wildcards'.

    Args:
      * cmd (Sequence[str|Path]) - Arguments to pass to gsutil. Include
        gsutil-level options first (see 'gsutil help options').
      * name (str) - Name of the step to use. Defaults to the first non-flag
        token in the cmd.
      * use_retry_wrapper (bool) - If True, gsutil.py is launched through this
        module's gsutil_smart_retry.py resource rather than directly.
      * version (str) - If set, passed along as '--force-version <version>'.
      * parallel_upload (bool) - If True, adds
        '-o GSUtil:parallel_composite_upload_threshold=50M'.
      * multithreaded (bool) - If True, adds the '-m' option.
      * infra_step (bool) - Forwarded to the step call. Unused for dry runs.
      * dry_run (bool): If True, don't actually run the step; just log what
        the step would have been.
      * kwargs - Forwarded to the step call. Unused for dry runs.

    Returns: The result of the step that ran, or of the empty placeholder
      step when dry_run is set.
    """
    # Normalize once so any sequence of arguments (e.g. a tuple) can be
    # concatenated with the list-based prefix below.
    cmd = list(cmd)

    if name:
      full_name = 'gsutil ' + name
    else:
      full_name = 'gsutil'  # our fall-through name
      # Find first cmd token not starting with '-'
      for itm in cmd:
        token = str(itm)  # it could be a Path
        if not token.startswith('-'):
          full_name = 'gsutil ' + token
          break

    gsutil_path = self.gsutil_py_path
    cmd_prefix = []

    if use_retry_wrapper:
      # We pass the real gsutil_path to the wrapper so it doesn't have to do
      # brittle path logic.
      cmd_prefix = ['--', gsutil_path]
      gsutil_path = self.resource('gsutil_smart_retry.py')

    if version:
      cmd_prefix.extend(['--force-version', version])

    if parallel_upload:
      cmd_prefix.extend([
          '-o',
          'GSUtil:parallel_composite_upload_threshold=50M',
      ])

    if multithreaded:
      cmd_prefix.extend(['-m'])

    if use_retry_wrapper:
      # The -- argument for the wrapped gsutil.py is escaped as ---- as python
      # 2.7.3 removes all occurrences of --, not only the first. It is unescaped
      # in gsutil_wrapper.py and then passed as -- to gsutil.py.
      # Note, that 2.7.6 doesn't have this problem, but it doesn't hurt.
      cmd_prefix.append('----')
    else:
      cmd_prefix.append('--')

    exec_cmd = ['python3', '-u', gsutil_path] + cmd_prefix + cmd
    if dry_run:
      # Nothing is executed; the would-be command line is attached as a log.
      return self.m.step.empty(full_name,
                               step_text='Pretending to run gsutil command',
                               log_text=' '.join((str(i) for i in exec_cmd)),
                               log_name='command')
    return self.m.step(full_name, exec_cmd, infra_step=infra_step, **kwargs)

  def upload(self, source, bucket, dest, args=None, link_name='gsutil.upload',
             metadata=None, unauthenticated_url=False, **kwargs):
    """Runs 'gsutil cp' from `source` to gs://<bucket>/<dest>.

    Args:
      * source (str|Path) - What to copy from.
      * bucket (str) - Destination bucket name.
      * dest (str) - Destination path within the bucket.
      * args (List[str]) - Extra arguments placed right after 'cp'. The list
        is copied, never modified.
      * link_name (str) - Name of the link added to the step presentation. If
        falsy, no link is added.
      * metadata (Dict[str, str|None]) - Object metadata, emitted as '-h'
        options ahead of 'cp'.
      * unauthenticated_url (bool) - If True, a non-recursive upload is linked
        via storage.googleapis.com instead of storage.cloud.google.com.
      * kwargs - Forwarded to __call__; 'name' overrides the step name.

    Returns: The step result.
    """
    args = [] if args is None else args[:]
    # Note that metadata arguments have to be passed before the command cp.
    metadata_args = self._generate_metadata_args(metadata)
    full_dest = 'gs://%s/%s' % (bucket, dest)
    cmd = metadata_args + ['cp'] + args + [source, full_dest]
    name = kwargs.pop('name', 'upload')

    result = self(cmd, name, **kwargs)

    if link_name:
      is_dir = '-r' in args or '--recursive' in args
      link = self._http_url(bucket,
                            dest,
                            is_directory=is_dir,
                            is_anonymous=unauthenticated_url)
      result.presentation.links[link_name] = link
      # NOTE(review): the URL bookkeeping is tied to link_name being set;
      # confirm uploads with a falsy link_name are meant to be left out.
      self._upload_urls[result.name] = full_dest
      # Publish the accumulated mapping, not just this upload, so the last
      # upload step carries every URL recorded so far.
      result.presentation.properties['gsutil_urls'] = self._upload_urls
    return result

  def download(self, bucket, source, dest, args=None, **kwargs):
    """Runs 'gsutil cp' from gs://<bucket>/<source> to `dest`.

    Args:
      * bucket (str) - Source bucket name.
      * source (str) - Source path within the bucket.
      * dest (str|Path) - Where to copy to.
      * args (List[str]) - Extra arguments placed right after 'cp'. The list
        is copied, never modified.
      * kwargs - Forwarded to __call__; 'name' overrides the step name.

    Returns: The step result.
    """
    args = [] if args is None else args[:]
    full_source = 'gs://%s/%s' % (bucket, source)
    cmd = ['cp'] + args + [full_source, dest]
    name = kwargs.pop('name', 'download')
    return self(cmd, name, **kwargs)

  def download_url(self, url, dest, args=None, **kwargs):
    """Runs 'gsutil cp' from a Google Storage URL to `dest`.

    Args:
      * url (str) - gs:// or https storage URL; see _normalize_url().
      * dest (str|Path) - Where to copy to.
      * args (List[str]) - Extra arguments placed right after 'cp'.
      * kwargs - Forwarded to __call__; 'name' overrides the step name.

    Returns: The step result.
    """
    args = args or []
    url = self._normalize_url(url)
    cmd = ['cp'] + args + [url, dest]
    name = kwargs.pop('name', 'download_url')
    return self(cmd, name, **kwargs)

  def cat(self, url, args=None, **kwargs):
    """Runs 'gsutil cat' on a Google Storage URL.

    Args:
      * url (str) - gs:// or https storage URL; see _normalize_url().
      * args (List[str]) - Extra arguments placed right after 'cat'.
      * kwargs - Forwarded to __call__; 'name' overrides the step name.

    Returns: The step result.
    """
    args = args or []
    url = self._normalize_url(url)
    cmd = ['cat'] + args + [url]
    name = kwargs.pop('name', 'cat')
    return self(cmd, name, **kwargs)

  def stat(self, url, args=None, **kwargs):
    """Runs 'gsutil stat' on a Google Storage URL.

    Args:
      * url (str) - gs:// or https storage URL; see _normalize_url().
      * args (List[str]) - Extra arguments placed right after 'stat'.
      * kwargs - Forwarded to __call__; 'name' overrides the step name.

    Returns: The step result.
    """
    args = args or []
    url = self._normalize_url(url)
    cmd = ['stat'] + args + [url]
    name = kwargs.pop('name', 'stat')
    return self(cmd, name, **kwargs)

  def copy(self, source_bucket, source, dest_bucket, dest, args=None,
           link_name='gsutil.copy', metadata=None, unauthenticated_url=False,
           **kwargs):
    """Runs 'gsutil cp' between two Google Storage locations.

    Args:
      * source_bucket (str) - Source bucket name.
      * source (str) - Source path within source_bucket.
      * dest_bucket (str) - Destination bucket name.
      * dest (str) - Destination path within dest_bucket.
      * args (List[str]) - Extra arguments placed right after 'cp'. The list
        is copied, never modified.
      * link_name (str) - Name of the link added to the step presentation. If
        falsy, no link is added.
      * metadata (Dict[str, str|None]) - Object metadata, emitted as '-h'
        options following `args`.
      * unauthenticated_url (bool) - If True, a non-recursive copy is linked
        via storage.googleapis.com instead of storage.cloud.google.com.
      * kwargs - Forwarded to __call__; 'name' overrides the step name.

    Returns: The step result.
    """
    # Build a fresh list instead of extending `args` in place, so the list
    # object owned by the caller is left untouched.
    # NOTE(review): the '-h' options land after 'cp' here, whereas upload()
    # puts them before 'cp'; confirm gsutil accepts this ordering.
    cp_args = list(args or []) + self._generate_metadata_args(metadata)
    full_source = 'gs://%s/%s' % (source_bucket, source)
    full_dest = 'gs://%s/%s' % (dest_bucket, dest)
    cmd = ['cp'] + cp_args + [full_source, full_dest]
    name = kwargs.pop('name', 'copy')

    result = self(cmd, name, **kwargs)

    if link_name:
      is_dir = '-r' in cp_args or '--recursive' in cp_args
      result.presentation.links[link_name] = self._http_url(
          dest_bucket, dest, is_directory=is_dir,
          is_anonymous=unauthenticated_url)
    return result

  def list(self, url, args=None, **kwargs):
    """Runs 'gsutil ls' on a Google Storage URL.

    Args:
      * url (str) - gs:// or https storage URL; see _normalize_url().
      * args (List[str]) - Extra arguments placed right after 'ls'.
      * kwargs - Forwarded to __call__; 'name' overrides the step name.

    Returns: The step result.
    """
    args = args or []
    url = self._normalize_url(url)
    cmd = ['ls'] + args + [url]
    name = kwargs.pop('name', 'list')
    return self(cmd, name, **kwargs)

  def signurl(self, private_key_file, bucket, dest, args=None, **kwargs):
    """Runs 'gsutil signurl' for gs://<bucket>/<dest>.

    Args:
      * private_key_file (str|Path) - Key file handed to 'gsutil signurl'.
      * bucket (str) - Bucket name of the object to sign.
      * dest (str) - Path of the object within the bucket.
      * args (List[str]) - Extra arguments placed right after 'signurl'.
      * kwargs - Forwarded to __call__; 'name' overrides the step name.

    Returns: The step result.
    """
    args = args or []
    full_source = 'gs://%s/%s' % (bucket, dest)
    cmd = ['signurl'] + args + [private_key_file, full_source]
    name = kwargs.pop('name', 'signurl')
    return self(cmd, name, **kwargs)

  def remove_url(self, url, args=None, **kwargs):
    """Runs 'gsutil rm' on a Google Storage URL.

    Args:
      * url (str) - gs:// or https storage URL; see _normalize_url().
      * args (List[str]) - Extra arguments placed right after 'rm'.
      * kwargs - Forwarded to __call__; 'name' overrides the step name
        (default 'remove').

    Returns: The step result.
    """
    args = args or []
    url = self._normalize_url(url)
    cmd = ['rm'] + args + [url]
    name = kwargs.pop('name', 'remove')
    return self(cmd, name, **kwargs)

  @contextlib.contextmanager
  def configure_gsutil(self, **kwargs):
    """Temporarily configures the behavior of gsutil.

    For the duration of its context, this method will temporarily append a
    custom Boto file to the BOTO_PATH env var without overwriting bbagent's
    BOTO_CONFIG. See https://cloud.google.com/storage/docs/boto-gsutil for
    possible configurations.

    The context is a no-op when there is nothing to configure, or when
    neither BOTO_CONFIG nor BOTO_PATH was provided to this module.

    Args:
      kwargs: Every keyword arg is treated as config line in the temp Boto file.
    """
    if self.m.platform.is_mac:
      # Due to https://bugs.python.org/issue33725, using gsutil to download
      # sufficiently large files on MacOS has been seen to hang indefinitely,
      # and disabling multi-processing avoids that hang.
      kwargs.setdefault('parallel_process_count', '1')
    if not kwargs:
      yield
      return

    # If neither BOTO_CONFIG nor BOTO_PATH are set, gsutil looks at default
    # locations (/etc/boto.cfg and ~/.boto). So give up in that case just to
    # avoid the hassle of incorporating all the defaults. ~All LUCI builds
    # should at least be setting BOTO_CONFIG.
    if not self._boto_config_path and not self._boto_path:
      yield
      return
    custom_boto_path = self.m.path.mkstemp(prefix='custom_boto_')
    contents = [
        '# Generated by $depot_tools.recipe_modules.gsutil',
        # https://cloud.google.com/storage/docs/boto-gsutil seems to indicate
        # that the section headers are important. So certain config lines may
        # not work unless they show up under the appropriate header.
        '[GSUtil]',
    ]
    for k, v in kwargs.items():
      contents.append('%s = %s' % (k, str(v)))
    self.m.file.write_text(
        'write temp Boto file', custom_boto_path, '\n'.join(contents))
    # BOTO_CONFIG can only point to one file; BOTO_PATH can point to multiple,
    # each joined by ':'. If BOTO_CONFIG is set, BOTO_PATH is ignored.
    if self._boto_config_path:
      custom_boto_path = (
          self._boto_config_path + ':' + self.m.path.abspath(custom_boto_path))
    elif self._boto_path:
      custom_boto_path = (
          self._boto_path + ':' + self.m.path.abspath(custom_boto_path))
    # BOTO_CONFIG is set to None in the context env so that the combined
    # BOTO_PATH built above is the one in effect.
    with self.m.context(
        env={'BOTO_PATH': custom_boto_path, 'BOTO_CONFIG': None}):
      yield

  def _generate_metadata_args(self, metadata):
    """Turns a metadata dict into a flat list of gsutil '-h' arguments.

    Args:
      * metadata (Dict[str, str|None]) - Field name to value. A value of None
        emits the bare field name; otherwise '<field>:<value>' is emitted.
        May be None or empty.

    Returns: (List[str]) '-h', '<param>' pairs ordered by metadata key.
    """
    result = []
    if metadata:
      # Sort on the key only, so values (which may be None) are never compared.
      for k, v in sorted(metadata.items(), key=lambda item: item[0]):
        field = self._get_metadata_field(k)
        param = (field) if v is None else ('%s:%s' % (field, v))
        result += ['-h', param]
    return result

  def _normalize_url(self, url):
    """Rewrites a Google Storage URL into its gs:// form.

    Args:
      * url (str) - URL starting with gs://,
        https://storage.cloud.google.com/ or https://storage.googleapis.com/.

    Returns: (str) The URL with its prefix replaced by gs://.

    Raises:
      AssertionError: if url starts with none of the recognized prefixes.
    """
    gs_prefix = 'gs://'
    # Prefixes that can be swapped for gs:// to obtain the normalized URL.
    for prefix in (
        gs_prefix,
        'https://storage.cloud.google.com/',
        'https://storage.googleapis.com/',
    ):
      if url.startswith(prefix):
        return gs_prefix + url[len(prefix):]
    raise AssertionError("%s cannot be normalized" % url)

  @classmethod
  def _http_url(cls, bucket, dest, is_directory=False, is_anonymous=False):
    """Returns the https URL used to link to gs://<bucket>/<dest>.

    Args:
      * bucket (str) - Bucket name.
      * dest (str) - Path within the bucket.
      * is_directory (bool) - If True, link to the GCP console browser. Takes
        precedence over is_anonymous.
      * is_anonymous (bool) - If True, link to the unauthenticated object
        viewer instead of the authenticated one.
    """
    if is_directory:
      # Use GCP console.
      url_template = 'https://console.cloud.google.com/storage/browser/%s/%s'
    elif is_anonymous:
      # Use unauthenticated object viewer.
      url_template = 'https://storage.googleapis.com/%s/%s'
    else:
      # Use authenticated object viewer.
      url_template = 'https://storage.cloud.google.com/%s/%s'
    return url_template % (bucket, dest)

  @staticmethod
  def _get_metadata_field(name, provider_prefix=None):
    """Returns: (str) the metadata field to use with Google Storage

    The Google Storage specification for metadata can be found at:
    https://developers.google.com/storage/docs/gsutil/addlhelp/WorkingWithObjectMetadata

    Args:
      * name (str) - Metadata field name as given by the caller.
      * provider_prefix (str) - Prefix for custom fields. Defaults to
        'x-goog-meta'.
    """
    # Already contains custom provider prefix
    if name.lower().startswith('x-'):
      return name

    # See if it's innately supported by Google Storage
    # (this comparison is case-sensitive).
    if name in (
        'Cache-Control',
        'Content-Disposition',
        'Content-Encoding',
        'Content-Language',
        'Content-MD5',
        'Content-Type',
    ):
      return name

    # Add provider prefix
    if not provider_prefix:
      provider_prefix = 'x-goog-meta'
    return '%s-%s' % (provider_prefix, name)
|