Files
chromium_depot_tools/recipes/recipe_modules/gsutil/api.py
Garrett Beaty 312d6a0313 Correctly accumulate gs upload URLs
https://crrev.com/c/6796221 added a gsutil_urls output property that
provides the URLs of uploads to Google storage that were performed by
the build. It attempts to accumulate the URLs by updating a dictionary
on the result object's properties, but the properties on a step's
result object are empty when created, so it was just overwriting the
property each time with a single element dict. This change keeps the
dict on the API object and updates it when an upload is performed so
that the final value includes all of the URLs produced by the build.

Change-Id: I6681ca7c137969cbef58e09ce24d0605155d2c3e
Recipe-Nontrivial-Roll: build
Recipe-Nontrivial-Roll: build_internal
Recipe-Nontrivial-Roll: chrome_release
Recipe-Nontrivial-Roll: chromiumos
Recipe-Nontrivial-Roll: infra
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/6880970
Commit-Queue: Robbie Iannucci <iannucci@google.com>
Reviewed-by: Ben Pastene <bpastene@chromium.org>
Reviewed-by: Robbie Iannucci <iannucci@google.com>
Auto-Submit: Garrett Beaty <gbeaty@google.com>
2025-08-26 15:47:55 -07:00

301 lines
11 KiB
Python

# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import contextlib
import re
from recipe_engine import recipe_api
class GSUtilApi(recipe_api.RecipeApi):
def __init__(self, env_properties, *args, **kwargs):
    """Initialize the module state from its environment properties.

    Args:
      * env_properties - Object exposing the BOTO_CONFIG and BOTO_PATH
        values; both are stored for later use by configure_gsutil.
      * args, kwargs - Passed through untouched to the base class.
    """
    super().__init__(*args, **kwargs)
    # Destinations of uploads performed so far; upload() keys each entry by
    # the name of the step result that produced it.
    self._upload_urls = {}
    self._boto_path = env_properties.BOTO_PATH
    self._boto_config_path = env_properties.BOTO_CONFIG
@property
def gsutil_py_path(self):
    """The `gsutil.py` script, looked up through `self.repo_resource`."""
    script = self.repo_resource('gsutil.py')
    return script
def __call__(self,
             cmd,
             name=None,
             use_retry_wrapper=True,
             version=None,
             parallel_upload=False,
             multithreaded=False,
             infra_step=True,
             dry_run=False,
             **kwargs):
    """A step to run arbitrary gsutil commands.

    On LUCI this should automatically use the ambient task account credentials.
    On Buildbot, this assumes that gsutil authentication environment variables
    (AWS_CREDENTIAL_FILE and BOTO_CONFIG) are already set, though if you want to
    set them to something else you can always do so using the env={} kwarg.

    Note also that gsutil does its own wildcard processing, so wildcards are
    valid in file-like portions of the cmd. See 'gsutil help wildcards'.

    Args:
      * cmd (Iterable[str|Path]) - Arguments to pass to gsutil. Include
        gsutil-level options first (see 'gsutil help options'). Any iterable
        is accepted; it is copied into a list before use.
      * name (str) - Name of the step to use. Defaults to the first non-flag
        token in the cmd.
      * use_retry_wrapper (bool) - If True, run the `gsutil_smart_retry.py`
        resource script and hand it the real gsutil.py path, instead of
        invoking gsutil.py directly.
      * version (str) - If set, passed along as `--force-version <version>`.
      * parallel_upload (bool) - If True, adds
        `-o GSUtil:parallel_composite_upload_threshold=50M`.
      * multithreaded (bool) - If True, adds the `-m` flag.
      * infra_step (bool) - Forwarded to the step as `infra_step`.
      * dry_run (bool): If True, don't actually run the step; just log what
        the step would have been.
      * kwargs - Forwarded unchanged to the step call. Not used when
        dry_run is set.

    Returns: the result of `self.m.step(...)`, or of `self.m.step.empty(...)`
      for a dry run.
    """
    # Snapshot the arguments once so that tuples and one-shot iterables work
    # with the list concatenation below.
    cmd = [*cmd]

    if name:
        full_name = 'gsutil ' + name
    else:
        # Fall back to the first token not starting with '-'; keep the plain
        # 'gsutil' name when every token is a flag. Tokens may be Paths,
        # hence the str() conversion.
        first_word = next(
            (token for token in map(str, cmd) if not token.startswith('-')),
            None)
        full_name = 'gsutil' if first_word is None else 'gsutil ' + first_word

    gsutil_path = self.gsutil_py_path
    cmd_prefix = []

    if use_retry_wrapper:
        # We pass the real gsutil_path to the wrapper so it doesn't have to do
        # brittle path logic.
        cmd_prefix = ['--', gsutil_path]
        gsutil_path = self.resource('gsutil_smart_retry.py')

    if version:
        cmd_prefix.extend(['--force-version', version])

    if parallel_upload:
        cmd_prefix.extend([
            '-o',
            'GSUtil:parallel_composite_upload_threshold=50M',
        ])

    if multithreaded:
        cmd_prefix.append('-m')

    if use_retry_wrapper:
        # The -- argument for the wrapped gsutil.py is escaped as ---- as
        # python 2.7.3 removes all occurrences of --, not only the first. The
        # wrapper script unescapes it and passes it as -- to gsutil.py.
        # Note, that 2.7.6 doesn't have this problem, but it doesn't hurt.
        cmd_prefix.append('----')
    else:
        cmd_prefix.append('--')

    exec_cmd = ['python3', '-u', gsutil_path] + cmd_prefix + cmd
    if dry_run:
        return self.m.step.empty(full_name,
                                 step_text='Pretending to run gsutil command',
                                 log_text=' '.join(str(i) for i in exec_cmd),
                                 log_name='command')
    return self.m.step(full_name, exec_cmd, infra_step=infra_step, **kwargs)
def upload(self, source, bucket, dest, args=None, link_name='gsutil.upload',
metadata=None, unauthenticated_url=False, **kwargs):
# Runs `gsutil cp` from `source` to gs://<bucket>/<dest> and returns the
# step result.
#
# Args:
#   * source - Local path handed to `cp` as the source.
#   * bucket, dest - Combined into the gs://<bucket>/<dest> destination.
#   * args - Extra `cp` arguments; copied, so the caller's list is untouched.
#   * link_name - Name of the link added to the step presentation; a falsy
#     value skips the link.
#   * metadata - Mapping turned into `-h` flags by _generate_metadata_args.
#   * unauthenticated_url - Passed to _http_url as `is_anonymous`.
#   * kwargs - `name` (default 'upload') is popped for the step name; the
#     rest is forwarded to self(...).
args = [] if args is None else args[:]
# Note that metadata arguments have to be passed before the command cp.
metadata_args = self._generate_metadata_args(metadata)
full_dest = 'gs://%s/%s' % (bucket, dest)
cmd = metadata_args + ['cp'] + args + [source, full_dest]
name = kwargs.pop('name', 'upload')
result = self(cmd, name, **kwargs)
if link_name:
# A recursive copy is linked as a directory rather than a single object.
is_dir = '-r' in args or '--recursive' in args
link = self._http_url(bucket,
dest,
is_directory=is_dir,
is_anonymous=unauthenticated_url)
result.presentation.links[link_name] = link
# Record this upload on the API object and publish the accumulated mapping
# (step name -> gs:// destination) as the 'gsutil_urls' property. The same
# dict object is shared, so it keeps growing with later uploads.
# NOTE(review): indentation was lost in this copy of the file; confirm
# whether the two statements below belong inside `if link_name:` or run
# unconditionally.
self._upload_urls[result.name] = full_dest
result.presentation.properties['gsutil_urls'] = self._upload_urls
return result
def download(self, bucket, source, dest, args=None, **kwargs):
    """Copy gs://<bucket>/<source> to `dest` using `gsutil cp`.

    Args:
      * bucket, source - Combined into the gs:// source URL.
      * dest - Destination handed to `cp`.
      * args - Extra `cp` arguments; copied before use.
      * kwargs - `name` (default 'download') becomes the step name; the rest
        is forwarded to self(...).

    Returns: the result of self(...).
    """
    step_name = kwargs.pop('name', 'download')
    extra = args[:] if args is not None else []
    gs_source = 'gs://%s/%s' % (bucket, source)
    return self(['cp'] + extra + [gs_source, dest], step_name, **kwargs)
def download_url(self, url, dest, args=None, **kwargs):
    """Copy the object at `url` to `dest` using `gsutil cp`.

    Args:
      * url - Location of the object; passed through _normalize_url first.
      * dest - Destination handed to `cp`.
      * args - Extra `cp` arguments.
      * kwargs - `name` (default 'download_url') becomes the step name; the
        rest is forwarded to self(...).

    Returns: the result of self(...).
    """
    step_name = kwargs.pop('name', 'download_url')
    gs_url = self._normalize_url(url)
    command = ['cp'] + (args or []) + [gs_url, dest]
    return self(command, step_name, **kwargs)
def cat(self, url, args=None, **kwargs):
    """Run `gsutil cat` on `url`.

    Args:
      * url - Location of the object; passed through _normalize_url first.
      * args - Extra `cat` arguments.
      * kwargs - `name` (default 'cat') becomes the step name; the rest is
        forwarded to self(...).

    Returns: the result of self(...).
    """
    step_name = kwargs.pop('name', 'cat')
    gs_url = self._normalize_url(url)
    command = ['cat'] + (args or []) + [gs_url]
    return self(command, step_name, **kwargs)
def stat(self, url, args=None, **kwargs):
    """Run `gsutil stat` on `url`.

    Args:
      * url - Location of the object; passed through _normalize_url first.
      * args - Extra `stat` arguments.
      * kwargs - `name` (default 'stat') becomes the step name; the rest is
        forwarded to self(...).

    Returns: the result of self(...).
    """
    step_name = kwargs.pop('name', 'stat')
    gs_url = self._normalize_url(url)
    command = ['stat'] + (args or []) + [gs_url]
    return self(command, step_name, **kwargs)
def copy(self, source_bucket, source, dest_bucket, dest, args=None,
         link_name='gsutil.copy', metadata=None, unauthenticated_url=False,
         **kwargs):
    """Copy gs://<source_bucket>/<source> to gs://<dest_bucket>/<dest>.

    Args:
      * source_bucket, source - Combined into the gs:// source URL.
      * dest_bucket, dest - Combined into the gs:// destination URL.
      * args - Extra `cp` arguments. The list is copied, never modified.
      * link_name - Name of the link to the destination that is added to the
        step presentation; a falsy value skips the link.
      * metadata - Mapping turned into `-h` flags by _generate_metadata_args.
      * unauthenticated_url - Passed to _http_url as `is_anonymous`.
      * kwargs - `name` (default 'copy') becomes the step name; the rest is
        forwarded to self(...).

    Returns: the result of self(...).
    """
    # Work on a private copy: extending `args` in place would leak the
    # metadata flags into the caller's list.
    args = [] if args is None else [*args]

    # gsutil's -h is a top-level option, so the metadata flags have to come
    # before the `cp` command (same ordering as upload()).
    metadata_args = self._generate_metadata_args(metadata)

    full_source = 'gs://%s/%s' % (source_bucket, source)
    full_dest = 'gs://%s/%s' % (dest_bucket, dest)
    cmd = metadata_args + ['cp'] + args + [full_source, full_dest]
    name = kwargs.pop('name', 'copy')
    result = self(cmd, name, **kwargs)

    if link_name:
        # A recursive copy is linked as a directory rather than an object.
        is_dir = '-r' in args or '--recursive' in args
        result.presentation.links[link_name] = self._http_url(
            dest_bucket, dest, is_directory=is_dir,
            is_anonymous=unauthenticated_url)
    return result
def list(self, url, args=None, **kwargs):
    """Run `gsutil ls` on `url`.

    Args:
      * url - Location to list; passed through _normalize_url first.
      * args - Extra `ls` arguments.
      * kwargs - `name` (default 'list') becomes the step name; the rest is
        forwarded to self(...).

    Returns: the result of self(...).
    """
    step_name = kwargs.pop('name', 'list')
    gs_url = self._normalize_url(url)
    command = ['ls'] + (args or []) + [gs_url]
    return self(command, step_name, **kwargs)
def signurl(self, private_key_file, bucket, dest, args=None, **kwargs):
    """Run `gsutil signurl` for gs://<bucket>/<dest>.

    Args:
      * private_key_file - Key file placed right before the URL on the
        command line.
      * bucket, dest - Combined into the gs:// URL to sign.
      * args - Extra `signurl` arguments.
      * kwargs - `name` (default 'signurl') becomes the step name; the rest
        is forwarded to self(...).

    Returns: the result of self(...).
    """
    step_name = kwargs.pop('name', 'signurl')
    target = 'gs://%s/%s' % (bucket, dest)
    command = ['signurl'] + (args or []) + [private_key_file, target]
    return self(command, step_name, **kwargs)
def remove_url(self, url, args=None, **kwargs):
    """Run `gsutil rm` on `url`.

    Args:
      * url - Location to remove; passed through _normalize_url first.
      * args - Extra `rm` arguments.
      * kwargs - `name` (default 'remove') becomes the step name; the rest is
        forwarded to self(...).

    Returns: the result of self(...).
    """
    step_name = kwargs.pop('name', 'remove')
    gs_url = self._normalize_url(url)
    command = ['rm'] + (args or []) + [gs_url]
    return self(command, step_name, **kwargs)
@contextlib.contextmanager
def configure_gsutil(self, **kwargs):
    """Temporarily configures the behavior of gsutil.

    For the duration of its context, this method will temporarily append a
    custom Boto file to the BOTO_PATH env var without overwriting bbagent's
    BOTO_CONFIG. See https://cloud.google.com/storage/docs/boto-gsutil for
    possible configurations.

    The context does nothing when there are no settings to apply, or when the
    module was given neither a BOTO_CONFIG nor a BOTO_PATH value.

    Args:
      kwargs: Every keyword arg is treated as config line in the temp Boto
        file, written as `key = value` below the [GSUtil] header.
    """
    if self.m.platform.is_mac:
        # Due to https://bugs.python.org/issue33725, using gsutil to download
        # sufficiently large files on MacOS has been seen to hang
        # indefinitely, and disabling multi-processing avoids that hang.
        kwargs.setdefault('parallel_process_count', '1')

    # Nothing to write, or no existing Boto location to extend. If neither
    # BOTO_CONFIG nor BOTO_PATH are set, gsutil looks at default locations
    # (/etc/boto.cfg and ~/.boto), so give up in that case just to avoid the
    # hassle of incorporating all the defaults. ~All LUCI builds should at
    # least be setting BOTO_CONFIG.
    if not kwargs or not (self._boto_config_path or self._boto_path):
        yield
        return

    temp_boto = self.m.path.mkstemp(prefix='custom_boto_')

    # https://cloud.google.com/storage/docs/boto-gsutil seems to indicate
    # that the section headers are important. So certain config lines may
    # not work unless they show up under the appropriate header.
    header = [
        '# Generated by $depot_tools.recipe_modules.gsutil',
        '[GSUtil]',
    ]
    settings = ['%s = %s' % (key, str(value)) for key, value in kwargs.items()]
    self.m.file.write_text(
        'write temp Boto file', temp_boto, '\n'.join(header + settings))

    # BOTO_CONFIG can only point to one file; BOTO_PATH can point to multiple,
    # each joined by ':'. If BOTO_CONFIG is set, BOTO_PATH is ignored, so the
    # BOTO_CONFIG value takes precedence as the first entry.
    existing = self._boto_config_path or self._boto_path
    combined = existing + ':' + self.m.path.abspath(temp_boto)

    with self.m.context(env={'BOTO_PATH': combined, 'BOTO_CONFIG': None}):
        yield
def _generate_metadata_args(self, metadata):
    """Translate a metadata mapping into gsutil `-h` arguments.

    Keys are visited in sorted order. Each key is mapped through
    _get_metadata_field; a value of None yields the bare field name, any
    other value yields `field:value`.

    Returns: (list) a flat list of alternating '-h' and header strings; empty
      when `metadata` is falsy.
    """
    if not metadata:
        return []

    flags = []
    for key in sorted(metadata):
        value = metadata[key]
        field = self._get_metadata_field(key)
        if value is None:
            header = field
        else:
            header = '%s:%s' % (field, value)
        flags.extend(('-h', header))
    return flags
def _normalize_url(self, url):
    """Rewrite a supported Google Storage URL into its gs:// form.

    Accepted prefixes are 'gs://' itself and the two HTTPS hosts listed
    below; the first matching prefix is swapped for 'gs://'.

    Raises:
      AssertionError: if `url` starts with none of the accepted prefixes.
    """
    canonical = 'gs://'
    accepted = (
        canonical,
        'https://storage.cloud.google.com/',
        'https://storage.googleapis.com/',
    )
    matched = next((head for head in accepted if url.startswith(head)), None)
    if matched is None:
        raise AssertionError("%s cannot be normalized" % url)
    return canonical + url[len(matched):]
@classmethod
def _http_url(cls, bucket, dest, is_directory=False, is_anonymous=False):
    """Build the browser URL for `dest` inside `bucket`.

    `is_directory` takes precedence over `is_anonymous`.

    Returns: (str) a GCP console URL for directories, an unauthenticated
      object-viewer URL when `is_anonymous` is set, and an authenticated
      object-viewer URL otherwise.
    """
    location = (bucket, dest)
    if is_directory:
        # Use GCP console.
        return 'https://console.cloud.google.com/storage/browser/%s/%s' % location
    if is_anonymous:
        # Use unauthenticated object viewer.
        return 'https://storage.googleapis.com/%s/%s' % location
    # Use authenticated object viewer.
    return 'https://storage.cloud.google.com/%s/%s' % location
@staticmethod
def _get_metadata_field(name, provider_prefix=None):
    """Returns: (str) the metadata field to use with Google Storage

    A name is returned unchanged when it already starts with 'x-' (matched
    case-insensitively) or is one of the headers listed below (matched
    case-sensitively). Any other name is prefixed with `provider_prefix`,
    falling back to 'x-goog-meta' when that is empty or None.

    The Google Storage specification for metadata can be found at:
    https://developers.google.com/storage/docs/gsutil/addlhelp/WorkingWithObjectMetadata
    """
    # Headers innately supported by Google Storage.
    native_headers = (
        'Cache-Control',
        'Content-Disposition',
        'Content-Encoding',
        'Content-Language',
        'Content-MD5',
        'Content-Type',
    )
    has_custom_prefix = name.lower().startswith('x-')
    if has_custom_prefix or name in native_headers:
        return name
    return '%s-%s' % (provider_prefix or 'x-goog-meta', name)