Add support for downloading and extracting tar.gz archive files in download_from_google_storage

Also add support for directly setting the public-read flag on uploaded files.

The tar.gz support allows an uploaded bundle to be a single tar.gz file instead of many individual files handled through the existing directory support. The benefit is that updating a dependency becomes much easier: simply clean out the existing files, copy in the new ones, and create a tar.gz file with the same name as the directory plus '.tar.gz'. If the directory name and the file name do not match, existing artifacts are not cleaned up on download (i.e., there can be leftover files after extracting).
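As an illustration of that workflow, here is a minimal sketch of how such a bundle might be created before uploading (the directory name 'my_deps' is hypothetical; uploading still goes through the existing upload tooling):

import tarfile

# Hypothetical dependency directory. The archive must be named
# '<directory>.tar.gz' so that the download side can find and clean up
# the matching extraction directory.
DEP_DIR = 'my_deps'

with tarfile.open(DEP_DIR + '.tar.gz', 'w:gz') as tar:
    # Adding the directory itself prefixes every entry with 'my_deps/',
    # which is what the extraction-side validation expects.
    tar.add(DEP_DIR)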

I am doing this because I am moving a bunch of the Dart dependencies to GCS, and many of our dependencies are much easier to manage with this in place. If you don't like this, I can simply wrap the download script in another Python script and do the logic there, but this may be handy for other people as well.

Review URL: https://codereview.chromium.org/807463005

git-svn-id: svn://svn.chromium.org/chrome/trunk/tools/depot_tools@295872 0039d316-1c4b-4281-b951-d872f2087c98
commit 0c7d94eb9d (parent 4845f0ed69)
Author: ricow@google.com
Date: 2015-06-30 06:13:13 +00:00
4 changed files with 224 additions and 11 deletions


@@ -11,8 +11,10 @@ import optparse
import os
import Queue
import re
import shutil
import stat
import sys
import tarfile
import threading
import time
@@ -49,7 +51,6 @@ def GetNormalizedPlatform():
return 'win32'
return sys.platform
# Common utilities
class Gsutil(object):
"""Call gsutil with some predefined settings. This is a convenience object,
@@ -186,8 +187,19 @@ def enumerate_work_queue(input_filename, work_queue, directory,
return work_queue_size
def _validate_tar_file(tar, prefix):
def _validate(tarinfo):
"""Returns false if the tarinfo is something we explicitly forbid."""
if tarinfo.issym() or tarinfo.islnk():
return False
if '..' in tarinfo.name or not tarinfo.name.startswith(prefix):
return False
return True
return all(map(_validate, tar.getmembers()))
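For context, a rough usage sketch of this helper (standalone illustration, not part of the change): it returns False for symlinks, hardlinks, '..' path components, and entries that do not live under the expected directory prefix, so a hostile archive cannot write outside its own extraction directory.

import tarfile

# Hypothetical archive 'foo.tar.gz' whose entries must all live under 'foo/'.
with tarfile.open('foo.tar.gz', 'r:gz') as tar:
    if not _validate_tar_file(tar, 'foo'):
        raise RuntimeError('foo.tar.gz contains unsafe or misplaced entries.')
    tar.extractall(path='.')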
def _downloader_worker_thread(thread_num, q, force, base_url,
gsutil, out_q, ret_codes, verbose):
gsutil, out_q, ret_codes, verbose, extract,
delete=True):
while True:
input_sha1_sum, output_filename = q.get()
if input_sha1_sum is None:
@@ -218,7 +230,8 @@ def _downloader_worker_thread(thread_num, q, force, base_url,
# Fetch the file.
out_q.put('%d> Downloading %s...' % (thread_num, output_filename))
try:
os.remove(output_filename) # Delete the file if it exists already.
if delete:
os.remove(output_filename) # Delete the file if it exists already.
except OSError:
if os.path.exists(output_filename):
out_q.put('%d> Warning: deleting %s failed.' % (
@@ -228,6 +241,34 @@ def _downloader_worker_thread(thread_num, q, force, base_url,
out_q.put('%d> %s' % (thread_num, err))
ret_codes.put((code, err))
if extract:
if (not tarfile.is_tarfile(output_filename)
or not output_filename.endswith('.tar.gz')):
out_q.put('%d> Error: %s is not a tar.gz archive.' % (
thread_num, output_filename))
ret_codes.put((1, '%s is not a tar.gz archive.' % (output_filename)))
continue
with tarfile.open(output_filename, 'r:gz') as tar:
dirname = os.path.dirname(os.path.abspath(output_filename))
extract_dir = output_filename[0:len(output_filename)-7]
if not _validate_tar_file(tar, os.path.basename(extract_dir)):
out_q.put('%d> Error: %s contains files outside %s.' % (
thread_num, output_filename, extract_dir))
ret_codes.put((1, '%s contains invalid entries.' % (output_filename)))
continue
if os.path.exists(extract_dir):
try:
shutil.rmtree(extract_dir)
out_q.put('%d> Removed %s...' % (thread_num, extract_dir))
except OSError:
out_q.put('%d> Warning: Can\'t delete: %s' % (
thread_num, extract_dir))
ret_codes.put((1, 'Can\'t delete %s.' % (extract_dir)))
continue
out_q.put('%d> Extracting %d entries from %s to %s' %
(thread_num, len(tar.getmembers()), output_filename,
extract_dir))
tar.extractall(path=dirname)
# Set executable bit.
if sys.platform == 'cygwin':
# Under cygwin, mark all files as executable. The executable flag in
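To make the extraction target explicit: the code above strips the 7-character '.tar.gz' suffix from the downloaded file name and extracts next to it, so the archive's top-level directory recreates that path. A small illustration with a hypothetical download location:

import os

output_filename = '/checkout/deps/foo.tar.gz'  # hypothetical path
dirname = os.path.dirname(os.path.abspath(output_filename))
extract_dir = output_filename[0:len(output_filename) - 7]  # drops '.tar.gz'
# dirname     -> '/checkout/deps'
# extract_dir -> '/checkout/deps/foo'
# extractall() runs in dirname, so the archive's 'foo/' prefix recreates
# extract_dir alongside the downloaded tar.gz.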
@@ -258,7 +299,7 @@ def printer_worker(output_queue):
def download_from_google_storage(
input_filename, base_url, gsutil, num_threads, directory, recursive,
force, output, ignore_errors, sha1_file, verbose, auto_platform):
force, output, ignore_errors, sha1_file, verbose, auto_platform, extract):
# Start up all the worker threads.
all_threads = []
download_start = time.time()
@@ -270,7 +311,7 @@ def download_from_google_storage(
t = threading.Thread(
target=_downloader_worker_thread,
args=[thread_num, work_queue, force, base_url,
gsutil, stdout_queue, ret_codes, verbose])
gsutil, stdout_queue, ret_codes, verbose, extract])
t.daemon = True
t.start()
all_threads.append(t)
@@ -358,6 +399,13 @@ def main(args):
'(linux|mac|win). If so, the script will only '
'process files that are in the paths that '
'match the current platform.')
parser.add_option('-u', '--extract',
action='store_true',
help='Extract a downloaded tar.gz file. '
'Leaves the tar.gz file around for sha1 verification. '
'If a directory with the same name as the tar.gz '
'file already exists, it is deleted (to get a '
'clean state in case of an update).')
parser.add_option('-v', '--verbose', action='store_true',
help='Output extra diagnostic and progress information.')
@@ -451,7 +499,8 @@ def main(args):
return download_from_google_storage(
input_filename, base_url, gsutil, options.num_threads, options.directory,
options.recursive, options.force, options.output, options.ignore_errors,
options.sha1_file, options.verbose, options.auto_platform)
options.sha1_file, options.verbose, options.auto_platform,
options.extract)
if __name__ == '__main__':
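As a hedged end-to-end sketch of how a consumer might use the new option from a gclient DEPS hook (the bucket name, paths, and hook layout are hypothetical; only -u/--extract is introduced by this change, and the other flags should be checked against the script's --help):

# Hypothetical DEPS hook; bucket and paths are made up.
hooks = [
  {
    'name': 'dart_sdk_bundle',
    'pattern': '.',
    'action': ['download_from_google_storage',
               '--no_auth',              # assumed flag
               '--bucket', 'my-bucket',  # hypothetical bucket
               '-u',                     # extract the downloaded tar.gz
               '-s', 'src/third_party/dart_sdk.tar.gz.sha1'],
  },
]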