[gsutil] Fix race when downloading gsutil

If gsutil is not yet downloaded and gsutil (or
download_from_google_storage) is invoked by multiple processes
concurrently, those processes may all end up doing the same download
work. There is also a race window in which gsutil.py can fail with a
"Destination path '/depot_tools_path/external_bin/gsutil/gsutil_4.68/d'
already exists" error.

To avoid this, use filesystem locking around the code that manipulates
these files.

R=jojwang@google.com

Bug: 338040708
Change-Id: Ib83aaa1e09628f878e512d79f2fa5221c2bcfd37
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5502531
Reviewed-by: Joanna Wang <jojwang@chromium.org>
Commit-Queue: Joanna Wang <jojwang@chromium.org>
Auto-Submit: Josip Sokcevic <sokcevic@chromium.org>
Josip Sokcevic authored 2024-05-01 16:41:16 +00:00, committed by LUCI CQ
parent e75b940aea
commit 19199514e8
3 changed files with 33 additions and 38 deletions
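For orientation before the diffs: the fix follows a double-checked locking pattern, where the install flag is checked once without the lock (fast path), re-checked after the filesystem lock is acquired, and the download only runs if both checks miss. A minimal sketch of that shape, with illustrative names (ensure_ready, do_download) that are not depot_tools APIs:

    import os


    def ensure_ready(flag_path, lock, do_download):
        # Fast path: a previous run (or another process) already finished.
        if os.path.isfile(flag_path):
            return

        with lock:  # waits, up to some timeout, until this process owns the lock
            # Re-check under the lock: another process may have completed the
            # download while we were waiting to acquire it.
            if os.path.isfile(flag_path):
                return

            do_download()

            # Write the flag last, so a crash mid-download never leaves a
            # "ready" marker behind.
            with open(flag_path, 'w') as f:
                f.write('installed')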


@@ -17,8 +17,6 @@ import tempfile
 import time
 import urllib.request
-import zipfile
 
 GSUTIL_URL = 'https://storage.googleapis.com/pub/'
 API_URL = 'https://www.googleapis.com/storage/v1/b/pub/o/'
@@ -101,42 +99,37 @@ def ensure_gsutil(version, target, clean):
         return gsutil_bin
 
     if not os.path.exists(target):
-        try:
-            os.makedirs(target)
-        except FileExistsError:
-            # Another process is prepping workspace, so let's check if
-            # gsutil_bin is present. If after several checks it's still not,
-            # continue with downloading gsutil.
-            delay = 2  # base delay, in seconds
-            for _ in range(3):  # make N attempts
-                # sleep first as it's not expected to have file ready just yet.
-                time.sleep(delay)
-                delay *= 1.5  # next delay increased by that factor
-                if os.path.isfile(gsutil_bin):
-                    return gsutil_bin
+        os.makedirs(target, exist_ok=True)
 
-    with temporary_directory(target) as instance_dir:
-        # Clean up if we're redownloading a corrupted gsutil.
-        cleanup_path = os.path.join(instance_dir, 'clean')
-        try:
-            os.rename(bin_dir, cleanup_path)
-        except (OSError, IOError):
-            cleanup_path = None
-        if cleanup_path:
-            shutil.rmtree(cleanup_path)
-
-        download_dir = os.path.join(instance_dir, 'd')
-        target_zip_filename = download_gsutil(version, instance_dir)
-        with zipfile.ZipFile(target_zip_filename, 'r') as target_zip:
-            target_zip.extractall(download_dir)
-
-        shutil.move(download_dir, bin_dir)
-        # Final check that the gsutil bin exists. This should never fail.
-        if not os.path.isfile(gsutil_bin):
-            raise InvalidGsutilError()
-        # Drop a flag file.
-        with open(gsutil_flag, 'w') as f:
-            f.write('This flag file is dropped by gsutil.py')
+    import lockfile
+    import zipfile
+    with lockfile.lock(bin_dir, timeout=30):
+        # Check if gsutil is ready (another process may have had lock).
+        if not clean and os.path.isfile(gsutil_flag):
+            return gsutil_bin
+
+        with temporary_directory(target) as instance_dir:
+            download_dir = os.path.join(instance_dir, 'd')
+            target_zip_filename = download_gsutil(version, instance_dir)
+            with zipfile.ZipFile(target_zip_filename, 'r') as target_zip:
+                target_zip.extractall(download_dir)
+
+            # Clean up if we're redownloading a corrupted gsutil.
+            cleanup_path = os.path.join(instance_dir, 'clean')
+            try:
+                os.rename(bin_dir, cleanup_path)
+            except (OSError, IOError):
+                cleanup_path = None
+            if cleanup_path:
+                shutil.rmtree(cleanup_path)
+
+            shutil.move(download_dir, bin_dir)
+            # Final check that the gsutil bin exists. This should never fail.
+            if not os.path.isfile(gsutil_bin):
+                raise InvalidGsutilError()
+            # Drop a flag file.
+            with open(gsutil_flag, 'w') as f:
+                f.write('This flag file is dropped by gsutil.py')
 
     return gsutil_bin
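The hunk above enters lockfile.lock(bin_dir, timeout=30) as a context manager, while the lockfile change below shows _lock returning a release callable. One plausible way the two fit together, sketched here under that assumption (the real wrapper in lockfile.py may be structured differently):

    import contextlib


    @contextlib.contextmanager
    def lock(path, timeout=0):
        """Hold the filesystem lock for 'path' for the duration of the block."""
        release = _lock(path, timeout)  # _lock raises if the lock can't be taken
        try:
            yield
        finally:
            release()  # always release, even if the body raises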


@@ -80,14 +80,16 @@ def _try_lock(lockfile):
 def _lock(path, timeout=0):
     """_lock returns function to release the lock if locking was successful.
-    _lock also implements simple retry logic."""
+    _lock also implements simple retry logic.
+    NOTE: timeout value doesn't include time it takes to acquire lock, just
+    overall sleep time."""
     elapsed = 0
+    sleep_time = 0.1
     while True:
         try:
             return _try_lock(path + '.locked')
         except (OSError, IOError) as e:
             if elapsed < timeout:
-                sleep_time = min(10, timeout - elapsed)
                 logging.info(
                     'Could not create git cache lockfile; '
                     'will retry after sleep(%d).', sleep_time)
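With sleep_time fixed at 0.1 seconds, timeout now bounds the total time spent sleeping between attempts (roughly 300 attempts for timeout=30), not overall wall-clock time. A simplified sketch of the resulting retry loop, reusing _try_lock from the same module and assuming the bookkeeping outside the shown context (sleep, then accumulate elapsed) keeps this shape; it is not the exact depot_tools code:

    import time


    def _lock_sketch(path, timeout=0):
        """Simplified retry loop: keep trying until the sleep budget runs out."""
        sleep_time = 0.1
        elapsed = 0
        while True:
            try:
                return _try_lock(path + '.locked')  # returns a release callable
            except (OSError, IOError):
                if elapsed >= timeout:
                    raise  # out of budget: surface the locking error
                time.sleep(sleep_time)
                elapsed += sleep_time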


@@ -102,7 +102,7 @@ class LockTest(unittest.TestCase):
         # One result was consumed by side_effect, we expect only one in the
         # queue.
         self.assertEqual(1, results.qsize())
-        sleep_mock.assert_called_once_with(1)
+        sleep_mock.assert_called_with(0.1)
 
 
 if __name__ == '__main__':
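The test change mirrors the new fixed interval: the mocked time.sleep should now be observed with 0.1 rather than 1. A minimal sketch of such an assertion, assuming a lockfile module exposing _lock and _try_lock as in the diffs above (the real test fixture differs):

    import unittest
    from unittest import mock

    import lockfile


    class LockRetrySketch(unittest.TestCase):

        @mock.patch('time.sleep')
        def test_retries_with_fixed_interval(self, sleep_mock):
            # First attempt fails, second succeeds, so sleep(0.1) runs once.
            attempts = [OSError('locked'), lambda: None]
            with mock.patch.object(lockfile, '_try_lock', side_effect=attempts):
                lockfile._lock('some/path', timeout=1)
            sleep_mock.assert_called_with(0.1)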