mirror of
https://chromium.googlesource.com/chromium/tools/depot_tools.git
synced 2026-01-11 02:31:29 +00:00
[gsutil] Fix race when downloading gsutil
If gsutil is not downloaded, and if gsutil (or download_from_google_storage) is called concurrently with n>2, it's possible that those processes are doing the same work. There is also a critical point where gsutil.py can fail with: Destination path '/depot_tools_path/external_bin/gsutil/gsutil_4.68/d' already exists error. To avoid this problem, use FS locking around code that manipulates files. R=jojwang@google.com Bug: 338040708 Change-Id: Ib83aaa1e09628f878e512d79f2fa5221c2bcfd37 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5502531 Reviewed-by: Joanna Wang <jojwang@chromium.org> Commit-Queue: Joanna Wang <jojwang@chromium.org> Auto-Submit: Josip Sokcevic <sokcevic@chromium.org>
This commit is contained in:
63
gsutil.py
63
gsutil.py
@@ -17,8 +17,6 @@ import tempfile
|
||||
import time
|
||||
import urllib.request
|
||||
|
||||
import zipfile
|
||||
|
||||
GSUTIL_URL = 'https://storage.googleapis.com/pub/'
|
||||
API_URL = 'https://www.googleapis.com/storage/v1/b/pub/o/'
|
||||
|
||||
@@ -101,42 +99,37 @@ def ensure_gsutil(version, target, clean):
|
||||
return gsutil_bin
|
||||
|
||||
if not os.path.exists(target):
|
||||
try:
|
||||
os.makedirs(target)
|
||||
except FileExistsError:
|
||||
# Another process is prepping workspace, so let's check if
|
||||
# gsutil_bin is present. If after several checks it's still not,
|
||||
# continue with downloading gsutil.
|
||||
delay = 2 # base delay, in seconds
|
||||
for _ in range(3): # make N attempts
|
||||
# sleep first as it's not expected to have the file ready just yet.
|
||||
time.sleep(delay)
|
||||
delay *= 1.5 # next delay increased by that factor
|
||||
if os.path.isfile(gsutil_bin):
|
||||
return gsutil_bin
|
||||
os.makedirs(target, exist_ok=True)
|
||||
|
||||
with temporary_directory(target) as instance_dir:
|
||||
# Clean up if we're redownloading a corrupted gsutil.
|
||||
cleanup_path = os.path.join(instance_dir, 'clean')
|
||||
try:
|
||||
os.rename(bin_dir, cleanup_path)
|
||||
except (OSError, IOError):
|
||||
cleanup_path = None
|
||||
if cleanup_path:
|
||||
shutil.rmtree(cleanup_path)
|
||||
import lockfile
|
||||
import zipfile
|
||||
with lockfile.lock(bin_dir, timeout=30):
|
||||
# Check if gsutil is ready (another process may have held the lock).
|
||||
if not clean and os.path.isfile(gsutil_flag):
|
||||
return gsutil_bin
|
||||
|
||||
download_dir = os.path.join(instance_dir, 'd')
|
||||
target_zip_filename = download_gsutil(version, instance_dir)
|
||||
with zipfile.ZipFile(target_zip_filename, 'r') as target_zip:
|
||||
target_zip.extractall(download_dir)
|
||||
with temporary_directory(target) as instance_dir:
|
||||
download_dir = os.path.join(instance_dir, 'd')
|
||||
target_zip_filename = download_gsutil(version, instance_dir)
|
||||
with zipfile.ZipFile(target_zip_filename, 'r') as target_zip:
|
||||
target_zip.extractall(download_dir)
|
||||
|
||||
shutil.move(download_dir, bin_dir)
|
||||
# Final check that the gsutil bin exists. This should never fail.
|
||||
if not os.path.isfile(gsutil_bin):
|
||||
raise InvalidGsutilError()
|
||||
# Drop a flag file.
|
||||
with open(gsutil_flag, 'w') as f:
|
||||
f.write('This flag file is dropped by gsutil.py')
|
||||
# Clean up if we're redownloading a corrupted gsutil.
|
||||
cleanup_path = os.path.join(instance_dir, 'clean')
|
||||
try:
|
||||
os.rename(bin_dir, cleanup_path)
|
||||
except (OSError, IOError):
|
||||
cleanup_path = None
|
||||
if cleanup_path:
|
||||
shutil.rmtree(cleanup_path)
|
||||
|
||||
shutil.move(download_dir, bin_dir)
|
||||
# Final check that the gsutil bin exists. This should never fail.
|
||||
if not os.path.isfile(gsutil_bin):
|
||||
raise InvalidGsutilError()
|
||||
# Drop a flag file.
|
||||
with open(gsutil_flag, 'w') as f:
|
||||
f.write('This flag file is dropped by gsutil.py')
|
||||
|
||||
return gsutil_bin
|
||||
|
||||
|
||||
@@ -80,14 +80,16 @@ def _try_lock(lockfile):
|
||||
def _lock(path, timeout=0):
|
||||
"""_lock returns function to release the lock if locking was successful.
|
||||
|
||||
_lock also implements simple retry logic."""
|
||||
_lock also implements simple retry logic.
|
||||
NOTE: timeout value doesn't include time it takes to acquire lock, just
|
||||
overall sleep time."""
|
||||
elapsed = 0
|
||||
sleep_time = 0.1
|
||||
while True:
|
||||
try:
|
||||
return _try_lock(path + '.locked')
|
||||
except (OSError, IOError) as e:
|
||||
if elapsed < timeout:
|
||||
sleep_time = min(10, timeout - elapsed)
|
||||
logging.info(
|
||||
'Could not create git cache lockfile; '
|
||||
'will retry after sleep(%d).', sleep_time)
|
||||
|
||||
@@ -102,7 +102,7 @@ class LockTest(unittest.TestCase):
|
||||
# One result was consumed by side_effect, we expect only one in the
|
||||
# queue.
|
||||
self.assertEqual(1, results.qsize())
|
||||
sleep_mock.assert_called_once_with(1)
|
||||
sleep_mock.assert_called_with(0.1)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
Reference in New Issue
Block a user