From 82130f468a94af49d6dc74bcb9d4b51a293c27ab Mon Sep 17 00:00:00 2001 From: Josiah Kiehl Date: Tue, 9 Dec 2025 10:50:31 -0800 Subject: [PATCH] Speed up presubmit by caching code-owners check Currently, presubmit queries the gerrit server every run to check if the code-owners plugin is enabled. This CL caches that result in a /tmp file per repository. This saves as much as 1.5s per `git cl presubmit`. Change-Id: I1a3631074d1bcbb1a254caa6654fd8434f069aa2 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/7227749 Commit-Queue: Josiah Kiehl Reviewed-by: Yiwei Zhang --- gclient_scm.py | 2 +- gclient_utils.py | 8 +- gerrit_cache.py | 140 +++++++++++++++++++++++++++++++ owners_client.py | 15 +++- presubmit_support.py | 28 +++++-- tests/gclient_utils_test.py | 27 ++++++ tests/gerrit_cache_test.py | 163 ++++++++++++++++++++++++++++++++++++ 7 files changed, 371 insertions(+), 12 deletions(-) create mode 100644 gerrit_cache.py create mode 100755 tests/gerrit_cache_test.py diff --git a/gclient_scm.py b/gclient_scm.py index e692bf2343..e6b48abb10 100644 --- a/gclient_scm.py +++ b/gclient_scm.py @@ -1421,7 +1421,7 @@ class GitWrapper(SCMWrapper): logging.debug( 'Cloned into temporary dir, moving to checkout_path') gclient_utils.safe_makedirs(self.checkout_path) - gclient_utils.safe_rename( + gclient_utils.safe_replace( os.path.join(tmp_dir, '.git'), os.path.join(self.checkout_path, '.git')) except: diff --git a/gclient_utils.py b/gclient_utils.py index 53432fd961..31ac81efc2 100644 --- a/gclient_utils.py +++ b/gclient_utils.py @@ -250,10 +250,10 @@ def temporary_file(): os.remove(name) -def safe_rename(old, new): - """Renames a file reliably. +def safe_replace(old, new): + """Renames a file reliably, overwriting the destination if it exists. - Sometimes os.rename does not work because a dying git process keeps a handle + Sometimes os.replace does not work because a dying git process keeps a handle on it for a few seconds. An exception is then thrown, which make the program give up what it was doing and remove what was deleted. The only solution is to catch the exception and try again until it works. @@ -262,7 +262,7 @@ def safe_rename(old, new): retries = 100 for i in range(retries): try: - os.rename(old, new) + os.replace(old, new) break except OSError: if i == (retries - 1): diff --git a/gerrit_cache.py b/gerrit_cache.py new file mode 100644 index 0000000000..47101e8a52 --- /dev/null +++ b/gerrit_cache.py @@ -0,0 +1,140 @@ +# Copyright (c) 2025 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import contextlib +import json +import os +import re +import tempfile + +import gclient_utils +import lockfile +import scm + + +@contextlib.contextmanager +def _AtomicFileWriter(path, mode='w'): + """Atomic file writer context manager.""" + # Create temp file in same directory to ensure atomic rename works + fd, temp_path = tempfile.mkstemp(dir=os.path.dirname(path), + text='b' not in mode) + try: + with os.fdopen(fd, mode) as f: + yield f + # Atomic rename + gclient_utils.safe_replace(temp_path, path) + except Exception: + # Cleanup on failure + if os.path.exists(temp_path): + os.remove(temp_path) + raise + + +class GerritCache(object): + """Simple JSON file-based cache for Gerrit API results.""" + + def __init__(self, root_dir): + self.root_dir = root_dir + self.cache_path = self._get_cache_path() + + @staticmethod + def get_repo_code_owners_enabled_key(host, project): + return 'code-owners.%s.%s.enabled' % (host, project) + + @staticmethod + def get_host_code_owners_enabled_key(host): + return 'code-owners.%s.enabled' % host + + def _get_cache_path(self): + path = os.environ.get('DEPOT_TOOLS_GERRIT_CACHE_PATH') + if path: + return path + + try: + path = scm.GIT.GetConfig(self.root_dir, + 'depot-tools.gerrit-cache-path') + if path: + return path + except Exception: + pass + + try: + if self.root_dir: + # Use a deterministic name based on the repo path + sanitized_path = re.sub(r'[^a-zA-Z0-9]', '_', + os.path.abspath(self.root_dir)) + filename = 'depot_tools_gerrit_cache_%s.json' % sanitized_path + path = os.path.join(tempfile.gettempdir(), filename) + else: + fd, path = tempfile.mkstemp(prefix='depot_tools_gerrit_cache_', + suffix='.json') + os.close(fd) + + if not os.path.exists(path): + # Initialize with empty JSON object + with open(path, 'w') as f: + json.dump({}, f) + + try: + scm.GIT.SetConfig(self.root_dir, + 'depot-tools.gerrit-cache-path', path) + except Exception: + # If we can't set config (e.g. not a git repo and no env var + # set), just return the temp path. It will be a per-process + # cache in that case. + pass + return path + except Exception: + # Fallback to random temp file if everything else fails + fd, path = tempfile.mkstemp(prefix='gerrit_cache_', suffix='.json') + os.close(fd) + return path + + def get(self, key): + if not self.cache_path: + return None + try: + with lockfile.lock(self.cache_path, timeout=1): + if os.path.exists(self.cache_path): + with open(self.cache_path, 'r') as f: + try: + data = json.load(f) + return data.get(key) + except ValueError: + # Corrupt cache file, treat as miss + return None + except Exception: + # Ignore cache errors + return None + + def set(self, key, value): + if not self.cache_path: + return + try: + with lockfile.lock(self.cache_path, timeout=1): + data = {} + if os.path.exists(self.cache_path): + with open(self.cache_path, 'r') as f: + try: + data = json.load(f) + except ValueError: + # Corrupt cache, start fresh + data = {} + data[key] = value + with _AtomicFileWriter(self.cache_path, 'w') as f: + json.dump(data, f) + except Exception: + # Ignore cache errors + pass + + def getBoolean(self, key): + """Returns the value for key as a boolean, or None if missing.""" + val = self.get(key) + if val is None: + return None + return bool(val) + + def setBoolean(self, key, value): + """Sets the value for key as a boolean.""" + self.set(key, bool(value)) diff --git a/owners_client.py b/owners_client.py index 1ea13d966f..108b6333f1 100644 --- a/owners_client.py +++ b/owners_client.py @@ -6,6 +6,7 @@ import os import random import gerrit_util +from gerrit_cache import GerritCache import git_common @@ -213,12 +214,22 @@ class GerritClient(OwnersClient): paths)) -def GetCodeOwnersClient(host, project, branch): +def GetCodeOwnersClient(host, project, branch, cache=None): """Get a new OwnersClient. Uses GerritClient and raises an exception if code-owners plugin is not available.""" - if gerrit_util.IsCodeOwnersEnabledOnHost(host): + key = GerritCache.get_host_code_owners_enabled_key(host) + is_enabled = None + if cache: + is_enabled = cache.getBoolean(key) + + if is_enabled is None: + is_enabled = gerrit_util.IsCodeOwnersEnabledOnHost(host) + if cache: + cache.setBoolean(key, is_enabled) + + if is_enabled: return GerritClient(host, project, branch) raise Exception( 'code-owners plugin is not enabled. Ask your host admin to enable it ' diff --git a/presubmit_support.py b/presubmit_support.py index eb6cbc0b99..8540897810 100755 --- a/presubmit_support.py +++ b/presubmit_support.py @@ -45,6 +45,8 @@ import gclient_paths # Exposed through the API import gclient_utils import git_footers import gerrit_util +from gerrit_cache import GerritCache +import lockfile import owners_client import owners_finder import presubmit_canned_checks @@ -499,12 +501,14 @@ class GerritAccessor(object): To avoid excessive Gerrit calls, caches the results. """ - def __init__(self, url=None, project=None, branch=None): + + def __init__(self, url=None, project=None, branch=None, root=None): self.host = urlparse.urlparse(url).netloc if url else None self.project = project self.branch = branch self.cache = {} self.code_owners_enabled = None + self.persistent_cache = GerritCache(root) def _FetchChangeDetail(self, issue): # Separate function to be easily mocked in tests. @@ -597,8 +601,19 @@ class GerritAccessor(object): def IsCodeOwnersEnabledOnRepo(self): if self.code_owners_enabled is None: - self.code_owners_enabled = gerrit_util.IsCodeOwnersEnabledOnRepo( - self.host, self.project) + if self.persistent_cache: + key = GerritCache.get_repo_code_owners_enabled_key( + self.host, self.project) + self.code_owners_enabled = self.persistent_cache.getBoolean(key) + if self.code_owners_enabled is None: + self.code_owners_enabled = gerrit_util.IsCodeOwnersEnabledOnRepo( + self.host, self.project) + self.persistent_cache.setBoolean(key, + self.code_owners_enabled) + else: + self.code_owners_enabled = gerrit_util.IsCodeOwnersEnabledOnRepo( + self.host, self.project) + return self.code_owners_enabled @@ -790,7 +805,8 @@ class InputApi(object): self.owners_client = owners_client.GetCodeOwnersClient( host=self.gerrit.host, project=self.gerrit.project, - branch=self.gerrit.branch) + branch=self.gerrit.branch, + cache=self.gerrit.persistent_cache) except Exception as e: print('Failed to set owners_client - %s' % str(e)) self.owners_finder = owners_finder.OwnersFinder @@ -808,6 +824,7 @@ class InputApi(object): (a, b, header) for (a, b, header) in cpplint._re_pattern_templates ] + def SetTimeout(self, timeout): self.thread_pool.timeout = timeout @@ -2335,7 +2352,8 @@ def _parse_gerrit_options(parser, options): if options.gerrit_url: gerrit_obj = GerritAccessor(url=options.gerrit_url, project=options.gerrit_project, - branch=options.gerrit_branch) + branch=options.gerrit_branch, + root=options.root) if not options.gerrit_fetch: return gerrit_obj diff --git a/tests/gclient_utils_test.py b/tests/gclient_utils_test.py index e4b41a8f1e..b8abc21003 100755 --- a/tests/gclient_utils_test.py +++ b/tests/gclient_utils_test.py @@ -419,4 +419,31 @@ class GClientUtilsTest(trial_dir.TestCase): if __name__ == '__main__': unittest.main() + +class SafeReplaceTest(unittest.TestCase): + + def setUp(self): + self.mock_os = mock.patch('gclient_utils.os').start() + self.addCleanup(mock.patch.stopall) + + def testReplace(self): + gclient_utils.safe_replace('old', 'new') + self.mock_os.replace.assert_called_with('old', 'new') + + def testReplaceRetry(self): + self.mock_os.replace.side_effect = [OSError, OSError, None] + with mock.patch('time.sleep') as mock_sleep: + gclient_utils.safe_replace('old', 'new') + self.assertEqual(self.mock_os.replace.call_count, 3) + self.assertEqual(mock_sleep.call_count, 2) + + def testReplaceFailure(self): + self.mock_os.replace.side_effect = OSError + with mock.patch('time.sleep'): + with self.assertRaises(OSError): + gclient_utils.safe_replace('old', 'new') + + + + # vim: ts=2:sw=2:tw=80:et: diff --git a/tests/gerrit_cache_test.py b/tests/gerrit_cache_test.py new file mode 100755 index 0000000000..0a7754247b --- /dev/null +++ b/tests/gerrit_cache_test.py @@ -0,0 +1,163 @@ +#!/usr/bin/env vpython3 +# Copyright (c) 2025 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +import os +import re +import shutil +import sys +import tempfile +import unittest + +ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.insert(0, ROOT_DIR) + +import gerrit_cache +import unittest.mock as mock + + +class AtomicFileWriterTest(unittest.TestCase): + + def setUp(self): + self.test_dir = tempfile.mkdtemp() + self.addCleanup(shutil.rmtree, self.test_dir) + + def testSuccess(self): + path = os.path.join(self.test_dir, 'test_file') + with gerrit_cache._AtomicFileWriter(path, 'w') as f: + f.write('content') + + self.assertTrue(os.path.exists(path)) + with open(path, 'r') as f: + self.assertEqual(f.read(), 'content') + + def testFailure(self): + path = os.path.join(self.test_dir, 'test_file') + try: + with gerrit_cache._AtomicFileWriter(path, 'w') as f: + f.write('content') + raise Exception('oops') + except Exception: + pass + + self.assertFalse(os.path.exists(path)) + + def testOverwrite(self): + path = os.path.join(self.test_dir, 'test_file') + with open(path, 'w') as f: + f.write('old') + + with gerrit_cache._AtomicFileWriter(path, 'w') as f: + f.write('new') + + with open(path, 'r') as f: + self.assertEqual(f.read(), 'new') + + def testOverwriteFailure(self): + path = os.path.join(self.test_dir, 'test_file') + with open(path, 'w') as f: + f.write('old') + + try: + with gerrit_cache._AtomicFileWriter(path, 'w') as f: + f.write('new') + raise Exception('oops') + except Exception: + pass + + with open(path, 'r') as f: + self.assertEqual(f.read(), 'old') + + +class GerritCacheTest(unittest.TestCase): + + def setUp(self): + self.test_dir = tempfile.mkdtemp() + self.addCleanup(shutil.rmtree, self.test_dir) + self.mock_git = mock.patch('gerrit_cache.scm.GIT').start() + self.addCleanup(mock.patch.stopall) + self.mock_git.GetConfig.return_value = None + + def testGetSet(self): + cache = gerrit_cache.GerritCache(self.test_dir) + cache.set('key', 'value') + self.assertEqual(cache.get('key'), 'value') + + # Verify persistence + cache2 = gerrit_cache.GerritCache(self.test_dir) + self.assertEqual(cache2.get('key'), 'value') + + def testGetSetBoolean(self): + cache = gerrit_cache.GerritCache(self.test_dir) + cache.setBoolean('bool_key', True) + self.assertTrue(cache.getBoolean('bool_key')) + + cache2 = gerrit_cache.GerritCache(self.test_dir) + self.assertTrue(cache2.getBoolean('bool_key')) + + def testGetMissing(self): + cache = gerrit_cache.GerritCache(self.test_dir) + self.assertIsNone(cache.get('missing')) + + def testCachePathFromEnv(self): + with mock.patch.dict( + os.environ, { + 'DEPOT_TOOLS_GERRIT_CACHE_PATH': + os.path.join(self.test_dir, 'env_cache.json') + }): + cache = gerrit_cache.GerritCache(self.test_dir) + cache.set('key', 'env_value') + self.assertEqual(cache.get('key'), 'env_value') + self.assertTrue( + os.path.exists(os.path.join(self.test_dir, 'env_cache.json'))) + + def testCachePathFromGitConfig(self): + config_path = os.path.join(self.test_dir, 'config_cache.json') + self.mock_git.GetConfig.return_value = config_path + cache = gerrit_cache.GerritCache(self.test_dir) + self.assertEqual(cache.cache_path, config_path) + + def testCachePathCreation(self): + # When no env var and no config, it should create a deterministic temp file and set config + cache = gerrit_cache.GerritCache(self.test_dir) + sanitized_path = re.sub(r'[^a-zA-Z0-9]', '_', + os.path.abspath(self.test_dir)) + expected_filename = 'depot_tools_gerrit_cache_%s.json' % sanitized_path + self.assertTrue(cache.cache_path.endswith(expected_filename)) + self.assertTrue(cache.cache_path.startswith(tempfile.gettempdir())) + self.mock_git.SetConfig.assert_called_with( + self.test_dir, 'depot-tools.gerrit-cache-path', cache.cache_path) + + def testCachePathCreationLinux(self): + root_dir = '/usr/local/google/home/user/repo' + with mock.patch('os.path.abspath', return_value=root_dir): + cache = gerrit_cache.GerritCache(root_dir) + sanitized_path = re.sub(r'[^a-zA-Z0-9]', '_', root_dir) + expected_filename = 'depot_tools_gerrit_cache_%s.json' % sanitized_path + self.assertTrue(cache.cache_path.endswith(expected_filename)) + + def testCachePathCreationWindows(self): + root_dir = r'C:\Users\user\repo' + # On Linux, os.path.abspath('C:\Users\user\repo') will prepend current cwd. + with mock.patch('os.path.abspath', return_value=root_dir): + cache = gerrit_cache.GerritCache(root_dir) + sanitized_path = re.sub(r'[^a-zA-Z0-9]', '_', root_dir) + expected_filename = 'depot_tools_gerrit_cache_%s.json' % sanitized_path + self.assertTrue(cache.cache_path.endswith(expected_filename)) + + def testCorruptCache(self): + cache = gerrit_cache.GerritCache(self.test_dir) + # Write garbage to the cache file + with open(cache.cache_path, 'w') as f: + f.write('this is not json') + + # Should not crash, just return None + self.assertIsNone(cache.get('key')) + + cache.set('key', 'value') + self.assertEqual(cache.get('key'), 'value') + + +if __name__ == "__main__": + unittest.main()