Speed up presubmit by caching code-owners check

Currently, presubmit queries the gerrit server every run to check if the
code-owners plugin is enabled.

This CL caches that result in a /tmp file per repository.

This saves as much as 1.5s per `git cl presubmit`.

Change-Id: I1a3631074d1bcbb1a254caa6654fd8434f069aa2
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/7227749
Commit-Queue: Josiah Kiehl <kiehl@google.com>
Reviewed-by: Yiwei Zhang <yiwzhang@google.com>
This commit is contained in:
Josiah Kiehl
2025-12-09 10:50:31 -08:00
committed by LUCI CQ
parent f520a9305e
commit 82130f468a
7 changed files with 371 additions and 12 deletions

View File

@@ -1421,7 +1421,7 @@ class GitWrapper(SCMWrapper):
logging.debug(
'Cloned into temporary dir, moving to checkout_path')
gclient_utils.safe_makedirs(self.checkout_path)
gclient_utils.safe_rename(
gclient_utils.safe_replace(
os.path.join(tmp_dir, '.git'),
os.path.join(self.checkout_path, '.git'))
except:

View File

@@ -250,10 +250,10 @@ def temporary_file():
os.remove(name)
def safe_rename(old, new):
"""Renames a file reliably.
def safe_replace(old, new):
"""Renames a file reliably, overwriting the destination if it exists.
Sometimes os.rename does not work because a dying git process keeps a handle
Sometimes os.replace does not work because a dying git process keeps a handle
on it for a few seconds. An exception is then thrown, which make the program
give up what it was doing and remove what was deleted.
The only solution is to catch the exception and try again until it works.
@@ -262,7 +262,7 @@ def safe_rename(old, new):
retries = 100
for i in range(retries):
try:
os.rename(old, new)
os.replace(old, new)
break
except OSError:
if i == (retries - 1):

140
gerrit_cache.py Normal file
View File

@@ -0,0 +1,140 @@
# Copyright (c) 2025 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import contextlib
import json
import os
import re
import tempfile
import gclient_utils
import lockfile
import scm
@contextlib.contextmanager
def _AtomicFileWriter(path, mode='w'):
"""Atomic file writer context manager."""
# Create temp file in same directory to ensure atomic rename works
fd, temp_path = tempfile.mkstemp(dir=os.path.dirname(path),
text='b' not in mode)
try:
with os.fdopen(fd, mode) as f:
yield f
# Atomic rename
gclient_utils.safe_replace(temp_path, path)
except Exception:
# Cleanup on failure
if os.path.exists(temp_path):
os.remove(temp_path)
raise
class GerritCache(object):
"""Simple JSON file-based cache for Gerrit API results."""
def __init__(self, root_dir):
self.root_dir = root_dir
self.cache_path = self._get_cache_path()
@staticmethod
def get_repo_code_owners_enabled_key(host, project):
return 'code-owners.%s.%s.enabled' % (host, project)
@staticmethod
def get_host_code_owners_enabled_key(host):
return 'code-owners.%s.enabled' % host
def _get_cache_path(self):
path = os.environ.get('DEPOT_TOOLS_GERRIT_CACHE_PATH')
if path:
return path
try:
path = scm.GIT.GetConfig(self.root_dir,
'depot-tools.gerrit-cache-path')
if path:
return path
except Exception:
pass
try:
if self.root_dir:
# Use a deterministic name based on the repo path
sanitized_path = re.sub(r'[^a-zA-Z0-9]', '_',
os.path.abspath(self.root_dir))
filename = 'depot_tools_gerrit_cache_%s.json' % sanitized_path
path = os.path.join(tempfile.gettempdir(), filename)
else:
fd, path = tempfile.mkstemp(prefix='depot_tools_gerrit_cache_',
suffix='.json')
os.close(fd)
if not os.path.exists(path):
# Initialize with empty JSON object
with open(path, 'w') as f:
json.dump({}, f)
try:
scm.GIT.SetConfig(self.root_dir,
'depot-tools.gerrit-cache-path', path)
except Exception:
# If we can't set config (e.g. not a git repo and no env var
# set), just return the temp path. It will be a per-process
# cache in that case.
pass
return path
except Exception:
# Fallback to random temp file if everything else fails
fd, path = tempfile.mkstemp(prefix='gerrit_cache_', suffix='.json')
os.close(fd)
return path
def get(self, key):
if not self.cache_path:
return None
try:
with lockfile.lock(self.cache_path, timeout=1):
if os.path.exists(self.cache_path):
with open(self.cache_path, 'r') as f:
try:
data = json.load(f)
return data.get(key)
except ValueError:
# Corrupt cache file, treat as miss
return None
except Exception:
# Ignore cache errors
return None
def set(self, key, value):
if not self.cache_path:
return
try:
with lockfile.lock(self.cache_path, timeout=1):
data = {}
if os.path.exists(self.cache_path):
with open(self.cache_path, 'r') as f:
try:
data = json.load(f)
except ValueError:
# Corrupt cache, start fresh
data = {}
data[key] = value
with _AtomicFileWriter(self.cache_path, 'w') as f:
json.dump(data, f)
except Exception:
# Ignore cache errors
pass
def getBoolean(self, key):
"""Returns the value for key as a boolean, or None if missing."""
val = self.get(key)
if val is None:
return None
return bool(val)
def setBoolean(self, key, value):
"""Sets the value for key as a boolean."""
self.set(key, bool(value))

View File

@@ -6,6 +6,7 @@ import os
import random
import gerrit_util
from gerrit_cache import GerritCache
import git_common
@@ -213,12 +214,22 @@ class GerritClient(OwnersClient):
paths))
def GetCodeOwnersClient(host, project, branch):
def GetCodeOwnersClient(host, project, branch, cache=None):
"""Get a new OwnersClient.
Uses GerritClient and raises an exception if code-owners plugin is not
available."""
if gerrit_util.IsCodeOwnersEnabledOnHost(host):
key = GerritCache.get_host_code_owners_enabled_key(host)
is_enabled = None
if cache:
is_enabled = cache.getBoolean(key)
if is_enabled is None:
is_enabled = gerrit_util.IsCodeOwnersEnabledOnHost(host)
if cache:
cache.setBoolean(key, is_enabled)
if is_enabled:
return GerritClient(host, project, branch)
raise Exception(
'code-owners plugin is not enabled. Ask your host admin to enable it '

View File

@@ -45,6 +45,8 @@ import gclient_paths # Exposed through the API
import gclient_utils
import git_footers
import gerrit_util
from gerrit_cache import GerritCache
import lockfile
import owners_client
import owners_finder
import presubmit_canned_checks
@@ -499,12 +501,14 @@ class GerritAccessor(object):
To avoid excessive Gerrit calls, caches the results.
"""
def __init__(self, url=None, project=None, branch=None):
def __init__(self, url=None, project=None, branch=None, root=None):
self.host = urlparse.urlparse(url).netloc if url else None
self.project = project
self.branch = branch
self.cache = {}
self.code_owners_enabled = None
self.persistent_cache = GerritCache(root)
def _FetchChangeDetail(self, issue):
# Separate function to be easily mocked in tests.
@@ -596,9 +600,20 @@ class GerritAccessor(object):
notify='NONE')
def IsCodeOwnersEnabledOnRepo(self):
if self.code_owners_enabled is None:
if self.persistent_cache:
key = GerritCache.get_repo_code_owners_enabled_key(
self.host, self.project)
self.code_owners_enabled = self.persistent_cache.getBoolean(key)
if self.code_owners_enabled is None:
self.code_owners_enabled = gerrit_util.IsCodeOwnersEnabledOnRepo(
self.host, self.project)
self.persistent_cache.setBoolean(key,
self.code_owners_enabled)
else:
self.code_owners_enabled = gerrit_util.IsCodeOwnersEnabledOnRepo(
self.host, self.project)
return self.code_owners_enabled
@@ -790,7 +805,8 @@ class InputApi(object):
self.owners_client = owners_client.GetCodeOwnersClient(
host=self.gerrit.host,
project=self.gerrit.project,
branch=self.gerrit.branch)
branch=self.gerrit.branch,
cache=self.gerrit.persistent_cache)
except Exception as e:
print('Failed to set owners_client - %s' % str(e))
self.owners_finder = owners_finder.OwnersFinder
@@ -808,6 +824,7 @@ class InputApi(object):
(a, b, header) for (a, b, header) in cpplint._re_pattern_templates
]
def SetTimeout(self, timeout):
self.thread_pool.timeout = timeout
@@ -2335,7 +2352,8 @@ def _parse_gerrit_options(parser, options):
if options.gerrit_url:
gerrit_obj = GerritAccessor(url=options.gerrit_url,
project=options.gerrit_project,
branch=options.gerrit_branch)
branch=options.gerrit_branch,
root=options.root)
if not options.gerrit_fetch:
return gerrit_obj

View File

@@ -419,4 +419,31 @@ class GClientUtilsTest(trial_dir.TestCase):
if __name__ == '__main__':
unittest.main()
class SafeReplaceTest(unittest.TestCase):
def setUp(self):
self.mock_os = mock.patch('gclient_utils.os').start()
self.addCleanup(mock.patch.stopall)
def testReplace(self):
gclient_utils.safe_replace('old', 'new')
self.mock_os.replace.assert_called_with('old', 'new')
def testReplaceRetry(self):
self.mock_os.replace.side_effect = [OSError, OSError, None]
with mock.patch('time.sleep') as mock_sleep:
gclient_utils.safe_replace('old', 'new')
self.assertEqual(self.mock_os.replace.call_count, 3)
self.assertEqual(mock_sleep.call_count, 2)
def testReplaceFailure(self):
self.mock_os.replace.side_effect = OSError
with mock.patch('time.sleep'):
with self.assertRaises(OSError):
gclient_utils.safe_replace('old', 'new')
# vim: ts=2:sw=2:tw=80:et:

163
tests/gerrit_cache_test.py Executable file
View File

@@ -0,0 +1,163 @@
#!/usr/bin/env vpython3
# Copyright (c) 2025 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import os
import re
import shutil
import sys
import tempfile
import unittest
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, ROOT_DIR)
import gerrit_cache
import unittest.mock as mock
class AtomicFileWriterTest(unittest.TestCase):
def setUp(self):
self.test_dir = tempfile.mkdtemp()
self.addCleanup(shutil.rmtree, self.test_dir)
def testSuccess(self):
path = os.path.join(self.test_dir, 'test_file')
with gerrit_cache._AtomicFileWriter(path, 'w') as f:
f.write('content')
self.assertTrue(os.path.exists(path))
with open(path, 'r') as f:
self.assertEqual(f.read(), 'content')
def testFailure(self):
path = os.path.join(self.test_dir, 'test_file')
try:
with gerrit_cache._AtomicFileWriter(path, 'w') as f:
f.write('content')
raise Exception('oops')
except Exception:
pass
self.assertFalse(os.path.exists(path))
def testOverwrite(self):
path = os.path.join(self.test_dir, 'test_file')
with open(path, 'w') as f:
f.write('old')
with gerrit_cache._AtomicFileWriter(path, 'w') as f:
f.write('new')
with open(path, 'r') as f:
self.assertEqual(f.read(), 'new')
def testOverwriteFailure(self):
path = os.path.join(self.test_dir, 'test_file')
with open(path, 'w') as f:
f.write('old')
try:
with gerrit_cache._AtomicFileWriter(path, 'w') as f:
f.write('new')
raise Exception('oops')
except Exception:
pass
with open(path, 'r') as f:
self.assertEqual(f.read(), 'old')
class GerritCacheTest(unittest.TestCase):
def setUp(self):
self.test_dir = tempfile.mkdtemp()
self.addCleanup(shutil.rmtree, self.test_dir)
self.mock_git = mock.patch('gerrit_cache.scm.GIT').start()
self.addCleanup(mock.patch.stopall)
self.mock_git.GetConfig.return_value = None
def testGetSet(self):
cache = gerrit_cache.GerritCache(self.test_dir)
cache.set('key', 'value')
self.assertEqual(cache.get('key'), 'value')
# Verify persistence
cache2 = gerrit_cache.GerritCache(self.test_dir)
self.assertEqual(cache2.get('key'), 'value')
def testGetSetBoolean(self):
cache = gerrit_cache.GerritCache(self.test_dir)
cache.setBoolean('bool_key', True)
self.assertTrue(cache.getBoolean('bool_key'))
cache2 = gerrit_cache.GerritCache(self.test_dir)
self.assertTrue(cache2.getBoolean('bool_key'))
def testGetMissing(self):
cache = gerrit_cache.GerritCache(self.test_dir)
self.assertIsNone(cache.get('missing'))
def testCachePathFromEnv(self):
with mock.patch.dict(
os.environ, {
'DEPOT_TOOLS_GERRIT_CACHE_PATH':
os.path.join(self.test_dir, 'env_cache.json')
}):
cache = gerrit_cache.GerritCache(self.test_dir)
cache.set('key', 'env_value')
self.assertEqual(cache.get('key'), 'env_value')
self.assertTrue(
os.path.exists(os.path.join(self.test_dir, 'env_cache.json')))
def testCachePathFromGitConfig(self):
config_path = os.path.join(self.test_dir, 'config_cache.json')
self.mock_git.GetConfig.return_value = config_path
cache = gerrit_cache.GerritCache(self.test_dir)
self.assertEqual(cache.cache_path, config_path)
def testCachePathCreation(self):
# When no env var and no config, it should create a deterministic temp file and set config
cache = gerrit_cache.GerritCache(self.test_dir)
sanitized_path = re.sub(r'[^a-zA-Z0-9]', '_',
os.path.abspath(self.test_dir))
expected_filename = 'depot_tools_gerrit_cache_%s.json' % sanitized_path
self.assertTrue(cache.cache_path.endswith(expected_filename))
self.assertTrue(cache.cache_path.startswith(tempfile.gettempdir()))
self.mock_git.SetConfig.assert_called_with(
self.test_dir, 'depot-tools.gerrit-cache-path', cache.cache_path)
def testCachePathCreationLinux(self):
root_dir = '/usr/local/google/home/user/repo'
with mock.patch('os.path.abspath', return_value=root_dir):
cache = gerrit_cache.GerritCache(root_dir)
sanitized_path = re.sub(r'[^a-zA-Z0-9]', '_', root_dir)
expected_filename = 'depot_tools_gerrit_cache_%s.json' % sanitized_path
self.assertTrue(cache.cache_path.endswith(expected_filename))
def testCachePathCreationWindows(self):
root_dir = r'C:\Users\user\repo'
# On Linux, os.path.abspath('C:\Users\user\repo') will prepend current cwd.
with mock.patch('os.path.abspath', return_value=root_dir):
cache = gerrit_cache.GerritCache(root_dir)
sanitized_path = re.sub(r'[^a-zA-Z0-9]', '_', root_dir)
expected_filename = 'depot_tools_gerrit_cache_%s.json' % sanitized_path
self.assertTrue(cache.cache_path.endswith(expected_filename))
def testCorruptCache(self):
cache = gerrit_cache.GerritCache(self.test_dir)
# Write garbage to the cache file
with open(cache.cache_path, 'w') as f:
f.write('this is not json')
# Should not crash, just return None
self.assertIsNone(cache.get('key'))
cache.set('key', 'value')
self.assertEqual(cache.get('key'), 'value')
if __name__ == "__main__":
unittest.main()