Optimize CheckForCommitObjects by batching git ls-tree operations

Significantly reduces the execution time of presubmit checks by
optimizing CheckForCommitObjects.

For CLs with fewer than 1000 affected files, run `git ls-tree` only on
the affected paths instead of scanning the full tree; larger CLs still
use the recursive full-tree scan. This yields a ~70x speedup
(~0.97s -> ~0.01s) for typical CLs.

Change-Id: Ia8b89dbb14a5129ba79944282deba52a3558bdf2
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/7234371
Commit-Queue: Josiah Kiehl <kiehl@google.com>
Reviewed-by: Yiwei Zhang <yiwzhang@google.com>
This commit is contained in:
Josiah Kiehl
2025-12-09 15:07:14 -08:00
committed by LUCI CQ
parent 0861b0e55c
commit 7c09ebfd12
3 changed files with 164 additions and 11 deletions

View File

@@ -2074,11 +2074,25 @@ def CheckForCommitObjects(input_api, output_api):
spaceparts = tabparts[0].split(' ', 2) spaceparts = tabparts[0].split(' ', 2)
return (spaceparts[0], spaceparts[1], spaceparts[2], tabparts[1]) return (spaceparts[0], spaceparts[1], spaceparts[2], tabparts[1])
full_tree = input_api.subprocess.check_output( # If the number of affected files is small, we can avoid scanning the entire
['git', 'ls-tree', '-r', '--full-tree', '-z', 'HEAD'], # tree.
cwd=input_api.PresubmitLocalPath()) affected_files = list(input_api.AffectedFiles())
cmd = ['git', 'ls-tree', '-z', '--full-tree', 'HEAD']
if len(affected_files) < 1000:
# We need to pass the paths relative to the repository root.
repo_root = input_api.change.RepositoryRoot()
files_to_check = [
input_api.os_path.relpath(f.AbsoluteLocalPath(), repo_root)
for f in affected_files
]
cmd.extend(['--'] + files_to_check)
else:
cmd.extend(['-r'])
if _GIT_MODE_SUBMODULE not in full_tree: tree_data = input_api.subprocess.check_output(
cmd, cwd=input_api.PresubmitLocalPath())
if _GIT_MODE_SUBMODULE not in tree_data:
return [] return []
# commit_tree_entries holds all commit entries (ie gitlink, submodule # commit_tree_entries holds all commit entries (ie gitlink, submodule
@@ -2086,16 +2100,16 @@ def CheckForCommitObjects(input_api, output_api):
commit_tree_entries = [] commit_tree_entries = []
pos = 0 pos = 0
while True: while True:
pos = full_tree.find(_GIT_MODE_SUBMODULE, pos) pos = tree_data.find(_GIT_MODE_SUBMODULE, pos)
if pos == -1: if pos == -1:
break break
# Check if this occurrence is at the start of an entry. # Check if this occurrence is at the start of an entry.
# It must be at the start of the string or preceded by a null terminator. # It must be at the start of the string or preceded by a null terminator.
if pos == 0 or full_tree[pos - 1] == 0: if pos == 0 or tree_data[pos - 1] == 0:
# Find the end of this entry. # Find the end of this entry.
end = full_tree.find(b'\0', pos) end = tree_data.find(b'\0', pos)
entry = full_tree[pos:end] entry = tree_data[pos:end]
tree_entry = parse_tree_entry(entry.decode('utf-8')) tree_entry = parse_tree_entry(entry.decode('utf-8'))
if tree_entry[1] == 'commit': if tree_entry[1] == 'commit':

View File

@@ -0,0 +1,91 @@
#!/usr/bin/env python3
# Copyright (c) 2025 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import os
import sys
import unittest
from unittest import mock
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import presubmit_canned_checks
from testing_support.presubmit_canned_checks_test_mocks import MockInputApi, MockOutputApi, MockFile
class CheckForCommitObjectsTest(unittest.TestCase):
    """Tests how CheckForCommitObjects builds its `git ls-tree` command.

    The check passes `--full-tree` on every invocation, so that flag cannot
    be used to tell the two code paths apart. The batched path (fewer than
    1000 affected files) appends `--` followed by the affected paths, while
    the full scan appends `-r` instead; the tests below assert on those
    distinguishing arguments.
    """

    def setUp(self):
        self.input_api = MockInputApi()
        self.output_api = MockOutputApi()
        # The repository root and the presubmit directory coincide, so paths
        # relative to the root are just the bare file names.
        self.input_api.change.RepositoryRoot = lambda: '/tmp/repo'
        self.input_api.PresubmitLocalPath = lambda: '/tmp/repo'
        self.input_api.change.scm = 'git'

        # Patch ParseDeps to avoid reading DEPS file
        self.patcher = mock.patch('presubmit_canned_checks._ParseDeps')
        self.mock_parse_deps = self.patcher.start()
        self.mock_parse_deps.return_value = {'git_dependencies': 'DEPS'}

    def tearDown(self):
        self.patcher.stop()

    def _RunAndGetLsTreeArgs(self):
        # Runs the check with `git ls-tree` mocked to print nothing and
        # returns the argv list that was handed to check_output.
        self.input_api.subprocess.check_output = mock.Mock(return_value=b'')
        presubmit_canned_checks.CheckForCommitObjects(self.input_api,
                                                      self.output_api)
        return self.input_api.subprocess.check_output.call_args[0][0]

    def testBatchedExecutionSmallCL(self):
        # 2 files, should run batched git ls-tree
        self.input_api.files = [
            MockFile(os.path.join('/tmp/repo', 'a.txt'), []),
            MockFile(os.path.join('/tmp/repo', 'b.txt'), [])
        ]

        args = self._RunAndGetLsTreeArgs()

        # Verify check_output was called with specific files
        self.assertIn('ls-tree', args)
        self.assertIn('a.txt', args)
        self.assertIn('b.txt', args)
        self.assertIn('--full-tree', args)
        # The batched path must not recurse over the whole tree.
        self.assertNotIn('-r', args)
        # Everything after the `--` separator is exactly the affected paths,
        # expressed relative to the repository root.
        self.assertIn('--', args)
        self.assertCountEqual(['a.txt', 'b.txt'],
                              args[args.index('--') + 1:])

    def testFullTreeExecutionLargeCL(self):
        # 1001 files, should run full tree scan
        self.input_api.files = [
            MockFile(os.path.join('/tmp/repo', f'f{i}.txt'), [])
            for i in range(1001)
        ]

        args = self._RunAndGetLsTreeArgs()

        self.assertIn('ls-tree', args)
        self.assertIn('--full-tree', args)
        # `-r` is what actually distinguishes the full scan from the batched
        # path; `--full-tree` alone is present in both.
        self.assertIn('-r', args)
        # No path arguments are passed for a full scan.
        self.assertNotIn('--', args)
        self.assertNotIn('f0.txt', args)

    def testBatchedFoundCommit(self):
        # 1 file, found a commit object (gitlink)
        self.input_api.files = [
            MockFile(os.path.join('/tmp/repo', 'submodule'), [])
        ]

        # Mock output: 160000 commit <hash>\tsubmodule
        # NOTE: The loop in CheckForCommitObjects looks for
        # _GIT_MODE_SUBMODULE (b'160000')
        self.input_api.subprocess.check_output = mock.Mock(
            return_value=(b'160000 commit 1234567890abcdef\tsubmodule\0'))

        results = presubmit_canned_checks.CheckForCommitObjects(
            self.input_api, self.output_api)

        self.assertEqual(len(results), 1)
        self.assertIn('submodule', results[0].message)
# Run this file's tests when it is executed directly as a script.
if __name__ == '__main__':
unittest.main()

View File

@@ -757,9 +757,6 @@ class CheckAyeAyeTest(unittest.TestCase):
self.assertEqual(results[0].type, 'error') self.assertEqual(results[0].type, 'error')
self.assertIn("Failed to run.", results[0].message) self.assertIn("Failed to run.", results[0].message)
if __name__ == '__main__':
unittest.main()
class CheckForCommitObjectsTest(unittest.TestCase): class CheckForCommitObjectsTest(unittest.TestCase):
@@ -772,6 +769,7 @@ class CheckForCommitObjectsTest(unittest.TestCase):
self.patcher = mock.patch('presubmit_canned_checks._ParseDeps') self.patcher = mock.patch('presubmit_canned_checks._ParseDeps')
self.mock_parse_deps = self.patcher.start() self.mock_parse_deps = self.patcher.start()
self.mock_parse_deps.return_value = {'git_dependencies': 'DEPS'} self.mock_parse_deps.return_value = {'git_dependencies': 'DEPS'}
self.input_api.change.RepositoryRoot = lambda: ''
def tearDown(self): def tearDown(self):
self.patcher.stop() self.patcher.stop()
@@ -865,3 +863,53 @@ class CheckForCommitObjectsTest(unittest.TestCase):
results = presubmit_canned_checks.CheckForCommitObjects( results = presubmit_canned_checks.CheckForCommitObjects(
self.input_api, self.output_api) self.input_api, self.output_api)
self.assertEqual(0, len(results)) self.assertEqual(0, len(results))
def testRunFromSubdir_SmallFiles_NoSubmodules(self):
    # One affected file, presubmit directory below the repository root, and
    # a mocked check_output that returns nothing: expect no results.
    presubmit_dir = os.path.join(ROOT_DIR, 'subdir')
    affected = [MockAffectedFile('foo.txt', 'content')]

    self.input_api.presubmit_local_path = presubmit_dir
    self.input_api.change.RepositoryRoot = lambda: ROOT_DIR
    self.input_api.files = affected
    self.input_api.subprocess.check_output.return_value = b''

    findings = presubmit_canned_checks.CheckForCommitObjects(
        self.input_api, self.output_api)

    self.assertEqual(0, len(findings))
def testRunFromSubdir_SmallFiles_WithSubmodules(self):
    # One affected file, presubmit directory below the repository root, and
    # a mocked check_output that returns a single NUL-terminated
    # `160000 commit` entry named 'submodule': expect it to be reported.
    presubmit_dir = os.path.join(ROOT_DIR, 'subdir')
    affected = [MockAffectedFile('foo.txt', 'content')]
    ls_tree_output = b'160000 commit 1234\tsubmodule\0'

    self.input_api.presubmit_local_path = presubmit_dir
    self.input_api.change.RepositoryRoot = lambda: ROOT_DIR
    self.input_api.files = affected
    self.input_api.subprocess.check_output.return_value = ls_tree_output

    findings = presubmit_canned_checks.CheckForCommitObjects(
        self.input_api, self.output_api)

    self.assertEqual(1, len(findings))
    self.assertIn('submodule', findings[0].items)
def testRunFromSubdir_LargeFiles_NoSubmodules(self):
    # 1001 affected files, presubmit directory below the repository root,
    # and a mocked check_output that returns nothing: expect no results.
    presubmit_dir = os.path.join(ROOT_DIR, 'subdir')
    affected = []
    for i in range(1001):
        affected.append(MockAffectedFile(f'f{i}', ''))

    self.input_api.presubmit_local_path = presubmit_dir
    self.input_api.change.RepositoryRoot = lambda: ROOT_DIR
    self.input_api.files = affected
    self.input_api.subprocess.check_output.return_value = b''

    findings = presubmit_canned_checks.CheckForCommitObjects(
        self.input_api, self.output_api)

    self.assertEqual(0, len(findings))
def testRunFromSubdir_LargeFiles_WithSubmodules(self):
    # 1001 affected files, presubmit directory below the repository root,
    # and a mocked check_output that returns a single NUL-terminated
    # `160000 commit` entry named 'submodule': expect it to be reported.
    presubmit_dir = os.path.join(ROOT_DIR, 'subdir')
    affected = []
    for i in range(1001):
        affected.append(MockAffectedFile(f'f{i}', ''))
    ls_tree_output = b'160000 commit 1234\tsubmodule\0'

    self.input_api.presubmit_local_path = presubmit_dir
    self.input_api.change.RepositoryRoot = lambda: ROOT_DIR
    self.input_api.files = affected
    self.input_api.subprocess.check_output.return_value = ls_tree_output

    findings = presubmit_canned_checks.CheckForCommitObjects(
        self.input_api, self.output_api)

    self.assertEqual(1, len(findings))
    self.assertIn('submodule', findings[0].items)
# Run this file's tests when it is executed directly as a script.
if __name__ == '__main__':
unittest.main()