mirror of
https://chromium.googlesource.com/chromium/tools/depot_tools.git
synced 2026-01-11 18:51:29 +00:00
Optimize CheckForCommitObjects by batching git ls-tree operations
Significantly reduces the execution time of presubmit checks by optimizing CheckForCommitObjects. For CLs with fewer than 1000 affected files, run `git ls-tree` only on specific files instead of scanning the full tree. This yields a ~70x speedup (~0.97s -> ~0.01s) for typical CLs. Change-Id: Ia8b89dbb14a5129ba79944282deba52a3558bdf2 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/7234371 Commit-Queue: Josiah Kiehl <kiehl@google.com> Reviewed-by: Yiwei Zhang <yiwzhang@google.com>
This commit is contained in:
@@ -2074,11 +2074,25 @@ def CheckForCommitObjects(input_api, output_api):
|
|||||||
spaceparts = tabparts[0].split(' ', 2)
|
spaceparts = tabparts[0].split(' ', 2)
|
||||||
return (spaceparts[0], spaceparts[1], spaceparts[2], tabparts[1])
|
return (spaceparts[0], spaceparts[1], spaceparts[2], tabparts[1])
|
||||||
|
|
||||||
full_tree = input_api.subprocess.check_output(
|
# If the number of affected files is small, we can avoid scanning the entire
|
||||||
['git', 'ls-tree', '-r', '--full-tree', '-z', 'HEAD'],
|
# tree.
|
||||||
cwd=input_api.PresubmitLocalPath())
|
affected_files = list(input_api.AffectedFiles())
|
||||||
|
cmd = ['git', 'ls-tree', '-z', '--full-tree', 'HEAD']
|
||||||
|
if len(affected_files) < 1000:
|
||||||
|
# We need to pass the paths relative to the repository root.
|
||||||
|
repo_root = input_api.change.RepositoryRoot()
|
||||||
|
files_to_check = [
|
||||||
|
input_api.os_path.relpath(f.AbsoluteLocalPath(), repo_root)
|
||||||
|
for f in affected_files
|
||||||
|
]
|
||||||
|
cmd.extend(['--'] + files_to_check)
|
||||||
|
else:
|
||||||
|
cmd.extend(['-r'])
|
||||||
|
|
||||||
if _GIT_MODE_SUBMODULE not in full_tree:
|
tree_data = input_api.subprocess.check_output(
|
||||||
|
cmd, cwd=input_api.PresubmitLocalPath())
|
||||||
|
|
||||||
|
if _GIT_MODE_SUBMODULE not in tree_data:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# commit_tree_entries holds all commit entries (ie gitlink, submodule
|
# commit_tree_entries holds all commit entries (ie gitlink, submodule
|
||||||
@@ -2086,16 +2100,16 @@ def CheckForCommitObjects(input_api, output_api):
|
|||||||
commit_tree_entries = []
|
commit_tree_entries = []
|
||||||
pos = 0
|
pos = 0
|
||||||
while True:
|
while True:
|
||||||
pos = full_tree.find(_GIT_MODE_SUBMODULE, pos)
|
pos = tree_data.find(_GIT_MODE_SUBMODULE, pos)
|
||||||
if pos == -1:
|
if pos == -1:
|
||||||
break
|
break
|
||||||
|
|
||||||
# Check if this occurrence is at the start of an entry.
|
# Check if this occurrence is at the start of an entry.
|
||||||
# It must be at the start of the string or preceded by a null terminator.
|
# It must be at the start of the string or preceded by a null terminator.
|
||||||
if pos == 0 or full_tree[pos - 1] == 0:
|
if pos == 0 or tree_data[pos - 1] == 0:
|
||||||
# Find the end of this entry.
|
# Find the end of this entry.
|
||||||
end = full_tree.find(b'\0', pos)
|
end = tree_data.find(b'\0', pos)
|
||||||
entry = full_tree[pos:end]
|
entry = tree_data[pos:end]
|
||||||
|
|
||||||
tree_entry = parse_tree_entry(entry.decode('utf-8'))
|
tree_entry = parse_tree_entry(entry.decode('utf-8'))
|
||||||
if tree_entry[1] == 'commit':
|
if tree_entry[1] == 'commit':
|
||||||
|
|||||||
91
tests/check_for_commit_objects_test.py
Normal file
91
tests/check_for_commit_objects_test.py
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright (c) 2025 The Chromium Authors. All rights reserved.
|
||||||
|
# Use of this source code is governed by a BSD-style license that can be
|
||||||
|
# found in the LICENSE file.
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import unittest
|
||||||
|
from unittest import mock
|
||||||
|
|
||||||
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||||
|
|
||||||
|
import presubmit_canned_checks
|
||||||
|
from testing_support.presubmit_canned_checks_test_mocks import MockInputApi, MockOutputApi, MockFile
|
||||||
|
|
||||||
|
|
||||||
|
class CheckForCommitObjectsTest(unittest.TestCase):
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
self.input_api = MockInputApi()
|
||||||
|
self.output_api = MockOutputApi()
|
||||||
|
self.input_api.change.RepositoryRoot = lambda: '/tmp/repo'
|
||||||
|
self.input_api.PresubmitLocalPath = lambda: '/tmp/repo'
|
||||||
|
self.input_api.change.scm = 'git'
|
||||||
|
|
||||||
|
# Patch ParseDeps to avoid reading DEPS file
|
||||||
|
self.patcher = mock.patch('presubmit_canned_checks._ParseDeps')
|
||||||
|
self.mock_parse_deps = self.patcher.start()
|
||||||
|
self.mock_parse_deps.return_value = {'git_dependencies': 'DEPS'}
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
self.patcher.stop()
|
||||||
|
|
||||||
|
def testBatchedExecutionSmallCL(self):
|
||||||
|
# 2 files, should run batched git ls-tree
|
||||||
|
self.input_api.files = [
|
||||||
|
MockFile(os.path.join('/tmp/repo', 'a.txt'), []),
|
||||||
|
MockFile(os.path.join('/tmp/repo', 'b.txt'), [])
|
||||||
|
]
|
||||||
|
|
||||||
|
# Mock check_output
|
||||||
|
self.input_api.subprocess.check_output = mock.Mock(return_value=b'')
|
||||||
|
|
||||||
|
presubmit_canned_checks.CheckForCommitObjects(self.input_api,
|
||||||
|
self.output_api)
|
||||||
|
|
||||||
|
# Verify check_output was called with specific files
|
||||||
|
args = self.input_api.subprocess.check_output.call_args[0][0]
|
||||||
|
self.assertIn('ls-tree', args)
|
||||||
|
self.assertIn('a.txt', args)
|
||||||
|
self.assertIn('b.txt', args)
|
||||||
|
self.assertIn('--full-tree', args)
|
||||||
|
|
||||||
|
def testFullTreeExecutionLargeCL(self):
|
||||||
|
# 1001 files, should run full tree scan
|
||||||
|
self.input_api.files = [
|
||||||
|
MockFile(os.path.join('/tmp/repo', f'f{i}.txt'), [])
|
||||||
|
for i in range(1001)
|
||||||
|
]
|
||||||
|
|
||||||
|
# Mock check_output
|
||||||
|
self.input_api.subprocess.check_output = mock.Mock(return_value=b'')
|
||||||
|
|
||||||
|
presubmit_canned_checks.CheckForCommitObjects(self.input_api,
|
||||||
|
self.output_api)
|
||||||
|
|
||||||
|
# Verify check_output was called with --full-tree
|
||||||
|
args = self.input_api.subprocess.check_output.call_args[0][0]
|
||||||
|
self.assertIn('ls-tree', args)
|
||||||
|
self.assertIn('--full-tree', args)
|
||||||
|
self.assertNotIn('f0.txt', args)
|
||||||
|
|
||||||
|
def testBatchedFoundCommit(self):
|
||||||
|
# 1 file, found a commit object (gitlink)
|
||||||
|
self.input_api.files = [
|
||||||
|
MockFile(os.path.join('/tmp/repo', 'submodule'), [])
|
||||||
|
]
|
||||||
|
|
||||||
|
# Mock output: 160000 commit <hash>\tsubmodule
|
||||||
|
# NOTE: The loop in CheckForCommitObjects looks for _GIT_MODE_SUBMODULE (b'160000')
|
||||||
|
self.input_api.subprocess.check_output = mock.Mock(
|
||||||
|
return_value=(b'160000 commit 1234567890abcdef\tsubmodule\0'))
|
||||||
|
|
||||||
|
results = presubmit_canned_checks.CheckForCommitObjects(
|
||||||
|
self.input_api, self.output_api)
|
||||||
|
self.assertEqual(len(results), 1)
|
||||||
|
self.assertIn('submodule', results[0].message)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
||||||
@@ -757,9 +757,6 @@ class CheckAyeAyeTest(unittest.TestCase):
|
|||||||
self.assertEqual(results[0].type, 'error')
|
self.assertEqual(results[0].type, 'error')
|
||||||
self.assertIn("Failed to run.", results[0].message)
|
self.assertIn("Failed to run.", results[0].message)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
unittest.main()
|
|
||||||
|
|
||||||
|
|
||||||
class CheckForCommitObjectsTest(unittest.TestCase):
|
class CheckForCommitObjectsTest(unittest.TestCase):
|
||||||
|
|
||||||
@@ -772,6 +769,7 @@ class CheckForCommitObjectsTest(unittest.TestCase):
|
|||||||
self.patcher = mock.patch('presubmit_canned_checks._ParseDeps')
|
self.patcher = mock.patch('presubmit_canned_checks._ParseDeps')
|
||||||
self.mock_parse_deps = self.patcher.start()
|
self.mock_parse_deps = self.patcher.start()
|
||||||
self.mock_parse_deps.return_value = {'git_dependencies': 'DEPS'}
|
self.mock_parse_deps.return_value = {'git_dependencies': 'DEPS'}
|
||||||
|
self.input_api.change.RepositoryRoot = lambda: ''
|
||||||
|
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
self.patcher.stop()
|
self.patcher.stop()
|
||||||
@@ -865,3 +863,53 @@ class CheckForCommitObjectsTest(unittest.TestCase):
|
|||||||
results = presubmit_canned_checks.CheckForCommitObjects(
|
results = presubmit_canned_checks.CheckForCommitObjects(
|
||||||
self.input_api, self.output_api)
|
self.input_api, self.output_api)
|
||||||
self.assertEqual(0, len(results))
|
self.assertEqual(0, len(results))
|
||||||
|
|
||||||
|
def testRunFromSubdir_SmallFiles_NoSubmodules(self):
|
||||||
|
self.input_api.presubmit_local_path = os.path.join(ROOT_DIR, 'subdir')
|
||||||
|
self.input_api.change.RepositoryRoot = lambda: ROOT_DIR
|
||||||
|
self.input_api.files = [MockAffectedFile('foo.txt', 'content')]
|
||||||
|
self.input_api.subprocess.check_output.return_value = b''
|
||||||
|
|
||||||
|
results = presubmit_canned_checks.CheckForCommitObjects(
|
||||||
|
self.input_api, self.output_api)
|
||||||
|
self.assertEqual(0, len(results))
|
||||||
|
|
||||||
|
def testRunFromSubdir_SmallFiles_WithSubmodules(self):
|
||||||
|
self.input_api.presubmit_local_path = os.path.join(ROOT_DIR, 'subdir')
|
||||||
|
self.input_api.change.RepositoryRoot = lambda: ROOT_DIR
|
||||||
|
self.input_api.files = [MockAffectedFile('foo.txt', 'content')]
|
||||||
|
self.input_api.subprocess.check_output.return_value = b'160000 commit 1234\tsubmodule\0'
|
||||||
|
|
||||||
|
results = presubmit_canned_checks.CheckForCommitObjects(
|
||||||
|
self.input_api, self.output_api)
|
||||||
|
self.assertEqual(1, len(results))
|
||||||
|
self.assertIn('submodule', results[0].items)
|
||||||
|
|
||||||
|
def testRunFromSubdir_LargeFiles_NoSubmodules(self):
|
||||||
|
self.input_api.presubmit_local_path = os.path.join(ROOT_DIR, 'subdir')
|
||||||
|
self.input_api.change.RepositoryRoot = lambda: ROOT_DIR
|
||||||
|
self.input_api.files = [
|
||||||
|
MockAffectedFile(f'f{i}', '') for i in range(1001)
|
||||||
|
]
|
||||||
|
self.input_api.subprocess.check_output.return_value = b''
|
||||||
|
|
||||||
|
results = presubmit_canned_checks.CheckForCommitObjects(
|
||||||
|
self.input_api, self.output_api)
|
||||||
|
self.assertEqual(0, len(results))
|
||||||
|
|
||||||
|
def testRunFromSubdir_LargeFiles_WithSubmodules(self):
|
||||||
|
self.input_api.presubmit_local_path = os.path.join(ROOT_DIR, 'subdir')
|
||||||
|
self.input_api.change.RepositoryRoot = lambda: ROOT_DIR
|
||||||
|
self.input_api.files = [
|
||||||
|
MockAffectedFile(f'f{i}', '') for i in range(1001)
|
||||||
|
]
|
||||||
|
self.input_api.subprocess.check_output.return_value = b'160000 commit 1234\tsubmodule\0'
|
||||||
|
|
||||||
|
results = presubmit_canned_checks.CheckForCommitObjects(
|
||||||
|
self.input_api, self.output_api)
|
||||||
|
self.assertEqual(1, len(results))
|
||||||
|
self.assertIn('submodule', results[0].items)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
unittest.main()
|
||||||
|
|||||||
Reference in New Issue
Block a user