mirror of
https://chromium.googlesource.com/chromium/tools/depot_tools.git
synced 2026-01-11 18:51:29 +00:00
[git cl split] Add trie structure for clustering
This CL defines a trie-based data structure for representing files based on their path. It directly mirrors the structure of a file system, keeping track of directories and the files inside them. It also stores some information about OWNERS files, for use during clustering (we won't cluster files together if there's a "break" in ownership due to `set noparent`). Optionally, the ownership information can be overridden; this will be done via a command-line flag when the algorithm is fully implemented. Bug: 335797528 Change-Id: I5dcdf36695a1da5714ec021e5e18b6c36855a4f1 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/6321290 Reviewed-by: Josip Sokcevic <sokcevic@chromium.org> Commit-Queue: Devon Loehr <dloehr@google.com>
This commit is contained in:
91
split_cl.py
91
split_cl.py
@@ -564,6 +564,10 @@ def SelectReviewersForFiles(cl, author, files, max_depth):
|
||||
return info_split_by_reviewers
|
||||
|
||||
|
||||
################################################################################
|
||||
# Code for saving, editing, and loading splittings.
|
||||
################################################################################
|
||||
|
||||
def SaveSplittingToFile(cl_infos: List[CLInfo], filename: str, silent=False):
|
||||
"""
|
||||
Writes the listed CLs to the designated file, in a human-readable and
|
||||
@@ -773,3 +777,90 @@ def EditSplittingInteractively(
|
||||
SaveSplittingToFile(cl_infos, tmp_file)
|
||||
ValidateSplitting(cl_infos, "the provided splitting", files_on_disk)
|
||||
return cl_infos, tmp_file
|
||||
|
||||
|
||||
################################################################################
|
||||
# Code for the clustering-based splitting algorithm.
|
||||
################################################################################
|
||||
|
||||
### Trie Code
|
||||
|
||||
|
||||
def FolderHasParent(path: str) -> bool:
    """
    Check if a folder inherits owners from a higher-level directory:
    i.e. it's not at top level, and doesn't have an OWNERS file that contains
    `set noparent`.

    Args:
        path: Path of the folder, using `os.path.sep` as the separator.

    Returns:
        False if `path` has at most one component (the root directory or a
        top-level directory), or if `<path>/OWNERS` exists and contains a
        `set noparent` line (ignoring comments and surrounding whitespace).
        True otherwise, including when there is no OWNERS file.
    """
    # Treat each top-level directory as having no parent, as well as the root
    # directory.
    if len(path.split(os.path.sep)) <= 1:
        # Top level
        return False

    owners_file = os.path.join(path, 'OWNERS')
    if not os.path.isfile(owners_file):
        # No OWNERS file, so nothing can break inheritance here.
        return True

    # Decode explicitly as UTF-8 and replace undecodable bytes rather than
    # relying on the platform default encoding: a stray non-ASCII byte (e.g.
    # in a comment) must not abort the scan. The directive we look for is pure
    # ASCII, so replacement characters can never affect the comparison.
    with open(owners_file, encoding='utf-8', errors='replace') as f:
        for line in f:
            # Strip comments and whitespace
            if line.split('#')[0].strip() == 'set noparent':
                return False

    return True
|
||||
|
||||
|
||||
class DirectoryTrie:
    """
    A trie keyed on path components, shaped like the file system it mirrors.

    Every node stands for one folder and holds:
    - `prefix`: the path to that folder
    - `files`: the files that live directly in that folder
    - `has_parent`: whether the folder inherits owners from a parent folder
    - `subdirectories`: one child trie per subdirectory, keyed by its name
    - `expect_owners_override`: the override flag, handed down to children

    Files are recorded with their complete path (prefix included), so readers
    never have to rebuild it.
    """

    def __init__(self, expect_owners_override, prefix: str = ""):
        """ Build an empty node for the folder at `prefix`. """
        # With the override set, every folder is treated as inheriting owners
        # and the OWNERS files are never consulted.
        inherits = expect_owners_override or FolderHasParent(prefix)

        self.expect_owners_override: bool = expect_owners_override
        self.has_parent: bool = inherits
        self.prefix: str = prefix
        self.files: List[str] = []
        self.subdirectories: Dict[str, DirectoryTrie] = {}

    def AddFile(self, path: List[str]):
        """
        Insert one file, creating intermediate subdirectory nodes on demand.

        `path` is the file broken into components: zero or more directory
        names followed by the filename as the last entry.
        """
        if len(path) == 1:
            # Only the filename is left: it belongs to this folder.
            self.files.append(os.path.join(self.prefix, path[0]))
            return

        head = path[0]
        child = self.subdirectories.get(head)
        if child is None:
            child = DirectoryTrie(self.expect_owners_override,
                                  os.path.join(self.prefix, head))
            self.subdirectories[head] = child
        child.AddFile(path[1:])

    def AddFiles(self, paths: List[List[str]]):
        """ Insert every file in `paths`; shorthand for repeated AddFile. """
        for components in paths:
            self.AddFile(components)

    def ToList(self) -> List[str]:
        """ Collect every file in the trie: this folder's, then children's. """
        collected = list(self.files)
        for child in self.subdirectories.values():
            collected.extend(child.ToList())
        return collected
|
||||
|
||||
Reference in New Issue
Block a user