From 944b60530e30ff97179b273360c8d3fb4f8cea7a Mon Sep 17 00:00:00 2001 From: dtu Date: Thu, 14 Jul 2016 14:48:21 -0700 Subject: [PATCH] Fix per-file owners check for deleted files. Previously if you deleted a file that you had per-file owners on, it would fail the owners check. This fixes that. Originally, owners.Database used glob to enumerate the directory and added all the matching files in the directory to some dicts holding the owners information. If a CL deleted a file, it'd no longer be on the filesystem, so it wouldn't be in these dicts. There'd be no per-file owners information for it. With this patch, the Database no longer enumerates individual files. It instead keeps track of the glob patterns and checks the CL's files against the patterns at lookup time. BUG=622381 TEST=tests/owners_unittest.py && tests/owners_finder_test.py # Unit test included. Review-Url: https://codereview.chromium.org/2148153002 --- git_cl.py | 5 +- owners.py | 92 +++++++++++++++++++------------------ owners_finder.py | 25 +++------- presubmit_support.py | 2 +- tests/owners_finder_test.py | 7 +-- tests/owners_unittest.py | 39 +++++++++++++--- 6 files changed, 92 insertions(+), 78 deletions(-) diff --git a/git_cl.py b/git_cl.py index 70d7e18c9a..6a59cda52b 100755 --- a/git_cl.py +++ b/git_cl.py @@ -13,7 +13,6 @@ from distutils.version import LooseVersion from multiprocessing.pool import ThreadPool import base64 import collections -import glob import httplib import json import logging @@ -2721,7 +2720,7 @@ class ChangeDescription(object): reviewers.append(name) if add_owners_tbr: owners_db = owners.Database(change.RepositoryRoot(), - fopen=file, os_path=os.path, glob=glob.glob) + fopen=file, os_path=os.path) all_reviewers = set(tbr_names + reviewers) missing_files = owners_db.files_not_covered_by(change.LocalPaths(), all_reviewers) @@ -4841,7 +4840,7 @@ def CMDowners(parser, args): [f.LocalPath() for f in cl.GetChange(base_branch, None).AffectedFiles()], change.RepositoryRoot(), author, - fopen=file, os_path=os.path, glob=glob.glob, + fopen=file, os_path=os.path, disable_color=options.no_color).run() diff --git a/owners.py b/owners.py index 56311e40e6..78c7cca9cb 100644 --- a/owners.py +++ b/owners.py @@ -55,6 +55,7 @@ Examples for all of these combinations can be found in tests/owners_unittest.py. """ import collections +import fnmatch import random import re @@ -94,35 +95,32 @@ class Database(object): of changed files, and see if a list of changed files is covered by a list of reviewers.""" - def __init__(self, root, fopen, os_path, glob): + def __init__(self, root, fopen, os_path): """Args: root: the path to the root of the Repository open: function callback to open a text file for reading os_path: module/object callback with fields for 'abspath', 'dirname', 'exists', 'join', and 'relpath' - glob: function callback to list entries in a directory match a glob - (i.e., glob.glob) """ self.root = root self.fopen = fopen self.os_path = os_path - self.glob = glob # Pick a default email regexp to use; callers can override as desired. self.email_regexp = re.compile(BASIC_EMAIL_REGEXP) - # Mapping of owners to the paths they own. - self.owned_by = {EVERYONE: set()} + # Mapping of owners to the paths or globs they own. + self._owners_to_paths = {EVERYONE: set()} # Mapping of paths to authorized owners. - self.owners_for = {} + self._paths_to_owners = {} # Mapping reviewers to the preceding comment per file in the OWNERS files. self.comments = {} # Set of paths that stop us from looking above them for owners. # (This is implicitly true for the root directory). - self.stop_looking = set(['']) + self._stop_looking = set(['']) # Set of files which have already been read. self.read_files = set() @@ -135,6 +133,7 @@ class Database(object): in order avoid suggesting the author as a reviewer for their own changes.""" self._check_paths(files) self.load_data_needed_for(files) + suggested_owners = self._covering_set_of_owners_for(files, author) if EVERYONE in suggested_owners: if len(suggested_owners) > 1: @@ -154,11 +153,7 @@ class Database(object): self._check_reviewers(reviewers) self.load_data_needed_for(files) - covered_objs = self._objs_covered_by(reviewers) - uncovered_files = [f for f in files - if not self._is_obj_covered_by(f, covered_objs)] - - return set(uncovered_files) + return set(f for f in files if not self._is_obj_covered_by(f, reviewers)) def _check_paths(self, files): def _is_under(f, pfx): @@ -171,25 +166,23 @@ class Database(object): _assert_is_collection(reviewers) assert all(self.email_regexp.match(r) for r in reviewers) - def _objs_covered_by(self, reviewers): - objs = self.owned_by[EVERYONE] - for r in reviewers: - objs = objs | self.owned_by.get(r, set()) - return objs - - def _stop_looking(self, objname): - return objname in self.stop_looking - - def _is_obj_covered_by(self, objname, covered_objs): - while not objname in covered_objs and not self._stop_looking(objname): + def _is_obj_covered_by(self, objname, reviewers): + reviewers = list(reviewers) + [EVERYONE] + while True: + for reviewer in reviewers: + for owned_pattern in self._owners_to_paths.get(reviewer, set()): + if fnmatch.fnmatch(objname, owned_pattern): + return True + if self._should_stop_looking(objname): + break objname = self.os_path.dirname(objname) - return objname in covered_objs + return False def _enclosing_dir_with_owners(self, objname): """Returns the innermost enclosing directory that has an OWNERS file.""" dirpath = objname - while not dirpath in self.owners_for: - if self._stop_looking(dirpath): + while not self._owners_for(dirpath): + if self._should_stop_looking(dirpath): break dirpath = self.os_path.dirname(dirpath) return dirpath @@ -197,12 +190,23 @@ class Database(object): def load_data_needed_for(self, files): for f in files: dirpath = self.os_path.dirname(f) - while not dirpath in self.owners_for: + while not self._owners_for(dirpath): self._read_owners(self.os_path.join(dirpath, 'OWNERS')) - if self._stop_looking(dirpath): + if self._should_stop_looking(dirpath): break dirpath = self.os_path.dirname(dirpath) + def _should_stop_looking(self, objname): + return any(fnmatch.fnmatch(objname, stop_looking) + for stop_looking in self._stop_looking) + + def _owners_for(self, objname): + obj_owners = set() + for owned_path, path_owners in self._paths_to_owners.iteritems(): + if fnmatch.fnmatch(objname, owned_path): + obj_owners |= path_owners + return obj_owners + def _read_owners(self, path): owners_path = self.os_path.join(self.root, path) if not self.os_path.exists(owners_path): @@ -231,7 +235,7 @@ class Database(object): in_comment = False if line == 'set noparent': - self.stop_looking.add(dirpath) + self._stop_looking.add(dirpath) continue m = re.match('per-file (.+)=(.+)', line) @@ -243,10 +247,9 @@ class Database(object): raise SyntaxErrorInOwnersFile(owners_path, lineno, 'per-file globs cannot span directories or use escapes: "%s"' % line) - baselines = self.glob(full_glob_string) - for baseline in (self.os_path.relpath(b, self.root) for b in baselines): - self._add_entry(baseline, directive, 'per-file line', - owners_path, lineno, '\n'.join(comment)) + relative_glob_string = self.os_path.relpath(full_glob_string, self.root) + self._add_entry(relative_glob_string, directive, 'per-file line', + owners_path, lineno, '\n'.join(comment)) continue if line.startswith('set '): @@ -259,7 +262,7 @@ class Database(object): def _add_entry(self, path, directive, line_type, owners_path, lineno, comment): if directive == 'set noparent': - self.stop_looking.add(path) + self._stop_looking.add(path) elif directive.startswith('file:'): owners_file = self._resolve_include(directive[5:], owners_path) if not owners_file: @@ -269,19 +272,20 @@ class Database(object): self._read_owners(owners_file) dirpath = self.os_path.dirname(owners_file) - for key in self.owned_by: - if not dirpath in self.owned_by[key]: + for key in self._owners_to_paths: + if not dirpath in self._owners_to_paths[key]: continue - self.owned_by[key].add(path) + self._owners_to_paths[key].add(path) - if dirpath in self.owners_for: - self.owners_for.setdefault(path, set()).update(self.owners_for[dirpath]) + if dirpath in self._paths_to_owners: + self._paths_to_owners.setdefault(path, set()).update( + self._paths_to_owners[dirpath]) elif self.email_regexp.match(directive) or directive == EVERYONE: self.comments.setdefault(directive, {}) self.comments[directive][path] = comment - self.owned_by.setdefault(directive, set()).add(path) - self.owners_for.setdefault(path, set()).add(directive) + self._owners_to_paths.setdefault(directive, set()).add(path) + self._paths_to_owners.setdefault(path, set()).add(directive) else: raise SyntaxErrorInOwnersFile(owners_path, lineno, ('%s is not a "set" directive, file include, "*", ' @@ -321,7 +325,7 @@ class Database(object): dirname = current_dir distance = 1 while True: - for owner in self.owners_for.get(dirname, []): + for owner in self._owners_for(dirname): if author and owner == author: continue all_possible_owners.setdefault(owner, []) @@ -329,7 +333,7 @@ class Database(object): # directory, only count the closest one. if not any(current_dir == el[0] for el in all_possible_owners[owner]): all_possible_owners[owner].append((current_dir, distance)) - if self._stop_looking(dirname): + if self._should_stop_looking(dirname): break dirname = self.os_path.dirname(dirname) distance += 1 diff --git a/owners_finder.py b/owners_finder.py index a0d50304db..baca18eeda 100644 --- a/owners_finder.py +++ b/owners_finder.py @@ -23,7 +23,7 @@ class OwnersFinder(object): indentation = 0 def __init__(self, files, local_root, author, - fopen, os_path, glob, + fopen, os_path, email_postfix='@chromium.org', disable_color=False): self.email_postfix = email_postfix @@ -34,7 +34,7 @@ class OwnersFinder(object): self.COLOR_GREY = '' self.COLOR_RESET = '' - self.db = owners_module.Database(local_root, fopen, os_path, glob) + self.db = owners_module.Database(local_root, fopen, os_path) self.db.load_data_needed_for(files) self.os_path = os_path @@ -43,28 +43,15 @@ class OwnersFinder(object): filtered_files = files - # Eliminate files that author himself can review. - if author: - if author in self.db.owned_by: - for dir_name in self.db.owned_by[author]: - filtered_files = [ - file_name for file_name in filtered_files - if not file_name.startswith(dir_name)] - - filtered_files = list(filtered_files) - - # Eliminate files that everyone can review. - if owners_module.EVERYONE in self.db.owned_by: - for dir_name in self.db.owned_by[owners_module.EVERYONE]: - filtered_files = filter( - lambda file_name: not file_name.startswith(dir_name), - filtered_files) + # Eliminate files that the author can review. + filtered_files = list(self.db.files_not_covered_by( + filtered_files, [author] if author else [])) # If some files are eliminated. if len(filtered_files) != len(files): files = filtered_files # Reload the database. - self.db = owners_module.Database(local_root, fopen, os_path, glob) + self.db = owners_module.Database(local_root, fopen, os_path) self.db.load_data_needed_for(files) self.all_possible_owners = self.db.all_possible_owners(files, None) diff --git a/presubmit_support.py b/presubmit_support.py index 4a388ca2fc..0e85eb0b7a 100755 --- a/presubmit_support.py +++ b/presubmit_support.py @@ -390,7 +390,7 @@ class InputApi(object): # TODO(dpranke): figure out a list of all approved owners for a repo # in order to be able to handle wildcard OWNERS files? self.owners_db = owners.Database(change.RepositoryRoot(), - fopen=file, os_path=self.os_path, glob=self.glob) + fopen=file, os_path=self.os_path) self.verbose = verbose self.Command = CommandData diff --git a/tests/owners_finder_test.py b/tests/owners_finder_test.py index 779361294d..1d9e08366b 100755 --- a/tests/owners_finder_test.py +++ b/tests/owners_finder_test.py @@ -65,11 +65,10 @@ def test_repo(): class OutputInterceptedOwnersFinder(owners_finder.OwnersFinder): def __init__(self, files, local_root, - fopen, os_path, glob, - disable_color=False): + fopen, os_path, disable_color=False): super(OutputInterceptedOwnersFinder, self).__init__( files, local_root, None, - fopen, os_path, glob, disable_color=disable_color) + fopen, os_path, disable_color=disable_color) self.output = [] self.indentation_stack = [] @@ -108,13 +107,11 @@ class _BaseTestCase(unittest.TestCase): self.repo = test_repo() self.root = '/' self.fopen = self.repo.open_for_reading - self.glob = self.repo.glob def ownersFinder(self, files): finder = OutputInterceptedOwnersFinder(files, self.root, fopen=self.fopen, os_path=self.repo, - glob=self.glob, disable_color=True) return finder diff --git a/tests/owners_unittest.py b/tests/owners_unittest.py index 5cc5371e64..0cc6d72979 100755 --- a/tests/owners_unittest.py +++ b/tests/owners_unittest.py @@ -73,14 +73,12 @@ class _BaseTestCase(unittest.TestCase): self.files = self.repo.files self.root = '/' self.fopen = self.repo.open_for_reading - self.glob = self.repo.glob - def db(self, root=None, fopen=None, os_path=None, glob=None): + def db(self, root=None, fopen=None, os_path=None): root = root or self.root fopen = fopen or self.fopen os_path = os_path or self.repo - glob = glob or self.glob - return owners.Database(root, fopen, os_path, glob) + return owners.Database(root, fopen, os_path) class OwnersDatabaseTest(_BaseTestCase): @@ -150,9 +148,15 @@ class OwnersDatabaseTest(_BaseTestCase): ['content/content.gyp', 'content/bar/foo.cc', 'content/baz/froboz.h']) def test_per_file(self): - # brett isn't allowed to approve ugly.cc self.files['/content/baz/OWNERS'] = owners_file(brett, lines=['per-file ugly.*=tom@example.com']) + + # peter isn't allowed to approve ugly.cc + self.assert_files_not_covered_by(['content/baz/ugly.cc'], + [peter], + ['content/baz/ugly.cc']) + + # brett is allowed to approve ugly.cc self.assert_files_not_covered_by(['content/baz/ugly.cc'], [brett], []) @@ -167,14 +171,22 @@ class OwnersDatabaseTest(_BaseTestCase): def test_per_file_with_spaces(self): # This is the same as test_per_file(), except that we include spaces - # on the per-file line. brett isn't allowed to approve ugly.cc; + # on the per-file line. # tom is allowed to approve ugly.cc, but not froboz.h self.files['/content/baz/OWNERS'] = owners_file(brett, lines=['per-file ugly.* = tom@example.com']) + + # peter isn't allowed to approve ugly.cc + self.assert_files_not_covered_by(['content/baz/ugly.cc'], + [peter], + ['content/baz/ugly.cc']) + + # brett is allowed to approve ugly.cc self.assert_files_not_covered_by(['content/baz/ugly.cc'], [brett], []) + # tom is allowed to approve ugly.cc, but not froboz.h self.assert_files_not_covered_by(['content/baz/ugly.cc'], [tom], []) @@ -182,6 +194,21 @@ class OwnersDatabaseTest(_BaseTestCase): [tom], ['content/baz/froboz.h']) + def test_per_file_with_nonexistent_file(self): + self.files['/content/baz/OWNERS'] = owners_file(brett, + lines=['per-file ugly.*=tom@example.com']) + + # peter isn't allowed to approve ugly.nonexistent.cc, but brett and tom are. + self.assert_files_not_covered_by(['content/baz/ugly.nonexistent.cc'], + [peter], + ['content/baz/ugly.nonexistent.cc']) + self.assert_files_not_covered_by(['content/baz/ugly.nonexistent.cc'], + [brett], + []) + self.assert_files_not_covered_by(['content/baz/ugly.nonexistent.cc'], + [tom], + []) + def test_per_file__set_noparent(self): self.files['/content/baz/OWNERS'] = owners_file(brett, lines=['per-file ugly.*=tom@example.com',