git_cl.py: Process comments as threads

Refactor GetCommentsSummary to group individual comments into threads. A
thread is considered unresolved if and only if its last comment is
unresolved.

This fixes a bug where a resolved thread (one that had been replied to
and the resolved box checked in the reply) would still appear with the
--unresolved flag, because the initial comment in the thread is
permanently marked as unresolved.

This change also introduces a CommentInfo dataclass to improve
ergonomics for handling comment data from the Gerrit API.

The display of comments have not been made thread-aware to reduce the
scope of this change.

R=agrieve@chromium.org

Bug: 463411949
Fixed: 463411949
Change-Id: I966f011140ee3874b54d8afbbe00caed7bbe14eb
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/7200823
Reviewed-by: Yiwei Zhang <yiwzhang@google.com>
Auto-Submit: Peter Wen <wnwen@chromium.org>
Commit-Queue: Peter Wen <wnwen@chromium.org>
This commit is contained in:
Peter Wen
2025-12-03 06:16:04 -08:00
committed by LUCI CQ
parent f3aeb370c2
commit 04f3706a63
2 changed files with 176 additions and 15 deletions

115
git_cl.py
View File

@@ -28,6 +28,7 @@ import sys
import tempfile
import textwrap
import time
import dataclasses
import typing
import urllib.error
import urllib.parse
@@ -830,6 +831,85 @@ def _prepare_superproject_push_option() -> str | None:
return f'custom-keyed-value=rootRepo:{host}/{project}@{rev}'
@dataclasses.dataclass
class CommentInfo:
# Fields from REST API, see https://gerrit-review.googlesource.com/Documentation/rest-api-changes.html#list-change-comments.
patch_set: Optional[int] = None
id: str = ''
path: Optional[str] = None
side: str = 'REVISION'
parent: Optional[int] = None
line: int = 0
range: Optional[dict] = None
in_reply_to: Optional[str] = None
message: str = ''
updated: str = ''
author: Optional[dict] = None
tag: str = ''
unresolved: bool = False
change_message_id: Optional[str] = None
commit_id: Optional[str] = None
context_lines: Optional[List[Any]] = None
source_content_type: Optional[str] = None
fix_suggestions: Optional[List[Any]] = None
@classmethod
def parse_from(cls, comment: dict):
# Filter out keys that are not fields of the dataclass to prevent TypeErrors
# if the API returns extra fields.
field_names = {f.name for f in dataclasses.fields(cls)}
filtered_comment = {
k: v
for k, v in comment.items() if k in field_names
}
return cls(**filtered_comment)
def _merge_comments_into_threads(file_comments) -> list[list[CommentInfo]]:
# We construct this reverse mapping to make it easier to find all the
# comments replying to a particular comment. Multiple comments can reply to
# the same comment, so this can become a tree-like structure.
reply_id_to_comments: dict[str, list[CommentInfo]] = (
collections.defaultdict(list))
threads: dict[str, list[CommentInfo]] = {}
num_comments = 0
for path, line_comments in file_comments.items():
for line_comment in line_comments:
num_comments += 1
# When comments are returned as a mapping, path is missing.
line_comment['path'] = path
comment = CommentInfo.parse_from(line_comment)
if not comment.in_reply_to:
# This is a root comment, start a new thread.
threads[comment.id] = [comment]
else:
reply_id_to_comments[comment.in_reply_to].append(comment)
# Walk through the thread until there are no more comments.
for thread_id, thread in threads.items():
comment_ids: set[str] = set([thread_id])
while comment_ids:
next_comment_ids = set()
for comment_id in comment_ids:
if next_comments := reply_id_to_comments.get(comment_id):
next_comment_ids.update(next_comment.id
for next_comment in next_comments)
thread.extend(next_comments)
comment_ids = next_comment_ids
# Since multiple comments can reply to the same comments creating a tree
# of comments, we need a consistent and reliable way to sort the comment
# thread. The timestamp when the comment was written can be sorted as a
# string (e.g. 2025-11-24 20:43:32.000000000).
thread.sort(key=lambda c: c.updated)
if sum(len(thread) for thread in threads.values()) != num_comments:
logging.warning(
"Some comments were missed when merging comments into threads.")
return list(threads.values())
def print_stats(args):
"""Prints statistics about the change to the user."""
# --no-ext-diff is broken in some versions of Git, so try to work around
@@ -2613,6 +2693,8 @@ class Changelist(object):
file_comments = gerrit_util.GetChangeComments(
self.GetGerritHost(), self._GerritChangeIdentifier())
threads = _merge_comments_into_threads(file_comments)
# Build dictionary of file comments for easy access and sorting later.
# {author+date: {path: {patchset: {line: url+message}}}}
comments = collections.defaultdict(lambda: collections.defaultdict(
@@ -2626,26 +2708,29 @@ class Changelist(object):
else:
url_prefix = '%s/c/%s' % (server, self.GetIssue())
for path, line_comments in file_comments.items():
for comment in line_comments:
unresolved = comment.get('unresolved', False)
if unresolved_only and not unresolved:
for thread in threads:
# A thread is considered unresolved if and only if its last comment
# is unresolved.
unresolved = thread[-1].unresolved
if unresolved_only and not unresolved:
continue
for comment in thread:
if comment.tag.startswith('autogenerated'):
continue
tag = comment.get('tag', '')
if tag.startswith('autogenerated'):
continue
key = (comment['author']['email'], comment['updated'])
if comment.get('side', 'REVISION') == 'PARENT':
assert comment.author is not None
key = (comment.author['email'], comment.updated)
if comment.side == 'PARENT':
patchset = 'Base'
else:
patchset = 'PS%d' % comment['patch_set']
line = comment.get('line', 0)
assert comment.patch_set is not None
patchset = 'PS%d' % comment.patch_set
line = comment.line if comment.line is not None else 0
url = ('%s/%s/%s#%s%s' %
(url_prefix, comment['patch_set'],
path, 'b' if comment.get('side') == 'PARENT' else '',
(url_prefix, comment.patch_set,
comment.path, 'b' if comment.side == 'PARENT' else '',
str(line) if line else ''))
comments[key][path][patchset][line] = (unresolved, url,
comment['message'])
comments[key][comment.path][patchset][line] = (unresolved, url,
comment.message)
summaries = []
for msg in messages: