mirror of
https://chromium.googlesource.com/chromium/tools/depot_tools.git
synced 2026-01-11 18:51:29 +00:00
The previous CL changed the validation criteria, but did not update the error message we're posting on the bugs we file, which is very confusing. In the future we should try to write some tests to catch things like this. Bug: 452151523 Change-Id: I08272353a3cfada8f242355b48c4f003eec4abf4 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/7153383 Auto-Submit: Rachael Newitt <renewitt@google.com> Commit-Queue: Rachael Newitt <renewitt@google.com> Reviewed-by: Jordan Brown <rop@google.com> Commit-Queue: Jordan Brown <rop@google.com>
576 lines
23 KiB
Python
576 lines
23 KiB
Python
#!/usr/bin/env python3
|
|
# Copyright 2023 The Chromium Authors. All rights reserved.
|
|
# Use of this source code is governed by a BSD-style license that can be
|
|
# found in the LICENSE file.
|
|
|
|
from collections import defaultdict
|
|
import os
|
|
import sys
|
|
import itertools
|
|
from typing import Dict, List, Set, Tuple, Union, Optional, Literal, Any
|
|
from urllib.parse import urlparse
|
|
|
|
# Absolute path of the directory that contains this file.
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
# The repo's root directory (the parent of this file's directory).
_ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, os.pardir))

# Dot-separated host labels that mark a URL's domain as git clonable.
GIT_DOMAIN_INDICATORS = [
    "git",
    "googlesource",
    "bitbucket",
    "github",
    "gitlab",
]

# URL substrings that identify a supported package manager.
PACKAGE_MANAGER_PATHS = (
    "crates.io/crates/",
    "npmjs.com/package/",
    "developer.android.com/jetpack/androidx/releases/",
    "/maven2/",
    "/artifacts/repository/",
)

# Put the repo's root directory first on the import path so the
# `metadata.*` imports that follow resolve against this checkout.
sys.path.insert(0, _ROOT_DIR)
|
|
|
|
import metadata.fields.field_types as field_types
|
|
import metadata.fields.custom.cpe_prefix as cpe_prefix_util
|
|
import metadata.fields.custom.license as license_util
|
|
import metadata.fields.custom.version as version_util
|
|
import metadata.fields.custom.mitigated as mitigated_util
|
|
import metadata.fields.known as known_fields
|
|
import metadata.fields.util as util
|
|
import metadata.validation_result as vr
|
|
import metadata.fields.custom.license_allowlist as allowlist_util
|
|
|
|
|
|
class DependencyMetadata:
    """The metadata for a single dependency.

    See @property declarations below to retrieve validated fields for
    downstream consumption.

    The property returns `None` if the provided value (e.g. in
    README.chromium file) is clearly invalid.

    Otherwise, it returns a suitably typed value (see comments on each
    property).

    To retrieve unvalidated (i.e. raw values) fields, use get_entries().
    """

    # Fields that are always required.
    _MANDATORY_FIELDS = {
        known_fields.NAME,
        known_fields.URL,
        known_fields.VERSION,
        known_fields.LICENSE,
        known_fields.SECURITY_CRITICAL,
        known_fields.SHIPPED,
    }

    # Aliases for fields, where:
    #     * key is the alias field; and
    #     * value is the main field to which it should be mapped.
    # Note: if both the alias and main fields are specified in metadata,
    #       the value from the alias field will be used.
    _FIELD_ALIASES = {
        known_fields.SHIPPED_IN_CHROMIUM: known_fields.SHIPPED,
    }

    def __init__(self) -> None:
        # The record of all entries added, including repeated fields.
        self._entries: List[Tuple[str, str]] = []

        # The current value of each field.
        self._metadata: Dict[field_types.MetadataField, str] = {}

        # The line numbers of each metadata field.
        self._metadata_line_numbers: Dict[field_types.MetadataField,
                                          Set[int]] = defaultdict(set)

        # The line numbers of the first and the last line (in the text file)
        # of this dependency metadata. The initial values are sentinels so
        # that the first call to record_line() always replaces both.
        self._first_line = float('inf')
        self._last_line = -1

        # The record of how many times a field entry was added.
        self._occurrences: Dict[field_types.MetadataField,
                                int] = defaultdict(int)

    def add_entry(self, field_name: str, field_value: str) -> None:
        """Records a raw `field_name: field_value` entry.

        The value is stripped of surrounding whitespace. Every entry is
        kept in the raw entry list; if `field_name` resolves to a known
        field, the field's current value is overwritten and its occurrence
        count is incremented.
        """
        value = field_value.strip()
        self._entries.append((field_name, value))

        field = known_fields.get_field(field_name)
        if field:
            self._metadata[field] = value
            self._occurrences[field] += 1

    def has_entries(self) -> bool:
        """Returns whether at least one entry has been added."""
        return bool(self._entries)

    def get_entries(self) -> List[Tuple[str, str]]:
        """Returns a copy of the raw (name, stripped value) entries."""
        return list(self._entries)

    def record_line(self, line_number: int) -> None:
        """Records `line_number` to be part of this metadata."""
        self._first_line = min(self._first_line, line_number)
        self._last_line = max(self._last_line, line_number)

    def record_field_line_number(self, field: field_types.MetadataField,
                                 line_number: int) -> None:
        """Records that `field` appears on `line_number`."""
        self._metadata_line_numbers[field].add(line_number)

    def get_first_and_last_line_number(self) -> Tuple[int, int]:
        """Returns the (first, last) line numbers recorded by record_line().

        If no line has been recorded yet, the sentinels
        `(float('inf'), -1)` are returned.
        """
        return (self._first_line, self._last_line)

    def get_field_line_numbers(self,
                               field: field_types.MetadataField) -> List[int]:
        """Returns the sorted line numbers recorded for `field`.

        Returns an empty list if no line number was recorded for it.
        """
        # Use .get() rather than indexing: indexing the defaultdict would
        # insert an empty set for every field that is merely queried.
        return sorted(self._metadata_line_numbers.get(field, ()))

    def _assess_required_fields(
        self,
        is_open_source_project: bool = False
    ) -> Set[field_types.MetadataField]:
        """Returns the set of required fields, based on the current
        metadata.

        Args:
            is_open_source_project: forwarded to the license allowlist
                                    check.
        """
        required = set(self._MANDATORY_FIELDS)

        # Assume the dependency is shipped if not specified.
        shipped_value = self._metadata.get(known_fields.SHIPPED)
        is_shipped = (shipped_value is None
                      or util.infer_as_boolean(shipped_value, default=True))

        if is_shipped:
            # A license file is required if the dependency is shipped.
            required.add(known_fields.LICENSE_FILE)

            # License compatibility with Android must be set if the
            # package is shipped and the license is not in the
            # allowlist.
            license_value = self._metadata.get(known_fields.LICENSE)
            if not license_value or not known_fields.LICENSE.all_licenses_allowed(
                    license_value, is_open_source_project):
                required.add(known_fields.LICENSE_ANDROID_COMPATIBLE)

        return required

    def validate(self, source_file_dir: str,
                 repo_root_dir: str,
                 is_open_source_project: bool = False) -> List[vr.ValidationResult]:
        """Validates all the metadata.

        Note: this method mutates the stored metadata. Each alias field
        that is present is removed, and its value is stored under the main
        field it maps to.

        Args:
            source_file_dir: the directory of the file that the metadata
                             is from.
            repo_root_dir: the repository's root directory.
            is_open_source_project: whether the project is open source.

        Returns: the metadata's validation results.
        """
        results = []

        # Check for duplicate fields.
        repeated_fields = [
            field for field, count in self._occurrences.items() if count > 1
        ]
        if repeated_fields:
            repeated = ", ".join([
                f"{field.get_name()} ({self._occurrences[field]})"
                for field in repeated_fields
            ])
            error = vr.ValidationError(reason="There is a repeated field.",
                                       additional=[
                                           f"Repeated fields: {repeated}",
                                       ])
            # Merge line numbers.
            lines = sorted(
                set(
                    itertools.chain.from_iterable([
                        self.get_field_line_numbers(field)
                        for field in repeated_fields
                    ])))
            error.set_lines(lines)
            results.append(error)

        # Process alias fields.
        sources = {}
        for alias_field, main_field in self._FIELD_ALIASES.items():
            if alias_field in self._metadata:
                # Validate the value that was present for the main field
                # before overwriting it with the alias field value.
                if main_field in self._metadata:
                    main_value = self._metadata.get(main_field)
                    field_result = main_field.validate(main_value)
                    if field_result:
                        field_result.set_tag(tag="field",
                                             value=main_field.get_name())
                        field_result.set_lines(
                            self.get_field_line_numbers(main_field))
                        results.append(field_result)

                self._metadata[main_field] = self._metadata[alias_field]
                sources[main_field] = alias_field
                self._metadata.pop(alias_field)

        # Validate values for all present fields. A value that came from an
        # alias is validated and reported against the alias field.
        for field, value in self._metadata.items():
            source_field = sources.get(field) or field
            field_result = source_field.validate(value)
            if field_result:
                field_result.set_tag(tag="field", value=source_field.get_name())
                field_result.set_lines(
                    self.get_field_line_numbers(source_field))
                results.append(field_result)

        # Check required fields are present. Iterate in name order so the
        # reported errors do not depend on set iteration order.
        required_fields = self._assess_required_fields(
            is_open_source_project=is_open_source_project)
        for field in sorted(required_fields, key=lambda f: f.get_name()):
            if field not in self._metadata:
                field_name = field.get_name()
                error = vr.ValidationError(
                    reason=f"Required field '{field_name}' is missing.")
                results.append(error)

        # If CPEPrefix is provided without a version, the Version field must be
        # present.
        if self._cpe_prefix_lacks_version():
            error = vr.ValidationWarning(
                reason="CPEPrefix is missing a version, and no Version is "
                "specified.",
                additional=[
                    "When the 'Version' field is not provided, the 'CPEPrefix' "
                    "must include a version component."
                ])
            error.set_lines(self.get_field_line_numbers(known_fields.CPE_PREFIX))
            results.append(error)

        # If the repository is hosted somewhere (i.e. Chromium isn't the
        # canonical repository of the dependency), at least one of the fields
        # Version, Date or Revision must be provided, unless it is canonical
        # or internal.
        if not (self.is_canonical or self.is_internal) and not (
                self.version or self.date or self.revision
                or self.revision_in_deps):
            versioning_fields = [
                known_fields.VERSION, known_fields.DATE, known_fields.REVISION
            ]
            names = util.quoted(
                [field.get_name() for field in versioning_fields])
            error = vr.ValidationError(
                reason="Versioning fields are insufficient.",
                additional=[f"Provide at least one of [{names}]."],
            )
            results.append(error)

        # Check existence of the license file(s) on disk.
        license_file_value = self._metadata.get(known_fields.LICENSE_FILE)
        if license_file_value is not None:
            result = known_fields.LICENSE_FILE.validate_on_disk(
                value=license_file_value,
                source_file_dir=source_file_dir,
                repo_root_dir=repo_root_dir,
            )
            if result:
                result.set_tag(tag="field",
                               value=known_fields.LICENSE_FILE.get_name())
                result.set_lines(
                    self.get_field_line_numbers(known_fields.LICENSE_FILE))
                results.append(result)

        # Outside open source projects, warn about licenses that are only
        # allowed in open source projects.
        if not is_open_source_project:
            license_value = self._metadata.get(known_fields.LICENSE)
            if license_value is not None:
                not_allowed_licenses = known_fields.LICENSE.filter_open_source_project_only_licenses(
                    license_value)
                if not_allowed_licenses:
                    license_result = vr.ValidationWarning(
                        reason="License has a license not in the allowlist."
                        " (see https://source.chromium.org/chromium/chromium/tools/depot_tools/+/main:metadata/fields/custom/license_allowlist.py).",
                        additional=[
                            f"The following license{'s are' if len(not_allowed_licenses) > 1 else ' is'} only allowed in open source projects: "
                            f"{util.quoted(not_allowed_licenses)}.",
                        ])

                    license_result.set_tag(tag="field",
                                           value=known_fields.LICENSE.get_name())
                    license_result.set_lines(
                        self.get_field_line_numbers(known_fields.LICENSE))
                    results.append(license_result)

        # Match values reported in the 'Mitigated:' field with the
        # supplementary fields e.g. 'CVE-2024-12345: description'.
        mitigated_values = self._return_as_property(known_fields.MITIGATED)
        mitigated_ids = set()
        if mitigated_values is not None:
            mitigated_ids = set(mitigated_values)
        # Reported as their own field e.g. 'CVE-2024-12345: description'.
        mitigated_entries = set(self._mitigations_from_entries().keys())

        # The ID sets are sorted before formatting so that the messages do
        # not depend on set iteration order.
        missing_descriptions = sorted(mitigated_ids - mitigated_entries)
        if missing_descriptions:
            results.append(
                vr.ValidationWarning(
                    reason="Missing descriptions for vulnerability IDs",
                    additional=[
                        f"Add descriptions for: {util.quoted(missing_descriptions)}"
                    ]))

        extra_descriptions = sorted(mitigated_entries - mitigated_ids)
        if extra_descriptions:
            results.append(
                vr.ValidationWarning(
                    reason="Found descriptions for unlisted vulnerability IDs",
                    additional=[
                        f"List these IDs in the 'Mitigated:' field: {util.quoted(extra_descriptions)}"
                    ]))

        # Begin by only warning for a small subset of cases.
        # TODO(b/438384123): Expand this to all cases.
        if (self.security_critical
                and self.shipped
                and self.vuln_scan_sufficiency == "insufficient"):
            # TODO(b/448003595): Provide a pre-populated bug link for when people
            # think this is incorrect.
            results.append(
                vr.ValidationWarning(
                    reason=
                    "Dependency metadata is insufficient for vulnerability scanning.",
                    additional=[
                        "Please provide one of the following combinations:",
                        "- 'CPEPrefix' with a version.",
                        "- A git clonable 'URL' and a 'Revision'.",
                        "- A package manager 'URL' and a 'Version'. ",
                    ]))
        return results

    def _cpe_prefix_lacks_version(self) -> bool:
        """Returns whether a CPEPrefix is given but no version is available.

        This is True when the CPEPrefix field is set to something other
        than an "unknown" value (per util.is_unknown), the CPEPrefix has no
        version component, and the Version field is either absent or a
        "not applicable" value (per util.is_not_applicable).
        """
        cpe_prefix = self._metadata.get(known_fields.CPE_PREFIX)
        version = self._metadata.get(known_fields.VERSION)
        cpe_provided = cpe_prefix and not util.is_unknown(cpe_prefix)
        version_is_valid = version and not util.is_not_applicable(version)
        cpe_has_version = cpe_prefix and cpe_prefix_util.has_version_component(
            cpe_prefix)
        # The operands above may be None or "" rather than booleans, so
        # normalize the result to a real bool.
        return bool(cpe_provided
                    and not (version_is_valid or cpe_has_version))

    def _mitigations_from_entries(self) -> Dict[str, str]:
        """Returns the raw entries whose name is a vulnerability ID.

        Maps each entry name matching
        mitigated_util.PATTERN_VULN_ID_WITH_ANCHORS to its stripped value.
        If a name is repeated, the last value wins.
        """
        result = {}
        for key, value in self._entries:
            if mitigated_util.PATTERN_VULN_ID_WITH_ANCHORS.match(key):
                result[key] = value.strip()
        return result

    def _return_as_property(self, field: field_types.MetadataField) -> Any:
        """Helper function to create a property for DependencyMetadata.

        Returns None if the field is not set; otherwise returns the result
        of the field's narrow_type() applied to the raw value.
        """
        assert field in known_fields.ALL_FIELDS

        raw_value = self._metadata.get(field, None)
        if raw_value is None:
            # Field is not set.
            return None

        return field.narrow_type(raw_value)

    @property
    def name(self) -> Optional[str]:
        return self._return_as_property(known_fields.NAME)

    @property
    def mitigations(self) -> Dict[str, str]:
        """Returns mapping of vulnerability IDs to their descriptions."""
        result = self._mitigations_from_entries()
        mitigated_values = self._return_as_property(known_fields.MITIGATED) or []
        # Add entries listed in Mitigated field but without a supplement
        # mitigation description line.
        for vuln_id in mitigated_values:
            if vuln_id not in result:
                result[vuln_id] = ""
        return result

    @property
    def short_name(self) -> Optional[str]:
        return self._return_as_property(known_fields.SHORT_NAME)

    @property
    def url(self) -> Optional[List[str]]:
        """
        Returns a list of URLs that points to upstream repo.
        The URLs are guaranteed to `urllib.parse.urlparse` without errors.

        Returns None if this repository is the canonical repository of this
        dependency (see is_canonical below).
        """
        return self._return_as_property(known_fields.URL)

    @property
    def is_canonical(self) -> bool:
        """
        Returns whether this repository is the canonical public repository
        of this dependency.

        This is derived from a special value in the URL field.
        """
        value = self._metadata.get(known_fields.URL, "")
        return known_fields.URL.repo_is_canonical(value)

    @property
    def is_internal(self) -> bool:
        """
        Returns whether this repository is internal to google/chromium.

        This is derived from a special value in the URL field.
        """
        value = self._metadata.get(known_fields.URL, "")
        return known_fields.URL.repo_is_internal(value)

    @property
    def version(self) -> Optional[str]:
        return self._return_as_property(known_fields.VERSION)

    @property
    def date(self) -> Optional[str]:
        """Returns in "YYYY-MM-DD" format."""
        return self._return_as_property(known_fields.DATE)

    @property
    def revision(self) -> Optional[str]:
        return self._return_as_property(known_fields.REVISION)

    @property
    def revision_in_deps(self) -> bool:
        """Returns whether the raw Revision value says the revision is in
        DEPS, as decided by known_fields.REVISION.is_revision_in_deps().
        """
        value = self._metadata.get(known_fields.REVISION, "")
        return known_fields.REVISION.is_revision_in_deps(value)

    @property
    def license(self) -> Optional[List[str]]:
        """Returns a list of license names."""
        return self._return_as_property(known_fields.LICENSE)

    @property
    def license_file(self) -> Optional[List[str]]:
        # TODO(b/321154076): Consider excluding files that don't exist on
        # disk if it's not too hard.
        #
        # Plumbing src_root and dependency_dir into field validator is
        # required.
        return self._return_as_property(known_fields.LICENSE_FILE)

    @property
    def security_critical(self) -> Optional[bool]:
        return self._return_as_property(known_fields.SECURITY_CRITICAL)

    @property
    def shipped(self) -> Optional[bool]:
        return self._return_as_property(known_fields.SHIPPED)

    @property
    def shipped_in_chromium(self) -> Optional[bool]:
        # Note: validate() moves this alias field's value onto SHIPPED, so
        # this returns None once validate() has run.
        return self._return_as_property(known_fields.SHIPPED_IN_CHROMIUM)

    @property
    def license_android_compatible(self) -> Optional[bool]:
        return self._return_as_property(known_fields.LICENSE_ANDROID_COMPATIBLE)

    @property
    def cpe_prefix(self) -> Optional[str]:
        """Returns a lowercase string (CPE names are case-insensitive)."""
        return self._return_as_property(known_fields.CPE_PREFIX)

    @property
    def description(self) -> Optional[str]:
        return self._return_as_property(known_fields.DESCRIPTION)

    @property
    def local_modifications(self) -> Optional[Union[Literal[False], str]]:
        """Returns `False` if there's no local modifications.
        Otherwise the text content extracted from the metadata.
        """
        return self._return_as_property(known_fields.LOCAL_MODIFICATIONS)

    @property
    def update_mechanism(
            self) -> Optional[Tuple[str, Optional[str], Optional[str]]]:
        """
        Returns the parsed Update Mechanism value.

        The format is `Primary[.Secondary] [(bug_link)]`. This function
        returns (Primary, Secondary, bug_link) if the field is valid,
        otherwise (None, None, None).
        """
        return self._return_as_property(known_fields.UPDATE_MECHANISM)

    @property
    def url_is_git_clonable(self) -> bool:
        """
        Checks if any of the provided URLs appear to be a clonable Git
        repository.

        This is determined by checking for:
        - The 'git://' protocol.
        - A path ending in '.git'.
        - subdomain matching. See GIT_DOMAIN_INDICATORS for the full list.

        Returns False when no URL is available.
        """
        # self.url is None when the URL field is not set; treat that as
        # "no URLs" instead of raising TypeError.
        for u in self.url or []:
            if not u:
                continue
            parsed = urlparse(u)
            if parsed.scheme == "git" or parsed.path.endswith(".git"):
                return True
            if parsed.netloc:
                domain_parts = parsed.netloc.split(".")
                if any(gi in domain_parts for gi in GIT_DOMAIN_INDICATORS):
                    return True
        return False

    @property
    def url_is_package_manager(self) -> bool:
        """
        Checks if any URL contains a known package manager path substring.
        See PACKAGE_MANAGER_PATHS for the supported list.

        Returns False when no URL is available.
        """
        # self.url is None when the URL field is not set; treat that as
        # "no URLs" instead of raising TypeError.
        for u in self.url or []:
            if not u:
                continue
            for p in PACKAGE_MANAGER_PATHS:
                # Require something after the package manager path, i.e.
                # the URL must not end with the bare path.
                if p in u and u.split(p)[-1]:
                    return True
        return False

    @property
    def vuln_scan_sufficiency(self) -> str:
        """Determines if the dependency metadata is sufficient for
        vulnerability scanning.

        The checks are made in the order listed; the first match wins.

        Returns:
            A string indicating the sufficiency status:
            - 'sufficient:CPE' if a CPE prefix is provided and a version is
              available (from the CPE prefix itself or the Version field).
            - 'sufficient:URL and Revision' if URL is a git url and a
              Revision is provided.
            - 'sufficient:URL and Revision[DEPS]' if a URL is provided and
              the Revision is in DEPS.
            - 'sufficient:Package Manager URL and Version' if a package
              manager URL and a Version are provided.
            - 'ignore:Canonical' if the dependency is the canonical
              repository.
            - 'ignore:Internal' if the dependency is internal.
            - 'ignore:Static' if the dependency's update mechanism is
              static.
            - 'ignore:GoogleManaged' if the dependency's update mechanism
              ends in .GoogleManaged.
            - 'insufficient' otherwise.
        """
        if self.cpe_prefix and not self._cpe_prefix_lacks_version():
            return "sufficient:CPE"
        if self.url:
            if self.revision and self.url_is_git_clonable:
                return "sufficient:URL and Revision"
            if self.revision_in_deps:
                return "sufficient:URL and Revision[DEPS]"
            if self.version and self.url_is_package_manager:
                return "sufficient:Package Manager URL and Version"

        if self.is_canonical:
            return "ignore:Canonical"
        if self.is_internal:
            return "ignore:Internal"

        # Read the property once; every access re-parses the raw value.
        mechanism = self.update_mechanism
        if mechanism and mechanism[0]:
            if mechanism[0].lower() == "static":
                return "ignore:Static"
            if mechanism[1] and mechanism[1].lower() == "googlemanaged":
                return "ignore:GoogleManaged"

        return "insufficient"