Files
chromium_depot_tools/metadata/dependency_metadata.py
Rachael Newitt 11b0e6b805 Update the insufficient vuln scanning warning.
The previous CL changed the validation criteria, but did not update
the error message we're posting on the bugs we file, which is very
confusing. In the future we should try to write some tests to catch
things like this.

Bug: 452151523
Change-Id: I08272353a3cfada8f242355b48c4f003eec4abf4
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/7153383
Auto-Submit: Rachael Newitt <renewitt@google.com>
Commit-Queue: Rachael Newitt <renewitt@google.com>
Reviewed-by: Jordan Brown <rop@google.com>
Commit-Queue: Jordan Brown <rop@google.com>
2025-11-13 18:15:02 -08:00

576 lines
23 KiB
Python

#!/usr/bin/env python3
# Copyright 2023 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
from collections import defaultdict
import os
import sys
import itertools
from typing import Dict, List, Set, Tuple, Union, Optional, Literal, Any
from urllib.parse import urlparse
# Absolute path of the directory that contains this file.
_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
# The repo's root directory (the parent of this file's directory).
_ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, ".."))
# Used to identify git clonable domains. A URL matches when one of the
# dot-separated components of its network location equals one of these
# strings (see DependencyMetadata.url_is_git_clonable).
GIT_DOMAIN_INDICATORS = ["git", "googlesource", "bitbucket", "github", "gitlab"]
# Substrings for supported package manager URLs. A URL matches when it
# contains one of these substrings and has non-empty text after it
# (see DependencyMetadata.url_is_package_manager).
PACKAGE_MANAGER_PATHS = (
"crates.io/crates/",
"npmjs.com/package/",
"developer.android.com/jetpack/androidx/releases/",
"/maven2/",
"/artifacts/repository/",
)
# Add the repo's root directory for clearer imports.
sys.path.insert(0, _ROOT_DIR)
import metadata.fields.field_types as field_types
import metadata.fields.custom.cpe_prefix as cpe_prefix_util
import metadata.fields.custom.license as license_util
import metadata.fields.custom.version as version_util
import metadata.fields.custom.mitigated as mitigated_util
import metadata.fields.known as known_fields
import metadata.fields.util as util
import metadata.validation_result as vr
import metadata.fields.custom.license_allowlist as allowlist_util
class DependencyMetadata:
"""The metadata for a single dependency.

Entries are accumulated with add_entry(); validate() then checks the
accumulated fields and returns the validation results.

See @property declarations below to retrieve validated fields for
downstream consumption.
The property returns `None` if the field is not set or if the provided
value (e.g. in README.chromium file) is clearly invalid.
Otherwise, it returns a suitably typed value (see comments on each
property).
To retrieve unvalidated (i.e. raw values) fields, use get_entries().
"""
# Fields that are always required. _assess_required_fields() starts from
# this set and adds the fields that are only conditionally required.
_MANDATORY_FIELDS = {
known_fields.NAME,
known_fields.URL,
known_fields.VERSION,
known_fields.LICENSE,
known_fields.SECURITY_CRITICAL,
known_fields.SHIPPED,
}
# Aliases for fields, where:
# * key is the alias field; and
# * value is the main field to which it should be mapped.
# Note: if both the alias and main fields are specified in metadata,
# the value from the alias field will be used.
# The mapping is applied by validate(), which also removes the alias
# entry from the current metadata once its value has been copied.
_FIELD_ALIASES = {
known_fields.SHIPPED_IN_CHROMIUM: known_fields.SHIPPED,
}
def __init__(self):
    """Creates an empty record: no entries, no fields, no lines."""
    # The record of all entries added, including repeated fields.
    self._entries: List[Tuple[str, str]] = []
    # The current value of each field.
    self._metadata: Dict[field_types.MetadataField, str] = {}
    # The line numbers of each metadata field.
    self._metadata_line_numbers: Dict[field_types.MetadataField,
                                      Set[int]] = defaultdict(set)
    # The line numbers of the first and the last line (in the text file)
    # of this dependency metadata. The initial values (+inf and -1) are
    # sentinels meaning that no line has been recorded yet; record_line()
    # replaces them on its first call.
    self._first_line = float('inf')
    self._last_line = -1
    # The record of how many times a field entry was added.
    self._occurrences: Dict[field_types.MetadataField,
                            int] = defaultdict(int)
def add_entry(self, field_name: str, field_value: str):
    """Records one `field_name` / `field_value` entry.

    The whitespace-stripped value is always appended to the raw entry
    list. When `known_fields.get_field` resolves `field_name` to a field,
    the value also becomes that field's current value (replacing any
    earlier one) and the field's occurrence count is incremented.
    """
    stripped = field_value.strip()
    self._entries.append((field_name, stripped))

    known = known_fields.get_field(field_name)
    if not known:
        # Unresolved names are kept only in the raw entry list.
        return

    self._metadata[known] = stripped
    self._occurrences[known] += 1
def has_entries(self) -> bool:
    """Returns whether at least one entry has been added."""
    return bool(self._entries)
def get_entries(self) -> List[Tuple[str, str]]:
    """Returns a new list of all (field name, value) entries added so far.

    The returned list is a shallow copy, so modifying it does not affect
    this object's own record.
    """
    return self._entries.copy()
def record_line(self, line_number):
    """Records `line_number` to be part of this metadata.

    Widens the tracked [first line, last line] range just enough to
    include `line_number`.
    """
    earliest, latest = self._first_line, self._last_line
    self._first_line = line_number if line_number < earliest else earliest
    self._last_line = line_number if line_number > latest else latest
def record_field_line_number(self, field: field_types.MetadataField,
                             line_number: int):
    """Notes that `field` appears on `line_number`.

    Recording the same line for the same field more than once has no
    additional effect, because the line numbers are kept in a set.
    """
    lines_for_field = self._metadata_line_numbers[field]
    lines_for_field.add(line_number)
def get_first_and_last_line_number(self) -> Tuple[int, int]:
    """Returns the (first, last) line numbers recorded via record_line().

    If no line has been recorded yet, the values are whatever the
    attributes were initialised to.
    """
    first, last = self._first_line, self._last_line
    return first, last
def get_field_line_numbers(self,
                           field: field_types.MetadataField) -> List[int]:
    """Returns the line numbers recorded for `field`, in ascending order.

    Returns an empty list when no line has been recorded for the field.
    """
    # Look the field up with .get() instead of indexing: the mapping is a
    # defaultdict, so indexing would insert an empty set for every field
    # that is merely queried, making this getter mutate state.
    return sorted(self._metadata_line_numbers.get(field, ()))
def _assess_required_fields(
        self,
        is_open_source_project: bool = False
) -> Set[field_types.MetadataField]:
    """Returns the set of required fields, based on the current
    metadata.

    The result always contains the mandatory fields. For a shipped
    dependency it additionally contains the license file field and,
    unless every listed license is allowed, the Android license
    compatibility field.
    """
    required_fields = set(self._MANDATORY_FIELDS)

    # Assume the dependency is shipped if not specified; only an explicit
    # value that infers to False makes it "not shipped".
    shipped_raw = self._metadata.get(known_fields.SHIPPED)
    if shipped_raw is not None and not util.infer_as_boolean(
            shipped_raw, default=True):
        return required_fields

    # A license file is required if the dependency is shipped.
    required_fields.add(known_fields.LICENSE_FILE)

    # License compatibility with Android must be set if the package is
    # shipped and the license is missing or not in the allowlist.
    license_raw = self._metadata.get(known_fields.LICENSE)
    if not (license_raw and known_fields.LICENSE.all_licenses_allowed(
            license_raw, is_open_source_project)):
        required_fields.add(known_fields.LICENSE_ANDROID_COMPATIBLE)

    return required_fields
def validate(self, source_file_dir: str,
             repo_root_dir: str,
             is_open_source_project: bool = False) -> List[vr.ValidationResult]:
    """Validates all the metadata.

    Alias fields (see _FIELD_ALIASES) are folded into their main field as
    part of validation, so calling this method modifies the stored
    metadata: the main field takes the alias value and the alias entry is
    removed.

    Args:
        source_file_dir: the directory of the file that the metadata
                         is from.
        repo_root_dir: the repository's root directory.
        is_open_source_project: whether the project is open source.

    Returns: the metadata's validation results.
    """
    results = []

    # Check for duplicate fields.
    repeated_fields = [
        field for field, count in self._occurrences.items() if count > 1
    ]
    if repeated_fields:
        repeated = ", ".join(
            f"{field.get_name()} ({self._occurrences[field]})"
            for field in repeated_fields)
        error = vr.ValidationError(reason="There is a repeated field.",
                                   additional=[
                                       f"Repeated fields: {repeated}",
                                   ])
        # Merge line numbers.
        lines = sorted(
            set(
                itertools.chain.from_iterable(
                    self.get_field_line_numbers(field)
                    for field in repeated_fields)))
        error.set_lines(lines)
        results.append(error)

    # Process alias fields.
    sources = {}
    for alias_field, main_field in self._FIELD_ALIASES.items():
        if alias_field in self._metadata:
            # Validate the value that was present for the main field
            # before overwriting it with the alias field value.
            if main_field in self._metadata:
                main_value = self._metadata.get(main_field)
                field_result = main_field.validate(main_value)
                if field_result:
                    field_result.set_tag(tag="field",
                                         value=main_field.get_name())
                    field_result.set_lines(
                        self.get_field_line_numbers(main_field))
                    results.append(field_result)

            self._metadata[main_field] = self._metadata[alias_field]
            # Remember where the value came from so that results are
            # reported against the alias field's name and lines.
            sources[main_field] = alias_field
            self._metadata.pop(alias_field)

    # Validate values for all present fields.
    for field, value in self._metadata.items():
        source_field = sources.get(field) or field
        field_result = source_field.validate(value)
        if field_result:
            field_result.set_tag(tag="field", value=source_field.get_name())
            field_result.set_lines(
                self.get_field_line_numbers(source_field))
            results.append(field_result)

    # Check required fields are present.
    required_fields = self._assess_required_fields(
        is_open_source_project=is_open_source_project)
    # Iterate in name order: walking the set directly would emit these
    # errors in an arbitrary order.
    for field in sorted(required_fields, key=lambda f: f.get_name()):
        if field not in self._metadata:
            field_name = field.get_name()
            error = vr.ValidationError(
                reason=f"Required field '{field_name}' is missing.")
            results.append(error)

    # If CPEPrefix is provided without a version, the Version field must be
    # present.
    if self._cpe_prefix_lacks_version():
        warning = vr.ValidationWarning(
            reason="CPEPrefix is missing a version, and no Version is "
            "specified.",
            additional=[
                "When the 'Version' field is not provided, the 'CPEPrefix' "
                "must include a version component."
            ])
        warning.set_lines(
            self.get_field_line_numbers(known_fields.CPE_PREFIX))
        results.append(warning)

    # Unless the dependency is canonical (i.e. this repository is its
    # canonical repository) or internal, at least one of the fields
    # Version, Date or Revision must be provided. A revision that is
    # specified in DEPS also satisfies this.
    if not (self.is_canonical or self.is_internal) and not (
            self.version or self.date or self.revision
            or self.revision_in_deps):
        versioning_fields = [
            known_fields.VERSION, known_fields.DATE, known_fields.REVISION
        ]
        names = util.quoted(
            [field.get_name() for field in versioning_fields])
        error = vr.ValidationError(
            reason="Versioning fields are insufficient.",
            additional=[f"Provide at least one of [{names}]."],
        )
        results.append(error)

    # Check existence of the license file(s) on disk.
    license_file_value = self._metadata.get(known_fields.LICENSE_FILE)
    if license_file_value is not None:
        result = known_fields.LICENSE_FILE.validate_on_disk(
            value=license_file_value,
            source_file_dir=source_file_dir,
            repo_root_dir=repo_root_dir,
        )
        if result:
            result.set_tag(tag="field",
                           value=known_fields.LICENSE_FILE.get_name())
            result.set_lines(
                self.get_field_line_numbers(known_fields.LICENSE_FILE))
            results.append(result)

    # Outside open source projects, warn about licenses that are only
    # allowed in open source projects.
    if not is_open_source_project:
        license_value = self._metadata.get(known_fields.LICENSE)
        if license_value is not None:
            not_allowed_licenses = known_fields.LICENSE.filter_open_source_project_only_licenses(
                license_value)
            if len(not_allowed_licenses) > 0:
                license_result = vr.ValidationWarning(
                    reason="License has a license not in the allowlist."
                    " (see https://source.chromium.org/chromium/chromium/tools/depot_tools/+/main:metadata/fields/custom/license_allowlist.py).",
                    additional=[
                        f"The following license{'s are' if len(not_allowed_licenses) > 1 else ' is'} only allowed in open source projects: "
                        f"{util.quoted(not_allowed_licenses)}.",
                    ])
                license_result.set_tag(
                    tag="field", value=known_fields.LICENSE.get_name())
                license_result.set_lines(
                    self.get_field_line_numbers(known_fields.LICENSE))
                results.append(license_result)

    # Match values reported in the 'Mitigated:' field with the
    # supplementary fields e.g. 'CVE-2024-12345: description'.
    mitigated_values = self._return_as_property(known_fields.MITIGATED)
    mitigated_ids = set()
    if mitigated_values is not None:
        mitigated_ids = set(mitigated_values)
    # Reported as their own field e.g. 'CVE-2024-12345: description'.
    mitigated_entries = set(self._mitigations_from_entries().keys())

    # The ID sets are sorted before formatting so that the messages are
    # stable from run to run.
    missing_descriptions = mitigated_ids - mitigated_entries
    if missing_descriptions:
        results.append(
            vr.ValidationWarning(
                reason="Missing descriptions for vulnerability IDs",
                additional=[
                    f"Add descriptions for: {util.quoted(sorted(missing_descriptions))}"
                ]))

    extra_descriptions = mitigated_entries - mitigated_ids
    if extra_descriptions:
        results.append(
            vr.ValidationWarning(
                reason="Found descriptions for unlisted vulnerability IDs",
                additional=[
                    f"List these IDs in the 'Mitigated:' field: {util.quoted(sorted(extra_descriptions))}"
                ]))

    # Begin by only warning for a small subset of cases.
    # TODO(b/438384123): Expand this to all cases.
    if (self.security_critical
            and self.shipped
            and self.vuln_scan_sufficiency == "insufficient"):
        # TODO(b/448003595): Provide a pre-populated bug link for when people
        # think this is incorrect.
        results.append(
            vr.ValidationWarning(
                reason=
                "Dependency metadata is insufficient for vulnerability scanning.",
                additional=[
                    "Please provide one of the following combinations:",
                    "- 'CPEPrefix' with a version.",
                    "- A git clonable 'URL' and a 'Revision'.",
                    "- A package manager 'URL' and a 'Version'. ",
                ]))

    return results
def _cpe_prefix_lacks_version(self) -> bool:
    """Returns whether a CPEPrefix is given but no version can be found.

    The result is True only when all of the following hold:
    - the CPEPrefix field has a non-empty value that `util.is_unknown`
      does not flag;
    - the Version field is empty/absent, or `util.is_not_applicable`
      flags it; and
    - the CPEPrefix value has no version component of its own.
    """
    cpe_prefix = self._metadata.get(known_fields.CPE_PREFIX)
    if not cpe_prefix or util.is_unknown(cpe_prefix):
        # No usable CPEPrefix, so there is nothing to be missing from it.
        return False

    version = self._metadata.get(known_fields.VERSION)
    if version and not util.is_not_applicable(version):
        # The Version field supplies the version.
        return False

    return not cpe_prefix_util.has_version_component(cpe_prefix)
def _mitigations_from_entries(self) -> Dict[str, str]:
    """Collects entries whose field name is itself a vulnerability ID.

    Returns a mapping from each entry name accepted by
    `mitigated_util.PATTERN_VULN_ID_WITH_ANCHORS` to its stripped value.
    If a name occurs more than once, the last value wins.
    """
    is_vuln_id = mitigated_util.PATTERN_VULN_ID_WITH_ANCHORS.match
    return {
        entry_name: entry_text.strip()
        for entry_name, entry_text in self._entries
        if is_vuln_id(entry_name)
    }
def _return_as_property(self, field: field_types.MetadataField) -> Any:
    """Helper used by the property accessors of DependencyMetadata.

    Returns None when `field` is not set; otherwise returns the result
    of `field.narrow_type` applied to the field's raw value.
    """
    assert field in known_fields.ALL_FIELDS
    raw_value = self._metadata.get(field)
    return None if raw_value is None else field.narrow_type(raw_value)
@property
def name(self) -> Optional[str]:
    """Typed value of the `known_fields.NAME` field; None when unset."""
    narrowed = self._return_as_property(known_fields.NAME)
    return narrowed
@property
def mitigations(self) -> Dict[str, str]:
    """Returns mapping of vulnerability IDs to their descriptions.

    IDs listed in the Mitigated field that have no description entry of
    their own are included with an empty description.
    """
    descriptions = self._mitigations_from_entries()
    listed_ids = self._return_as_property(known_fields.MITIGATED) or []
    for vuln_id in listed_ids:
        # Keep an existing description; only fill in the missing ones.
        descriptions.setdefault(vuln_id, "")
    return descriptions
@property
def short_name(self) -> Optional[str]:
    """Typed value of the `known_fields.SHORT_NAME` field; None when unset."""
    narrowed = self._return_as_property(known_fields.SHORT_NAME)
    return narrowed
@property
def url(self) -> Optional[List[str]]:
    """
    Returns a list of URLs that points to upstream repo.

    The URLs are guaranteed to `urllib.parse.urlparse` without errors.

    Returns None if the field is not set, or if this repository is the
    canonical repository of this dependency (see is_canonical below).
    """
    upstream_urls = self._return_as_property(known_fields.URL)
    return upstream_urls
@property
def is_canonical(self) -> bool:
    """
    Returns whether this repository is the canonical public repository
    of this dependency.

    This is derived from a special value in the URL field; the raw
    (unvalidated) field value is inspected, with "" used when unset.
    """
    raw_url = self._metadata.get(known_fields.URL, "")
    return known_fields.URL.repo_is_canonical(raw_url)
@property
def is_internal(self) -> bool:
    """
    Returns whether this repository is internal to google/chromium.

    This is derived from a special value in the URL field; the raw
    (unvalidated) field value is inspected, with "" used when unset.
    """
    raw_url = self._metadata.get(known_fields.URL, "")
    return known_fields.URL.repo_is_internal(raw_url)
@property
def version(self) -> Optional[str]:
    """Typed value of the `known_fields.VERSION` field; None when unset."""
    narrowed = self._return_as_property(known_fields.VERSION)
    return narrowed
@property
def date(self) -> Optional[str]:
    """Returns in "YYYY-MM-DD" format (None when the field is unset)."""
    narrowed = self._return_as_property(known_fields.DATE)
    return narrowed
@property
def revision(self) -> Optional[str]:
    """Typed value of the `known_fields.REVISION` field; None when unset."""
    narrowed = self._return_as_property(known_fields.REVISION)
    return narrowed
@property
def revision_in_deps(self) -> bool:
    """Result of `known_fields.REVISION.is_revision_in_deps` for the raw
    Revision value ("" is passed when the field is unset)."""
    raw_revision = self._metadata.get(known_fields.REVISION, "")
    return known_fields.REVISION.is_revision_in_deps(raw_revision)
@property
def license(self) -> Optional[List[str]]:
    """Returns a list of license names (None when the field is unset)."""
    license_names = self._return_as_property(known_fields.LICENSE)
    return license_names
@property
def license_file(self) -> Optional[List[str]]:
    """Typed value of the `known_fields.LICENSE_FILE` field; None when
    unset. Existence of the files on disk is not checked here."""
    # TODO(b/321154076): Consider excluding files that don't exist on
    # disk if it's not too hard.
    #
    # Plumbing src_root and dependency_dir into field validator is
    # required.
    license_files = self._return_as_property(known_fields.LICENSE_FILE)
    return license_files
@property
def security_critical(self) -> Optional[bool]:
    """Typed value of the `known_fields.SECURITY_CRITICAL` field; None
    when unset."""
    narrowed = self._return_as_property(known_fields.SECURITY_CRITICAL)
    return narrowed
@property
def shipped(self) -> Optional[bool]:
    """Typed value of the `known_fields.SHIPPED` field; None when unset.

    A value supplied only through an alias field is reflected here only
    after validate() has copied it into the main field.
    """
    narrowed = self._return_as_property(known_fields.SHIPPED)
    return narrowed
@property
def shipped_in_chromium(self) -> Optional[bool]:
    """Typed value of the `known_fields.SHIPPED_IN_CHROMIUM` alias field;
    None when unset.

    validate() moves this alias's value into the Shipped field and
    removes the alias entry, after which this returns None unless the
    alias is added again.
    """
    narrowed = self._return_as_property(known_fields.SHIPPED_IN_CHROMIUM)
    return narrowed
@property
def license_android_compatible(self) -> Optional[bool]:
    """Typed value of the `known_fields.LICENSE_ANDROID_COMPATIBLE` field;
    None when unset."""
    narrowed = self._return_as_property(
        known_fields.LICENSE_ANDROID_COMPATIBLE)
    return narrowed
@property
def cpe_prefix(self) -> Optional[str]:
    """Returns a lowercase string (CPE names are case-insensitive), or
    None when the field is unset."""
    narrowed = self._return_as_property(known_fields.CPE_PREFIX)
    return narrowed
@property
def description(self) -> Optional[str]:
    """Typed value of the `known_fields.DESCRIPTION` field; None when
    unset."""
    narrowed = self._return_as_property(known_fields.DESCRIPTION)
    return narrowed
@property
def local_modifications(self) -> Optional[Union[Literal[False], str]]:
    """Returns `False` if there's no local modifications.

    Otherwise the text content extracted from the metadata. Returns None
    when the field is unset.
    """
    narrowed = self._return_as_property(known_fields.LOCAL_MODIFICATIONS)
    return narrowed
@property
def update_mechanism(
        self) -> Optional[Tuple[str, Optional[str], Optional[str]]]:
    """
    Returns the parsed Update Mechanism value.

    The format is `Primary[.Secondary] [(bug_link)]`. Returns None when
    the field is unset. Otherwise this returns
    (Primary, Secondary, bug_link) if the field is valid, and
    (None, None, None) if it is not.
    """
    parsed = self._return_as_property(known_fields.UPDATE_MECHANISM)
    return parsed
@property
def url_is_git_clonable(self) -> bool:
    """
    Checks if any of the provided URLs appear to be a clonable Git
    repository.

    This is determined by checking for:
    - The 'git://' protocol.
    - A path ending in '.git'.
    - A dot-separated label of the hostname equal to one of
      GIT_DOMAIN_INDICATORS.

    Returns False when no URL is available.
    """
    # `self.url` can be None (e.g. the field is unset); treat that as
    # "no URLs" instead of raising TypeError.
    for candidate in self.url or ():
        if not candidate:
            continue
        parsed = urlparse(candidate)
        if parsed.scheme == "git" or parsed.path.endswith(".git"):
            return True
        # Match on the hostname rather than the raw netloc: the hostname
        # excludes userinfo and port and is lower-cased, so those parts
        # can neither fake nor hide a domain label.
        labels = (parsed.hostname or "").split(".")
        if any(indicator in labels for indicator in GIT_DOMAIN_INDICATORS):
            return True
    return False
@property
def url_is_package_manager(self) -> bool:
    """
    Checks if any URL contains a known package manager path substring.
    See PACKAGE_MANAGER_PATHS for the supported list.

    A URL only counts when some text follows the substring. Returns
    False when no URL is available.
    """
    # `self.url` can be None (e.g. the field is unset); treat that as
    # "no URLs" instead of raising TypeError.
    for candidate in self.url or ():
        if not candidate:
            continue
        # The text after the marker must be non-empty, so a URL that
        # ends right at the marker does not qualify.
        if any(marker in candidate and candidate.split(marker)[-1]
               for marker in PACKAGE_MANAGER_PATHS):
            return True
    return False
@property
def vuln_scan_sufficiency(self) -> str:
    """Determines if the dependency metadata is sufficient for
    vulnerability scanning.

    The checks run in the order listed and the first match is returned.

    Returns:
        A string indicating the sufficiency status:
        - 'sufficient:CPE' if a CPE prefix is provided and a version is
          included in the README.
        - 'sufficient:URL and Revision' if URL is a git url and a
          Revision is provided.
        - 'sufficient:URL and Revision[DEPS]' if a URL is provided and
          the revision is 'Revision:DEPS'.
        - 'sufficient:Package Manager URL and Version' if a package
          manager URL and a Version are provided.
        - 'ignore:Canonical' if the dependency is the canonical
          repository.
        - 'ignore:Internal' if the dependency is internal.
        - 'ignore:Static' if the dependency's update mechanism is static.
        - 'ignore:GoogleManaged' if the dependency's update mechanism
          ends in .GoogleManaged.
        - 'insufficient' otherwise.
    """
    if self.cpe_prefix and not self._cpe_prefix_lacks_version():
        return "sufficient:CPE"

    if self.url:
        if self.revision and self.url_is_git_clonable:
            return "sufficient:URL and Revision"
        if self.revision_in_deps:
            return "sufficient:URL and Revision[DEPS]"
        if self.version and self.url_is_package_manager:
            return "sufficient:Package Manager URL and Version"

    if self.is_canonical:
        return "ignore:Canonical"
    if self.is_internal:
        return "ignore:Internal"

    # Read the property once and reuse the parsed value.
    mechanism = self.update_mechanism
    if mechanism and mechanism[0]:
        if mechanism[0].lower() == "static":
            return "ignore:Static"
        secondary = mechanism[1]
        if secondary and secondary.lower() == "googlemanaged":
            return "ignore:GoogleManaged"

    return "insufficient"