mirror of
https://chromium.googlesource.com/chromium/tools/depot_tools.git
synced 2026-01-11 18:51:29 +00:00
metadata: define a clear DependencyMetadata interface
This CL adds a typed interface that exposes parsed metadata for downstream consumption. Conventionally: - A validated field should be retrieved by the property of the same name - A validated field returns "None" if said field is not provided, or is clearly invalid (e.g. "Unknown" values) - Raw values can still be retrieved with get_entries() When using the properties accessor, fields are normalized and/or coerced to a suitable type (e.g. list of str, str of a particular format). Bug: b/321154076 Change-Id: Ia56969a838e682a7b7eb1dc0781d48e1e38a2ff0 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/5446637 Reviewed-by: Rachael Newitt <renewitt@google.com> Commit-Queue: Jiewei Qian <qjw@chromium.org>
This commit is contained in:
@@ -6,7 +6,7 @@
|
||||
from collections import defaultdict
|
||||
import os
|
||||
import sys
|
||||
from typing import Dict, List, Set, Tuple
|
||||
from typing import Dict, List, Set, Tuple, Union, Optional, Literal, Any
|
||||
|
||||
_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
|
||||
# The repo's root directory.
|
||||
@@ -24,7 +24,19 @@ import metadata.validation_result as vr
|
||||
|
||||
|
||||
class DependencyMetadata:
|
||||
"""The metadata for a single dependency."""
|
||||
"""The metadata for a single dependency.
|
||||
|
||||
See @property declarations below to retrieve validated fields for
|
||||
downstream consumption.
|
||||
|
||||
The property returns `None` if the provided value (e.g. in
|
||||
README.chromium file) is clearly invalid.
|
||||
|
||||
Otherwise, it returns a suitably typed value (see comments on each
|
||||
property).
|
||||
|
||||
To retrieve unvalidated (i.e. raw values) fields, use get_entries().
|
||||
"""
|
||||
|
||||
# Fields that are always required.
|
||||
_MANDATORY_FIELDS = {
|
||||
@@ -171,9 +183,11 @@ class DependencyMetadata:
|
||||
version_value = self._metadata.get(known_fields.VERSION)
|
||||
date_value = self._metadata.get(known_fields.DATE)
|
||||
revision_value = self._metadata.get(known_fields.REVISION)
|
||||
if ((not version_value or version_util.is_unknown(version_value))
|
||||
and (not date_value or util.is_unknown(date_value))
|
||||
and (not revision_value or util.is_unknown(revision_value))):
|
||||
if ((not version_value
|
||||
or version_util.version_is_unknown(version_value)) and
|
||||
(not date_value or version_util.version_is_unknown(date_value))
|
||||
and (not revision_value
|
||||
or version_util.version_is_unknown(revision_value))):
|
||||
versioning_fields = [
|
||||
known_fields.VERSION, known_fields.DATE, known_fields.REVISION
|
||||
]
|
||||
@@ -199,3 +213,105 @@ class DependencyMetadata:
|
||||
results.append(result)
|
||||
|
||||
return results
|
||||
|
||||
def _return_as_property(self, field: field_types.MetadataField) -> Any:
    """Looks up `field` in the parsed metadata and returns its narrowed value.

    This backs the @property accessors of DependencyMetadata: the raw
    entry is handed to the field's `narrow_type()` so callers receive a
    validated, sanitized value instead of the raw text.

    Returns:
        None when the metadata has no entry for `field`; otherwise
        whatever `field.narrow_type()` produces for the raw value.
    """
    assert field in known_fields.ALL_FIELDS

    stored = self._metadata.get(field)
    # A missing entry means the field was never set.
    return None if stored is None else field.narrow_type(stored)
|
||||
|
||||
@property
def name(self) -> Optional[str]:
    """Returns the narrowed Name value, or None if the field is not set."""
    return self._return_as_property(known_fields.NAME)

@property
def short_name(self) -> Optional[str]:
    """Returns the narrowed Short Name value, or None if the field is not
    set.
    """
    return self._return_as_property(known_fields.SHORT_NAME)

@property
def url(self) -> Optional[List[str]]:
    """
    Returns a list of URLs that points to upstream repo.
    The URLs are guaranteed to `urllib.parse.urlparse` without errors.

    Returns None if this repository is the canonical repository of this
    dependency (see is_canonical below).
    """
    return self._return_as_property(known_fields.URL)

@property
def is_canonical(self) -> bool:
    """
    Returns whether this repository is the canonical public repository of this dependency.

    This is derived from a special value in the URL field.
    """
    # A missing URL entry is checked as an empty string.
    value = self._metadata.get(known_fields.URL, "")
    return known_fields.URL.repo_is_canonical(value)

@property
def version(self) -> Optional[str]:
    """Returns the narrowed Version value, or None if the field is not set
    or its narrow_type() rejects the value.
    """
    return self._return_as_property(known_fields.VERSION)

@property
def date(self) -> Optional[str]:
    """Returns in "YYYY-MM-DD" format."""
    return self._return_as_property(known_fields.DATE)

@property
def revision(self) -> Optional[str]:
    """Returns the narrowed Revision value, or None if the field is not set
    or its narrow_type() rejects the value.
    """
    return self._return_as_property(known_fields.REVISION)

@property
def license(self) -> Optional[List[str]]:
    """Returns a list of license names."""
    return self._return_as_property(known_fields.LICENSE)
|
||||
|
||||
@property
def license_file(self) -> Optional[str]:
    """Returns the License File value, or None if the field is not set.

    NOTE(review): the value is currently passed through unparsed (the
    type-narrowing test expects "src/file" to come back as-is), so the
    return type is a string rather than a list of paths -- confirm
    against LicenseFileField.narrow_type.
    """
    # TODO(b/321154076): Consider excluding files that don't exist on
    # disk if it's not too hard.
    #
    # Plumbing src_root and dependency_dir into field validator is
    # required.
    return self._return_as_property(known_fields.LICENSE_FILE)
|
||||
|
||||
@property
def security_critical(self) -> Optional[bool]:
    """Returns the Security Critical value narrowed to a bool, or None if
    the field is not set.
    """
    return self._return_as_property(known_fields.SECURITY_CRITICAL)

@property
def shipped(self) -> Optional[bool]:
    """Returns the Shipped value narrowed to a bool, or None if the field
    is not set.
    """
    return self._return_as_property(known_fields.SHIPPED)

@property
def shipped_in_chromium(self) -> Optional[bool]:
    """Returns the Shipped in Chromium value narrowed to a bool, or None if
    the field is not set.
    """
    return self._return_as_property(known_fields.SHIPPED_IN_CHROMIUM)

@property
def license_android_compatible(self) -> Optional[bool]:
    """Returns the License Android Compatible value narrowed to a bool, or
    None if the field is not set.
    """
    return self._return_as_property(known_fields.LICENSE_ANDROID_COMPATIBLE)

@property
def cpe_prefix(self) -> Optional[str]:
    """Returns a lowercase string (CPE names are case-insensitive)."""
    return self._return_as_property(known_fields.CPE_PREFIX)

@property
def description(self) -> Optional[str]:
    """Returns the narrowed Description value, or None if the field is not
    set.
    """
    return self._return_as_property(known_fields.DESCRIPTION)

@property
def local_modifications(self) -> Optional[Union[Literal[False], str]]:
    """Returns `False` if there's no local modifications.
    Otherwise the text content extracted from the metadata.
    """
    return self._return_as_property(known_fields.LOCAL_MODIFICATIONS)
|
||||
|
||||
@@ -68,12 +68,15 @@ class CPEPrefixField(field_types.SingleLineTextField):
|
||||
def __init__(self):
|
||||
super().__init__(name="CPEPrefix")
|
||||
|
||||
def _is_valid(self, value: str) -> bool:
    """Returns whether `value` is 'unknown' or passes either CPE check
    (formatted string or URI).
    """
    if util.is_unknown(value):
        return True
    if is_formatted_string_cpe(value):
        return True
    return is_uri_cpe(value)
|
||||
|
||||
def validate(self, value: str) -> Optional[vr.ValidationResult]:
|
||||
"""Checks the given value is either 'unknown', or conforms to
|
||||
either the CPE 2.3 or 2.2 format.
|
||||
"""
|
||||
if (util.is_unknown(value) or is_formatted_string_cpe(value)
|
||||
or is_uri_cpe(value)):
|
||||
if self._is_valid(value):
|
||||
return None
|
||||
|
||||
return vr.ValidationError(
|
||||
@@ -85,3 +88,13 @@ class CPEPrefixField(field_types.SingleLineTextField):
|
||||
"https://nvd.nist.gov/products/cpe/search.",
|
||||
f"Current value: '{value}'.",
|
||||
])
|
||||
|
||||
def narrow_type(self, value: str) -> Optional[str]:
    """Returns the lowercased CPE prefix, or None if `value` is not valid."""
    if self._is_valid(value):
        # CPE names are case-insensitive, so lowercase is used as the
        # normalized form. See https://cpe.mitre.org/specification/.
        return value.lower()

    return None
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
import datetime
|
||||
import os
|
||||
import sys
|
||||
from typing import Optional
|
||||
from typing import Optional, Tuple
|
||||
|
||||
_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
|
||||
# The repo's root directory.
|
||||
@@ -59,13 +59,65 @@ _RECOGNIZED_DATE_FORMATS = (
|
||||
)
|
||||
|
||||
|
||||
def format_matches(value: str, date_format: str):
|
||||
"""Returns whether the given value matches the date format."""
|
||||
def parse_with_format(value: str,
|
||||
date_format: str) -> Optional[datetime.datetime]:
|
||||
"""Returns datetime object if `value` can be parsed with `date_format`"""
|
||||
try:
|
||||
datetime.datetime.strptime(value, date_format)
|
||||
return datetime.datetime.strptime(value, date_format)
|
||||
except ValueError:
|
||||
return False
|
||||
return True
|
||||
return None
|
||||
|
||||
|
||||
def to_preferred_format(dt: datetime.datetime) -> str:
    """Renders `dt` as a string using the preferred date format."""
    return dt.strftime(_PREFERRED_PREFIX_FORMAT)
|
||||
|
||||
|
||||
def parse_date(value: str) -> Optional[Tuple[str, bool]]:
    """Attempts to interpret `value` as a date.

    Strategies are tried in order, and a later one is only attempted when
    every earlier one produced nothing:
      1. the first whitespace-separated token against the preferred
         prefix format;
      2. that token against each recognized alternative prefix format;
      3. the whole (stripped) value against each recognized date format;
      4. the whole value via `datetime.datetime.fromisoformat`.

    Returns:
        None if `value` is blank or no strategy could parse it. Otherwise
        a `(date, is_ambiguous)` tuple: `date` is the first successful
        parse rendered by `to_preferred_format`, and `is_ambiguous` is
        True when the successful parses render to different dates.
        For example, "2020/03/05" matches both "YYYY/MM/DD" and
        "YYYY/DD/MM".
    """
    text = value.strip()
    if not text:
        return None

    prefix = text.split()[0]

    def _try_formats(candidate, formats):
        # Keep every format that parses, in the order the formats are given.
        attempts = (parse_with_format(candidate, fmt) for fmt in formats)
        return [parsed for parsed in attempts if parsed]

    candidates = _try_formats(prefix, (_PREFERRED_PREFIX_FORMAT, ))
    if not candidates:
        candidates = _try_formats(prefix, _RECOGNIZED_PREFIX_FORMATS)
    if not candidates:
        candidates = _try_formats(text, _RECOGNIZED_DATE_FORMATS)
    if not candidates:
        # Last resort: ISO 8601.
        try:
            candidates = [datetime.datetime.fromisoformat(text)]
        except ValueError:
            return None

    # More than one distinct rendering means the input was ambiguous.
    rendered = [to_preferred_format(dt) for dt in candidates]
    return rendered[0], len(set(rendered)) > 1
|
||||
|
||||
|
||||
class DateField(field_types.SingleLineTextField):
|
||||
@@ -81,32 +133,29 @@ class DateField(field_types.SingleLineTextField):
|
||||
reason=f"{self._name} is empty.",
|
||||
additional=["Provide date in format YYYY-MM-DD."])
|
||||
|
||||
# Check if the first part (to ignore timezone info) uses the
|
||||
# preferred format.
|
||||
parts = value.split()
|
||||
if format_matches(parts[0], _PREFERRED_PREFIX_FORMAT):
|
||||
if not (parsed := parse_date(value)):
|
||||
return vr.ValidationError(
|
||||
reason=f"{self._name} is invalid.",
|
||||
additional=["Use YYYY-MM-DD.", f"Current value is '{value}'."])
|
||||
|
||||
parsed_date, is_ambiguous = parsed
|
||||
if is_ambiguous:
|
||||
return vr.ValidationError(
|
||||
reason=f"{self._name} is ambiguous.",
|
||||
additional=["Use YYYY-MM-DD.", f"Current value is '{value}'."])
|
||||
|
||||
if not parse_with_format(value, _PREFERRED_PREFIX_FORMAT):
|
||||
return vr.ValidationWarning(
|
||||
reason=f"{self._name} isn't using the canonical format.",
|
||||
additional=["Use YYYY-MM-DD.", f"Current value is '{value}'."])
|
||||
|
||||
return None
|
||||
|
||||
def narrow_type(self, value: str) -> Optional[str]:
|
||||
"""Returns ISO 8601 date string, guarantees to be YYYY-MM-DD or None."""
|
||||
if not (parsed := parse_date(value)):
|
||||
return None
|
||||
|
||||
# Check if the first part (to ignore timezone info) uses a
|
||||
# recognized format.
|
||||
for prefix_format in _RECOGNIZED_PREFIX_FORMATS:
|
||||
if format_matches(parts[0], prefix_format):
|
||||
return vr.ValidationWarning(
|
||||
reason=f"{self._name} is not in the preferred format.",
|
||||
additional=[
|
||||
"Use YYYY-MM-DD.", f"Current value is '{value}'."
|
||||
])
|
||||
|
||||
# Check the entire value for recognized date formats.
|
||||
for date_format in _RECOGNIZED_DATE_FORMATS:
|
||||
if format_matches(value, date_format):
|
||||
return vr.ValidationWarning(
|
||||
reason=f"{self._name} is not in the preferred format.",
|
||||
additional=[
|
||||
"Use YYYY-MM-DD.", f"Current value is '{value}'."
|
||||
])
|
||||
|
||||
# Return an error as the value's format was not recognized.
|
||||
return vr.ValidationError(
|
||||
reason=f"{self._name} is invalid.",
|
||||
additional=["Use YYYY-MM-DD.", f"Current value is '{value}'."])
|
||||
# We still return a date even if the parsing result is ambiguous. An
|
||||
# date that's a few month off is better than nothing at all.
|
||||
return parsed[0]
|
||||
|
||||
@@ -49,6 +49,11 @@ _PATTERN_LICENSE_ALLOWED = re.compile(
|
||||
|
||||
_PATTERN_VERBOSE_DELIMITER = re.compile(r" and | or | / ")
|
||||
|
||||
# Split on the canonical delimiter, or any of the non-canonical delimiters.
|
||||
_PATTERN_SPLIT_LICENSE = re.compile("{}|{}".format(
|
||||
_PATTERN_VERBOSE_DELIMITER.pattern,
|
||||
field_types.MetadataField.VALUE_DELIMITER))
|
||||
|
||||
|
||||
def process_license_value(value: str,
|
||||
atomic_delimiter: str) -> List[Tuple[str, bool]]:
|
||||
@@ -134,3 +139,11 @@ class LicenseField(field_types.SingleLineTextField):
|
||||
reason=f"Separate licenses using a '{self.VALUE_DELIMITER}'.")
|
||||
|
||||
return None
|
||||
|
||||
def narrow_type(self, value: str) -> Optional[List[str]]:
    """Splits the License value into individual license names.

    The value is split on `_PATTERN_SPLIT_LICENSE`; each piece is stripped
    of surrounding whitespace and empty pieces are dropped.

    Returns:
        None if `value` is empty; otherwise the list of license names
        (which may be empty if the value held only delimiters).
    """
    if not value:
        # Empty License field is equivalent to "not declared".
        return None

    # A comprehension avoids a lambda parameter named `str`, which would
    # shadow the builtin.
    pieces = (part.strip() for part in _PATTERN_SPLIT_LICENSE.split(value))
    return [piece for piece in pieces if piece]
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from typing import Optional, Union, Literal
|
||||
|
||||
_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
|
||||
# The repo's root directory.
|
||||
@@ -39,13 +40,28 @@ class LocalModificationsField(field_types.FreeformTextField):
|
||||
def __init__(self):
|
||||
super().__init__(name="Local Modifications", structured=False)
|
||||
|
||||
def should_terminate_field(self, field_value) -> bool:
|
||||
field_value = field_value.strip()
|
||||
|
||||
# If we can reasonably infer the field value means "No modification",
|
||||
# terminate this field to avoid over extraction.
|
||||
def _is_no_modification(self, value) -> bool:
|
||||
for pattern in _PATTERNS_NOT_MODIFIED:
|
||||
if pattern.match(field_value):
|
||||
if pattern.match(value):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def should_terminate_field(self, value) -> bool:
    """Returns whether the field should be terminated at this value.

    The field is terminated when the whitespace-stripped value can
    reasonably be read as "No modification", which avoids over
    extraction.
    """
    return self._is_no_modification(value.strip())
|
||||
|
||||
def narrow_type(self, value) -> Optional[Union[Literal[False], str]]:
    """Returns `False` when there are no local modifications, otherwise the
    field text.

    Surrounding whitespace is ignored when deciding, matching
    should_terminate_field(): an empty or whitespace-only value, or one
    that matches a "no modification" pattern once stripped, yields
    `False`. Any other value is returned unchanged.
    """
    normalized = value.strip() if value else ""
    if not normalized:
        return False

    # Match against the stripped text so leading whitespace cannot hide a
    # "no modification" phrase from the start-anchored patterns.
    if self._is_no_modification(normalized):
        return False

    return value
|
||||
|
||||
41
metadata/fields/custom/revision.py
Normal file
41
metadata/fields/custom/revision.py
Normal file
@@ -0,0 +1,41 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright 2024 The Chromium Authors. All rights reserved.
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from typing import Optional
|
||||
|
||||
_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
|
||||
# The repo's root directory.
|
||||
_ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", "..", ".."))
|
||||
|
||||
# Add the repo's root directory for clearer imports.
|
||||
sys.path.insert(0, _ROOT_DIR)
|
||||
|
||||
import metadata.fields.field_types as field_types
|
||||
import metadata.fields.custom.version as version_field
|
||||
import metadata.fields.util as util
|
||||
import metadata.validation_result as vr
|
||||
|
||||
|
||||
class RevisionField(field_types.SingleLineTextField):
    """Custom field for the revision."""

    def __init__(self):
        super().__init__(name="Revision")

    def narrow_type(self, value: str) -> Optional[str]:
        """Returns the revision text, or None when it carries no information.

        The value is first narrowed by the parent class. None is returned
        if the result is empty, denotes an unknown version, or is one of
        the known invalid placeholder values.
        """
        narrowed = super().narrow_type(value)

        # Checks short-circuit in this order: empty, unknown, placeholder.
        is_uninformative = (not narrowed
                            or version_field.version_is_unknown(narrowed)
                            or util.is_known_invalid_value(narrowed))
        return None if is_uninformative else narrowed
|
||||
@@ -6,7 +6,9 @@
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from typing import Optional
|
||||
from typing import Optional, List
|
||||
from urllib.parse import urlparse, urlunparse
|
||||
from itertools import filterfalse
|
||||
|
||||
_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
|
||||
# The repo's root directory.
|
||||
@@ -19,29 +21,73 @@ import metadata.fields.field_types as field_types
|
||||
import metadata.fields.util as util
|
||||
import metadata.validation_result as vr
|
||||
|
||||
_PATTERN_URL_ALLOWED = re.compile(r"^(https?|ftp|git):\/\/\S+$")
|
||||
_PATTERN_URL_CANONICAL_REPO = re.compile(
|
||||
r"^This is the canonical (public )?repo(sitory)?\.?$", re.IGNORECASE)
|
||||
|
||||
_SUPPORTED_SCHEMES = {
|
||||
'http',
|
||||
'https',
|
||||
'git',
|
||||
'ftp',
|
||||
}
|
||||
|
||||
# URLs can't contain whitespace. Treat whitespace as a delimiter so we can handle cases where the URL field contains one URL per line (without a comma delimiter).
|
||||
_PATTERN_URL_DELIMITER = re.compile("{}|{}".format(
|
||||
r'\s+', field_types.MetadataField.VALUE_DELIMITER))
|
||||
|
||||
|
||||
def _split_urls(value: str) -> List[str]:
    """Breaks a URL field value into its individual, non-empty URLs."""
    pieces = (piece.strip() for piece in _PATTERN_URL_DELIMITER.split(value))
    return [piece for piece in pieces if piece]
|
||||
|
||||
|
||||
def _url_canonicalize(url: str) -> str:
|
||||
"""Return the canonicalized URL (e.g. make scheme lower case)."""
|
||||
return urlunparse(urlparse(url))
|
||||
|
||||
|
||||
def _url_is_canonical(url: str) -> bool:
|
||||
return url == _url_canonicalize(url)
|
||||
|
||||
|
||||
def _url_is_valid(url: str) -> bool:
|
||||
"""Checks whether the given `url` is acceptable:
|
||||
* url is can be parsed without an error.
|
||||
* url uses a supported scheme / protocol.
|
||||
"""
|
||||
try:
|
||||
u = urlparse(url)
|
||||
except:
|
||||
return False
|
||||
|
||||
if u.scheme not in _SUPPORTED_SCHEMES:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
class URLField(field_types.MetadataField):
|
||||
"""Custom field for the package URL(s)."""
|
||||
def __init__(self):
|
||||
super().__init__(name="URL")
|
||||
|
||||
def repo_is_canonical(self, value: str):
    """Returns whether `value`, ignoring surrounding whitespace, is the
    message stating that this repository is the canonical repository.
    """
    trimmed = value.strip()
    return util.matches(_PATTERN_URL_CANONICAL_REPO, trimmed)
|
||||
|
||||
def validate(self, value: str) -> Optional[vr.ValidationResult]:
|
||||
"""Checks the given value has acceptable URL values only.
|
||||
|
||||
Note: this field supports multiple values.
|
||||
"""
|
||||
if util.matches(_PATTERN_URL_CANONICAL_REPO, value):
|
||||
if self.repo_is_canonical(value):
|
||||
return None
|
||||
|
||||
invalid_values = []
|
||||
for url in value.split(self.VALUE_DELIMITER):
|
||||
url = url.strip()
|
||||
if not util.matches(_PATTERN_URL_ALLOWED, url):
|
||||
invalid_values.append(url)
|
||||
urls = _split_urls(value)
|
||||
if not urls:
|
||||
return vr.ValidationError(reason=f"{self._name} must be provided.")
|
||||
|
||||
invalid_values = list(filterfalse(_url_is_valid, urls))
|
||||
|
||||
if invalid_values:
|
||||
return vr.ValidationError(
|
||||
@@ -53,4 +99,26 @@ class URLField(field_types.MetadataField):
|
||||
f"Invalid values: {util.quoted(invalid_values)}.",
|
||||
])
|
||||
|
||||
non_canon_values = list(filterfalse(_url_is_canonical, urls))
|
||||
if non_canon_values:
|
||||
canon_values = list(map(_url_canonicalize, non_canon_values))
|
||||
return vr.ValidationWarning(
|
||||
reason=f"{self._name} is contains non-canonical URLs.",
|
||||
additional=[
|
||||
"URLs should be canonical and well-formed."
|
||||
f"Non canonical values: {util.quoted(non_canon_values)}.",
|
||||
f"Canonicalized URLs should be: {util.quoted(canon_values)}."
|
||||
])
|
||||
|
||||
return None
|
||||
|
||||
def narrow_type(self, value) -> Optional[List[str]]:
    """Returns the canonicalized valid URLs found in `value`.

    Returns:
        None if `value` is empty or states that this repository is the
        canonical one. Otherwise a list of the URLs that pass
        `_url_is_valid`, each canonicalized; invalid URLs are dropped, so
        the list may be empty.
    """
    if not value or self.repo_is_canonical(value):
        return None

    return [
        _url_canonicalize(url) for url in _split_urls(value)
        if _url_is_valid(url)
    ]
|
||||
|
||||
@@ -19,17 +19,16 @@ import metadata.fields.field_types as field_types
|
||||
import metadata.fields.util as util
|
||||
import metadata.validation_result as vr
|
||||
|
||||
_PATTERN_NOT_APPLICABLE = re.compile(r"^N ?\/ ?A$", re.IGNORECASE)
|
||||
|
||||
|
||||
def is_unknown(value: str) -> bool:
|
||||
def version_is_unknown(value: str) -> bool:
|
||||
"""Returns whether the value denotes the version being unknown."""
|
||||
return (value == "0" or util.matches(_PATTERN_NOT_APPLICABLE, value)
|
||||
return (value == "0" or util.is_not_applicable(value)
|
||||
or util.is_unknown(value))
|
||||
|
||||
|
||||
class VersionField(field_types.SingleLineTextField):
|
||||
"""Custom field for the package version."""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__(name="Version")
|
||||
|
||||
@@ -55,3 +54,16 @@ class VersionField(field_types.SingleLineTextField):
|
||||
])
|
||||
|
||||
return None
|
||||
|
||||
def narrow_type(self, value: str) -> Optional[str]:
    """Returns the version text, or None when it carries no information.

    The value is first narrowed by the parent class. None is returned if
    the result is empty, denotes an unknown version, or is one of the
    known invalid placeholder values.
    """
    narrowed = super().narrow_type(value)

    # Checks short-circuit in this order: empty, unknown, placeholder.
    is_uninformative = (not narrowed or version_is_unknown(narrowed)
                        or util.is_known_invalid_value(narrowed))
    return None if is_uninformative else narrowed
|
||||
|
||||
@@ -7,6 +7,7 @@ import os
|
||||
import re
|
||||
import sys
|
||||
from typing import Optional
|
||||
from enum import Enum
|
||||
|
||||
_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
|
||||
# The repo's root directory.
|
||||
@@ -26,7 +27,6 @@ _PATTERN_YES_OR_NO = re.compile(r"^(yes|no)$", re.IGNORECASE)
|
||||
# case-insensitive. e.g. "No (test only)", "Yes?"
|
||||
_PATTERN_STARTS_WITH_YES_OR_NO = re.compile(r"^(yes|no)", re.IGNORECASE)
|
||||
|
||||
|
||||
class MetadataField:
|
||||
"""Base class for all metadata fields."""
|
||||
|
||||
@@ -73,6 +73,15 @@ class MetadataField:
|
||||
"""
|
||||
raise NotImplementedError(f"{self._name} field validation not defined.")
|
||||
|
||||
def narrow_type(self, value):
    """Returns a narrowly typed (e.g. bool) value for this field for
    downstream consumption.

    The alternative being the downstream parses the string again.

    This base implementation applies no coercion and always raises;
    field classes that support narrowing provide their own version.

    Raises:
        NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError(
        f"{self._name} field value coersion not defined.")
|
||||
|
||||
|
||||
class FreeformTextField(MetadataField):
|
||||
"""Field where the value is freeform text."""
|
||||
@@ -86,6 +95,9 @@ class FreeformTextField(MetadataField):
|
||||
|
||||
return None
|
||||
|
||||
def narrow_type(self, value):
    """Returns `value` unchanged; freeform text needs no coercion.

    The value must not be None (enforced with an assert).
    """
    assert value is not None
    return value
|
||||
|
||||
class SingleLineTextField(FreeformTextField):
|
||||
"""Field where the field as a whole is a single line of text."""
|
||||
@@ -126,3 +138,6 @@ class YesNoField(SingleLineTextField):
|
||||
f"This field must be {util.YES} or {util.NO}.",
|
||||
f"Current value is '{value}'.",
|
||||
])
|
||||
|
||||
def narrow_type(self, value) -> Optional[bool]:
    """Returns the value interpreted as a boolean.

    The parent class narrows the value first, then
    util.infer_as_boolean() interprets the result.

    NOTE(review): infer_as_boolean() is called without `default`, which
    its signature sets to True -- so a value it cannot classify
    presumably comes back as True rather than None; confirm this is
    intended.
    """
    return util.infer_as_boolean(super().narrow_type(value))
|
||||
|
||||
@@ -21,12 +21,12 @@ import metadata.fields.custom.license_file
|
||||
import metadata.fields.custom.local_modifications
|
||||
import metadata.fields.custom.url
|
||||
import metadata.fields.custom.version
|
||||
import metadata.fields.custom.revision
|
||||
import metadata.fields.field_types as field_types
|
||||
|
||||
# Freeform text fields.
|
||||
NAME = field_types.SingleLineTextField("Name")
|
||||
SHORT_NAME = field_types.SingleLineTextField("Short Name")
|
||||
REVISION = field_types.SingleLineTextField("Revision")
|
||||
DESCRIPTION = field_types.FreeformTextField("Description", structured=False)
|
||||
|
||||
# Yes/no fields.
|
||||
@@ -43,6 +43,7 @@ LICENSE = metadata.fields.custom.license.LicenseField()
|
||||
LICENSE_FILE = metadata.fields.custom.license_file.LicenseFileField()
|
||||
URL = metadata.fields.custom.url.URLField()
|
||||
VERSION = metadata.fields.custom.version.VersionField()
|
||||
REVISION = metadata.fields.custom.revision.RevisionField()
|
||||
LOCAL_MODIFICATIONS = metadata.fields.custom.local_modifications.LocalModificationsField(
|
||||
)
|
||||
|
||||
|
||||
@@ -26,6 +26,22 @@ _PATTERN_STARTS_WITH_YES = re.compile(r"^yes", re.IGNORECASE)
|
||||
# case-insensitive.
|
||||
_PATTERN_STARTS_WITH_NO = re.compile(r"^no", re.IGNORECASE)
|
||||
|
||||
# Variants of N/A (Not Applicable).
|
||||
_PATTERN_NOT_APPLICABLE = re.compile(r"^(N ?\/ ?A)\.?|na\.?|not applicable\.?$",
|
||||
re.IGNORECASE)
|
||||
|
||||
# A collection of values that provides little information.
|
||||
# Use lower-case for easier comparison.
|
||||
_KNOWN_INVALID_VALUES = {
|
||||
"0",
|
||||
"varies",
|
||||
"-",
|
||||
"unknown",
|
||||
"head",
|
||||
"see deps",
|
||||
"deps",
|
||||
}
|
||||
|
||||
|
||||
def matches(pattern: re.Pattern, value: str) -> bool:
|
||||
"""Returns whether the value matches the pattern."""
|
||||
@@ -61,3 +77,20 @@ def infer_as_boolean(value: str, default: bool = True) -> bool:
|
||||
return False
|
||||
else:
|
||||
return default
|
||||
|
||||
|
||||
def is_known_invalid_value(value: str):
    """Returns whether `value` is one of the known placeholder values that
    carry little machine readable information.

    The comparison is case-insensitive; an empty value is not considered
    a known invalid value.
    """
    return bool(value) and value.lower() in _KNOWN_INVALID_VALUES
|
||||
|
||||
|
||||
def is_not_applicable(value: str) -> bool:
    """Returns whether the value matches `_PATTERN_NOT_APPLICABLE`, i.e. is
    a variant of N/A (Not Applicable).
    """
    return matches(_PATTERN_NOT_APPLICABLE, value)
|
||||
|
||||
@@ -95,17 +95,18 @@ class FieldValidationTest(unittest.TestCase):
|
||||
def test_date_validation(self):
|
||||
self._run_field_validation(
|
||||
field=known_fields.DATE,
|
||||
valid_values=[
|
||||
"2012-03-04", "2012-03-04 UTC", "2012-03-04 UTC+10:00"
|
||||
],
|
||||
valid_values=["2012-03-04"],
|
||||
error_values=[
|
||||
"",
|
||||
"\n",
|
||||
"N/A",
|
||||
"03-04-12", # Ambiguous month and day.
|
||||
"04/03/2012", # Ambiguous month and day.
|
||||
],
|
||||
warning_values=[
|
||||
"2012-03-04 UTC", "2012-03-04 UTC+10:00",
|
||||
"2012/03/04 UTC+10:00", "20120304", "April 3, 2012",
|
||||
"3 Apr 2012", "03-04-12", "04/03/2012",
|
||||
"3 Apr 2012", "30/12/2000", "20-03-2020",
|
||||
"Tue Apr 3 05:06:07 2012 +0800"
|
||||
],
|
||||
)
|
||||
@@ -181,14 +182,18 @@ class FieldValidationTest(unittest.TestCase):
|
||||
"https://www.example.com/a",
|
||||
"http://www.example.com/b",
|
||||
"ftp://www.example.com/c,git://www.example.com/d",
|
||||
"https://www.example.com/a\n https://example.com/b",
|
||||
"This is the canonical public repository",
|
||||
],
|
||||
warning_values=[
|
||||
# Scheme is case-insensitive, but should be lower case.
|
||||
"Https://www.example.com/g",
|
||||
],
|
||||
error_values=[
|
||||
"",
|
||||
"\n",
|
||||
"ghttps://www.example.com/e",
|
||||
"https://www.example.com/ f",
|
||||
"Https://www.example.com/g",
|
||||
"This is an unrecognized message for the URL",
|
||||
],
|
||||
)
|
||||
|
||||
187
metadata/tests/type_narrowing_test.py
Normal file
187
metadata/tests/type_narrowing_test.py
Normal file
@@ -0,0 +1,187 @@
|
||||
#!/usr/bin/env python3
|
||||
# Copyright 2024 The Chromium Authors. All rights reserved.
|
||||
# Use of this source code is governed by a BSD-style license that can be
|
||||
# found in the LICENSE file.
|
||||
|
||||
import os
|
||||
import sys
|
||||
import unittest
|
||||
from typing import Any, Callable
|
||||
|
||||
_THIS_DIR = os.path.abspath(os.path.dirname(__file__))
|
||||
# The repo's root directory.
|
||||
_ROOT_DIR = os.path.abspath(os.path.join(_THIS_DIR, "..", ".."))
|
||||
|
||||
# Add the repo's root directory for clearer imports.
|
||||
sys.path.insert(0, _ROOT_DIR)
|
||||
|
||||
from metadata.fields.field_types import MetadataField
|
||||
import metadata.fields.known as fields
|
||||
from metadata.dependency_metadata import DependencyMetadata
|
||||
|
||||
|
||||
class FieldValidationTest(unittest.TestCase):
|
||||
"""Tests narrow_type() on fields we validate and extract structural data."""
|
||||
|
||||
def _test_on_field(self, field: MetadataField) -> Callable:
    """Builds an assertion helper bound to `field`.

    The returned callable takes (value, expected_value, reason), runs
    `field.narrow_type(value)` and asserts that the result equals
    `expected_value`. `reason` is only used in the failure message.
    """

    def expect(value: str, expected_value: Any, reason: str):
        output = field.narrow_type(value)
        self.assertEqual(
            output, expected_value,
            f'Field "{field.get_name()}" should {reason}. Input value'
            f' was: "{value}", but got coerced into {repr(output)}')

    return expect
|
||||
|
||||
def test_name(self):
|
||||
expect = self._test_on_field(fields.NAME)
|
||||
expect("package name", "package name", "return as-is")
|
||||
expect("", "", "not coerce empty string to `None`")
|
||||
|
||||
def test_short_name(self):
|
||||
expect = self._test_on_field(fields.SHORT_NAME)
|
||||
expect("pkg-name", "pkg-name", "return as-is")
|
||||
expect("", "", "not coerce empty string to `None`")
|
||||
|
||||
def test_url(self):
|
||||
expect = self._test_on_field(fields.URL)
|
||||
expect("", None, "treat empty string as None")
|
||||
expect("https://example.com/", ["https://example.com/"],
|
||||
"return valid url")
|
||||
expect("https://example.com/,\nhttps://example2.com/",
|
||||
["https://example.com/", "https://example2.com/"],
|
||||
"return multiple valid urls")
|
||||
expect("file://test", [], "reject unsupported scheme")
|
||||
expect("file://test,\nhttps://example.com", ["https://example.com"],
|
||||
"reject unsupported scheme")
|
||||
expect("HTTPS://example.com", ["https://example.com"],
|
||||
"canonicalize url")
|
||||
expect("http", [], "reject invalid url")
|
||||
expect("This is the canonical repo.", None,
|
||||
"understand the this repo is canonical message")
|
||||
|
||||
def test_version(self):
|
||||
expect = self._test_on_field(fields.VERSION)
|
||||
expect("", None, "treat empty string as None")
|
||||
expect("0", None, "treat invalid value as None")
|
||||
expect("varies", None, "treat invalid value as None")
|
||||
expect("see deps", None, "treat invalid value as None")
|
||||
expect("N/A", None, "N/A is treated as None")
|
||||
expect("Not applicable.", None, "N/A is treated as None")
|
||||
|
||||
def test_date(self):
|
||||
expect = self._test_on_field(fields.DATE)
|
||||
expect("", None, "treat empty string as None")
|
||||
expect("0", None, "treat invalid value as None")
|
||||
expect("varies", None, "treat invalid value as None")
|
||||
expect("2024-01-02", "2024-01-02", "accepts ISO 8601 date")
|
||||
expect("2024-01-02T03:04:05Z", "2024-01-02",
|
||||
"accepts ISO 8601 date time")
|
||||
expect("Jan 2 2024", "2024-01-02", "accepts locale format")
|
||||
expect(
|
||||
"02/03/2000", "2000-03-02",
|
||||
"accepts ambiguous MM/DD format (better than no date info at all)")
|
||||
expect("11/30/2000", "2000-11-30", "accepts unambiguous MM/DD format")
|
||||
|
||||
def test_revision(self):
|
||||
expect = self._test_on_field(fields.REVISION)
|
||||
expect("", None, "treat empty string as None")
|
||||
expect("0", None, "treat invalid value as None")
|
||||
expect("varies", None, "treat invalid value as None")
|
||||
expect("see deps", None, "treat invalid value as None")
|
||||
expect("N/A", None, "N/A is treated as None")
|
||||
expect("Not applicable.", None, "N/A is treated as None")
|
||||
|
||||
def test_license(self):
    """LICENSE: empty yields None; otherwise a list of license names."""
    expect = self._test_on_field(fields.LICENSE)

    # (raw field value, expected property value, assertion message)
    cases = (
        ("", None, "treat empty string as None"),
        ("LICENSE-1", ["LICENSE-1"], "return as a list"),
        ("LGPL v2 and BSD", ["LGPL v2", "BSD"], "return as a list"),
    )
    for raw_value, expected, message in cases:
        expect(raw_value, expected, message)
|
||||
|
||||
def test_license_file(self):
    """LICENSE_FILE: the raw field value is expected back unchanged."""
    # TODO(b/321154076): Consider excluding files that don't exist on
    # disk if it's not too hard.
    #
    # For now the unparsed license file field is returned as-is.
    self._test_on_field(fields.LICENSE_FILE)(
        "src/file", "src/file", "return value as-is")
|
||||
|
||||
def test_security_critical(self):
    """SECURITY_CRITICAL: yes/no answers map to True/False."""
    expect = self._test_on_field(fields.SECURITY_CRITICAL)

    # (raw field value, expected property value, assertion message)
    cases = (
        ("yes", True, "understand truthy value"),
        ("Yes", True, "understand truthy value"),
        ("no", False, "understand falsey value"),
        ("No, because", False,
         "understand falsey value, with description"),
    )
    for raw_value, expected, message in cases:
        expect(raw_value, expected, message)
|
||||
|
||||
def test_shipped(self):
    """SHIPPED: yes/no answers, with or without a comment, map to bools."""
    expect = self._test_on_field(fields.SHIPPED)

    # (raw field value, expected property value, assertion message)
    cases = (
        ("yes", True, "understand truthy value"),
        ("Yes, but", True, "understand truthy value with extra comment"),
        ("no", False, "understand falsey value"),
        ("no, because", False,
         "understand falsey value, with extra comment"),
    )
    for raw_value, expected, message in cases:
        expect(raw_value, expected, message)
|
||||
|
||||
def test_shipped_in_chromium(self):
    """SHIPPED_IN_CHROMIUM: yes/no answers map to True/False."""
    expect = self._test_on_field(fields.SHIPPED_IN_CHROMIUM)

    # (raw field value, expected property value, assertion message)
    cases = (
        ("yes", True, "understand truthy value"),
        ("Yes", True, "understand truthy value"),
        ("no", False, "understand falsey value"),
        ("no, because", False,
         "understand falsey value, with extra comment"),
    )
    for raw_value, expected, message in cases:
        expect(raw_value, expected, message)
|
||||
|
||||
def test_license_android_compatible(self):
    """LICENSE_ANDROID_COMPATIBLE: yes/no answers map to True/False."""
    expect = self._test_on_field(fields.LICENSE_ANDROID_COMPATIBLE)

    # (raw field value, expected property value, assertion message)
    cases = (
        ("yes", True, "understand truthy value"),
        ("Yes", True, "understand truthy value"),
        ("no", False, "understand falsey value"),
        ("no, because", False,
         "understand falsey value, with extra comment"),
    )
    for raw_value, expected, message in cases:
        expect(raw_value, expected, message)
|
||||
|
||||
def test_cpe_prefix(self):
    """CPE_PREFIX: "unknown" and valid prefixes pass; bad input is None."""
    expect = self._test_on_field(fields.CPE_PREFIX)

    # (raw field value, expected property value, assertion message)
    cases = (
        ("unknown", "unknown", "understand unknown"),
        ("bad_cpe_format", None, "rejects invalid value"),
        ("cpe:/a:d3", "cpe:/a:d3", "accept a valid cpe prefix"),
        ("cpe:/a:D3", "cpe:/a:d3", "normalize to lowercase"),
    )
    for raw_value, expected, message in cases:
        expect(raw_value, expected, message)
|
||||
|
||||
def test_description(self):
    """DESCRIPTION: the raw field value is expected back unchanged."""
    self._test_on_field(fields.DESCRIPTION)(
        "desc", "desc", "return value as-is")
|
||||
|
||||
def test_local_modification(self):
    """LOCAL_MODIFICATIONS: "no changes" values yield False, else the text."""
    expect = self._test_on_field(fields.LOCAL_MODIFICATIONS)

    # (raw field value, expected property value, assertion message)
    cases = (
        ("none", False, "understands none"),
        ("(none)", False, "understands none"),
        ("not applicable", False, "understands N/A"),
        ("", False, "treat empty string as False"),
        ("modified X file", "modified X file",
         "return value as-is if it doesn't mean no modification"),
    )
    for raw_value, expected, message in cases:
        expect(raw_value, expected, message)
|
||||
|
||||
def test_dependency_data_return_as_property(self):
    """Values stored with add_entry() are readable through properties."""
    dependency = DependencyMetadata()

    # (field name, raw value) pairs, added in this order.
    raw_entries = (
        ("name", "package"),
        ("url", "git://git@example.com,\nbad_url://example.com"),
        ("security critical", "no"),
        ("date", "2024-01-02"),
        ("revision", ""),
    )
    for field_name, raw_value in raw_entries:
        dependency.add_entry(field_name, raw_value)

    self.assertEqual(dependency.name, "package")
    # Only the first of the two URL entries is expected back.
    self.assertEqual(dependency.url, ["git://git@example.com"])
    self.assertEqual(dependency.security_critical, False)
    self.assertEqual(dependency.date, "2024-01-02")
    # An empty revision and a never-added version both read as None.
    self.assertEqual(dependency.revision, None)
    self.assertEqual(dependency.version, None)
|
||||
|
||||
def test_dependency_data_repo_is_canonical(self):
    """The canonical-repo URL message gives url None and is_canonical True."""
    dependency = DependencyMetadata()
    for field_name, raw_value in (
        ("name", "package"),
        ("url", "This is the canonical repo."),
    ):
        dependency.add_entry(field_name, raw_value)

    self.assertEqual(dependency.url, None)
    self.assertEqual(dependency.is_canonical, True)
|
||||
|
||||
|
||||
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
Reference in New Issue
Block a user