Update metrics linting to be able to handle custom metrics (#18733)

Part of https://github.com/element-hq/synapse/issues/18592
This commit is contained in:
Eric Eastwood
2025-08-01 15:34:11 -05:00
committed by GitHub
parent e43a1cec84
commit e16fbdcdcc
10 changed files with 293 additions and 35 deletions

View File

@@ -23,16 +23,21 @@
can crop up, e.g the cache descriptors.
"""
from typing import Callable, Optional, Tuple, Type, Union
import enum
from typing import Callable, Mapping, Optional, Tuple, Type, Union
import attr
import mypy.types
from mypy.erasetype import remove_instance_last_known_values
from mypy.errorcodes import ErrorCode
from mypy.nodes import ARG_NAMED_OPT, ListExpr, NameExpr, TempNode, TupleExpr, Var
from mypy.plugin import (
ClassDefContext,
Context,
FunctionLike,
FunctionSigContext,
MethodSigContext,
MypyFile,
Plugin,
)
from mypy.typeops import bind_self
@@ -41,12 +46,15 @@ from mypy.types import (
CallableType,
Instance,
NoneType,
Options,
TupleType,
TypeAliasType,
TypeVarType,
UninhabitedType,
UnionType,
)
from mypy_zope import plugin as mypy_zope_plugin
from pydantic.mypy import plugin as mypy_pydantic_plugin
PROMETHEUS_METRIC_MISSING_SERVER_NAME_LABEL = ErrorCode(
"missing-server-name-label",
@@ -54,19 +62,153 @@ PROMETHEUS_METRIC_MISSING_SERVER_NAME_LABEL = ErrorCode(
category="per-homeserver-tenant-metrics",
)
PROMETHEUS_METRIC_MISSING_FROM_LIST_TO_CHECK = ErrorCode(
"metric-type-missing-from-list",
"Every Prometheus metric type must be included in the `prometheus_metric_fullname_to_label_arg_map`.",
category="per-homeserver-tenant-metrics",
)
class Sentinel(enum.Enum):
# defining a sentinel in this way allows mypy to correctly handle the
# type of a dictionary lookup and subsequent type narrowing.
UNSET_SENTINEL = object()
@attr.s(auto_attribs=True)
class ArgLocation:
keyword_name: str
"""
The keyword argument name for this argument
"""
position: int
"""
The 0-based positional index of this argument
"""
prometheus_metric_fullname_to_label_arg_map: Mapping[str, Optional[ArgLocation]] = {
# `Collector` subclasses:
"prometheus_client.metrics.MetricWrapperBase": ArgLocation("labelnames", 2),
"prometheus_client.metrics.Counter": ArgLocation("labelnames", 2),
"prometheus_client.metrics.Histogram": ArgLocation("labelnames", 2),
"prometheus_client.metrics.Gauge": ArgLocation("labelnames", 2),
"prometheus_client.metrics.Summary": ArgLocation("labelnames", 2),
"prometheus_client.metrics.Info": ArgLocation("labelnames", 2),
"prometheus_client.metrics.Enum": ArgLocation("labelnames", 2),
"synapse.metrics.LaterGauge": ArgLocation("labelnames", 2),
"synapse.metrics.InFlightGauge": ArgLocation("labels", 2),
"synapse.metrics.GaugeBucketCollector": ArgLocation("labelnames", 2),
"prometheus_client.registry.Collector": None,
"prometheus_client.registry._EmptyCollector": None,
"prometheus_client.registry.CollectorRegistry": None,
"prometheus_client.process_collector.ProcessCollector": None,
"prometheus_client.platform_collector.PlatformCollector": None,
"prometheus_client.gc_collector.GCCollector": None,
"synapse.metrics._gc.GCCounts": None,
"synapse.metrics._gc.PyPyGCStats": None,
"synapse.metrics._reactor_metrics.ReactorLastSeenMetric": None,
"synapse.metrics.CPUMetrics": None,
"synapse.metrics.jemalloc.JemallocCollector": None,
"synapse.util.metrics.DynamicCollectorRegistry": None,
"synapse.metrics.background_process_metrics._Collector": None,
#
# `Metric` subclasses:
"prometheus_client.metrics_core.Metric": None,
"prometheus_client.metrics_core.UnknownMetricFamily": ArgLocation("labels", 3),
"prometheus_client.metrics_core.CounterMetricFamily": ArgLocation("labels", 3),
"prometheus_client.metrics_core.GaugeMetricFamily": ArgLocation("labels", 3),
"prometheus_client.metrics_core.SummaryMetricFamily": ArgLocation("labels", 3),
"prometheus_client.metrics_core.InfoMetricFamily": ArgLocation("labels", 3),
"prometheus_client.metrics_core.HistogramMetricFamily": ArgLocation("labels", 3),
"prometheus_client.metrics_core.GaugeHistogramMetricFamily": ArgLocation(
"labels", 4
),
"prometheus_client.metrics_core.StateSetMetricFamily": ArgLocation("labels", 3),
"synapse.metrics.GaugeHistogramMetricFamilyWithLabels": ArgLocation(
"labelnames", 4
),
}
"""
Map from the fullname of the Prometheus `Metric`/`Collector` classes to the keyword
argument name and positional index of the label names. This map is useful because
different metrics have different signatures for passing in label names and we just need
to know where to look.
This map should include any metrics that we collect with Prometheus. Which corresponds
to anything that inherits from `prometheus_client.registry.Collector`
(`synapse.metrics._types.Collector`) or `prometheus_client.metrics_core.Metric`. The
exhaustiveness of this list is enforced by `analyze_prometheus_metric_classes`.
The entries with `None` always fail the lint because they don't have a `labelnames`
argument (therefore, no `SERVER_NAME_LABEL`), but we include them here so that people
can notice and manually allow via a type ignore comment as the source of truth
should be in the source code.
"""
# Unbound at this point because we don't know the mypy version yet.
# This is set in the `plugin(...)` function below.
MypyPydanticPluginClass: Type[Plugin]
MypyZopePluginClass: Type[Plugin]
class SynapsePlugin(Plugin):
def __init__(self, options: Options):
super().__init__(options)
self.mypy_pydantic_plugin = MypyPydanticPluginClass(options)
self.mypy_zope_plugin = MypyZopePluginClass(options)
def set_modules(self, modules: dict[str, MypyFile]) -> None:
"""
This is called by mypy internals. We have to override this to ensure it's also
called for any other plugins that we're manually handling.
Here is how mypy describes it:
> [`self._modules`] can't be set in `__init__` because it is executed too soon
> in `build.py`. Therefore, `build.py` *must* set it later before graph processing
> starts by calling `set_modules()`.
"""
super().set_modules(modules)
self.mypy_pydantic_plugin.set_modules(modules)
self.mypy_zope_plugin.set_modules(modules)
def get_base_class_hook(
self, fullname: str
) -> Optional[Callable[[ClassDefContext], None]]:
def _get_base_class_hook(ctx: ClassDefContext) -> None:
# Run any `get_base_class_hook` checks from other plugins first.
#
# Unfortunately, because mypy only chooses the first plugin that returns a
# non-None value (known-limitation, c.f.
# https://github.com/python/mypy/issues/19524), we workaround this by
# putting our custom plugin first in the plugin order and then calling the
# other plugin's hook manually followed by our own checks.
if callback := self.mypy_pydantic_plugin.get_base_class_hook(fullname):
callback(ctx)
if callback := self.mypy_zope_plugin.get_base_class_hook(fullname):
callback(ctx)
# Now run our own checks
analyze_prometheus_metric_classes(ctx)
return _get_base_class_hook
def get_function_signature_hook(
self, fullname: str
) -> Optional[Callable[[FunctionSigContext], FunctionLike]]:
if fullname in (
"prometheus_client.metrics.Counter",
"prometheus_client.metrics.Histogram",
"prometheus_client.metrics.Gauge",
# TODO: Add other prometheus_client metrics that need checking as we
# refactor, see https://github.com/element-hq/synapse/issues/18592
):
return check_prometheus_metric_instantiation
# Strip off the unique identifier for classes that are dynamically created inside
# functions. ex. `synapse.metrics.jemalloc.JemallocCollector@185` (this is the line
# number)
if "@" in fullname:
fullname = fullname.split("@", 1)[0]
# Look for any Prometheus metrics to make sure they have the `SERVER_NAME_LABEL`
# label.
if fullname in prometheus_metric_fullname_to_label_arg_map.keys():
# Because it's difficult to determine the `fullname` of the function in the
# callback, let's just pass it in while we have it.
return lambda ctx: check_prometheus_metric_instantiation(ctx, fullname)
return None
@@ -90,7 +232,44 @@ class SynapsePlugin(Plugin):
return None
def check_prometheus_metric_instantiation(ctx: FunctionSigContext) -> CallableType:
def analyze_prometheus_metric_classes(ctx: ClassDefContext) -> None:
"""
Cross-check the list of Prometheus metric classes against the
`prometheus_metric_fullname_to_label_arg_map` to ensure the list is exhaustive and
up-to-date.
"""
fullname = ctx.cls.fullname
# Strip off the unique identifier for classes that are dynamically created inside
# functions. ex. `synapse.metrics.jemalloc.JemallocCollector@185` (this is the line
# number)
if "@" in fullname:
fullname = fullname.split("@", 1)[0]
if any(
ancestor_type.fullname
in (
# All of the Prometheus metric classes inherit from the `Collector`.
"prometheus_client.registry.Collector",
"synapse.metrics._types.Collector",
# And custom metrics that inherit from `Metric`.
"prometheus_client.metrics_core.Metric",
)
for ancestor_type in ctx.cls.info.mro
):
if fullname not in prometheus_metric_fullname_to_label_arg_map:
ctx.api.fail(
f"Expected {fullname} to be in `prometheus_metric_fullname_to_label_arg_map`, "
f"but it was not found. This is a problem with our custom mypy plugin. "
f"Please add it to the map.",
Context(),
code=PROMETHEUS_METRIC_MISSING_FROM_LIST_TO_CHECK,
)
def check_prometheus_metric_instantiation(
ctx: FunctionSigContext, fullname: str
) -> CallableType:
"""
Ensure that the `prometheus_client` metrics include the `SERVER_NAME_LABEL` label
when instantiated.
@@ -103,18 +282,49 @@ def check_prometheus_metric_instantiation(ctx: FunctionSigContext) -> CallableTy
Python garbage collection, and Twisted reactor tick time, which shouldn't have the
`SERVER_NAME_LABEL`. In those cases, use a type ignore comment to disable the
check, e.g. `# type: ignore[missing-server-name-label]`.
Args:
ctx: The `FunctionSigContext` from mypy.
fullname: The fully qualified name of the function being called,
e.g. `"prometheus_client.metrics.Counter"`
"""
# The true signature, this isn't being modified so this is what will be returned.
signature: CallableType = ctx.default_signature
signature = ctx.default_signature
# Find where the label names argument is in the function signature.
arg_location = prometheus_metric_fullname_to_label_arg_map.get(
fullname, Sentinel.UNSET_SENTINEL
)
assert arg_location is not Sentinel.UNSET_SENTINEL, (
f"Expected to find {fullname} in `prometheus_metric_fullname_to_label_arg_map`, "
f"but it was not found. This is a problem with our custom mypy plugin. "
f"Please add it to the map. Context: {ctx.context}"
)
# People should be using `# type: ignore[missing-server-name-label]` for
# process-level metrics that should not have the `SERVER_NAME_LABEL`.
if arg_location is None:
ctx.api.fail(
f"{signature.name} does not have a `labelnames`/`labels` argument "
"(if this is untrue, update `prometheus_metric_fullname_to_label_arg_map` "
"in our custom mypy plugin) and should probably have a type ignore comment, "
"e.g. `# type: ignore[missing-server-name-label]`. The reason we don't "
"automatically ignore this is the source of truth should be in the source code.",
ctx.context,
code=PROMETHEUS_METRIC_MISSING_SERVER_NAME_LABEL,
)
return signature
# Sanity check the arguments are still as expected in this version of
# `prometheus_client`. ex. `Counter(name, documentation, labelnames, ...)`
#
# `signature.arg_names` should be: ["name", "documentation", "labelnames", ...]
if len(signature.arg_names) < 3 or signature.arg_names[2] != "labelnames":
if (
len(signature.arg_names) < (arg_location.position + 1)
or signature.arg_names[arg_location.position] != arg_location.keyword_name
):
ctx.api.fail(
f"Expected the 3rd argument of {signature.name} to be 'labelnames', but got "
f"{signature.arg_names[2]}",
f"Expected argument number {arg_location.position + 1} of {signature.name} to be `labelnames`/`labels`, "
f"but got {signature.arg_names[arg_location.position]}",
ctx.context,
)
return signature
@@ -137,7 +347,11 @@ def check_prometheus_metric_instantiation(ctx: FunctionSigContext) -> CallableTy
# ...
# ]
# ```
labelnames_arg_expression = ctx.args[2][0] if len(ctx.args[2]) > 0 else None
labelnames_arg_expression = (
ctx.args[arg_location.position][0]
if len(ctx.args[arg_location.position]) > 0
else None
)
if isinstance(labelnames_arg_expression, (ListExpr, TupleExpr)):
# Check if the `labelnames` argument includes the `server_name` label (`SERVER_NAME_LABEL`).
for labelname_expression in labelnames_arg_expression.items:
@@ -476,10 +690,13 @@ def is_cacheable(
def plugin(version: str) -> Type[SynapsePlugin]:
global MypyPydanticPluginClass, MypyZopePluginClass
# This is the entry point of the plugin, and lets us deal with the fact
# that the mypy plugin interface is *not* stable by looking at the version
# string.
#
# However, since we pin the version of mypy Synapse uses in CI, we don't
# really care.
MypyPydanticPluginClass = mypy_pydantic_plugin(version)
MypyZopePluginClass = mypy_zope_plugin(version)
return SynapsePlugin