From 895d503bf340b23059e01eb2030b42dcffd701f5 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Fri, 24 Apr 2026 15:15:36 -0700 Subject: [PATCH 01/17] Add explicit compatibility guard rails to cuda.pathfinder. Introduce CompatibilityGuardRails plus related errors and tests so callers can opt into CTK and driver compatibility checks while reusing the existing pathfinder lookup APIs. Made-with: Cursor --- cuda_pathfinder/cuda/pathfinder/__init__.py | 9 + .../pathfinder/_compatibility_guard_rails.py | 555 ++++++++++++++++++ cuda_pathfinder/docs/source/api.rst | 3 + .../tests/test_compatibility_guard_rails.py | 373 ++++++++++++ .../tests/test_driver_lib_loading.py | 14 +- 5 files changed, 949 insertions(+), 5 deletions(-) create mode 100644 cuda_pathfinder/cuda/pathfinder/_compatibility_guard_rails.py create mode 100644 cuda_pathfinder/tests/test_compatibility_guard_rails.py diff --git a/cuda_pathfinder/cuda/pathfinder/__init__.py b/cuda_pathfinder/cuda/pathfinder/__init__.py index dc818dfd08f..022ebb6b372 100644 --- a/cuda_pathfinder/cuda/pathfinder/__init__.py +++ b/cuda_pathfinder/cuda/pathfinder/__init__.py @@ -11,6 +11,15 @@ find_nvidia_binary_utility as find_nvidia_binary_utility, ) from cuda.pathfinder._binaries.supported_nvidia_binaries import SUPPORTED_BINARIES as _SUPPORTED_BINARIES +from cuda.pathfinder._compatibility_guard_rails import ( + CompatibilityCheckError as CompatibilityCheckError, +) +from cuda.pathfinder._compatibility_guard_rails import ( + CompatibilityGuardRails as CompatibilityGuardRails, +) +from cuda.pathfinder._compatibility_guard_rails import ( + CompatibilityInsufficientMetadataError as CompatibilityInsufficientMetadataError, +) from cuda.pathfinder._dynamic_libs.load_dl_common import ( DynamicLibNotAvailableError as DynamicLibNotAvailableError, ) diff --git a/cuda_pathfinder/cuda/pathfinder/_compatibility_guard_rails.py b/cuda_pathfinder/cuda/pathfinder/_compatibility_guard_rails.py new file mode 100644 index 
# Leading "<major>.<minor>" of a CUDA Toolkit version string such as
# "12.9.20250531"; anything after the minor component is ignored.
# The group names are load-bearing: _parse_ctk_version() reads the match via
# match.group("major") and match.group("minor").
_CTK_VERSION_RE = re.compile(r"^(?P<major>\d+)\.(?P<minor>\d+)")

# An exact "name==version" pin taken from a Requires-Dist entry. A trailing
# ".*" wildcard is accepted but kept out of the captured version, and the pin
# may be followed by an environment marker introduced by ";".
# The group names are load-bearing: _cuda_toolkit_requirement_maps() reads the
# match via match.group("name") and match.group("version").
_REQUIRES_DIST_RE = re.compile(
    r"^\s*(?P<name>[A-Za-z0-9_.-]+)\s*==\s*(?P<version>[0-9][A-Za-z0-9.+-]*?)(?:\.\*)?(?:\s*;|$)"
)
def _coerce_constraint(name: str, raw_value: ConstraintArg) -> ComparisonConstraint | None:
    """Normalize a user-supplied version constraint.

    Args:
        name: Keyword being validated (for example ``"ctk_major"``); it is
            only used to build error messages.
        raw_value: ``None`` for "no constraint", an ``int`` for an exact
            match, an ``(operator, value)`` tuple, or a string such as
            ``">=12"`` (a bare number string means ``==``).

    Returns:
        The equivalent ``ComparisonConstraint``, or ``None`` when
        ``raw_value`` is ``None``.

    Raises:
        ValueError: If ``raw_value`` is of an unsupported type, names an
            unsupported operator, or carries a non-integer value.
    """
    unsupported_form = f"{name} must be an int, a (operator, value) tuple, or a string like '>=12'."

    if raw_value is None:
        return None
    # bool is a subclass of int; without this guard ctk_major=True would be
    # silently accepted as the constraint "==1".
    if isinstance(raw_value, bool):
        raise ValueError(unsupported_form)
    if isinstance(raw_value, int):
        return ComparisonConstraint("==", raw_value)
    if isinstance(raw_value, tuple):
        if len(raw_value) != 2:
            raise ValueError(f"{name} tuple constraints must have exactly two elements.")
        operator, value = raw_value
        if operator not in ("==", "<", "<=", ">", ">="):
            raise ValueError(f"{name} has unsupported operator {operator!r}.")
        # Same bool-is-an-int pitfall as above, for the tuple form.
        if isinstance(value, bool) or not isinstance(value, int):
            raise ValueError(f"{name} constraint value must be an integer.")
        return ComparisonConstraint(operator, value)
    if isinstance(raw_value, str):
        match = re.fullmatch(r"\s*(==|<|<=|>|>=)?\s*(\d+)\s*", raw_value)
        if match is None:
            raise ValueError(unsupported_form)
        # A missing operator means an exact match.
        return ComparisonConstraint(match.group(1) or "==", int(match.group(2)))
    raise ValueError(unsupported_form)
@functools.cache
def _owned_distribution_candidates(abs_path: str) -> tuple[tuple[str, str], ...]:
    """Return the installed distributions that list ``abs_path`` among their files.

    Every installed distribution is scanned and each recorded file is resolved
    with ``dist.locate_file`` before being compared with ``abs_path``.
    Distributions without a usable name or without a file listing are skipped.

    Args:
        abs_path: Path of the artifact whose owning distribution is wanted.

    Returns:
        A sorted tuple of ``(distribution name, distribution version)`` pairs;
        empty when no distribution lists the file. Results are memoized per
        ``abs_path`` by ``functools.cache``.
    """

    def _comparable(path: str) -> str:
        # normcase folds letter case and separators on Windows (and is a no-op
        # on POSIX), so a path reported with different casing than the
        # distribution's file listing still compares equal there.
        return os.path.normcase(os.path.normpath(os.path.abspath(path)))

    target = _comparable(abs_path)
    owners: set[tuple[str, str]] = set()
    for dist in importlib.metadata.distributions():
        dist_name = _distribution_name(dist)
        if not dist_name:
            continue
        # dist.files may be None, hence the "or ()".
        if any(_comparable(str(dist.locate_file(file))) == target for file in dist.files or ()):
            owners.add((dist_name, dist.version))
    return tuple(sorted(owners))
@functools.cache
def _read_ctk_version(ctk_root: str) -> CtkVersion | None:
    """Read the CUDA Toolkit version recorded in ``<ctk_root>/version.json``.

    The version is taken from ``payload["cuda"]["version"]`` and handed to
    ``_parse_ctk_version``.

    Args:
        ctk_root: Directory that may contain a ``version.json`` file.

    Returns:
        The parsed ``CtkVersion``, or ``None`` when the file is missing,
        cannot be read or decoded, is not valid JSON, or does not have the
        expected ``{"cuda": {"version": "<str>"}}`` shape. Results (including
        ``None``) are memoized per ``ctk_root`` by ``functools.cache``.
    """
    version_json_path = os.path.join(ctk_root, "version.json")
    if not os.path.isfile(version_json_path):
        return None
    try:
        with open(version_json_path, encoding="utf-8") as fobj:
            payload = json.load(fobj)
    except (OSError, ValueError):
        # json.JSONDecodeError and UnicodeDecodeError are ValueError
        # subclasses. _find_enclosing_ctk_root() probes every ancestor
        # directory, so an unrelated or corrupt version.json must read as
        # "not a CTK root" rather than abort the whole compatibility check.
        return None
    if not isinstance(payload, dict):
        return None
    cuda_entry = payload.get("cuda")
    if not isinstance(cuda_entry, dict):
        return None
    cuda_version = cuda_entry.get("version")
    if not isinstance(cuda_version, str):
        return None
    return _parse_ctk_version(cuda_version)
def _resolve_dynamic_lib_item(libname: str, loaded: LoadedDL) -> ResolvedItem:
    """Build the ``ResolvedItem`` record for a loaded dynamic library.

    Args:
        libname: Short library name; used as the item name and as the key
            into ``LIB_DESCRIPTORS`` for its ``packaged_with`` value.
        loaded: Result of loading the library.

    Returns:
        A ``ResolvedItem`` of kind ``"dynamic-lib"`` produced by
        ``_resolve_item`` from the library's path and ``found_via``.

    Raises:
        CompatibilityInsufficientMetadataError: If ``loaded.abs_path`` is
            ``None``, since there is then no path to derive metadata from.
    """
    lib_abs_path = loaded.abs_path
    if lib_abs_path is None:
        raise CompatibilityInsufficientMetadataError(
            f"Could not determine an absolute path for dynamic library {libname!r}."
        )
    return _resolve_item(
        name=libname,
        kind="dynamic-lib",
        packaged_with=LIB_DESCRIPTORS[libname].packaged_with,
        abs_path=lib_abs_path,
        found_via=loaded.found_via,
    )
def compatibility_check(
    driver_cuda_version: DriverCudaVersion, item1: ResolvedItem, item2: ResolvedItem
) -> CompatibilityResult:
    """Decide whether two resolved items and the driver pass the v1 guard rails.

    The verdict is reached in this order:

    1. Each item (``item1`` first) must have ``packaged_with == "ctk"`` and
       carry both a CTK version and the source of that version; otherwise the
       status is ``"insufficient_metadata"``.
    2. Both items must resolve to the same CTK version; otherwise the status
       is ``"incompatible"``.
    3. The driver's CUDA major version must not be lower than the CTK major
       version; otherwise the status is ``"incompatible"``.

    Args:
        driver_cuda_version: CUDA version supported by the driver.
        item1: First resolved item.
        item2: Second resolved item (may be the same item as ``item1``).

    Returns:
        A ``CompatibilityResult`` whose status is ``"compatible"`` only when
        all three steps pass; its message explains the verdict.
    """
    # Step 1: without definitive metadata no verdict can be given.
    for candidate in (item1, item2):
        if candidate.packaged_with != "ctk":
            problem = (
                "v1 compatibility checks only give definitive answers for "
                f"packaged_with='ctk' items. {candidate.describe()} is packaged_with={candidate.packaged_with!r}."
            )
        elif candidate.ctk_version is None or candidate.ctk_version_source is None:
            problem = (
                "v1 compatibility checks require either an enclosing CUDA Toolkit root "
                "with version.json or wheel metadata that can be traced to an installed "
                f"cuda-toolkit distribution. Could not determine the CTK version for {candidate.describe()}."
            )
        else:
            continue
        return CompatibilityResult(status="insufficient_metadata", message=problem)

    ctk1, ctk2 = item1.ctk_version, item2.ctk_version
    # Guaranteed by the loop above; the asserts narrow the optional types.
    assert ctk1 is not None
    assert ctk2 is not None

    if ctk1 != ctk2:
        # Step 2: both items must come from the same CTK major.minor.
        status = "incompatible"
        message = (
            f"{item1.describe()} resolves to CTK {ctk1}, while "
            f"{item2.describe()} resolves to CTK {ctk2}. "
            "v1 requires an exact CTK major.minor match."
        )
    elif driver_cuda_version.major < ctk1.major:
        # Step 3: the driver must not be older than the toolkit's major version.
        status = "incompatible"
        message = (
            f"Driver version {driver_cuda_version.encoded} only supports CUDA major version {driver_cuda_version.major}, "
            f"but {item1.describe()} requires CTK {ctk1}. "
            "v1 requires driver_major >= ctk_major."
        )
    else:
        status = "compatible"
        message = (
            f"{item1.describe()} and {item2.describe()} both resolve to CTK {ctk1}, "
            f"and driver version {driver_cuda_version.encoded} satisfies the v1 driver guard rail."
        )
    return CompatibilityResult(status=status, message=message)
+ ) from exc + return self._driver_cuda_version + + def _enforce_supported_packaging(self, item: ResolvedItem) -> None: + if item.packaged_with == "ctk": + return + raise CompatibilityInsufficientMetadataError( + "v1 compatibility checks only give definitive answers for " + f"packaged_with='ctk' items. {item.describe()} is packaged_with={item.packaged_with!r}." + ) + + def _enforce_ctk_metadata(self, item: ResolvedItem) -> None: + if item.ctk_version is not None and item.ctk_version_source is not None: + return + raise CompatibilityInsufficientMetadataError( + "v1 compatibility checks require either an enclosing CUDA Toolkit root " + "with version.json or wheel metadata that can be traced to an installed " + f"cuda-toolkit distribution. Could not determine the CTK version for {item.describe()}." + ) + + def _enforce_constraints(self, item: ResolvedItem) -> None: + assert item.ctk_version is not None + if self._ctk_major_constraint is not None and not self._ctk_major_constraint.matches(item.ctk_version.major): + raise CompatibilityCheckError( + f"{item.describe()} resolves to CTK {item.ctk_version}, which does not satisfy " + f"ctk_major{self._ctk_major_constraint}." + ) + if self._ctk_minor_constraint is not None and not self._ctk_minor_constraint.matches(item.ctk_version.minor): + raise CompatibilityCheckError( + f"{item.describe()} resolves to CTK {item.ctk_version}, which does not satisfy " + f"ctk_minor{self._ctk_minor_constraint}." 
+ ) + + def _anchor_item(self) -> ResolvedItem | None: + if not self._resolved_items: + return None + return self._resolved_items[0] + + def _remember(self, item: ResolvedItem) -> None: + if item not in self._resolved_items: + self._resolved_items.append(item) + + def _register_and_check(self, item: ResolvedItem) -> None: + self._enforce_supported_packaging(item) + self._enforce_ctk_metadata(item) + self._enforce_constraints(item) + anchor = self._anchor_item() + if anchor is None: + anchor = item + compatibility_check(self._get_driver_cuda_version(), anchor, item).require_compatible() + self._remember(item) + + def load_nvidia_dynamic_lib(self, libname: str) -> LoadedDL: + """Load a CUDA dynamic library and reject v1-incompatible resolutions.""" + loaded = _load_nvidia_dynamic_lib(libname) + self._register_and_check(_resolve_dynamic_lib_item(libname, loaded)) + return loaded + + def locate_nvidia_header_directory(self, libname: str) -> LocatedHeaderDir | None: + """Locate a CUDA header directory and reject v1-incompatible resolutions.""" + located = _locate_nvidia_header_directory(libname) + if located is None: + return None + self._register_and_check(_resolve_header_item(libname, located)) + return located + + def find_nvidia_header_directory(self, libname: str) -> str | None: + """Locate a CUDA header directory and return only the path string.""" + located = self.locate_nvidia_header_directory(libname) + return None if located is None else located.abs_path + + def locate_static_lib(self, name: str) -> LocatedStaticLib: + """Locate a CUDA static library and reject v1-incompatible resolutions.""" + located = _locate_static_lib(name) + self._register_and_check(_resolve_static_lib_item(located)) + return located + + def find_static_lib(self, name: str) -> str: + """Locate a CUDA static library and return only the path string.""" + abs_path = self.locate_static_lib(name).abs_path + assert isinstance(abs_path, str) + return abs_path + + def locate_bitcode_lib(self, 
name: str) -> LocatedBitcodeLib: + """Locate a CUDA bitcode library and reject v1-incompatible resolutions.""" + located = _locate_bitcode_lib(name) + self._register_and_check(_resolve_bitcode_lib_item(located)) + return located + + def find_bitcode_lib(self, name: str) -> str: + """Locate a CUDA bitcode library and return only the path string.""" + abs_path = self.locate_bitcode_lib(name).abs_path + assert isinstance(abs_path, str) + return abs_path + + def find_nvidia_binary_utility(self, utility_name: str) -> str | None: + """Locate a CUDA binary utility and reject v1-incompatible resolutions.""" + abs_path = _find_nvidia_binary_utility(utility_name) + if abs_path is None: + return None + self._register_and_check(_resolve_binary_item(utility_name, abs_path)) + assert isinstance(abs_path, str) + return abs_path diff --git a/cuda_pathfinder/docs/source/api.rst b/cuda_pathfinder/docs/source/api.rst index e49478c09ec..a4a2ef821fd 100644 --- a/cuda_pathfinder/docs/source/api.rst +++ b/cuda_pathfinder/docs/source/api.rst @@ -18,6 +18,9 @@ CUDA bitcode and static libraries. get_cuda_path_or_home + CompatibilityGuardRails + CompatibilityCheckError + CompatibilityInsufficientMetadataError SUPPORTED_NVIDIA_LIBNAMES load_nvidia_dynamic_lib LoadedDL diff --git a/cuda_pathfinder/tests/test_compatibility_guard_rails.py b/cuda_pathfinder/tests/test_compatibility_guard_rails.py new file mode 100644 index 00000000000..74a2175c017 --- /dev/null +++ b/cuda_pathfinder/tests/test_compatibility_guard_rails.py @@ -0,0 +1,373 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
def _assert_real_ctk_backed_path(path: str) -> None:
    """Assert that ``path`` plausibly belongs to a real CUDA Toolkit install.

    The path is accepted when any of the following holds:

    * one of its components is ``site-packages``;
    * its directory, or any ancestor directory, contains ``version.json``;
    * it lies under the directory named by ``CUDA_PATH`` or ``CUDA_HOME``.

    Args:
        path: File or directory path returned by one of the lookup APIs.

    Raises:
        AssertionError: If none of the conditions above holds.
    """
    norm_path = os.path.normpath(os.path.abspath(path))
    if "site-packages" in Path(norm_path).parts:
        return

    current = Path(norm_path)
    if current.is_file():
        current = current.parent
    if any((candidate / "version.json").is_file() for candidate in (current, *current.parents)):
        return

    for env_var in ("CUDA_PATH", "CUDA_HOME"):
        ctk_root = os.environ.get(env_var)
        if not ctk_root:
            continue
        norm_ctk_root = os.path.normpath(os.path.abspath(ctk_root))
        try:
            common = os.path.commonpath((norm_path, norm_ctk_root))
        except ValueError:
            # commonpath refuses paths that share no common root (for example
            # different drives on Windows); such a path is simply not under
            # this root, which must end in the AssertionError below rather
            # than in an unrelated ValueError.
            continue
        if common == norm_ctk_root:
            return

    raise AssertionError(
        "Expected a site-packages path, a path under a CTK root with version.json, "
        f"or a path under CUDA_PATH/CUDA_HOME, got {path!r}"
    )
hdr_root / "targets" / "x86_64-linux" / "include" + _touch(hdr_dir / "nvrtc.h") + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) + monkeypatch.setattr( + compatibility_module, + "_locate_nvidia_header_directory", + lambda _libname: LocatedHeaderDir(abs_path=str(hdr_dir), found_via="CUDA_PATH"), + ) + + guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)) + guard_rails.load_nvidia_dynamic_lib("nvrtc") + + with pytest.raises(CompatibilityCheckError, match="exact CTK major.minor match"): + guard_rails.find_nvidia_header_directory("nvrtc") + + +def test_driver_major_must_not_be_older_than_ctk_major(monkeypatch, tmp_path): + ctk_root = tmp_path / "cuda-13.0" + _write_version_json(ctk_root, "13.0.20251003") + lib_path = _touch(ctk_root / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.13") + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) + + guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(12080)) + + with pytest.raises(CompatibilityCheckError, match="driver_major >= ctk_major"): + guard_rails.load_nvidia_dynamic_lib("nvrtc") + + +def test_missing_version_json_raises_insufficient_metadata(monkeypatch, tmp_path): + lib_path = _touch(tmp_path / "no-version-json" / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) + + guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)) + + with pytest.raises(CompatibilityInsufficientMetadataError, match="version.json"): + guard_rails.load_nvidia_dynamic_lib("nvrtc") + + +def test_other_packaging_raises_insufficient_metadata(monkeypatch, tmp_path): + abs_path = _touch(tmp_path / "site-packages" / "nvidia" / "nvshmem" / "lib" / "libnvshmem_device.bc") + + monkeypatch.setattr( + compatibility_module, + 
"_locate_bitcode_lib", + lambda _name: _located_bitcode_lib("nvshmem_device", abs_path), + ) + + guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)) + + with pytest.raises(CompatibilityInsufficientMetadataError, match="packaged_with='ctk'"): + guard_rails.find_bitcode_lib("nvshmem_device") + + +def test_constraints_accept_string_and_tuple_forms(monkeypatch, tmp_path): + ctk_root = tmp_path / "cuda-12.9" + _write_version_json(ctk_root, "12.9.20250531") + lib_path = _touch(ctk_root / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) + + guard_rails = CompatibilityGuardRails( + ctk_major=(">=", 12), + ctk_minor=">=9", + driver_cuda_version=_driver_cuda_version(13000), + ) + + loaded = guard_rails.load_nvidia_dynamic_lib("nvrtc") + + assert loaded.abs_path == lib_path + + +def test_constraint_failure_raises(monkeypatch, tmp_path): + ctk_root = tmp_path / "cuda-12.9" + _write_version_json(ctk_root, "12.9.20250531") + lib_path = _touch(ctk_root / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) + + guard_rails = CompatibilityGuardRails( + ctk_major=12, + ctk_minor="<9", + driver_cuda_version=_driver_cuda_version(13000), + ) + + with pytest.raises(CompatibilityCheckError, match="ctk_minor<9"): + guard_rails.load_nvidia_dynamic_lib("nvrtc") + + +def test_static_bitcode_and_binary_methods_participate_in_checks(monkeypatch, tmp_path): + ctk_root = tmp_path / "cuda-12.9" + _write_version_json(ctk_root, "12.9.20250531") + + lib_path = _touch(ctk_root / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") + static_path = _touch(ctk_root / "targets" / "x86_64-linux" / "lib" / "libcudadevrt.a") + bitcode_path = _touch(ctk_root / "nvvm" / "libdevice" / "libdevice.10.bc") + binary_path = _touch(ctk_root / "bin" / 
"nvcc") + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) + monkeypatch.setattr( + compatibility_module, + "_locate_static_lib", + lambda _name: _located_static_lib("cudadevrt", static_path), + ) + monkeypatch.setattr( + compatibility_module, + "_locate_bitcode_lib", + lambda _name: _located_bitcode_lib("device", bitcode_path), + ) + monkeypatch.setattr( + compatibility_module, + "_find_nvidia_binary_utility", + lambda _utility_name: binary_path, + ) + + guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)) + + guard_rails.load_nvidia_dynamic_lib("nvrtc") + assert guard_rails.find_static_lib("cudadevrt") == static_path + assert guard_rails.find_bitcode_lib("device") == bitcode_path + assert guard_rails.find_nvidia_binary_utility("nvcc") == binary_path + + +def test_guard_rails_query_driver_cuda_version_by_default(monkeypatch, tmp_path): + ctk_root = tmp_path / "cuda-12.9" + _write_version_json(ctk_root, "12.9.20250531") + lib_path = _touch(ctk_root / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") + + query_calls: list[int] = [] + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) + + def fake_query_driver_cuda_version() -> DriverCudaVersion: + query_calls.append(1) + return _driver_cuda_version(13000) + + monkeypatch.setattr(compatibility_module, "query_driver_cuda_version", fake_query_driver_cuda_version) + + guard_rails = CompatibilityGuardRails() + + guard_rails.load_nvidia_dynamic_lib("nvrtc") + guard_rails.load_nvidia_dynamic_lib("nvrtc") + + assert len(query_calls) == 1 + + +def test_guard_rails_wrap_driver_query_failures(monkeypatch, tmp_path): + ctk_root = tmp_path / "cuda-12.9" + _write_version_json(ctk_root, "12.9.20250531") + lib_path = _touch(ctk_root / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda 
_libname: _loaded_dl(lib_path)) + + def fail_query_driver_cuda_version() -> DriverCudaVersion: + raise QueryDriverCudaVersionError("driver query failed") + + monkeypatch.setattr(compatibility_module, "query_driver_cuda_version", fail_query_driver_cuda_version) + + guard_rails = CompatibilityGuardRails() + + with pytest.raises( + CompatibilityCheckError, + match="Failed to query the CUDA driver version needed for compatibility checks", + ) as exc_info: + guard_rails.load_nvidia_dynamic_lib("nvrtc") + + assert isinstance(exc_info.value.__cause__, QueryDriverCudaVersionError) + + +def test_find_nvidia_header_directory_returns_none_when_unresolved(monkeypatch): + monkeypatch.setattr( + compatibility_module, + "_locate_nvidia_header_directory", + lambda _libname: None, + ) + + guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)) + + assert guard_rails.find_nvidia_header_directory("nvrtc") is None + + +def test_real_wheel_ctk_items_are_compatible(info_summary_append): + guard_rails = CompatibilityGuardRails( + ctk_major=13, + ctk_minor=2, + driver_cuda_version=_driver_cuda_version(13000), + ) + + try: + loaded = guard_rails.load_nvidia_dynamic_lib("nvrtc") + header_dir = guard_rails.find_nvidia_header_directory("nvrtc") + static_lib = guard_rails.find_static_lib("cudadevrt") + bitcode_lib = guard_rails.find_bitcode_lib("device") + nvcc = guard_rails.find_nvidia_binary_utility("nvcc") + except ( + CompatibilityCheckError, + CompatibilityInsufficientMetadataError, + DynamicLibNotFoundError, + StaticLibNotFoundError, + BitcodeLibNotFoundError, + ) as exc: + if STRICTNESS == "all_must_work": + raise + info_summary_append(f"real CTK check unavailable: {exc.__class__.__name__}: {exc}") + return + + info_summary_append(f"nvrtc={loaded.abs_path!r}") + info_summary_append(f"nvrtc_headers={header_dir!r}") + info_summary_append(f"cudadevrt={static_lib!r}") + info_summary_append(f"libdevice={bitcode_lib!r}") + info_summary_append(f"nvcc={nvcc!r}") + 
+ assert isinstance(loaded.abs_path, str) + assert header_dir is not None + assert nvcc is not None + for path in (loaded.abs_path, header_dir, static_lib, bitcode_lib, nvcc): + _assert_real_ctk_backed_path(path) + + +def test_real_wheel_component_version_does_not_override_ctk_line(info_summary_append): + guard_rails = CompatibilityGuardRails( + ctk_major=13, + ctk_minor=2, + driver_cuda_version=_driver_cuda_version(13000), + ) + + try: + header_dir = guard_rails.find_nvidia_header_directory("cufft") + except (CompatibilityCheckError, CompatibilityInsufficientMetadataError) as exc: + if STRICTNESS == "all_must_work": + raise + info_summary_append(f"real cufft CTK check unavailable: {exc.__class__.__name__}: {exc}") + return + + if header_dir is None: + if STRICTNESS == "all_must_work": + raise AssertionError("Expected CTK-backed cufft headers to be discoverable.") + info_summary_append("real cufft CTK check unavailable: cufft headers not found") + return + + info_summary_append(f"cufft_headers={header_dir!r}") + _assert_real_ctk_backed_path(header_dir) diff --git a/cuda_pathfinder/tests/test_driver_lib_loading.py b/cuda_pathfinder/tests/test_driver_lib_loading.py index b97453c9b5a..188640179c0 100644 --- a/cuda_pathfinder/tests/test_driver_lib_loading.py +++ b/cuda_pathfinder/tests/test_driver_lib_loading.py @@ -164,7 +164,7 @@ def test_real_query_driver_cuda_version(info_summary_append): driver_info._load_nvidia_dynamic_lib.cache_clear() driver_info.query_driver_cuda_version.cache_clear() try: - version = driver_info.query_driver_cuda_version() + driver_cuda_version = driver_info.query_driver_cuda_version() except driver_info.QueryDriverCudaVersionError as exc: if STRICTNESS == "all_must_work": raise @@ -174,7 +174,11 @@ def test_real_query_driver_cuda_version(info_summary_append): driver_info._load_nvidia_dynamic_lib.cache_clear() driver_info.query_driver_cuda_version.cache_clear() - info_summary_append(f"driver_version={version.major}.{version.minor} 
(encoded={version.encoded})") - assert version.encoded > 0 - assert version.major == version.encoded // 1000 - assert version.minor == (version.encoded % 1000) // 10 + info_summary_append( + "driver_cuda_version=" + f"{driver_cuda_version.major}.{driver_cuda_version.minor} " + f"(encoded={driver_cuda_version.encoded})" + ) + assert driver_cuda_version.encoded > 0 + assert driver_cuda_version.major == driver_cuda_version.encoded // 1000 + assert driver_cuda_version.minor == (driver_cuda_version.encoded % 1000) // 10 From 041cb438c6ff563c4f9e36ca38f794b8e55bde5e Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Fri, 24 Apr 2026 12:08:35 -0700 Subject: [PATCH 02/17] Add a public process-wide guard-rails singleton. Expose process_wide_compatibility_guard_rails at import time so follow-up changes can route the default cuda.pathfinder APIs through a stable public instance. Document the singleton and pin its public availability with a small regression test. Made-with: Cursor --- cuda_pathfinder/cuda/pathfinder/__init__.py | 5 +++++ cuda_pathfinder/docs/source/api.rst | 1 + cuda_pathfinder/tests/test_compatibility_guard_rails.py | 7 +++++++ 3 files changed, 13 insertions(+) diff --git a/cuda_pathfinder/cuda/pathfinder/__init__.py b/cuda_pathfinder/cuda/pathfinder/__init__.py index 022ebb6b372..89e64a879b8 100644 --- a/cuda_pathfinder/cuda/pathfinder/__init__.py +++ b/cuda_pathfinder/cuda/pathfinder/__init__.py @@ -72,6 +72,11 @@ from cuda.pathfinder._version import __version__ # isort: skip +#: Process-wide default compatibility guard rails instance. Public APIs can +#: delegate through this singleton while the explicit ``CompatibilityGuardRails`` +#: class remains available for advanced use cases. +process_wide_compatibility_guard_rails = CompatibilityGuardRails() + # Indirections to help Sphinx find the docstrings. #: Mapping from short CUDA Toolkit (CTK) library names to their canonical #: header basenames (used to validate a discovered include directory). 
diff --git a/cuda_pathfinder/docs/source/api.rst b/cuda_pathfinder/docs/source/api.rst index a4a2ef821fd..04290a4bbd1 100644 --- a/cuda_pathfinder/docs/source/api.rst +++ b/cuda_pathfinder/docs/source/api.rst @@ -19,6 +19,7 @@ CUDA bitcode and static libraries. get_cuda_path_or_home CompatibilityGuardRails + process_wide_compatibility_guard_rails CompatibilityCheckError CompatibilityInsufficientMetadataError SUPPORTED_NVIDIA_LIBNAMES diff --git a/cuda_pathfinder/tests/test_compatibility_guard_rails.py b/cuda_pathfinder/tests/test_compatibility_guard_rails.py index 74a2175c017..12078d368fc 100644 --- a/cuda_pathfinder/tests/test_compatibility_guard_rails.py +++ b/cuda_pathfinder/tests/test_compatibility_guard_rails.py @@ -8,6 +8,7 @@ import pytest import cuda.pathfinder._compatibility_guard_rails as compatibility_module +from cuda import pathfinder from cuda.pathfinder import ( BitcodeLibNotFoundError, CompatibilityCheckError, @@ -19,6 +20,7 @@ LocatedHeaderDir, LocatedStaticLib, StaticLibNotFoundError, + process_wide_compatibility_guard_rails, ) from cuda.pathfinder._utils.driver_info import DriverCudaVersion, QueryDriverCudaVersionError @@ -97,6 +99,11 @@ def _assert_real_ctk_backed_path(path: str) -> None: ) +def test_process_wide_compatibility_guard_rails_is_public_singleton(): + assert process_wide_compatibility_guard_rails is pathfinder.process_wide_compatibility_guard_rails + assert isinstance(process_wide_compatibility_guard_rails, CompatibilityGuardRails) + + def test_load_dynamic_lib_then_find_headers_same_ctk_version(monkeypatch, tmp_path): ctk_root = tmp_path / "cuda-12.9" _write_version_json(ctk_root, "12.9.20250531") From 4d9326582b5c35525fe6f3682e1d04316609c75d Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Fri, 24 Apr 2026 13:09:24 -0700 Subject: [PATCH 03/17] Route public pathfinder lookups through process-wide guard rails. 
Make the process-wide CompatibilityGuardRails instance the default path for the public load/find/locate APIs so top-level calls share compatibility state. Factor the routing/fallback/cache-reset glue into a dedicated internal module to keep `cuda.pathfinder.__init__` focused on the public surface, and fall back to the existing raw resolvers when v1 guard rails only have insufficient metadata. Made-with: Cursor --- cuda_pathfinder/cuda/pathfinder/__init__.py | 48 +++-- .../pathfinder/_compatibility_guard_rails.py | 5 + ..._process_wide_compatibility_guard_rails.py | 199 ++++++++++++++++++ .../tests/test_compatibility_guard_rails.py | 91 ++++++++ 4 files changed, 323 insertions(+), 20 deletions(-) create mode 100644 cuda_pathfinder/cuda/pathfinder/_process_wide_compatibility_guard_rails.py diff --git a/cuda_pathfinder/cuda/pathfinder/__init__.py b/cuda_pathfinder/cuda/pathfinder/__init__.py index 89e64a879b8..3a3f12f3b34 100644 --- a/cuda_pathfinder/cuda/pathfinder/__init__.py +++ b/cuda_pathfinder/cuda/pathfinder/__init__.py @@ -7,9 +7,6 @@ # cuda_pathfinder/docs/source/api.rst # to keep the documentation in sync. 
-from cuda.pathfinder._binaries.find_nvidia_binary_utility import ( - find_nvidia_binary_utility as find_nvidia_binary_utility, -) from cuda.pathfinder._binaries.supported_nvidia_binaries import SUPPORTED_BINARIES as _SUPPORTED_BINARIES from cuda.pathfinder._compatibility_guard_rails import ( CompatibilityCheckError as CompatibilityCheckError, @@ -28,16 +25,38 @@ DynamicLibUnknownError as DynamicLibUnknownError, ) from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL as LoadedDL -from cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib import load_nvidia_dynamic_lib as load_nvidia_dynamic_lib from cuda.pathfinder._dynamic_libs.supported_nvidia_libs import ( SUPPORTED_LIBNAMES as SUPPORTED_NVIDIA_LIBNAMES, ) from cuda.pathfinder._headers.find_nvidia_headers import LocatedHeaderDir as LocatedHeaderDir -from cuda.pathfinder._headers.find_nvidia_headers import find_nvidia_header_directory as find_nvidia_header_directory -from cuda.pathfinder._headers.find_nvidia_headers import ( +from cuda.pathfinder._headers.supported_nvidia_headers import SUPPORTED_HEADERS_CTK as _SUPPORTED_HEADERS_CTK +from cuda.pathfinder._process_wide_compatibility_guard_rails import ( + find_bitcode_lib as find_bitcode_lib, +) +from cuda.pathfinder._process_wide_compatibility_guard_rails import ( + find_nvidia_binary_utility as find_nvidia_binary_utility, +) +from cuda.pathfinder._process_wide_compatibility_guard_rails import ( + find_nvidia_header_directory as find_nvidia_header_directory, +) +from cuda.pathfinder._process_wide_compatibility_guard_rails import ( + find_static_lib as find_static_lib, +) +from cuda.pathfinder._process_wide_compatibility_guard_rails import ( + load_nvidia_dynamic_lib as load_nvidia_dynamic_lib, +) +from cuda.pathfinder._process_wide_compatibility_guard_rails import ( + locate_bitcode_lib as locate_bitcode_lib, +) +from cuda.pathfinder._process_wide_compatibility_guard_rails import ( locate_nvidia_header_directory as locate_nvidia_header_directory, ) 
-from cuda.pathfinder._headers.supported_nvidia_headers import SUPPORTED_HEADERS_CTK as _SUPPORTED_HEADERS_CTK +from cuda.pathfinder._process_wide_compatibility_guard_rails import ( + locate_static_lib as locate_static_lib, +) +from cuda.pathfinder._process_wide_compatibility_guard_rails import ( + process_wide_compatibility_guard_rails as _process_wide_compatibility_guard_rails, +) from cuda.pathfinder._static_libs.find_bitcode_lib import ( SUPPORTED_BITCODE_LIBS as _SUPPORTED_BITCODE_LIBS, ) @@ -47,12 +66,6 @@ from cuda.pathfinder._static_libs.find_bitcode_lib import ( LocatedBitcodeLib as LocatedBitcodeLib, ) -from cuda.pathfinder._static_libs.find_bitcode_lib import ( - find_bitcode_lib as find_bitcode_lib, -) -from cuda.pathfinder._static_libs.find_bitcode_lib import ( - locate_bitcode_lib as locate_bitcode_lib, -) from cuda.pathfinder._static_libs.find_static_lib import ( SUPPORTED_STATIC_LIBS as _SUPPORTED_STATIC_LIBS, ) @@ -62,12 +75,6 @@ from cuda.pathfinder._static_libs.find_static_lib import ( StaticLibNotFoundError as StaticLibNotFoundError, ) -from cuda.pathfinder._static_libs.find_static_lib import ( - find_static_lib as find_static_lib, -) -from cuda.pathfinder._static_libs.find_static_lib import ( - locate_static_lib as locate_static_lib, -) from cuda.pathfinder._utils.env_vars import get_cuda_path_or_home as get_cuda_path_or_home from cuda.pathfinder._version import __version__ # isort: skip @@ -75,7 +82,8 @@ #: Process-wide default compatibility guard rails instance. Public APIs can #: delegate through this singleton while the explicit ``CompatibilityGuardRails`` #: class remains available for advanced use cases. -process_wide_compatibility_guard_rails = CompatibilityGuardRails() +process_wide_compatibility_guard_rails = _process_wide_compatibility_guard_rails + # Indirections to help Sphinx find the docstrings. 
#: Mapping from short CUDA Toolkit (CTK) library names to their canonical diff --git a/cuda_pathfinder/cuda/pathfinder/_compatibility_guard_rails.py b/cuda_pathfinder/cuda/pathfinder/_compatibility_guard_rails.py index 13ccb606a6c..70a176322c8 100644 --- a/cuda_pathfinder/cuda/pathfinder/_compatibility_guard_rails.py +++ b/cuda_pathfinder/cuda/pathfinder/_compatibility_guard_rails.py @@ -440,6 +440,7 @@ def __init__( ) -> None: self._ctk_major_constraint = _coerce_constraint("ctk_major", ctk_major) self._ctk_minor_constraint = _coerce_constraint("ctk_minor", ctk_minor) + self._configured_driver_cuda_version = driver_cuda_version self._driver_cuda_version = driver_cuda_version self._resolved_items: list[ResolvedItem] = [] @@ -492,6 +493,10 @@ def _remember(self, item: ResolvedItem) -> None: if item not in self._resolved_items: self._resolved_items.append(item) + def _reset_for_testing(self) -> None: + self._driver_cuda_version = self._configured_driver_cuda_version + self._resolved_items.clear() + def _register_and_check(self, item: ResolvedItem) -> None: self._enforce_supported_packaging(item) self._enforce_ctk_metadata(item) diff --git a/cuda_pathfinder/cuda/pathfinder/_process_wide_compatibility_guard_rails.py b/cuda_pathfinder/cuda/pathfinder/_process_wide_compatibility_guard_rails.py new file mode 100644 index 00000000000..bd3f9154c69 --- /dev/null +++ b/cuda_pathfinder/cuda/pathfinder/_process_wide_compatibility_guard_rails.py @@ -0,0 +1,199 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import sys +from collections.abc import Callable +from typing import Protocol, TypeVar, cast + +from cuda.pathfinder._binaries.find_nvidia_binary_utility import ( + find_nvidia_binary_utility as _find_nvidia_binary_utility, +) +from cuda.pathfinder._compatibility_guard_rails import ( + CompatibilityGuardRails, + CompatibilityInsufficientMetadataError, +) +from cuda.pathfinder._dynamic_libs.load_dl_common import LoadedDL +from cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib import ( + load_nvidia_dynamic_lib as _load_nvidia_dynamic_lib, +) +from cuda.pathfinder._headers.find_nvidia_headers import ( + LocatedHeaderDir, +) +from cuda.pathfinder._headers.find_nvidia_headers import ( + find_nvidia_header_directory as _find_nvidia_header_directory_impl, +) +from cuda.pathfinder._headers.find_nvidia_headers import ( + locate_nvidia_header_directory as _locate_nvidia_header_directory, +) +from cuda.pathfinder._static_libs.find_bitcode_lib import ( + LocatedBitcodeLib, +) +from cuda.pathfinder._static_libs.find_bitcode_lib import ( + find_bitcode_lib as _find_bitcode_lib, +) +from cuda.pathfinder._static_libs.find_bitcode_lib import ( + locate_bitcode_lib as _locate_bitcode_lib, +) +from cuda.pathfinder._static_libs.find_static_lib import ( + LocatedStaticLib, +) +from cuda.pathfinder._static_libs.find_static_lib import ( + find_static_lib as _find_static_lib, +) +from cuda.pathfinder._static_libs.find_static_lib import ( + locate_static_lib as _locate_static_lib, +) + +_T = TypeVar("_T") + + +class _ProcessWideGuardRailsApi(Protocol): + def load_nvidia_dynamic_lib(self, libname: str) -> LoadedDL: ... + + def locate_nvidia_header_directory(self, libname: str) -> LocatedHeaderDir | None: ... + + def find_nvidia_header_directory(self, libname: str) -> str | None: ... + + def locate_static_lib(self, name: str) -> LocatedStaticLib: ... + + def find_static_lib(self, name: str) -> str: ... 
+ + def locate_bitcode_lib(self, name: str) -> LocatedBitcodeLib: ... + + def find_bitcode_lib(self, name: str) -> str: ... + + def find_nvidia_binary_utility(self, utility_name: str) -> str | None: ... + + +class _PublicPathfinderModule(Protocol): + process_wide_compatibility_guard_rails: object + + +process_wide_compatibility_guard_rails: CompatibilityGuardRails = CompatibilityGuardRails() + + +def _public_module() -> _PublicPathfinderModule | None: + public_module = sys.modules.get("cuda.pathfinder") + if public_module is None: + return None + return cast(_PublicPathfinderModule, public_module) + + +def _current_process_wide_compatibility_guard_rails() -> _ProcessWideGuardRailsApi: + public_module = _public_module() + if public_module is None: + return cast(_ProcessWideGuardRailsApi, process_wide_compatibility_guard_rails) + return cast(_ProcessWideGuardRailsApi, public_module.process_wide_compatibility_guard_rails) + + +def _reset_process_wide_compatibility_guard_rails() -> None: + current = _current_process_wide_compatibility_guard_rails() + if isinstance(current, CompatibilityGuardRails): + current._reset_for_testing() + return + public_module = _public_module() + if public_module is None: + global process_wide_compatibility_guard_rails + process_wide_compatibility_guard_rails = CompatibilityGuardRails() + return + public_module.process_wide_compatibility_guard_rails = CompatibilityGuardRails() + + +def _try_process_wide_guard_rails_then_fallback(guard_rails_call: Callable[[], _T], raw_call: Callable[[], _T]) -> _T: + try: + return guard_rails_call() + except CompatibilityInsufficientMetadataError: + return raw_call() + + +def _cache_clear_with_process_state_reset(cache_clear: Callable[[], object]) -> Callable[[], None]: + def clear() -> None: + cache_clear() + _reset_process_wide_compatibility_guard_rails() + + return clear + + +def load_nvidia_dynamic_lib(libname: str) -> LoadedDL: + """Load a CUDA dynamic library via the process-wide compatibility guard 
rails.""" + return _try_process_wide_guard_rails_then_fallback( + lambda: _current_process_wide_compatibility_guard_rails().load_nvidia_dynamic_lib(libname), + lambda: _load_nvidia_dynamic_lib(libname), + ) + + +def locate_nvidia_header_directory(libname: str) -> LocatedHeaderDir | None: + """Locate a CUDA header directory via the process-wide compatibility guard rails.""" + return _try_process_wide_guard_rails_then_fallback( + lambda: _current_process_wide_compatibility_guard_rails().locate_nvidia_header_directory(libname), + lambda: _locate_nvidia_header_directory(libname), + ) + + +def find_nvidia_header_directory(libname: str) -> str | None: + """Locate a CUDA header directory and return its path string.""" + try: + abs_path = _current_process_wide_compatibility_guard_rails().find_nvidia_header_directory(libname) + except CompatibilityInsufficientMetadataError: + abs_path = _find_nvidia_header_directory_impl(libname) + assert abs_path is None or isinstance(abs_path, str) + return abs_path + + +def locate_static_lib(name: str) -> LocatedStaticLib: + """Locate a CUDA static library via the process-wide compatibility guard rails.""" + return _try_process_wide_guard_rails_then_fallback( + lambda: _current_process_wide_compatibility_guard_rails().locate_static_lib(name), + lambda: _locate_static_lib(name), + ) + + +def find_static_lib(name: str) -> str: + """Locate a CUDA static library and return its path string.""" + try: + abs_path = _current_process_wide_compatibility_guard_rails().find_static_lib(name) + except CompatibilityInsufficientMetadataError: + abs_path = _find_static_lib(name) + assert isinstance(abs_path, str) + return abs_path + + +def locate_bitcode_lib(name: str) -> LocatedBitcodeLib: + """Locate a CUDA bitcode library via the process-wide compatibility guard rails.""" + return _try_process_wide_guard_rails_then_fallback( + lambda: _current_process_wide_compatibility_guard_rails().locate_bitcode_lib(name), + lambda: _locate_bitcode_lib(name), + ) + 
+ +def find_bitcode_lib(name: str) -> str: + """Locate a CUDA bitcode library and return its path string.""" + try: + abs_path = _current_process_wide_compatibility_guard_rails().find_bitcode_lib(name) + except CompatibilityInsufficientMetadataError: + abs_path = _find_bitcode_lib(name) + assert isinstance(abs_path, str) + return abs_path + + +def find_nvidia_binary_utility(utility_name: str) -> str | None: + """Locate a CUDA binary utility via the process-wide compatibility guard rails.""" + try: + abs_path = _current_process_wide_compatibility_guard_rails().find_nvidia_binary_utility(utility_name) + except CompatibilityInsufficientMetadataError: + abs_path = _find_nvidia_binary_utility(utility_name) + assert abs_path is None or isinstance(abs_path, str) + return abs_path + + +load_nvidia_dynamic_lib.cache_clear = _cache_clear_with_process_state_reset( # type: ignore[attr-defined] + _load_nvidia_dynamic_lib.cache_clear +) +locate_nvidia_header_directory.cache_clear = _cache_clear_with_process_state_reset( # type: ignore[attr-defined] + _locate_nvidia_header_directory.cache_clear +) +find_nvidia_binary_utility.cache_clear = _cache_clear_with_process_state_reset( # type: ignore[attr-defined] + _find_nvidia_binary_utility.cache_clear +) diff --git a/cuda_pathfinder/tests/test_compatibility_guard_rails.py b/cuda_pathfinder/tests/test_compatibility_guard_rails.py index 12078d368fc..4e578158316 100644 --- a/cuda_pathfinder/tests/test_compatibility_guard_rails.py +++ b/cuda_pathfinder/tests/test_compatibility_guard_rails.py @@ -99,11 +99,102 @@ def _assert_real_ctk_backed_path(path: str) -> None: ) +class _DelegatingProcessWideGuardRails: + def __init__(self, method_name: str, return_value: object) -> None: + self._method_name = method_name + self._return_value = return_value + self.calls: list[tuple[str, tuple[object, ...]]] = [] + + def __getattr__(self, name: str): + if name != self._method_name: + raise AttributeError(name) + + def delegated(*args: object) -> object: 
+ self.calls.append((name, args)) + return self._return_value + + return delegated + + def test_process_wide_compatibility_guard_rails_is_public_singleton(): assert process_wide_compatibility_guard_rails is pathfinder.process_wide_compatibility_guard_rails assert isinstance(process_wide_compatibility_guard_rails, CompatibilityGuardRails) +@pytest.mark.parametrize( + ("public_api_name", "guard_rails_method_name", "args", "return_value"), + [ + ( + "load_nvidia_dynamic_lib", + "load_nvidia_dynamic_lib", + ("nvrtc",), + _loaded_dl("/opt/mock/libnvrtc.so.12"), + ), + ( + "locate_nvidia_header_directory", + "locate_nvidia_header_directory", + ("nvrtc",), + LocatedHeaderDir(abs_path="/opt/mock/include", found_via="CUDA_PATH"), + ), + ("find_nvidia_header_directory", "find_nvidia_header_directory", ("nvrtc",), "/opt/mock/include"), + ( + "locate_static_lib", + "locate_static_lib", + ("cudadevrt",), + _located_static_lib("cudadevrt", "/opt/mock/libcudadevrt.a"), + ), + ("find_static_lib", "find_static_lib", ("cudadevrt",), "/opt/mock/libcudadevrt.a"), + ( + "locate_bitcode_lib", + "locate_bitcode_lib", + ("device",), + _located_bitcode_lib("device", "/opt/mock/libdevice.10.bc"), + ), + ("find_bitcode_lib", "find_bitcode_lib", ("device",), "/opt/mock/libdevice.10.bc"), + ("find_nvidia_binary_utility", "find_nvidia_binary_utility", ("nvcc",), "/opt/mock/nvcc"), + ], +) +def test_public_apis_route_through_process_wide_guard_rails( + monkeypatch, public_api_name, guard_rails_method_name, args, return_value +): + fake_guard_rails = _DelegatingProcessWideGuardRails(guard_rails_method_name, return_value) + monkeypatch.setattr(pathfinder, "process_wide_compatibility_guard_rails", fake_guard_rails) + + result = getattr(pathfinder, public_api_name)(*args) + + assert result == return_value + assert fake_guard_rails.calls == [(guard_rails_method_name, args)] + + +def test_public_apis_share_process_wide_guard_rails_state(monkeypatch, tmp_path): + lib_root = tmp_path / "cuda-12.8" + 
hdr_root = tmp_path / "cuda-12.9" + _write_version_json(lib_root, "12.8.20250303") + _write_version_json(hdr_root, "12.9.20250531") + + lib_path = _touch(lib_root / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") + hdr_dir = hdr_root / "targets" / "x86_64-linux" / "include" + _touch(hdr_dir / "nvrtc.h") + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) + monkeypatch.setattr( + compatibility_module, + "_locate_nvidia_header_directory", + lambda _libname: LocatedHeaderDir(abs_path=str(hdr_dir), found_via="CUDA_PATH"), + ) + monkeypatch.setattr( + pathfinder, + "process_wide_compatibility_guard_rails", + CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)), + ) + + loaded = pathfinder.load_nvidia_dynamic_lib("nvrtc") + + assert loaded.abs_path == lib_path + with pytest.raises(CompatibilityCheckError, match="exact CTK major.minor match"): + pathfinder.find_nvidia_header_directory("nvrtc") + + def test_load_dynamic_lib_then_find_headers_same_ctk_version(monkeypatch, tmp_path): ctk_root = tmp_path / "cuda-12.9" _write_version_json(ctk_root, "12.9.20250531") From 0b156651b865e45335a0572179516a37a177ed53 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Fri, 24 Apr 2026 15:49:42 -0700 Subject: [PATCH 04/17] Add guard-rails mode switch for public pathfinder APIs. Allow CUDA_PATHFINDER_COMPATIBILITY_GUARD_RAILS to select strict, best_effort, or off behavior so we can experiment with stricter compatibility checks without changing the public API shape. 
Made-with: Cursor --- ..._process_wide_compatibility_guard_rails.py | 55 +++++++++---- .../tests/test_compatibility_guard_rails.py | 79 +++++++++++++++++++ .../tests/test_driver_lib_loading.py | 6 ++ .../tests/test_find_nvidia_binaries.py | 7 ++ .../tests/test_find_nvidia_headers.py | 7 ++ .../tests/test_load_nvidia_dynamic_lib.py | 6 ++ 6 files changed, 143 insertions(+), 17 deletions(-) diff --git a/cuda_pathfinder/cuda/pathfinder/_process_wide_compatibility_guard_rails.py b/cuda_pathfinder/cuda/pathfinder/_process_wide_compatibility_guard_rails.py index bd3f9154c69..d66e8243be2 100644 --- a/cuda_pathfinder/cuda/pathfinder/_process_wide_compatibility_guard_rails.py +++ b/cuda_pathfinder/cuda/pathfinder/_process_wide_compatibility_guard_rails.py @@ -3,6 +3,7 @@ from __future__ import annotations +import os import sys from collections.abc import Callable from typing import Protocol, TypeVar, cast @@ -47,6 +48,8 @@ ) _T = TypeVar("_T") +_COMPATIBILITY_GUARD_RAILS_ENV_VAR = "CUDA_PATHFINDER_COMPATIBILITY_GUARD_RAILS" +_COMPATIBILITY_GUARD_RAILS_MODES = ("off", "best_effort", "strict") class _ProcessWideGuardRailsApi(Protocol): @@ -74,6 +77,19 @@ class _PublicPathfinderModule(Protocol): process_wide_compatibility_guard_rails: CompatibilityGuardRails = CompatibilityGuardRails() +def _compatibility_guard_rails_mode() -> str: + value = os.environ.get(_COMPATIBILITY_GUARD_RAILS_ENV_VAR) + if not value: + return "strict" + if value in _COMPATIBILITY_GUARD_RAILS_MODES: + return value + allowed_values = ", ".join(repr(mode) for mode in _COMPATIBILITY_GUARD_RAILS_MODES) + raise RuntimeError( + f"Invalid {_COMPATIBILITY_GUARD_RAILS_ENV_VAR}={value!r}. " + f"Allowed values: {allowed_values}. Unset or empty defaults to 'strict'." 
+ ) + + def _public_module() -> _PublicPathfinderModule | None: public_module = sys.modules.get("cuda.pathfinder") if public_module is None: @@ -102,10 +118,15 @@ def _reset_process_wide_compatibility_guard_rails() -> None: def _try_process_wide_guard_rails_then_fallback(guard_rails_call: Callable[[], _T], raw_call: Callable[[], _T]) -> _T: + mode = _compatibility_guard_rails_mode() + if mode == "off": + return raw_call() try: return guard_rails_call() except CompatibilityInsufficientMetadataError: - return raw_call() + if mode == "best_effort": + return raw_call() + raise def _cache_clear_with_process_state_reset(cache_clear: Callable[[], object]) -> Callable[[], None]: @@ -134,10 +155,10 @@ def locate_nvidia_header_directory(libname: str) -> LocatedHeaderDir | None: def find_nvidia_header_directory(libname: str) -> str | None: """Locate a CUDA header directory and return its path string.""" - try: - abs_path = _current_process_wide_compatibility_guard_rails().find_nvidia_header_directory(libname) - except CompatibilityInsufficientMetadataError: - abs_path = _find_nvidia_header_directory_impl(libname) + abs_path = _try_process_wide_guard_rails_then_fallback( + lambda: _current_process_wide_compatibility_guard_rails().find_nvidia_header_directory(libname), + lambda: _find_nvidia_header_directory_impl(libname), + ) assert abs_path is None or isinstance(abs_path, str) return abs_path @@ -152,10 +173,10 @@ def locate_static_lib(name: str) -> LocatedStaticLib: def find_static_lib(name: str) -> str: """Locate a CUDA static library and return its path string.""" - try: - abs_path = _current_process_wide_compatibility_guard_rails().find_static_lib(name) - except CompatibilityInsufficientMetadataError: - abs_path = _find_static_lib(name) + abs_path = _try_process_wide_guard_rails_then_fallback( + lambda: _current_process_wide_compatibility_guard_rails().find_static_lib(name), + lambda: _find_static_lib(name), + ) assert isinstance(abs_path, str) return abs_path @@ -170,20 
+191,20 @@ def locate_bitcode_lib(name: str) -> LocatedBitcodeLib: def find_bitcode_lib(name: str) -> str: """Locate a CUDA bitcode library and return its path string.""" - try: - abs_path = _current_process_wide_compatibility_guard_rails().find_bitcode_lib(name) - except CompatibilityInsufficientMetadataError: - abs_path = _find_bitcode_lib(name) + abs_path = _try_process_wide_guard_rails_then_fallback( + lambda: _current_process_wide_compatibility_guard_rails().find_bitcode_lib(name), + lambda: _find_bitcode_lib(name), + ) assert isinstance(abs_path, str) return abs_path def find_nvidia_binary_utility(utility_name: str) -> str | None: """Locate a CUDA binary utility via the process-wide compatibility guard rails.""" - try: - abs_path = _current_process_wide_compatibility_guard_rails().find_nvidia_binary_utility(utility_name) - except CompatibilityInsufficientMetadataError: - abs_path = _find_nvidia_binary_utility(utility_name) + abs_path = _try_process_wide_guard_rails_then_fallback( + lambda: _current_process_wide_compatibility_guard_rails().find_nvidia_binary_utility(utility_name), + lambda: _find_nvidia_binary_utility(utility_name), + ) assert abs_path is None or isinstance(abs_path, str) return abs_path diff --git a/cuda_pathfinder/tests/test_compatibility_guard_rails.py b/cuda_pathfinder/tests/test_compatibility_guard_rails.py index 4e578158316..2b59e860c27 100644 --- a/cuda_pathfinder/tests/test_compatibility_guard_rails.py +++ b/cuda_pathfinder/tests/test_compatibility_guard_rails.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 +import importlib import json import os from pathlib import Path @@ -26,6 +27,13 @@ STRICTNESS = os.environ.get("CUDA_PATHFINDER_TEST_COMPATIBILITY_GUARD_RAILS_STRICTNESS", "see_what_works") assert STRICTNESS in ("see_what_works", "all_must_work") +COMPATIBILITY_GUARD_RAILS_ENV_VAR = "CUDA_PATHFINDER_COMPATIBILITY_GUARD_RAILS" +process_wide_module = importlib.import_module("cuda.pathfinder._process_wide_compatibility_guard_rails") + + +@pytest.fixture(autouse=True) +def _default_process_wide_guard_rails_mode(monkeypatch): + monkeypatch.delenv(COMPATIBILITY_GUARD_RAILS_ENV_VAR, raising=False) def _write_version_json(ctk_root: Path, toolkit_version: str) -> None: @@ -166,6 +174,77 @@ def test_public_apis_route_through_process_wide_guard_rails( assert fake_guard_rails.calls == [(guard_rails_method_name, args)] +@pytest.mark.parametrize("env_value", [None, ""]) +def test_public_apis_default_to_strict_when_env_var_is_unset_or_empty(monkeypatch, tmp_path, env_value): + lib_path = _touch(tmp_path / "no-version-json" / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) + monkeypatch.setattr( + pathfinder, + "process_wide_compatibility_guard_rails", + CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)), + ) + + def fail_raw_fallback(_libname: str) -> LoadedDL: + pytest.fail("strict mode must not fall back to raw loading") + + monkeypatch.setattr(process_wide_module, "_load_nvidia_dynamic_lib", fail_raw_fallback) + if env_value is None: + monkeypatch.delenv(COMPATIBILITY_GUARD_RAILS_ENV_VAR, raising=False) + else: + monkeypatch.setenv(COMPATIBILITY_GUARD_RAILS_ENV_VAR, env_value) + + with pytest.raises(CompatibilityInsufficientMetadataError, match="version.json"): + pathfinder.load_nvidia_dynamic_lib("nvrtc") + + +def test_public_apis_best_effort_fall_back_on_insufficient_metadata(monkeypatch, 
tmp_path): + guarded_lib_path = _touch(tmp_path / "no-version-json" / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") + raw_loaded = _loaded_dl("/opt/mock/libnvrtc.so.12", found_via="system-search") + + monkeypatch.setenv(COMPATIBILITY_GUARD_RAILS_ENV_VAR, "best_effort") + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(guarded_lib_path)) + monkeypatch.setattr(process_wide_module, "_load_nvidia_dynamic_lib", lambda _libname: raw_loaded) + monkeypatch.setattr( + pathfinder, + "process_wide_compatibility_guard_rails", + CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)), + ) + + loaded = pathfinder.load_nvidia_dynamic_lib("nvrtc") + + assert loaded is raw_loaded + + +def test_public_apis_off_bypass_process_wide_guard_rails(monkeypatch): + raw_loaded = _loaded_dl("/opt/mock/libnvrtc.so.12", found_via="system-search") + fake_guard_rails = _DelegatingProcessWideGuardRails( + "load_nvidia_dynamic_lib", + _loaded_dl("/opt/mock/guard-rails/libnvrtc.so.12"), + ) + + monkeypatch.setenv(COMPATIBILITY_GUARD_RAILS_ENV_VAR, "off") + monkeypatch.setattr(pathfinder, "process_wide_compatibility_guard_rails", fake_guard_rails) + monkeypatch.setattr(process_wide_module, "_load_nvidia_dynamic_lib", lambda _libname: raw_loaded) + + loaded = pathfinder.load_nvidia_dynamic_lib("nvrtc") + + assert loaded is raw_loaded + assert fake_guard_rails.calls == [] + + +def test_public_apis_reject_invalid_guard_rails_mode(monkeypatch): + monkeypatch.setenv(COMPATIBILITY_GUARD_RAILS_ENV_VAR, "unexpected") + + with pytest.raises(RuntimeError, match=COMPATIBILITY_GUARD_RAILS_ENV_VAR) as exc_info: + pathfinder.find_nvidia_binary_utility("nvcc") + + message = str(exc_info.value) + assert "'off'" in message + assert "'best_effort'" in message + assert "'strict'" in message + + def test_public_apis_share_process_wide_guard_rails_state(monkeypatch, tmp_path): lib_root = tmp_path / "cuda-12.8" hdr_root = tmp_path / "cuda-12.9" 
diff --git a/cuda_pathfinder/tests/test_driver_lib_loading.py b/cuda_pathfinder/tests/test_driver_lib_loading.py index 188640179c0..e47edd9001b 100644 --- a/cuda_pathfinder/tests/test_driver_lib_loading.py +++ b/cuda_pathfinder/tests/test_driver_lib_loading.py @@ -30,6 +30,7 @@ STRICTNESS = os.environ.get("CUDA_PATHFINDER_TEST_LOAD_NVIDIA_DYNAMIC_LIB_STRICTNESS", "see_what_works") assert STRICTNESS in ("see_what_works", "all_must_work") +COMPATIBILITY_GUARD_RAILS_ENV_VAR = "CUDA_PATHFINDER_COMPATIBILITY_GUARD_RAILS" _MODULE = "cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib" _LOADER_MODULE = "cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib.LOADER" @@ -38,6 +39,11 @@ _NVML_DESC = LIB_DESCRIPTORS["nvml"] +@pytest.fixture(autouse=True) +def _disable_process_wide_compatibility_guard_rails(monkeypatch): + monkeypatch.setenv(COMPATIBILITY_GUARD_RAILS_ENV_VAR, "off") + + def _make_loaded_dl(path, found_via): return LoadedDL(path, False, 0xDEAD, found_via) diff --git a/cuda_pathfinder/tests/test_find_nvidia_binaries.py b/cuda_pathfinder/tests/test_find_nvidia_binaries.py index ec9740cd853..dbdbf5b61f5 100644 --- a/cuda_pathfinder/tests/test_find_nvidia_binaries.py +++ b/cuda_pathfinder/tests/test_find_nvidia_binaries.py @@ -14,6 +14,13 @@ SUPPORTED_BINARIES_ALL, ) +COMPATIBILITY_GUARD_RAILS_ENV_VAR = "CUDA_PATHFINDER_COMPATIBILITY_GUARD_RAILS" + + +@pytest.fixture(autouse=True) +def _disable_process_wide_compatibility_guard_rails(monkeypatch): + monkeypatch.setenv(COMPATIBILITY_GUARD_RAILS_ENV_VAR, "off") + def test_unknown_utility_name(): with pytest.raises(UnsupportedBinaryError, match=r"'unknown-utility' is not supported"): diff --git a/cuda_pathfinder/tests/test_find_nvidia_headers.py b/cuda_pathfinder/tests/test_find_nvidia_headers.py index e28f64d3520..596d0d2b296 100644 --- a/cuda_pathfinder/tests/test_find_nvidia_headers.py +++ b/cuda_pathfinder/tests/test_find_nvidia_headers.py @@ -39,6 +39,13 @@ STRICTNESS = 
os.environ.get("CUDA_PATHFINDER_TEST_FIND_NVIDIA_HEADERS_STRICTNESS", "see_what_works") assert STRICTNESS in ("see_what_works", "all_must_work") +COMPATIBILITY_GUARD_RAILS_ENV_VAR = "CUDA_PATHFINDER_COMPATIBILITY_GUARD_RAILS" + + +@pytest.fixture(autouse=True) +def _disable_process_wide_compatibility_guard_rails(monkeypatch): + monkeypatch.setenv(COMPATIBILITY_GUARD_RAILS_ENV_VAR, "off") + NON_CTK_IMPORTLIB_METADATA_DISTRIBUTIONS_NAMES = { "cusolverMp": r"^nvidia-cusolvermp-.*$", diff --git a/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py b/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py index 401e7dc13f8..c43a8f17416 100644 --- a/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py +++ b/cuda_pathfinder/tests/test_load_nvidia_dynamic_lib.py @@ -23,6 +23,12 @@ STRICTNESS = os.environ.get("CUDA_PATHFINDER_TEST_LOAD_NVIDIA_DYNAMIC_LIB_STRICTNESS", "see_what_works") assert STRICTNESS in ("see_what_works", "all_must_work") +COMPATIBILITY_GUARD_RAILS_ENV_VAR = "CUDA_PATHFINDER_COMPATIBILITY_GUARD_RAILS" + + +@pytest.fixture(autouse=True) +def _disable_process_wide_compatibility_guard_rails(monkeypatch): + monkeypatch.setenv(COMPATIBILITY_GUARD_RAILS_ENV_VAR, "off") def test_supported_libnames_linux_sonames_consistency(): From e2a0909d710827c2306cdc2416165c8933698a49 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Fri, 24 Apr 2026 21:07:56 -0700 Subject: [PATCH 05/17] Allow strict guard rails for driver libraries. Treat driver-packaged libraries as compatibility-neutral so strict mode can load NVML and other driver libs without a raw fallback, while CTK-backed artifacts remain the only items that establish and enforce the process-wide CTK anchor. 
Made-with: Cursor --- .../pathfinder/_compatibility_guard_rails.py | 16 +++- .../tests/test_compatibility_guard_rails.py | 81 +++++++++++++++++++ 2 files changed, 93 insertions(+), 4 deletions(-) diff --git a/cuda_pathfinder/cuda/pathfinder/_compatibility_guard_rails.py b/cuda_pathfinder/cuda/pathfinder/_compatibility_guard_rails.py index 70a176322c8..e15ebe3e9d2 100644 --- a/cuda_pathfinder/cuda/pathfinder/_compatibility_guard_rails.py +++ b/cuda_pathfinder/cuda/pathfinder/_compatibility_guard_rails.py @@ -459,7 +459,8 @@ def _enforce_supported_packaging(self, item: ResolvedItem) -> None: return raise CompatibilityInsufficientMetadataError( "v1 compatibility checks only give definitive answers for " - f"packaged_with='ctk' items. {item.describe()} is packaged_with={item.packaged_with!r}." + f"packaged_with='ctk' items, plus compatibility-neutral driver libraries. " + f"{item.describe()} is packaged_with={item.packaged_with!r}." ) def _enforce_ctk_metadata(self, item: ResolvedItem) -> None: @@ -485,9 +486,10 @@ def _enforce_constraints(self, item: ResolvedItem) -> None: ) def _anchor_item(self) -> ResolvedItem | None: - if not self._resolved_items: - return None - return self._resolved_items[0] + for item in self._resolved_items: + if item.packaged_with == "ctk": + return item + return None def _remember(self, item: ResolvedItem) -> None: if item not in self._resolved_items: @@ -498,6 +500,12 @@ def _reset_for_testing(self) -> None: self._resolved_items.clear() def _register_and_check(self, item: ResolvedItem) -> None: + # Driver libraries come from the installed display driver rather than a + # CUDA Toolkit line, so they do not need CTK metadata and must not lock + # the process-wide CTK anchor. 
+ if item.packaged_with == "driver": + self._remember(item) + return self._enforce_supported_packaging(item) self._enforce_ctk_metadata(item) self._enforce_constraints(item) diff --git a/cuda_pathfinder/tests/test_compatibility_guard_rails.py b/cuda_pathfinder/tests/test_compatibility_guard_rails.py index 2b59e860c27..f5f323bfe2f 100644 --- a/cuda_pathfinder/tests/test_compatibility_guard_rails.py +++ b/cuda_pathfinder/tests/test_compatibility_guard_rails.py @@ -174,6 +174,30 @@ def test_public_apis_route_through_process_wide_guard_rails( assert fake_guard_rails.calls == [(guard_rails_method_name, args)] +def test_public_driver_libs_are_allowed_in_strict_mode(monkeypatch, tmp_path): + driver_lib_path = _touch(tmp_path / "driver-root" / "libnvidia-ml.so.1") + + monkeypatch.setattr( + compatibility_module, + "_load_nvidia_dynamic_lib", + lambda _libname: _loaded_dl(driver_lib_path, found_via="system-search"), + ) + monkeypatch.setattr( + pathfinder, + "process_wide_compatibility_guard_rails", + CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)), + ) + + def fail_raw_fallback(_libname: str) -> LoadedDL: + pytest.fail("strict mode must not fall back to raw loading") + + monkeypatch.setattr(process_wide_module, "_load_nvidia_dynamic_lib", fail_raw_fallback) + + loaded = pathfinder.load_nvidia_dynamic_lib("nvml") + + assert loaded.abs_path == driver_lib_path + + @pytest.mark.parametrize("env_value", [None, ""]) def test_public_apis_default_to_strict_when_env_var_is_unset_or_empty(monkeypatch, tmp_path, env_value): lib_path = _touch(tmp_path / "no-version-json" / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") @@ -360,6 +384,63 @@ def test_other_packaging_raises_insufficient_metadata(monkeypatch, tmp_path): guard_rails.find_bitcode_lib("nvshmem_device") +def test_driver_libs_do_not_lock_ctk_anchor(monkeypatch, tmp_path): + driver_lib_path = _touch(tmp_path / "driver-root" / "libnvidia-ml.so.1") + ctk_root = tmp_path / "cuda-12.9" + 
_write_version_json(ctk_root, "12.9.20250531") + ctk_lib_path = _touch(ctk_root / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") + + def fake_load_nvidia_dynamic_lib(libname: str) -> LoadedDL: + if libname == "nvml": + return _loaded_dl(driver_lib_path, found_via="system-search") + if libname == "nvrtc": + return _loaded_dl(ctk_lib_path) + raise AssertionError(f"Unexpected libname: {libname!r}") + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", fake_load_nvidia_dynamic_lib) + + guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)) + + driver_loaded = guard_rails.load_nvidia_dynamic_lib("nvml") + ctk_loaded = guard_rails.load_nvidia_dynamic_lib("nvrtc") + + assert driver_loaded.abs_path == driver_lib_path + assert ctk_loaded.abs_path == ctk_lib_path + + +def test_driver_libs_do_not_mask_later_ctk_mismatch(monkeypatch, tmp_path): + driver_lib_path = _touch(tmp_path / "driver-root" / "libnvidia-ml.so.1") + lib_root = tmp_path / "cuda-12.8" + hdr_root = tmp_path / "cuda-12.9" + _write_version_json(lib_root, "12.8.20250303") + _write_version_json(hdr_root, "12.9.20250531") + + lib_path = _touch(lib_root / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") + hdr_dir = hdr_root / "targets" / "x86_64-linux" / "include" + _touch(hdr_dir / "nvrtc.h") + + def fake_load_nvidia_dynamic_lib(libname: str) -> LoadedDL: + if libname == "nvml": + return _loaded_dl(driver_lib_path, found_via="system-search") + if libname == "nvrtc": + return _loaded_dl(lib_path) + raise AssertionError(f"Unexpected libname: {libname!r}") + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", fake_load_nvidia_dynamic_lib) + monkeypatch.setattr( + compatibility_module, + "_locate_nvidia_header_directory", + lambda _libname: LocatedHeaderDir(abs_path=str(hdr_dir), found_via="CUDA_PATH"), + ) + + guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)) + 
guard_rails.load_nvidia_dynamic_lib("nvml") + guard_rails.load_nvidia_dynamic_lib("nvrtc") + + with pytest.raises(CompatibilityCheckError, match="exact CTK major.minor match"): + guard_rails.find_nvidia_header_directory("nvrtc") + + def test_constraints_accept_string_and_tuple_forms(monkeypatch, tmp_path): ctk_root = tmp_path / "cuda-12.9" _write_version_json(ctk_root, "12.9.20250531") From b622613574e9a6ae609fda463f27e90be2a4d725 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Fri, 24 Apr 2026 22:37:07 -0700 Subject: [PATCH 06/17] Accept wheel metadata version ranges in strict guard rails. Infer the CUDA Toolkit line from both wildcard-pinned and range-based cuda-toolkit requirements so strict process-wide guard rails keep working for editable wheel installs used by nvrtc and nvJitLink. Made-with: Cursor --- .../pathfinder/_compatibility_guard_rails.py | 97 +++++++++++++++++-- .../tests/test_compatibility_guard_rails.py | 63 ++++++++++++ 2 files changed, 150 insertions(+), 10 deletions(-) diff --git a/cuda_pathfinder/cuda/pathfinder/_compatibility_guard_rails.py b/cuda_pathfinder/cuda/pathfinder/_compatibility_guard_rails.py index e15ebe3e9d2..888d7d0b329 100644 --- a/cuda_pathfinder/cuda/pathfinder/_compatibility_guard_rails.py +++ b/cuda_pathfinder/cuda/pathfinder/_compatibility_guard_rails.py @@ -53,9 +53,8 @@ ConstraintArg: TypeAlias = int | str | tuple[str, int] | None _CTK_VERSION_RE = re.compile(r"^(?P\d+)\.(?P\d+)") -_REQUIRES_DIST_RE = re.compile( - r"^\s*(?P[A-Za-z0-9_.-]+)\s*==\s*(?P[0-9][A-Za-z0-9.+-]*?)(?:\.\*)?(?:\s*;|$)" -) +_REQUIRES_DIST_RE = re.compile(r"^\s*(?P[A-Za-z0-9_.-]+)\s*(?P[^;]*)(?:\s*;|$)") +_VERSION_SPECIFIER_RE = re.compile(r"^\s*(?P==|<=|>=|<|>)\s*(?P[0-9][A-Za-z0-9.+-]*?(?:\.\*)?)\s*$") _STATIC_LIBS_PACKAGED_WITH: dict[str, PackagedWith] = { "cudadevrt": "ctk", @@ -113,6 +112,12 @@ def __str__(self) -> str: return f"{self.operator}{self.value}" +@dataclass(frozen=True, slots=True) +class VersionSpecifier: + 
operator: ConstraintOperator + version: str + + @dataclass(frozen=True, slots=True) class ResolvedItem: name: str @@ -185,6 +190,63 @@ def _distribution_name(dist: importlib.metadata.Distribution) -> str | None: return metadata.get("Name") +def _release_version_parts(version: str) -> tuple[int, ...] | None: + match = re.match(r"^\d+(?:\.\d+)*", version) + if match is None: + return None + return tuple(int(part) for part in match.group(0).split(".")) + + +def _compare_release_versions(lhs: tuple[int, ...], rhs: tuple[int, ...]) -> int: + max_len = max(len(lhs), len(rhs)) + lhs_padded = lhs + (0,) * (max_len - len(lhs)) + rhs_padded = rhs + (0,) * (max_len - len(rhs)) + if lhs_padded < rhs_padded: + return -1 + if lhs_padded > rhs_padded: + return 1 + return 0 + + +def _parse_version_specifiers(specifier_text: str) -> tuple[VersionSpecifier, ...]: + stripped = specifier_text.strip() + if not stripped: + return () + parsed: list[VersionSpecifier] = [] + for raw_clause in stripped.split(","): + match = _VERSION_SPECIFIER_RE.match(raw_clause) + if match is None: + return () + parsed.append(VersionSpecifier(operator=match.group("operator"), version=match.group("version"))) + return tuple(parsed) + + +def _version_satisfies_specifiers(version: str, specifiers: tuple[VersionSpecifier, ...]) -> bool: + if not specifiers: + return False + for specifier in specifiers: + if specifier.operator == "==": + prefix = specifier.version.removesuffix(".*") + if version == prefix or version.startswith(prefix + "."): + continue + return False + candidate_parts = _release_version_parts(version) + required_parts = _release_version_parts(specifier.version) + if candidate_parts is None or required_parts is None: + return False + comparison = _compare_release_versions(candidate_parts, required_parts) + if specifier.operator == "<" and comparison < 0: + continue + if specifier.operator == "<=" and comparison <= 0: + continue + if specifier.operator == ">" and comparison > 0: + continue + if 
specifier.operator == ">=" and comparison >= 0: + continue + return False + return True + + @functools.cache def _owned_distribution_candidates(abs_path: str) -> tuple[tuple[str, str], ...]: normalized_abs_path = os.path.normpath(os.path.abspath(abs_path)) @@ -201,8 +263,10 @@ def _owned_distribution_candidates(abs_path: str) -> tuple[tuple[str, str], ...] @functools.cache -def _cuda_toolkit_requirement_maps() -> tuple[tuple[str, CtkVersion, dict[str, tuple[str, ...]]], ...]: - results: list[tuple[str, CtkVersion, dict[str, tuple[str, ...]]]] = [] +def _cuda_toolkit_requirement_maps() -> tuple[ + tuple[str, CtkVersion, dict[str, tuple[tuple[VersionSpecifier, ...], ...]]], ... +]: + results: list[tuple[str, CtkVersion, dict[str, tuple[tuple[VersionSpecifier, ...], ...]]]] = [] for dist in importlib.metadata.distributions(): dist_name = _distribution_name(dist) if _normalize_distribution_name(dist_name or "") != "cuda-toolkit": @@ -210,18 +274,31 @@ def _cuda_toolkit_requirement_maps() -> tuple[tuple[str, CtkVersion, dict[str, t ctk_version = _parse_ctk_version(dist.version) if ctk_version is None: continue - requirement_map: dict[str, set[str]] = {} + requirement_map: dict[str, set[tuple[VersionSpecifier, ...]]] = {} for requirement in dist.requires or (): match = _REQUIRES_DIST_RE.match(requirement) if match is None: continue req_name = _normalize_distribution_name(match.group("name")) - requirement_map.setdefault(req_name, set()).add(match.group("version")) + parsed_specifiers = _parse_version_specifiers(match.group("specifier_text")) + if not parsed_specifiers: + continue + requirement_map.setdefault(req_name, set()).add(parsed_specifiers) results.append( ( dist.version, ctk_version, - {name: tuple(sorted(prefixes)) for name, prefixes in requirement_map.items()}, + { + name: tuple( + sorted( + specifier_sets, + key=lambda specifiers: tuple( + (specifier.operator, specifier.version) for specifier in specifiers + ), + ) + ) + for name, specifier_sets in 
requirement_map.items() + }, ) ) return tuple(results) @@ -232,9 +309,9 @@ def _wheel_metadata_for_abs_path(abs_path: str) -> CtkMetadata | None: for owner_name, owner_version in _owned_distribution_candidates(abs_path): normalized_owner_name = _normalize_distribution_name(owner_name) for toolkit_dist_version, ctk_version, requirement_map in _cuda_toolkit_requirement_maps(): - requirement_prefixes = requirement_map.get(normalized_owner_name, ()) + requirement_specifier_sets = requirement_map.get(normalized_owner_name, ()) if not any( - owner_version == prefix or owner_version.startswith(prefix + ".") for prefix in requirement_prefixes + _version_satisfies_specifiers(owner_version, specifiers) for specifiers in requirement_specifier_sets ): continue matched_versions[ctk_version] = ( diff --git a/cuda_pathfinder/tests/test_compatibility_guard_rails.py b/cuda_pathfinder/tests/test_compatibility_guard_rails.py index f5f323bfe2f..00f26e155b9 100644 --- a/cuda_pathfinder/tests/test_compatibility_guard_rails.py +++ b/cuda_pathfinder/tests/test_compatibility_guard_rails.py @@ -83,6 +83,26 @@ def _driver_cuda_version(encoded: int) -> DriverCudaVersion: ) +class _FakeDistribution: + def __init__( + self, + *, + name: str, + version: str, + root: Path, + files: tuple[str, ...] = (), + requires: tuple[str, ...] 
= (), + ) -> None: + self.metadata = {"Name": name} + self.version = version + self.files = tuple(Path(file) for file in files) + self.requires = list(requires) + self._root = root + + def locate_file(self, file: Path) -> Path: + return self._root / file + + def _assert_real_ctk_backed_path(path: str) -> None: norm_path = os.path.normpath(os.path.abspath(path)) if "site-packages" in Path(norm_path).parts: @@ -441,6 +461,49 @@ def fake_load_nvidia_dynamic_lib(libname: str) -> LoadedDL: guard_rails.find_nvidia_header_directory("nvrtc") +@pytest.mark.parametrize( + "requirement", + ( + "nvidia-nvjitlink == 13.2.78.*; extra == 'nvjitlink'", + "nvidia-nvjitlink<14,>=13.2.78; extra == 'nvjitlink'", + ), +) +def test_wheel_metadata_accepts_exact_and_range_requirements(monkeypatch, tmp_path, requirement): + site_packages = tmp_path / "site-packages" + lib_path = _touch(site_packages / "nvidia" / "cu13" / "lib" / "libnvJitLink.so.13") + owner_dist = _FakeDistribution( + name="nvidia-nvjitlink", + version="13.2.78", + root=site_packages, + files=("nvidia/cu13/lib/libnvJitLink.so.13",), + ) + cuda_toolkit_dist = _FakeDistribution( + name="cuda-toolkit", + version="13.2.1", + root=site_packages, + requires=(requirement,), + ) + + compatibility_module._owned_distribution_candidates.cache_clear() + compatibility_module._cuda_toolkit_requirement_maps.cache_clear() + try: + monkeypatch.setattr( + compatibility_module.importlib.metadata, + "distributions", + lambda: (owner_dist, cuda_toolkit_dist), + ) + + metadata = compatibility_module._wheel_metadata_for_abs_path(lib_path) + finally: + compatibility_module._owned_distribution_candidates.cache_clear() + compatibility_module._cuda_toolkit_requirement_maps.cache_clear() + + assert metadata is not None + assert metadata.ctk_version.major == 13 + assert metadata.ctk_version.minor == 2 + assert metadata.source == "wheel metadata via nvidia-nvjitlink==13.2.78 pinned by cuda-toolkit==13.2.1" + + def 
test_constraints_accept_string_and_tuple_forms(monkeypatch, tmp_path): ctk_root = tmp_path / "cuda-12.9" _write_version_json(ctk_root, "12.9.20250531") From f7e81ede9278ad149e60e8080d3b91fbb5bf3087 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sat, 25 Apr 2026 12:06:35 -0700 Subject: [PATCH 07/17] Add cuda.h toolkit version parser. Introduce a small toolkit-info utility that reads the CUDA_VERSION macro from cuda.h so follow-up guard-rails changes can infer CTK major.minor from toolkit headers without depending on version.json. Made-with: Cursor --- .../cuda/pathfinder/_utils/toolkit_info.py | 52 ++++++++++ .../tests/test_utils_toolkit_info.py | 96 +++++++++++++++++++ 2 files changed, 148 insertions(+) create mode 100644 cuda_pathfinder/cuda/pathfinder/_utils/toolkit_info.py create mode 100644 cuda_pathfinder/tests/test_utils_toolkit_info.py diff --git a/cuda_pathfinder/cuda/pathfinder/_utils/toolkit_info.py b/cuda_pathfinder/cuda/pathfinder/_utils/toolkit_info.py new file mode 100644 index 00000000000..874cbfeae40 --- /dev/null +++ b/cuda_pathfinder/cuda/pathfinder/_utils/toolkit_info.py @@ -0,0 +1,52 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import functools +import re +from dataclasses import dataclass +from pathlib import Path + +_CUDA_VERSION_RE = re.compile(r"^\s*#\s*define\s+CUDA_VERSION\s+(?P\d+)\b", re.MULTILINE) + + +class ReadCudaHeaderVersionError(RuntimeError): + """Raised when ``read_cuda_header_version()`` cannot determine the CTK version from ``cuda.h``.""" + + +@dataclass(frozen=True, slots=True) +class CudaToolkitVersion: + """CUDA Toolkit version encoded by the ``CUDA_VERSION`` macro in ``cuda.h``.""" + + encoded: int + major: int + minor: int + + +def parse_cuda_header_version(header_text: str) -> CudaToolkitVersion | None: + """Parse the CUDA Toolkit major/minor version from ``cuda.h`` text.""" + match = _CUDA_VERSION_RE.search(header_text) + if match is None: + return None + encoded = int(match.group("encoded")) + return CudaToolkitVersion( + encoded=encoded, + major=encoded // 1000, + minor=(encoded % 1000) // 10, + ) + + +@functools.cache +def read_cuda_header_version(cuda_header_path: str) -> CudaToolkitVersion: + """Read and parse the CUDA Toolkit major/minor version from ``cuda.h``.""" + try: + header_text = Path(cuda_header_path).read_text(encoding="utf-8", errors="replace") + version = parse_cuda_header_version(header_text) + if version is None: + raise RuntimeError(f"{cuda_header_path!r} does not define CUDA_VERSION.") + return version + except Exception as exc: + raise ReadCudaHeaderVersionError( + f"Failed to read the CUDA Toolkit version from cuda.h at {cuda_header_path!r}." + ) from exc diff --git a/cuda_pathfinder/tests/test_utils_toolkit_info.py b/cuda_pathfinder/tests/test_utils_toolkit_info.py new file mode 100644 index 00000000000..4cb370006ef --- /dev/null +++ b/cuda_pathfinder/tests/test_utils_toolkit_info.py @@ -0,0 +1,96 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +import pytest + +from cuda.pathfinder._utils import toolkit_info + + +@pytest.fixture(autouse=True) +def _clear_cuda_header_version_cache(): + toolkit_info.read_cuda_header_version.cache_clear() + yield + toolkit_info.read_cuda_header_version.cache_clear() + + +def test_parse_cuda_header_version_returns_parsed_dataclass(): + header_text = """ + #ifndef CUDA_H + #define CUDA_H + #define CUDA_VERSION 13020 + #endif + """ + + assert toolkit_info.parse_cuda_header_version(header_text) == toolkit_info.CudaToolkitVersion( + encoded=13020, + major=13, + minor=2, + ) + + +def test_parse_cuda_header_version_returns_none_when_macro_is_missing(): + header_text = """ + #ifndef CUDA_H + #define CUDA_H + #define CUDA_API_PER_THREAD_DEFAULT_STREAM 1 + #endif + """ + + assert toolkit_info.parse_cuda_header_version(header_text) is None + + +def test_read_cuda_header_version_reads_file_and_returns_parsed_dataclass(tmp_path): + cuda_h_path = tmp_path / "cuda.h" + cuda_h_path.write_text( + """ + #ifndef CUDA_H + #define CUDA_H + #define CUDA_VERSION 12090 /* CUDA 12.9 */ + #endif + """, + encoding="utf-8", + ) + + assert toolkit_info.read_cuda_header_version(str(cuda_h_path)) == toolkit_info.CudaToolkitVersion( + encoded=12090, + major=12, + minor=9, + ) + + +def test_read_cuda_header_version_tolerates_non_utf8_bytes(tmp_path): + cuda_h_path = tmp_path / "cuda.h" + cuda_h_path.write_bytes( + b"#ifndef CUDA_H\n" + b"#define CUDA_H\n" + b"\xff\xfe invalid bytes in comment or banner\n" + b"#define CUDA_VERSION 12080\n" + b"#endif\n" + ) + + assert toolkit_info.read_cuda_header_version(str(cuda_h_path)) == toolkit_info.CudaToolkitVersion( + encoded=12080, + major=12, + minor=8, + ) + + +def test_read_cuda_header_version_wraps_parse_failures(tmp_path): + cuda_h_path = tmp_path / "cuda.h" + cuda_h_path.write_text( + """ + #ifndef CUDA_H + #define CUDA_H + #endif + """, + encoding="utf-8", + ) + + with pytest.raises( + 
toolkit_info.ReadCudaHeaderVersionError, + match="Failed to read the CUDA Toolkit version from cuda.h", + ) as exc_info: + toolkit_info.read_cuda_header_version(str(cuda_h_path)) + + assert isinstance(exc_info.value.__cause__, RuntimeError) + assert "does not define CUDA_VERSION" in str(exc_info.value.__cause__) From e3b402a29b848bf5b7effc20a02d3cc1018cf871 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sat, 25 Apr 2026 12:31:50 -0700 Subject: [PATCH 08/17] Share encoded CUDA version decoding logic. Centralize encoded CUDA version parsing and validation so toolkit and driver version helpers stay aligned and cuda.h parsing gets consistent string conversion and error reporting. Made-with: Cursor --- .../cuda/pathfinder/_utils/driver_info.py | 14 ++--- .../cuda/pathfinder/_utils/toolkit_info.py | 51 +++++++++++++++---- .../tests/test_utils_driver_info.py | 11 ++++ .../tests/test_utils_toolkit_info.py | 44 ++++++++++++++++ 4 files changed, 99 insertions(+), 21 deletions(-) diff --git a/cuda_pathfinder/cuda/pathfinder/_utils/driver_info.py b/cuda_pathfinder/cuda/pathfinder/_utils/driver_info.py index a5d4d167d33..78e833f9baf 100644 --- a/cuda_pathfinder/cuda/pathfinder/_utils/driver_info.py +++ b/cuda_pathfinder/cuda/pathfinder/_utils/driver_info.py @@ -7,11 +7,13 @@ import functools from collections.abc import Callable from dataclasses import dataclass +from typing import cast from cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib import ( load_nvidia_dynamic_lib as _load_nvidia_dynamic_lib, ) from cuda.pathfinder._utils.platform_aware import IS_WINDOWS +from cuda.pathfinder._utils.toolkit_info import EncodedCudaVersion class QueryDriverCudaVersionError(RuntimeError): @@ -19,7 +21,7 @@ class QueryDriverCudaVersionError(RuntimeError): @dataclass(frozen=True, slots=True) -class DriverCudaVersion: +class DriverCudaVersion(EncodedCudaVersion): """ CUDA-facing driver version reported by ``cuDriverGetVersion()``. 
@@ -41,21 +43,13 @@ class DriverCudaVersion: to ``Driver Version: 595.58.03``. """ - encoded: int - major: int - minor: int - @functools.cache def query_driver_cuda_version() -> DriverCudaVersion: """Return the CUDA driver version parsed into its major/minor components.""" try: encoded = _query_driver_cuda_version_int() - return DriverCudaVersion( - encoded=encoded, - major=encoded // 1000, - minor=(encoded % 1000) // 10, - ) + return cast(DriverCudaVersion, DriverCudaVersion.from_encoded(encoded)) except Exception as exc: raise QueryDriverCudaVersionError("Failed to query the CUDA driver version.") from exc diff --git a/cuda_pathfinder/cuda/pathfinder/_utils/toolkit_info.py b/cuda_pathfinder/cuda/pathfinder/_utils/toolkit_info.py index 874cbfeae40..431727bf4bb 100644 --- a/cuda_pathfinder/cuda/pathfinder/_utils/toolkit_info.py +++ b/cuda_pathfinder/cuda/pathfinder/_utils/toolkit_info.py @@ -7,8 +7,46 @@ import re from dataclasses import dataclass from pathlib import Path +from typing import TypeVar _CUDA_VERSION_RE = re.compile(r"^\s*#\s*define\s+CUDA_VERSION\s+(?P\d+)\b", re.MULTILINE) +EncodedCudaVersionT = TypeVar("EncodedCudaVersionT", bound="EncodedCudaVersion") + + +@dataclass(frozen=True, slots=True) +class EncodedCudaVersion: + """CUDA major/minor version represented in CUDA's integer ``encoded`` form.""" + + encoded: int + major: int + minor: int + + @classmethod + def from_encoded(cls: type[EncodedCudaVersionT], encoded: int | str) -> EncodedCudaVersionT: + if isinstance(encoded, str): + try: + encoded_int = int(encoded) + except ValueError as exc: + raise ValueError( + f"{cls.__name__}.from_encoded() expected an integer or decimal string, got {encoded!r}." + ) from exc + elif isinstance(encoded, int): + encoded_int = encoded + else: + raise TypeError( + f"{cls.__name__}.from_encoded() expected an integer or decimal string, got {type(encoded).__name__}." 
+ ) + if encoded_int < 0: + raise ValueError( + f"{cls.__name__}.from_encoded() expected a non-negative encoded CUDA version, got {encoded_int}." + ) + # CUDA encodes versions as major * 1000 + minor * 10. The least-significant + # decimal is ignored here: it is 0 in all CUDA releases and is not a patch version. + return cls( + encoded=encoded_int, + major=encoded_int // 1000, + minor=(encoded_int % 1000) // 10, + ) class ReadCudaHeaderVersionError(RuntimeError): @@ -16,25 +54,16 @@ class ReadCudaHeaderVersionError(RuntimeError): @dataclass(frozen=True, slots=True) -class CudaToolkitVersion: +class CudaToolkitVersion(EncodedCudaVersion): """CUDA Toolkit version encoded by the ``CUDA_VERSION`` macro in ``cuda.h``.""" - encoded: int - major: int - minor: int - def parse_cuda_header_version(header_text: str) -> CudaToolkitVersion | None: """Parse the CUDA Toolkit major/minor version from ``cuda.h`` text.""" match = _CUDA_VERSION_RE.search(header_text) if match is None: return None - encoded = int(match.group("encoded")) - return CudaToolkitVersion( - encoded=encoded, - major=encoded // 1000, - minor=(encoded % 1000) // 10, - ) + return CudaToolkitVersion.from_encoded(match.group("encoded")) @functools.cache diff --git a/cuda_pathfinder/tests/test_utils_driver_info.py b/cuda_pathfinder/tests/test_utils_driver_info.py index 21948dadafe..99af76a69b4 100644 --- a/cuda_pathfinder/tests/test_utils_driver_info.py +++ b/cuda_pathfinder/tests/test_utils_driver_info.py @@ -73,6 +73,17 @@ def test_query_driver_cuda_version_returns_parsed_dataclass(monkeypatch): ) +def test_driver_cuda_version_from_encoded_returns_subclass_instance(): + version = driver_info.DriverCudaVersion.from_encoded(12080) + + assert version == driver_info.DriverCudaVersion( + encoded=12080, + major=12, + minor=8, + ) + assert type(version) is driver_info.DriverCudaVersion + + def test_query_driver_cuda_version_wraps_internal_failures(monkeypatch): root_cause = RuntimeError("low-level query failed") diff 
--git a/cuda_pathfinder/tests/test_utils_toolkit_info.py b/cuda_pathfinder/tests/test_utils_toolkit_info.py index 4cb370006ef..a62db6b9603 100644 --- a/cuda_pathfinder/tests/test_utils_toolkit_info.py +++ b/cuda_pathfinder/tests/test_utils_toolkit_info.py @@ -13,6 +13,39 @@ def _clear_cuda_header_version_cache(): toolkit_info.read_cuda_header_version.cache_clear() +def test_encoded_cuda_version_from_encoded_decodes_major_minor(): + assert toolkit_info.EncodedCudaVersion.from_encoded(13020) == toolkit_info.EncodedCudaVersion( + encoded=13020, + major=13, + minor=2, + ) + + +def test_encoded_cuda_version_from_encoded_accepts_decimal_string(): + assert toolkit_info.EncodedCudaVersion.from_encoded("13020") == toolkit_info.EncodedCudaVersion( + encoded=13020, + major=13, + minor=2, + ) + + +def test_encoded_cuda_version_from_encoded_raises_helpful_error_for_invalid_string(): + with pytest.raises( + ValueError, + match=r"EncodedCudaVersion\.from_encoded\(\) expected an integer or decimal string, got '13\.2'", + ): + toolkit_info.EncodedCudaVersion.from_encoded("13.2") + + +@pytest.mark.parametrize("encoded", [-1, "-1"]) +def test_encoded_cuda_version_from_encoded_rejects_negative_values(encoded): + with pytest.raises( + ValueError, + match=r"EncodedCudaVersion\.from_encoded\(\) expected a non-negative encoded CUDA version, got -1", + ): + toolkit_info.EncodedCudaVersion.from_encoded(encoded) + + def test_parse_cuda_header_version_returns_parsed_dataclass(): header_text = """ #ifndef CUDA_H @@ -28,6 +61,17 @@ def test_parse_cuda_header_version_returns_parsed_dataclass(): ) +def test_cuda_toolkit_version_from_encoded_returns_subclass_instance(): + version = toolkit_info.CudaToolkitVersion.from_encoded(12090) + + assert version == toolkit_info.CudaToolkitVersion( + encoded=12090, + major=12, + minor=9, + ) + assert type(version) is toolkit_info.CudaToolkitVersion + + def test_parse_cuda_header_version_returns_none_when_macro_is_missing(): header_text = """ #ifndef CUDA_H 
From c6c38e3952a6733eecdf6de913601b31076c551d Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sat, 25 Apr 2026 13:13:22 -0700 Subject: [PATCH 09/17] Use cuda.h for CTK guard-rails metadata. Replace version.json-based CTK root metadata with cuda.h parsing so compatibility checks use a simpler, more universal toolkit source while preserving wheel-based metadata inference. Made-with: Cursor --- .../pathfinder/_compatibility_guard_rails.py | 84 +++++++++++------- .../tests/test_compatibility_guard_rails.py | 88 ++++++++++++------- 2 files changed, 105 insertions(+), 67 deletions(-) diff --git a/cuda_pathfinder/cuda/pathfinder/_compatibility_guard_rails.py b/cuda_pathfinder/cuda/pathfinder/_compatibility_guard_rails.py index 888d7d0b329..35737c1cf5d 100644 --- a/cuda_pathfinder/cuda/pathfinder/_compatibility_guard_rails.py +++ b/cuda_pathfinder/cuda/pathfinder/_compatibility_guard_rails.py @@ -5,7 +5,6 @@ import functools import importlib.metadata -import json import os import re from collections.abc import Mapping @@ -46,6 +45,7 @@ QueryDriverCudaVersionError, query_driver_cuda_version, ) +from cuda.pathfinder._utils.toolkit_info import ReadCudaHeaderVersionError, read_cuda_header_version ItemKind: TypeAlias = str PackagedWith: TypeAlias = str @@ -323,46 +323,62 @@ def _wheel_metadata_for_abs_path(abs_path: str) -> CtkMetadata | None: return CtkMetadata(ctk_version=ctk_version, ctk_root=None, source=source) +def _normalized_ctk_root_for_cuda_header(cuda_header_path: Path) -> Path: + ctk_root = cuda_header_path.parent.parent + if ctk_root.parent.name == "targets": + return ctk_root.parent.parent + return ctk_root + + @functools.cache -def _read_ctk_version(ctk_root: str) -> CtkVersion | None: - version_json_path = os.path.join(ctk_root, "version.json") - if not os.path.isfile(version_json_path): - return None - with open(version_json_path, encoding="utf-8") as fobj: - payload = json.load(fobj) - if not isinstance(payload, dict): - return None - 
cuda_entry = payload.get("cuda") - if not isinstance(cuda_entry, dict): +def _cuda_header_metadata_for_ctk_root_candidate(ctk_root_candidate: str) -> CtkMetadata | None: + candidate_path = Path(ctk_root_candidate) + header_paths: list[Path] = [] + + direct_header = candidate_path / "include" / "cuda.h" + if direct_header.is_file(): + header_paths.append(direct_header) + + targets_dir = candidate_path / "targets" + if targets_dir.is_dir(): + header_paths.extend(sorted(path for path in targets_dir.glob("*/include/cuda.h") if path.is_file())) + + matches: list[tuple[CtkVersion, Path, Path]] = [] + for cuda_header_path in header_paths: + try: + version = read_cuda_header_version(str(cuda_header_path)) + except ReadCudaHeaderVersionError: + continue + matches.append( + ( + CtkVersion(major=version.major, minor=version.minor), + _normalized_ctk_root_for_cuda_header(cuda_header_path), + cuda_header_path, + ) + ) + + if not matches: return None - cuda_version = cuda_entry.get("version") - if not isinstance(cuda_version, str): + + ctk_version, ctk_root, source_path = matches[0] + if any(other_version != ctk_version for other_version, _other_root, _other_source in matches[1:]): return None - return _parse_ctk_version(cuda_version) + + return CtkMetadata( + ctk_version=ctk_version, + ctk_root=str(ctk_root), + source=f"cuda.h at {source_path}", + ) -def _find_enclosing_ctk_root(abs_path: str) -> str | None: +def _ctk_metadata_for_abs_path(abs_path: str) -> CtkMetadata | None: current = Path(abs_path) if current.is_file(): current = current.parent for candidate in (current, *current.parents): - ctk_root = str(candidate) - if _read_ctk_version(ctk_root) is not None: - return ctk_root - return None - - -def _ctk_metadata_for_abs_path(abs_path: str) -> CtkMetadata | None: - ctk_root = _find_enclosing_ctk_root(abs_path) - if ctk_root is not None: - ctk_version = _read_ctk_version(ctk_root) - if ctk_version is not None: - version_json_path = os.path.join(ctk_root, "version.json") - 
return CtkMetadata( - ctk_version=ctk_version, - ctk_root=ctk_root, - source=f"version.json at {version_json_path}", - ) + ctk_metadata = _cuda_header_metadata_for_ctk_root_candidate(str(candidate)) + if ctk_metadata is not None: + return ctk_metadata return _wheel_metadata_for_abs_path(abs_path) @@ -468,7 +484,7 @@ def compatibility_check( status="insufficient_metadata", message=( "v1 compatibility checks require either an enclosing CUDA Toolkit root " - "with version.json or wheel metadata that can be traced to an installed " + "with cuda.h or wheel metadata that can be traced to an installed " f"cuda-toolkit distribution. Could not determine the CTK version for {item.describe()}." ), ) @@ -545,7 +561,7 @@ def _enforce_ctk_metadata(self, item: ResolvedItem) -> None: return raise CompatibilityInsufficientMetadataError( "v1 compatibility checks require either an enclosing CUDA Toolkit root " - "with version.json or wheel metadata that can be traced to an installed " + "with cuda.h or wheel metadata that can be traced to an installed " f"cuda-toolkit distribution. Could not determine the CTK version for {item.describe()}." 
) diff --git a/cuda_pathfinder/tests/test_compatibility_guard_rails.py b/cuda_pathfinder/tests/test_compatibility_guard_rails.py index 00f26e155b9..cb081787443 100644 --- a/cuda_pathfinder/tests/test_compatibility_guard_rails.py +++ b/cuda_pathfinder/tests/test_compatibility_guard_rails.py @@ -2,7 +2,6 @@ # SPDX-License-Identifier: Apache-2.0 import importlib -import json import os from pathlib import Path @@ -36,10 +35,22 @@ def _default_process_wide_guard_rails_mode(monkeypatch): monkeypatch.delenv(COMPATIBILITY_GUARD_RAILS_ENV_VAR, raising=False) -def _write_version_json(ctk_root: Path, toolkit_version: str) -> None: - ctk_root.mkdir(parents=True, exist_ok=True) - payload = {"cuda": {"version": toolkit_version}} - (ctk_root / "version.json").write_text(json.dumps(payload), encoding="utf-8") +def _write_cuda_h( + ctk_root: Path, + toolkit_version: str, + *, + include_dir_parts: tuple[str, ...] = ("targets", "x86_64-linux", "include"), +) -> None: + parts = toolkit_version.split(".") + if len(parts) < 2: + raise AssertionError(f"Expected at least major.minor in toolkit version, got {toolkit_version!r}") + encoded = int(parts[0]) * 1000 + int(parts[1]) * 10 + cuda_h_path = ctk_root.joinpath(*include_dir_parts, "cuda.h") + cuda_h_path.parent.mkdir(parents=True, exist_ok=True) + cuda_h_path.write_text( + f"#ifndef CUDA_H\n#define CUDA_H\n#define CUDA_VERSION {encoded}\n#endif\n", + encoding="utf-8", + ) def _touch(path: Path) -> str: @@ -76,11 +87,7 @@ def _located_bitcode_lib(name: str, abs_path: str) -> LocatedBitcodeLib: def _driver_cuda_version(encoded: int) -> DriverCudaVersion: - return DriverCudaVersion( - encoded=encoded, - major=encoded // 1000, - minor=(encoded % 1000) // 10, - ) + return DriverCudaVersion.from_encoded(encoded) class _FakeDistribution: @@ -111,8 +118,9 @@ def _assert_real_ctk_backed_path(path: str) -> None: if current.is_file(): current = current.parent for candidate in (current, *current.parents): - version_json_path = candidate / 
"version.json" - if version_json_path.is_file(): + if (candidate / "include" / "cuda.h").is_file(): + return + if any(path.is_file() for path in (candidate / "targets").glob("*/include/cuda.h")): return for env_var in ("CUDA_PATH", "CUDA_HOME"): ctk_root = os.environ.get(env_var) @@ -122,7 +130,7 @@ def _assert_real_ctk_backed_path(path: str) -> None: if os.path.commonpath((norm_path, norm_ctk_root)) == norm_ctk_root: return raise AssertionError( - "Expected a site-packages path, a path under a CTK root with version.json, " + "Expected a site-packages path, a path under a CTK root with cuda.h, " f"or a path under CUDA_PATH/CUDA_HOME, got {path!r}" ) @@ -220,7 +228,7 @@ def fail_raw_fallback(_libname: str) -> LoadedDL: @pytest.mark.parametrize("env_value", [None, ""]) def test_public_apis_default_to_strict_when_env_var_is_unset_or_empty(monkeypatch, tmp_path, env_value): - lib_path = _touch(tmp_path / "no-version-json" / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") + lib_path = _touch(tmp_path / "no-cuda-h" / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) monkeypatch.setattr( @@ -238,12 +246,12 @@ def fail_raw_fallback(_libname: str) -> LoadedDL: else: monkeypatch.setenv(COMPATIBILITY_GUARD_RAILS_ENV_VAR, env_value) - with pytest.raises(CompatibilityInsufficientMetadataError, match="version.json"): + with pytest.raises(CompatibilityInsufficientMetadataError, match="cuda.h"): pathfinder.load_nvidia_dynamic_lib("nvrtc") def test_public_apis_best_effort_fall_back_on_insufficient_metadata(monkeypatch, tmp_path): - guarded_lib_path = _touch(tmp_path / "no-version-json" / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") + guarded_lib_path = _touch(tmp_path / "no-cuda-h" / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") raw_loaded = _loaded_dl("/opt/mock/libnvrtc.so.12", found_via="system-search") 
monkeypatch.setenv(COMPATIBILITY_GUARD_RAILS_ENV_VAR, "best_effort") @@ -292,8 +300,8 @@ def test_public_apis_reject_invalid_guard_rails_mode(monkeypatch): def test_public_apis_share_process_wide_guard_rails_state(monkeypatch, tmp_path): lib_root = tmp_path / "cuda-12.8" hdr_root = tmp_path / "cuda-12.9" - _write_version_json(lib_root, "12.8.20250303") - _write_version_json(hdr_root, "12.9.20250531") + _write_cuda_h(lib_root, "12.8.20250303") + _write_cuda_h(hdr_root, "12.9.20250531") lib_path = _touch(lib_root / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") hdr_dir = hdr_root / "targets" / "x86_64-linux" / "include" @@ -320,7 +328,7 @@ def test_public_apis_share_process_wide_guard_rails_state(monkeypatch, tmp_path) def test_load_dynamic_lib_then_find_headers_same_ctk_version(monkeypatch, tmp_path): ctk_root = tmp_path / "cuda-12.9" - _write_version_json(ctk_root, "12.9.20250531") + _write_cuda_h(ctk_root, "12.9.20250531") lib_path = _touch(ctk_root / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") hdr_dir = ctk_root / "targets" / "x86_64-linux" / "include" _touch(hdr_dir / "nvrtc.h") @@ -344,8 +352,8 @@ def test_load_dynamic_lib_then_find_headers_same_ctk_version(monkeypatch, tmp_pa def test_exact_ctk_major_minor_match_is_required(monkeypatch, tmp_path): lib_root = tmp_path / "cuda-12.8" hdr_root = tmp_path / "cuda-12.9" - _write_version_json(lib_root, "12.8.20250303") - _write_version_json(hdr_root, "12.9.20250531") + _write_cuda_h(lib_root, "12.8.20250303") + _write_cuda_h(hdr_root, "12.9.20250531") lib_path = _touch(lib_root / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") hdr_dir = hdr_root / "targets" / "x86_64-linux" / "include" @@ -367,7 +375,7 @@ def test_exact_ctk_major_minor_match_is_required(monkeypatch, tmp_path): def test_driver_major_must_not_be_older_than_ctk_major(monkeypatch, tmp_path): ctk_root = tmp_path / "cuda-13.0" - _write_version_json(ctk_root, "13.0.20251003") + _write_cuda_h(ctk_root, "13.0.20251003") lib_path = 
_touch(ctk_root / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.13") monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) @@ -378,17 +386,31 @@ def test_driver_major_must_not_be_older_than_ctk_major(monkeypatch, tmp_path): guard_rails.load_nvidia_dynamic_lib("nvrtc") -def test_missing_version_json_raises_insufficient_metadata(monkeypatch, tmp_path): - lib_path = _touch(tmp_path / "no-version-json" / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") +def test_missing_cuda_h_raises_insufficient_metadata(monkeypatch, tmp_path): + lib_path = _touch(tmp_path / "no-cuda-h" / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)) - with pytest.raises(CompatibilityInsufficientMetadataError, match="version.json"): + with pytest.raises(CompatibilityInsufficientMetadataError, match="cuda.h"): guard_rails.load_nvidia_dynamic_lib("nvrtc") +def test_windows_style_ctk_root_uses_root_include_cuda_h(monkeypatch, tmp_path): + ctk_root = tmp_path / "cuda-13.2" + _write_cuda_h(ctk_root, "13.2.20251003", include_dir_parts=("include",)) + lib_path = _touch(ctk_root / "bin" / "x64" / "nvrtc64_130_0.dll") + + monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) + + guard_rails = CompatibilityGuardRails(driver_cuda_version=_driver_cuda_version(13000)) + + loaded = guard_rails.load_nvidia_dynamic_lib("nvrtc") + + assert loaded.abs_path == lib_path + + def test_other_packaging_raises_insufficient_metadata(monkeypatch, tmp_path): abs_path = _touch(tmp_path / "site-packages" / "nvidia" / "nvshmem" / "lib" / "libnvshmem_device.bc") @@ -407,7 +429,7 @@ def test_other_packaging_raises_insufficient_metadata(monkeypatch, tmp_path): def test_driver_libs_do_not_lock_ctk_anchor(monkeypatch, 
tmp_path): driver_lib_path = _touch(tmp_path / "driver-root" / "libnvidia-ml.so.1") ctk_root = tmp_path / "cuda-12.9" - _write_version_json(ctk_root, "12.9.20250531") + _write_cuda_h(ctk_root, "12.9.20250531") ctk_lib_path = _touch(ctk_root / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") def fake_load_nvidia_dynamic_lib(libname: str) -> LoadedDL: @@ -432,8 +454,8 @@ def test_driver_libs_do_not_mask_later_ctk_mismatch(monkeypatch, tmp_path): driver_lib_path = _touch(tmp_path / "driver-root" / "libnvidia-ml.so.1") lib_root = tmp_path / "cuda-12.8" hdr_root = tmp_path / "cuda-12.9" - _write_version_json(lib_root, "12.8.20250303") - _write_version_json(hdr_root, "12.9.20250531") + _write_cuda_h(lib_root, "12.8.20250303") + _write_cuda_h(hdr_root, "12.9.20250531") lib_path = _touch(lib_root / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") hdr_dir = hdr_root / "targets" / "x86_64-linux" / "include" @@ -506,7 +528,7 @@ def test_wheel_metadata_accepts_exact_and_range_requirements(monkeypatch, tmp_pa def test_constraints_accept_string_and_tuple_forms(monkeypatch, tmp_path): ctk_root = tmp_path / "cuda-12.9" - _write_version_json(ctk_root, "12.9.20250531") + _write_cuda_h(ctk_root, "12.9.20250531") lib_path = _touch(ctk_root / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) @@ -524,7 +546,7 @@ def test_constraints_accept_string_and_tuple_forms(monkeypatch, tmp_path): def test_constraint_failure_raises(monkeypatch, tmp_path): ctk_root = tmp_path / "cuda-12.9" - _write_version_json(ctk_root, "12.9.20250531") + _write_cuda_h(ctk_root, "12.9.20250531") lib_path = _touch(ctk_root / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) @@ -541,7 +563,7 @@ def test_constraint_failure_raises(monkeypatch, tmp_path): def 
test_static_bitcode_and_binary_methods_participate_in_checks(monkeypatch, tmp_path): ctk_root = tmp_path / "cuda-12.9" - _write_version_json(ctk_root, "12.9.20250531") + _write_cuda_h(ctk_root, "12.9.20250531") lib_path = _touch(ctk_root / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") static_path = _touch(ctk_root / "targets" / "x86_64-linux" / "lib" / "libcudadevrt.a") @@ -575,7 +597,7 @@ def test_static_bitcode_and_binary_methods_participate_in_checks(monkeypatch, tm def test_guard_rails_query_driver_cuda_version_by_default(monkeypatch, tmp_path): ctk_root = tmp_path / "cuda-12.9" - _write_version_json(ctk_root, "12.9.20250531") + _write_cuda_h(ctk_root, "12.9.20250531") lib_path = _touch(ctk_root / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") query_calls: list[int] = [] @@ -598,7 +620,7 @@ def fake_query_driver_cuda_version() -> DriverCudaVersion: def test_guard_rails_wrap_driver_query_failures(monkeypatch, tmp_path): ctk_root = tmp_path / "cuda-12.9" - _write_version_json(ctk_root, "12.9.20250531") + _write_cuda_h(ctk_root, "12.9.20250531") lib_path = _touch(ctk_root / "targets" / "x86_64-linux" / "lib" / "libnvrtc.so.12") monkeypatch.setattr(compatibility_module, "_load_nvidia_dynamic_lib", lambda _libname: _loaded_dl(lib_path)) From 298888e5f2ceed8db88f31928c8a62470e33828a Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sun, 26 Apr 2026 16:56:31 -0700 Subject: [PATCH 10/17] Make real guard-rails tests derive their CTK line from cuda.h. This keeps the host-backed compatibility checks aligned with the resolved toolkit layout and skips cleanly when cudart headers or cuda.h are unavailable. 
Made-with: Cursor --- cuda_pathfinder/tests/local_helpers.py | 54 +++++++++++++++++++ .../tests/test_compatibility_guard_rails.py | 45 ++++++++++++++-- 2 files changed, 95 insertions(+), 4 deletions(-) diff --git a/cuda_pathfinder/tests/local_helpers.py b/cuda_pathfinder/tests/local_helpers.py index 7893ba8229f..a67d3e2580c 100644 --- a/cuda_pathfinder/tests/local_helpers.py +++ b/cuda_pathfinder/tests/local_helpers.py @@ -4,6 +4,25 @@ import functools import importlib.metadata import re +from dataclasses import dataclass +from pathlib import Path + +import pytest + +from cuda.pathfinder._headers.find_nvidia_headers import ( + locate_nvidia_header_directory as locate_nvidia_header_directory_raw, +) +from cuda.pathfinder._utils.toolkit_info import CudaToolkitVersion, read_cuda_header_version + + +@dataclass(frozen=True, slots=True) +class LocatedRealCudaToolkitVersion: + """Real-host CTK version discovered from ``cuda.h`` next to resolved ``cudart`` headers.""" + + version: CudaToolkitVersion + cuda_h_path: str + header_dir: str + found_via: str @functools.cache @@ -14,3 +33,38 @@ def have_distribution(name_pattern: str) -> bool: for dist in importlib.metadata.distributions() if "Name" in dist.metadata ) + + +@functools.cache +def locate_real_cuda_toolkit_version_from_cuda_h() -> LocatedRealCudaToolkitVersion | None: + """Return the real-host CTK version from ``cuda.h`` if ``cudart`` headers can be located.""" + located = locate_nvidia_header_directory_raw("cudart") + if located is None or located.abs_path is None: + return None + cuda_h_path = Path(located.abs_path) / "cuda.h" + if not cuda_h_path.is_file(): + return None + return LocatedRealCudaToolkitVersion( + version=read_cuda_header_version(str(cuda_h_path)), + cuda_h_path=str(cuda_h_path), + header_dir=located.abs_path, + found_via=located.found_via, + ) + + +def require_real_cuda_toolkit_version_from_cuda_h() -> LocatedRealCudaToolkitVersion: + """Return the real-host CTK version from ``cuda.h`` or skip if 
it cannot be located.""" + located = locate_nvidia_header_directory_raw("cudart") + if located is None or located.abs_path is None: + pytest.skip("Could not locate cudart headers, so could not find cuda.h for a real CTK installation.") + cuda_h_path = Path(located.abs_path) / "cuda.h" + if not cuda_h_path.is_file(): + pytest.skip( + f"Located cudart headers via {located.found_via} at {located.abs_path!r}, but could not find cuda.h." + ) + return LocatedRealCudaToolkitVersion( + version=read_cuda_header_version(str(cuda_h_path)), + cuda_h_path=str(cuda_h_path), + header_dir=located.abs_path, + found_via=located.found_via, + ) diff --git a/cuda_pathfinder/tests/test_compatibility_guard_rails.py b/cuda_pathfinder/tests/test_compatibility_guard_rails.py index cb081787443..ff941dea12d 100644 --- a/cuda_pathfinder/tests/test_compatibility_guard_rails.py +++ b/cuda_pathfinder/tests/test_compatibility_guard_rails.py @@ -22,7 +22,17 @@ StaticLibNotFoundError, process_wide_compatibility_guard_rails, ) +from cuda.pathfinder._dynamic_libs.load_nvidia_dynamic_lib import _resolve_system_loaded_abs_path_in_subprocess +from cuda.pathfinder._headers.find_nvidia_headers import ( + locate_nvidia_header_directory as locate_nvidia_header_directory_raw, +) +from cuda.pathfinder._utils.env_vars import get_cuda_path_or_home from cuda.pathfinder._utils.driver_info import DriverCudaVersion, QueryDriverCudaVersionError +from cuda.pathfinder._utils.toolkit_info import read_cuda_header_version +from local_helpers import ( + locate_real_cuda_toolkit_version_from_cuda_h, + require_real_cuda_toolkit_version_from_cuda_h, +) STRICTNESS = os.environ.get("CUDA_PATHFINDER_TEST_COMPATIBILITY_GUARD_RAILS_STRICTNESS", "see_what_works") assert STRICTNESS in ("see_what_works", "all_must_work") @@ -35,6 +45,21 @@ def _default_process_wide_guard_rails_mode(monkeypatch): monkeypatch.delenv(COMPATIBILITY_GUARD_RAILS_ENV_VAR, raising=False) +@pytest.fixture +def clear_real_cuda_h_probe_caches(): + 
locate_real_cuda_toolkit_version_from_cuda_h.cache_clear() + locate_nvidia_header_directory_raw.cache_clear() + _resolve_system_loaded_abs_path_in_subprocess.cache_clear() + get_cuda_path_or_home.cache_clear() + read_cuda_header_version.cache_clear() + yield + locate_real_cuda_toolkit_version_from_cuda_h.cache_clear() + locate_nvidia_header_directory_raw.cache_clear() + _resolve_system_loaded_abs_path_in_subprocess.cache_clear() + get_cuda_path_or_home.cache_clear() + read_cuda_header_version.cache_clear() + + def _write_cuda_h( ctk_root: Path, toolkit_version: str, @@ -653,10 +678,16 @@ def test_find_nvidia_header_directory_returns_none_when_unresolved(monkeypatch): assert guard_rails.find_nvidia_header_directory("nvrtc") is None +@pytest.mark.usefixtures("clear_real_cuda_h_probe_caches") def test_real_wheel_ctk_items_are_compatible(info_summary_append): + real_ctk = require_real_cuda_toolkit_version_from_cuda_h() + info_summary_append( + f"real cuda.h CTK version={real_ctk.version.major}.{real_ctk.version.minor} " + f"via {real_ctk.found_via} at {real_ctk.cuda_h_path!r}" + ) guard_rails = CompatibilityGuardRails( - ctk_major=13, - ctk_minor=2, + ctk_major=real_ctk.version.major, + ctk_minor=real_ctk.version.minor, driver_cuda_version=_driver_cuda_version(13000), ) @@ -691,10 +722,16 @@ def test_real_wheel_ctk_items_are_compatible(info_summary_append): _assert_real_ctk_backed_path(path) +@pytest.mark.usefixtures("clear_real_cuda_h_probe_caches") def test_real_wheel_component_version_does_not_override_ctk_line(info_summary_append): + real_ctk = require_real_cuda_toolkit_version_from_cuda_h() + info_summary_append( + f"real cuda.h CTK version={real_ctk.version.major}.{real_ctk.version.minor} " + f"via {real_ctk.found_via} at {real_ctk.cuda_h_path!r}" + ) guard_rails = CompatibilityGuardRails( - ctk_major=13, - ctk_minor=2, + ctk_major=real_ctk.version.major, + ctk_minor=real_ctk.version.minor, driver_cuda_version=_driver_cuda_version(13000), ) From 
8900cd2f101fe1beac56dfee49712190b42d60dd Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sun, 26 Apr 2026 17:03:06 -0700 Subject: [PATCH 11/17] Make real guard-rails tests query the driver version. This keeps the host-backed compatibility checks aligned with the actual driver state instead of relying on a fixed encoded version in the real-environment tests. Made-with: Cursor --- cuda_pathfinder/tests/local_helpers.py | 9 +++++++ .../tests/test_compatibility_guard_rails.py | 26 +++++++++++++++---- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/cuda_pathfinder/tests/local_helpers.py b/cuda_pathfinder/tests/local_helpers.py index a67d3e2580c..bfcfbe207c9 100644 --- a/cuda_pathfinder/tests/local_helpers.py +++ b/cuda_pathfinder/tests/local_helpers.py @@ -12,6 +12,7 @@ from cuda.pathfinder._headers.find_nvidia_headers import ( locate_nvidia_header_directory as locate_nvidia_header_directory_raw, ) +from cuda.pathfinder._utils import driver_info from cuda.pathfinder._utils.toolkit_info import CudaToolkitVersion, read_cuda_header_version @@ -68,3 +69,11 @@ def require_real_cuda_toolkit_version_from_cuda_h() -> LocatedRealCudaToolkitVer header_dir=located.abs_path, found_via=located.found_via, ) + + +def require_real_driver_cuda_version() -> driver_info.DriverCudaVersion: + """Return the real-host CUDA driver version or skip if it cannot be queried.""" + try: + return driver_info.query_driver_cuda_version() + except driver_info.QueryDriverCudaVersionError as exc: + pytest.skip(f"Could not query the CUDA driver version for a real driver installation: {exc}") diff --git a/cuda_pathfinder/tests/test_compatibility_guard_rails.py b/cuda_pathfinder/tests/test_compatibility_guard_rails.py index ff941dea12d..fa841d4a83a 100644 --- a/cuda_pathfinder/tests/test_compatibility_guard_rails.py +++ b/cuda_pathfinder/tests/test_compatibility_guard_rails.py @@ -27,10 +27,12 @@ locate_nvidia_header_directory as locate_nvidia_header_directory_raw, ) from 
cuda.pathfinder._utils.env_vars import get_cuda_path_or_home +from cuda.pathfinder._utils import driver_info from cuda.pathfinder._utils.driver_info import DriverCudaVersion, QueryDriverCudaVersionError from cuda.pathfinder._utils.toolkit_info import read_cuda_header_version from local_helpers import ( locate_real_cuda_toolkit_version_from_cuda_h, + require_real_driver_cuda_version, require_real_cuda_toolkit_version_from_cuda_h, ) @@ -46,18 +48,22 @@ def _default_process_wide_guard_rails_mode(monkeypatch): @pytest.fixture -def clear_real_cuda_h_probe_caches(): +def clear_real_host_probe_caches(): locate_real_cuda_toolkit_version_from_cuda_h.cache_clear() locate_nvidia_header_directory_raw.cache_clear() _resolve_system_loaded_abs_path_in_subprocess.cache_clear() get_cuda_path_or_home.cache_clear() read_cuda_header_version.cache_clear() + driver_info._load_nvidia_dynamic_lib.cache_clear() + driver_info.query_driver_cuda_version.cache_clear() yield locate_real_cuda_toolkit_version_from_cuda_h.cache_clear() locate_nvidia_header_directory_raw.cache_clear() _resolve_system_loaded_abs_path_in_subprocess.cache_clear() get_cuda_path_or_home.cache_clear() read_cuda_header_version.cache_clear() + driver_info._load_nvidia_dynamic_lib.cache_clear() + driver_info.query_driver_cuda_version.cache_clear() def _write_cuda_h( @@ -678,17 +684,22 @@ def test_find_nvidia_header_directory_returns_none_when_unresolved(monkeypatch): assert guard_rails.find_nvidia_header_directory("nvrtc") is None -@pytest.mark.usefixtures("clear_real_cuda_h_probe_caches") +@pytest.mark.usefixtures("clear_real_host_probe_caches") def test_real_wheel_ctk_items_are_compatible(info_summary_append): real_ctk = require_real_cuda_toolkit_version_from_cuda_h() + real_driver = require_real_driver_cuda_version() info_summary_append( f"real cuda.h CTK version={real_ctk.version.major}.{real_ctk.version.minor} " f"via {real_ctk.found_via} at {real_ctk.cuda_h_path!r}" ) + info_summary_append( + "real driver CUDA 
version=" + f"{real_driver.major}.{real_driver.minor} (encoded={real_driver.encoded})" + ) guard_rails = CompatibilityGuardRails( ctk_major=real_ctk.version.major, ctk_minor=real_ctk.version.minor, - driver_cuda_version=_driver_cuda_version(13000), + driver_cuda_version=real_driver, ) try: @@ -722,17 +733,22 @@ def test_real_wheel_ctk_items_are_compatible(info_summary_append): _assert_real_ctk_backed_path(path) -@pytest.mark.usefixtures("clear_real_cuda_h_probe_caches") +@pytest.mark.usefixtures("clear_real_host_probe_caches") def test_real_wheel_component_version_does_not_override_ctk_line(info_summary_append): real_ctk = require_real_cuda_toolkit_version_from_cuda_h() + real_driver = require_real_driver_cuda_version() info_summary_append( f"real cuda.h CTK version={real_ctk.version.major}.{real_ctk.version.minor} " f"via {real_ctk.found_via} at {real_ctk.cuda_h_path!r}" ) + info_summary_append( + "real driver CUDA version=" + f"{real_driver.major}.{real_driver.minor} (encoded={real_driver.encoded})" + ) guard_rails = CompatibilityGuardRails( ctk_major=real_ctk.version.major, ctk_minor=real_ctk.version.minor, - driver_cuda_version=_driver_cuda_version(13000), + driver_cuda_version=real_driver, ) try: From 583af91040be3c549570555a717d7cfda9775162 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Sun, 26 Apr 2026 17:11:03 -0700 Subject: [PATCH 12/17] pre-commit fixes --- .../tests/test_compatibility_guard_rails.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/cuda_pathfinder/tests/test_compatibility_guard_rails.py b/cuda_pathfinder/tests/test_compatibility_guard_rails.py index fa841d4a83a..1aa82ad1329 100644 --- a/cuda_pathfinder/tests/test_compatibility_guard_rails.py +++ b/cuda_pathfinder/tests/test_compatibility_guard_rails.py @@ -6,6 +6,11 @@ from pathlib import Path import pytest +from local_helpers import ( + locate_real_cuda_toolkit_version_from_cuda_h, + require_real_cuda_toolkit_version_from_cuda_h, + require_real_driver_cuda_version, +) import cuda.pathfinder._compatibility_guard_rails as compatibility_module from cuda import pathfinder @@ -26,15 +31,10 @@ from cuda.pathfinder._headers.find_nvidia_headers import ( locate_nvidia_header_directory as locate_nvidia_header_directory_raw, ) -from cuda.pathfinder._utils.env_vars import get_cuda_path_or_home from cuda.pathfinder._utils import driver_info from cuda.pathfinder._utils.driver_info import DriverCudaVersion, QueryDriverCudaVersionError +from cuda.pathfinder._utils.env_vars import get_cuda_path_or_home from cuda.pathfinder._utils.toolkit_info import read_cuda_header_version -from local_helpers import ( - locate_real_cuda_toolkit_version_from_cuda_h, - require_real_driver_cuda_version, - require_real_cuda_toolkit_version_from_cuda_h, -) STRICTNESS = os.environ.get("CUDA_PATHFINDER_TEST_COMPATIBILITY_GUARD_RAILS_STRICTNESS", "see_what_works") assert STRICTNESS in ("see_what_works", "all_must_work") @@ -693,8 +693,7 @@ def test_real_wheel_ctk_items_are_compatible(info_summary_append): f"via {real_ctk.found_via} at {real_ctk.cuda_h_path!r}" ) info_summary_append( - "real driver CUDA version=" - f"{real_driver.major}.{real_driver.minor} (encoded={real_driver.encoded})" + f"real driver CUDA version={real_driver.major}.{real_driver.minor} 
(encoded={real_driver.encoded})" ) guard_rails = CompatibilityGuardRails( ctk_major=real_ctk.version.major, @@ -742,8 +741,7 @@ def test_real_wheel_component_version_does_not_override_ctk_line(info_summary_ap f"via {real_ctk.found_via} at {real_ctk.cuda_h_path!r}" ) info_summary_append( - "real driver CUDA version=" - f"{real_driver.major}.{real_driver.minor} (encoded={real_driver.encoded})" + f"real driver CUDA version={real_driver.major}.{real_driver.minor} (encoded={real_driver.encoded})" ) guard_rails = CompatibilityGuardRails( ctk_major=real_ctk.version.major, From f13c754b476a52dbeb30a296d45252bb8d4c6ccb Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sun, 26 Apr 2026 20:31:17 -0700 Subject: [PATCH 13/17] Adjust real guard-rails tests for cu12 nvcc wheels. This keeps the real host-backed checks strict when an installed nvcc wheel actually ships nvcc, while avoiding a false failure in cu12 wheel environments that only provide lower-level compiler pieces such as ptxas. 
Made-with: Cursor --- .../tests/test_compatibility_guard_rails.py | 60 ++++++++++--------- 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/cuda_pathfinder/tests/test_compatibility_guard_rails.py b/cuda_pathfinder/tests/test_compatibility_guard_rails.py index 1aa82ad1329..672ba422ca1 100644 --- a/cuda_pathfinder/tests/test_compatibility_guard_rails.py +++ b/cuda_pathfinder/tests/test_compatibility_guard_rails.py @@ -7,6 +7,7 @@ import pytest from local_helpers import ( + have_distribution, locate_real_cuda_toolkit_version_from_cuda_h, require_real_cuda_toolkit_version_from_cuda_h, require_real_driver_cuda_version, @@ -49,6 +50,7 @@ def _default_process_wide_guard_rails_mode(monkeypatch): @pytest.fixture def clear_real_host_probe_caches(): + have_distribution.cache_clear() locate_real_cuda_toolkit_version_from_cuda_h.cache_clear() locate_nvidia_header_directory_raw.cache_clear() _resolve_system_loaded_abs_path_in_subprocess.cache_clear() @@ -57,6 +59,7 @@ def clear_real_host_probe_caches(): driver_info._load_nvidia_dynamic_lib.cache_clear() driver_info.query_driver_cuda_version.cache_clear() yield + have_distribution.cache_clear() locate_real_cuda_toolkit_version_from_cuda_h.cache_clear() locate_nvidia_header_directory_raw.cache_clear() _resolve_system_loaded_abs_path_in_subprocess.cache_clear() @@ -685,16 +688,26 @@ def test_find_nvidia_header_directory_returns_none_when_unresolved(monkeypatch): @pytest.mark.usefixtures("clear_real_host_probe_caches") -def test_real_wheel_ctk_items_are_compatible(info_summary_append): - real_ctk = require_real_cuda_toolkit_version_from_cuda_h() +def test_real_driver(info_summary_append): real_driver = require_real_driver_cuda_version() info_summary_append( - f"real cuda.h CTK version={real_ctk.version.major}.{real_ctk.version.minor} " - f"via {real_ctk.found_via} at {real_ctk.cuda_h_path!r}" + f"real driver CUDA version={real_driver.major}.{real_driver.minor} (encoded={real_driver.encoded})" ) + + 
+@pytest.mark.usefixtures("clear_real_host_probe_caches") +def test_real_ctk(info_summary_append): + real_ctk = require_real_cuda_toolkit_version_from_cuda_h() info_summary_append( - f"real driver CUDA version={real_driver.major}.{real_driver.minor} (encoded={real_driver.encoded})" + f"real cuda.h CTK version={real_ctk.version.major}.{real_ctk.version.minor} " + f"via {real_ctk.found_via} at {real_ctk.cuda_h_path!r}" ) + + +@pytest.mark.usefixtures("clear_real_host_probe_caches") +def test_real_wheel_ctk_items_are_compatible(info_summary_append): + real_ctk = require_real_cuda_toolkit_version_from_cuda_h() + real_driver = require_real_driver_cuda_version() guard_rails = CompatibilityGuardRails( ctk_major=real_ctk.version.major, ctk_minor=real_ctk.version.minor, @@ -716,33 +729,29 @@ def test_real_wheel_ctk_items_are_compatible(info_summary_append): ) as exc: if STRICTNESS == "all_must_work": raise - info_summary_append(f"real CTK check unavailable: {exc.__class__.__name__}: {exc}") - return - - info_summary_append(f"nvrtc={loaded.abs_path!r}") - info_summary_append(f"nvrtc_headers={header_dir!r}") - info_summary_append(f"cudadevrt={static_lib!r}") - info_summary_append(f"libdevice={bitcode_lib!r}") - info_summary_append(f"nvcc={nvcc!r}") + pytest.skip(f"real CTK check unavailable: {exc.__class__.__name__}: {exc}") assert isinstance(loaded.abs_path, str) assert header_dir is not None - assert nvcc is not None - for path in (loaded.abs_path, header_dir, static_lib, bitcode_lib, nvcc): + for path in (loaded.abs_path, header_dir, static_lib, bitcode_lib): _assert_real_ctk_backed_path(path) + if have_distribution(r"^nvidia-cuda-nvcc-cu12$"): + # For CUDA 12, NVIDIA publishes a PyPI package named nvidia-cuda-nvcc-cu12, + # but the wheels only contain nvcc-adjacent compiler components such as + # ptxas, CRT headers, libnvvm, and libdevice; the nvcc executable itself + # is not included. + if nvcc is not None: + # nvcc found elsewhere, e.g. /usr/local or Conda. 
+ _assert_real_ctk_backed_path(nvcc) + else: + assert nvcc is not None + _assert_real_ctk_backed_path(nvcc) @pytest.mark.usefixtures("clear_real_host_probe_caches") def test_real_wheel_component_version_does_not_override_ctk_line(info_summary_append): real_ctk = require_real_cuda_toolkit_version_from_cuda_h() real_driver = require_real_driver_cuda_version() - info_summary_append( - f"real cuda.h CTK version={real_ctk.version.major}.{real_ctk.version.minor} " - f"via {real_ctk.found_via} at {real_ctk.cuda_h_path!r}" - ) - info_summary_append( - f"real driver CUDA version={real_driver.major}.{real_driver.minor} (encoded={real_driver.encoded})" - ) guard_rails = CompatibilityGuardRails( ctk_major=real_ctk.version.major, ctk_minor=real_ctk.version.minor, @@ -754,14 +763,11 @@ def test_real_wheel_component_version_does_not_override_ctk_line(info_summary_ap except (CompatibilityCheckError, CompatibilityInsufficientMetadataError) as exc: if STRICTNESS == "all_must_work": raise - info_summary_append(f"real cufft CTK check unavailable: {exc.__class__.__name__}: {exc}") - return + pytest.skip(f"real cufft CTK check unavailable: {exc.__class__.__name__}: {exc}") if header_dir is None: if STRICTNESS == "all_must_work": raise AssertionError("Expected CTK-backed cufft headers to be discoverable.") - info_summary_append("real cufft CTK check unavailable: cufft headers not found") - return + pytest.skip("real cufft CTK check unavailable: cufft headers not found") - info_summary_append(f"cufft_headers={header_dir!r}") _assert_real_ctk_backed_path(header_dir) From 47e0edc5f6cc713314b5bb7493ee3e8747c40274 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 27 Apr 2026 14:25:59 -0700 Subject: [PATCH 14/17] Align pathfinder conda setup scripts with test groups Keep the ad hoc conda environment helpers aligned with the current cuda_pathfinder test dependency groups so local verification matches the package's test matrix. 
Split the PowerShell install list from Linux-only dependencies to avoid pulling unsupported packages on Windows. Made-with: Cursor --- .../conda_create_for_pathfinder_testing.ps1 | 17 ++++--- .../conda_create_for_pathfinder_testing.sh | 48 ++++++++++++++----- 2 files changed, 45 insertions(+), 20 deletions(-) diff --git a/toolshed/conda_create_for_pathfinder_testing.ps1 b/toolshed/conda_create_for_pathfinder_testing.ps1 index 115720f6e5e..fa890200704 100644 --- a/toolshed/conda_create_for_pathfinder_testing.ps1 +++ b/toolshed/conda_create_for_pathfinder_testing.ps1 @@ -7,24 +7,27 @@ param( ) $ErrorActionPreference = "Stop" +Set-StrictMode -Version Latest & "$env:CONDA_EXE" "shell.powershell" "hook" | Out-String | Invoke-Expression -conda create --yes -n "pathfinder_testing_cu$CudaVersion" python=3.13 "cuda-toolkit=$CudaVersion" +conda create --yes -n "pathfinder_testing_cu$CudaVersion" python=3.14 "cuda-toolkit=$CudaVersion" conda activate "pathfinder_testing_cu$CudaVersion" $cpkgs = @( + "pytest>=6.2.4", + "pytest-mock", + "pytest-repeat", + "pytest-randomly", "cusparselt-dev", "cutensor", - "libcublasmp-dev", + "cutlass", "libcudss-dev", - "libcufftmp-dev", - "libmathdx-dev", - "libnvshmem3", - "libnvshmem-dev", - "libnvpl-fft-dev" + "libmathdx-dev" ) +# Keep the PowerShell environment aligned with the Windows-relevant +# cuda_pathfinder dependency groups; Linux-only deps stay in the .sh script. foreach ($cpkg in $cpkgs) { Write-Host "CONDA INSTALL: $cpkg" conda install -y -c conda-forge $cpkg diff --git a/toolshed/conda_create_for_pathfinder_testing.sh b/toolshed/conda_create_for_pathfinder_testing.sh index 1ed57e6765b..3c1ec694a8f 100755 --- a/toolshed/conda_create_for_pathfinder_testing.sh +++ b/toolshed/conda_create_for_pathfinder_testing.sh @@ -3,26 +3,48 @@ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 +set -euo pipefail + if [[ $# -ne 1 ]]; then echo "Usage: $(basename "$0") ctk-major-minor-patch" 1>&2 exit 1 fi +cuda_version="$1" +cuda_major="${cuda_version%%.*}" +uname_m="$(uname -m)" + eval "$(conda shell.bash hook)" -conda create --yes -n "pathfinder_testing_cu$1" python=3.13 cuda-toolkit="$1" -conda activate "pathfinder_testing_cu$1" - -for cpkg in \ - cusparselt-dev \ - cutensor \ - libcublasmp-dev \ - libcudss-dev \ - libcufftmp-dev \ - libmathdx-dev \ - libnvshmem3 \ - libnvshmem-dev \ - libnvpl-fft-dev; do +conda create --yes -n "pathfinder_testing_cu$cuda_version" python=3.14 cuda-toolkit="$cuda_version" +conda activate "pathfinder_testing_cu$cuda_version" + +cpkgs=( + "pytest>=6.2.4" + "pytest-mock" + "pytest-repeat" + "pytest-randomly" + "cusparselt-dev" + "cutensor" + "cutlass" + "libcublasmp-dev" + "libcudss-dev" + "libcufftmp-dev" + "libcusolvermp-dev" + "libmathdx-dev" + "libnvshmem3" + "libnvshmem-dev" +) + +# Keep the conda environment aligned with platform-scoped pyproject groups. +if [[ "$uname_m" == "aarch64" ]]; then + cpkgs+=("libnvpl-fft-dev") + if [[ "$cuda_major" == "13" ]]; then + cpkgs+=("libcudla-dev") + fi +fi + +for cpkg in "${cpkgs[@]}"; do echo "CONDA INSTALL: $cpkg" conda install -y -c conda-forge "$cpkg" done From 147f1131448d6795cae0da71aa94903f50414f48 Mon Sep 17 00:00:00 2001 From: "Ralf W. 
Grosse-Kunstleve" Date: Mon, 27 Apr 2026 15:25:52 -0700 Subject: [PATCH 15/17] Add set +u / set -u pairs around conda activate, conda install to avoid unset issues in conda scripts --- toolshed/conda_create_for_pathfinder_testing.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/toolshed/conda_create_for_pathfinder_testing.sh b/toolshed/conda_create_for_pathfinder_testing.sh index 3c1ec694a8f..077b265f916 100755 --- a/toolshed/conda_create_for_pathfinder_testing.sh +++ b/toolshed/conda_create_for_pathfinder_testing.sh @@ -17,7 +17,9 @@ uname_m="$(uname -m)" eval "$(conda shell.bash hook)" conda create --yes -n "pathfinder_testing_cu$cuda_version" python=3.14 cuda-toolkit="$cuda_version" +set +u conda activate "pathfinder_testing_cu$cuda_version" +set -u cpkgs=( "pytest>=6.2.4" @@ -46,5 +48,7 @@ fi for cpkg in "${cpkgs[@]}"; do echo "CONDA INSTALL: $cpkg" + set +u conda install -y -c conda-forge "$cpkg" + set -u done From 11be7c95079f5efecd77c5585635740d79e4166d Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 27 Apr 2026 15:58:33 -0700 Subject: [PATCH 16/17] Adjust pathfinder conda setup scripts for CTK-specific Python pins Keep the test environment helpers working across CUDA 12 and 13 by matching conda-forge solver constraints, so CUDA 12 uses Python 3.12 while CUDA 13 stays on Python 3.14. 
Made-with: Cursor --- toolshed/conda_create_for_pathfinder_testing.ps1 | 11 ++++++++++- toolshed/conda_create_for_pathfinder_testing.sh | 15 ++++++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/toolshed/conda_create_for_pathfinder_testing.ps1 b/toolshed/conda_create_for_pathfinder_testing.ps1 index fa890200704..efd0ac3afff 100644 --- a/toolshed/conda_create_for_pathfinder_testing.ps1 +++ b/toolshed/conda_create_for_pathfinder_testing.ps1 @@ -9,9 +9,18 @@ param( $ErrorActionPreference = "Stop" Set-StrictMode -Version Latest +$cudaMajor = $CudaVersion.Split(".", 2)[0] +switch ($cudaMajor) { + "12" { $pythonVersion = "3.12" } + "13" { $pythonVersion = "3.14" } + default { + throw "Unsupported CUDA major version for this helper: $cudaMajor. Expected a 12.x or 13.x toolkit version." + } +} + & "$env:CONDA_EXE" "shell.powershell" "hook" | Out-String | Invoke-Expression -conda create --yes -n "pathfinder_testing_cu$CudaVersion" python=3.14 "cuda-toolkit=$CudaVersion" +conda create --yes -n "pathfinder_testing_cu$CudaVersion" "python=$pythonVersion" "cuda-toolkit=$CudaVersion" conda activate "pathfinder_testing_cu$CudaVersion" $cpkgs = @( diff --git a/toolshed/conda_create_for_pathfinder_testing.sh b/toolshed/conda_create_for_pathfinder_testing.sh index 077b265f916..ffd69f31f95 100755 --- a/toolshed/conda_create_for_pathfinder_testing.sh +++ b/toolshed/conda_create_for_pathfinder_testing.sh @@ -13,10 +13,23 @@ fi cuda_version="$1" cuda_major="${cuda_version%%.*}" uname_m="$(uname -m)" +case "$cuda_major" in + 12) + python_version=3.12 + ;; + 13) + python_version=3.14 + ;; + *) + echo "Unsupported CUDA major version for this helper: $cuda_major" 1>&2 + echo "Expected a 12.x or 13.x toolkit version." 
1>&2 + exit 1 + ;; +esac eval "$(conda shell.bash hook)" -conda create --yes -n "pathfinder_testing_cu$cuda_version" python=3.14 cuda-toolkit="$cuda_version" +conda create --yes -n "pathfinder_testing_cu$cuda_version" "python=$python_version" cuda-toolkit="$cuda_version" set +u conda activate "pathfinder_testing_cu$cuda_version" set -u From 4c953528fb7ecd6754ab3423bb5f65f352e15b53 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Mon, 27 Apr 2026 21:09:44 -0700 Subject: [PATCH 17/17] Simplify pathfinder conda setup scripts. Keep the helper scripts focused on the CUDA packages they need from conda while `pip install --group test` owns the pytest dependencies, so the ad hoc environment setup stays easier to keep in sync with pyproject markers. Made-with: Cursor --- toolshed/conda_create_for_pathfinder_testing.ps1 | 8 ++------ toolshed/conda_create_for_pathfinder_testing.sh | 6 ++---- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/toolshed/conda_create_for_pathfinder_testing.ps1 b/toolshed/conda_create_for_pathfinder_testing.ps1 index efd0ac3afff..1c0b2999ffa 100644 --- a/toolshed/conda_create_for_pathfinder_testing.ps1 +++ b/toolshed/conda_create_for_pathfinder_testing.ps1 @@ -23,11 +23,9 @@ switch ($cudaMajor) { conda create --yes -n "pathfinder_testing_cu$CudaVersion" "python=$pythonVersion" "cuda-toolkit=$CudaVersion" conda activate "pathfinder_testing_cu$CudaVersion" +# Keep this list aligned with the Windows-installable subset of +# cuda_pathfinder/pyproject.toml. $cpkgs = @( - "pytest>=6.2.4", - "pytest-mock", - "pytest-repeat", - "pytest-randomly", "cusparselt-dev", "cutensor", "cutlass", @@ -35,8 +33,6 @@ $cpkgs = @( "libmathdx-dev" ) -# Keep the PowerShell environment aligned with the Windows-relevant -# cuda_pathfinder dependency groups; Linux-only deps stay in the .sh script. 
foreach ($cpkg in $cpkgs) { Write-Host "CONDA INSTALL: $cpkg" conda install -y -c conda-forge $cpkg diff --git a/toolshed/conda_create_for_pathfinder_testing.sh b/toolshed/conda_create_for_pathfinder_testing.sh index ffd69f31f95..8674bb1ed03 100755 --- a/toolshed/conda_create_for_pathfinder_testing.sh +++ b/toolshed/conda_create_for_pathfinder_testing.sh @@ -34,11 +34,9 @@ set +u conda activate "pathfinder_testing_cu$cuda_version" set -u +# Keep this list aligned with the Linux-installable subset of +# cuda_pathfinder/pyproject.toml. cpkgs=( - "pytest>=6.2.4" - "pytest-mock" - "pytest-repeat" - "pytest-randomly" "cusparselt-dev" "cutensor" "cutlass"