Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ jobs:
timeout-minutes: 60
env:
TOX_SKIP_MISSING_INTERPRETERS: False
VIRTUALENV_SYSTEM_SITE_PACKAGES: ${{ matrix.test_mypyc && 1 || 0 }}
# Rich (pip) -- Disable color for windows + pytest
FORCE_COLOR: ${{ !(startsWith(matrix.os, 'windows-') && startsWith(matrix.toxenv, 'py')) && 1 || 0 }}
# Tox
Expand Down Expand Up @@ -209,8 +210,10 @@ jobs:

- name: Compiled with mypyc
if: ${{ matrix.test_mypyc }}
# Use local version of librt during self-compilation in tests.
run: |
pip install -r test-requirements.txt
pip install -U mypyc/lib-rt
CC=clang MYPYC_OPT_LEVEL=0 MYPY_USE_MYPYC=1 pip install -e .

- name: Setup tox environment
Expand Down
2 changes: 2 additions & 0 deletions mypy/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ def read(cls, data: ReadBuffer, data_file: str) -> CacheMeta | None:
# Always use this type alias to refer to type tags.
Tag = u8

# Note: all tags should be kept in sync with lib-rt/internal/librt_internal.c.
# Primitives.
LITERAL_FALSE: Final[Tag] = 0
LITERAL_TRUE: Final[Tag] = 1
Expand All @@ -264,6 +265,7 @@ def read(cls, data: ReadBuffer, data_file: str) -> CacheMeta | None:
# Four integers representing source file (line, column) range.
LOCATION: Final[Tag] = 152

RESERVED: Final[Tag] = 254
END_TAG: Final[Tag] = 255


Expand Down
20 changes: 15 additions & 5 deletions mypy/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4930,7 +4930,20 @@ def read(cls, data: ReadBuffer) -> SymbolTableNode:
sym.plugin_generated = read_bool(data)
cross_ref = read_str_opt(data)
if cross_ref is None:
sym.node = read_symbol(data)
tag = read_tag(data)
if tag == TYPE_INFO:
sym.node = TypeInfo.read(data)
else:
# This logic is temporary, to make sure we don't introduce
# regressions until we have proper lazy deserialization.
# It has negligible performance impact.
try:
from librt.internal import extract_symbol
except ImportError:
sym.node = read_symbol(data, tag)
else:
node_bytes = extract_symbol(data)
sym.node = read_symbol(ReadBuffer(node_bytes), tag)
else:
sym.cross_ref = cross_ref
assert read_tag(data) == END_TAG
Expand Down Expand Up @@ -5333,17 +5346,14 @@ def local_definitions(
TSTRING_EXPR: Final[Tag] = 229


def read_symbol(data: ReadBuffer) -> SymbolNode:
tag = read_tag(data)
def read_symbol(data: ReadBuffer, tag: Tag) -> SymbolNode:
# The branches here are ordered manually by type "popularity".
if tag == VAR:
return Var.read(data)
if tag == FUNC_DEF:
return FuncDef.read(data)
if tag == DECORATOR:
return Decorator.read(data)
if tag == TYPE_INFO:
return TypeInfo.read(data)
if tag == OVERLOADED_FUNC_DEF:
return OverloadedFuncDef.read(data)
if tag == TYPE_VAR_EXPR:
Expand Down
1 change: 1 addition & 0 deletions mypy/typeshed/stubs/librt/librt/internal.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ def read_int(data: ReadBuffer, /) -> int: ...
def write_tag(data: WriteBuffer, value: u8, /) -> None: ...
def read_tag(data: ReadBuffer, /) -> u8: ...
def cache_version() -> u8: ...
def extract_symbol(data: ReadBuffer, /) -> bytes: ...
269 changes: 269 additions & 0 deletions mypyc/lib-rt/internal/librt_internal.c
Original file line number Diff line number Diff line change
Expand Up @@ -920,6 +920,273 @@ write_tag(PyObject *self, PyObject *const *args, size_t nargs) {
return Py_None;
}

// Serialization tags recognized by the skip helpers below.
// All tags must be kept in sync with cache.py, nodes.py, and types.py.
// Primitive types.
#define LITERAL_FALSE 0
#define LITERAL_TRUE 1
#define LITERAL_NONE 2
#define LITERAL_INT 3
#define LITERAL_STR 4
#define LITERAL_BYTES 5
#define LITERAL_FLOAT 6
#define LITERAL_COMPLEX 7

// Supported builtin collections.
#define LIST_GEN 20
#define LIST_INT 21
#define LIST_STR 22
#define LIST_BYTES 23
#define TUPLE_GEN 24
#define DICT_STR_GEN 30

// This is the smallest custom class tag.
// _skip_object() treats tags strictly between MYPY_FILE and RESERVED as
// custom classes; MYPY_FILE itself is not accepted there.
#define MYPY_FILE 50

// Instance class has special formats.
// _skip_instance() tests INSTANCE_STR..INSTANCE_OBJECT as one inclusive
// range, so these values must stay contiguous.
#define INSTANCE 80
#define INSTANCE_SIMPLE 81
#define INSTANCE_GENERIC 82
#define INSTANCE_STR 83
#define INSTANCE_FUNCTION 84
#define INSTANCE_INT 85
#define INSTANCE_BOOL 86
#define INSTANCE_OBJECT 87

// RESERVED is the exclusive upper bound of the custom class tag range in
// _skip_object(); END_TAG is what _skip_class() stops on.
#define RESERVED 254
#define END_TAG 255

// Forward declaration.
static char _skip_object(PyObject *data, uint8_t tag);

// Advance the read position of `data` by `size` bytes.
//
// Returns CPY_NONE on success. _CHECK_READ is handed CPY_NONE_ERROR as
// its failure value, so a rejected read is reported to the caller
// instead of moving the pointer. Every _skip_xxx() helper uses this same
// CPY_NONE / CPY_NONE_ERROR convention so errors can be propagated.
static inline char
_skip(PyObject *data, Py_ssize_t size) {
    _CHECK_READ(data, size, CPY_NONE_ERROR)
    ReadBufferObject *buf = (ReadBufferObject *)data;
    buf->ptr += size;
    return CPY_NONE;
}

// Skip whatever follows the already-consumed first byte `first` of a
// short int.
//
// If TWO_BYTES_INT_BIT is clear nothing follows; otherwise one more byte
// follows, or three more when FOUR_BYTES_INT_BIT is also set.
// Returns CPY_NONE on success, CPY_NONE_ERROR if _skip() fails.
static inline char
_skip_short_int(PyObject *data, uint8_t first) {
    if (!(first & TWO_BYTES_INT_BIT)) {
        // The value fit entirely in the first byte.
        return CPY_NONE;
    }
    Py_ssize_t remaining = (first & FOUR_BYTES_INT_BIT) ? 3 : 1;
    return _skip(data, remaining);
}

// Skip one serialized integer (short or long form) in `data`.
//
// Returns CPY_NONE on success and CPY_NONE_ERROR on failure. A negative
// long-int header raises ValueError("invalid int data").
static inline char
_skip_int(PyObject *data) {
    uint8_t lead;

    _CHECK_READ(data, 1, CPY_NONE_ERROR)
    _READ(&lead, data, uint8_t);

    if (unlikely(lead == LONG_INT_TRAILER)) {
        // Long form: a short-int header follows, then the payload bytes.
        _CHECK_READ(data, 1, CPY_NONE_ERROR)
        _READ(&lead, data, uint8_t);
        Py_ssize_t header = _read_short_int(data, lead);
        if (header == CPY_INT_TAG) {
            return CPY_NONE_ERROR;
        }
        if (header < 0) {
            PyErr_SetString(PyExc_ValueError, "invalid int data");
            return CPY_NONE_ERROR;
        }
        // The two low bits of the header are not part of the byte count
        // (presumably the tag bit and a sign flag -- confirm against the
        // matching reader).
        return _skip(data, header >> 2);
    }

    // Common case: short form.
    return _skip_short_int(data, lead);
}

// Read a collection/string size from `data`.
//
// Thin validating wrapper around _read_short_int(). Returns the size
// (always >= 0) on success, or -1 on failure. Sizes written in the
// long-int form are rejected with ValueError("unsupported size") even
// though the writer side permits them; negative values are rejected
// with ValueError("invalid size").
static inline Py_ssize_t
_read_size(PyObject *data) {
    uint8_t lead;

    _CHECK_READ(data, 1, -1)
    _READ(&lead, data, uint8_t);

    if (unlikely(lead == LONG_INT_TRAILER)) {
        // Lists/dicts this large are never expected, so refuse them.
        PyErr_SetString(PyExc_ValueError, "unsupported size");
        return -1;
    }

    CPyTagged tagged = _read_short_int(data, lead);
    if (tagged == CPY_INT_TAG) {
        return -1;
    }
    if ((Py_ssize_t)tagged < 0) {
        PyErr_SetString(PyExc_ValueError, "invalid size");
        return -1;
    }
    // Drop the low tag bit to get the plain size.
    return (Py_ssize_t)(tagged >> 1);
}

// Skip a size-prefixed str/bytes payload: read the size, then step over
// that many bytes. Returns CPY_NONE on success, CPY_NONE_ERROR if the
// size cannot be read or the skip fails.
static inline char
_skip_str_bytes(PyObject *data) {
    Py_ssize_t length = _read_size(data);
    if (length >= 0) {
        return _skip(data, length);
    }
    return CPY_NONE_ERROR;
}

// List/dict logic should be kept in sync with mypy/cache.py
static inline char
_skip_list_gen(PyObject *data) {
Py_ssize_t size = _read_size(data);
if (size < 0)
return CPY_NONE_ERROR;
int i;
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this have type Py_ssize_t? (Here and other similar functions.)

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I guess I just copied this from somewhere and all test passed. I will check what is the best cross-platform type here.

for (i = 0; i < size; i++) {
uint8_t tag = read_tag_internal(data);
if (unlikely(tag == CPY_LL_UINT_ERROR && PyErr_Occurred())) {
return CPY_NONE_ERROR;
}
if (unlikely(_skip_object(data, tag) == CPY_NONE_ERROR))
return CPY_NONE_ERROR;
}
return CPY_NONE;
}

// Skip a list of ints: a size followed by that many serialized ints.
// Returns CPY_NONE on success, CPY_NONE_ERROR if the size or any
// element cannot be skipped.
static inline char
_skip_list_int(PyObject *data) {
    Py_ssize_t size = _read_size(data);
    if (size < 0)
        return CPY_NONE_ERROR;
    // The counter has the same type as `size`, so the comparison never
    // mixes widths and cannot overflow before reaching the bound.
    for (Py_ssize_t i = 0; i < size; i++) {
        if (unlikely(_skip_int(data) == CPY_NONE_ERROR))
            return CPY_NONE_ERROR;
    }
    return CPY_NONE;
}

// Skip a list of str/bytes items: a size followed by that many
// size-prefixed payloads. Returns CPY_NONE on success, CPY_NONE_ERROR
// if the size or any element cannot be skipped.
static inline char
_skip_list_str_bytes(PyObject *data) {
    Py_ssize_t size = _read_size(data);
    if (size < 0)
        return CPY_NONE_ERROR;
    // The counter has the same type as `size`, so the comparison never
    // mixes widths and cannot overflow before reaching the bound.
    for (Py_ssize_t i = 0; i < size; i++) {
        if (unlikely(_skip_str_bytes(data) == CPY_NONE_ERROR))
            return CPY_NONE_ERROR;
    }
    return CPY_NONE;
}

// Skip a dict with str keys and generic values: a size followed by that
// many (key, value) entries. Returns CPY_NONE on success,
// CPY_NONE_ERROR if the size or any entry cannot be skipped.
static inline char
_skip_dict_str_gen(PyObject *data) {
    Py_ssize_t size = _read_size(data);
    if (size < 0)
        return CPY_NONE_ERROR;
    // The counter has the same type as `size`, so the comparison never
    // mixes widths and cannot overflow before reaching the bound.
    for (Py_ssize_t i = 0; i < size; i++) {
        // Bare key followed by tagged value.
        if (unlikely(_skip_str_bytes(data) == CPY_NONE_ERROR))
            return CPY_NONE_ERROR;
        uint8_t tag = read_tag_internal(data);
        // The error sentinel is also a possible tag value, so it only
        // counts as a failure when an exception is actually set.
        if (unlikely(tag == CPY_LL_UINT_ERROR && PyErr_Occurred())) {
            return CPY_NONE_ERROR;
        }
        if (unlikely(_skip_object(data, tag) == CPY_NONE_ERROR))
            return CPY_NONE_ERROR;
    }
    return CPY_NONE;
}

// Skip the body of a custom class: tagged objects up to and including
// the closing END_TAG. As in mypy/cache.py, the opening class tag has
// already been consumed by the caller.
// Returns CPY_NONE once END_TAG is read, CPY_NONE_ERROR if a tag cannot
// be read or a nested object cannot be skipped.
static inline char
_skip_class(PyObject *data) {
    for (;;) {
        uint8_t field_tag = read_tag_internal(data);
        // The sentinel only signals failure when an exception is set.
        if (unlikely(field_tag == CPY_LL_UINT_ERROR && PyErr_Occurred()))
            return CPY_NONE_ERROR;
        if (field_tag == END_TAG)
            break;
        if (unlikely(_skip_object(data, field_tag) == CPY_NONE_ERROR))
            return CPY_NONE_ERROR;
    }
    return CPY_NONE;
}

// Skip an Instance, which uses a compact layout selected by a second tag
// (an important optimization):
//   INSTANCE_SIMPLE              -> one size-prefixed str/bytes payload
//   INSTANCE_GENERIC             -> class body terminated by END_TAG
//   INSTANCE_STR..INSTANCE_OBJECT -> nothing further to skip
// Any other second tag raises ValueError. Returns CPY_NONE on success,
// CPY_NONE_ERROR on failure.
static inline char
_skip_instance(PyObject *data) {
    uint8_t kind = read_tag_internal(data);
    // The sentinel only signals failure when an exception is set.
    if (unlikely(kind == CPY_LL_UINT_ERROR && PyErr_Occurred())) {
        return CPY_NONE_ERROR;
    }
    if (kind == INSTANCE_SIMPLE) {
        return _skip_str_bytes(data);
    }
    if (kind == INSTANCE_GENERIC) {
        return _skip_class(data);
    }
    if (kind < INSTANCE_STR || kind > INSTANCE_OBJECT) {
        PyErr_Format(PyExc_ValueError, "Unexpected instance tag: %d", kind);
        return CPY_NONE_ERROR;
    }
    // Well-known instance kinds carry no extra data.
    return CPY_NONE;
}

// This is the main dispatch point. Branches are ordered manually
// based roughly on frequency in self-check.
static char
_skip_object(PyObject *data, uint8_t tag) {
if (tag == LITERAL_STR || tag == LITERAL_BYTES)
return _skip_str_bytes(data);
if (tag == LITERAL_NONE || tag == LITERAL_FALSE || tag == LITERAL_TRUE)
return CPY_NONE;
if (tag == LIST_GEN || tag == TUPLE_GEN)
return _skip_list_gen(data);
if (tag == LITERAL_INT)
return _skip_int(data);
if (tag == INSTANCE)
return _skip_instance(data);
if (tag > MYPY_FILE && tag < RESERVED)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be >=?

Copy link
Copy Markdown
Member Author

@ilevkivskyi ilevkivskyi Apr 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is actually intentional, a little sanity check. We know that we should only read symbol nodes using this function, and MypyFile should be always handled using cross_ref in cache, and never appear explicitly in symbol tables.

return _skip_class(data);
if (tag == LIST_INT)
return _skip_list_int(data);
if (tag == LIST_STR || tag == LIST_BYTES)
return _skip_list_str_bytes(data);
if (tag == DICT_STR_GEN)
return _skip_dict_str_gen(data);
if (tag == LITERAL_FLOAT)
return _skip(data, 8);
if (tag == LITERAL_COMPLEX)
return _skip(data, 16);
PyErr_Format(PyExc_ValueError, "Unsupported tag: %d", tag);
return CPY_NONE_ERROR;
}

// Copy the serialized bytes of one symbol out of `data`.
//
// Starting at the current read position, skips one class body (through
// its END_TAG) and returns a new bytes object holding exactly the bytes
// that were skipped. Returns NULL with an exception set if skipping or
// the bytes allocation fails; the read position is not restored here
// in that case.
static PyObject*
extract_symbol_internal(PyObject *data) {
    ReadBufferObject *buf = (ReadBufferObject *)data;
    char *start = buf->ptr;
    if (unlikely(_skip_class(data) == CPY_NONE_ERROR)) {
        return NULL;
    }
    // PyBytes_FromStringAndSize() already yields NULL on failure.
    return PyBytes_FromStringAndSize(start, buf->ptr - start);
}

// Python-level entry point: extract_symbol(data) -> bytes.
//
// Accepts exactly one positional argument, validated by
// _CHECK_READ_BUFFER, and delegates to extract_symbol_internal().
// Raises TypeError on a wrong argument count. Returns NULL with an
// exception set on any failure.
static PyObject*
extract_symbol(PyObject *self, PyObject *const *args, size_t nargs) {
    if (likely(nargs == 1)) {
        PyObject *data = args[0];
        _CHECK_READ_BUFFER(data, NULL)
        return extract_symbol_internal(data);
    }
    PyErr_Format(PyExc_TypeError,
                 "extract_symbol() takes exactly 1 argument (%zu given)", nargs);
    return NULL;
}

static uint8_t
cache_version_internal(void) {
return 0;
Expand Down Expand Up @@ -954,6 +1221,7 @@ static PyMethodDef librt_internal_module_methods[] = {
{"write_tag", (PyCFunction)write_tag, METH_FASTCALL, PyDoc_STR("write a short int")},
{"read_tag", (PyCFunction)read_tag, METH_FASTCALL, PyDoc_STR("read a short int")},
{"cache_version", (PyCFunction)cache_version, METH_NOARGS, PyDoc_STR("cache format version")},
{"extract_symbol", (PyCFunction)extract_symbol, METH_FASTCALL, PyDoc_STR("extract bytes for a mypy symbol")},
{NULL, NULL, 0, NULL}
};

Expand Down Expand Up @@ -1005,6 +1273,7 @@ librt_internal_module_exec(PyObject *m)
(void *)ReadBuffer_type_internal,
(void *)WriteBuffer_type_internal,
(void *)NativeInternal_API_Version,
(void *)extract_symbol_internal
};
PyObject *c_api_object = PyCapsule_New((void *)NativeInternal_API, "librt.internal._C_API", NULL);
if (PyModule_Add(m, "_C_API", c_api_object) < 0) {
Expand Down
Loading
Loading