-
-
Notifications
You must be signed in to change notification settings - Fork 3.2k
Add librt functionality for lazy deserialization #21158
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
331d4b7
e584af9
3fead33
ac2b67c
88ab4e6
c336e7a
ac16d19
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -920,6 +920,273 @@ write_tag(PyObject *self, PyObject *const *args, size_t nargs) { | |
| return Py_None; | ||
| } | ||
|
|
||
| // Serialization tags. Every value below must be kept in sync with cache.py, nodes.py, and types.py. | ||
| // Tags for primitive literal values. | ||
| #define LITERAL_FALSE 0 | ||
| #define LITERAL_TRUE 1 | ||
| #define LITERAL_NONE 2 | ||
| #define LITERAL_INT 3 | ||
| #define LITERAL_STR 4 | ||
| #define LITERAL_BYTES 5 | ||
| #define LITERAL_FLOAT 6 | ||
| #define LITERAL_COMPLEX 7 | ||
|
|
||
| // Tags for the supported builtin collections (lists, tuples, str-keyed dicts). | ||
| #define LIST_GEN 20 | ||
| #define LIST_INT 21 | ||
| #define LIST_STR 22 | ||
| #define LIST_BYTES 23 | ||
| #define TUPLE_GEN 24 | ||
| #define DICT_STR_GEN 30 | ||
|
|
||
| // MYPY_FILE is the smallest custom class tag; _skip_object() treats tags strictly between it and RESERVED as custom classes. | ||
| #define MYPY_FILE 50 | ||
|
|
||
| // Instance has special formats: after INSTANCE, a second tag (INSTANCE_SIMPLE .. INSTANCE_OBJECT) selects the layout, see _skip_instance(). | ||
| #define INSTANCE 80 | ||
| #define INSTANCE_SIMPLE 81 | ||
| #define INSTANCE_GENERIC 82 | ||
| #define INSTANCE_STR 83 | ||
| #define INSTANCE_FUNCTION 84 | ||
| #define INSTANCE_INT 85 | ||
| #define INSTANCE_BOOL 86 | ||
| #define INSTANCE_OBJECT 87 | ||
|
|
||
| #define RESERVED 254 | ||
| #define END_TAG 255 | ||
|
|
||
| // Forward declaration: the container skippers below call _skip_object(), which in turn dispatches back to them. | ||
| static char _skip_object(PyObject *data, uint8_t tag); | ||
|
|
||
| // Advance the buffer's read pointer by `size` bytes without decoding them. | ||
| // Returns CPY_NONE on success. _CHECK_READ (defined outside this view) is | ||
| // handed CPY_NONE_ERROR, presumably as the value to return when the read | ||
| // would not fit; this is why every _skip_xxx() helper can report an error. | ||
| static inline char | ||
| _skip(PyObject *data, Py_ssize_t size) { | ||
|     _CHECK_READ(data, size, CPY_NONE_ERROR) | ||
|     ReadBufferObject *buf = (ReadBufferObject *)data; | ||
|     buf->ptr += size; | ||
|     return CPY_NONE; | ||
| } | ||
|
|
||
| // Skip the remaining bytes of a short int whose first byte is `first`. | ||
| // Nothing follows when TWO_BYTES_INT_BIT is clear; otherwise 1 more byte | ||
| // follows if FOUR_BYTES_INT_BIT is clear and 3 more bytes if it is set. | ||
| static inline char | ||
| _skip_short_int(PyObject *data, uint8_t first) { | ||
|     if (!(first & TWO_BYTES_INT_BIT)) { | ||
|         return CPY_NONE; | ||
|     } | ||
|     Py_ssize_t trailing = (first & FOUR_BYTES_INT_BIT) ? 3 : 1; | ||
|     return _skip(data, trailing); | ||
| } | ||
|
|
||
| // Skip one serialized int, in either the short or the long encoding. | ||
| // Returns CPY_NONE on success and CPY_NONE_ERROR on failure. | ||
| static inline char | ||
| _skip_int(PyObject *data) { | ||
|     uint8_t lead; | ||
|     _CHECK_READ(data, 1, CPY_NONE_ERROR) | ||
|     _READ(&lead, data, uint8_t); | ||
|     if (likely(lead != LONG_INT_TRAILER)) { | ||
|         // Short encoding: the first byte tells how many bytes remain. | ||
|         return _skip_short_int(data, lead); | ||
|     } | ||
|
|
||
|     // Long encoding: a short int follows that packs the payload size | ||
|     // together with two low bits (presumably sign/tag bits), then the payload. | ||
|     _CHECK_READ(data, 1, CPY_NONE_ERROR) | ||
|     _READ(&lead, data, uint8_t); | ||
|     Py_ssize_t packed = _read_short_int(data, lead); | ||
|     if (packed == CPY_INT_TAG) { | ||
|         return CPY_NONE_ERROR; | ||
|     } | ||
|     if (packed < 0) { | ||
|         PyErr_SetString(PyExc_ValueError, "invalid int data"); | ||
|         return CPY_NONE_ERROR; | ||
|     } | ||
|     // Dropping the two low bits leaves the number of payload bytes to skip. | ||
|     return _skip(data, packed >> 2); | ||
| } | ||
|
|
||
| // Read a size value: a thin wrapper over _read_short_int() that | ||
| // validates the result. Returns the size, or -1 on failure. | ||
| static inline Py_ssize_t | ||
| _read_size(PyObject *data) { | ||
|     uint8_t lead; | ||
|     _CHECK_READ(data, 1, -1) | ||
|     _READ(&lead, data, uint8_t); | ||
|     // Serializing lists/dicts with over 4 billion items is actually allowed, | ||
|     // but nothing really needs it, so fail with ValueError just in case. | ||
|     if (unlikely(lead == LONG_INT_TRAILER)) { | ||
|         PyErr_SetString(PyExc_ValueError, "unsupported size"); | ||
|         return -1; | ||
|     } | ||
|     CPyTagged tagged = _read_short_int(data, lead); | ||
|     if (tagged == CPY_INT_TAG) { | ||
|         return -1; | ||
|     } | ||
|     if ((Py_ssize_t)tagged < 0) { | ||
|         PyErr_SetString(PyExc_ValueError, "invalid size"); | ||
|         return -1; | ||
|     } | ||
|     // Shift out the low tag bit to obtain the plain size. | ||
|     return (Py_ssize_t)(tagged >> 1); | ||
| } | ||
|
|
||
| // Skip a length-prefixed str/bytes payload: read the size, then skip that many bytes. | ||
| static inline char | ||
| _skip_str_bytes(PyObject *data) { | ||
|     Py_ssize_t length = _read_size(data); | ||
|     if (length >= 0) { | ||
|         return _skip(data, length); | ||
|     } | ||
|     return CPY_NONE_ERROR; | ||
| } | ||
|
|
||
| // List/dict logic should be kept in sync with mypy/cache.py | ||
| // Skip a generic sequence: a size followed by that many tagged objects. | ||
| // Returns CPY_NONE on success and CPY_NONE_ERROR on failure. | ||
| static inline char | ||
| _skip_list_gen(PyObject *data) { | ||
|     Py_ssize_t size = _read_size(data); | ||
|     if (size < 0) | ||
|         return CPY_NONE_ERROR; | ||
|     // The counter uses the same type as the bound; it used to be a plain int. | ||
|     Py_ssize_t i; | ||
|     for (i = 0; i < size; i++) { | ||
|         uint8_t tag = read_tag_internal(data); | ||
|         if (unlikely(tag == CPY_LL_UINT_ERROR && PyErr_Occurred())) { | ||
|             return CPY_NONE_ERROR; | ||
|         } | ||
|         if (unlikely(_skip_object(data, tag) == CPY_NONE_ERROR)) | ||
|             return CPY_NONE_ERROR; | ||
|     } | ||
|     return CPY_NONE; | ||
| } | ||
|
|
||
| // Skip a list of ints: a size followed by that many untagged ints. | ||
| // Returns CPY_NONE on success and CPY_NONE_ERROR on failure. | ||
| static inline char | ||
| _skip_list_int(PyObject *data) { | ||
|     Py_ssize_t size = _read_size(data); | ||
|     if (size < 0) | ||
|         return CPY_NONE_ERROR; | ||
|     // The counter uses the same type as the bound; it used to be a plain int. | ||
|     Py_ssize_t i; | ||
|     for (i = 0; i < size; i++) { | ||
|         if (unlikely(_skip_int(data) == CPY_NONE_ERROR)) | ||
|             return CPY_NONE_ERROR; | ||
|     } | ||
|     return CPY_NONE; | ||
| } | ||
|
|
||
| // Skip a list of str/bytes: a size followed by that many length-prefixed payloads. | ||
| // Returns CPY_NONE on success and CPY_NONE_ERROR on failure. | ||
| static inline char | ||
| _skip_list_str_bytes(PyObject *data) { | ||
|     Py_ssize_t size = _read_size(data); | ||
|     if (size < 0) | ||
|         return CPY_NONE_ERROR; | ||
|     // The counter uses the same type as the bound; it used to be a plain int. | ||
|     Py_ssize_t i; | ||
|     for (i = 0; i < size; i++) { | ||
|         if (unlikely(_skip_str_bytes(data) == CPY_NONE_ERROR)) | ||
|             return CPY_NONE_ERROR; | ||
|     } | ||
|     return CPY_NONE; | ||
| } | ||
|
|
||
| // Skip a str-keyed dict: a size followed by that many (key, value) pairs. | ||
| // Returns CPY_NONE on success and CPY_NONE_ERROR on failure. | ||
| static inline char | ||
| _skip_dict_str_gen(PyObject *data) { | ||
|     Py_ssize_t size = _read_size(data); | ||
|     if (size < 0) | ||
|         return CPY_NONE_ERROR; | ||
|     // The counter uses the same type as the bound; it used to be a plain int. | ||
|     Py_ssize_t i; | ||
|     for (i = 0; i < size; i++) { | ||
|         // Bare key followed by tagged value. | ||
|         if (unlikely(_skip_str_bytes(data) == CPY_NONE_ERROR)) | ||
|             return CPY_NONE_ERROR; | ||
|         uint8_t tag = read_tag_internal(data); | ||
|         if (unlikely(tag == CPY_LL_UINT_ERROR && PyErr_Occurred())) { | ||
|             return CPY_NONE_ERROR; | ||
|         } | ||
|         if (unlikely(_skip_object(data, tag) == CPY_NONE_ERROR)) | ||
|             return CPY_NONE_ERROR; | ||
|     } | ||
|     return CPY_NONE; | ||
| } | ||
|
|
||
| // Skip the body of a custom class: tagged objects up to and including END_TAG. | ||
| // As in mypy/cache.py, the caller is expected to have read the opening tag. | ||
| static inline char | ||
| _skip_class(PyObject *data) { | ||
|     for (;;) { | ||
|         uint8_t field_tag = read_tag_internal(data); | ||
|         if (unlikely(field_tag == CPY_LL_UINT_ERROR && PyErr_Occurred())) | ||
|             return CPY_NONE_ERROR; | ||
|         if (field_tag == END_TAG) | ||
|             break; | ||
|         if (unlikely(_skip_object(data, field_tag) == CPY_NONE_ERROR)) | ||
|             return CPY_NONE_ERROR; | ||
|     } | ||
|     return CPY_NONE; | ||
| } | ||
|
|
||
| // Skip an Instance, which uses a special compact layout (an important optimization). | ||
| // A second tag selects the layout: the INSTANCE_STR..INSTANCE_OBJECT tags carry no | ||
| // further data, INSTANCE_SIMPLE is followed by one str/bytes payload, and | ||
| // INSTANCE_GENERIC is followed by a class body terminated by END_TAG. | ||
| static inline char | ||
| _skip_instance(PyObject *data) { | ||
|     uint8_t layout = read_tag_internal(data); | ||
|     if (unlikely(layout == CPY_LL_UINT_ERROR && PyErr_Occurred())) { | ||
|         return CPY_NONE_ERROR; | ||
|     } | ||
|     switch (layout) { | ||
|     case INSTANCE_STR: | ||
|     case INSTANCE_FUNCTION: | ||
|     case INSTANCE_INT: | ||
|     case INSTANCE_BOOL: | ||
|     case INSTANCE_OBJECT: | ||
|         return CPY_NONE; | ||
|     case INSTANCE_SIMPLE: | ||
|         return _skip_str_bytes(data); | ||
|     case INSTANCE_GENERIC: | ||
|         return _skip_class(data); | ||
|     default: | ||
|         PyErr_Format(PyExc_ValueError, "Unexpected instance tag: %d", layout); | ||
|         return CPY_NONE_ERROR; | ||
|     } | ||
| } | ||
|
|
||
| // This is the main dispatch point: skip one object whose tag the caller already read. Branches are ordered manually | ||
| // based roughly on frequency in self-check. Custom class tags are accepted only strictly between MYPY_FILE and RESERVED; per the author's review reply this is an intentional sanity check, since only symbol nodes are expected to be read here. | ||
| static char | ||
| _skip_object(PyObject *data, uint8_t tag) { | ||
| if (tag == LITERAL_STR || tag == LITERAL_BYTES) | ||
| return _skip_str_bytes(data); | ||
| if (tag == LITERAL_NONE || tag == LITERAL_FALSE || tag == LITERAL_TRUE) | ||
| return CPY_NONE; | ||
| if (tag == LIST_GEN || tag == TUPLE_GEN) | ||
| return _skip_list_gen(data); | ||
| if (tag == LITERAL_INT) | ||
| return _skip_int(data); | ||
| if (tag == INSTANCE) | ||
| return _skip_instance(data); | ||
| if (tag > MYPY_FILE && tag < RESERVED) | ||

Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this be
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is actually intentional, a little sanity check. We know that we should only read symbol nodes using this function, and |
||
| return _skip_class(data); | ||
| if (tag == LIST_INT) | ||
| return _skip_list_int(data); | ||
| if (tag == LIST_STR || tag == LIST_BYTES) | ||
| return _skip_list_str_bytes(data); | ||
| if (tag == DICT_STR_GEN) | ||
| return _skip_dict_str_gen(data); | ||
| if (tag == LITERAL_FLOAT) | ||
| return _skip(data, 8); | ||
| if (tag == LITERAL_COMPLEX) | ||
| return _skip(data, 16); | ||
| PyErr_Format(PyExc_ValueError, "Unsupported tag: %d", tag); | ||
| return CPY_NONE_ERROR; | ||
| } | ||
|
|
||
| // Return the serialized bytes of one class body as a new bytes object. | ||
| // Remembers the current read position, skips forward with _skip_class(), | ||
| // and copies the skipped span. Returns NULL on error. | ||
| static PyObject* | ||
| extract_symbol_internal(PyObject *data) { | ||
|     ReadBufferObject *buf = (ReadBufferObject *)data; | ||
|     char *start = buf->ptr; | ||
|     if (unlikely(_skip_class(data) == CPY_NONE_ERROR)) { | ||
|         return NULL; | ||
|     } | ||
|     Py_ssize_t length = buf->ptr - start; | ||
|     // PyBytes_FromStringAndSize() yields NULL on failure, so pass it straight through. | ||
|     return PyBytes_FromStringAndSize(start, length); | ||
| } | ||
|
|
||
| // Python-level entry point (METH_FASTCALL): extract_symbol(data). | ||
| // Requires exactly one argument, checks it with _CHECK_READ_BUFFER, and | ||
| // delegates to extract_symbol_internal(). Raises TypeError on a wrong argument count. | ||
| static PyObject* | ||
| extract_symbol(PyObject *self, PyObject *const *args, size_t nargs) { | ||
|     if (likely(nargs == 1)) { | ||
|         PyObject *data = args[0]; | ||
|         _CHECK_READ_BUFFER(data, NULL) | ||
|         return extract_symbol_internal(data); | ||
|     } | ||
|     PyErr_Format(PyExc_TypeError, | ||
|                  "extract_symbol() takes exactly 1 argument (%zu given)", nargs); | ||
|     return NULL; | ||
| } | ||
|
|
||
| static uint8_t | ||
| cache_version_internal(void) { | ||
| return 0; | ||
|
|
@@ -954,6 +1221,7 @@ static PyMethodDef librt_internal_module_methods[] = { | |
| {"write_tag", (PyCFunction)write_tag, METH_FASTCALL, PyDoc_STR("write a short int")}, | ||
| {"read_tag", (PyCFunction)read_tag, METH_FASTCALL, PyDoc_STR("read a short int")}, | ||
| {"cache_version", (PyCFunction)cache_version, METH_NOARGS, PyDoc_STR("cache format version")}, | ||
| {"extract_symbol", (PyCFunction)extract_symbol, METH_FASTCALL, PyDoc_STR("extract bytes for a mypy symbol")}, | ||
| {NULL, NULL, 0, NULL} | ||
| }; | ||
|
|
||
|
|
@@ -1005,6 +1273,7 @@ librt_internal_module_exec(PyObject *m) | |
| (void *)ReadBuffer_type_internal, | ||
| (void *)WriteBuffer_type_internal, | ||
| (void *)NativeInternal_API_Version, | ||
| (void *)extract_symbol_internal | ||
| }; | ||
| PyObject *c_api_object = PyCapsule_New((void *)NativeInternal_API, "librt.internal._C_API", NULL); | ||
| if (PyModule_Add(m, "_C_API", c_api_object) < 0) { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this have type `Py_ssize_t`? (Here and in other similar functions.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I guess I just copied this from somewhere and all test passed. I will check what is the best cross-platform type here.