@@ -804,7 +804,7 @@ _write_long_int(PyObject *data, CPyTagged value) {
804804 }
805805
806806 // Write absolute integer value as byte array in a variable-length little endian format.
807- int i ;
807+ Py_ssize_t i ;
808808 for (i = len ; i > 1 ; i -= 2 ) {
809809 if (write_tag_internal (
810810 data , hex_to_int (str [i - 1 ]) | (hex_to_int (str [i - 2 ]) << 4 )) == CPY_NONE_ERROR )
@@ -920,6 +920,275 @@ write_tag(PyObject *self, PyObject *const *args, size_t nargs) {
920920 return Py_None ;
921921}
922922
// Serialization tags. Every value below must be kept in sync with
// cache.py, nodes.py, and types.py.

// Tags for primitive literal values.
#define LITERAL_FALSE       0
#define LITERAL_TRUE        1
#define LITERAL_NONE        2
#define LITERAL_INT         3
#define LITERAL_STR         4
#define LITERAL_BYTES       5
#define LITERAL_FLOAT       6
#define LITERAL_COMPLEX     7

// Tags for the supported builtin collections.
#define LIST_GEN            20
#define LIST_INT            21
#define LIST_STR            22
#define LIST_BYTES          23
#define TUPLE_GEN           24
#define DICT_STR_GEN        30

// Smallest tag used for a custom class.
#define MYPY_FILE           50

// Instance uses its own special formats, selected by a second tag.
#define INSTANCE            80
#define INSTANCE_SIMPLE     81
#define INSTANCE_GENERIC    82
#define INSTANCE_STR        83
#define INSTANCE_FUNCTION   84
#define INSTANCE_INT        85
#define INSTANCE_BOOL       86
#define INSTANCE_OBJECT     87

#define RESERVED            254
#define END_TAG             255
957+
958+ // Forward declaration: _skip_object() is defined further down, after the helpers that call it.
959+ static char _skip_object (PyObject * data , uint8_t tag );
960+
961+ static inline char
962+ _skip (PyObject * data , Py_ssize_t size ) {
963+ // We are careful about error conditions, so all
964+ // _skip_xxx() functions can return an error value.
965+ _CHECK_READ (data , size , CPY_NONE_ERROR )
966+ ((ReadBufferObject * )data )-> ptr += size ;
967+ return CPY_NONE ;
968+ }
969+
970+ static inline char
971+ _skip_short_int (PyObject * data , uint8_t first ) {
972+ if ((first & TWO_BYTES_INT_BIT ) == 0 )
973+ return CPY_NONE ;
974+ if ((first & FOUR_BYTES_INT_BIT ) == 0 )
975+ return _skip (data , 1 );
976+ return _skip (data , 3 );
977+ }
978+
979+ static inline char
980+ _skip_int (PyObject * data ) {
981+ _CHECK_READ (data , 1 , CPY_NONE_ERROR )
982+
983+ uint8_t first ;
984+ _READ (& first , data , uint8_t );
985+ if (likely (first != LONG_INT_TRAILER )) {
986+ return _skip_short_int (data , first );
987+ }
988+
989+ _CHECK_READ (data , 1 , CPY_NONE_ERROR )
990+ _READ (& first , data , uint8_t );
991+ Py_ssize_t size_and_sign = _read_short_int (data , first );
992+ if (size_and_sign == CPY_INT_TAG )
993+ return CPY_NONE_ERROR ;
994+ if ((Py_ssize_t )size_and_sign < 0 ) {
995+ PyErr_SetString (PyExc_ValueError , "invalid int data" );
996+ return CPY_NONE_ERROR ;
997+ }
998+ Py_ssize_t size = size_and_sign >> 2 ;
999+ return _skip (data , size );
1000+ }
1001+
1002+ // This is essentially a wrapper around _read_short_int() that makes
1003+ // sure the result is valid.
1004+ static inline Py_ssize_t
1005+ _read_size (PyObject * data ) {
1006+ _CHECK_READ (data , 1 , -1 )
1007+ uint8_t first ;
1008+ _READ (& first , data , uint8_t );
1009+ // We actually allow serializing lists/dicts with over 4 billion items,
1010+ // but we don't really need to, fail with ValueError just in case.
1011+ if (unlikely (first == LONG_INT_TRAILER )) {
1012+ PyErr_SetString (PyExc_ValueError , "unsupported size" );
1013+ return -1 ;
1014+ }
1015+ CPyTagged tagged_size = _read_short_int (data , first );
1016+ if (tagged_size == CPY_INT_TAG )
1017+ return -1 ;
1018+ if ((Py_ssize_t )tagged_size < 0 ) {
1019+ PyErr_SetString (PyExc_ValueError , "invalid size" );
1020+ return -1 ;
1021+ }
1022+ Py_ssize_t size = tagged_size >> 1 ;
1023+ return size ;
1024+ }
1025+
1026+ static inline char
1027+ _skip_str_bytes (PyObject * data ) {
1028+ Py_ssize_t size = _read_size (data );
1029+ if (size < 0 )
1030+ return CPY_NONE_ERROR ;
1031+ return _skip (data , size );
1032+ }
1033+
1034+ // List/dict logic should be kept in sync with mypy/cache.py
1035+ static inline char
1036+ _skip_list_gen (PyObject * data ) {
1037+ Py_ssize_t size = _read_size (data );
1038+ if (size < 0 )
1039+ return CPY_NONE_ERROR ;
1040+ Py_ssize_t i ;
1041+ for (i = 0 ; i < size ; i ++ ) {
1042+ uint8_t tag = read_tag_internal (data );
1043+ if (unlikely (tag == CPY_LL_UINT_ERROR && PyErr_Occurred ())) {
1044+ return CPY_NONE_ERROR ;
1045+ }
1046+ if (unlikely (_skip_object (data , tag ) == CPY_NONE_ERROR ))
1047+ return CPY_NONE_ERROR ;
1048+ }
1049+ return CPY_NONE ;
1050+ }
1051+
1052+ static inline char
1053+ _skip_list_int (PyObject * data ) {
1054+ Py_ssize_t size = _read_size (data );
1055+ if (size < 0 )
1056+ return CPY_NONE_ERROR ;
1057+ Py_ssize_t i ;
1058+ for (i = 0 ; i < size ; i ++ ) {
1059+ if (unlikely (_skip_int (data ) == CPY_NONE_ERROR ))
1060+ return CPY_NONE_ERROR ;
1061+ }
1062+ return CPY_NONE ;
1063+ }
1064+
1065+ static inline char
1066+ _skip_list_str_bytes (PyObject * data ) {
1067+ Py_ssize_t size = _read_size (data );
1068+ if (size < 0 )
1069+ return CPY_NONE_ERROR ;
1070+ Py_ssize_t i ;
1071+ for (i = 0 ; i < size ; i ++ ) {
1072+ if (unlikely (_skip_str_bytes (data ) == CPY_NONE_ERROR ))
1073+ return CPY_NONE_ERROR ;
1074+ }
1075+ return CPY_NONE ;
1076+ }
1077+
1078+ static inline char
1079+ _skip_dict_str_gen (PyObject * data ) {
1080+ Py_ssize_t size = _read_size (data );
1081+ if (size < 0 )
1082+ return CPY_NONE_ERROR ;
1083+ Py_ssize_t i ;
1084+ for (i = 0 ; i < size ; i ++ ) {
1085+ // Bare key followed by tagged value.
1086+ if (unlikely (_skip_str_bytes (data ) == CPY_NONE_ERROR ))
1087+ return CPY_NONE_ERROR ;
1088+ uint8_t tag = read_tag_internal (data );
1089+ if (unlikely (tag == CPY_LL_UINT_ERROR && PyErr_Occurred ())) {
1090+ return CPY_NONE_ERROR ;
1091+ }
1092+ if (unlikely (_skip_object (data , tag ) == CPY_NONE_ERROR ))
1093+ return CPY_NONE_ERROR ;
1094+ }
1095+ return CPY_NONE ;
1096+ }
1097+
1098+ // Similar to mypy/cache.py, the convention is that the caller reads
1099+ // the opening tag for custom classes.
1100+ static inline char
1101+ _skip_class (PyObject * data ) {
1102+ while (1 ) {
1103+ uint8_t tag = read_tag_internal (data );
1104+ if (unlikely (tag == CPY_LL_UINT_ERROR && PyErr_Occurred ())) {
1105+ return CPY_NONE_ERROR ;
1106+ }
1107+ if (tag == END_TAG ) {
1108+ return CPY_NONE ;
1109+ }
1110+ if (unlikely (_skip_object (data , tag ) == CPY_NONE_ERROR )) {
1111+ return CPY_NONE_ERROR ;
1112+ }
1113+ }
1114+ }
1115+
1116+ // Instance has special compact layout (as an important optimization).
1117+ static inline char
1118+ _skip_instance (PyObject * data ) {
1119+ uint8_t second_tag = read_tag_internal (data );
1120+ if (unlikely (second_tag == CPY_LL_UINT_ERROR && PyErr_Occurred ())) {
1121+ return CPY_NONE_ERROR ;
1122+ }
1123+ if (second_tag >= INSTANCE_STR && second_tag <= INSTANCE_OBJECT ) {
1124+ return CPY_NONE ;
1125+ }
1126+ if (second_tag == INSTANCE_SIMPLE ) {
1127+ return _skip_str_bytes (data );
1128+ }
1129+ if (second_tag == INSTANCE_GENERIC ) {
1130+ return _skip_class (data );
1131+ }
1132+ PyErr_Format (PyExc_ValueError , "Unexpected instance tag: %d" , second_tag );
1133+ return CPY_NONE_ERROR ;
1134+ }
1135+
1136+ // This is the main dispatch point. Branches are ordered manually
1137+ // based roughly on frequency in self-check.
1138+ static char
1139+ _skip_object (PyObject * data , uint8_t tag ) {
1140+ if (tag == LITERAL_STR || tag == LITERAL_BYTES )
1141+ return _skip_str_bytes (data );
1142+ if (tag == LITERAL_NONE || tag == LITERAL_FALSE || tag == LITERAL_TRUE )
1143+ return CPY_NONE ;
1144+ if (tag == LIST_GEN || tag == TUPLE_GEN )
1145+ return _skip_list_gen (data );
1146+ if (tag == LITERAL_INT )
1147+ return _skip_int (data );
1148+ if (tag == INSTANCE )
1149+ return _skip_instance (data );
1150+ // We intentionally exclude MypyFile as a sanity check. Module symbols should
1151+ // be always handled via cross_ref, and never appear in a symbol table as is.
1152+ if (tag > MYPY_FILE && tag < RESERVED )
1153+ return _skip_class (data );
1154+ if (tag == LIST_INT )
1155+ return _skip_list_int (data );
1156+ if (tag == LIST_STR || tag == LIST_BYTES )
1157+ return _skip_list_str_bytes (data );
1158+ if (tag == DICT_STR_GEN )
1159+ return _skip_dict_str_gen (data );
1160+ if (tag == LITERAL_FLOAT )
1161+ return _skip (data , 8 );
1162+ if (tag == LITERAL_COMPLEX )
1163+ return _skip (data , 16 );
1164+ PyErr_Format (PyExc_ValueError , "Unsupported tag: %d" , tag );
1165+ return CPY_NONE_ERROR ;
1166+ }
1167+
1168+ static PyObject *
1169+ extract_symbol_internal (PyObject * data ) {
1170+ char * ptr = ((ReadBufferObject * )data )-> ptr ;
1171+ if (unlikely (_skip_class (data ) == CPY_NONE_ERROR ))
1172+ return NULL ;
1173+ Py_ssize_t size = ((ReadBufferObject * )data )-> ptr - ptr ;
1174+ PyObject * res = PyBytes_FromStringAndSize (ptr , size );
1175+ if (unlikely (res == NULL ))
1176+ return NULL ;
1177+ return res ;
1178+ }
1179+
1180+ static PyObject *
1181+ extract_symbol (PyObject * self , PyObject * const * args , size_t nargs ) {
1182+ if (unlikely (nargs != 1 )) {
1183+ PyErr_Format (PyExc_TypeError ,
1184+ "extract_symbol() takes exactly 1 argument (%zu given)" , nargs );
1185+ return NULL ;
1186+ }
1187+ PyObject * data = args [0 ];
1188+ _CHECK_READ_BUFFER (data , NULL )
1189+ return extract_symbol_internal (data );
1190+ }
1191+
9231192static uint8_t
9241193cache_version_internal (void ) {
9251194 return 0 ;
@@ -954,6 +1223,7 @@ static PyMethodDef librt_internal_module_methods[] = {
9541223 {"write_tag" , (PyCFunction )write_tag , METH_FASTCALL , PyDoc_STR ("write a short int" )},
9551224 {"read_tag" , (PyCFunction )read_tag , METH_FASTCALL , PyDoc_STR ("read a short int" )},
9561225 {"cache_version" , (PyCFunction )cache_version , METH_NOARGS , PyDoc_STR ("cache format version" )},
1226+ {"extract_symbol" , (PyCFunction )extract_symbol , METH_FASTCALL , PyDoc_STR ("extract bytes for a mypy symbol" )},
9571227 {NULL , NULL , 0 , NULL }
9581228};
9591229
@@ -1005,6 +1275,7 @@ librt_internal_module_exec(PyObject *m)
10051275 (void * )ReadBuffer_type_internal ,
10061276 (void * )WriteBuffer_type_internal ,
10071277 (void * )NativeInternal_API_Version ,
1278+ (void * )extract_symbol_internal
10081279 };
10091280 PyObject * c_api_object = PyCapsule_New ((void * )NativeInternal_API , "librt.internal._C_API" , NULL );
10101281 if (PyModule_Add (m , "_C_API" , c_api_object ) < 0 ) {
0 commit comments