[python internal] From Python to Bytecode to C
Abstract
- Use the `dis` module to disassemble a minimal example into bytecode.
- Find the entry for each disassembled bytecode instruction in Python/ceval.c.
- Now the door is open and the road is at your feet.
Python -> Bytecode -> C
Version: Python-3.10.6
1. Example - Python
>>> def f():
... d = {}
... d[1] = 1
...
2. Python -> Bytecode
>>> import dis
>>>
>>> def f():
... d = {}
... d[1] = 1
...
>>>
>>> dis.dis(f)
2 0 BUILD_MAP 0
2 STORE_FAST 0 (d)
3 4 LOAD_CONST 1 (1)
6 LOAD_FAST 0 (d)
8 LOAD_CONST 1 (1)
10 STORE_SUBSCR
12 LOAD_CONST 0 (None)
14 RETURN_VALUE
>>>
3. Bytecode -> C
3.0 bytecode entry function
// The entries (handlers) for all bytecode instructions are in Python/ceval.c
// The bytecode entry function. The elided body ("...") holds the
// `switch (opcode)` dispatch that the sections below excerpt one case at a time.
PyObject* _Py_HOT_FUNCTION
_PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
{
_Py_EnsureTstateNotNULL(tstate);
...
return _Py_CheckFunctionResult(tstate, NULL, retval, __func__);
}
3.1 BUILD_MAP
bytecode: BUILD_MAP 0
python: d = {}
// Python/ceval.c
//
// opcode = BUILD_MAP
// oparg = 0
//
PyObject* _Py_HOT_FUNCTION
_PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
{
_Py_EnsureTstateNotNULL(tstate);
...
switch (opcode) {
...
case TARGET(BUILD_MAP): {
Py_ssize_t i;
// Create the dict, presized for `oparg` items (oparg == 0 for `d = {}`).
PyObject *map = _PyDict_NewPresized((Py_ssize_t)oparg);
if (map == NULL)
goto error;
// Copy the `oparg` key/value pairs sitting on the stack into the dict:
// PEEK(2*i) is a key and PEEK(2*i - 1) is its value.
// With oparg == 0 this loop body never runs.
for (i = oparg; i > 0; i--) {
int err;
PyObject *key = PEEK(2*i);
PyObject *value = PEEK(2*i - 1);
err = PyDict_SetItem(map, key, value);
if (err != 0) {
Py_DECREF(map);
goto error;
}
}
// Pop and DECREF the 2 * oparg stack entries that were only peeked above.
// With oparg == 0 nothing is popped.
while (oparg--) {
Py_DECREF(POP());
Py_DECREF(POP());
}
// The new dict becomes the top of the stack.
PUSH(map);
DISPATCH();
}
...
}
...
return _Py_CheckFunctionResult(tstate, NULL, retval, __func__);
}
// Objects/dictobject.c
//
// create a dict object
//
// Creates a new dict intended to hold about `minused` items.
// Returns NULL when new_keys_object() fails; otherwise returns the result of
// PyDict_New() (small requests) or new_dict() (presized requests).
PyObject *
_PyDict_NewPresized(Py_ssize_t minused)
{
// Upper bound on the key-table size this function will preallocate.
const Py_ssize_t max_presize = 128 * 1024;
Py_ssize_t newsize;
PyDictKeysObject *new_keys;
// Small requests fall back to the default constructor.
// NOTE(review): `BUILD_MAP 0` passes minused == 0, which presumably takes this
// branch -- USABLE_FRACTION and PyDict_MINSIZE are defined elsewhere; confirm.
if (minused <= USABLE_FRACTION(PyDict_MINSIZE)) {
return PyDict_New();
}
/* There are no strict guarantee that returned dict can contain minused
* items without resize. So we create medium size dict instead of very
* large dict or MemoryError.
*/
// Very large requests are capped at max_presize; otherwise the key-table size
// is derived from minused.
if (minused > USABLE_FRACTION(max_presize)) {
newsize = max_presize;
}
else {
newsize = estimate_keysize(minused);
}
new_keys = new_keys_object(newsize);
if (new_keys == NULL)
return NULL;
return new_dict(new_keys, NULL);
}
3.2 STORE_FAST
bytecode: STORE_FAST 0 (d)
// Python/ceval.c
//
// Pops TOS and stores it into the local variable slot `oparg` (here: d), i.e. binds the newly created object to a name
//
//
PyObject* _Py_HOT_FUNCTION
_PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
{
_Py_EnsureTstateNotNULL(tstate);
...
switch (opcode) {
...
case TARGET(STORE_FAST): {
PREDICTED(STORE_FAST);
// Take the object on top of the stack (in the example: the dict that
// BUILD_MAP just pushed) ...
PyObject *value = POP();
// ... and store it in local slot `oparg` (slot 0 is `d` in the example).
SETLOCAL(oparg, value);
DISPATCH();
}
...
}
...
return _Py_CheckFunctionResult(tstate, NULL, retval, __func__);
}
3.3 LOAD_CONST
bytecode: LOAD_CONST 1 (1)
// Python/ceval.c
//
// Pushes `co_consts[consti]` onto the stack.
//
//
PyObject* _Py_HOT_FUNCTION
_PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
{
_Py_EnsureTstateNotNULL(tstate);
...
switch (opcode) {
...
case TARGET(LOAD_CONST): {
PREDICTED(LOAD_CONST);
// Look up constant number `oparg` in `consts`.
PyObject *value = GETITEM(consts, oparg);
// Take a new reference for the stack entry, then push it.
Py_INCREF(value);
PUSH(value);
DISPATCH();
}
...
}
...
return _Py_CheckFunctionResult(tstate, NULL, retval, __func__);
}
3.4 LOAD_FAST
bytecode: LOAD_FAST 0 (d)
// Python/ceval.c
//
// Pushes a reference to the local `co_varnames[var_num]` onto the stack.
//
//
PyObject* _Py_HOT_FUNCTION
_PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
{
_Py_EnsureTstateNotNULL(tstate);
...
switch (opcode) {
...
case TARGET(LOAD_FAST): {
// Read local slot `oparg` (slot 0 is `d` in the example).
PyObject *value = GETLOCAL(oparg);
// A NULL slot is reported as UnboundLocalError, using the variable's
// name from co_varnames[oparg] for the message.
if (value == NULL) {
format_exc_check_arg(tstate, PyExc_UnboundLocalError,
UNBOUNDLOCAL_ERROR_MSG,
PyTuple_GetItem(co->co_varnames, oparg));
goto error;
}
// Take a new reference for the stack entry, then push it.
Py_INCREF(value);
PUSH(value);
DISPATCH();
}
...
}
...
return _Py_CheckFunctionResult(tstate, NULL, retval, __func__);
}
3.5 STORE_SUBSCR
bytecode: STORE_SUBSCR
3 4 LOAD_CONST 1 (1) // pushed first -> TOS2 = 1 (value)
6 LOAD_FAST 0 (d) // pushed second -> TOS1 = d (container)
8 LOAD_CONST 1 (1) // pushed last -> TOS = 1 (key)
10 STORE_SUBSCR // TOS1[TOS] = TOS2 => d[1] = 1
Chain: _PyEval_EvalFrameDefault -> PyObject_SetItem -> dict_ass_sub -> PyDict_SetItem -> insertdict
// Python/ceval.c
//
// Implements `TOS1[TOS] = TOS2`.
//
//
PyObject* _Py_HOT_FUNCTION
_PyEval_EvalFrameDefault(PyThreadState *tstate, PyFrameObject *f, int throwflag)
{
_Py_EnsureTstateNotNULL(tstate);
...
switch (opcode) {
...
case TARGET(STORE_SUBSCR): {
// Read the three operands before shrinking the stack:
//   TOS  -> sub       (the key, pushed last)
//   TOS1 -> container (the object being indexed, `d` in the example)
//   TOS2 -> v         (the value, pushed first)
PyObject *sub = TOP();
PyObject *container = SECOND();
PyObject *v = THIRD();
int err;
STACK_SHRINK(3);
/* container[sub] = v */
err = PyObject_SetItem(container, sub, v);
// DECREF the three operands removed from the stack, whether or not the
// assignment succeeded; the error check comes afterwards.
Py_DECREF(v);
Py_DECREF(container);
Py_DECREF(sub);
if (err != 0)
goto error;
DISPATCH();
}
...
}
...
return _Py_CheckFunctionResult(tstate, NULL, retval, __func__);
}
// Objects/abstract.c
//
// abstract (type-independent) implementation of item assignment: container[key] = value
// dispatches on type(container): the mapping slot is tried first, then the sequence slot
//
// Implements `o[key] = value` for any object.
// Returns -1 on the error paths below; otherwise returns whatever the type's
// mapping slot or PySequence_SetItem() returns.
int
PyObject_SetItem(PyObject *o, PyObject *key, PyObject *value)
{
// All three arguments are required; a NULL is reported through null_error().
if (o == NULL || key == NULL || value == NULL) {
null_error();
return -1;
}
// Annotation:
// 1. mapping protocol first: if the type provides tp_as_mapping->mp_ass_subscript
//    (dict does, via dict_as_mapping), delegate the assignment to that slot
PyMappingMethods *m = Py_TYPE(o)->tp_as_mapping;
if (m && m->mp_ass_subscript) {
int res = m->mp_ass_subscript(o, key, value);
assert(_Py_CheckSlotResult(o, "__setitem__", res >= 0));
return res;
}
// Annotation:
// 2. otherwise try the sequence protocol: the key must be index-like and is
//    converted to a Py_ssize_t before calling PySequence_SetItem()
// NOTE(review): list presumably also defines mp_ass_subscript and is therefore
// handled by branch 1 above -- confirm in Objects/listobject.c
if (Py_TYPE(o)->tp_as_sequence) {
if (_PyIndex_Check(key)) {
Py_ssize_t key_value;
key_value = PyNumber_AsSsize_t(key, PyExc_IndexError);
// -1 is only an error if an exception is actually pending.
if (key_value == -1 && PyErr_Occurred())
return -1;
return PySequence_SetItem(o, key_value, value);
}
else if (Py_TYPE(o)->tp_as_sequence->sq_ass_item) {
type_error("sequence index must be "
"integer, not '%.200s'", key);
return -1;
}
}
// Neither protocol applies: the object does not support item assignment.
type_error("'%.200s' object does not support item assignment", o);
return -1;
}
// Include/object.h
// declares the type object, which defines all the attributes (slots) that a type can support
//
/* PyTypeObject structure is defined in cpython/object.h.
In Py_LIMITED_API, PyTypeObject is an opaque structure. */
typedef struct _typeobject PyTypeObject;
// Include/cpython/object.h
// If this structure is modified, Doc/includes/typestruct.h should be updated
// as well.
struct _typeobject {
PyObject_VAR_HEAD
const char *tp_name; /* For printing, in format "<module>.<name>" */
Py_ssize_t tp_basicsize, tp_itemsize; /* For allocation */
/* Methods to implement standard operations */
destructor tp_dealloc;
Py_ssize_t tp_vectorcall_offset;
getattrfunc tp_getattr;
setattrfunc tp_setattr;
PyAsyncMethods *tp_as_async; /* formerly known as tp_compare (Python 2)
or tp_reserved (Python 3) */
reprfunc tp_repr;
/* Method suites for standard classes */
// tp_as_mapping is the suite PyObject_SetItem() consults first for
// `o[key] = value`; tp_as_sequence is its fallback.
PyNumberMethods *tp_as_number;
PySequenceMethods *tp_as_sequence;
PyMappingMethods *tp_as_mapping;
/* More standard operations (here for binary compatibility) */
hashfunc tp_hash;
ternaryfunc tp_call;
reprfunc tp_str;
getattrofunc tp_getattro;
setattrofunc tp_setattro;
/* Functions to access object as input/output buffer */
PyBufferProcs *tp_as_buffer;
/* Flags to define presence of optional/expanded features */
unsigned long tp_flags;
const char *tp_doc; /* Documentation string */
/* Assigned meaning in release 2.0 */
/* call function for all accessible objects */
traverseproc tp_traverse;
/* delete references to contained objects */
inquiry tp_clear;
/* Assigned meaning in release 2.1 */
/* rich comparisons */
richcmpfunc tp_richcompare;
/* weak reference enabler */
Py_ssize_t tp_weaklistoffset;
/* Iterators */
getiterfunc tp_iter;
iternextfunc tp_iternext;
/* Attribute descriptor and subclassing stuff */
struct PyMethodDef *tp_methods; // methods supported by the type's objects; part of what dir(o) shows
struct PyMemberDef *tp_members;
struct PyGetSetDef *tp_getset;
// Strong reference on a heap type, borrowed reference on a static type
struct _typeobject *tp_base;
PyObject *tp_dict;
descrgetfunc tp_descr_get;
descrsetfunc tp_descr_set;
Py_ssize_t tp_dictoffset;
initproc tp_init;
allocfunc tp_alloc;
newfunc tp_new;
freefunc tp_free; /* Low-level free-memory routine */
inquiry tp_is_gc; /* For PyObject_IS_GC */
PyObject *tp_bases;
PyObject *tp_mro; /* method resolution order */
PyObject *tp_cache;
PyObject *tp_subclasses;
PyObject *tp_weaklist;
destructor tp_del;
/* Type attribute cache version tag. Added in version 2.6 */
unsigned int tp_version_tag;
destructor tp_finalize;
vectorcallfunc tp_vectorcall;
};
// Include/cpython/object.h
// Method suite referenced by PyTypeObject.tp_as_mapping.
typedef struct {
lenfunc mp_length;
binaryfunc mp_subscript;
// Called by PyObject_SetItem() as mp_ass_subscript(o, key, value).
objobjargproc mp_ass_subscript;
} PyMappingMethods;
// Objects/dictobject.c
//
// match rule: PyTypeObject->tp_as_mapping != NULL
// The type object for dict (other slots elided). Its tp_as_mapping slot points
// at dict_as_mapping, so PyObject_SetItem() takes the mapping branch for dicts.
PyTypeObject PyDict_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
"dict",
...
&dict_as_mapping, /* tp_as_mapping */
...
};
// match rule: PyMappingMethods->mp_ass_subscript(PyDictObject *mp, PyObject *v, PyObject *w)
// dict's mapping method suite; dict_ass_sub fills the mp_ass_subscript slot
// that PyObject_SetItem() calls.
static PyMappingMethods dict_as_mapping = {
(lenfunc)dict_length, /*mp_length*/
(binaryfunc)dict_subscript, /*mp_subscript*/
(objobjargproc)dict_ass_sub, /*mp_ass_subscript*/
};
// DELETE_SUBSCR also uses this function: when "w" is NULL, the key is deleted;
// when "w" is not NULL, the key is set
// mp_ass_subscript implementation for dict.
//   mp: the dict
//   v:  the key
//   w:  the value to store, or NULL to delete the key
// Returns the result of PyDict_DelItem() or PyDict_SetItem() unchanged.
static int
dict_ass_sub(PyDictObject *mp, PyObject *v, PyObject *w)
{
if (w == NULL)
return PyDict_DelItem((PyObject *)mp, v);
else
return PyDict_SetItem((PyObject *)mp, v, w);
}
/* CAUTION: PyDict_SetItem() must guarantee that it won't resize the
* dictionary if it's merely replacing the value for an existing key.
* This means that it's safe to loop over a dictionary with PyDict_Next()
* and occasionally replace a value -- but you can't insert new keys or
* remove them.
*/
// Stores `value` under `key` in the dict `op`.
// Returns -1 if `op` is not a dict or the key cannot be hashed; otherwise
// returns the result of insert_to_emptydict() or insertdict().
int
PyDict_SetItem(PyObject *op, PyObject *key, PyObject *value)
{
PyDictObject *mp;
Py_hash_t hash;
// Only dict objects are accepted.
if (!PyDict_Check(op)) {
PyErr_BadInternalCall();
return -1;
}
assert(key);
assert(value);
mp = (PyDictObject *)op;
// For an exact str key, reuse the hash stored in the object unless it is -1;
// for any other key, or a stored hash of -1, compute it with PyObject_Hash().
// A computed hash of -1 is treated as failure.
if (!PyUnicode_CheckExact(key) ||
(hash = ((PyASCIIObject *) key)->hash) == -1)
{
hash = PyObject_Hash(key);
if (hash == -1)
return -1;
}
// A dict whose keys are still Py_EMPTY_KEYS takes a dedicated insertion path.
// NOTE(review): presumably the case for a freshly created `d = {}` -- confirm.
if (mp->ma_keys == Py_EMPTY_KEYS) {
return insert_to_emptydict(mp, key, hash, value);
}
/* insertdict() handles any resizing that might be necessary */
return insertdict(mp, key, hash, value);
}