大师兄的Python源码学习笔记(十一): Python的虚拟机框架
大师兄的Python源码学习笔记(十三): Python虚拟机中的一般表达式(二)
一、模拟查看.pyc文件的结构
- 我们可以自己创建一个方法模拟读取.pyc文件。
demo.py
>>>i = 1
>>>s = "str"
>>>d = {}
>>>l = []
>>>st = {1}
>>>tp = (1,)
pycparser.py
"""Dump the code object stored in the cached .pyc file of the ``demo`` module."""
import sys
import demo
import types
import marshal
import dis


def show_code(code, indent=''):
    """Print the main attributes of a code object to stdout.

    Nested code objects found in ``code.co_consts`` are printed
    recursively with a deeper indent; every other constant is printed
    as-is on its own line.

    Args:
        code: The code object to describe.
        indent: Prefix written before each attribute line.
    """
    print(f"{indent}code")
    indent += ' '
    print(f"{indent}argcount {code.co_argcount}")
    print(f"{indent}nlocals {code.co_nlocals}")
    print(f"{indent}stacksize {code.co_stacksize}")
    print(f"{indent}flags {code.co_flags:04x}")
    # The disassembly is written by ``dis`` itself and is not prefixed
    # with ``indent``.
    dis.disassemble(code)
    print(f"{indent}consts")
    for const in code.co_consts:
        if isinstance(const, types.CodeType):
            show_code(const, indent + ' ')
        else:
            print(f" {indent}{const}")
    print(f"{indent}names {code.co_names}")
    print(f"{indent}varnames {code.co_varnames}")
    print(f"{indent}freevars {code.co_freevars}")
    print(f"{indent}cellvars {code.co_cellvars}")
    print(f"{indent}filename {code.co_filename}")
    print(f"{indent}name {code.co_name}")
    print(f"{indent}firstlineno {code.co_firstlineno}")


def show_file():
    """Load the code object from ``demo.__cached__`` and print it.

    Prints whether ``demo`` is a module, then the code object itself,
    then the detailed dump produced by ``show_code``.
    """
    # Pairs of (header size in bytes, minimum interpreter version).
    # Scanning from the newest entry picks the size for the running version.
    header_sizes = [(8, (0, 9, 2)), (12, (3, 6)), (16, (3, 7))]
    header_size = next(s for s, v in reversed(header_sizes) if sys.version_info >= v)
    print(isinstance(demo, types.ModuleType))
    with open(demo.__cached__, "rb") as f:
        # Skip the header; the marshalled code object follows it.
        f.read(header_size)
        # NOTE: marshal must only be used on trusted files such as our own .pyc.
        code = marshal.load(f)
    print(code)
    show_code(code)


if __name__ == '__main__':
    show_file()
True
<code object <module> at 0x000001ED4C1F6E40, file "D:\demo.py", line 1>
code
argcount 0
nlocals 0
stacksize 1
flags 0040
1 0 LOAD_CONST 0 (1)
2 STORE_NAME 0 (i)
2 4 LOAD_CONST 1 ('str')
6 STORE_NAME 1 (s)
3 8 BUILD_MAP 0
10 STORE_NAME 2 (d)
4 12 BUILD_LIST 0
14 STORE_NAME 3 (l)
5 16 LOAD_CONST 0 (1)
18 BUILD_SET 1
20 STORE_NAME 4 (st)
6 22 LOAD_CONST 2 ((1,))
24 STORE_NAME 5 (tp)
26 LOAD_CONST 3 (None)
28 RETURN_VALUE
consts
1
str
(1,)
None
names ('i', 's', 'd', 'l', 'st', 'tp')
varnames ()
freevars ()
cellvars ()
filename D:\demo.py
name <module>
firstlineno 1
二、简单内建对象的创建
- 在
_PyEval_EvalFrameDefault
函数中使用了大量的宏,其中包括对运行时栈的各种操作:
- 访问tuple中的元素:
/* Read item i of the tuple v.  Without Py_DEBUG this expands to the
   PyTuple_GET_ITEM macro; with Py_DEBUG it calls PyTuple_GetItem instead. */
#ifndef Py_DEBUG
#define GETITEM(v, i) PyTuple_GET_ITEM((PyTupleObject *)(v), (i))
#else
#define GETITEM(v, i) PyTuple_GetItem((v), (i))
#endif
- 调整栈顶指针:
/* Move the stack pointer by n slots (callers pass a negative n to drop
   entries). */
#define BASIC_STACKADJ(n) (stack_pointer += n)
/* Adjust the stack pointer, call prtrace on the new top when lltrace is
   set, then assert that the stack level does not exceed co_stacksize. */
#define STACKADJ(n)     { (void)(BASIC_STACKADJ(n), \
                          lltrace && prtrace(TOP(), "stackadj")); \
                          assert(STACK_LEVEL() <= co->co_stacksize); }
- 入栈操作:
/* Store v in the slot at stack_pointer, then advance the pointer. */
#define BASIC_PUSH(v)     (*stack_pointer++ = (v))
/* Push v, call prtrace on the new top when lltrace is set, then assert
   that the stack level does not exceed co_stacksize. */
#define PUSH(v)         { (void)(BASIC_PUSH(v), \
                          lltrace && prtrace(TOP(), "push")); \
                          assert(STACK_LEVEL() <= co->co_stacksize); }
- 出栈操作:
/* Step the stack pointer back one slot and yield the value stored there. */
#define BASIC_POP()       (*--stack_pointer)
/* Call prtrace on the current top when lltrace is set, then pop it; the
   value of the whole expression is the popped object. */
#define POP()           ((void)(lltrace && prtrace(TOP(), "pop")), \
                         BASIC_POP())
1.1 创建整数和字符串变量
- 截取demo.pyc中创建整数值i的字节码
1 0 LOAD_CONST 0 (1)
2 STORE_NAME 0 (i)
- 其中第一行的
LOAD_CONST
在虚拟机的执行动作如下:
ceval.c
/* LOAD_CONST: push consts[oparg] onto the value stack. */
TARGET(LOAD_CONST) {
    /* Look up entry number oparg in the constants tuple. */
    PyObject *value = GETITEM(consts, oparg);
    /* Take an extra reference for the copy that is placed on the stack. */
    Py_INCREF(value);
    PUSH(value);
    FAST_DISPATCH();
}
- GETITEM(consts, oparg)
实际上就是调用了宏GETITEM(v, i)
,在非调试构建下即PyTuple_GET_ITEM((PyTupleObject *)(v), (i))
。
- 而
consts
实际上就是PyCodeObject
中的co_consts
,也称为常量表。
- PUSH(v)
宏则将从co_consts
中读取的对象压入运行时栈中。
- 第二行
STORE_NAME
在虚拟机的执行动作如下:
ceval.c
...
case STORE_NAME:
{
PyObject *names = f->f_code->co_names;
PyObject *name = GETITEM(names, oparg);
PyObject *locals = f->f_locals;
if (locals && PyDict_CheckExact(locals) &&
PyDict_GetItem(locals, name) == v) {
if (PyDict_DelItem(locals, name) != 0) {
PyErr_Clear();
}
}
break;
}
...
- 这条指令改变了
locals
的名字空间,并创建了变量和值的映射关系(i=1)。
1.2 创建字典
- 截取demo.py中创建字典的字节码:
3 8 BUILD_MAP 0
10 STORE_NAME 2 (d)
- 其中第一行
BUILD_MAP
在虚拟机的执行动作如下:
ceval.c
/* BUILD_MAP: build a dict from the oparg key/value pairs on the value
   stack and push the dict in their place. */
TARGET(BUILD_MAP) {
    Py_ssize_t i;
    /* Allocate the dict with oparg passed as the presize hint. */
    PyObject *map = _PyDict_NewPresized((Py_ssize_t)oparg);
    if (map == NULL)
        goto error;
    /* Walk the pairs from the deepest one towards the top of the stack;
       within each pair the key sits one slot below its value. */
    for (i = oparg; i > 0; i--) {
        int err;
        PyObject *key = PEEK(2*i);
        PyObject *value = PEEK(2*i - 1);
        err = PyDict_SetItem(map, key, value);
        if (err != 0) {
            Py_DECREF(map);
            goto error;
        }
    }

    /* Every pair has been inserted: pop the 2*oparg stack entries and
       release the references the stack held on them. */
    while (oparg--) {
        Py_DECREF(POP());
        Py_DECREF(POP());
    }
    PUSH(map);
    DISPATCH();
}
- 这段代码首先创建了一个空的PyDictObject对象。
- 将键值对加入到PyDictObject对象中。
- 最后将PyDictObject压入到运行时栈中。
- 第二行
STORE_NAME
与上一节相同,将栈顶的字典对象弹出,并在locals名字空间中建立变量名d与它的映射关系。
1.3 创建列表
- 截取demo.py中创建列表的字节码:
4 12 BUILD_LIST 0
14 STORE_NAME 3 (l)
- 其中第一行
BUILD_LIST
在虚拟机的执行动作如下:
ceval.c
/* BUILD_LIST: build a list from the top oparg stack entries and push the
   list in their place. */
TARGET(BUILD_LIST) {
    PyObject *list = PyList_New(oparg);
    if (list == NULL)
        goto error;
    /* Fill the slots from the last index down to 0, so the entry on top
       of the stack ends up as the final list element.  Note that no
       Py_DECREF is issued for the popped items here. */
    while (--oparg >= 0) {
        PyObject *item = POP();
        PyList_SET_ITEM(list, oparg, item);
    }
    PUSH(list);
    DISPATCH();
}
- 这段代码首先创建了一个oparg长度的PyListObject对象。
- 将每一个元素从运行时栈中弹出,之后塞到PyListObject对象中。
- 最后将PyListObject对象塞入到运行时栈中。
- 第二行
STORE_NAME
与上一节相同,将栈顶的列表对象弹出,并在locals名字空间中建立变量名l与它的映射关系。
1.4 创建set
- 截取demo.py中创建set的字节码:
5 16 LOAD_CONST 0 (1)
18 BUILD_SET 1
20 STORE_NAME 4 (st)
- 第一行
LOAD_CONST
将即将创建的set的唯一一个元素1(取自常量表)压入栈中。
- 第二行
BUILD_SET
在虚拟机的执行动作如下:
ceval.c
/* BUILD_SET: build a set from the top oparg stack entries and push the
   set in their place. */
TARGET(BUILD_SET) {
    /* Start from an empty set. */
    PyObject *set = PySet_New(NULL);
    int err = 0;
    int i;
    if (set == NULL)
        goto error;
    /* Visit the entries from the deepest one to the top of the stack.
       After the first failure no further items are added, but every
       item's stack reference is still released. */
    for (i = oparg; i > 0; i--) {
        PyObject *item = PEEK(i);
        if (err == 0)
            err = PySet_Add(set, item);
        Py_DECREF(item);
    }
    /* Drop all oparg entries from the stack in one step. */
    STACKADJ(-oparg);
    if (err != 0) {
        Py_DECREF(set);
        goto error;
    }
    PUSH(set);
    DISPATCH();
}
- 这段代码首先创建了一个空的PySetObject对象。
- 通过PEEK依次读取运行时栈上的oparg个元素并加入到PySetObject对象中,然后用STACKADJ(-oparg)一次性将这些元素移出运行时栈。
- 最后将PySetObject对象塞入到运行时栈中。
- 第三行STORE_NAME,将栈顶的set对象弹出,并在locals名字空间中建立变量名st与它的映射关系。
1.5 创建tuple
- 截取demo.py中创建tuple的字节码:
6 22 LOAD_CONST 2 ((1,))
24 STORE_NAME 5 (tp)
- 这里的第一行
LOAD_CONST
与1.1节中的完全相同:由于(1,)的元素全部是常量,编译器在编译期就把整个tuple作为一个常量放进了常量表co_consts中(即字节码中的第2号常量),运行时只需通过宏:
#define GETITEM(v, i) PyTuple_GET_ITEM((PyTupleObject *)(v), (i))
- 从常量表中取出这个PyTupleObject对象并直接压入栈中,因此不需要额外的创建指令。
- 第二行
STORE_NAME
,将栈顶的tuple对象弹出,并在locals名字空间中建立变量名tp与它的映射关系。
1.6 返回值
- 截取demo.py中最后两行字节码:
26 LOAD_CONST 3 (None)
28 RETURN_VALUE
- 第一行
LOAD_CONST
将None压入栈中,作为这段模块级代码的返回值(模块代码中没有显式的return语句,所以字节码末尾固定是返回None)。
- 第二行
RETURN_VALUE
在虚拟机中的执行动作如下:
ceval.c
/* RETURN_VALUE: take the top of the value stack as the frame's return
   value and leave the instruction dispatch through fast_block_end. */
TARGET(RETURN_VALUE) {
    retval = POP();
    /* Record the reason for leaving so the unwinding code can act on it. */
    why = WHY_RETURN;
    goto fast_block_end;
}
ceval.c
fast_block_end:
assert(why != WHY_NOT);
/* Unwind stacks if a (pseudo) exception occurred */
while (why != WHY_NOT && f->f_iblock > 0) {
/* Peek at the current block. */
PyTryBlock *b = &f->f_blockstack[f->f_iblock - 1];
assert(why != WHY_YIELD);
if (b->b_type == SETUP_LOOP && why == WHY_CONTINUE) {
why = WHY_NOT;
JUMPTO(PyLong_AS_LONG(retval));
Py_DECREF(retval);
break;
}
/* Now we have to pop the block. */
f->f_iblock--;
if (b->b_type == EXCEPT_HANDLER) {
UNWIND_EXCEPT_HANDLER(b);
continue;
}
UNWIND_BLOCK(b);
if (b->b_type == SETUP_LOOP && why == WHY_BREAK) {
why = WHY_NOT;
JUMPTO(b->b_handler);
break;
}
if (why == WHY_EXCEPTION && (b->b_type == SETUP_EXCEPT
|| b->b_type == SETUP_FINALLY)) {
PyObject *exc, *val, *tb;
int handler = b->b_handler;
_PyErr_StackItem *exc_info = tstate->exc_info;
/* Beware, this invalidates all b->b_* fields */
PyFrame_BlockSetup(f, EXCEPT_HANDLER, -1, STACK_LEVEL());
PUSH(exc_info->exc_traceback);
PUSH(exc_info->exc_value);
if (exc_info->exc_type != NULL) {
PUSH(exc_info->exc_type);
}
else {
Py_INCREF(Py_None);
PUSH(Py_None);
}
PyErr_Fetch(&exc, &val, &tb);
/* Make the raw exception data
available to the handler,
so a program can emulate the
Python main loop. */
PyErr_NormalizeException(
&exc, &val, &tb);
if (tb != NULL)
PyException_SetTraceback(val, tb);
else
PyException_SetTraceback(val, Py_None);
Py_INCREF(exc);
exc_info->exc_type = exc;
Py_INCREF(val);
exc_info->exc_value = val;
exc_info->exc_traceback = tb;
if (tb == NULL)
tb = Py_None;
Py_INCREF(tb);
PUSH(tb);
PUSH(val);
PUSH(exc);
why = WHY_NOT;
JUMPTO(handler);
break;
}
if (b->b_type == SETUP_FINALLY) {
if (why & (WHY_RETURN | WHY_CONTINUE))
PUSH(retval);
PUSH(PyLong_FromLong((long)why));
why = WHY_NOT;
JUMPTO(b->b_handler);
break;
}
} /* unwind stack */
/* End the loop if we still have an error (or return) */
if (why != WHY_NOT)
break;
assert(!PyErr_Occurred());
} /* main loop */
assert(why != WHY_YIELD);
/* Pop remaining stack entries. */
while (!EMPTY()) {
PyObject *o = POP();
Py_XDECREF(o);
}
if (why != WHY_RETURN)
retval = NULL;
assert((retval != NULL) ^ (PyErr_Occurred() != NULL));
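- 这段代码先根据why的值展开块栈(f_blockstack):对于本例,why为WHY_RETURN且f_iblock为0,所以while循环体不会执行,随后因为why != WHY_NOT而跳出主循环。
- 跳出主循环后,将运行时栈中剩余的对象逐个弹出并减少引用计数,从而清空运行时栈。
- 最后,如果why不是WHY_RETURN,则将retval置为NULL;否则保留retval作为返回值。这段代码并不会修改locals名字空间。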