Python Virtual Machine 関連のソース・コード

「dis/inspect モジュールを使った Python のハッキング」の中では詳細すぎるソース・コード部分を、こちら側においておきます。

MAKE_FUNCTION 関連

MAKE_FUNCTION コードが行う関数オブジェクトの生成とデフォルト引数値タプルの生成の C ソース・コード追跡を行います。

MAKE_FUNCTION 処理の開始部分

PyFunction_New(v, f->f_globals) で関数オブジェクトを生成しています。引数 v が code_object への参照値です。data stack に積まれています。PyEval_EvalFrame(PyFrameObject *f) の引数 f が呼び出し側の FrameObject へのポインタです。PyFunction_SetDefaults(x, v) マクロ関数で、関数オブジェクトの func_defaults 配列データ・メンバーに引数のデフォルト値を設定します。

PyObject * PyEval_EvalFrame(PyFrameObject *f)
{
            ・
            ・
        case MAKE_FUNCTION:
                v = POP(); /* code object */
                x = PyFunction_New(v, f->f_globals);
                Py_DECREF(v);
                /* XXX Maybe this should be a separate opcode? */
                # oparg にはデフォルト値付き引数の個数が入っている
                if (x != NULL && oparg > 0) {
                        v = PyTuple_New(oparg);
                        if (v == NULL) {
                                Py_DECREF(x);
                                x = NULL;
                                break;
                        }
                        while (--oparg >= 0) {
                                w = POP();
                                #define PyTuple_SET_ITEM(op, i, v)
                                # (((PyTupleObject *)(op))->ob_item[i] = v)
                                PyTuple_SET_ITEM(v, oparg, w);
                        }
                        # FunctionObject.func_defaults を設定します
                        err = PyFunction_SetDefaults(x, v);
                        Py_DECREF(v);
                }
                PUSH(x);
                break;
            ・
            ・
            ・
}

MAKE_FUNCTION→PyFunction_New(.)

PyFunction_New(v, f->f_globals) は関数オブジェクトの生成し、その関数オブジェクトが持つグローバル変数へのポインタを、関数呼び出し側のグローバル変数へのポインタの値に設定します。

typedef struct {
    PyObject_HEAD
    PyObject *func_code;
    PyObject *func_globals;
    PyObject *func_defaults;
    PyObject *func_closure;
    PyObject *func_doc;
    PyObject *func_name;
    PyObject *func_dict;
    PyObject *func_weakreflist;
    PyObject *func_module;
} PyFunctionObject;


#define PyObject_GC_New         PyObject_New
#define PyObject_New(type, typeobj)     ( (type *) _PyObject_New(typeobj) )

PyObject *
PyFunction_New(PyObject *code, PyObject *globals)
{
    PyFunctionObject *op = PyObject_GC_New(PyFunctionObject,
                        &PyFunction_Type);
    static PyObject *__name__ = 0;
    if (op != NULL) {
        PyObject *doc;
        PyObject *consts;
        PyObject *module;
        op->func_weakreflist = NULL;
        Py_INCREF(code);
        op->func_code = code;
        Py_INCREF(globals);
        op->func_globals = globals;
        op->func_name = ((PyCodeObject *)code)->co_name;
        Py_INCREF(op->func_name);
        op->func_defaults = NULL; /* No default arguments */
        op->func_closure = NULL;
        consts = ((PyCodeObject *)code)->co_consts;
        if (PyTuple_Size(consts) >= 1) {
            doc = PyTuple_GetItem(consts, 0);
            if (!PyString_Check(doc) && !PyUnicode_Check(doc))
                doc = Py_None;
        }
        else
            doc = Py_None;
        Py_INCREF(doc);
        op->func_doc = doc;
        op->func_dict = NULL;
        op->func_module = NULL;

        /* __module__: If module name is in globals, use it.
           Otherwise, use None.
        */
        if (!__name__) {
            __name__ = PyString_InternFromString("__name__");
            if (!__name__) {
                Py_DECREF(op);
                return NULL;
            }
        }
        module = PyDict_GetItem(globals, __name__);
        if (module) {
            Py_INCREF(module);
            op->func_module = module;
        }
    }
    else
        return NULL;
    _PyObject_GC_TRACK(op);
    return (PyObject *)op;
}

MAKE_FUNCTION→PyFunction_SetDefaults(.)

int
PyFunction_SetDefaults(PyObject *op, PyObject *defaults)
{
    if (!PyFunction_Check(op)) {
        PyErr_BadInternalCall();
        return -1;
    }
    if (defaults == Py_None)
        defaults = NULL;
    else if (PyTuple_Check(defaults)) {
        Py_XINCREF(defaults);
    }
    else {
        PyErr_SetString(PyExc_SystemError, "non-tuple default args");
        return -1;
    }
    Py_XDECREF(((PyFunctionObject *) op) -> func_defaults);
    ((PyFunctionObject *) op) -> func_defaults = defaults;
    return 0;
}

CALL_FUNCTION 関連

CALL_FUNCTION 処理の開始部分

#ifdef WITH_TSC
/* Use Pentium timestamp counter to mark certain events:
   inst0 -- beginning of switch statement for opcode dispatch
   inst1 -- end of switch statement (may be skipped)
   loop0 -- the top of the mainloop
   loop1 -- place where control returns again to top of mainloop 
            (may be skipped)
   intr1 -- beginning of long interruption
   intr2 -- end of long interruption

   Many opcodes call out to helper C functions.  In some cases, the
   time in those functions should be counted towards the time for the
   opcode, but not in all cases.  For example, a CALL_FUNCTION opcode
   calls another Python function; there's no point in charge all the
   bytecode executed by the called function to the caller.

   It's hard to make a useful judgement statically.  In the presence
   of operator overloading, it's impossible to tell if a call will
   execute new Python code or not.

   It's a case-by-case judgement.  I'll use intr1 for the following
   cases:

   EXEC_STMT
   IMPORT_STAR
   IMPORT_FROM
   CALL_FUNCTION (and friends)
 */

PyObject * PyEval_EvalFrame(PyFrameObject *f)
{
                    ・
                    ・
                case CALL_FUNCTION:
                {
                        PyObject **sp;
                        #define PCALL(POS) pcall[POS]++
                        #As a result, the relationship among the statistics appears to be
                        #PCALL_ALL == PCALL_FUNCTION + PCALL_METHOD - PCALL_BOUND_METHOD +
                        #             PCALL_CFUNCTION + PCALL_TYPE + PCALL_GENERATOR + PCALL_OTHER
                        PCALL(PCALL_ALL);
                        sp = stack_pointer;     # stack_pointer = f->f_stacktop;

#ifdef WITH_TSC
                        x = call_function(&sp, oparg, &intr0, &intr1);
#else
                        x = call_function(&sp, oparg);
#endif
                        stack_pointer = sp;
                        PUSH(x);
                        if (x != NULL)
                                continue;
                        break;
                }
                    ・
                    ・
}

CALL_FUNCTION → call_function(.)

Python interpreter 側の関数を呼び出すとき python code の関数と、dll:dynamic link libraly 側に実装されている関数呼び出しに分けて処理しています。Python interpreter で処理させるよりも dll で処理させるほうが Object Frame などを生成する必要がなく高速であり、python の関数を dll で実装することが多くあるためです。

またキーワード引数がある時と位置引数のみの時とで処理を振り分けます。位置引数だけのときは、文字列の参照対応を調べる必要がないので高速に処理できるためです。

static PyObject *
call_function(PyObject ***pp_stack, int oparg
#ifdef WITH_TSC
                , uint64* pintr0, uint64* pintr1
#endif
                )
{
        int na = oparg & 0xff;      # 位置引数の個数
        int nk = (oparg>>8) & 0xff; # キーワード引数の個数
        int n = na + 2 * nk;
        PyObject **pfunc = (*pp_stack) - n - 1;
        PyObject *func = *pfunc;    # スタックにあるコード・オブジェクトへの参照
        PyObject *x, *w;

        /* Always dispatch PyCFunction first, because these are
           presumed to be the most frequent callable object.
        */
        # PyCFunction_Check() は DLL 側に実装されている関数であるかをチェックする
        if (PyCFunction_Check(func) && nk == 0) {
                # こっちは dll 側に実装されている関数の呼び出し処理です
                int flags = PyCFunction_GET_FLAGS(func);
                PyThreadState *tstate = PyThreadState_GET();

                PCALL(PCALL_CFUNCTION);
                if (flags & (METH_NOARGS | METH_O)) {
                        PyCFunction meth = PyCFunction_GET_FUNCTION(func);
                        PyObject *self = PyCFunction_GET_SELF(func);
                        if (flags & METH_NOARGS && na == 0) {
                                C_TRACE(x, (*meth)(self,NULL));
                        }
                        else if (flags & METH_O && na == 1) {
                                PyObject *arg = EXT_POP(*pp_stack);
                                C_TRACE(x, (*meth)(self,arg));
                                Py_DECREF(arg);
                        }
                        else {
                                err_args(func, flags, na);
                                x = NULL;
                        }
                }
                else {
                        PyObject *callargs;
                        callargs = load_args(pp_stack, na);
                        rdtscll(*pintr0);
                        C_TRACE(x, PyCFunction_Call(func,callargs,NULL));
                        rdtscll(*pintr1);
                        Py_XDECREF(callargs);
                }
        } else {
                # こっち python interpreter 関数の呼び出し処理です
                if (PyMethod_Check(func) && PyMethod_GET_SELF(func) != NULL) {
                        # self 引数を持つクラス・メソッドのための処理
                        /* optimize access to bound methods */
                        PyObject *self = PyMethod_GET_SELF(func);
                        PCALL(PCALL_METHOD);
                        PCALL(PCALL_BOUND_METHOD);
                        Py_INCREF(self);
                        func = PyMethod_GET_FUNCTION(func);
                        Py_INCREF(func);
                        Py_DECREF(*pfunc);
                        *pfunc = self;
                        na++;
                        n++;
                } else
                        Py_INCREF(func);
                rdtscll(*pintr0);
                if (PyFunction_Check(func))
                        # 位置引数のみの時の、高速な関数呼び出し処理
                        # 関数側の新しく生成する FrameObject の f_locals に、呼び出し側の
                        # スタックにある引数の参照値を設定する
                        x = fast_function(func, pp_stack, n, na, nk);
                else
                        # キーワード引数があるときの処理
                        x = do_call(func, pp_stack, na, nk);
                rdtscll(*pintr1);
                Py_DECREF(func);
        }

        /* What does this do? */ # C source に最初からある。小林が追加したのではない
        while ((*pp_stack) > pfunc) {
                w = EXT_POP(*pp_stack);
                Py_DECREF(w);
                PCALL(PCALL_POP);
        }
        return x;
}
call_function(.)→fast_function(.)

キーワード引数がないときの高速な関数呼び出し処理を行います。デフォルト引数があるときは、さらに PyEval_EvalCodeEx(.) を呼び出します。

/* The fast_function() function optimize calls for which no argument
   tuple is necessary; the objects are passed directly from the stack.
   For the simplest case -- a function that takes only positional
   arguments and is called with only positional arguments -- it
   inlines the most primitive frame setup code from
   PyEval_EvalCodeEx(), which vastly reduces the checks that must be
   done before evaluating the frame.
*/

        #define PyFunction_GET_CODE(func) \
                (((PyFunctionObject *)func) -> func_code)

static PyObject *
fast_function(PyObject *func, PyObject ***pp_stack, int n, int na, int nk)
{
        PyCodeObject *co = (PyCodeObject *)PyFunction_GET_CODE(func);
        PyObject *globals = PyFunction_GET_GLOBALS(func);
        PyObject *argdefs = PyFunction_GET_DEFAULTS(func);
        PyObject **d = NULL;
        int nd = 0;

        PCALL(PCALL_FUNCTION);
        PCALL(PCALL_FAST_FUNCTION);
        if (argdefs == NULL && co->co_argcount == n && nk==0 &&
            co->co_flags == (CO_OPTIMIZED | CO_NEWLOCALS | CO_NOFREE)) {
                # デフォルト引数がないときの、高速な関数呼び出し処理
                PyFrameObject *f;
                PyObject *retval = NULL;
                PyThreadState *tstate = PyThreadState_GET();
                PyObject **fastlocals, **stack;
                int i;

                PCALL(PCALL_FASTER_FUNCTION);
                assert(globals != NULL);
                /* XXX Perhaps we should create a specialized
                   PyFrame_New() that doesn't take locals, but does
                   take builtins without sanity checking them.
                */
                # tstate:thread state, co:code object
                f = PyFrame_New(tstate, co, globals, NULL);
                if (f == NULL)
                        return NULL;

                fastlocals = f->f_localsplus;
                stack = (*pp_stack) - n;

                for (i = 0; i < n; i++) {
                        Py_INCREF(*stack);
                        fastlocals[i] = *stack++;
                }
                retval = PyEval_EvalFrame(f);
                assert(tstate != NULL);
                ++tstate->recursion_depth;
                Py_DECREF(f);
                --tstate->recursion_depth;
                return retval;
        }
        # デフォルト引数があるときの、低速になっても許される関数呼び出し
        if (argdefs != NULL) {
                # デフォルト引数が存在するときの処理
                d = &PyTuple_GET_ITEM(argdefs, 0);
                nd = ((PyTupleObject *)argdefs)->ob_size;
        }
        # デフォルト引数があるときの関数呼び出し
        return PyEval_EvalCodeEx(co, globals,
                                 (PyObject *)NULL, (*pp_stack)-n, na,
                                 (*pp_stack)-2*nk, nk, d, nd,
                                 PyFunction_GET_CLOSURE(func));
}
fast_function(.)→PyEval_EvalCodeEx(.)

デフォルト引数またはキーワード引数があるときのための、少し遅い関数呼び出し処理です。

func_code.co_argcount で引数の数が解ります。この数と MAKE_FUNCTION が作った func_defaults タプルと組み合わせれば、関数呼び出しに対して default 引数値で補うべき引数値が決まります。FromeObject.f_localsplus の引数値部分を設定できます。下のように、co_varnames ローカル変数文字列タプルの最初に引数の順序に従って引数名が記録されていますから。

//@@
#07.06.27
def testF(inAg, inKeywdAg , inDfAg=3, inDfAg2='ABC'):
    inAt2=3
    inAt = "a"
    inAt3=3
    inAt0=3
    print str(inDfAg) + inDfAg2 + inAt

print testF.func_code.co_varnames
//@@@
('inAg', 'inKeywdAg', 'inDfAg', 'inDfAg2', 'inAt', 'inAt3', 'inAt2', 'inAt0')
/* this is gonna seem *real weird*, but if you put some other code between
   PyEval_EvalFrame() and PyEval_EvalCodeEx() you will need to adjust
        the test in the if statement in Misc/gdbinit:ppystack */

PyObject *
PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
           PyObject **args, int argcount, PyObject **kws, int kwcount,
           PyObject **defs, int defcount, PyObject *closure)
{
        register PyFrameObject *f;
        register PyObject *retval = NULL;
        register PyObject **fastlocals, **freevars;
        PyThreadState *tstate = PyThreadState_GET();
        PyObject *x, *u;

        if (globals == NULL) {
                PyErr_SetString(PyExc_SystemError,
                                "PyEval_EvalCodeEx: NULL globals");
                return NULL;
        }

        assert(globals != NULL);
        f = PyFrame_New(tstate, co, globals, locals);
        if (f == NULL)
                return NULL;

        fastlocals = f->f_localsplus;
        freevars = f->f_localsplus + f->f_nlocals;

        if (co->co_argcount > 0 ||
            co->co_flags & (CO_VARARGS | CO_VARKEYWORDS)) {
                int i;
                int n = argcount;
                PyObject *kwdict = NULL;
                if (co->co_flags & CO_VARKEYWORDS) {
                        kwdict = PyDict_New();
                        if (kwdict == NULL)
                                goto fail;
                        i = co->co_argcount;
                        if (co->co_flags & CO_VARARGS)
                                i++;
                        SETLOCAL(i, kwdict);
                }
                if (argcount > co->co_argcount) {
                        if (!(co->co_flags & CO_VARARGS)) {
                                PyErr_Format(PyExc_TypeError,
                                    "%.200s() takes %s %d "
                                    "%sargument%s (%d given)",
                                    PyString_AsString(co->co_name),
                                    defcount ? "at most" : "exactly",
                                    co->co_argcount,
                                    kwcount ? "non-keyword " : "",
                                    co->co_argcount == 1 ? "" : "s",
                                    argcount);
                                goto fail;
                        }
                        n = co->co_argcount;
                }
                for (i = 0; i < n; i++) {
                        x = args[i];
                        Py_INCREF(x);
                        SETLOCAL(i, x);
                }
                if (co->co_flags & CO_VARARGS) {
                        u = PyTuple_New(argcount - n);
                        if (u == NULL)
                                goto fail;
                        SETLOCAL(co->co_argcount, u);
                        for (i = n; i < argcount; i++) {
                                x = args[i];
                                Py_INCREF(x);
                                PyTuple_SET_ITEM(u, i-n, x);
                        }
                }
                for (i = 0; i < kwcount; i++) {
                        PyObject *keyword = kws[2*i];
                        PyObject *value = kws[2*i + 1];
                        int j;
                        if (keyword == NULL || !PyString_Check(keyword)) {
                                PyErr_Format(PyExc_TypeError,
                                    "%.200s() keywords must be strings",
                                    PyString_AsString(co->co_name));
                                goto fail;
                        }
                        /* XXX slow -- speed up using dictionary? */
                        for (j = 0; j < co->co_argcount; j++) {
                                PyObject *nm = PyTuple_GET_ITEM(
                                        co->co_varnames, j);
                                int cmp = PyObject_RichCompareBool(
                                        keyword, nm, Py_EQ);
                                if (cmp > 0)
                                        break;
                                else if (cmp < 0)
                                        goto fail;
                        }
                        /* Check errors from Compare */
                        if (PyErr_Occurred())
                                goto fail;
                        if (j >= co->co_argcount) {
                                if (kwdict == NULL) {
                                        PyErr_Format(PyExc_TypeError,
                                            "%.200s() got an unexpected "
                                            "keyword argument '%.400s'",
                                            PyString_AsString(co->co_name),
                                            PyString_AsString(keyword));
                                        goto fail;
                                }
                                PyDict_SetItem(kwdict, keyword, value);
                        }
                        else {
                                if (GETLOCAL(j) != NULL) {
                                        PyErr_Format(PyExc_TypeError,
                                             "%.200s() got multiple "
                                             "values for keyword "
                                             "argument '%.400s'",
                                             PyString_AsString(co->co_name),
                                             PyString_AsString(keyword));
                                        goto fail;
                                }
                                Py_INCREF(value);
                                SETLOCAL(j, value);
                        }
                }
                if (argcount < co->co_argcount) {
                        int m = co->co_argcount - defcount;
                        for (i = argcount; i < m; i++) {
                                if (GETLOCAL(i) == NULL) {
                                        PyErr_Format(PyExc_TypeError,
                                            "%.200s() takes %s %d "
                                            "%sargument%s (%d given)",
                                            PyString_AsString(co->co_name),
                                            ((co->co_flags & CO_VARARGS) ||
                                             defcount) ? "at least"
                                                       : "exactly",
                                            m, kwcount ? "non-keyword " : "",
                                            m == 1 ? "" : "s", i);
                                        goto fail;
                                }
                        }
                        if (n > m)
                                i = n - m;
                        else
                                i = 0;
                        for (; i < defcount; i++) {
                                if (GETLOCAL(m+i) == NULL) {
                                        PyObject *def = defs[i];
                                        Py_INCREF(def);
                                        SETLOCAL(m+i, def);
                                }
                        }
                }
        }
        else {
                if (argcount > 0 || kwcount > 0) {
                        PyErr_Format(PyExc_TypeError,
                                     "%.200s() takes no arguments (%d given)",
                                     PyString_AsString(co->co_name),
                                     argcount + kwcount);
                        goto fail;
                }
        }
        /* Allocate and initialize storage for cell vars, and copy free
           vars into frame.  This isn't too efficient right now. */
        if (f->f_ncells) {
                int i = 0, j = 0, nargs, found;
                char *cellname, *argname;
                PyObject *c;

                nargs = co->co_argcount;
                if (co->co_flags & CO_VARARGS)
                        nargs++;
                if (co->co_flags & CO_VARKEYWORDS)
                        nargs++;

                /* Check for cells that shadow args */
                for (i = 0; i < f->f_ncells && j < nargs; ++i) {
                        cellname = PyString_AS_STRING(
                                PyTuple_GET_ITEM(co->co_cellvars, i));
                        found = 0;
                        while (j < nargs) {
                                argname = PyString_AS_STRING(
                                        PyTuple_GET_ITEM(co->co_varnames, j));
                                if (strcmp(cellname, argname) == 0) {
                                        c = PyCell_New(GETLOCAL(j));
                                        if (c == NULL)
                                                goto fail;
                                        GETLOCAL(f->f_nlocals + i) = c;
                                        found = 1;
                                        break;
                                }
                                j++;
                        }
                        if (found == 0) {
                                c = PyCell_New(NULL);
                                if (c == NULL)
                                        goto fail;
                                SETLOCAL(f->f_nlocals + i, c);
                        }
                }
                /* Initialize any that are left */
                while (i < f->f_ncells) {
                        c = PyCell_New(NULL);
                        if (c == NULL)
                                goto fail;
                        SETLOCAL(f->f_nlocals + i, c);
                        i++;
                }
        }
        if (f->f_nfreevars) {
                int i;
                for (i = 0; i < f->f_nfreevars; ++i) {
                        PyObject *o = PyTuple_GET_ITEM(closure, i);
                        Py_INCREF(o);
                        freevars[f->f_ncells + i] = o;
                }
        }

        if (co->co_flags & CO_GENERATOR) {
                /* Don't need to keep the reference to f_back, it will be set
                 * when the generator is resumed. */
                Py_XDECREF(f->f_back);
                f->f_back = NULL;

                PCALL(PCALL_GENERATOR);

                /* Create a new generator that owns the ready to run frame
                 * and return that as the value. */
                return PyGen_New(f);
        }

        retval = PyEval_EvalFrame(f);

  fail: /* Jump here from prelude on failure */

        /* decref'ing the frame can cause __del__ methods to get invoked,
           which can call back into Python.  While we're done with the
           current Python frame (f), the associated C stack is still in use,
           so recursion_depth must be boosted for the duration.
        */
        assert(tstate != NULL);
        ++tstate->recursion_depth;
        Py_DECREF(f);
        --tstate->recursion_depth;
        return retval;
}
call_function(.)→do_call(.)

キーワード引数があるときの関数呼び出し処理です。

static PyObject *
do_call(PyObject *func, PyObject ***pp_stack, int na, int nk)
{
        PyObject *callargs = NULL;
        PyObject *kwdict = NULL;
        PyObject *result = NULL;

        if (nk > 0) {
                kwdict = update_keyword_args(NULL, nk, pp_stack, func);
                if (kwdict == NULL)
                        goto call_fail;
        }
        callargs = load_args(pp_stack, na);
        if (callargs == NULL)
                goto call_fail;
#ifdef CALL_PROFILE
        /* At this point, we have to look at the type of func to
           update the call stats properly.  Do it here so as to avoid
           exposing the call stats machinery outside ceval.c
        */
        if (PyFunction_Check(func))
                # static int pcall[PCALL_NUM];
                #define PCALL(POS) pcall[POS]++
                #define PCALL_FUNCTION 1
                PCALL(PCALL_FUNCTION);
        else if (PyMethod_Check(func))
                PCALL(PCALL_METHOD);
        else if (PyType_Check(func))
                PCALL(PCALL_TYPE);
        else
                PCALL(PCALL_OTHER);
#endif
        result = PyObject_Call(func, callargs, kwdict);
 call_fail:
        Py_XDECREF(callargs);
        Py_XDECREF(kwdict);
        return result;
}
do_call(.)→PyObject_Call(.)
PyObject *
PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw)
{
        ternaryfunc call;

    if ((call = func->ob_type->tp_call) != NULL) {
        PyObject *result = (*call)(func, arg, kw);
        if (result == NULL && !PyErr_Occurred())
            PyErr_SetString(
                PyExc_SystemError,
                "NULL result without error in PyObject_Call");
        return result;
    }
    PyErr_Format(PyExc_TypeError, "'%s' object is not callable",
             func->ob_type->tp_name);
    return NULL;
}

STORE_GLOBAL/STORE_NAME 関連

    case STORE_GLOBAL:
            w = GETITEM(names, oparg);
            v = POP();
            err = PyDict_SetItem(f-<f_globals, w, v);
            Py_DECREF(v);
            if (err == 0) continue;
            break;

    case STORE_NAME:
            w = GETITEM(names, oparg);
            v = POP();      # value
            if ((x = f-<f_locals) != NULL) {
                    #define PyDict_CheckExact(op) ((op)->ob_type == &PyDict_Type)
                    if (PyDict_CheckExact(x))
                            # f_locals が辞書:PyDict_Type なので STORE_GLOBAL の
                            # ときと同様に、変数名文字列を使って hash 値経由で 値
                            # 設定も行っていく。デフォルト辞書サイズでは足りなくな
                            # ったら辞書の作り直しも行う
                            err = PyDict_SetItem(x, w, v);
                    else
                            # w の ob_ival 属性を使って local 変数値をインデックスで設定する
                            err = PyObject_SetItem(x, w, v);
                    Py_DECREF(v);
                    if (err == 0) continue;
                    break;
            }
            PyErr_Format(PyExc_SystemError,
                         "no locals found when storing %s",
                         PyObject_REPR(w));
            break;
                 ・
                 ・
    case LOAD_NAME:
            w = GETITEM(names, oparg);
            if ((v = f->f_locals) == NULL) {
                    PyErr_Format(PyExc_SystemError,
                                 "no locals when loading %s",
                                 PyObject_REPR(w));
                    break;
            }
            if (PyDict_CheckExact(v)) {
                    x = PyDict_GetItem(v, w);
                    Py_XINCREF(x);
            }
            else {
                    x = PyObject_GetItem(v, w);
                    if (x == NULL && PyErr_Occurred()) {
                            if (!PyErr_ExceptionMatches(PyExc_KeyError))
                                    break;
                            PyErr_Clear();
                    }
            }
            if (x == NULL) {
                    x = PyDict_GetItem(f->f_globals, w);
                    if (x == NULL) {
                            x = PyDict_GetItem(f->f_builtins, w);
                            if (x == NULL) {
                                    format_exc_check_arg(
                                                PyExc_NameError,
                                                NAME_ERROR_MSG ,w);
                                    break;
                            }
                    }
                    Py_INCREF(x);
            }
            PUSH(x);
            continue;


/* CAUTION: PyDict_SetItem() must guarantee that it won't resize the
 * dictionary if it is merely replacing the value for an existing key.
 * This is means that it's safe to loop over a dictionary with
 * PyDict_Next() and occasionally replace a value -- but you can't
 * insert new keys or remove them.
 */
                                w = GETITEM(names, oparg); w は変数名文字列だけで
                                なく, ob_type, ob_ival:index 値などの属性も備えている
                   f_locals                │        v = POP();
int                    ↓                  ↓           ↓
PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
{
    register dictobject *mp;
    register long hash;
    register int n_used;

    if (!PyDict_Check(op)) {
        PyErr_BadInternalCall();
        return -1;
    }
    mp = (dictobject *)op;
    if (PyString_CheckExact(key)) {     # PyDict_Type に PyDict_Check(op) 結果が残っている
        hash = ((PyStringObject *)key)->ob_shash;
        if (hash == -1)
            hash = PyObject_Hash(key);
    }
    else {
        hash = PyObject_Hash(key);
        if (hash == -1)
            return -1;
    }
    assert(mp->ma_fill <= mp->ma_mask);  /* at least one empty slot */
    n_used = mp->ma_used;
    Py_INCREF(value);
    Py_INCREF(key);
    insertdict(mp, key, hash, value);
    /* If we added a key, we can safely resize.  Otherwise just return!
     * If fill >= 2/3 size, adjust size.  Normally, this doubles or
     * quaduples the size, but it's also possible for the dict to shrink
     * (if ma_fill is much larger than ma_used, meaning a lot of dict
     * keys have been * deleted).
     *
     * Quadrupling the size improves average dictionary sparseness
     * (reducing collisions) at the cost of some memory and iteration
     * speed (which loops over every possible entry).  It also halves
     * the number of expensive resize operations in a growing dictionary.
     *
     * Very large dictionaries (over 50K items) use doubling instead.
     * This may help applications with severe memory constraints.
     */
    if (!(mp->ma_used > n_used && mp->ma_fill*3 >= (mp->ma_mask+1)*2))
        return 0;
    return dictresize(mp, mp->ma_used*(mp->ma_used>50000 ? 2 : 4));
}


PyObject *
PyDict_GetItem(PyObject *op, PyObject *key)
{
    long hash;
    dictobject *mp = (dictobject *)op;
    if (!PyDict_Check(op)) {
        return NULL;
    }
    if (mp->ma_table == NULL)
        return NULL;
#ifdef CACHE_HASH
    if (!PyString_Check(key) ||
        (hash = ((PyStringObject *) key)->ob_shash) == -1)
#endif
    {
        hash = PyObject_Hash(key);
        if (hash == -1) {
            PyErr_Clear();
            return NULL;
        }
    }
    return (mp->ma_lookup)(mp, key, hash)->me_value;
}


int
PyObject_SetItem(PyObject *o, PyObject *key, PyObject *value)
{
    PyMappingMethods *m;

    if (o == NULL || key == NULL || value == NULL) {
        null_error();
        return -1;
    }
    m = o->ob_type->tp_as_mapping;
    if (m && m->mp_ass_subscript)
        return m->mp_ass_subscript(o, key, value);

    if (o->ob_type->tp_as_sequence) {
        #define PyInt_Check(op) ((op)->ob_type == &PyInt_Type)
        if (PyInt_Check(key))
            return PySequence_SetItem(o, PyInt_AsLong(key), value);
        else if (PyLong_Check(key)) {
            long key_value = PyLong_AsLong(key);
            if (key_value == -1 && PyErr_Occurred())
                return -1;
            return PySequence_SetItem(o, key_value, value);
        }
        else if (o->ob_type->tp_as_sequence->sq_ass_item) {
            type_error("sequence index must be integer");
            return -1;
        }
    }

    type_error("object does not support item assignment");
    return -1;
}

PyObject *
PyObject_GetItem(PyObject *o, PyObject *key)
{
    PyMappingMethods *m;

    if (o == NULL || key == NULL)
        return null_error();

    m = o->ob_type->tp_as_mapping;
    if (m && m->mp_subscript)
        return m->mp_subscript(o, key);

    if (o->ob_type->tp_as_sequence) {
        if (PyInt_Check(key))
            return PySequence_GetItem(o, PyInt_AsLong(key));
        else if (PyLong_Check(key)) {
            long key_value = PyLong_AsLong(key);
            if (key_value == -1 && PyErr_Occurred())
                return NULL;
            return PySequence_GetItem(o, key_value);
        }
        return type_error("sequence index must be integer");
    }

    return type_error("unsubscriptable object");
}


long
PyInt_AsLong(register PyObject *op)
{
    PyNumberMethods *nb;
    PyIntObject *io;
    long val;

    #define PyInt_Check(op) ((op)->ob_type == &PyInt_Type)
    if (op && PyInt_Check(op))
        #define PyInt_AS_LONG(op) (((PyIntObject *)(op))->ob_ival)
        return PyInt_AS_LONG((PyIntObject*) op);

    if (op == NULL || (nb = op->ob_type->tp_as_number) == NULL ||
        nb->nb_int == NULL) {
        PyErr_SetString(PyExc_TypeError, "an integer is required");
        return -1;
    }

    io = (PyIntObject*) (*nb->nb_int) (op);
    if (io == NULL)
        return -1;
    if (!PyInt_Check(io)) {
        PyErr_SetString(PyExc_TypeError,
                "nb_int should return int object");
        return -1;
    }

    #define PyInt_AS_LONG(op) (((PyIntObject *)(op))->ob_ival)
    val = PyInt_AS_LONG(io);
    Py_DECREF(io);

    return val;
}

long
PyObject_Hash(PyObject *v)
{
    PyTypeObject *tp = v->ob_type;
    if (tp->tp_hash != NULL)
        return (*tp->tp_hash)(v);
    if (tp->tp_compare == NULL && RICHCOMPARE(tp) == NULL) {
        return _Py_HashPointer(v); /* Use address as hash value */
    }
    /* If there's a cmp but no hash defined, the object can't be hashed */
    PyErr_SetString(PyExc_TypeError, "unhashable type");
    return -1;
}


hash

Python 内部では FrameObject.f_globals など様々の辞書データが使われています。Interpreter の実行時に変数名文字列と実際のオブジェクト・データの参照対応が辞書を使って解決されていきます。でも文字列の比較には時間がかかります。そこで辞書の中へのの配置位置の計算を hash 関数を使って高速化してます。単純な配列よりは遅いのですが、文字列を比較しながら辞書データ全体をスキャンするよりは、ずっと早くなります。C コンパイラでは許されない選択ですが、interpreter ならば柔軟性とのトレード・オフで許される選択です。

この hash 関数の呼び出しは結構複雑になっています。文字列などのオブジェクトに hash 値を持たせています。何度も hash 関数を計算することを避けるためです。オブジェクトごとに備える hash 関数も大部分が hash 関数への関数ポインタを持たせる実装になっています。

私自身も まだ hash の扱われ方をきちんと見通せていません。ただ基本にあるのは dictobject.c のようです。ですので dictoject.c のソースをここ に置いておきます。