Call Python code from LLVM JIT

I am writing a language lexer/parser/compiler in Python that should later run in the LLVM JIT VM (using llvm-py). The first two steps are quite straightforward for now, but (even though I haven’t started on the compiler yet) I see a problem when my code wants to call Python code in general, or interact with the Python lexer/parser/compiler in particular. My main concern is that the code should be able to dynamically load additional code into the VM at runtime, and thus it must trigger the whole lexer/parser/compiler chain in Python from within the VM.

First of all: Is this even possible, or is the VM “immutable” once it is started?

If it is I currently see 3 possible solutions (I am open for other suggestions)

  • “Break out” of the VM and make it possible to call Python functions of the main process directly (maybe by registering it as an LLVM function that redirects to the main process somehow). I didn’t find anything about this, and anyway I am not sure if this is a good idea (security and such).
  • Compile the runtime (statically or dynamically at runtime) into LLVM-Assembly/-IR. This requires, that the IR-code is able to modify the VM it runs in
  • Compile the runtime (statically) into a library and load it directly into the VM. Again it must be able to add functions (etc) to the VM it runs in.

Best answer

Like Eli said, there’s nothing stopping you from calling out to the Python C-API. When you call an external function from inside of the LLVM JIT it effectively just uses dlopen() on the process space, so if you’re running from inside of llvmpy you already have all the Python interpreter symbols accessible; you can even interact with the active interpreter that invoked the ExecutionEngine, or you can spin up a new Python interpreter if needed.

To get you started, create a new C file with our evaluator.

#include <Python.h>

void python_eval(const char* s)
{
    PyCodeObject* code = (PyCodeObject*) Py_CompileString(s, "example", Py_file_input);

    PyObject* main_module = PyImport_AddModule("__main__");
    PyObject* global_dict = PyModule_GetDict(main_module);
    PyObject* local_dict = PyDict_New();
    PyObject* obj = PyEval_EvalCode(code, global_dict, local_dict);

    PyObject* result = PyObject_Str(obj);

    // Print the result if you want.
    // PyObject_Print(result, stdout, 0);
}

Here’s a little Makefile to compile that:

# Builds cbits.so, the shared library that provides python_eval.
# NOTE: recipe lines must begin with a TAB character.
CC = gcc
# Include flags for the Python headers, as reported by python-config.
LPYTHON = $(shell python-config --includes)
CFLAGS = -shared -fPIC -lpthread $(LPYTHON)

.PHONY: all clean

all:
	$(CC) $(CFLAGS) cbits.c -o cbits.so

# Remove only the build artifact; cbits.c is the source and must be kept.
clean:
	-rm cbits.so

Then we start with the usual boilerplate for LLVM, but use ctypes to load our cbits.so shared library into the global process space so that we have the python_eval symbol. Then just create a simple LLVM module with a function, allocate a string with some Python source with ctypes and pass the pointer to the ExecutionEngine running the JIT’d function from our module, which in turn passes the Python source to the C function, which invokes the Python C-API and then yields back to the LLVM JIT.

import llvm.core as lc
import llvm.ee as le

import ctypes
import inspect

# Load the shared library with RTLD_GLOBAL so that its python_eval symbol is
# placed in the global process space. CDLL is the public ctypes entry point
# for this; it takes the same path and mode as the private _dlopen helper.
ctypes.CDLL('./cbits.so', mode=ctypes.RTLD_GLOBAL)

# Shorthand for building LLVM pointer types.
pointer = lc.Type.pointer

# Integer types of fixed bit width.
i32 = lc.Type.int(32)
i64 = lc.Type.int(64)

# A string is passed around as a pointer to 8-bit integers (i8*).
char_type  = lc.Type.int(8)
string_type = pointer(char_type)

# 64-bit integer constant 0.
zero = lc.Constant.int(i64, 0)

def build():
    """Create the LLVM module that forwards a string to ``python_eval``.

    The module is named ``'call python'`` and contains two functions:

    * ``python_eval`` -- a body-less declaration taking one string argument
      and returning void.
    * ``call`` -- a function with the same shape whose only instruction
      besides ``ret void`` is a call to ``python_eval`` with its argument.

    Returns:
        A ``(fn, mod)`` tuple: the ``call`` function and the module that
        owns it.
    """
    mod = lc.Module.new('call python')

    # Declaration of the evaluator; only its signature is given here.
    eval_signature = lc.Type.function(lc.Type.void(), [string_type], False)
    evalfn = lc.Function.new(mod, eval_signature, "python_eval")

    # The wrapper that will be executed: void call(i8* input).
    call_signature = lc.Type.function(lc.Type.void(), [string_type])
    fn = lc.Function.new(mod, call_signature, "call")

    source_arg = fn.args[0]
    source_arg.name = "input"

    # Single basic block: hand the argument to python_eval and return.
    entry_block = fn.append_basic_block("entry")
    emitter = lc.Builder.new(entry_block)
    emitter.call(evalfn, [source_arg])
    emitter.ret_void()

    return fn, mod

def run(fn, mod, buf):
    """Print the module's IR and native assembly, then execute ``fn``.

    Args:
        fn: The LLVM function to run; it is called with one pointer argument.
        mod: The LLVM module that contains ``fn``.
        buf: A ctypes buffer whose address is passed to ``fn``.
    """
    target = le.TargetMachine.new(features='', cm=le.CM_JITDEFAULT)
    engine = le.EngineBuilder.new(mod).create(target)

    # Turn the buffer into a raw address and wrap it as a pointer argument.
    address = ctypes.cast(buf, ctypes.c_void_p).value
    pointer_arg = le.GenericValue.pointer(address)

    print('IR'.center(80, '='))
    print(mod)

    mod.verify()

    print('Assembly'.center(80, '='))
    print(mod.to_native_assembly())

    print('Result'.center(80, '='))
    engine.run_function(fn, [pointer_arg])
if __name__ == '__main__':
    # The Python program handed to the JIT'd function. To evaluate the
    # source of an existing function instead, obtain the string with
    # inspect.getsource(mypyfn).
    source_str = "print 'Hello from Python C-API inside of LLVM!'"

    # Build the module first, then place the source text in a ctypes buffer
    # whose address can be passed to the compiled function.
    fn, mod = build()
    buf = ctypes.create_string_buffer(source_str)

    run(fn, mod, buf)

You should see the following output:

=======================================IR=======================================
; ModuleID = 'call python'

declare void @python_eval(i8*)

define void @call(i8* %input) {
entry:
  call void @python_eval(i8* %input)
  ret void
}
=====================================Result=====================================
Hello from Python C-API inside of LLVM!