Cython
Example from here and this tutorial
Cython is a superset of python programming language that gives access into c/c++ constructs. - Cython acts as a bridge between Python and C/C++.
Load cython notebook extension (and i have already installed cython )
[1]:
pip install Cython
Requirement already satisfied: Cython in /srv/conda/envs/notebook/lib/python3.7/site-packages (0.29.24)
Note: you may need to restart the kernel to use updated packages.
[2]:
%load_ext cython
[3]:
%%cython
'''Inside this cell is Cython Code only
'''
def first_cython_function(int i):
return i * 3.1415926;
[4]:
# use cython function in a separate (python) cell
first_cython_function(10)
[4]:
31.415926
[5]:
%%cython?
Docstring:
::
%cython [-a] [-+] [-3] [-2] [-f] [-c COMPILE_ARGS]
[--link-args LINK_ARGS] [-l LIB] [-n NAME] [-L dir] [-I INCLUDE]
[-S SRC] [--pgo] [--verbose]
Compile and import everything from a Cython code cell.
The contents of the cell are written to a `.pyx` file in the
directory `IPYTHONDIR/cython` using a filename with the hash of the
code. This file is then cythonized and compiled. The resulting module
is imported and all of its symbols are injected into the user's
namespace. The usage is similar to that of `%%cython_pyximport` but
you don't have to pass a module name::
%%cython
def f(x):
return 2.0*x
To compile OpenMP codes, pass the required `--compile-args`
and `--link-args`. For example with gcc::
%%cython --compile-args=-fopenmp --link-args=-fopenmp
...
To enable profile guided optimisation, pass the ``--pgo`` option.
Note that the cell itself needs to take care of establishing a suitable
profile when executed. This can be done by implementing the functions to
optimise, and then calling them directly in the same cell on some realistic
training data like this::
%%cython --pgo
def critical_function(data):
for item in data:
...
# execute function several times to build profile
from somewhere import some_typical_data
for _ in range(100):
critical_function(some_typical_data)
In Python 3.5 and later, you can distinguish between the profile and
non-profile runs as follows::
if "_pgo_" in __name__:
... # execute critical code here
optional arguments:
-a, --annotate Produce a colorized HTML version of the source.
-+, --cplus Output a C++ rather than C file.
-3 Select Python 3 syntax.
-2 Select Python 2 syntax.
-f, --force Force the compilation of a new module, even if the
source has been previously compiled.
-c COMPILE_ARGS, --compile-args COMPILE_ARGS
Extra flags to pass to compiler via the
`extra_compile_args` Extension flag (can be specified
multiple times).
--link-args LINK_ARGS
Extra flags to pass to linker via the
`extra_link_args` Extension flag (can be specified
multiple times).
-l LIB, --lib LIB Add a library to link the extension against (can be
specified multiple times).
-n NAME, --name NAME Specify a name for the Cython module.
-L dir Add a path to the list of library directories (can be
specified multiple times).
-I INCLUDE, --include INCLUDE
Add a path to the list of include directories (can be
specified multiple times).
-S SRC, --src SRC Add a path to the list of src files (can be specified
multiple times).
--pgo Enable profile guided optimisation in the C compiler.
Compiles the cell twice and executes it in between to
generate a runtime profile.
--verbose Print debug information like generated .c/.cpp file
location and exact gcc/g++ command invoked.
File: /srv/conda/envs/notebook/lib/python3.7/site-packages/Cython/Build/IpythonMagic.py
[6]:
def foo(a, b):
return a + b
[7]:
from dis import dis
# disassebles compiled python objects
dis(foo)
2 0 LOAD_FAST 0 (a)
2 LOAD_FAST 1 (b)
4 BINARY_ADD
6 RETURN_VALUE
[8]:
%%cython
def cyfoo(a, b):
return a + b
[9]:
%timeit foo(1000000, 2000000)
120 ns ± 3.57 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)
[10]:
%timeit cyfoo(1000000, 2000000)
110 ns ± 2.29 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)
[11]:
%timeit foo('x', 'y')
139 ns ± 3.65 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)
[12]:
%timeit cyfoo('x', 'y')
138 ns ± 5.23 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)
[13]:
def pyfac(n):
if n <= 1:
return 1
return n * pyfac(n -1)
[14]:
%timeit pyfac(20.0)
3.51 µs ± 151 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
[44]:
%%cython
def cyfac(n):
if n <= 1:
return 1
return n * cyfac(n-1)
def cyfac_double(double n):
if n <= 1:
return 1.0
return n * cyfac_double(n-1)
[16]:
%timeit cyfac(20.0)
1.46 µs ± 47.7 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
[17]:
%timeit cyfac_double(20.0)
914 ns ± 71 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
[43]:
%%cython -a
cpdef double cyfac_double_fast(double n):
if n <= 1:
return 1.0
return n * cyfac_double_fast(n - 1)
[43]:
Generated by Cython 0.29.24
Yellow lines hint at Python interaction.
Click on a line that starts with a "+" to see the C code that Cython generated for it.
1:
+2: cpdef double cyfac_double_fast(double n):
static PyObject *__pyx_pw_46_cython_magic_88dfdb3a94d86c859cfd2127a1590a68_1cyfac_double_fast(PyObject *__pyx_self, PyObject *__pyx_arg_n); /*proto*/
static double __pyx_f_46_cython_magic_88dfdb3a94d86c859cfd2127a1590a68_cyfac_double_fast(double __pyx_v_n, CYTHON_UNUSED int __pyx_skip_dispatch) {
double __pyx_r;
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("cyfac_double_fast", 0);
/* … */
/* function exit code */
__pyx_L0:;
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
/* Python wrapper */
static PyObject *__pyx_pw_46_cython_magic_88dfdb3a94d86c859cfd2127a1590a68_1cyfac_double_fast(PyObject *__pyx_self, PyObject *__pyx_arg_n); /*proto*/
static PyObject *__pyx_pw_46_cython_magic_88dfdb3a94d86c859cfd2127a1590a68_1cyfac_double_fast(PyObject *__pyx_self, PyObject *__pyx_arg_n) {
double __pyx_v_n;
PyObject *__pyx_r = 0;
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("cyfac_double_fast (wrapper)", 0);
assert(__pyx_arg_n); {
__pyx_v_n = __pyx_PyFloat_AsDouble(__pyx_arg_n); if (unlikely((__pyx_v_n == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 2, __pyx_L3_error)
}
goto __pyx_L4_argument_unpacking_done;
__pyx_L3_error:;
__Pyx_AddTraceback("_cython_magic_88dfdb3a94d86c859cfd2127a1590a68.cyfac_double_fast", __pyx_clineno, __pyx_lineno, __pyx_filename);
__Pyx_RefNannyFinishContext();
return NULL;
__pyx_L4_argument_unpacking_done:;
__pyx_r = __pyx_pf_46_cython_magic_88dfdb3a94d86c859cfd2127a1590a68_cyfac_double_fast(__pyx_self, ((double)__pyx_v_n));
int __pyx_lineno = 0;
const char *__pyx_filename = NULL;
int __pyx_clineno = 0;
/* function exit code */
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
static PyObject *__pyx_pf_46_cython_magic_88dfdb3a94d86c859cfd2127a1590a68_cyfac_double_fast(CYTHON_UNUSED PyObject *__pyx_self, double __pyx_v_n) {
PyObject *__pyx_r = NULL;
__Pyx_RefNannyDeclarations
__Pyx_RefNannySetupContext("cyfac_double_fast", 0);
__Pyx_XDECREF(__pyx_r);
__pyx_t_1 = PyFloat_FromDouble(__pyx_f_46_cython_magic_88dfdb3a94d86c859cfd2127a1590a68_cyfac_double_fast(__pyx_v_n, 0)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 2, __pyx_L1_error)
__Pyx_GOTREF(__pyx_t_1);
__pyx_r = __pyx_t_1;
__pyx_t_1 = 0;
goto __pyx_L0;
/* function exit code */
__pyx_L1_error:;
__Pyx_XDECREF(__pyx_t_1);
__Pyx_AddTraceback("_cython_magic_88dfdb3a94d86c859cfd2127a1590a68.cyfac_double_fast", __pyx_clineno, __pyx_lineno, __pyx_filename);
__pyx_r = NULL;
__pyx_L0:;
__Pyx_XGIVEREF(__pyx_r);
__Pyx_RefNannyFinishContext();
return __pyx_r;
}
+3: if n <= 1:
__pyx_t_1 = ((__pyx_v_n <= 1.0) != 0);
if (__pyx_t_1) {
/* … */
}
+4: return 1.0
__pyx_r = 1.0;
goto __pyx_L0;
+5: return n * cyfac_double_fast(n - 1)
__pyx_r = (__pyx_v_n * __pyx_f_46_cython_magic_88dfdb3a94d86c859cfd2127a1590a68_cyfac_double_fast((__pyx_v_n - 1.0), 0)); goto __pyx_L0;
[19]:
%timeit cyfac_double_fast(20.0)
127 ns ± 2.17 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)
Integral Types
[20]:
%%cython
# cdef is an directive , telling objects are c objects
cdef:
int i = 0
unsigned long j = 1
signed short k = -3
bint flag = True
long long ll = 1LL
float a = 1.0
double b = -2.0
long double c= 1e5
str s = "abc"
print(i, j, k, ll, flag, a, b, c, s)
0 1 -3 1 True 1.0 -2.0 100000.0 abc
[21]:
%%cython
import datetime
cimport cpython.datetime
import array
cimport cpython.array
Example spin field
[22]:
import numpy as np
[23]:
def random_spin_field(N, M):
return np.random.choice([-1, 1], size = (N, M))
[24]:
field = random_spin_field(10, 10)
field
[24]:
array([[-1, -1, -1, -1, -1, 1, -1, -1, 1, -1],
[-1, 1, -1, 1, -1, 1, 1, -1, 1, -1],
[-1, -1, 1, -1, -1, 1, -1, 1, -1, 1],
[ 1, -1, -1, -1, -1, -1, 1, 1, 1, -1],
[-1, -1, -1, 1, -1, 1, 1, 1, 1, 1],
[-1, 1, -1, 1, -1, -1, 1, -1, 1, -1],
[ 1, 1, -1, 1, 1, 1, 1, -1, 1, 1],
[-1, -1, 1, 1, -1, 1, 1, -1, 1, -1],
[ 1, -1, 1, -1, -1, -1, 1, -1, -1, 1],
[ 1, 1, -1, 1, -1, -1, 1, -1, 1, -1]])
[25]:
# pip install pillow
from PIL import Image
[26]:
def display_spin_field(field):
return Image.fromarray(np.uint8((field +1 )* 0.5 * 255)) # expects nu. between 0 and 255
[27]:
display_spin_field(random_spin_field(200,200))
[27]:
[28]:
def ising_step(field, beta=0.5):
N, M = field.shape
for n_offset in range(2):
for m_offset in range(2):
for n in range(n_offset, N, 2):
for m in range(m_offset, M, 2):
_ising_update(field, n, m, beta)
return field
def _ising_update(field, n, m, beta):
total = 0
N, M = field.shape
for i in range(n-1, n+2):
for j in range(m-1, m+2):
if i == n and j == m:
continue
total += field[i%N, j%M]
dE = 2 * field[n, m] * total
if dE < 0:
field[n, m] *= -1 # switch
elif np.exp(-dE * beta) > np.random.rand():
field[n, m] *= -1
[29]:
display_spin_field(ising_step(random_spin_field(200, 200)))
[29]:
[30]:
# Animate using ipywidgets
[31]:
from ipywidgets import interact
[32]:
def display_ising_sequence(images):
def _show(frame=(0, len(images) -1)):
return display_spin_field(images[frame])
return interact(_show)
[33]:
images = [random_spin_field(200, 200)]
for i in range(50):
images.append(ising_step(images[-1].copy()))
display_ising_sequence(images)
[33]:
<function __main__.display_ising_sequence.<locals>._show(frame=(0, 50))>
[34]:
%%cython
import numpy as np
cimport numpy as np # gives you access to Numpy C API
def cy_ising_step(field, beta=0.5):
N, M = field.shape
for n_offset in range(2):
for m_offset in range(2):
for n in range(n_offset, N, 2):
for m in range(m_offset, M, 2):
_cy_ising_update(field, n, m, beta)
return field
def _cy_ising_update(field, n, m, beta):
total = 0
N, M = field.shape
for i in range(n-1, n+2):
for j in range(m-1, m+2):
if i == n and j == m:
continue
total += field[i%N, j%M]
dE = 2 * field[n, m] * total
if dE < 0:
field[n, m] *= -1 # switch
elif np.exp(-dE * beta) > np.random.rand():
field[n, m] *= -1
[35]:
field = random_spin_field(200, 200)
%timeit ising_step(field)
%timeit cy_ising_step(field)
417 ms ± 21.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
393 ms ± 33.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
[36]:
%%cython
import numpy as np # gives access to python functions
cimport numpy as np # gives you access to Numpy C API
def cy_ising_step_fast(long[:, :]field, float beta=0.5):
cdef int N = field.shape[0]
cdef int M = field.shape[1]
cdef int n_offset, m_offset, n, m
for n_offset in range(2):
for m_offset in range(2):
for n in range(n_offset, N, 2):
for m in range(m_offset, M, 2):
_cy_ising_update_fast(field, n, m, beta)
return field
cdef _cy_ising_update_fast(long[:,:]field, int n, int m, float beta):
cdef int total = 0
cdef int N = field.shape[0]
cdef int M = field.shape[1]
cdef int i, j
for i in range(n-1, n+2):
for j in range(m-1, m+2):
if i == n and j == m:
continue
total += field[i%N, j%M]
cdef float dE = 2 * field[n, m] * total
if dE < 0:
field[n, m] *= -1 # switch
elif np.exp(-dE * beta) > np.random.rand():
field[n, m] *= -1
[37]:
field = random_spin_field(200, 200)
%timeit ising_step(field)
%timeit cy_ising_step(field)
%timeit cy_ising_step_fast(field)
414 ms ± 10.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
382 ms ± 15.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
64.5 ms ± 2.41 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
[38]:
%%cython
import numpy as np # gives access to python functions
cimport numpy as np # gives you access to Numpy C API
from libc.math cimport exp
from libc.stdlib cimport rand
cdef extern from "limits.h":
int RAND_MAX
def cy_ising_step_faster(long[:, :]field, float beta=0.5):
cdef int N = field.shape[0]
cdef int M = field.shape[1]
cdef int n_offset, m_offset, n, m
for n_offset in range(2):
for m_offset in range(2):
for n in range(n_offset, N, 2):
for m in range(m_offset, M, 2):
_cy_ising_update_faster(field, n, m, beta)
return field
cdef _cy_ising_update_faster(long[:,:]field, int n, int m, float beta):
cdef int total = 0
cdef int N = field.shape[0]
cdef int M = field.shape[1]
cdef int i, j
for i in range(n-1, n+2):
for j in range(m-1, m+2):
if i == n and j == m:
continue
total += field[i%N, j%M]
cdef float dE = 2 * field[n, m] * total
if dE < 0:
field[n, m] *= -1 # switch
elif exp(-dE * beta) > rand()/RAND_MAX:
field[n, m] *= -1
[39]:
field = random_spin_field(200, 200)
%timeit ising_step(field)
%timeit cy_ising_step(field)
%timeit cy_ising_step_fast(field)
%timeit cy_ising_step_faster(field)
418 ms ± 13.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
386 ms ± 18.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
63.6 ms ± 1.61 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
5.54 ms ± 259 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
[40]:
%%cython
import numpy as np # gives access to python functions
cimport numpy as np # gives you access to Numpy C API
cimport cython
from libc.math cimport exp
from libc.stdlib cimport rand
cdef extern from "limits.h":
int RAND_MAX
@cython.boundscheck(False) # dont check index boundaries
@cython.wraparound(False) # can not use negative index
def cy_ising_step_fastest(long[:, :]field, float beta=0.5):
cdef int N = field.shape[0]
cdef int M = field.shape[1]
cdef int n_offset, m_offset, n, m
for n_offset in range(2):
for m_offset in range(2):
for n in range(n_offset, N, 2):
for m in range(m_offset, M, 2):
_cy_ising_update_fastest(field, n, m, beta)
return field
cdef _cy_ising_update_fastest(long[:,:]field, int n, int m, float beta):
cdef int total = 0
cdef int N = field.shape[0]
cdef int M = field.shape[1]
cdef int i, j
for i in range(n-1, n+2):
for j in range(m-1, m+2):
if i == n and j == m:
continue
total += field[i%N, j%M]
cdef float dE = 2 * field[n, m] * total
if dE < 0:
field[n, m] *= -1 # switch
elif exp(-dE * beta) > rand()/RAND_MAX:
field[n, m] *= -1
[41]:
field = random_spin_field(200, 200)
%timeit ising_step(field)
%timeit cy_ising_step(field)
%timeit cy_ising_step_fast(field)
%timeit cy_ising_step_faster(field)
%timeit cy_ising_step_fastest(field)
417 ms ± 22.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
377 ms ± 19.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
63.9 ms ± 1.95 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
5.48 ms ± 134 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
5.47 ms ± 120 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)