Cython

Cython is a superset of python programming language that gives access into c/c++ constructs. - Cython acts as a bridge between Python and C/C++.

Load cython notebook extension (and i have already installed cython )

[1]:
pip install Cython
Requirement already satisfied: Cython in /srv/conda/envs/notebook/lib/python3.7/site-packages (0.29.24)
Note: you may need to restart the kernel to use updated packages.
[2]:
%load_ext cython
[3]:
%%cython
'''Inside this cell is Cython Code only
'''

def first_cython_function(int i):
    return i * 3.1415926;

[4]:
# use cython function in a separate (python) cell
first_cython_function(10)
[4]:
31.415926
[5]:
%%cython?
Docstring:
::

  %cython [-a] [-+] [-3] [-2] [-f] [-c COMPILE_ARGS]
              [--link-args LINK_ARGS] [-l LIB] [-n NAME] [-L dir] [-I INCLUDE]
              [-S SRC] [--pgo] [--verbose]

Compile and import everything from a Cython code cell.

The contents of the cell are written to a `.pyx` file in the
directory `IPYTHONDIR/cython` using a filename with the hash of the
code. This file is then cythonized and compiled. The resulting module
is imported and all of its symbols are injected into the user's
namespace. The usage is similar to that of `%%cython_pyximport` but
you don't have to pass a module name::

    %%cython
    def f(x):
        return 2.0*x

To compile OpenMP codes, pass the required  `--compile-args`
and `--link-args`.  For example with gcc::

    %%cython --compile-args=-fopenmp --link-args=-fopenmp
    ...

To enable profile guided optimisation, pass the ``--pgo`` option.
Note that the cell itself needs to take care of establishing a suitable
profile when executed. This can be done by implementing the functions to
optimise, and then calling them directly in the same cell on some realistic
training data like this::

    %%cython --pgo
    def critical_function(data):
        for item in data:
            ...

    # execute function several times to build profile
    from somewhere import some_typical_data
    for _ in range(100):
        critical_function(some_typical_data)

In Python 3.5 and later, you can distinguish between the profile and
non-profile runs as follows::

    if "_pgo_" in __name__:
        ...  # execute critical code here

optional arguments:
  -a, --annotate        Produce a colorized HTML version of the source.
  -+, --cplus           Output a C++ rather than C file.
  -3                    Select Python 3 syntax.
  -2                    Select Python 2 syntax.
  -f, --force           Force the compilation of a new module, even if the
                        source has been previously compiled.
  -c COMPILE_ARGS, --compile-args COMPILE_ARGS
                        Extra flags to pass to compiler via the
                        `extra_compile_args` Extension flag (can be specified
                        multiple times).
  --link-args LINK_ARGS
                        Extra flags to pass to linker via the
                        `extra_link_args` Extension flag (can be specified
                        multiple times).
  -l LIB, --lib LIB     Add a library to link the extension against (can be
                        specified multiple times).
  -n NAME, --name NAME  Specify a name for the Cython module.
  -L dir                Add a path to the list of library directories (can be
                        specified multiple times).
  -I INCLUDE, --include INCLUDE
                        Add a path to the list of include directories (can be
                        specified multiple times).
  -S SRC, --src SRC     Add a path to the list of src files (can be specified
                        multiple times).
  --pgo                 Enable profile guided optimisation in the C compiler.
                        Compiles the cell twice and executes it in between to
                        generate a runtime profile.
  --verbose             Print debug information like generated .c/.cpp file
                        location and exact gcc/g++ command invoked.
File:      /srv/conda/envs/notebook/lib/python3.7/site-packages/Cython/Build/IpythonMagic.py

[6]:
def foo(a, b):
    return a + b
[7]:
from dis import dis
# disassebles compiled python objects
dis(foo)
  2           0 LOAD_FAST                0 (a)
              2 LOAD_FAST                1 (b)
              4 BINARY_ADD
              6 RETURN_VALUE
[8]:
%%cython
def cyfoo(a, b):
    return a + b
[9]:
%timeit foo(1000000, 2000000)
120 ns ± 3.57 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)
[10]:
%timeit cyfoo(1000000, 2000000)
110 ns ± 2.29 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)
[11]:
%timeit foo('x', 'y')
139 ns ± 3.65 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)
[12]:
%timeit cyfoo('x', 'y')
138 ns ± 5.23 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)
[13]:
def pyfac(n):
    if n <= 1:
        return 1
    return n * pyfac(n -1)
[14]:
%timeit pyfac(20.0)
3.51 µs ± 151 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
[44]:
%%cython

def cyfac(n):
    if n <= 1:
        return 1
    return n * cyfac(n-1)

def cyfac_double(double n):
    if n <= 1:
        return 1.0
    return n * cyfac_double(n-1)
[16]:
%timeit cyfac(20.0)
1.46 µs ± 47.7 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
[17]:
%timeit cyfac_double(20.0)
914 ns ± 71 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
[43]:
%%cython -a

cpdef double cyfac_double_fast(double n):
    if n <= 1:
        return 1.0
    return n * cyfac_double_fast(n - 1)
[43]:
Cython: _cython_magic_88dfdb3a94d86c859cfd2127a1590a68.pyx

Generated by Cython 0.29.24

Yellow lines hint at Python interaction.
Click on a line that starts with a "+" to see the C code that Cython generated for it.

 1: 
+2: cpdef double cyfac_double_fast(double n):
static PyObject *__pyx_pw_46_cython_magic_88dfdb3a94d86c859cfd2127a1590a68_1cyfac_double_fast(PyObject *__pyx_self, PyObject *__pyx_arg_n); /*proto*/
static double __pyx_f_46_cython_magic_88dfdb3a94d86c859cfd2127a1590a68_cyfac_double_fast(double __pyx_v_n, CYTHON_UNUSED int __pyx_skip_dispatch) {
  double __pyx_r;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("cyfac_double_fast", 0);
/* … */
  /* function exit code */
  __pyx_L0:;
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

/* Python wrapper */
static PyObject *__pyx_pw_46_cython_magic_88dfdb3a94d86c859cfd2127a1590a68_1cyfac_double_fast(PyObject *__pyx_self, PyObject *__pyx_arg_n); /*proto*/
static PyObject *__pyx_pw_46_cython_magic_88dfdb3a94d86c859cfd2127a1590a68_1cyfac_double_fast(PyObject *__pyx_self, PyObject *__pyx_arg_n) {
  double __pyx_v_n;
  PyObject *__pyx_r = 0;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("cyfac_double_fast (wrapper)", 0);
  assert(__pyx_arg_n); {
    __pyx_v_n = __pyx_PyFloat_AsDouble(__pyx_arg_n); if (unlikely((__pyx_v_n == (double)-1) && PyErr_Occurred())) __PYX_ERR(0, 2, __pyx_L3_error)
  }
  goto __pyx_L4_argument_unpacking_done;
  __pyx_L3_error:;
  __Pyx_AddTraceback("_cython_magic_88dfdb3a94d86c859cfd2127a1590a68.cyfac_double_fast", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __Pyx_RefNannyFinishContext();
  return NULL;
  __pyx_L4_argument_unpacking_done:;
  __pyx_r = __pyx_pf_46_cython_magic_88dfdb3a94d86c859cfd2127a1590a68_cyfac_double_fast(__pyx_self, ((double)__pyx_v_n));
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;

  /* function exit code */
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}

static PyObject *__pyx_pf_46_cython_magic_88dfdb3a94d86c859cfd2127a1590a68_cyfac_double_fast(CYTHON_UNUSED PyObject *__pyx_self, double __pyx_v_n) {
  PyObject *__pyx_r = NULL;
  __Pyx_RefNannyDeclarations
  __Pyx_RefNannySetupContext("cyfac_double_fast", 0);
  __Pyx_XDECREF(__pyx_r);
  __pyx_t_1 = PyFloat_FromDouble(__pyx_f_46_cython_magic_88dfdb3a94d86c859cfd2127a1590a68_cyfac_double_fast(__pyx_v_n, 0)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 2, __pyx_L1_error)
  __Pyx_GOTREF(__pyx_t_1);
  __pyx_r = __pyx_t_1;
  __pyx_t_1 = 0;
  goto __pyx_L0;

  /* function exit code */
  __pyx_L1_error:;
  __Pyx_XDECREF(__pyx_t_1);
  __Pyx_AddTraceback("_cython_magic_88dfdb3a94d86c859cfd2127a1590a68.cyfac_double_fast", __pyx_clineno, __pyx_lineno, __pyx_filename);
  __pyx_r = NULL;
  __pyx_L0:;
  __Pyx_XGIVEREF(__pyx_r);
  __Pyx_RefNannyFinishContext();
  return __pyx_r;
}
+3:     if n <= 1:
  __pyx_t_1 = ((__pyx_v_n <= 1.0) != 0);
  if (__pyx_t_1) {
/* … */
  }
+4:         return 1.0
    __pyx_r = 1.0;
    goto __pyx_L0;
+5:     return n * cyfac_double_fast(n - 1)
  __pyx_r = (__pyx_v_n * __pyx_f_46_cython_magic_88dfdb3a94d86c859cfd2127a1590a68_cyfac_double_fast((__pyx_v_n - 1.0), 0));
  goto __pyx_L0;
[19]:
%timeit cyfac_double_fast(20.0)
127 ns ± 2.17 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)

Integral Types

[20]:
%%cython

# cdef is an directive , telling objects are c objects
cdef:
    int i = 0
    unsigned long j = 1
    signed short k = -3
    bint flag = True
    long long ll = 1LL
    float a = 1.0
    double b = -2.0
    long double c= 1e5
    str s = "abc"


print(i, j, k, ll, flag, a, b, c, s)
0 1 -3 1 True 1.0 -2.0 100000.0 abc
[21]:
%%cython

import datetime
cimport cpython.datetime

import array
cimport cpython.array

Example spin field

[22]:
import numpy as np
[23]:
def random_spin_field(N, M):
    return np.random.choice([-1, 1], size = (N, M))
[24]:
field = random_spin_field(10, 10)
field
[24]:
array([[-1, -1, -1, -1, -1,  1, -1, -1,  1, -1],
       [-1,  1, -1,  1, -1,  1,  1, -1,  1, -1],
       [-1, -1,  1, -1, -1,  1, -1,  1, -1,  1],
       [ 1, -1, -1, -1, -1, -1,  1,  1,  1, -1],
       [-1, -1, -1,  1, -1,  1,  1,  1,  1,  1],
       [-1,  1, -1,  1, -1, -1,  1, -1,  1, -1],
       [ 1,  1, -1,  1,  1,  1,  1, -1,  1,  1],
       [-1, -1,  1,  1, -1,  1,  1, -1,  1, -1],
       [ 1, -1,  1, -1, -1, -1,  1, -1, -1,  1],
       [ 1,  1, -1,  1, -1, -1,  1, -1,  1, -1]])
[25]:
# pip install pillow
from PIL import Image
[26]:
def display_spin_field(field):
    return Image.fromarray(np.uint8((field +1 )* 0.5 * 255)) # expects  nu. between 0 and 255
[27]:
display_spin_field(random_spin_field(200,200))
[27]:
../_images/cs_cython_31_0.png
### Ising model logic For every point in grid: energy = my spin * sum of all spins (+1 or -1) of neighbouring points if energy is improved by switching: switch else if we are particularly unlucky: switch anyway
[28]:
def ising_step(field, beta=0.5):
    N, M = field.shape
    for n_offset in range(2):
        for m_offset in range(2):
            for n in range(n_offset, N, 2):
                for m in range(m_offset, M, 2):
                    _ising_update(field, n, m, beta)
    return field


def _ising_update(field, n, m, beta):
    total = 0
    N, M  = field.shape
    for i in range(n-1, n+2):
        for j in range(m-1, m+2):
            if i == n and j == m:
                continue
            total += field[i%N, j%M]
    dE = 2 * field[n, m] * total
    if dE < 0:
        field[n, m] *= -1 # switch
    elif np.exp(-dE * beta) > np.random.rand():
        field[n, m] *= -1
[29]:
display_spin_field(ising_step(random_spin_field(200, 200)))
[29]:
../_images/cs_cython_34_0.png
[30]:
# Animate using ipywidgets
[31]:
from ipywidgets import interact
[32]:
def display_ising_sequence(images):
    def _show(frame=(0, len(images) -1)):
        return display_spin_field(images[frame])
    return interact(_show)
[33]:
images = [random_spin_field(200, 200)]
for i in range(50):
    images.append(ising_step(images[-1].copy()))
display_ising_sequence(images)
[33]:
<function __main__.display_ising_sequence.<locals>._show(frame=(0, 50))>
[34]:
%%cython
import numpy as np
cimport numpy as np # gives you access to Numpy C API
def cy_ising_step(field, beta=0.5):
    N, M = field.shape
    for n_offset in range(2):
        for m_offset in range(2):
            for n in range(n_offset, N, 2):
                for m in range(m_offset, M, 2):
                    _cy_ising_update(field, n, m, beta)
    return field


def _cy_ising_update(field, n, m, beta):
    total = 0
    N, M  = field.shape
    for i in range(n-1, n+2):
        for j in range(m-1, m+2):
            if i == n and j == m:
                continue
            total += field[i%N, j%M]
    dE = 2 * field[n, m] * total
    if dE < 0:
        field[n, m] *= -1 # switch
    elif np.exp(-dE * beta) > np.random.rand():
        field[n, m] *= -1
[35]:
field = random_spin_field(200, 200)

%timeit ising_step(field)

%timeit cy_ising_step(field)
417 ms ± 21.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
393 ms ± 33.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
[36]:
%%cython
import numpy as np # gives access to python functions
cimport numpy as np # gives you access to Numpy C API

def cy_ising_step_fast(long[:, :]field, float beta=0.5):
    cdef int N = field.shape[0]
    cdef int M = field.shape[1]
    cdef  int n_offset, m_offset, n, m

    for n_offset in range(2):
        for m_offset in range(2):
            for n in range(n_offset, N, 2):
                for m in range(m_offset, M, 2):
                    _cy_ising_update_fast(field, n, m, beta)
    return field


cdef _cy_ising_update_fast(long[:,:]field, int n, int m, float beta):
    cdef int total = 0
    cdef int N = field.shape[0]
    cdef int M = field.shape[1]
    cdef int i, j
    for i in range(n-1, n+2):
        for j in range(m-1, m+2):
            if i == n and j == m:
                continue
            total += field[i%N, j%M]
    cdef float dE = 2 * field[n, m] * total
    if dE < 0:
        field[n, m] *= -1 # switch
    elif np.exp(-dE * beta) > np.random.rand():
        field[n, m] *= -1
[37]:
field = random_spin_field(200, 200)

%timeit ising_step(field)

%timeit cy_ising_step(field)

%timeit cy_ising_step_fast(field)
414 ms ± 10.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
382 ms ± 15.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
64.5 ms ± 2.41 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
[38]:
%%cython
import numpy as np # gives access to python functions
cimport numpy as np # gives you access to Numpy C API

from libc.math cimport exp
from libc.stdlib cimport rand
cdef extern from "limits.h":
    int RAND_MAX

def cy_ising_step_faster(long[:, :]field, float beta=0.5):
    cdef int N = field.shape[0]
    cdef int M = field.shape[1]
    cdef  int n_offset, m_offset, n, m

    for n_offset in range(2):
        for m_offset in range(2):
            for n in range(n_offset, N, 2):
                for m in range(m_offset, M, 2):
                    _cy_ising_update_faster(field, n, m, beta)
    return field


cdef _cy_ising_update_faster(long[:,:]field, int n, int m, float beta):
    cdef int total = 0
    cdef int N = field.shape[0]
    cdef int M = field.shape[1]
    cdef int i, j
    for i in range(n-1, n+2):
        for j in range(m-1, m+2):
            if i == n and j == m:
                continue
            total += field[i%N, j%M]
    cdef float dE = 2 * field[n, m] * total
    if dE < 0:
        field[n, m] *= -1 # switch
    elif exp(-dE * beta) > rand()/RAND_MAX:
        field[n, m] *= -1
[39]:
field = random_spin_field(200, 200)

%timeit ising_step(field)

%timeit cy_ising_step(field)

%timeit cy_ising_step_fast(field)

%timeit cy_ising_step_faster(field)
418 ms ± 13.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
386 ms ± 18.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
63.6 ms ± 1.61 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
5.54 ms ± 259 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
[40]:
%%cython
import numpy as np # gives access to python functions
cimport numpy as np # gives you access to Numpy C API
cimport cython

from libc.math cimport exp
from libc.stdlib cimport rand
cdef extern from "limits.h":
    int RAND_MAX

@cython.boundscheck(False) # dont check index boundaries
@cython.wraparound(False) # can not use negative index
def cy_ising_step_fastest(long[:, :]field, float beta=0.5):
    cdef int N = field.shape[0]
    cdef int M = field.shape[1]
    cdef  int n_offset, m_offset, n, m

    for n_offset in range(2):
        for m_offset in range(2):
            for n in range(n_offset, N, 2):
                for m in range(m_offset, M, 2):
                    _cy_ising_update_fastest(field, n, m, beta)
    return field


cdef _cy_ising_update_fastest(long[:,:]field, int n, int m, float beta):
    cdef int total = 0
    cdef int N = field.shape[0]
    cdef int M = field.shape[1]
    cdef int i, j
    for i in range(n-1, n+2):
        for j in range(m-1, m+2):
            if i == n and j == m:
                continue
            total += field[i%N, j%M]
    cdef float dE = 2 * field[n, m] * total
    if dE < 0:
        field[n, m] *= -1 # switch
    elif exp(-dE * beta) > rand()/RAND_MAX:
        field[n, m] *= -1
[41]:
field = random_spin_field(200, 200)

%timeit ising_step(field)

%timeit cy_ising_step(field)

%timeit cy_ising_step_fast(field)

%timeit cy_ising_step_faster(field)

%timeit cy_ising_step_fastest(field)
417 ms ± 22.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
377 ms ± 19.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
63.9 ms ± 1.95 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
5.48 ms ± 134 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
5.47 ms ± 120 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)