{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Python Advanced" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "\n", "def table(table_name): \n", " return pd.read_csv(f'./tables/{table_name}.csv').fillna('')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Slicing Time Complexity\n", "\n", "* NumPy slicing is view, while native python list and str slicing is copy\n", " * 跑 `arr[j:i]` 時 NumPy 不會建立新的資料,而是建立一個指向原始陣列的 view\n", " * 這個視圖只改變了 shape 和 strides(步幅),不會複製底層資料:`O(1)`\n", " * 若你使用 advanced indexing 如 `arr[[1, 3, 5]]`,就會建立新的陣列:`O(k)`\n", "\n", "| 類型 | 切片結果 | Time | 備註 |\n", "|-----------|---------------|------------|--------------------------|\n", "| `str` / `list` | copy | `O(k)` | 複製 `k` 個元素 |\n", "| `numpy.array` | view | `O(1)` | 不複製資料,只改 metadata |" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Multithreading and Multiprocessing" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "* See [this SO post](https://stackoverflow.com/questions/3044580/multiprocessing-vs-threading-python). Threads run in the same memory space, while processes have separate memory. 一個 process 有自己的獨立的記憶體,甚至 IO\n", "* 在 windows,multiprocessing 會很慢,因為每個 process 都重新 new 一個 python interpreter session,在 Unix-like systems 不用。看[這個 SO post](https://stackoverflow.com/questions/28744046/multiprocessing-python-not-running-in-parallel)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | Multithreading | \n", "Multiprocessing | \n", "
|---|---|---|
| 0 | \n", "light weight | \n", "heavy, more memory overhead | \n", "
| 1 | \n", "share memory | \n", "isolated | \n", "
| 2 | \n", "easy to communicate | \n", "hard | \n", "
| 3 | \n", "safety concern (race condition, deadlocks) | \n", "safe | \n", "
| 4 | \n", "good for I/O bound tasks | \n", "good for CPU bound tasks | \n", "
| \n", " | Thread 1 | \n", "Thread 2 | \n", "\n", " | Integer value | \n", "
|---|---|---|---|---|
| 0 | \n", "\n", " | \n", " | \n", " | 0 | \n", "
| 1 | \n", "read value | \n", "\n", " | ← | \n", "0 | \n", "
| 2 | \n", "\n", " | read value | \n", "← | \n", "0 | \n", "
| 3 | \n", "increase value | \n", "\n", " | \n", " | 0 | \n", "
| 4 | \n", "\n", " | increase value | \n", "\n", " | 0 | \n", "
| 5 | \n", "write back | \n", "\n", " | → | \n", "1 | \n", "
| 6 | \n", "\n", " | write back | \n", "→ | \n", "1 | \n", "
| \n", " | 符號 | \n", "意義 | \n", "等價於 | \n", "範例 | \n", "
|---|---|---|---|---|
| 0 | \n", "\\d | \n", "數字(digit) | \n", "[0-9] | \n", "re.findall(r\"\\d\", \"A1B2\") → ['1', '2'] | \n", "
| 1 | \n", "\\s | \n", "空白(whitespace) | \n", "[ \\t\\n\\r\\f\\v] | \n", "re.findall(r\"\\s\", \"a b\\tc\\n\") → [' ', '\\t', '\\n'] | \n", "
| 2 | \n", "\\w | \n", "英數底線(word char) | \n", "[a-zA-Z0-9_] | \n", "re.findall(r\"\\w\", \"_Hi123\") → ['_', 'H', 'i', ... | \n", "
| 3 | \n", "\n", " | \n", " | \n", " | \n", " |
| 4 | \n", "\\D | \n", "非數字 | \n", "[^0-9] | \n", "re.findall(r\"\\D\", \"A1!\") → ['A', '!'] | \n", "
| 5 | \n", "\\S | \n", "非空白 | \n", "[^ \\t\\n\\r\\f\\v] | \n", "re.findall(r\"\\S\", \"a b\") → ['a', 'b'] | \n", "
| 6 | \n", "\\W | \n", "非英數底線 | \n", "[^a-zA-Z0-9_] | \n", "re.findall(r\"\\W\", \"!@#^\") → ['!', '@', '#', '^'] | \n", "
Generated by Cython 0.29.24
\n", "\n",
" Yellow lines hint at Python interaction.
\n",
" Click on a line that starts with a \"+\" to see the C code that Cython generated for it.\n",
"
+1: cpdef double cyfac_loop(int n):\n", "
static PyObject *__pyx_pw_46_cython_magic_f1b0bfaa9dd99dd25796948e61b32169_1cyfac_loop(PyObject *__pyx_self, PyObject *__pyx_arg_n); /*proto*/\n",
"static double __pyx_f_46_cython_magic_f1b0bfaa9dd99dd25796948e61b32169_cyfac_loop(int __pyx_v_n, CYTHON_UNUSED int __pyx_skip_dispatch) {\n",
" double __pyx_v_r;\n",
" int __pyx_v_i;\n",
" double __pyx_r;\n",
" __Pyx_RefNannyDeclarations\n",
" __Pyx_RefNannySetupContext(\"cyfac_loop\", 0);\n",
"/* … */\n",
" /* function exit code */\n",
" __pyx_L0:;\n",
" __Pyx_RefNannyFinishContext();\n",
" return __pyx_r;\n",
"}\n",
"\n",
"/* Python wrapper */\n",
"static PyObject *__pyx_pw_46_cython_magic_f1b0bfaa9dd99dd25796948e61b32169_1cyfac_loop(PyObject *__pyx_self, PyObject *__pyx_arg_n); /*proto*/\n",
"static PyObject *__pyx_pw_46_cython_magic_f1b0bfaa9dd99dd25796948e61b32169_1cyfac_loop(PyObject *__pyx_self, PyObject *__pyx_arg_n) {\n",
" int __pyx_v_n;\n",
" PyObject *__pyx_r = 0;\n",
" __Pyx_RefNannyDeclarations\n",
" __Pyx_RefNannySetupContext(\"cyfac_loop (wrapper)\", 0);\n",
" assert(__pyx_arg_n); {\n",
" __pyx_v_n = __Pyx_PyInt_As_int(__pyx_arg_n); if (unlikely((__pyx_v_n == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 1, __pyx_L3_error)\n",
" }\n",
" goto __pyx_L4_argument_unpacking_done;\n",
" __pyx_L3_error:;\n",
" __Pyx_AddTraceback(\"_cython_magic_f1b0bfaa9dd99dd25796948e61b32169.cyfac_loop\", __pyx_clineno, __pyx_lineno, __pyx_filename);\n",
" __Pyx_RefNannyFinishContext();\n",
" return NULL;\n",
" __pyx_L4_argument_unpacking_done:;\n",
" __pyx_r = __pyx_pf_46_cython_magic_f1b0bfaa9dd99dd25796948e61b32169_cyfac_loop(__pyx_self, ((int)__pyx_v_n));\n",
" int __pyx_lineno = 0;\n",
" const char *__pyx_filename = NULL;\n",
" int __pyx_clineno = 0;\n",
"\n",
" /* function exit code */\n",
" __Pyx_RefNannyFinishContext();\n",
" return __pyx_r;\n",
"}\n",
"\n",
"static PyObject *__pyx_pf_46_cython_magic_f1b0bfaa9dd99dd25796948e61b32169_cyfac_loop(CYTHON_UNUSED PyObject *__pyx_self, int __pyx_v_n) {\n",
" PyObject *__pyx_r = NULL;\n",
" __Pyx_RefNannyDeclarations\n",
" __Pyx_RefNannySetupContext(\"cyfac_loop\", 0);\n",
" __Pyx_XDECREF(__pyx_r);\n",
" __pyx_t_1 = PyFloat_FromDouble(__pyx_f_46_cython_magic_f1b0bfaa9dd99dd25796948e61b32169_cyfac_loop(__pyx_v_n, 0)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error)\n",
" __Pyx_GOTREF(__pyx_t_1);\n",
" __pyx_r = __pyx_t_1;\n",
" __pyx_t_1 = 0;\n",
" goto __pyx_L0;\n",
"\n",
" /* function exit code */\n",
" __pyx_L1_error:;\n",
" __Pyx_XDECREF(__pyx_t_1);\n",
" __Pyx_AddTraceback(\"_cython_magic_f1b0bfaa9dd99dd25796948e61b32169.cyfac_loop\", __pyx_clineno, __pyx_lineno, __pyx_filename);\n",
" __pyx_r = NULL;\n",
" __pyx_L0:;\n",
" __Pyx_XGIVEREF(__pyx_r);\n",
" __Pyx_RefNannyFinishContext();\n",
" return __pyx_r;\n",
"}\n",
"+2: cdef double r = 1.0\n", "
__pyx_v_r = 1.0;\n",
"3: cdef int i\n", "
+4: for i in range(1, n+1):\n", "
__pyx_t_1 = (__pyx_v_n + 1);\n",
" __pyx_t_2 = __pyx_t_1;\n",
" for (__pyx_t_3 = 1; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) {\n",
" __pyx_v_i = __pyx_t_3;\n",
"+5: r *= <double>i\n", "
__pyx_v_r = (__pyx_v_r * ((double)__pyx_v_i));\n",
" }\n",
"+6: return r\n", "
__pyx_r = __pyx_v_r;\n",
" goto __pyx_L0;\n",
"