Visualization

hvPlot

  • 以 Bokeh 為基礎的高階 Library

  • 轉成 PDF 會顯示不出來,但因為能互動,適合作研究用

    • 這裡說加下面這個就會好,但實測失敗

  • 把動畫放上 RTD 的例子,不過 code 在 GitLab 上

[ ]:
import holoviews as hv
# Load the HoloViews notebook extension, requesting the 'html' and 'png' display formats.
# NOTE(review): per the note above, this was the suggested fix for PDF export,
# but it did not work when tried.
hv.notebook_extension(display_formats=['html','png'])

Bokeh

This does not render on RTD. Possible solutions are described here, as mentioned in this issue.

[1]:
from bokeh.layouts import column
# from ipywidgets import interact
from bokeh.io import push_notebook, output_notebook, output_file, show
from bokeh.plotting import figure
from math import pi
from bokeh.models import CustomJS, ColumnDataSource, Slider
from sklearn.covariance import EmpiricalCovariance, MinCovDet
from pandas import DataFrame
import numpy as np

# Alternative output targets that were tried, kept for reference:
# output_file('mcd.html')
# output_notebook(hide_banner=True)
# Send Bokeh output to the notebook (banner left enabled).
output_notebook()

# Sample sizes: the clean point cloud and the pool of contaminating points.
n_inliers = 250
n_all_outliers = 20

# Seed NumPy's global generator so the sampled points are reproducible.
np.random.seed(20)

# Standard-normal draws pushed through two different 2x2 linear maps.
# Inliers are drawn before outliers so the random stream is consumed in a
# fixed order.
inliers_cov = [[2, 0], [0, 1]]
outliers_cov = [[1, 4], [4, 1]]
inliers = np.random.randn(n_inliers, 2).dot(inliers_cov)
outliers = np.random.randn(n_all_outliers, 2).dot(outliers_cov)

df_in = DataFrame(inliers, columns=['x', 'y'])

# The *_alpha columns are per-row opacity values; every row starts at 0.
df_out = DataFrame(outliers, columns=['x', 'y']).assign(
    pt_alpha=0,
    em_alpha=0,
    mcd_alpha=0,
)
# Row 0 starts with both ellipse alphas switched on.
df_out.loc[0, ['em_alpha', 'mcd_alpha']] = 1

def _ellipse_params(cov):
    """Return ``(height, width, theta)`` of the ellipse described by ``cov``.

    ``cov`` is a 2x2 covariance matrix. Its inverse is rotated by ``theta``
    so that it becomes diagonal; each axis length is then ``6 / sqrt(d)`` for
    the corresponding diagonal entry ``d``, i.e. three standard deviations on
    either side of the centre along that axis.
    """
    cov_inv = np.linalg.inv(cov)
    z11, z22, z12 = cov_inv[0, 0], cov_inv[1, 1], cov_inv[0, 1]
    # tan(2*theta) = 2*z12 / (z11 - z22) diagonalises cov_inv. arctan2 gives
    # an angle satisfying this without dividing by zero when z11 == z22; it
    # may differ from the arctan form by pi/2, which only swaps the roles of
    # width and height and therefore describes the same ellipse.
    theta = np.arctan2(2 * z12, z11 - z22) / 2
    rot = np.array([[np.cos(theta), -np.sin(theta)],
                    [np.sin(theta), np.cos(theta)]])
    wh = rot.transpose() @ cov_inv @ rot
    width, height = 6 / np.sqrt([wh[0, 0], wh[1, 1]])
    return height, width, theta


# One ellipse per contamination level, for each of the two estimators.
em_ellipse_param = []
mcd_ellipse_param = []
for n_outliers in range(n_all_outliers):
    # All inliers plus the first ``n_outliers`` outliers; shared by both fits.
    sample = np.append(inliers, outliers[:n_outliers, :], axis=0)

    # Empirical covariance is fitted first, then MCD, on every iteration.
    em_cov = EmpiricalCovariance(assume_centered=True).fit(sample).covariance_
    em_ellipse_param.append(_ellipse_params(em_cov))

    mcd_cov = MinCovDet(assume_centered=True).fit(sample).covariance_
    mcd_ellipse_param.append(_ellipse_params(mcd_cov))

# Row i of df_out carries the ellipses fitted with i outliers included.
df_out['em_h'], df_out['em_w'], df_out['em_th'] = np.array(em_ellipse_param).T
df_out['mcd_h'], df_out['mcd_w'], df_out['mcd_th'] = np.array(mcd_ellipse_param).T

# Wrap both frames so glyphs can refer to their columns by name.
src_in = ColumnDataSource(df_in)
src_out = ColumnDataSource(df_out)

TOOLTIPS = [("(x, y)", "(@x, @y)")]
p = figure(
    tools='box_zoom,reset,save',
    # tooltips=TOOLTIPS,  # hover tooltips are currently switched off
    # ``height``/``width`` replace ``plot_height``/``plot_width``, which
    # newer Bokeh releases no longer accept.
    height=400,
    width=400,
    x_range=(-10, 10),
    y_range=(-10, 10),
)

# ``scatter`` draws circle markers by default; ``circle(size=...)`` is
# deprecated in recent Bokeh releases.
p.scatter(x='x', y='y', fill_color='black', line_color=None, size=6, source=src_in)
# Outlier opacity comes from the ``pt_alpha`` column.
p.scatter(x='x', y='y', fill_color='red', line_color=None, size=6, source=src_out, fill_alpha='pt_alpha')

# One ellipse per row of src_out for each estimator; the ``*_alpha`` columns
# set each row's line opacity.
p.ellipse(x=0, y=0, width='mcd_w', height='mcd_h', angle='mcd_th', line_alpha='mcd_alpha',
          line_color='orange', fill_alpha=0, line_width=2.5, source=src_out)
p.ellipse(x=0, y=0, width='em_w', height='em_h', angle='em_th', line_alpha='em_alpha',
          line_color='green', fill_alpha=0, line_width=2.5, source=src_out)

# Single-point lines placed at x = -20, outside the visible x_range; they
# carry the legend labels, since the ellipse glyphs have none.
p.line(x=[-20], y=[0], line_width=2.5, color='orange', legend_label='MCD')
p.line(x=[-20], y=[0], line_width=2.5, color='green', legend_label='MLE')
p.legend.location = "top_right"

# Browser-side handler: for slider value n it sets pt_alpha to 1 for rows
# below n and 0 otherwise, zeroes both ellipse alphas, then sets them to 1
# on row n only.
callback_code = """
    var data = src_out.data
    var n_outliers = cb_obj.value
    var pt_alpha = data['pt_alpha']
    var em_alpha = data['em_alpha']
    var mcd_alpha = data['mcd_alpha']

    for (var i = 0; i < pt_alpha.length; i++) {
        em_alpha[i] = 0
        mcd_alpha[i] = 0
        if (i < n_outliers)
            pt_alpha[i] = 1
        else
            pt_alpha[i] = 0
    }
    em_alpha[n_outliers] = 1
    mcd_alpha[n_outliers] = 1

    src_out.change.emit()
"""
callback = CustomJS(args=dict(src_out=src_out), code=callback_code)

# The callback indexes rows by the slider value, and there is one row per
# outlier, so the largest valid value is n_all_outliers - 1. This is stated
# explicitly instead of reusing the loop variable left over from the
# ellipse-fitting loop.
slider = Slider(start=0, end=n_all_outliers - 1, value=0, step=1, title='n_outliers')
slider.js_on_change('value', callback)

layout = column(slider, p)

show(layout)
Loading BokehJS ...

ipywidgets

[1]:
%matplotlib inline
from ipywidgets import interactive
import matplotlib.pyplot as plt
import numpy as np

def f(m, b):
    """Draw the line ``y = m * x + b`` on figure 2.

    The line is sampled at 1000 points for x in [-10, 10] and the y-axis is
    limited to [-5, 5].
    """
    xs = np.linspace(-10, 10, num=1000)
    ys = m * xs + b

    plt.figure(2)
    plt.plot(xs, ys)
    plt.ylim(-5, 5)
    plt.show()

# Build controls for f: m over (-2.0, 2.0), b over (-3, 3) with step 0.5.
interactive_plot = interactive(f, m=(-2.0, 2.0), b=(-3, 3, 0.5))
# Take the last child of the widget container and give it a fixed height.
# NOTE(review): presumably this child is the plot output area and the fixed
# height stops the layout jumping on redraw; confirm.
output = interactive_plot.children[-1]
output.layout.height = '350px'
# Bare expression as the final statement of the cell.
interactive_plot

SGT

Theodossiou’s Skewed Generalized \(t\) Distribution. See this notebook for cdf, ppf and more.

[24]:
%matplotlib inline

from pandas import DataFrame
from scipy.special import beta
from ipywidgets import interact
import matplotlib.pyplot as plt
import numpy as np

def pdf(x, mu, k, n, lam, sigma2):
    """Evaluate the skewed generalized t density at ``x``.

    Parameter constraints: k > 0, n > 2, sigma2 > 0, -1 < lam < 1.

    ``x`` enters only through the deviation ``x - mu``; ``sigma2`` is used
    through its square root; ``lam`` rescales the deviation by
    ``1 + sign(x - mu) * lam``, so the two sides of ``mu`` are scaled
    differently.
    """
    # The three beta-function values that the constants below are built from.
    b1 = beta(1/k, n/k)
    b2 = beta(2/k, (n-1)/k)
    b3 = beta(3/k, (n-2)/k)

    lam_sq = lam**2
    sigma = np.sqrt(sigma2)

    # Skewness-dependent term shared by the normalising constant and theta.
    skew_term = np.sqrt(1 + 3*lam_sq - 4*lam_sq*((b2**2)/(b1*b3)))
    norm_const = 0.5*k*np.sqrt((b1**(-3))*b3)*skew_term/sigma
    theta = ((k/(n-2))**(1/k))*np.sqrt(b1/b3)/skew_term

    deviation = x - mu
    scaled = np.abs(deviation)/(sigma*theta*(1 + np.sign(deviation)*lam))
    return norm_const*(1+(k/(n-2))*scaled**k)**(-(n+1)/k)

@interact(mu=(-3, 3, 0.1), k=(0.1, 5, 0.1), n=(2.1, 10, 0.1), sigma2=(0.1, 3, 0.1), lam=(-0.9, 0.9, 0.1))
def f(mu=0, k=0.5, n=2.5, sigma2=0.1, lam=0):
    """Plot ``pdf`` on 500 points in [-2, 2] for the selected parameters.

    The ranges passed to ``interact`` and the defaults here mirror the
    constraints stated in ``pdf``. The axes are fixed to x in [-2, 2] and
    y in [0, 20].
    """
    xs = np.linspace(-2, 2, 500)
    # pdf() is built from NumPy element-wise operations, so the whole grid is
    # evaluated in a single call rather than one Python call per point.
    curve = DataFrame({'x': xs, 'pdf': pdf(xs, mu, k, n, lam, sigma2)}).set_index('x')

    _, ax = plt.subplots()
    curve.plot(ax=ax)
    ax.set(xlim=(-2, 2), ylim=(0, 20))
    plt.show()