Visualization
hvPlot
以 Bokeh 為基礎的高階 Library
轉成 PDF 會顯示不出來,但因為能互動,適合作研究用
這裡說加下面這個就會好,但實測失敗
把動畫放上 RTD 的例子,不過 code 在 GitLab 上
[ ]:
import holoviews as hv
# Load the HoloViews notebook extension, requesting both HTML and PNG
# display formats.
# NOTE(review): presumably the PNG format is meant as a static fallback for
# non-interactive exports such as PDF; the note above this cell reports that
# it did not help in practice.
hv.notebook_extension(display_formats=['html','png'])
Bokeh
This plot does not render on Read the Docs (RTD). Possible solutions are described here, as mentioned in this issue.
[1]:
from bokeh.layouts import column
# from ipywidgets import interact
from bokeh.io import push_notebook, output_notebook, output_file, show
from bokeh.plotting import figure
from math import pi
from bokeh.models import CustomJS, ColumnDataSource, Slider
from sklearn.covariance import EmpiricalCovariance, MinCovDet
from pandas import DataFrame
import numpy as np
# Alternative output targets, kept for reference:
# output_file('mcd.html')
# output_notebook(hide_banner=True)
# Send Bokeh output to the notebook so that show() renders inline.
output_notebook()
# --- Synthetic data: a cloud of inliers plus a handful of outliers ----------
n_inliers = 250
n_all_outliers = 20
np.random.seed(20)  # fixed seed so the figure is reproducible

# Matrices applied to standard-normal draws. Despite the "_cov" names they
# are used here as linear maps (right-multiplied onto the samples).
inliers_cov = [[2, 0], [0, 1]]
outliers_cov = [[1, 4], [4, 1]]

# Draw order matters for reproducibility: inliers first, then outliers.
inliers = np.random.randn(n_inliers, 2).dot(inliers_cov)
outliers = np.random.randn(n_all_outliers, 2).dot(outliers_cov)

df_in = DataFrame(inliers, columns=['x', 'y'])

# Each df_out row carries one outlier point plus three alpha flags:
# pt_alpha for the point itself, em_alpha / mcd_alpha for the ellipse stored
# in the same row. Everything starts hidden except the two ellipses in row 0.
df_out = DataFrame(outliers, columns=['x', 'y']).assign(
    pt_alpha=0,
    em_alpha=0,
    mcd_alpha=0,
)
df_out.loc[0, ['em_alpha', 'mcd_alpha']] = 1
def _ellipse_params(cov):
    """Return ``(height, width, theta)`` of the ellipse described by *cov*.

    The ellipse is the origin-centred contour ``x^T inv(cov) x = 9``: each
    full axis length is ``6 / sqrt(d)`` where ``d`` is the matching diagonal
    entry of the rotated inverse covariance (i.e. a radius of ``3 / sqrt(d)``).

    ``theta`` (radians) is the rotation that zeroes the off-diagonal term of
    the 2x2 inverse covariance. ``width`` is measured along the rotated
    x-axis and ``height`` along the rotated y-axis.
    """
    cov_inv = np.linalg.inv(cov)
    z11, z22, z12 = cov_inv[0, 0], cov_inv[1, 1], cov_inv[0, 1]
    # tan(2*theta) = 2*z12 / (z11 - z22) removes the cross term.
    theta = np.arctan(2*z12/(z11-z22))/2
    rot = np.array([[np.cos(theta), -np.sin(theta)],
                    [np.sin(theta), np.cos(theta)]])
    wh = rot.transpose() @ cov_inv @ rot
    width, height = 6/np.sqrt([wh[0, 0], wh[1, 1]])
    return height, width, theta


# One (height, width, theta) triple per contamination level, for the
# empirical (maximum-likelihood) estimate and for the MCD estimate.
em_ellipse_param = []
mcd_ellipse_param = []
# NOTE: the loop variable keeps its original name because code after the loop
# may still read `n_outliers`.
for n_outliers in range(n_all_outliers):
    # All inliers plus the first n_outliers outliers (none when n_outliers == 0).
    sample = np.append(inliers, outliers[:n_outliers, :], axis=0)

    # The empirical fit is done before the MCD fit, as in the original order,
    # in case the MCD estimator draws from the global NumPy random state.
    em_cov = EmpiricalCovariance(assume_centered=True).fit(sample).covariance_
    em_ellipse_param.append(_ellipse_params(em_cov))

    mcd_cov = MinCovDet(assume_centered=True).fit(sample).covariance_
    mcd_ellipse_param.append(_ellipse_params(mcd_cov))

# Row i of df_out stores the ellipses fitted with i outliers included.
df_out['em_h'], df_out['em_w'], df_out['em_th'] = np.array(em_ellipse_param).T
df_out['mcd_h'], df_out['mcd_w'], df_out['mcd_th'] = np.array(mcd_ellipse_param).T
# Tooltip spec; only takes effect if the `tooltips=` option below is re-enabled.
TOOLTIPS = [("(x, y)", "(@x, @y)")]

# Column data sources let the glyphs below refer to DataFrame columns by name.
src_in = ColumnDataSource(df_in)
src_out = ColumnDataSource(df_out)

p = figure(
    tools='box_zoom,reset,save',
    # tooltips=TOOLTIPS,
    plot_height=400,
    plot_width=400,
    x_range=(-10, 10),
    y_range=(-10, 10),
)

# Inliers are always visible; each outlier's opacity comes from its
# 'pt_alpha' column.
p.circle(
    x='x', y='y', size=6,
    fill_color='black', line_color=None,
    source=src_in,
)
p.circle(
    x='x', y='y', size=6,
    fill_color='red', line_color=None, fill_alpha='pt_alpha',
    source=src_out,
)

# One unfilled, origin-centred ellipse per src_out row for each estimator;
# the '*_alpha' columns decide which row's outline is actually drawn.
p.ellipse(
    x=0, y=0,
    width='mcd_w', height='mcd_h', angle='mcd_th',
    line_color='orange', line_alpha='mcd_alpha', line_width=2.5,
    fill_alpha=0,
    source=src_out,
)
p.ellipse(
    x=0, y=0,
    width='em_w', height='em_h', angle='em_th',
    line_color='green', line_alpha='em_alpha', line_width=2.5,
    fill_alpha=0,
    source=src_out,
)

# Single-point lines at x = -20, outside the x_range, used only to create
# the two legend entries.
p.line(x=[-20], y=[0], color='orange', line_width=2.5, legend_label='MCD')
p.line(x=[-20], y=[0], color='green', line_width=2.5, legend_label='MLE')
p.legend.location = "top_right"
# Browser-side handler run on every slider change. It hides both ellipses in
# every row, shows outlier points with index below the slider value, then
# shows the two ellipses stored in the row whose index equals the slider value.
callback_code = """
var data = src_out.data
var n_outliers = cb_obj.value
var pt_alpha = data['pt_alpha']
var em_alpha = data['em_alpha']
var mcd_alpha = data['mcd_alpha']
for (var i = 0; i < pt_alpha.length; i++) {
em_alpha[i] = 0
mcd_alpha[i] = 0
if (i < n_outliers)
pt_alpha[i] = 1
else
pt_alpha[i] = 0
}
em_alpha[n_outliers] = 1
mcd_alpha[n_outliers] = 1
src_out.change.emit()
"""
callback = CustomJS(args=dict(src_out=src_out), code=callback_code)

# The slider value is used as a row index into src_out, so its upper bound is
# the last valid row (n_all_outliers - 1). This is stated explicitly instead
# of relying on whatever value a previously executed loop left in
# `n_outliers`.
slider = Slider(start=0, end=n_all_outliers - 1, value=0, step=1, title='n_outliers')
slider.js_on_change('value', callback)

# Slider stacked above the plot.
layout = column(slider, p)
show(layout)
ipywidgets
[1]:
%matplotlib inline
from ipywidgets import interactive
import matplotlib.pyplot as plt
import numpy as np
def f(m, b):
    """Draw the straight line ``y = m*x + b`` for x in [-10, 10].

    The curve is plotted on matplotlib figure number 2 with the y-axis
    clamped to [-5, 5], and the figure is shown immediately.
    """
    plt.figure(2)
    xs = np.linspace(-10, 10, num=1000)
    ys = m * xs + b
    plt.plot(xs, ys)
    plt.ylim(-5, 5)
    plt.show()
# Build a widget container around f; each tuple gives the (min, max[, step])
# range for the corresponding argument's control.
interactive_plot = interactive(f, m=(-2.0, 2.0), b=(-3, 3, 0.5))
# The last child of the container is taken to be the plot's output area.
# NOTE(review): pinning its height presumably keeps the cell from resizing
# every time the plot is redrawn - confirm.
output = interactive_plot.children[-1]
output.layout.height = '350px'
# Bare expression as the last line so the notebook displays the widget.
interactive_plot
SGT
Theodossiou’s Skewed Generalized \(t\) Distribution. See this notebook for cdf, ppf and more.
[24]:
%matplotlib inline
from pandas import DataFrame
from scipy.special import beta
from ipywidgets import interact
import matplotlib.pyplot as plt
import numpy as np
def pdf(x, mu, k, n, lam, sigma2):
    """Evaluate the skewed generalized t density at *x*.

    Parameters
    ----------
    x : float or array-like
        Point(s) at which to evaluate the density.
    mu : float
        Location; the density depends on ``x`` only through ``x - mu``.
    k : float
        Shape parameter, ``k > 0``.
    n : float
        Shape parameter, ``n > 2``.
    lam : float
        Skewness parameter, ``-1 < lam < 1``; ``lam = 0`` is symmetric.
    sigma2 : float
        Squared scale parameter, ``sigma2 > 0``.

    Returns
    -------
    Density value(s), with the same shape as ``x``.
    """
    # The three beta-function terms do not depend on x, so compute them once.
    b1 = beta(1/k, n/k)
    b2 = beta(2/k, (n-1)/k)
    b3 = beta(3/k, (n-2)/k)

    sigma = np.sqrt(sigma2)
    skew_scale = np.sqrt(1 + 3*(lam**2) - 4*(lam**2)*((b2**2)/(b1*b3)))
    norm_const = 0.5*k*np.sqrt((b1**(-3))*b3)*skew_scale/sigma
    theta = ((k/(n-2))**(1/k))*np.sqrt(b1/b3)/skew_scale

    # The deviation is rescaled asymmetrically: the divisor uses (1 + lam) to
    # the right of mu and (1 - lam) to the left.
    dev = x - mu
    scaled = np.abs(dev)/(sigma*theta*(1 + np.sign(dev)*lam))
    return norm_const*(1 + (k/(n-2))*scaled**k)**(-(n+1)/k)
@interact(mu=(-3, 3, 0.1), k=(0.1, 5, 0.1), n=(2.1, 10, 0.1), sigma2=(0.1, 3, 0.1), lam=(-0.9, 0.9, 0.1))
def f(mu=0, k=0.5, n=2.5, sigma2=0.1, lam=0):
    """Plot the density returned by ``pdf`` on [-2, 2] for the given parameters.

    The arguments are forwarded to ``pdf`` (note that ``pdf`` takes ``lam``
    before ``sigma2``). The ranges in the ``@interact`` decorator define the
    control for each argument; the defaults here set the initial values.
    """
    # pdf is built from NumPy element-wise operations, so it can be evaluated
    # on the whole grid in one call instead of once per point. That avoids
    # recomputing its x-independent constants 500 times on every redraw.
    xs = np.linspace(-2, 2, 500)
    curve = DataFrame({'x': xs, 'pdf': pdf(xs, mu, k, n, lam, sigma2)}).set_index('x')

    _, ax = plt.subplots()
    curve.plot(ax=ax)
    # Fixed axes so the curve's change in shape is visible between redraws.
    ax.set(xlim=(-2, 2), ylim=(0, 20))
    plt.show()