%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import PolynomialFeatures
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import make_pipeline
sns.set()
plt.rc('font', family='SimHei')
plt.rc('axes', unicode_minus=False)
Preprocessing the data with basis functions turns a linear model into a nonlinear one; the most commonly used basis functions are polynomial and Gaussian basis functions.
sklearn's PolynomialFeatures makes polynomial fitting straightforward. For example, a 7th-degree polynomial model is used here to fit a noisy sine wave.
x_fit = (np.random.rand(100) * 2 * np.pi)[:, np.newaxis]      # noisy sine data on [0, 2*pi]
y_fit = np.sin(x_fit) + 0.2 * np.random.rand(100)[:, np.newaxis]
plt.figure(figsize=(10, 10))
plt.plot(x_fit, y_fit, 'o')
# Expand the single feature into polynomial basis functions (1, x, ..., x^7),
# then fit an ordinary linear regression on the expanded features.
poly = PolynomialFeatures(degree=7)
model = LinearRegression(fit_intercept=True)
model.fit(poly.fit_transform(x_fit), y_fit)
print(f'Fitted coefficients:\n{model.coef_}\n')
print(f'Fitted intercept:\n{model.intercept_}\n')
x_test = np.linspace(-1, 7, 100)[:, np.newaxis]
res = model.predict(poly.transform(x_test))
plt.plot(x_test, res)
plt.title('Fitting a sine wave with polynomial basis functions (linear regression)')
Fitted coefficients:
[[ 0.00000000e+00  7.62873777e-01  4.94286344e-01 -5.73239384e-01
   1.56268610e-01 -1.90622234e-02  1.26236424e-03 -4.29223064e-05]]

Fitted intercept:
[0.11910499]
Text(0.5, 1.0, 'Fitting a sine wave with polynomial basis functions (linear regression)')
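To make the basis expansion concrete, here is a quick check (a toy input of my own, degree 3 for brevity) of the columns PolynomialFeatures actually generates for a single feature:
x_demo = np.array([[1.0], [2.0], [3.0]])          # toy input chosen for illustration
print(PolynomialFeatures(degree=3).fit_transform(x_demo))
# [[ 1.  1.  1.  1.]
#  [ 1.  2.  4.  8.]
#  [ 1.  3.  9. 27.]]
# Columns are 1, x, x^2, x^3; LinearRegression then fits a model that is linear in these columns.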
class GaussianFeatures(BaseEstimator, TransformerMixin):
    """Expand a 1-D feature into N Gaussian basis functions with evenly spaced centers."""

    def __init__(self, N: int, width_factor: float = 2.0):
        self.N = N
        self.width_factor = width_factor
        self.centers_ = None
        self.width_ = None

    @staticmethod
    def _gauss_basis(x, y, width, axis=None):
        arg = (x - y) / width
        return np.exp(-0.5 * np.sum(arg ** 2, axis))

    def fit(self, x, y=None):
        # Place N centers evenly over the data range; the width scales with the center spacing.
        self.centers_ = np.linspace(x.min(), x.max(), self.N)
        self.width_ = self.width_factor * (self.centers_[1] - self.centers_[0])
        return self

    def transform(self, x):
        # Broadcasting (n_samples, 1, 1) against the (N,) centers yields an (n_samples, N) design matrix.
        return self._gauss_basis(x[:, :, np.newaxis], self.centers_,
                                 self.width_, axis=1)
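As a quick sanity check of the transformer above (throwaway data, shapes only): fit_transform should return one column per Gaussian center.
x_check = np.linspace(0, 1, 5)[:, np.newaxis]                # shape (5, 1), throwaway data
print(GaussianFeatures(N=3).fit_transform(x_check).shape)    # (5, 3): one column per center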
# Fit 20 Gaussian basis functions followed by an ordinary linear regression.
model = make_pipeline(GaussianFeatures(20), LinearRegression())
x_fit = np.random.rand(100) * 2 * np.pi
y_fit = np.sin(x_fit) + 0.2 * np.random.rand(100)
x_fit = x_fit[:, np.newaxis]; y_fit = y_fit[:, np.newaxis]
x_test = np.linspace(0, 2 * np.pi, 200)[:, np.newaxis]
model.fit(x_fit, y_fit)
res = model.predict(x_test)
plt.figure(figsize=(10, 10))
plt.plot(x_fit, y_fit, 'o')
plt.plot(x_test, res)
plt.title('Fitting a sine wave with Gaussian basis functions (linear regression)')
Text(0.5, 1.0, 'Fitting a sine wave with Gaussian basis functions (linear regression)')
In linear regression, an overly complex model leads to overfitting, so large model parameters need to be suppressed, i.e. regularized.
Ridge regression simply adds the following term to the ordinary linear-regression loss: $P = \alpha \sum_{n=1}^{N} \theta_n^2$
The parameter $\alpha$ is set by the user and controls how strongly the loss function is penalized. The book does not explain ridge regression in much detail; at this introductory stage it helps to read a few blog posts by others first. In sklearn, ridge regression is implemented by the Ridge class.
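As an aside, the squared penalty keeps the problem analytically tractable: ridge regression has the closed-form solution $\theta = (X^T X + \alpha I)^{-1} X^T y$. A minimal sketch on random toy data (fit_intercept=False so the formula and sklearn penalize exactly the same parameters):
rng = np.random.RandomState(0)                    # toy data for illustration only
X_toy = rng.rand(50, 3)
y_toy = X_toy @ np.array([1.0, -2.0, 0.5]) + 0.1 * rng.randn(50)
alpha = 0.2
# Closed-form ridge solution: theta = (X^T X + alpha * I)^(-1) X^T y
theta = np.linalg.solve(X_toy.T @ X_toy + alpha * np.eye(3), X_toy.T @ y_toy)
ridge = Ridge(alpha=alpha, fit_intercept=False).fit(X_toy, y_toy)
print(np.allclose(theta, ridge.coef_))            # True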
x_fit = np.random.rand(60) * 2 * np.pi - np.pi
y_fit = np.sin(x_fit) + 0.3 * np.random.rand(60)
x_test = np.linspace(-np.pi, np.pi, 200)
x_fit, y_fit, x_test = x_fit[:, np.newaxis], y_fit[:, np.newaxis], x_test[:, np.newaxis]
# Same 30 Gaussian basis functions, fitted once with plain least squares and once with ridge regularization.
model1 = make_pipeline(GaussianFeatures(30), LinearRegression())
model2 = make_pipeline(GaussianFeatures(30), Ridge(alpha=0.2))
model1.fit(x_fit, y_fit); model2.fit(x_fit, y_fit)
res1, res2 = model1.predict(x_test), model2.predict(x_test)
fig, axs = plt.subplots(4, 1, figsize=(15, 20))
ax_linear = axs[0] # type: plt.Axes
ax_coef_linear = axs[1] # type: plt.Axes
ax_ridge = axs[2] # type: plt.Axes
ax_coef_ridge = axs[3] # type: plt.Axes
ax_linear.plot(x_fit, y_fit, 'o')
ax_linear.plot(x_test, res1)
ax_linear.set_xlim(-3.5, 3.5)
ax_linear.set_ylim(-1.5, 1.5)
ax_linear.set_title('Plain linear regression fit')
ax_coef_linear.plot(model1.steps[0][1].centers_[:, np.newaxis], model1.steps[1][1].coef_.reshape(30, 1))
ax_coef_linear.set_title('Plain linear regression coefficient amplitudes')
ax_ridge.plot(x_fit, y_fit, 'o')
ax_ridge.plot(x_test, res2)
ax_ridge.set_xlim(-3.5, 3.5)
ax_ridge.set_ylim(-1.5, 1.5)
ax_ridge.set_title('Ridge regression fit')
ax_coef_ridge.plot(model2.steps[0][1].centers_[:, np.newaxis], model2.steps[1][1].coef_.reshape(30, 1))
ax_coef_ridge.set_title('Ridge regression coefficient amplitudes')
Text(0.5, 1.0, 'Ridge regression coefficient amplitudes')
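The next cell compares ridge with lasso regression. Lasso replaces the squared penalty with an L1 penalty, $P = \alpha \sum_{n=1}^{N} |\theta_n|$, which tends to drive many coefficients exactly to zero. In both models $\alpha$ is a user-chosen hyperparameter; rather than hand-picking it, one option is cross-validation. A minimal sketch (reusing the GaussianFeatures transformer above; the alpha grids are my own guesses) with sklearn's RidgeCV and LassoCV:
from sklearn.linear_model import RidgeCV, LassoCV

x_cv = np.random.rand(60) * 2 * np.pi - np.pi     # toy data in the same style as below
y_cv = np.sin(x_cv) + 0.3 * np.random.rand(60)
feats_cv = GaussianFeatures(30).fit_transform(x_cv[:, np.newaxis])

# Each CV estimator tries every candidate alpha and keeps the best one (exposed as alpha_).
ridge_cv = RidgeCV(alphas=[0.01, 0.1, 0.2, 1.0]).fit(feats_cv, y_cv)
lasso_cv = LassoCV(alphas=[0.001, 0.002, 0.01, 0.1], max_iter=10000).fit(feats_cv, y_cv)
print(ridge_cv.alpha_, lasso_cv.alpha_)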
x_fit = np.random.rand(60) * 2 * np.pi - np.pi
y_fit = np.sin(x_fit) + 0.3 * np.random.rand(60)
x_test = np.linspace(-np.pi, np.pi, 200)
x_fit, y_fit, x_test = x_fit[:, np.newaxis], y_fit[:, np.newaxis], x_test[:, np.newaxis]
# Same Gaussian basis, this time comparing ridge (L2 penalty) with lasso (L1 penalty).
model1 = make_pipeline(GaussianFeatures(30), Ridge(alpha=0.2))
model2 = make_pipeline(GaussianFeatures(30), Lasso(alpha=0.002))
model1.fit(x_fit, y_fit); model2.fit(x_fit, y_fit)
res1, res2 = model1.predict(x_test), model2.predict(x_test)
fig, axs = plt.subplots(4, 1, figsize=(15, 20))
ax_ridge = axs[0]       # type: plt.Axes
ax_coef_ridge = axs[1]  # type: plt.Axes
ax_lasso = axs[2]       # type: plt.Axes
ax_coef_lasso = axs[3]  # type: plt.Axes
ax_ridge.plot(x_fit, y_fit, 'o')
ax_ridge.plot(x_test, res1)
ax_ridge.set_xlim(-3.5, 3.5)
ax_ridge.set_ylim(-1.5, 1.5)
ax_ridge.set_title('Ridge regression fit')
ax_coef_ridge.plot(model1.steps[0][1].centers_[:, np.newaxis], model1.steps[1][1].coef_.reshape(30, 1))
ax_coef_ridge.set_title('Ridge regression coefficient amplitudes')
ax_lasso.plot(x_fit, y_fit, 'o')
ax_lasso.plot(x_test, res2)
ax_lasso.set_xlim(-3.5, 3.5)
ax_lasso.set_ylim(-1.5, 1.5)
ax_lasso.set_title('Lasso regression fit')
ax_coef_lasso.plot(model2.steps[0][1].centers_[:, np.newaxis], model2.steps[1][1].coef_.reshape(30, 1))
ax_coef_lasso.set_title('Lasso regression coefficient amplitudes')
Text(0.5, 1.0, 'Lasso regression coefficient amplitudes')
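The amplitude plots above hint at what sets lasso apart: many of its coefficients are not merely small but exactly zero. A quick check on the two models fitted in the previous cell (the exact count depends on the random data):
ridge_coef = model1.steps[1][1].coef_.ravel()
lasso_coef = model2.steps[1][1].coef_.ravel()
# The L1 penalty zeroes coefficients outright; the L2 penalty only shrinks them.
print('exactly-zero coefficients -- ridge:', np.sum(ridge_coef == 0),
      'lasso:', np.sum(lasso_coef == 0))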