线性回归
* 线性回归
# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets,linear_model
# Load the diabetes dataset that is bundled with scikit-learn.
diabetes = datasets.load_diabetes()

# Use a single feature: column index 2 (i.e. the third column). Indexing
# with np.newaxis keeps the result two-dimensional, shape (n_samples, 1),
# which is the layout the estimator's fit/predict calls below receive.
diabetes_X = diabetes.data[:, np.newaxis, 2]

# Hold out the last 20 samples for testing; train on everything before them.
N_TEST = 20
diabetes_X_train, diabetes_X_test = diabetes_X[:-N_TEST], diabetes_X[-N_TEST:]
diabetes_Y_train, diabetes_Y_test = (
    diabetes.target[:-N_TEST],
    diabetes.target[-N_TEST:],
)

# Build the linear regression model and fit it on the training split.
regr = linear_model.LinearRegression()
regr.fit(diabetes_X_train, diabetes_Y_train)
print('Coefficients: \n', regr.coef_)

# Predict once on the held-out samples; the result is reused for both the
# error metric and the plot.
diabetes_Y_pred = regr.predict(diabetes_X_test)

# Mean squared error on the test split.
residuals = diabetes_Y_pred - diabetes_Y_test
print("Mean squared error: %.2f" % np.mean(residuals ** 2))

# Per the scikit-learn documentation, score() returns the coefficient of
# determination (R^2) of the prediction; it is printed here under the label
# "Variance score".
print('Variance score: %.2f' % regr.score(diabetes_X_test, diabetes_Y_test))

# Plot the test samples (black dots) and the fitted line (blue), with the
# axis tick marks removed.
plt.scatter(diabetes_X_test, diabetes_Y_test, color='black')
plt.plot(diabetes_X_test, diabetes_Y_pred, color='blue', linewidth=3)
plt.xticks(())
plt.yticks(())
plt.show()
* 岭回归
# -*- coding: utf-8 -*-
from sklearn import linear_model
# Toy training set: three samples with two features each, and their targets.
ridge_samples = [[0, 0], [0, 0], [1, 1]]
ridge_targets = [0, 0.1, 1]
# Ridge regression with regularization strength alpha = 0.5.
reg = linear_model.Ridge(alpha=0.5)
reg.fit(ridge_samples, ridge_targets)
>>> reg.coef_
array([ 0.34545455, 0.34545455])
>>> reg.intercept_
0.13636...
CV交叉验证
Leave-one-out交叉验证方法(留一法)
# -*- coding: utf-8 -*-
from sklearn import linear_model
# Candidate regularization strengths handed to RidgeCV; the value it settles
# on is exposed afterwards as reg.alpha_.
candidate_alphas = [0.1, 1.0, 10.0]
# Toy training set: three samples with two features each, and their targets.
cv_samples = [[0, 0], [0, 0], [1, 1]]
cv_targets = [0, 0.1, 1]
reg = linear_model.RidgeCV(alphas=candidate_alphas)
reg.fit(cv_samples, cv_targets)
RidgeCV(alphas=[0.1, 1.0, 10.0], cv=None, fit_intercept=True, scoring=None,
normalize=False)
>>> reg.alpha_
0.1
* lasso
# -*- coding: utf-8 -*-
>>> from sklearn import linear_model
>>> reg = linear_model.Lasso(alpha = 0.1)
>>> reg.fit([[0, 0], [1, 1]], [0, 1])
Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
normalize=False, positive=False, precompute=False, random_state=None,
selection='cyclic', tol=0.0001, warm_start=False)
>>> reg.predict([[1, 1]])
array([ 0.8])