线性回归


* 线性回归

# -*- coding: utf-8 -*-

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets,linear_model

# Fit a one-feature linear regression on the diabetes dataset, print the
# coefficient and two quality metrics, then plot the fitted line over the
# held-out points.

diabetes = datasets.load_diabetes()
# Keep a single feature: index 2 is the THIRD column (0-based indexing).
# np.newaxis keeps the result two-dimensional -- per the original note,
# (442, 1): 442 rows, 1 column.
diabetes_X = diabetes.data[:, np.newaxis, 2]

# Split the feature into training and test data: the last 20 samples are
# held out for testing, everything before them is used for training.
diabetes_X_train = diabetes_X[:-20]
diabetes_X_test = diabetes_X[-20:]

# Split the targets the same way so rows stay aligned with the features.
diabetes_Y_train = diabetes.target[:-20]
diabetes_Y_test = diabetes.target[-20:]

# Create the linear regression model.
regr = linear_model.LinearRegression()

# Train the model on the training split and show the learned coefficient.
regr.fit(diabetes_X_train, diabetes_Y_train)
print('Coefficients: \n', regr.coef_)

# Predict once and reuse the result for both the error metric and the plot
# (the prediction was previously recomputed for each use).
diabetes_Y_pred = regr.predict(diabetes_X_test)

# Mean squared error on the held-out samples.
print("Mean squared error: %.2f"
      % np.mean((diabetes_Y_pred - diabetes_Y_test) ** 2))

# NOTE: despite the printed label, score() is the coefficient of
# determination (R^2), not a variance. The label is kept unchanged so the
# program output stays the same.
print('Variance score: %.2f' % regr.score(diabetes_X_test, diabetes_Y_test))

# Plot outputs: test samples as black points, fitted line in blue.
plt.scatter(diabetes_X_test, diabetes_Y_test, color='black')
plt.plot(diabetes_X_test, diabetes_Y_pred, color='blue', linewidth=3)

# Empty tuples remove the tick marks from both axes.
plt.xticks(())
plt.yticks(())
plt.show()

* 岭回归

# -*- coding: utf-8 -*-

from sklearn import linear_model
# Ridge regression with regularization strength alpha = 0.5, fitted on a
# three-sample toy problem with two features per sample.
reg = linear_model.Ridge(alpha=.5)
reg.fit(
    [[0, 0], [0, 0], [1, 1]],  # training samples
    [0, .1, 1],                # target values, one per sample
)

>>> reg.coef_
array([ 0.34545455,  0.34545455])
>>> reg.intercept_ 
0.13636...
  • CV交叉验证

  • Leave-one-out交叉验证方法(留一法)

# -*- coding: utf-8 -*-
from sklearn import linear_model
# RidgeCV is given three candidate alpha values; the value it settles on is
# exposed afterwards as reg.alpha_.
reg = linear_model.RidgeCV(
    alphas=[0.1, 1.0, 10.0],
)
reg.fit(
    [[0, 0], [0, 0], [1, 1]],  # training samples
    [0, .1, 1],                # target values, one per sample
)

RidgeCV(alphas=[0.1, 1.0, 10.0], cv=None, fit_intercept=True, scoring=None,
    normalize=False)

>>> reg.alpha_                                      
0.1

* lasso

# -*- coding: utf-8 -*-

>>> from sklearn import linear_model
>>> reg = linear_model.Lasso(alpha = 0.1)
>>> reg.fit([[0, 0], [1, 1]], [0, 1])
Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)
>>> reg.predict([[1, 1]])
array([ 0.8])

results matching ""

    No results matching ""