$$h(\theta)=\theta_0+\theta_1 x$$

$$h(\theta)=\theta_0+\theta_1 x_1+\theta_2 x_2$$

$$\theta = \begin{pmatrix}\theta_0 \\ \theta_1 \\ \theta_2 \\ \theta_3\end{pmatrix}$$ $$X = \begin{pmatrix}x_0 \\ x_1 \\ x_2 \\ x_3\end{pmatrix}$$

$$h(\theta) = \theta^{T} x$$

$$J(\theta)=\sum_{i=1}^{m} \left(h_\theta(x^{(i)})-y^{(i)}\right)^{2}$$

$$cost(w_0+w_1x_1)=\sum_{i=1}^{N} \left(w_0+w_1x_i-y_i\right)^{2}$$

$$cost(w_0+w_1x_1)$$ 的图像其实像一个山谷一样，有一个最低点。找这个最低点的办法是：先随便找一个点（例如 $w_1=5$, $w_0=4$），然后沿着山谷下降的方向走，最后就能找到山谷的最低点。

$$w_1 := w_1-\alpha\frac{\partial\, cost(w_0+w_1x_1)}{\partial w_1}$$

$$w_0 := w_0-\alpha\frac{\partial\, cost(w_0+w_1x_1)}{\partial w_0}$$

LinearRegression

sklearn.linear_model.LinearRegression
class LinearRegression(fit_intercept=True, normalize=False, copy_X=True, n_jobs=1)
"""
:param normalize: 如果设置为True时，数据进行标准化。请在使用 normalize=False 的估计器调用 fit 之前使用 preprocessing.StandardScaler
:param copy_X: boolean，可选，默认为True，如果为True，则X将被复制
:param n_jobs: int，可选，默认1。用于计算的CPU核数
"""

# Create a plain least-squares linear regression estimator.
from sklearn.linear_model import LinearRegression

reg = LinearRegression()

fit(X,y,sample_weight = None)

reg.fit ([[0, 0], [1, 1], [2, 2]], [0, 1, 2])
predict(X)

reg.predict([[3,3]]) array([ 3.])

coef_

reg.coef_ array([ 0.5, 0.5])
intercept_ 表示截距项 $w_0$

# Cross-validate a LinearRegression on a synthetic regression problem
# (200 samples, 5000 features => heavily under-determined on purpose).
# NOTE: the old path sklearn.datasets.samples_generator was removed in
# scikit-learn 0.24; make_regression is importable from sklearn.datasets
# in every version.
from sklearn.datasets import make_regression
from sklearn.model_selection import cross_val_score
from sklearn import linear_model
import matplotlib.pyplot as plt

lr = linear_model.LinearRegression()
X, y = make_regression(n_samples=200, n_features=5000, random_state=0)
result = cross_val_score(lr, X, y)
print(result)

<https://www.kaggle.com/datasets>

1.美国波士顿地区房价数据描述
boston.DESCR
2.波士顿地区房价数据分割
# Split the Boston data into a 75/25 train/test partition.
# NOTE: sklearn.cross_validation was renamed; on scikit-learn >= 0.20 use
# sklearn.model_selection instead.
from sklearn.model_selection import train_test_split
import numpy as np

X = boston.data
y = boston.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=33, test_size=0.25)
3.训练与测试数据标准化处理
# Standardize features and target with SEPARATE scalers.
# BUG fixed: the original scaled y with ss_X (fit on X!) and then
# overwrote X_train with the transformed y_test, destroying the
# training features. y must go through ss_y, and y_test must be
# assigned back to y_test.
from sklearn.preprocessing import StandardScaler

ss_X = StandardScaler()
ss_y = StandardScaler()
X_train = ss_X.fit_transform(X_train)
X_test = ss_X.transform(X_test)
# reshape(-1, 1): StandardScaler requires 2-D input for a single target
y_train = ss_y.fit_transform(y_train.reshape(-1, 1))
y_test = ss_y.transform(y_test.reshape(-1, 1))
4.使用最简单的线性回归模型LinearRegression和梯度下降估计SGDRegressor对房价进行预测
# Fit two models on the standardized data: exact least squares
# (LinearRegression) and stochastic gradient descent (SGDRegressor).
from sklearn.linear_model import LinearRegression, SGDRegressor

lr = LinearRegression()
lr.fit(X_train, y_train)
lr_y_predict = lr.predict(X_test)

sgdr = SGDRegressor()
sgdr.fit(X_train, y_train)
sgdr_y_predict = sgdr.predict(X_test)
5.性能评测

回归模型常用的评价指标是均方误差 (Mean Squared Error, MSE)，因为这也是线性回归模型所要优化的目标。

MSE的计算方法如式：

$$MSE=\frac{1}{m}\sum_{i=1}^{m}\left(y^{(i)}-\hat{y}^{(i)}\right)^{2}$$

# Report MSE for both models on the ORIGINAL (un-scaled) target scale,
# so the numbers are interpretable house prices.
# BUG fixed: "inverse_tranform" typo (twice) -> inverse_transform, which
# would raise AttributeError at runtime.
from sklearn.metrics import mean_squared_error

print('线性回归模型的均方误差为：',
      mean_squared_error(ss_y.inverse_transform(y_test),
                         ss_y.inverse_transform(lr_y_predict)))
print('梯度下降模型的均方误差为：',
      mean_squared_error(ss_y.inverse_transform(y_test),
                         ss_y.inverse_transform(sgdr_y_predict)))

from sklearn.linear_model import LinearRegression, SGDRegressor, Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, classification_report
from sklearn.cluster import KMeans


def linearmodel():
    """Fit LinearRegression, SGDRegressor and Ridge on the Boston housing
    data, print predictions on the original price scale, and print the
    mean squared error of each model.

    :return: None
    """
    # 1. Load the dataset and split 75/25.
    # NOTE(review): the original text lost the load_boston /
    # train_test_split imports to copy-paste garbling; restored above.
    ld = load_boston()
    x_train, x_test, y_train, y_test = train_test_split(ld.data, ld.target, test_size=0.25)

    # 2. Standardize features and target with separate scalers.
    std_x = StandardScaler()
    x_train = std_x.fit_transform(x_train)
    x_test = std_x.transform(x_test)
    std_y = StandardScaler()
    # reshape(-1, 1): StandardScaler requires 2-D input for a single target
    y_train = std_y.fit_transform(y_train.reshape(-1, 1))
    y_test = std_y.transform(y_test.reshape(-1, 1))

    # 3. Estimators.
    # LinearRegression: exact least squares
    lr = LinearRegression()
    lr.fit(x_train, y_train)
    # print(lr.coef_)
    y_lr_predict = std_y.inverse_transform(lr.predict(x_test))
    print("Lr预测值：", y_lr_predict)

    # SGDRegressor: stochastic gradient descent
    sgd = SGDRegressor()
    sgd.fit(x_train, y_train)
    # print(sgd.coef_)
    y_sgd_predict = std_y.inverse_transform(sgd.predict(x_test))
    print("SGD预测值：", y_sgd_predict)

    # Ridge: L2-regularized least squares.
    # BUG fixed: the original source lost the Ridge construction/fit line,
    # leaving `rd` undefined (NameError); reconstructed with the default
    # regularization strength.
    rd = Ridge(alpha=1.0)
    rd.fit(x_train, y_train)
    y_rd_predict = std_y.inverse_transform(rd.predict(x_test))
    print(rd.coef_)

    # 4. Compare the three models by MSE on the original target scale.
    print("lr的均方误差为：",
          mean_squared_error(std_y.inverse_transform(y_test), y_lr_predict))
    print("SGD的均方误差为：",
          mean_squared_error(std_y.inverse_transform(y_test), y_sgd_predict))
    print("Ridge的均方误差为：",
          mean_squared_error(std_y.inverse_transform(y_test), y_rd_predict))
    return None