defnormalize(features): """ Normalizes input features X.
Parameters ---------- features: numpy.ndarray, feature matrix of data points.
Returns ------- features_normalized: numpy.ndarray, normalized feature matrix. features_mean: the average value for each feature. features_deviation: the standard deviation for each feature. """ # Copy original array to prevent it from changes. features_normalized = np.copy(features).astype(float)
# Get average values for each feature (column) in X. features_mean = np.mean(features, 0)
# Calculate the standard deviation for each feature. features_deviation = np.std(features, 0)
# Subtract mean values from each feature (column) of every example (row) # to make all features be spread around zero. if features.shape[0] > 1: features_normalized -= features_mean
# Normalize each feature values so that all features are close to [-1:1] boundaries. # Also prevent division by zero error. features_deviation[features_deviation == 0] = 1 features_normalized /= features_deviation
classLinearRegression: def__init__(self, method="normal", reg_lambda=0, fit_intercept=True): """ A simple linear regression algorithm using two methods: a. normal equation b. gradient descent Optionally, this class can perform ridge regression, if the reg_lambda parameter is set to a number greater than zero.
Parameters ---------- method: str, "normal": normal equation, "gradient": gradient descent. reg_lambda: int, regularization constant for ridge regression. fit_intercept: bool, whether or not to include an intercept term. """ assert(method in ["normal", "gradient"]) self.method = method self.normalize_data = None
deffit(self, X, y, lr=0.01, tol=1e-4, max_iters=500, normalize_data=True): """ Fit the regression coefficients
Parameters ---------- X: numpy.ndarray, matrix of data points. y: numpy.ndarray, the measured data for each point in X. lr: float, the gradient descent learning rate. tol: float, tolerance for stopping criteria. max_iters: int, the maximum number of iterations to run the gradient descent solver. """ if self.method == "normal": self.normal_equation(X, y) else: self.normalize_data = normalize_data if normalize_data: X = normalize(X)[0] self.gradient_descent(X, y, lr, tol, max_iters)
defnormal_equation(self, X, y): """ Fit the regression coefficients via maximum likelihood. """ if self.fit_intercept: X = np.c_[np.ones(X.shape[0]), X] A = self.reg_lambda * np.eye(X.shape[1]) A[0, 0] = 0 else: A = self.reg_lambda * np.eye(X.shape[1])
pseudo_inverse = np.linalg.inv(X.T @ X + A) @ X.T self.theta = pseudo_inverse @ y
defgradient_descent(self, X, y, lr=0.01, tol=1e-4, max_iters=500): """ Fit the regression coefficients via gradient descent. """ if self.fit_intercept: X = np.c_[np.ones(X.shape[0]), X]
# Calculate the number of training examples. N = X.shape[0]
# Initialization of model parameters. self.theta = np.zeros(X.shape[1], )
# Calculate regularization parameter. reg_factor = 1 - lr * self.reg_lambda / N
for k in range(max_iters): # The difference between predictions and actual values for all m examples. delta = X @ self.theta - y
# Create theta shortcut. theta = self.theta
# Gradient descent theta = theta * reg_factor - lr * (X.T @ delta) / N
# We should not regularize the parameter theta_zero in ridge regression. if self.fit_intercept and self.reg_lambda: theta[0] = self.theta[0] - lr * np.sum(delta) / N
# Judge whether the stopping criteria is reached. if np.linalg.norm(theta - self.theta) return
# Update model parameters. self.theta = theta
defpredict(self, X): """ Calculate y_i for each data point in points.
Parameters ---------- X: numpy.ndarray, the data points to calculate with.
Returns ------- y_pred: numpy.ndarray, the model predictions for the points in X. """ if self.normalize_data: X = normalize(X)[0]
if self.fit_intercept: X = np.c_[np.ones(X.shape[0]), X]
return X @ self.theta
4. 波士顿房价预测
4.1 加载数据
from sklearn import datasets data = datasets.load_boston() X = data.data y = data.target print(X.shape) print(y.shape)
(506, 13) (506,)
波士顿房价数据集中一共有 506 个样本点,每个样本包含 13 个特征。
from sklearn.model_selection import train_test_split train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2) print(train_X.shape) print(train_y.shape) print(test_X.shape) print(test_y.shape)