import copy  # to copy numpy arrays

import cvxopt  # to solve the dual opt. problem
import numpy as np  # for basic operations over arrays
from scipy.spatial import distance  # to compute the Gaussian kernel
调用 `cvxopt.solvers.qp` 时,可以只传入 (P, q),也可以传入 (P, q, G, h) 或 (P, q, G, h, A, b) 等(未给出的 G、h、A、b 将取默认值,相当于不施加对应的不等式或等式约束)。
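对于 (P, q, G, h, A, b) 的取值,可以将我们的 SVM 对偶问题与 CVXOPT 求解的标准二次规划形式进行比较:

$$\max_{\alpha}\ \sum_{i=1}^{N}\alpha_i - \tfrac{1}{2}\sum_{i=1}^{N}\sum_{j=1}^{N}\alpha_i\alpha_j\,y_i y_j\,K(x_i, x_j) \quad \text{s.t.}\quad 0 \le \alpha_i \le C,\ \ \sum_{i=1}^{N}\alpha_i y_i = 0$$

$$\min_{x}\ \tfrac{1}{2}x^{\top}Px + q^{\top}x \quad \text{s.t.}\quad Gx \le h,\ \ Ax = b$$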
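为了便于比较,将第一个问题(SVM 对偶问题)改写为等价的最小化形式:

$$\min_{\alpha}\ \tfrac{1}{2}\alpha^{\top}\big[(yy^{\top}) \odot K\big]\alpha - \mathbf{1}^{\top}\alpha \quad \text{s.t.}\quad -\alpha \le 0,\ \ \alpha \le C,\ \ y^{\top}\alpha = 0$$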
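现在很明显(注意 0 ≤ α 等价于 −α ≤ 0),二者的对应关系为:

$$P = (yy^{\top}) \odot K,\quad q = -\mathbf{1}_N,\quad G = \begin{bmatrix} -I_N \\ I_N \end{bmatrix},\quad h = \begin{bmatrix} \mathbf{0}_N \\ C\,\mathbf{1}_N \end{bmatrix},\quad A = y^{\top},\quad b = 0$$

其中 $\odot$ 表示逐元素乘积,$K_{ij} = K(x_i, x_j)$ 为核矩阵。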
我们就可以写出如下的fit函数:
@SVMClass
def fit(self, X, y, eval_train=False):
    """Train the classifier by solving the SVM dual problem with CVXOPT.

    When ``y`` holds more than two distinct labels the work is delegated to
    ``multi_fit``. Otherwise the dual quadratic program is solved and the
    multipliers are stored in ``self.αs``, together with the support-vector
    mask ``self.is_sv`` and the index ``self.margin_sv``.

    Args:
        X: Training points; ``X.shape[0]`` is the number of points.
        y: Training labels, one per point. Must support boolean-mask
            assignment and ``reshape`` (i.e. a NumPy array).
            NOTE: labels given in {0, 1} are rewritten to {-1, +1} in
            place, so the caller's array is modified.
        eval_train: When true, print the accuracy on the training data.

    Returns:
        Whatever ``multi_fit`` returns in the multiclass case, else ``None``.
    """
    # More than two unique labels: hand over to the multiclass version.
    if len(np.unique(y)) > 2:
        self.multiclass = True
        return self.multi_fit(X, y, eval_train)

    # Labels given in {0, 1} are changed to {-1, +1}. This is deliberately
    # done in place: the same (now remapped) array is passed to
    # ``self.evaluate`` below.
    if set(np.unique(y)) == {0, 1}:
        y[y == 0] = -1

    # CVXOPT needs y as an N x 1 column vector of doubles.
    self.y = y.reshape(-1, 1).astype(np.double)
    self.X = X
    N = X.shape[0]  # number of points

    # Kernel evaluated over all pairs (x, x') of training points.
    self.K = self.kernel(X, X, self.k)

    # Objective 1/2 x^T P x + q^T x
    P = cvxopt.matrix(self.y @ self.y.T * self.K)
    q = cvxopt.matrix(-np.ones((N, 1)))

    # Equality constraint Ax = b
    A = cvxopt.matrix(self.y.T)
    b = cvxopt.matrix(np.zeros(1))

    # Inequality constraint Gx <= h, stacking -α <= 0 on top of α <= C
    G = cvxopt.matrix(np.vstack((-np.identity(N), np.identity(N))))
    h = cvxopt.matrix(np.vstack((np.zeros((N, 1)), np.ones((N, 1)) * self.C)))

    # Solve quietly and keep the multipliers as an N x 1 array.
    cvxopt.solvers.options['show_progress'] = False
    sol = cvxopt.solvers.qp(P, q, G, h, A, b)
    self.αs = np.array(sol["x"])

    # The solver returns values that are only numerically zero or C, so
    # comparisons use a small tolerance.
    tol = 1e-3

    # Boolean mask flagging the points that are support vectors.
    self.is_sv = ((self.αs - tol > 0) & (self.αs <= self.C)).squeeze()

    # Index of one margin support vector (0 < α < C). np.argmax returns the
    # first True entry; it returns 0 if no entry qualifies.
    self.margin_sv = np.argmax((0 < self.αs - tol) & (self.αs < self.C - tol))

    if eval_train:
        print(f"Finished training with accuracy {self.evaluate(X, y)}")
@SVMClass
def multi_fit(self, X, y, eval_train=False):
    """Train one binary one-vs-rest classifier per class.

    For every class index ``i`` in ``range(number of classes)`` a fresh
    ``SVM`` is trained on labels that are +1 where ``y == i`` and -1
    elsewhere. The trained classifiers are stored in ``self.clfs`` in class
    order.

    Args:
        X: Training points, passed unchanged to every binary classifier.
        y: Training labels. Classes are matched against the integers
            ``0 .. n_classes - 1``. ``y`` itself is not modified.
        eval_train: When true, print the accuracy on the training data.
    """
    # ``self.k`` is the kernel parameter (``fit`` passes it to
    # ``self.kernel``), yet it is reused below to hold the class count.
    # Remember the kernel parameter the first time through so the binary
    # classifiers receive the value the user configured, on refits too.
    self._kernel_param = getattr(self, "_kernel_param", self.k)

    # Number of classes; ``multi_predict`` sizes its score matrix with it.
    self.k = len(np.unique(y))

    # Start from an empty list so refitting does not keep stale classifiers.
    self.clfs = []

    labels = np.asarray(y)
    for i in range(self.k):
        # One-vs-rest relabelling: class i -> +1, every other class -> -1.
        Ys = np.where(labels == i, 1, -1)

        clf = SVM(kernel=self.kernel_str, C=self.C, k=self._kernel_param)
        clf.fit(X, Ys)
        self.clfs.append(clf)

    if eval_train:
        print(f"Finished training with accuracy {self.evaluate(X, y)}")
然后,为了对新示例执行预测,我们选择相应分类器最自信(得分最高)的类。
@SVMClass
def multi_predict(self, X):
    """Predict with the per-class classifiers and pick the best-scoring class.

    Each classifier in ``self.clfs`` fills one column of a score matrix with
    the second value returned by its ``predict``. The class reported for a
    point is the column holding its largest score.

    Args:
        X: Points to classify; ``X.shape[0]`` is the number of points.

    Returns:
        A pair ``(classes, scores)``: the winning column index per point and
        the score found in that column.
    """
    n_points = X.shape[0]
    scores = np.zeros((n_points, self.k))

    # Column j holds the scores produced by the j-th stored classifier.
    for column, classifier in enumerate(self.clfs):
        _labels, column_scores = classifier.predict(X)
        scores[:, column] = column_scores

    best_class = np.argmax(scores, axis=1)
    best_score = np.max(scores, axis=1)
    return best_class, best_score
完整测试代码:
import numpy as np
from sklearn.datasets import make_classification

# Load the dataset: 500 two-dimensional points in 4 classes, with a fixed
# seed so the run is reproducible.
np.random.seed(1)
X, y = make_classification(
    n_samples=500,
    n_features=2,
    n_redundant=0,
    n_informative=2,
    n_classes=4,
    n_clusters_per_class=1,
    class_sep=0.3,
)

# Test SVM: train on the data and print the training accuracy.
svm = SVM(kernel='rbf', k=4)
svm.fit(X, y, eval_train=True)