Source code for twinsvm

# -*- coding: utf-8 -*-

# LightTwinSVM Program - Simple and Fast
# Version: 0.6.0 - 2019-03-31
# Developer: Mir, A. (mir-am@hotmail.com)
# License: GNU General Public License v3.0

"""
Classes and functios are defined for training and testing TwinSVM classifier.

TwinSVM classifier generates two non-parallel hyperplanes.
For more info, refer to the original papar.
Khemchandani, R., & Chandra, S. (2007). Twin support vector machines for pattern classification. IEEE Transactions on pattern analysis and machine intelligence, 29(5), 905-910.

Motivated by the following paper, the multi-class TSVM is developed.
Tomar, D., & Agarwal, S. (2015). A comparison on multi-class classification methods based on least squares twin support vector machine. Knowledge-Based Systems, 81, 131-147.
"""


from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.multiclass import check_classification_targets
from sklearn.utils.validation import check_X_y, check_is_fitted, check_array
from sklearn.utils import column_or_1d
import numpy as np

# ClipDCD optimizer is an extension module which is implemented in C++
from ltsvm.optimizer import clipdcd


[docs]class TSVM(BaseEstimator): """ Twin Support Vector Machine for binary classification. Parameters ---------- kernel : str, optional (default='linear') Type of the kernel function which is either 'linear' or 'RBF'. rect_kernel : float, optional (default=1.0) Percentage of training samples for Rectangular kernel. C1 : float, optional (default=1.0) Penalty parameter of first optimization problem. C2 : float, optional (default=1.0) Penalty parameter of second optimization problem. gamma : float, optional (default=1.0) Parameter of the RBF kernel function. Attributes ---------- mat_C_t : array-like, shape = [n_samples, n_samples] A matrix that contains kernel values. cls_name : str Name of the classifier. w1 : array-like, shape=[n_features] Weight vector of class +1's hyperplane. b1 : float Bias of class +1's hyperplane. w2 : array-like, shape=[n_features] Weight vector of class -1's hyperplane. b2 : float Bias of class -1's hyperplane. """ def __init__(self, kernel='linear', rect_kernel=1, C1=2**0, C2=2**0, \ gamma=2**0): self.C1 = C1 self.C2 = C2 self.gamma = gamma self.kernel = kernel self.rect_kernel = rect_kernel self.mat_C_t = None self.cls_name = 'TSVM' # Two hyperplanes attributes self.w1, self.b1, self.w2, self.b2 = None, None, None, None
[docs] def get_params_names(self): """ For retrieving the names of hyper-parameters of this classifier. Returns ------- parameters : list of str, {['C1', 'C2'], ['C1', 'C2', 'gamma']} Returns the names of the hyperparameters which are same as the class' attributes. """ return ['C1', 'C2'] if self.kernel == 'linear' else ['C1', 'C2', 'gamma']
[docs] def fit(self, X_train, y_train): """ It fits the binary TwinSVM model according to the given training data. Parameters ---------- X_train : array-like, shape (n_samples, n_features) Training feature vectors, where n_samples is the number of samples and n_features is the number of features. y_train : array-like, shape(n_samples,) Target values or class labels. """ # Matrix A or class 1 samples mat_A = X_train[y_train == 1] # Matrix B or class -1 data mat_B = X_train[y_train == -1] # Vectors of ones mat_e1 = np.ones((mat_A.shape[0], 1)) mat_e2 = np.ones((mat_B.shape[0], 1)) if self.kernel == 'linear': # Linear kernel mat_H = np.column_stack((mat_A, mat_e1)) mat_G = np.column_stack((mat_B, mat_e2)) elif self.kernel == 'RBF': # Non-linear # class 1 & class -1 mat_C = np.row_stack((mat_A, mat_B)) self.mat_C_t = np.transpose(mat_C)[:, :int(mat_C.shape[0] * self.rect_kernel)] mat_H = np.column_stack((rbf_kernel(mat_A, self.mat_C_t, self.gamma), mat_e1)) mat_G = np.column_stack((rbf_kernel(mat_B, self.mat_C_t, self.gamma), mat_e2)) mat_H_t = np.transpose(mat_H) mat_G_t = np.transpose(mat_G) # Compute inverses: # Regulariztion term used for ill-possible condition reg_term = 2 ** float(-7) mat_H_H = np.linalg.inv(np.dot(mat_H_t, mat_H) + (reg_term * np.identity(mat_H.shape[1]))) mat_G_G = np.linalg.inv(np.dot(mat_G_t, mat_G) + (reg_term * np.identity(mat_G.shape[1]))) # Wolfe dual problem of class 1 mat_dual1 = np.dot(np.dot(mat_G, mat_H_H), mat_G_t) # Wolfe dual problem of class -1 mat_dual2 = np.dot(np.dot(mat_H, mat_G_G), mat_H_t) # Obtaining Lagrange multipliers using ClipDCD optimizer alpha_d1 = np.array(clipdcd.clippDCD_optimizer(mat_dual1, self.C1)).reshape(mat_dual1.shape[0], 1) alpha_d2 = np.array(clipdcd.clippDCD_optimizer(mat_dual2, self.C2)).reshape(mat_dual2.shape[0], 1) # Obtain hyperplanes hyper_p_1 = -1 * np.dot(np.dot(mat_H_H, mat_G_t), alpha_d1) # Class 1 self.w1 = hyper_p_1[:hyper_p_1.shape[0] - 1, :] self.b1 = hyper_p_1[-1, :] hyper_p_2 = np.dot(np.dot(mat_G_G, mat_H_t), alpha_d2) # Class -1 self.w2 = hyper_p_2[:hyper_p_2.shape[0] - 1, :] self.b2 = hyper_p_2[-1, :]
[docs] def predict(self, X_test): """ Performs classification on samples in X using the TwinSVM model. Parameters ---------- X_test : array-like, shape (n_samples, n_features) Feature vectors of test data. Returns ------- output : array, shape (n_samples,) Predicted class lables of test data. """ # Calculate prependicular distances for new data points prepen_distance = np.zeros((X_test.shape[0], 2)) kernel_f = {'linear': lambda i: X_test[i, :] , 'RBF': lambda i: rbf_kernel(X_test[i, :], \ self.mat_C_t, self.gamma)} for i in range(X_test.shape[0]): # Prependicular distance of data pint i from hyperplanes prepen_distance[i, 1] = np.abs(np.dot(kernel_f[self.kernel](i), self.w1) + self.b1) prepen_distance[i, 0] = np.abs(np.dot(kernel_f[self.kernel](i), self.w2) + self.b2) # Assign data points to class +1 or -1 based on distance from hyperplanes output = 2 * np.argmin(prepen_distance, axis=1) - 1 return output
[docs]def rbf_kernel(x, y, u): """ It transforms samples into higher dimension using Gaussian (RBF) kernel. Parameters ---------- x, y : array-like, shape (n_features,) A feature vector or sample. u : float Parameter of the RBF kernel function. Returns ------- float Value of kernel matrix for feature vector x and y. """ return np.exp(-2 * u) * np.exp(2 * u * np.dot(x, y))
[docs]class HyperPlane: """ Its object represents a hyperplane Attributes ---------- w : array-like, shape (n_features,) Weight vector. If the RBF kernel is used, the shape will be (n_samples,) b : float Bias. """ def __init__(self): self.w = None # Coordinates of hyperplane self.b = None # Bias term
[docs]class MCTSVM(BaseEstimator): """ Multi-class Twin Support Vector Machine (One-vs-All Scheme) Parameters ---------- kernel : str, optional (default='linear') Type of the kernel function which is either 'linear' or 'RBF'. C : float, optional (default=1.0) Penalty parameter. gamma : float, optional (default=1.0) Parameter of the RBF kernel function. Attributes ---------- classifiers : dict Stores an intance of :class:`HyperPlane` class for each binary classifier. mat_D_t : list of array-like objects Stores kernel matrix for each binary classifier. cls_name : str Name of the classifier. """ def __init__(self, kernel='linear', C=2**0, gamma=2**0): self.kernel = kernel self.C = C self.gamma = gamma self.classfiers = {} # Classifiers self.mat_D_t = [] # For non-linear MCTSVM self.cls_name = 'TSVM_OVA'
[docs] def get_params_names(self): """ For retrieving the names of hyper-parameters of this classifier. Returns ------- parameters : list of str, {['C'], ['C', 'gamma']} Returns the names of the hyperparameters which are same as the class' attributes. """ return ['C'] if self.kernel == 'linear' else ['C', 'gamma']
[docs] def fit(self, X_train, y_train): """ It fits the OVA-TwinSVM model according to the given training data. Parameters ---------- X_train : array-like, shape (n_samples, n_features) Training feature vectors, where n_samples is the number of samples and n_features is the number of features. y_train : array-like, shape(n_samples,) Target values or class labels. """ num_classes = np.unique(y_train) # Construct K-binary classifiers for idx, i in enumerate(num_classes): # Samples of i-th class mat_X_i = X_train[y_train == i] # Samples of other classes mat_Y_i = X_train[y_train != i] # Vectors of ones mat_e1_i = np.ones((mat_X_i.shape[0], 1)) mat_e2_i = np.ones((mat_Y_i.shape[0], 1)) if self.kernel == 'linear': mat_A_i = np.column_stack((mat_X_i, mat_e1_i)) mat_B_i = np.column_stack((mat_Y_i, mat_e2_i)) elif self.kernel == 'RBF': mat_D = np.row_stack((mat_X_i, mat_Y_i)) self.mat_D_t.append(np.transpose(mat_D)) mat_A_i = np.column_stack((rbf_kernel(mat_X_i, self.mat_D_t[idx], self.gamma), mat_e1_i)) mat_B_i = np.column_stack((rbf_kernel(mat_Y_i, self.mat_D_t[idx], self.gamma), mat_e2_i)) mat_A_i_t = np.transpose(mat_A_i) mat_B_i_t = np.transpose(mat_B_i) # Compute inverses: # Regulariztion term used for ill-possible condition reg_term = 2 ** float(-7) mat_A_A = np.linalg.inv(np.dot(mat_A_i_t, mat_A_i) + (reg_term * np.identity(mat_A_i.shape[1]))) # Dual problem of i-th class mat_dual_i = np.dot(np.dot(mat_B_i, mat_A_A), mat_B_i_t) # Obtaining Lagrange multipliers using ClippDCD optimizer alpha_i = np.array(clipdcd.clippDCD_optimizer(mat_dual_i, self.C)).reshape(mat_dual_i.shape[0], 1) hyperplane_i = np.dot(np.dot(mat_A_A, mat_B_i_t), alpha_i) hyper_p_inst = HyperPlane() hyper_p_inst.w = hyperplane_i[:hyperplane_i.shape[0] - 1, :] hyper_p_inst.b = hyperplane_i[-1, :] self.classfiers[i] = hyper_p_inst
[docs] def predict(self, X_test): """ Performs classification on samples in X using the OVA-TwinSVM model. Parameters ---------- X_test : array-like, shape (n_samples, n_features) Feature vectors of test data. Returns ------- output : array, shape (n_samples,) Predicted class lables of test data. """ # Perpendicular distance from each hyperplane prepen_dist = np.zeros((X_test.shape[0], len(self.classfiers.keys()))) kernel_f = {'linear': lambda i, j: X_test[i, :] , 'RBF': lambda i, j: rbf_kernel(X_test[i, :], \ self.mat_D_t[j], self.gamma)} for i in range(X_test.shape[0]): for idx, j in enumerate(self.classfiers.keys()): prepen_dist[i, idx] = np.abs(np.dot(kernel_f[self.kernel](i, idx), \ self.classfiers[j].w) + self.classfiers[j].b) / np.linalg.norm(self.classfiers[j].w) output = np.argmin(prepen_dist, axis=1) + 1 return output
[docs]class OVO_TSVM(BaseEstimator, ClassifierMixin): """ Multi Class Twin Support Vector Machine (One-vs-One Scheme) The :class:`OVO_TSVM` classifier is scikit-learn compatible, which means scikit-learn tools such as `cross_val_score <https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.cross_val_score.html>`_ and `GridSearchCV <https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html>`_ can be used for an instance of :class:`OVO_TSVM` Parameters ---------- kernel : str, optional (default='linear') Type of the kernel function which is either 'linear' or 'RBF'. C1 : float, optional (default=1.0) Penalty parameter of first optimization problem for each binary :class:`TSVM` classifier. C2 : float, optional (default=1.0) Penalty parameter of second optimization problem for each binary :class:`TSVM` classifier. gamma : float, optional (default=1.0) Parameter of the RBF kernel function. Attributes ---------- cls_name : str Name of the classifier. bin_TSVM_models_ : list Stores intances of each binary :class:`TSVM` classifier. """ def __init__(self, kernel='linear', C1=1, C2=1, gamma=1): self.kernel = kernel self.C1 = C1 self.C2 = C2 self.gamma = gamma self.cls_name = 'TSVM_OVO'
[docs] def get_params_names(self): """ For retrieving the names of hyper-parameters of this classifier. Returns ------- parameters : list of str, {['C1', 'C2'], ['C1', 'C2', 'gamma']} Returns the names of the hyperparameters which are same as the class' attributes. """ return ['C1', 'C2'] if self.kernel == 'linear' else ['C1', 'C2', 'gamma']
def _validate_targets(self, y): """ Validates labels for training and testing classifier """ y_ = column_or_1d(y, warn=True) check_classification_targets(y) self.classes_, y = np.unique(y_, return_inverse=True) return np.asarray(y, dtype=np.int) def _validate_for_predict(self, X): """ Checks that the classifier is already trained and also test samples are valid """ check_is_fitted(self, ['bin_TSVM_models_']) X = check_array(X, dtype=np.float64) n_samples, n_features = X.shape if n_features != self.shape_fit_[1]: raise ValueError("X.shape[1] = %d should be equal to %d," "the number of features of training samples" % (n_features, self.shape_fit_[1])) return X
[docs] def fit(self, X, y): """ It fits the OVO-TwinSVM model according to the given training data. Parameters ---------- X : array-like, shape (n_samples, n_features) Training feature vectors, where n_samples is the number of samples and n_features is the number of features. y : array-like, shape(n_samples,) Target values or class labels. Returns ------- self : object """ y = self._validate_targets(y) X, y = check_X_y(X, y, dtype=np.float64) # Allocate n(n-1)/2 binary TSVM classifiers self.bin_TSVM_models_ = ((self.classes_.size * (self.classes_.size - 1)) // 2 ) * [None] p = 0 for i in range(self.classes_.size): for j in range(i + 1, self.classes_.size): #print("%d, %d" % (i, j)) # Break multi-class problem into a binary problem sub_prob_X_i_j = X[(y == i) | (y == j)] sub_prob_y_i_j = y[(y == i) | (y == j)] #print(sub_prob_y_i_j) # For binary classification, labels must be {-1, +1} # i-th class -> +1 and j-th class -> -1 sub_prob_y_i_j[sub_prob_y_i_j == j] = -1 sub_prob_y_i_j[sub_prob_y_i_j == i] = 1 self.bin_TSVM_models_[p] = TSVM(self.kernel, 1, self.C1, self.C2, \ self.gamma) self.bin_TSVM_models_[p].fit(sub_prob_X_i_j, sub_prob_y_i_j) p = p + 1 self.shape_fit_ = X.shape return self
[docs] def predict(self, X): """ Performs classification on samples in X using the OVO-TwinSVM model. Parameters ---------- X : array-like, shape (n_samples, n_features) Feature vectors of test data. Returns ------- y_pred : array, shape (n_samples,) Predicted class lables of test data. """ X = self._validate_for_predict(X) # Initialze votes votes = np.zeros((X.shape[0], self.classes_.size), dtype=np.int) # iterate over test samples for k in range(X.shape[0]): p = 0 for i in range(self.classes_.size): for j in range(i + 1, self.classes_.size): y_pred = self.bin_TSVM_models_[p].predict(X[k, :].reshape(1, X.shape[1])) if y_pred == 1: votes[k, i] = votes[k, i] + 1 else: votes[k, j] = votes[k, j] + 1 p = p + 1 # Labels of test samples based max-win strategy max_votes = np.argmax(votes, axis=1) return self.classes_.take(np.asarray(max_votes, dtype=np.int))
if __name__ == '__main__': # from ltsvm.dataproc import read_data # from sklearn.metrics import accuracy_score # from sklearn.model_selection import train_test_split from sklearn.utils.estimator_checks import check_estimator # from sklearn.model_selection import cross_val_score, GridSearchCV # import time # check_estimator(OVO_TSVM) # train_data, labels, data_name = read_data('/home/mir/mir-projects/Mir-Repo/mc-data/wine.csv') # # X_train, X_test, y_train, y_test = train_test_split(train_data, labels, # test_size=0.30, random_state=42) # # ## param = {'C_1': [float(2**i) for i in range(-5, 6)], ## 'C_2': [float(2**i) for i in range(-5, 6)]} # # start_t = time.time() ## # ovo_tsvm_model = MCTSVM() # ovo_tsvm_model.set_params(**{'C': 4, 'gamma': 0.1}) # print(ovo_tsvm_model.get_params()) # # #cv = cross_val_score(ovo_tsvm_model, train_data, labels, cv=10) # ## result = GridSearchCV(ovo_tsvm_model, param, cv=10, n_jobs=4, refit=False, verbose=1) ## result.fit(train_data, labels) # # print(X_train.shape) ## # ovo_tsvm_model.fit(X_train, y_train) # # pred = ovo_tsvm_model.predict(X_test) ## # print("Finished: %.2f ms" % ((time.time() - start_t) * 1000)) ## # print("Accuracy: %.2f" % (accuracy_score(y_test, pred) * 100))