# -*- coding: utf-8 -*-
# LightTwinSVM Program - Simple and Fast
# Version: 0.6.0 - 2019-03-31
# Developer: Mir, A. (mir-am@hotmail.com)
# License: GNU General Public License v3.0
"""
Classes and functions are defined for training and testing TwinSVM classifier.
TwinSVM classifier generates two non-parallel hyperplanes.
For more info, refer to the original paper.
Khemchandani, R., & Chandra, S. (2007). Twin support vector machines for pattern classification. IEEE Transactions on pattern analysis and machine intelligence, 29(5), 905-910.
Motivated by the following paper, the multi-class TSVM is developed.
Tomar, D., & Agarwal, S. (2015). A comparison on multi-class classification methods based on least squares twin support vector machine. Knowledge-Based Systems, 81, 131-147.
"""
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.multiclass import check_classification_targets
from sklearn.utils.validation import check_X_y, check_is_fitted, check_array
from sklearn.utils import column_or_1d
import numpy as np
# ClipDCD optimizer is an extension module which is implemented in C++
from ltsvm.optimizer import clipdcd
class TSVM(BaseEstimator):
    """
    Twin Support Vector Machine for binary classification.

    Parameters
    ----------
    kernel : str, optional (default='linear')
        Type of the kernel function which is either 'linear' or 'RBF'.
    rect_kernel : float, optional (default=1.0)
        Percentage of training samples for Rectangular kernel.
    C1 : float, optional (default=1.0)
        Penalty parameter of first optimization problem.
    C2 : float, optional (default=1.0)
        Penalty parameter of second optimization problem.
    gamma : float, optional (default=1.0)
        Parameter of the RBF kernel function.

    Attributes
    ----------
    mat_C_t : array-like, shape = [n_samples, n_samples]
        A matrix that contains kernel values.
    cls_name : str
        Name of the classifier.
    w1 : array-like, shape=[n_features]
        Weight vector of class +1's hyperplane.
    b1 : float
        Bias of class +1's hyperplane.
    w2 : array-like, shape=[n_features]
        Weight vector of class -1's hyperplane.
    b2 : float
        Bias of class -1's hyperplane.
    """

    def __init__(self, kernel='linear', rect_kernel=1, C1=2**0, C2=2**0,
                 gamma=2**0):
        self.C1 = C1
        self.C2 = C2
        self.gamma = gamma
        self.kernel = kernel
        self.rect_kernel = rect_kernel
        self.mat_C_t = None
        self.cls_name = 'TSVM'
        # Parameters of the two non-parallel hyperplanes, populated by fit().
        self.w1, self.b1, self.w2, self.b2 = None, None, None, None

    def get_params_names(self):
        """
        For retrieving the names of hyper-parameters of this classifier.

        Returns
        -------
        parameters : list of str, {['C1', 'C2'], ['C1', 'C2', 'gamma']}
            Returns the names of the hyperparameters which are same as
            the class' attributes.
        """
        return ['C1', 'C2'] if self.kernel == 'linear' else ['C1', 'C2', 'gamma']

    def fit(self, X_train, y_train):
        """
        It fits the binary TwinSVM model according to the given training data.

        Parameters
        ----------
        X_train : array-like, shape (n_samples, n_features)
            Training feature vectors, where n_samples is the number of samples
            and n_features is the number of features.
        y_train : array-like, shape(n_samples,)
            Target values or class labels. Must be encoded as {+1, -1}.

        Returns
        -------
        self : object

        Raises
        ------
        ValueError
            If ``kernel`` is neither 'linear' nor 'RBF'.
        """
        # Matrix A holds class +1 samples, matrix B holds class -1 samples.
        mat_A = X_train[y_train == 1]
        mat_B = X_train[y_train == -1]
        # Vectors of ones used to absorb the bias term into the weight matrix.
        mat_e1 = np.ones((mat_A.shape[0], 1))
        mat_e2 = np.ones((mat_B.shape[0], 1))
        if self.kernel == 'linear':
            mat_H = np.column_stack((mat_A, mat_e1))
            mat_G = np.column_stack((mat_B, mat_e2))
        elif self.kernel == 'RBF':
            # np.vstack replaces np.row_stack, which was removed in NumPy 2.0.
            mat_C = np.vstack((mat_A, mat_B))
            # Rectangular kernel: keep only the first rect_kernel fraction of
            # training samples as kernel landmarks.
            self.mat_C_t = np.transpose(mat_C)[:, :int(mat_C.shape[0] * self.rect_kernel)]
            mat_H = np.column_stack((rbf_kernel(mat_A, self.mat_C_t, self.gamma), mat_e1))
            mat_G = np.column_stack((rbf_kernel(mat_B, self.mat_C_t, self.gamma), mat_e2))
        else:
            # Previously an unknown kernel fell through to a NameError below;
            # fail fast with a clear message instead.
            raise ValueError("kernel must be 'linear' or 'RBF', got %r" % self.kernel)
        mat_H_t = np.transpose(mat_H)
        mat_G_t = np.transpose(mat_G)
        # Regularization term guards the inverses against ill-conditioning.
        reg_term = 2 ** float(-7)
        mat_H_H = np.linalg.inv(np.dot(mat_H_t, mat_H) + (reg_term * np.identity(mat_H.shape[1])))
        mat_G_G = np.linalg.inv(np.dot(mat_G_t, mat_G) + (reg_term * np.identity(mat_G.shape[1])))
        # Wolfe dual problems of class +1 and class -1.
        mat_dual1 = np.dot(np.dot(mat_G, mat_H_H), mat_G_t)
        mat_dual2 = np.dot(np.dot(mat_H, mat_G_G), mat_H_t)
        # Obtain Lagrange multipliers using the ClipDCD optimizer (C++ ext).
        alpha_d1 = np.array(clipdcd.clippDCD_optimizer(mat_dual1, self.C1)).reshape(mat_dual1.shape[0], 1)
        alpha_d2 = np.array(clipdcd.clippDCD_optimizer(mat_dual2, self.C2)).reshape(mat_dual2.shape[0], 1)
        # Recover the two hyperplanes; the last row of each solution vector is
        # the bias term, the rest is the weight vector.
        hyper_p_1 = -1 * np.dot(np.dot(mat_H_H, mat_G_t), alpha_d1)
        self.w1 = hyper_p_1[:hyper_p_1.shape[0] - 1, :]
        self.b1 = hyper_p_1[-1, :]
        hyper_p_2 = np.dot(np.dot(mat_G_G, mat_H_t), alpha_d2)
        self.w2 = hyper_p_2[:hyper_p_2.shape[0] - 1, :]
        self.b2 = hyper_p_2[-1, :]
        return self

    def predict(self, X_test):
        """
        Performs classification on samples in X using the TwinSVM model.

        Parameters
        ----------
        X_test : array-like, shape (n_samples, n_features)
            Feature vectors of test data.

        Returns
        -------
        output : array, shape (n_samples,)
            Predicted class labels of test data.
        """
        # Perpendicular distance of each test sample from the two hyperplanes.
        prepen_distance = np.zeros((X_test.shape[0], 2))
        kernel_f = {'linear': lambda i: X_test[i, :],
                    'RBF': lambda i: rbf_kernel(X_test[i, :], self.mat_C_t,
                                                self.gamma)}
        for i in range(X_test.shape[0]):
            prepen_distance[i, 1] = np.abs(np.dot(kernel_f[self.kernel](i), self.w1) + self.b1)
            prepen_distance[i, 0] = np.abs(np.dot(kernel_f[self.kernel](i), self.w2) + self.b2)
        # Column 1 holds distance to the +1 hyperplane, column 0 to the -1
        # hyperplane, so argmin maps directly onto labels via 2*argmin - 1.
        output = 2 * np.argmin(prepen_distance, axis=1) - 1
        return output
def rbf_kernel(x, y, u):
    """
    It transforms samples into higher dimension using Gaussian (RBF) kernel.

    Parameters
    ----------
    x, y : array-like, shape (n_features,)
        A feature vector or sample.
    u : float
        Parameter of the RBF kernel function.

    Returns
    -------
    float
        Value of kernel matrix for feature vector x and y.

    Notes
    -----
    NOTE(review): this computes exp(-2u) * exp(2u * <x, y>), which equals the
    standard RBF kernel exp(-u * ||x - y||^2) only for unit-norm inputs —
    appears intentional in this codebase; confirm before changing.
    """
    dot_xy = np.dot(x, y)
    return np.exp(-2.0 * u) * np.exp(2.0 * u * dot_xy)
class HyperPlane:
    """
    Its object represents a hyperplane

    Attributes
    ----------
    w : array-like, shape (n_features,)
        Weight vector. If the RBF kernel is used, the shape will be (n_samples,)
    b : float
        Bias.
    """

    def __init__(self):
        # Both attributes are filled in later by the training routine.
        self.w = None
        self.b = None
class MCTSVM(BaseEstimator):
    """
    Multi-class Twin Support Vector Machine (One-vs-All Scheme)

    Parameters
    ----------
    kernel : str, optional (default='linear')
        Type of the kernel function which is either 'linear' or 'RBF'.
    C : float, optional (default=1.0)
        Penalty parameter.
    gamma : float, optional (default=1.0)
        Parameter of the RBF kernel function.

    Attributes
    ----------
    classfiers : dict
        Stores an instance of :class:`HyperPlane` class for each binary
        classifier. (Attribute name is misspelled but kept for backward
        compatibility with existing callers.)
    mat_D_t : list of array-like objects
        Stores kernel matrix for each binary classifier.
    cls_name : str
        Name of the classifier.
    """

    def __init__(self, kernel='linear', C=2**0, gamma=2**0):
        self.kernel = kernel
        self.C = C
        self.gamma = gamma
        self.classfiers = {}  # One HyperPlane per class
        self.mat_D_t = []  # Kernel landmark matrices for non-linear MCTSVM
        self.cls_name = 'TSVM_OVA'

    def get_params_names(self):
        """
        For retrieving the names of hyper-parameters of this classifier.

        Returns
        -------
        parameters : list of str, {['C'], ['C', 'gamma']}
            Returns the names of the hyperparameters which are same as
            the class' attributes.
        """
        return ['C'] if self.kernel == 'linear' else ['C', 'gamma']

    def fit(self, X_train, y_train):
        """
        It fits the OVA-TwinSVM model according to the given training data.

        Parameters
        ----------
        X_train : array-like, shape (n_samples, n_features)
            Training feature vectors, where n_samples is the number of samples
            and n_features is the number of features.
        y_train : array-like, shape(n_samples,)
            Target values or class labels.
        """
        # Reset state so refitting does not accumulate stale kernel matrices
        # (previously mat_D_t grew across fit() calls, corrupting the indices
        # used by predict()).
        self.classfiers = {}
        self.mat_D_t = []
        class_labels = np.unique(y_train)
        # Construct one binary (one-vs-all) classifier per class.
        for idx, i in enumerate(class_labels):
            # Samples of i-th class vs. samples of all other classes.
            mat_X_i = X_train[y_train == i]
            mat_Y_i = X_train[y_train != i]
            # Vectors of ones absorb the bias term into the weight matrix.
            mat_e1_i = np.ones((mat_X_i.shape[0], 1))
            mat_e2_i = np.ones((mat_Y_i.shape[0], 1))
            if self.kernel == 'linear':
                mat_A_i = np.column_stack((mat_X_i, mat_e1_i))
                mat_B_i = np.column_stack((mat_Y_i, mat_e2_i))
            elif self.kernel == 'RBF':
                # np.vstack replaces np.row_stack (removed in NumPy 2.0).
                mat_D = np.vstack((mat_X_i, mat_Y_i))
                self.mat_D_t.append(np.transpose(mat_D))
                mat_A_i = np.column_stack((rbf_kernel(mat_X_i, self.mat_D_t[idx], self.gamma), mat_e1_i))
                mat_B_i = np.column_stack((rbf_kernel(mat_Y_i, self.mat_D_t[idx], self.gamma), mat_e2_i))
            mat_A_i_t = np.transpose(mat_A_i)
            mat_B_i_t = np.transpose(mat_B_i)
            # Regularization term guards the inverse against ill-conditioning.
            reg_term = 2 ** float(-7)
            mat_A_A = np.linalg.inv(np.dot(mat_A_i_t, mat_A_i) + (reg_term * np.identity(mat_A_i.shape[1])))
            # Dual problem of the i-th class.
            mat_dual_i = np.dot(np.dot(mat_B_i, mat_A_A), mat_B_i_t)
            # Obtain Lagrange multipliers using the ClipDCD optimizer.
            alpha_i = np.array(clipdcd.clippDCD_optimizer(mat_dual_i, self.C)).reshape(mat_dual_i.shape[0], 1)
            hyperplane_i = np.dot(np.dot(mat_A_A, mat_B_i_t), alpha_i)
            # Last row is the bias; the rest is the weight vector.
            hyper_p_inst = HyperPlane()
            hyper_p_inst.w = hyperplane_i[:hyperplane_i.shape[0] - 1, :]
            hyper_p_inst.b = hyperplane_i[-1, :]
            self.classfiers[i] = hyper_p_inst

    def predict(self, X_test):
        """
        Performs classification on samples in X using the OVA-TwinSVM model.

        Parameters
        ----------
        X_test : array-like, shape (n_samples, n_features)
            Feature vectors of test data.

        Returns
        -------
        output : array, shape (n_samples,)
            Predicted class labels of test data.
        """
        # Normalized perpendicular distance from each class' hyperplane.
        prepen_dist = np.zeros((X_test.shape[0], len(self.classfiers.keys())))
        kernel_f = {'linear': lambda i, j: X_test[i, :],
                    'RBF': lambda i, j: rbf_kernel(X_test[i, :],
                                                   self.mat_D_t[j], self.gamma)}
        for i in range(X_test.shape[0]):
            for idx, j in enumerate(self.classfiers.keys()):
                prepen_dist[i, idx] = np.abs(np.dot(kernel_f[self.kernel](i, idx),
                                                    self.classfiers[j].w) + self.classfiers[j].b) \
                                      / np.linalg.norm(self.classfiers[j].w)
        # NOTE(review): this maps the argmin column index to label idx+1,
        # which is only the true class label when labels are 1..K — confirm
        # against the callers' label encoding before changing.
        output = np.argmin(prepen_dist, axis=1) + 1
        return output
class OVO_TSVM(BaseEstimator, ClassifierMixin):
    """
    Multi Class Twin Support Vector Machine (One-vs-One Scheme)

    The :class:`OVO_TSVM` classifier is scikit-learn compatible, which means
    scikit-learn tools such as `cross_val_score <https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.cross_val_score.html>`_
    and `GridSearchCV <https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html>`_
    can be used for an instance of :class:`OVO_TSVM`

    Parameters
    ----------
    kernel : str, optional (default='linear')
        Type of the kernel function which is either 'linear' or 'RBF'.
    C1 : float, optional (default=1.0)
        Penalty parameter of first optimization problem for each binary
        :class:`TSVM` classifier.
    C2 : float, optional (default=1.0)
        Penalty parameter of second optimization problem for each binary
        :class:`TSVM` classifier.
    gamma : float, optional (default=1.0)
        Parameter of the RBF kernel function.

    Attributes
    ----------
    cls_name : str
        Name of the classifier.
    bin_TSVM_models_ : list
        Stores instances of each binary :class:`TSVM` classifier.
    """

    def __init__(self, kernel='linear', C1=1, C2=1, gamma=1):
        self.kernel = kernel
        self.C1 = C1
        self.C2 = C2
        self.gamma = gamma
        self.cls_name = 'TSVM_OVO'

    def get_params_names(self):
        """
        For retrieving the names of hyper-parameters of this classifier.

        Returns
        -------
        parameters : list of str, {['C1', 'C2'], ['C1', 'C2', 'gamma']}
            Returns the names of the hyperparameters which are same as
            the class' attributes.
        """
        return ['C1', 'C2'] if self.kernel == 'linear' else ['C1', 'C2', 'gamma']

    def _validate_targets(self, y):
        """
        Validates labels for training and testing classifier
        """
        y_ = column_or_1d(y, warn=True)
        check_classification_targets(y)
        self.classes_, y = np.unique(y_, return_inverse=True)
        # Builtin int replaces np.int, which was removed in NumPy 1.24.
        return np.asarray(y, dtype=int)

    def _validate_for_predict(self, X):
        """
        Checks that the classifier is already trained and also test samples are
        valid
        """
        check_is_fitted(self, ['bin_TSVM_models_'])
        X = check_array(X, dtype=np.float64)
        n_samples, n_features = X.shape
        if n_features != self.shape_fit_[1]:
            raise ValueError("X.shape[1] = %d should be equal to %d,"
                             "the number of features of training samples" %
                             (n_features, self.shape_fit_[1]))
        return X

    def fit(self, X, y):
        """
        It fits the OVO-TwinSVM model according to the given training data.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training feature vectors, where n_samples is the number of samples
            and n_features is the number of features.
        y : array-like, shape(n_samples,)
            Target values or class labels.

        Returns
        -------
        self : object
        """
        y = self._validate_targets(y)
        X, y = check_X_y(X, y, dtype=np.float64)
        # Allocate n(n-1)/2 binary TSVM classifiers, one per class pair.
        self.bin_TSVM_models_ = ((self.classes_.size * (self.classes_.size - 1))
                                 // 2) * [None]
        p = 0
        for i in range(self.classes_.size):
            for j in range(i + 1, self.classes_.size):
                # Break the multi-class problem into a binary sub-problem.
                # Fancy indexing returns copies, so relabelling below does not
                # mutate the caller's y.
                sub_prob_X_i_j = X[(y == i) | (y == j)]
                sub_prob_y_i_j = y[(y == i) | (y == j)]
                # For binary classification, labels must be {-1, +1}:
                # i-th class -> +1 and j-th class -> -1
                sub_prob_y_i_j[sub_prob_y_i_j == j] = -1
                sub_prob_y_i_j[sub_prob_y_i_j == i] = 1
                self.bin_TSVM_models_[p] = TSVM(self.kernel, 1, self.C1,
                                                self.C2, self.gamma)
                self.bin_TSVM_models_[p].fit(sub_prob_X_i_j, sub_prob_y_i_j)
                p = p + 1
        self.shape_fit_ = X.shape
        return self

    def predict(self, X):
        """
        Performs classification on samples in X using the OVO-TwinSVM model.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Feature vectors of test data.

        Returns
        -------
        y_pred : array, shape (n_samples,)
            Predicted class labels of test data.
        """
        X = self._validate_for_predict(X)
        # Initialize vote counts (builtin int replaces removed np.int alias).
        votes = np.zeros((X.shape[0], self.classes_.size), dtype=int)
        # Iterate over test samples; every pairwise model casts one vote.
        for k in range(X.shape[0]):
            p = 0
            for i in range(self.classes_.size):
                for j in range(i + 1, self.classes_.size):
                    y_pred = self.bin_TSVM_models_[p].predict(X[k, :].reshape(1, X.shape[1]))
                    if y_pred == 1:
                        votes[k, i] = votes[k, i] + 1
                    else:
                        votes[k, j] = votes[k, j] + 1
                    p = p + 1
        # Labels of test samples based on the max-wins strategy.
        max_votes = np.argmax(votes, axis=1)
        return self.classes_.take(np.asarray(max_votes, dtype=int))
if __name__ == '__main__':
    # Sanity-check that OVO_TSVM satisfies the scikit-learn estimator
    # contract (the only live statement here; old ad-hoc experiments on the
    # wine dataset were dead commented-out code and have been removed).
    from sklearn.utils.estimator_checks import check_estimator

    check_estimator(OVO_TSVM)