
Kernel Methods#

WORK IN PROGRESS


[1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics


plt.style.use("fivethirtyeight")

%matplotlib inline
[2]:
from sklearn.datasets import make_circles, make_swiss_roll

X, y = make_circles(n_samples=500, noise=0.05, shuffle=True, random_state=11, factor=0.4)
y = np.where(y == 0, 1, -1)  # relabel classes from {0, 1} to {+1, -1}

fig = plt.figure(figsize=(7,7))
ax = fig.add_subplot(1,1,1)
sns.scatterplot(x=X[:,0], y=X[:,1], hue=y, style=y, ax=ax, palette='deep')
plt.show()
[Figure: scatter plot of the two concentric rings produced by make_circles]

The two classes form concentric rings, so no linear boundary in the original 2-D space can separate them; this is exactly the situation kernels are meant to handle.

Kernels#

| Kernel Name | Equation |
| --- | --- |
| Linear | $K(x, z) = x^T z$ |
| Radial Basis Function (RBF) / Gaussian | $K(x, z) = \exp\big(-\frac{\| x - z \|^2}{2 \sigma^2}\big) = \exp\big(-\gamma \| x - z \|^2\big)$, with $\gamma = \frac{1}{2 \sigma^2}$ |
| Laplace | $K(x, z) = \exp\big(-\gamma \| x - z \|_1\big)$ |
| Sigmoid | $K(x, z) = \tanh(a x^T z + c)$ |
| Polynomial | $K(x, z) = (1 + x^T z)^d$ |
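
Note that the implementation below uses each kernel as an explicit per-point feature (a third coordinate appended to the 2-D data) rather than as a pairwise similarity function. As a quick sanity check of the pairwise RBF formula above, a hand-rolled Gram matrix can be compared against scikit-learn's rbf_kernel (an illustrative aside, not part of this notebook's pipeline):

[ ]:
# Illustrative aside: verify the RBF formula against scikit-learn's
# pairwise implementation on a few random points.
from sklearn.metrics.pairwise import rbf_kernel

rng = np.random.default_rng(0)
A = rng.normal(size=(5, 2))

gamma = 2.0
# squared Euclidean distance between every pair of rows
sq_dists = np.sum((A[:, None, :] - A[None, :, :]) ** 2, axis=-1)
K_manual = np.exp(-gamma * sq_dists)

assert np.allclose(K_manual, rbf_kernel(A, gamma=gamma))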

Implementation#

[3]:
# np.random.seed(0)

class Kernel:
    def __init__(self, name):
        self.name = name

    def _rbf(self, X, gamma=2):
        # radial feature measured from the origin; note the table above uses
        # the squared distance, while this lift uses the plain norm
        return np.exp(-gamma * np.linalg.norm(X, axis=1, ord=2))

    def _laplace(self, X, gamma=2):
        # L1-norm radial feature measured from the origin
        return np.exp(-gamma * np.linalg.norm(X, axis=1, ord=1))

    def _linear(self, X):
        # product of the two coordinates
        return X[:, 0] * X[:, 1]

    def _poly(self, X, d=3):
        # polynomial feature of the coordinate product
        return (1 + (X[:, 0] * X[:, 1])) ** d

    def _sigmoid(self, X, a=3, c=1):
        # tanh feature of the coordinate product
        return np.tanh((a * X[:, 0] * X[:, 1]) + c)

    def _parabolic(self, X):
        # difference of squared coordinates (defined but unused below)
        return X[:, 1] ** 2 - X[:, 0] ** 2

    def __call__(self, X, *args, **kwargs):
        if self.name == "rbf":
            return self._rbf(X, *args, **kwargs)
        elif self.name == "laplace":
            return self._laplace(X, *args, **kwargs)
        elif self.name == "linear":
            return self._linear(X)
        elif self.name == "polynomial":
            return self._poly(X, *args, **kwargs)
        elif self.name == "sigmoid":
            return self._sigmoid(X, *args, **kwargs)
        else:
            print(f"Kernel {self.name} is not recognized")
            return X
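
As a quick illustration of the lift, here is a hypothetical two-point example, one point near the inner ring and one on the outer ring:

[ ]:
# The inner-ring point gets a noticeably larger rbf/laplace feature than
# the outer-ring point, which is what makes the lifted data separable.
pts = np.array([[0.4, 0.0], [1.0, 0.0]])
for name in ["rbf", "laplace", "linear", "polynomial", "sigmoid"]:
    print(name, Kernel(name)(pts))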

[4]:
fig = plt.figure(figsize=(15, 10))

ax1 = fig.add_subplot(2, 3, 1)
sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=y, ax=ax1, palette='BrBG_r')
ax1.set_title("original")

# lift the 2-D data into 3-D with each kernel feature and plot the result
for i, name in enumerate(["rbf", "laplace", "linear", "polynomial", "sigmoid"], start=2):
    ax = fig.add_subplot(2, 3, i, projection='3d')
    trans_X = np.c_[X, Kernel(name)(X)]
    ax.scatter(trans_X[:, 0], trans_X[:, 1], trans_X[:, 2], c=y, cmap='BrBG_r')
    ax.set_title(name)

plt.suptitle("Kernel Methods")
plt.tight_layout()
plt.show()
[Figure: the original 2-D data and its 3-D lifts under the rbf, laplace, linear, polynomial, and sigmoid feature maps]
[5]:
np.random.seed(0)

class SVM:
    def __init__(self, n_iter=100, lambda_param=0.01, learning_rate=0.001,
                 kernel=None, kernel_args=None, kernel_kwargs=None):
        self.n_iter = n_iter
        self.lambda_param = lambda_param
        self.learning_rate = learning_rate
        self.w = None
        self.b = None
        self.l_loss = None
        self.kernel = kernel
        self.kernel_args = kernel_args or []
        self.kernel_kwargs = kernel_kwargs or {}

    @property
    def margin(self):
        # margin of the canonical hyperplane: 1 / ||w||
        return 1 / np.linalg.norm(self.w, ord=2)

    @property
    def loss(self):
        return self.l_loss

    def _predict(self, X):
        return np.sign((self.w @ X.T) + self.b)

    def predict(self, X):
        X = self._apply_kernel(X)
        return self._predict(X)

    def _apply_kernel(self, X):
        if self.kernel is not None:
            X = np.c_[X, Kernel(self.kernel)(X, *self.kernel_args, **self.kernel_kwargs)]
        return X


    def fit(self, X, y):
        X = self._apply_kernel(X)

        n_samples, n_features = X.shape

        self.w = np.zeros(n_features) #np.random.rand(n_features)
        self.b = 0
        self.l_loss = []
        self.l_margin = []

        for _ in range(self.n_iter):
            for i in range(n_samples):
                if (y[i] * ((self.w @ X[i].T) + self.b)) >= 1:
                    self.w = self.w - ( self.learning_rate * (2 * self.lambda_param * self.w))
                else:
                    self.w = self.w - ( self.learning_rate * (( 2 * self.lambda_param * self.w ) - (y[i] * X[i])))
                    self.b = self.b + ( self.learning_rate * y[i] )

                # hinge_loss expects raw decision values, not signed labels
                self.l_loss.append(metrics.hinge_loss(y, (self.w @ X.T) + self.b))
                self.l_margin.append(self.margin)


    def plot_boundary_3d(self, X, ax=None):
        trans_X = self._apply_kernel(X)

        if ax is None:
            fig = plt.figure(figsize=(7, 7))
            ax = fig.add_subplot(1, 1, 1, projection='3d')
            ax.set_title("Decision Boundary - Hyperplane")

        grid_samples = 100

        xx, yy = np.meshgrid(
            np.linspace(X[:, 0].min() - 1 , X[:, 0].max() + 1 , grid_samples),
            np.linspace(X[:, 1].min() - 1 , X[:, 1].max() + 1 , grid_samples)
        )
        mesh = np.c_[xx.ravel(), yy.ravel()]
        trans_mesh = self._apply_kernel(mesh)
        mesh_pred = (self.w @ trans_mesh.T) + self.b
        zz = mesh_pred.reshape(xx.shape)

        ax.plot_surface(xx, yy, zz, cmap='BrBG_r', alpha=0.2)
        ax.scatter(trans_X[:, 0], trans_X[:, 1], trans_X[:, 2], c=y, cmap='BrBG_r')

        # ax.legend()

    def plot_boundary(self, X, ax=None):
        if ax is None:
            fig, ax = plt.subplots(1, 1, figsize=(7, 7))
            ax.set_title("Decision Boundary - Hyperplane")

        grid_samples = 100

        xx, yy = np.meshgrid(
            np.linspace(X[:, 0].min() - 1 , X[:, 0].max() + 1 , grid_samples),
            np.linspace(X[:, 1].min() - 1 , X[:, 1].max() + 1 , grid_samples)
        )
        mesh = np.c_[xx.ravel(), yy.ravel()]
        mesh_pred = self.predict(mesh)
        zz = mesh_pred.reshape(xx.shape)

        ax.contourf(xx, yy, zz, cmap=plt.cm.coolwarm, alpha=0.2)
        sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=y, style=y, ax=ax, palette='deep')

        ax.legend()

    def plot_loss(self, ax=None):
        if ax is None:
            fig, ax = plt.subplots(1, 1, figsize=(7, 7))
            ax.set_title("Loss over iterations")

        ax.plot(self.loss)


    def plot_margin(self, ax=None):
        if ax is None:
            fig, ax = plt.subplots(1, 1, figsize=(7, 7))
            ax.set_title("Margin over iterations")

        ax.plot(self.l_margin)


    def plot(self, X):
        fig = plt.figure(figsize=(15, 5))

        ax1 = fig.add_subplot(1, 3, 1)
        self.plot_boundary(X, ax=ax1)
        ax1.set_title("Dicision Boundary - Hyperplane")

        ax2 = fig.add_subplot(1, 3, 2)
        self.plot_loss(ax=ax2)
        ax2.set_title("Loss over iterations")

        ax3 = fig.add_subplot(1, 3, 3)
        self.plot_margin(ax=ax3)
        ax3.set_title("Margin over iterations")

Without Kernel#

[6]:
model = SVM(n_iter=10, lambda_param=0.001, learning_rate=0.01)
model.fit(X, y)
model.plot(X)
[Figure: decision boundary, loss, and margin for the kernel-free model; a linear boundary cannot separate the concentric rings]

RBF Kernel#

[13]:
model = SVM(n_iter=5, lambda_param=0.001, learning_rate=0.01, kernel='rbf', kernel_kwargs={"gamma" : 2})
model.fit(X, y)

model.plot(X)
[Figure: decision boundary, loss, and margin with the rbf feature lift]
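
For comparison (an aside, not part of the original notebook), scikit-learn's SVC solves the dual problem with the true pairwise RBF kernel rather than the single radial feature used here:

[ ]:
# Library cross-check: SVC with the pairwise RBF kernel on the same data.
from sklearn.svm import SVC

clf = SVC(kernel='rbf', gamma=2)
clf.fit(X, y)
print("SVC training accuracy:", clf.score(X, y))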

Laplace Kernel#

[8]:
model = SVM(n_iter=5, lambda_param=0.001, learning_rate=0.01, kernel='laplace')
model.fit(X, y)

model.plot(X)
[Figure: decision boundary, loss, and margin with the laplace feature lift]

Polynomial Kernel#

[9]:
model = SVM(n_iter=5, lambda_param=0.001, learning_rate=0.01, kernel='polynomial', kernel_kwargs={"d" : 2})
model.fit(X, y)

model.plot(X)
[Figure: decision boundary, loss, and margin with the polynomial feature lift]

Linear Kernel#

[10]:
model = SVM(n_iter=5, lambda_param=0.001, learning_rate=0.01, kernel='linear')
model.fit(X, y)

model.plot(X)
[Figure: decision boundary, loss, and margin with the linear feature lift]

Sigmoid Kernel#

[11]:
model = SVM(n_iter=5, lambda_param=0.001, learning_rate=0.01, kernel='sigmoid')
model.fit(X, y)

model.plot(X)
[Figure: decision boundary, loss, and margin with the sigmoid feature lift]
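
One way to compare the kernels quantitatively (a suggestion rather than part of the original notebook) is the training accuracy of the scratch model under each lift:

[ ]:
# Training accuracy of the scratch SVM for each feature lift.
for name in [None, "rbf", "laplace", "polynomial", "linear", "sigmoid"]:
    m = SVM(n_iter=5, lambda_param=0.001, learning_rate=0.01, kernel=name)
    m.fit(X, y)
    print(f"{name}: {metrics.accuracy_score(y, m.predict(X)):.3f}")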