
Loss Functions#


[1]:
import numpy as np
from sklearn import metrics

import matplotlib.pyplot as plt

plt.style.use("fivethirtyeight")

Loss vs Cost Function#

The loss L(h_w(x_i), y_i) measures the error of the hypothesis h_w on a single training example, while the cost (objective) function averages that loss over all N training examples:

\begin{align} \text{Cost Function } J(h_w(X), y) = \frac{1}{N} \sum_{i=1}^{N} L(h_w(x_i), y_i) \end{align}
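As a minimal sketch of that relationship (the model outputs and labels below are made-up numbers, and squared error stands in for a generic loss), the cost is just the mean of the per-sample losses:

[ ]:
# Made-up predictions and labels; squared error used as an example loss L
h_wx = np.array([0.9, 2.1, 2.9])   # model outputs h_w(x_i)
y = np.array([1.0, 2.0, 3.0])      # true labels y_i

per_sample_loss = np.square(h_wx - y)   # L(h_w(x_i), y_i) for each i
cost = per_sample_loss.mean()           # J = (1/N) * sum_i L(h_w(x_i), y_i)
cost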

Classification Loss#

Hinge Loss#

\begin{align} L(h_w(x_i), y_i) = \max(1 - h_w(x_i) y_i, 0)^p \end{align}

| p     | Use case                   |
|-------|----------------------------|
| p = 1 | Standard (hinge loss) SVM  |
| p = 2 | Squared hinge loss SVM     |

  • Generally used with p = 1 for the standard SVM; a p = 1 vs p = 2 comparison is sketched at the end of this subsection

[23]:
def hinge_loss(y_true, y_pred, p):
    # max(1 - y*h, 0)^p, element-wise; works for scalars and 1-D arrays
    return np.power(np.max(np.c_[1 - (y_true * y_pred), np.zeros_like(y_true)], axis=1), p)

Correct classification

[29]:
hinge_loss(-1, -1, 1), hinge_loss(-1, -1, 2)
[29]:
(array([0]), array([0]))

Incorrect Classification

[31]:
hinge_loss(-1, 1, 1), hinge_loss(1, -1, 1)
[31]:
(array([2]), array([2]))
[41]:
y_true = np.array([1, -1, 1, -1, 1, -1, 1, -1, 1])
y_pred = np.array([-1, -1, 1, -1, 1, -1, -1, -1, 1])

metrics.hinge_loss(y_true, y_pred), hinge_loss(y_true, y_pred, 1).mean()
[41]:
(0.4444444444444444, 0.4444444444444444)
[42]:
metrics.hinge_loss([-1], [-1]), metrics.hinge_loss([-1], [1])
[42]:
(0.0, 2.0)
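As a rough illustration of the table above (the margins are made-up values, not from the notebook), the squared variant (p = 2) penalizes large margin violations much more heavily than the standard hinge loss (p = 1):

[ ]:
# Standard (p=1) vs squared (p=2) hinge loss as the margin y*h grows more negative
margins = np.array([0.5, -0.5, -1.5, -3.0])   # made-up values of y_i * h_w(x_i)
ones = np.ones_like(margins)

hinge_loss(ones, margins, 1), hinge_loss(ones, margins, 2)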

Log Loss#

\begin{align} L(h_w(x_i), y_i) = \log(1 + e^{- y_i h_w(x_i)}) \end{align}

  • Used for Logistic Regression

[43]:
def logistic_loss(y_true, y_pred):
    # margin form for labels in {-1, +1}: log(1 + exp(-y * h))
    return np.log(1 + np.exp(-(y_true * y_pred)))

def log_loss(y_true, y_pred):
    # binary cross-entropy for labels in {0, 1} and predicted probabilities;
    # equivalent to the margin form above (see the sketch at the end of this subsection)
    return -((y_true * np.log(y_pred)) + ((1 - y_true) * np.log(1 - y_pred)))

Correct classification

[45]:
logistic_loss(-1, -1), logistic_loss(1, 1)
[45]:
(0.31326168751822286, 0.31326168751822286)

Incorrect Classification

[46]:
logistic_loss(-1, 1), logistic_loss(1, -1)
[46]:
(1.3132616875182228, 1.3132616875182228)
[55]:
y_true = np.array([1, -1, 1, -1, 1, -1, 1, -1, 1])
y_pred = np.array([-1, -1, 1, -1, 1, -1, -1, -1, 1])

logistic_loss(y_true, y_pred).mean()
[55]:
0.535483909740445
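The margin form of the logistic loss and the probability form (binary cross-entropy, log_loss above) describe the same quantity: squashing the raw score with a sigmoid and recoding the labels from {-1, +1} to {0, 1} gives identical values. A small sketch of that equivalence, with a sigmoid helper and made-up scores added just for this illustration:

[ ]:
# Sketch: logistic (margin) loss equals binary cross-entropy on sigmoid probabilities
def sigmoid(z):   # helper added for this illustration only
    return 1 / (1 + np.exp(-z))

scores = np.array([2.0, -0.5, 1.5, -3.0])   # made-up raw scores h_w(x_i)
y_pm1 = np.array([1, -1, -1, 1])            # labels in {-1, +1}
y_01 = (y_pm1 + 1) // 2                     # the same labels in {0, 1}

logistic_loss(y_pm1, scores), log_loss(y_01, sigmoid(scores))   # the two should match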

Exponential Loss#

\begin{align} L(h_w(x_i), y_i) = e^{-h_w(x_i) y_i} \end{align}

  • Used in AdaBoost

  • Very aggressive: the penalty for a misprediction grows exponentially with the size of the margin violation (compared numerically below)

[56]:
def exp_loss(y_true, y_pred):
    return np.exp(-(y_pred * y_true))

Correct classification

[57]:
exp_loss(-1, -1), exp_loss(1, 1)
[57]:
(0.36787944117144233, 0.36787944117144233)

Incorrect Classification

[58]:
exp_loss(-1, 1), exp_loss(1, -1)
[58]:
(2.718281828459045, 2.718281828459045)
[59]:
y_true = np.array([1, -1, 1, -1, 1, -1, 1, -1, 1])
y_pred = np.array([-1, -1, 1, -1, 1, -1, -1, -1, 1])

exp_loss(y_true, y_pred).mean()
[59]:
0.8901910827909096
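To make the "very aggressive" point concrete, here is a quick comparison (an added sketch with made-up margins) of the hinge, logistic and exponential losses as the margin becomes more negative; the exponential loss grows much faster than the other two:

[ ]:
# Loss values at increasingly negative margins y*h (made-up values)
margins = np.array([-1.0, -2.0, -4.0])
ones = np.ones_like(margins)

hinge_loss(ones, margins, 1), logistic_loss(ones, margins), exp_loss(ones, margins)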

Zero-One Loss#

\begin{align} L(h_w(x_i), y_i) &= \delta\left(\operatorname{sign}(h_w(x_i)) \ne y_i\right) \\ \\ \delta &= \begin{cases} 1 & \text{if } \operatorname{sign}(h_w(x_i)) \ne y_i \text{, i.e. misclassification}\\ \\ 0 & \text{otherwise} \end{cases} \end{align}

  • The actual classification error (misclassification count); it is non-convex and non-differentiable, so the convex losses above are used as surrogates during training

[60]:
def zero_one_loss(y_true, y_pred):
    # 1 if the sign of the prediction disagrees with the label, 0 otherwise
    return np.int32(np.sign(y_pred) != y_true)

Correct classification

[61]:
zero_one_loss(-1, -1), zero_one_loss(1, 1)
[61]:
(0, 0)

Incorrect Classification

[62]:
zero_one_loss(-1, 1), zero_one_loss(1, -1)
[62]:
(1, 1)
[63]:
y_true = np.array([1, -1, 1, -1, 1, -1, 1, -1, 1])
y_pred = np.array([-1, -1, 1, -1, 1, -1, -1, -1, 1])

zero_one_loss(y_true, y_pred).mean()
[63]:
0.2222222222222222
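scikit-learn ships the same metric as metrics.zero_one_loss (the fraction of misclassified samples), so the implementation above can be sanity-checked just like the hinge loss earlier:

[ ]:
# Cross-check against scikit-learn's zero-one loss
metrics.zero_one_loss(y_true, y_pred), zero_one_loss(y_true, y_pred).mean()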

Graphical Representation#

[70]:
n_samples = 100

# x plays the role of the margin y_i * h_w(x_i) (true label fixed to +1)
x = np.linspace(-2, 2, n_samples)
y = np.ones_like(x)

fig, ax = plt.subplots(1, 1, figsize=(5, 5))

ax.plot(x, logistic_loss(x, y), label='Logistic Loss')
ax.plot(x, exp_loss(x, y), label='Exponential Loss')
ax.plot(x, zero_one_loss(y, x), label='Zero-One Loss')
ax.plot(x, hinge_loss(x, y, 1), label='Hinge Loss')

plt.legend()
plt.show()
[Figure Intuition_loss_35_0.png: logistic, exponential, zero-one and hinge losses plotted against the margin]

Regression Loss#

Squared Loss#

\begin{align} L(h_w(x_i), y_i) &= (h_w(x_i) - y_i)^2 \end{align}

  • Also known as Ordinary Least Squares (OLS) when minimized over a linear model

  • Estimates the mean label (checked numerically below)

  • Easily differentiable

  • Sensitive to outliers and noisy data, since errors are squared

[17]:
def squared_loss(y_true, y_pred):
    # (h_w(x) - y)^2, element-wise
    return np.square(y_pred - y_true)
[72]:
y_true = np.array([12, 11, 10, 12.3, 11.2, 10.3])
y_pred = np.array([11.9, 11, 9.6, 12, 11, 10.1])

squared_loss(y_true, y_pred).mean(), metrics.mean_squared_error(y_true, y_pred)
[72]:
(0.05666666666666679, 0.05666666666666679)
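The "estimates the mean label" bullet can be checked numerically: over all constant predictions, the average squared loss is minimized at (up to the grid resolution) the sample mean. A small added sketch:

[ ]:
# The constant prediction that minimizes the mean squared loss is the sample mean
candidates = np.linspace(y_true.min(), y_true.max(), 1001)
costs = [squared_loss(y_true, c).mean() for c in candidates]

candidates[np.argmin(costs)], y_true.mean()   # should agree up to the grid spacing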

Absolute Loss#

\begin{align} L(h_w(x_i), y_i) &= |h_w(x_i) - y_i| \end{align}

  • Estimates the median label (checked numerically below)

  • Less sensitive to outliers and noise than the squared loss, but not differentiable at zero

[73]:
def absolute_loss(y_true, y_pred):
    # |h_w(x) - y|, element-wise
    return np.abs(y_pred - y_true)
[74]:
y_true = np.array([12, 11, 10, 12.3, 11.2, 10.3])
y_pred = np.array([11.9, 11, 9.6, 12, 11, 10.1])

absolute_loss(y_true, y_pred).mean(), metrics.mean_absolute_error(y_true, y_pred)
[74]:
(0.20000000000000018, 0.20000000000000018)
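Likewise, the constant prediction that minimizes the mean absolute loss is the sample median, which is why the absolute loss is far less affected by a single large outlier than the squared loss. A sketch with a made-up outlier appended to the labels:

[ ]:
# Median minimizes the mean absolute loss; an outlier drags the mean, not the median
y_out = np.append(y_true, 100.0)   # hypothetical outlier

candidates = np.linspace(y_out.min(), y_out.max(), 10001)
costs = [absolute_loss(y_out, c).mean() for c in candidates]

candidates[np.argmin(costs)], np.median(y_out), y_out.mean()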

Graphical Representation#

[84]:
n_samples = 50

# x plays the role of the residual h_w(x_i) - y_i (true value fixed to 0)
x = np.linspace(-3, 3, n_samples)
y = np.zeros_like(x)

fig, ax = plt.subplots(1, 1, figsize=(5, 5))

ax.plot(x, squared_loss(x, y), label='Squared Loss')
ax.plot(x, absolute_loss(x, y), label='Absolute Loss')

plt.legend()
plt.show()
[Figure Intuition_loss_45_0.png: squared and absolute losses plotted against the residual]