### Equations

(1) Hyperplane Centroid:

$ HC_{k} = {\bf w}^{T}{\bf x}- \frac{1}{ n_{k} } \sum\limits_{y_{i}=k} {\bf w}^{T}{\bf x_{i}}=0 $

(2) Hyperplane Centroid Loss:

$ HCL = \sum\limits_{i=1}^{k-1} max(HC_{i} -HC_{i+1} + \delta,0) $

(3) Hyperplane Point Loss:

(4) $ HPL_{i}^{+}= max((f(x_{i})-HC)-(HC_{+1}-HC)+\gamma (HC_{+1} - HC),0) $

$ =max(f(x_{i})-\gamma HC - (1-\gamma)HC_{+1},0) $ 



(5)
$ HPL_{i}^{-}= max(\gamma HC - f(x_{i}) + (1-\gamma)HC_{-1},0) $

(6)
$ HPL = \sum\limits_{x_{i}\in S} HPL_{i}^{+} + HPL_{i}^{-}$

(7) $ OHPL = \alpha HCL + HPL $

### Define Loss Functions

In [72]:
"""Metrics to assess performance on ordinal classification task given class prediction
 using hyper plane loss techniques 
"""

# Authors: Bob Vanderheyden 
# Ying Xie 
# 
# Contributor: Shayan Shamskolahi

import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf
import numpy as np

def hp_ordering_loss(y_true, y_pred, min_label=None, max_label=None):
    """Evaluate the hyperplane ordering (centroid) loss of the predictions.

    For every pair of classes (a, b) with label a > label b, the mean
    prediction of class a must exceed the mean prediction of class b by at
    least the label difference (a - b).  Any shortfall is added to the loss.

    Parameters
    ----------
    y_true : array-like
        Ordinal class labels, one per sample.
    y_pred : array-like
        Scalar model outputs, one per sample.
    min_label, max_label : optional
        Accepted for backward compatibility with existing wrappers; the
        computation does not use them (classes are derived from the labels
        present in ``y_true``).

    Returns
    -------
    loss : scalar tensor
        A non-negative floating point value (best value is 0.0).

    Usage
    -----
    loss = hp_ordering_loss([4,1,2,0,4,2,1], [6.0,3.1,5.2,1.0,4.0,2.2,3.7])
    print('Loss: ', loss.numpy())  # Loss: 2.8

    Usage with the `compile` API:

    ```python
    model = tf.keras.Model(inputs, outputs)
    model.compile(loss=hp_ordering_loss, optimizer='adam')
    ```
    """
    y_pred = tf.convert_to_tensor(y_pred)
    if not y_pred.dtype.is_floating:
        # Class means are fractional; integer predictions need a float dtype.
        y_pred = tf.cast(y_pred, tf.float32)
    dtype = y_pred.dtype

    y_true = tf.dtypes.cast(y_true, dtype)
    y_pred = tf.reshape(tf.transpose(y_pred), [-1, 1])

    # One-hot membership matrix over the distinct labels found in this batch.
    ords, idx = tf.unique(tf.reshape(y_true, [-1]))
    num = tf.shape(ords)[0]
    y_true_1hot = tf.one_hot(idx, num, dtype=dtype)

    # Mean prediction (centroid) of each class: per-class sum / per-class count.
    class_sum = tf.transpose(y_pred) @ y_true_1hot
    class_count = tf.reduce_sum(y_true_1hot, 0)
    class_mean = tf.divide(class_sum, class_count)

    # Required spacing: entry [a, b] is label_a - label_b when positive, else 0.
    ords = tf.dtypes.cast(ords, dtype)
    ords_col = tf.reshape(ords, [-1, 1])
    ords_row = tf.reshape(ords, [1, -1])
    min_distance = tf.nn.relu(tf.subtract(ords_col, ords_row))

    # Mask that is 1 only for pairs where label_a > label_b (integer labels).
    keep = tf.minimum(min_distance, 1)

    # Achieved spacing between centroids, restricted to the masked pairs.
    mean_col = tf.reshape(class_mean, [-1, 1])
    mean_row = tf.reshape(class_mean, [1, -1])
    centroid_gap = tf.nn.relu(tf.subtract(mean_col, mean_row))
    centroid_distance = tf.multiply(keep, centroid_gap)

    # Penalise only the shortfall of achieved spacing versus required spacing.
    order_loss = tf.nn.relu(tf.subtract(min_distance, centroid_distance))

    return tf.reduce_sum(order_loss)


def hp_point_loss(y_true, y_pred, L, U):
    """Evaluate the hyperplane point loss of the predictions.

    Each sample is penalised by the amount its prediction falls outside the
    [lower, upper] interval of its own class.

    Parameters
    ----------
    y_true : array-like
        One-hot encoded class membership, one row per sample and one column
        per class (it is matrix-multiplied with the thresholds to pick each
        sample's own bounds).
    y_pred : array-like
        Scalar model outputs, one per sample.
    L : array-like
        Lower threshold for each class, in the same order as the columns of
        ``y_true``.
    U : array-like
        Upper threshold for each class, in the same order as the columns of
        ``y_true``.

    Returns
    -------
    loss : scalar tensor
        A non-negative floating point value (best value is 0.0).

    Usage
    -----
    y_true = np.array([[0,0,0,1],[0,1,0,0],[0,0,1,0],[1,0,0,0],
                       [0,0,0,1],[0,0,1,0],[0,1,0,0]])
    y_pred = [6.0,3.1,5.2,1.0,4.0,2.2,3.7]
    lo = np.array([-1e9, 3.3, 3.6, 5.9])
    up = np.array([1.1, 3.5, 3.7, 1e9])
    loss = hp_point_loss(y_true, y_pred, lo, up)
    print('Loss: ', loss.numpy())  # Loss: 5.2

    Usage with the `compile` API (Keras expects a two-argument loss, so the
    thresholds are bound with a wrapper):

    ```python
    def get_point_wrapper(l, u):
        def ohpl(y_true, y_pred):
            return hp_point_loss(y_true, y_pred, l, u)
        return ohpl

    point_loss = get_point_wrapper(lower_threshold, upper_threshold)

    model = tf.keras.Model(inputs, outputs)
    model.compile(loss=point_loss, optimizer='adam')
    ```

    References
    ----------
    .. [1] Vanderheyden, Bob and Ying Xie. Ordinal Hyperplane Loss. (2018).
       2018 IEEE International Conference on Big Data (Big Data), 2337.
       https://doi.org/10.1109/BigData.2018.8622079
    """
    # Everything is computed in float32; cast y_pred too so that non-float32
    # predictions do not cause a dtype mismatch in the subtractions below.
    y_pred = tf.reshape(tf.cast(tf.convert_to_tensor(y_pred), tf.float32), [-1, 1])
    y_true = tf.cast(y_true, dtype=tf.float32)

    upper = tf.reshape(tf.constant(U, dtype=tf.float32), [-1, 1])
    lower = tf.reshape(tf.constant(L, dtype=tf.float32), [-1, 1])

    # One-hot row times the threshold column selects each sample's own bound.
    uthreshold = tf.matmul(y_true, upper, name='upper_MM')
    lthreshold = tf.matmul(y_true, lower, name='lower_MM')

    # Amount by which the prediction overshoots / undershoots its interval.
    upper_loss = tf.nn.relu(tf.subtract(y_pred, uthreshold))
    lower_loss = tf.nn.relu(tf.subtract(lthreshold, y_pred))

    return tf.reduce_sum(tf.add(upper_loss, lower_loss))

### Test the result:

In [2]:
# hp_ordering_loss takes (y_true, y_pred, min_label, max_label); pass the
# label bounds explicitly so the call matches the function signature.
loss = hp_ordering_loss([4,1,2,0,4,2,1], [6.0,3.1,5.2,1.0,4.0,2.2,3.7], 0, 4)
print('Loss: ', loss.numpy()) # Loss: 2.8

Loss: 2.8000002


In [47]:
# Scalar predictions, one per sample.
y_pred = [6.0,3.1,5.2,1.0,4.0,2.2,3.7]

# One-hot class membership, one row per sample and one column per class.
y_true = np.array([[0,0,0,1],[0,1,0,0],[0,0,1,0],[1,0,0,0],[0,0,0,1],[0,0,1,0],[0,1,0,0]])

# NOTE(review): `centers` is defined here but not passed to the call below.
centers = np.array([1.0, 3.4, 3.7, 5.0])
# Per-class lower / upper thresholds; the +/-1e9 entries leave the outer side
# of the first and last class effectively unbounded.
lo = np.array([-1e9, 3.3, 3.6, 5.9])
up = np.array([1.1, 3.5, 3.7, 1e9])

loss = hp_point_loss(y_true, y_pred, lo, up)
print('Loss: ', loss.numpy()) # Loss: 5.2


Loss: 5.2


### Example wrapper for Keras:

In [88]:
# example Keras wrapper for hp_ordering_loss

def get_ordering_wrapper(min_label, max_label):
    """Build a two-argument Keras loss that forwards to hp_ordering_loss.

    The label bounds are captured by the closure and passed through on every
    call, so the returned function has the (y_true, y_pred) signature that
    `model.compile(loss=...)` expects.
    """

    def hohpl(y_true, y_pred):
        return hp_ordering_loss(
            y_true,
            y_pred,
            min_label=min_label,
            max_label=max_label,
        )

    return hohpl

# Stage 1 loss passed to model.compile. The (0, 4) label bounds are forwarded
# to hp_ordering_loss, whose body does not currently read them.
ordering_loss = get_ordering_wrapper(0,4)


# example Keras wrapper for hp_point_loss

def get_point_wrapper(l, u):
    """Build a two-argument Keras loss that forwards to hp_point_loss.

    `l` and `u` are the per-class lower and upper thresholds; they are
    captured by the closure so the returned function has the
    (y_true, y_pred) signature that `model.compile(loss=...)` expects.
    """

    def ohpl(y_true, y_pred):
        return hp_point_loss(y_true, y_pred, L=l, U=u)

    return ohpl


# centers: training-set class centroids (see `train_cent`, computed in the Stage 1 training loop below)
# margin: the point margin
# num_ords: the number of unique ordinal label values

def ohpl_margins(centers, num_ords, margin=0.1):
    """Build per-class lower and upper thresholds around the class centroids.

    Parameters
    ----------
    centers : array-like
        Class centroids, one per ordinal class, in ascending class order.
        Any shape is accepted (list, 1-D, row or column); it is flattened
        into a column vector so the result never broadcasts into a matrix.
    num_ords : int
        Number of unique ordinal label values.
    margin : float
        Distance between a centroid and its inner thresholds.

    Returns
    -------
    lo, up : ndarray of shape (num_ords, 1)
        ``up`` is ``centers + margin`` for every class except the last, whose
        upper bound is ``centers + 1e9``.  ``lo`` is ``centers - margin`` for
        every class except the first, whose lower bound is ``centers - 1e9``.
    """
    # Force a column vector: a 1-D or row input would otherwise broadcast
    # against the (num_ords, 1) margin column into a (num_ords, num_ords) matrix.
    centers = np.asarray(centers).reshape(-1, 1)

    inner_margins = np.full((num_ords - 1, 1), margin, dtype=float)
    # 1e9 stands in for "unbounded" on the outer side of the extreme classes.
    up = np.concatenate((inner_margins, [[1e9]]), axis=0) + centers
    lo = centers - np.concatenate(([[1e9]], inner_margins), axis=0)

    return lo, up


### Wrapper in action - Keras sequential model:

In [168]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

df = pd.read_csv('datasets-arie_ben_david-era.csv', header=None, sep = ',')
#df = pd.read_csv('fred.csv', header=None, sep = ',')

# First four columns are the features, the fifth is the ordinal label.
X = df.iloc[:,:4]
y = df.iloc[:,4]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

y_train = np.array(y_train)
y_test = np.array(y_test)

# Distinct training labels (sorted by np.unique) and related bookkeeping.
labels, freq = np.unique(y_train, return_counts=True)
ords = labels.reshape(-1, 1)
l = min(labels)
u = max(labels)
labs = len(labels)

# cDiff @ centroids gives the gap between each pair of adjacent class centroids.
eyes = np.eye(labs)
cDiff = eyes[1:labs,:]-eyes[:(labs-1),:]

# Create matrix from one-hot encoded training labels to use to calculate class centroids.
# The encoder is fitted on the training labels only and then re-used for the
# test labels so both matrices share the same column order and width.
# `.toarray()` is used instead of the `sparse=False` keyword, which newer
# scikit-learn releases no longer accept.
onehot_encoder = OneHotEncoder(handle_unknown='ignore')
onehot = onehot_encoder.fit_transform(y_train.reshape((-1, 1))).toarray()
testhot = onehot_encoder.transform(y_test.reshape((-1, 1))).toarray()
# Row k of new_y_train averages the predictions of class k: onehot / class count.
onehot_inverse = 1/np.sum((onehot.T), axis=1)
new_y_train = onehot.T*onehot_inverse.reshape(-1,1)

model = Sequential()
model.add(Dense(40, activation='relu', input_shape=(4, )))
model.add(Dropout(0.1))
model.add(Dense(28, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(20, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(1))

# Stage 1: hyperplane ordering loss
# NOTE(review): this cell previously built Adam(lr=1e-2, beta_1=0.9,
# beta_2=0.999, decay=1e-4) before `Adam` was imported and never passed it to
# compile(); the default "adam" optimizer below is what compile() actually uses.
model.compile(loss=ordering_loss, optimizer="adam")

# Run model fit until the proper ordering is achieved and the minimum spacing between centroids is 1.0 or greater
# NOTE(review): the loop has no iteration cap and runs until the spacing target is met.
mingap = 0.0
while mingap < 1.0:
    model.fit(X_train, y_train, validation_data=(X_test,y_test), epochs=10, batch_size=64, shuffle=True)

    # Calculate hyperplane centroid constant value
    pred = model.predict(X_train)
    train_cent = np.matmul(new_y_train, pred)
    mingap = np.min(np.matmul(cDiff,train_cent))
    print('Minumum spacing is: ', mingap)

Train on 670 samples, validate on 330 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Minumum spacing is: 0.24087745118987458
Train on 670 samples, validate on 330 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Minumum spacing is: 0.42570958922549007
Train on 670 samples, validate on 330 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Minumum spacing is: 0.5557570425777243
Train on 670 samples, validate on 330 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Minumum spacing is: 0.8330272856364322
Train on 670 samples, validate on 330 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Minumum spacing is: 0.7831922810828136
Train on 670 sampl

In [169]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential

# Create point loss function
# train_cent from stage 1
# ords from model fit loop in stage one

lower_threshold, upper_threshold = ohpl_margins(train_cent, len(ords), 0.1)
point_loss = get_point_wrapper(lower_threshold, upper_threshold)

# Recompile Model with point loss
# `learning_rate` is the current keyword; `lr` is only a deprecated alias.
# NOTE(review): `decay` is kept as in the original run; newer Keras optimizers
# may reject it - confirm against the installed TensorFlow version.
opt = Adam(learning_rate=1e-3, beta_1=0.9, beta_2=0.999, decay=1e-4)
model.compile(loss=point_loss, optimizer=opt)

# model fit uses onehot encoded labels
model.fit(X_train, onehot, validation_data=(X_test,testhot), epochs=100, shuffle=True, batch_size=16)



Train on 670 samples, validate on 330 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
E



In [170]:
# Calculate MZE (mean zero-one error) and MAE (mean absolute error) on the test set

pred = model.predict(X_train)
new_pred = model.predict(X_test)

# Identify the closest centroid
rcenter = train_cent.T # create row matrix of centroids from Stage 1
nearest = np.argmin(np.abs(new_pred - rcenter), axis=1)
# Map the centroid index back through the sorted training labels rather than
# adding the minimum label: identical when labels are consecutive integers,
# and still correct when a label value is absent from the training set.
y_pred = labels[nearest]

mae = np.mean(np.abs(y_pred - y_test))
mze = np.mean(np.abs(y_pred - y_test) > 0)
print(mae, mze)

1.3181818181818181 0.7696969696969697


4