# ### Define Loss Functions

#######step 1: Define the loss functions
#######        (mean loss is the frequently used one; sum loss is the alternative)

"""Metrics to assess performance on ordinal classification task given class prediction
 using hyper plane loss techniques
"""

# Authors: Bob Vanderheyden
#          Ying Xie
#
# Contributor: Shayan Shamskolahi
#
# References
# ----------
# .. [1] Vanderheyden, Bob and Ying Xie. Ordinal Hyperplane Loss. (2018).
#    2018 IEEE International Conference on Big Data (Big Data),
#    2018 IEEE International Conference On, 2337. https://doi-org.proxy.kennesaw.edu/10.1109/BigData.2018.8622079

import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf
import numpy as np


def _hpall_loss(y_true, y_pred, minlabel, maxlabel, margin, ordering_loss_weight,
                reduce_point_loss):
    """Shared implementation of the ordinal hyperplane loss (ordering loss + point loss).

    Parameters
    ----------
    y_true : array-like
        Class labels, one per sample. Flattened to 1-D before use.
    y_pred : array-like
        Model scores, one per sample. Reshaped to a single column.
    minlabel, maxlabel : number
        Lowest / highest class label.
    margin : float
        Distance a score may lie from its class centroid before it is penalised.
    ordering_loss_weight : float
        Multiplier applied to the ordering loss before it is added to the point loss.
    reduce_point_loss : callable
        Reduction applied to the per-sample point loss (``tf.reduce_mean`` or
        ``tf.reduce_sum``). The ordering loss is always summed.

    Returns
    -------
    tf.Tensor
        Scalar ``reduce_point_loss(point_loss) + ordering_loss_weight * ordering_loss``.
    """
    min_label = tf.constant(minlabel, dtype=tf.float32)
    max_label = tf.constant(maxlabel, dtype=tf.float32)
    margin = tf.constant(margin, dtype=tf.float32)  # centroid margin
    ordering_loss_weight = tf.constant(ordering_loss_weight, dtype=tf.float32)

    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.dtypes.cast(y_true, y_pred.dtype)
    y_pred = tf.reshape(tf.transpose(y_pred), [-1, 1])
    # Flatten the labels once. If y_true arrives as (batch, 1), bounds derived from it
    # would otherwise become rank 3 and broadcast into a (batch, batch, 1) cross-sample
    # loss instead of a per-sample one.
    y_true = tf.reshape(y_true, [-1])

    # ---- OHPL ordering loss -------------------------------------------------
    # One-hot membership matrix over the labels present in this batch.
    # tf.unique returns labels in order of first appearance, not sorted; the
    # pairwise formulation below does not depend on their order.
    ords, idx = tf.unique(y_true)
    num = tf.shape(ords)[0]
    y_true_1hot = tf.one_hot(idx, num)

    # Mean score (centroid) of each class: per-class sum divided by per-class count.
    class_sum = tf.matmul(tf.transpose(y_pred), y_true_1hot)
    class_count = tf.reduce_sum(y_true_1hot, 0)
    class_mean = tf.divide(class_sum, class_count)

    # Required gap between classes i and j: relu(label_i - label_j).
    ords = tf.dtypes.cast(ords, tf.float32)
    ords_col = tf.reshape(ords, [-1, 1])
    ords_row = tf.reshape(ords, [1, -1])
    min_distance = tf.nn.relu(tf.subtract(ords_col, ords_row))

    # 1 where label_i > label_j, 0 elsewhere (assumes labels differ by at least 1).
    keep = tf.minimum(min_distance, 1)

    # Observed gap between centroids, restricted to the pairs kept above.
    centroid_col = tf.reshape(class_mean, [-1, 1])
    centroid_row = tf.reshape(class_mean, [1, -1])
    centroid_gap = tf.nn.relu(tf.subtract(centroid_col, centroid_row))
    centroid_distance = tf.multiply(keep, centroid_gap)

    # Penalise every pair whose centroid gap is smaller than its label gap.
    hp_ordering_loss = tf.nn.relu(tf.subtract(min_distance, centroid_distance))
    hp_ordering_loss = tf.reduce_sum(hp_ordering_loss)

    # ---- OHPL point loss ----------------------------------------------------
    # Centroid of the class each sample belongs to, as a column.
    point_cent = tf.matmul(y_true_1hot, centroid_col)

    # Samples with y_true <= minlabel get a lower bound pushed out by >= 1e9,
    # which effectively disables the lower penalty for the lowest class.
    lower_bound = tf.subtract(min_label, y_true)
    lower_bound = tf.add(lower_bound, 1)
    lower_bound = tf.multiply(lower_bound, 1e9)
    lower_bound = tf.nn.relu(lower_bound)
    lower_bound = tf.reshape(tf.add(margin, lower_bound), [-1, 1])

    # Symmetric treatment of the upper bound for samples with y_true >= maxlabel.
    upper_bound = tf.subtract(y_true, max_label)
    upper_bound = tf.add(upper_bound, 1)
    upper_bound = tf.multiply(upper_bound, 1e9)
    upper_bound = tf.nn.relu(upper_bound)
    upper_bound = tf.reshape(tf.add(margin, upper_bound), [-1, 1])

    # Amount by which the score exceeds centroid + upper bound.
    upper_loss = tf.add(point_cent, upper_bound)
    upper_loss = tf.nn.relu(tf.subtract(y_pred, upper_loss))

    # Amount by which the score falls below centroid - lower bound.
    lower_loss = tf.add(lower_bound, y_pred)
    lower_loss = tf.nn.relu(tf.subtract(point_cent, lower_loss))

    hp_point_loss = reduce_point_loss(tf.add(upper_loss, lower_loss))

    # Aggregate ordering loss and point loss.
    return tf.add(hp_point_loss, tf.multiply(ordering_loss_weight, hp_ordering_loss))


def hpall_mean_loss(y_true, y_pred, minlabel, maxlabel, margin=0.1, ordering_loss_weight=1):
    """Evaluate the ordinal hyperplane ordering loss and point loss of the predictions
    y_pred (point loss aggregated with reduce mean).

    Parameters
    ----------
    y_true : array-like
        Class labels, one per sample.
    y_pred : array-like
        Predicted scores, one per sample.
    minlabel : integer
        Lowest class label.
    maxlabel : integer
        Highest class label.
    margin : float
        Allowed distance between a score and its class centroid.
    ordering_loss_weight : float
        Weight of the ordering loss in the total.

    Returns
    -------
    loss : scalar tensor
        A non-negative floating point value (best value is 0.0).

    Usage
    -----
    loss = hpall_mean_loss([4,1,2,0,4,2,1], [6.0,3.1,5.2,1.0,4.0,2.2,3.7],0,4,.3,0.1)
    print('Loss: ', loss.numpy())  # Loss: 0.7228571

    Usage with the Keras `compile` API:

    ```python
    def get_ohpl_wrapper(min_label, max_label, margin, ordering_loss_weight):
        def ohpl(y_true, y_pred):
            return hpall_mean_loss(y_true, y_pred, min_label, max_label, margin, ordering_loss_weight)
        return ohpl

    # authors' note: ordering_loss_weight must not be less than 1
    loss = get_ohpl_wrapper(2, 7, .3, 1)

    model = tf.keras.Model(inputs, outputs)
    model.compile(loss=loss, optimizer='adam')
    ```
    """
    return _hpall_loss(y_true, y_pred, minlabel, maxlabel, margin, ordering_loss_weight,
                       tf.reduce_mean)


#######step 1 - alternative: Define the loss function (sum loss)

def hpall_sum_loss(y_true, y_pred, minlabel, maxlabel, margin=0.1, ordering_loss_weight=1):
    """Evaluate the ordinal hyperplane ordering loss and point loss of the predictions
    y_pred (point loss aggregated with reduce sum).

    Parameters
    ----------
    y_true : array-like
        Class labels, one per sample.
    y_pred : array-like
        Predicted scores, one per sample.
    minlabel : integer
        Lowest class label.
    maxlabel : integer
        Highest class label.
    margin : float
        Allowed distance between a score and its class centroid.
    ordering_loss_weight : float
        Weight of the ordering loss in the total.

    Returns
    -------
    loss : scalar tensor
        A non-negative floating point value (best value is 0.0).

    Usage
    -----
    loss = hpall_sum_loss([4,1,2,0,4,2,1], [6.0,3.1,5.2,1.0,4.0,2.2,3.7],0,4,.3,0.1)
    print('Loss: ', loss.numpy())
    # The notebook's recorded output is 3.48.
    # NOTE(review): evaluating this code by hand gives ~3.38 (point loss 3.1 plus
    # 0.1 * ordering loss 2.8) - re-run the cell to confirm which is current.

    Usage with the Keras `compile` API:

    ```python
    def get_ohpl_wrapper(min_label, max_label, margin, ordering_loss_weight):
        def ohpl(y_true, y_pred):
            return hpall_sum_loss(y_true, y_pred, min_label, max_label, margin, ordering_loss_weight)
        return ohpl

    loss = get_ohpl_wrapper(0, 4, 1, 1)

    model = tf.keras.Model(inputs, outputs)
    model.compile(loss=loss, optimizer='adam')
    ```
    """
    return _hpall_loss(y_true, y_pred, minlabel, maxlabel, margin, ordering_loss_weight,
                       tf.reduce_sum)


# ### Test the result:

loss = hpall_mean_loss([4,1,2,0,4,2,1], [6.0,3.1,5.2,1.0,4.0,2.2,3.7],0,4,.3,0.1)
print('Loss: ', loss.numpy()) # Loss: 0.7228571

loss = hpall_sum_loss([4,1,2,0,4,2,1], [6.0,3.1,5.2,1.0,4.0,2.2,3.7],0,4,.3,0.1)
print('Loss: ', loss.numpy()) # recorded notebook output: 3.48 (NOTE(review): re-run to confirm)

# ### Application in Keras (mean loss):
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout

# prepare the data
# Built as one chained expression (no in-place mutation) so the cell gives the same
# result every time it is re-run. The unused IPython-only `pwd = !pwd` capture was dropped.
df = (
    pd.read_csv('world_happiness_2015_2019.csv')
    .astype({'Score': 'int32'})   # truncate the score to an integer class label
    .drop(columns=['Year'])
    .dropna()
)
df.info()

# Features are every column after the first, each divided by its column maximum.
# NOTE(review): the maxima are taken over the full frame, i.e. before the
# train/test split below - confirm this is intended.
X = df.iloc[:, 1:] / df.iloc[:, 1:].max()
# Target is the first column (the integer Score).
y = df.iloc[:, 0]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Feature ranges in the training split, and the class distribution of the target.
print(X_train.max(), X_train.min())
print(np.unique(y, return_counts=True))
# ### Wrapper in action - Keras sequential model:

###### Step 2: Define the loss instance.
#   2   - lowest label
#   7   - highest label
#   0.1 - margin used by the point loss
#   1   - ordering_loss_weight (consider the batch size and number of class labels)

# example Keras wrapper for hpall_mean_loss

def get_ohpl_wrapper (min_label, max_label, margin, ordering_loss_weight):
    """Build a two-argument loss callable, as expected by Keras ``model.compile``.

    The returned function ``ohpl(y_true, y_pred)`` forwards to ``hpall_mean_loss``
    with the label range, margin and ordering-loss weight captured here.
    """
    def ohpl(y_true, y_pred):
        return hpall_mean_loss(
            y_true,
            y_pred,
            minlabel=min_label,
            maxlabel=max_label,
            margin=margin,
            ordering_loss_weight=ordering_loss_weight,
        )

    return ohpl


loss = get_ohpl_wrapper(2, 7, .1, 1)  # ordering_loss_weight must not be less than 1

# ## Determine/confirm Hyperparameter value for Ordering Loss
### The code periodically (every 5 epochs) computes the class centroids and the minimum and maximum gap (distance between adjacent centroids)
# ### If the algorithm requires more than 50 epochs to establish the ordering and spacing,
# ### increase the ordering_loss_weight value above.

######### TODO (from the original notebook): Remove this cell?
# Define and compile the model
from tensorflow.keras.optimizers import Adam

# Small fully connected regressor: 6 input features -> 25 -> 30 -> 6 -> 1 score,
# with 10% dropout after each hidden layer.
model = Sequential()
model.add(Dense(25, activation='relu', input_shape=(6, )))
model.add(Dropout(0.1))
model.add(Dense(30, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(6, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(1))

# `learning_rate` replaces the deprecated `lr` alias.
# NOTE(review): `decay` is a legacy-optimizer argument - confirm the installed
# TensorFlow version still accepts it.
opt = Adam(learning_rate=0.003, decay=1e-5)
model.compile(loss=loss, optimizer=opt)


model.fit(X_train, y_train, epochs=2000, batch_size=16, shuffle=True)

########### Step 7: predict on X_test and finally generate the confusion matrix.

# Define the minimum class
min_class = min(np.unique(y_train))

# Multiply centroid calculation matrix, new_y_train, by training set scores
# NOTE(review): `new_y_train` and `pred` are defined in cells that are not visible
# here; presumably `pred` holds the model's scores on X_train - verify.
train_cent = np.matmul(new_y_train, pred)
train_cent

# Calculate new data model score
new_pred = model.predict(X_test)

# Identify the closest centroid
rcenter = train_cent.T  # create row matrix of centroids
# Index of the nearest centroid, shifted by the smallest training label.
# NOTE(review): this assumes centroid rows are in ascending, contiguous label order - confirm.
y_pred = np.argmin(abs(new_pred - rcenter), axis=1) + min_class

# calculate the mean absolute error and mean zero one error
mae = np.mean(abs(y_pred - y_test))
mze = np.mean(abs(y_pred - y_test) > 0)
print(mae, mze)

# Confusion matrix
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_pred)