TensorFlow Advanced: Part 2

Custom Loss Function

Two ways to declare a loss:

  • string

    model.compile(loss='mse', optimizer='sgd')
  • loss object

    from tensorflow.keras.losses import mean_squared_error
    model.compile(loss=mean_squared_error, optimizer='sgd')

    only the second way lets us pass parameters to the loss, as shown below
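    For example, a built-in loss such as Huber can be instantiated with a custom argument (the delta value here is only an illustration):

    from tensorflow.keras.losses import Huber
    model.compile(loss=Huber(delta=1.5), optimizer='sgd')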

Custom Loss Function Template

def my_loss_function(y_true, y_pred):
    # compute the loss from the ground truth and the predictions
    return losses

Huber Loss

In statistics, the Huber loss is a loss function used in robust regression that is less sensitive to outliers in data than the squared error loss.
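For an error a = y_true − y_pred and a threshold δ, the standard definition (which the code below implements) is:

$$
L_{\delta}(a) =
\begin{cases}
\frac{1}{2}a^{2} & \text{if } |a| \le \delta \\
\delta \left( |a| - \frac{1}{2}\delta \right) & \text{otherwise}
\end{cases}
$$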

import numpy as np
import tensorflow as tf
from tensorflow import keras

# inputs
xs = np.array([-1.0, 0.0, 1.0, 2.0, 3.0, 4.0], dtype=float)

# labels
ys = np.array([-3.0, -1.0, 1.0, 3.0, 5.0, 7.0], dtype=float)

def my_huber_loss(y_true, y_pred):
    threshold = 1
    error = y_true - y_pred
    is_small_error = tf.abs(error) <= threshold
    small_error_loss = tf.square(error) / 2
    big_error_loss = threshold * (tf.abs(error) - (0.5 * threshold))
    return tf.where(is_small_error, small_error_loss, big_error_loss)

## tf.where
# is_small_error : condition to check
# small_error_loss : value where True
# big_error_loss : value where False

model = tf.keras.Sequential([keras.layers.Dense(units=1, input_shape=[1])])
model.compile(optimizer='sgd', loss=my_huber_loss)
model.fit(xs, ys, epochs=500, verbose=0)
print(model.predict([10.0]))
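
A quick sanity check (values chosen arbitrarily) shows the elementwise behaviour of tf.where: errors at or below the threshold take the squared branch, larger errors take the linear branch.

y_true = tf.constant([1.0, 2.0, 3.0])
y_pred = tf.constant([1.2, 1.0, 5.0])
print(my_huber_loss(y_true, y_pred))  # errors 0.2, 1.0, 2.0 -> losses 0.02, 0.5, 1.5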

Custom Loss Function Hyperparameter Tuning (using a wrapper function)

# wrapper function that accepts the hyperparameter
def my_huber_loss_with_threshold(threshold):
    # inner function that accepts the ground truth and predictions
    def my_huber_loss(y_true, y_pred):
        error = y_true - y_pred
        is_small_error = tf.abs(error) <= threshold
        small_error_loss = tf.square(error) / 2
        big_error_loss = threshold * (tf.abs(error) - (0.5 * threshold))
        return tf.where(is_small_error, small_error_loss, big_error_loss)
    # return the inner function tuned by the hyperparameter
    return my_huber_loss

###
model.compile(optimizer='sgd', loss=my_huber_loss_with_threshold(threshold=1.2))  # threshold value is an example

Custom Loss Function using classes

A class-based custom loss inherits from the Keras Loss class; the required syntax and methods are shown below.

from tensorflow.keras.losses import Loss

class MyHuberLoss(Loss):

    # initialize instance attributes
    def __init__(self, threshold=1):
        super().__init__()
        self.threshold = threshold

    # compute loss
    def call(self, y_true, y_pred):
        error = y_true - y_pred
        is_small_error = tf.abs(error) <= self.threshold
        small_error_loss = tf.square(error) / 2
        big_error_loss = self.threshold * (tf.abs(error) - (0.5 * self.threshold))
        return tf.where(is_small_error, small_error_loss, big_error_loss)

###
model.compile(optimizer='sgd', loss=MyHuberLoss(threshold=1.02))
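
If the model will be saved and reloaded with this loss, a get_config override lets Keras serialize the threshold. This is a sketch added for completeness (not in the original notes); the method goes inside MyHuberLoss:

    def get_config(self):
        config = super().get_config()
        config.update({"threshold": self.threshold})
        return config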

Contrastive Loss Function

State-of-the-art siamese networks tend to use some form of either contrastive loss or triplet loss during training; these loss functions are better suited to siamese networks and tend to improve accuracy. The goal of a siamese network is not to classify a set of image pairs but to differentiate between them. Essentially, contrastive loss evaluates how good a job the siamese network does at distinguishing between the image pairs. The difference is subtle but important.

$$
Y * D^2 + (1-Y) * \max(margin-D,0)^2
$$

  • Y : 1 if the images are similar, 0 otherwise
  • D : tensor of Euclidean distances between the pairs of images
  • margin : constant defining the minimum distance we want between dissimilar pairs

When Y = 1 (similar pair), Loss = D^2, so the loss grows with the distance between the two embeddings.

When Y = 0 (dissimilar pair), Loss = max(margin - D, 0)^2, so the pair is penalized only while its distance is smaller than the margin.

Replacing Y with Y_true and D with Y_pred, the formula becomes:

$$
Y_{true} * Y_{pred}^{2} + (1-Y_{true}) * \max(margin-Y_{pred},0)^2
$$

from tensorflow.keras import backend as K

def contrastive_loss_with_margin(margin):
    def contrastive_loss(y_true, y_pred):
        '''Contrastive loss from Hadsell-et-al.'06
        http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
        '''
        square_pred = K.square(y_pred)
        margin_square = K.square(K.maximum(margin - y_pred, 0))

        # taking the mean is not strictly necessary since Keras reduces the loss automatically,
        # but we always want a scalar rather than a tensor
        return K.mean(y_true * square_pred + (1 - y_true) * margin_square)
    return contrastive_loss
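
A minimal usage sketch: here siamese_model is a hypothetical two-branch network whose output is the Euclidean distance between a pair of embeddings, and the margin value is only an example.

siamese_model.compile(optimizer='rmsprop', loss=contrastive_loss_with_margin(margin=1.0))
# y_true is 1 for similar pairs and 0 for dissimilar pairs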

Custom Layers

  • way 1

    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128),
        tf.keras.layers.Lambda(lambda x: tf.abs(x)),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
  • way 2

    from tensorflow.keras import backend as K

    def my_relu(x):
        return K.maximum(-0.1, x)

    model = tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(128),
        tf.keras.layers.Lambda(my_relu),
        tf.keras.layers.Dense(10, activation='softmax')
    ])

Layers

Custom Dense Layer

# inherit from this base class
from tensorflow.keras.layers import Layer

class SimpleDense(Layer):  # inherit from the Keras Layer class

    def __init__(self, units=32):  # initialization
        '''Initializes the instance attributes'''
        super(SimpleDense, self).__init__()
        self.units = units

    def build(self, input_shape):  # runs the first time the layer is called, once the input shape is known
        '''Create the state of the layer (weights)'''
        # initialize the weights
        w_init = tf.random_normal_initializer()
        self.w = tf.Variable(name="kernel",
                             initial_value=w_init(shape=(input_shape[-1], self.units),
                                                  dtype='float32'),
                             trainable=True)

        # initialize the biases
        b_init = tf.zeros_initializer()
        self.b = tf.Variable(name="bias",
                             initial_value=b_init(shape=(self.units,), dtype='float32'),
                             trainable=True)

    def call(self, inputs):  # performs the computation; called during training and inference
        '''Defines the computation from inputs to outputs'''
        return tf.matmul(inputs, self.w) + self.b  # WX + B
# declare an instance of the class
my_dense = SimpleDense(units=1)

# define an input and feed it into the layer (calling the layer triggers build)
x = tf.ones((1, 1))
y = my_dense(x)

# attributes of the base Layer class like `variables` can be used
print(my_dense.variables)

usage 1

# define the dataset
xs = np.array([-1.0, 0.0, 1.0, 2.0, 3.0, 4.0], dtype=float)
ys = np.array([-3.0, -1.0, 1.0, 3.0, 5.0, 7.0], dtype=float)

# use the Sequential API to build a model with our custom layer
my_layer = SimpleDense(units=1)
model = tf.keras.Sequential([my_layer])

# configure and train the model
model.compile(optimizer='sgd', loss='mean_squared_error')
model.fit(xs, ys, epochs=500,verbose=0)

# perform inference
print(model.predict([10.0]))

# see the updated state of the variables
print(my_layer.variables)

usage 2

def my_relu(x):
    return K.maximum(-0.1, x)

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    SimpleDense(128),
    tf.keras.layers.Lambda(my_relu),
    tf.keras.layers.Dense(10, activation='softmax')
])

Activating Custom Layer

class SimpleDense(Layer):

    # add an activation parameter
    def __init__(self, units=32, activation=None):
        super(SimpleDense, self).__init__()
        self.units = units

        # define the activation to get from the built-in activation layers in Keras
        self.activation = tf.keras.activations.get(activation)

    def build(self, input_shape):
        w_init = tf.random_normal_initializer()
        self.w = tf.Variable(name="kernel",
                             initial_value=w_init(shape=(input_shape[-1], self.units),
                                                  dtype='float32'),
                             trainable=True)
        b_init = tf.zeros_initializer()
        self.b = tf.Variable(name="bias",
                             initial_value=b_init(shape=(self.units,), dtype='float32'),
                             trainable=True)
        super().build(input_shape)

    def call(self, inputs):
        # pass the computation to the activation layer
        return self.activation(tf.matmul(inputs, self.w) + self.b)
mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    SimpleDense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation='softmax')
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(x_train, y_train, epochs=5)
model.evaluate(x_test, y_test)

Custom Model

from tensorflow.keras.layers import Dense, Input, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.utils import plot_model

# define inputs
input_a = Input(shape=[1], name="Wide_Input")
input_b = Input(shape=[1], name="Deep_Input")

# define deep path
hidden_1 = Dense(30, activation="relu")(input_b)
hidden_2 = Dense(30, activation="relu")(hidden_1)

# define merged path
concat = concatenate([input_a, hidden_2])
output = Dense(1, name="Output")(concat)

# define another output for the deep path
aux_output = Dense(1, name="aux_Output")(hidden_2)

# build the model
model = Model(inputs=[input_a, input_b], outputs=[output, aux_output])

# visualize the architecture
plot_model(model)

Implement as a Class

# inherit from the Model base class
class WideAndDeepModel(Model):
    def __init__(self, units=30, activation='relu', **kwargs):
        '''initializes the instance attributes'''
        super().__init__(**kwargs)
        self.hidden1 = Dense(units, activation=activation)
        self.hidden2 = Dense(units, activation=activation)
        self.main_output = Dense(1)
        self.aux_output = Dense(1)

    def call(self, inputs):
        '''defines the network architecture'''
        input_A, input_B = inputs
        hidden1 = self.hidden1(input_B)
        hidden2 = self.hidden2(hidden1)
        concat = concatenate([input_A, hidden2])
        main_output = self.main_output(concat)
        aux_output = self.aux_output(hidden2)

        return main_output, aux_output

# create an instance of the model
model = WideAndDeepModel()
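
A minimal compile/fit sketch, assuming hypothetical arrays X_wide, X_deep, and y; with two outputs, one loss is given per output.

model.compile(optimizer='sgd', loss=['mse', 'mse'])
# model.fit((X_wide, X_deep), (y, y), epochs=10)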

Model Class

  • Built-in training, evaluation, and prediction loops
    • model.fit() , model.evaluate() , model.predict()
  • Saving and serialization APIs
    • model.save() , model.save_weights()
  • Summarization and visualization utilities
    • model.summary() , tf.keras.utils.plot_model()

Benefits of subclassing models

  • Extends how you’ve been building models
  • Continue to use functional and sequential code
  • Modular architecture
  • Try out experiments quickly
  • Control flow in the network

Residual Networks

Implementing a Mini ResNet

class IdentityBlock(tf.keras.Model):
    def __init__(self, filters, kernel_size):
        super(IdentityBlock, self).__init__(name='')

        self.conv1 = tf.keras.layers.Conv2D(filters, kernel_size, padding='same')
        self.bn1 = tf.keras.layers.BatchNormalization()

        self.conv2 = tf.keras.layers.Conv2D(filters, kernel_size, padding='same')
        self.bn2 = tf.keras.layers.BatchNormalization()

        self.act = tf.keras.layers.Activation('relu')
        self.add = tf.keras.layers.Add()

    def call(self, input_tensor):
        x = self.conv1(input_tensor)
        x = self.bn1(x)
        x = self.act(x)

        x = self.conv2(x)
        x = self.bn2(x)

        # skip connection: add the block input back onto the transformed output
        x = self.add([x, input_tensor])
        x = self.act(x)
        return x
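
A quick shape check (illustrative values): the block preserves its input shape, which is what makes the elementwise add on the skip connection valid. Note that filters must match the number of input channels.

block = IdentityBlock(filters=64, kernel_size=3)
x = tf.random.normal((1, 28, 28, 64))
print(block(x).shape)  # (1, 28, 28, 64), same as the input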

class ResNet(tf.keras.Model):
    def __init__(self, num_classes):  # generic ResNet, so num_classes is a parameter
        super(ResNet, self).__init__()
        self.conv = tf.keras.layers.Conv2D(64, 7, padding='same')
        self.bn = tf.keras.layers.BatchNormalization()
        self.act = tf.keras.layers.Activation('relu')
        self.max_pool = tf.keras.layers.MaxPool2D((3, 3))

        # use the identity blocks defined above
        self.id1a = IdentityBlock(64, 3)
        self.id1b = IdentityBlock(64, 3)

        self.global_pool = tf.keras.layers.GlobalAveragePooling2D()
        self.classifier = tf.keras.layers.Dense(num_classes, activation='softmax')

    def call(self, inputs):
        x = self.conv(inputs)
        x = self.bn(x)
        x = self.act(x)
        x = self.max_pool(x)

        # insert the identity blocks in the middle of the network
        x = self.id1a(x)
        x = self.id1b(x)

        x = self.global_pool(x)
        return self.classifier(x)
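
A minimal training sketch, assuming 28x28x1 grayscale images such as MNIST; the data pipeline here is illustrative and not part of the original notes.

resnet = ResNet(num_classes=10)
resnet.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

(x_train, y_train), _ = tf.keras.datasets.mnist.load_data()
x_train = (x_train / 255.0)[..., None]  # add a channels dimension -> (60000, 28, 28, 1)
resnet.fit(x_train, y_train, epochs=1)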