## tf.where

`tf.where(condition, x, y)` selects element-wise between two tensors, which is how the Huber loss switches between its two branches:

- `is_small_error` : boolean condition to check
- `small_error_loss` : value returned where the condition is True
- `big_error_loss` : value returned where the condition is False
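For reference, a minimal sketch of the `my_huber_loss` used below, assuming a fixed threshold of 1 (the tunable version comes later):

```python
import tensorflow as tf

def my_huber_loss(y_true, y_pred):
    threshold = 1  # assumed fixed threshold; see the wrapper version below for tuning it
    error = y_true - y_pred
    is_small_error = tf.abs(error) <= threshold
    small_error_loss = tf.square(error) / 2
    big_error_loss = threshold * (tf.abs(error) - (0.5 * threshold))
    # tf.where picks small_error_loss where the condition holds, big_error_loss otherwise
    return tf.where(is_small_error, small_error_loss, big_error_loss)
```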
```python
model = tf.keras.Sequential([keras.layers.Dense(units=1, input_shape=[1])])
model.compile(optimizer='sgd', loss=my_huber_loss)
model.fit(xs, ys, epochs=500, verbose=0)
print(model.predict([10.0]))
```
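The `xs` and `ys` above are assumed to be defined earlier in the notes; for a self-contained run, a hypothetical linear dataset (roughly y = 2x - 1) could be used:

```python
import numpy as np

# hypothetical training data following y = 2x - 1
xs = np.array([-1.0, 0.0, 1.0, 2.0, 3.0, 4.0], dtype=float)
ys = np.array([-3.0, -1.0, 1.0, 3.0, 5.0, 7.0], dtype=float)
```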
## Custom Loss Function Hyperparameter Tuning (using a wrapper function)
```python
# wrapper function that accepts the hyperparameter
def my_huber_loss_with_threshold(threshold):
    # function that accepts the ground truth and predictions
    def my_huber_loss(y_true, y_pred):
        error = y_true - y_pred
        is_small_error = tf.abs(error) <= threshold
        small_error_loss = tf.square(error) / 2
        big_error_loss = threshold * (tf.abs(error) - (0.5 * threshold))
        return tf.where(is_small_error, small_error_loss, big_error_loss)
    # return the inner function tuned by the hyperparameter
    return my_huber_loss
```
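The wrapper is then called with a concrete threshold and the returned loss function is passed to `compile`; the 1.2 below is just an illustrative value:

```python
model.compile(optimizer='sgd', loss=my_huber_loss_with_threshold(threshold=1.2))
model.fit(xs, ys, epochs=500, verbose=0)
```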
State-of-the-art siamese networks tend to use some form of either contrastive loss or triplet loss during training; these loss functions are better suited to siamese networks and tend to improve accuracy. The goal of a siamese network isn't to classify a set of image pairs but instead to differentiate between them. Essentially, contrastive loss evaluates how well the siamese network distinguishes between the image pairs. The difference is subtle but incredibly important.
$$ Loss = Y \cdot D^2 + (1 - Y) \cdot \max(\text{margin} - D, 0)^2 $$
- `Y` : 1 if the images are similar, 0 otherwise
- `D` : tensor of Euclidean distances between the pairs of images
- `margin` : constant defining how far apart a dissimilar pair must be before it stops contributing to the loss

When Y = 1 (similar pair), Loss = D^2, so the loss grows when a similar pair is mapped far apart; when Y = 0 (dissimilar pair), Loss = max(margin - D, 0)^2, so the loss grows when a dissimilar pair is closer than the margin.
```python
from tensorflow.keras import backend as K

def contrastive_loss_with_margin(margin):
    def contrastive_loss(y_true, y_pred):
        '''Contrastive loss from Hadsell-et-al.'06
        http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
        '''
        square_pred = K.square(y_pred)
        margin_square = K.square(K.maximum(margin - y_pred, 0))
        # taking the mean is not strictly necessary since Keras does it automatically,
        # but we always need a scalar instead of a tensor
        return K.mean(y_true * square_pred + (1 - y_true) * margin_square)
    return contrastive_loss
```
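As with the Huber wrapper, the returned inner function is what gets passed to `compile`; the margin of 1 and the optimizer below are just assumed example values:

```python
# hypothetical siamese model compiled with the tunable contrastive loss
model.compile(loss=contrastive_loss_with_margin(margin=1), optimizer='rmsprop')
```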
```python
import tensorflow as tf
# inherit from this base class
from tensorflow.keras.layers import Layer


class SimpleDense(Layer):  # inherit from the Keras Layer class

    def __init__(self, units=32):  # initialization
        '''Initializes the instance attributes'''
        super(SimpleDense, self).__init__()
        self.units = units

    def build(self, input_shape):  # runs lazily on the first call, once the input shape is known
        '''Create the state of the layer (weights)'''
        # initialize the weights
        w_init = tf.random_normal_initializer()
        self.w = tf.Variable(name="kernel",
                             initial_value=w_init(shape=(input_shape[-1], self.units),
                                                  dtype='float32'),
                             trainable=True)
        # initialize the bias (needed because call() uses self.b)
        b_init = tf.zeros_initializer()
        self.b = tf.Variable(name="bias",
                             initial_value=b_init(shape=(self.units,), dtype='float32'),
                             trainable=True)

    def call(self, inputs):  # performs the computation; called during training and inference
        '''Defines the computation from inputs to outputs'''
        return tf.matmul(inputs, self.w) + self.b  # WX + B
```
```python
# declare an instance of the class
my_dense = SimpleDense(units=1)

# define an input and feed into the layer
x = tf.ones((1, 1))
y = my_dense(x)

# properties of the base Layer class like `variables` can be used
print(my_dense.variables)
```
```python
# add an activation parameter
def __init__(self, units=32, activation=None):
    super(SimpleDense, self).__init__()
    self.units = units
    # define the activation to get from the built-in activation layers in Keras
    self.activation = tf.keras.activations.get(activation)
```
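For the new parameter to take effect, `call` also needs to apply the stored activation; a minimal sketch of that change:

```python
def call(self, inputs):
    # apply the activation to the affine transform WX + B
    # (activations.get(None) returns the linear/identity activation, so this is safe by default)
    return self.activation(tf.matmul(inputs, self.w) + self.b)
```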