Tensorflow Basics : Part 2

import tensorflow as tf

Gradient : tf.GradientTape

tf.GradientTape(
    persistent=False, watch_accessed_variables=True
)
x = tf.constant(2.0)
with tf.GradientTape() as tape:
    tape.watch(x)  # constants are not watched automatically, so watch explicitly
    y = x**2 + 2*x + 5
gradient = tape.gradient(y,x).numpy()
print(gradient)
6.0

If the tensor is a tf.Variable, there is no need to call tape.watch; trainable variables are watched automatically.

x = tf.Variable(2.0)
with tf.GradientTape() as tape:
    y = x**2 + 2*x + 5
gradient = tape.gradient(y, x).numpy()
print(gradient)
6.0

A gradient tape can be used for multiple variables simultaneously.

# (x, y) = (2.0, 5.0)
# for x = 2: 2x + 2 = 2*2 + 2 = 6
# for y = 5: 2y + 2 = 2*5 + 2 = 12

x = tf.Variable(2.0)
y = tf.Variable(5.0)
with tf.GradientTape() as tape:
    eq_x = x**2 + 2*x + 5
    eq_y = y**2 + 2*y + 5
grads = tape.gradient([eq_x, eq_y], [x, y])
for grad in grads:
    print('Gradient is:', grad.numpy())
Gradient is: 6.0
Gradient is: 12.0
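
The tape can also differentiate a single target that depends on several sources at once; a minimal sketch, where z = x*y + y**2 is purely an illustrative function:

x = tf.Variable(2.0)
y = tf.Variable(5.0)
with tf.GradientTape() as tape:
    z = x * y + y**2  # dz/dx = y, dz/dy = x + 2y
dz_dx, dz_dy = tape.gradient(z, [x, y])
print(dz_dx.numpy(), dz_dy.numpy())  # 5.0 12.0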

Higher-order derivatives can also be computed with nested tapes.

x = tf.Variable(3.0)
with tf.GradientTape() as g:
    with tf.GradientTape() as gg:
        y = x * x * x
    dy_dx = gg.gradient(y, x)   # dy/dx = 3x^2
d2y_dx2 = g.gradient(dy_dx, x)  # d2y/dx2 = 6x

print(dy_dx.numpy())
print(d2y_dx2.numpy())
27.0
18.0

We can set the default watch behaviour to false with watch_accessed_variables=False; that way the tape computes gradients only for the variables we explicitly watch.

Usage : transfer learning, where only the layers being fine-tuned should receive gradients.

# same code as before, but no gradient is recorded this time, so tape.gradient returns None
x = tf.Variable(2.0)
with tf.GradientTape(watch_accessed_variables=False) as tape:
    y = x**2 + 2*x + 5
gradient = tape.gradient(y, x)
print(gradient)
None
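
To actually pick which variables get gradients, combine watch_accessed_variables=False with an explicit tape.watch call; a minimal sketch, where the second variable w is an illustrative stand-in for frozen weights:

x = tf.Variable(2.0)
w = tf.Variable(3.0)
with tf.GradientTape(watch_accessed_variables=False) as tape:
    tape.watch(x)               # only x is tracked
    y = x**2 + 2*x + 5 + w**2   # w participates but is not watched
grad_x, grad_w = tape.gradient(y, [x, w])
print(grad_x.numpy())  # 6.0
print(grad_w)          # None -> w is effectively frozen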

Gradient tapes are not persistent by default.

The resources held by a GradientTape are released as soon as the GradientTape.gradient() method is called, so it can only be called once on a non-persistent tape.

a = tf.Variable(6.0, trainable=True)
b = tf.Variable(2.0, trainable=True)
with tf.GradientTape() as tape:
    y1 = a * a * a
    y2 = b ** 3

print(tape.gradient(y1, a).numpy())
print(tape.gradient(y2, b).numpy())  # second call on a non-persistent tape -> RuntimeError

108.0



---------------------------------------------------------------------------

RuntimeError                              Traceback (most recent call last)

<ipython-input-19-dfc355cfc885> in <module>
      6 
      7 print(tape.gradient(y1, a).numpy())
----> 8 print(tape.gradient(y2, b).numpy())
      9 
     10 


D:\Anaconda3\envs\tf_env\lib\site-packages\tensorflow_core\python\eager\backprop.py in gradient(self, target, sources, output_gradients, unconnected_gradients)
    978     """
    979     if self._tape is None:
--> 980       raise RuntimeError("GradientTape.gradient can only be called once on "
    981                          "non-persistent tapes.")
    982     if self._recording:


RuntimeError: GradientTape.gradient can only be called once on non-persistent tapes.
a = tf.Variable(6.0)
b = tf.Variable(2.0)
with tf.GradientTape(persistent=True) as tape:
    y1 = a * a * a
    y2 = b ** 3

print(tape.gradient(y1, a).numpy())
print(tape.gradient(y2, b).numpy())
108.0
12.0
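
Since a persistent tape keeps holding its resources until it is garbage collected, it is good practice to drop the reference once all gradients have been computed:

del tape  # release the resources held by the persistent tape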

tape.stop_recording()

temporarily pauses the tape's recording; operations executed inside the block are not traced, which reduces overhead

In long functions it is more readable to use stop_recording blocks to compute gradients in the middle of the function than to compute all the gradients at the end.

x = tf.Variable(3.0, trainable=True)
with tf.GradientTape() as tape:
    y = x**3
    with tape.stop_recording():
        print(tape.gradient(y, x).numpy())  # -> 27.0

27.0
11
a = tf.Variable(6.0, trainable=True)
b = tf.Variable(2.0, trainable=True)
with tf.GradientTape(persistent=True) as tape:
    y1 = a ** 2
    with tape.stop_recording():
        print(tape.gradient(y1, a).numpy())  # dy1/da = 2a = 12.0

    y2 = b ** 3
    with tape.stop_recording():
        print(tape.gradient(y2, b).numpy())  # dy2/db = 3b^2 = 12.0

12.0
12.0

Linear Regression using GradientTape

  • prepare train data
  • define train variables
  • define step/update function
    • define loss function
  • train
import random
import numpy as np

# Training data
x_train = np.asarray([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
y_train = np.asarray([i*10+5 for i in x_train])  # y = 10x + 5

# Loss function
def loss(real_y, pred_y):
    return tf.abs(real_y - pred_y)

losses = []

# Trainable variables
a = tf.Variable(random.random(), trainable=True)
b = tf.Variable(random.random(), trainable=True)

# Step function
def step(real_x, real_y, e):
    with tf.GradientTape(persistent=True) as tape:
        # Make prediction
        pred_y = a * real_x + b
        # Calculate loss
        reg_loss = loss(real_y, pred_y)
        losses.append(tf.reduce_sum(reg_loss))

    # Calculate gradients (for both of the variables)
    a_gradients, b_gradients = tape.gradient(reg_loss, [a, b])

    # Update variables
    learning_rate = 0.001
    a.assign_sub(a_gradients * learning_rate)
    b.assign_sub(b_gradients * learning_rate)
    # print(f'epochs={e} y ≈ {a.numpy():.3f}x + {b.numpy():.3f}')

# Training loop
EPOCHS = 10000
for e in range(EPOCHS):
    step(x_train, y_train, e)

print(f'y ≈ {a.numpy()}x + {b.numpy()}')

from matplotlib import pyplot as plt

ep = list(range(EPOCHS))
plt.plot(ep, losses)
plt.xlabel('epochs')
plt.ylabel('loss')
plt.show()
y ≈ 10.053751945495605x + 5.000401973724365

[Plot: training loss vs. epochs]
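
The manual assign_sub update can equivalently be delegated to a Keras optimizer, which is the pattern the neural-network example below uses; a minimal sketch, where the SGD optimizer and its learning rate are illustrative assumptions:

optimizer = tf.keras.optimizers.SGD(learning_rate=0.001)

def step_with_optimizer(real_x, real_y):
    with tf.GradientTape() as tape:
        pred_y = a * real_x + b
        reg_loss = loss(real_y, pred_y)
    grads = tape.gradient(reg_loss, [a, b])
    # apply_gradients performs the same update as the manual assign_sub calls
    optimizer.apply_gradients(zip(grads, [a, b]))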

Neural Networks with GradientTape

  • prepare train data
  • define hyperparameters
  • define neural network model
  • define step/update function
    • define loss function
  • train
# 2.2 - Classifying MNIST
from tensorflow.keras.layers import Conv2D, Flatten, Dense, Dropout, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.initializers import RandomNormal
from tensorflow.keras.datasets import mnist
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import random
import math
%matplotlib inline

# Load and pre-process training data
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = (x_train / 255).reshape((-1, 28, 28, 1))
y_train = tf.keras.utils.to_categorical(y_train, 10)
x_test = (x_test / 255).reshape((-1, 28, 28, 1))
y_test = tf.keras.utils.to_categorical(y_test, 10)

# Hyperparameters
batch_size = 128
epochs = 25
optimizer = Adam(learning_rate=0.001)
weight_init = RandomNormal()

# Build model
model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', kernel_initializer=weight_init, input_shape=(28, 28, 1)))
model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer=weight_init))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu', kernel_initializer=weight_init))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax', kernel_initializer=weight_init))

# Step function
def step(real_x, real_y):
    with tf.GradientTape() as tape:
        # Make prediction
        pred_y = model(real_x.reshape((-1, 28, 28, 1)))
        # Calculate loss
        model_loss = tf.keras.losses.categorical_crossentropy(real_y, pred_y)

    # Calculate gradients
    model_gradients = tape.gradient(model_loss, model.trainable_variables)
    # Update model
    optimizer.apply_gradients(zip(model_gradients, model.trainable_variables))


# Training loop
bat_per_epoch = math.floor(len(x_train) / batch_size)
for epoch in range(epochs):
    print('=', end='')
    for i in range(bat_per_epoch):
        n = i * batch_size
        step(x_train[n:n+batch_size], y_train[n:n+batch_size])


# Calculate accuracy
model.compile(optimizer=optimizer, loss=tf.losses.categorical_crossentropy, metrics=['acc'])  # Compile just for evaluation
print('\nAccuracy:', model.evaluate(x_test, y_test, verbose=0)[1])

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
11493376/11490434 [==============================] - 14s 1us/step
=WARNING:tensorflow:Layer conv2d is casting an input tensor from dtype float64 to the layer's dtype of float32, which is new behavior in TensorFlow 2.  The layer has dtype float32 because it's dtype defaults to floatx.

If you intended to run this layer in float32, you can safely ignore this warning. If in doubt, this warning is likely only an issue if you are porting a TensorFlow 1.X model to TensorFlow 2.

To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

========================
Accuracy: 0.9911
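
Compiling the model just to call evaluate works, but the test accuracy can also be computed directly with a Keras metric, staying entirely in the custom-loop style; a minimal sketch, where batching the test set is only to keep memory modest:

accuracy = tf.keras.metrics.CategoricalAccuracy()
for i in range(0, len(x_test), batch_size):
    # accumulate accuracy batch by batch over the test set
    accuracy.update_state(y_test[i:i+batch_size], model(x_test[i:i+batch_size]))
print('Accuracy:', accuracy.result().numpy())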