Table Of Contents
- Multiclass Classification
- Import Data: Fashion Items
- Prep A Label-List
- Visualize An Image And Its Label
- Build A Model
- First Go
- View Model Stats
- Normalize: Scale The Training Data between 0-1
- Model 2: Use Normalized Data
- Find The Ideal Learning Rate
- Use A Learning-Rate Callback function
- Visualize The Learning-Rate Change
- Visualize: Confusion Matrix
- Model Inspection: Takeaways
- Inspect Layers & Weights
- View Stats about each layer
Multiclass Classification
Classifying more-than-two labels/classes. Could be 3, could be 100.
Steps:
- get data ready (create tensors from dataset)
- build a model: diy or pre-trained
- fit the model to the data, make predictions
- evaluate the model: how'd it do?
- Experimentation: do better
- save & reload the model
In [92]:
import tensorflow as tf
from tensorflow.keras.datasets import fashion_mnist
from sklearn.datasets import make_circles
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import itertools
import random
from tensorflow.keras.utils import plot_modelImport Data: Fashion Items
60K items.
28x28 images.
fashion_mnist dataset
In [2]:
# Load the fashion_mnist dataset as (images, labels) pairs for the train and test splits.
(train_data, train_labels), (test_data, test_labels) = fashion_mnist.load_data()In [3]:
# Peek at the first training example: its pixel values and its label.
print(f'Training Data:\n{train_data[0]}')
print(f'Training Label:\n{train_labels[0]}')In [4]:
# Shape of a single image.
print(f'data shape: {train_data[0].shape}')In [5]:
# Render the first training image.
plt.imshow(train_data[0])Out [5]:
In [6]:
# Render another training image (index 250) for comparison.
plt.imshow(train_data[250])Out [6]:
Prep A Label-List
The labels can be found in the readme
In [7]:
# Human-readable class names; elsewhere in this notebook they are looked up as
# class_names[label], so position i names integer label i.
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']In [8]:
# Number of classes.
len(class_names)Out [8]:
Visualize An Image And Its Label
In [9]:
# Show a 2x2 grid of randomly chosen training images, each titled with its
# class name. (For a single image, index train_data / train_labels directly
# and call plt.imshow + plt.title on that one item.)
import random

plt.figure(figsize=(7, 7))
for slot in range(1, 5):
    plt.subplot(2, 2, slot)
    rand_index = random.choice(range(len(train_data)))
    plt.imshow(train_data[rand_index], cmap=plt.cm.binary)
    plt.title(class_names[train_labels[rand_index]])
    plt.axis(False)  # hide ticks and frame
# Build A Model
First Go
In [10]:
# Seed TensorFlow's random ops for reproducibility
tf.random.set_seed(42)

# Assemble the network one layer at a time
m1 = tf.keras.Sequential()
m1.add(tf.keras.layers.Flatten(input_shape=(28, 28)))  # 28x28 image -> flat vector of 784 values
m1.add(tf.keras.layers.Dense(4, activation="relu"))
m1.add(tf.keras.layers.Dense(4, activation="relu"))
m1.add(tf.keras.layers.Dense(10, activation="softmax"))  # 10 outputs, one per class

# Multiclass loss that takes integer labels
m1.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
)

# Train on the raw (not yet normalized) data and score the test set after every epoch
non_norm_history = m1.fit(
    x=train_data,
    y=train_labels,
    epochs=10,
    validation_data=(test_data, test_labels),
)
# View Model Stats
In [11]:
# Print the layer-by-layer summary of the first model.
m1.summary()In [12]:
# Value range of the training pixels before any scaling.
train_data.min(), train_data.max()Out [12]:
Normalize: Scale The Training Data between 0-1
In [13]:
# Scale pixel values into the 0-1 range.
# The divisor is captured ONCE from the un-normalized training data. Once
# `train_data` has been rescaled its max is 1.0, so recomputing
# `train_data.max()` for the test set would divide by 1.0 and leave the test
# images unscaled.
pixel_max = train_data.max()
train_data = train_data / pixel_max
test_data = test_data / pixel_max
# In [14]:
# Confirm the new value range of the training data.
train_data.min(), train_data.max()
# Out [14]:
Model 2: Use Normalized Data
In [15]:
# Same seed as the first model so the two runs are comparable
tf.random.set_seed(42)

# Identical architecture to m1, built layer by layer
m2 = tf.keras.Sequential()
m2.add(tf.keras.layers.Flatten(input_shape=(28, 28)))  # 28x28 image -> flat vector of 784 values
m2.add(tf.keras.layers.Dense(4, activation="relu"))
m2.add(tf.keras.layers.Dense(4, activation="relu"))
m2.add(tf.keras.layers.Dense(10, activation="softmax"))  # 10 outputs, one per class

# Same loss / optimizer / metric as m1
m2.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
)

# Train on the rescaled data and score the test set after every epoch
m2History = m2.fit(
    x=train_data,
    y=train_labels,
    epochs=10,
    validation_data=(test_data, test_labels),
)
# In [16]:
# Plot the training curves of both runs, one figure per run, so the
# non-normalized and normalized results can be compared.
for title, run in (
    ("Non-normalized Data", non_norm_history),
    ("Normalized data", m2History),
):
    pd.DataFrame(run.history).plot(title=title)
# Find The Ideal Learning Rate
Use A Learning-Rate Callback function
In [17]:
# Seed TensorFlow's random ops for reproducibility
tf.random.set_seed(42)

# Same architecture as the earlier models, built layer by layer
m3 = tf.keras.Sequential()
m3.add(tf.keras.layers.Flatten(input_shape=(28, 28)))  # 28x28 image -> flat vector of 784 values
m3.add(tf.keras.layers.Dense(4, activation="relu"))
m3.add(tf.keras.layers.Dense(4, activation="relu"))
m3.add(tf.keras.layers.Dense(10, activation="softmax"))  # 10 outputs, one per class

m3.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
)

# Learning-rate sweep: start at 1e-3 and multiply by 10 every 20 epochs
lr_scheduler = tf.keras.callbacks.LearningRateScheduler(
    lambda epoch: 1e-3 * 10**(epoch/20)
)

# Train while the callback changes the learning rate each epoch
find_lr_history = m3.fit(
    x=train_data,
    y=train_labels,
    epochs=40,
    validation_data=(test_data, test_labels),
    callbacks=[lr_scheduler],
)
# Visualize The Learning-Rate Change
In [18]:
# Rebuild the learning rate used at each epoch with the scheduler's formula,
# 1e-3 * 10**(epoch / 20). The number of epochs comes from the recorded losses
# rather than a second hard-coded 40, so the plot stays in sync with fit().
epoch_losses = find_lr_history.history["loss"]
lrFunction = 1e-3 * (10**(np.arange(len(epoch_losses))/20))
plt.semilogx(lrFunction, epoch_losses)  # log-scale x-axis: the learning rate grows exponentially
plt.xlabel("Learning rate")
plt.ylabel("Loss")
plt.title("Finding the ideal learning rate");
# Looks like the lowest learning-rate-to-loss ratio is at the default learning
# rate of the Adam optimizer, 0.001.
In [19]:
## Model With Defined Learning Rate
# Seed TensorFlow's random ops for reproducibility
tf.random.set_seed(42)

# Same architecture as the earlier models, built layer by layer
m4 = tf.keras.Sequential()
m4.add(tf.keras.layers.Flatten(input_shape=(28, 28)))  # 28x28 image -> flat vector of 784 values
m4.add(tf.keras.layers.Dense(4, activation="relu"))
m4.add(tf.keras.layers.Dense(4, activation="relu"))
m4.add(tf.keras.layers.Dense(10, activation="softmax"))  # 10 outputs, one per class

m4.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),  # chosen learning rate (same as the default)
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
)

# Train for 20 epochs and score the test set after every epoch
history = m4.fit(
    x=train_data,
    y=train_labels,
    epochs=20,
    validation_data=(test_data, test_labels),
)
# In [20]:
# Run the trained model on the test images; the softmax output layer yields
# one row of 10 per-class scores for each image.
predictions = m4.predict(test_data)
firstPrediction = predictions[0]
print(f'first prediction: {firstPrediction}')
# Index of the highest score, and the class name at that index.
firstPrediction.argmax(), class_names[firstPrediction.argmax()]Out [20]:
Visualize: Confusion Matrix
In [21]:
def make_confusion_matrix(y_true, y_pred, classes=None, figsize=(10, 10), text_size=15):
    """Draw a labelled confusion matrix comparing predictions with ground truth.

    Every cell shows the raw count plus that count as a percentage of its
    true-label row. Cell colour follows the raw count (darker == more samples).

    Args:
        y_true: Array of truth labels (must be same shape as y_pred).
        y_pred: Array of predicted labels (must be same shape as y_true).
        classes: Sequence or array of class labels (e.g. string form). If
            `None` or empty, integer labels are used.
        figsize: Size of output figure (default=(10, 10)).
        text_size: Size of the text written in each cell (default=15).

    Returns:
        None. The matrix is drawn on a newly created matplotlib figure.
    """
    # Build the matrix and its row-normalised counterpart.
    cm = confusion_matrix(y_true, y_pred)
    row_totals = cm.sum(axis=1)[:, np.newaxis]
    # A label that only ever appears in y_pred has an all-zero row; leave its
    # percentages at 0 rather than dividing by zero.
    cm_norm = np.divide(cm.astype("float"), row_totals,
                        out=np.zeros(cm.shape, dtype="float"),
                        where=row_totals != 0)
    n_classes = cm.shape[0]

    # Plot the matrix with a colour bar.
    fig, ax = plt.subplots(figsize=figsize)
    cax = ax.matshow(cm, cmap=plt.cm.Blues)
    fig.colorbar(cax)

    # `classes` may be a NumPy array, whose truth value is ambiguous, so test
    # for None / emptiness explicitly instead of relying on `if classes:`.
    if classes is not None and len(classes) > 0:
        labels = classes
    else:
        labels = np.arange(n_classes)

    # Label the axes: one tick per class, named by `labels`.
    ax.set(title="Confusion Matrix",
           xlabel="Predicted label",
           ylabel="True label",
           xticks=np.arange(n_classes),
           yticks=np.arange(n_classes),
           xticklabels=labels,
           yticklabels=labels)

    # matshow puts the x-axis on top by default; move it to the bottom.
    ax.xaxis.set_label_position("bottom")
    ax.xaxis.tick_bottom()

    # Cells above the midpoint count get white text so it stays readable on
    # the dark background.
    threshold = (cm.max() + cm.min()) / 2.

    # Write "count (row %)" into every cell, on this figure's own axes.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        ax.text(j, i, f"{cm[i, j]} ({cm_norm[i, j]*100:.1f}%)",
                horizontalalignment="center",
                color="white" if cm[i, j] > threshold else "black",
                size=text_size)
# In [22]:
# First ten rows of per-class scores.
predictions[:10]Out [22]:
In [28]:
# Turn each row of scores into one predicted label: the index of its highest value.
predictionsForMatrix = predictions.argmax(axis=1)
# Raw confusion matrix of true labels vs. predicted labels.
confusion_matrix(test_labels,predictionsForMatrix)Out [28]:
In [29]:
# Make a prettier confusion matrix
# Same comparison, drawn with class names on the axes.
make_confusion_matrix(y_true=test_labels,
y_pred=predictionsForMatrix,
classes=class_names,
figsize=(15, 15),
text_size=10)In [31]:
# Create a function for plotting a random image along with its prediction
def plot_random_image(model, images, true_labels, classes):
    """Pick a random image, plot it, and caption it with predicted and true labels.

    Args:
        model: a trained model whose `predict` accepts a batch of images
            shaped like the items of `images`.
        images: indexable collection of images; each item must support
            `.shape` and `.reshape` (e.g. a NumPy array).
        true_labels: array of ground truth labels, aligned with `images`.
        classes: array of class names, indexed by label.

    Returns:
        None. Draws the chosen image on the current matplotlib axes with an
        x-label showing the predicted class, its confidence and the true
        class. The label is green when the prediction is right, red otherwise.
    """
    # randrange excludes the upper bound. random.randint(0, len(images)) is
    # inclusive and could return len(images), raising IndexError below.
    i = random.randrange(len(images))

    # Predict on a batch of one: add a leading batch dimension to the image.
    target_image = images[i]
    pred_probs = model.predict(target_image.reshape(1, *target_image.shape))
    pred_label = classes[pred_probs.argmax()]
    true_label = classes[true_labels[i]]

    # Plot the target image
    plt.imshow(target_image, cmap=plt.cm.binary)

    # Green caption for a correct prediction, red for a wrong one
    color = "green" if pred_label == true_label else "red"

    # Caption: predicted class, highest predicted score as a percentage, true class
    plt.xlabel("Pred: {} {:2.0f}% (True: {})".format(pred_label,
                                                     100*tf.reduce_max(pred_probs),
                                                     true_label),
               color=color)
# In [37]:
# Check out a random image as well as its prediction
# Uses the final model (m4) on the test images and labels.
plot_random_image(model=m4,
images=test_data,
true_labels=test_labels,
classes=class_names)Model Inspection: Takeaways
Inspect Layers & Weights
- input data gets transformed into a tensor, & passed to neural network
- layer weights are RANDOM at the beginning (unless manually set)
- as epochs & examples go-by, layer weights get updated by tensorflow
The Flatten input layer will not show weights & biases.
Hidden layers have weights & biases.
In [38]:
# List every layer of the final model.
m4.layersOut [38]:
In [39]:
# view a specific layer
# (index 0 is the Flatten input layer)
m4.layers[0]Out [39]:
In [41]:
# Weights of the Flatten layer.
m4.layers[0].get_weights()Out [41]:
In [48]:
# Weights of the first Dense layer.
m4.layers[1].get_weights()Out [48]:
In [50]:
# The printed value is len() of the first Dense layer's weight matrix, i.e. its
# number of rows, shown next to the flattened image size (28*28).
print(f'model layer 1 weight matrix shape: {len(m4.layers[1].get_weights()[0])}')
print(f'input image shape WxH: {28*28}')In [51]:
# len() of the get_weights() list: the number of arrays it returns (unpacked
# below as weights and biases), not the number of individual weight values.
print(f'how many weights in layer 2: {len(m4.layers[1].get_weights())}')In [68]:
# Unpack the first Dense layer's two arrays.
layerTwoWeights, layerTwoBiases = m4.layers[1].get_weights()
print(f'layerTwoWeights Shape: {layerTwoWeights.shape}')
# First row of the weight matrix.
layerTwoWeights[0]Out [68]:
In [55]:
# First bias value.
layerTwoBiases[0]Out [55]:
In [57]:
layerTwoBiases.shapeOut [57]:
View Stats about each layer
In [91]:
# Print the weight and bias shapes of every layer in the final model.
# Layer 0 is the Flatten layer and is only named; every later layer reports
# its own shapes. The output layer used to print `lWeights` / `lBiases` left
# over from the previous hidden layer, so each layer's weights are now fetched
# before its shapes are printed.
last_idx = len(m4.layers) - 1
for idx, layer in enumerate(m4.layers):
    print(f'-----L{idx}')
    if idx == 0:
        print(f'Flat layer')
    else:
        lWeights, lBiases = layer.get_weights()
        if idx != last_idx:
            print('HIDDEN')
        print(f' weights shape: {lWeights.shape}')
        print(f' biases shape: {lBiases.shape}')
    print('-------------')
# In [93]:
# Draw a diagram of the final model with the Keras plot_model utility.
plot_model(m4)In [94]:
# INTERESTING HERE!
# Configuration of the model as returned by Keras.
m4.get_config()Out [94]: