Image Classification: Multi-Class Convolutional Neural Network
Notebook Goals
- use a pre-built set of images from the web
- build & experiment with machine-learning models
- build a multi(10)-classification CNN
- Analyze Model performance
- fit a model several times and review the impact: increased accuracy?!
Building A CNN
- familiarize with the data
- pre-process the data
- decide on model parameters
- create & fit the model
- evaluate model performance
- repeat the previous steps (decide parameters, create & fit, evaluate) to reach the best performance
Imports
In [1]:
import tensorflow as tf
import zipfile
import os
import pathlib
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense
Beware overfitting
Overfitting can happen when...
- too "large" a number of convolutional layers is present
- too "large" a number of convolutional filters is present
- the "shape" of the accuracy-over-epochs curve has changed from going up to flat &/or going down
- the "shape" of the validation-loss-over-epochs curve has changed from going down to either "flattening out" or even going up
- a "large" gap between training & validation curves of the same metric
Preventing Overfitting: Regularization
- use more data
- simplify the model
- use data augmentation
- use transfer learning
Get Data
In [2]:
# FOOD images!
# based on a HUGE "food101" dataset from kaggle
# https://www.kaggle.com/datasets/dansbecker/food-101
# https://github.com/mrdbourke/tensorflow-deep-learning/blob/main/extras/image_data_modification.ipynb
!wget https://storage.googleapis.com/ztm_tf_course/food_vision/10_food_classes_all_data.zip
# Base name of the downloaded archive (without the ".zip" extension).
fileName = "10_food_classes_all_data"
# Unzip the downloaded file. The context manager closes the archive even if
# extraction raises, which the explicit close() call did not guarantee.
with zipfile.ZipFile(fileName + '.zip', "r") as zip_ref:
    zip_ref.extractall()
# Inspect
In [3]:
# Walk through the 10_food_classes directory and, for every folder, print its
# name plus how many sub-directories and how many files (images) it holds.
for dirpath, dirnames, filenames in os.walk(fileName):
    # basename() honours the platform's path separator, unlike split("/").
    print(f'walk: {os.path.basename(dirpath).upper()}')
    if dirnames:
        print(f' dirs: {len(dirnames)}')
    if filenames:
        print(f' images: {len(filenames)}')
# Create Variables
In [4]:
#
# training & testing directory paths
#
training_dir_path = fileName + '/train/'
testing_dir_path = fileName + '/test/'
trainingPath = pathlib.Path(training_dir_path)
#
# classification names in arr
# Only sub-directories count as classes: glob('*') also matches stray files
# (e.g. ".DS_Store"), which would otherwise show up as bogus class names.
#
class_names = np.array(sorted(item.name for item in trainingPath.glob('*') if item.is_dir()))
print(class_names)
# Preview Image(s)
In [5]:
def view_random_image(target_dir, target_class):
    """Plot one randomly chosen image from a class folder and return it.

    Args:
        target_dir: Directory that holds the class sub-folders.
        target_class: Name of the class sub-folder to pick an image from.

    Returns:
        The image array as read by ``matplotlib.image.imread``.

    Raises:
        IndexError: If the class folder contains no files.
    """
    # os.path.join works whether or not target_dir ends with a separator;
    # plain string concatenation built a wrong path when it did not.
    target_folder = os.path.join(target_dir, target_class)
    # Get a random image file name
    random_image = random.choice(os.listdir(target_folder))
    # Read in the image and plot it using matplotlib
    img = mpimg.imread(os.path.join(target_folder, random_image))
    plt.imshow(img)
    plt.title(target_class)
    plt.axis("off")
    print(f"Image shape: {img.shape}")  # show the shape of the image
    return img
# In [6]:
# Preview one random training image taken from a randomly chosen class.
img = view_random_image(training_dir_path, random.choice(class_names))
# Data Cleanup
- convert training & testing images to tensors
- SHUFFLE the images in each directory (training & testing) to remove any patterns learned based on order
In [7]:
#
# create data-generators
# which include re-scaling (pixel values are multiplied by 1/255)
#
train_datagen = ImageDataGenerator(rescale=1/255.)
test_datagen = ImageDataGenerator(rescale=1/255.)
# batchSize = 32 produced the warning
# "Allocation of 63083520 exceeds 10% of free system memory", so use 16.
batchSize = 16
#
# Load images into variables:
# resize the images to 224x224
# store in batches of `batchSize` to minimize images-stored-in-memory during training
# shuffle the data
#
training_data = train_datagen.flow_from_directory(training_dir_path,
                                                  target_size=(224, 224),
                                                  batch_size=batchSize,
                                                  class_mode='categorical',
                                                  shuffle=True)
# Test images are read through test_datagen (previously unused) so they keep
# their own pre-processing even if train_datagen is later changed.
testing_data = test_datagen.flow_from_directory(testing_dir_path,
                                                target_size=(224, 224),
                                                batch_size=batchSize,
                                                class_mode='categorical',
                                                shuffle=True)
# Model: Baseline
In [8]:
#
# data-driven output unit-count (one output unit per class name)
#
outputUnitCount = len(class_names)
# In [9]:
#
# Create
#
# based on the TinyVGG model
# https://github.com/poloclub/cnn-explainer/blob/master/tiny-vgg/tiny-vgg.py#L179
# Layout: two (conv, conv, max-pool) stages, then flatten + softmax output.
baseline = Sequential()
baseline.add(Conv2D(filters=10, kernel_size=3, activation='relu', input_shape=(224, 224, 3)))
baseline.add(Conv2D(filters=10, kernel_size=3, activation='relu'))
baseline.add(MaxPool2D())
baseline.add(Conv2D(filters=10, kernel_size=3, activation='relu'))
baseline.add(Conv2D(filters=10, kernel_size=3, activation='relu'))
baseline.add(MaxPool2D())
baseline.add(Flatten())
# as many output neurons as there are classes, with 'softmax' activation
baseline.add(Dense(units=outputUnitCount, activation='softmax'))
# In [10]:
#
# Compile
#
baseline.compile(
    loss="categorical_crossentropy",
    optimizer=tf.keras.optimizers.Adam(),
    metrics=["accuracy"],
)
# In [11]:
#
# FIT
#
baselineHistory = baseline.fit(
    training_data,
    epochs=5,
    steps_per_epoch=len(training_data),
    validation_data=testing_data,
    validation_steps=len(testing_data),
)
# Inspect Results
Summary
In [12]:
baseline.summary()
Visualize Loss & Accuracy
In [13]:
pd.DataFrame(baselineHistory.history).plot(figsize=(10, 7));
In [90]:
# Plot the validation and training data separately
def plot_loss_curves(history):
    """Plot training/validation loss and accuracy curves side by side.

    Args:
        history: Either the ``History`` object returned by ``model.fit()``
            (its ``.history`` attribute is read) or a dict shaped like
            ``{'history': {'loss': [...], 'val_loss': [...],
            'accuracy': [...], 'val_accuracy': [...]}}``.
    """
    # The notebook calls this with both Keras History objects and a
    # hand-built merged dict; subscripting alone failed on History objects.
    metrics = history['history'] if isinstance(history, dict) else history.history
    loss = metrics['loss']
    val_loss = metrics['val_loss']
    accuracy = metrics['accuracy']
    val_accuracy = metrics['val_accuracy']
    epochCount = range(len(loss))

    chartW = 12
    chartH = 3
    plt.figure(figsize=(chartW, chartH))

    # Plot loss
    plt.subplot(1, 2, 1)
    # Keep at least a 0-3 range so plots are comparable, but never clip a
    # curve that rises above 3.
    plt.ylim(bottom=0, top=max([3, *loss, *val_loss]))
    plt.plot(epochCount, loss, label='training_loss')
    plt.plot(epochCount, val_loss, label='val_loss')
    plt.title('Loss')
    plt.xlabel('Epochs')
    plt.legend()

    # Plot accuracy
    plt.subplot(1, 2, 2)
    # Accuracy is a fraction between 0 and 1; a 0-3 axis flattened the curves.
    plt.ylim(bottom=0, top=1)
    plt.plot(epochCount, accuracy, label='training_accuracy')
    plt.plot(epochCount, val_accuracy, label='val_accuracy')
    plt.title('Accuracy')
    plt.xlabel('Epochs')
    plt.legend()
# In [66]:
plot_loss_curves(baselineHistory)
Analysis Notes
- the val_accuracy, or "validation accuracy", looks to be ~29%: not very good
- the val_loss, or "validation loss", goes up from 2 to 4: this is usually a sign of over-fitting: not good
Adjust Model: Change Layers
Try Conv2d then MaxPool2D alternating?! see if that helps?!
Model II: Change Layers
Fewer convolutional layers
In [16]:
# Model II: three (conv, max-pool) pairs, then flatten + softmax output.
m2 = Sequential()
m2.add(Conv2D(filters=10, kernel_size=3, activation='relu', input_shape=(224, 224, 3)))
m2.add(MaxPool2D())
m2.add(Conv2D(filters=10, kernel_size=3, activation='relu'))
m2.add(MaxPool2D())
m2.add(Conv2D(filters=10, kernel_size=3, activation='relu'))
m2.add(MaxPool2D())
m2.add(Flatten())
# as many output neurons as there are classes, with 'softmax' activation
m2.add(Dense(units=outputUnitCount, activation='softmax'))
# In [17]:
#
# Compile
#
m2.compile(
    loss="categorical_crossentropy",
    optimizer=tf.keras.optimizers.Adam(),
    metrics=["accuracy"],
)
# In [18]:
#
# FIT
#
m2History = m2.fit(
    training_data,
    epochs=5,
    steps_per_epoch=len(training_data),
    validation_data=testing_data,
    validation_steps=len(testing_data),
)
# Inspect Results
Summary
In [19]:
m2.summary()
Visualize Loss & Accuracy
In [20]:
pd.DataFrame(m2History.history).plot(figsize=(8, 5));
In [93]:
plot_loss_curves(m2History)
Adjust Model: Data Augmentation
Model III: Data Augmentation
Augment Data
In [43]:
# Create ImageDataGenerator training instance with data augmentation
train_datagen_augmented = ImageDataGenerator(rescale=1/255.,
                                             rotation_range=20,  # note: this is an int not a float
                                             width_shift_range=0.2,
                                             height_shift_range=0.2,
                                             zoom_range=0.2,
                                             horizontal_flip=True)
# Create ImageDataGenerator test instance without data augmentation
# (built once; the cell previously created this identical generator twice)
test_datagen = ImageDataGenerator(rescale=1/255.)
#
# will re-use "train_datagen" from above for the non-augmented training images
#
# Import data and augment the data from training directory.
# Uses the shared batchSize (not a hard-coded 32) so the augmented run matches
# the other generators and the reduced batch size chosen for memory reasons.
print("Augmented training images:")
train_data_augmented = train_datagen_augmented.flow_from_directory(training_dir_path,
                                                                   target_size=(224, 224),
                                                                   batch_size=batchSize,
                                                                   class_mode='categorical',
                                                                   shuffle=True,
                                                                   seed=42)
# Create non-augmented data batches
print("Non-augmented training images:")
train_data = train_datagen.flow_from_directory(training_dir_path,
                                               target_size=(224, 224),
                                               batch_size=batchSize,
                                               class_mode='categorical',
                                               shuffle=True,
                                               seed=42)
print("Unchanged test images:")
test_data = test_datagen.flow_from_directory(testing_dir_path,
                                             target_size=(224, 224),
                                             batch_size=batchSize,
                                             class_mode='categorical',
                                             shuffle=True,
                                             seed=42)
# Preview some Augmented Images
In [44]:
# get one batch from each generator to preview
# next(...) is the iterator protocol; the .next() method is not available on
# every Keras version.
images, labels = next(train_data)
# The augmented generator is named train_data_augmented; the previous name
# (augmented_train_data) was never defined and raised a NameError.
augmented_images, augmented_labels = next(train_data_augmented)  # Note: labels aren't augmented, they stay the same
# In [48]:
# Pick an index that is valid for the batch that was actually fetched,
# instead of assuming the batch holds exactly batchSize images.
random_number = random.randrange(len(augmented_images))
#
# Show the augmented image (the original-image preview is left disabled)
#
# plt.imshow(images[random_number])
# plt.title(f"Original")
# plt.axis(False)
# plt.figure()
plt.imshow(augmented_images[random_number])
plt.title("Augmented")
plt.axis(False);
# Build Model
This is the same as the "baseline", but with augmented data instead of the starting data
In [49]:
#
# Create
#
# based on the TinyVGG model
# https://github.com/poloclub/cnn-explainer/blob/master/tiny-vgg/tiny-vgg.py#L179
# Layout: two (conv, conv, max-pool) stages, then flatten + softmax output.
augmentedModel = Sequential()
augmentedModel.add(Conv2D(filters=10, kernel_size=3, activation='relu', input_shape=(224, 224, 3)))
augmentedModel.add(Conv2D(filters=10, kernel_size=3, activation='relu'))
augmentedModel.add(MaxPool2D())
augmentedModel.add(Conv2D(filters=10, kernel_size=3, activation='relu'))
augmentedModel.add(Conv2D(filters=10, kernel_size=3, activation='relu'))
augmentedModel.add(MaxPool2D())
augmentedModel.add(Flatten())
augmentedModel.add(Dense(units=outputUnitCount, activation='softmax'))
# In [50]:
#
# Compile
#
augmentedModel.compile(
    loss="categorical_crossentropy",
    optimizer=tf.keras.optimizers.Adam(),
    metrics=["accuracy"],
)
# In [51]:
#
# FIT (trains on the augmented images, validates on the test images)
#
augmentedModelHistory = augmentedModel.fit(
    train_data_augmented,
    epochs=5,
    steps_per_epoch=len(train_data_augmented),
    validation_data=testing_data,
    validation_steps=len(testing_data),
)
# Inspect Results
Summary
In [52]:
augmentedModel.summary()
Visualize Loss & Accuracy
In [53]:
pd.DataFrame(augmentedModelHistory.history).plot(figsize=(8, 5));
Try adding more epochs to the third (augmented) model?!
Updating Fit on Model III: More Epochs, augmentedData
In [69]:
#
# FIT
#
# Interesting detail: fit() is called again on the same augmentedModel, so
# this 4th history continues from the already-trained weights.
augmentedLongerModelHistory = augmentedModel.fit(
    train_data_augmented,
    epochs=7,
    steps_per_epoch=len(train_data_augmented),
    validation_data=testing_data,
    validation_steps=len(testing_data),
)
# Inspect Results
In [71]:
pd.DataFrame(augmentedLongerModelHistory.history).plot(figsize=(8, 5));
In [75]:
plot_loss_curves(augmentedLongerModelHistory)
Comparing 4 models
validation accuracy
- I baseline: 28%
- II changing layers: 34%
- III augmented data: 37%
- IV augmented-longer: 46%
Curves
- I: BAD - loss curve looks terrible, and the validation-accuracy goes DOWN
- II: BAD - loss curve looks bad (not as bad as I) and validation-acc stays stagnant
- III: not so bad!
Re-Fitting same model
fit() was called on the same augmentedModel for the 3rd and 4th model fitting.
I believe this made the "overall" model training accuracy higher, rather than starting from "scratch" with the 4th model
Refitting model AGAIN
In [73]:
#
# FIT
#
# Third fit() call on the same augmentedModel: training again continues from
# the weights left by the previous two calls.
thirdCallHistory = augmentedModel.fit(
    train_data_augmented,
    epochs=7,
    steps_per_epoch=len(train_data_augmented),
    validation_data=testing_data,
    validation_steps=len(testing_data),
)
# In [77]:
pd.DataFrame(thirdCallHistory.history).plot(figsize=(8, 4));
In [78]:
plot_loss_curves(thirdCallHistory)
In [92]:
# Concatenate the per-epoch metrics of the three consecutive fit() calls on
# augmentedModel (in call order) so they can be plotted as one timeline.
fit_runs = (augmentedModelHistory, augmentedLongerModelHistory, thirdCallHistory)
mergedValLoss = [value for run in fit_runs for value in run.history['val_loss']]
mergedAcc = [value for run in fit_runs for value in run.history['accuracy']]
mergedValAcc = [value for run in fit_runs for value in run.history['val_accuracy']]
mergedLoss = [value for run in fit_runs for value in run.history['loss']]
# Wrapped under a 'history' key because plot_loss_curves reads that key.
mergedHistory = {
    'history': {
        'loss': mergedLoss,
        'val_loss': mergedValLoss,
        'accuracy': mergedAcc,
        'val_accuracy': mergedValAcc,
    }
}
plot_loss_curves(mergedHistory)
# Predict New Images With The model
In [94]:
# download images
# -q is for "quiet"
!wget -q https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/images/03-pizza-dad.jpeg
!wget -q https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/images/03-steak.jpeg
!wget -q https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/images/03-hamburger.jpeg
!wget -q https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/images/03-sushi.jpeg
In [105]:
def load_and_prep_image(filename, img_shape=224):
    """
    Reads an image from filename, turns it into a tensor
    and reshapes it to (img_shape, img_shape, colour_channel).

    Args:
        filename: Path of the image file to read.
        img_shape: Target height and width (in pixels) of the square output.

    Returns:
        The resized image tensor with its values divided by 255.
    """
    # Read in target file (an image)
    img = tf.io.read_file(filename)
    # Decode the read file into a tensor & ensure 3 colour channels
    # (our model is trained on images with 3 colour channels and sometimes images have 4 colour channels).
    # expand_animations=False keeps the result 3-D for every file type; with
    # the default, GIFs decode to a 4-D tensor and break the steps below.
    img = tf.image.decode_image(img, channels=3, expand_animations=False)
    # Resize the image (to the same size our model was trained on)
    img = tf.image.resize(img, size=[img_shape, img_shape])
    # Rescale the image (get all values between 0 and 1)
    img = img/255.
    return img
# In [106]:
def pred_and_plot(model, filename, class_names):
    """
    Loads the image at filename, predicts its class with a trained model
    and plots the image titled with the predicted class name.

    Args:
        model: Trained model exposing ``predict``.
        filename: Path of the image file to classify.
        class_names: Sequence of class names indexed by predicted class.
    """
    # Load + preprocess, then add a leading batch axis for predict()
    img = load_and_prep_image(filename)
    batch = tf.expand_dims(img, axis=0)
    pred = model.predict(batch)

    # Turn the raw prediction into a class index
    if len(pred[0]) > 1:
        # multi-class output: index of the highest score
        class_index = pred.argmax()
    else:
        # single output: round it to 0 or 1
        class_index = int(tf.round(pred)[0][0])
    pred_class = class_names[class_index]

    # Show the image with the prediction as its title
    plt.imshow(img)
    plt.title(f"Prediction: {pred_class}")
    plt.axis(False)
# In [107]:
pred_and_plot(augmentedModel, "03-sushi.jpeg", class_names)
In [108]:
pred_and_plot(augmentedModel, "03-pizza-dad.jpeg", class_names)
In [109]:
pred_and_plot(augmentedModel, "03-hamburger.jpeg", class_names)
In [ ]:
## Saving The Model
# augmentedModel.save("low_prediction_food_recognition")
# loading the model
# loaded_model_11 = tf.keras.models.load_model("saved_trained_model")
# loaded_model_11.evaluate(test_data)