vertopal.com_3D_Convolutional_Autoencoder
gz_file_path = '/content/drive/MyDrive/UCSD_Anomaly_Dataset.tar.gz'
import tarfile
import cv2
import numpy as np
import os
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Activation, Conv3D, MaxPooling3D, UpSampling3D
from PIL import Image
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
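The archive must be unpacked before any frames can be read; a minimal sketch using the tarfile import above (extracting into /content is an assumption, chosen to match the directory paths used later in this notebook):

# Unpack the UCSD dataset archive (target directory is assumed)
with tarfile.open(gz_file_path, 'r:gz') as tar:
    tar.extractall('/content')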
def load_and_preprocess_frames(directory, frame_width=160, frame_height=160):
    all_videos = []
    for video_name in sorted(os.listdir(directory)):
        video_path = os.path.join(directory, video_name)
        if os.path.isdir(video_path):
            frames = []
            for filename in sorted(os.listdir(video_path)):
                if filename.lower().endswith('.tif'):
                    frame_path = os.path.join(video_path, filename)
                    # Open the image, resize, and convert to grayscale if needed
                    with Image.open(frame_path) as img:
                        img = img.resize((frame_width, frame_height))  # Default 160x160 to match the model input below
                        frame = np.array(img)  # Convert to numpy array
                    frame = np.expand_dims(frame, axis=-1)  # Add channel dimension for grayscale
                    frame = frame / 255.0  # Normalize to [0, 1]
                    frames.append(frame)
            if frames:
                all_videos.append(np.array(frames))
    return all_videos
# Example usage
train_directory_ped1 = '/content/UCSD_Anomaly_Dataset/UCSD_Anomaly_Dataset.v1p2/UCSDped1/Train'
train_directory_ped2 = '/content/UCSD_Anomaly_Dataset/UCSD_Anomaly_Dataset.v1p2/UCSDped2/Train'
train_videos_ped1 = load_and_preprocess_frames(train_directory_ped1)
train_videos_ped2 = load_and_preprocess_frames(train_directory_ped2)
train_videos = train_videos_ped1 + train_videos_ped2
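display_sample_frames is not defined in this export; a minimal sketch using the matplotlib import above (the number of frames shown and the grid layout are assumptions):

def display_sample_frames(videos, num_frames=4):
    # Show the first few frames of the first video as a quick sanity check
    fig, axes = plt.subplots(1, num_frames, figsize=(3 * num_frames, 3))
    for i, ax in enumerate(axes):
        ax.imshow(videos[0][i].squeeze(), cmap='gray')
        ax.set_title(f"Frame {i}")
        ax.axis('off')
    plt.show()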
display_sample_frames(train_videos)
def split_into_sequences(video_frames, sequence_length=16):
    """
    Split a single video into fixed-length sequences of consecutive frames.

    Parameters:
    - video_frames: np.array, the frames of a single video
      (e.g., shape (num_frames, height, width, channels))
    - sequence_length: int, the number of frames per sequence

    Returns:
    - sequences: np.array, shape (num_sequences, sequence_length, height, width, channels)
    """
    num_frames = len(video_frames)
    sequences = []
    # Non-overlapping windows (stride = sequence_length); use a smaller step for overlapping sequences
    for start in range(0, num_frames - sequence_length + 1, sequence_length):
        sequences.append(video_frames[start:start + sequence_length])
    return np.array(sequences)
def build_3d_cnn_autoencoder(input_shape):
    input_layer = Input(shape=input_shape)

    # Encoder
    x = Conv3D(32, (3, 3, 3), padding='same')(input_layer)
    x = Activation('relu')(x)
    encoded = MaxPooling3D((2, 2, 2), padding='same')(x)

    # Decoder
    x = Conv3D(32, (3, 3, 3), padding='same')(encoded)
    x = Activation('relu')(x)
    x = UpSampling3D((2, 2, 2))(x)
    decoded = Conv3D(1, (3, 3, 3), activation='sigmoid', padding='same')(x)  # Single grayscale output channel

    # Autoencoder Model
    autoencoder = Model(input_layer, decoded)
    autoencoder.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return autoencoder
# Example usage
input_shape = (16, 160, 160, 1)  # 16 consecutive frames, 160x160 resolution, 1 channel (grayscale)
cnn_3d_autoencoder = build_3d_cnn_autoencoder(input_shape=input_shape)
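A quick shape check: with one 2x pooling stage in the encoder and one 2x upsampling stage in the decoder, the output shape matches the (16, 160, 160, 1) input.

cnn_3d_autoencoder.summary()  # Verify that the decoder restores the input dimensions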
# Encoder
# x = Conv3D(32, (3, 3, 3), activation='relu', padding='same')(input_layer)
# x = MaxPooling3D((2, 2, 2), padding='same')(x)
# x = Conv3D(64, (3, 3, 3), activation='relu', padding='same')(x)
# x = MaxPooling3D((2, 2, 2), padding='same')(x)
# encoded = Conv3D(277, (3, 3, 3), activation='relu', padding='same')(x)
# # Decoder
# x = UpSampling3D((2, 2, 2))(encoded)
# x = Conv3D(64, (3, 3, 3), activation='relu', padding='same')(x)
# x = UpSampling3D((2, 2, 2))(x)
# decoded = Conv3D(1, (3, 3, 3), activation='sigmoid', padding='same')(x)  # Single channel for grayscale output
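The fit call below references train_data, val_data, and early_stopping, none of which appear in this export. A minimal sketch of how they could be built from the training videos (the 90/10 split ratio, random seed, and early-stopping patience are all assumptions):

from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping

# Stack per-video sequences into one (num_sequences, 16, 160, 160, 1) array
train_3d_data = np.concatenate(
    [split_into_sequences(video, sequence_length=16) for video in train_videos],
    axis=0)

# Hold out 10% of the sequences for validation (ratio is an assumption)
train_data, val_data = train_test_split(train_3d_data, test_size=0.1, random_state=42)

# Stop training once validation loss stops improving (patience is an assumption)
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)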
cnn_3d_autoencoder.compile(optimizer='adam', loss='mse')
cnn_3d_autoencoder.fit(
train_data, train_data,
epochs=10,
batch_size=2,
shuffle=True,
validation_data=(val_data, val_data),
callbacks=[early_stopping]
)
Epoch 1/10
225/225 ━━━━━━━━━━━━━━━━━━━━ 27s 84ms/step - loss: 0.0158 - val_loss: 0.0013
Epoch 2/10
225/225 ━━━━━━━━━━━━━━━━━━━━ 29s 58ms/step - loss: 0.0023 - val_loss: 9.8407e-04
Epoch 3/10
225/225 ━━━━━━━━━━━━━━━━━━━━ 13s 58ms/step - loss: 0.0018 - val_loss: 8.5347e-04
Epoch 4/10
225/225 ━━━━━━━━━━━━━━━━━━━━ 21s 59ms/step - loss: 0.0017 - val_loss: 7.8221e-04
Epoch 5/10
225/225 ━━━━━━━━━━━━━━━━━━━━ 14s 61ms/step - loss: 0.0015 - val_loss: 7.4683e-04
Epoch 6/10
225/225 ━━━━━━━━━━━━━━━━━━━━ 20s 60ms/step - loss: 0.0014 - val_loss: 7.0392e-04
Epoch 7/10
225/225 ━━━━━━━━━━━━━━━━━━━━ 21s 61ms/step - loss: 0.0014 - val_loss: 7.2750e-04
Epoch 8/10
225/225 ━━━━━━━━━━━━━━━━━━━━ 20s 61ms/step - loss: 0.0013 - val_loss: 6.5005e-04
Epoch 9/10
225/225 ━━━━━━━━━━━━━━━━━━━━ 21s 61ms/step - loss: 0.0012 - val_loss: 6.2748e-04
Epoch 10/10
225/225 ━━━━━━━━━━━━━━━━━━━━ 14s 62ms/step - loss: 0.0012 - val_loss: 6.3976e-04
<keras.src.callbacks.history.History at 0x79e7ed803910>
def load_and_preprocess_frames(directory, frame_width=160, frame_height=160):
    all_videos = []
    for video_name in sorted(os.listdir(directory)):
        video_path = os.path.join(directory, video_name)
        if os.path.isdir(video_path):
            frames = []
            for filename in sorted(os.listdir(video_path)):
                if filename.lower().endswith('.tif'):
                    frame_path = os.path.join(video_path, filename)
                    try:
                        # Attempt to open, resize, and normalize the image
                        with Image.open(frame_path) as img:
                            img = img.resize((frame_width, frame_height))
                            frame = np.array(img)  # Convert to numpy array
                        frame = np.expand_dims(frame, axis=-1)  # Add channel dimension for grayscale
                        frame = frame / 255.0  # Normalize to [0, 1]
                        frames.append(frame)
                    except Exception as e:
                        # Print an error message for any file that fails to load
                        print(f"Error loading file {frame_path}: {e}")
            if frames:
                all_videos.append(np.array(frames))
    return all_videos
# Test splits mirror the Train directory layout of the dataset
test_directory_ped1 = '/content/UCSD_Anomaly_Dataset/UCSD_Anomaly_Dataset.v1p2/UCSDped1/Test'
test_directory_ped2 = '/content/UCSD_Anomaly_Dataset/UCSD_Anomaly_Dataset.v1p2/UCSDped2/Test'
test_videos_ped1 = load_and_preprocess_frames(test_directory_ped1)
test_videos_ped2 = load_and_preprocess_frames(test_directory_ped2)
test_videos = test_videos_ped1 + test_videos_ped2
test_3d_data = np.concatenate([split_into_sequences(video, sequence_length=16)
                               for video in test_videos], axis=0)
print("Prepared test data shape:", test_3d_data.shape)
reconstructed_test_data = cnn_3d_autoencoder.predict(test_3d_data)
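reconstruction_errors, used for thresholding below, is not computed in this export; a minimal sketch that scores each 16-frame sequence by its mean squared reconstruction error:

# Per-sequence MSE over all frames, pixels, and channels
reconstruction_errors = np.mean(
    np.square(test_3d_data - reconstructed_test_data), axis=(1, 2, 3, 4))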
threshold = np.percentile(reconstruction_errors, 30)
print("Anomaly threshold:", threshold)
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
Precision: 0.6432
Recall: 0.7471
F1 Score: 0.6912
Recall (0.7471): The model recovers roughly 75% of the truly anomalous sequences, so most anomalies are detected while about a quarter go unflagged.

Precision (0.6432): About 64% of the sequences flagged as anomalous are true anomalies; the rest are false positives. The model is reasonably selective rather than overly sensitive, but precision could be improved if false alarms are a concern.

F1 Score (0.6912): As the harmonic mean of precision and recall, the F1 score of 0.6912 reflects a solid balance between capturing actual anomalies and avoiding false positives, making the model a reasonable general-purpose anomaly detector.