Computer Vision: 3D CNN in Keras

Sunday, 19 June 2016

3D CNN in Keras - Action Recognition

# The code for 3D CNN for Action Recognition
# Please refer to the youtube video for this lesson

3D CNN-Action Recognition Part-1

3D CNN-Action Recognition Part-2

from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution3D, MaxPooling3D

from keras.optimizers import SGD, RMSprop
from keras.utils import np_utils, generic_utils

import theano
import os
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import cv2
from sklearn.cross_validation import train_test_split
from sklearn import cross_validation
from sklearn import preprocessing

# Image specification: frames are resized using img_rows/img_cols, and
# img_depth is the size of the last (frame) axis of each training sample.
img_rows = 16
img_cols = 16
img_depth = 15

# Training data

# Root folder of the dataset; it holds one sub-folder of videos per class.
dataset_dir = 'kth dataset'

# Action classes in label order: the position of a class in this tuple is the
# integer label given to every clip of that class (boxing=0 ... walking=5).
class_names = ('boxing', 'handclapping', 'handwaving',
               'jogging', 'running', 'walking')


def _read_clip(video_path, num_frames, rows, cols):
    """Read the first `num_frames` frames of a video as one grayscale clip.

    Every frame is resized to `rows` x `cols` pixels and converted from BGR
    to grayscale.

    Returns an array of shape (rows, cols, num_frames), or None when the
    file cannot be decoded or yields fewer than `num_frames` frames.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        # Property id 5 is the frame rate (see the message printed below).
        fps = cap.get(5)
        print("Frames per second using video.get(cv2.cv.CV_CAP_PROP_FPS): {0}".format(fps))

        frames = []
        for _ in range(num_frames):
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered (bad file or end of stream). Passing
                # the missing frame on to cv2.resize would only fail with an
                # obscure error, so report the clip as unusable instead.
                return None
            # cv2.resize takes dsize as (width, height), i.e. (cols, rows).
            frame = cv2.resize(frame, (cols, rows), interpolation=cv2.INTER_AREA)
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
    finally:
        # Always free the capture handle, also on the early return above.
        cap.release()

    # No cv2.waitKey()/cv2.destroyAllWindows() here: nothing is displayed,
    # and those GUI calls fail on OpenCV builds without window support.
    clip = np.array(frames)
    print(clip.shape)
    # Move the frame axis last:
    # (num_frames, rows, cols) -> (rows, cols, num_frames).
    clip = np.transpose(clip, (1, 2, 0))
    print(clip.shape)
    return clip


X_tr = []    # variable to store entire dataset (one clip per video)
labels = []  # class index of every clip, kept in step with X_tr

for class_index, class_name in enumerate(class_names):
    class_dir = os.path.join(dataset_dir, class_name)
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore the seeded train/validation split) repeatable.
    for file_name in sorted(os.listdir(class_dir)):
        video_path = os.path.join(class_dir, file_name)
        clip = _read_clip(video_path, img_depth, img_rows, img_cols)
        if clip is None:
            print("Skipping {0}: could not read {1} frames".format(video_path, img_depth))
            continue
        X_tr.append(clip)
        labels.append(class_index)

X_tr_array = np.array(X_tr)  # convert the frames read into array

num_samples = len(X_tr_array)
print(num_samples)

# Assign label to each class. The labels are collected while loading, so they
# stay correct whatever the number of usable videos per class is.
label = np.array(labels, dtype=int)

# Pair the clip array with its label vector, then unpack both under the
# names used by the rest of the script.
train_data = [X_tr_array, label]

(X_train, y_train) = (train_data[0], train_data[1])
print('X_Train shape:', X_train.shape)

# Copy every clip into a 5-D array that has an extra singleton axis right
# after the sample axis: (samples, 1, img_rows, img_cols, img_depth).
train_set = np.zeros((num_samples, 1, img_rows, img_cols, img_depth))

for h in range(num_samples):
    train_set[h, 0] = X_train[h]

# Number of frames used for each video. Tied to img_depth so the model input
# shape cannot drift from the shape of the data built above.
patch_size = img_depth

print(train_set.shape, 'train samples')

# CNN training hyper-parameters: mini-batch size, number of target classes
# and number of training epochs.
batch_size, nb_classes, nb_epoch = 2, 6, 50

# Turn the integer class vector into a binary (one-hot) class matrix.
Y_train = np_utils.to_categorical(y_train, nb_classes)

# Per-layer settings; the model below reads index 0 of each list.
nb_filters = [32, 32]  # number of convolutional filters to use at each layer
nb_pool = [3, 3]       # level of pooling to perform at each layer (POOL x POOL)
nb_conv = [5, 5]       # level of convolution to perform at each layer (CONV x CONV)

# Pre-processing: cast to float32, subtract the global mean, then divide by
# the global maximum of the mean-shifted data (both steps done in place).
train_set = train_set.astype('float32')
train_set -= train_set.mean()
train_set /= train_set.max()

# Define model: one 3D convolution stage followed by a small dense classifier.
model = Sequential()

# NOTE(review): input_shape puts the singleton axis first, which presumably
# relies on a channels-first backend configuration -- confirm before running
# on a different backend.
first_conv = Convolution3D(
    nb_filters[0],
    nb_depth=nb_conv[0],
    nb_row=nb_conv[0],
    nb_col=nb_conv[0],
    input_shape=(1, img_rows, img_cols, patch_size),
    activation='relu'
)

pool_edge = nb_pool[0]

# Layers are created in stacking order and then appended one by one.
layer_stack = (
    first_conv,
    MaxPooling3D(pool_size=(pool_edge, pool_edge, pool_edge)),
    Dropout(0.5),
    Flatten(),
    Dense(128, init='normal', activation='relu'),
    Dropout(0.5),
    Dense(nb_classes, init='normal'),
    Activation('softmax'),
)
for layer in layer_stack:
    model.add(layer)

model.compile(loss='categorical_crossentropy', optimizer='RMSprop')

# Split the data: 20% of the samples are held out, with a fixed random_state
# so the split is the same on every run.
X_train_new, X_val_new, y_train_new, y_val_new = train_test_split(
    train_set, Y_train, test_size=0.2, random_state=4)

# Train the model, validating on the held-out part after every epoch.
held_out = (X_val_new, y_val_new)
hist = model.fit(
    X_train_new,
    y_train_new,
    validation_data=held_out,
    batch_size=batch_size,
    nb_epoch=nb_epoch,
    show_accuracy=True,
    shuffle=True
)

# Evaluate the model. The "Test" figures printed below are computed on the
# same held-out split that was used for validation during training.
score = model.evaluate(
    X_val_new, y_val_new, batch_size=batch_size, show_accuracy=True)
print('Test score:', score[0])
print('Test accuracy:', score[1])

# Plot the results: per-epoch training/validation loss and accuracy curves
# taken from the history object returned by model.fit().
train_loss = hist.history['loss']
val_loss = hist.history['val_loss']
train_acc = hist.history['acc']
val_acc = hist.history['val_acc']

# One x value per recorded epoch. A fixed range(100) does not match the
# length of the history and makes plt.plot() raise a shape-mismatch error.
xc = range(len(train_loss))

print(plt.style.available)  # use bmh, classic, ggplot for big pictures
# Select the style before any figure is created so both figures use it.
plt.style.use(['classic'])


def _plot_history(figure_id, train_values, val_values, y_label, title,
                  **legend_kwargs):
    """Draw one train-vs-validation curve pair in its own figure.

    `legend_kwargs` are forwarded to plt.legend() (e.g. loc=4).
    """
    plt.figure(figure_id, figsize=(7, 5))
    plt.plot(xc, train_values)
    plt.plot(xc, val_values)
    plt.xlabel('num of Epochs')
    plt.ylabel(y_label)
    plt.title(title)
    plt.grid(True)
    plt.legend(['train', 'val'], **legend_kwargs)


_plot_history(1, train_loss, val_loss, 'loss', 'train_loss vs val_loss')
_plot_history(2, train_acc, val_acc, 'accuracy', 'train_acc vs val_acc', loc=4)

43 comments:

Unknown30 June 2016 at 06:12
Hi! Thanks a lot for your post. I have a problem that I have not been able to solve by searching Google for a long time: "TypeError: __init__() takes at least 5 arguments (5 given)" is raised when the model adds Convolution3D. Have you solved this problem? Thanks in advance!
ReplyDelete
Replies
Unknown23 July 2016 at 04:47
This comment has been removed by the author.
ReplyDelete
Replies
Roma25 August 2016 at 07:00
This comment has been removed by the author.
ReplyDelete
Replies
Unknown20 October 2016 at 08:50
Even though I ran the identical code written here, I am not getting a loss value and my accuracy does not change, like this:
2s - loss: nan - acc: 0.1729 - val_loss: nan - val_acc: 0.1417.
Also, testing took 0 second: 120/120 [==============================] - 0s
So, at the end, I have the output like this:
('Test score:', nan)
('Test accuracy:', 0.14166666865348815).
What would be the reason, I couldn't solve the problem here, any help would be appreciated.
Thanks for the tutorial and the code !
ReplyDelete
Replies
alfabeton8 November 2016 at 22:05
This comment has been removed by the author.
ReplyDelete
Replies
alfabeton8 November 2016 at 22:06
This comment has been removed by the author.
ReplyDelete
Replies
Unknown24 December 2016 at 11:59
Just add the following code bro:

from keras import backend as K
K.set_image_dim_ordering('th')
ReplyDelete
Replies
KAJAL KANSAL26 February 2017 at 10:16
Very Nice work..
ReplyDelete
Replies
Unknown8 March 2017 at 12:35
Hi there,

As per your suggestion I updated line model.add(Convolution3D...
Now I am getting following error
('X_Train shape:', (599, 16, 16, 15))
((599, 1, 16, 16, 15), 'train samples')
Traceback (most recent call last):
File "test3d.py", line 302, in
model.add(Convolution3D(nb_filters[0], kernel_dim1=nb_conv[0], kernel_dim2=nb_conv[0], kernel_dim3=nb_conv[0],input_shape=(1, img_rows, img_cols, img_depth), activation='relu'))
File "/home/dejan/anaconda2/lib/python2.7/site-packages/keras/models.py", line 299, in add
layer.create_input_layer(batch_input_shape, input_dtype)
File "/home/dejan/anaconda2/lib/python2.7/site-packages/keras/engine/topology.py", line 401, in create_input_layer
self(x)
File "/home/dejan/anaconda2/lib/python2.7/site-packages/keras/engine/topology.py", line 572, in __call__
self.add_inbound_node(inbound_layers, node_indices, tensor_indices)
File "/home/dejan/anaconda2/lib/python2.7/site-packages/keras/engine/topology.py", line 635, in add_inbound_node
Node.create_node(self, inbound_layers, node_indices, tensor_indices)
File "/home/dejan/anaconda2/lib/python2.7/site-packages/keras/engine/topology.py", line 166, in create_node
output_tensors = to_list(outbound_layer.call(input_tensors[0], mask=input_masks[0]))
File "/home/dejan/anaconda2/lib/python2.7/site-packages/keras/layers/convolutional.py", line 1234, in call
filter_shape=self.W_shape)
File "/home/dejan/anaconda2/lib/python2.7/site-packages/keras/backend/tensorflow_backend.py", line 2831, in conv3d
x = tf.nn.conv3d(x, kernel, strides, padding)
File "/home/dejan/anaconda2/lib/python2.7/site-packages/tensorflow/python/ops/gen_nn_ops.py", line 522, in conv3d
strides=strides, padding=padding, name=name)
File "/home/dejan/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 763, in apply_op
op_def=op_def)
File "/home/dejan/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 2397, in create_op
set_shapes_for_outputs(ret)
File "/home/dejan/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1757, in set_shapes_for_outputs
shapes = shape_func(op)
File "/home/dejan/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1707, in call_with_requiring
return call_cpp_shape_fn(op, require_shape_fn=True)
File "/home/dejan/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/common_shapes.py", line 610, in call_cpp_shape_fn
debug_python_shape_fn, require_shape_fn)
File "/home/dejan/anaconda2/lib/python2.7/site-packages/tensorflow/python/framework/common_shapes.py", line 675, in _call_cpp_shape_fn_impl
raise ValueError(err.message)
ValueError: Negative dimension size caused by subtracting 5 from 1 for 'Conv3D' (op: 'Conv3D') with input shapes: [?,1,16,16,15], [5,5,5,15,32].

Any idea what is causing this error.

Thanks,

Dejan
ReplyDelete
Replies
Unknown8 March 2017 at 20:19
Great tutorial!!!

What about color video. What changes are necessary for color video clips.
ReplyDelete
Replies
Unknown3 August 2017 at 04:56
I'm sorry, could you explain why we have to roll the video dimensions?
ReplyDelete
Replies
Unknown26 October 2017 at 04:04
This comment has been removed by the author.
ReplyDelete
Replies
Unknown13 November 2017 at 06:23
Hi Anuj
Can you tell me how to move from the TensorFlow backend to the Theano backend? I have installed the Theano backend and I am using Anaconda3 with Python 3.6, but when I run the first cell (the "from keras ..." imports) I get a message like "Using TensorFlow backend" in the IPython console.
ReplyDelete
Replies
Unknown3 December 2017 at 15:54
Hey do you have a pretrained model of this cnn?
ReplyDelete
Replies
Akash Panchal25 December 2017 at 05:51
After training how to predict in new video???
ReplyDelete
Replies
Chandni13 January 2018 at 23:30
how much is the accuracy for this?
have u used GPU or trained on CPU?
ReplyDelete
Replies
Reem Alfaifi7 April 2018 at 08:03
I face error in this line: model.add(Convolution3D(nb_filters[0],nb_depth=nb_conv[0], nb_row=nb_conv[0], nb_col=nb_conv[0], input_shape=(1, img_rows, img_cols, patch_size), activation='relu'))

the error is TypeError: __init__() takes at least 3 arguments (3 given)

can you please help me
ReplyDelete
Replies
Simon Nguyen9 April 2018 at 04:47
I can't run this line: model.compile(loss = 'categorical_crossentropy', optimizer = 'RMSprop')
It reports that there is no conv3d function. Does anyone know how to solve this?
ReplyDelete
Replies
Unknown4 June 2018 at 05:19
How can I test a video with this training script? Can you give me a prediction script?
ReplyDelete
Replies
wayne1 January 2019 at 23:04
i face error TypeError: __init__() missing 1 required positional argument: 'kernel_size'
can you please help me ?
ReplyDelete
Replies
Unknown1 April 2019 at 07:20
if cv2.waitKey(1) & 0xFF == ord('q'):
break

I am getting an error at this line: the function is not implemented; rebuild the libraries.
ReplyDelete
Replies
Unknown17 September 2019 at 08:36
its working
ReplyDelete
Replies
Rohit Nale18 November 2019 at 05:05
train_data = [X_tr_array,label]

(X_train, y_train) = (train_data[0],train_data[1])
#print('X_Train shape:', X_train.shape)
#print('y_Train shape:', y_train.shape)

train_set = np.zeros((num_samples, 1,img_rows,img_cols,img_depth))
print (X_train.shape)
print (train_set.shape)

for h in range(num_samples):
train_set[h][0][:][:][:]=X_train[h,:,:,:]
-----------------------------------------------------------------------
IndexError Traceback (most recent call last)
in
18
19 for h in range(num_samples):
---> 20 train_set[h][0][:][:][:]=X_train[h,:,:,:]

IndexError: too many indices for array

Can anyone help me with this error.
ReplyDelete
Replies
Nits..13 March 2020 at 00:05
model.add(Dense(128, activation='relu', kernel_initializer='normal'))

ValueError: ('Non-positive dimensions not allowed in size.', (-512, 128), -512)

I'm getting this error. Need help
ReplyDelete
Replies
Unknown27 July 2020 at 05:12
how to test it on real time video
ReplyDelete
Replies
Ashwin29 August 2020 at 11:34
Thanks for the information. keep sharing.
ReplyDelete
Replies
Anonymous20 September 2020 at 21:18

can you help me to solve this issue..
DisabledFunctionError Traceback (most recent call last)
in ()
45 plt.xticks([]), plt.yticks([]) # to hide tick values on X and Y axis
46 plt.show()
---> 47 cv2.imshow('frame',gray)
48 if cv2.waitKey(1) & 0xFF == ord('q'):
49 break

/usr/local/lib/python3.6/dist-packages/google/colab/_import_hooks/_cv2.py in wrapped(*args, **kwargs)
50 def wrapped(*args, **kwargs):
51 if not os.environ.get(env_var, False):
---> 52 raise DisabledFunctionError(message, name or func.__name__)
53 return func(*args, **kwargs)
54

DisabledFunctionError: cv2.imshow() is disabled in Colab, because it causes Jupyter sessions
to crash; see https://github.com/jupyter/notebook/issues/3935.
As a substitution, consider using
from google.colab.patches import cv2_imshow
ReplyDelete
Replies
Unknown7 October 2020 at 01:36
for this code

from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution3D, MaxPooling3D

from keras.optimizers import SGD, RMSprop
from keras.utils import np_utils, generic_utils

import theano
import os
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import cv2
from sklearn.cross_validation import train_test_split
from sklearn import cross_validation
from sklearn import preprocessing

# image specification
img_rows,img_cols,img_depth=128,128,15

# Training data

X_tr=[] # variable to store entire dataset

#Reading boxing action class

listing = os.listdir('F:/UT INTERACTION/KTH/boxing')

for vid in listing:
vid = 'F:/UT INTERACTION/KTH/boxing/'+vid
frames = []
cap = cv2.VideoCapture(vid)
#fps = cap.get(5)
#print "Frames per second using video.get(cv2.cv.CV_CAP_PROP_FPS): {0}".format(fps)

for k in xrange(15):
frame = cap.read()
frame = np.asarray(frame, dtype=np.uint8)

frame=cv2.resize(frame,(img_rows,img_cols),interpolation=cv2.INTER_AREA)
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

frames.append(gray)

#plt.imshow(gray, cmap = plt.get_cmap('gray'))
#plt.xticks([]), plt.yticks([]) # to hide tick values on X and Y axis
#plt.show()
#cv2.imshow('frame',gray)

cv2.imshow('frame',gray)

if cv2.waitKey(1) & 0xFF == ord('q'):
break

cap.release()
cv2.destroyAllWindows()

input=np.array(frames)

print input.shape
ipt=np.rollaxis(np.rollaxis(input,2,0),2,0)
print ipt.shape

X_tr.append(ipt)

getting error

File "C:\Users\LENOVO\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3267, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)

File "", line 1, in
runfile('F:/activity datasets/readVideo.py', wdir='F:/activity datasets')

File "C:\Users\LENOVO\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 704, in runfile
execfile(filename, namespace)

File "C:\Users\LENOVO\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 108, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)

File "F:/activity datasets/readVideo.py", line 44
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
^
IndentationError: unexpected indent

could you please help me
ReplyDelete
Replies
Unknown16 November 2020 at 02:03
model.add(Conv3D(nb_filters[0],kernel_dim1=nb_conv[0],kernel_dim2=nb_conv[0],kernel_dim3=nb_conv[0],input_shape=(1, img_rows, img_cols, img_depth), activation='relu'))

TypeError: __init__() missing 1 required positional argument: 'kernel_size'
ReplyDelete
Replies
Unknown3 July 2021 at 12:43
hist = model.fit(train_set, Y_train, batch_size=batch_size, epochs=nb_epoch,validation_split=0.2,shuffle=True)

getting error when i run this code
error is
ValueError: Input 0 of layer sequential_5 is incompatible with the layer: expected axis -1 of input shape to have value 5 but received input with shape (None, 1, 16, 16, 15)
ReplyDelete
Replies
Unknown7 September 2021 at 20:51
how can i get this original image sources?
ReplyDelete
Replies

Add comment